ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18
  19 -- NCG stuff:
  20 import MachInstrs
  21 import MachRegs
  22 import NCGMonad
  23
  24 -- Our intermediate code:
  25 import PprCmm           ( pprExpr )
  26 import Cmm
  27 import MachOp
  28 import CLabel
  29
  30 -- The rest:
  31 import CmdLineOpts      ( opt_Static )
  32 import ForeignCall      ( CCallConv(..) )
  33 import OrdList
  34 import Pretty
  35 import Outputable
  36 import qualified Outputable
  37 import FastString
  38 import FastTypes        ( isFastTrue )
  39
  40 #ifdef DEBUG
  41 import Outputable       ( assertPanic )
  42 import TRACE            ( trace )
  43 #endif
  44
  45 import Control.Monad    ( mapAndUnzipM )
  46 import Maybe            ( fromJust )
  47 import DATA_BITS
  48 import DATA_WORD
  49
  50 -- -----------------------------------------------------------------------------
  51 -- Top-level of the instruction selector
  52
  53 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  54 -- They are really trees of insns to facilitate fast appending, where a
  55 -- left-to-right traversal (pre-order?) yields the insns in the correct
  56 -- order.
  57
  58 type InstrBlock = OrdList Instr
  59
  60 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  61 cmmTopCodeGen (CmmProc info lab params blocks) = do
  62   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  63   return (CmmProc info lab params (concat nat_blocks) : concat statics)
  64 cmmTopCodeGen (CmmData sec dat) = do
  65   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  66
  67 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  68 basicBlockCodeGen (BasicBlock id stmts) = do
  69   instrs <- stmtsToInstrs stmts
  70   -- code generation may introduce new basic block boundaries, which
  71   -- are indicated by the NEWBLOCK instruction.  We must split up the
  72   -- instruction stream into basic blocks again.  Also, we extract
  73   -- LDATAs here too.
  74   let
  75         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  76
  77         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  78           = ([], BasicBlock id instrs : blocks, statics)
  79         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  80           = (instrs, blocks, CmmData sec dat:statics)
  81         mkBlocks instr (instrs,blocks,statics)
  82           = (instr:instrs, blocks, statics)
  83   -- in
  84   return (BasicBlock id top : other_blocks, statics)
  85
  86 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  87 stmtsToInstrs stmts
  88    = do instrss <- mapM stmtToInstrs stmts
  89         return (concatOL instrss)
  90
  91 stmtToInstrs :: CmmStmt -> NatM InstrBlock
  92 stmtToInstrs stmt = case stmt of
  93     CmmNop         -> return nilOL
  94     CmmComment s   -> return (unitOL (COMMENT s))
  95
  96     CmmAssign reg src
  97       | isFloatingRep kind -> assignReg_FltCode kind reg src
  98       | wordRep == I32 && kind == I64
  99                            -> assignReg_I64Code      reg src
 100       | otherwise          -> assignReg_IntCode kind reg src
 101         where kind = cmmRegRep reg
 102
 103     CmmStore addr src
 104       | isFloatingRep kind -> assignMem_FltCode kind addr src
 105       | wordRep == I32 && kind == I64
 106                          -> assignMem_I64Code      addr src
 107       | otherwise        -> assignMem_IntCode kind addr src
 108         where kind = cmmExprRep src
 109
 110     CmmCall target result_regs args vols
 111        -> genCCall target result_regs args vols
 112
 113     CmmBranch id          -> genBranch id
 114     CmmCondBranch arg id  -> genCondJump id arg
 115     CmmSwitch arg ids     -> genSwitch arg ids
 116     CmmJump arg params    -> genJump arg
 117
 118 -- -----------------------------------------------------------------------------
 119 -- General things for putting together code sequences
 120
 121 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 122 -- CmmExprs into CmmRegOff?
 123 mangleIndexTree :: CmmExpr -> CmmExpr
 124 mangleIndexTree (CmmRegOff reg off)
 125   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 126   where rep = cmmRegRep reg
 127
 128 -- -----------------------------------------------------------------------------
 129 --  Code gen for 64-bit arithmetic on 32-bit platforms
 130
 131 {-
 132 Simple support for generating 64-bit code (ie, 64 bit values and 64
 133 bit assignments) on 32-bit platforms.  Unlike the main code generator
 134 we merely shoot for generating working code as simply as possible, and
 135 pay little attention to code quality.  Specifically, there is no
 136 attempt to deal cleverly with the fixed-vs-floating register
 137 distinction; all values are generated into (pairs of) floating
 138 registers, even if this would mean some redundant reg-reg moves as a
 139 result.  Only one of the VRegUniques is returned, since it will be
 140 of the VRegUniqueLo form, and the upper-half VReg can be determined
 141 by applying getHiVRegFromLo to it.
 142 -}
 143
 144 data ChildCode64        -- a.k.a "Register64"
 145    = ChildCode64
 146         InstrBlock      -- code
 147         Reg             -- the lower 32-bit temporary which contains the
 148                         -- result; use getHiVRegFromLo to find the other
 149                         -- VRegUnique.  Rules of this simplified insn
 150                         -- selection game are therefore that the returned
 151                         -- Reg may be modified
 152
 153 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 154 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 155 iselExpr64        :: CmmExpr -> NatM ChildCode64
 156
 157 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 158
 159 #if i386_TARGET_ARCH
 160
 161 assignMem_I64Code addrTree valueTree = do
 162   Amode addr addr_code <- getAmode addrTree
 163   ChildCode64 vcode rlo <- iselExpr64 valueTree
 164   let
 165         rhi = getHiVRegFromLo rlo
 166
 167         -- Little-endian store
 168         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 169         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 170   -- in
 171   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 172
 173
 174 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 175    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 176    let
 177          r_dst_lo = mkVReg u_dst I32
 178          r_dst_hi = getHiVRegFromLo r_dst_lo
 179          r_src_hi = getHiVRegFromLo r_src_lo
 180          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 181          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 182    -- in
 183    return (
 184         vcode `snocOL` mov_lo `snocOL` mov_hi
 185      )
 186
 187 assignReg_I64Code lvalue valueTree
 188    = panic "assignReg_I64Code(i386): invalid lvalue"
 189
 190 ------------
 191
 192 iselExpr64 (CmmLit (CmmInt i _)) = do
 193   (rlo,rhi) <- getNewRegPairNat I32
 194   let
 195         r = fromIntegral (fromIntegral i :: Word32)
 196         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 197         code = toOL [
 198                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 199                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 200                 ]
 201   -- in
 202   return (ChildCode64 code rlo)
 203
 204 iselExpr64 (CmmLoad addrTree I64) = do
 205    Amode addr addr_code <- getAmode addrTree
 206    (rlo,rhi) <- getNewRegPairNat I32
 207    let
 208         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 209         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 210    -- in
 211    return (
 212             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 213                         rlo
 214      )
 215
 216 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 217    = return (ChildCode64 nilOL (mkVReg vu I32))
 218
 219 -- we handle addition, but rather badly
 220 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 221    ChildCode64 code1 r1lo <- iselExpr64 e1
 222    (rlo,rhi) <- getNewRegPairNat I32
 223    let
 224         r = fromIntegral (fromIntegral i :: Word32)
 225         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 226         r1hi = getHiVRegFromLo r1lo
 227         code =  code1 `appOL`
 228                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 229                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 230                        MOV I32 (OpReg r1hi) (OpReg rhi),
 231                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 232    -- in
 233    return (ChildCode64 code rlo)
 234
 235 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 236    ChildCode64 code1 r1lo <- iselExpr64 e1
 237    ChildCode64 code2 r2lo <- iselExpr64 e2
 238    (rlo,rhi) <- getNewRegPairNat I32
 239    let
 240         r1hi = getHiVRegFromLo r1lo
 241         r2hi = getHiVRegFromLo r2lo
 242         code =  code1 `appOL`
 243                 code2 `appOL`
 244                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 245                        ADD I32 (OpReg r2lo) (OpReg rlo),
 246                        MOV I32 (OpReg r1hi) (OpReg rhi),
 247                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 248    -- in
 249    return (ChildCode64 code rlo)
 250
 251 iselExpr64 expr
 252    = pprPanic "iselExpr64(i386)" (ppr expr)
 253
 254 #endif /* i386_TARGET_ARCH */
 255
 256 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 257
 258 #if sparc_TARGET_ARCH
 259
 260 assignMem_I64Code addrTree valueTree
 261    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vrlo) ->
 262      getRegister addrTree               `thenNat` \ register_addr ->
 263      getNewRegNat IntRep                `thenNat` \ t_addr ->
 264      let rlo = VirtualRegI vrlo
 265          rhi = getHiVRegFromLo rlo
 266          code_addr = registerCode register_addr t_addr
 267          reg_addr  = registerName register_addr t_addr
 268          -- Big-endian store
 269          mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
 270          mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
 271      in
 272          return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
 273
 274
 275 assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
 276    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
 277      let
 278          r_dst_lo = mkVReg u_dst IntRep
 279          r_src_lo = VirtualRegI vr_src_lo
 280          r_dst_hi = getHiVRegFromLo r_dst_lo
 281          r_src_hi = getHiVRegFromLo r_src_lo
 282          mov_lo = mkMOV r_src_lo r_dst_lo
 283          mov_hi = mkMOV r_src_hi r_dst_hi
 284          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 285      in
 286          return (
 287             vcode `snocOL` mov_hi `snocOL` mov_lo
 288          )
 289 assignReg_I64Code lvalue valueTree
 290    = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
 291               (pprStixReg lvalue)
 292
 293
 294 -- Don't delete this -- it's very handy for debugging.
 295 --iselExpr64 expr
 296 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 297 --   = panic "iselExpr64(???)"
 298
 299 iselExpr64 (CmmLoad I64 addrTree)
 300    = getRegister addrTree               `thenNat` \ register_addr ->
 301      getNewRegNat IntRep                `thenNat` \ t_addr ->
 302      getNewRegNat IntRep                `thenNat` \ rlo ->
 303      let rhi = getHiVRegFromLo rlo
 304          code_addr = registerCode register_addr t_addr
 305          reg_addr  = registerName register_addr t_addr
 306          mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
 307          mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
 308      in
 309          return (
 310             ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
 311                         (getVRegUnique rlo)
 312          )
 313
 314 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
 315    = getNewRegNat IntRep                `thenNat` \ r_dst_lo ->
 316      let r_dst_hi = getHiVRegFromLo r_dst_lo
 317          r_src_lo = mkVReg vu IntRep
 318          r_src_hi = getHiVRegFromLo r_src_lo
 319          mov_lo = mkMOV r_src_lo r_dst_lo
 320          mov_hi = mkMOV r_src_hi r_dst_hi
 321          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 322      in
 323          return (
 324             ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
 325          )
 326
 327 iselExpr64 (StCall fn cconv I64 args)
 328   = genCCall fn cconv kind args                 `thenNat` \ call ->
 329     getNewRegNat IntRep                         `thenNat` \ r_dst_lo ->
 330     let r_dst_hi = getHiVRegFromLo r_dst_lo
 331         mov_lo = mkMOV o0 r_dst_lo
 332         mov_hi = mkMOV o1 r_dst_hi
 333         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 334     in
 335     return (
 336        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
 337                    (getVRegUnique r_dst_lo)
 338     )
 339
 340 iselExpr64 expr
 341    = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
 342
 343 #endif /* sparc_TARGET_ARCH */
 344
 345 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 346
 347 #if powerpc_TARGET_ARCH
 348
 349 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 350 getI64Amodes addrTree = do
 351     Amode hi_addr addr_code <- getAmode addrTree
 352     case addrOffset hi_addr 4 of
 353         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 354         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 355                            return (AddrRegImm hi_ptr (ImmInt 0),
 356                                    AddrRegImm hi_ptr (ImmInt 4),
 357                                    code)
 358
 359 assignMem_I64Code addrTree valueTree = do
 360         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 361         ChildCode64 vcode rlo <- iselExpr64 valueTree
 362         let
 363                 rhi = getHiVRegFromLo rlo
 364
 365                 -- Big-endian store
 366                 mov_hi = ST I32 rhi hi_addr
 367                 mov_lo = ST I32 rlo lo_addr
 368         -- in
 369         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 370
 371 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 372    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 373    let
 374          r_dst_lo = mkVReg u_dst I32
 375          r_dst_hi = getHiVRegFromLo r_dst_lo
 376          r_src_hi = getHiVRegFromLo r_src_lo
 377          mov_lo = MR r_dst_lo r_src_lo
 378          mov_hi = MR r_dst_hi r_src_hi
 379    -- in
 380    return (
 381         vcode `snocOL` mov_lo `snocOL` mov_hi
 382      )
 383
 384 assignReg_I64Code lvalue valueTree
 385    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 386
 387
 388 -- Don't delete this -- it's very handy for debugging.
 389 --iselExpr64 expr
 390 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 391 --   = panic "iselExpr64(???)"
 392
 393 iselExpr64 (CmmLoad addrTree I64) = do
 394     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 395     (rlo, rhi) <- getNewRegPairNat I32
 396     let mov_hi = LD I32 rhi hi_addr
 397         mov_lo = LD I32 rlo lo_addr
 398     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 399                          rlo
 400
 401 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 402    = return (ChildCode64 nilOL (mkVReg vu I32))
 403
 404 iselExpr64 (CmmLit (CmmInt i _)) = do
 405   (rlo,rhi) <- getNewRegPairNat I32
 406   let
 407         half0 = fromIntegral (fromIntegral i :: Word16)
 408         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 409         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 410         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 411
 412         code = toOL [
 413                 LIS rlo (ImmInt half1),
 414                 OR rlo rlo (RIImm $ ImmInt half0),
 415                 LIS rhi (ImmInt half3),
 416                 OR rlo rlo (RIImm $ ImmInt half2)
 417                 ]
 418   -- in
 419   return (ChildCode64 code rlo)
 420
 421 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 422    ChildCode64 code1 r1lo <- iselExpr64 e1
 423    ChildCode64 code2 r2lo <- iselExpr64 e2
 424    (rlo,rhi) <- getNewRegPairNat I32
 425    let
 426         r1hi = getHiVRegFromLo r1lo
 427         r2hi = getHiVRegFromLo r2lo
 428         code =  code1 `appOL`
 429                 code2 `appOL`
 430                 toOL [ ADDC rlo r1lo r2lo,
 431                        ADDE rhi r1hi r2hi ]
 432    -- in
 433    return (ChildCode64 code rlo)
 434
 435 iselExpr64 expr
 436    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 437
 438 #endif /* powerpc_TARGET_ARCH */
 439
 440
 441 -- -----------------------------------------------------------------------------
 442 -- The 'Register' type
 443
 444 -- 'Register's passed up the tree.  If the stix code forces the register
 445 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 446 -- otherwise, it comes out as @Any@, and the parent can decide which
 447 -- register to put it in.
 448
 449 data Register
 450   = Fixed   MachRep Reg InstrBlock
 451   | Any     MachRep (Reg -> InstrBlock)
 452
 453 swizzleRegisterRep :: Register -> MachRep -> Register
 454 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 455 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 456
 457
 458 -- -----------------------------------------------------------------------------
 459 -- Grab the Reg for a CmmReg
 460
 461 getRegisterReg :: CmmReg -> Reg
 462
 463 getRegisterReg (CmmLocal (LocalReg u pk))
 464   = mkVReg u pk
 465
 466 getRegisterReg (CmmGlobal mid)
 467   = case get_GlobalReg_reg_or_addr mid of
 468        Left (RealReg rrno) -> RealReg rrno
 469        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 470           -- By this stage, the only MagicIds remaining should be the
 471           -- ones which map to a real machine register on this
 472           -- platform.  Hence ...
 473
 474
 475 -- -----------------------------------------------------------------------------
 476 -- Generate code to get a subtree into a Register
 477
 478 -- Don't delete this -- it's very handy for debugging.
 479 --getRegister expr
 480 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 481 --   = panic "getRegister(???)"
 482
 483 getRegister :: CmmExpr -> NatM Register
 484
 485 getRegister (CmmReg reg)
 486   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 487
 488 getRegister tree@(CmmRegOff _ _)
 489   = getRegister (mangleIndexTree tree)
 490
 491 -- end of machine-"independent" bit; here we go on the rest...
 492
 493 #if alpha_TARGET_ARCH
 494
 495 getRegister (StDouble d)
 496   = getBlockIdNat                   `thenNat` \ lbl ->
 497     getNewRegNat PtrRep             `thenNat` \ tmp ->
 498     let code dst = mkSeqInstrs [
 499             LDATA RoDataSegment lbl [
 500                     DATA TF [ImmLab (rational d)]
 501                 ],
 502             LDA tmp (AddrImm (ImmCLbl lbl)),
 503             LD TF dst (AddrReg tmp)]
 504     in
 505         return (Any F64 code)
 506
 507 getRegister (StPrim primop [x]) -- unary PrimOps
 508   = case primop of
 509       IntNegOp -> trivialUCode (NEG Q False) x
 510
 511       NotOp    -> trivialUCode NOT x
 512
 513       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 514       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 515
 516       OrdOp -> coerceIntCode IntRep x
 517       ChrOp -> chrCode x
 518
 519       Float2IntOp  -> coerceFP2Int    x
 520       Int2FloatOp  -> coerceInt2FP pr x
 521       Double2IntOp -> coerceFP2Int    x
 522       Int2DoubleOp -> coerceInt2FP pr x
 523
 524       Double2FloatOp -> coerceFltCode x
 525       Float2DoubleOp -> coerceFltCode x
 526
 527       other_op -> getRegister (StCall fn CCallConv F64 [x])
 528         where
 529           fn = case other_op of
 530                  FloatExpOp    -> FSLIT("exp")
 531                  FloatLogOp    -> FSLIT("log")
 532                  FloatSqrtOp   -> FSLIT("sqrt")
 533                  FloatSinOp    -> FSLIT("sin")
 534                  FloatCosOp    -> FSLIT("cos")
 535                  FloatTanOp    -> FSLIT("tan")
 536                  FloatAsinOp   -> FSLIT("asin")
 537                  FloatAcosOp   -> FSLIT("acos")
 538                  FloatAtanOp   -> FSLIT("atan")
 539                  FloatSinhOp   -> FSLIT("sinh")
 540                  FloatCoshOp   -> FSLIT("cosh")
 541                  FloatTanhOp   -> FSLIT("tanh")
 542                  DoubleExpOp   -> FSLIT("exp")
 543                  DoubleLogOp   -> FSLIT("log")
 544                  DoubleSqrtOp  -> FSLIT("sqrt")
 545                  DoubleSinOp   -> FSLIT("sin")
 546                  DoubleCosOp   -> FSLIT("cos")
 547                  DoubleTanOp   -> FSLIT("tan")
 548                  DoubleAsinOp  -> FSLIT("asin")
 549                  DoubleAcosOp  -> FSLIT("acos")
 550                  DoubleAtanOp  -> FSLIT("atan")
 551                  DoubleSinhOp  -> FSLIT("sinh")
 552                  DoubleCoshOp  -> FSLIT("cosh")
 553                  DoubleTanhOp  -> FSLIT("tanh")
 554   where
 555     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 556
 557 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 558   = case primop of
 559       CharGtOp -> trivialCode (CMP LTT) y x
 560       CharGeOp -> trivialCode (CMP LE) y x
 561       CharEqOp -> trivialCode (CMP EQQ) x y
 562       CharNeOp -> int_NE_code x y
 563       CharLtOp -> trivialCode (CMP LTT) x y
 564       CharLeOp -> trivialCode (CMP LE) x y
 565
 566       IntGtOp  -> trivialCode (CMP LTT) y x
 567       IntGeOp  -> trivialCode (CMP LE) y x
 568       IntEqOp  -> trivialCode (CMP EQQ) x y
 569       IntNeOp  -> int_NE_code x y
 570       IntLtOp  -> trivialCode (CMP LTT) x y
 571       IntLeOp  -> trivialCode (CMP LE) x y
 572
 573       WordGtOp -> trivialCode (CMP ULT) y x
 574       WordGeOp -> trivialCode (CMP ULE) x y
 575       WordEqOp -> trivialCode (CMP EQQ)  x y
 576       WordNeOp -> int_NE_code x y
 577       WordLtOp -> trivialCode (CMP ULT) x y
 578       WordLeOp -> trivialCode (CMP ULE) x y
 579
 580       AddrGtOp -> trivialCode (CMP ULT) y x
 581       AddrGeOp -> trivialCode (CMP ULE) y x
 582       AddrEqOp -> trivialCode (CMP EQQ)  x y
 583       AddrNeOp -> int_NE_code x y
 584       AddrLtOp -> trivialCode (CMP ULT) x y
 585       AddrLeOp -> trivialCode (CMP ULE) x y
 586
 587       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 588       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 589       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 590       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 591       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 592       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 593
 594       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 595       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 596       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 597       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 598       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 599       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 600
 601       IntAddOp  -> trivialCode (ADD Q False) x y
 602       IntSubOp  -> trivialCode (SUB Q False) x y
 603       IntMulOp  -> trivialCode (MUL Q False) x y
 604       IntQuotOp -> trivialCode (DIV Q False) x y
 605       IntRemOp  -> trivialCode (REM Q False) x y
 606
 607       WordAddOp  -> trivialCode (ADD Q False) x y
 608       WordSubOp  -> trivialCode (SUB Q False) x y
 609       WordMulOp  -> trivialCode (MUL Q False) x y
 610       WordQuotOp -> trivialCode (DIV Q True) x y
 611       WordRemOp  -> trivialCode (REM Q True) x y
 612
 613       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 614       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 615       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 616       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 617
 618       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 619       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 620       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 621       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 622
 623       AddrAddOp  -> trivialCode (ADD Q False) x y
 624       AddrSubOp  -> trivialCode (SUB Q False) x y
 625       AddrRemOp  -> trivialCode (REM Q True) x y
 626
 627       AndOp  -> trivialCode AND x y
 628       OrOp   -> trivialCode OR  x y
 629       XorOp  -> trivialCode XOR x y
 630       SllOp  -> trivialCode SLL x y
 631       SrlOp  -> trivialCode SRL x y
 632
 633       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 634       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 635       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 636
 637       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 638       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 639   where
 640     {- ------------------------------------------------------------
 641         Some bizarre special code for getting condition codes into
 642         registers.  Integer non-equality is a test for equality
 643         followed by an XOR with 1.  (Integer comparisons always set
 644         the result register to 0 or 1.)  Floating point comparisons of
 645         any kind leave the result in a floating point register, so we
 646         need to wrangle an integer register out of things.
 647     -}
 648     int_NE_code :: StixTree -> StixTree -> NatM Register
 649
 650     int_NE_code x y
 651       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 652         getNewRegNat IntRep             `thenNat` \ tmp ->
 653         let
 654             code = registerCode register tmp
 655             src  = registerName register tmp
 656             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 657         in
 658         return (Any IntRep code__2)
 659
 660     {- ------------------------------------------------------------
 661         Comments for int_NE_code also apply to cmpF_code
 662     -}
 663     cmpF_code
 664         :: (Reg -> Reg -> Reg -> Instr)
 665         -> Cond
 666         -> StixTree -> StixTree
 667         -> NatM Register
 668
 669     cmpF_code instr cond x y
 670       = trivialFCode pr instr x y       `thenNat` \ register ->
 671         getNewRegNat F64                `thenNat` \ tmp ->
 672         getBlockIdNat                   `thenNat` \ lbl ->
 673         let
 674             code = registerCode register tmp
 675             result  = registerName register tmp
 676
 677             code__2 dst = code . mkSeqInstrs [
 678                 OR zeroh (RIImm (ImmInt 1)) dst,
 679                 BF cond  result (ImmCLbl lbl),
 680                 OR zeroh (RIReg zeroh) dst,
 681                 NEWBLOCK lbl]
 682         in
 683         return (Any IntRep code__2)
 684       where
 685         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 686       ------------------------------------------------------------
 687
 688 getRegister (CmmLoad pk mem)
 689   = getAmode mem                    `thenNat` \ amode ->
 690     let
 691         code = amodeCode amode
 692         src   = amodeAddr amode
 693         size = primRepToSize pk
 694         code__2 dst = code . mkSeqInstr (LD size dst src)
 695     in
 696     return (Any pk code__2)
 697
 698 getRegister (StInt i)
 699   | fits8Bits i
 700   = let
 701         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 702     in
 703     return (Any IntRep code)
 704   | otherwise
 705   = let
 706         code dst = mkSeqInstr (LDI Q dst src)
 707     in
 708     return (Any IntRep code)
 709   where
 710     src = ImmInt (fromInteger i)
 711
 712 getRegister leaf
 713   | isJust imm
 714   = let
 715         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 716     in
 717     return (Any PtrRep code)
 718   where
 719     imm = maybeImm leaf
 720     imm__2 = case imm of Just x -> x
 721
 722 #endif /* alpha_TARGET_ARCH */
 723
 724 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 725
 726 #if i386_TARGET_ARCH
 727
 728 getRegister (CmmLit (CmmFloat f F32)) = do
 729     lbl <- getNewLabelNat
 730     let code dst = toOL [
 731             LDATA ReadOnlyData
 732                         [CmmDataLabel lbl,
 733                          CmmStaticLit (CmmFloat f F32)],
 734             GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
 735             ]
 736     -- in
 737     return (Any F32 code)
 738
 739
 740 getRegister (CmmLit (CmmFloat d F64))
 741   | d == 0.0
 742   = let code dst = unitOL (GLDZ dst)
 743     in  return (Any F64 code)
 744
 745   | d == 1.0
 746   = let code dst = unitOL (GLD1 dst)
 747     in  return (Any F64 code)
 748
 749   | otherwise = do
 750     lbl <- getNewLabelNat
 751     let code dst = toOL [
 752             LDATA ReadOnlyData
 753                         [CmmDataLabel lbl,
 754                          CmmStaticLit (CmmFloat d F64)],
 755             GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
 756             ]
 757     -- in
 758     return (Any F64 code)
 759
 760
 761 -- catch simple cases of zero- or sign-extended load
 762 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 763   code <- intLoadCode (MOVZxL I8) addr
 764   return (Any I32 code)
 765
 766 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 767   code <- intLoadCode (MOVSxL I8) addr
 768   return (Any I32 code)
 769
 770 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 771   code <- intLoadCode (MOVZxL I16) addr
 772   return (Any I32 code)
 773
 774 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 775   code <- intLoadCode (MOVSxL I16) addr
 776   return (Any I32 code)
 777
 778
 779 getRegister (CmmMachOp mop [x]) -- unary MachOps
 780   = case mop of
 781       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 782       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 783
 784       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 785       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 786
 787       -- Nop conversions
 788       -- TODO: these are only nops if the arg is not a fixed register that
 789       -- can't be byte-addressed.
 790       MO_U_Conv I32 I8  -> conversionNop I32 x
 791       MO_S_Conv I32 I8  -> conversionNop I32 x
 792       MO_U_Conv I16 I8  -> conversionNop I16 x
 793       MO_S_Conv I16 I8  -> conversionNop I16 x
 794       MO_U_Conv I32 I16 -> conversionNop I32 x
 795       MO_S_Conv I32 I16 -> conversionNop I32 x
 796       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 797       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 798
 799       -- widenings
 800       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 801       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 802       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 803
 804       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 805       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 806       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 807
 808       MO_S_Conv F32 F64 -> conversionNop F64 x
 809       MO_S_Conv F64 F32 -> conversionNop F32 x
 810       MO_S_Conv from to
 811         | isFloatingRep from -> coerceFP2Int from to x
 812         | isFloatingRep to   -> coerceInt2FP from to x
 813
 814    where
 815         -- signed or unsigned extension.
 816         integerExtend from to instr expr = do
 817             (reg,e_code) <- if from == I8 then getByteReg expr
 818                                           else getSomeReg expr
 819             let
 820                 code dst =
 821                   e_code `snocOL`
 822                   instr from (OpReg reg) (OpReg dst)
 823             return (Any to code)
 824
 825         conversionNop new_rep expr
 826             = do e_code <- getRegister expr
 827                  return (swizzleRegisterRep e_code new_rep)
 828
 829
 830 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 831   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 832     case mop of
 833       MO_Eq F32   -> condFltReg EQQ x y
 834       MO_Ne F32   -> condFltReg NE x y
 835       MO_S_Gt F32 -> condFltReg GTT x y
 836       MO_S_Ge F32 -> condFltReg GE x y
 837       MO_S_Lt F32 -> condFltReg LTT x y
 838       MO_S_Le F32 -> condFltReg LE x y
 839
 840       MO_Eq F64   -> condFltReg EQQ x y
 841       MO_Ne F64   -> condFltReg NE x y
 842       MO_S_Gt F64 -> condFltReg GTT x y
 843       MO_S_Ge F64 -> condFltReg GE x y
 844       MO_S_Lt F64 -> condFltReg LTT x y
 845       MO_S_Le F64 -> condFltReg LE x y
 846
 847       MO_Eq rep   -> condIntReg EQQ x y
 848       MO_Ne rep   -> condIntReg NE x y
 849
 850       MO_S_Gt rep -> condIntReg GTT x y
 851       MO_S_Ge rep -> condIntReg GE x y
 852       MO_S_Lt rep -> condIntReg LTT x y
 853       MO_S_Le rep -> condIntReg LE x y
 854
 855       MO_U_Gt rep -> condIntReg GU  x y
 856       MO_U_Ge rep -> condIntReg GEU x y
 857       MO_U_Lt rep -> condIntReg LU  x y
 858       MO_U_Le rep -> condIntReg LEU x y
 859
 860       MO_Add F32 -> trivialFCode F32  GADD x y
 861       MO_Sub F32 -> trivialFCode F32  GSUB x y
 862
 863       MO_Add F64 -> trivialFCode F64 GADD x y
 864       MO_Sub F64 -> trivialFCode F64 GSUB x y
 865
 866       MO_S_Quot F32 -> trivialFCode  F32  GDIV x y
 867       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
 868
 869       MO_Add rep -> add_code rep x y
 870       MO_Sub rep -> sub_code rep x y
 871
 872       MO_S_Quot rep -> div_code rep True  True  x y
 873       MO_S_Rem  rep -> div_code rep True  False x y
 874       MO_U_Quot rep -> div_code rep False True  x y
 875       MO_U_Rem  rep -> div_code rep False False x y
 876
 877       MO_Mul   F32 -> trivialFCode F32 GMUL x y
 878       MO_Mul   F64 -> trivialFCode F64 GMUL x y
 879       MO_Mul   rep -> let op = IMUL rep in
 880                       trivialCode rep op (Just op) x y
 881
 882       MO_S_MulMayOflo rep -> imulMayOflo rep x y
 883
 884       MO_And rep -> let op = AND rep in
 885                     trivialCode rep op (Just op) x y
 886       MO_Or  rep -> let op = OR  rep in
 887                     trivialCode rep op (Just op) x y
 888       MO_Xor rep -> let op = XOR rep in
 889                     trivialCode rep op (Just op) x y
 890
 891         {- Shift ops on x86s have constraints on their source, it
 892            either has to be Imm, CL or 1
 893             => trivialCode is not restrictive enough (sigh.)
 894         -}
 895       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
 896       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
 897       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
 898
 899       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
 900   where
 901     --------------------
 902     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
 903     imulMayOflo I32 a b = do
 904          res_lo <- getNewRegNat I32
 905          res_hi <- getNewRegNat I32
 906          (a_reg, a_code) <- getNonClobberedReg a
 907          (b_reg, b_code) <- getSomeReg   b
 908          let
 909              code dst = a_code `appOL` b_code `appOL`
 910                         toOL [
 911                            MOV I32 (OpReg a_reg) (OpReg res_hi),
 912                            MOV I32 (OpReg b_reg) (OpReg res_lo),
 913                            IMUL64 res_hi res_lo,                -- result in res_hi:res_lo
 914                            SAR I32 (OpImm (ImmInt 31)) (OpReg res_lo),  -- sign extend lower part
 915                            SUB I32 (OpReg res_hi) (OpReg res_lo),       -- compare against upper
 916                            MOV I32 (OpReg res_lo) (OpReg dst)
 917                            -- dst==0 if high part == sign extended low part
 918                         ]
 919          -- in
 920          return (Any I32 code)
 921
 922     --------------------
 923     shift_code :: MachRep
 924                -> (Operand -> Operand -> Instr)
 925                -> CmmExpr
 926                -> CmmExpr
 927                -> NatM Register
 928
 929     {- Case1: shift length as immediate -}
 930     shift_code rep instr x y@(CmmLit lit) = do
 931           x_code <- getAnyReg x
 932           let
 933                code dst
 934                   = x_code dst `snocOL`
 935                     instr (OpImm (litToImm lit)) (OpReg dst)
 936           -- in
 937           return (Any rep code)
 938
 939     {- Case2: shift length is complex (non-immediate) -}
 940     shift_code rep instr x y{-amount-} = do
 941         (x_reg, x_code) <- getNonClobberedReg x
 942         y_code <- getAnyReg y
 943         let
 944            code = x_code `appOL`
 945                   y_code ecx `snocOL`
 946                   instr (OpReg ecx) (OpReg x_reg)
 947         -- in
 948         return (Fixed rep x_reg code)
 949
 950     --------------------
 951     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
 952     add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
 953     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
 954
 955     --------------------
 956     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
 957     sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
 958     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
 959
 960     -- our three-operand add instruction:
 961     add_int rep x y = do
 962         (x_reg, x_code) <- getSomeReg x
 963         let
 964             imm = ImmInt (fromInteger y)
 965             code dst
 966                = x_code `snocOL`
 967                  LEA rep
 968                         (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
 969                         (OpReg dst)
 970         --
 971         return (Any rep code)
 972
 973     ----------------------
 974     div_code rep signed quotient x y = do
 975            (y_op, y_code) <- getOperand y -- cannot be clobbered
 976            x_code <- getAnyReg x
 977            let
 978              widen | signed    = CLTD
 979                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
 980
 981              instr | signed    = IDIV
 982                    | otherwise = DIV
 983
 984              code = y_code `appOL`
 985                     x_code eax `appOL`
 986                     toOL [widen, instr rep y_op]
 987
 988              result | quotient  = eax
 989                     | otherwise = edx
 990
 991            -- in
 992            return (Fixed rep result code)
 993
 994
 995
 996 getRegister (CmmLoad mem pk)
 997   | isFloatingRep pk
 998   = do
 999     Amode src mem_code <- getAmode mem
1000     let
1001         code dst = mem_code `snocOL`
1002                    GLD pk src dst
1003     --
1004     return (Any pk code)
1005
1006 getRegister (CmmLoad mem pk)
1007   | pk /= I64
1008   = do
1009     code <- intLoadCode (instr pk) mem
1010     return (Any pk code)
1011   where
1012         instr I8  = MOVZxL pk
1013         instr I16 = MOV I16
1014         instr I32 = MOV I32
1015         -- we always zero-extend 8-bit loads, if we
1016         -- can't think of anything better.  This is because
1017         -- we can't guarantee access to an 8-bit variant of every register
1018         -- (esi and edi don't have 8-bit variants), so to make things
1019         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1020
1021 getRegister (CmmLit (CmmInt 0 rep))
1022   = let
1023         code dst
1024            = unitOL (XOR rep (OpReg dst) (OpReg dst))
1025     in
1026         return (Any rep code)
1027
1028 getRegister (CmmLit lit)
1029   = let
1030         rep = cmmLitRep lit
1031         imm = litToImm lit
1032         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1033     in
1034         return (Any rep code)
1035
1036 getRegister other = panic "getRegister(x86)"
1037
1038
1039 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1040    -> NatM (Reg -> InstrBlock)
1041 intLoadCode instr mem = do
1042   Amode src mem_code <- getAmode mem
1043   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1044
1045 -- Compute an expression into *any* register, adding the appropriate
1046 -- move instruction if necessary.
1047 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1048 getAnyReg expr = do
1049   r <- getRegister expr
1050   anyReg r
1051
1052 anyReg :: Register -> NatM (Reg -> InstrBlock)
1053 anyReg (Any _ code)          = return code
1054 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1055
1056 -- The dual to getAnyReg: compute an expression into a register, but
1057 -- we don't mind which one it is.
1058 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
1059 getSomeReg expr = do
1060   r <- getRegister expr
1061   case r of
1062     Any rep code -> do
1063         tmp <- getNewRegNat rep
1064         return (tmp, code tmp)
1065     Fixed _ reg code ->
1066         return (reg, code)
1067
1068 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1069 -- Fixed registers might not be byte-addressable, so we make sure we've
1070 -- got a temporary, inserting an extra reg copy if necessary.
1071 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1072 getByteReg expr = do
1073   r <- getRegister expr
1074   case r of
1075     Any rep code -> do
1076         tmp <- getNewRegNat rep
1077         return (tmp, code tmp)
1078     Fixed rep reg code
1079         | isVirtualReg reg -> return (reg,code)
1080         | otherwise -> do
1081             tmp <- getNewRegNat rep
1082             return (tmp, code `snocOL` reg2reg rep reg tmp)
1083         -- ToDo: could optimise slightly by checking for byte-addressable
1084         -- real registers, but that will happen very rarely if at all.
1085
1086 -- Another variant: this time we want the result in a register that cannot
1087 -- be modified by code to evaluate an arbitrary expression.
1088 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1089 getNonClobberedReg expr = do
1090   r <- getRegister expr
1091   case r of
1092     Any rep code -> do
1093         tmp <- getNewRegNat rep
1094         return (tmp, code tmp)
1095     Fixed rep reg code
1096         -- only free regs can be clobbered
1097         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1098                 tmp <- getNewRegNat rep
1099                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1100         | otherwise ->
1101                 return (reg, code)
1102
1103 reg2reg :: MachRep -> Reg -> Reg -> Instr
1104 reg2reg rep src dst
1105   | isFloatingRep rep = GMOV src dst
1106   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1107
1108 #endif /* i386_TARGET_ARCH */
1109
1110 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1111
1112 #if sparc_TARGET_ARCH
1113
1114 getRegister (StFloat d)
1115   = getBlockIdNat                   `thenNat` \ lbl ->
1116     getNewRegNat PtrRep             `thenNat` \ tmp ->
1117     let code dst = toOL [
1118             SEGMENT DataSegment,
1119             NEWBLOCK lbl,
1120             DATA F [ImmFloat d],
1121             SEGMENT TextSegment,
1122             SETHI (HI (ImmCLbl lbl)) tmp,
1123             LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1124     in
1125         return (Any F32 code)
1126
1127 getRegister (StDouble d)
1128   = getBlockIdNat                   `thenNat` \ lbl ->
1129     getNewRegNat PtrRep             `thenNat` \ tmp ->
1130     let code dst = toOL [
1131             SEGMENT DataSegment,
1132             NEWBLOCK lbl,
1133             DATA DF [ImmDouble d],
1134             SEGMENT TextSegment,
1135             SETHI (HI (ImmCLbl lbl)) tmp,
1136             LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1137     in
1138         return (Any F64 code)
1139
1140
1141 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1142   = case mop of
1143       MO_NatS_Neg      -> trivialUCode (SUB False False g0) x
1144       MO_Nat_Not       -> trivialUCode (XNOR False g0) x
1145       MO_32U_to_8U     -> trivialCode (AND False) x (StInt 255)
1146
1147       MO_F32_Neg       -> trivialUFCode F32 (FNEG F) x
1148       MO_F64_Neg       -> trivialUFCode F64 (FNEG DF) x
1149
1150       MO_F64_to_Flt    -> coerceDbl2Flt x
1151       MO_F32_to_Dbl    -> coerceFlt2Dbl x
1152
1153       MO_F32_to_NatS   -> coerceFP2Int F32 x
1154       MO_NatS_to_Flt   -> coerceInt2FP F32 x
1155       MO_F64_to_NatS   -> coerceFP2Int F64 x
1156       MO_NatS_to_Dbl   -> coerceInt2FP F64 x
1157
1158       -- Conversions which are a nop on sparc
1159       MO_32U_to_NatS   -> conversionNop IntRep   x
1160       MO_32S_to_NatS  -> conversionNop IntRep   x
1161       MO_NatS_to_32U   -> conversionNop WordRep  x
1162       MO_32U_to_NatU   -> conversionNop WordRep  x
1163
1164       MO_NatU_to_NatS -> conversionNop IntRep    x
1165       MO_NatS_to_NatU -> conversionNop WordRep   x
1166       MO_NatP_to_NatU -> conversionNop WordRep   x
1167       MO_NatU_to_NatP -> conversionNop PtrRep    x
1168       MO_NatS_to_NatP -> conversionNop PtrRep    x
1169       MO_NatP_to_NatS -> conversionNop IntRep    x
1170
1171       -- sign-extending widenings
1172       MO_8U_to_32U    -> integerExtend False 24 x
1173       MO_8U_to_NatU   -> integerExtend False 24 x
1174       MO_8S_to_NatS   -> integerExtend True  24 x
1175       MO_16U_to_NatU  -> integerExtend False 16 x
1176       MO_16S_to_NatS  -> integerExtend True  16 x
1177
1178       other_op ->
1179         let fixed_x = if   is_float_op  -- promote to double
1180                       then CmmMachOp MO_F32_to_Dbl [x]
1181                       else x
1182         in
1183         getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
1184     where
1185         integerExtend signed nBits x
1186            = getRegister (
1187                 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1188                          [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1189              )
1190         conversionNop new_rep expr
1191             = getRegister expr          `thenNat` \ e_code ->
1192               return (swizzleRegisterRep e_code new_rep)
1193
1194         (is_float_op, fn)
1195           = case mop of
1196               MO_F32_Exp    -> (True,  FSLIT("exp"))
1197               MO_F32_Log    -> (True,  FSLIT("log"))
1198               MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
1199
1200               MO_F32_Sin    -> (True,  FSLIT("sin"))
1201               MO_F32_Cos    -> (True,  FSLIT("cos"))
1202               MO_F32_Tan    -> (True,  FSLIT("tan"))
1203
1204               MO_F32_Asin   -> (True,  FSLIT("asin"))
1205               MO_F32_Acos   -> (True,  FSLIT("acos"))
1206               MO_F32_Atan   -> (True,  FSLIT("atan"))
1207
1208               MO_F32_Sinh   -> (True,  FSLIT("sinh"))
1209               MO_F32_Cosh   -> (True,  FSLIT("cosh"))
1210               MO_F32_Tanh   -> (True,  FSLIT("tanh"))
1211
1212               MO_F64_Exp    -> (False, FSLIT("exp"))
1213               MO_F64_Log    -> (False, FSLIT("log"))
1214               MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
1215
1216               MO_F64_Sin    -> (False, FSLIT("sin"))
1217               MO_F64_Cos    -> (False, FSLIT("cos"))
1218               MO_F64_Tan    -> (False, FSLIT("tan"))
1219
1220               MO_F64_Asin   -> (False, FSLIT("asin"))
1221               MO_F64_Acos   -> (False, FSLIT("acos"))
1222               MO_F64_Atan   -> (False, FSLIT("atan"))
1223
1224               MO_F64_Sinh   -> (False, FSLIT("sinh"))
1225               MO_F64_Cosh   -> (False, FSLIT("cosh"))
1226               MO_F64_Tanh   -> (False, FSLIT("tanh"))
1227
1228               other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
1229                                 (pprMachOp mop)
1230
1231
1232 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1233   = case mop of
1234       MO_32U_Gt  -> condIntReg GTT x y
1235       MO_32U_Ge  -> condIntReg GE x y
1236       MO_32U_Eq  -> condIntReg EQQ x y
1237       MO_32U_Ne  -> condIntReg NE x y
1238       MO_32U_Lt  -> condIntReg LTT x y
1239       MO_32U_Le  -> condIntReg LE x y
1240
1241       MO_Nat_Eq   -> condIntReg EQQ x y
1242       MO_Nat_Ne   -> condIntReg NE x y
1243
1244       MO_NatS_Gt  -> condIntReg GTT x y
1245       MO_NatS_Ge  -> condIntReg GE x y
1246       MO_NatS_Lt  -> condIntReg LTT x y
1247       MO_NatS_Le  -> condIntReg LE x y
1248
1249       MO_NatU_Gt  -> condIntReg GU  x y
1250       MO_NatU_Ge  -> condIntReg GEU x y
1251       MO_NatU_Lt  -> condIntReg LU  x y
1252       MO_NatU_Le  -> condIntReg LEU x y
1253
1254       MO_F32_Gt -> condFltReg GTT x y
1255       MO_F32_Ge -> condFltReg GE x y
1256       MO_F32_Eq -> condFltReg EQQ x y
1257       MO_F32_Ne -> condFltReg NE x y
1258       MO_F32_Lt -> condFltReg LTT x y
1259       MO_F32_Le -> condFltReg LE x y
1260
1261       MO_F64_Gt -> condFltReg GTT x y
1262       MO_F64_Ge -> condFltReg GE x y
1263       MO_F64_Eq -> condFltReg EQQ x y
1264       MO_F64_Ne -> condFltReg NE x y
1265       MO_F64_Lt -> condFltReg LTT x y
1266       MO_F64_Le -> condFltReg LE x y
1267
1268       MO_Nat_Add -> trivialCode (ADD False False) x y
1269       MO_Nat_Sub -> trivialCode (SUB False False) x y
1270
1271       MO_NatS_Mul  -> trivialCode (SMUL False) x y
1272       MO_NatU_Mul  -> trivialCode (UMUL False) x y
1273       MO_NatS_MulMayOflo -> imulMayOflo x y
1274
1275       -- ToDo: teach about V8+ SPARC div instructions
1276       MO_NatS_Quot -> idiv FSLIT(".div")  x y
1277       MO_NatS_Rem  -> idiv FSLIT(".rem")  x y
1278       MO_NatU_Quot -> idiv FSLIT(".udiv")  x y
1279       MO_NatU_Rem  -> idiv FSLIT(".urem")  x y
1280
1281       MO_F32_Add   -> trivialFCode F32  FADD x y
1282       MO_F32_Sub   -> trivialFCode F32  FSUB x y
1283       MO_F32_Mul   -> trivialFCode F32  FMUL x y
1284       MO_F32_Div   -> trivialFCode F32  FDIV x y
1285
1286       MO_F64_Add   -> trivialFCode F64 FADD x y
1287       MO_F64_Sub   -> trivialFCode F64 FSUB x y
1288       MO_F64_Mul   -> trivialFCode F64 FMUL x y
1289       MO_F64_Div   -> trivialFCode F64 FDIV x y
1290
1291       MO_Nat_And   -> trivialCode (AND False) x y
1292       MO_Nat_Or    -> trivialCode (OR  False) x y
1293       MO_Nat_Xor   -> trivialCode (XOR False) x y
1294
1295       MO_Nat_Shl   -> trivialCode SLL x y
1296       MO_Nat_Shr   -> trivialCode SRL x y
1297       MO_Nat_Sar   -> trivialCode SRA x y
1298
1299       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1300                                          [promote x, promote y])
1301                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1302       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1303                                         [x, y])
1304
1305       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1306   where
1307     idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1308
1309     --------------------
1310     imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1311     imulMayOflo a1 a2
1312        = getNewRegNat IntRep            `thenNat` \ t1 ->
1313          getNewRegNat IntRep            `thenNat` \ t2 ->
1314          getNewRegNat IntRep            `thenNat` \ res_lo ->
1315          getNewRegNat IntRep            `thenNat` \ res_hi ->
1316          getRegister a1                 `thenNat` \ reg1 ->
1317          getRegister a2                 `thenNat` \ reg2 ->
1318          let code1 = registerCode reg1 t1
1319              code2 = registerCode reg2 t2
1320              src1  = registerName reg1 t1
1321              src2  = registerName reg2 t2
1322              code dst = code1 `appOL` code2 `appOL`
1323                         toOL [
1324                            SMUL False src1 (RIReg src2) res_lo,
1325                            RDY res_hi,
1326                            SRA res_lo (RIImm (ImmInt 31)) res_lo,
1327                            SUB False False res_lo (RIReg res_hi) dst
1328                         ]
1329          in
1330             return (Any IntRep code)
1331
1332 getRegister (CmmLoad pk mem) = do
1333     Amode src code <- getAmode mem
1334     let
1335         size = primRepToSize pk
1336         code__2 dst = code `snocOL` LD size src dst
1337     --
1338     return (Any pk code__2)
1339
1340 getRegister (StInt i)
1341   | fits13Bits i
1342   = let
1343         src = ImmInt (fromInteger i)
1344         code dst = unitOL (OR False g0 (RIImm src) dst)
1345     in
1346         return (Any IntRep code)
1347
1348 getRegister leaf
1349   | isJust imm
1350   = let
1351         code dst = toOL [
1352             SETHI (HI imm__2) dst,
1353             OR False dst (RIImm (LO imm__2)) dst]
1354     in
1355         return (Any PtrRep code)
1356   | otherwise
1357   = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1358   where
1359     imm = maybeImm leaf
1360     imm__2 = case imm of Just x -> x
1361
1362 #endif /* sparc_TARGET_ARCH */
1363
1364 #if powerpc_TARGET_ARCH
1365 getRegister (CmmLoad mem pk)
1366   | pk /= I64
1367   = do
1368         Amode addr addr_code <- getAmode mem
1369         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1370                        addr_code `snocOL` LD pk dst addr
1371         return (Any pk code)
1372
1373 -- catch simple cases of zero- or sign-extended load
1374 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1375     Amode addr addr_code <- getAmode mem
1376     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1377
1378 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1379
1380 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1381     Amode addr addr_code <- getAmode mem
1382     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1383
1384 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1385     Amode addr addr_code <- getAmode mem
1386     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1387
1388 getRegister (CmmMachOp mop [x]) -- unary MachOps
1389   = case mop of
1390       MO_Not rep   -> trivialUCode rep NOT x
1391
1392       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1393       MO_S_Conv F32 F64 -> conversionNop F64 x
1394
1395       MO_S_Conv from to
1396         | from == to         -> conversionNop to x
1397         | isFloatingRep from -> coerceFP2Int from to x
1398         | isFloatingRep to   -> coerceInt2FP from to x
1399
1400         -- narrowing is a nop: we treat the high bits as undefined
1401       MO_S_Conv I32 to -> conversionNop to x
1402       MO_S_Conv I16 I8 -> conversionNop I8 x
1403       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1404       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1405
1406       MO_U_Conv from to
1407         | from == to -> conversionNop to x
1408         -- narrowing is a nop: we treat the high bits as undefined
1409       MO_U_Conv I32 to -> conversionNop to x
1410       MO_U_Conv I16 I8 -> conversionNop I8 x
1411       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1412       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1413
1414       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1415       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1416       MO_S_Neg rep      -> trivialUCode rep NEG x
1417
1418     where
1419         conversionNop new_rep expr
1420             = do e_code <- getRegister expr
1421                  return (swizzleRegisterRep e_code new_rep)
1422
1423 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1424   = case mop of
1425       MO_Eq F32 -> condFltReg EQQ x y
1426       MO_Ne F32 -> condFltReg NE  x y
1427
1428       MO_S_Gt F32 -> condFltReg GTT x y
1429       MO_S_Ge F32 -> condFltReg GE  x y
1430       MO_S_Lt F32 -> condFltReg LTT x y
1431       MO_S_Le F32 -> condFltReg LE  x y
1432
1433       MO_Eq F64 -> condFltReg EQQ x y
1434       MO_Ne F64 -> condFltReg NE  x y
1435
1436       MO_S_Gt F64 -> condFltReg GTT x y
1437       MO_S_Ge F64 -> condFltReg GE  x y
1438       MO_S_Lt F64 -> condFltReg LTT x y
1439       MO_S_Le F64 -> condFltReg LE  x y
1440
1441       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1442       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1443
1444       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1445       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1446       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1447       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1448
1449       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1450       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1451       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1452       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1453
1454       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1455       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1456       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1457       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1458
1459       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1460       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1461       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1462       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1463
1464       MO_Add rep -> trivialCode rep True ADD x y
1465       MO_Sub rep ->
1466         case y of    -- subfi ('substract from' with immediate) doesn't exist
1467           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1468             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1469           _ -> trivialCodeNoImm rep SUBF y x
1470
1471       MO_Mul rep -> trivialCode rep True MULLW x y
1472
1473       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1474
1475       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1476       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1477
1478       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1479       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1480
1481       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1482       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1483
1484       MO_And rep   -> trivialCode rep False AND x y
1485       MO_Or rep    -> trivialCode rep False OR x y
1486       MO_Xor rep   -> trivialCode rep False XOR x y
1487
1488       MO_Shl rep   -> trivialCode rep False SLW x y
1489       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1490       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1491
1492 getRegister (CmmLit (CmmInt i rep))
1493   | Just imm <- makeImmediate rep True i
1494   = let
1495         code dst = unitOL (LI dst imm)
1496     in
1497         return (Any rep code)
1498
1499 getRegister (CmmLit (CmmFloat f F32)) = do
1500     lbl <- getNewLabelNat
1501     tmp <- getNewRegNat I32
1502     let code dst = toOL [
1503             LDATA ReadOnlyData  [CmmDataLabel lbl,
1504                                  CmmStaticLit (CmmFloat f F32)],
1505             LIS tmp (HA (ImmCLbl lbl)),
1506             LD F32 dst (AddrRegImm tmp (LO (ImmCLbl lbl)))
1507             ]
1508     -- in
1509     return (Any F32 code)
1510
1511 getRegister (CmmLit (CmmFloat d F64)) = do
1512     lbl <- getNewLabelNat
1513     tmp <- getNewRegNat I32
1514     let code dst = toOL [
1515             LDATA ReadOnlyData  [CmmDataLabel lbl,
1516                                  CmmStaticLit (CmmFloat d F64)],
1517             LIS tmp (HA (ImmCLbl lbl)),
1518             LD F64 dst (AddrRegImm tmp (LO (ImmCLbl lbl)))
1519             ]
1520     -- in
1521     return (Any F32 code)
1522
1523 #if darwin_TARGET_OS
1524 getRegister (CmmLit (CmmLabel lbl))
1525     | labelCouldBeDynamic lbl
1526     = do
1527         addImportNat False lbl
1528         let imm = ImmDyldNonLazyPtr lbl
1529             code dst = toOL [
1530                     LIS dst (HA imm),
1531                     LD  I32 dst (AddrRegImm dst (LO imm))
1532                 ]
1533         return (Any I32 code)
1534 #endif
1535
1536 getRegister (CmmLit lit)
1537   = let
1538         rep = cmmLitRep lit
1539         imm = litToImm lit
1540         code dst = toOL [
1541                 LIS dst (HI imm),
1542                 OR dst dst (RIImm (LO imm))
1543             ]
1544     in
1545         return (Any rep code)
1546 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1547
1548     -- extend?Rep: wrap integer expression of type rep
1549     -- in a conversion to I32
1550 extendSExpr I32 x = x
1551 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1552 extendUExpr I32 x = x
1553 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1554
1555 -- ###FIXME: exact code duplication from x86 case
1556 -- The dual to getAnyReg: compute an expression into a register, but
1557 -- we don't mind which one it is.
1558 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
1559 getSomeReg expr = do
1560   r <- getRegister expr
1561   case r of
1562     Any rep code -> do
1563         tmp <- getNewRegNat rep
1564         return (tmp, code tmp)
1565     Fixed _ reg code ->
1566         return (reg, code)
1567
1568 #endif /* powerpc_TARGET_ARCH */
1569
1570
1571 -- -----------------------------------------------------------------------------
1572 --  The 'Amode' type: Memory addressing modes passed up the tree.
1573
1574 data Amode = Amode AddrMode InstrBlock
1575
1576 {-
1577 Now, given a tree (the argument to an CmmLoad) that references memory,
1578 produce a suitable addressing mode.
1579
1580 A Rule of the Game (tm) for Amodes: use of the addr bit must
1581 immediately follow use of the code part, since the code part puts
1582 values in registers which the addr then refers to.  So you can't put
1583 anything in between, lest it overwrite some of those registers.  If
1584 you need to do some other computation between the code part and use of
1585 the addr bit, first store the effective address from the amode in a
1586 temporary, then do the other computation, and then use the temporary:
1587
1588     code
1589     LEA amode, tmp
1590     ... other computation ...
1591     ... (tmp) ...
1592 -}
1593
1594 getAmode :: CmmExpr -> NatM Amode
1595 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1596
1597 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1598
1599 #if alpha_TARGET_ARCH
1600
1601 getAmode (StPrim IntSubOp [x, StInt i])
1602   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1603     getRegister x               `thenNat` \ register ->
1604     let
1605         code = registerCode register tmp
1606         reg  = registerName register tmp
1607         off  = ImmInt (-(fromInteger i))
1608     in
1609     return (Amode (AddrRegImm reg off) code)
1610
1611 getAmode (StPrim IntAddOp [x, StInt i])
1612   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1613     getRegister x               `thenNat` \ register ->
1614     let
1615         code = registerCode register tmp
1616         reg  = registerName register tmp
1617         off  = ImmInt (fromInteger i)
1618     in
1619     return (Amode (AddrRegImm reg off) code)
1620
1621 getAmode leaf
1622   | isJust imm
1623   = return (Amode (AddrImm imm__2) id)
1624   where
1625     imm = maybeImm leaf
1626     imm__2 = case imm of Just x -> x
1627
1628 getAmode other
1629   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1630     getRegister other           `thenNat` \ register ->
1631     let
1632         code = registerCode register tmp
1633         reg  = registerName register tmp
1634     in
1635     return (Amode (AddrReg reg) code)
1636
1637 #endif /* alpha_TARGET_ARCH */
1638
1639 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1640
1641 #if i386_TARGET_ARCH
1642
1643 -- This is all just ridiculous, since it carefully undoes
1644 -- what mangleIndexTree has just done.
1645 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1646   -- ASSERT(rep == I32)???
1647   = do (x_reg, x_code) <- getSomeReg x
1648        let off = ImmInt (-(fromInteger i))
1649        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1650
1651 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1652   -- ASSERT(rep == I32)???
1653   = do (x_reg, x_code) <- getSomeReg x
1654        let off = ImmInt (fromInteger i)
1655        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1656
1657 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1658 -- recognised by the next rule.
1659 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1660                                   b@(CmmLit _)])
1661   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1662
1663 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1664                                         [y, CmmLit (CmmInt shift _)]])
1665   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1666   = do (x_reg, x_code) <- getNonClobberedReg x
1667         -- x must be in a temp, because it has to stay live over y_code
1668         -- we could compre x_reg and y_reg and do something better here...
1669        (y_reg, y_code) <- getSomeReg y
1670        let
1671            code = x_code `appOL` y_code
1672            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1673        return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
1674                code)
1675
1676 getAmode (CmmLit lit)
1677   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1678
1679 getAmode expr = do
1680   (reg,code) <- getSomeReg expr
1681   return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
1682
1683 #endif /* i386_TARGET_ARCH */
1684
1685 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1686
1687 #if sparc_TARGET_ARCH
1688
1689 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1690   | fits13Bits (-i)
1691   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1692     getRegister x               `thenNat` \ register ->
1693     let
1694         code = registerCode register tmp
1695         reg  = registerName register tmp
1696         off  = ImmInt (-(fromInteger i))
1697     in
1698     return (Amode (AddrRegImm reg off) code)
1699
1700
1701 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1702   | fits13Bits i
1703   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1704     getRegister x               `thenNat` \ register ->
1705     let
1706         code = registerCode register tmp
1707         reg  = registerName register tmp
1708         off  = ImmInt (fromInteger i)
1709     in
1710     return (Amode (AddrRegImm reg off) code)
1711
1712 getAmode (CmmMachOp MO_Nat_Add [x, y])
1713   = getNewRegNat PtrRep         `thenNat` \ tmp1 ->
1714     getNewRegNat IntRep         `thenNat` \ tmp2 ->
1715     getRegister x               `thenNat` \ register1 ->
1716     getRegister y               `thenNat` \ register2 ->
1717     let
1718         code1 = registerCode register1 tmp1
1719         reg1  = registerName register1 tmp1
1720         code2 = registerCode register2 tmp2
1721         reg2  = registerName register2 tmp2
1722         code__2 = code1 `appOL` code2
1723     in
1724     return (Amode (AddrRegReg reg1 reg2) code__2)
1725
1726 getAmode leaf
1727   | isJust imm
1728   = getNewRegNat PtrRep             `thenNat` \ tmp ->
1729     let
1730         code = unitOL (SETHI (HI imm__2) tmp)
1731     in
1732     return (Amode (AddrRegImm tmp (LO imm__2)) code)
1733   where
1734     imm    = maybeImm leaf
1735     imm__2 = case imm of Just x -> x
1736
1737 getAmode other
1738   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1739     getRegister other           `thenNat` \ register ->
1740     let
1741         code = registerCode register tmp
1742         reg  = registerName register tmp
1743         off  = ImmInt 0
1744     in
1745     return (Amode (AddrRegImm reg off) code)
1746
1747 #endif /* sparc_TARGET_ARCH */
1748
1749 #ifdef powerpc_TARGET_ARCH
1750 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1751   | Just off <- makeImmediate I32 True (-i)
1752   = do
1753         (reg, code) <- getSomeReg x
1754         return (Amode (AddrRegImm reg off) code)
1755
1756
1757 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1758   | Just off <- makeImmediate I32 True i
1759   = do
1760         (reg, code) <- getSomeReg x
1761         return (Amode (AddrRegImm reg off) code)
1762
1763 getAmode (CmmLit lit)
1764   = do
1765         tmp <- getNewRegNat I32
1766         let
1767             code = unitOL (LIS tmp (HA imm))
1768         return (Amode (AddrRegImm tmp (LO imm)) code)
1769     where
1770         imm = litToImm lit
1771
1772 getAmode (CmmMachOp (MO_Add I32) [x, y])
1773   = do
1774         (regX, codeX) <- getSomeReg x
1775         (regY, codeY) <- getSomeReg y
1776         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1777
1778 getAmode other
1779   = do
1780         (reg, code) <- getSomeReg other
1781         let
1782             off  = ImmInt 0
1783         return (Amode (AddrRegImm reg off) code)
1784 #endif /* powerpc_TARGET_ARCH */
1785
1786 -- -----------------------------------------------------------------------------
1787 -- getOperand: sometimes any operand will do.
1788
1789 -- getOperand gets a *safe* operand; that is, the value of the operand
1790 -- will remain valid across the computation of an arbitrary expression,
1791 -- unless the expression is computed directly into a register which
1792 -- the operand refers to (see trivialCode where this function is used
1793 -- for an example).
1794
1795 #ifdef i386_TARGET_ARCH
1796
1797 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1798 getOperand (CmmLoad mem pk)
1799   | not (isFloatingRep pk) && pk /= I64 = do
1800     Amode src mem_code <- getAmode mem
1801     (src',save_code) <-
1802         if (amodeCouldBeClobbered src)
1803                 then do
1804                    tmp <- getNewRegNat wordRep
1805                    return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
1806                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1807                 else
1808                    return (src, nilOL)
1809     return (OpAddr src', save_code `appOL` mem_code)
1810
1811 getOperand e = do
1812     (reg, code) <- getNonClobberedReg e
1813     return (OpReg reg, code)
1814
1815 amodeCouldBeClobbered :: AddrMode -> Bool
1816 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1817
1818 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1819 regClobbered _ = False
1820
1821 #endif
1822
1823 -- -----------------------------------------------------------------------------
1824 --  The 'CondCode' type:  Condition codes passed up the tree.
1825
1826 data CondCode = CondCode Bool Cond InstrBlock
1827
1828 -- Set up a condition code for a conditional branch.
1829
1830 getCondCode :: CmmExpr -> NatM CondCode
1831
1832 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1833
1834 #if alpha_TARGET_ARCH
1835 getCondCode = panic "MachCode.getCondCode: not on Alphas"
1836 #endif /* alpha_TARGET_ARCH */
1837
1838 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1839
1840 #if i386_TARGET_ARCH || sparc_TARGET_ARCH
1841 -- yes, they really do seem to want exactly the same!
1842
1843 getCondCode (CmmMachOp mop [x, y])
1844   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
1845     case mop of
1846       MO_Eq F32 -> condFltCode EQQ x y
1847       MO_Ne F32 -> condFltCode NE  x y
1848
1849       MO_S_Gt F32 -> condFltCode GTT x y
1850       MO_S_Ge F32 -> condFltCode GE  x y
1851       MO_S_Lt F32 -> condFltCode LTT x y
1852       MO_S_Le F32 -> condFltCode LE  x y
1853
1854       MO_Eq F64 -> condFltCode EQQ x y
1855       MO_Ne F64 -> condFltCode NE  x y
1856
1857       MO_S_Gt F64 -> condFltCode GTT x y
1858       MO_S_Ge F64 -> condFltCode GE  x y
1859       MO_S_Lt F64 -> condFltCode LTT x y
1860       MO_S_Le F64 -> condFltCode LE  x y
1861
1862       MO_Eq rep -> condIntCode EQQ  x y
1863       MO_Ne rep -> condIntCode NE   x y
1864
1865       MO_S_Gt rep -> condIntCode GTT  x y
1866       MO_S_Ge rep -> condIntCode GE   x y
1867       MO_S_Lt rep -> condIntCode LTT  x y
1868       MO_S_Le rep -> condIntCode LE   x y
1869
1870       MO_U_Gt rep -> condIntCode GU   x y
1871       MO_U_Ge rep -> condIntCode GEU  x y
1872       MO_U_Lt rep -> condIntCode LU   x y
1873       MO_U_Le rep -> condIntCode LEU  x y
1874
1875       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
1876
1877 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1878
1879 #elif powerpc_TARGET_ARCH
1880
1881 -- almost the same as everywhere else - but we need to
1882 -- extend small integers to 32 bit first
1883
1884 getCondCode (CmmMachOp mop [x, y])
1885   = case mop of
1886       MO_Eq F32 -> condFltCode EQQ x y
1887       MO_Ne F32 -> condFltCode NE  x y
1888
1889       MO_S_Gt F32 -> condFltCode GTT x y
1890       MO_S_Ge F32 -> condFltCode GE  x y
1891       MO_S_Lt F32 -> condFltCode LTT x y
1892       MO_S_Le F32 -> condFltCode LE  x y
1893
1894       MO_Eq F64 -> condFltCode EQQ x y
1895       MO_Ne F64 -> condFltCode NE  x y
1896
1897       MO_S_Gt F64 -> condFltCode GTT x y
1898       MO_S_Ge F64 -> condFltCode GE  x y
1899       MO_S_Lt F64 -> condFltCode LTT x y
1900       MO_S_Le F64 -> condFltCode LE  x y
1901
1902       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
1903       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
1904
1905       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
1906       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
1907       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
1908       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
1909
1910       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
1911       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
1912       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
1913       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
1914
1915       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
1916
1917 getCondCode other =  panic "getCondCode(2)(powerpc)"
1918
1919
1920 #endif
1921
1922
1923 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1924 -- passed back up the tree.
1925
1926 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1927
1928 #if alpha_TARGET_ARCH
1929 condIntCode = panic "MachCode.condIntCode: not on Alphas"
1930 condFltCode = panic "MachCode.condFltCode: not on Alphas"
1931 #endif /* alpha_TARGET_ARCH */
1932
1933 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1934 #if i386_TARGET_ARCH
1935
1936 -- memory vs immediate
1937 condIntCode cond (CmmLoad x pk) (CmmLit lit) = do
1938     Amode x_addr x_code <- getAmode x
1939     let
1940         imm  = litToImm lit
1941         code = x_code `snocOL`
1942                   CMP pk (OpImm imm) (OpAddr x_addr)
1943     --
1944     return (CondCode False cond code)
1945
1946 -- anything vs zero
1947 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1948     (x_reg, x_code) <- getSomeReg x
1949     let
1950         code = x_code `snocOL`
1951                   TEST pk (OpReg x_reg) (OpReg x_reg)
1952     --
1953     return (CondCode False cond code)
1954
1955 -- anything vs immediate
1956 condIntCode cond x (CmmLit lit) = do
1957     (x_reg, x_code) <- getSomeReg x
1958     let
1959         imm  = litToImm lit
1960         code = x_code `snocOL`
1961                   CMP (cmmLitRep lit) (OpImm imm) (OpReg x_reg)
1962     -- in
1963     return (CondCode False cond code)
1964
1965 -- memory vs anything
1966 condIntCode cond (CmmLoad x pk) y = do
1967     (y_reg, y_code) <- getNonClobberedReg y
1968     Amode x_addr x_code <- getAmode x
1969     let
1970         code = y_code `appOL`
1971                x_code `snocOL`
1972                   CMP pk (OpReg y_reg) (OpAddr x_addr)
1973     -- in
1974     return (CondCode False cond code)
1975
1976 -- anything vs memory
1977 condIntCode cond y (CmmLoad x pk) = do
1978     (y_reg, y_code) <- getNonClobberedReg y
1979     Amode x_addr x_code <- getAmode x
1980     let
1981         code = y_code `appOL`
1982                x_code `snocOL`
1983                   CMP pk (OpAddr x_addr) (OpReg y_reg)
1984     -- in
1985     return (CondCode False cond code)
1986
1987 -- anything vs anything
1988 condIntCode cond x y = do
1989   (x_op, x_code) <- getOperand x
1990   (y_reg, y_code) <- getSomeReg y
1991   let
1992         code = x_code `appOL`
1993                y_code `snocOL`
1994                   CMP (cmmExprRep x) (OpReg y_reg) x_op
1995   -- in
1996   return (CondCode False cond code)
1997
1998 -----------
1999 condFltCode cond x y
2000   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2001   (x_reg, x_code) <- getNonClobberedReg x
2002   (y_reg, y_code) <- getSomeReg y
2003   let
2004         code = x_code `appOL` y_code `snocOL`
2005                 GCMP cond x_reg y_reg
2006   -- The GCMP insn does the test and sets the zero flag if comparable
2007   -- and true.  Hence we always supply EQQ as the condition to test.
2008   return (CondCode True EQQ code)
2009
2010 #endif /* i386_TARGET_ARCH */
2011
2012 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2013
2014 #if sparc_TARGET_ARCH
2015
2016 condIntCode cond x (StInt y)
2017   | fits13Bits y
2018   = getRegister x               `thenNat` \ register ->
2019     getNewRegNat IntRep         `thenNat` \ tmp ->
2020     let
2021         code = registerCode register tmp
2022         src1 = registerName register tmp
2023         src2 = ImmInt (fromInteger y)
2024         code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2025     in
2026     return (CondCode False cond code__2)
2027
2028 condIntCode cond x y
2029   = getRegister x               `thenNat` \ register1 ->
2030     getRegister y               `thenNat` \ register2 ->
2031     getNewRegNat IntRep         `thenNat` \ tmp1 ->
2032     getNewRegNat IntRep         `thenNat` \ tmp2 ->
2033     let
2034         code1 = registerCode register1 tmp1
2035         src1  = registerName register1 tmp1
2036         code2 = registerCode register2 tmp2
2037         src2  = registerName register2 tmp2
2038         code__2 = code1 `appOL` code2 `snocOL`
2039                   SUB False True src1 (RIReg src2) g0
2040     in
2041     return (CondCode False cond code__2)
2042
2043 -----------
2044 condFltCode cond x y
2045   = getRegister x               `thenNat` \ register1 ->
2046     getRegister y               `thenNat` \ register2 ->
2047     getNewRegNat (registerRep register1)
2048                                 `thenNat` \ tmp1 ->
2049     getNewRegNat (registerRep register2)
2050                                 `thenNat` \ tmp2 ->
2051     getNewRegNat F64    `thenNat` \ tmp ->
2052     let
2053         promote x = FxTOy F DF x tmp
2054
2055         pk1   = registerRep register1
2056         code1 = registerCode register1 tmp1
2057         src1  = registerName register1 tmp1
2058
2059         pk2   = registerRep register2
2060         code2 = registerCode register2 tmp2
2061         src2  = registerName register2 tmp2
2062
2063         code__2 =
2064                 if pk1 == pk2 then
2065                     code1 `appOL` code2 `snocOL`
2066                     FCMP True (primRepToSize pk1) src1 src2
2067                 else if pk1 == F32 then
2068                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2069                     FCMP True DF tmp src2
2070                 else
2071                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2072                     FCMP True DF src1 tmp
2073     in
2074     return (CondCode True cond code__2)
2075
2076 #endif /* sparc_TARGET_ARCH */
2077
2078 #if powerpc_TARGET_ARCH
2079 -- ###FIXME: I16 and I8!
2080 condIntCode cond x (CmmLit (CmmInt y rep))
2081   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2082   = do
2083         (src1, code) <- getSomeReg x
2084         let
2085             code' = code `snocOL`
2086                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2087         return (CondCode False cond code')
2088
2089 condIntCode cond x y = do
2090     (src1, code1) <- getSomeReg x
2091     (src2, code2) <- getSomeReg y
2092     let
2093         code' = code1 `appOL` code2 `snocOL`
2094                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2095     return (CondCode False cond code')
2096
2097 condFltCode cond x y = do
2098     (src1, code1) <- getSomeReg x
2099     (src2, code2) <- getSomeReg y
2100     let
2101         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2102         code'' = case cond of -- twiddle CR to handle unordered case
2103                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2104                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2105                     _ -> code'
2106                  where
2107                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2108     return (CondCode True cond code'')
2109
2110 #endif /* powerpc_TARGET_ARCH */
2111
2112 -- -----------------------------------------------------------------------------
2113 -- Generating assignments
2114
2115 -- Assignments are really at the heart of the whole code generation
2116 -- business.  Almost all top-level nodes of any real importance are
2117 -- assignments, which correspond to loads, stores, or register
2118 -- transfers.  If we're really lucky, some of the register transfers
2119 -- will go away, because we can use the destination register to
2120 -- complete the code generation for the right hand side.  This only
2121 -- fails when the right hand side is forced into a fixed register
2122 -- (e.g. the result of a call).
2123
2124 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2125 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2126
2127 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2128 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2129
2130 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2131
2132 #if alpha_TARGET_ARCH
2133
2134 assignIntCode pk (CmmLoad dst _) src
2135   = getNewRegNat IntRep             `thenNat` \ tmp ->
2136     getAmode dst                    `thenNat` \ amode ->
2137     getRegister src                 `thenNat` \ register ->
2138     let
2139         code1   = amodeCode amode []
2140         dst__2  = amodeAddr amode
2141         code2   = registerCode register tmp []
2142         src__2  = registerName register tmp
2143         sz      = primRepToSize pk
2144         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2145     in
2146     return code__2
2147
2148 assignIntCode pk dst src
2149   = getRegister dst                         `thenNat` \ register1 ->
2150     getRegister src                         `thenNat` \ register2 ->
2151     let
2152         dst__2  = registerName register1 zeroh
2153         code    = registerCode register2 dst__2
2154         src__2  = registerName register2 dst__2
2155         code__2 = if isFixed register2
2156                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2157                   else code
2158     in
2159     return code__2
2160
2161 #endif /* alpha_TARGET_ARCH */
2162
2163 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2164
2165 #if i386_TARGET_ARCH
2166
2167 -- integer assignment to memory
2168 assignMem_IntCode pk addr src = do
2169     Amode addr code_addr <- getAmode addr
2170     (code_src, op_src)   <- get_op_RI src
2171     let
2172         code = code_src `appOL`
2173                code_addr `snocOL`
2174                   MOV pk op_src (OpAddr addr)
2175         -- NOTE: op_src is stable, so it will still be valid
2176         -- after code_addr.  This may involve the introduction
2177         -- of an extra MOV to a temporary register, but we hope
2178         -- the register allocator will get rid of it.
2179     --
2180     return code
2181   where
2182     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2183     get_op_RI (CmmLit lit)
2184       = return (nilOL, OpImm (litToImm lit))
2185     get_op_RI op
2186       = do (reg,code) <- getNonClobberedReg op
2187            return (code, OpReg reg)
2188
2189
2190 -- Assign; dst is a reg, rhs is mem
2191 assignReg_IntCode pk reg (CmmLoad src _) = do
2192   load_code <- intLoadCode (MOV pk) src
2193   return (load_code (getRegisterReg reg))
2194
2195 -- dst is a reg, but src could be anything
2196 assignReg_IntCode pk reg src = do
2197   code <- getAnyReg src
2198   return (code (getRegisterReg reg))
2199
2200 #endif /* i386_TARGET_ARCH */
2201
2202 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2203
2204 #if sparc_TARGET_ARCH
2205
2206 assignMem_IntCode pk addr src
2207   = getNewRegNat IntRep                     `thenNat` \ tmp ->
2208     getAmode addr                           `thenNat` \ amode ->
2209     getRegister src                         `thenNat` \ register ->
2210     let
2211         code1   = amodeCode amode
2212         dst__2  = amodeAddr amode
2213         code2   = registerCode register tmp
2214         src__2  = registerName register tmp
2215         sz      = primRepToSize pk
2216         code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2217     in
2218     return code__2
2219
2220 assignReg_IntCode pk reg src
2221   = getRegister src                         `thenNat` \ register2 ->
2222     getRegisterReg reg                      `thenNat` \ register1 ->
2223     getNewRegNat IntRep                     `thenNat` \ tmp ->
2224     let
2225         dst__2  = registerName register1 tmp
2226         code    = registerCode register2 dst__2
2227         src__2  = registerName register2 dst__2
2228         code__2 = if isFixed register2
2229                   then code `snocOL` OR False g0 (RIReg src__2) dst__2
2230                   else code
2231     in
2232     return code__2
2233
2234 #endif /* sparc_TARGET_ARCH */
2235
2236 #if powerpc_TARGET_ARCH
2237
2238 assignMem_IntCode pk addr src = do
2239     (srcReg, code) <- getSomeReg src
2240     Amode dstAddr addr_code <- getAmode addr
2241     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2242
2243 -- dst is a reg, but src could be anything
2244 assignReg_IntCode pk reg src
2245     = do
2246         r <- getRegister src
2247         return $ case r of
2248             Any _ code         -> code dst
2249             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2250     where
2251         dst = getRegisterReg reg
2252
2253 #endif /* powerpc_TARGET_ARCH */
2254
2255
2256 -- -----------------------------------------------------------------------------
2257 -- Floating-point assignments
2258
2259 #if alpha_TARGET_ARCH
2260
2261 assignFltCode pk (CmmLoad dst _) src
2262   = getNewRegNat pk                 `thenNat` \ tmp ->
2263     getAmode dst                    `thenNat` \ amode ->
2264     getRegister src                         `thenNat` \ register ->
2265     let
2266         code1   = amodeCode amode []
2267         dst__2  = amodeAddr amode
2268         code2   = registerCode register tmp []
2269         src__2  = registerName register tmp
2270         sz      = primRepToSize pk
2271         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2272     in
2273     return code__2
2274
2275 assignFltCode pk dst src
2276   = getRegister dst                         `thenNat` \ register1 ->
2277     getRegister src                         `thenNat` \ register2 ->
2278     let
2279         dst__2  = registerName register1 zeroh
2280         code    = registerCode register2 dst__2
2281         src__2  = registerName register2 dst__2
2282         code__2 = if isFixed register2
2283                   then code . mkSeqInstr (FMOV src__2 dst__2)
2284                   else code
2285     in
2286     return code__2
2287
2288 #endif /* alpha_TARGET_ARCH */
2289
2290 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2291
2292 #if i386_TARGET_ARCH
2293
2294 -- Floating point assignment to memory
2295 assignMem_FltCode pk addr src = do
2296   (src_reg, src_code) <- getNonClobberedReg src
2297   Amode addr addr_code <- getAmode addr
2298   let
2299         code = src_code `appOL`
2300                addr_code `snocOL`
2301                 GST pk src_reg addr
2302   return code
2303
2304 -- Floating point assignment to a register/temporary
2305 assignReg_FltCode pk reg src = do
2306   src_code <- getAnyReg src
2307   return (src_code (getRegisterReg reg))
2308
2309 #endif /* i386_TARGET_ARCH */
2310
2311 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2312
2313 #if sparc_TARGET_ARCH
2314
2315 -- Floating point assignment to memory
2316 assignMem_FltCode pk addr src
2317   = getNewRegNat pk                 `thenNat` \ tmp1 ->
2318     getAmode addr                   `thenNat` \ amode ->
2319     getRegister src                 `thenNat` \ register ->
2320     let
2321         sz      = primRepToSize pk
2322         dst__2  = amodeAddr amode
2323
2324         code1   = amodeCode amode
2325         code2   = registerCode register tmp1
2326
2327         src__2  = registerName register tmp1
2328         pk__2   = registerRep register
2329         sz__2   = primRepToSize pk__2
2330
2331         code__2 = code1 `appOL` code2 `appOL`
2332             if   pk == pk__2
2333             then unitOL (ST sz src__2 dst__2)
2334             else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2335     in
2336     return code__2
2337
2338 -- Floating point assignment to a register/temporary
2339 -- Why is this so bizarrely ugly?
2340 assignReg_FltCode pk reg src
2341   = getRegisterReg reg                      `thenNat` \ register1 ->
2342     getRegister src                         `thenNat` \ register2 ->
2343     let
2344         pk__2   = registerRep register2
2345         sz__2   = primRepToSize pk__2
2346     in
2347     getNewRegNat pk__2                      `thenNat` \ tmp ->
2348     let
2349         sz      = primRepToSize pk
2350         dst__2  = registerName register1 g0    -- must be Fixed
2351         reg__2  = if pk /= pk__2 then tmp else dst__2
2352         code    = registerCode register2 reg__2
2353         src__2  = registerName register2 reg__2
2354         code__2 =
2355                 if pk /= pk__2 then
2356                      code `snocOL` FxTOy sz__2 sz src__2 dst__2
2357                 else if isFixed register2 then
2358                      code `snocOL` FMOV sz src__2 dst__2
2359                 else
2360                      code
2361     in
2362     return code__2
2363
2364 #endif /* sparc_TARGET_ARCH */
2365
2366 #if powerpc_TARGET_ARCH
2367
2368 -- Easy, isn't it?
2369 assignMem_FltCode = assignMem_IntCode
2370 assignReg_FltCode = assignReg_IntCode
2371
2372 #endif /* powerpc_TARGET_ARCH */
2373
2374
2375 -- -----------------------------------------------------------------------------
2376 -- Generating an non-local jump
2377
2378 -- (If applicable) Do not fill the delay slots here; you will confuse the
2379 -- register allocator.
2380
2381 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2382
2383 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2384
2385 #if alpha_TARGET_ARCH
2386
2387 genJump (CmmLabel lbl)
2388   | isAsmTemp lbl = returnInstr (BR target)
2389   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2390   where
2391     target = ImmCLbl lbl
2392
2393 genJump tree
2394   = getRegister tree                `thenNat` \ register ->
2395     getNewRegNat PtrRep             `thenNat` \ tmp ->
2396     let
2397         dst    = registerName register pv
2398         code   = registerCode register pv
2399         target = registerName register pv
2400     in
2401     if isFixed register then
2402         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2403     else
2404     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2405
2406 #endif /* alpha_TARGET_ARCH */
2407
2408 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2409
2410 #if i386_TARGET_ARCH
2411
2412 genJump (CmmLoad mem pk) = do
2413   Amode target code <- getAmode mem
2414   return (code `snocOL` JMP (OpAddr target))
2415
2416 genJump (CmmLit lit) = do
2417   return (unitOL (JMP (OpImm (litToImm lit))))
2418
2419 genJump expr = do
2420   (reg,code) <- getSomeReg expr
2421   return (code `snocOL` JMP (OpReg reg))
2422
2423 #endif /* i386_TARGET_ARCH */
2424
2425 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2426
2427 #if sparc_TARGET_ARCH
2428
2429 genJump (CmmLabel lbl)
2430   = return (toOL [CALL (Left target) 0 True, NOP])
2431   where
2432     target = ImmCLbl lbl
2433
2434 genJump tree
2435   = getRegister tree                        `thenNat` \ register ->
2436     getNewRegNat PtrRep             `thenNat` \ tmp ->
2437     let
2438         code   = registerCode register tmp
2439         target = registerName register tmp
2440     in
2441     return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2442
2443 #endif /* sparc_TARGET_ARCH */
2444
2445 #if powerpc_TARGET_ARCH
2446 genJump (CmmLit (CmmLabel lbl))
2447   = return (unitOL $ JMP lbl)
2448
2449 genJump tree
2450   = do
2451         (target,code) <- getSomeReg tree
2452         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2453 #endif /* powerpc_TARGET_ARCH */
2454
2455
2456 -- -----------------------------------------------------------------------------
2457 --  Unconditional branches
2458
2459 genBranch :: BlockId -> NatM InstrBlock
2460
2461 #if alpha_TARGET_ARCH
2462 genBranch id = return (unitOL (BR id))
2463 #endif
2464
2465 #if i386_TARGET_ARCH
2466 genBranch id = return (unitOL (JXX ALWAYS id))
2467 #endif
2468
2469 #if sparc_TARGET_ARCH
2470 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2471 #endif
2472
2473 #if powerpc_TARGET_ARCH
2474 genBranch id = return (unitOL (BCC ALWAYS id))
2475 #endif
2476
2477
2478 -- -----------------------------------------------------------------------------
2479 --  Conditional jumps
2480
2481 {-
2482 Conditional jumps are always to local labels, so we can use branch
2483 instructions.  We peek at the arguments to decide what kind of
2484 comparison to do.
2485
2486 ALPHA: For comparisons with 0, we're laughing, because we can just do
2487 the desired conditional branch.
2488
2489 I386: First, we have to ensure that the condition
2490 codes are set according to the supplied comparison operation.
2491
2492 SPARC: First, we have to ensure that the condition codes are set
2493 according to the supplied comparison operation.  We generate slightly
2494 different code for floating point comparisons, because a floating
2495 point operation cannot directly precede a @BF@.  We assume the worst
2496 and fill that slot with a @NOP@.
2497
2498 SPARC: Do not fill the delay slots here; you will confuse the register
2499 allocator.
2500 -}
2501
2502
2503 genCondJump
2504     :: BlockId      -- the branch target
2505     -> CmmExpr      -- the condition on which to branch
2506     -> NatM InstrBlock
2507
2508 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2509
2510 #if alpha_TARGET_ARCH
2511
2512 genCondJump id (StPrim op [x, StInt 0])
2513   = getRegister x                           `thenNat` \ register ->
2514     getNewRegNat (registerRep register)
2515                                     `thenNat` \ tmp ->
2516     let
2517         code   = registerCode register tmp
2518         value  = registerName register tmp
2519         pk     = registerRep register
2520         target = ImmCLbl lbl
2521     in
2522     returnSeq code [BI (cmpOp op) value target]
2523   where
2524     cmpOp CharGtOp = GTT
2525     cmpOp CharGeOp = GE
2526     cmpOp CharEqOp = EQQ
2527     cmpOp CharNeOp = NE
2528     cmpOp CharLtOp = LTT
2529     cmpOp CharLeOp = LE
2530     cmpOp IntGtOp = GTT
2531     cmpOp IntGeOp = GE
2532     cmpOp IntEqOp = EQQ
2533     cmpOp IntNeOp = NE
2534     cmpOp IntLtOp = LTT
2535     cmpOp IntLeOp = LE
2536     cmpOp WordGtOp = NE
2537     cmpOp WordGeOp = ALWAYS
2538     cmpOp WordEqOp = EQQ
2539     cmpOp WordNeOp = NE
2540     cmpOp WordLtOp = NEVER
2541     cmpOp WordLeOp = EQQ
2542     cmpOp AddrGtOp = NE
2543     cmpOp AddrGeOp = ALWAYS
2544     cmpOp AddrEqOp = EQQ
2545     cmpOp AddrNeOp = NE
2546     cmpOp AddrLtOp = NEVER
2547     cmpOp AddrLeOp = EQQ
2548
2549 genCondJump lbl (StPrim op [x, StDouble 0.0])
2550   = getRegister x                           `thenNat` \ register ->
2551     getNewRegNat (registerRep register)
2552                                     `thenNat` \ tmp ->
2553     let
2554         code   = registerCode register tmp
2555         value  = registerName register tmp
2556         pk     = registerRep register
2557         target = ImmCLbl lbl
2558     in
2559     return (code . mkSeqInstr (BF (cmpOp op) value target))
2560   where
2561     cmpOp FloatGtOp = GTT
2562     cmpOp FloatGeOp = GE
2563     cmpOp FloatEqOp = EQQ
2564     cmpOp FloatNeOp = NE
2565     cmpOp FloatLtOp = LTT
2566     cmpOp FloatLeOp = LE
2567     cmpOp DoubleGtOp = GTT
2568     cmpOp DoubleGeOp = GE
2569     cmpOp DoubleEqOp = EQQ
2570     cmpOp DoubleNeOp = NE
2571     cmpOp DoubleLtOp = LTT
2572     cmpOp DoubleLeOp = LE
2573
2574 genCondJump lbl (StPrim op [x, y])
2575   | fltCmpOp op
2576   = trivialFCode pr instr x y       `thenNat` \ register ->
2577     getNewRegNat F64                `thenNat` \ tmp ->
2578     let
2579         code   = registerCode register tmp
2580         result = registerName register tmp
2581         target = ImmCLbl lbl
2582     in
2583     return (code . mkSeqInstr (BF cond result target))
2584   where
2585     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2586
2587     fltCmpOp op = case op of
2588         FloatGtOp -> True
2589         FloatGeOp -> True
2590         FloatEqOp -> True
2591         FloatNeOp -> True
2592         FloatLtOp -> True
2593         FloatLeOp -> True
2594         DoubleGtOp -> True
2595         DoubleGeOp -> True
2596         DoubleEqOp -> True
2597         DoubleNeOp -> True
2598         DoubleLtOp -> True
2599         DoubleLeOp -> True
2600         _ -> False
2601     (instr, cond) = case op of
2602         FloatGtOp -> (FCMP TF LE, EQQ)
2603         FloatGeOp -> (FCMP TF LTT, EQQ)
2604         FloatEqOp -> (FCMP TF EQQ, NE)
2605         FloatNeOp -> (FCMP TF EQQ, EQQ)
2606         FloatLtOp -> (FCMP TF LTT, NE)
2607         FloatLeOp -> (FCMP TF LE, NE)
2608         DoubleGtOp -> (FCMP TF LE, EQQ)
2609         DoubleGeOp -> (FCMP TF LTT, EQQ)
2610         DoubleEqOp -> (FCMP TF EQQ, NE)
2611         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2612         DoubleLtOp -> (FCMP TF LTT, NE)
2613         DoubleLeOp -> (FCMP TF LE, NE)
2614
2615 genCondJump lbl (StPrim op [x, y])
2616   = trivialCode instr x y           `thenNat` \ register ->
2617     getNewRegNat IntRep             `thenNat` \ tmp ->
2618     let
2619         code   = registerCode register tmp
2620         result = registerName register tmp
2621         target = ImmCLbl lbl
2622     in
2623     return (code . mkSeqInstr (BI cond result target))
2624   where
2625     (instr, cond) = case op of
2626         CharGtOp -> (CMP LE, EQQ)
2627         CharGeOp -> (CMP LTT, EQQ)
2628         CharEqOp -> (CMP EQQ, NE)
2629         CharNeOp -> (CMP EQQ, EQQ)
2630         CharLtOp -> (CMP LTT, NE)
2631         CharLeOp -> (CMP LE, NE)
2632         IntGtOp -> (CMP LE, EQQ)
2633         IntGeOp -> (CMP LTT, EQQ)
2634         IntEqOp -> (CMP EQQ, NE)
2635         IntNeOp -> (CMP EQQ, EQQ)
2636         IntLtOp -> (CMP LTT, NE)
2637         IntLeOp -> (CMP LE, NE)
2638         WordGtOp -> (CMP ULE, EQQ)
2639         WordGeOp -> (CMP ULT, EQQ)
2640         WordEqOp -> (CMP EQQ, NE)
2641         WordNeOp -> (CMP EQQ, EQQ)
2642         WordLtOp -> (CMP ULT, NE)
2643         WordLeOp -> (CMP ULE, NE)
2644         AddrGtOp -> (CMP ULE, EQQ)
2645         AddrGeOp -> (CMP ULT, EQQ)
2646         AddrEqOp -> (CMP EQQ, NE)
2647         AddrNeOp -> (CMP EQQ, EQQ)
2648         AddrLtOp -> (CMP ULT, NE)
2649         AddrLeOp -> (CMP ULE, NE)
2650
2651 #endif /* alpha_TARGET_ARCH */
2652
2653 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2654
2655 #if i386_TARGET_ARCH
2656
2657 genCondJump id bool = do
2658   CondCode _ cond code <- getCondCode bool
2659   return (code `snocOL` JXX cond id)
2660
2661 #endif /* i386_TARGET_ARCH */
2662
2663
2664 #if sparc_TARGET_ARCH
2665
2666 genCondJump id bool = do
2667   CondCode is_float cond code <- getCondCode bool
2668   return (
2669        code `appOL`
2670        toOL (
2671          if   is_float
2672          then [NOP, BF cond False id, NOP]
2673          else [BI cond False id, NOP]
2674        )
2675     )
2676
2677 #endif /* sparc_TARGET_ARCH */
2678
2679
2680 #if powerpc_TARGET_ARCH
2681
2682 genCondJump id bool = do
2683   CondCode is_float cond code <- getCondCode bool
2684   return (code `snocOL` BCC cond id)
2685
2686 #endif /* powerpc_TARGET_ARCH */
2687
2688
2689 -- -----------------------------------------------------------------------------
2690 --  Generating C calls
2691
2692 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2693 -- @get_arg@, which moves the arguments to the correct registers/stack
2694 -- locations.  Apart from that, the code is easy.
2695 --
2696 -- (If applicable) Do not fill the delay slots here; you will confuse the
2697 -- register allocator.
2698
2699 genCCall
2700     :: CmmCallTarget            -- function to call
2701     -> [(CmmReg,MachHint)]      -- where to put the result
2702     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2703     -> Maybe [GlobalReg]        -- volatile regs to save
2704     -> NatM InstrBlock
2705
2706 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2707
2708 #if alpha_TARGET_ARCH
2709
2710 ccallResultRegs =
2711
2712 genCCall fn cconv result_regs args
2713   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2714                           `thenNat` \ ((unused,_), argCode) ->
2715     let
2716         nRegs = length allArgRegs - length unused
2717         code = asmSeqThen (map ($ []) argCode)
2718     in
2719         returnSeq code [
2720             LDA pv (AddrImm (ImmLab (ptext fn))),
2721             JSR ra (AddrReg pv) nRegs,
2722             LDGP gp (AddrReg ra)]
2723   where
2724     ------------------------
2725     {-  Try to get a value into a specific register (or registers) for
2726         a call.  The first 6 arguments go into the appropriate
2727         argument register (separate registers for integer and floating
2728         point arguments, but used in lock-step), and the remaining
2729         arguments are dumped to the stack, beginning at 0(sp).  Our
2730         first argument is a pair of the list of remaining argument
2731         registers to be assigned for this call and the next stack
2732         offset to use for overflowing arguments.  This way,
2733         @get_Arg@ can be applied to all of a call's arguments using
2734         @mapAccumLNat@.
2735     -}
2736     get_arg
2737         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2738         -> StixTree             -- Current argument
2739         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2740
2741     -- We have to use up all of our argument registers first...
2742
2743     get_arg ((iDst,fDst):dsts, offset) arg
2744       = getRegister arg                     `thenNat` \ register ->
2745         let
2746             reg  = if isFloatingRep pk then fDst else iDst
2747             code = registerCode register reg
2748             src  = registerName register reg
2749             pk   = registerRep register
2750         in
2751         return (
2752             if isFloatingRep pk then
2753                 ((dsts, offset), if isFixed register then
2754                     code . mkSeqInstr (FMOV src fDst)
2755                     else code)
2756             else
2757                 ((dsts, offset), if isFixed register then
2758                     code . mkSeqInstr (OR src (RIReg src) iDst)
2759                     else code))
2760
2761     -- Once we have run out of argument registers, we move to the
2762     -- stack...
2763
2764     get_arg ([], offset) arg
2765       = getRegister arg                 `thenNat` \ register ->
2766         getNewRegNat (registerRep register)
2767                                         `thenNat` \ tmp ->
2768         let
2769             code = registerCode register tmp
2770             src  = registerName register tmp
2771             pk   = registerRep register
2772             sz   = primRepToSize pk
2773         in
2774         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2775
2776 #endif /* alpha_TARGET_ARCH */
2777
2778 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2779
2780 #if i386_TARGET_ARCH
2781
2782 -- we only cope with a single result for foreign calls
2783 genCCall (CmmPrim op) [(r,_)] args vols = do
2784   case op of
2785         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2786         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2787
2788         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2789         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2790
2791         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2792         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2793
2794         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2795         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2796
2797         other_op    -> outOfLineFloatOp op r args vols
2798  where
2799   actuallyInlineFloatOp rep instr [(x,_)]
2800         = do res <- trivialUFCode rep instr x
2801              any <- anyReg res
2802              return (any (getRegisterReg r))
2803
2804 genCCall target dest_regs args vols = do
2805     sizes_n_codes <- mapM push_arg (reverse args)
2806     delta <- getDeltaNat
2807     let
2808         (sizes, push_codes) = unzip sizes_n_codes
2809         tot_arg_size        = sum sizes
2810     -- in
2811     -- deal with static vs dynamic call targets
2812     (callinsns,cconv) <-
2813       case target of
2814         -- CmmPrim -> ...
2815         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2816            -> -- ToDo: stdcall arg sizes
2817               return (unitOL (CALL (Left fn_imm)), conv)
2818            where fn_imm = ImmCLbl lbl
2819         CmmForeignCall expr conv
2820            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2821                  ASSERT(dyn_rep == I32)
2822                   return (dyn_c `snocOL` CALL (Right dyn_r), conv)
2823
2824     let push_code = concatOL push_codes
2825         call = callinsns `appOL`
2826                toOL (
2827                         -- Deallocate parameters after call for ccall;
2828                         -- but not for stdcall (callee does it)
2829                   (if cconv == StdCallConv then [] else
2830                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2831                   ++
2832                   [DELTA (delta + tot_arg_size)]
2833                )
2834     -- in
2835     setDeltaNat (delta + tot_arg_size)
2836
2837     let
2838         -- assign the results, if necessary
2839         assign_code []     = nilOL
2840         assign_code [(dest,_hint)] =
2841           case rep of
2842                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2843                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2844                 F32 -> unitOL (GMOV fake0 r_dest)
2845                 F64 -> unitOL (GMOV fake0 r_dest)
2846                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2847           where
2848                 r_dest_hi = getHiVRegFromLo r_dest
2849                 rep = cmmRegRep dest
2850                 r_dest = getRegisterReg dest
2851         assign_code many = panic "genCCall.assign_code many"
2852
2853     return (push_code `appOL`
2854             call `appOL`
2855             assign_code dest_regs)
2856
2857   where
2858     arg_size F64 = 8
2859     arg_size F32 = 4
2860     arg_size _   = 4
2861
2862     push_arg :: (CmmExpr,MachHint){-current argument-}
2863                     -> NatM (Int, InstrBlock)  -- argsz, code
2864
2865     push_arg (arg,_hint) -- we don't need the hints on x86
2866       | arg_rep == I64 = do
2867         ChildCode64 code r_lo <- iselExpr64 arg
2868         delta <- getDeltaNat
2869         setDeltaNat (delta - 8)
2870         let
2871             r_hi = getHiVRegFromLo r_lo
2872         -- in
2873         return (8,     code `appOL`
2874                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
2875                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
2876                              DELTA (delta-8)]
2877             )
2878
2879       | otherwise = do
2880         (code, reg, sz) <- get_op arg
2881         delta <- getDeltaNat
2882         let size = arg_size sz
2883         setDeltaNat (delta-size)
2884         if (case sz of F64 -> True; F32 -> True; _ -> False)
2885            then return (size,
2886                         code `appOL`
2887                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
2888                               DELTA (delta-size),
2889                               GST sz reg (AddrBaseIndex (Just esp)
2890                                                         Nothing
2891                                                         (ImmInt 0))]
2892                        )
2893            else return (size,
2894                         code `snocOL`
2895                         PUSH I32 (OpReg reg) `snocOL`
2896                         DELTA (delta-size)
2897                        )
2898       where
2899          arg_rep = cmmExprRep arg
2900
2901     ------------
2902     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
2903     get_op op = do
2904         (reg,code) <- getSomeReg op
2905         return (code, reg, cmmExprRep op)
2906
2907
2908 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
2909   -> Maybe [GlobalReg] -> NatM InstrBlock
2910 outOfLineFloatOp mop res args vols
2911   | cmmRegRep res == F64
2912   = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
2913
2914   | otherwise
2915   = do uq <- getUniqueNat
2916        let
2917          tmp = CmmLocal (LocalReg uq F64)
2918        -- in
2919        code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
2920        code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
2921        return (code1 `appOL` code2)
2922   where
2923         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
2924         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
2925
2926         target = CmmForeignCall (CmmLit lbl) CCallConv
2927         lbl = CmmLabel (mkForeignLabel fn Nothing False)
2928
2929         fn = case mop of
2930               MO_F32_Exp   -> FSLIT("exp")
2931               MO_F32_Log   -> FSLIT("log")
2932
2933               MO_F32_Asin  -> FSLIT("asin")
2934               MO_F32_Acos  -> FSLIT("acos")
2935               MO_F32_Atan  -> FSLIT("atan")
2936
2937               MO_F32_Sinh  -> FSLIT("sinh")
2938               MO_F32_Cosh  -> FSLIT("cosh")
2939               MO_F32_Tanh  -> FSLIT("tanh")
2940               MO_F32_Pwr   -> FSLIT("pow")
2941
2942               MO_F64_Exp   -> FSLIT("exp")
2943               MO_F64_Log   -> FSLIT("log")
2944
2945               MO_F64_Asin  -> FSLIT("asin")
2946               MO_F64_Acos  -> FSLIT("acos")
2947               MO_F64_Atan  -> FSLIT("atan")
2948
2949               MO_F64_Sinh  -> FSLIT("sinh")
2950               MO_F64_Cosh  -> FSLIT("cosh")
2951               MO_F64_Tanh  -> FSLIT("tanh")
2952               MO_F64_Pwr   -> FSLIT("pow")
2953
2954               other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
2955
2956 #endif /* i386_TARGET_ARCH */
2957
2958 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2959
2960 #if sparc_TARGET_ARCH
2961 {-
2962    The SPARC calling convention is an absolute
2963    nightmare.  The first 6x32 bits of arguments are mapped into
2964    %o0 through %o5, and the remaining arguments are dumped to the
2965    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
2966
2967    If we have to put args on the stack, move %o6==%sp down by
2968    the number of words to go on the stack, to ensure there's enough space.
2969
2970    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
2971    16 words above the stack pointer is a word for the address of
2972    a structure return value.  I use this as a temporary location
2973    for moving values from float to int regs.  Certainly it isn't
2974    safe to put anything in the 16 words starting at %sp, since
2975    this area can get trashed at any time due to window overflows
2976    caused by signal handlers.
2977
2978    A final complication (if the above isn't enough) is that
2979    we can't blithely calculate the arguments one by one into
2980    %o0 .. %o5.  Consider the following nested calls:
2981
2982        fff a (fff b c)
2983
2984    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
2985    the inner call will itself use %o0, which trashes the value put there
2986    in preparation for the outer call.  Upshot: we need to calculate the
2987    args into temporary regs, and move those to arg regs or onto the
2988    stack only immediately prior to the call proper.  Sigh.
2989 -}
2990
2991 genCCall fn cconv kind args
2992   = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
2993     let
2994         (argcodes, vregss) = unzip argcode_and_vregs
2995         n_argRegs          = length allArgRegs
2996         n_argRegs_used     = min (length vregs) n_argRegs
2997         vregs              = concat vregss
2998     in
2999     -- deal with static vs dynamic call targets
3000     (case fn of
3001         Left t_static
3002            -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3003         Right dyn
3004            -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3005               return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3006     )
3007                                 `thenNat` \ callinsns ->
3008     let
3009         argcode = concatOL argcodes
3010         (move_sp_down, move_sp_up)
3011            = let diff = length vregs - n_argRegs
3012                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3013              in  if   nn <= 0
3014                  then (nilOL, nilOL)
3015                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3016         transfer_code
3017            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3018     in
3019         return (argcode       `appOL`
3020                    move_sp_down  `appOL`
3021                    transfer_code `appOL`
3022                    callinsns     `appOL`
3023                    unitOL NOP    `appOL`
3024                    move_sp_up)
3025   where
3026      -- function names that begin with '.' are assumed to be special
3027      -- internally generated names like '.mul,' which don't get an
3028      -- underscore prefix
3029      -- ToDo:needed (WDP 96/03) ???
3030      fn_static = unLeft fn
3031      fn__2 = case (headFS fn_static) of
3032                 '.' -> ImmLit (ftext fn_static)
3033                 _   -> ImmCLbl (mkForeignLabel fn_static False)
3034
3035      -- move args from the integer vregs into which they have been
3036      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3037      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3038
3039      move_final [] _ offset          -- all args done
3040         = []
3041
3042      move_final (v:vs) [] offset     -- out of aregs; move to stack
3043         = ST W v (spRel offset)
3044           : move_final vs [] (offset+1)
3045
3046      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3047         = OR False g0 (RIReg v) a
3048           : move_final vs az offset
3049
3050      -- generate code to calculate an argument, and move it into one
3051      -- or two integer vregs.
3052      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3053      arg_to_int_vregs arg
3054         | is64BitRep (repOfCmmExpr arg)
3055         = iselExpr64 arg                `thenNat` \ (ChildCode64 code vr_lo) ->
3056           let r_lo = VirtualRegI vr_lo
3057               r_hi = getHiVRegFromLo r_lo
3058           in  return (code, [r_hi, r_lo])
3059         | otherwise
3060         = getRegister arg                     `thenNat` \ register ->
3061           getNewRegNat (registerRep register) `thenNat` \ tmp ->
3062           let code = registerCode register tmp
3063               src  = registerName register tmp
3064               pk   = registerRep register
3065           in
3066           -- the value is in src.  Get it into 1 or 2 int vregs.
3067           case pk of
3068              F64 ->
3069                 getNewRegNat WordRep  `thenNat` \ v1 ->
3070                 getNewRegNat WordRep  `thenNat` \ v2 ->
3071                 return (
3072                    code                          `snocOL`
3073                    FMOV DF src f0                `snocOL`
3074                    ST   F  f0 (spRel 16)         `snocOL`
3075                    LD   W  (spRel 16) v1         `snocOL`
3076                    ST   F  (fPair f0) (spRel 16) `snocOL`
3077                    LD   W  (spRel 16) v2
3078                    ,
3079                    [v1,v2]
3080                 )
3081              F32 ->
3082                 getNewRegNat WordRep  `thenNat` \ v1 ->
3083                 return (
3084                    code                    `snocOL`
3085                    ST   F  src (spRel 16)  `snocOL`
3086                    LD   W  (spRel 16) v1
3087                    ,
3088                    [v1]
3089                 )
3090              other ->
3091                 getNewRegNat WordRep  `thenNat` \ v1 ->
3092                 return (
3093                    code `snocOL` OR False g0 (RIReg src) v1
3094                    ,
3095                    [v1]
3096                 )
3097 #endif /* sparc_TARGET_ARCH */
3098
3099 #if powerpc_TARGET_ARCH
3100
3101 #if darwin_TARGET_OS || linux_TARGET_OS
3102 {-
3103     The PowerPC calling convention for Darwin/Mac OS X
3104     is described in Apple's document
3105     "Inside Mac OS X - Mach-O Runtime Architecture".
3106
3107     PowerPC Linux uses the System V Release 4 Calling Convention
3108     for PowerPC. It is described in the
3109     "System V Application Binary Interface PowerPC Processor Supplement".
3110
3111     Both conventions are similar:
3112     Parameters may be passed in general-purpose registers starting at r3, in
3113     floating point registers starting at f1, or on the stack.
3114
3115     But there are substantial differences:
3116     * The number of registers used for parameter passing and the exact set of
3117       nonvolatile registers differs (see MachRegs.lhs).
3118     * On Darwin, stack space is always reserved for parameters, even if they are
3119       passed in registers. The called routine may choose to save parameters from
3120       registers to the corresponding space on the stack.
3121     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3122       parameter is passed in an FPR.
3123     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3124       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3125       Darwin just treats an I64 like two separate I32s (high word first).
3126
3127     According to both conventions, The parameter area should be part of the
3128     caller's stack frame, allocated in the caller's prologue code (large enough
3129     to hold the parameter lists for all called routines). The NCG already
3130     uses the stack for register spilling, leaving 64 bytes free at the top.
3131     If we need a larger parameter area than that, we just allocate a new stack
3132     frame just before ccalling.
3133 -}
3134
3135 genCCall target dest_regs argsAndHints vols
3136   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3137         -- we rely on argument promotion in the codeGen
3138     do
3139         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3140                                                         (zip args argReps)
3141                                                         allArgRegs allFPArgRegs
3142                                                         initialStackOffset
3143                                                         (toOL []) []
3144
3145         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3146             codeAfter = move_sp_up finalStack `appOL` moveResult
3147
3148         case labelOrExpr of
3149             Left lbl -> do
3150                 addImportNat True lbl
3151                 return (         codeBefore
3152                         `snocOL` BL lbl usedRegs
3153                         `appOL`  codeAfter)
3154             Right dyn -> do
3155                 (dynReg, dynCode) <- getSomeReg dyn
3156                 return (         dynCode
3157                         `snocOL` MTCTR dynReg
3158                         `appOL`  codeBefore
3159                         `snocOL` BCTRL usedRegs
3160                         `appOL`  codeAfter)
3161     where
3162 #if darwin_TARGET_OS
3163         initialStackOffset = 24
3164             -- size of linkage area + size of arguments, in bytes
3165         stackDelta _finalStack = roundTo16 $ (24 +) $ max 32 $ sum $
3166                                        map machRepByteWidth argReps
3167 #elif linux_TARGET_OS
3168         initialStackOffset = 8
3169         stackDelta finalStack = roundTo16 finalStack
3170 #endif
3171         args = map fst argsAndHints
3172         argReps = map cmmExprRep args
3173
3174         roundTo16 x | x `mod` 16 == 0 = x
3175                     | otherwise = x + 16 - (x `mod` 16)
3176
3177         move_sp_down finalStack
3178                | delta > 64 =
3179                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3180                               DELTA (-delta)]
3181                | otherwise = nilOL
3182                where delta = stackDelta finalStack
3183         move_sp_up finalStack
3184                | delta > 64 =
3185                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3186                               DELTA 0]
3187                | otherwise = nilOL
3188                where delta = stackDelta finalStack
3189
3190
3191         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3192         passArguments ((arg,I64):args) gprs fprs stackOffset
3193                accumCode accumUsed =
3194             do
3195                 ChildCode64 code vr_lo <- iselExpr64 arg
3196                 let vr_hi = getHiVRegFromLo vr_lo
3197
3198 #if darwin_TARGET_OS
3199                 passArguments args
3200                               (drop 2 gprs)
3201                               fprs
3202                               (stackOffset+8)
3203                               (accumCode `appOL` code
3204                                     `snocOL` storeWord vr_hi gprs stackOffset
3205                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3206                               ((take 2 gprs) ++ accumUsed)
3207             where
3208                 storeWord vr (gpr:_) offset = MR gpr vr
3209                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3210
3211 #elif linux_TARGET_OS
3212                 let stackCode = accumCode `appOL` code
3213                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset))
3214                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset+4)))
3215                     regCode hireg loreg =
3216                         accumCode `appOL` code
3217                             `snocOL` MR hireg vr_hi
3218                             `snocOL` MR loreg vr_lo
3219
3220                 case gprs of
3221                     hireg : loreg : regs | even (length gprs) ->
3222                         passArguments args regs fprs stackOffset
3223                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3224                     _skipped : hireg : loreg : regs ->
3225                         passArguments args regs fprs stackOffset
3226                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3227                     _ -> -- only one or no regs left
3228                         passArguments args [] fprs (stackOffset+8)
3229                                       stackCode accumUsed
3230 #endif
3231
3232         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3233             | reg : _ <- regs = do
3234                 register <- getRegister arg
3235                 let code = case register of
3236                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3237                             Any _ acode -> acode reg
3238                 passArguments args
3239                               (drop nGprs gprs)
3240                               (drop nFprs fprs)
3241 #if darwin_TARGET_OS
3242         -- The Darwin ABI requires that we reserve stack slots for register parameters
3243                               (stackOffset + stackBytes)
3244 #elif linux_TARGET_OS
3245         -- ... the SysV ABI doesn't.
3246                               stackOffset
3247 #endif
3248                               (accumCode `appOL` code)
3249                               (reg : accumUsed)
3250             | otherwise = do
3251                 (vr, code) <- getSomeReg arg
3252                 passArguments args
3253                               (drop nGprs gprs)
3254                               (drop nFprs fprs)
3255                               (stackOffset + stackBytes)
3256                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3257                               accumUsed
3258             where
3259                 stackSlot = AddrRegImm sp (ImmInt stackOffset)
3260                 (nGprs, nFprs, stackBytes, regs) = case rep of
3261                     I32 -> (1, 0, 4, gprs)
3262 #if darwin_TARGET_OS
3263         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3264         -- we use the FPRs.
3265                     F32 -> (1, 1, 4, fprs)
3266                     F64 -> (2, 1, 8, fprs)
3267 #elif linux_TARGET_OS
3268         -- ... the SysV ABI doesn't.
3269                     F32 -> (0, 1, 4, fprs)
3270                     F64 -> (0, 1, 8, fprs)
3271 #endif
3272
3273         moveResult =
3274             case dest_regs of
3275                 [] -> nilOL
3276                 [(dest, _hint)]
3277                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3278                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3279                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3280                                           MR r_dest r4]
3281                     | otherwise -> unitOL (MR r_dest r3)
3282                     where rep = cmmRegRep dest
3283                           r_dest = getRegisterReg dest
3284
3285         (labelOrExpr, reduceToF32) = case target of
3286             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> (Left lbl, False)
3287             CmmForeignCall expr conv -> (Right expr, False)
3288             CmmPrim mop -> (Left $ mkForeignLabel label Nothing False, reduce)
3289                 where
3290                     (label, reduce) = case mop of
3291                         MO_F32_Exp   -> (FSLIT("exp"), True)
3292                         MO_F32_Log   -> (FSLIT("log"), True)
3293                         MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3294
3295                         MO_F32_Sin   -> (FSLIT("sin"), True)
3296                         MO_F32_Cos   -> (FSLIT("cos"), True)
3297                         MO_F32_Tan   -> (FSLIT("tan"), True)
3298
3299                         MO_F32_Asin  -> (FSLIT("asin"), True)
3300                         MO_F32_Acos  -> (FSLIT("acos"), True)
3301                         MO_F32_Atan  -> (FSLIT("atan"), True)
3302
3303                         MO_F32_Sinh  -> (FSLIT("sinh"), True)
3304                         MO_F32_Cosh  -> (FSLIT("cosh"), True)
3305                         MO_F32_Tanh  -> (FSLIT("tanh"), True)
3306                         MO_F32_Pwr   -> (FSLIT("pow"), True)
3307
3308                         MO_F64_Exp   -> (FSLIT("exp"), False)
3309                         MO_F64_Log   -> (FSLIT("log"), False)
3310                         MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3311
3312                         MO_F64_Sin   -> (FSLIT("sin"), False)
3313                         MO_F64_Cos   -> (FSLIT("cos"), False)
3314                         MO_F64_Tan   -> (FSLIT("tan"), False)
3315
3316                         MO_F64_Asin  -> (FSLIT("asin"), False)
3317                         MO_F64_Acos  -> (FSLIT("acos"), False)
3318                         MO_F64_Atan  -> (FSLIT("atan"), False)
3319
3320                         MO_F64_Sinh  -> (FSLIT("sinh"), False)
3321                         MO_F64_Cosh  -> (FSLIT("cosh"), False)
3322                         MO_F64_Tanh  -> (FSLIT("tanh"), False)
3323                         MO_F64_Pwr   -> (FSLIT("pow"), False)
3324                         other -> pprPanic "genCCall(ppc): unknown callish op"
3325                                         (pprCallishMachOp other)
3326
3327 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3328
3329 #endif /* powerpc_TARGET_ARCH */
3330
3331
3332 -- -----------------------------------------------------------------------------
3333 -- Generating a table-branch
3334
3335 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3336
3337 #if i386_TARGET_ARCH
3338 genSwitch expr ids = do
3339   (reg,e_code) <- getSomeReg expr
3340   lbl <- getNewLabelNat
3341   let
3342         jumpTable = map jumpTableEntry ids
3343         op = OpAddr (AddrBaseIndex Nothing (Just (reg,4)) (ImmCLbl lbl))
3344         code = e_code `appOL` toOL [
3345                 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3346                 JMP_TBL op [ id | Just id <- ids ]
3347              ]
3348   -- in
3349   return code
3350 #elif powerpc_TARGET_ARCH
3351 genSwitch expr ids = do
3352   (reg,e_code) <- getSomeReg expr
3353   tmp <- getNewRegNat I32
3354   lbl <- getNewLabelNat
3355   let
3356         jumpTable = map jumpTableEntry ids
3357
3358         code = e_code `appOL` toOL [
3359                         LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3360                         SLW tmp reg (RIImm (ImmInt 2)),
3361                         ADDIS tmp tmp (HA (ImmCLbl lbl)),
3362                         LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3363                         MTCTR tmp,
3364                         BCTR [ id | Just id <- ids ]
3365                 ]
3366   -- in
3367   return code
3368 #else
3369 genSwitch expr ids = panic "ToDo: genSwitch"
3370 #endif
3371
3372 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3373 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3374     where blockLabel = mkAsmTempLabel id
3375
3376 -- -----------------------------------------------------------------------------
3377 -- Support bits
3378 -- -----------------------------------------------------------------------------
3379
3380
3381 -- -----------------------------------------------------------------------------
3382 -- 'condIntReg' and 'condFltReg': condition codes into registers
3383
3384 -- Turn those condition codes into integers now (when they appear on
3385 -- the right hand side of an assignment).
3386 --
3387 -- (If applicable) Do not fill the delay slots here; you will confuse the
3388 -- register allocator.
3389
3390 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3391
3392 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3393
3394 #if alpha_TARGET_ARCH
3395 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3396 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3397 #endif /* alpha_TARGET_ARCH */
3398
3399 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3400
3401 #if i386_TARGET_ARCH
3402
3403 condIntReg cond x y = do
3404   CondCode _ cond cond_code <- condIntCode cond x y
3405   tmp <- getNewRegNat I8
3406   let
3407         code dst = cond_code `appOL` toOL [
3408                     SETCC cond (OpReg tmp),
3409                     MOV I32 (OpReg tmp) (OpReg dst),
3410                     AND I32 (OpImm (ImmInt 1)) (OpReg dst)
3411                   ]
3412                    -- NB. (1) Tha AND is needed here because the x86 only
3413                    -- sets the low byte in the SETCC instruction.
3414                    -- NB. (2) The extra temporary register is a hack to
3415                    -- work around the fact that the setcc instructions only
3416                    -- accept byte registers.  dst might not be a byte-able reg,
3417                    -- but currently all free registers are byte-able, so we're
3418                    -- guaranteed that a new temporary is byte-able.
3419   -- in
3420   return (Any I32 code)
3421
3422
3423 condFltReg cond x y = do
3424   lbl1 <- getBlockIdNat
3425   lbl2 <- getBlockIdNat
3426   CondCode _ cond cond_code <- condFltCode cond x y
3427   let
3428         code dst = cond_code `appOL` toOL [
3429             JXX cond lbl1,
3430             MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
3431             JXX ALWAYS lbl2,
3432             NEWBLOCK lbl1,
3433             MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
3434             JXX ALWAYS lbl2,
3435             NEWBLOCK lbl2]
3436                 -- SIGH, have to split up this block somehow...
3437   -- in
3438   return (Any I32 code)
3439
3440 #endif /* i386_TARGET_ARCH */
3441
3442 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3443
3444 #if sparc_TARGET_ARCH
3445
3446 condIntReg EQQ x (StInt 0)
3447   = getRegister x               `thenNat` \ register ->
3448     getNewRegNat IntRep         `thenNat` \ tmp ->
3449     let
3450         code = registerCode register tmp
3451         src  = registerName register tmp
3452         code__2 dst = code `appOL` toOL [
3453             SUB False True g0 (RIReg src) g0,
3454             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3455     in
3456     return (Any IntRep code__2)
3457
3458 condIntReg EQQ x y
3459   = getRegister x               `thenNat` \ register1 ->
3460     getRegister y               `thenNat` \ register2 ->
3461     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3462     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3463     let
3464         code1 = registerCode register1 tmp1
3465         src1  = registerName register1 tmp1
3466         code2 = registerCode register2 tmp2
3467         src2  = registerName register2 tmp2
3468         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3469             XOR False src1 (RIReg src2) dst,
3470             SUB False True g0 (RIReg dst) g0,
3471             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3472     in
3473     return (Any IntRep code__2)
3474
3475 condIntReg NE x (StInt 0)
3476   = getRegister x               `thenNat` \ register ->
3477     getNewRegNat IntRep         `thenNat` \ tmp ->
3478     let
3479         code = registerCode register tmp
3480         src  = registerName register tmp
3481         code__2 dst = code `appOL` toOL [
3482             SUB False True g0 (RIReg src) g0,
3483             ADD True False g0 (RIImm (ImmInt 0)) dst]
3484     in
3485     return (Any IntRep code__2)
3486
3487 condIntReg NE x y
3488   = getRegister x               `thenNat` \ register1 ->
3489     getRegister y               `thenNat` \ register2 ->
3490     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3491     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3492     let
3493         code1 = registerCode register1 tmp1
3494         src1  = registerName register1 tmp1
3495         code2 = registerCode register2 tmp2
3496         src2  = registerName register2 tmp2
3497         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3498             XOR False src1 (RIReg src2) dst,
3499             SUB False True g0 (RIReg dst) g0,
3500             ADD True False g0 (RIImm (ImmInt 0)) dst]
3501     in
3502     return (Any IntRep code__2)
3503
3504 condIntReg cond x y
3505   = getBlockIdNat               `thenNat` \ lbl1 ->
3506     getBlockIdNat               `thenNat` \ lbl2 ->
3507     condIntCode cond x y        `thenNat` \ condition ->
3508     let
3509         code = condCode condition
3510         cond = condName condition
3511         code__2 dst = code `appOL` toOL [
3512             BI cond False (ImmCLbl lbl1), NOP,
3513             OR False g0 (RIImm (ImmInt 0)) dst,
3514             BI ALWAYS False (ImmCLbl lbl2), NOP,
3515             NEWBLOCK lbl1,
3516             OR False g0 (RIImm (ImmInt 1)) dst,
3517             NEWBLOCK lbl2]
3518     in
3519     return (Any IntRep code__2)
3520
3521 condFltReg cond x y
3522   = getBlockIdNat               `thenNat` \ lbl1 ->
3523     getBlockIdNat               `thenNat` \ lbl2 ->
3524     condFltCode cond x y        `thenNat` \ condition ->
3525     let
3526         code = condCode condition
3527         cond = condName condition
3528         code__2 dst = code `appOL` toOL [
3529             NOP,
3530             BF cond False (ImmCLbl lbl1), NOP,
3531             OR False g0 (RIImm (ImmInt 0)) dst,
3532             BI ALWAYS False (ImmCLbl lbl2), NOP,
3533             NEWBLOCK lbl1,
3534             OR False g0 (RIImm (ImmInt 1)) dst,
3535             NEWBLOCK lbl2]
3536     in
3537     return (Any IntRep code__2)
3538
3539 #endif /* sparc_TARGET_ARCH */
3540
3541 #if powerpc_TARGET_ARCH
3542 condReg getCond = do
3543     lbl1 <- getBlockIdNat
3544     lbl2 <- getBlockIdNat
3545     CondCode _ cond cond_code <- getCond
3546     let
3547 {-        code dst = cond_code `appOL` toOL [
3548                 BCC cond lbl1,
3549                 LI dst (ImmInt 0),
3550                 BCC ALWAYS lbl2,
3551                 NEWBLOCK lbl1,
3552                 LI dst (ImmInt 1),
3553                 BCC ALWAYS lbl2,
3554                 NEWBLOCK lbl2
3555             ]-}
3556         code dst = cond_code
3557             `appOL` negate_code
3558             `appOL` toOL [
3559                 MFCR dst,
3560                 RLWINM dst dst (bit + 1) 31 31
3561             ]
3562
3563         negate_code | do_negate = unitOL (CRNOR bit bit bit)
3564                     | otherwise = nilOL
3565
3566         (bit, do_negate) = case cond of
3567             LTT -> (0, False)
3568             LE  -> (1, True)
3569             EQQ -> (2, False)
3570             GE  -> (0, True)
3571             GTT -> (1, False)
3572
3573             NE  -> (2, True)
3574
3575             LU  -> (0, False)
3576             LEU -> (1, True)
3577             GEU -> (0, True)
3578             GU  -> (1, False)
3579
3580     return (Any I32 code)
3581
3582 condIntReg cond x y = condReg (condIntCode cond x y)
3583 condFltReg cond x y = condReg (condFltCode cond x y)
3584 #endif /* powerpc_TARGET_ARCH */
3585
3586
3587 -- -----------------------------------------------------------------------------
3588 -- 'trivial*Code': deal with trivial instructions
3589
3590 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
3591 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
3592 -- Only look for constants on the right hand side, because that's
3593 -- where the generic optimizer will have put them.
3594
3595 -- Similarly, for unary instructions, we don't have to worry about
3596 -- matching an StInt as the argument, because genericOpt will already
3597 -- have handled the constant-folding.
3598
3599 trivialCode
3600     :: MachRep
3601     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
3602       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
3603                      -> Maybe (Operand -> Operand -> Instr)
3604       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
3605       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
3606       ,))))
3607     -> CmmExpr -> CmmExpr -- the two arguments
3608     -> NatM Register
3609
3610 #ifndef powerpc_TARGET_ARCH
3611 trivialFCode
3612     :: MachRep
3613     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
3614       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
3615       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
3616       ,)))
3617     -> CmmExpr -> CmmExpr -- the two arguments
3618     -> NatM Register
3619 #endif
3620
3621 trivialUCode
3622     :: MachRep
3623     -> IF_ARCH_alpha((RI -> Reg -> Instr)
3624       ,IF_ARCH_i386 ((Operand -> Instr)
3625       ,IF_ARCH_sparc((RI -> Reg -> Instr)
3626       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
3627       ,))))
3628     -> CmmExpr  -- the one argument
3629     -> NatM Register
3630
3631 #ifndef powerpc_TARGET_ARCH
3632 trivialUFCode
3633     :: MachRep
3634     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
3635       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
3636       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
3637       ,)))
3638     -> CmmExpr -- the one argument
3639     -> NatM Register
3640 #endif
3641
3642 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3643
3644 #if alpha_TARGET_ARCH
3645
3646 trivialCode instr x (StInt y)
3647   | fits8Bits y
3648   = getRegister x               `thenNat` \ register ->
3649     getNewRegNat IntRep         `thenNat` \ tmp ->
3650     let
3651         code = registerCode register tmp
3652         src1 = registerName register tmp
3653         src2 = ImmInt (fromInteger y)
3654         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
3655     in
3656     return (Any IntRep code__2)
3657
3658 trivialCode instr x y
3659   = getRegister x               `thenNat` \ register1 ->
3660     getRegister y               `thenNat` \ register2 ->
3661     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3662     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3663     let
3664         code1 = registerCode register1 tmp1 []
3665         src1  = registerName register1 tmp1
3666         code2 = registerCode register2 tmp2 []
3667         src2  = registerName register2 tmp2
3668         code__2 dst = asmSeqThen [code1, code2] .
3669                      mkSeqInstr (instr src1 (RIReg src2) dst)
3670     in
3671     return (Any IntRep code__2)
3672
3673 ------------
3674 trivialUCode instr x
3675   = getRegister x               `thenNat` \ register ->
3676     getNewRegNat IntRep         `thenNat` \ tmp ->
3677     let
3678         code = registerCode register tmp
3679         src  = registerName register tmp
3680         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
3681     in
3682     return (Any IntRep code__2)
3683
3684 ------------
3685 trivialFCode _ instr x y
3686   = getRegister x               `thenNat` \ register1 ->
3687     getRegister y               `thenNat` \ register2 ->
3688     getNewRegNat F64    `thenNat` \ tmp1 ->
3689     getNewRegNat F64    `thenNat` \ tmp2 ->
3690     let
3691         code1 = registerCode register1 tmp1
3692         src1  = registerName register1 tmp1
3693
3694         code2 = registerCode register2 tmp2
3695         src2  = registerName register2 tmp2
3696
3697         code__2 dst = asmSeqThen [code1 [], code2 []] .
3698                       mkSeqInstr (instr src1 src2 dst)
3699     in
3700     return (Any F64 code__2)
3701
3702 trivialUFCode _ instr x
3703   = getRegister x               `thenNat` \ register ->
3704     getNewRegNat F64    `thenNat` \ tmp ->
3705     let
3706         code = registerCode register tmp
3707         src  = registerName register tmp
3708         code__2 dst = code . mkSeqInstr (instr src dst)
3709     in
3710     return (Any F64 code__2)
3711
3712 #endif /* alpha_TARGET_ARCH */
3713
3714 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3715
3716 #if i386_TARGET_ARCH
3717
3718 {-
3719 The Rules of the Game are:
3720
3721 * You cannot assume anything about the destination register dst;
3722   it may be anything, including a fixed reg.
3723
3724 * You may compute an operand into a fixed reg, but you may not
3725   subsequently change the contents of that fixed reg.  If you
3726   want to do so, first copy the value either to a temporary
3727   or into dst.  You are free to modify dst even if it happens
3728   to be a fixed reg -- that's not your problem.
3729
3730 * You cannot assume that a fixed reg will stay live over an
3731   arbitrary computation.  The same applies to the dst reg.
3732
3733 * Temporary regs obtained from getNewRegNat are distinct from
3734   each other and from all other regs, and stay live over
3735   arbitrary computations.
3736
3737 --------------------
3738
3739 SDM's version of The Rules:
3740
3741 * If getRegister returns Any, that means it can generate correct
3742   code which places the result in any register, period.  Even if that
3743   register happens to be read during the computation.
3744
3745   Corollary #1: this means that if you are generating code for an
3746   operation with two arbitrary operands, you cannot assign the result
3747   of the first operand into the destination register before computing
3748   the second operand.  The second operand might require the old value
3749   of the destination register.
3750
3751   Corollary #2: A function might be able to generate more efficient
3752   code if it knows the destination register is a new temporary (and
3753   therefore not read by any of the sub-computations).
3754
3755 * If getRegister returns Any, then the code it generates may modify only:
3756         (a) fresh temporaries
3757         (b) the destination register
3758         (c) known registers (eg. %ecx is used by shifts)
3759   In particular, it may *not* modify global registers, unless the global
3760   register happens to be the destination register.
3761 -}
3762
3763 trivialCode rep instr maybe_revinstr a (CmmLit lit_b) = do
3764   a_code <- getAnyReg a
3765   let
3766        code dst
3767           = a_code dst `snocOL`
3768             instr (OpImm (litToImm lit_b)) (OpReg dst)
3769   -- in
3770   return (Any rep code)
3771
3772 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
3773   b_code <- getAnyReg b
3774   let
3775        code dst
3776          = b_code dst `snocOL`
3777            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
3778   -- in
3779   return (Any rep code)
3780
3781 trivialCode rep instr maybe_revinstr a b = do
3782   (b_op, b_code) <- getOperand b
3783   a_code <- getAnyReg a
3784   tmp <- getNewRegNat rep
3785   let
3786      -- We want the value of b to stay alive across the computation of a.
3787      -- But, we want to calculate a straight into the destination register,
3788      -- because the instruction only has two operands (dst := dst `op` src).
3789      -- The troublesome case is when the result of b is in the same register
3790      -- as the destination reg.  In this case, we have to save b in a
3791      -- new temporary across the computation of a.
3792      code dst
3793         | dst `clashesWith` b_op =
3794                 b_code `appOL`
3795                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
3796                 a_code dst `snocOL`
3797                 instr (OpReg tmp) (OpReg dst)
3798         | otherwise =
3799                 b_code `appOL`
3800                 a_code dst `snocOL`
3801                 instr b_op (OpReg dst)
3802   -- in
3803   return (Any rep code)
3804  where
3805   reg `clashesWith` OpReg reg2   = reg == reg2
3806   reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
3807
3808 -----------
3809
3810 trivialUCode rep instr x = do
3811   x_code <- getAnyReg x
3812   let
3813      code dst =
3814         x_code dst `snocOL`
3815         instr (OpReg dst)
3816   -- in
3817   return (Any rep code)
3818
3819 -----------
3820
3821 trivialFCode pk instr x y = do
3822   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
3823   (y_reg, y_code) <- getSomeReg y
3824   let
3825      code dst =
3826         x_code `appOL`
3827         y_code `snocOL`
3828         instr pk x_reg y_reg dst
3829   -- in
3830   return (Any pk code)
3831
3832 -------------
3833
3834 trivialUFCode rep instr x = do
3835   (x_reg, x_code) <- getSomeReg x
3836   let
3837      code dst =
3838         x_code `snocOL`
3839         instr x_reg dst
3840   -- in
3841   return (Any rep code)
3842
3843 #endif /* i386_TARGET_ARCH */
3844
3845 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3846
3847 #if sparc_TARGET_ARCH
3848
3849 trivialCode instr x (StInt y)
3850   | fits13Bits y
3851   = getRegister x               `thenNat` \ register ->
3852     getNewRegNat IntRep         `thenNat` \ tmp ->
3853     let
3854         code = registerCode register tmp
3855         src1 = registerName register tmp
3856         src2 = ImmInt (fromInteger y)
3857         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
3858     in
3859     return (Any IntRep code__2)
3860
3861 trivialCode instr x y
3862   = getRegister x               `thenNat` \ register1 ->
3863     getRegister y               `thenNat` \ register2 ->
3864     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3865     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3866     let
3867         code1 = registerCode register1 tmp1
3868         src1  = registerName register1 tmp1
3869         code2 = registerCode register2 tmp2
3870         src2  = registerName register2 tmp2
3871         code__2 dst = code1 `appOL` code2 `snocOL`
3872                       instr src1 (RIReg src2) dst
3873     in
3874     return (Any IntRep code__2)
3875
3876 ------------
3877 trivialFCode pk instr x y
3878   = getRegister x               `thenNat` \ register1 ->
3879     getRegister y               `thenNat` \ register2 ->
3880     getNewRegNat (registerRep register1)
3881                                 `thenNat` \ tmp1 ->
3882     getNewRegNat (registerRep register2)
3883                                 `thenNat` \ tmp2 ->
3884     getNewRegNat F64    `thenNat` \ tmp ->
3885     let
3886         promote x = FxTOy F DF x tmp
3887
3888         pk1   = registerRep register1
3889         code1 = registerCode register1 tmp1
3890         src1  = registerName register1 tmp1
3891
3892         pk2   = registerRep register2
3893         code2 = registerCode register2 tmp2
3894         src2  = registerName register2 tmp2
3895
3896         code__2 dst =
3897                 if pk1 == pk2 then
3898                     code1 `appOL` code2 `snocOL`
3899                     instr (primRepToSize pk) src1 src2 dst
3900                 else if pk1 == F32 then
3901                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
3902                     instr DF tmp src2 dst
3903                 else
3904                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
3905                     instr DF src1 tmp dst
3906     in
3907     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
3908
3909 ------------
3910 trivialUCode instr x
3911   = getRegister x               `thenNat` \ register ->
3912     getNewRegNat IntRep         `thenNat` \ tmp ->
3913     let
3914         code = registerCode register tmp
3915         src  = registerName register tmp
3916         code__2 dst = code `snocOL` instr (RIReg src) dst
3917     in
3918     return (Any IntRep code__2)
3919
3920 -------------
3921 trivialUFCode pk instr x
3922   = getRegister x               `thenNat` \ register ->
3923     getNewRegNat pk             `thenNat` \ tmp ->
3924     let
3925         code = registerCode register tmp
3926         src  = registerName register tmp
3927         code__2 dst = code `snocOL` instr src dst
3928     in
3929     return (Any pk code__2)
3930
3931 #endif /* sparc_TARGET_ARCH */
3932
3933 #if powerpc_TARGET_ARCH
3934
3935 {-
3936 Wolfgang's PowerPC version of The Rules:
3937
3938 A slightly modified version of The Rules to take advantage of the fact
3939 that PowerPC instructions work on all registers and don't implicitly
3940 clobber any fixed registers.
3941
3942 * The only expression for which getRegister returns Fixed is (CmmReg reg).
3943
3944 * If getRegister returns Any, then the code it generates may modify only:
3945         (a) fresh temporaries
3946         (b) the destination register
3947   It may *not* modify global registers, unless the global
3948   register happens to be the destination register.
3949   It may not clobber any other registers. In fact, only ccalls clobber any
3950   fixed registers.
3951   Also, it may not modify the counter register (used by genCCall).
3952
3953   Corollary: If a getRegister for a subexpression returns Fixed, you need
3954   not move it to a fresh temporary before evaluating the next subexpression.
3955   The Fixed register won't be modified.
3956   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
3957
3958 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
3959   the value of the destination register.
3960 -}
3961
3962 trivialCode rep signed instr x (CmmLit (CmmInt y _))
3963     | Just imm <- makeImmediate rep signed y
3964     = do
3965         (src1, code1) <- getSomeReg x
3966         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
3967         return (Any rep code)
3968
3969 trivialCode rep signed instr x y = do
3970     (src1, code1) <- getSomeReg x
3971     (src2, code2) <- getSomeReg y
3972     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
3973     return (Any rep code)
3974
3975 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
3976     -> CmmExpr -> CmmExpr -> NatM Register
3977 trivialCodeNoImm rep instr x y = do
3978     (src1, code1) <- getSomeReg x
3979     (src2, code2) <- getSomeReg y
3980     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
3981     return (Any rep code)
3982
3983 trivialUCode rep instr x = do
3984     (src, code) <- getSomeReg x
3985     let code' dst = code `snocOL` instr dst src
3986     return (Any rep code')
3987
3988 -- There is no "remainder" instruction on the PPC, so we have to do
3989 -- it the hard way.
3990 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
3991
3992 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
3993     -> CmmExpr -> CmmExpr -> NatM Register
3994 remainderCode rep div x y = do
3995     (src1, code1) <- getSomeReg x
3996     (src2, code2) <- getSomeReg y
3997     let code dst = code1 `appOL` code2 `appOL` toOL [
3998                 div dst src1 src2,
3999                 MULLW dst dst (RIReg src2),
4000                 SUBF dst dst src1
4001             ]
4002     return (Any rep code)
4003
4004 #endif /* powerpc_TARGET_ARCH */
4005
4006
4007 -- -----------------------------------------------------------------------------
4008 --  Coercing to/from integer/floating-point...
4009
4010 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4011 -- conversions.  We have to store temporaries in memory to move
4012 -- between the integer and the floating point register sets.
4013
4014 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4015 -- pretend, on sparc at least, that double and float regs are seperate
4016 -- kinds, so the value has to be computed into one kind before being
4017 -- explicitly "converted" to live in the other kind.
4018
4019 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4020 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4021
4022 #ifdef sparc_TARGET_ARCH
4023 coerceDbl2Flt :: CmmExpr -> NatM Register
4024 coerceFlt2Dbl :: CmmExpr -> NatM Register
4025 #endif
4026
4027 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4028
4029 #if alpha_TARGET_ARCH
4030
4031 coerceInt2FP _ x
4032   = getRegister x               `thenNat` \ register ->
4033     getNewRegNat IntRep         `thenNat` \ reg ->
4034     let
4035         code = registerCode register reg
4036         src  = registerName register reg
4037
4038         code__2 dst = code . mkSeqInstrs [
4039             ST Q src (spRel 0),
4040             LD TF dst (spRel 0),
4041             CVTxy Q TF dst dst]
4042     in
4043     return (Any F64 code__2)
4044
4045 -------------
4046 coerceFP2Int x
4047   = getRegister x               `thenNat` \ register ->
4048     getNewRegNat F64    `thenNat` \ tmp ->
4049     let
4050         code = registerCode register tmp
4051         src  = registerName register tmp
4052
4053         code__2 dst = code . mkSeqInstrs [
4054             CVTxy TF Q src tmp,
4055             ST TF tmp (spRel 0),
4056             LD Q dst (spRel 0)]
4057     in
4058     return (Any IntRep code__2)
4059
4060 #endif /* alpha_TARGET_ARCH */
4061
4062 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4063
4064 #if i386_TARGET_ARCH
4065
4066 coerceInt2FP from to x = do
4067   (x_reg, x_code) <- getSomeReg x
4068   let
4069         opc  = case to of F32 -> GITOF; F64 -> GITOD
4070         code dst = x_code `snocOL` opc x_reg dst
4071         -- ToDo: works for non-I32 reps?
4072   -- in
4073   return (Any to code)
4074
4075 ------------
4076
4077 coerceFP2Int from to x = do
4078   (x_reg, x_code) <- getSomeReg x
4079   let
4080         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4081         code dst = x_code `snocOL` opc x_reg dst
4082         -- ToDo: works for non-I32 reps?
4083   -- in
4084   return (Any to code)
4085
4086 #endif /* i386_TARGET_ARCH */
4087
4088 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4089
4090 #if sparc_TARGET_ARCH
4091
4092 coerceInt2FP pk x
4093   = getRegister x               `thenNat` \ register ->
4094     getNewRegNat IntRep         `thenNat` \ reg ->
4095     let
4096         code = registerCode register reg
4097         src  = registerName register reg
4098
4099         code__2 dst = code `appOL` toOL [
4100             ST W src (spRel (-2)),
4101             LD W (spRel (-2)) dst,
4102             FxTOy W (primRepToSize pk) dst dst]
4103     in
4104     return (Any pk code__2)
4105
4106 ------------
4107 coerceFP2Int fprep x
4108   = ASSERT(fprep == F64 || fprep == F32)
4109     getRegister x               `thenNat` \ register ->
4110     getNewRegNat fprep          `thenNat` \ reg ->
4111     getNewRegNat F32    `thenNat` \ tmp ->
4112     let
4113         code = registerCode register reg
4114         src  = registerName register reg
4115         code__2 dst = code `appOL` toOL [
4116             FxTOy (primRepToSize fprep) W src tmp,
4117             ST W tmp (spRel (-2)),
4118             LD W (spRel (-2)) dst]
4119     in
4120     return (Any IntRep code__2)
4121
4122 ------------
4123 coerceDbl2Flt x
4124   = getRegister x               `thenNat` \ register ->
4125     getNewRegNat F64    `thenNat` \ tmp ->
4126     let code = registerCode register tmp
4127         src  = registerName register tmp
4128     in
4129         return (Any F32
4130                        (\dst -> code `snocOL` FxTOy DF F src dst))
4131
4132 ------------
4133 coerceFlt2Dbl x
4134   = getRegister x               `thenNat` \ register ->
4135     getNewRegNat F32    `thenNat` \ tmp ->
4136     let code = registerCode register tmp
4137         src  = registerName register tmp
4138     in
4139         return (Any F64
4140                        (\dst -> code `snocOL` FxTOy F DF src dst))
4141
4142 #endif /* sparc_TARGET_ARCH */
4143
4144 #if powerpc_TARGET_ARCH
4145 coerceInt2FP fromRep toRep x = do
4146     (src, code) <- getSomeReg x
4147     lbl <- getNewLabelNat
4148     itmp <- getNewRegNat I32
4149     ftmp <- getNewRegNat F64
4150     let
4151         code' dst = code `appOL` maybe_exts `appOL` toOL [
4152                 LDATA ReadOnlyData
4153                                 [CmmDataLabel lbl,
4154                                  CmmStaticLit (CmmInt 0x43300000 I32),
4155                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4156                 XORIS itmp src (ImmInt 0x8000),
4157                 ST I32 itmp (spRel 3),
4158                 LIS itmp (ImmInt 0x4330),
4159                 ST I32 itmp (spRel 2),
4160                 LD F64 ftmp (spRel 2),
4161                 LIS itmp (HA (ImmCLbl lbl)),
4162                 LD F64 dst (AddrRegImm itmp (LO (ImmCLbl lbl))),
4163                 FSUB F64 dst ftmp dst
4164             ] `appOL` maybe_frsp dst
4165
4166         maybe_exts = case fromRep of
4167                         I8 ->  unitOL $ EXTS I8 src src
4168                         I16 -> unitOL $ EXTS I16 src src
4169                         I32 -> nilOL
4170         maybe_frsp dst = case toRep of
4171                         F32 -> unitOL $ FRSP dst dst
4172                         F64 -> nilOL
4173     return (Any toRep code')
4174
4175 coerceFP2Int fromRep toRep x = do
4176     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4177     (src, code) <- getSomeReg x
4178     tmp <- getNewRegNat F64
4179     let
4180         code' dst = code `appOL` toOL [
4181                 -- convert to int in FP reg
4182             FCTIWZ tmp src,
4183                 -- store value (64bit) from FP to stack
4184             ST F64 tmp (spRel 2),
4185                 -- read low word of value (high word is undefined)
4186             LD I32 dst (spRel 3)]
4187     return (Any toRep code')
4188 #endif /* powerpc_TARGET_ARCH */
4189
4190
4191 -- -----------------------------------------------------------------------------
4192 -- eXTRA_STK_ARGS_HERE
4193
4194 -- We (allegedly) put the first six C-call arguments in registers;
4195 -- where do we start putting the rest of them?
4196
4197 -- Moved from MachInstrs (SDM):
4198
4199 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4200 eXTRA_STK_ARGS_HERE :: Int
4201 eXTRA_STK_ARGS_HERE
4202   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4203 #endif