1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH} things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
34 import StaticFlags ( opt_PIC )
35 import ForeignCall ( CCallConv(..) )
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import TRACE ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Maybe ( fromJust )
53 -- -----------------------------------------------------------------------------
54 -- Top-level of the instruction selector
56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
57 -- They are really trees of insns to facilitate fast appending, where a
58 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.
61 type InstrBlock = OrdList Instr
63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
-- Translate one top-level Cmm entity into native code.  A CmmProc may
-- expand into several NatCmmTops because code generation can emit
-- extra static data sections alongside the procedure itself.
64 cmmTopCodeGen (CmmProc info lab params blocks) = do
65 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
66 picBaseMb <- getPicBaseMaybeNat
67 let proc = CmmProc info lab params (concat nat_blocks)
68 tops = proc : concat statics
-- If a PIC base register was allocated, patch the entry code so the
-- base gets initialized; otherwise return the translation unchanged.
70 Just picBase -> initializePicBase picBase tops
71 Nothing -> return tops
73 cmmTopCodeGen (CmmData sec dat) = do
74 return [CmmData sec dat] -- no translation, we just use CmmStatic
76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
-- Translate one basic block.  The generated instruction stream may
-- contain NEWBLOCK and LDATA pseudo-instructions; these are split back
-- out into separate basic blocks and data sections by mkBlocks below.
77 basicBlockCodeGen (BasicBlock id stmts) = do
78 instrs <- stmtsToInstrs stmts
79 -- code generation may introduce new basic block boundaries, which
80 -- are indicated by the NEWBLOCK instruction. We must split up the
81 -- instruction stream into basic blocks again. Also, we extract
-- LDATA pseudo-instructions into separate data sections.
84 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- Folding step (right fold over the OrdList): NEWBLOCK closes the
-- block being accumulated, LDATA is hived off to the statics, and
-- anything else is an ordinary instruction of the current block.
86 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
87 = ([], BasicBlock id instrs : blocks, statics)
88 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
89 = (instrs, blocks, CmmData sec dat:statics)
90 mkBlocks instr (instrs,blocks,statics)
91 = (instr:instrs, blocks, statics)
93 return (BasicBlock id top : other_blocks, statics)
95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- Translate a statement list and concatenate the per-statement
-- instruction trees into one InstrBlock.
97 = do instrss <- mapM stmtToInstrs stmts
98 return (concatOL instrss)
100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
-- Dispatch on the statement constructor.  Assignments are further
-- split by the representation of the assigned value: floating, 64-bit
-- integer (only on 32-bit targets), or ordinary integer.
101 stmtToInstrs stmt = case stmt of
102 CmmNop -> return nilOL
103 CmmComment s -> return (unitOL (COMMENT s))
106 | isFloatingRep kind -> assignReg_FltCode kind reg src
107 #if WORD_SIZE_IN_BITS==32
108 | kind == I64 -> assignReg_I64Code reg src
110 | otherwise -> assignReg_IntCode kind reg src
111 where kind = cmmRegRep reg
114 | isFloatingRep kind -> assignMem_FltCode kind addr src
115 #if WORD_SIZE_IN_BITS==32
116 | kind == I64 -> assignMem_I64Code addr src
118 | otherwise -> assignMem_IntCode kind addr src
119 where kind = cmmExprRep src
121 CmmCall target result_regs args vols
122 -> genCCall target result_regs args vols
124 CmmBranch id -> genBranch id
125 CmmCondBranch arg id -> genCondJump id arg
126 CmmSwitch arg ids -> genSwitch arg ids
-- NOTE(review): the 'params' of CmmJump are discarded here; only the
-- jump target reaches genJump.
127 CmmJump arg params -> genJump arg
129 -- -----------------------------------------------------------------------------
130 -- General things for putting together code sequences
132 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
133 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' node into the equivalent explicit addition:
-- the register plus an integer literal, both at the register's rep.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
  let rep = cmmRegRep reg
  in  CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
139 -- -----------------------------------------------------------------------------
140 -- Code gen for 64-bit arithmetic on 32-bit platforms
143 Simple support for generating 64-bit code (ie, 64 bit values and 64
144 bit assignments) on 32-bit platforms. Unlike the main code generator
145 we merely shoot for generating working code as simply as possible, and
146 pay little attention to code quality. Specifically, there is no
147 attempt to deal cleverly with the fixed-vs-floating register
148 distinction; all values are generated into (pairs of) floating
149 registers, even if this would mean some redundant reg-reg moves as a
150 result. Only one of the VRegUniques is returned, since it will be
151 of the VRegUniqueLo form, and the upper-half VReg can be determined
152 by applying getHiVRegFromLo to it.
155 data ChildCode64 -- a.k.a "Register64"
-- Result of 64-bit instruction selection on a 32-bit target: an
-- instruction tree plus the low-half virtual register; the high half
-- is recovered with getHiVRegFromLo.
158 Reg -- the lower 32-bit temporary which contains the
159 -- result; use getHiVRegFromLo to find the other
160 -- VRegUnique. Rules of this simplified insn
161 -- selection game are therefore that the returned
162 -- Reg may be modified
164 #if WORD_SIZE_IN_BITS==32
-- Store a 64-bit value (second expr) at the address computed by the
-- first expr; per-arch implementations follow.
165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
166 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
169 #ifndef x86_64_TARGET_ARCH
170 iselExpr64 :: CmmExpr -> NatM ChildCode64
173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386 version: two 32-bit MOVs, low word at the address, high word
-- at address+4 (little-endian layout).
177 assignMem_I64Code addrTree valueTree = do
178 Amode addr addr_code <- getAmode addrTree
179 ChildCode64 vcode rlo <- iselExpr64 valueTree
181 rhi = getHiVRegFromLo rlo
183 -- Little-endian store
184 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
185 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
187 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Assign a 64-bit value to a local register pair (i386): one
-- reg-to-reg MOV per 32-bit half.
190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
191 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
193 r_dst_lo = mkVReg u_dst I32
194 r_dst_hi = getHiVRegFromLo r_dst_lo
195 r_src_hi = getHiVRegFromLo r_src_lo
196 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
197 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
200 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers can hold an I64 on a 32-bit target.
203 assignReg_I64Code lvalue valueTree
204 = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: load each 32-bit half with an immediate MOV.
208 iselExpr64 (CmmLit (CmmInt i _)) = do
209 (rlo,rhi) <- getNewRegPairNat I32
211 r = fromIntegral (fromIntegral i :: Word32)
212 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
214 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
215 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
218 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, low word at addr, high at addr+4.
220 iselExpr64 (CmmLoad addrTree I64) = do
221 Amode addr addr_code <- getAmode addrTree
222 (rlo,rhi) <- getNewRegPairNat I32
224 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
225 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
228 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Reading a 64-bit local: no code needed, just name the low vreg.
232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
233 = return (ChildCode64 nilOL (mkVReg vu I32))
235 -- we handle addition, but rather badly
-- Add a 64-bit literal: ADD on the low halves, then ADC so the carry
-- propagates into the high halves.
236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
237 ChildCode64 code1 r1lo <- iselExpr64 e1
238 (rlo,rhi) <- getNewRegPairNat I32
240 r = fromIntegral (fromIntegral i :: Word32)
241 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
242 r1hi = getHiVRegFromLo r1lo
244 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
245 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
246 MOV I32 (OpReg r1hi) (OpReg rhi),
247 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
249 return (ChildCode64 code rlo)
-- General 64-bit addition: same ADD/ADC pattern with two operands.
251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
252 ChildCode64 code1 r1lo <- iselExpr64 e1
253 ChildCode64 code2 r2lo <- iselExpr64 e2
254 (rlo,rhi) <- getNewRegPairNat I32
256 r1hi = getHiVRegFromLo r1lo
257 r2hi = getHiVRegFromLo r2lo
260 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
261 ADD I32 (OpReg r2lo) (OpReg rlo),
262 MOV I32 (OpReg r1hi) (OpReg rhi),
263 ADC I32 (OpReg r2hi) (OpReg rhi) ]
265 return (ChildCode64 code rlo)
-- Anything else is unsupported 64-bit selection on i386.
268 = pprPanic "iselExpr64(i386)" (ppr expr)
270 #endif /* i386_TARGET_ARCH */
272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
274 #if sparc_TARGET_ARCH
-- SPARC: store both halves with ST; high word at offset 0, low word
-- at offset 4 (big-endian layout).
276 assignMem_I64Code addrTree valueTree = do
277 Amode addr addr_code <- getAmode addrTree
278 ChildCode64 vcode rlo <- iselExpr64 valueTree
-- NOTE(review): the amode ('addr'/'addr_code') appears unused — the
-- address is recomputed into a plain register via getSomeReg below.
-- Confirm whether the getAmode call is dead code here.
279 (src, code) <- getSomeReg addrTree
281 rhi = getHiVRegFromLo rlo
283 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
284 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
285 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- Register-to-register 64-bit assignment (SPARC); moves are encoded
-- as OR against %g0 (mkMOV).
287 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
288 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
-- NOTE(review): the destination vreg is made at rep 'pk' here, where
-- the i386/ppc versions use I32 — confirm this is intentional.
290 r_dst_lo = mkVReg u_dst pk
291 r_dst_hi = getHiVRegFromLo r_dst_lo
292 r_src_hi = getHiVRegFromLo r_src_lo
293 mov_lo = mkMOV r_src_lo r_dst_lo
294 mov_hi = mkMOV r_src_hi r_dst_hi
295 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
296 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
297 assignReg_I64Code lvalue valueTree
298 = panic "assignReg_I64Code(sparc): invalid lvalue"
301 -- Don't delete this -- it's very handy for debugging.
303 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
304 -- = panic "iselExpr64(???)"
-- SPARC 64-bit load: high word at offset 0, low word at offset 4.
306 iselExpr64 (CmmLoad addrTree I64) = do
307 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
308 rlo <- getNewRegNat I32
309 let rhi = getHiVRegFromLo rlo
310 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
311 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
313 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- NOTE(review): the amode is matched as AddrRegReg but only r1 feeds
-- the loads; verify r2 is guaranteed to be %g0 at this point.
-- Reading a 64-bit local: copy both halves into fresh temporaries.
317 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
318 r_dst_lo <- getNewRegNat I32
319 let r_dst_hi = getHiVRegFromLo r_dst_lo
320 r_src_lo = mkVReg uq I32
321 r_src_hi = getHiVRegFromLo r_src_lo
322 mov_lo = mkMOV r_src_lo r_dst_lo
323 mov_hi = mkMOV r_src_hi r_dst_hi
324 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
326 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
330 = pprPanic "iselExpr64(sparc)" (ppr expr)
332 #endif /* sparc_TARGET_ARCH */
334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
336 #if powerpc_TARGET_ARCH
-- Compute addressing modes for both halves of a 64-bit access.  If
-- the +4 offset cannot be folded into the amode, fall back to forcing
-- the address into a register and using reg+imm forms for both halves.
338 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
339 getI64Amodes addrTree = do
340 Amode hi_addr addr_code <- getAmode addrTree
341 case addrOffset hi_addr 4 of
342 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
343 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
344 return (AddrRegImm hi_ptr (ImmInt 0),
345 AddrRegImm hi_ptr (ImmInt 4),
-- PowerPC (big-endian): store the high word at the lower address.
348 assignMem_I64Code addrTree valueTree = do
349 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
350 ChildCode64 vcode rlo <- iselExpr64 valueTree
352 rhi = getHiVRegFromLo rlo
355 mov_hi = ST I32 rhi hi_addr
356 mov_lo = ST I32 rlo lo_addr
358 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- 64-bit register assignment (PowerPC): one MR (move register)
-- instruction per 32-bit half.
360 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
361 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
363 r_dst_lo = mkVReg u_dst I32
364 r_dst_hi = getHiVRegFromLo r_dst_lo
365 r_src_hi = getHiVRegFromLo r_src_lo
366 mov_lo = MR r_dst_lo r_src_lo
367 mov_hi = MR r_dst_hi r_src_hi
370 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers can hold an I64 on a 32-bit target.
373 assignReg_I64Code lvalue valueTree
374 = panic "assignReg_I64Code(powerpc): invalid lvalue"
377 -- Don't delete this -- it's very handy for debugging.
379 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
380 -- = panic "iselExpr64(???)"
-- 64-bit load (PowerPC): load both halves via the paired amodes.
382 iselExpr64 (CmmLoad addrTree I64) = do
383 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
384 (rlo, rhi) <- getNewRegPairNat I32
385 let mov_hi = LD I32 rhi hi_addr
386 mov_lo = LD I32 rlo lo_addr
387 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Reading a 64-bit local register: no code, just the low vreg.
390 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
391 = return (ChildCode64 nilOL (mkVReg vu I32))
-- 64-bit literal (PowerPC): build each 32-bit half out of two 16-bit
-- halfwords, using LIS (load-immediate-shifted) for the upper
-- halfword and an OR-immediate for the lower one.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: this OR must target rhi — it merges half2
                -- (bits 32..47) into the high word.  It previously
                -- operated on rlo, clobbering the low word and leaving
                -- the low halfword of rhi unset.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- 64-bit addition (PowerPC): ADDC adds the low halves and sets the
-- carry; the high-half add that consumes the carry is not visible in
-- this fragment.
410 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
411 ChildCode64 code1 r1lo <- iselExpr64 e1
412 ChildCode64 code2 r2lo <- iselExpr64 e2
413 (rlo,rhi) <- getNewRegPairNat I32
415 r1hi = getHiVRegFromLo r1lo
416 r2hi = getHiVRegFromLo r2lo
419 toOL [ ADDC rlo r1lo r2lo,
422 return (ChildCode64 code rlo)
-- Anything else is unsupported 64-bit selection on PowerPC.
425 = pprPanic "iselExpr64(powerpc)" (ppr expr)
427 #endif /* powerpc_TARGET_ARCH */
430 -- -----------------------------------------------------------------------------
431 -- The 'Register' type
433 -- 'Register's passed up the tree. If the stix code forces the register
434 -- to live in a pre-decided machine register, it comes out as @Fixed@;
435 -- otherwise, it comes out as @Any@, and the parent can decide which
436 -- register to put it in.
-- Fixed: the value lives in the given register, produced by the given
-- code.  Any: the caller supplies the destination register and the
-- function builds the code for it.
439 = Fixed MachRep Reg InstrBlock
440 | Any MachRep (Reg -> InstrBlock)
-- | Re-label a 'Register' with a different 'MachRep'.  The underlying
-- register / code payload is left untouched; only the rep tag changes.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep =
  case register of
    Fixed _ reg code -> Fixed rep reg code
    Any _ codefn     -> Any rep codefn
447 -- -----------------------------------------------------------------------------
448 -- Utils based on getRegister, below
450 -- The dual to getAnyReg: compute an expression into a register, but
451 -- we don't mind which one it is.
452 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- For an Any result we allocate a fresh temporary of the right rep
-- and instantiate the code function with it.
454 r <- getRegister expr
457 tmp <- getNewRegNat rep
458 return (tmp, code tmp)
462 -- -----------------------------------------------------------------------------
463 -- Grab the Reg for a CmmReg
465 getRegisterReg :: CmmReg -> Reg
-- Map a CmmReg directly to a machine register: locals become virtual
-- registers; globals must map to a real register on this platform
-- (anything memory-mapped is a panic here).
467 getRegisterReg (CmmLocal (LocalReg u pk))
470 getRegisterReg (CmmGlobal mid)
471 = case get_GlobalReg_reg_or_addr mid of
472 Left (RealReg rrno) -> RealReg rrno
473 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
474 -- By this stage, the only MagicIds remaining should be the
475 -- ones which map to a real machine register on this
476 -- platform. Hence ...
479 -- -----------------------------------------------------------------------------
480 -- Generate code to get a subtree into a Register
482 -- Don't delete this -- it's very handy for debugging.
484 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
485 -- = panic "getRegister(???)"
487 getRegister :: CmmExpr -> NatM Register
-- Machine-independent clauses first: the PIC base register, plain
-- register reads, and reg+offset trees (expanded via mangleIndexTree).
489 getRegister (CmmReg (CmmGlobal PicBaseReg))
491 reg <- getPicBaseNat wordRep
492 return (Fixed wordRep reg nilOL)
494 getRegister (CmmReg reg)
495 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
497 getRegister tree@(CmmRegOff _ _)
498 = getRegister (mangleIndexTree tree)
500 -- end of machine-"independent" bit; here we go on the rest...
502 #if alpha_TARGET_ARCH
-- Alpha: double literal is emitted into a read-only data section and
-- loaded indirectly via a temporary holding the label address.
504 getRegister (StDouble d)
505 = getBlockIdNat `thenNat` \ lbl ->
506 getNewRegNat PtrRep `thenNat` \ tmp ->
507 let code dst = mkSeqInstrs [
508 LDATA RoDataSegment lbl [
509 DATA TF [ImmLab (rational d)]
511 LDA tmp (AddrImm (ImmCLbl lbl)),
512 LD TF dst (AddrReg tmp)]
514 return (Any F64 code)
-- Unary primops: most map to a single instruction; the transcendental
-- float ops fall through to a C library call (see 'fn' below).
516 getRegister (StPrim primop [x]) -- unary PrimOps
518 IntNegOp -> trivialUCode (NEG Q False) x
520 NotOp -> trivialUCode NOT x
522 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
523 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
525 OrdOp -> coerceIntCode IntRep x
528 Float2IntOp -> coerceFP2Int x
529 Int2FloatOp -> coerceInt2FP pr x
530 Double2IntOp -> coerceFP2Int x
531 Int2DoubleOp -> coerceInt2FP pr x
533 Double2FloatOp -> coerceFltCode x
534 Float2DoubleOp -> coerceFltCode x
536 other_op -> getRegister (StCall fn CCallConv F64 [x])
538 fn = case other_op of
539 FloatExpOp -> FSLIT("exp")
540 FloatLogOp -> FSLIT("log")
541 FloatSqrtOp -> FSLIT("sqrt")
542 FloatSinOp -> FSLIT("sin")
543 FloatCosOp -> FSLIT("cos")
544 FloatTanOp -> FSLIT("tan")
545 FloatAsinOp -> FSLIT("asin")
546 FloatAcosOp -> FSLIT("acos")
547 FloatAtanOp -> FSLIT("atan")
548 FloatSinhOp -> FSLIT("sinh")
549 FloatCoshOp -> FSLIT("cosh")
550 FloatTanhOp -> FSLIT("tanh")
551 DoubleExpOp -> FSLIT("exp")
552 DoubleLogOp -> FSLIT("log")
553 DoubleSqrtOp -> FSLIT("sqrt")
554 DoubleSinOp -> FSLIT("sin")
555 DoubleCosOp -> FSLIT("cos")
556 DoubleTanOp -> FSLIT("tan")
557 DoubleAsinOp -> FSLIT("asin")
558 DoubleAcosOp -> FSLIT("acos")
559 DoubleAtanOp -> FSLIT("atan")
560 DoubleSinhOp -> FSLIT("sinh")
561 DoubleCoshOp -> FSLIT("cosh")
562 DoubleTanhOp -> FSLIT("tanh")
564 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Dyadic primops (Alpha).  Gt/Ge are encoded as the reversed Lt/Le
-- with swapped operands; non-equality goes via int_NE_code.
566 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
568 CharGtOp -> trivialCode (CMP LTT) y x
569 CharGeOp -> trivialCode (CMP LE) y x
570 CharEqOp -> trivialCode (CMP EQQ) x y
571 CharNeOp -> int_NE_code x y
572 CharLtOp -> trivialCode (CMP LTT) x y
573 CharLeOp -> trivialCode (CMP LE) x y
575 IntGtOp -> trivialCode (CMP LTT) y x
576 IntGeOp -> trivialCode (CMP LE) y x
577 IntEqOp -> trivialCode (CMP EQQ) x y
578 IntNeOp -> int_NE_code x y
579 IntLtOp -> trivialCode (CMP LTT) x y
580 IntLeOp -> trivialCode (CMP LE) x y
582 WordGtOp -> trivialCode (CMP ULT) y x
-- NOTE(review): WordGeOp uses operand order (x y) whereas the
-- analogous AddrGeOp below uses (y x); one of the two looks wrong
-- (x >= y should be y <= x).  Flagged, not changed — dead Alpha code.
583 WordGeOp -> trivialCode (CMP ULE) x y
584 WordEqOp -> trivialCode (CMP EQQ) x y
585 WordNeOp -> int_NE_code x y
586 WordLtOp -> trivialCode (CMP ULT) x y
587 WordLeOp -> trivialCode (CMP ULE) x y
589 AddrGtOp -> trivialCode (CMP ULT) y x
590 AddrGeOp -> trivialCode (CMP ULE) y x
591 AddrEqOp -> trivialCode (CMP EQQ) x y
592 AddrNeOp -> int_NE_code x y
593 AddrLtOp -> trivialCode (CMP ULT) x y
594 AddrLeOp -> trivialCode (CMP ULE) x y
596 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
597 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
598 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
599 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
600 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
601 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
603 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
604 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
605 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
606 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
607 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
608 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
610 IntAddOp -> trivialCode (ADD Q False) x y
611 IntSubOp -> trivialCode (SUB Q False) x y
612 IntMulOp -> trivialCode (MUL Q False) x y
613 IntQuotOp -> trivialCode (DIV Q False) x y
614 IntRemOp -> trivialCode (REM Q False) x y
616 WordAddOp -> trivialCode (ADD Q False) x y
617 WordSubOp -> trivialCode (SUB Q False) x y
618 WordMulOp -> trivialCode (MUL Q False) x y
619 WordQuotOp -> trivialCode (DIV Q True) x y
620 WordRemOp -> trivialCode (REM Q True) x y
622 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
623 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
624 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
625 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
627 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
628 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
629 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
630 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
632 AddrAddOp -> trivialCode (ADD Q False) x y
633 AddrSubOp -> trivialCode (SUB Q False) x y
634 AddrRemOp -> trivialCode (REM Q True) x y
636 AndOp -> trivialCode AND x y
637 OrOp -> trivialCode OR x y
638 XorOp -> trivialCode XOR x y
639 SllOp -> trivialCode SLL x y
640 SrlOp -> trivialCode SRL x y
642 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
643 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
644 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
-- pow has no instruction; call out to libm.
646 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
647 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
649 {- ------------------------------------------------------------
650 Some bizarre special code for getting condition codes into
651 registers. Integer non-equality is a test for equality
652 followed by an XOR with 1. (Integer comparisons always set
653 the result register to 0 or 1.) Floating point comparisons of
654 any kind leave the result in a floating point register, so we
655 need to wrangle an integer register out of things.
657 int_NE_code :: StixTree -> StixTree -> NatM Register
-- Integer non-equality: compare for equality (yielding 0/1), then
-- flip the result by XOR-ing with 1.
660 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
661 getNewRegNat IntRep `thenNat` \ tmp ->
663 code = registerCode register tmp
664 src = registerName register tmp
665 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
667 return (Any IntRep code__2)
669 {- ------------------------------------------------------------
670 Comments for int_NE_code also apply to cmpF_code
673 :: (Reg -> Reg -> Reg -> Instr)
675 -> StixTree -> StixTree
-- Float comparison: do the FP compare, preload the destination with
-- 1, branch over the reset if the condition holds, else zero it.
678 cmpF_code instr cond x y
679 = trivialFCode pr instr x y `thenNat` \ register ->
680 getNewRegNat F64 `thenNat` \ tmp ->
681 getBlockIdNat `thenNat` \ lbl ->
683 code = registerCode register tmp
684 result = registerName register tmp
686 code__2 dst = code . mkSeqInstrs [
687 OR zeroh (RIImm (ImmInt 1)) dst,
688 BF cond result (ImmCLbl lbl),
689 OR zeroh (RIReg zeroh) dst,
692 return (Any IntRep code__2)
694 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
695 ------------------------------------------------------------
-- Alpha loads and integer/address literals.
697 getRegister (CmmLoad pk mem)
698 = getAmode mem `thenNat` \ amode ->
700 code = amodeCode amode
701 src = amodeAddr amode
702 size = primRepToSize pk
703 code__2 dst = code . mkSeqInstr (LD size dst src)
705 return (Any pk code__2)
-- Small integer literals: OR an immediate against the zero register;
-- larger ones use LDI.
707 getRegister (StInt i)
710 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
712 return (Any IntRep code)
715 code dst = mkSeqInstr (LDI Q dst src)
717 return (Any IntRep code)
719 src = ImmInt (fromInteger i)
-- Address-of-label: LDA from an immediate address.
724 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
726 return (Any PtrRep code)
729 imm__2 = case imm of Just x -> x
731 #endif /* alpha_TARGET_ARCH */
733 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: float literals are placed in a data section and loaded via an
-- amode; the special doubles 0.0 and 1.0 use the dedicated x87
-- GLDZ/GLD1 instructions and need no memory access.
737 getRegister (CmmLit (CmmFloat f F32)) = do
738 lbl <- getNewLabelNat
739 dynRef <- cmmMakeDynamicReference addImportNat False lbl
740 Amode addr addr_code <- getAmode dynRef
744 CmmStaticLit (CmmFloat f F32)]
745 `consOL` (addr_code `snocOL`
748 return (Any F32 code)
751 getRegister (CmmLit (CmmFloat d F64))
753 = let code dst = unitOL (GLDZ dst)
754 in return (Any F64 code)
757 = let code dst = unitOL (GLD1 dst)
758 in return (Any F64 code)
-- General double literal: same data-section scheme as F32 above.
761 lbl <- getNewLabelNat
762 dynRef <- cmmMakeDynamicReference addImportNat False lbl
763 Amode addr addr_code <- getAmode dynRef
767 CmmStaticLit (CmmFloat d F64)]
768 `consOL` (addr_code `snocOL`
771 return (Any F64 code)
773 #endif /* i386_TARGET_ARCH */
775 #if x86_64_TARGET_ARCH
-- x86_64 (SSE): 0.0 is just xor dst,dst; other float literals are
-- loaded rip-relative from a data section.
777 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
778 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
779 -- I don't know why there are xorpd, xorps, and pxor instructions.
780 -- They all appear to do the same thing --SDM
781 return (Any rep code)
783 getRegister (CmmLit (CmmFloat f rep)) = do
784 lbl <- getNewLabelNat
785 let code dst = toOL [
788 CmmStaticLit (CmmFloat f rep)],
789 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
792 return (Any rep code)
794 #endif /* x86_64_TARGET_ARCH */
796 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
798 -- catch simple cases of zero- or sign-extended load
-- (fusing the conversion into the load saves a separate extend insn)
799 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
800 code <- intLoadCode (MOVZxL I8) addr
801 return (Any I32 code)
803 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
804 code <- intLoadCode (MOVSxL I8) addr
805 return (Any I32 code)
807 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
808 code <- intLoadCode (MOVZxL I16) addr
809 return (Any I32 code)
811 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
812 code <- intLoadCode (MOVSxL I16) addr
813 return (Any I32 code)
817 #if x86_64_TARGET_ARCH
819 -- catch simple cases of zero- or sign-extended load
-- 64-bit destination variants of the fused extend-loads above.
820 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
821 code <- intLoadCode (MOVZxL I8) addr
822 return (Any I64 code)
824 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
825 code <- intLoadCode (MOVSxL I8) addr
826 return (Any I64 code)
828 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
829 code <- intLoadCode (MOVZxL I16) addr
830 return (Any I64 code)
832 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
833 code <- intLoadCode (MOVSxL I16) addr
834 return (Any I64 code)
836 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
837 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
838 return (Any I64 code)
840 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
841 code <- intLoadCode (MOVSxL I32) addr
842 return (Any I64 code)
846 #if x86_64_TARGET_ARCH
-- SSE float negation: XOR with a 128-bit constant holding only the
-- sign bit(s), the same trick gcc uses.
847 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
848 x_code <- getAnyReg x
849 lbl <- getNewLabelNat
851 code dst = x_code dst `appOL` toOL [
852 -- This is how gcc does it, so it can't be that bad:
853 LDATA ReadOnlyData16 [
856 CmmStaticLit (CmmInt 0x80000000 I32),
857 CmmStaticLit (CmmInt 0 I32),
858 CmmStaticLit (CmmInt 0 I32),
859 CmmStaticLit (CmmInt 0 I32)
861 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
862 -- xorps, so we need the 128-bit constant
863 -- ToDo: rip-relative
866 return (Any F32 code)
868 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
869 x_code <- getAnyReg x
870 lbl <- getNewLabelNat
872 -- This is how gcc does it, so it can't be that bad:
873 code dst = x_code dst `appOL` toOL [
874 LDATA ReadOnlyData16 [
877 CmmStaticLit (CmmInt 0x8000000000000000 I64),
878 CmmStaticLit (CmmInt 0 I64)
880 -- gcc puts an unpck here. Wonder if we need it.
881 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
882 -- xorpd, so we need the 128-bit constant
885 return (Any F64 code)
888 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Unary MachOps on x86: negation/not, narrowing conversions that are
-- nops (the value just gets re-tagged), and widening conversions that
-- become movzx/movsx.  Helpers integerExtend / conversionNop below.
890 getRegister (CmmMachOp mop [x]) -- unary MachOps
893 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
894 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
897 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
898 MO_Not rep -> trivialUCode rep (NOT rep) x
901 -- TODO: these are only nops if the arg is not a fixed register that
902 -- can't be byte-addressed.
903 MO_U_Conv I32 I8 -> conversionNop I32 x
904 MO_S_Conv I32 I8 -> conversionNop I32 x
905 MO_U_Conv I16 I8 -> conversionNop I16 x
906 MO_S_Conv I16 I8 -> conversionNop I16 x
907 MO_U_Conv I32 I16 -> conversionNop I32 x
908 MO_S_Conv I32 I16 -> conversionNop I32 x
909 #if x86_64_TARGET_ARCH
910 MO_U_Conv I64 I32 -> conversionNop I64 x
911 MO_S_Conv I64 I32 -> conversionNop I64 x
912 MO_U_Conv I64 I16 -> conversionNop I64 x
913 MO_S_Conv I64 I16 -> conversionNop I64 x
914 MO_U_Conv I64 I8 -> conversionNop I64 x
915 MO_S_Conv I64 I8 -> conversionNop I64 x
918 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
919 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
922 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
923 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
924 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
926 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
927 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
928 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
930 #if x86_64_TARGET_ARCH
931 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
932 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
933 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
934 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
935 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
936 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
937 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
938 -- However, we don't want the register allocator to throw it
939 -- away as an unnecessary reg-to-reg move, so we keep it in
940 -- the form of a movzl and print it as a movl later.
-- NOTE(review): the next two pairs of F32<->F64 clauses appear to be
-- the i386 (x87: nop) and x86_64 (SSE: real cvt) CPP branches with
-- the #if lines elided from this fragment.
944 MO_S_Conv F32 F64 -> conversionNop F64 x
945 MO_S_Conv F64 F32 -> conversionNop F32 x
947 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
948 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
952 | isFloatingRep from -> coerceFP2Int from to x
953 | isFloatingRep to -> coerceInt2FP from to x
955 other -> pprPanic "getRegister" (pprMachOp mop)
957 -- signed or unsigned extension.
958 integerExtend from to instr expr = do
959 (reg,e_code) <- if from == I8 then getByteReg expr
964 instr from (OpReg reg) (OpReg dst)
-- A conversion that needs no code: just re-tag the Register's rep.
967 conversionNop new_rep expr
968 = do e_code <- getRegister expr
969 return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps on x86: comparisons produce condition-code-based
-- Registers; arithmetic dispatches to the helpers defined below
-- (add_code, sub_code, div_code, shift_code, imulMayOflo).
972 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
973 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
975 MO_Eq F32 -> condFltReg EQQ x y
976 MO_Ne F32 -> condFltReg NE x y
977 MO_S_Gt F32 -> condFltReg GTT x y
978 MO_S_Ge F32 -> condFltReg GE x y
979 MO_S_Lt F32 -> condFltReg LTT x y
980 MO_S_Le F32 -> condFltReg LE x y
982 MO_Eq F64 -> condFltReg EQQ x y
983 MO_Ne F64 -> condFltReg NE x y
984 MO_S_Gt F64 -> condFltReg GTT x y
985 MO_S_Ge F64 -> condFltReg GE x y
986 MO_S_Lt F64 -> condFltReg LTT x y
987 MO_S_Le F64 -> condFltReg LE x y
989 MO_Eq rep -> condIntReg EQQ x y
990 MO_Ne rep -> condIntReg NE x y
992 MO_S_Gt rep -> condIntReg GTT x y
993 MO_S_Ge rep -> condIntReg GE x y
994 MO_S_Lt rep -> condIntReg LTT x y
995 MO_S_Le rep -> condIntReg LE x y
997 MO_U_Gt rep -> condIntReg GU x y
998 MO_U_Ge rep -> condIntReg GEU x y
999 MO_U_Lt rep -> condIntReg LU x y
1000 MO_U_Le rep -> condIntReg LEU x y
1002 #if i386_TARGET_ARCH
1003 MO_Add F32 -> trivialFCode F32 GADD x y
1004 MO_Sub F32 -> trivialFCode F32 GSUB x y
1006 MO_Add F64 -> trivialFCode F64 GADD x y
1007 MO_Sub F64 -> trivialFCode F64 GSUB x y
1009 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1010 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1013 #if x86_64_TARGET_ARCH
1014 MO_Add F32 -> trivialFCode F32 ADD x y
1015 MO_Sub F32 -> trivialFCode F32 SUB x y
1017 MO_Add F64 -> trivialFCode F64 ADD x y
1018 MO_Sub F64 -> trivialFCode F64 SUB x y
1020 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1021 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1024 MO_Add rep -> add_code rep x y
1025 MO_Sub rep -> sub_code rep x y
1027 MO_S_Quot rep -> div_code rep True True x y
1028 MO_S_Rem rep -> div_code rep True False x y
1029 MO_U_Quot rep -> div_code rep False True x y
1030 MO_U_Rem rep -> div_code rep False False x y
1032 #if i386_TARGET_ARCH
1033 MO_Mul F32 -> trivialFCode F32 GMUL x y
1034 MO_Mul F64 -> trivialFCode F64 GMUL x y
1037 #if x86_64_TARGET_ARCH
1038 MO_Mul F32 -> trivialFCode F32 MUL x y
1039 MO_Mul F64 -> trivialFCode F64 MUL x y
1042 MO_Mul rep -> let op = IMUL rep in
1043 trivialCode rep op (Just op) x y
1045 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1047 MO_And rep -> let op = AND rep in
1048 trivialCode rep op (Just op) x y
1049 MO_Or rep -> let op = OR rep in
1050 trivialCode rep op (Just op) x y
1051 MO_Xor rep -> let op = XOR rep in
1052 trivialCode rep op (Just op) x y
1054 {- Shift ops on x86s have constraints on their source, it
1055 either has to be Imm, CL or 1
1056 => trivialCode is not restrictive enough (sigh.)
1058 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1059 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1060 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1062 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1064 --------------------
-- Signed multiply with overflow check: IMUL2 leaves the full result
-- in %edx:%eax; the result register (eax) ends up zero iff the high
-- part equals the sign-extension of the low part (no overflow).
1065 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1066 imulMayOflo rep a b = do
1067 (a_reg, a_code) <- getNonClobberedReg a
1068 b_code <- getAnyReg b
1070 shift_amt = case rep of
1073 _ -> panic "shift_amt"
1075 code = a_code `appOL` b_code eax `appOL`
1077 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1078 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1079 -- sign extend lower part
1080 SUB rep (OpReg edx) (OpReg eax)
1081 -- compare against upper
1082 -- eax==0 if high part == sign extended low part
1085 return (Fixed rep eax code)
1087 --------------------
-- Shifts: the shift amount must be an immediate or live in %cl, so
-- immediates are emitted directly and variable amounts are computed
-- into ecx (the Fixed result pins the shifted value in x_reg).
1088 shift_code :: MachRep
1089 -> (Operand -> Operand -> Instr)
1094 {- Case1: shift length as immediate -}
1095 shift_code rep instr x y@(CmmLit lit) = do
1096 x_code <- getAnyReg x
1099 = x_code dst `snocOL`
1100 instr (OpImm (litToImm lit)) (OpReg dst)
1102 return (Any rep code)
1104 {- Case2: shift length is complex (non-immediate) -}
1105 shift_code rep instr x y{-amount-} = do
1106 (x_reg, x_code) <- getNonClobberedReg x
1107 y_code <- getAnyReg y
1109 code = x_code `appOL`
1111 instr (OpReg ecx) (OpReg x_reg)
1113 return (Fixed rep x_reg code)
1115 --------------------
-- | x86: addition; an integer-literal addend that fits in 32 bits goes
-- through the three-operand LEA form (add_int), otherwise plain ADD.
1116 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1117 add_code rep x (CmmLit (CmmInt y _))
1118 | not (is64BitInteger y) = add_int rep x y
1119 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1121 --------------------
-- | x86: subtraction. Subtracting a literal is turned into adding its
-- negation (note the guard checks that -y still fits in 32 bits).
-- SUB is not commutative, hence the Nothing for trivialCode's reverse op.
1122 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1123 sub_code rep x (CmmLit (CmmInt y _))
1124 | not (is64BitInteger (-y)) = add_int rep x (-y)
1125 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1127 -- our three-operand add instruction:
-- Adds a small immediate to a register via an effective-address
-- computation (base reg + displacement), leaving both sources intact.
-- NOTE(review): the let keyword and the LEA instruction line are
-- missing from this excerpt.
1128 add_int rep x y = do
1129 (x_reg, x_code) <- getSomeReg x
1131 imm = ImmInt (fromInteger y)
1135 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1138 return (Any rep code)
1140 ----------------------
-- | x86: division/remainder. x is evaluated into %eax; the divisor
-- must not be clobbered by that. 'signed' selects CLTD sign-extension
-- into %edx (vs. zeroing edx with XOR) and IDIV (vs. DIV); 'quotient'
-- selects the eax (quotient) or edx (remainder) result register.
-- NOTE(review): the 'otherwise' branches of instr and result are
-- missing from this excerpt.
1141 div_code rep signed quotient x y = do
1142 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1143 x_code <- getAnyReg x
1145 widen | signed = CLTD rep
1146 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1148 instr | signed = IDIV
1151 code = y_code `appOL`
1153 toOL [widen, instr rep y_op]
1155 result | quotient = eax
1159 return (Fixed rep result code)
-- | Memory loads and literals for x86/x86_64.
-- NOTE(review): guards and where-clauses are partially missing in this
-- excerpt; the visible structure is: FP load, i386 int load (with 8-bit
-- zero-extension), x86_64 int load, zero literal via XOR, small 64-bit
-- literal via 32-bit MOV, generic literal, and a catch-all panic.
1162 getRegister (CmmLoad mem pk)
1165 Amode src mem_code <- getAmode mem
1167 code dst = mem_code `snocOL`
1168 IF_ARCH_i386(GLD pk src dst,
1169 MOV pk (OpAddr src) (OpReg dst))
1171 return (Any pk code)
1173 #if i386_TARGET_ARCH
1174 getRegister (CmmLoad mem pk)
1177 code <- intLoadCode (instr pk) mem
1178 return (Any pk code)
1180 instr I8 = MOVZxL pk
1183 -- we always zero-extend 8-bit loads, if we
1184 -- can't think of anything better. This is because
1185 -- we can't guarantee access to an 8-bit variant of every register
1186 -- (esi and edi don't have 8-bit variants), so to make things
1187 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1190 #if x86_64_TARGET_ARCH
1191 -- Simpler memory load code on x86_64
1192 getRegister (CmmLoad mem pk)
1194 code <- intLoadCode (MOV pk) mem
1195 return (Any pk code)
-- Zero literal: XOR dst,dst is shorter than MOV $0,dst and sets no
-- useful flags we depend on here.
1198 getRegister (CmmLit (CmmInt 0 rep))
1200 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1201 adj_rep = case rep of I64 -> I32; _ -> rep
1202 rep1 = IF_ARCH_i386( rep, adj_rep )
1204 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1206 return (Any rep code)
1208 #if x86_64_TARGET_ARCH
1209 -- optimisation for loading small literals on x86_64: take advantage
1210 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1211 -- instruction forms are shorter.
1212 getRegister (CmmLit lit)
1213 | I64 <- cmmLitRep lit, not (isBigLit lit)
1216 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1218 return (Any I64 code)
-- isBigLit: true when the value does not survive 32->64 zero-extension,
-- i.e. negative or above 2^32-1.
1220 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1222 -- note1: not the same as is64BitLit, because that checks for
1223 -- signed literals that fit in 32 bits, but we want unsigned
1225 -- note2: all labels are small, because we're assuming the
1226 -- small memory model (see gcc docs, -mcmodel=small).
1229 getRegister (CmmLit lit)
1233 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1235 return (Any rep code)
1237 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build the code for an integer load: compute an addressing mode for
-- the address expression, then return a continuation that appends the
-- given instruction to move from that address into the destination
-- register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mkLoad addrExpr = do
    Amode addr setup <- getAmode addrExpr
    let load dest = setup `snocOL` mkLoad (OpAddr addr) (OpReg dest)
    return load
1246 -- Compute an expression into *any* register, adding the appropriate
1247 -- move instruction if necessary.
-- NOTE(review): the equation head and the final 'anyReg r' line of
-- getAnyReg are missing from this excerpt.
1248 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1250 r <- getRegister expr
-- | Turn a Register (Any/Fixed) into a destination-parameterised code
-- block; a Fixed result gets an extra reg-to-reg move into the target.
1253 anyReg :: Register -> NatM (Reg -> InstrBlock)
1254 anyReg (Any _ code) = return code
1255 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1257 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1258 -- Fixed registers might not be byte-addressable, so we make sure we've
1259 -- got a temporary, inserting an extra reg copy if necessary.
-- NOTE(review): the case scrutinee lines (matching on Any/Fixed) are
-- missing from this excerpt.
1260 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1261 #if x86_64_TARGET_ARCH
1262 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1264 getByteReg expr = do
1265 r <- getRegister expr
1268 tmp <- getNewRegNat rep
1269 return (tmp, code tmp)
-- Virtual regs are fine as-is; the allocator will pick a byte-capable one.
1271 | isVirtualReg reg -> return (reg,code)
1273 tmp <- getNewRegNat rep
1274 return (tmp, code `snocOL` reg2reg rep reg tmp)
1275 -- ToDo: could optimise slightly by checking for byte-addressable
1276 -- real registers, but that will happen very rarely if at all.
1279 -- Another variant: this time we want the result in a register that cannot
1280 -- be modified by code to evaluate an arbitrary expression.
-- NOTE(review): the case scrutinee lines are missing from this excerpt;
-- the visible branches copy Fixed results in clobberable real registers
-- into a fresh temporary.
1281 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1282 getNonClobberedReg expr = do
1283 r <- getRegister expr
1286 tmp <- getNewRegNat rep
1287 return (tmp, code tmp)
1289 -- only free regs can be clobbered
1290 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1291 tmp <- getNewRegNat rep
1292 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Register-to-register move: GMOV for i386 floating point (x87 stack
-- regs), plain MOV otherwise.
-- NOTE(review): the equation head (reg2reg rep src dst) and the #else
-- branch are missing from this excerpt.
1296 reg2reg :: MachRep -> Reg -> Reg -> Instr
1298 #if i386_TARGET_ARCH
1299 | isFloatingRep rep = GMOV src dst
1301 | otherwise = MOV rep (OpReg src) (OpReg dst)
1303 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1305 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1307 #if sparc_TARGET_ARCH
-- | SPARC: float literals are placed in a read-only data section and
-- loaded via SETHI/LD with hi/lo halves of the label address.
-- NOTE(review): the LDATA lines opening the static data are missing
-- from this excerpt.
1309 getRegister (CmmLit (CmmFloat f F32)) = do
1310 lbl <- getNewLabelNat
1311 let code dst = toOL [
1314 CmmStaticLit (CmmFloat f F32)],
1315 SETHI (HI (ImmCLbl lbl)) dst,
1316 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1317 return (Any F32 code)
1319 getRegister (CmmLit (CmmFloat d F64)) = do
1320 lbl <- getNewLabelNat
1321 let code dst = toOL [
1324 CmmStaticLit (CmmFloat d F64)],
1325 SETHI (HI (ImmCLbl lbl)) dst,
1326 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1327 return (Any F64 code)
-- | SPARC unary MachOps: FP negate, integer negate/not via g0 (the
-- hardwired zero register), FP<->int coercions, and integer extensions.
1329 getRegister (CmmMachOp mop [x]) -- unary MachOps
1331 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1332 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1334 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1335 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1337 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1339 MO_U_Conv F64 F32-> coerceDbl2Flt x
1340 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1342 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1343 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1344 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1345 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1347 -- Conversions which are a nop on sparc
1349 | from == to -> conversionNop to x
1350 MO_U_Conv I32 to -> conversionNop to x
1351 MO_S_Conv I32 to -> conversionNop to x
1354 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1355 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1356 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1357 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1359 other_op -> panic "Unknown unary mach op"
-- integerExtend: shift left then arithmetic/logical shift right to
-- sign-/zero-extend. NOTE(review): the SLL line and the where/let
-- scaffolding are missing from this excerpt.
1362 integerExtend signed from to expr = do
1363 (reg, e_code) <- getSomeReg expr
1367 ((if signed then SRA else SRL)
1368 reg (RIImm (ImmInt 0)) dst)
1369 return (Any to code)
-- conversionNop: reinterpret the same bits at a new rep, emitting no code.
1370 conversionNop new_rep expr
1371 = do e_code <- getRegister expr
1372 return (swizzleRegisterRep e_code new_rep)
-- | SPARC dyadic MachOps: comparisons, arithmetic, logic and shifts.
1374 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1376 MO_Eq F32 -> condFltReg EQQ x y
1377 MO_Ne F32 -> condFltReg NE x y
1379 MO_S_Gt F32 -> condFltReg GTT x y
1380 MO_S_Ge F32 -> condFltReg GE x y
1381 MO_S_Lt F32 -> condFltReg LTT x y
1382 MO_S_Le F32 -> condFltReg LE x y
1384 MO_Eq F64 -> condFltReg EQQ x y
1385 MO_Ne F64 -> condFltReg NE x y
1387 MO_S_Gt F64 -> condFltReg GTT x y
1388 MO_S_Ge F64 -> condFltReg GE x y
1389 MO_S_Lt F64 -> condFltReg LTT x y
1390 MO_S_Le F64 -> condFltReg LE x y
1392 MO_Eq rep -> condIntReg EQQ x y
1393 MO_Ne rep -> condIntReg NE x y
1395 MO_S_Gt rep -> condIntReg GTT x y
1396 MO_S_Ge rep -> condIntReg GE x y
1397 MO_S_Lt rep -> condIntReg LTT x y
1398 MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): the I32 unsigned comparisons below use the *signed*
-- condition codes (GTT/GE/LTT/LE) while the I16 ones use the unsigned
-- codes (GU/GEU/LU/LEU). Verify this is intentional — it looks like a
-- latent bug for unsigned I32 operands with the high bit set.
1400 MO_U_Gt I32 -> condIntReg GTT x y
1401 MO_U_Ge I32 -> condIntReg GE x y
1402 MO_U_Lt I32 -> condIntReg LTT x y
1403 MO_U_Le I32 -> condIntReg LE x y
1405 MO_U_Gt I16 -> condIntReg GU x y
1406 MO_U_Ge I16 -> condIntReg GEU x y
1407 MO_U_Lt I16 -> condIntReg LU x y
1408 MO_U_Le I16 -> condIntReg LEU x y
1410 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1411 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1413 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1415 -- ToDo: teach about V8+ SPARC div instructions
-- Division is punted to the millicode routines .div/.rem/.udiv/.urem.
1416 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1417 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1418 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1419 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1421 MO_Add F32 -> trivialFCode F32 FADD x y
1422 MO_Sub F32 -> trivialFCode F32 FSUB x y
1423 MO_Mul F32 -> trivialFCode F32 FMUL x y
1424 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1426 MO_Add F64 -> trivialFCode F64 FADD x y
1427 MO_Sub F64 -> trivialFCode F64 FSUB x y
1428 MO_Mul F64 -> trivialFCode F64 FMUL x y
1429 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1431 MO_And rep -> trivialCode rep (AND False) x y
1432 MO_Or rep -> trivialCode rep (OR False) x y
1433 MO_Xor rep -> trivialCode rep (XOR False) x y
1435 MO_Mul rep -> trivialCode rep (SMUL False) x y
1437 MO_Shl rep -> trivialCode rep SLL x y
1438 MO_U_Shr rep -> trivialCode rep SRL x y
1439 MO_S_Shr rep -> trivialCode rep SRA x y
-- NOTE(review): the MO_F32_Pwr/MO_F64_Pwr alternatives below reference
-- the old StCall representation — this looks like dead/stale code from
-- the pre-Cmm Stix days; confirm whether it even type-checks.
1442 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1443 [promote x, promote y])
1444 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1445 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1448 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1450 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1452 --------------------
-- | SPARC: signed-multiply overflow check. Multiplies with SMUL (high
-- word lands in %y, read back via RDY into res_hi), sign-extends the
-- low word with SRA, and subtracts; dst == 0 iff no overflow.
-- NOTE(review): the shift_amt alternatives and the RDY line are missing
-- from this excerpt.
1453 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1454 imulMayOflo rep a b = do
1455 (a_reg, a_code) <- getSomeReg a
1456 (b_reg, b_code) <- getSomeReg b
1457 res_lo <- getNewRegNat I32
1458 res_hi <- getNewRegNat I32
1460 shift_amt = case rep of
1463 _ -> panic "shift_amt"
1464 code dst = a_code `appOL` b_code `appOL`
1466 SMUL False a_reg (RIReg b_reg) res_lo,
1468 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1469 SUB False False res_lo (RIReg res_hi) dst
1471 return (Any I32 code)
-- | SPARC memory loads and integer literals.
1473 getRegister (CmmLoad mem pk) = do
1474 Amode src code <- getAmode mem
1476 code__2 dst = code `snocOL` LD pk src dst
1477 return (Any pk code__2)
-- Small integer literal: OR with g0 (zero register) acts as a move.
-- NOTE(review): the fits-in-13-bits guard is missing from this excerpt.
1479 getRegister (CmmLit (CmmInt i _))
1482 src = ImmInt (fromInteger i)
1483 code dst = unitOL (OR False g0 (RIImm src) dst)
1485 return (Any I32 code)
-- General literal: SETHI the high 22 bits then OR in the low 10.
1487 getRegister (CmmLit lit)
1488 = let rep = cmmLitRep lit
1492 OR False dst (RIImm (LO imm)) dst]
1493 in return (Any I32 code)
1495 #endif /* sparc_TARGET_ARCH */
1497 #if powerpc_TARGET_ARCH
-- | PowerPC memory loads, plus special cases that fuse a load with a
-- zero/sign extension.
1498 getRegister (CmmLoad mem pk)
1501 Amode addr addr_code <- getAmode mem
1502 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1503 addr_code `snocOL` LD pk dst addr
1504 return (Any pk code)
1506 -- catch simple cases of zero- or sign-extended load
1507 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1508 Amode addr addr_code <- getAmode mem
1509 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1511 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1513 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1514 Amode addr addr_code <- getAmode mem
1515 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- LA (load algebraic, lha) performs the sign extension in the load itself.
1517 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1518 Amode addr addr_code <- getAmode mem
1519 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- | PowerPC unary MachOps: not/neg, FP conversions, and integer
-- widening/narrowing. Narrowing conversions are nops because the high
-- bits are treated as undefined.
1521 getRegister (CmmMachOp mop [x]) -- unary MachOps
1523 MO_Not rep -> trivialUCode rep NOT x
1525 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1526 MO_S_Conv F32 F64 -> conversionNop F64 x
1529 | from == to -> conversionNop to x
1530 | isFloatingRep from -> coerceFP2Int from to x
1531 | isFloatingRep to -> coerceInt2FP from to x
1533 -- narrowing is a nop: we treat the high bits as undefined
1534 MO_S_Conv I32 to -> conversionNop to x
1535 MO_S_Conv I16 I8 -> conversionNop I8 x
1536 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1537 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1540 | from == to -> conversionNop to x
1541 -- narrowing is a nop: we treat the high bits as undefined
1542 MO_U_Conv I32 to -> conversionNop to x
1543 MO_U_Conv I16 I8 -> conversionNop I8 x
-- Zero extension by masking with AND.
1544 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1545 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1547 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1548 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1549 MO_S_Neg rep -> trivialUCode rep NEG x
-- conversionNop: reinterpret the same register contents at a new rep.
1552 conversionNop new_rep expr
1553 = do e_code <- getRegister expr
1554 return (swizzleRegisterRep e_code new_rep)
-- | PowerPC dyadic MachOps. Sub-word integer operands are widened to
-- I32 first (extendSExpr/extendUExpr), since PPC compares and divides
-- operate on full words.
1556 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1558 MO_Eq F32 -> condFltReg EQQ x y
1559 MO_Ne F32 -> condFltReg NE x y
1561 MO_S_Gt F32 -> condFltReg GTT x y
1562 MO_S_Ge F32 -> condFltReg GE x y
1563 MO_S_Lt F32 -> condFltReg LTT x y
1564 MO_S_Le F32 -> condFltReg LE x y
1566 MO_Eq F64 -> condFltReg EQQ x y
1567 MO_Ne F64 -> condFltReg NE x y
1569 MO_S_Gt F64 -> condFltReg GTT x y
1570 MO_S_Ge F64 -> condFltReg GE x y
1571 MO_S_Lt F64 -> condFltReg LTT x y
1572 MO_S_Le F64 -> condFltReg LE x y
1574 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1575 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1577 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1578 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1579 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1580 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1582 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1583 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1584 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1585 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1587 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1588 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1589 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1590 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1592 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1593 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1594 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1595 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1597 -- optimize addition with 32-bit immediate
-- NOTE(review): the MO_Add I32 head and its guard lines are missing
-- from this excerpt; what remains shows the addi / addis+add split for
-- immediates that do/don't fit in 16 bits.
1601 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1602 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1605 (src, srcCode) <- getSomeReg x
1606 let imm = litToImm lit
1607 code dst = srcCode `appOL` toOL [
1608 ADDIS dst src (HA imm),
1609 ADD dst dst (RIImm (LO imm))
1611 return (Any I32 code)
1612 _ -> trivialCode I32 True ADD x y
1614 MO_Add rep -> trivialCode rep True ADD x y
1616 case y of -- subfi ('subtract from' with immediate) doesn't exist
1617 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1618 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1619 _ -> trivialCodeNoImm rep SUBF y x
1621 MO_Mul rep -> trivialCode rep True MULLW x y
1623 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1625 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1626 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1628 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1629 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1631 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1632 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1634 MO_And rep -> trivialCode rep False AND x y
1635 MO_Or rep -> trivialCode rep False OR x y
1636 MO_Xor rep -> trivialCode rep False XOR x y
1638 MO_Shl rep -> trivialCode rep False SLW x y
1639 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1640 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- | PowerPC literals: small ints via LI; floats via a read-only data
-- label referenced PIC-safely; other literals via LIS/OR halves.
1642 getRegister (CmmLit (CmmInt i rep))
1643 | Just imm <- makeImmediate rep True i
1645 code dst = unitOL (LI dst imm)
1647 return (Any rep code)
1649 getRegister (CmmLit (CmmFloat f frep)) = do
1650 lbl <- getNewLabelNat
1651 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1652 Amode addr addr_code <- getAmode dynRef
1654 LDATA ReadOnlyData [CmmDataLabel lbl,
1655 CmmStaticLit (CmmFloat f frep)]
1656 `consOL` (addr_code `snocOL` LD frep dst addr)
1657 return (Any frep code)
-- NOTE(review): the LIS line of the general-literal case is missing
-- from this excerpt.
1659 getRegister (CmmLit lit)
1660 = let rep = cmmLitRep lit
1664 OR dst dst (RIImm (LO imm))
1666 in return (Any rep code)
1668 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- | Widen an integer expression of the given rep to I32 with a
-- sign-extending conversion; an I32 expression is passed through
-- untouched.
extendSExpr I32 e = e
extendSExpr r e = CmmMachOp (MO_S_Conv r I32) [e]

-- | Widen an integer expression of the given rep to I32 with a
-- zero-extending conversion; an I32 expression is passed through
-- untouched.
extendUExpr I32 e = e
extendUExpr r e = CmmMachOp (MO_U_Conv r I32) [e]
1677 #endif /* powerpc_TARGET_ARCH */
1680 -- -----------------------------------------------------------------------------
1681 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- | An addressing mode paired with the code that sets it up. The addr
-- part must be used immediately after running the code part, since the
-- code loads the registers the address refers to (see the Rule of the
-- Game note below).
1683 data Amode = Amode AddrMode InstrBlock
1686 Now, given a tree (the argument to an CmmLoad) that references memory,
1687 produce a suitable addressing mode.
1689 A Rule of the Game (tm) for Amodes: use of the addr bit must
1690 immediately follow use of the code part, since the code part puts
1691 values in registers which the addr then refers to. So you can't put
1692 anything in between, lest it overwrite some of those registers. If
1693 you need to do some other computation between the code part and use of
1694 the addr bit, first store the effective address from the amode in a
1695 temporary, then do the other computation, and then use the temporary:
1699 ... other computation ...
-- | Turn an address expression into an addressing mode plus setup code.
-- CmmRegOff is first rewritten into a plain MachOp tree.
1703 getAmode :: CmmExpr -> NatM Amode
1704 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1706 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1708 #if alpha_TARGET_ARCH
-- NOTE(review): this Alpha section still uses the pre-Cmm Stix types
-- (StPrim, StInt) and old-style thenNat combinators — it appears to be
-- stale code that would not compile against the Cmm interface above;
-- confirm whether the alpha target is still maintained.
1710 getAmode (StPrim IntSubOp [x, StInt i])
1711 = getNewRegNat PtrRep `thenNat` \ tmp ->
1712 getRegister x `thenNat` \ register ->
1714 code = registerCode register tmp
1715 reg = registerName register tmp
1716 off = ImmInt (-(fromInteger i))
1718 return (Amode (AddrRegImm reg off) code)
1720 getAmode (StPrim IntAddOp [x, StInt i])
1721 = getNewRegNat PtrRep `thenNat` \ tmp ->
1722 getRegister x `thenNat` \ register ->
1724 code = registerCode register tmp
1725 reg = registerName register tmp
1726 off = ImmInt (fromInteger i)
1728 return (Amode (AddrRegImm reg off) code)
1732 = return (Amode (AddrImm imm__2) id)
1735 imm__2 = case imm of Just x -> x
1738 = getNewRegNat PtrRep `thenNat` \ tmp ->
1739 getRegister other `thenNat` \ register ->
1741 code = registerCode register tmp
1742 reg = registerName register tmp
1744 return (Amode (AddrReg reg) code)
1746 #endif /* alpha_TARGET_ARCH */
1748 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1750 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1752 -- This is all just ridiculous, since it carefully undoes
1753 -- what mangleIndexTree has just done.
-- | x86/x86_64 addressing modes: base+displacement, base+scaled-index,
-- and absolute immediates; anything else is computed into a register.
1754 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1755 | not (is64BitLit lit)
1756 -- ASSERT(rep == I32)???
1757 = do (x_reg, x_code) <- getSomeReg x
1758 let off = ImmInt (-(fromInteger i))
1759 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1761 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1762 | not (is64BitLit lit)
1763 -- ASSERT(rep == I32)???
1764 = do (x_reg, x_code) <- getSomeReg x
1765 let off = ImmInt (fromInteger i)
1766 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1768 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1769 -- recognised by the next rule.
1770 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1772 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- Scaled index: shifts of 0..3 map onto the 1/2/4/8 SIB scale factors.
1774 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1775 [y, CmmLit (CmmInt shift _)]])
1776 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1777 = do (x_reg, x_code) <- getNonClobberedReg x
1778 -- x must be in a temp, because it has to stay live over y_code
1779 -- we could compare x_reg and y_reg and do something better here...
1780 (y_reg, y_code) <- getSomeReg y
1782 code = x_code `appOL` y_code
1783 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1784 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1787 getAmode (CmmLit lit) | not (is64BitLit lit)
1788 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Fallback: evaluate the whole expression into a register and use it
-- as a bare base. NOTE(review): the equation head (getAmode expr = do)
-- is missing from this excerpt.
1791 (reg,code) <- getSomeReg expr
1792 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1794 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1798 #if sparc_TARGET_ARCH
-- | SPARC addressing modes: reg+imm13, reg+reg, and literals via
-- SETHI/LO split. NOTE(review): the fits13Bits guards are missing from
-- this excerpt.
1800 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1803 (reg, code) <- getSomeReg x
1805 off = ImmInt (-(fromInteger i))
1806 return (Amode (AddrRegImm reg off) code)
1809 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1812 (reg, code) <- getSomeReg x
1814 off = ImmInt (fromInteger i)
1815 return (Amode (AddrRegImm reg off) code)
1817 getAmode (CmmMachOp (MO_Add rep) [x, y])
1819 (regX, codeX) <- getSomeReg x
1820 (regY, codeY) <- getSomeReg y
1822 code = codeX `appOL` codeY
1823 return (Amode (AddrRegReg regX regY) code)
1825 -- XXX Is this same as "leaf" in Stix?
1826 getAmode (CmmLit lit)
1828 tmp <- getNewRegNat I32
1830 code = unitOL (SETHI (HI imm__2) tmp)
1831 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1833 imm__2 = litToImm lit
-- Fallback: compute the address into a register; the offset lines are
-- missing from this excerpt.
1837 (reg, code) <- getSomeReg other
1840 return (Amode (AddrRegImm reg off) code)
1842 #endif /* sparc_TARGET_ARCH */
1844 #ifdef powerpc_TARGET_ARCH
-- | PowerPC addressing modes: reg+imm16 (signed), reg+reg, and the
-- HA/LO split for larger displacements and bare literals.
1845 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1846 | Just off <- makeImmediate I32 True (-i)
1848 (reg, code) <- getSomeReg x
1849 return (Amode (AddrRegImm reg off) code)
1852 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1853 | Just off <- makeImmediate I32 True i
1855 (reg, code) <- getSomeReg x
1856 return (Amode (AddrRegImm reg off) code)
1858 -- optimize addition with 32-bit immediate
-- ADDIS adds the high-adjusted half; the LO half rides along in the
-- displacement of the resulting addressing mode.
1860 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1862 tmp <- getNewRegNat I32
1863 (src, srcCode) <- getSomeReg x
1864 let imm = litToImm lit
1865 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1866 return (Amode (AddrRegImm tmp (LO imm)) code)
1868 getAmode (CmmLit lit)
1870 tmp <- getNewRegNat I32
1871 let imm = litToImm lit
1872 code = unitOL (LIS tmp (HA imm))
1873 return (Amode (AddrRegImm tmp (LO imm)) code)
1875 getAmode (CmmMachOp (MO_Add I32) [x, y])
1877 (regX, codeX) <- getSomeReg x
1878 (regY, codeY) <- getSomeReg y
1879 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Fallback: compute into a register. NOTE(review): the equation head
-- and zero-offset binding are missing from this excerpt.
1883 (reg, code) <- getSomeReg other
1886 return (Amode (AddrRegImm reg off) code)
1887 #endif /* powerpc_TARGET_ARCH */
1889 -- -----------------------------------------------------------------------------
1890 -- getOperand: sometimes any operand will do.
1892 -- getNonClobberedOperand: the value of the operand will remain valid across
1893 -- the computation of an arbitrary expression, unless the expression
1894 -- is computed directly into a register which the operand refers to
1895 -- (see trivialCode where this function is used for an example).
1897 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Produce an operand whose value survives the evaluation of an
-- arbitrary later expression: immediates are always safe; loads are
-- safe if the amode uses no clobberable real registers (otherwise the
-- address is materialised with LEA into a temp); anything else goes
-- through getNonClobberedReg.
1899 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1900 #if x86_64_TARGET_ARCH
-- x86_64 has no FP immediates: float literals come from a rip-relative
-- read-only data label.
1901 getNonClobberedOperand (CmmLit lit)
1902 | isSuitableFloatingPointLit lit = do
1903 lbl <- getNewLabelNat
1904 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1906 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1908 getNonClobberedOperand (CmmLit lit)
1909 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1910 return (OpImm (litToImm lit), nilOL)
1911 getNonClobberedOperand (CmmLoad mem pk)
1912 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1913 Amode src mem_code <- getAmode mem
1915 if (amodeCouldBeClobbered src)
1917 tmp <- getNewRegNat wordRep
1918 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1919 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1922 return (OpAddr src', save_code `appOL` mem_code)
1923 getNonClobberedOperand e = do
1924 (reg, code) <- getNonClobberedReg e
1925 return (OpReg reg, code)
-- | True if any register in the amode is a free (allocatable) real
-- register, i.e. one that later code generation may overwrite.
1927 amodeCouldBeClobbered :: AddrMode -> Bool
1928 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1930 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1931 regClobbered _ = False
1933 -- getOperand: the operand is not required to remain valid across the
1934 -- computation of an arbitrary expression.
-- | Like getNonClobberedOperand, but the operand only needs to be valid
-- immediately — no protection against later clobbering.
1935 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1936 #if x86_64_TARGET_ARCH
1937 getOperand (CmmLit lit)
1938 | isSuitableFloatingPointLit lit = do
1939 lbl <- getNewLabelNat
1940 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1942 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1944 getOperand (CmmLit lit)
1945 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1946 return (OpImm (litToImm lit), nilOL)
1947 getOperand (CmmLoad mem pk)
1948 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1949 Amode src mem_code <- getAmode mem
1950 return (OpAddr src, mem_code)
-- Fallback into a register. NOTE(review): the equation head
-- (getOperand e = do) is missing from this excerpt.
1952 (reg, code) <- getSomeReg e
1953 return (OpReg reg, code)
-- | True if the expression can be used directly as an instruction
-- operand (a load, or a literal representable as one).
1955 isOperand :: CmmExpr -> Bool
1956 isOperand (CmmLoad _ _) = True
1957 isOperand (CmmLit lit) = not (is64BitLit lit)
1958 || isSuitableFloatingPointLit lit
1961 -- if we want a floating-point literal as an operand, we can
1962 -- use it directly from memory. However, if the literal is
1963 -- zero, we're better off generating it into a register using
1965 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1966 isSuitableFloatingPointLit _ = False
-- | Operand that is either a direct memory reference (for simple
-- integer loads) or a non-clobberable register.
1968 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1969 getRegOrMem (CmmLoad mem pk)
1970 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1971 Amode src mem_code <- getAmode mem
1972 return (OpAddr src, mem_code)
1974 (reg, code) <- getNonClobberedReg e
1975 return (OpReg reg, code)
1977 #if x86_64_TARGET_ARCH
-- | True if the literal needs a full 64-bit immediate on x86_64.
-- NOTE(review): the #endif separating the x86_64-specific equation from
-- the generic 'False' fallback is missing from this excerpt.
1978 is64BitLit (CmmInt i I64) = is64BitInteger i
1979 -- assume that labels are in the range 0-2^31-1: this assumes the
1980 -- small memory model (see gcc docs, -mcmodel=small).
1982 is64BitLit x = False
-- | True if the value cannot be encoded as a signed 32-bit immediate,
-- i.e. it lies outside the interval [-2^31, 2^31 - 1].
is64BitInteger :: Integer -> Bool
is64BitInteger n = n < -0x80000000 || n > 0x7fffffff
1988 -- -----------------------------------------------------------------------------
1989 -- The 'CondCode' type: Condition codes passed up the tree.
-- | A condition passed up the tree: the Bool records whether the
-- condition came from a floating-point comparison, the Cond is the
-- branch condition, and the InstrBlock sets the flags.
1991 data CondCode = CondCode Bool Cond InstrBlock
1993 -- Set up a condition code for a conditional branch.
1995 getCondCode :: CmmExpr -> NatM CondCode
1997 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1999 #if alpha_TARGET_ARCH
-- Alpha: condition codes are not implemented for this target.
2000 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2001 #endif /* alpha_TARGET_ARCH */
2003 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2005 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2006 -- yes, they really do seem to want exactly the same!
-- | x86/x86_64/sparc: dispatch a comparison MachOp to condFltCode or
-- condIntCode with the matching condition.
2008 getCondCode (CmmMachOp mop [x, y])
2009 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2011 MO_Eq F32 -> condFltCode EQQ x y
2012 MO_Ne F32 -> condFltCode NE x y
2014 MO_S_Gt F32 -> condFltCode GTT x y
2015 MO_S_Ge F32 -> condFltCode GE x y
2016 MO_S_Lt F32 -> condFltCode LTT x y
2017 MO_S_Le F32 -> condFltCode LE x y
2019 MO_Eq F64 -> condFltCode EQQ x y
2020 MO_Ne F64 -> condFltCode NE x y
2022 MO_S_Gt F64 -> condFltCode GTT x y
2023 MO_S_Ge F64 -> condFltCode GE x y
2024 MO_S_Lt F64 -> condFltCode LTT x y
2025 MO_S_Le F64 -> condFltCode LE x y
2027 MO_Eq rep -> condIntCode EQQ x y
2028 MO_Ne rep -> condIntCode NE x y
2030 MO_S_Gt rep -> condIntCode GTT x y
2031 MO_S_Ge rep -> condIntCode GE x y
2032 MO_S_Lt rep -> condIntCode LTT x y
2033 MO_S_Le rep -> condIntCode LE x y
2035 MO_U_Gt rep -> condIntCode GU x y
2036 MO_U_Ge rep -> condIntCode GEU x y
2037 MO_U_Lt rep -> condIntCode LU x y
2038 MO_U_Le rep -> condIntCode LEU x y
2040 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2042 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2044 #elif powerpc_TARGET_ARCH
2046 -- almost the same as everywhere else - but we need to
2047 -- extend small integers to 32 bit first
-- | PowerPC: same dispatch as x86/sparc, but sub-word integers are
-- widened to I32 first (sign- or zero-extended to match the comparison).
2049 getCondCode (CmmMachOp mop [x, y])
2051 MO_Eq F32 -> condFltCode EQQ x y
2052 MO_Ne F32 -> condFltCode NE x y
2054 MO_S_Gt F32 -> condFltCode GTT x y
2055 MO_S_Ge F32 -> condFltCode GE x y
2056 MO_S_Lt F32 -> condFltCode LTT x y
2057 MO_S_Le F32 -> condFltCode LE x y
2059 MO_Eq F64 -> condFltCode EQQ x y
2060 MO_Ne F64 -> condFltCode NE x y
2062 MO_S_Gt F64 -> condFltCode GTT x y
2063 MO_S_Ge F64 -> condFltCode GE x y
2064 MO_S_Lt F64 -> condFltCode LTT x y
2065 MO_S_Le F64 -> condFltCode LE x y
2067 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2068 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2070 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2071 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2072 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2073 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2075 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2076 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2077 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2078 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2080 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2082 getCondCode other = panic "getCondCode(2)(powerpc)"
2088 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2089 -- passed back up the tree.
-- | Turn a comparison of two expressions into flag-setting code plus a
-- condition to branch on.
2091 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2093 #if alpha_TARGET_ARCH
2094 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2095 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2096 #endif /* alpha_TARGET_ARCH */
2098 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2099 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2101 -- memory vs immediate
2102 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2103 Amode x_addr x_code <- getAmode x
2106 code = x_code `snocOL`
2107 CMP pk (OpImm imm) (OpAddr x_addr)
2109 return (CondCode False cond code)
2112 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2113 (x_reg, x_code) <- getSomeReg x
2115 code = x_code `snocOL`
2116 TEST pk (OpReg x_reg) (OpReg x_reg)
2118 return (CondCode False cond code)
2120 -- anything vs operand
2121 condIntCode cond x y | isOperand y = do
2122 (x_reg, x_code) <- getNonClobberedReg x
2123 (y_op, y_code) <- getOperand y
2125 code = x_code `appOL` y_code `snocOL`
2126 CMP (cmmExprRep x) y_op (OpReg x_reg)
2128 return (CondCode False cond code)
2130 -- anything vs anything
2131 condIntCode cond x y = do
2132 (y_reg, y_code) <- getNonClobberedReg y
2133 (x_op, x_code) <- getRegOrMem x
2135 code = y_code `appOL`
2137 CMP (cmmExprRep x) (OpReg y_reg) x_op
2139 return (CondCode False cond code)
2142 #if i386_TARGET_ARCH
-- i386 (x87): float comparison via the pseudo-instruction GCMP, which only
-- supports the ordered comparisons asserted below.
2143 condFltCode cond x y
2144 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2145 (x_reg, x_code) <- getNonClobberedReg x
2146 (y_reg, y_code) <- getSomeReg y
2148 code = x_code `appOL` y_code `snocOL`
2149 GCMP cond x_reg y_reg
2150 -- The GCMP insn does the test and sets the zero flag if comparable
2151 -- and true. Hence we always supply EQQ as the condition to test.
2152 return (CondCode True EQQ code)
2153 #endif /* i386_TARGET_ARCH */
2155 #if x86_64_TARGET_ARCH
2156 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2157 -- an operand, but the right must be a reg. We can probably do better
2158 -- than this general case...
2159 condFltCode cond x y = do
2160 (x_reg, x_code) <- getNonClobberedReg x
2161 (y_op, y_code) <- getOperand y
2163 code = x_code `appOL`
2165 CMP (cmmExprRep x) y_op (OpReg x_reg)
2166 -- NB(1): we need to use the unsigned comparison operators on the
2167 -- result of this comparison.
2169 return (CondCode True (condToUnsigned cond) code)
2172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2174 #if sparc_TARGET_ARCH
-- SPARC: integer comparison via SUBcc into %g0 (discard result, keep flags).
-- NOTE(review): the guard line (immediate-fits check) between 2176 and 2179
-- is elided in this excerpt.
2176 condIntCode cond x (CmmLit (CmmInt y rep))
2179 (src1, code) <- getSomeReg x
2181 src2 = ImmInt (fromInteger y)
2182 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2183 return (CondCode False cond code')
-- General register-register comparison.
2185 condIntCode cond x y = do
2186 (src1, code1) <- getSomeReg x
2187 (src2, code2) <- getSomeReg y
2189 code__2 = code1 `appOL` code2 `snocOL`
2190 SUB False True src1 (RIReg src2) g0
2191 return (CondCode False cond code__2)
-- Float comparison; mixed F32/F64 operands are first promoted to F64 via
-- FxTOy into a fresh temporary so FCMP sees matching precisions.
2194 condFltCode cond x y = do
2195 (src1, code1) <- getSomeReg x
2196 (src2, code2) <- getSomeReg y
2197 tmp <- getNewRegNat F64
2199 promote x = FxTOy F32 F64 x tmp
2206 code1 `appOL` code2 `snocOL`
2207 FCMP True pk1 src1 src2
2208 else if pk1 == F32 then
2209 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2210 FCMP True F64 tmp src2
2212 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2213 FCMP True F64 src1 tmp
2214 return (CondCode True cond code__2)
2216 #endif /* sparc_TARGET_ARCH */
2218 #if powerpc_TARGET_ARCH
2219 -- ###FIXME: I16 and I8!
-- Immediate form: only when the literal fits in a signed/unsigned 16-bit
-- immediate (makeImmediate decides). CMPL for unsigned, CMP for signed.
2220 condIntCode cond x (CmmLit (CmmInt y rep))
2221 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2223 (src1, code) <- getSomeReg x
2225 code' = code `snocOL`
2226 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2227 return (CondCode False cond code')
-- General register-register comparison.
2229 condIntCode cond x y = do
2230 (src1, code1) <- getSomeReg x
2231 (src2, code2) <- getSomeReg y
2233 code' = code1 `appOL` code2 `snocOL`
2234 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2235 return (CondCode False cond code')
-- Float comparison. For GE/LE the unordered case must not satisfy the
-- condition, so CRNOR combines condition-register bits to clear it.
2237 condFltCode cond x y = do
2238 (src1, code1) <- getSomeReg x
2239 (src2, code2) <- getSomeReg y
2241 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2242 code'' = case cond of -- twiddle CR to handle unordered case
2243 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2244 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR bit indices within the compare field; presumably LT=0, GT=1, EQ=2
-- as in the PPC condition register layout — confirm against Instr defs.
2247 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2248 return (CondCode True cond code'')
2252 -- -----------------------------------------------------------------------------
2253 -- Generating assignments
2255 -- Assignments are really at the heart of the whole code generation
2256 -- business. Almost all top-level nodes of any real importance are
2257 -- assignments, which correspond to loads, stores, or register
2258 -- transfers. If we're really lucky, some of the register transfers
2259 -- will go away, because we can use the destination register to
2260 -- complete the code generation for the right hand side. This only
2261 -- fails when the right hand side is forced into a fixed register
2262 -- (e.g. the result of a call).
-- Per-architecture assignment code generators: memory destination vs
-- register destination, integer vs floating point, at a given 'MachRep'.
2264 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2265 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2267 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2268 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2270 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2272 #if alpha_TARGET_ARCH
-- Alpha (legacy, pre-OrdList style): store to memory destination.
2274 assignIntCode pk (CmmLoad dst _) src
2275 = getNewRegNat IntRep `thenNat` \ tmp ->
2276 getAmode dst `thenNat` \ amode ->
2277 getRegister src `thenNat` \ register ->
2279 code1 = amodeCode amode []
2280 dst__2 = amodeAddr amode
2281 code2 = registerCode register tmp []
2282 src__2 = registerName register tmp
2283 sz = primRepToSize pk
2284 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register destination: generate src directly into dst; if src is fixed
-- in another register, move it across with OR rs,rs,rd.
2288 assignIntCode pk dst src
2289 = getRegister dst `thenNat` \ register1 ->
2290 getRegister src `thenNat` \ register2 ->
2292 dst__2 = registerName register1 zeroh
2293 code = registerCode register2 dst__2
2294 src__2 = registerName register2 dst__2
2295 code__2 = if isFixed register2
2296 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2301 #endif /* alpha_TARGET_ARCH */
2303 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2305 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2307 -- integer assignment to memory
-- MOV op_src, [addr]: the source may be an immediate or a register.
2308 assignMem_IntCode pk addr src = do
2309 Amode addr code_addr <- getAmode addr
2310 (code_src, op_src) <- get_op_RI src
2312 code = code_src `appOL`
2314 MOV pk op_src (OpAddr addr)
2315 -- NOTE: op_src is stable, so it will still be valid
2316 -- after code_addr. This may involve the introduction
2317 -- of an extra MOV to a temporary register, but we hope
2318 -- the register allocator will get rid of it.
-- Classify an expression as either an immediate operand (small literal)
-- or code that leaves the value in a non-clobbered register.
2322 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2323 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2324 = return (nilOL, OpImm (litToImm lit))
2326 = do (reg,code) <- getNonClobberedReg op
2327 return (code, OpReg reg)
2330 -- Assign; dst is a reg, rhs is mem
2331 assignReg_IntCode pk reg (CmmLoad src _) = do
2332 load_code <- intLoadCode (MOV pk) src
2333 return (load_code (getRegisterReg reg))
2335 -- dst is a reg, but src could be anything
2336 assignReg_IntCode pk reg src = do
2337 code <- getAnyReg src
2338 return (code (getRegisterReg reg))
2340 #endif /* i386_TARGET_ARCH */
2342 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2344 #if sparc_TARGET_ARCH
-- SPARC: evaluate source into a register, then a single ST to the address.
2346 assignMem_IntCode pk addr src = do
2347 (srcReg, code) <- getSomeReg src
2348 Amode dstAddr addr_code <- getAmode addr
2349 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Register destination: Any-style registers write straight into dst;
-- Fixed registers need an explicit move (OR %g0, src, dst).
2351 assignReg_IntCode pk reg src = do
2352 r <- getRegister src
2354 Any _ code -> code dst
2355 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2357 dst = getRegisterReg reg
2360 #endif /* sparc_TARGET_ARCH */
2362 #if powerpc_TARGET_ARCH
-- PowerPC: evaluate source into a register, then ST to the address.
2364 assignMem_IntCode pk addr src = do
2365 (srcReg, code) <- getSomeReg src
2366 Amode dstAddr addr_code <- getAmode addr
2367 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2369 -- dst is a reg, but src could be anything
-- Fixed results are copied into dst with MR (move register).
2370 assignReg_IntCode pk reg src
2372 r <- getRegister src
2374 Any _ code -> code dst
2375 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2377 dst = getRegisterReg reg
2379 #endif /* powerpc_TARGET_ARCH */
2382 -- -----------------------------------------------------------------------------
2383 -- Floating-point assignments
2385 #if alpha_TARGET_ARCH
-- Alpha (legacy): floating-point store to a memory destination.
2387 assignFltCode pk (CmmLoad dst _) src
2388 = getNewRegNat pk `thenNat` \ tmp ->
2389 getAmode dst `thenNat` \ amode ->
2390 getRegister src `thenNat` \ register ->
2392 code1 = amodeCode amode []
2393 dst__2 = amodeAddr amode
2394 code2 = registerCode register tmp []
2395 src__2 = registerName register tmp
2396 sz = primRepToSize pk
2397 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register destination; fixed sources are copied across with FMOV.
2401 assignFltCode pk dst src
2402 = getRegister dst `thenNat` \ register1 ->
2403 getRegister src `thenNat` \ register2 ->
2405 dst__2 = registerName register1 zeroh
2406 code = registerCode register2 dst__2
2407 src__2 = registerName register2 dst__2
2408 code__2 = if isFixed register2
2409 then code . mkSeqInstr (FMOV src__2 dst__2)
2414 #endif /* alpha_TARGET_ARCH */
2416 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2418 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2420 -- Floating point assignment to memory
-- i386 uses the x87 store pseudo-op GST; x86_64 uses a plain SSE MOV.
2421 assignMem_FltCode pk addr src = do
2422 (src_reg, src_code) <- getNonClobberedReg src
2423 Amode addr addr_code <- getAmode addr
2425 code = src_code `appOL`
2427 IF_ARCH_i386(GST pk src_reg addr,
2428 MOV pk (OpReg src_reg) (OpAddr addr))
2431 -- Floating point assignment to a register/temporary
2432 assignReg_FltCode pk reg src = do
2433 src_code <- getAnyReg src
2434 return (src_code (getRegisterReg reg))
2436 #endif /* i386_TARGET_ARCH */
2438 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2440 #if sparc_TARGET_ARCH
2442 -- Floating point assignment to memory
-- If the source's precision differs from the target rep, convert via
-- FxTOy into a temporary before storing.
2443 assignMem_FltCode pk addr src = do
2444 Amode dst__2 code1 <- getAmode addr
2445 (src__2, code2) <- getSomeReg src
2446 tmp1 <- getNewRegNat pk
2448 pk__2 = cmmExprRep src
2449 code__2 = code1 `appOL` code2 `appOL`
2451 then unitOL (ST pk src__2 dst__2)
2452 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2455 -- Floating point assignment to a register/temporary
2456 -- ToDo: Verify correctness
-- NOTE(review): v1 is the target of the FMOV in the Fixed case, yet dst
-- is the register being assigned — looks suspicious; the original ToDo
-- above suggests this was never fully verified.
2457 assignReg_FltCode pk reg src = do
2458 r <- getRegister src
2459 v1 <- getNewRegNat pk
2461 Any _ code -> code dst
2462 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2464 dst = getRegisterReg reg
2466 #endif /* sparc_TARGET_ARCH */
2468 #if powerpc_TARGET_ARCH
-- PowerPC: the store/move paths are identical for FP and integer values,
-- so the floating-point assigners simply reuse the integer ones.
2471 assignMem_FltCode = assignMem_IntCode
2472 assignReg_FltCode = assignReg_IntCode
2474 #endif /* powerpc_TARGET_ARCH */
2477 -- -----------------------------------------------------------------------------
2478 -- Generating a non-local jump
2480 -- (If applicable) Do not fill the delay slots here; you will confuse the
2481 -- register allocator.
-- | Generate code for a non-local (tail-call style) jump to an arbitrary
-- target expression. Delay slots are NOT filled here (see note above).
2483 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2485 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2487 #if alpha_TARGET_ARCH
-- Alpha: short BR for local asm temps, else load address into pv and JMP.
2489 genJump (CmmLabel lbl)
2490 | isAsmTemp lbl = returnInstr (BR target)
2491 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2493 target = ImmCLbl lbl
2496 = getRegister tree `thenNat` \ register ->
2497 getNewRegNat PtrRep `thenNat` \ tmp ->
2499 dst = registerName register pv
2500 code = registerCode register pv
2501 target = registerName register pv
2503 if isFixed register then
2504 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2506 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2508 #endif /* alpha_TARGET_ARCH */
2510 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2512 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump through memory: JMP *addr, folding the load into the jump.
2514 genJump (CmmLoad mem pk) = do
2515 Amode target code <- getAmode mem
2516 return (code `snocOL` JMP (OpAddr target))
-- Jump to a literal (label) target.
2518 genJump (CmmLit lit) = do
2519 return (unitOL (JMP (OpImm (litToImm lit))))
-- General case: compute target into a register, JMP *reg.
-- NOTE(review): the equation head (`genJump expr = do`) is elided here.
2522 (reg,code) <- getSomeReg expr
2523 return (code `snocOL` JMP (OpReg reg))
2525 #endif /* i386_TARGET_ARCH */
2527 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2529 #if sparc_TARGET_ARCH
-- SPARC: label targets use CALL (leaf, no result); the NOP fills the
-- architectural delay slot.
2531 genJump (CmmLit (CmmLabel lbl))
2532 = return (toOL [CALL (Left target) 0 True, NOP])
2534 target = ImmCLbl lbl
-- Computed targets: JMP through a register, again with a delay-slot NOP.
2538 (target, code) <- getSomeReg tree
2539 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2541 #endif /* sparc_TARGET_ARCH */
2543 #if powerpc_TARGET_ARCH
-- PowerPC: direct branch for labels; computed targets go via the count
-- register (MTCTR then BCTR).
2544 genJump (CmmLit (CmmLabel lbl))
2545 = return (unitOL $ JMP lbl)
2549 (target,code) <- getSomeReg tree
2550 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2551 #endif /* powerpc_TARGET_ARCH */
2554 -- -----------------------------------------------------------------------------
2555 -- Unconditional branches
-- | Unconditional branch to a local basic block: emit whatever single
-- branch instruction the target architecture uses.
genBranch :: BlockId -> NatM InstrBlock
genBranch bid = return (toOL (mkBranchInstr bid))
2561 -- -----------------------------------------------------------------------------
2562 -- Conditional jumps
2565 Conditional jumps are always to local labels, so we can use branch
2566 instructions. We peek at the arguments to decide what kind of
2569 ALPHA: For comparisons with 0, we're laughing, because we can just do
2570 the desired conditional branch.
2572 I386: First, we have to ensure that the condition
2573 codes are set according to the supplied comparison operation.
2575 SPARC: First, we have to ensure that the condition codes are set
2576 according to the supplied comparison operation. We generate slightly
2577 different code for floating point comparisons, because a floating
2578 point operation cannot directly precede a @BF@. We assume the worst
2579 and fill that slot with a @NOP@.
2581 SPARC: Do not fill the delay slots here; you will confuse the register
2587 :: BlockId -- the branch target
2588 -> CmmExpr -- the condition on which to branch
2591 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2593 #if alpha_TARGET_ARCH
-- Alpha: comparison against integer zero — branch directly on the value.
2595 genCondJump id (StPrim op [x, StInt 0])
2596 = getRegister x `thenNat` \ register ->
2597 getNewRegNat (registerRep register)
2600 code = registerCode register tmp
2601 value = registerName register tmp
2602 pk = registerRep register
2603 target = ImmCLbl lbl
2605 returnSeq code [BI (cmpOp op) value target]
2607 cmpOp CharGtOp = GTT
2609 cmpOp CharEqOp = EQQ
2611 cmpOp CharLtOp = LTT
-- NOTE(review): the Word/Addr mappings below (ALWAYS/NEVER/EQQ) only make
-- sense for comparison against zero of an *unsigned* value (e.g. w < 0 is
-- never true, w >= 0 always) — confirm against the full historical source.
2620 cmpOp WordGeOp = ALWAYS
2621 cmpOp WordEqOp = EQQ
2623 cmpOp WordLtOp = NEVER
2624 cmpOp WordLeOp = EQQ
2626 cmpOp AddrGeOp = ALWAYS
2627 cmpOp AddrEqOp = EQQ
2629 cmpOp AddrLtOp = NEVER
2630 cmpOp AddrLeOp = EQQ
-- Comparison against floating 0.0 — branch on the FP value (BF).
2632 genCondJump lbl (StPrim op [x, StDouble 0.0])
2633 = getRegister x `thenNat` \ register ->
2634 getNewRegNat (registerRep register)
2637 code = registerCode register tmp
2638 value = registerName register tmp
2639 pk = registerRep register
2640 target = ImmCLbl lbl
2642 return (code . mkSeqInstr (BF (cmpOp op) value target))
2644 cmpOp FloatGtOp = GTT
2645 cmpOp FloatGeOp = GE
2646 cmpOp FloatEqOp = EQQ
2647 cmpOp FloatNeOp = NE
2648 cmpOp FloatLtOp = LTT
2649 cmpOp FloatLeOp = LE
2650 cmpOp DoubleGtOp = GTT
2651 cmpOp DoubleGeOp = GE
2652 cmpOp DoubleEqOp = EQQ
2653 cmpOp DoubleNeOp = NE
2654 cmpOp DoubleLtOp = LTT
2655 cmpOp DoubleLeOp = LE
-- General FP comparison: compute FCMP into a register, then branch on it.
-- Alpha only has LE/LT/EQ compares, so e.g. Gt is encoded as "not (y <= x)"
-- by swapping/negating — hence the (instr, cond) pairs below.
2657 genCondJump lbl (StPrim op [x, y])
2659 = trivialFCode pr instr x y `thenNat` \ register ->
2660 getNewRegNat F64 `thenNat` \ tmp ->
2662 code = registerCode register tmp
2663 result = registerName register tmp
2664 target = ImmCLbl lbl
2666 return (code . mkSeqInstr (BF cond result target))
2668 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2670 fltCmpOp op = case op of
2684 (instr, cond) = case op of
2685 FloatGtOp -> (FCMP TF LE, EQQ)
2686 FloatGeOp -> (FCMP TF LTT, EQQ)
2687 FloatEqOp -> (FCMP TF EQQ, NE)
2688 FloatNeOp -> (FCMP TF EQQ, EQQ)
2689 FloatLtOp -> (FCMP TF LTT, NE)
2690 FloatLeOp -> (FCMP TF LE, NE)
2691 DoubleGtOp -> (FCMP TF LE, EQQ)
2692 DoubleGeOp -> (FCMP TF LTT, EQQ)
2693 DoubleEqOp -> (FCMP TF EQQ, NE)
2694 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2695 DoubleLtOp -> (FCMP TF LTT, NE)
2696 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: same complement trick as the FP case.
2698 genCondJump lbl (StPrim op [x, y])
2699 = trivialCode instr x y `thenNat` \ register ->
2700 getNewRegNat IntRep `thenNat` \ tmp ->
2702 code = registerCode register tmp
2703 result = registerName register tmp
2704 target = ImmCLbl lbl
2706 return (code . mkSeqInstr (BI cond result target))
2708 (instr, cond) = case op of
2709 CharGtOp -> (CMP LE, EQQ)
2710 CharGeOp -> (CMP LTT, EQQ)
2711 CharEqOp -> (CMP EQQ, NE)
2712 CharNeOp -> (CMP EQQ, EQQ)
2713 CharLtOp -> (CMP LTT, NE)
2714 CharLeOp -> (CMP LE, NE)
2715 IntGtOp -> (CMP LE, EQQ)
2716 IntGeOp -> (CMP LTT, EQQ)
2717 IntEqOp -> (CMP EQQ, NE)
2718 IntNeOp -> (CMP EQQ, EQQ)
2719 IntLtOp -> (CMP LTT, NE)
2720 IntLeOp -> (CMP LE, NE)
2721 WordGtOp -> (CMP ULE, EQQ)
2722 WordGeOp -> (CMP ULT, EQQ)
2723 WordEqOp -> (CMP EQQ, NE)
2724 WordNeOp -> (CMP EQQ, EQQ)
2725 WordLtOp -> (CMP ULT, NE)
2726 WordLeOp -> (CMP ULE, NE)
2727 AddrGtOp -> (CMP ULE, EQQ)
2728 AddrGeOp -> (CMP ULT, EQQ)
2729 AddrEqOp -> (CMP EQQ, NE)
2730 AddrNeOp -> (CMP EQQ, EQQ)
2731 AddrLtOp -> (CMP ULT, NE)
2732 AddrLeOp -> (CMP ULE, NE)
2734 #endif /* alpha_TARGET_ARCH */
2736 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH

-- | Conditional jump: materialise the comparison into the flags, then
-- emit a JXX with the resulting condition.
genCondJump id bool =
    getCondCode bool >>= \(CondCode _ cond code) ->
        return (code `snocOL` JXX cond id)
2746 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2748 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions branch directly; float (SSE) conditions need
-- extra handling for the unordered (NaN) case — see note with condFltReg.
-- NOTE(review): the case arms between 2759 and 2778 (plain_test /
-- or_unordered / and_ordered bodies) are elided in this excerpt.
2750 genCondJump id bool = do
2751 CondCode is_float cond cond_code <- getCondCode bool
2754 return (cond_code `snocOL` JXX cond id)
2756 lbl <- getBlockIdNat
2758 -- see comment with condFltReg
2759 let code = case cond of
2765 plain_test = unitOL (
2768 or_unordered = toOL [
2772 and_ordered = toOL [
2778 return (cond_code `appOL` code)
2782 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2784 #if sparc_TARGET_ARCH
-- SPARC: FP branches (BF) cannot immediately follow an FP compare, so a
-- NOP is inserted before them; the trailing NOP fills the delay slot.
2786 genCondJump (BlockId id) bool = do
2787 CondCode is_float cond code <- getCondCode bool
2792 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2793 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2797 #endif /* sparc_TARGET_ARCH */
2800 #if powerpc_TARGET_ARCH
2802 genCondJump id bool = do
2803 CondCode is_float cond code <- getCondCode bool
2804 return (code `snocOL` BCC cond id)
2806 #endif /* powerpc_TARGET_ARCH */
2809 -- -----------------------------------------------------------------------------
2810 -- Generating C calls
2812 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2813 -- @get_arg@, which moves the arguments to the correct registers/stack
2814 -- locations. Apart from that, the code is easy.
2816 -- (If applicable) Do not fill the delay slots here; you will confuse the
2817 -- register allocator.
2820 :: CmmCallTarget -- function to call
2821 -> [(CmmReg,MachHint)] -- where to put the result
2822 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2823 -> Maybe [GlobalReg] -- volatile regs to save
2826 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2828 #if alpha_TARGET_ARCH
-- Alpha C call: marshal args into registers/stack, then LDA/JSR/LDGP.
2832 genCCall fn cconv result_regs args
2833 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2834 `thenNat` \ ((unused,_), argCode) ->
2836 nRegs = length allArgRegs - length unused
2837 code = asmSeqThen (map ($ []) argCode)
2840 LDA pv (AddrImm (ImmLab (ptext fn))),
2841 JSR ra (AddrReg pv) nRegs,
2842 LDGP gp (AddrReg ra)]
2844 ------------------------
2845 {- Try to get a value into a specific register (or registers) for
2846 a call. The first 6 arguments go into the appropriate
2847 argument register (separate registers for integer and floating
2848 point arguments, but used in lock-step), and the remaining
2849 arguments are dumped to the stack, beginning at 0(sp). Our
2850 first argument is a pair of the list of remaining argument
2851 registers to be assigned for this call and the next stack
2852 offset to use for overflowing arguments. This way,
2853 @get_arg@ can be applied to all of a call's arguments using
2857 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2858 -> StixTree -- Current argument
2859 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2861 -- We have to use up all of our argument registers first...
2863 get_arg ((iDst,fDst):dsts, offset) arg
2864 = getRegister arg `thenNat` \ register ->
2866 reg = if isFloatingRep pk then fDst else iDst
2867 code = registerCode register reg
2868 src = registerName register reg
2869 pk = registerRep register
2872 if isFloatingRep pk then
2873 ((dsts, offset), if isFixed register then
2874 code . mkSeqInstr (FMOV src fDst)
2877 ((dsts, offset), if isFixed register then
2878 code . mkSeqInstr (OR src (RIReg src) iDst)
2881 -- Once we have run out of argument registers, we move to the
2884 get_arg ([], offset) arg
2885 = getRegister arg `thenNat` \ register ->
2886 getNewRegNat (registerRep register)
2889 code = registerCode register tmp
2890 src = registerName register tmp
2891 pk = registerRep register
2892 sz = primRepToSize pk
2894 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2896 #endif /* alpha_TARGET_ARCH */
2898 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2900 #if i386_TARGET_ARCH
2902 -- we only cope with a single result for foreign calls
-- Prim ops with x87 equivalents are inlined; everything else goes out of
-- line via outOfLineFloatOp.
2903 genCCall (CmmPrim op) [(r,_)] args vols = do
2905 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2906 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2908 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2909 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2911 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2912 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
2914 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
2915 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
2917 other_op -> outOfLineFloatOp op r args vols
2919 actuallyInlineFloatOp rep instr [(x,_)]
2920 = do res <- trivialUFCode rep instr x
2922 return (any (getRegisterReg r))
-- General i386 C call: push args right-to-left, call, pop, move results.
-- Darwin pads the frame so the total pushed size is 16-byte aligned.
2924 genCCall target dest_regs args vols = do
2926 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
2927 #if !darwin_TARGET_OS
2928 tot_arg_size = sum sizes
2930 raw_arg_size = sum sizes
2931 tot_arg_size = roundTo 16 raw_arg_size
2932 arg_pad_size = tot_arg_size - raw_arg_size
2933 delta0 <- getDeltaNat
2934 setDeltaNat (delta0 - arg_pad_size)
2937 push_codes <- mapM push_arg (reverse args)
2938 delta <- getDeltaNat
2941 -- deal with static vs dynamic call targets
2942 (callinsns,cconv) <-
2945 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2946 -> -- ToDo: stdcall arg sizes
2947 return (unitOL (CALL (Left fn_imm) []), conv)
2948 where fn_imm = ImmCLbl lbl
2949 CmmForeignCall expr conv
2950 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2951 ASSERT(dyn_rep == I32)
2952 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2955 #if darwin_TARGET_OS
2957 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2958 DELTA (delta0 - arg_pad_size)]
2959 `appOL` concatOL push_codes
2962 = concatOL push_codes
2963 call = callinsns `appOL`
2965 -- Deallocate parameters after call for ccall;
2966 -- but not for stdcall (callee does it)
2967 (if cconv == StdCallConv || tot_arg_size==0 then [] else
2968 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2970 [DELTA (delta + tot_arg_size)]
2973 setDeltaNat (delta + tot_arg_size)
2976 -- assign the results, if necessary
-- Results come back in eax(/edx for I64) or the x87 top-of-stack (fake0).
2977 assign_code [] = nilOL
2978 assign_code [(dest,_hint)] =
2980 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2981 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2982 F32 -> unitOL (GMOV fake0 r_dest)
2983 F64 -> unitOL (GMOV fake0 r_dest)
2984 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2986 r_dest_hi = getHiVRegFromLo r_dest
2987 rep = cmmRegRep dest
2988 r_dest = getRegisterReg dest
2989 assign_code many = panic "genCCall.assign_code many"
2991 return (push_code `appOL`
2993 assign_code dest_regs)
-- Round x up to the next multiple of a.
3001 roundTo a x | x `mod` a == 0 = x
3002 | otherwise = x + a - (x `mod` a)
-- Push one argument; tracks the stack delta for unwind info (DELTA).
3005 push_arg :: (CmmExpr,MachHint){-current argument-}
3006 -> NatM InstrBlock -- code
3008 push_arg (arg,_hint) -- we don't need the hints on x86
3009 | arg_rep == I64 = do
3010 ChildCode64 code r_lo <- iselExpr64 arg
3011 delta <- getDeltaNat
3012 setDeltaNat (delta - 8)
3014 r_hi = getHiVRegFromLo r_lo
3016 return ( code `appOL`
3017 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3018 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3023 (code, reg, sz) <- get_op arg
3024 delta <- getDeltaNat
3025 let size = arg_size sz
3026 setDeltaNat (delta-size)
-- Floats can't be PUSHed; make room on the stack and x87-store into it.
3027 if (case sz of F64 -> True; F32 -> True; _ -> False)
3028 then return (code `appOL`
3029 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3031 GST sz reg (AddrBaseIndex (EABaseReg esp)
3035 else return (code `snocOL`
3036 PUSH I32 (OpReg reg) `snocOL`
3040 arg_rep = cmmExprRep arg
3043 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3045 (reg,code) <- getSomeReg op
3046 return (code, reg, cmmExprRep op)
3048 #endif /* i386_TARGET_ARCH */
3050 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Lower a floating-point CallishMachOp to an out-of-line C call to the
-- corresponding libm routine. F64 results go straight into the destination;
-- other reps go via a fresh F64 temporary plus a converting assignment.
3052 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3053 -> Maybe [GlobalReg] -> NatM InstrBlock
3054 outOfLineFloatOp mop res args vols
3056 targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3057 let target = CmmForeignCall targetExpr CCallConv
3059 if cmmRegRep res == F64
3061 stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3065 tmp = CmmLocal (LocalReg uq F64)
3067 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3068 code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3069 return (code1 `appOL` code2)
3071 lbl = mkForeignLabel fn Nothing True
-- Map each mach-op to its libm symbol name (f-suffixed for F32).
3074 MO_F32_Sqrt -> FSLIT("sqrtf")
3075 MO_F32_Sin -> FSLIT("sinf")
3076 MO_F32_Cos -> FSLIT("cosf")
3077 MO_F32_Tan -> FSLIT("tanf")
3078 MO_F32_Exp -> FSLIT("expf")
3079 MO_F32_Log -> FSLIT("logf")
3081 MO_F32_Asin -> FSLIT("asinf")
3082 MO_F32_Acos -> FSLIT("acosf")
3083 MO_F32_Atan -> FSLIT("atanf")
3085 MO_F32_Sinh -> FSLIT("sinhf")
3086 MO_F32_Cosh -> FSLIT("coshf")
3087 MO_F32_Tanh -> FSLIT("tanhf")
3088 MO_F32_Pwr -> FSLIT("powf")
3090 MO_F64_Sqrt -> FSLIT("sqrt")
3091 MO_F64_Sin -> FSLIT("sin")
3092 MO_F64_Cos -> FSLIT("cos")
3093 MO_F64_Tan -> FSLIT("tan")
3094 MO_F64_Exp -> FSLIT("exp")
3095 MO_F64_Log -> FSLIT("log")
3097 MO_F64_Asin -> FSLIT("asin")
3098 MO_F64_Acos -> FSLIT("acos")
3099 MO_F64_Atan -> FSLIT("atan")
3101 MO_F64_Sinh -> FSLIT("sinh")
3102 MO_F64_Cosh -> FSLIT("cosh")
3103 MO_F64_Tanh -> FSLIT("tanh")
3104 MO_F64_Pwr -> FSLIT("pow")
3106 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3110 #if x86_64_TARGET_ARCH
-- x86_64: all float prim-ops go out of line (SSE has no sin/cos etc.).
3112 genCCall (CmmPrim op) [(r,_)] args vols =
3113 outOfLineFloatOp op r args vols
-- x86_64 System V convention: first args in registers (ints in
-- rdi..r9-style regs, floats in xmm0..), overflow on the stack; %al must
-- hold the number of SSE registers used, for varargs callees.
3115 genCCall target dest_regs args vols = do
3117 -- load up the register arguments
3118 (stack_args, aregs, fregs, load_args_code)
3119 <- load_args args allArgRegs allFPArgRegs nilOL
3122 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3123 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3124 arg_regs = int_regs_used ++ fp_regs_used
3125 -- for annotating the call instruction with
3127 sse_regs = length fp_regs_used
3129 tot_arg_size = arg_size * length stack_args
3131 -- On entry to the called function, %rsp should be aligned
3132 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3133 -- the return address is 16-byte aligned). In STG land
3134 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3135 -- need to make sure we push a multiple of 16-bytes of args,
3136 -- plus the return address, to get the correct alignment.
3137 -- Urg, this is hard. We need to feed the delta back into
3138 -- the arg pushing code.
3139 (real_size, adjust_rsp) <-
3140 if tot_arg_size `rem` 16 == 0
3141 then return (tot_arg_size, nilOL)
3142 else do -- we need to adjust...
3143 delta <- getDeltaNat
3144 setDeltaNat (delta-8)
3145 return (tot_arg_size+8, toOL [
3146 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3150 -- push the stack args, right to left
3151 push_code <- push_args (reverse stack_args) nilOL
3152 delta <- getDeltaNat
3154 -- deal with static vs dynamic call targets
3155 (callinsns,cconv) <-
3158 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3159 -> -- ToDo: stdcall arg sizes
3160 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3161 where fn_imm = ImmCLbl lbl
3162 CmmForeignCall expr conv
3163 -> do (dyn_r, dyn_c) <- getSomeReg expr
3164 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3167 -- The x86_64 ABI requires us to set %al to the number of SSE
3168 -- registers that contain arguments, if the called routine
3169 -- is a varargs function. We don't know whether it's a
3170 -- varargs function or not, so we have to assume it is.
3172 -- It's not safe to omit this assignment, even if the number
3173 -- of SSE regs in use is zero. If %al is larger than 8
3174 -- on entry to a varargs function, seg faults ensue.
3175 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3177 let call = callinsns `appOL`
3179 -- Deallocate parameters after call for ccall;
3180 -- but not for stdcall (callee does it)
3181 (if cconv == StdCallConv || real_size==0 then [] else
3182 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3184 [DELTA (delta + real_size)]
3187 setDeltaNat (delta + real_size)
3190 -- assign the results, if necessary
-- Results: floats in xmm0, everything else in rax.
3191 assign_code [] = nilOL
3192 assign_code [(dest,_hint)] =
3194 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3195 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3196 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3198 rep = cmmRegRep dest
3199 r_dest = getRegisterReg dest
3200 assign_code many = panic "genCCall.assign_code many"
3202 return (load_args_code `appOL`
3205 assign_eax sse_regs `appOL`
3207 assign_code dest_regs)
3210 arg_size = 8 -- always, at the mo
-- Distribute arguments over the available int/FP argument registers,
-- returning the leftovers that must go on the stack.
3212 load_args :: [(CmmExpr,MachHint)]
3213 -> [Reg] -- int regs avail for args
3214 -> [Reg] -- FP regs avail for args
3216 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3217 load_args args [] [] code = return (args, [], [], code)
3218 -- no more regs to use
3219 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3220 -- no more args to push
3221 load_args ((arg,hint) : rest) aregs fregs code
3222 | isFloatingRep arg_rep =
3226 arg_code <- getAnyReg arg
3227 load_args rest aregs rs (code `appOL` arg_code r)
3232 arg_code <- getAnyReg arg
3233 load_args rest rs fregs (code `appOL` arg_code r)
3235 arg_rep = cmmExprRep arg
3238 (args',ars,frs,code') <- load_args rest aregs fregs code
3239 return ((arg,hint):args', ars, frs, code')
-- Push stack-overflow arguments; floats are stored (can't be PUSHed).
3241 push_args [] code = return code
3242 push_args ((arg,hint):rest) code
3243 | isFloatingRep arg_rep = do
3244 (arg_reg, arg_code) <- getSomeReg arg
3245 delta <- getDeltaNat
3246 setDeltaNat (delta-arg_size)
3247 let code' = code `appOL` toOL [
3248 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3249 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3250 DELTA (delta-arg_size)]
3251 push_args rest code'
3254 -- we only ever generate word-sized function arguments. Promotion
3255 -- has already happened: our Int8# type is kept sign-extended
3256 -- in an Int#, for example.
3257 ASSERT(arg_rep == I64) return ()
3258 (arg_op, arg_code) <- getOperand arg
3259 delta <- getDeltaNat
3260 setDeltaNat (delta-arg_size)
3261 let code' = code `appOL` toOL [PUSH I64 arg_op,
3262 DELTA (delta-arg_size)]
3263 push_args rest code'
3265 arg_rep = cmmExprRep arg
3270 #if sparc_TARGET_ARCH
3272 The SPARC calling convention is an absolute
3273 nightmare. The first 6x32 bits of arguments are mapped into
3274 %o0 through %o5, and the remaining arguments are dumped to the
3275 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3277 If we have to put args on the stack, move %o6==%sp down by
3278 the number of words to go on the stack, to ensure there's enough space.
3280 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3281 16 words above the stack pointer is a word for the address of
3282 a structure return value. I use this as a temporary location
3283 for moving values from float to int regs. Certainly it isn't
3284 safe to put anything in the 16 words starting at %sp, since
3285 this area can get trashed at any time due to window overflows
3286 caused by signal handlers.
3288 A final complication (if the above isn't enough) is that
3289 we can't blithely calculate the arguments one by one into
3290 %o0 .. %o5. Consider the following nested calls:
3294 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3295 the inner call will itself use %o0, which trashes the value put there
3296 in preparation for the outer call. Upshot: we need to calculate the
3297 args into temporary regs, and move those to arg regs or onto the
3298 stack only immediately prior to the call proper. Sigh.
-- SPARC C call (see the nightmare description above): every argument is
-- first computed into fresh virtual int regs, then moved into %o0..%o5 or
-- the stack only immediately before the CALL, to avoid clobbering.
3301 genCCall target dest_regs argsAndHints vols = do
3303 args = map fst argsAndHints
3304 argcode_and_vregs <- mapM arg_to_int_vregs args
3306 (argcodes, vregss) = unzip argcode_and_vregs
3307 n_argRegs = length allArgRegs
3308 n_argRegs_used = min (length vregs) n_argRegs
3309 vregs = concat vregss
3310 -- deal with static vs dynamic call targets
3311 callinsns <- (case target of
3312 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3313 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3314 CmmForeignCall expr conv -> do
3315 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3316 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- Prim ops: resolve to a label or expression via outOfLineFloatOp.
3318 (res, reduce) <- outOfLineFloatOp mop
3319 lblOrMopExpr <- case res of
3321 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3323 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3324 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3325 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3329 argcode = concatOL argcodes
3330 (move_sp_down, move_sp_up)
3331 = let diff = length vregs - n_argRegs
3332 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3335 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3337 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3338 return (argcode `appOL`
3339 move_sp_down `appOL`
3340 transfer_code `appOL`
3345 -- move args from the integer vregs into which they have been
3346 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3347 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3349 move_final [] _ offset -- all args done
3352 move_final (v:vs) [] offset -- out of aregs; move to stack
3353 = ST I32 v (spRel offset)
3354 : move_final vs [] (offset+1)
3356 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3357 = OR False g0 (RIReg v) a
3358 : move_final vs az offset
3360 -- generate code to calculate an argument, and move it into one
3361 -- or two integer vregs.
3362 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3363 arg_to_int_vregs arg
3364 | (cmmExprRep arg) == I64
3366 (ChildCode64 code r_lo) <- iselExpr64 arg
3368 r_hi = getHiVRegFromLo r_lo
3369 return (code, [r_hi, r_lo])
3372 (src, code) <- getSomeReg arg
3373 tmp <- getNewRegNat (cmmExprRep arg)
3378 v1 <- getNewRegNat I32
3379 v2 <- getNewRegNat I32
3382 FMOV F64 src f0 `snocOL`
3383 ST F32 f0 (spRel 16) `snocOL`
3384 LD I32 (spRel 16) v1 `snocOL`
3385 ST F32 (fPair f0) (spRel 16) `snocOL`
3386 LD I32 (spRel 16) v2
3391 v1 <- getNewRegNat I32
3394 ST F32 src (spRel 16) `snocOL`
3395 LD I32 (spRel 16) v1
3400 v1 <- getNewRegNat I32
3402 code `snocOL` OR False g0 (RIReg src) v1
3406 outOfLineFloatOp mop =
3408 mopExpr <- cmmMakeDynamicReference addImportNat True $
3409 mkForeignLabel functionName Nothing True
3410 let mopLabelOrExpr = case mopExpr of
3411 CmmLit (CmmLabel lbl) -> Left lbl
3413 return (mopLabelOrExpr, reduce)
3415 (reduce, functionName) = case mop of
3416 MO_F32_Exp -> (True, FSLIT("exp"))
3417 MO_F32_Log -> (True, FSLIT("log"))
3418 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3420 MO_F32_Sin -> (True, FSLIT("sin"))
3421 MO_F32_Cos -> (True, FSLIT("cos"))
3422 MO_F32_Tan -> (True, FSLIT("tan"))
3424 MO_F32_Asin -> (True, FSLIT("asin"))
3425 MO_F32_Acos -> (True, FSLIT("acos"))
3426 MO_F32_Atan -> (True, FSLIT("atan"))
3428 MO_F32_Sinh -> (True, FSLIT("sinh"))
3429 MO_F32_Cosh -> (True, FSLIT("cosh"))
3430 MO_F32_Tanh -> (True, FSLIT("tanh"))
3432 MO_F64_Exp -> (False, FSLIT("exp"))
3433 MO_F64_Log -> (False, FSLIT("log"))
3434 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3436 MO_F64_Sin -> (False, FSLIT("sin"))
3437 MO_F64_Cos -> (False, FSLIT("cos"))
3438 MO_F64_Tan -> (False, FSLIT("tan"))
3440 MO_F64_Asin -> (False, FSLIT("asin"))
3441 MO_F64_Acos -> (False, FSLIT("acos"))
3442 MO_F64_Atan -> (False, FSLIT("atan"))
3444 MO_F64_Sinh -> (False, FSLIT("sinh"))
3445 MO_F64_Cosh -> (False, FSLIT("cosh"))
3446 MO_F64_Tanh -> (False, FSLIT("tanh"))
3448 other -> pprPanic "outOfLineFloatOp(sparc) "
3449 (pprCallishMachOp mop)
3451 #endif /* sparc_TARGET_ARCH */
3453 #if powerpc_TARGET_ARCH
3455 #if darwin_TARGET_OS || linux_TARGET_OS
3457 The PowerPC calling convention for Darwin/Mac OS X
3458 is described in Apple's document
3459 "Inside Mac OS X - Mach-O Runtime Architecture".
3461 PowerPC Linux uses the System V Release 4 Calling Convention
3462 for PowerPC. It is described in the
3463 "System V Application Binary Interface PowerPC Processor Supplement".
3465 Both conventions are similar:
3466 Parameters may be passed in general-purpose registers starting at r3, in
3467 floating point registers starting at f1, or on the stack.
3469 But there are substantial differences:
3470 * The number of registers used for parameter passing and the exact set of
3471 nonvolatile registers differs (see MachRegs.lhs).
3472 * On Darwin, stack space is always reserved for parameters, even if they are
3473 passed in registers. The called routine may choose to save parameters from
3474 registers to the corresponding space on the stack.
3475 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3476 parameter is passed in an FPR.
3477 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3478 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3479 Darwin just treats an I64 like two separate I32s (high word first).
3480 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3481 4-byte aligned like everything else on Darwin.
3482 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3483 PowerPC Linux does not agree, so neither do we.
3485 According to both conventions, the parameter area should be part of
3486 caller's stack frame, allocated in the caller's prologue code (large enough
3487 to hold the parameter lists for all called routines). The NCG already
3488 uses the stack for register spilling, leaving 64 bytes free at the top.
3489 If we need a larger parameter area than that, we just allocate a new stack
3490 frame just before ccalling.
-- | PowerPC (Darwin / SysV Linux): generate a foreign (C) call.
-- Arguments go into GPRs from r3, FPRs from f1, or onto the stack;
-- the Darwin vs SysV differences (alignment, GPR skipping, linkage
-- area) are handled by the #if blocks below.  The assertion holds
-- because codeGen has already promoted sub-word arguments.
3493 genCCall target dest_regs argsAndHints vols
3494 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3495 -- we rely on argument promotion in the codeGen
3497 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3499 allArgRegs allFPArgRegs
3503 (labelOrExpr, reduceToF32) <- case target of
3504 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3505 CmmForeignCall expr conv -> return (Right expr, False)
3506 CmmPrim mop -> outOfLineFloatOp mop
3508 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3509 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3514 `snocOL` BL lbl usedRegs
3517 (dynReg, dynCode) <- getSomeReg dyn
3519 `snocOL` MTCTR dynReg
3521 `snocOL` BCTRL usedRegs
3524 #if darwin_TARGET_OS
3525 initialStackOffset = 24
3526 -- size of linkage area + size of arguments, in bytes
3527 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3528 map machRepByteWidth argReps
3529 #elif linux_TARGET_OS
3530 initialStackOffset = 8
3531 stackDelta finalStack = roundTo 16 finalStack
3533 args = map fst argsAndHints
3534 argReps = map cmmExprRep args
-- round x up to the next multiple of a
3536 roundTo a x | x `mod` a == 0 = x
3537 | otherwise = x + a - (x `mod` a)
3539 move_sp_down finalStack
3541 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3544 where delta = stackDelta finalStack
3545 move_sp_up finalStack
3547 toOL [ADD sp sp (RIImm (ImmInt delta)),
3550 where delta = stackDelta finalStack
-- passArguments: walk the argument list, threading the remaining
-- GPR/FPR pools and the current stack offset; yields the final stack
-- offset, the accumulated marshalling code, and the regs used.
3553 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3554 passArguments ((arg,I64):args) gprs fprs stackOffset
3555 accumCode accumUsed =
3557 ChildCode64 code vr_lo <- iselExpr64 arg
3558 let vr_hi = getHiVRegFromLo vr_lo
3560 #if darwin_TARGET_OS
3565 (accumCode `appOL` code
3566 `snocOL` storeWord vr_hi gprs stackOffset
3567 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3568 ((take 2 gprs) ++ accumUsed)
3570 storeWord vr (gpr:_) offset = MR gpr vr
3571 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3573 #elif linux_TARGET_OS
3574 let stackOffset' = roundTo 8 stackOffset
3575 stackCode = accumCode `appOL` code
3576 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3577 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3578 regCode hireg loreg =
3579 accumCode `appOL` code
3580 `snocOL` MR hireg vr_hi
3581 `snocOL` MR loreg vr_lo
-- SysV: an I64 needs an odd-numbered (hi) GPR pair start, so a GPR
-- may be skipped; with fewer than two GPRs left it goes on the stack.
3584 hireg : loreg : regs | even (length gprs) ->
3585 passArguments args regs fprs stackOffset
3586 (regCode hireg loreg) (hireg : loreg : accumUsed)
3587 _skipped : hireg : loreg : regs ->
3588 passArguments args regs fprs stackOffset
3589 (regCode hireg loreg) (hireg : loreg : accumUsed)
3590 _ -> -- only one or no regs left
3591 passArguments args [] fprs (stackOffset'+8)
3595 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3596 | reg : _ <- regs = do
3597 register <- getRegister arg
3598 let code = case register of
3599 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3600 Any _ acode -> acode reg
3604 #if darwin_TARGET_OS
3605 -- The Darwin ABI requires that we reserve stack slots for register parameters
3606 (stackOffset + stackBytes)
3607 #elif linux_TARGET_OS
3608 -- ... the SysV ABI doesn't.
3611 (accumCode `appOL` code)
3614 (vr, code) <- getSomeReg arg
3618 (stackOffset' + stackBytes)
3619 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3622 #if darwin_TARGET_OS
3623 -- stackOffset is at least 4-byte aligned
3624 -- The Darwin ABI is happy with that.
3625 stackOffset' = stackOffset
3627 -- ... the SysV ABI requires 8-byte alignment for doubles.
3628 stackOffset' | rep == F64 = roundTo 8 stackOffset
3629 | otherwise = stackOffset
3631 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3632 (nGprs, nFprs, stackBytes, regs) = case rep of
3633 I32 -> (1, 0, 4, gprs)
3634 #if darwin_TARGET_OS
3635 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3637 F32 -> (1, 1, 4, fprs)
3638 F64 -> (2, 1, 8, fprs)
3639 #elif linux_TARGET_OS
3640 -- ... the SysV ABI doesn't.
3641 F32 -> (0, 1, 4, fprs)
3642 F64 -> (0, 1, 8, fprs)
-- moveResult: copy the call's result into dest_regs — f1 for floats
-- (FRSP narrows back to F32 when an out-of-line F32 op was computed
-- at F64), r3 for ints, and an r3-based pair for I64.
3645 moveResult reduceToF32 =
3649 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3650 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3651 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3653 | otherwise -> unitOL (MR r_dest r3)
3654 where rep = cmmRegRep dest
3655 r_dest = getRegisterReg dest
-- Map a callish MachOp to its libm routine; 'reduce' is True for the
-- F32 variants, which are computed via the F64 routine and narrowed.
3657 outOfLineFloatOp mop =
3659 mopExpr <- cmmMakeDynamicReference addImportNat True $
3660 mkForeignLabel functionName Nothing True
3661 let mopLabelOrExpr = case mopExpr of
3662 CmmLit (CmmLabel lbl) -> Left lbl
3664 return (mopLabelOrExpr, reduce)
3666 (functionName, reduce) = case mop of
3667 MO_F32_Exp -> (FSLIT("exp"), True)
3668 MO_F32_Log -> (FSLIT("log"), True)
3669 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3671 MO_F32_Sin -> (FSLIT("sin"), True)
3672 MO_F32_Cos -> (FSLIT("cos"), True)
3673 MO_F32_Tan -> (FSLIT("tan"), True)
3675 MO_F32_Asin -> (FSLIT("asin"), True)
3676 MO_F32_Acos -> (FSLIT("acos"), True)
3677 MO_F32_Atan -> (FSLIT("atan"), True)
3679 MO_F32_Sinh -> (FSLIT("sinh"), True)
3680 MO_F32_Cosh -> (FSLIT("cosh"), True)
3681 MO_F32_Tanh -> (FSLIT("tanh"), True)
3682 MO_F32_Pwr -> (FSLIT("pow"), True)
3684 MO_F64_Exp -> (FSLIT("exp"), False)
3685 MO_F64_Log -> (FSLIT("log"), False)
3686 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3688 MO_F64_Sin -> (FSLIT("sin"), False)
3689 MO_F64_Cos -> (FSLIT("cos"), False)
3690 MO_F64_Tan -> (FSLIT("tan"), False)
3692 MO_F64_Asin -> (FSLIT("asin"), False)
3693 MO_F64_Acos -> (FSLIT("acos"), False)
3694 MO_F64_Atan -> (FSLIT("atan"), False)
3696 MO_F64_Sinh -> (FSLIT("sinh"), False)
3697 MO_F64_Cosh -> (FSLIT("cosh"), False)
3698 MO_F64_Tanh -> (FSLIT("tanh"), False)
3699 MO_F64_Pwr -> (FSLIT("pow"), False)
3700 other -> pprPanic "genCCall(ppc): unknown callish op"
3701 (pprCallishMachOp other)
3703 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3705 #endif /* powerpc_TARGET_ARCH */
3708 -- -----------------------------------------------------------------------------
3709 -- Generating a table-branch
-- | Generate a table branch for the scrutinee and target list;
-- Nothing entries become zero words in the jump table.
-- NOTE(review): each arch has two variants — one building
-- label-relative table entries via cmmMakeDynamicReference
-- (position-independent), one using absolute CmmLabel entries.
-- The selecting guard is presumably opt_PIC (guard lines not shown
-- in this extract) — confirm against full source.
3711 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3713 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3717 (reg,e_code) <- getSomeReg expr
3718 lbl <- getNewLabelNat
3719 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3720 (tableReg,t_code) <- getSomeReg $ dynRef
3722 jumpTable = map jumpTableEntryRel ids
3724 jumpTableEntryRel Nothing
3725 = CmmStaticLit (CmmInt 0 wordRep)
3726 jumpTableEntryRel (Just (BlockId id))
3727 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3728 where blockLabel = mkAsmTempLabel id
3730 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3731 (EAIndex reg wORD_SIZE) (ImmInt 0))
3733 code = e_code `appOL` t_code `appOL` toOL [
3734 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3735 ADD wordRep op (OpReg tableReg),
3736 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3741 (reg,e_code) <- getSomeReg expr
3742 lbl <- getNewLabelNat
3744 jumpTable = map jumpTableEntry ids
3745 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3746 code = e_code `appOL` toOL [
3747 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3748 JMP_TBL op [ id | Just id <- ids ]
3752 #elif powerpc_TARGET_ARCH
3756 (reg,e_code) <- getSomeReg expr
3757 tmp <- getNewRegNat I32
3758 lbl <- getNewLabelNat
3759 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3760 (tableReg,t_code) <- getSomeReg $ dynRef
3762 jumpTable = map jumpTableEntryRel ids
3764 jumpTableEntryRel Nothing
3765 = CmmStaticLit (CmmInt 0 wordRep)
3766 jumpTableEntryRel (Just (BlockId id))
3767 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3768 where blockLabel = mkAsmTempLabel id
-- PPC: scale index by 4 (SLW by 2), load the table entry, add the
-- table base back (entries are label-relative), and branch via CTR.
3770 code = e_code `appOL` t_code `appOL` toOL [
3771 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3772 SLW tmp reg (RIImm (ImmInt 2)),
3773 LD I32 tmp (AddrRegReg tableReg tmp),
3774 ADD tmp tmp (RIReg tableReg),
3776 BCTR [ id | Just id <- ids ]
3781 (reg,e_code) <- getSomeReg expr
3782 tmp <- getNewRegNat I32
3783 lbl <- getNewLabelNat
3785 jumpTable = map jumpTableEntry ids
3787 code = e_code `appOL` toOL [
3788 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3789 SLW tmp reg (RIImm (ImmInt 2)),
3790 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3791 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3793 BCTR [ id | Just id <- ids ]
3797 genSwitch expr ids = panic "ToDo: genSwitch"
-- | Absolute jump-table entry: a zero word for a missing target,
-- otherwise the address of the block's assembler temporary label.
3800 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3801 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3802 where blockLabel = mkAsmTempLabel id
3804 -- -----------------------------------------------------------------------------
3806 -- -----------------------------------------------------------------------------
3809 -- -----------------------------------------------------------------------------
3810 -- 'condIntReg' and 'condFltReg': condition codes into registers
3812 -- Turn those condition codes into integers now (when they appear on
3813 -- the right hand side of an assignment).
3815 -- (If applicable) Do not fill the delay slots here; you will confuse the
3816 -- register allocator.
-- (Each per-arch clause below returns an 'Any I32' register holding
-- the 0/1 result of the comparison.)
3818 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3820 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3822 #if alpha_TARGET_ARCH
-- Not implemented for Alpha; reaching these clauses is a compiler bug.
3823 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3824 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3825 #endif /* alpha_TARGET_ARCH */
3827 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3829 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: SETcc into a byte register, then zero-extend (MOVZxL)
-- into the full destination word.
3831 condIntReg cond x y = do
3832 CondCode _ cond cond_code <- condIntCode cond x y
3833 tmp <- getNewRegNat I8
3835 code dst = cond_code `appOL` toOL [
3836 SETCC cond (OpReg tmp),
3837 MOVZxL I8 (OpReg tmp) (OpReg dst)
3840 return (Any I32 code)
3844 #if i386_TARGET_ARCH
-- i386: same SETcc/MOVZxL scheme for float comparisons.
3846 condFltReg cond x y = do
3847 CondCode _ cond cond_code <- condFltCode cond x y
3848 tmp <- getNewRegNat I8
3850 code dst = cond_code `appOL` toOL [
3851 SETCC cond (OpReg tmp),
3852 MOVZxL I8 (OpReg tmp) (OpReg dst)
3855 return (Any I32 code)
3859 #if x86_64_TARGET_ARCH
3861 condFltReg cond x y = do
3862 CondCode _ cond cond_code <- condFltCode cond x y
3863 tmp1 <- getNewRegNat wordRep
3864 tmp2 <- getNewRegNat wordRep
3866 -- We have to worry about unordered operands (eg. comparisons
3867 -- against NaN). If the operands are unordered, the comparison
3868 -- sets the parity flag, carry flag and zero flag.
3869 -- All comparisons are supposed to return false for unordered
3870 -- operands except for !=, which returns true.
3872 -- Optimisation: we don't have to test the parity flag if we
3873 -- know the test has already excluded the unordered case: eg >
3874 -- and >= test for a zero carry flag, which can only occur for
3875 -- ordered operands.
3877 -- ToDo: by reversing comparisons we could avoid testing the
3878 -- parity flag in more cases.
3883 NE -> or_unordered dst
3884 GU -> plain_test dst
3885 GEU -> plain_test dst
3886 _ -> and_ordered dst)
3888 plain_test dst = toOL [
3889 SETCC cond (OpReg tmp1),
3890 MOVZxL I8 (OpReg tmp1) (OpReg dst)
3892 or_unordered dst = toOL [
3893 SETCC cond (OpReg tmp1),
3894 SETCC PARITY (OpReg tmp2),
3895 OR I8 (OpReg tmp1) (OpReg tmp2),
3896 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3898 and_ordered dst = toOL [
3899 SETCC cond (OpReg tmp1),
3900 SETCC NOTPARITY (OpReg tmp2),
3901 AND I8 (OpReg tmp1) (OpReg tmp2),
3902 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3905 return (Any I32 code)
3909 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3911 #if sparc_TARGET_ARCH
-- SPARC: the EQQ/NE-against-zero and reg/reg cases materialise 0/1
-- branch-free using SUB/ADD variants that read and set the condition
-- codes; the general cases branch over 0/1 moves in small blocks.
3913 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3914 (src, code) <- getSomeReg x
3915 tmp <- getNewRegNat I32
3917 code__2 dst = code `appOL` toOL [
3918 SUB False True g0 (RIReg src) g0,
3919 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3920 return (Any I32 code__2)
3922 condIntReg EQQ x y = do
3923 (src1, code1) <- getSomeReg x
3924 (src2, code2) <- getSomeReg y
3925 tmp1 <- getNewRegNat I32
3926 tmp2 <- getNewRegNat I32
3928 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3929 XOR False src1 (RIReg src2) dst,
3930 SUB False True g0 (RIReg dst) g0,
3931 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3932 return (Any I32 code__2)
3934 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3935 (src, code) <- getSomeReg x
3936 tmp <- getNewRegNat I32
3938 code__2 dst = code `appOL` toOL [
3939 SUB False True g0 (RIReg src) g0,
3940 ADD True False g0 (RIImm (ImmInt 0)) dst]
3941 return (Any I32 code__2)
3943 condIntReg NE x y = do
3944 (src1, code1) <- getSomeReg x
3945 (src2, code2) <- getSomeReg y
3946 tmp1 <- getNewRegNat I32
3947 tmp2 <- getNewRegNat I32
3949 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3950 XOR False src1 (RIReg src2) dst,
3951 SUB False True g0 (RIReg dst) g0,
3952 ADD True False g0 (RIImm (ImmInt 0)) dst]
3953 return (Any I32 code__2)
-- General case: branch to lbl1 when the condition holds (dst := 1),
-- otherwise fall through (dst := 0) and jump to lbl2.  NOPs fill the
-- branch delay slots.
3955 condIntReg cond x y = do
3956 BlockId lbl1 <- getBlockIdNat
3957 BlockId lbl2 <- getBlockIdNat
3958 CondCode _ cond cond_code <- condIntCode cond x y
3960 code__2 dst = cond_code `appOL` toOL [
3961 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3962 OR False g0 (RIImm (ImmInt 0)) dst,
3963 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3964 NEWBLOCK (BlockId lbl1),
3965 OR False g0 (RIImm (ImmInt 1)) dst,
3966 NEWBLOCK (BlockId lbl2)]
3967 return (Any I32 code__2)
3969 condFltReg cond x y = do
3970 BlockId lbl1 <- getBlockIdNat
3971 BlockId lbl2 <- getBlockIdNat
3972 CondCode _ cond cond_code <- condFltCode cond x y
3974 code__2 dst = cond_code `appOL` toOL [
3976 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3977 OR False g0 (RIImm (ImmInt 0)) dst,
3978 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3979 NEWBLOCK (BlockId lbl1),
3980 OR False g0 (RIImm (ImmInt 1)) dst,
3981 NEWBLOCK (BlockId lbl2)]
3982 return (Any I32 code__2)
3984 #endif /* sparc_TARGET_ARCH */
3986 #if powerpc_TARGET_ARCH
-- PPC: materialise a condition into a register by extracting the
-- relevant CR bit with RLWINM (the CR is presumably moved into dst
-- first via MFCR — those lines are elided here; confirm).  CRNOR
-- inverts the CR bit when 'do_negate' is set for the condition.
3987 condReg getCond = do
3988 lbl1 <- getBlockIdNat
3989 lbl2 <- getBlockIdNat
3990 CondCode _ cond cond_code <- getCond
3992 {- code dst = cond_code `appOL` toOL [
4001 code dst = cond_code
4005 RLWINM dst dst (bit + 1) 31 31
4008 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4011 (bit, do_negate) = case cond of
4025 return (Any I32 code)
4027 condIntReg cond x y = condReg (condIntCode cond x y)
4028 condFltReg cond x y = condReg (condFltCode cond x y)
4029 #endif /* powerpc_TARGET_ARCH */
4032 -- -----------------------------------------------------------------------------
4033 -- 'trivial*Code': deal with trivial instructions
4035 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4036 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4037 -- Only look for constants on the right hand side, because that's
4038 -- where the generic optimizer will have put them.
4040 -- Similarly, for unary instructions, we don't have to worry about
4041 -- matching an StInt as the argument, because genericOpt will already
4042 -- have handled the constant-folding.
-- Type signatures for trivialCode / trivialFCode / trivialUCode /
-- trivialUFCode, dispatched per architecture via the IF_ARCH_* CPP
-- macros from nativeGen/NCG.h: each arch supplies a differently
-- shaped instruction-constructor argument.
4046 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4047 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4048 -> Maybe (Operand -> Operand -> Instr)
4049 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4050 -> Maybe (Operand -> Operand -> Instr)
4051 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4052 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4054 -> CmmExpr -> CmmExpr -- the two arguments
4057 #ifndef powerpc_TARGET_ARCH
4060 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4061 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4062 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4063 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4065 -> CmmExpr -> CmmExpr -- the two arguments
4071 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4072 ,IF_ARCH_i386 ((Operand -> Instr)
4073 ,IF_ARCH_x86_64 ((Operand -> Instr)
4074 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4075 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4077 -> CmmExpr -- the one argument
4080 #ifndef powerpc_TARGET_ARCH
4083 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4084 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4085 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4086 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4088 -> CmmExpr -- the one argument
4092 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4094 #if alpha_TARGET_ARCH
-- Alpha versions of the trivial*Code helpers, still written in the
-- old explicit `thenNat` monadic style.
4096 trivialCode instr x (StInt y)
4098 = getRegister x `thenNat` \ register ->
4099 getNewRegNat IntRep `thenNat` \ tmp ->
4101 code = registerCode register tmp
4102 src1 = registerName register tmp
4103 src2 = ImmInt (fromInteger y)
4104 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4106 return (Any IntRep code__2)
4108 trivialCode instr x y
4109 = getRegister x `thenNat` \ register1 ->
4110 getRegister y `thenNat` \ register2 ->
4111 getNewRegNat IntRep `thenNat` \ tmp1 ->
4112 getNewRegNat IntRep `thenNat` \ tmp2 ->
4114 code1 = registerCode register1 tmp1 []
4115 src1 = registerName register1 tmp1
4116 code2 = registerCode register2 tmp2 []
4117 src2 = registerName register2 tmp2
4118 code__2 dst = asmSeqThen [code1, code2] .
4119 mkSeqInstr (instr src1 (RIReg src2) dst)
4121 return (Any IntRep code__2)
4124 trivialUCode instr x
4125 = getRegister x `thenNat` \ register ->
4126 getNewRegNat IntRep `thenNat` \ tmp ->
4128 code = registerCode register tmp
4129 src = registerName register tmp
4130 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4132 return (Any IntRep code__2)
4135 trivialFCode _ instr x y
4136 = getRegister x `thenNat` \ register1 ->
4137 getRegister y `thenNat` \ register2 ->
4138 getNewRegNat F64 `thenNat` \ tmp1 ->
4139 getNewRegNat F64 `thenNat` \ tmp2 ->
4141 code1 = registerCode register1 tmp1
4142 src1 = registerName register1 tmp1
4144 code2 = registerCode register2 tmp2
4145 src2 = registerName register2 tmp2
4147 code__2 dst = asmSeqThen [code1 [], code2 []] .
4148 mkSeqInstr (instr src1 src2 dst)
4150 return (Any F64 code__2)
4152 trivialUFCode _ instr x
4153 = getRegister x `thenNat` \ register ->
4154 getNewRegNat F64 `thenNat` \ tmp ->
4156 code = registerCode register tmp
4157 src = registerName register tmp
4158 code__2 dst = code . mkSeqInstr (instr src dst)
4160 return (Any F64 code__2)
4162 #endif /* alpha_TARGET_ARCH */
4164 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4166 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4169 The Rules of the Game are:
4171 * You cannot assume anything about the destination register dst;
4172 it may be anything, including a fixed reg.
4174 * You may compute an operand into a fixed reg, but you may not
4175 subsequently change the contents of that fixed reg. If you
4176 want to do so, first copy the value either to a temporary
4177 or into dst. You are free to modify dst even if it happens
4178 to be a fixed reg -- that's not your problem.
4180 * You cannot assume that a fixed reg will stay live over an
4181 arbitrary computation. The same applies to the dst reg.
4183 * Temporary regs obtained from getNewRegNat are distinct from
4184 each other and from all other regs, and stay live over
4185 arbitrary computations.
4187 --------------------
4189 SDM's version of The Rules:
4191 * If getRegister returns Any, that means it can generate correct
4192 code which places the result in any register, period. Even if that
4193 register happens to be read during the computation.
4195 Corollary #1: this means that if you are generating code for an
4196 operation with two arbitrary operands, you cannot assign the result
4197 of the first operand into the destination register before computing
4198 the second operand. The second operand might require the old value
4199 of the destination register.
4201 Corollary #2: A function might be able to generate more efficient
4202 code if it knows the destination register is a new temporary (and
4203 therefore not read by any of the sub-computations).
4205 * If getRegister returns Any, then the code it generates may modify only:
4206 (a) fresh temporaries
4207 (b) the destination register
4208 (c) known registers (eg. %ecx is used by shifts)
4209 In particular, it may *not* modify global registers, unless the global
4210 register happens to be the destination register.
-- trivialCode: when the literal sits on the LEFT and the instruction
-- has a reversed-operand form ('revinstr'), emit it directly;
-- otherwise fall through to genTrivialCode.
4213 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4214 | not (is64BitLit lit_a) = do
4215 b_code <- getAnyReg b
4218 = b_code dst `snocOL`
4219 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4221 return (Any rep code)
4223 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4225 -- This is re-used for floating pt instructions too.
4226 genTrivialCode rep instr a b = do
4227 (b_op, b_code) <- getNonClobberedOperand b
4228 a_code <- getAnyReg a
4229 tmp <- getNewRegNat rep
4231 -- We want the value of b to stay alive across the computation of a.
4232 -- But, we want to calculate a straight into the destination register,
4233 -- because the instruction only has two operands (dst := dst `op` src).
4234 -- The troublesome case is when the result of b is in the same register
4235 -- as the destination reg. In this case, we have to save b in a
4236 -- new temporary across the computation of a.
4238 | dst `regClashesWithOp` b_op =
4240 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4242 instr (OpReg tmp) (OpReg dst)
4246 instr b_op (OpReg dst)
4248 return (Any rep code)
-- Does this register appear in the operand (directly or inside an
-- addressing mode)?
4250 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4251 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4252 reg `regClashesWithOp` _ = False
4256 trivialUCode rep instr x = do
4257 x_code <- getAnyReg x
4263 return (Any rep code)
4267 #if i386_TARGET_ARCH
4269 trivialFCode pk instr x y = do
4270 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4271 (y_reg, y_code) <- getSomeReg y
4276 instr pk x_reg y_reg dst
4278 return (Any pk code)
4282 #if x86_64_TARGET_ARCH
-- x86_64: SSE float ops share the two-operand shape of the integer
-- ops, so reuse genTrivialCode.
4284 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4290 trivialUFCode rep instr x = do
4291 (x_reg, x_code) <- getSomeReg x
4297 return (Any rep code)
4299 #endif /* i386_TARGET_ARCH */
4301 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4303 #if sparc_TARGET_ARCH
-- SPARC trivial ops.  trivialFCode promotes a single-precision
-- operand to F64 (via FxTOy) when the two operand reps differ.
4305 trivialCode pk instr x (CmmLit (CmmInt y d))
4308 (src1, code) <- getSomeReg x
4309 tmp <- getNewRegNat I32
4311 src2 = ImmInt (fromInteger y)
4312 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4313 return (Any I32 code__2)
4315 trivialCode pk instr x y = do
4316 (src1, code1) <- getSomeReg x
4317 (src2, code2) <- getSomeReg y
4318 tmp1 <- getNewRegNat I32
4319 tmp2 <- getNewRegNat I32
4321 code__2 dst = code1 `appOL` code2 `snocOL`
4322 instr src1 (RIReg src2) dst
4323 return (Any I32 code__2)
4326 trivialFCode pk instr x y = do
4327 (src1, code1) <- getSomeReg x
4328 (src2, code2) <- getSomeReg y
4329 tmp1 <- getNewRegNat (cmmExprRep x)
4330 tmp2 <- getNewRegNat (cmmExprRep y)
4331 tmp <- getNewRegNat F64
4333 promote x = FxTOy F32 F64 x tmp
4340 code1 `appOL` code2 `snocOL`
4341 instr pk src1 src2 dst
4342 else if pk1 == F32 then
4343 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4344 instr F64 tmp src2 dst
4346 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4347 instr F64 src1 tmp dst
4348 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4351 trivialUCode pk instr x = do
4352 (src, code) <- getSomeReg x
4353 tmp <- getNewRegNat pk
4355 code__2 dst = code `snocOL` instr (RIReg src) dst
4356 return (Any pk code__2)
4359 trivialUFCode pk instr x = do
4360 (src, code) <- getSomeReg x
4361 tmp <- getNewRegNat pk
4363 code__2 dst = code `snocOL` instr src dst
4364 return (Any pk code__2)
4366 #endif /* sparc_TARGET_ARCH */
4368 #if powerpc_TARGET_ARCH
4371 Wolfgang's PowerPC version of The Rules:
4373 A slightly modified version of The Rules to take advantage of the fact
4374 that PowerPC instructions work on all registers and don't implicitly
4375 clobber any fixed registers.
4377 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4379 * If getRegister returns Any, then the code it generates may modify only:
4380 (a) fresh temporaries
4381 (b) the destination register
4382 It may *not* modify global registers, unless the global
4383 register happens to be the destination register.
4384 It may not clobber any other registers. In fact, only ccalls clobber any
4386 Also, it may not modify the counter register (used by genCCall).
4388 Corollary: If a getRegister for a subexpression returns Fixed, you need
4389 not move it to a fresh temporary before evaluating the next subexpression.
4390 The Fixed register won't be modified.
4391 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4393 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4394 the value of the destination register.
-- PPC trivialCode: use the immediate form when the literal fits the
-- instruction's immediate field (makeImmediate succeeds, respecting
-- signedness); otherwise compute both operands into registers.
4397 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4398 | Just imm <- makeImmediate rep signed y
4400 (src1, code1) <- getSomeReg x
4401 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4402 return (Any rep code)
4404 trivialCode rep signed instr x y = do
4405 (src1, code1) <- getSomeReg x
4406 (src2, code2) <- getSomeReg y
4407 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4408 return (Any rep code)
-- | Like 'trivialCode', but for PPC instructions that have no
-- immediate form: both operands always go through registers.
4410 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4411 -> CmmExpr -> CmmExpr -> NatM Register
4412 trivialCodeNoImm rep instr x y = do
4413 (src1, code1) <- getSomeReg x
4414 (src2, code2) <- getSomeReg y
4415 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4416 return (Any rep code)
-- Unary op: evaluate the operand, then dst := instr dst src.
4418 trivialUCode rep instr x = do
4419 (src, code) <- getSomeReg x
4420 let code' dst = code `snocOL` instr dst src
4421 return (Any rep code')
4423 -- There is no "remainder" instruction on the PPC, so we have to do
4425 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- Computes x `rem` y as x - (x `div` y) * y: divide (instruction
-- supplied by 'div', elided line here), multiply back with MULLW,
-- then subtract — TODO confirm the subtract against the full source.
4427 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4428 -> CmmExpr -> CmmExpr -> NatM Register
4429 remainderCode rep div x y = do
4430 (src1, code1) <- getSomeReg x
4431 (src2, code2) <- getSomeReg y
4432 let code dst = code1 `appOL` code2 `appOL` toOL [
4434 MULLW dst dst (RIReg src2),
4437 return (Any rep code)
4439 #endif /* powerpc_TARGET_ARCH */
4442 -- -----------------------------------------------------------------------------
4443 -- Coercing to/from integer/floating-point...
4445 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4446 -- conversions. We have to store temporaries in memory to move
4447 -- between the integer and the floating point register sets.
4449 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4450 -- pretend, on sparc at least, that double and float regs are separate
4451 -- kinds, so the value has to be computed into one kind before being
4452 -- explicitly "converted" to live in the other kind.
-- Shared type signatures for the int<->float coercion generators; the
-- per-architecture implementations follow in the #if branches below.
-- Both take the source rep, the destination rep, and the expression.
4454 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4455 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4457 #if sparc_TARGET_ARCH
-- Sparc also needs explicit float<->double conversions, since (per the
-- comment above) float and double registers are treated as separate kinds.
4458 coerceDbl2Flt :: CmmExpr -> NatM Register
4459 coerceFlt2Dbl :: CmmExpr -> NatM Register
4462 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4464 #if alpha_TARGET_ARCH
-- Alpha: move values between the integer and floating-point register
-- files by bouncing them through memory at @spRel 0@ (store from one
-- file, load into the other), as described in the section comment above.
-- Written in the pre-do-notation `thenNat` style.
--
-- NOTE(review): the equation head lines (e.g. "coerceInt2FP ... x") and
-- several instructions are elided in this chunk (numbering jumps), so
-- the visible bodies are incomplete.
-- coerceInt2FP: int -> F64, via ST to / LD TF from the spill slot.
4467 = getRegister x `thenNat` \ register ->
4468 getNewRegNat IntRep `thenNat` \ reg ->
4470 code = registerCode register reg
4471 src = registerName register reg
4473 code__2 dst = code . mkSeqInstrs [
4475 LD TF dst (spRel 0),
4478 return (Any F64 code__2)
-- coerceFP2Int: F64 -> int, the mirror image — ST TF to the slot, then
-- load into an integer register (the elided instruction, presumably).
4482 = getRegister x `thenNat` \ register ->
4483 getNewRegNat F64 `thenNat` \ tmp ->
4485 code = registerCode register tmp
4486 src = registerName register tmp
4488 code__2 dst = code . mkSeqInstrs [
4490 ST TF tmp (spRel 0),
4493 return (Any IntRep code__2)
4495 #endif /* alpha_TARGET_ARCH */
4497 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4499 #if i386_TARGET_ARCH
-- i386: conversions are register-to-register, using the GITOF/GITOD
-- (int -> float/double) and GFTOI/GDTOI (float/double -> int)
-- instructions; the opcode is picked from the float rep involved.
-- NOTE(review): a line is elided before each "opc = ..." binding
-- (numbering jumps 4502 -> 4504) — presumably the "let"/"where" keyword.
4501 coerceInt2FP from to x = do
4502 (x_reg, x_code) <- getSomeReg x
4504 opc = case to of F32 -> GITOF; F64 -> GITOD
4505 code dst = x_code `snocOL` opc x_reg dst
4506 -- ToDo: works for non-I32 reps?
4508 return (Any to code)
-- Float/double -> int: same shape, opcode chosen from the *source* rep.
4512 coerceFP2Int from to x = do
4513 (x_reg, x_code) <- getSomeReg x
4515 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4516 code dst = x_code `snocOL` opc x_reg dst
4517 -- ToDo: works for non-I32 reps?
4519 return (Any to code)
4521 #endif /* i386_TARGET_ARCH */
4523 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4525 #if x86_64_TARGET_ARCH
-- x86_64: conversions use the SSE CVT* instructions, taking a memory or
-- register operand ('getOperand') on the source side.
-- NOTE(review): a line is elided before each "opc = ..." binding
-- (numbering jumps) — presumably the "let"/"where" keyword.
-- Float/double -> int: CVTSS2SI / CVTSD2SI, chosen by the source rep.
4527 coerceFP2Int from to x = do
4528 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4530 opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4531 code dst = x_code `snocOL` opc x_op dst
4533 return (Any to code) -- works even if the destination rep is <I32
-- Int -> float/double: CVTSI2SS / CVTSI2SD, chosen by the target rep.
4535 coerceInt2FP from to x = do
4536 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4538 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4539 code dst = x_code `snocOL` opc x_op dst
4541 return (Any to code) -- works even if the destination rep is <I32
-- Float <-> double, also register-to-register via CVTSD2SS / CVTSS2SD;
-- only defined on x86_64 (no counterpart in the other branches here).
4543 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4544 coerceFP2FP to x = do
4545 (x_reg, x_code) <- getSomeReg x
4547 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4548 code dst = x_code `snocOL` opc x_reg dst
4550 return (Any to code)
4554 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4556 #if sparc_TARGET_ARCH
-- Sparc: move between integer and FP register files through a stack
-- slot at @spRel (-2)@ (store from one file, load into the other), then
-- convert in place with FxTOy.  See the section comment above.
-- Int -> float: ST (int side), LD (fp side), then convert pk1 -> pk2.
4558 coerceInt2FP pk1 pk2 x = do
4559 (src, code) <- getSomeReg x
4561 code__2 dst = code `appOL` toOL [
4562 ST pk1 src (spRel (-2)),
4563 LD pk1 (spRel (-2)) dst,
4564 FxTOy pk1 pk2 dst dst]
4565 return (Any pk2 code__2)
-- Float -> int: convert into an FP temp first, then bounce through the
-- stack slot into the integer destination.  The ASSERT documents that
-- fprep must be a float rep.
-- NOTE(review): lines are elided here (numbering jumps), including the
-- binding keyword before code__2 and possibly the start of the toOL list.
4568 coerceFP2Int pk fprep x = do
4569 (src, code) <- getSomeReg x
4570 reg <- getNewRegNat fprep
4571 tmp <- getNewRegNat pk
4573 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4575 FxTOy fprep pk src tmp,
4576 ST pk tmp (spRel (-2)),
4577 LD pk (spRel (-2)) dst]
4578 return (Any pk code__2)
-- Double -> float: a single in-register FxTOy conversion.
4581 coerceDbl2Flt x = do
4582 (src, code) <- getSomeReg x
4583 return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
-- Float -> double: the mirror image.
4586 coerceFlt2Dbl x = do
4587 (src, code) <- getSomeReg x
4588 return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4590 #endif /* sparc_TARGET_ARCH */
4592 #if powerpc_TARGET_ARCH
-- PowerPC int -> float, using the classic "magic double" bit trick:
-- build the IEEE-754 double with high word 0x43300000 and low word
-- (src XOR 0x80000000) on the stack, load it as an F64, and subtract
-- the constant double {0x43300000, 0x80000000} (emitted as static data
-- under a fresh label and reached PIC-safely via
-- cmmMakeDynamicReference).  The difference is exactly the signed
-- integer value as a double.
-- NOTE(review): lines are elided in this chunk (numbering jumps), e.g.
-- the "let" before code', the static-data header around 4602-4603, the
-- load of the constant after addr_code, and the default cases of
-- maybe_exts / maybe_frsp (presumably @_ -> nilOL@).
4593 coerceInt2FP fromRep toRep x = do
4594 (src, code) <- getSomeReg x
4595 lbl <- getNewLabelNat
4596 itmp <- getNewRegNat I32
4597 ftmp <- getNewRegNat F64
4598 dynRef <- cmmMakeDynamicReference addImportNat False lbl
4599 Amode addr addr_code <- getAmode dynRef
4601 code' dst = code `appOL` maybe_exts `appOL` toOL [
4604 CmmStaticLit (CmmInt 0x43300000 I32),
4605 CmmStaticLit (CmmInt 0x80000000 I32)],
4606 XORIS itmp src (ImmInt 0x8000),
4607 ST I32 itmp (spRel 3),
4608 LIS itmp (ImmInt 0x4330),
4609 ST I32 itmp (spRel 2),
4610 LD F64 ftmp (spRel 2)
4611 ] `appOL` addr_code `appOL` toOL [
4613 FSUB F64 dst ftmp dst
4614 ] `appOL` maybe_frsp dst
-- Sign-extend sub-word sources first, so the XOR trick sees a full I32.
4616 maybe_exts = case fromRep of
4617 I8 -> unitOL $ EXTS I8 src src
4618 I16 -> unitOL $ EXTS I16 src src
-- Round the F64 result down to single precision when the target is F32.
4620 maybe_frsp dst = case toRep of
4621 F32 -> unitOL $ FRSP dst dst
4623 return (Any toRep code')
-- PowerPC float -> int: convert to an integer inside an FP register,
-- store the 64-bit FP value to the stack, and read back only the low
-- word (spRel 3), which holds the integer result.
-- NOTE(review): the convert instruction itself (presumably FCTIWZ) is
-- elided from this chunk (numbering jumps 4631 -> 4633).
4625 coerceFP2Int fromRep toRep x = do
4626 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4627 (src, code) <- getSomeReg x
4628 tmp <- getNewRegNat F64
4630 code' dst = code `appOL` toOL [
4631 -- convert to int in FP reg
4633 -- store value (64bit) from FP to stack
4634 ST F64 tmp (spRel 2),
4635 -- read low word of value (high word is undefined)
4636 LD I32 dst (spRel 3)]
4637 return (Any toRep code')
4638 #endif /* powerpc_TARGET_ARCH */
4641 -- -----------------------------------------------------------------------------
4642 -- eXTRA_STK_ARGS_HERE
4644 -- We (allegedly) put the first six C-call arguments in registers;
4645 -- where do we start putting the rest of them?
4647 -- Moved from MachInstrs (SDM):
4649 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4650 eXTRA_STK_ARGS_HERE :: Int
4652 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))