compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32
  33 -- The rest:
  34 import StaticFlags      ( opt_PIC )
  35 import ForeignCall      ( CCallConv(..) )
  36 import OrdList
  37 import Pretty
  38 import Outputable
  39 import FastString
  40 import FastTypes        ( isFastTrue )
  41 import Constants        ( wORD_SIZE )
  42
  43 #ifdef DEBUG
  44 import Outputable       ( assertPanic )
  45 import TRACE            ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Maybe            ( fromJust )
  50 import DATA_BITS
  51 import DATA_WORD
  52
  53 -- -----------------------------------------------------------------------------
  54 -- Top-level of the instruction selector
  55
  56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  57 -- They are really trees of insns to facilitate fast appending, where a
  58 -- left-to-right traversal (pre-order?) yields the insns in the correct
  59 -- order.
  60
  61 type InstrBlock = OrdList Instr
  62
  63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  64 cmmTopCodeGen (CmmProc info lab params blocks) = do
  65   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  66   picBaseMb <- getPicBaseMaybeNat
  67   let proc = CmmProc info lab params (concat nat_blocks)
  68       tops = proc : concat statics
  69   case picBaseMb of
  70       Just picBase -> initializePicBase picBase tops
  71       Nothing -> return tops
  72
  73 cmmTopCodeGen (CmmData sec dat) = do
  74   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  75
  76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  77 basicBlockCodeGen (BasicBlock id stmts) = do
  78   instrs <- stmtsToInstrs stmts
  79   -- code generation may introduce new basic block boundaries, which
  80   -- are indicated by the NEWBLOCK instruction.  We must split up the
  81   -- instruction stream into basic blocks again.  Also, we extract
  82   -- LDATAs here too.
  83   let
  84         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  85
  86         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  87           = ([], BasicBlock id instrs : blocks, statics)
  88         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  89           = (instrs, blocks, CmmData sec dat:statics)
  90         mkBlocks instr (instrs,blocks,statics)
  91           = (instr:instrs, blocks, statics)
  92   -- in
  93   return (BasicBlock id top : other_blocks, statics)
  94
  95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  96 stmtsToInstrs stmts
  97    = do instrss <- mapM stmtToInstrs stmts
  98         return (concatOL instrss)
  99
 100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 101 stmtToInstrs stmt = case stmt of
 102     CmmNop         -> return nilOL
 103     CmmComment s   -> return (unitOL (COMMENT s))
 104
 105     CmmAssign reg src
 106       | isFloatingRep kind -> assignReg_FltCode kind reg src
 107 #if WORD_SIZE_IN_BITS==32
 108       | kind == I64        -> assignReg_I64Code      reg src
 109 #endif
 110       | otherwise          -> assignReg_IntCode kind reg src
 111         where kind = cmmRegRep reg
 112
 113     CmmStore addr src
 114       | isFloatingRep kind -> assignMem_FltCode kind addr src
 115 #if WORD_SIZE_IN_BITS==32
 116       | kind == I64      -> assignMem_I64Code      addr src
 117 #endif
 118       | otherwise        -> assignMem_IntCode kind addr src
 119         where kind = cmmExprRep src
 120
 121     CmmCall target result_regs args vols
 122        -> genCCall target result_regs args vols
 123
 124     CmmBranch id          -> genBranch id
 125     CmmCondBranch arg id  -> genCondJump id arg
 126     CmmSwitch arg ids     -> genSwitch arg ids
 127     CmmJump arg params    -> genJump arg
 128
 129 -- -----------------------------------------------------------------------------
 130 -- General things for putting together code sequences
 131
 132 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 133 -- CmmExprs into CmmRegOff?
 134 mangleIndexTree :: CmmExpr -> CmmExpr
 135 mangleIndexTree (CmmRegOff reg off)
 136   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 137   where rep = cmmRegRep reg
 138
 139 -- -----------------------------------------------------------------------------
 140 --  Code gen for 64-bit arithmetic on 32-bit platforms
 141
 142 {-
 143 Simple support for generating 64-bit code (ie, 64 bit values and 64
 144 bit assignments) on 32-bit platforms.  Unlike the main code generator
 145 we merely shoot for generating working code as simply as possible, and
 146 pay little attention to code quality.  Specifically, there is no
 147 attempt to deal cleverly with the fixed-vs-floating register
 148 distinction; all values are generated into (pairs of) floating
 149 registers, even if this would mean some redundant reg-reg moves as a
 150 result.  Only one of the VRegUniques is returned, since it will be
 151 of the VRegUniqueLo form, and the upper-half VReg can be determined
 152 by applying getHiVRegFromLo to it.
 153 -}
 154
 155 data ChildCode64        -- a.k.a "Register64"
 156    = ChildCode64
 157         InstrBlock      -- code
 158         Reg             -- the lower 32-bit temporary which contains the
 159                         -- result; use getHiVRegFromLo to find the other
 160                         -- VRegUnique.  Rules of this simplified insn
 161                         -- selection game are therefore that the returned
 162                         -- Reg may be modified
 163
 164 #if WORD_SIZE_IN_BITS==32
 165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 166 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 167 #endif
 168
 169 #ifndef x86_64_TARGET_ARCH
 170 iselExpr64        :: CmmExpr -> NatM ChildCode64
 171 #endif
 172
 173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 174
 175 #if i386_TARGET_ARCH
 176
 177 assignMem_I64Code addrTree valueTree = do
 178   Amode addr addr_code <- getAmode addrTree
 179   ChildCode64 vcode rlo <- iselExpr64 valueTree
 180   let
 181         rhi = getHiVRegFromLo rlo
 182
 183         -- Little-endian store
 184         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 185         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 186   -- in
 187   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 188
 189
 190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 191    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 192    let
 193          r_dst_lo = mkVReg u_dst I32
 194          r_dst_hi = getHiVRegFromLo r_dst_lo
 195          r_src_hi = getHiVRegFromLo r_src_lo
 196          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 197          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 198    -- in
 199    return (
 200         vcode `snocOL` mov_lo `snocOL` mov_hi
 201      )
 202
 203 assignReg_I64Code lvalue valueTree
 204    = panic "assignReg_I64Code(i386): invalid lvalue"
 205
 206 ------------
 207
 208 iselExpr64 (CmmLit (CmmInt i _)) = do
 209   (rlo,rhi) <- getNewRegPairNat I32
 210   let
 211         r = fromIntegral (fromIntegral i :: Word32)
 212         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 213         code = toOL [
 214                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 215                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 216                 ]
 217   -- in
 218   return (ChildCode64 code rlo)
 219
 220 iselExpr64 (CmmLoad addrTree I64) = do
 221    Amode addr addr_code <- getAmode addrTree
 222    (rlo,rhi) <- getNewRegPairNat I32
 223    let
 224         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 225         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 226    -- in
 227    return (
 228             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 229                         rlo
 230      )
 231
 232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 233    = return (ChildCode64 nilOL (mkVReg vu I32))
 234
 235 -- we handle addition, but rather badly
 236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 237    ChildCode64 code1 r1lo <- iselExpr64 e1
 238    (rlo,rhi) <- getNewRegPairNat I32
 239    let
 240         r = fromIntegral (fromIntegral i :: Word32)
 241         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 242         r1hi = getHiVRegFromLo r1lo
 243         code =  code1 `appOL`
 244                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 245                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 246                        MOV I32 (OpReg r1hi) (OpReg rhi),
 247                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 248    -- in
 249    return (ChildCode64 code rlo)
 250
 251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 252    ChildCode64 code1 r1lo <- iselExpr64 e1
 253    ChildCode64 code2 r2lo <- iselExpr64 e2
 254    (rlo,rhi) <- getNewRegPairNat I32
 255    let
 256         r1hi = getHiVRegFromLo r1lo
 257         r2hi = getHiVRegFromLo r2lo
 258         code =  code1 `appOL`
 259                 code2 `appOL`
 260                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 261                        ADD I32 (OpReg r2lo) (OpReg rlo),
 262                        MOV I32 (OpReg r1hi) (OpReg rhi),
 263                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 264    -- in
 265    return (ChildCode64 code rlo)
 266
 267 iselExpr64 expr
 268    = pprPanic "iselExpr64(i386)" (ppr expr)
 269
 270 #endif /* i386_TARGET_ARCH */
 271
 272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 273
 274 #if sparc_TARGET_ARCH
 275
 276 assignMem_I64Code addrTree valueTree = do
 277      Amode addr addr_code <- getAmode addrTree
 278      ChildCode64 vcode rlo <- iselExpr64 valueTree
 279      (src, code) <- getSomeReg addrTree
 280      let
 281          rhi = getHiVRegFromLo rlo
 282          -- Big-endian store
 283          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 284          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 285      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 286
 287 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 288      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 289      let
 290          r_dst_lo = mkVReg u_dst pk
 291          r_dst_hi = getHiVRegFromLo r_dst_lo
 292          r_src_hi = getHiVRegFromLo r_src_lo
 293          mov_lo = mkMOV r_src_lo r_dst_lo
 294          mov_hi = mkMOV r_src_hi r_dst_hi
 295          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 296      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 297 assignReg_I64Code lvalue valueTree
 298    = panic "assignReg_I64Code(sparc): invalid lvalue"
 299
 300
 301 -- Don't delete this -- it's very handy for debugging.
 302 --iselExpr64 expr
 303 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 304 --   = panic "iselExpr64(???)"
 305
 306 iselExpr64 (CmmLoad addrTree I64) = do
 307      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 308      rlo <- getNewRegNat I32
 309      let rhi = getHiVRegFromLo rlo
 310          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 311          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 312      return (
 313             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 314                          rlo
 315           )
 316
 317 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 318      r_dst_lo <-  getNewRegNat I32
 319      let r_dst_hi = getHiVRegFromLo r_dst_lo
 320          r_src_lo = mkVReg uq I32
 321          r_src_hi = getHiVRegFromLo r_src_lo
 322          mov_lo = mkMOV r_src_lo r_dst_lo
 323          mov_hi = mkMOV r_src_hi r_dst_hi
 324          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 325      return (
 326             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 327          )
 328
 329 iselExpr64 expr
 330    = pprPanic "iselExpr64(sparc)" (ppr expr)
 331
 332 #endif /* sparc_TARGET_ARCH */
 333
 334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 335
 336 #if powerpc_TARGET_ARCH
 337
 338 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 339 getI64Amodes addrTree = do
 340     Amode hi_addr addr_code <- getAmode addrTree
 341     case addrOffset hi_addr 4 of
 342         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 343         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 344                            return (AddrRegImm hi_ptr (ImmInt 0),
 345                                    AddrRegImm hi_ptr (ImmInt 4),
 346                                    code)
 347
 348 assignMem_I64Code addrTree valueTree = do
 349         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 350         ChildCode64 vcode rlo <- iselExpr64 valueTree
 351         let
 352                 rhi = getHiVRegFromLo rlo
 353
 354                 -- Big-endian store
 355                 mov_hi = ST I32 rhi hi_addr
 356                 mov_lo = ST I32 rlo lo_addr
 357         -- in
 358         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 359
 360 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 361    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 362    let
 363          r_dst_lo = mkVReg u_dst I32
 364          r_dst_hi = getHiVRegFromLo r_dst_lo
 365          r_src_hi = getHiVRegFromLo r_src_lo
 366          mov_lo = MR r_dst_lo r_src_lo
 367          mov_hi = MR r_dst_hi r_src_hi
 368    -- in
 369    return (
 370         vcode `snocOL` mov_lo `snocOL` mov_hi
 371      )
 372
 373 assignReg_I64Code lvalue valueTree
 374    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 375
 376
 377 -- Don't delete this -- it's very handy for debugging.
 378 --iselExpr64 expr
 379 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 380 --   = panic "iselExpr64(???)"
 381
 382 iselExpr64 (CmmLoad addrTree I64) = do
 383     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 384     (rlo, rhi) <- getNewRegPairNat I32
 385     let mov_hi = LD I32 rhi hi_addr
 386         mov_lo = LD I32 rlo lo_addr
 387     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 388                          rlo
 389
 390 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 391    = return (ChildCode64 nilOL (mkVReg vu I32))
 392
 393 iselExpr64 (CmmLit (CmmInt i _)) = do
 394   (rlo,rhi) <- getNewRegPairNat I32
 395   let
 396         half0 = fromIntegral (fromIntegral i :: Word16)
 397         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 398         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 399         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 400
 401         code = toOL [
 402                 LIS rlo (ImmInt half1),
 403                 OR rlo rlo (RIImm $ ImmInt half0),
 404                 LIS rhi (ImmInt half3),
 405                 OR rlo rlo (RIImm $ ImmInt half2)
 406                 ]
 407   -- in
 408   return (ChildCode64 code rlo)
 409
 410 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 411    ChildCode64 code1 r1lo <- iselExpr64 e1
 412    ChildCode64 code2 r2lo <- iselExpr64 e2
 413    (rlo,rhi) <- getNewRegPairNat I32
 414    let
 415         r1hi = getHiVRegFromLo r1lo
 416         r2hi = getHiVRegFromLo r2lo
 417         code =  code1 `appOL`
 418                 code2 `appOL`
 419                 toOL [ ADDC rlo r1lo r2lo,
 420                        ADDE rhi r1hi r2hi ]
 421    -- in
 422    return (ChildCode64 code rlo)
 423
 424 iselExpr64 expr
 425    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 426
 427 #endif /* powerpc_TARGET_ARCH */
 428
 429
 430 -- -----------------------------------------------------------------------------
 431 -- The 'Register' type
 432
 433 -- 'Register's passed up the tree.  If the stix code forces the register
 434 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 435 -- otherwise, it comes out as @Any@, and the parent can decide which
 436 -- register to put it in.
 437
 438 data Register
 439   = Fixed   MachRep Reg InstrBlock
 440   | Any     MachRep (Reg -> InstrBlock)
 441
 442 swizzleRegisterRep :: Register -> MachRep -> Register
 443 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 444 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 445
 446
 447 -- -----------------------------------------------------------------------------
 448 -- Utils based on getRegister, below
 449
 450 -- The dual to getAnyReg: compute an expression into a register, but
 451 -- we don't mind which one it is.
 452 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 453 getSomeReg expr = do
 454   r <- getRegister expr
 455   case r of
 456     Any rep code -> do
 457         tmp <- getNewRegNat rep
 458         return (tmp, code tmp)
 459     Fixed _ reg code ->
 460         return (reg, code)
 461
 462 -- -----------------------------------------------------------------------------
 463 -- Grab the Reg for a CmmReg
 464
 465 getRegisterReg :: CmmReg -> Reg
 466
 467 getRegisterReg (CmmLocal (LocalReg u pk))
 468   = mkVReg u pk
 469
 470 getRegisterReg (CmmGlobal mid)
 471   = case get_GlobalReg_reg_or_addr mid of
 472        Left (RealReg rrno) -> RealReg rrno
 473        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 474           -- By this stage, the only MagicIds remaining should be the
 475           -- ones which map to a real machine register on this
 476           -- platform.  Hence ...
 477
 478
 479 -- -----------------------------------------------------------------------------
 480 -- Generate code to get a subtree into a Register
 481
 482 -- Don't delete this -- it's very handy for debugging.
 483 --getRegister expr
 484 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 485 --   = panic "getRegister(???)"
 486
 487 getRegister :: CmmExpr -> NatM Register
 488
 489 getRegister (CmmReg (CmmGlobal PicBaseReg))
 490   = do
 491       reg <- getPicBaseNat wordRep
 492       return (Fixed wordRep reg nilOL)
 493
 494 getRegister (CmmReg reg)
 495   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 496
 497 getRegister tree@(CmmRegOff _ _)
 498   = getRegister (mangleIndexTree tree)
 499
 500 -- end of machine-"independent" bit; here we go on the rest...
 501
 502 #if alpha_TARGET_ARCH
 503
 504 getRegister (StDouble d)
 505   = getBlockIdNat                   `thenNat` \ lbl ->
 506     getNewRegNat PtrRep             `thenNat` \ tmp ->
 507     let code dst = mkSeqInstrs [
 508             LDATA RoDataSegment lbl [
 509                     DATA TF [ImmLab (rational d)]
 510                 ],
 511             LDA tmp (AddrImm (ImmCLbl lbl)),
 512             LD TF dst (AddrReg tmp)]
 513     in
 514         return (Any F64 code)
 515
 516 getRegister (StPrim primop [x]) -- unary PrimOps
 517   = case primop of
 518       IntNegOp -> trivialUCode (NEG Q False) x
 519
 520       NotOp    -> trivialUCode NOT x
 521
 522       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 523       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 524
 525       OrdOp -> coerceIntCode IntRep x
 526       ChrOp -> chrCode x
 527
 528       Float2IntOp  -> coerceFP2Int    x
 529       Int2FloatOp  -> coerceInt2FP pr x
 530       Double2IntOp -> coerceFP2Int    x
 531       Int2DoubleOp -> coerceInt2FP pr x
 532
 533       Double2FloatOp -> coerceFltCode x
 534       Float2DoubleOp -> coerceFltCode x
 535
 536       other_op -> getRegister (StCall fn CCallConv F64 [x])
 537         where
 538           fn = case other_op of
 539                  FloatExpOp    -> FSLIT("exp")
 540                  FloatLogOp    -> FSLIT("log")
 541                  FloatSqrtOp   -> FSLIT("sqrt")
 542                  FloatSinOp    -> FSLIT("sin")
 543                  FloatCosOp    -> FSLIT("cos")
 544                  FloatTanOp    -> FSLIT("tan")
 545                  FloatAsinOp   -> FSLIT("asin")
 546                  FloatAcosOp   -> FSLIT("acos")
 547                  FloatAtanOp   -> FSLIT("atan")
 548                  FloatSinhOp   -> FSLIT("sinh")
 549                  FloatCoshOp   -> FSLIT("cosh")
 550                  FloatTanhOp   -> FSLIT("tanh")
 551                  DoubleExpOp   -> FSLIT("exp")
 552                  DoubleLogOp   -> FSLIT("log")
 553                  DoubleSqrtOp  -> FSLIT("sqrt")
 554                  DoubleSinOp   -> FSLIT("sin")
 555                  DoubleCosOp   -> FSLIT("cos")
 556                  DoubleTanOp   -> FSLIT("tan")
 557                  DoubleAsinOp  -> FSLIT("asin")
 558                  DoubleAcosOp  -> FSLIT("acos")
 559                  DoubleAtanOp  -> FSLIT("atan")
 560                  DoubleSinhOp  -> FSLIT("sinh")
 561                  DoubleCoshOp  -> FSLIT("cosh")
 562                  DoubleTanhOp  -> FSLIT("tanh")
 563   where
 564     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 565
 566 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 567   = case primop of
 568       CharGtOp -> trivialCode (CMP LTT) y x
 569       CharGeOp -> trivialCode (CMP LE) y x
 570       CharEqOp -> trivialCode (CMP EQQ) x y
 571       CharNeOp -> int_NE_code x y
 572       CharLtOp -> trivialCode (CMP LTT) x y
 573       CharLeOp -> trivialCode (CMP LE) x y
 574
 575       IntGtOp  -> trivialCode (CMP LTT) y x
 576       IntGeOp  -> trivialCode (CMP LE) y x
 577       IntEqOp  -> trivialCode (CMP EQQ) x y
 578       IntNeOp  -> int_NE_code x y
 579       IntLtOp  -> trivialCode (CMP LTT) x y
 580       IntLeOp  -> trivialCode (CMP LE) x y
 581
 582       WordGtOp -> trivialCode (CMP ULT) y x
 583       WordGeOp -> trivialCode (CMP ULE) x y
 584       WordEqOp -> trivialCode (CMP EQQ)  x y
 585       WordNeOp -> int_NE_code x y
 586       WordLtOp -> trivialCode (CMP ULT) x y
 587       WordLeOp -> trivialCode (CMP ULE) x y
 588
 589       AddrGtOp -> trivialCode (CMP ULT) y x
 590       AddrGeOp -> trivialCode (CMP ULE) y x
 591       AddrEqOp -> trivialCode (CMP EQQ)  x y
 592       AddrNeOp -> int_NE_code x y
 593       AddrLtOp -> trivialCode (CMP ULT) x y
 594       AddrLeOp -> trivialCode (CMP ULE) x y
 595
 596       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 597       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 598       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 599       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 600       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 601       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 602
 603       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 604       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 605       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 606       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 607       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 608       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 609
 610       IntAddOp  -> trivialCode (ADD Q False) x y
 611       IntSubOp  -> trivialCode (SUB Q False) x y
 612       IntMulOp  -> trivialCode (MUL Q False) x y
 613       IntQuotOp -> trivialCode (DIV Q False) x y
 614       IntRemOp  -> trivialCode (REM Q False) x y
 615
 616       WordAddOp  -> trivialCode (ADD Q False) x y
 617       WordSubOp  -> trivialCode (SUB Q False) x y
 618       WordMulOp  -> trivialCode (MUL Q False) x y
 619       WordQuotOp -> trivialCode (DIV Q True) x y
 620       WordRemOp  -> trivialCode (REM Q True) x y
 621
 622       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 623       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 624       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 625       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 626
 627       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 628       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 629       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 630       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 631
 632       AddrAddOp  -> trivialCode (ADD Q False) x y
 633       AddrSubOp  -> trivialCode (SUB Q False) x y
 634       AddrRemOp  -> trivialCode (REM Q True) x y
 635
 636       AndOp  -> trivialCode AND x y
 637       OrOp   -> trivialCode OR  x y
 638       XorOp  -> trivialCode XOR x y
 639       SllOp  -> trivialCode SLL x y
 640       SrlOp  -> trivialCode SRL x y
 641
 642       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 643       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 644       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 645
 646       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 647       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 648   where
 649     {- ------------------------------------------------------------
 650         Some bizarre special code for getting condition codes into
 651         registers.  Integer non-equality is a test for equality
 652         followed by an XOR with 1.  (Integer comparisons always set
 653         the result register to 0 or 1.)  Floating point comparisons of
 654         any kind leave the result in a floating point register, so we
 655         need to wrangle an integer register out of things.
 656     -}
 657     int_NE_code :: StixTree -> StixTree -> NatM Register
 658
 659     int_NE_code x y
 660       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 661         getNewRegNat IntRep             `thenNat` \ tmp ->
 662         let
 663             code = registerCode register tmp
 664             src  = registerName register tmp
 665             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 666         in
 667         return (Any IntRep code__2)
 668
 669     {- ------------------------------------------------------------
 670         Comments for int_NE_code also apply to cmpF_code
 671     -}
 672     cmpF_code
 673         :: (Reg -> Reg -> Reg -> Instr)
 674         -> Cond
 675         -> StixTree -> StixTree
 676         -> NatM Register
 677
 678     cmpF_code instr cond x y
 679       = trivialFCode pr instr x y       `thenNat` \ register ->
 680         getNewRegNat F64                `thenNat` \ tmp ->
 681         getBlockIdNat                   `thenNat` \ lbl ->
 682         let
 683             code = registerCode register tmp
 684             result  = registerName register tmp
 685
 686             code__2 dst = code . mkSeqInstrs [
 687                 OR zeroh (RIImm (ImmInt 1)) dst,
 688                 BF cond  result (ImmCLbl lbl),
 689                 OR zeroh (RIReg zeroh) dst,
 690                 NEWBLOCK lbl]
 691         in
 692         return (Any IntRep code__2)
 693       where
 694         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 695       ------------------------------------------------------------
 696
 697 getRegister (CmmLoad pk mem)
 698   = getAmode mem                    `thenNat` \ amode ->
 699     let
 700         code = amodeCode amode
 701         src   = amodeAddr amode
 702         size = primRepToSize pk
 703         code__2 dst = code . mkSeqInstr (LD size dst src)
 704     in
 705     return (Any pk code__2)
 706
 707 getRegister (StInt i)
 708   | fits8Bits i
 709   = let
 710         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 711     in
 712     return (Any IntRep code)
 713   | otherwise
 714   = let
 715         code dst = mkSeqInstr (LDI Q dst src)
 716     in
 717     return (Any IntRep code)
 718   where
 719     src = ImmInt (fromInteger i)
 720
 721 getRegister leaf
 722   | isJust imm
 723   = let
 724         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 725     in
 726     return (Any PtrRep code)
 727   where
 728     imm = maybeImm leaf
 729     imm__2 = case imm of Just x -> x
 730
 731 #endif /* alpha_TARGET_ARCH */
 732
 733 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 734
 735 #if i386_TARGET_ARCH
 736
 737 getRegister (CmmLit (CmmFloat f F32)) = do
 738     lbl <- getNewLabelNat
 739     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 740     Amode addr addr_code <- getAmode dynRef
 741     let code dst =
 742             LDATA ReadOnlyData
 743                         [CmmDataLabel lbl,
 744                          CmmStaticLit (CmmFloat f F32)]
 745             `consOL` (addr_code `snocOL`
 746             GLD F32 addr dst)
 747     -- in
 748     return (Any F32 code)
 749
 750
 751 getRegister (CmmLit (CmmFloat d F64))
 752   | d == 0.0
 753   = let code dst = unitOL (GLDZ dst)
 754     in  return (Any F64 code)
 755
 756   | d == 1.0
 757   = let code dst = unitOL (GLD1 dst)
 758     in  return (Any F64 code)
 759
 760   | otherwise = do
 761     lbl <- getNewLabelNat
 762     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 763     Amode addr addr_code <- getAmode dynRef
 764     let code dst =
 765             LDATA ReadOnlyData
 766                         [CmmDataLabel lbl,
 767                          CmmStaticLit (CmmFloat d F64)]
 768             `consOL` (addr_code `snocOL`
 769             GLD F64 addr dst)
 770     -- in
 771     return (Any F64 code)
 772
 773 #endif /* i386_TARGET_ARCH */
 774
 775 #if x86_64_TARGET_ARCH
 776
 777 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 778    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 779         -- I don't know why there are xorpd, xorps, and pxor instructions.
 780         -- They all appear to do the same thing --SDM
 781    return (Any rep code)
 782
 783 getRegister (CmmLit (CmmFloat f rep)) = do
 784     lbl <- getNewLabelNat
 785     let code dst = toOL [
 786             LDATA ReadOnlyData
 787                         [CmmDataLabel lbl,
 788                          CmmStaticLit (CmmFloat f rep)],
 789             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 790             ]
 791     -- in
 792     return (Any rep code)
 793
 794 #endif /* x86_64_TARGET_ARCH */
 795
 796 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 797
 798 -- catch simple cases of zero- or sign-extended load
 799 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 800   code <- intLoadCode (MOVZxL I8) addr
 801   return (Any I32 code)
 802
 803 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 804   code <- intLoadCode (MOVSxL I8) addr
 805   return (Any I32 code)
 806
 807 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 808   code <- intLoadCode (MOVZxL I16) addr
 809   return (Any I32 code)
 810
 811 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 812   code <- intLoadCode (MOVSxL I16) addr
 813   return (Any I32 code)
 814
 815 #endif
 816
 817 #if x86_64_TARGET_ARCH
 818
 819 -- catch simple cases of zero- or sign-extended load
 820 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 821   code <- intLoadCode (MOVZxL I8) addr
 822   return (Any I64 code)
 823
 824 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 825   code <- intLoadCode (MOVSxL I8) addr
 826   return (Any I64 code)
 827
 828 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 829   code <- intLoadCode (MOVZxL I16) addr
 830   return (Any I64 code)
 831
 832 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 833   code <- intLoadCode (MOVSxL I16) addr
 834   return (Any I64 code)
 835
 836 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 837   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 838   return (Any I64 code)
 839
 840 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 841   code <- intLoadCode (MOVSxL I32) addr
 842   return (Any I64 code)
 843
 844 #endif
 845
 846 #if x86_64_TARGET_ARCH
 847 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 848   x_code <- getAnyReg x
 849   lbl <- getNewLabelNat
 850   let
 851     code dst = x_code dst `appOL` toOL [
 852         -- This is how gcc does it, so it can't be that bad:
 853         LDATA ReadOnlyData16 [
 854                 CmmAlign 16,
 855                 CmmDataLabel lbl,
 856                 CmmStaticLit (CmmInt 0x80000000 I32),
 857                 CmmStaticLit (CmmInt 0 I32),
 858                 CmmStaticLit (CmmInt 0 I32),
 859                 CmmStaticLit (CmmInt 0 I32)
 860         ],
 861         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 862                 -- xorps, so we need the 128-bit constant
 863                 -- ToDo: rip-relative
 864         ]
 865   --
 866   return (Any F32 code)
 867
 868 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 869   x_code <- getAnyReg x
 870   lbl <- getNewLabelNat
 871   let
 872         -- This is how gcc does it, so it can't be that bad:
 873     code dst = x_code dst `appOL` toOL [
 874         LDATA ReadOnlyData16 [
 875                 CmmAlign 16,
 876                 CmmDataLabel lbl,
 877                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 878                 CmmStaticLit (CmmInt 0 I64)
 879         ],
 880                 -- gcc puts an unpck here.  Wonder if we need it.
 881         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 882                 -- xorpd, so we need the 128-bit constant
 883         ]
 884   --
 885   return (Any F64 code)
 886 #endif
 887
 888 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 889
 890 getRegister (CmmMachOp mop [x]) -- unary MachOps
 891   = case mop of
 892 #if i386_TARGET_ARCH
 893       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 894       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 895 #endif
 896
 897       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 898       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 899
 900       -- Nop conversions
 901       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 902       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 903       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 904       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 905       MO_U_Conv I32 I16 -> toI16Reg I32 x
 906       MO_S_Conv I32 I16 -> toI16Reg I32 x
 907 #if x86_64_TARGET_ARCH
 908       MO_U_Conv I64 I32 -> conversionNop I64 x
 909       MO_S_Conv I64 I32 -> conversionNop I64 x
 910       MO_U_Conv I64 I16 -> toI16Reg I64 x
 911       MO_S_Conv I64 I16 -> toI16Reg I64 x
 912       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 913       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 914 #endif
 915
 916       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 917       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 918
 919       -- widenings
 920       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 921       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 922       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 923
 924       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 925       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 926       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 927
 928 #if x86_64_TARGET_ARCH
 929       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 930       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 931       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 932       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 933       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 934       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 935         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 936         -- However, we don't want the register allocator to throw it
 937         -- away as an unnecessary reg-to-reg move, so we keep it in
 938         -- the form of a movzl and print it as a movl later.
 939 #endif
 940
 941 #if i386_TARGET_ARCH
 942       MO_S_Conv F32 F64 -> conversionNop F64 x
 943       MO_S_Conv F64 F32 -> conversionNop F32 x
 944 #else
 945       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 946       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 947 #endif
 948
 949       MO_S_Conv from to
 950         | isFloatingRep from -> coerceFP2Int from to x
 951         | isFloatingRep to   -> coerceInt2FP from to x
 952
 953       other -> pprPanic "getRegister" (pprMachOp mop)
 954    where
 955         -- signed or unsigned extension.
 956         integerExtend from to instr expr = do
 957             (reg,e_code) <- if from == I8 then getByteReg expr
 958                                           else getSomeReg expr
 959             let
 960                 code dst =
 961                   e_code `snocOL`
 962                   instr from (OpReg reg) (OpReg dst)
 963             return (Any to code)
 964
 965         toI8Reg new_rep expr
 966             = do codefn <- getAnyReg expr
 967                  return (Any new_rep codefn)
 968                 -- HACK: use getAnyReg to get a byte-addressable register.
 969                 -- If the source was a Fixed register, this will add the
 970                 -- mov instruction to put it into the desired destination.
 971                 -- We're assuming that the destination won't be a fixed
 972                 -- non-byte-addressable register; it won't be, because all
 973                 -- fixed registers are word-sized.
 974
 975         toI16Reg = toI8Reg -- for now
 976
 977         conversionNop new_rep expr
 978             = do e_code <- getRegister expr
 979                  return (swizzleRegisterRep e_code new_rep)
 980
 981
 982 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 983   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 984     case mop of
 985       MO_Eq F32   -> condFltReg EQQ x y
 986       MO_Ne F32   -> condFltReg NE x y
 987       MO_S_Gt F32 -> condFltReg GTT x y
 988       MO_S_Ge F32 -> condFltReg GE x y
 989       MO_S_Lt F32 -> condFltReg LTT x y
 990       MO_S_Le F32 -> condFltReg LE x y
 991
 992       MO_Eq F64   -> condFltReg EQQ x y
 993       MO_Ne F64   -> condFltReg NE x y
 994       MO_S_Gt F64 -> condFltReg GTT x y
 995       MO_S_Ge F64 -> condFltReg GE x y
 996       MO_S_Lt F64 -> condFltReg LTT x y
 997       MO_S_Le F64 -> condFltReg LE x y
 998
 999       MO_Eq rep   -> condIntReg EQQ x y
1000       MO_Ne rep   -> condIntReg NE x y
1001
1002       MO_S_Gt rep -> condIntReg GTT x y
1003       MO_S_Ge rep -> condIntReg GE x y
1004       MO_S_Lt rep -> condIntReg LTT x y
1005       MO_S_Le rep -> condIntReg LE x y
1006
1007       MO_U_Gt rep -> condIntReg GU  x y
1008       MO_U_Ge rep -> condIntReg GEU x y
1009       MO_U_Lt rep -> condIntReg LU  x y
1010       MO_U_Le rep -> condIntReg LEU x y
1011
1012 #if i386_TARGET_ARCH
1013       MO_Add F32 -> trivialFCode F32 GADD x y
1014       MO_Sub F32 -> trivialFCode F32 GSUB x y
1015
1016       MO_Add F64 -> trivialFCode F64 GADD x y
1017       MO_Sub F64 -> trivialFCode F64 GSUB x y
1018
1019       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1020       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1021 #endif
1022
1023 #if x86_64_TARGET_ARCH
1024       MO_Add F32 -> trivialFCode F32 ADD x y
1025       MO_Sub F32 -> trivialFCode F32 SUB x y
1026
1027       MO_Add F64 -> trivialFCode F64 ADD x y
1028       MO_Sub F64 -> trivialFCode F64 SUB x y
1029
1030       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1031       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1032 #endif
1033
1034       MO_Add rep -> add_code rep x y
1035       MO_Sub rep -> sub_code rep x y
1036
1037       MO_S_Quot rep -> div_code rep True  True  x y
1038       MO_S_Rem  rep -> div_code rep True  False x y
1039       MO_U_Quot rep -> div_code rep False True  x y
1040       MO_U_Rem  rep -> div_code rep False False x y
1041
1042 #if i386_TARGET_ARCH
1043       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1044       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1045 #endif
1046
1047 #if x86_64_TARGET_ARCH
1048       MO_Mul   F32 -> trivialFCode F32 MUL x y
1049       MO_Mul   F64 -> trivialFCode F64 MUL x y
1050 #endif
1051
1052       MO_Mul   rep -> let op = IMUL rep in
1053                       trivialCode rep op (Just op) x y
1054
1055       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1056
1057       MO_And rep -> let op = AND rep in
1058                     trivialCode rep op (Just op) x y
1059       MO_Or  rep -> let op = OR  rep in
1060                     trivialCode rep op (Just op) x y
1061       MO_Xor rep -> let op = XOR rep in
1062                     trivialCode rep op (Just op) x y
1063
1064         {- Shift ops on x86s have constraints on their source, it
1065            either has to be Imm, CL or 1
1066             => trivialCode is not restrictive enough (sigh.)
1067         -}
1068       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1069       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1070       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1071
1072       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1073   where
1074     --------------------
1075     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1076     imulMayOflo rep a b = do
1077          (a_reg, a_code) <- getNonClobberedReg a
1078          b_code <- getAnyReg b
1079          let
1080              shift_amt  = case rep of
1081                            I32 -> 31
1082                            I64 -> 63
1083                            _ -> panic "shift_amt"
1084
1085              code = a_code `appOL` b_code eax `appOL`
1086                         toOL [
1087                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1088                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1089                                 -- sign extend lower part
1090                            SUB rep (OpReg edx) (OpReg eax)
1091                                 -- compare against upper
1092                            -- eax==0 if high part == sign extended low part
1093                         ]
1094          -- in
1095          return (Fixed rep eax code)
1096
1097     --------------------
1098     shift_code :: MachRep
1099                -> (Operand -> Operand -> Instr)
1100                -> CmmExpr
1101                -> CmmExpr
1102                -> NatM Register
1103
1104     {- Case1: shift length as immediate -}
1105     shift_code rep instr x y@(CmmLit lit) = do
1106           x_code <- getAnyReg x
1107           let
1108                code dst
1109                   = x_code dst `snocOL`
1110                     instr (OpImm (litToImm lit)) (OpReg dst)
1111           -- in
1112           return (Any rep code)
1113
1114     {- Case2: shift length is complex (non-immediate) -}
1115     shift_code rep instr x y{-amount-} = do
1116         (x_reg, x_code) <- getNonClobberedReg x
1117         y_code <- getAnyReg y
1118         let
1119            code = x_code `appOL`
1120                   y_code ecx `snocOL`
1121                   instr (OpReg ecx) (OpReg x_reg)
1122         -- in
1123         return (Fixed rep x_reg code)
1124
1125     --------------------
1126     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1127     add_code rep x (CmmLit (CmmInt y _))
1128         | not (is64BitInteger y) = add_int rep x y
1129     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1130
1131     --------------------
1132     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1133     sub_code rep x (CmmLit (CmmInt y _))
1134         | not (is64BitInteger (-y)) = add_int rep x (-y)
1135     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1136
1137     -- our three-operand add instruction:
1138     add_int rep x y = do
1139         (x_reg, x_code) <- getSomeReg x
1140         let
1141             imm = ImmInt (fromInteger y)
1142             code dst
1143                = x_code `snocOL`
1144                  LEA rep
1145                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1146                         (OpReg dst)
1147         --
1148         return (Any rep code)
1149
1150     ----------------------
1151     div_code rep signed quotient x y = do
1152            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1153            x_code <- getAnyReg x
1154            let
1155              widen | signed    = CLTD rep
1156                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1157
1158              instr | signed    = IDIV
1159                    | otherwise = DIV
1160
1161              code = y_code `appOL`
1162                     x_code eax `appOL`
1163                     toOL [widen, instr rep y_op]
1164
1165              result | quotient  = eax
1166                     | otherwise = edx
1167
1168            -- in
1169            return (Fixed rep result code)
1170
1171
1172 getRegister (CmmLoad mem pk)
1173   | isFloatingRep pk
1174   = do
1175     Amode src mem_code <- getAmode mem
1176     let
1177         code dst = mem_code `snocOL`
1178                    IF_ARCH_i386(GLD pk src dst,
1179                                 MOV pk (OpAddr src) (OpReg dst))
1180     --
1181     return (Any pk code)
1182
1183 #if i386_TARGET_ARCH
1184 getRegister (CmmLoad mem pk)
1185   | pk /= I64
1186   = do
1187     code <- intLoadCode (instr pk) mem
1188     return (Any pk code)
1189   where
1190         instr I8  = MOVZxL pk
1191         instr I16 = MOV I16
1192         instr I32 = MOV I32
1193         -- we always zero-extend 8-bit loads, if we
1194         -- can't think of anything better.  This is because
1195         -- we can't guarantee access to an 8-bit variant of every register
1196         -- (esi and edi don't have 8-bit variants), so to make things
1197         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1198 #endif
1199
1200 #if x86_64_TARGET_ARCH
1201 -- Simpler memory load code on x86_64
1202 getRegister (CmmLoad mem pk)
1203   = do
1204     code <- intLoadCode (MOV pk) mem
1205     return (Any pk code)
1206 #endif
1207
1208 getRegister (CmmLit (CmmInt 0 rep))
1209   = let
1210         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1211         adj_rep = case rep of I64 -> I32; _ -> rep
1212         rep1 = IF_ARCH_i386( rep, adj_rep )
1213         code dst
1214            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1215     in
1216         return (Any rep code)
1217
1218 #if x86_64_TARGET_ARCH
1219   -- optimisation for loading small literals on x86_64: take advantage
1220   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1221   -- instruction forms are shorter.
1222 getRegister (CmmLit lit)
1223   | I64 <- cmmLitRep lit, not (isBigLit lit)
1224   = let
1225         imm = litToImm lit
1226         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1227     in
1228         return (Any I64 code)
1229   where
1230    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1231    isBigLit _ = False
1232         -- note1: not the same as is64BitLit, because that checks for
1233         -- signed literals that fit in 32 bits, but we want unsigned
1234         -- literals here.
1235         -- note2: all labels are small, because we're assuming the
1236         -- small memory model (see gcc docs, -mcmodel=small).
1237 #endif
1238
1239 getRegister (CmmLit lit)
1240   = let
1241         rep = cmmLitRep lit
1242         imm = litToImm lit
1243         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1244     in
1245         return (Any rep code)
1246
1247 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1248
1249
1250 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1251    -> NatM (Reg -> InstrBlock)
1252 intLoadCode instr mem = do
1253   Amode src mem_code <- getAmode mem
1254   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1255
1256 -- Compute an expression into *any* register, adding the appropriate
1257 -- move instruction if necessary.
1258 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1259 getAnyReg expr = do
1260   r <- getRegister expr
1261   anyReg r
1262
1263 anyReg :: Register -> NatM (Reg -> InstrBlock)
1264 anyReg (Any _ code)          = return code
1265 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1266
1267 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1268 -- Fixed registers might not be byte-addressable, so we make sure we've
1269 -- got a temporary, inserting an extra reg copy if necessary.
1270 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1271 #if x86_64_TARGET_ARCH
1272 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1273 #else
1274 getByteReg expr = do
1275   r <- getRegister expr
1276   case r of
1277     Any rep code -> do
1278         tmp <- getNewRegNat rep
1279         return (tmp, code tmp)
1280     Fixed rep reg code
1281         | isVirtualReg reg -> return (reg,code)
1282         | otherwise -> do
1283             tmp <- getNewRegNat rep
1284             return (tmp, code `snocOL` reg2reg rep reg tmp)
1285         -- ToDo: could optimise slightly by checking for byte-addressable
1286         -- real registers, but that will happen very rarely if at all.
1287 #endif
1288
1289 -- Another variant: this time we want the result in a register that cannot
1290 -- be modified by code to evaluate an arbitrary expression.
1291 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1292 getNonClobberedReg expr = do
1293   r <- getRegister expr
1294   case r of
1295     Any rep code -> do
1296         tmp <- getNewRegNat rep
1297         return (tmp, code tmp)
1298     Fixed rep reg code
1299         -- only free regs can be clobbered
1300         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1301                 tmp <- getNewRegNat rep
1302                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1303         | otherwise ->
1304                 return (reg, code)
1305
1306 reg2reg :: MachRep -> Reg -> Reg -> Instr
1307 reg2reg rep src dst
1308 #if i386_TARGET_ARCH
1309   | isFloatingRep rep = GMOV src dst
1310 #endif
1311   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1312
1313 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1314
1315 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1316
1317 #if sparc_TARGET_ARCH
1318
1319 getRegister (CmmLit (CmmFloat f F32)) = do
1320     lbl <- getNewLabelNat
1321     let code dst = toOL [
1322             LDATA ReadOnlyData
1323                         [CmmDataLabel lbl,
1324                          CmmStaticLit (CmmFloat f F32)],
1325             SETHI (HI (ImmCLbl lbl)) dst,
1326             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1327     return (Any F32 code)
1328
1329 getRegister (CmmLit (CmmFloat d F64)) = do
1330     lbl <- getNewLabelNat
1331     let code dst = toOL [
1332             LDATA ReadOnlyData
1333                         [CmmDataLabel lbl,
1334                          CmmStaticLit (CmmFloat d F64)],
1335             SETHI (HI (ImmCLbl lbl)) dst,
1336             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1337     return (Any F64 code)
1338
1339 getRegister (CmmMachOp mop [x]) -- unary MachOps
1340   = case mop of
1341       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1342       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1343
1344       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1345       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1346
1347       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1348
1349       MO_U_Conv F64 F32-> coerceDbl2Flt x
1350       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1351
1352       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1353       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1354       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1355       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1356
1357       -- Conversions which are a nop on sparc
1358       MO_U_Conv from to
1359         | from == to   -> conversionNop to   x
1360       MO_U_Conv I32 to -> conversionNop to   x
1361       MO_S_Conv I32 to -> conversionNop to   x
1362
1363       -- widenings
1364       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1365       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1366       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1367       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1368
1369       other_op -> panic "Unknown unary mach op"
1370     where
1371         -- XXX SLL/SRL?
1372         integerExtend signed from to expr = do
1373            (reg, e_code) <- getSomeReg expr
1374            let
1375                code dst =
1376                    e_code `snocOL`
1377                    ((if signed then SRA else SRL)
1378                           reg (RIImm (ImmInt 0)) dst)
1379            return (Any to code)
1380         conversionNop new_rep expr
1381             = do e_code <- getRegister expr
1382                  return (swizzleRegisterRep e_code new_rep)
1383
1384 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1385   = case mop of
1386       MO_Eq F32 -> condFltReg EQQ x y
1387       MO_Ne F32 -> condFltReg NE x y
1388
1389       MO_S_Gt F32 -> condFltReg GTT x y
1390       MO_S_Ge F32 -> condFltReg GE x y
1391       MO_S_Lt F32 -> condFltReg LTT x y
1392       MO_S_Le F32 -> condFltReg LE x y
1393
1394       MO_Eq F64 -> condFltReg EQQ x y
1395       MO_Ne F64 -> condFltReg NE x y
1396
1397       MO_S_Gt F64 -> condFltReg GTT x y
1398       MO_S_Ge F64 -> condFltReg GE x y
1399       MO_S_Lt F64 -> condFltReg LTT x y
1400       MO_S_Le F64 -> condFltReg LE x y
1401
1402       MO_Eq rep -> condIntReg EQQ x y
1403       MO_Ne rep -> condIntReg NE x y
1404
1405       MO_S_Gt rep -> condIntReg GTT x y
1406       MO_S_Ge rep -> condIntReg GE x y
1407       MO_S_Lt rep -> condIntReg LTT x y
1408       MO_S_Le rep -> condIntReg LE x y
1409
1410       MO_U_Gt I32  -> condIntReg GTT x y
1411       MO_U_Ge I32  -> condIntReg GE x y
1412       MO_U_Lt I32  -> condIntReg LTT x y
1413       MO_U_Le I32  -> condIntReg LE x y
1414
1415       MO_U_Gt I16 -> condIntReg GU  x y
1416       MO_U_Ge I16 -> condIntReg GEU x y
1417       MO_U_Lt I16 -> condIntReg LU  x y
1418       MO_U_Le I16 -> condIntReg LEU x y
1419
1420       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1421       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1422
1423       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1424 {-
1425       -- ToDo: teach about V8+ SPARC div instructions
1426       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1427       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1428       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1429       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1430 -}
1431       MO_Add F32  -> trivialFCode F32 FADD  x y
1432       MO_Sub F32   -> trivialFCode F32  FSUB x y
1433       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1434       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1435
1436       MO_Add F64   -> trivialFCode F64 FADD  x y
1437       MO_Sub F64   -> trivialFCode F64  FSUB x y
1438       MO_Mul F64   -> trivialFCode F64  FMUL x y
1439       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1440
1441       MO_And rep   -> trivialCode rep (AND False) x y
1442       MO_Or rep    -> trivialCode rep (OR  False) x y
1443       MO_Xor rep   -> trivialCode rep (XOR False) x y
1444
1445       MO_Mul rep -> trivialCode rep (SMUL False) x y
1446
1447       MO_Shl rep   -> trivialCode rep SLL  x y
1448       MO_U_Shr rep   -> trivialCode rep SRL x y
1449       MO_S_Shr rep   -> trivialCode rep SRA x y
1450
1451 {-
1452       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1453                                          [promote x, promote y])
1454                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1455       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1456                                         [x, y])
1457 -}
1458       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1459   where
1460     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1461
1462     --------------------
1463     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1464     imulMayOflo rep a b = do
1465          (a_reg, a_code) <- getSomeReg a
1466          (b_reg, b_code) <- getSomeReg b
1467          res_lo <- getNewRegNat I32
1468          res_hi <- getNewRegNat I32
1469          let
1470             shift_amt  = case rep of
1471                           I32 -> 31
1472                           I64 -> 63
1473                           _ -> panic "shift_amt"
1474             code dst = a_code `appOL` b_code `appOL`
1475                        toOL [
1476                            SMUL False a_reg (RIReg b_reg) res_lo,
1477                            RDY res_hi,
1478                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1479                            SUB False False res_lo (RIReg res_hi) dst
1480                         ]
1481          return (Any I32 code)
1482
1483 getRegister (CmmLoad mem pk) = do
1484     Amode src code <- getAmode mem
1485     let
1486         code__2 dst = code `snocOL` LD pk src dst
1487     return (Any pk code__2)
1488
1489 getRegister (CmmLit (CmmInt i _))
1490   | fits13Bits i
1491   = let
1492         src = ImmInt (fromInteger i)
1493         code dst = unitOL (OR False g0 (RIImm src) dst)
1494     in
1495         return (Any I32 code)
1496
1497 getRegister (CmmLit lit)
1498   = let rep = cmmLitRep lit
1499         imm = litToImm lit
1500         code dst = toOL [
1501             SETHI (HI imm) dst,
1502             OR False dst (RIImm (LO imm)) dst]
1503     in return (Any I32 code)
1504
1505 #endif /* sparc_TARGET_ARCH */
1506
1507 #if powerpc_TARGET_ARCH
1508 getRegister (CmmLoad mem pk)
1509   | pk /= I64
1510   = do
1511         Amode addr addr_code <- getAmode mem
1512         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1513                        addr_code `snocOL` LD pk dst addr
1514         return (Any pk code)
1515
1516 -- catch simple cases of zero- or sign-extended load
1517 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1518     Amode addr addr_code <- getAmode mem
1519     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1520
1521 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1522
1523 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1524     Amode addr addr_code <- getAmode mem
1525     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1526
1527 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1528     Amode addr addr_code <- getAmode mem
1529     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1530
1531 getRegister (CmmMachOp mop [x]) -- unary MachOps
1532   = case mop of
1533       MO_Not rep   -> trivialUCode rep NOT x
1534
1535       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1536       MO_S_Conv F32 F64 -> conversionNop F64 x
1537
1538       MO_S_Conv from to
1539         | from == to         -> conversionNop to x
1540         | isFloatingRep from -> coerceFP2Int from to x
1541         | isFloatingRep to   -> coerceInt2FP from to x
1542
1543         -- narrowing is a nop: we treat the high bits as undefined
1544       MO_S_Conv I32 to -> conversionNop to x
1545       MO_S_Conv I16 I8 -> conversionNop I8 x
1546       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1547       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1548
1549       MO_U_Conv from to
1550         | from == to -> conversionNop to x
1551         -- narrowing is a nop: we treat the high bits as undefined
1552       MO_U_Conv I32 to -> conversionNop to x
1553       MO_U_Conv I16 I8 -> conversionNop I8 x
1554       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1555       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1556
1557       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1558       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1559       MO_S_Neg rep      -> trivialUCode rep NEG x
1560
1561     where
1562         conversionNop new_rep expr
1563             = do e_code <- getRegister expr
1564                  return (swizzleRegisterRep e_code new_rep)
1565
1566 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1567   = case mop of
1568       MO_Eq F32 -> condFltReg EQQ x y
1569       MO_Ne F32 -> condFltReg NE  x y
1570
1571       MO_S_Gt F32 -> condFltReg GTT x y
1572       MO_S_Ge F32 -> condFltReg GE  x y
1573       MO_S_Lt F32 -> condFltReg LTT x y
1574       MO_S_Le F32 -> condFltReg LE  x y
1575
1576       MO_Eq F64 -> condFltReg EQQ x y
1577       MO_Ne F64 -> condFltReg NE  x y
1578
1579       MO_S_Gt F64 -> condFltReg GTT x y
1580       MO_S_Ge F64 -> condFltReg GE  x y
1581       MO_S_Lt F64 -> condFltReg LTT x y
1582       MO_S_Le F64 -> condFltReg LE  x y
1583
1584       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1585       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1586
1587       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1588       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1589       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1590       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1591
1592       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1593       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1594       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1595       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1596
1597       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1598       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1599       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1600       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1601
1602       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1603       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1604       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1605       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1606
1607          -- optimize addition with 32-bit immediate
1608          -- (needed for PIC)
1609       MO_Add I32 ->
1610         case y of
1611           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1612             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1613           CmmLit lit
1614             -> do
1615                 (src, srcCode) <- getSomeReg x
1616                 let imm = litToImm lit
1617                     code dst = srcCode `appOL` toOL [
1618                                     ADDIS dst src (HA imm),
1619                                     ADD dst dst (RIImm (LO imm))
1620                                 ]
1621                 return (Any I32 code)
1622           _ -> trivialCode I32 True ADD x y
1623
1624       MO_Add rep -> trivialCode rep True ADD x y
1625       MO_Sub rep ->
1626         case y of    -- subfi ('substract from' with immediate) doesn't exist
1627           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1628             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1629           _ -> trivialCodeNoImm rep SUBF y x
1630
1631       MO_Mul rep -> trivialCode rep True MULLW x y
1632
1633       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1634
1635       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1636       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1637
1638       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1639       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1640
1641       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1642       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1643
1644       MO_And rep   -> trivialCode rep False AND x y
1645       MO_Or rep    -> trivialCode rep False OR x y
1646       MO_Xor rep   -> trivialCode rep False XOR x y
1647
1648       MO_Shl rep   -> trivialCode rep False SLW x y
1649       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1650       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1651
1652 getRegister (CmmLit (CmmInt i rep))
1653   | Just imm <- makeImmediate rep True i
1654   = let
1655         code dst = unitOL (LI dst imm)
1656     in
1657         return (Any rep code)
1658
1659 getRegister (CmmLit (CmmFloat f frep)) = do
1660     lbl <- getNewLabelNat
1661     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1662     Amode addr addr_code <- getAmode dynRef
1663     let code dst =
1664             LDATA ReadOnlyData  [CmmDataLabel lbl,
1665                                  CmmStaticLit (CmmFloat f frep)]
1666             `consOL` (addr_code `snocOL` LD frep dst addr)
1667     return (Any frep code)
1668
1669 getRegister (CmmLit lit)
1670   = let rep = cmmLitRep lit
1671         imm = litToImm lit
1672         code dst = toOL [
1673               LIS dst (HI imm),
1674               OR dst dst (RIImm (LO imm))
1675           ]
1676     in return (Any rep code)
1677
1678 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1679
1680     -- extend?Rep: wrap integer expression of type rep
1681     -- in a conversion to I32
1682 extendSExpr I32 x = x
1683 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1684 extendUExpr I32 x = x
1685 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1686
1687 #endif /* powerpc_TARGET_ARCH */
1688
1689
1690 -- -----------------------------------------------------------------------------
1691 --  The 'Amode' type: Memory addressing modes passed up the tree.
1692
1693 data Amode = Amode AddrMode InstrBlock
1694
1695 {-
1696 Now, given a tree (the argument to an CmmLoad) that references memory,
1697 produce a suitable addressing mode.
1698
1699 A Rule of the Game (tm) for Amodes: use of the addr bit must
1700 immediately follow use of the code part, since the code part puts
1701 values in registers which the addr then refers to.  So you can't put
1702 anything in between, lest it overwrite some of those registers.  If
1703 you need to do some other computation between the code part and use of
1704 the addr bit, first store the effective address from the amode in a
1705 temporary, then do the other computation, and then use the temporary:
1706
1707     code
1708     LEA amode, tmp
1709     ... other computation ...
1710     ... (tmp) ...
1711 -}
1712
1713 getAmode :: CmmExpr -> NatM Amode
1714 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1715
1716 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1717
1718 #if alpha_TARGET_ARCH
1719
1720 getAmode (StPrim IntSubOp [x, StInt i])
1721   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1722     getRegister x               `thenNat` \ register ->
1723     let
1724         code = registerCode register tmp
1725         reg  = registerName register tmp
1726         off  = ImmInt (-(fromInteger i))
1727     in
1728     return (Amode (AddrRegImm reg off) code)
1729
1730 getAmode (StPrim IntAddOp [x, StInt i])
1731   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1732     getRegister x               `thenNat` \ register ->
1733     let
1734         code = registerCode register tmp
1735         reg  = registerName register tmp
1736         off  = ImmInt (fromInteger i)
1737     in
1738     return (Amode (AddrRegImm reg off) code)
1739
1740 getAmode leaf
1741   | isJust imm
1742   = return (Amode (AddrImm imm__2) id)
1743   where
1744     imm = maybeImm leaf
1745     imm__2 = case imm of Just x -> x
1746
1747 getAmode other
1748   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1749     getRegister other           `thenNat` \ register ->
1750     let
1751         code = registerCode register tmp
1752         reg  = registerName register tmp
1753     in
1754     return (Amode (AddrReg reg) code)
1755
1756 #endif /* alpha_TARGET_ARCH */
1757
1758 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1759
1760 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1761
1762 -- This is all just ridiculous, since it carefully undoes
1763 -- what mangleIndexTree has just done.
1764 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1765   | not (is64BitLit lit)
1766   -- ASSERT(rep == I32)???
1767   = do (x_reg, x_code) <- getSomeReg x
1768        let off = ImmInt (-(fromInteger i))
1769        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1770
1771 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1772   | not (is64BitLit lit)
1773   -- ASSERT(rep == I32)???
1774   = do (x_reg, x_code) <- getSomeReg x
1775        let off = ImmInt (fromInteger i)
1776        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1777
1778 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1779 -- recognised by the next rule.
1780 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1781                                   b@(CmmLit _)])
1782   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1783
1784 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1785                                         [y, CmmLit (CmmInt shift _)]])
1786   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1787   = do (x_reg, x_code) <- getNonClobberedReg x
1788         -- x must be in a temp, because it has to stay live over y_code
1789         -- we could compre x_reg and y_reg and do something better here...
1790        (y_reg, y_code) <- getSomeReg y
1791        let
1792            code = x_code `appOL` y_code
1793            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1794        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1795                code)
1796
1797 getAmode (CmmLit lit) | not (is64BitLit lit)
1798   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1799
1800 getAmode expr = do
1801   (reg,code) <- getSomeReg expr
1802   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1803
1804 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1805
1806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1807
1808 #if sparc_TARGET_ARCH
1809
1810 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1811   | fits13Bits (-i)
1812   = do
1813        (reg, code) <- getSomeReg x
1814        let
1815          off  = ImmInt (-(fromInteger i))
1816        return (Amode (AddrRegImm reg off) code)
1817
1818
1819 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1820   | fits13Bits i
1821   = do
1822        (reg, code) <- getSomeReg x
1823        let
1824          off  = ImmInt (fromInteger i)
1825        return (Amode (AddrRegImm reg off) code)
1826
1827 getAmode (CmmMachOp (MO_Add rep) [x, y])
1828   = do
1829     (regX, codeX) <- getSomeReg x
1830     (regY, codeY) <- getSomeReg y
1831     let
1832         code = codeX `appOL` codeY
1833     return (Amode (AddrRegReg regX regY) code)
1834
1835 -- XXX Is this same as "leaf" in Stix?
1836 getAmode (CmmLit lit)
1837   = do
1838       tmp <- getNewRegNat I32
1839       let
1840         code = unitOL (SETHI (HI imm__2) tmp)
1841       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1842       where
1843          imm__2 = litToImm lit
1844
1845 getAmode other
1846   = do
1847        (reg, code) <- getSomeReg other
1848        let
1849             off  = ImmInt 0
1850        return (Amode (AddrRegImm reg off) code)
1851
1852 #endif /* sparc_TARGET_ARCH */
1853
1854 #ifdef powerpc_TARGET_ARCH
1855 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1856   | Just off <- makeImmediate I32 True (-i)
1857   = do
1858         (reg, code) <- getSomeReg x
1859         return (Amode (AddrRegImm reg off) code)
1860
1861
1862 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1863   | Just off <- makeImmediate I32 True i
1864   = do
1865         (reg, code) <- getSomeReg x
1866         return (Amode (AddrRegImm reg off) code)
1867
1868    -- optimize addition with 32-bit immediate
1869    -- (needed for PIC)
1870 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1871   = do
1872         tmp <- getNewRegNat I32
1873         (src, srcCode) <- getSomeReg x
1874         let imm = litToImm lit
1875             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1876         return (Amode (AddrRegImm tmp (LO imm)) code)
1877
1878 getAmode (CmmLit lit)
1879   = do
1880         tmp <- getNewRegNat I32
1881         let imm = litToImm lit
1882             code = unitOL (LIS tmp (HA imm))
1883         return (Amode (AddrRegImm tmp (LO imm)) code)
1884
1885 getAmode (CmmMachOp (MO_Add I32) [x, y])
1886   = do
1887         (regX, codeX) <- getSomeReg x
1888         (regY, codeY) <- getSomeReg y
1889         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1890
1891 getAmode other
1892   = do
1893         (reg, code) <- getSomeReg other
1894         let
1895             off  = ImmInt 0
1896         return (Amode (AddrRegImm reg off) code)
1897 #endif /* powerpc_TARGET_ARCH */
1898
1899 -- -----------------------------------------------------------------------------
1900 -- getOperand: sometimes any operand will do.
1901
1902 -- getNonClobberedOperand: the value of the operand will remain valid across
1903 -- the computation of an arbitrary expression, unless the expression
1904 -- is computed directly into a register which the operand refers to
1905 -- (see trivialCode where this function is used for an example).
1906
1907 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1908
1909 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1910 #if x86_64_TARGET_ARCH
1911 getNonClobberedOperand (CmmLit lit)
1912   | isSuitableFloatingPointLit lit = do
1913     lbl <- getNewLabelNat
1914     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1915                                            CmmStaticLit lit])
1916     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1917 #endif
1918 getNonClobberedOperand (CmmLit lit)
1919   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1920     return (OpImm (litToImm lit), nilOL)
1921 getNonClobberedOperand (CmmLoad mem pk)
1922   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1923     Amode src mem_code <- getAmode mem
1924     (src',save_code) <-
1925         if (amodeCouldBeClobbered src)
1926                 then do
1927                    tmp <- getNewRegNat wordRep
1928                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1929                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1930                 else
1931                    return (src, nilOL)
1932     return (OpAddr src', save_code `appOL` mem_code)
1933 getNonClobberedOperand e = do
1934     (reg, code) <- getNonClobberedReg e
1935     return (OpReg reg, code)
1936
1937 amodeCouldBeClobbered :: AddrMode -> Bool
1938 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1939
1940 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1941 regClobbered _ = False
1942
1943 -- getOperand: the operand is not required to remain valid across the
1944 -- computation of an arbitrary expression.
1945 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1946 #if x86_64_TARGET_ARCH
1947 getOperand (CmmLit lit)
1948   | isSuitableFloatingPointLit lit = do
1949     lbl <- getNewLabelNat
1950     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1951                                            CmmStaticLit lit])
1952     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1953 #endif
1954 getOperand (CmmLit lit)
1955   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1956     return (OpImm (litToImm lit), nilOL)
1957 getOperand (CmmLoad mem pk)
1958   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1959     Amode src mem_code <- getAmode mem
1960     return (OpAddr src, mem_code)
1961 getOperand e = do
1962     (reg, code) <- getSomeReg e
1963     return (OpReg reg, code)
1964
1965 isOperand :: CmmExpr -> Bool
1966 isOperand (CmmLoad _ _) = True
1967 isOperand (CmmLit lit)  = not (is64BitLit lit)
1968                           || isSuitableFloatingPointLit lit
1969 isOperand _             = False
1970
1971 -- if we want a floating-point literal as an operand, we can
1972 -- use it directly from memory.  However, if the literal is
1973 -- zero, we're better off generating it into a register using
1974 -- xor.
1975 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1976 isSuitableFloatingPointLit _ = False
1977
1978 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1979 getRegOrMem (CmmLoad mem pk)
1980   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1981     Amode src mem_code <- getAmode mem
1982     return (OpAddr src, mem_code)
1983 getRegOrMem e = do
1984     (reg, code) <- getNonClobberedReg e
1985     return (OpReg reg, code)
1986
1987 #if x86_64_TARGET_ARCH
1988 is64BitLit (CmmInt i I64) = is64BitInteger i
1989    -- assume that labels are in the range 0-2^31-1: this assumes the
1990    -- small memory model (see gcc docs, -mcmodel=small).
1991 #endif
1992 is64BitLit x = False
1993 #endif
1994
1995 is64BitInteger :: Integer -> Bool
1996 is64BitInteger i = i > 0x7fffffff || i < -0x80000000
1997
1998 -- -----------------------------------------------------------------------------
1999 --  The 'CondCode' type:  Condition codes passed up the tree.
2000
2001 data CondCode = CondCode Bool Cond InstrBlock
2002
2003 -- Set up a condition code for a conditional branch.
2004
2005 getCondCode :: CmmExpr -> NatM CondCode
2006
2007 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2008
2009 #if alpha_TARGET_ARCH
2010 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2011 #endif /* alpha_TARGET_ARCH */
2012
2013 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2014
2015 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2016 -- yes, they really do seem to want exactly the same!
2017
2018 getCondCode (CmmMachOp mop [x, y])
2019   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2020     case mop of
2021       MO_Eq F32 -> condFltCode EQQ x y
2022       MO_Ne F32 -> condFltCode NE  x y
2023
2024       MO_S_Gt F32 -> condFltCode GTT x y
2025       MO_S_Ge F32 -> condFltCode GE  x y
2026       MO_S_Lt F32 -> condFltCode LTT x y
2027       MO_S_Le F32 -> condFltCode LE  x y
2028
2029       MO_Eq F64 -> condFltCode EQQ x y
2030       MO_Ne F64 -> condFltCode NE  x y
2031
2032       MO_S_Gt F64 -> condFltCode GTT x y
2033       MO_S_Ge F64 -> condFltCode GE  x y
2034       MO_S_Lt F64 -> condFltCode LTT x y
2035       MO_S_Le F64 -> condFltCode LE  x y
2036
2037       MO_Eq rep -> condIntCode EQQ  x y
2038       MO_Ne rep -> condIntCode NE   x y
2039
2040       MO_S_Gt rep -> condIntCode GTT  x y
2041       MO_S_Ge rep -> condIntCode GE   x y
2042       MO_S_Lt rep -> condIntCode LTT  x y
2043       MO_S_Le rep -> condIntCode LE   x y
2044
2045       MO_U_Gt rep -> condIntCode GU   x y
2046       MO_U_Ge rep -> condIntCode GEU  x y
2047       MO_U_Lt rep -> condIntCode LU   x y
2048       MO_U_Le rep -> condIntCode LEU  x y
2049
2050       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2051
2052 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2053
2054 #elif powerpc_TARGET_ARCH
2055
2056 -- almost the same as everywhere else - but we need to
2057 -- extend small integers to 32 bit first
2058
2059 getCondCode (CmmMachOp mop [x, y])
2060   = case mop of
2061       MO_Eq F32 -> condFltCode EQQ x y
2062       MO_Ne F32 -> condFltCode NE  x y
2063
2064       MO_S_Gt F32 -> condFltCode GTT x y
2065       MO_S_Ge F32 -> condFltCode GE  x y
2066       MO_S_Lt F32 -> condFltCode LTT x y
2067       MO_S_Le F32 -> condFltCode LE  x y
2068
2069       MO_Eq F64 -> condFltCode EQQ x y
2070       MO_Ne F64 -> condFltCode NE  x y
2071
2072       MO_S_Gt F64 -> condFltCode GTT x y
2073       MO_S_Ge F64 -> condFltCode GE  x y
2074       MO_S_Lt F64 -> condFltCode LTT x y
2075       MO_S_Le F64 -> condFltCode LE  x y
2076
2077       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2078       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2079
2080       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2081       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2082       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2083       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2084
2085       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2086       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2087       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2088       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2089
2090       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2091
2092 getCondCode other =  panic "getCondCode(2)(powerpc)"
2093
2094
2095 #endif
2096
2097
2098 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2099 -- passed back up the tree.
2100
2101 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2102
2103 #if alpha_TARGET_ARCH
2104 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2105 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2106 #endif /* alpha_TARGET_ARCH */
2107
2108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2109 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2110
2111 -- memory vs immediate
2112 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2113     Amode x_addr x_code <- getAmode x
2114     let
2115         imm  = litToImm lit
2116         code = x_code `snocOL`
2117                   CMP pk (OpImm imm) (OpAddr x_addr)
2118     --
2119     return (CondCode False cond code)
2120
2121 -- anything vs zero
2122 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2123     (x_reg, x_code) <- getSomeReg x
2124     let
2125         code = x_code `snocOL`
2126                   TEST pk (OpReg x_reg) (OpReg x_reg)
2127     --
2128     return (CondCode False cond code)
2129
2130 -- anything vs operand
2131 condIntCode cond x y | isOperand y = do
2132     (x_reg, x_code) <- getNonClobberedReg x
2133     (y_op,  y_code) <- getOperand y
2134     let
2135         code = x_code `appOL` y_code `snocOL`
2136                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2137     -- in
2138     return (CondCode False cond code)
2139
2140 -- anything vs anything
2141 condIntCode cond x y = do
2142   (y_reg, y_code) <- getNonClobberedReg y
2143   (x_op, x_code) <- getRegOrMem x
2144   let
2145         code = y_code `appOL`
2146                x_code `snocOL`
2147                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2148   -- in
2149   return (CondCode False cond code)
2150 #endif
2151
2152 #if i386_TARGET_ARCH
2153 condFltCode cond x y
2154   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2155   (x_reg, x_code) <- getNonClobberedReg x
2156   (y_reg, y_code) <- getSomeReg y
2157   let
2158         code = x_code `appOL` y_code `snocOL`
2159                 GCMP cond x_reg y_reg
2160   -- The GCMP insn does the test and sets the zero flag if comparable
2161   -- and true.  Hence we always supply EQQ as the condition to test.
2162   return (CondCode True EQQ code)
2163 #endif /* i386_TARGET_ARCH */
2164
2165 #if x86_64_TARGET_ARCH
2166 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2167 -- an operand, but the right must be a reg.  We can probably do better
2168 -- than this general case...
2169 condFltCode cond x y = do
2170   (x_reg, x_code) <- getNonClobberedReg x
2171   (y_op, y_code) <- getOperand y
2172   let
2173         code = x_code `appOL`
2174                y_code `snocOL`
2175                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2176         -- NB(1): we need to use the unsigned comparison operators on the
2177         -- result of this comparison.
2178   -- in
2179   return (CondCode True (condToUnsigned cond) code)
2180 #endif
2181
2182 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2183
2184 #if sparc_TARGET_ARCH
2185
2186 condIntCode cond x (CmmLit (CmmInt y rep))
2187   | fits13Bits y
2188   = do
2189        (src1, code) <- getSomeReg x
2190        let
2191            src2 = ImmInt (fromInteger y)
2192            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2193        return (CondCode False cond code')
2194
2195 condIntCode cond x y = do
2196     (src1, code1) <- getSomeReg x
2197     (src2, code2) <- getSomeReg y
2198     let
2199         code__2 = code1 `appOL` code2 `snocOL`
2200                   SUB False True src1 (RIReg src2) g0
2201     return (CondCode False cond code__2)
2202
2203 -----------
2204 condFltCode cond x y = do
2205     (src1, code1) <- getSomeReg x
2206     (src2, code2) <- getSomeReg y
2207     tmp <- getNewRegNat F64
2208     let
2209         promote x = FxTOy F32 F64 x tmp
2210
2211         pk1   = cmmExprRep x
2212         pk2   = cmmExprRep y
2213
2214         code__2 =
2215                 if pk1 == pk2 then
2216                     code1 `appOL` code2 `snocOL`
2217                     FCMP True pk1 src1 src2
2218                 else if pk1 == F32 then
2219                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2220                     FCMP True F64 tmp src2
2221                 else
2222                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2223                     FCMP True F64 src1 tmp
2224     return (CondCode True cond code__2)
2225
2226 #endif /* sparc_TARGET_ARCH */
2227
2228 #if powerpc_TARGET_ARCH
2229 --  ###FIXME: I16 and I8!
2230 condIntCode cond x (CmmLit (CmmInt y rep))
2231   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2232   = do
2233         (src1, code) <- getSomeReg x
2234         let
2235             code' = code `snocOL`
2236                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2237         return (CondCode False cond code')
2238
2239 condIntCode cond x y = do
2240     (src1, code1) <- getSomeReg x
2241     (src2, code2) <- getSomeReg y
2242     let
2243         code' = code1 `appOL` code2 `snocOL`
2244                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2245     return (CondCode False cond code')
2246
2247 condFltCode cond x y = do
2248     (src1, code1) <- getSomeReg x
2249     (src2, code2) <- getSomeReg y
2250     let
2251         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2252         code'' = case cond of -- twiddle CR to handle unordered case
2253                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2254                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2255                     _ -> code'
2256                  where
2257                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2258     return (CondCode True cond code'')
2259
2260 #endif /* powerpc_TARGET_ARCH */
2261
2262 -- -----------------------------------------------------------------------------
2263 -- Generating assignments
2264
2265 -- Assignments are really at the heart of the whole code generation
2266 -- business.  Almost all top-level nodes of any real importance are
2267 -- assignments, which correspond to loads, stores, or register
2268 -- transfers.  If we're really lucky, some of the register transfers
2269 -- will go away, because we can use the destination register to
2270 -- complete the code generation for the right hand side.  This only
2271 -- fails when the right hand side is forced into a fixed register
2272 -- (e.g. the result of a call).
2273
2274 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2275 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2276
2277 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2278 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2279
2280 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2281
2282 #if alpha_TARGET_ARCH
2283
2284 assignIntCode pk (CmmLoad dst _) src
2285   = getNewRegNat IntRep             `thenNat` \ tmp ->
2286     getAmode dst                    `thenNat` \ amode ->
2287     getRegister src                 `thenNat` \ register ->
2288     let
2289         code1   = amodeCode amode []
2290         dst__2  = amodeAddr amode
2291         code2   = registerCode register tmp []
2292         src__2  = registerName register tmp
2293         sz      = primRepToSize pk
2294         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2295     in
2296     return code__2
2297
2298 assignIntCode pk dst src
2299   = getRegister dst                         `thenNat` \ register1 ->
2300     getRegister src                         `thenNat` \ register2 ->
2301     let
2302         dst__2  = registerName register1 zeroh
2303         code    = registerCode register2 dst__2
2304         src__2  = registerName register2 dst__2
2305         code__2 = if isFixed register2
2306                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2307                   else code
2308     in
2309     return code__2
2310
2311 #endif /* alpha_TARGET_ARCH */
2312
2313 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2314
2315 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2316
2317 -- integer assignment to memory
2318 assignMem_IntCode pk addr src = do
2319     Amode addr code_addr <- getAmode addr
2320     (code_src, op_src)   <- get_op_RI src
2321     let
2322         code = code_src `appOL`
2323                code_addr `snocOL`
2324                   MOV pk op_src (OpAddr addr)
2325         -- NOTE: op_src is stable, so it will still be valid
2326         -- after code_addr.  This may involve the introduction
2327         -- of an extra MOV to a temporary register, but we hope
2328         -- the register allocator will get rid of it.
2329     --
2330     return code
2331   where
2332     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2333     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2334       = return (nilOL, OpImm (litToImm lit))
2335     get_op_RI op
2336       = do (reg,code) <- getNonClobberedReg op
2337            return (code, OpReg reg)
2338
2339
2340 -- Assign; dst is a reg, rhs is mem
2341 assignReg_IntCode pk reg (CmmLoad src _) = do
2342   load_code <- intLoadCode (MOV pk) src
2343   return (load_code (getRegisterReg reg))
2344
2345 -- dst is a reg, but src could be anything
2346 assignReg_IntCode pk reg src = do
2347   code <- getAnyReg src
2348   return (code (getRegisterReg reg))
2349
2350 #endif /* i386_TARGET_ARCH */
2351
2352 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2353
2354 #if sparc_TARGET_ARCH
2355
2356 assignMem_IntCode pk addr src = do
2357     (srcReg, code) <- getSomeReg src
2358     Amode dstAddr addr_code <- getAmode addr
2359     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2360
2361 assignReg_IntCode pk reg src = do
2362     r <- getRegister src
2363     return $ case r of
2364         Any _ code         -> code dst
2365         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2366     where
2367       dst = getRegisterReg reg
2368
2369
2370 #endif /* sparc_TARGET_ARCH */
2371
2372 #if powerpc_TARGET_ARCH
2373
2374 assignMem_IntCode pk addr src = do
2375     (srcReg, code) <- getSomeReg src
2376     Amode dstAddr addr_code <- getAmode addr
2377     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2378
2379 -- dst is a reg, but src could be anything
2380 assignReg_IntCode pk reg src
2381     = do
2382         r <- getRegister src
2383         return $ case r of
2384             Any _ code         -> code dst
2385             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2386     where
2387         dst = getRegisterReg reg
2388
2389 #endif /* powerpc_TARGET_ARCH */
2390
2391
2392 -- -----------------------------------------------------------------------------
2393 -- Floating-point assignments
2394
2395 #if alpha_TARGET_ARCH
2396
2397 assignFltCode pk (CmmLoad dst _) src
2398   = getNewRegNat pk                 `thenNat` \ tmp ->
2399     getAmode dst                    `thenNat` \ amode ->
2400     getRegister src                         `thenNat` \ register ->
2401     let
2402         code1   = amodeCode amode []
2403         dst__2  = amodeAddr amode
2404         code2   = registerCode register tmp []
2405         src__2  = registerName register tmp
2406         sz      = primRepToSize pk
2407         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2408     in
2409     return code__2
2410
2411 assignFltCode pk dst src
2412   = getRegister dst                         `thenNat` \ register1 ->
2413     getRegister src                         `thenNat` \ register2 ->
2414     let
2415         dst__2  = registerName register1 zeroh
2416         code    = registerCode register2 dst__2
2417         src__2  = registerName register2 dst__2
2418         code__2 = if isFixed register2
2419                   then code . mkSeqInstr (FMOV src__2 dst__2)
2420                   else code
2421     in
2422     return code__2
2423
2424 #endif /* alpha_TARGET_ARCH */
2425
2426 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2427
2428 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2429
2430 -- Floating point assignment to memory
2431 assignMem_FltCode pk addr src = do
2432   (src_reg, src_code) <- getNonClobberedReg src
2433   Amode addr addr_code <- getAmode addr
2434   let
2435         code = src_code `appOL`
2436                addr_code `snocOL`
2437                 IF_ARCH_i386(GST pk src_reg addr,
2438                              MOV pk (OpReg src_reg) (OpAddr addr))
2439   return code
2440
2441 -- Floating point assignment to a register/temporary
2442 assignReg_FltCode pk reg src = do
2443   src_code <- getAnyReg src
2444   return (src_code (getRegisterReg reg))
2445
2446 #endif /* i386_TARGET_ARCH */
2447
2448 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2449
2450 #if sparc_TARGET_ARCH
2451
2452 -- Floating point assignment to memory
2453 assignMem_FltCode pk addr src = do
2454     Amode dst__2 code1 <- getAmode addr
2455     (src__2, code2) <- getSomeReg src
2456     tmp1 <- getNewRegNat pk
2457     let
2458         pk__2   = cmmExprRep src
2459         code__2 = code1 `appOL` code2 `appOL`
2460             if   pk == pk__2
2461             then unitOL (ST pk src__2 dst__2)
2462             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2463     return code__2
2464
2465 -- Floating point assignment to a register/temporary
2466 -- ToDo: Verify correctness
2467 assignReg_FltCode pk reg src = do
2468     r <- getRegister src
2469     v1 <- getNewRegNat pk
2470     return $ case r of
2471         Any _ code         -> code dst
2472         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2473     where
2474       dst = getRegisterReg reg
2475
2476 #endif /* sparc_TARGET_ARCH */
2477
2478 #if powerpc_TARGET_ARCH
2479
2480 -- Easy, isn't it?
2481 assignMem_FltCode = assignMem_IntCode
2482 assignReg_FltCode = assignReg_IntCode
2483
2484 #endif /* powerpc_TARGET_ARCH */
2485
2486
2487 -- -----------------------------------------------------------------------------
2488 -- Generating an non-local jump
2489
2490 -- (If applicable) Do not fill the delay slots here; you will confuse the
2491 -- register allocator.
2492
2493 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2494
2495 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2496
2497 #if alpha_TARGET_ARCH
2498
2499 genJump (CmmLabel lbl)
2500   | isAsmTemp lbl = returnInstr (BR target)
2501   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2502   where
2503     target = ImmCLbl lbl
2504
2505 genJump tree
2506   = getRegister tree                `thenNat` \ register ->
2507     getNewRegNat PtrRep             `thenNat` \ tmp ->
2508     let
2509         dst    = registerName register pv
2510         code   = registerCode register pv
2511         target = registerName register pv
2512     in
2513     if isFixed register then
2514         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2515     else
2516     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2517
2518 #endif /* alpha_TARGET_ARCH */
2519
2520 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2521
2522 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2523
2524 genJump (CmmLoad mem pk) = do
2525   Amode target code <- getAmode mem
2526   return (code `snocOL` JMP (OpAddr target))
2527
2528 genJump (CmmLit lit) = do
2529   return (unitOL (JMP (OpImm (litToImm lit))))
2530
2531 genJump expr = do
2532   (reg,code) <- getSomeReg expr
2533   return (code `snocOL` JMP (OpReg reg))
2534
2535 #endif /* i386_TARGET_ARCH */
2536
2537 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2538
2539 #if sparc_TARGET_ARCH
2540
2541 genJump (CmmLit (CmmLabel lbl))
2542   = return (toOL [CALL (Left target) 0 True, NOP])
2543   where
2544     target = ImmCLbl lbl
2545
2546 genJump tree
2547   = do
2548         (target, code) <- getSomeReg tree
2549         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2550
2551 #endif /* sparc_TARGET_ARCH */
2552
2553 #if powerpc_TARGET_ARCH
2554 genJump (CmmLit (CmmLabel lbl))
2555   = return (unitOL $ JMP lbl)
2556
2557 genJump tree
2558   = do
2559         (target,code) <- getSomeReg tree
2560         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2561 #endif /* powerpc_TARGET_ARCH */
2562
2563
2564 -- -----------------------------------------------------------------------------
2565 --  Unconditional branches
2566
2567 genBranch :: BlockId -> NatM InstrBlock
2568
2569 genBranch = return . toOL . mkBranchInstr
2570
2571 -- -----------------------------------------------------------------------------
2572 --  Conditional jumps
2573
2574 {-
2575 Conditional jumps are always to local labels, so we can use branch
2576 instructions.  We peek at the arguments to decide what kind of
2577 comparison to do.
2578
2579 ALPHA: For comparisons with 0, we're laughing, because we can just do
2580 the desired conditional branch.
2581
2582 I386: First, we have to ensure that the condition
2583 codes are set according to the supplied comparison operation.
2584
2585 SPARC: First, we have to ensure that the condition codes are set
2586 according to the supplied comparison operation.  We generate slightly
2587 different code for floating point comparisons, because a floating
2588 point operation cannot directly precede a @BF@.  We assume the worst
2589 and fill that slot with a @NOP@.
2590
2591 SPARC: Do not fill the delay slots here; you will confuse the register
2592 allocator.
2593 -}
2594
2595
2596 genCondJump
2597     :: BlockId      -- the branch target
2598     -> CmmExpr      -- the condition on which to branch
2599     -> NatM InstrBlock
2600
2601 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2602
2603 #if alpha_TARGET_ARCH
2604
2605 genCondJump id (StPrim op [x, StInt 0])
2606   = getRegister x                           `thenNat` \ register ->
2607     getNewRegNat (registerRep register)
2608                                     `thenNat` \ tmp ->
2609     let
2610         code   = registerCode register tmp
2611         value  = registerName register tmp
2612         pk     = registerRep register
2613         target = ImmCLbl lbl
2614     in
2615     returnSeq code [BI (cmpOp op) value target]
2616   where
2617     cmpOp CharGtOp = GTT
2618     cmpOp CharGeOp = GE
2619     cmpOp CharEqOp = EQQ
2620     cmpOp CharNeOp = NE
2621     cmpOp CharLtOp = LTT
2622     cmpOp CharLeOp = LE
2623     cmpOp IntGtOp = GTT
2624     cmpOp IntGeOp = GE
2625     cmpOp IntEqOp = EQQ
2626     cmpOp IntNeOp = NE
2627     cmpOp IntLtOp = LTT
2628     cmpOp IntLeOp = LE
2629     cmpOp WordGtOp = NE
2630     cmpOp WordGeOp = ALWAYS
2631     cmpOp WordEqOp = EQQ
2632     cmpOp WordNeOp = NE
2633     cmpOp WordLtOp = NEVER
2634     cmpOp WordLeOp = EQQ
2635     cmpOp AddrGtOp = NE
2636     cmpOp AddrGeOp = ALWAYS
2637     cmpOp AddrEqOp = EQQ
2638     cmpOp AddrNeOp = NE
2639     cmpOp AddrLtOp = NEVER
2640     cmpOp AddrLeOp = EQQ
2641
2642 genCondJump lbl (StPrim op [x, StDouble 0.0])
2643   = getRegister x                           `thenNat` \ register ->
2644     getNewRegNat (registerRep register)
2645                                     `thenNat` \ tmp ->
2646     let
2647         code   = registerCode register tmp
2648         value  = registerName register tmp
2649         pk     = registerRep register
2650         target = ImmCLbl lbl
2651     in
2652     return (code . mkSeqInstr (BF (cmpOp op) value target))
2653   where
2654     cmpOp FloatGtOp = GTT
2655     cmpOp FloatGeOp = GE
2656     cmpOp FloatEqOp = EQQ
2657     cmpOp FloatNeOp = NE
2658     cmpOp FloatLtOp = LTT
2659     cmpOp FloatLeOp = LE
2660     cmpOp DoubleGtOp = GTT
2661     cmpOp DoubleGeOp = GE
2662     cmpOp DoubleEqOp = EQQ
2663     cmpOp DoubleNeOp = NE
2664     cmpOp DoubleLtOp = LTT
2665     cmpOp DoubleLeOp = LE
2666
2667 genCondJump lbl (StPrim op [x, y])
2668   | fltCmpOp op
2669   = trivialFCode pr instr x y       `thenNat` \ register ->
2670     getNewRegNat F64                `thenNat` \ tmp ->
2671     let
2672         code   = registerCode register tmp
2673         result = registerName register tmp
2674         target = ImmCLbl lbl
2675     in
2676     return (code . mkSeqInstr (BF cond result target))
2677   where
2678     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2679
2680     fltCmpOp op = case op of
2681         FloatGtOp -> True
2682         FloatGeOp -> True
2683         FloatEqOp -> True
2684         FloatNeOp -> True
2685         FloatLtOp -> True
2686         FloatLeOp -> True
2687         DoubleGtOp -> True
2688         DoubleGeOp -> True
2689         DoubleEqOp -> True
2690         DoubleNeOp -> True
2691         DoubleLtOp -> True
2692         DoubleLeOp -> True
2693         _ -> False
2694     (instr, cond) = case op of
2695         FloatGtOp -> (FCMP TF LE, EQQ)
2696         FloatGeOp -> (FCMP TF LTT, EQQ)
2697         FloatEqOp -> (FCMP TF EQQ, NE)
2698         FloatNeOp -> (FCMP TF EQQ, EQQ)
2699         FloatLtOp -> (FCMP TF LTT, NE)
2700         FloatLeOp -> (FCMP TF LE, NE)
2701         DoubleGtOp -> (FCMP TF LE, EQQ)
2702         DoubleGeOp -> (FCMP TF LTT, EQQ)
2703         DoubleEqOp -> (FCMP TF EQQ, NE)
2704         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2705         DoubleLtOp -> (FCMP TF LTT, NE)
2706         DoubleLeOp -> (FCMP TF LE, NE)
2707
2708 genCondJump lbl (StPrim op [x, y])
2709   = trivialCode instr x y           `thenNat` \ register ->
2710     getNewRegNat IntRep             `thenNat` \ tmp ->
2711     let
2712         code   = registerCode register tmp
2713         result = registerName register tmp
2714         target = ImmCLbl lbl
2715     in
2716     return (code . mkSeqInstr (BI cond result target))
2717   where
2718     (instr, cond) = case op of
2719         CharGtOp -> (CMP LE, EQQ)
2720         CharGeOp -> (CMP LTT, EQQ)
2721         CharEqOp -> (CMP EQQ, NE)
2722         CharNeOp -> (CMP EQQ, EQQ)
2723         CharLtOp -> (CMP LTT, NE)
2724         CharLeOp -> (CMP LE, NE)
2725         IntGtOp -> (CMP LE, EQQ)
2726         IntGeOp -> (CMP LTT, EQQ)
2727         IntEqOp -> (CMP EQQ, NE)
2728         IntNeOp -> (CMP EQQ, EQQ)
2729         IntLtOp -> (CMP LTT, NE)
2730         IntLeOp -> (CMP LE, NE)
2731         WordGtOp -> (CMP ULE, EQQ)
2732         WordGeOp -> (CMP ULT, EQQ)
2733         WordEqOp -> (CMP EQQ, NE)
2734         WordNeOp -> (CMP EQQ, EQQ)
2735         WordLtOp -> (CMP ULT, NE)
2736         WordLeOp -> (CMP ULE, NE)
2737         AddrGtOp -> (CMP ULE, EQQ)
2738         AddrGeOp -> (CMP ULT, EQQ)
2739         AddrEqOp -> (CMP EQQ, NE)
2740         AddrNeOp -> (CMP EQQ, EQQ)
2741         AddrLtOp -> (CMP ULT, NE)
2742         AddrLeOp -> (CMP ULE, NE)
2743
2744 #endif /* alpha_TARGET_ARCH */
2745
2746 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2747
2748 #if i386_TARGET_ARCH
2749
2750 genCondJump id bool = do
2751   CondCode _ cond code <- getCondCode bool
2752   return (code `snocOL` JXX cond id)
2753
2754 #endif
2755
2756 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2757
2758 #if x86_64_TARGET_ARCH
2759
2760 genCondJump id bool = do
2761   CondCode is_float cond cond_code <- getCondCode bool
2762   if not is_float
2763     then
2764         return (cond_code `snocOL` JXX cond id)
2765     else do
2766         lbl <- getBlockIdNat
2767
2768         -- see comment with condFltReg
2769         let code = case cond of
2770                         NE  -> or_unordered
2771                         GU  -> plain_test
2772                         GEU -> plain_test
2773                         _   -> and_ordered
2774
2775             plain_test = unitOL (
2776                   JXX cond id
2777                 )
2778             or_unordered = toOL [
2779                   JXX cond id,
2780                   JXX PARITY id
2781                 ]
2782             and_ordered = toOL [
2783                   JXX PARITY lbl,
2784                   JXX cond id,
2785                   JXX ALWAYS lbl,
2786                   NEWBLOCK lbl
2787                 ]
2788         return (cond_code `appOL` code)
2789
2790 #endif
2791
2792 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2793
2794 #if sparc_TARGET_ARCH
2795
2796 genCondJump (BlockId id) bool = do
2797   CondCode is_float cond code <- getCondCode bool
2798   return (
2799        code `appOL`
2800        toOL (
2801          if   is_float
2802          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2803          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2804        )
2805     )
2806
2807 #endif /* sparc_TARGET_ARCH */
2808
2809
2810 #if powerpc_TARGET_ARCH
2811
2812 genCondJump id bool = do
2813   CondCode is_float cond code <- getCondCode bool
2814   return (code `snocOL` BCC cond id)
2815
2816 #endif /* powerpc_TARGET_ARCH */
2817
2818
2819 -- -----------------------------------------------------------------------------
2820 --  Generating C calls
2821
2822 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2823 -- @get_arg@, which moves the arguments to the correct registers/stack
2824 -- locations.  Apart from that, the code is easy.
2825 --
2826 -- (If applicable) Do not fill the delay slots here; you will confuse the
2827 -- register allocator.
2828
2829 genCCall
2830     :: CmmCallTarget            -- function to call
2831     -> [(CmmReg,MachHint)]      -- where to put the result
2832     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2833     -> Maybe [GlobalReg]        -- volatile regs to save
2834     -> NatM InstrBlock
2835
2836 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2837
2838 #if alpha_TARGET_ARCH
2839
2840 ccallResultRegs =
2841
2842 genCCall fn cconv result_regs args
2843   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2844                           `thenNat` \ ((unused,_), argCode) ->
2845     let
2846         nRegs = length allArgRegs - length unused
2847         code = asmSeqThen (map ($ []) argCode)
2848     in
2849         returnSeq code [
2850             LDA pv (AddrImm (ImmLab (ptext fn))),
2851             JSR ra (AddrReg pv) nRegs,
2852             LDGP gp (AddrReg ra)]
2853   where
2854     ------------------------
2855     {-  Try to get a value into a specific register (or registers) for
2856         a call.  The first 6 arguments go into the appropriate
2857         argument register (separate registers for integer and floating
2858         point arguments, but used in lock-step), and the remaining
2859         arguments are dumped to the stack, beginning at 0(sp).  Our
2860         first argument is a pair of the list of remaining argument
2861         registers to be assigned for this call and the next stack
2862         offset to use for overflowing arguments.  This way,
2863         @get_Arg@ can be applied to all of a call's arguments using
2864         @mapAccumLNat@.
2865     -}
2866     get_arg
2867         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2868         -> StixTree             -- Current argument
2869         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2870
2871     -- We have to use up all of our argument registers first...
2872
2873     get_arg ((iDst,fDst):dsts, offset) arg
2874       = getRegister arg                     `thenNat` \ register ->
2875         let
2876             reg  = if isFloatingRep pk then fDst else iDst
2877             code = registerCode register reg
2878             src  = registerName register reg
2879             pk   = registerRep register
2880         in
2881         return (
2882             if isFloatingRep pk then
2883                 ((dsts, offset), if isFixed register then
2884                     code . mkSeqInstr (FMOV src fDst)
2885                     else code)
2886             else
2887                 ((dsts, offset), if isFixed register then
2888                     code . mkSeqInstr (OR src (RIReg src) iDst)
2889                     else code))
2890
2891     -- Once we have run out of argument registers, we move to the
2892     -- stack...
2893
2894     get_arg ([], offset) arg
2895       = getRegister arg                 `thenNat` \ register ->
2896         getNewRegNat (registerRep register)
2897                                         `thenNat` \ tmp ->
2898         let
2899             code = registerCode register tmp
2900             src  = registerName register tmp
2901             pk   = registerRep register
2902             sz   = primRepToSize pk
2903         in
2904         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2905
2906 #endif /* alpha_TARGET_ARCH */
2907
2908 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2909
2910 #if i386_TARGET_ARCH
2911
2912 -- we only cope with a single result for foreign calls
2913 genCCall (CmmPrim op) [(r,_)] args vols = do
2914   case op of
2915         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2916         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2917
2918         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2919         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2920
2921         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2922         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2923
2924         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2925         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2926
2927         other_op    -> outOfLineFloatOp op r args vols
2928  where
2929   actuallyInlineFloatOp rep instr [(x,_)]
2930         = do res <- trivialUFCode rep instr x
2931              any <- anyReg res
2932              return (any (getRegisterReg r))
2933
2934 genCCall target dest_regs args vols = do
2935     let
2936         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
2937 #if !darwin_TARGET_OS
2938         tot_arg_size        = sum sizes
2939 #else
2940         raw_arg_size        = sum sizes
2941         tot_arg_size        = roundTo 16 raw_arg_size
2942         arg_pad_size        = tot_arg_size - raw_arg_size
2943     delta0 <- getDeltaNat
2944     setDeltaNat (delta0 - arg_pad_size)
2945 #endif
2946
2947     push_codes <- mapM push_arg (reverse args)
2948     delta <- getDeltaNat
2949
2950     -- in
2951     -- deal with static vs dynamic call targets
2952     (callinsns,cconv) <-
2953       case target of
2954         -- CmmPrim -> ...
2955         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2956            -> -- ToDo: stdcall arg sizes
2957               return (unitOL (CALL (Left fn_imm) []), conv)
2958            where fn_imm = ImmCLbl lbl
2959         CmmForeignCall expr conv
2960            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2961                  ASSERT(dyn_rep == I32)
2962                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2963
2964     let push_code
2965 #if darwin_TARGET_OS
2966             | arg_pad_size /= 0
2967             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2968                     DELTA (delta0 - arg_pad_size)]
2969               `appOL` concatOL push_codes
2970             | otherwise
2971 #endif
2972             = concatOL push_codes
2973         call = callinsns `appOL`
2974                toOL (
2975                         -- Deallocate parameters after call for ccall;
2976                         -- but not for stdcall (callee does it)
2977                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
2978                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2979                   ++
2980                   [DELTA (delta + tot_arg_size)]
2981                )
2982     -- in
2983     setDeltaNat (delta + tot_arg_size)
2984
2985     let
2986         -- assign the results, if necessary
2987         assign_code []     = nilOL
2988         assign_code [(dest,_hint)] =
2989           case rep of
2990                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2991                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2992                 F32 -> unitOL (GMOV fake0 r_dest)
2993                 F64 -> unitOL (GMOV fake0 r_dest)
2994                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2995           where
2996                 r_dest_hi = getHiVRegFromLo r_dest
2997                 rep = cmmRegRep dest
2998                 r_dest = getRegisterReg dest
2999         assign_code many = panic "genCCall.assign_code many"
3000
3001     return (push_code `appOL`
3002             call `appOL`
3003             assign_code dest_regs)
3004
3005   where
3006     arg_size F64 = 8
3007     arg_size F32 = 4
3008     arg_size I64 = 8
3009     arg_size _   = 4
3010
3011     roundTo a x | x `mod` a == 0 = x
3012                 | otherwise = x + a - (x `mod` a)
3013
3014
3015     push_arg :: (CmmExpr,MachHint){-current argument-}
3016                     -> NatM InstrBlock  -- code
3017
3018     push_arg (arg,_hint) -- we don't need the hints on x86
3019       | arg_rep == I64 = do
3020         ChildCode64 code r_lo <- iselExpr64 arg
3021         delta <- getDeltaNat
3022         setDeltaNat (delta - 8)
3023         let
3024             r_hi = getHiVRegFromLo r_lo
3025         -- in
3026         return (       code `appOL`
3027                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3028                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3029                              DELTA (delta-8)]
3030             )
3031
3032       | otherwise = do
3033         (code, reg, sz) <- get_op arg
3034         delta <- getDeltaNat
3035         let size = arg_size sz
3036         setDeltaNat (delta-size)
3037         if (case sz of F64 -> True; F32 -> True; _ -> False)
3038            then return (code `appOL`
3039                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3040                               DELTA (delta-size),
3041                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3042                                                         EAIndexNone
3043                                                         (ImmInt 0))]
3044                        )
3045            else return (code `snocOL`
3046                         PUSH I32 (OpReg reg) `snocOL`
3047                         DELTA (delta-size)
3048                        )
3049       where
3050          arg_rep = cmmExprRep arg
3051
3052     ------------
3053     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3054     get_op op = do
3055         (reg,code) <- getSomeReg op
3056         return (code, reg, cmmExprRep op)
3057
3058 #endif /* i386_TARGET_ARCH */
3059
3060 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3061
3062 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3063   -> Maybe [GlobalReg] -> NatM InstrBlock
3064 outOfLineFloatOp mop res args vols
3065   = do
3066       targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3067       let target = CmmForeignCall targetExpr CCallConv
3068
3069       if cmmRegRep res == F64
3070         then
3071           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3072         else do
3073           uq <- getUniqueNat
3074           let
3075             tmp = CmmLocal (LocalReg uq F64)
3076           -- in
3077           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3078           code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3079           return (code1 `appOL` code2)
3080   where
3081         lbl = mkForeignLabel fn Nothing True
3082
3083         fn = case mop of
3084               MO_F32_Sqrt  -> FSLIT("sqrtf")
3085               MO_F32_Sin   -> FSLIT("sinf")
3086               MO_F32_Cos   -> FSLIT("cosf")
3087               MO_F32_Tan   -> FSLIT("tanf")
3088               MO_F32_Exp   -> FSLIT("expf")
3089               MO_F32_Log   -> FSLIT("logf")
3090
3091               MO_F32_Asin  -> FSLIT("asinf")
3092               MO_F32_Acos  -> FSLIT("acosf")
3093               MO_F32_Atan  -> FSLIT("atanf")
3094
3095               MO_F32_Sinh  -> FSLIT("sinhf")
3096               MO_F32_Cosh  -> FSLIT("coshf")
3097               MO_F32_Tanh  -> FSLIT("tanhf")
3098               MO_F32_Pwr   -> FSLIT("powf")
3099
3100               MO_F64_Sqrt  -> FSLIT("sqrt")
3101               MO_F64_Sin   -> FSLIT("sin")
3102               MO_F64_Cos   -> FSLIT("cos")
3103               MO_F64_Tan   -> FSLIT("tan")
3104               MO_F64_Exp   -> FSLIT("exp")
3105               MO_F64_Log   -> FSLIT("log")
3106
3107               MO_F64_Asin  -> FSLIT("asin")
3108               MO_F64_Acos  -> FSLIT("acos")
3109               MO_F64_Atan  -> FSLIT("atan")
3110
3111               MO_F64_Sinh  -> FSLIT("sinh")
3112               MO_F64_Cosh  -> FSLIT("cosh")
3113               MO_F64_Tanh  -> FSLIT("tanh")
3114               MO_F64_Pwr   -> FSLIT("pow")
3115
3116 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3117
3118 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3119
3120 #if x86_64_TARGET_ARCH
3121
3122 genCCall (CmmPrim op) [(r,_)] args vols =
3123   outOfLineFloatOp op r args vols
3124
3125 genCCall target dest_regs args vols = do
3126
3127         -- load up the register arguments
3128     (stack_args, aregs, fregs, load_args_code)
3129          <- load_args args allArgRegs allFPArgRegs nilOL
3130
3131     let
3132         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3133         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3134         arg_regs = int_regs_used ++ fp_regs_used
3135                 -- for annotating the call instruction with
3136
3137         sse_regs = length fp_regs_used
3138
3139         tot_arg_size = arg_size * length stack_args
3140
3141         -- On entry to the called function, %rsp should be aligned
3142         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3143         -- the return address is 16-byte aligned).  In STG land
3144         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3145         -- need to make sure we push a multiple of 16-bytes of args,
3146         -- plus the return address, to get the correct alignment.
3147         -- Urg, this is hard.  We need to feed the delta back into
3148         -- the arg pushing code.
3149     (real_size, adjust_rsp) <-
3150         if tot_arg_size `rem` 16 == 0
3151             then return (tot_arg_size, nilOL)
3152             else do -- we need to adjust...
3153                 delta <- getDeltaNat
3154                 setDeltaNat (delta-8)
3155                 return (tot_arg_size+8, toOL [
3156                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3157                                 DELTA (delta-8)
3158                         ])
3159
3160         -- push the stack args, right to left
3161     push_code <- push_args (reverse stack_args) nilOL
3162     delta <- getDeltaNat
3163
3164     -- deal with static vs dynamic call targets
3165     (callinsns,cconv) <-
3166       case target of
3167         -- CmmPrim -> ...
3168         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3169            -> -- ToDo: stdcall arg sizes
3170               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3171            where fn_imm = ImmCLbl lbl
3172         CmmForeignCall expr conv
3173            -> do (dyn_r, dyn_c) <- getSomeReg expr
3174                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3175
3176     let
3177         -- The x86_64 ABI requires us to set %al to the number of SSE
3178         -- registers that contain arguments, if the called routine
3179         -- is a varargs function.  We don't know whether it's a
3180         -- varargs function or not, so we have to assume it is.
3181         --
3182         -- It's not safe to omit this assignment, even if the number
3183         -- of SSE regs in use is zero.  If %al is larger than 8
3184         -- on entry to a varargs function, seg faults ensue.
3185         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3186
3187     let call = callinsns `appOL`
3188                toOL (
3189                         -- Deallocate parameters after call for ccall;
3190                         -- but not for stdcall (callee does it)
3191                   (if cconv == StdCallConv || real_size==0 then [] else
3192                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3193                   ++
3194                   [DELTA (delta + real_size)]
3195                )
3196     -- in
3197     setDeltaNat (delta + real_size)
3198
3199     let
3200         -- assign the results, if necessary
3201         assign_code []     = nilOL
3202         assign_code [(dest,_hint)] =
3203           case rep of
3204                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3205                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3206                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3207           where
3208                 rep = cmmRegRep dest
3209                 r_dest = getRegisterReg dest
3210         assign_code many = panic "genCCall.assign_code many"
3211
3212     return (load_args_code      `appOL`
3213             adjust_rsp          `appOL`
3214             push_code           `appOL`
3215             assign_eax sse_regs `appOL`
3216             call                `appOL`
3217             assign_code dest_regs)
3218
3219   where
3220     arg_size = 8 -- always, at the mo
3221
3222     load_args :: [(CmmExpr,MachHint)]
3223               -> [Reg]                  -- int regs avail for args
3224               -> [Reg]                  -- FP regs avail for args
3225               -> InstrBlock
3226               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3227     load_args args [] [] code     =  return (args, [], [], code)
3228         -- no more regs to use
3229     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3230         -- no more args to push
3231     load_args ((arg,hint) : rest) aregs fregs code
3232         | isFloatingRep arg_rep =
3233         case fregs of
3234           [] -> push_this_arg
3235           (r:rs) -> do
3236              arg_code <- getAnyReg arg
3237              load_args rest aregs rs (code `appOL` arg_code r)
3238         | otherwise =
3239         case aregs of
3240           [] -> push_this_arg
3241           (r:rs) -> do
3242              arg_code <- getAnyReg arg
3243              load_args rest rs fregs (code `appOL` arg_code r)
3244         where
3245           arg_rep = cmmExprRep arg
3246
3247           push_this_arg = do
3248             (args',ars,frs,code') <- load_args rest aregs fregs code
3249             return ((arg,hint):args', ars, frs, code')
3250
3251     push_args [] code = return code
3252     push_args ((arg,hint):rest) code
3253        | isFloatingRep arg_rep = do
3254          (arg_reg, arg_code) <- getSomeReg arg
3255          delta <- getDeltaNat
3256          setDeltaNat (delta-arg_size)
3257          let code' = code `appOL` toOL [
3258                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3259                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3260                         DELTA (delta-arg_size)]
3261          push_args rest code'
3262
3263        | otherwise = do
3264        -- we only ever generate word-sized function arguments.  Promotion
3265        -- has already happened: our Int8# type is kept sign-extended
3266        -- in an Int#, for example.
3267          ASSERT(arg_rep == I64) return ()
3268          (arg_op, arg_code) <- getOperand arg
3269          delta <- getDeltaNat
3270          setDeltaNat (delta-arg_size)
3271          let code' = code `appOL` toOL [PUSH I64 arg_op,
3272                                         DELTA (delta-arg_size)]
3273          push_args rest code'
3274         where
3275           arg_rep = cmmExprRep arg
3276 #endif
3277
3278 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3279
3280 #if sparc_TARGET_ARCH
3281 {-
3282    The SPARC calling convention is an absolute
3283    nightmare.  The first 6x32 bits of arguments are mapped into
3284    %o0 through %o5, and the remaining arguments are dumped to the
3285    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3286
3287    If we have to put args on the stack, move %o6==%sp down by
3288    the number of words to go on the stack, to ensure there's enough space.
3289
3290    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3291    16 words above the stack pointer is a word for the address of
3292    a structure return value.  I use this as a temporary location
3293    for moving values from float to int regs.  Certainly it isn't
3294    safe to put anything in the 16 words starting at %sp, since
3295    this area can get trashed at any time due to window overflows
3296    caused by signal handlers.
3297
3298    A final complication (if the above isn't enough) is that
3299    we can't blithely calculate the arguments one by one into
3300    %o0 .. %o5.  Consider the following nested calls:
3301
3302        fff a (fff b c)
3303
3304    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3305    the inner call will itself use %o0, which trashes the value put there
3306    in preparation for the outer call.  Upshot: we need to calculate the
3307    args into temporary regs, and move those to arg regs or onto the
3308    stack only immediately prior to the call proper.  Sigh.
3309 -}
3310
3311 genCCall target dest_regs argsAndHints vols = do
3312     let
3313         args = map fst argsAndHints
3314     argcode_and_vregs <- mapM arg_to_int_vregs args
3315     let
3316         (argcodes, vregss) = unzip argcode_and_vregs
3317         n_argRegs          = length allArgRegs
3318         n_argRegs_used     = min (length vregs) n_argRegs
3319         vregs              = concat vregss
3320     -- deal with static vs dynamic call targets
3321     callinsns <- (case target of
3322         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3323                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3324         CmmForeignCall expr conv -> do
3325                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3326                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3327         CmmPrim mop -> do
3328                   (res, reduce) <- outOfLineFloatOp mop
3329                   lblOrMopExpr <- case res of
3330                        Left lbl -> do
3331                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3332                        Right mopExpr -> do
3333                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3334                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3335                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3336
3337       )
3338     let
3339         argcode = concatOL argcodes
3340         (move_sp_down, move_sp_up)
3341            = let diff = length vregs - n_argRegs
3342                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3343              in  if   nn <= 0
3344                  then (nilOL, nilOL)
3345                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3346         transfer_code
3347            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3348     return (argcode       `appOL`
3349             move_sp_down  `appOL`
3350             transfer_code `appOL`
3351             callinsns     `appOL`
3352             unitOL NOP    `appOL`
3353             move_sp_up)
3354   where
3355      -- move args from the integer vregs into which they have been
3356      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3357      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3358
3359      move_final [] _ offset          -- all args done
3360         = []
3361
3362      move_final (v:vs) [] offset     -- out of aregs; move to stack
3363         = ST I32 v (spRel offset)
3364           : move_final vs [] (offset+1)
3365
3366      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3367         = OR False g0 (RIReg v) a
3368           : move_final vs az offset
3369
3370      -- generate code to calculate an argument, and move it into one
3371      -- or two integer vregs.
3372      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3373      arg_to_int_vregs arg
3374         | (cmmExprRep arg) == I64
3375         = do
3376           (ChildCode64 code r_lo) <- iselExpr64 arg
3377           let
3378               r_hi = getHiVRegFromLo r_lo
3379           return (code, [r_hi, r_lo])
3380         | otherwise
3381         = do
3382           (src, code) <- getSomeReg arg
3383           tmp <- getNewRegNat (cmmExprRep arg)
3384           let
3385               pk   = cmmExprRep arg
3386           case pk of
3387              F64 -> do
3388                       v1 <- getNewRegNat I32
3389                       v2 <- getNewRegNat I32
3390                       return (
3391                         code                          `snocOL`
3392                         FMOV F64 src f0                `snocOL`
3393                         ST   F32  f0 (spRel 16)         `snocOL`
3394                         LD   I32  (spRel 16) v1         `snocOL`
3395                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3396                         LD   I32  (spRel 16) v2
3397                         ,
3398                         [v1,v2]
3399                        )
3400              F32 -> do
3401                       v1 <- getNewRegNat I32
3402                       return (
3403                         code                    `snocOL`
3404                         ST   F32  src (spRel 16)  `snocOL`
3405                         LD   I32  (spRel 16) v1
3406                         ,
3407                         [v1]
3408                        )
3409              other -> do
3410                         v1 <- getNewRegNat I32
3411                         return (
3412                           code `snocOL` OR False g0 (RIReg src) v1
3413                           ,
3414                           [v1]
3415                          )
3416 outOfLineFloatOp mop =
3417     do
3418       mopExpr <- cmmMakeDynamicReference addImportNat True $
3419                   mkForeignLabel functionName Nothing True
3420       let mopLabelOrExpr = case mopExpr of
3421                         CmmLit (CmmLabel lbl) -> Left lbl
3422                         _ -> Right mopExpr
3423       return (mopLabelOrExpr, reduce)
3424             where
3425                 (reduce, functionName) = case mop of
3426                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3427                   MO_F32_Log    -> (True,  FSLIT("log"))
3428                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3429
3430                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3431                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3432                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3433
3434                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3435                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3436                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3437
3438                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3439                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3440                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3441
3442                   MO_F64_Exp    -> (False, FSLIT("exp"))
3443                   MO_F64_Log    -> (False, FSLIT("log"))
3444                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3445
3446                   MO_F64_Sin    -> (False, FSLIT("sin"))
3447                   MO_F64_Cos    -> (False, FSLIT("cos"))
3448                   MO_F64_Tan    -> (False, FSLIT("tan"))
3449
3450                   MO_F64_Asin   -> (False, FSLIT("asin"))
3451                   MO_F64_Acos   -> (False, FSLIT("acos"))
3452                   MO_F64_Atan   -> (False, FSLIT("atan"))
3453
3454                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3455                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3456                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3457
3458                   other -> pprPanic "outOfLineFloatOp(sparc) "
3459                                 (pprCallishMachOp mop)
3460
3461 #endif /* sparc_TARGET_ARCH */
3462
3463 #if powerpc_TARGET_ARCH
3464
3465 #if darwin_TARGET_OS || linux_TARGET_OS
3466 {-
3467     The PowerPC calling convention for Darwin/Mac OS X
3468     is described in Apple's document
3469     "Inside Mac OS X - Mach-O Runtime Architecture".
3470
3471     PowerPC Linux uses the System V Release 4 Calling Convention
3472     for PowerPC. It is described in the
3473     "System V Application Binary Interface PowerPC Processor Supplement".
3474
3475     Both conventions are similar:
3476     Parameters may be passed in general-purpose registers starting at r3, in
3477     floating point registers starting at f1, or on the stack.
3478
3479     But there are substantial differences:
3480     * The number of registers used for parameter passing and the exact set of
3481       nonvolatile registers differs (see MachRegs.lhs).
3482     * On Darwin, stack space is always reserved for parameters, even if they are
3483       passed in registers. The called routine may choose to save parameters from
3484       registers to the corresponding space on the stack.
3485     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3486       parameter is passed in an FPR.
3487     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3488       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3489       Darwin just treats an I64 like two separate I32s (high word first).
3490     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3491       4-byte aligned like everything else on Darwin.
3492     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3493       PowerPC Linux does not agree, so neither do we.
3494
3495     According to both conventions, The parameter area should be part of the
3496     caller's stack frame, allocated in the caller's prologue code (large enough
3497     to hold the parameter lists for all called routines). The NCG already
3498     uses the stack for register spilling, leaving 64 bytes free at the top.
3499     If we need a larger parameter area than that, we just allocate a new stack
3500     frame just before ccalling.
3501 -}
3502
3503 genCCall target dest_regs argsAndHints vols
3504   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3505         -- we rely on argument promotion in the codeGen
3506     do
3507         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3508                                                         (zip args argReps)
3509                                                         allArgRegs allFPArgRegs
3510                                                         initialStackOffset
3511                                                         (toOL []) []
3512
3513         (labelOrExpr, reduceToF32) <- case target of
3514             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3515             CmmForeignCall expr conv -> return  (Right expr, False)
3516             CmmPrim mop -> outOfLineFloatOp mop
3517
3518         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3519             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3520
3521         case labelOrExpr of
3522             Left lbl -> do
3523                 return (         codeBefore
3524                         `snocOL` BL lbl usedRegs
3525                         `appOL`  codeAfter)
3526             Right dyn -> do
3527                 (dynReg, dynCode) <- getSomeReg dyn
3528                 return (         dynCode
3529                         `snocOL` MTCTR dynReg
3530                         `appOL`  codeBefore
3531                         `snocOL` BCTRL usedRegs
3532                         `appOL`  codeAfter)
3533     where
3534 #if darwin_TARGET_OS
3535         initialStackOffset = 24
3536             -- size of linkage area + size of arguments, in bytes
3537         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3538                                        map machRepByteWidth argReps
3539 #elif linux_TARGET_OS
3540         initialStackOffset = 8
3541         stackDelta finalStack = roundTo 16 finalStack
3542 #endif
3543         args = map fst argsAndHints
3544         argReps = map cmmExprRep args
3545
3546         roundTo a x | x `mod` a == 0 = x
3547                     | otherwise = x + a - (x `mod` a)
3548
3549         move_sp_down finalStack
3550                | delta > 64 =
3551                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3552                               DELTA (-delta)]
3553                | otherwise = nilOL
3554                where delta = stackDelta finalStack
3555         move_sp_up finalStack
3556                | delta > 64 =
3557                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3558                               DELTA 0]
3559                | otherwise = nilOL
3560                where delta = stackDelta finalStack
3561
3562
3563         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3564         passArguments ((arg,I64):args) gprs fprs stackOffset
3565                accumCode accumUsed =
3566             do
3567                 ChildCode64 code vr_lo <- iselExpr64 arg
3568                 let vr_hi = getHiVRegFromLo vr_lo
3569
3570 #if darwin_TARGET_OS
3571                 passArguments args
3572                               (drop 2 gprs)
3573                               fprs
3574                               (stackOffset+8)
3575                               (accumCode `appOL` code
3576                                     `snocOL` storeWord vr_hi gprs stackOffset
3577                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3578                               ((take 2 gprs) ++ accumUsed)
3579             where
3580                 storeWord vr (gpr:_) offset = MR gpr vr
3581                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3582
3583 #elif linux_TARGET_OS
3584                 let stackOffset' = roundTo 8 stackOffset
3585                     stackCode = accumCode `appOL` code
3586                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3587                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3588                     regCode hireg loreg =
3589                         accumCode `appOL` code
3590                             `snocOL` MR hireg vr_hi
3591                             `snocOL` MR loreg vr_lo
3592
3593                 case gprs of
3594                     hireg : loreg : regs | even (length gprs) ->
3595                         passArguments args regs fprs stackOffset
3596                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3597                     _skipped : hireg : loreg : regs ->
3598                         passArguments args regs fprs stackOffset
3599                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3600                     _ -> -- only one or no regs left
3601                         passArguments args [] fprs (stackOffset'+8)
3602                                       stackCode accumUsed
3603 #endif
3604
3605         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3606             | reg : _ <- regs = do
3607                 register <- getRegister arg
3608                 let code = case register of
3609                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3610                             Any _ acode -> acode reg
3611                 passArguments args
3612                               (drop nGprs gprs)
3613                               (drop nFprs fprs)
3614 #if darwin_TARGET_OS
3615         -- The Darwin ABI requires that we reserve stack slots for register parameters
3616                               (stackOffset + stackBytes)
3617 #elif linux_TARGET_OS
3618         -- ... the SysV ABI doesn't.
3619                               stackOffset
3620 #endif
3621                               (accumCode `appOL` code)
3622                               (reg : accumUsed)
3623             | otherwise = do
3624                 (vr, code) <- getSomeReg arg
3625                 passArguments args
3626                               (drop nGprs gprs)
3627                               (drop nFprs fprs)
3628                               (stackOffset' + stackBytes)
3629                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3630                               accumUsed
3631             where
3632 #if darwin_TARGET_OS
3633         -- stackOffset is at least 4-byte aligned
3634         -- The Darwin ABI is happy with that.
3635                 stackOffset' = stackOffset
3636 #else
3637         -- ... the SysV ABI requires 8-byte alignment for doubles.
3638                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3639                              | otherwise  =           stackOffset
3640 #endif
3641                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3642                 (nGprs, nFprs, stackBytes, regs) = case rep of
3643                     I32 -> (1, 0, 4, gprs)
3644 #if darwin_TARGET_OS
3645         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3646         -- we use the FPRs.
3647                     F32 -> (1, 1, 4, fprs)
3648                     F64 -> (2, 1, 8, fprs)
3649 #elif linux_TARGET_OS
3650         -- ... the SysV ABI doesn't.
3651                     F32 -> (0, 1, 4, fprs)
3652                     F64 -> (0, 1, 8, fprs)
3653 #endif
3654
3655         moveResult reduceToF32 =
3656             case dest_regs of
3657                 [] -> nilOL
3658                 [(dest, _hint)]
3659                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3660                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3661                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3662                                           MR r_dest r4]
3663                     | otherwise -> unitOL (MR r_dest r3)
3664                     where rep = cmmRegRep dest
3665                           r_dest = getRegisterReg dest
3666
3667         outOfLineFloatOp mop =
3668             do
3669                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3670                               mkForeignLabel functionName Nothing True
3671                 let mopLabelOrExpr = case mopExpr of
3672                         CmmLit (CmmLabel lbl) -> Left lbl
3673                         _ -> Right mopExpr
3674                 return (mopLabelOrExpr, reduce)
3675             where
3676                 (functionName, reduce) = case mop of
3677                     MO_F32_Exp   -> (FSLIT("exp"), True)
3678                     MO_F32_Log   -> (FSLIT("log"), True)
3679                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3680
3681                     MO_F32_Sin   -> (FSLIT("sin"), True)
3682                     MO_F32_Cos   -> (FSLIT("cos"), True)
3683                     MO_F32_Tan   -> (FSLIT("tan"), True)
3684
3685                     MO_F32_Asin  -> (FSLIT("asin"), True)
3686                     MO_F32_Acos  -> (FSLIT("acos"), True)
3687                     MO_F32_Atan  -> (FSLIT("atan"), True)
3688
3689                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3690                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3691                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3692                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3693
3694                     MO_F64_Exp   -> (FSLIT("exp"), False)
3695                     MO_F64_Log   -> (FSLIT("log"), False)
3696                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3697
3698                     MO_F64_Sin   -> (FSLIT("sin"), False)
3699                     MO_F64_Cos   -> (FSLIT("cos"), False)
3700                     MO_F64_Tan   -> (FSLIT("tan"), False)
3701
3702                     MO_F64_Asin  -> (FSLIT("asin"), False)
3703                     MO_F64_Acos  -> (FSLIT("acos"), False)
3704                     MO_F64_Atan  -> (FSLIT("atan"), False)
3705
3706                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3707                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3708                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3709                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3710                     other -> pprPanic "genCCall(ppc): unknown callish op"
3711                                     (pprCallishMachOp other)
3712
3713 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3714
3715 #endif /* powerpc_TARGET_ARCH */
3716
3717
3718 -- -----------------------------------------------------------------------------
3719 -- Generating a table-branch
3720
3721 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3722
3723 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3724 genSwitch expr ids
3725   | opt_PIC
3726   = do
3727         (reg,e_code) <- getSomeReg expr
3728         lbl <- getNewLabelNat
3729         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3730         (tableReg,t_code) <- getSomeReg $ dynRef
3731         let
3732             jumpTable = map jumpTableEntryRel ids
3733
3734             jumpTableEntryRel Nothing
3735                 = CmmStaticLit (CmmInt 0 wordRep)
3736             jumpTableEntryRel (Just (BlockId id))
3737                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3738                 where blockLabel = mkAsmTempLabel id
3739
3740             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3741                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3742
3743             code = e_code `appOL` t_code `appOL` toOL [
3744                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3745                             ADD wordRep op (OpReg tableReg),
3746                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3747                     ]
3748         return code
3749   | otherwise
3750   = do
3751         (reg,e_code) <- getSomeReg expr
3752         lbl <- getNewLabelNat
3753         let
3754             jumpTable = map jumpTableEntry ids
3755             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3756             code = e_code `appOL` toOL [
3757                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3758                     JMP_TBL op [ id | Just id <- ids ]
3759                  ]
3760         -- in
3761         return code
3762 #elif powerpc_TARGET_ARCH
3763 genSwitch expr ids
3764   | opt_PIC
3765   = do
3766         (reg,e_code) <- getSomeReg expr
3767         tmp <- getNewRegNat I32
3768         lbl <- getNewLabelNat
3769         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3770         (tableReg,t_code) <- getSomeReg $ dynRef
3771         let
3772             jumpTable = map jumpTableEntryRel ids
3773
3774             jumpTableEntryRel Nothing
3775                 = CmmStaticLit (CmmInt 0 wordRep)
3776             jumpTableEntryRel (Just (BlockId id))
3777                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3778                 where blockLabel = mkAsmTempLabel id
3779
3780             code = e_code `appOL` t_code `appOL` toOL [
3781                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3782                             SLW tmp reg (RIImm (ImmInt 2)),
3783                             LD I32 tmp (AddrRegReg tableReg tmp),
3784                             ADD tmp tmp (RIReg tableReg),
3785                             MTCTR tmp,
3786                             BCTR [ id | Just id <- ids ]
3787                     ]
3788         return code
3789   | otherwise
3790   = do
3791         (reg,e_code) <- getSomeReg expr
3792         tmp <- getNewRegNat I32
3793         lbl <- getNewLabelNat
3794         let
3795             jumpTable = map jumpTableEntry ids
3796
3797             code = e_code `appOL` toOL [
3798                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3799                             SLW tmp reg (RIImm (ImmInt 2)),
3800                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3801                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3802                             MTCTR tmp,
3803                             BCTR [ id | Just id <- ids ]
3804                     ]
3805         return code
3806 #else
3807 genSwitch expr ids = panic "ToDo: genSwitch"
3808 #endif
3809
3810 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3811 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3812     where blockLabel = mkAsmTempLabel id
3813
3814 -- -----------------------------------------------------------------------------
3815 -- Support bits
3816 -- -----------------------------------------------------------------------------
3817
3818
3819 -- -----------------------------------------------------------------------------
3820 -- 'condIntReg' and 'condFltReg': condition codes into registers
3821
3822 -- Turn those condition codes into integers now (when they appear on
3823 -- the right hand side of an assignment).
3824 --
3825 -- (If applicable) Do not fill the delay slots here; you will confuse the
3826 -- register allocator.
3827
3828 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3829
3830 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3831
3832 #if alpha_TARGET_ARCH
3833 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3834 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3835 #endif /* alpha_TARGET_ARCH */
3836
3837 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3838
3839 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3840
3841 condIntReg cond x y = do
3842   CondCode _ cond cond_code <- condIntCode cond x y
3843   tmp <- getNewRegNat I8
3844   let
3845         code dst = cond_code `appOL` toOL [
3846                     SETCC cond (OpReg tmp),
3847                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3848                   ]
3849   -- in
3850   return (Any I32 code)
3851
3852 #endif
3853
3854 #if i386_TARGET_ARCH
3855
3856 condFltReg cond x y = do
3857   CondCode _ cond cond_code <- condFltCode cond x y
3858   tmp <- getNewRegNat I8
3859   let
3860         code dst = cond_code `appOL` toOL [
3861                     SETCC cond (OpReg tmp),
3862                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3863                   ]
3864   -- in
3865   return (Any I32 code)
3866
3867 #endif
3868
3869 #if x86_64_TARGET_ARCH
3870
3871 condFltReg cond x y = do
3872   CondCode _ cond cond_code <- condFltCode cond x y
3873   tmp1 <- getNewRegNat wordRep
3874   tmp2 <- getNewRegNat wordRep
3875   let
3876         -- We have to worry about unordered operands (eg. comparisons
3877         -- against NaN).  If the operands are unordered, the comparison
3878         -- sets the parity flag, carry flag and zero flag.
3879         -- All comparisons are supposed to return false for unordered
3880         -- operands except for !=, which returns true.
3881         --
3882         -- Optimisation: we don't have to test the parity flag if we
3883         -- know the test has already excluded the unordered case: eg >
3884         -- and >= test for a zero carry flag, which can only occur for
3885         -- ordered operands.
3886         --
3887         -- ToDo: by reversing comparisons we could avoid testing the
3888         -- parity flag in more cases.
3889
3890         code dst =
3891            cond_code `appOL`
3892              (case cond of
3893                 NE  -> or_unordered dst
3894                 GU  -> plain_test   dst
3895                 GEU -> plain_test   dst
3896                 _   -> and_ordered  dst)
3897
3898         plain_test dst = toOL [
3899                     SETCC cond (OpReg tmp1),
3900                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3901                  ]
3902         or_unordered dst = toOL [
3903                     SETCC cond (OpReg tmp1),
3904                     SETCC PARITY (OpReg tmp2),
3905                     OR I8 (OpReg tmp1) (OpReg tmp2),
3906                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3907                   ]
3908         and_ordered dst = toOL [
3909                     SETCC cond (OpReg tmp1),
3910                     SETCC NOTPARITY (OpReg tmp2),
3911                     AND I8 (OpReg tmp1) (OpReg tmp2),
3912                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3913                   ]
3914   -- in
3915   return (Any I32 code)
3916
3917 #endif
3918
3919 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3920
3921 #if sparc_TARGET_ARCH
3922
3923 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3924     (src, code) <- getSomeReg x
3925     tmp <- getNewRegNat I32
3926     let
3927         code__2 dst = code `appOL` toOL [
3928             SUB False True g0 (RIReg src) g0,
3929             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3930     return (Any I32 code__2)
3931
3932 condIntReg EQQ x y = do
3933     (src1, code1) <- getSomeReg x
3934     (src2, code2) <- getSomeReg y
3935     tmp1 <- getNewRegNat I32
3936     tmp2 <- getNewRegNat I32
3937     let
3938         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3939             XOR False src1 (RIReg src2) dst,
3940             SUB False True g0 (RIReg dst) g0,
3941             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3942     return (Any I32 code__2)
3943
3944 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3945     (src, code) <- getSomeReg x
3946     tmp <- getNewRegNat I32
3947     let
3948         code__2 dst = code `appOL` toOL [
3949             SUB False True g0 (RIReg src) g0,
3950             ADD True False g0 (RIImm (ImmInt 0)) dst]
3951     return (Any I32 code__2)
3952
3953 condIntReg NE x y = do
3954     (src1, code1) <- getSomeReg x
3955     (src2, code2) <- getSomeReg y
3956     tmp1 <- getNewRegNat I32
3957     tmp2 <- getNewRegNat I32
3958     let
3959         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3960             XOR False src1 (RIReg src2) dst,
3961             SUB False True g0 (RIReg dst) g0,
3962             ADD True False g0 (RIImm (ImmInt 0)) dst]
3963     return (Any I32 code__2)
3964
3965 condIntReg cond x y = do
3966     BlockId lbl1 <- getBlockIdNat
3967     BlockId lbl2 <- getBlockIdNat
3968     CondCode _ cond cond_code <- condIntCode cond x y
3969     let
3970         code__2 dst = cond_code `appOL` toOL [
3971             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3972             OR False g0 (RIImm (ImmInt 0)) dst,
3973             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3974             NEWBLOCK (BlockId lbl1),
3975             OR False g0 (RIImm (ImmInt 1)) dst,
3976             NEWBLOCK (BlockId lbl2)]
3977     return (Any I32 code__2)
3978
3979 condFltReg cond x y = do
3980     BlockId lbl1 <- getBlockIdNat
3981     BlockId lbl2 <- getBlockIdNat
3982     CondCode _ cond cond_code <- condFltCode cond x y
3983     let
3984         code__2 dst = cond_code `appOL` toOL [
3985             NOP,
3986             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3987             OR False g0 (RIImm (ImmInt 0)) dst,
3988             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3989             NEWBLOCK (BlockId lbl1),
3990             OR False g0 (RIImm (ImmInt 1)) dst,
3991             NEWBLOCK (BlockId lbl2)]
3992     return (Any I32 code__2)
3993
3994 #endif /* sparc_TARGET_ARCH */
3995
3996 #if powerpc_TARGET_ARCH
3997 condReg getCond = do
3998     lbl1 <- getBlockIdNat
3999     lbl2 <- getBlockIdNat
4000     CondCode _ cond cond_code <- getCond
4001     let
4002 {-        code dst = cond_code `appOL` toOL [
4003                 BCC cond lbl1,
4004                 LI dst (ImmInt 0),
4005                 BCC ALWAYS lbl2,
4006                 NEWBLOCK lbl1,
4007                 LI dst (ImmInt 1),
4008                 BCC ALWAYS lbl2,
4009                 NEWBLOCK lbl2
4010             ]-}
4011         code dst = cond_code
4012             `appOL` negate_code
4013             `appOL` toOL [
4014                 MFCR dst,
4015                 RLWINM dst dst (bit + 1) 31 31
4016             ]
4017
4018         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4019                     | otherwise = nilOL
4020
4021         (bit, do_negate) = case cond of
4022             LTT -> (0, False)
4023             LE  -> (1, True)
4024             EQQ -> (2, False)
4025             GE  -> (0, True)
4026             GTT -> (1, False)
4027
4028             NE  -> (2, True)
4029
4030             LU  -> (0, False)
4031             LEU -> (1, True)
4032             GEU -> (0, True)
4033             GU  -> (1, False)
4034
4035     return (Any I32 code)
4036
4037 condIntReg cond x y = condReg (condIntCode cond x y)
4038 condFltReg cond x y = condReg (condFltCode cond x y)
4039 #endif /* powerpc_TARGET_ARCH */
4040
4041
4042 -- -----------------------------------------------------------------------------
4043 -- 'trivial*Code': deal with trivial instructions
4044
4045 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4046 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4047 -- Only look for constants on the right hand side, because that's
4048 -- where the generic optimizer will have put them.
4049
4050 -- Similarly, for unary instructions, we don't have to worry about
4051 -- matching an StInt as the argument, because genericOpt will already
4052 -- have handled the constant-folding.
4053
4054 trivialCode
4055     :: MachRep
4056     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4057       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4058                      -> Maybe (Operand -> Operand -> Instr)
4059       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4060                      -> Maybe (Operand -> Operand -> Instr)
4061       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4062       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4063       ,)))))
4064     -> CmmExpr -> CmmExpr -- the two arguments
4065     -> NatM Register
4066
4067 #ifndef powerpc_TARGET_ARCH
4068 trivialFCode
4069     :: MachRep
4070     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4071       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4072       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4073       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4074       ,))))
4075     -> CmmExpr -> CmmExpr -- the two arguments
4076     -> NatM Register
4077 #endif
4078
4079 trivialUCode
4080     :: MachRep
4081     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4082       ,IF_ARCH_i386 ((Operand -> Instr)
4083       ,IF_ARCH_x86_64 ((Operand -> Instr)
4084       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4085       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4086       ,)))))
4087     -> CmmExpr  -- the one argument
4088     -> NatM Register
4089
4090 #ifndef powerpc_TARGET_ARCH
4091 trivialUFCode
4092     :: MachRep
4093     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4094       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4095       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4096       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4097       ,))))
4098     -> CmmExpr -- the one argument
4099     -> NatM Register
4100 #endif
4101
4102 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4103
4104 #if alpha_TARGET_ARCH
4105
4106 trivialCode instr x (StInt y)
4107   | fits8Bits y
4108   = getRegister x               `thenNat` \ register ->
4109     getNewRegNat IntRep         `thenNat` \ tmp ->
4110     let
4111         code = registerCode register tmp
4112         src1 = registerName register tmp
4113         src2 = ImmInt (fromInteger y)
4114         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4115     in
4116     return (Any IntRep code__2)
4117
4118 trivialCode instr x y
4119   = getRegister x               `thenNat` \ register1 ->
4120     getRegister y               `thenNat` \ register2 ->
4121     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4122     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4123     let
4124         code1 = registerCode register1 tmp1 []
4125         src1  = registerName register1 tmp1
4126         code2 = registerCode register2 tmp2 []
4127         src2  = registerName register2 tmp2
4128         code__2 dst = asmSeqThen [code1, code2] .
4129                      mkSeqInstr (instr src1 (RIReg src2) dst)
4130     in
4131     return (Any IntRep code__2)
4132
4133 ------------
4134 trivialUCode instr x
4135   = getRegister x               `thenNat` \ register ->
4136     getNewRegNat IntRep         `thenNat` \ tmp ->
4137     let
4138         code = registerCode register tmp
4139         src  = registerName register tmp
4140         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4141     in
4142     return (Any IntRep code__2)
4143
4144 ------------
4145 trivialFCode _ instr x y
4146   = getRegister x               `thenNat` \ register1 ->
4147     getRegister y               `thenNat` \ register2 ->
4148     getNewRegNat F64    `thenNat` \ tmp1 ->
4149     getNewRegNat F64    `thenNat` \ tmp2 ->
4150     let
4151         code1 = registerCode register1 tmp1
4152         src1  = registerName register1 tmp1
4153
4154         code2 = registerCode register2 tmp2
4155         src2  = registerName register2 tmp2
4156
4157         code__2 dst = asmSeqThen [code1 [], code2 []] .
4158                       mkSeqInstr (instr src1 src2 dst)
4159     in
4160     return (Any F64 code__2)
4161
4162 trivialUFCode _ instr x
4163   = getRegister x               `thenNat` \ register ->
4164     getNewRegNat F64    `thenNat` \ tmp ->
4165     let
4166         code = registerCode register tmp
4167         src  = registerName register tmp
4168         code__2 dst = code . mkSeqInstr (instr src dst)
4169     in
4170     return (Any F64 code__2)
4171
4172 #endif /* alpha_TARGET_ARCH */
4173
4174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4175
4176 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4177
4178 {-
4179 The Rules of the Game are:
4180
4181 * You cannot assume anything about the destination register dst;
4182   it may be anything, including a fixed reg.
4183
4184 * You may compute an operand into a fixed reg, but you may not
4185   subsequently change the contents of that fixed reg.  If you
4186   want to do so, first copy the value either to a temporary
4187   or into dst.  You are free to modify dst even if it happens
4188   to be a fixed reg -- that's not your problem.
4189
4190 * You cannot assume that a fixed reg will stay live over an
4191   arbitrary computation.  The same applies to the dst reg.
4192
4193 * Temporary regs obtained from getNewRegNat are distinct from
4194   each other and from all other regs, and stay live over
4195   arbitrary computations.
4196
4197 --------------------
4198
4199 SDM's version of The Rules:
4200
4201 * If getRegister returns Any, that means it can generate correct
4202   code which places the result in any register, period.  Even if that
4203   register happens to be read during the computation.
4204
4205   Corollary #1: this means that if you are generating code for an
4206   operation with two arbitrary operands, you cannot assign the result
4207   of the first operand into the destination register before computing
4208   the second operand.  The second operand might require the old value
4209   of the destination register.
4210
4211   Corollary #2: A function might be able to generate more efficient
4212   code if it knows the destination register is a new temporary (and
4213   therefore not read by any of the sub-computations).
4214
4215 * If getRegister returns Any, then the code it generates may modify only:
4216         (a) fresh temporaries
4217         (b) the destination register
4218         (c) known registers (eg. %ecx is used by shifts)
4219   In particular, it may *not* modify global registers, unless the global
4220   register happens to be the destination register.
4221 -}
4222
4223 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4224   | not (is64BitLit lit_a) = do
4225   b_code <- getAnyReg b
4226   let
4227        code dst
4228          = b_code dst `snocOL`
4229            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4230   -- in
4231   return (Any rep code)
4232
4233 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4234
4235 -- This is re-used for floating pt instructions too.
4236 genTrivialCode rep instr a b = do
4237   (b_op, b_code) <- getNonClobberedOperand b
4238   a_code <- getAnyReg a
4239   tmp <- getNewRegNat rep
4240   let
4241      -- We want the value of b to stay alive across the computation of a.
4242      -- But, we want to calculate a straight into the destination register,
4243      -- because the instruction only has two operands (dst := dst `op` src).
4244      -- The troublesome case is when the result of b is in the same register
4245      -- as the destination reg.  In this case, we have to save b in a
4246      -- new temporary across the computation of a.
4247      code dst
4248         | dst `regClashesWithOp` b_op =
4249                 b_code `appOL`
4250                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4251                 a_code dst `snocOL`
4252                 instr (OpReg tmp) (OpReg dst)
4253         | otherwise =
4254                 b_code `appOL`
4255                 a_code dst `snocOL`
4256                 instr b_op (OpReg dst)
4257   -- in
4258   return (Any rep code)
4259
4260 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4261 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4262 reg `regClashesWithOp` _            = False
4263
4264 -----------
4265
4266 trivialUCode rep instr x = do
4267   x_code <- getAnyReg x
4268   let
4269      code dst =
4270         x_code dst `snocOL`
4271         instr (OpReg dst)
4272   -- in
4273   return (Any rep code)
4274
4275 -----------
4276
4277 #if i386_TARGET_ARCH
4278
4279 trivialFCode pk instr x y = do
4280   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4281   (y_reg, y_code) <- getSomeReg y
4282   let
4283      code dst =
4284         x_code `appOL`
4285         y_code `snocOL`
4286         instr pk x_reg y_reg dst
4287   -- in
4288   return (Any pk code)
4289
4290 #endif
4291
4292 #if x86_64_TARGET_ARCH
4293
4294 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4295
4296 #endif
4297
4298 -------------
4299
4300 trivialUFCode rep instr x = do
4301   (x_reg, x_code) <- getSomeReg x
4302   let
4303      code dst =
4304         x_code `snocOL`
4305         instr x_reg dst
4306   -- in
4307   return (Any rep code)
4308
4309 #endif /* i386_TARGET_ARCH */
4310
4311 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4312
4313 #if sparc_TARGET_ARCH
4314
4315 trivialCode pk instr x (CmmLit (CmmInt y d))
4316   | fits13Bits y
4317   = do
4318       (src1, code) <- getSomeReg x
4319       tmp <- getNewRegNat I32
4320       let
4321         src2 = ImmInt (fromInteger y)
4322         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4323       return (Any I32 code__2)
4324
4325 trivialCode pk instr x y = do
4326     (src1, code1) <- getSomeReg x
4327     (src2, code2) <- getSomeReg y
4328     tmp1 <- getNewRegNat I32
4329     tmp2 <- getNewRegNat I32
4330     let
4331         code__2 dst = code1 `appOL` code2 `snocOL`
4332                       instr src1 (RIReg src2) dst
4333     return (Any I32 code__2)
4334
4335 ------------
4336 trivialFCode pk instr x y = do
4337     (src1, code1) <- getSomeReg x
4338     (src2, code2) <- getSomeReg y
4339     tmp1 <- getNewRegNat (cmmExprRep x)
4340     tmp2 <- getNewRegNat (cmmExprRep y)
4341     tmp <- getNewRegNat F64
4342     let
4343         promote x = FxTOy F32 F64 x tmp
4344
4345         pk1   = cmmExprRep x
4346         pk2   = cmmExprRep y
4347
4348         code__2 dst =
4349                 if pk1 == pk2 then
4350                     code1 `appOL` code2 `snocOL`
4351                     instr pk src1 src2 dst
4352                 else if pk1 == F32 then
4353                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4354                     instr F64 tmp src2 dst
4355                 else
4356                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4357                     instr F64 src1 tmp dst
4358     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4359
4360 ------------
4361 trivialUCode pk instr x = do
4362     (src, code) <- getSomeReg x
4363     tmp <- getNewRegNat pk
4364     let
4365         code__2 dst = code `snocOL` instr (RIReg src) dst
4366     return (Any pk code__2)
4367
4368 -------------
4369 trivialUFCode pk instr x = do
4370     (src, code) <- getSomeReg x
4371     tmp <- getNewRegNat pk
4372     let
4373         code__2 dst = code `snocOL` instr src dst
4374     return (Any pk code__2)
4375
4376 #endif /* sparc_TARGET_ARCH */
4377
4378 #if powerpc_TARGET_ARCH
4379
4380 {-
4381 Wolfgang's PowerPC version of The Rules:
4382
4383 A slightly modified version of The Rules to take advantage of the fact
4384 that PowerPC instructions work on all registers and don't implicitly
4385 clobber any fixed registers.
4386
4387 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4388
4389 * If getRegister returns Any, then the code it generates may modify only:
4390         (a) fresh temporaries
4391         (b) the destination register
4392   It may *not* modify global registers, unless the global
4393   register happens to be the destination register.
4394   It may not clobber any other registers. In fact, only ccalls clobber any
4395   fixed registers.
4396   Also, it may not modify the counter register (used by genCCall).
4397
4398   Corollary: If a getRegister for a subexpression returns Fixed, you need
4399   not move it to a fresh temporary before evaluating the next subexpression.
4400   The Fixed register won't be modified.
4401   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4402
4403 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4404   the value of the destination register.
4405 -}
4406
4407 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4408     | Just imm <- makeImmediate rep signed y
4409     = do
4410         (src1, code1) <- getSomeReg x
4411         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4412         return (Any rep code)
4413
4414 trivialCode rep signed instr x y = do
4415     (src1, code1) <- getSomeReg x
4416     (src2, code2) <- getSomeReg y
4417     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4418     return (Any rep code)
4419
4420 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4421     -> CmmExpr -> CmmExpr -> NatM Register
4422 trivialCodeNoImm rep instr x y = do
4423     (src1, code1) <- getSomeReg x
4424     (src2, code2) <- getSomeReg y
4425     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4426     return (Any rep code)
4427
4428 trivialUCode rep instr x = do
4429     (src, code) <- getSomeReg x
4430     let code' dst = code `snocOL` instr dst src
4431     return (Any rep code')
4432
4433 -- There is no "remainder" instruction on the PPC, so we have to do
4434 -- it the hard way.
4435 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4436
4437 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4438     -> CmmExpr -> CmmExpr -> NatM Register
4439 remainderCode rep div x y = do
4440     (src1, code1) <- getSomeReg x
4441     (src2, code2) <- getSomeReg y
4442     let code dst = code1 `appOL` code2 `appOL` toOL [
4443                 div dst src1 src2,
4444                 MULLW dst dst (RIReg src2),
4445                 SUBF dst dst src1
4446             ]
4447     return (Any rep code)
4448
4449 #endif /* powerpc_TARGET_ARCH */
4450
4451
4452 -- -----------------------------------------------------------------------------
4453 --  Coercing to/from integer/floating-point...
4454
4455 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4456 -- conversions.  We have to store temporaries in memory to move
4457 -- between the integer and the floating point register sets.
4458
4459 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4460 -- pretend, on sparc at least, that double and float regs are seperate
4461 -- kinds, so the value has to be computed into one kind before being
4462 -- explicitly "converted" to live in the other kind.
4463
4464 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4465 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4466
4467 #if sparc_TARGET_ARCH
4468 coerceDbl2Flt :: CmmExpr -> NatM Register
4469 coerceFlt2Dbl :: CmmExpr -> NatM Register
4470 #endif
4471
4472 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4473
4474 #if alpha_TARGET_ARCH
4475
4476 coerceInt2FP _ x
4477   = getRegister x               `thenNat` \ register ->
4478     getNewRegNat IntRep         `thenNat` \ reg ->
4479     let
4480         code = registerCode register reg
4481         src  = registerName register reg
4482
4483         code__2 dst = code . mkSeqInstrs [
4484             ST Q src (spRel 0),
4485             LD TF dst (spRel 0),
4486             CVTxy Q TF dst dst]
4487     in
4488     return (Any F64 code__2)
4489
4490 -------------
4491 coerceFP2Int x
4492   = getRegister x               `thenNat` \ register ->
4493     getNewRegNat F64    `thenNat` \ tmp ->
4494     let
4495         code = registerCode register tmp
4496         src  = registerName register tmp
4497
4498         code__2 dst = code . mkSeqInstrs [
4499             CVTxy TF Q src tmp,
4500             ST TF tmp (spRel 0),
4501             LD Q dst (spRel 0)]
4502     in
4503     return (Any IntRep code__2)
4504
4505 #endif /* alpha_TARGET_ARCH */
4506
4507 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4508
4509 #if i386_TARGET_ARCH
4510
4511 coerceInt2FP from to x = do
4512   (x_reg, x_code) <- getSomeReg x
4513   let
4514         opc  = case to of F32 -> GITOF; F64 -> GITOD
4515         code dst = x_code `snocOL` opc x_reg dst
4516         -- ToDo: works for non-I32 reps?
4517   -- in
4518   return (Any to code)
4519
4520 ------------
4521
4522 coerceFP2Int from to x = do
4523   (x_reg, x_code) <- getSomeReg x
4524   let
4525         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4526         code dst = x_code `snocOL` opc x_reg dst
4527         -- ToDo: works for non-I32 reps?
4528   -- in
4529   return (Any to code)
4530
4531 #endif /* i386_TARGET_ARCH */
4532
4533 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4534
4535 #if x86_64_TARGET_ARCH
4536
4537 coerceFP2Int from to x = do
4538   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4539   let
4540         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4541         code dst = x_code `snocOL` opc x_op dst
4542   -- in
4543   return (Any to code) -- works even if the destination rep is <I32
4544
4545 coerceInt2FP from to x = do
4546   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4547   let
4548         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4549         code dst = x_code `snocOL` opc x_op dst
4550   -- in
4551   return (Any to code) -- works even if the destination rep is <I32
4552
4553 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4554 coerceFP2FP to x = do
4555   (x_reg, x_code) <- getSomeReg x
4556   let
4557         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4558         code dst = x_code `snocOL` opc x_reg dst
4559   -- in
4560   return (Any to code)
4561
4562 #endif
4563
4564 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4565
4566 #if sparc_TARGET_ARCH
4567
4568 coerceInt2FP pk1 pk2 x = do
4569     (src, code) <- getSomeReg x
4570     let
4571         code__2 dst = code `appOL` toOL [
4572             ST pk1 src (spRel (-2)),
4573             LD pk1 (spRel (-2)) dst,
4574             FxTOy pk1 pk2 dst dst]
4575     return (Any pk2 code__2)
4576
4577 ------------
4578 coerceFP2Int pk fprep x = do
4579     (src, code) <- getSomeReg x
4580     reg <- getNewRegNat fprep
4581     tmp <- getNewRegNat pk
4582     let
4583         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4584             code `appOL` toOL [
4585             FxTOy fprep pk src tmp,
4586             ST pk tmp (spRel (-2)),
4587             LD pk (spRel (-2)) dst]
4588     return (Any pk code__2)
4589
4590 ------------
4591 coerceDbl2Flt x = do
4592     (src, code) <- getSomeReg x
4593     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4594
4595 ------------
4596 coerceFlt2Dbl x = do
4597     (src, code) <- getSomeReg x
4598     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4599
4600 #endif /* sparc_TARGET_ARCH */
4601
4602 #if powerpc_TARGET_ARCH
4603 coerceInt2FP fromRep toRep x = do
4604     (src, code) <- getSomeReg x
4605     lbl <- getNewLabelNat
4606     itmp <- getNewRegNat I32
4607     ftmp <- getNewRegNat F64
4608     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4609     Amode addr addr_code <- getAmode dynRef
4610     let
4611         code' dst = code `appOL` maybe_exts `appOL` toOL [
4612                 LDATA ReadOnlyData
4613                                 [CmmDataLabel lbl,
4614                                  CmmStaticLit (CmmInt 0x43300000 I32),
4615                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4616                 XORIS itmp src (ImmInt 0x8000),
4617                 ST I32 itmp (spRel 3),
4618                 LIS itmp (ImmInt 0x4330),
4619                 ST I32 itmp (spRel 2),
4620                 LD F64 ftmp (spRel 2)
4621             ] `appOL` addr_code `appOL` toOL [
4622                 LD F64 dst addr,
4623                 FSUB F64 dst ftmp dst
4624             ] `appOL` maybe_frsp dst
4625
4626         maybe_exts = case fromRep of
4627                         I8 ->  unitOL $ EXTS I8 src src
4628                         I16 -> unitOL $ EXTS I16 src src
4629                         I32 -> nilOL
4630         maybe_frsp dst = case toRep of
4631                         F32 -> unitOL $ FRSP dst dst
4632                         F64 -> nilOL
4633     return (Any toRep code')
4634
4635 coerceFP2Int fromRep toRep x = do
4636     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4637     (src, code) <- getSomeReg x
4638     tmp <- getNewRegNat F64
4639     let
4640         code' dst = code `appOL` toOL [
4641                 -- convert to int in FP reg
4642             FCTIWZ tmp src,
4643                 -- store value (64bit) from FP to stack
4644             ST F64 tmp (spRel 2),
4645                 -- read low word of value (high word is undefined)
4646             LD I32 dst (spRel 3)]
4647     return (Any toRep code')
4648 #endif /* powerpc_TARGET_ARCH */
4649
4650
4651 -- -----------------------------------------------------------------------------
4652 -- eXTRA_STK_ARGS_HERE
4653
4654 -- We (allegedly) put the first six C-call arguments in registers;
4655 -- where do we start putting the rest of them?
4656
4657 -- Moved from MachInstrs (SDM):
4658
4659 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4660 eXTRA_STK_ARGS_HERE :: Int
4661 eXTRA_STK_ARGS_HERE
4662   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4663 #endif
4664