ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25
  26 -- Our intermediate code:
  27 import PprCmm           ( pprExpr )
  28 import Cmm
  29 import MachOp
  30 import CLabel
  31
  32 -- The rest:
  33 import StaticFlags      ( opt_PIC )
  34 import ForeignCall      ( CCallConv(..) )
  35 import OrdList
  36 import Pretty
  37 import Outputable
  38 import FastString
  39 import FastTypes        ( isFastTrue )
  40 import Constants        ( wORD_SIZE )
  41
  42 #ifdef DEBUG
  43 import Outputable       ( assertPanic )
  44 import TRACE            ( trace )
  45 #endif
  46
  47 import Control.Monad    ( mapAndUnzipM )
  48 import Maybe            ( fromJust )
  49 import DATA_BITS
  50 import DATA_WORD
  51
  52 -- -----------------------------------------------------------------------------
  53 -- Top-level of the instruction selector
  54
  55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  56 -- They are really trees of insns to facilitate fast appending, where a
  57 -- left-to-right traversal (pre-order?) yields the insns in the correct
  58 -- order.
  59
  60 type InstrBlock = OrdList Instr
  61
  62 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  63 cmmTopCodeGen (CmmProc info lab params blocks) = do
  64   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  65   picBaseMb <- getPicBaseMaybeNat
  66   let proc = CmmProc info lab params (concat nat_blocks)
  67       tops = proc : concat statics
  68   case picBaseMb of
  69       Just picBase -> initializePicBase picBase tops
  70       Nothing -> return tops
  71
  72 cmmTopCodeGen (CmmData sec dat) = do
  73   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  74
  75 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  76 basicBlockCodeGen (BasicBlock id stmts) = do
  77   instrs <- stmtsToInstrs stmts
  78   -- code generation may introduce new basic block boundaries, which
  79   -- are indicated by the NEWBLOCK instruction.  We must split up the
  80   -- instruction stream into basic blocks again.  Also, we extract
  81   -- LDATAs here too.
  82   let
  83         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  84
  85         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  86           = ([], BasicBlock id instrs : blocks, statics)
  87         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  88           = (instrs, blocks, CmmData sec dat:statics)
  89         mkBlocks instr (instrs,blocks,statics)
  90           = (instr:instrs, blocks, statics)
  91   -- in
  92   return (BasicBlock id top : other_blocks, statics)
  93
  94 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  95 stmtsToInstrs stmts
  96    = do instrss <- mapM stmtToInstrs stmts
  97         return (concatOL instrss)
  98
  99 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 100 stmtToInstrs stmt = case stmt of
 101     CmmNop         -> return nilOL
 102     CmmComment s   -> return (unitOL (COMMENT s))
 103
 104     CmmAssign reg src
 105       | isFloatingRep kind -> assignReg_FltCode kind reg src
 106 #if WORD_SIZE_IN_BITS==32
 107       | kind == I64        -> assignReg_I64Code      reg src
 108 #endif
 109       | otherwise          -> assignReg_IntCode kind reg src
 110         where kind = cmmRegRep reg
 111
 112     CmmStore addr src
 113       | isFloatingRep kind -> assignMem_FltCode kind addr src
 114 #if WORD_SIZE_IN_BITS==32
 115       | kind == I64      -> assignMem_I64Code      addr src
 116 #endif
 117       | otherwise        -> assignMem_IntCode kind addr src
 118         where kind = cmmExprRep src
 119
 120     CmmCall target result_regs args vols
 121        -> genCCall target result_regs args vols
 122
 123     CmmBranch id          -> genBranch id
 124     CmmCondBranch arg id  -> genCondJump id arg
 125     CmmSwitch arg ids     -> genSwitch arg ids
 126     CmmJump arg params    -> genJump arg
 127
 128 -- -----------------------------------------------------------------------------
 129 -- General things for putting together code sequences
 130
 131 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 132 -- CmmExprs into CmmRegOff?
 133 mangleIndexTree :: CmmExpr -> CmmExpr
 134 mangleIndexTree (CmmRegOff reg off)
 135   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 136   where rep = cmmRegRep reg
 137
 138 -- -----------------------------------------------------------------------------
 139 --  Code gen for 64-bit arithmetic on 32-bit platforms
 140
 141 {-
 142 Simple support for generating 64-bit code (ie, 64 bit values and 64
 143 bit assignments) on 32-bit platforms.  Unlike the main code generator
 144 we merely shoot for generating working code as simply as possible, and
 145 pay little attention to code quality.  Specifically, there is no
 146 attempt to deal cleverly with the fixed-vs-floating register
 147 distinction; all values are generated into (pairs of) floating
 148 registers, even if this would mean some redundant reg-reg moves as a
 149 result.  Only one of the VRegUniques is returned, since it will be
 150 of the VRegUniqueLo form, and the upper-half VReg can be determined
 151 by applying getHiVRegFromLo to it.
 152 -}
 153
 154 data ChildCode64        -- a.k.a "Register64"
 155    = ChildCode64
 156         InstrBlock      -- code
 157         Reg             -- the lower 32-bit temporary which contains the
 158                         -- result; use getHiVRegFromLo to find the other
 159                         -- VRegUnique.  Rules of this simplified insn
 160                         -- selection game are therefore that the returned
 161                         -- Reg may be modified
 162
 163 #if WORD_SIZE_IN_BITS==32
 164 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 165 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 166 #endif
 167
 168 #ifndef x86_64_TARGET_ARCH
 169 iselExpr64        :: CmmExpr -> NatM ChildCode64
 170 #endif
 171
 172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 173
 174 #if i386_TARGET_ARCH
 175
 176 assignMem_I64Code addrTree valueTree = do
 177   Amode addr addr_code <- getAmode addrTree
 178   ChildCode64 vcode rlo <- iselExpr64 valueTree
 179   let
 180         rhi = getHiVRegFromLo rlo
 181
 182         -- Little-endian store
 183         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 184         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 185   -- in
 186   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 187
 188
 189 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 190    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 191    let
 192          r_dst_lo = mkVReg u_dst I32
 193          r_dst_hi = getHiVRegFromLo r_dst_lo
 194          r_src_hi = getHiVRegFromLo r_src_lo
 195          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 196          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 197    -- in
 198    return (
 199         vcode `snocOL` mov_lo `snocOL` mov_hi
 200      )
 201
 202 assignReg_I64Code lvalue valueTree
 203    = panic "assignReg_I64Code(i386): invalid lvalue"
 204
 205 ------------
 206
 207 iselExpr64 (CmmLit (CmmInt i _)) = do
 208   (rlo,rhi) <- getNewRegPairNat I32
 209   let
 210         r = fromIntegral (fromIntegral i :: Word32)
 211         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 212         code = toOL [
 213                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 214                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 215                 ]
 216   -- in
 217   return (ChildCode64 code rlo)
 218
 219 iselExpr64 (CmmLoad addrTree I64) = do
 220    Amode addr addr_code <- getAmode addrTree
 221    (rlo,rhi) <- getNewRegPairNat I32
 222    let
 223         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 224         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 225    -- in
 226    return (
 227             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 228                         rlo
 229      )
 230
 231 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 232    = return (ChildCode64 nilOL (mkVReg vu I32))
 233
 234 -- we handle addition, but rather badly
 235 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 236    ChildCode64 code1 r1lo <- iselExpr64 e1
 237    (rlo,rhi) <- getNewRegPairNat I32
 238    let
 239         r = fromIntegral (fromIntegral i :: Word32)
 240         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 241         r1hi = getHiVRegFromLo r1lo
 242         code =  code1 `appOL`
 243                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 244                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 245                        MOV I32 (OpReg r1hi) (OpReg rhi),
 246                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 247    -- in
 248    return (ChildCode64 code rlo)
 249
 250 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 251    ChildCode64 code1 r1lo <- iselExpr64 e1
 252    ChildCode64 code2 r2lo <- iselExpr64 e2
 253    (rlo,rhi) <- getNewRegPairNat I32
 254    let
 255         r1hi = getHiVRegFromLo r1lo
 256         r2hi = getHiVRegFromLo r2lo
 257         code =  code1 `appOL`
 258                 code2 `appOL`
 259                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 260                        ADD I32 (OpReg r2lo) (OpReg rlo),
 261                        MOV I32 (OpReg r1hi) (OpReg rhi),
 262                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 263    -- in
 264    return (ChildCode64 code rlo)
 265
 266 iselExpr64 expr
 267    = pprPanic "iselExpr64(i386)" (ppr expr)
 268
 269 #endif /* i386_TARGET_ARCH */
 270
 271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 272
 273 #if sparc_TARGET_ARCH
 274
 275 assignMem_I64Code addrTree valueTree = do
 276      Amode addr addr_code <- getAmode addrTree
 277      ChildCode64 vcode rlo <- iselExpr64 valueTree
 278      (src, code) <- getSomeReg addrTree
 279      let
 280          rhi = getHiVRegFromLo rlo
 281          -- Big-endian store
 282          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 283          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 284      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 285
 286 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 287      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 288      let
 289          r_dst_lo = mkVReg u_dst pk
 290          r_dst_hi = getHiVRegFromLo r_dst_lo
 291          r_src_hi = getHiVRegFromLo r_src_lo
 292          mov_lo = mkMOV r_src_lo r_dst_lo
 293          mov_hi = mkMOV r_src_hi r_dst_hi
 294          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 295      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 296 assignReg_I64Code lvalue valueTree
 297    = panic "assignReg_I64Code(sparc): invalid lvalue"
 298
 299
 300 -- Don't delete this -- it's very handy for debugging.
 301 --iselExpr64 expr
 302 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 303 --   = panic "iselExpr64(???)"
 304
 305 iselExpr64 (CmmLoad addrTree I64) = do
 306      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 307      rlo <- getNewRegNat I32
 308      let rhi = getHiVRegFromLo rlo
 309          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 310          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 311      return (
 312             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 313                          rlo
 314           )
 315
 316 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 317      r_dst_lo <-  getNewRegNat I32
 318      let r_dst_hi = getHiVRegFromLo r_dst_lo
 319          r_src_lo = mkVReg uq I32
 320          r_src_hi = getHiVRegFromLo r_src_lo
 321          mov_lo = mkMOV r_src_lo r_dst_lo
 322          mov_hi = mkMOV r_src_hi r_dst_hi
 323          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 324      return (
 325             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 326          )
 327
 328 iselExpr64 expr
 329    = pprPanic "iselExpr64(sparc)" (ppr expr)
 330
 331 #endif /* sparc_TARGET_ARCH */
 332
 333 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 334
 335 #if powerpc_TARGET_ARCH
 336
 337 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 338 getI64Amodes addrTree = do
 339     Amode hi_addr addr_code <- getAmode addrTree
 340     case addrOffset hi_addr 4 of
 341         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 342         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 343                            return (AddrRegImm hi_ptr (ImmInt 0),
 344                                    AddrRegImm hi_ptr (ImmInt 4),
 345                                    code)
 346
 347 assignMem_I64Code addrTree valueTree = do
 348         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 349         ChildCode64 vcode rlo <- iselExpr64 valueTree
 350         let
 351                 rhi = getHiVRegFromLo rlo
 352
 353                 -- Big-endian store
 354                 mov_hi = ST I32 rhi hi_addr
 355                 mov_lo = ST I32 rlo lo_addr
 356         -- in
 357         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 358
 359 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 360    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 361    let
 362          r_dst_lo = mkVReg u_dst I32
 363          r_dst_hi = getHiVRegFromLo r_dst_lo
 364          r_src_hi = getHiVRegFromLo r_src_lo
 365          mov_lo = MR r_dst_lo r_src_lo
 366          mov_hi = MR r_dst_hi r_src_hi
 367    -- in
 368    return (
 369         vcode `snocOL` mov_lo `snocOL` mov_hi
 370      )
 371
 372 assignReg_I64Code lvalue valueTree
 373    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 374
 375
 376 -- Don't delete this -- it's very handy for debugging.
 377 --iselExpr64 expr
 378 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 379 --   = panic "iselExpr64(???)"
 380
 381 iselExpr64 (CmmLoad addrTree I64) = do
 382     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 383     (rlo, rhi) <- getNewRegPairNat I32
 384     let mov_hi = LD I32 rhi hi_addr
 385         mov_lo = LD I32 rlo lo_addr
 386     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 387                          rlo
 388
 389 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 390    = return (ChildCode64 nilOL (mkVReg vu I32))
 391
 392 iselExpr64 (CmmLit (CmmInt i _)) = do
 393   (rlo,rhi) <- getNewRegPairNat I32
 394   let
 395         half0 = fromIntegral (fromIntegral i :: Word16)
 396         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 397         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 398         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 399
 400         code = toOL [
 401                 LIS rlo (ImmInt half1),
 402                 OR rlo rlo (RIImm $ ImmInt half0),
 403                 LIS rhi (ImmInt half3),
 404                 OR rlo rlo (RIImm $ ImmInt half2)
 405                 ]
 406   -- in
 407   return (ChildCode64 code rlo)
 408
 409 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 410    ChildCode64 code1 r1lo <- iselExpr64 e1
 411    ChildCode64 code2 r2lo <- iselExpr64 e2
 412    (rlo,rhi) <- getNewRegPairNat I32
 413    let
 414         r1hi = getHiVRegFromLo r1lo
 415         r2hi = getHiVRegFromLo r2lo
 416         code =  code1 `appOL`
 417                 code2 `appOL`
 418                 toOL [ ADDC rlo r1lo r2lo,
 419                        ADDE rhi r1hi r2hi ]
 420    -- in
 421    return (ChildCode64 code rlo)
 422
 423 iselExpr64 expr
 424    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 425
 426 #endif /* powerpc_TARGET_ARCH */
 427
 428
 429 -- -----------------------------------------------------------------------------
 430 -- The 'Register' type
 431
 432 -- 'Register's passed up the tree.  If the stix code forces the register
 433 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 434 -- otherwise, it comes out as @Any@, and the parent can decide which
 435 -- register to put it in.
 436
 437 data Register
 438   = Fixed   MachRep Reg InstrBlock
 439   | Any     MachRep (Reg -> InstrBlock)
 440
 441 swizzleRegisterRep :: Register -> MachRep -> Register
 442 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 443 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 444
 445
 446 -- -----------------------------------------------------------------------------
 447 -- Utils based on getRegister, below
 448
 449 -- The dual to getAnyReg: compute an expression into a register, but
 450 -- we don't mind which one it is.
 451 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 452 getSomeReg expr = do
 453   r <- getRegister expr
 454   case r of
 455     Any rep code -> do
 456         tmp <- getNewRegNat rep
 457         return (tmp, code tmp)
 458     Fixed _ reg code ->
 459         return (reg, code)
 460
 461 -- -----------------------------------------------------------------------------
 462 -- Grab the Reg for a CmmReg
 463
 464 getRegisterReg :: CmmReg -> Reg
 465
 466 getRegisterReg (CmmLocal (LocalReg u pk))
 467   = mkVReg u pk
 468
 469 getRegisterReg (CmmGlobal mid)
 470   = case get_GlobalReg_reg_or_addr mid of
 471        Left (RealReg rrno) -> RealReg rrno
 472        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 473           -- By this stage, the only MagicIds remaining should be the
 474           -- ones which map to a real machine register on this
 475           -- platform.  Hence ...
 476
 477
 478 -- -----------------------------------------------------------------------------
 479 -- Generate code to get a subtree into a Register
 480
 481 -- Don't delete this -- it's very handy for debugging.
 482 --getRegister expr
 483 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 484 --   = panic "getRegister(???)"
 485
 486 getRegister :: CmmExpr -> NatM Register
 487
 488 getRegister (CmmReg (CmmGlobal PicBaseReg))
 489   = do
 490       reg <- getPicBaseNat wordRep
 491       return (Fixed wordRep reg nilOL)
 492
 493 getRegister (CmmReg reg)
 494   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 495
 496 getRegister tree@(CmmRegOff _ _)
 497   = getRegister (mangleIndexTree tree)
 498
 499 -- end of machine-"independent" bit; here we go on the rest...
 500
 501 #if alpha_TARGET_ARCH
 502
 503 getRegister (StDouble d)
 504   = getBlockIdNat                   `thenNat` \ lbl ->
 505     getNewRegNat PtrRep             `thenNat` \ tmp ->
 506     let code dst = mkSeqInstrs [
 507             LDATA RoDataSegment lbl [
 508                     DATA TF [ImmLab (rational d)]
 509                 ],
 510             LDA tmp (AddrImm (ImmCLbl lbl)),
 511             LD TF dst (AddrReg tmp)]
 512     in
 513         return (Any F64 code)
 514
 515 getRegister (StPrim primop [x]) -- unary PrimOps
 516   = case primop of
 517       IntNegOp -> trivialUCode (NEG Q False) x
 518
 519       NotOp    -> trivialUCode NOT x
 520
 521       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 522       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 523
 524       OrdOp -> coerceIntCode IntRep x
 525       ChrOp -> chrCode x
 526
 527       Float2IntOp  -> coerceFP2Int    x
 528       Int2FloatOp  -> coerceInt2FP pr x
 529       Double2IntOp -> coerceFP2Int    x
 530       Int2DoubleOp -> coerceInt2FP pr x
 531
 532       Double2FloatOp -> coerceFltCode x
 533       Float2DoubleOp -> coerceFltCode x
 534
 535       other_op -> getRegister (StCall fn CCallConv F64 [x])
 536         where
 537           fn = case other_op of
 538                  FloatExpOp    -> FSLIT("exp")
 539                  FloatLogOp    -> FSLIT("log")
 540                  FloatSqrtOp   -> FSLIT("sqrt")
 541                  FloatSinOp    -> FSLIT("sin")
 542                  FloatCosOp    -> FSLIT("cos")
 543                  FloatTanOp    -> FSLIT("tan")
 544                  FloatAsinOp   -> FSLIT("asin")
 545                  FloatAcosOp   -> FSLIT("acos")
 546                  FloatAtanOp   -> FSLIT("atan")
 547                  FloatSinhOp   -> FSLIT("sinh")
 548                  FloatCoshOp   -> FSLIT("cosh")
 549                  FloatTanhOp   -> FSLIT("tanh")
 550                  DoubleExpOp   -> FSLIT("exp")
 551                  DoubleLogOp   -> FSLIT("log")
 552                  DoubleSqrtOp  -> FSLIT("sqrt")
 553                  DoubleSinOp   -> FSLIT("sin")
 554                  DoubleCosOp   -> FSLIT("cos")
 555                  DoubleTanOp   -> FSLIT("tan")
 556                  DoubleAsinOp  -> FSLIT("asin")
 557                  DoubleAcosOp  -> FSLIT("acos")
 558                  DoubleAtanOp  -> FSLIT("atan")
 559                  DoubleSinhOp  -> FSLIT("sinh")
 560                  DoubleCoshOp  -> FSLIT("cosh")
 561                  DoubleTanhOp  -> FSLIT("tanh")
 562   where
 563     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 564
 565 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 566   = case primop of
 567       CharGtOp -> trivialCode (CMP LTT) y x
 568       CharGeOp -> trivialCode (CMP LE) y x
 569       CharEqOp -> trivialCode (CMP EQQ) x y
 570       CharNeOp -> int_NE_code x y
 571       CharLtOp -> trivialCode (CMP LTT) x y
 572       CharLeOp -> trivialCode (CMP LE) x y
 573
 574       IntGtOp  -> trivialCode (CMP LTT) y x
 575       IntGeOp  -> trivialCode (CMP LE) y x
 576       IntEqOp  -> trivialCode (CMP EQQ) x y
 577       IntNeOp  -> int_NE_code x y
 578       IntLtOp  -> trivialCode (CMP LTT) x y
 579       IntLeOp  -> trivialCode (CMP LE) x y
 580
 581       WordGtOp -> trivialCode (CMP ULT) y x
 582       WordGeOp -> trivialCode (CMP ULE) x y
 583       WordEqOp -> trivialCode (CMP EQQ)  x y
 584       WordNeOp -> int_NE_code x y
 585       WordLtOp -> trivialCode (CMP ULT) x y
 586       WordLeOp -> trivialCode (CMP ULE) x y
 587
 588       AddrGtOp -> trivialCode (CMP ULT) y x
 589       AddrGeOp -> trivialCode (CMP ULE) y x
 590       AddrEqOp -> trivialCode (CMP EQQ)  x y
 591       AddrNeOp -> int_NE_code x y
 592       AddrLtOp -> trivialCode (CMP ULT) x y
 593       AddrLeOp -> trivialCode (CMP ULE) x y
 594
 595       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 596       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 597       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 598       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 599       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 600       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 601
 602       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 603       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 604       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 605       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 606       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 607       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 608
 609       IntAddOp  -> trivialCode (ADD Q False) x y
 610       IntSubOp  -> trivialCode (SUB Q False) x y
 611       IntMulOp  -> trivialCode (MUL Q False) x y
 612       IntQuotOp -> trivialCode (DIV Q False) x y
 613       IntRemOp  -> trivialCode (REM Q False) x y
 614
 615       WordAddOp  -> trivialCode (ADD Q False) x y
 616       WordSubOp  -> trivialCode (SUB Q False) x y
 617       WordMulOp  -> trivialCode (MUL Q False) x y
 618       WordQuotOp -> trivialCode (DIV Q True) x y
 619       WordRemOp  -> trivialCode (REM Q True) x y
 620
 621       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 622       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 623       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 624       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 625
 626       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 627       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 628       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 629       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 630
 631       AddrAddOp  -> trivialCode (ADD Q False) x y
 632       AddrSubOp  -> trivialCode (SUB Q False) x y
 633       AddrRemOp  -> trivialCode (REM Q True) x y
 634
 635       AndOp  -> trivialCode AND x y
 636       OrOp   -> trivialCode OR  x y
 637       XorOp  -> trivialCode XOR x y
 638       SllOp  -> trivialCode SLL x y
 639       SrlOp  -> trivialCode SRL x y
 640
 641       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 642       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 643       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 644
 645       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 646       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 647   where
 648     {- ------------------------------------------------------------
 649         Some bizarre special code for getting condition codes into
 650         registers.  Integer non-equality is a test for equality
 651         followed by an XOR with 1.  (Integer comparisons always set
 652         the result register to 0 or 1.)  Floating point comparisons of
 653         any kind leave the result in a floating point register, so we
 654         need to wrangle an integer register out of things.
 655     -}
 656     int_NE_code :: StixTree -> StixTree -> NatM Register
 657
 658     int_NE_code x y
 659       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 660         getNewRegNat IntRep             `thenNat` \ tmp ->
 661         let
 662             code = registerCode register tmp
 663             src  = registerName register tmp
 664             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 665         in
 666         return (Any IntRep code__2)
 667
 668     {- ------------------------------------------------------------
 669         Comments for int_NE_code also apply to cmpF_code
 670     -}
 671     cmpF_code
 672         :: (Reg -> Reg -> Reg -> Instr)
 673         -> Cond
 674         -> StixTree -> StixTree
 675         -> NatM Register
 676
 677     cmpF_code instr cond x y
 678       = trivialFCode pr instr x y       `thenNat` \ register ->
 679         getNewRegNat F64                `thenNat` \ tmp ->
 680         getBlockIdNat                   `thenNat` \ lbl ->
 681         let
 682             code = registerCode register tmp
 683             result  = registerName register tmp
 684
 685             code__2 dst = code . mkSeqInstrs [
 686                 OR zeroh (RIImm (ImmInt 1)) dst,
 687                 BF cond  result (ImmCLbl lbl),
 688                 OR zeroh (RIReg zeroh) dst,
 689                 NEWBLOCK lbl]
 690         in
 691         return (Any IntRep code__2)
 692       where
 693         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 694       ------------------------------------------------------------
 695
 696 getRegister (CmmLoad pk mem)
 697   = getAmode mem                    `thenNat` \ amode ->
 698     let
 699         code = amodeCode amode
 700         src   = amodeAddr amode
 701         size = primRepToSize pk
 702         code__2 dst = code . mkSeqInstr (LD size dst src)
 703     in
 704     return (Any pk code__2)
 705
 706 getRegister (StInt i)
 707   | fits8Bits i
 708   = let
 709         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 710     in
 711     return (Any IntRep code)
 712   | otherwise
 713   = let
 714         code dst = mkSeqInstr (LDI Q dst src)
 715     in
 716     return (Any IntRep code)
 717   where
 718     src = ImmInt (fromInteger i)
 719
 720 getRegister leaf
 721   | isJust imm
 722   = let
 723         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 724     in
 725     return (Any PtrRep code)
 726   where
 727     imm = maybeImm leaf
 728     imm__2 = case imm of Just x -> x
 729
 730 #endif /* alpha_TARGET_ARCH */
 731
 732 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 733
 734 #if i386_TARGET_ARCH
 735
 736 getRegister (CmmLit (CmmFloat f F32)) = do
 737     lbl <- getNewLabelNat
 738     let code dst = toOL [
 739             LDATA ReadOnlyData
 740                         [CmmDataLabel lbl,
 741                          CmmStaticLit (CmmFloat f F32)],
 742             GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
 743             ]
 744     -- in
 745     return (Any F32 code)
 746
 747
 748 getRegister (CmmLit (CmmFloat d F64))
 749   | d == 0.0
 750   = let code dst = unitOL (GLDZ dst)
 751     in  return (Any F64 code)
 752
 753   | d == 1.0
 754   = let code dst = unitOL (GLD1 dst)
 755     in  return (Any F64 code)
 756
 757   | otherwise = do
 758     lbl <- getNewLabelNat
 759     let code dst = toOL [
 760             LDATA ReadOnlyData
 761                         [CmmDataLabel lbl,
 762                          CmmStaticLit (CmmFloat d F64)],
 763             GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
 764             ]
 765     -- in
 766     return (Any F64 code)
 767
 768 #endif /* i386_TARGET_ARCH */
 769
 770 #if x86_64_TARGET_ARCH
 771
 772 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 773    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 774         -- I don't know why there are xorpd, xorps, and pxor instructions.
 775         -- They all appear to do the same thing --SDM
 776    return (Any rep code)
 777
 778 getRegister (CmmLit (CmmFloat f rep)) = do
 779     lbl <- getNewLabelNat
 780     let code dst = toOL [
 781             LDATA ReadOnlyData
 782                         [CmmDataLabel lbl,
 783                          CmmStaticLit (CmmFloat f rep)],
 784             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 785             ]
 786     -- in
 787     return (Any rep code)
 788
 789 #endif /* x86_64_TARGET_ARCH */
 790
 791 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 792
 793 -- catch simple cases of zero- or sign-extended load
 794 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 795   code <- intLoadCode (MOVZxL I8) addr
 796   return (Any I32 code)
 797
 798 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 799   code <- intLoadCode (MOVSxL I8) addr
 800   return (Any I32 code)
 801
 802 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 803   code <- intLoadCode (MOVZxL I16) addr
 804   return (Any I32 code)
 805
 806 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 807   code <- intLoadCode (MOVSxL I16) addr
 808   return (Any I32 code)
 809
 810 #endif
 811
 812 #if x86_64_TARGET_ARCH
 813
 814 -- catch simple cases of zero- or sign-extended load
 815 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 816   code <- intLoadCode (MOVZxL I8) addr
 817   return (Any I64 code)
 818
 819 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 820   code <- intLoadCode (MOVSxL I8) addr
 821   return (Any I64 code)
 822
 823 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 824   code <- intLoadCode (MOVZxL I16) addr
 825   return (Any I64 code)
 826
 827 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 828   code <- intLoadCode (MOVSxL I16) addr
 829   return (Any I64 code)
 830
 831 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 832   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 833   return (Any I64 code)
 834
 835 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 836   code <- intLoadCode (MOVSxL I32) addr
 837   return (Any I64 code)
 838
 839 #endif
 840
 841 #if x86_64_TARGET_ARCH
 842 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 843   x_code <- getAnyReg x
 844   lbl <- getNewLabelNat
 845   let
 846     code dst = x_code dst `appOL` toOL [
 847         -- This is how gcc does it, so it can't be that bad:
 848         LDATA ReadOnlyData16 [
 849                 CmmAlign 16,
 850                 CmmDataLabel lbl,
 851                 CmmStaticLit (CmmInt 0x80000000 I32),
 852                 CmmStaticLit (CmmInt 0 I32),
 853                 CmmStaticLit (CmmInt 0 I32),
 854                 CmmStaticLit (CmmInt 0 I32)
 855         ],
 856         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 857                 -- xorps, so we need the 128-bit constant
 858                 -- ToDo: rip-relative
 859         ]
 860   --
 861   return (Any F32 code)
 862
 863 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 864   x_code <- getAnyReg x
 865   lbl <- getNewLabelNat
 866   let
 867         -- This is how gcc does it, so it can't be that bad:
 868     code dst = x_code dst `appOL` toOL [
 869         LDATA ReadOnlyData16 [
 870                 CmmAlign 16,
 871                 CmmDataLabel lbl,
 872                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 873                 CmmStaticLit (CmmInt 0 I64)
 874         ],
 875                 -- gcc puts an unpck here.  Wonder if we need it.
 876         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 877                 -- xorpd, so we need the 128-bit constant
 878         ]
 879   --
 880   return (Any F64 code)
 881 #endif
 882
 883 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 884
 885 getRegister (CmmMachOp mop [x]) -- unary MachOps
 886   = case mop of
 887 #if i386_TARGET_ARCH
 888       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 889       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 890 #endif
 891
 892       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 893       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 894
 895       -- Nop conversions
 896       -- TODO: these are only nops if the arg is not a fixed register that
 897       -- can't be byte-addressed.
 898       MO_U_Conv I32 I8  -> conversionNop I32 x
 899       MO_S_Conv I32 I8  -> conversionNop I32 x
 900       MO_U_Conv I16 I8  -> conversionNop I16 x
 901       MO_S_Conv I16 I8  -> conversionNop I16 x
 902       MO_U_Conv I32 I16 -> conversionNop I32 x
 903       MO_S_Conv I32 I16 -> conversionNop I32 x
 904 #if x86_64_TARGET_ARCH
 905       MO_U_Conv I64 I32 -> conversionNop I64 x
 906       MO_S_Conv I64 I32 -> conversionNop I64 x
 907       MO_U_Conv I64 I16 -> conversionNop I64 x
 908       MO_S_Conv I64 I16 -> conversionNop I64 x
 909       MO_U_Conv I64 I8  -> conversionNop I64 x
 910       MO_S_Conv I64 I8  -> conversionNop I64 x
 911 #endif
 912
 913       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 914       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 915
 916       -- widenings
 917       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 918       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 919       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 920
 921       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 922       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 923       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 924
 925 #if x86_64_TARGET_ARCH
 926       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 927       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 928       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 929       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 930       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 931       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 932         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 933         -- However, we don't want the register allocator to throw it
 934         -- away as an unnecessary reg-to-reg move, so we keep it in
 935         -- the form of a movzl and print it as a movl later.
 936 #endif
 937
 938 #if i386_TARGET_ARCH
 939       MO_S_Conv F32 F64 -> conversionNop F64 x
 940       MO_S_Conv F64 F32 -> conversionNop F32 x
 941 #else
 942       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 943       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 944 #endif
 945
 946       MO_S_Conv from to
 947         | isFloatingRep from -> coerceFP2Int from to x
 948         | isFloatingRep to   -> coerceInt2FP from to x
 949
 950       other -> pprPanic "getRegister" (pprMachOp mop)
 951    where
 952         -- signed or unsigned extension.
 953         integerExtend from to instr expr = do
 954             (reg,e_code) <- if from == I8 then getByteReg expr
 955                                           else getSomeReg expr
 956             let
 957                 code dst =
 958                   e_code `snocOL`
 959                   instr from (OpReg reg) (OpReg dst)
 960             return (Any to code)
 961
 962         conversionNop new_rep expr
 963             = do e_code <- getRegister expr
 964                  return (swizzleRegisterRep e_code new_rep)
 965
 966
 967 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 968   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 969     case mop of
 970       MO_Eq F32   -> condFltReg EQQ x y
 971       MO_Ne F32   -> condFltReg NE x y
 972       MO_S_Gt F32 -> condFltReg GTT x y
 973       MO_S_Ge F32 -> condFltReg GE x y
 974       MO_S_Lt F32 -> condFltReg LTT x y
 975       MO_S_Le F32 -> condFltReg LE x y
 976
 977       MO_Eq F64   -> condFltReg EQQ x y
 978       MO_Ne F64   -> condFltReg NE x y
 979       MO_S_Gt F64 -> condFltReg GTT x y
 980       MO_S_Ge F64 -> condFltReg GE x y
 981       MO_S_Lt F64 -> condFltReg LTT x y
 982       MO_S_Le F64 -> condFltReg LE x y
 983
 984       MO_Eq rep   -> condIntReg EQQ x y
 985       MO_Ne rep   -> condIntReg NE x y
 986
 987       MO_S_Gt rep -> condIntReg GTT x y
 988       MO_S_Ge rep -> condIntReg GE x y
 989       MO_S_Lt rep -> condIntReg LTT x y
 990       MO_S_Le rep -> condIntReg LE x y
 991
 992       MO_U_Gt rep -> condIntReg GU  x y
 993       MO_U_Ge rep -> condIntReg GEU x y
 994       MO_U_Lt rep -> condIntReg LU  x y
 995       MO_U_Le rep -> condIntReg LEU x y
 996
 997 #if i386_TARGET_ARCH
 998       MO_Add F32 -> trivialFCode F32 GADD x y
 999       MO_Sub F32 -> trivialFCode F32 GSUB x y
1000
1001       MO_Add F64 -> trivialFCode F64 GADD x y
1002       MO_Sub F64 -> trivialFCode F64 GSUB x y
1003
1004       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1005       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1006 #endif
1007
1008 #if x86_64_TARGET_ARCH
1009       MO_Add F32 -> trivialFCode F32 ADD x y
1010       MO_Sub F32 -> trivialFCode F32 SUB x y
1011
1012       MO_Add F64 -> trivialFCode F64 ADD x y
1013       MO_Sub F64 -> trivialFCode F64 SUB x y
1014
1015       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1016       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1017 #endif
1018
1019       MO_Add rep -> add_code rep x y
1020       MO_Sub rep -> sub_code rep x y
1021
1022       MO_S_Quot rep -> div_code rep True  True  x y
1023       MO_S_Rem  rep -> div_code rep True  False x y
1024       MO_U_Quot rep -> div_code rep False True  x y
1025       MO_U_Rem  rep -> div_code rep False False x y
1026
1027 #if i386_TARGET_ARCH
1028       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1029       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1030 #endif
1031
1032 #if x86_64_TARGET_ARCH
1033       MO_Mul   F32 -> trivialFCode F32 MUL x y
1034       MO_Mul   F64 -> trivialFCode F64 MUL x y
1035 #endif
1036
1037       MO_Mul   rep -> let op = IMUL rep in
1038                       trivialCode rep op (Just op) x y
1039
1040       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1041
1042       MO_And rep -> let op = AND rep in
1043                     trivialCode rep op (Just op) x y
1044       MO_Or  rep -> let op = OR  rep in
1045                     trivialCode rep op (Just op) x y
1046       MO_Xor rep -> let op = XOR rep in
1047                     trivialCode rep op (Just op) x y
1048
1049         {- Shift ops on x86s have constraints on their source, it
1050            either has to be Imm, CL or 1
1051             => trivialCode is not restrictive enough (sigh.)
1052         -}
1053       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1054       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1055       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1056
1057       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1058   where
1059     --------------------
1060     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1061     imulMayOflo rep a b = do
1062          (a_reg, a_code) <- getNonClobberedReg a
1063          b_code <- getAnyReg b
1064          let
1065              shift_amt  = case rep of
1066                            I32 -> 31
1067                            I64 -> 63
1068                            _ -> panic "shift_amt"
1069
1070              code = a_code `appOL` b_code eax `appOL`
1071                         toOL [
1072                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1073                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1074                                 -- sign extend lower part
1075                            SUB rep (OpReg edx) (OpReg eax)
1076                                 -- compare against upper
1077                            -- eax==0 if high part == sign extended low part
1078                         ]
1079          -- in
1080          return (Fixed rep eax code)
1081
1082     --------------------
1083     shift_code :: MachRep
1084                -> (Operand -> Operand -> Instr)
1085                -> CmmExpr
1086                -> CmmExpr
1087                -> NatM Register
1088
1089     {- Case1: shift length as immediate -}
1090     shift_code rep instr x y@(CmmLit lit) = do
1091           x_code <- getAnyReg x
1092           let
1093                code dst
1094                   = x_code dst `snocOL`
1095                     instr (OpImm (litToImm lit)) (OpReg dst)
1096           -- in
1097           return (Any rep code)
1098
1099     {- Case2: shift length is complex (non-immediate) -}
1100     shift_code rep instr x y{-amount-} = do
1101         (x_reg, x_code) <- getNonClobberedReg x
1102         y_code <- getAnyReg y
1103         let
1104            code = x_code `appOL`
1105                   y_code ecx `snocOL`
1106                   instr (OpReg ecx) (OpReg x_reg)
1107         -- in
1108         return (Fixed rep x_reg code)
1109
1110     --------------------
1111     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1112     add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1113     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1114
1115     --------------------
1116     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1117     sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1118     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1119
1120     -- our three-operand add instruction:
1121     add_int rep x y = do
1122         (x_reg, x_code) <- getSomeReg x
1123         let
1124             imm = ImmInt (fromInteger y)
1125             code dst
1126                = x_code `snocOL`
1127                  LEA rep
1128                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1129                         (OpReg dst)
1130         --
1131         return (Any rep code)
1132
1133     ----------------------
1134     div_code rep signed quotient x y = do
1135            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1136            x_code <- getAnyReg x
1137            let
1138              widen | signed    = CLTD rep
1139                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1140
1141              instr | signed    = IDIV
1142                    | otherwise = DIV
1143
1144              code = y_code `appOL`
1145                     x_code eax `appOL`
1146                     toOL [widen, instr rep y_op]
1147
1148              result | quotient  = eax
1149                     | otherwise = edx
1150
1151            -- in
1152            return (Fixed rep result code)
1153
1154
1155 getRegister (CmmLoad mem pk)
1156   | isFloatingRep pk
1157   = do
1158     Amode src mem_code <- getAmode mem
1159     let
1160         code dst = mem_code `snocOL`
1161                    IF_ARCH_i386(GLD pk src dst,
1162                                 MOV pk (OpAddr src) (OpReg dst))
1163     --
1164     return (Any pk code)
1165
1166 #if i386_TARGET_ARCH
1167 getRegister (CmmLoad mem pk)
1168   | pk /= I64
1169   = do
1170     code <- intLoadCode (instr pk) mem
1171     return (Any pk code)
1172   where
1173         instr I8  = MOVZxL pk
1174         instr I16 = MOV I16
1175         instr I32 = MOV I32
1176         -- we always zero-extend 8-bit loads, if we
1177         -- can't think of anything better.  This is because
1178         -- we can't guarantee access to an 8-bit variant of every register
1179         -- (esi and edi don't have 8-bit variants), so to make things
1180         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1181 #endif
1182
1183 #if x86_64_TARGET_ARCH
1184 -- Simpler memory load code on x86_64
1185 getRegister (CmmLoad mem pk)
1186   = do
1187     code <- intLoadCode (MOV pk) mem
1188     return (Any pk code)
1189 #endif
1190
1191 getRegister (CmmLit (CmmInt 0 rep))
1192   = let
1193         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1194         adj_rep = case rep of I64 -> I32; _ -> rep
1195         rep1 = IF_ARCH_i386( rep, adj_rep )
1196         code dst
1197            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1198     in
1199         return (Any rep code)
1200
1201 #if x86_64_TARGET_ARCH
1202   -- optimisation for loading small literals on x86_64: take advantage
1203   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1204   -- instruction forms are shorter.
1205 getRegister (CmmLit lit)
1206   | I64 <- cmmLitRep lit, not (isBigLit lit)
1207   = let
1208         imm = litToImm lit
1209         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1210     in
1211         return (Any I64 code)
1212   where
1213    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1214    isBigLit _ = False
1215         -- note1: not the same as is64BitLit, because that checks for
1216         -- signed literals that fit in 32 bits, but we want unsigned
1217         -- literals here.
1218         -- note2: all labels are small, because we're assuming the
1219         -- small memory model (see gcc docs, -mcmodel=small).
1220 #endif
1221
1222 getRegister (CmmLit lit)
1223   = let
1224         rep = cmmLitRep lit
1225         imm = litToImm lit
1226         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1227     in
1228         return (Any rep code)
1229
1230 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1231
1232
1233 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1234    -> NatM (Reg -> InstrBlock)
1235 intLoadCode instr mem = do
1236   Amode src mem_code <- getAmode mem
1237   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1238
1239 -- Compute an expression into *any* register, adding the appropriate
1240 -- move instruction if necessary.
1241 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1242 getAnyReg expr = do
1243   r <- getRegister expr
1244   anyReg r
1245
1246 anyReg :: Register -> NatM (Reg -> InstrBlock)
1247 anyReg (Any _ code)          = return code
1248 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1249
1250 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1251 -- Fixed registers might not be byte-addressable, so we make sure we've
1252 -- got a temporary, inserting an extra reg copy if necessary.
1253 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1254 #if x86_64_TARGET_ARCH
1255 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1256 #else
1257 getByteReg expr = do
1258   r <- getRegister expr
1259   case r of
1260     Any rep code -> do
1261         tmp <- getNewRegNat rep
1262         return (tmp, code tmp)
1263     Fixed rep reg code
1264         | isVirtualReg reg -> return (reg,code)
1265         | otherwise -> do
1266             tmp <- getNewRegNat rep
1267             return (tmp, code `snocOL` reg2reg rep reg tmp)
1268         -- ToDo: could optimise slightly by checking for byte-addressable
1269         -- real registers, but that will happen very rarely if at all.
1270 #endif
1271
1272 -- Another variant: this time we want the result in a register that cannot
1273 -- be modified by code to evaluate an arbitrary expression.
1274 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1275 getNonClobberedReg expr = do
1276   r <- getRegister expr
1277   case r of
1278     Any rep code -> do
1279         tmp <- getNewRegNat rep
1280         return (tmp, code tmp)
1281     Fixed rep reg code
1282         -- only free regs can be clobbered
1283         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1284                 tmp <- getNewRegNat rep
1285                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1286         | otherwise ->
1287                 return (reg, code)
1288
1289 reg2reg :: MachRep -> Reg -> Reg -> Instr
1290 reg2reg rep src dst
1291 #if i386_TARGET_ARCH
1292   | isFloatingRep rep = GMOV src dst
1293 #endif
1294   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1295
1296 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1297
1298 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1299
1300 #if sparc_TARGET_ARCH
1301
1302 getRegister (CmmLit (CmmFloat f F32)) = do
1303     lbl <- getNewLabelNat
1304     let code dst = toOL [
1305             LDATA ReadOnlyData
1306                         [CmmDataLabel lbl,
1307                          CmmStaticLit (CmmFloat f F32)],
1308             SETHI (HI (ImmCLbl lbl)) dst,
1309             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1310     return (Any F32 code)
1311
1312 getRegister (CmmLit (CmmFloat d F64)) = do
1313     lbl <- getNewLabelNat
1314     let code dst = toOL [
1315             LDATA ReadOnlyData
1316                         [CmmDataLabel lbl,
1317                          CmmStaticLit (CmmFloat d F64)],
1318             SETHI (HI (ImmCLbl lbl)) dst,
1319             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1320     return (Any F64 code)
1321
1322 getRegister (CmmMachOp mop [x]) -- unary MachOps
1323   = case mop of
1324       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1325       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1326
1327       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1328       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1329
1330       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1331
1332       MO_U_Conv F64 F32-> coerceDbl2Flt x
1333       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1334
1335       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1336       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1337       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1338       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1339
1340       -- Conversions which are a nop on sparc
1341       MO_U_Conv from to
1342         | from == to   -> conversionNop to   x
1343       MO_U_Conv I32 to -> conversionNop to   x
1344       MO_S_Conv I32 to -> conversionNop to   x
1345
1346       -- widenings
1347       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1348       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1349       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1350       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1351
1352       other_op -> panic "Unknown unary mach op"
1353     where
1354         -- XXX SLL/SRL?
1355         integerExtend signed from to expr = do
1356            (reg, e_code) <- getSomeReg expr
1357            let
1358                code dst =
1359                    e_code `snocOL`
1360                    ((if signed then SRA else SRL)
1361                           reg (RIImm (ImmInt 0)) dst)
1362            return (Any to code)
1363         conversionNop new_rep expr
1364             = do e_code <- getRegister expr
1365                  return (swizzleRegisterRep e_code new_rep)
1366
1367 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1368   = case mop of
1369       MO_Eq F32 -> condFltReg EQQ x y
1370       MO_Ne F32 -> condFltReg NE x y
1371
1372       MO_S_Gt F32 -> condFltReg GTT x y
1373       MO_S_Ge F32 -> condFltReg GE x y
1374       MO_S_Lt F32 -> condFltReg LTT x y
1375       MO_S_Le F32 -> condFltReg LE x y
1376
1377       MO_Eq F64 -> condFltReg EQQ x y
1378       MO_Ne F64 -> condFltReg NE x y
1379
1380       MO_S_Gt F64 -> condFltReg GTT x y
1381       MO_S_Ge F64 -> condFltReg GE x y
1382       MO_S_Lt F64 -> condFltReg LTT x y
1383       MO_S_Le F64 -> condFltReg LE x y
1384
1385       MO_Eq rep -> condIntReg EQQ x y
1386       MO_Ne rep -> condIntReg NE x y
1387
1388       MO_S_Gt rep -> condIntReg GTT x y
1389       MO_S_Ge rep -> condIntReg GE x y
1390       MO_S_Lt rep -> condIntReg LTT x y
1391       MO_S_Le rep -> condIntReg LE x y
1392
1393       MO_U_Gt I32  -> condIntReg GTT x y
1394       MO_U_Ge I32  -> condIntReg GE x y
1395       MO_U_Lt I32  -> condIntReg LTT x y
1396       MO_U_Le I32  -> condIntReg LE x y
1397
1398       MO_U_Gt I16 -> condIntReg GU  x y
1399       MO_U_Ge I16 -> condIntReg GEU x y
1400       MO_U_Lt I16 -> condIntReg LU  x y
1401       MO_U_Le I16 -> condIntReg LEU x y
1402
1403       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1404       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1405
1406       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1407 {-
1408       -- ToDo: teach about V8+ SPARC div instructions
1409       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1410       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1411       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1412       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1413 -}
1414       MO_Add F32  -> trivialFCode F32 FADD  x y
1415       MO_Sub F32   -> trivialFCode F32  FSUB x y
1416       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1417       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1418
1419       MO_Add F64   -> trivialFCode F64 FADD  x y
1420       MO_Sub F64   -> trivialFCode F64  FSUB x y
1421       MO_Mul F64   -> trivialFCode F64  FMUL x y
1422       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1423
1424       MO_And rep   -> trivialCode rep (AND False) x y
1425       MO_Or rep    -> trivialCode rep (OR  False) x y
1426       MO_Xor rep   -> trivialCode rep (XOR False) x y
1427
1428       MO_Mul rep -> trivialCode rep (SMUL False) x y
1429
1430       MO_Shl rep   -> trivialCode rep SLL  x y
1431       MO_U_Shr rep   -> trivialCode rep SRL x y
1432       MO_S_Shr rep   -> trivialCode rep SRA x y
1433
1434 {-
1435       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1436                                          [promote x, promote y])
1437                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1438       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1439                                         [x, y])
1440 -}
1441       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1442   where
1443     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1444
1445     --------------------
1446     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1447     imulMayOflo rep a b = do
1448          (a_reg, a_code) <- getSomeReg a
1449          (b_reg, b_code) <- getSomeReg b
1450          res_lo <- getNewRegNat I32
1451          res_hi <- getNewRegNat I32
1452          let
1453             shift_amt  = case rep of
1454                           I32 -> 31
1455                           I64 -> 63
1456                           _ -> panic "shift_amt"
1457             code dst = a_code `appOL` b_code `appOL`
1458                        toOL [
1459                            SMUL False a_reg (RIReg b_reg) res_lo,
1460                            RDY res_hi,
1461                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1462                            SUB False False res_lo (RIReg res_hi) dst
1463                         ]
1464          return (Any I32 code)
1465
1466 getRegister (CmmLoad mem pk) = do
1467     Amode src code <- getAmode mem
1468     let
1469         code__2 dst = code `snocOL` LD pk src dst
1470     return (Any pk code__2)
1471
1472 getRegister (CmmLit (CmmInt i _))
1473   | fits13Bits i
1474   = let
1475         src = ImmInt (fromInteger i)
1476         code dst = unitOL (OR False g0 (RIImm src) dst)
1477     in
1478         return (Any I32 code)
1479
1480 getRegister (CmmLit lit)
1481   = let rep = cmmLitRep lit
1482         imm = litToImm lit
1483         code dst = toOL [
1484             SETHI (HI imm) dst,
1485             OR False dst (RIImm (LO imm)) dst]
1486     in return (Any I32 code)
1487
1488 #endif /* sparc_TARGET_ARCH */
1489
1490 #if powerpc_TARGET_ARCH
1491 getRegister (CmmLoad mem pk)
1492   | pk /= I64
1493   = do
1494         Amode addr addr_code <- getAmode mem
1495         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1496                        addr_code `snocOL` LD pk dst addr
1497         return (Any pk code)
1498
1499 -- catch simple cases of zero- or sign-extended load
1500 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1501     Amode addr addr_code <- getAmode mem
1502     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1503
1504 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1505
1506 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1507     Amode addr addr_code <- getAmode mem
1508     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1509
1510 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1511     Amode addr addr_code <- getAmode mem
1512     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1513
1514 getRegister (CmmMachOp mop [x]) -- unary MachOps
1515   = case mop of
1516       MO_Not rep   -> trivialUCode rep NOT x
1517
1518       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1519       MO_S_Conv F32 F64 -> conversionNop F64 x
1520
1521       MO_S_Conv from to
1522         | from == to         -> conversionNop to x
1523         | isFloatingRep from -> coerceFP2Int from to x
1524         | isFloatingRep to   -> coerceInt2FP from to x
1525
1526         -- narrowing is a nop: we treat the high bits as undefined
1527       MO_S_Conv I32 to -> conversionNop to x
1528       MO_S_Conv I16 I8 -> conversionNop I8 x
1529       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1530       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1531
1532       MO_U_Conv from to
1533         | from == to -> conversionNop to x
1534         -- narrowing is a nop: we treat the high bits as undefined
1535       MO_U_Conv I32 to -> conversionNop to x
1536       MO_U_Conv I16 I8 -> conversionNop I8 x
1537       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1538       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1539
1540       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1541       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1542       MO_S_Neg rep      -> trivialUCode rep NEG x
1543
1544     where
1545         conversionNop new_rep expr
1546             = do e_code <- getRegister expr
1547                  return (swizzleRegisterRep e_code new_rep)
1548
1549 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1550   = case mop of
1551       MO_Eq F32 -> condFltReg EQQ x y
1552       MO_Ne F32 -> condFltReg NE  x y
1553
1554       MO_S_Gt F32 -> condFltReg GTT x y
1555       MO_S_Ge F32 -> condFltReg GE  x y
1556       MO_S_Lt F32 -> condFltReg LTT x y
1557       MO_S_Le F32 -> condFltReg LE  x y
1558
1559       MO_Eq F64 -> condFltReg EQQ x y
1560       MO_Ne F64 -> condFltReg NE  x y
1561
1562       MO_S_Gt F64 -> condFltReg GTT x y
1563       MO_S_Ge F64 -> condFltReg GE  x y
1564       MO_S_Lt F64 -> condFltReg LTT x y
1565       MO_S_Le F64 -> condFltReg LE  x y
1566
1567       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1568       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1569
1570       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1571       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1572       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1573       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1574
1575       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1576       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1577       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1578       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1579
1580       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1581       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1582       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1583       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1584
1585       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1586       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1587       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1588       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1589
1590          -- optimize addition with 32-bit immediate
1591          -- (needed for PIC)
1592       MO_Add I32 ->
1593         case y of
1594           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1595             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1596           CmmLit lit
1597             -> do
1598                 (src, srcCode) <- getSomeReg x
1599                 let imm = litToImm lit
1600                     code dst = srcCode `appOL` toOL [
1601                                     ADDIS dst src (HA imm),
1602                                     ADD dst dst (RIImm (LO imm))
1603                                 ]
1604                 return (Any I32 code)
1605           _ -> trivialCode I32 True ADD x y
1606
1607       MO_Add rep -> trivialCode rep True ADD x y
1608       MO_Sub rep ->
1609         case y of    -- subfi ('substract from' with immediate) doesn't exist
1610           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1611             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1612           _ -> trivialCodeNoImm rep SUBF y x
1613
1614       MO_Mul rep -> trivialCode rep True MULLW x y
1615
1616       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1617
1618       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1619       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1620
1621       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1622       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1623
1624       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1625       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1626
1627       MO_And rep   -> trivialCode rep False AND x y
1628       MO_Or rep    -> trivialCode rep False OR x y
1629       MO_Xor rep   -> trivialCode rep False XOR x y
1630
1631       MO_Shl rep   -> trivialCode rep False SLW x y
1632       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1633       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1634
1635 getRegister (CmmLit (CmmInt i rep))
1636   | Just imm <- makeImmediate rep True i
1637   = let
1638         code dst = unitOL (LI dst imm)
1639     in
1640         return (Any rep code)
1641
1642 getRegister (CmmLit (CmmFloat f frep)) = do
1643     lbl <- getNewLabelNat
1644     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1645     Amode addr addr_code <- getAmode dynRef
1646     let code dst =
1647             LDATA ReadOnlyData  [CmmDataLabel lbl,
1648                                  CmmStaticLit (CmmFloat f frep)]
1649             `consOL` (addr_code `snocOL` LD frep dst addr)
1650     return (Any frep code)
1651
1652 getRegister (CmmLit lit)
1653   = let rep = cmmLitRep lit
1654         imm = litToImm lit
1655         code dst = toOL [
1656               LIS dst (HI imm),
1657               OR dst dst (RIImm (LO imm))
1658           ]
1659     in return (Any rep code)
1660
1661 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1662
1663     -- extend?Rep: wrap integer expression of type rep
1664     -- in a conversion to I32
1665 extendSExpr I32 x = x
1666 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1667 extendUExpr I32 x = x
1668 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1669
1670 #endif /* powerpc_TARGET_ARCH */
1671
1672
1673 -- -----------------------------------------------------------------------------
1674 --  The 'Amode' type: Memory addressing modes passed up the tree.
1675
1676 data Amode = Amode AddrMode InstrBlock
1677
1678 {-
1679 Now, given a tree (the argument to an CmmLoad) that references memory,
1680 produce a suitable addressing mode.
1681
1682 A Rule of the Game (tm) for Amodes: use of the addr bit must
1683 immediately follow use of the code part, since the code part puts
1684 values in registers which the addr then refers to.  So you can't put
1685 anything in between, lest it overwrite some of those registers.  If
1686 you need to do some other computation between the code part and use of
1687 the addr bit, first store the effective address from the amode in a
1688 temporary, then do the other computation, and then use the temporary:
1689
1690     code
1691     LEA amode, tmp
1692     ... other computation ...
1693     ... (tmp) ...
1694 -}
1695
1696 getAmode :: CmmExpr -> NatM Amode
1697 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1698
1699 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1700
1701 #if alpha_TARGET_ARCH
1702
1703 getAmode (StPrim IntSubOp [x, StInt i])
1704   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1705     getRegister x               `thenNat` \ register ->
1706     let
1707         code = registerCode register tmp
1708         reg  = registerName register tmp
1709         off  = ImmInt (-(fromInteger i))
1710     in
1711     return (Amode (AddrRegImm reg off) code)
1712
1713 getAmode (StPrim IntAddOp [x, StInt i])
1714   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1715     getRegister x               `thenNat` \ register ->
1716     let
1717         code = registerCode register tmp
1718         reg  = registerName register tmp
1719         off  = ImmInt (fromInteger i)
1720     in
1721     return (Amode (AddrRegImm reg off) code)
1722
1723 getAmode leaf
1724   | isJust imm
1725   = return (Amode (AddrImm imm__2) id)
1726   where
1727     imm = maybeImm leaf
1728     imm__2 = case imm of Just x -> x
1729
1730 getAmode other
1731   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1732     getRegister other           `thenNat` \ register ->
1733     let
1734         code = registerCode register tmp
1735         reg  = registerName register tmp
1736     in
1737     return (Amode (AddrReg reg) code)
1738
1739 #endif /* alpha_TARGET_ARCH */
1740
1741 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1742
1743 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1744
1745 -- This is all just ridiculous, since it carefully undoes
1746 -- what mangleIndexTree has just done.
1747 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1748   | not (is64BitLit lit)
1749   -- ASSERT(rep == I32)???
1750   = do (x_reg, x_code) <- getSomeReg x
1751        let off = ImmInt (-(fromInteger i))
1752        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1753
1754 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1755   | not (is64BitLit lit)
1756   -- ASSERT(rep == I32)???
1757   = do (x_reg, x_code) <- getSomeReg x
1758        let off = ImmInt (fromInteger i)
1759        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1760
1761 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1762 -- recognised by the next rule.
1763 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1764                                   b@(CmmLit _)])
1765   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1766
1767 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1768                                         [y, CmmLit (CmmInt shift _)]])
1769   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1770   = do (x_reg, x_code) <- getNonClobberedReg x
1771         -- x must be in a temp, because it has to stay live over y_code
1772         -- we could compre x_reg and y_reg and do something better here...
1773        (y_reg, y_code) <- getSomeReg y
1774        let
1775            code = x_code `appOL` y_code
1776            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1777        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1778                code)
1779
1780 getAmode (CmmLit lit) | not (is64BitLit lit)
1781   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1782
1783 getAmode expr = do
1784   (reg,code) <- getSomeReg expr
1785   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1786
1787 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1788
1789 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1790
1791 #if sparc_TARGET_ARCH
1792
1793 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1794   | fits13Bits (-i)
1795   = do
1796        (reg, code) <- getSomeReg x
1797        let
1798          off  = ImmInt (-(fromInteger i))
1799        return (Amode (AddrRegImm reg off) code)
1800
1801
1802 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1803   | fits13Bits i
1804   = do
1805        (reg, code) <- getSomeReg x
1806        let
1807          off  = ImmInt (fromInteger i)
1808        return (Amode (AddrRegImm reg off) code)
1809
1810 getAmode (CmmMachOp (MO_Add rep) [x, y])
1811   = do
1812     (regX, codeX) <- getSomeReg x
1813     (regY, codeY) <- getSomeReg y
1814     let
1815         code = codeX `appOL` codeY
1816     return (Amode (AddrRegReg regX regY) code)
1817
1818 -- XXX Is this same as "leaf" in Stix?
1819 getAmode (CmmLit lit)
1820   = do
1821       tmp <- getNewRegNat I32
1822       let
1823         code = unitOL (SETHI (HI imm__2) tmp)
1824       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1825       where
1826          imm__2 = litToImm lit
1827
1828 getAmode other
1829   = do
1830        (reg, code) <- getSomeReg other
1831        let
1832             off  = ImmInt 0
1833        return (Amode (AddrRegImm reg off) code)
1834
1835 #endif /* sparc_TARGET_ARCH */
1836
1837 #ifdef powerpc_TARGET_ARCH
1838 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1839   | Just off <- makeImmediate I32 True (-i)
1840   = do
1841         (reg, code) <- getSomeReg x
1842         return (Amode (AddrRegImm reg off) code)
1843
1844
1845 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1846   | Just off <- makeImmediate I32 True i
1847   = do
1848         (reg, code) <- getSomeReg x
1849         return (Amode (AddrRegImm reg off) code)
1850
1851    -- optimize addition with 32-bit immediate
1852    -- (needed for PIC)
1853 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1854   = do
1855         tmp <- getNewRegNat I32
1856         (src, srcCode) <- getSomeReg x
1857         let imm = litToImm lit
1858             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1859         return (Amode (AddrRegImm tmp (LO imm)) code)
1860
1861 getAmode (CmmLit lit)
1862   = do
1863         tmp <- getNewRegNat I32
1864         let imm = litToImm lit
1865             code = unitOL (LIS tmp (HA imm))
1866         return (Amode (AddrRegImm tmp (LO imm)) code)
1867
1868 getAmode (CmmMachOp (MO_Add I32) [x, y])
1869   = do
1870         (regX, codeX) <- getSomeReg x
1871         (regY, codeY) <- getSomeReg y
1872         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1873
1874 getAmode other
1875   = do
1876         (reg, code) <- getSomeReg other
1877         let
1878             off  = ImmInt 0
1879         return (Amode (AddrRegImm reg off) code)
1880 #endif /* powerpc_TARGET_ARCH */
1881
1882 -- -----------------------------------------------------------------------------
1883 -- getOperand: sometimes any operand will do.
1884
1885 -- getNonClobberedOperand: the value of the operand will remain valid across
1886 -- the computation of an arbitrary expression, unless the expression
1887 -- is computed directly into a register which the operand refers to
1888 -- (see trivialCode where this function is used for an example).
1889
1890 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1891
1892 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1893 #if x86_64_TARGET_ARCH
1894 getNonClobberedOperand (CmmLit lit)
1895   | isSuitableFloatingPointLit lit = do
1896     lbl <- getNewLabelNat
1897     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1898                                            CmmStaticLit lit])
1899     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1900 #endif
1901 getNonClobberedOperand (CmmLit lit)
1902   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1903     return (OpImm (litToImm lit), nilOL)
1904 getNonClobberedOperand (CmmLoad mem pk)
1905   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1906     Amode src mem_code <- getAmode mem
1907     (src',save_code) <-
1908         if (amodeCouldBeClobbered src)
1909                 then do
1910                    tmp <- getNewRegNat wordRep
1911                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1912                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1913                 else
1914                    return (src, nilOL)
1915     return (OpAddr src', save_code `appOL` mem_code)
1916 getNonClobberedOperand e = do
1917     (reg, code) <- getNonClobberedReg e
1918     return (OpReg reg, code)
1919
1920 amodeCouldBeClobbered :: AddrMode -> Bool
1921 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1922
1923 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1924 regClobbered _ = False
1925
1926 -- getOperand: the operand is not required to remain valid across the
1927 -- computation of an arbitrary expression.
1928 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1929 #if x86_64_TARGET_ARCH
1930 getOperand (CmmLit lit)
1931   | isSuitableFloatingPointLit lit = do
1932     lbl <- getNewLabelNat
1933     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1934                                            CmmStaticLit lit])
1935     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1936 #endif
1937 getOperand (CmmLit lit)
1938   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1939     return (OpImm (litToImm lit), nilOL)
1940 getOperand (CmmLoad mem pk)
1941   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1942     Amode src mem_code <- getAmode mem
1943     return (OpAddr src, mem_code)
1944 getOperand e = do
1945     (reg, code) <- getSomeReg e
1946     return (OpReg reg, code)
1947
1948 isOperand :: CmmExpr -> Bool
1949 isOperand (CmmLoad _ _) = True
1950 isOperand (CmmLit lit)  = not (is64BitLit lit)
1951                           || isSuitableFloatingPointLit lit
1952 isOperand _             = False
1953
1954 -- if we want a floating-point literal as an operand, we can
1955 -- use it directly from memory.  However, if the literal is
1956 -- zero, we're better off generating it into a register using
1957 -- xor.
1958 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1959 isSuitableFloatingPointLit _ = False
1960
1961 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1962 getRegOrMem (CmmLoad mem pk)
1963   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1964     Amode src mem_code <- getAmode mem
1965     return (OpAddr src, mem_code)
1966 getRegOrMem e = do
1967     (reg, code) <- getNonClobberedReg e
1968     return (OpReg reg, code)
1969
1970 #if x86_64_TARGET_ARCH
1971 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
1972    -- assume that labels are in the range 0-2^31-1: this assumes the
1973    -- small memory model (see gcc docs, -mcmodel=small).
1974 #endif
1975 is64BitLit x = False
1976 #endif
1977
1978 -- -----------------------------------------------------------------------------
1979 --  The 'CondCode' type:  Condition codes passed up the tree.
1980
1981 data CondCode = CondCode Bool Cond InstrBlock
1982
1983 -- Set up a condition code for a conditional branch.
1984
1985 getCondCode :: CmmExpr -> NatM CondCode
1986
1987 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1988
1989 #if alpha_TARGET_ARCH
1990 getCondCode = panic "MachCode.getCondCode: not on Alphas"
1991 #endif /* alpha_TARGET_ARCH */
1992
1993 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1994
1995 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
1996 -- yes, they really do seem to want exactly the same!
1997
1998 getCondCode (CmmMachOp mop [x, y])
1999   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2000     case mop of
2001       MO_Eq F32 -> condFltCode EQQ x y
2002       MO_Ne F32 -> condFltCode NE  x y
2003
2004       MO_S_Gt F32 -> condFltCode GTT x y
2005       MO_S_Ge F32 -> condFltCode GE  x y
2006       MO_S_Lt F32 -> condFltCode LTT x y
2007       MO_S_Le F32 -> condFltCode LE  x y
2008
2009       MO_Eq F64 -> condFltCode EQQ x y
2010       MO_Ne F64 -> condFltCode NE  x y
2011
2012       MO_S_Gt F64 -> condFltCode GTT x y
2013       MO_S_Ge F64 -> condFltCode GE  x y
2014       MO_S_Lt F64 -> condFltCode LTT x y
2015       MO_S_Le F64 -> condFltCode LE  x y
2016
2017       MO_Eq rep -> condIntCode EQQ  x y
2018       MO_Ne rep -> condIntCode NE   x y
2019
2020       MO_S_Gt rep -> condIntCode GTT  x y
2021       MO_S_Ge rep -> condIntCode GE   x y
2022       MO_S_Lt rep -> condIntCode LTT  x y
2023       MO_S_Le rep -> condIntCode LE   x y
2024
2025       MO_U_Gt rep -> condIntCode GU   x y
2026       MO_U_Ge rep -> condIntCode GEU  x y
2027       MO_U_Lt rep -> condIntCode LU   x y
2028       MO_U_Le rep -> condIntCode LEU  x y
2029
2030       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2031
2032 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2033
2034 #elif powerpc_TARGET_ARCH
2035
2036 -- almost the same as everywhere else - but we need to
2037 -- extend small integers to 32 bit first
2038
2039 getCondCode (CmmMachOp mop [x, y])
2040   = case mop of
2041       MO_Eq F32 -> condFltCode EQQ x y
2042       MO_Ne F32 -> condFltCode NE  x y
2043
2044       MO_S_Gt F32 -> condFltCode GTT x y
2045       MO_S_Ge F32 -> condFltCode GE  x y
2046       MO_S_Lt F32 -> condFltCode LTT x y
2047       MO_S_Le F32 -> condFltCode LE  x y
2048
2049       MO_Eq F64 -> condFltCode EQQ x y
2050       MO_Ne F64 -> condFltCode NE  x y
2051
2052       MO_S_Gt F64 -> condFltCode GTT x y
2053       MO_S_Ge F64 -> condFltCode GE  x y
2054       MO_S_Lt F64 -> condFltCode LTT x y
2055       MO_S_Le F64 -> condFltCode LE  x y
2056
2057       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2058       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2059
2060       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2061       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2062       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2063       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2064
2065       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2066       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2067       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2068       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2069
2070       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2071
2072 getCondCode other =  panic "getCondCode(2)(powerpc)"
2073
2074
2075 #endif
2076
2077
2078 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2079 -- passed back up the tree.
2080
2081 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2082
2083 #if alpha_TARGET_ARCH
2084 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2085 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2086 #endif /* alpha_TARGET_ARCH */
2087
2088 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2089 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2090
2091 -- memory vs immediate
2092 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2093     Amode x_addr x_code <- getAmode x
2094     let
2095         imm  = litToImm lit
2096         code = x_code `snocOL`
2097                   CMP pk (OpImm imm) (OpAddr x_addr)
2098     --
2099     return (CondCode False cond code)
2100
2101 -- anything vs zero
2102 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2103     (x_reg, x_code) <- getSomeReg x
2104     let
2105         code = x_code `snocOL`
2106                   TEST pk (OpReg x_reg) (OpReg x_reg)
2107     --
2108     return (CondCode False cond code)
2109
2110 -- anything vs operand
2111 condIntCode cond x y | isOperand y = do
2112     (x_reg, x_code) <- getNonClobberedReg x
2113     (y_op,  y_code) <- getOperand y
2114     let
2115         code = x_code `appOL` y_code `snocOL`
2116                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2117     -- in
2118     return (CondCode False cond code)
2119
2120 -- anything vs anything
2121 condIntCode cond x y = do
2122   (y_reg, y_code) <- getNonClobberedReg y
2123   (x_op, x_code) <- getRegOrMem x
2124   let
2125         code = y_code `appOL`
2126                x_code `snocOL`
2127                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2128   -- in
2129   return (CondCode False cond code)
2130 #endif
2131
2132 #if i386_TARGET_ARCH
2133 condFltCode cond x y
2134   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2135   (x_reg, x_code) <- getNonClobberedReg x
2136   (y_reg, y_code) <- getSomeReg y
2137   let
2138         code = x_code `appOL` y_code `snocOL`
2139                 GCMP cond x_reg y_reg
2140   -- The GCMP insn does the test and sets the zero flag if comparable
2141   -- and true.  Hence we always supply EQQ as the condition to test.
2142   return (CondCode True EQQ code)
2143 #endif /* i386_TARGET_ARCH */
2144
2145 #if x86_64_TARGET_ARCH
2146 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2147 -- an operand, but the right must be a reg.  We can probably do better
2148 -- than this general case...
2149 condFltCode cond x y = do
2150   (x_reg, x_code) <- getNonClobberedReg x
2151   (y_op, y_code) <- getOperand y
2152   let
2153         code = x_code `appOL`
2154                y_code `snocOL`
2155                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2156         -- NB(1): we need to use the unsigned comparison operators on the
2157         -- result of this comparison.
2158   -- in
2159   return (CondCode True (condToUnsigned cond) code)
2160 #endif
2161
2162 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2163
2164 #if sparc_TARGET_ARCH
2165
2166 condIntCode cond x (CmmLit (CmmInt y rep))
2167   | fits13Bits y
2168   = do
2169        (src1, code) <- getSomeReg x
2170        let
2171            src2 = ImmInt (fromInteger y)
2172            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2173        return (CondCode False cond code')
2174
2175 condIntCode cond x y = do
2176     (src1, code1) <- getSomeReg x
2177     (src2, code2) <- getSomeReg y
2178     let
2179         code__2 = code1 `appOL` code2 `snocOL`
2180                   SUB False True src1 (RIReg src2) g0
2181     return (CondCode False cond code__2)
2182
2183 -----------
2184 condFltCode cond x y = do
2185     (src1, code1) <- getSomeReg x
2186     (src2, code2) <- getSomeReg y
2187     tmp <- getNewRegNat F64
2188     let
2189         promote x = FxTOy F32 F64 x tmp
2190
2191         pk1   = cmmExprRep x
2192         pk2   = cmmExprRep y
2193
2194         code__2 =
2195                 if pk1 == pk2 then
2196                     code1 `appOL` code2 `snocOL`
2197                     FCMP True pk1 src1 src2
2198                 else if pk1 == F32 then
2199                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2200                     FCMP True F64 tmp src2
2201                 else
2202                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2203                     FCMP True F64 src1 tmp
2204     return (CondCode True cond code__2)
2205
2206 #endif /* sparc_TARGET_ARCH */
2207
2208 #if powerpc_TARGET_ARCH
2209 --  ###FIXME: I16 and I8!
2210 condIntCode cond x (CmmLit (CmmInt y rep))
2211   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2212   = do
2213         (src1, code) <- getSomeReg x
2214         let
2215             code' = code `snocOL`
2216                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2217         return (CondCode False cond code')
2218
2219 condIntCode cond x y = do
2220     (src1, code1) <- getSomeReg x
2221     (src2, code2) <- getSomeReg y
2222     let
2223         code' = code1 `appOL` code2 `snocOL`
2224                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2225     return (CondCode False cond code')
2226
2227 condFltCode cond x y = do
2228     (src1, code1) <- getSomeReg x
2229     (src2, code2) <- getSomeReg y
2230     let
2231         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2232         code'' = case cond of -- twiddle CR to handle unordered case
2233                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2234                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2235                     _ -> code'
2236                  where
2237                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2238     return (CondCode True cond code'')
2239
2240 #endif /* powerpc_TARGET_ARCH */
2241
2242 -- -----------------------------------------------------------------------------
2243 -- Generating assignments
2244
2245 -- Assignments are really at the heart of the whole code generation
2246 -- business.  Almost all top-level nodes of any real importance are
2247 -- assignments, which correspond to loads, stores, or register
2248 -- transfers.  If we're really lucky, some of the register transfers
2249 -- will go away, because we can use the destination register to
2250 -- complete the code generation for the right hand side.  This only
2251 -- fails when the right hand side is forced into a fixed register
2252 -- (e.g. the result of a call).
2253
2254 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2255 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2256
2257 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2258 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2259
2260 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2261
2262 #if alpha_TARGET_ARCH
2263
2264 assignIntCode pk (CmmLoad dst _) src
2265   = getNewRegNat IntRep             `thenNat` \ tmp ->
2266     getAmode dst                    `thenNat` \ amode ->
2267     getRegister src                 `thenNat` \ register ->
2268     let
2269         code1   = amodeCode amode []
2270         dst__2  = amodeAddr amode
2271         code2   = registerCode register tmp []
2272         src__2  = registerName register tmp
2273         sz      = primRepToSize pk
2274         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2275     in
2276     return code__2
2277
2278 assignIntCode pk dst src
2279   = getRegister dst                         `thenNat` \ register1 ->
2280     getRegister src                         `thenNat` \ register2 ->
2281     let
2282         dst__2  = registerName register1 zeroh
2283         code    = registerCode register2 dst__2
2284         src__2  = registerName register2 dst__2
2285         code__2 = if isFixed register2
2286                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2287                   else code
2288     in
2289     return code__2
2290
2291 #endif /* alpha_TARGET_ARCH */
2292
2293 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2294
2295 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2296
2297 -- integer assignment to memory
2298 assignMem_IntCode pk addr src = do
2299     Amode addr code_addr <- getAmode addr
2300     (code_src, op_src)   <- get_op_RI src
2301     let
2302         code = code_src `appOL`
2303                code_addr `snocOL`
2304                   MOV pk op_src (OpAddr addr)
2305         -- NOTE: op_src is stable, so it will still be valid
2306         -- after code_addr.  This may involve the introduction
2307         -- of an extra MOV to a temporary register, but we hope
2308         -- the register allocator will get rid of it.
2309     --
2310     return code
2311   where
2312     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2313     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2314       = return (nilOL, OpImm (litToImm lit))
2315     get_op_RI op
2316       = do (reg,code) <- getNonClobberedReg op
2317            return (code, OpReg reg)
2318
2319
2320 -- Assign; dst is a reg, rhs is mem
2321 assignReg_IntCode pk reg (CmmLoad src _) = do
2322   load_code <- intLoadCode (MOV pk) src
2323   return (load_code (getRegisterReg reg))
2324
2325 -- dst is a reg, but src could be anything
2326 assignReg_IntCode pk reg src = do
2327   code <- getAnyReg src
2328   return (code (getRegisterReg reg))
2329
2330 #endif /* i386_TARGET_ARCH */
2331
2332 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2333
2334 #if sparc_TARGET_ARCH
2335
2336 assignMem_IntCode pk addr src = do
2337     (srcReg, code) <- getSomeReg src
2338     Amode dstAddr addr_code <- getAmode addr
2339     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2340
2341 assignReg_IntCode pk reg src = do
2342     r <- getRegister src
2343     return $ case r of
2344         Any _ code         -> code dst
2345         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2346     where
2347       dst = getRegisterReg reg
2348
2349
2350 #endif /* sparc_TARGET_ARCH */
2351
2352 #if powerpc_TARGET_ARCH
2353
2354 assignMem_IntCode pk addr src = do
2355     (srcReg, code) <- getSomeReg src
2356     Amode dstAddr addr_code <- getAmode addr
2357     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2358
2359 -- dst is a reg, but src could be anything
2360 assignReg_IntCode pk reg src
2361     = do
2362         r <- getRegister src
2363         return $ case r of
2364             Any _ code         -> code dst
2365             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2366     where
2367         dst = getRegisterReg reg
2368
2369 #endif /* powerpc_TARGET_ARCH */
2370
2371
2372 -- -----------------------------------------------------------------------------
2373 -- Floating-point assignments
2374
2375 #if alpha_TARGET_ARCH
2376
2377 assignFltCode pk (CmmLoad dst _) src
2378   = getNewRegNat pk                 `thenNat` \ tmp ->
2379     getAmode dst                    `thenNat` \ amode ->
2380     getRegister src                         `thenNat` \ register ->
2381     let
2382         code1   = amodeCode amode []
2383         dst__2  = amodeAddr amode
2384         code2   = registerCode register tmp []
2385         src__2  = registerName register tmp
2386         sz      = primRepToSize pk
2387         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2388     in
2389     return code__2
2390
2391 assignFltCode pk dst src
2392   = getRegister dst                         `thenNat` \ register1 ->
2393     getRegister src                         `thenNat` \ register2 ->
2394     let
2395         dst__2  = registerName register1 zeroh
2396         code    = registerCode register2 dst__2
2397         src__2  = registerName register2 dst__2
2398         code__2 = if isFixed register2
2399                   then code . mkSeqInstr (FMOV src__2 dst__2)
2400                   else code
2401     in
2402     return code__2
2403
2404 #endif /* alpha_TARGET_ARCH */
2405
2406 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2407
2408 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2409
2410 -- Floating point assignment to memory
2411 assignMem_FltCode pk addr src = do
2412   (src_reg, src_code) <- getNonClobberedReg src
2413   Amode addr addr_code <- getAmode addr
2414   let
2415         code = src_code `appOL`
2416                addr_code `snocOL`
2417                 IF_ARCH_i386(GST pk src_reg addr,
2418                              MOV pk (OpReg src_reg) (OpAddr addr))
2419   return code
2420
2421 -- Floating point assignment to a register/temporary
2422 assignReg_FltCode pk reg src = do
2423   src_code <- getAnyReg src
2424   return (src_code (getRegisterReg reg))
2425
2426 #endif /* i386_TARGET_ARCH */
2427
2428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2429
2430 #if sparc_TARGET_ARCH
2431
2432 -- Floating point assignment to memory
2433 assignMem_FltCode pk addr src = do
2434     Amode dst__2 code1 <- getAmode addr
2435     (src__2, code2) <- getSomeReg src
2436     tmp1 <- getNewRegNat pk
2437     let
2438         pk__2   = cmmExprRep src
2439         code__2 = code1 `appOL` code2 `appOL`
2440             if   pk == pk__2
2441             then unitOL (ST pk src__2 dst__2)
2442             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2443     return code__2
2444
2445 -- Floating point assignment to a register/temporary
2446 -- ToDo: Verify correctness
2447 assignReg_FltCode pk reg src = do
2448     r <- getRegister src
2449     v1 <- getNewRegNat pk
2450     return $ case r of
2451         Any _ code         -> code dst
2452         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2453     where
2454       dst = getRegisterReg reg
2455
2456 #endif /* sparc_TARGET_ARCH */
2457
2458 #if powerpc_TARGET_ARCH
2459
2460 -- Easy, isn't it?
2461 assignMem_FltCode = assignMem_IntCode
2462 assignReg_FltCode = assignReg_IntCode
2463
2464 #endif /* powerpc_TARGET_ARCH */
2465
2466
2467 -- -----------------------------------------------------------------------------
2468 -- Generating an non-local jump
2469
2470 -- (If applicable) Do not fill the delay slots here; you will confuse the
2471 -- register allocator.
2472
2473 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2474
2475 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2476
2477 #if alpha_TARGET_ARCH
2478
2479 genJump (CmmLabel lbl)
2480   | isAsmTemp lbl = returnInstr (BR target)
2481   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2482   where
2483     target = ImmCLbl lbl
2484
2485 genJump tree
2486   = getRegister tree                `thenNat` \ register ->
2487     getNewRegNat PtrRep             `thenNat` \ tmp ->
2488     let
2489         dst    = registerName register pv
2490         code   = registerCode register pv
2491         target = registerName register pv
2492     in
2493     if isFixed register then
2494         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2495     else
2496     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2497
2498 #endif /* alpha_TARGET_ARCH */
2499
2500 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2501
2502 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2503
2504 genJump (CmmLoad mem pk) = do
2505   Amode target code <- getAmode mem
2506   return (code `snocOL` JMP (OpAddr target))
2507
2508 genJump (CmmLit lit) = do
2509   return (unitOL (JMP (OpImm (litToImm lit))))
2510
2511 genJump expr = do
2512   (reg,code) <- getSomeReg expr
2513   return (code `snocOL` JMP (OpReg reg))
2514
2515 #endif /* i386_TARGET_ARCH */
2516
2517 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2518
2519 #if sparc_TARGET_ARCH
2520
2521 genJump (CmmLit (CmmLabel lbl))
2522   = return (toOL [CALL (Left target) 0 True, NOP])
2523   where
2524     target = ImmCLbl lbl
2525
2526 genJump tree
2527   = do
2528         (target, code) <- getSomeReg tree
2529         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2530
2531 #endif /* sparc_TARGET_ARCH */
2532
2533 #if powerpc_TARGET_ARCH
2534 genJump (CmmLit (CmmLabel lbl))
2535   = return (unitOL $ JMP lbl)
2536
2537 genJump tree
2538   = do
2539         (target,code) <- getSomeReg tree
2540         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2541 #endif /* powerpc_TARGET_ARCH */
2542
2543
2544 -- -----------------------------------------------------------------------------
2545 --  Unconditional branches
2546
2547 genBranch :: BlockId -> NatM InstrBlock
2548
2549 #if alpha_TARGET_ARCH
2550 genBranch id = return (unitOL (BR id))
2551 #endif
2552
2553 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2554 genBranch id = return (unitOL (JXX ALWAYS id))
2555 #endif
2556
2557 #if sparc_TARGET_ARCH
2558 genBranch (BlockId id) = return (toOL [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP])
2559 #endif
2560
2561 #if powerpc_TARGET_ARCH
2562 genBranch id = return (unitOL (BCC ALWAYS id))
2563 #endif
2564
2565
2566 -- -----------------------------------------------------------------------------
2567 --  Conditional jumps
2568
2569 {-
2570 Conditional jumps are always to local labels, so we can use branch
2571 instructions.  We peek at the arguments to decide what kind of
2572 comparison to do.
2573
2574 ALPHA: For comparisons with 0, we're laughing, because we can just do
2575 the desired conditional branch.
2576
2577 I386: First, we have to ensure that the condition
2578 codes are set according to the supplied comparison operation.
2579
2580 SPARC: First, we have to ensure that the condition codes are set
2581 according to the supplied comparison operation.  We generate slightly
2582 different code for floating point comparisons, because a floating
2583 point operation cannot directly precede a @BF@.  We assume the worst
2584 and fill that slot with a @NOP@.
2585
2586 SPARC: Do not fill the delay slots here; you will confuse the register
2587 allocator.
2588 -}
2589
2590
2591 genCondJump
2592     :: BlockId      -- the branch target
2593     -> CmmExpr      -- the condition on which to branch
2594     -> NatM InstrBlock
2595
2596 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2597
2598 #if alpha_TARGET_ARCH
2599
2600 genCondJump id (StPrim op [x, StInt 0])
2601   = getRegister x                           `thenNat` \ register ->
2602     getNewRegNat (registerRep register)
2603                                     `thenNat` \ tmp ->
2604     let
2605         code   = registerCode register tmp
2606         value  = registerName register tmp
2607         pk     = registerRep register
2608         target = ImmCLbl lbl
2609     in
2610     returnSeq code [BI (cmpOp op) value target]
2611   where
2612     cmpOp CharGtOp = GTT
2613     cmpOp CharGeOp = GE
2614     cmpOp CharEqOp = EQQ
2615     cmpOp CharNeOp = NE
2616     cmpOp CharLtOp = LTT
2617     cmpOp CharLeOp = LE
2618     cmpOp IntGtOp = GTT
2619     cmpOp IntGeOp = GE
2620     cmpOp IntEqOp = EQQ
2621     cmpOp IntNeOp = NE
2622     cmpOp IntLtOp = LTT
2623     cmpOp IntLeOp = LE
2624     cmpOp WordGtOp = NE
2625     cmpOp WordGeOp = ALWAYS
2626     cmpOp WordEqOp = EQQ
2627     cmpOp WordNeOp = NE
2628     cmpOp WordLtOp = NEVER
2629     cmpOp WordLeOp = EQQ
2630     cmpOp AddrGtOp = NE
2631     cmpOp AddrGeOp = ALWAYS
2632     cmpOp AddrEqOp = EQQ
2633     cmpOp AddrNeOp = NE
2634     cmpOp AddrLtOp = NEVER
2635     cmpOp AddrLeOp = EQQ
2636
2637 genCondJump lbl (StPrim op [x, StDouble 0.0])
2638   = getRegister x                           `thenNat` \ register ->
2639     getNewRegNat (registerRep register)
2640                                     `thenNat` \ tmp ->
2641     let
2642         code   = registerCode register tmp
2643         value  = registerName register tmp
2644         pk     = registerRep register
2645         target = ImmCLbl lbl
2646     in
2647     return (code . mkSeqInstr (BF (cmpOp op) value target))
2648   where
2649     cmpOp FloatGtOp = GTT
2650     cmpOp FloatGeOp = GE
2651     cmpOp FloatEqOp = EQQ
2652     cmpOp FloatNeOp = NE
2653     cmpOp FloatLtOp = LTT
2654     cmpOp FloatLeOp = LE
2655     cmpOp DoubleGtOp = GTT
2656     cmpOp DoubleGeOp = GE
2657     cmpOp DoubleEqOp = EQQ
2658     cmpOp DoubleNeOp = NE
2659     cmpOp DoubleLtOp = LTT
2660     cmpOp DoubleLeOp = LE
2661
2662 genCondJump lbl (StPrim op [x, y])
2663   | fltCmpOp op
2664   = trivialFCode pr instr x y       `thenNat` \ register ->
2665     getNewRegNat F64                `thenNat` \ tmp ->
2666     let
2667         code   = registerCode register tmp
2668         result = registerName register tmp
2669         target = ImmCLbl lbl
2670     in
2671     return (code . mkSeqInstr (BF cond result target))
2672   where
2673     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2674
2675     fltCmpOp op = case op of
2676         FloatGtOp -> True
2677         FloatGeOp -> True
2678         FloatEqOp -> True
2679         FloatNeOp -> True
2680         FloatLtOp -> True
2681         FloatLeOp -> True
2682         DoubleGtOp -> True
2683         DoubleGeOp -> True
2684         DoubleEqOp -> True
2685         DoubleNeOp -> True
2686         DoubleLtOp -> True
2687         DoubleLeOp -> True
2688         _ -> False
2689     (instr, cond) = case op of
2690         FloatGtOp -> (FCMP TF LE, EQQ)
2691         FloatGeOp -> (FCMP TF LTT, EQQ)
2692         FloatEqOp -> (FCMP TF EQQ, NE)
2693         FloatNeOp -> (FCMP TF EQQ, EQQ)
2694         FloatLtOp -> (FCMP TF LTT, NE)
2695         FloatLeOp -> (FCMP TF LE, NE)
2696         DoubleGtOp -> (FCMP TF LE, EQQ)
2697         DoubleGeOp -> (FCMP TF LTT, EQQ)
2698         DoubleEqOp -> (FCMP TF EQQ, NE)
2699         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2700         DoubleLtOp -> (FCMP TF LTT, NE)
2701         DoubleLeOp -> (FCMP TF LE, NE)
2702
2703 genCondJump lbl (StPrim op [x, y])
2704   = trivialCode instr x y           `thenNat` \ register ->
2705     getNewRegNat IntRep             `thenNat` \ tmp ->
2706     let
2707         code   = registerCode register tmp
2708         result = registerName register tmp
2709         target = ImmCLbl lbl
2710     in
2711     return (code . mkSeqInstr (BI cond result target))
2712   where
2713     (instr, cond) = case op of
2714         CharGtOp -> (CMP LE, EQQ)
2715         CharGeOp -> (CMP LTT, EQQ)
2716         CharEqOp -> (CMP EQQ, NE)
2717         CharNeOp -> (CMP EQQ, EQQ)
2718         CharLtOp -> (CMP LTT, NE)
2719         CharLeOp -> (CMP LE, NE)
2720         IntGtOp -> (CMP LE, EQQ)
2721         IntGeOp -> (CMP LTT, EQQ)
2722         IntEqOp -> (CMP EQQ, NE)
2723         IntNeOp -> (CMP EQQ, EQQ)
2724         IntLtOp -> (CMP LTT, NE)
2725         IntLeOp -> (CMP LE, NE)
2726         WordGtOp -> (CMP ULE, EQQ)
2727         WordGeOp -> (CMP ULT, EQQ)
2728         WordEqOp -> (CMP EQQ, NE)
2729         WordNeOp -> (CMP EQQ, EQQ)
2730         WordLtOp -> (CMP ULT, NE)
2731         WordLeOp -> (CMP ULE, NE)
2732         AddrGtOp -> (CMP ULE, EQQ)
2733         AddrGeOp -> (CMP ULT, EQQ)
2734         AddrEqOp -> (CMP EQQ, NE)
2735         AddrNeOp -> (CMP EQQ, EQQ)
2736         AddrLtOp -> (CMP ULT, NE)
2737         AddrLeOp -> (CMP ULE, NE)
2738
2739 #endif /* alpha_TARGET_ARCH */
2740
2741 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2742
2743 #if i386_TARGET_ARCH
2744
2745 genCondJump id bool = do
2746   CondCode _ cond code <- getCondCode bool
2747   return (code `snocOL` JXX cond id)
2748
2749 #endif
2750
2751 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2752
2753 #if x86_64_TARGET_ARCH
2754
2755 genCondJump id bool = do
2756   CondCode is_float cond cond_code <- getCondCode bool
2757   if not is_float
2758     then
2759         return (cond_code `snocOL` JXX cond id)
2760     else do
2761         lbl <- getBlockIdNat
2762
2763         -- see comment with condFltReg
2764         let code = case cond of
2765                         NE  -> or_unordered
2766                         GU  -> plain_test
2767                         GEU -> plain_test
2768                         _   -> and_ordered
2769
2770             plain_test = unitOL (
2771                   JXX cond id
2772                 )
2773             or_unordered = toOL [
2774                   JXX cond id,
2775                   JXX PARITY id
2776                 ]
2777             and_ordered = toOL [
2778                   JXX PARITY lbl,
2779                   JXX cond id,
2780                   JXX ALWAYS lbl,
2781                   NEWBLOCK lbl
2782                 ]
2783         return (cond_code `appOL` code)
2784
2785 #endif
2786
2787 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2788
2789 #if sparc_TARGET_ARCH
2790
2791 genCondJump (BlockId id) bool = do
2792   CondCode is_float cond code <- getCondCode bool
2793   return (
2794        code `appOL`
2795        toOL (
2796          if   is_float
2797          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2798          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2799        )
2800     )
2801
2802 #endif /* sparc_TARGET_ARCH */
2803
2804
2805 #if powerpc_TARGET_ARCH
2806
2807 genCondJump id bool = do
2808   CondCode is_float cond code <- getCondCode bool
2809   return (code `snocOL` BCC cond id)
2810
2811 #endif /* powerpc_TARGET_ARCH */
2812
2813
2814 -- -----------------------------------------------------------------------------
2815 --  Generating C calls
2816
2817 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2818 -- @get_arg@, which moves the arguments to the correct registers/stack
2819 -- locations.  Apart from that, the code is easy.
2820 --
2821 -- (If applicable) Do not fill the delay slots here; you will confuse the
2822 -- register allocator.
2823
2824 genCCall
2825     :: CmmCallTarget            -- function to call
2826     -> [(CmmReg,MachHint)]      -- where to put the result
2827     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2828     -> Maybe [GlobalReg]        -- volatile regs to save
2829     -> NatM InstrBlock
2830
2831 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2832
2833 #if alpha_TARGET_ARCH
2834
2835 ccallResultRegs =
2836
2837 genCCall fn cconv result_regs args
2838   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2839                           `thenNat` \ ((unused,_), argCode) ->
2840     let
2841         nRegs = length allArgRegs - length unused
2842         code = asmSeqThen (map ($ []) argCode)
2843     in
2844         returnSeq code [
2845             LDA pv (AddrImm (ImmLab (ptext fn))),
2846             JSR ra (AddrReg pv) nRegs,
2847             LDGP gp (AddrReg ra)]
2848   where
2849     ------------------------
2850     {-  Try to get a value into a specific register (or registers) for
2851         a call.  The first 6 arguments go into the appropriate
2852         argument register (separate registers for integer and floating
2853         point arguments, but used in lock-step), and the remaining
2854         arguments are dumped to the stack, beginning at 0(sp).  Our
2855         first argument is a pair of the list of remaining argument
2856         registers to be assigned for this call and the next stack
2857         offset to use for overflowing arguments.  This way,
2858         @get_Arg@ can be applied to all of a call's arguments using
2859         @mapAccumLNat@.
2860     -}
2861     get_arg
2862         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2863         -> StixTree             -- Current argument
2864         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2865
2866     -- We have to use up all of our argument registers first...
2867
2868     get_arg ((iDst,fDst):dsts, offset) arg
2869       = getRegister arg                     `thenNat` \ register ->
2870         let
2871             reg  = if isFloatingRep pk then fDst else iDst
2872             code = registerCode register reg
2873             src  = registerName register reg
2874             pk   = registerRep register
2875         in
2876         return (
2877             if isFloatingRep pk then
2878                 ((dsts, offset), if isFixed register then
2879                     code . mkSeqInstr (FMOV src fDst)
2880                     else code)
2881             else
2882                 ((dsts, offset), if isFixed register then
2883                     code . mkSeqInstr (OR src (RIReg src) iDst)
2884                     else code))
2885
2886     -- Once we have run out of argument registers, we move to the
2887     -- stack...
2888
2889     get_arg ([], offset) arg
2890       = getRegister arg                 `thenNat` \ register ->
2891         getNewRegNat (registerRep register)
2892                                         `thenNat` \ tmp ->
2893         let
2894             code = registerCode register tmp
2895             src  = registerName register tmp
2896             pk   = registerRep register
2897             sz   = primRepToSize pk
2898         in
2899         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2900
2901 #endif /* alpha_TARGET_ARCH */
2902
2903 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2904
2905 #if i386_TARGET_ARCH
2906
2907 -- we only cope with a single result for foreign calls
2908 genCCall (CmmPrim op) [(r,_)] args vols = do
2909   case op of
2910         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2911         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2912
2913         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2914         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2915
2916         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2917         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2918
2919         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2920         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2921
2922         other_op    -> outOfLineFloatOp op r args vols
2923  where
2924   actuallyInlineFloatOp rep instr [(x,_)]
2925         = do res <- trivialUFCode rep instr x
2926              any <- anyReg res
2927              return (any (getRegisterReg r))
2928
2929 genCCall target dest_regs args vols = do
2930     let
2931         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
2932 #if !darwin_TARGET_OS
2933         tot_arg_size        = sum sizes
2934 #else
2935         raw_arg_size        = sum sizes
2936         tot_arg_size        = roundTo 16 raw_arg_size
2937         arg_pad_size        = tot_arg_size - raw_arg_size
2938     delta0 <- getDeltaNat
2939     setDeltaNat (delta0 - arg_pad_size)
2940 #endif
2941
2942     push_codes <- mapM push_arg (reverse args)
2943     delta <- getDeltaNat
2944
2945     -- in
2946     -- deal with static vs dynamic call targets
2947     (callinsns,cconv) <-
2948       case target of
2949         -- CmmPrim -> ...
2950         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2951            -> -- ToDo: stdcall arg sizes
2952               return (unitOL (CALL (Left fn_imm) []), conv)
2953            where fn_imm = ImmCLbl lbl
2954         CmmForeignCall expr conv
2955            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2956                  ASSERT(dyn_rep == I32)
2957                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2958
2959     let push_code
2960 #if darwin_TARGET_OS
2961             | arg_pad_size /= 0
2962             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2963                     DELTA (delta0 - arg_pad_size)]
2964               `appOL` concatOL push_codes
2965             | otherwise
2966 #endif
2967             = concatOL push_codes
2968         call = callinsns `appOL`
2969                toOL (
2970                         -- Deallocate parameters after call for ccall;
2971                         -- but not for stdcall (callee does it)
2972                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
2973                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2974                   ++
2975                   [DELTA (delta + tot_arg_size)]
2976                )
2977     -- in
2978     setDeltaNat (delta + tot_arg_size)
2979
2980     let
2981         -- assign the results, if necessary
2982         assign_code []     = nilOL
2983         assign_code [(dest,_hint)] =
2984           case rep of
2985                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2986                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2987                 F32 -> unitOL (GMOV fake0 r_dest)
2988                 F64 -> unitOL (GMOV fake0 r_dest)
2989                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2990           where
2991                 r_dest_hi = getHiVRegFromLo r_dest
2992                 rep = cmmRegRep dest
2993                 r_dest = getRegisterReg dest
2994         assign_code many = panic "genCCall.assign_code many"
2995
2996     return (push_code `appOL`
2997             call `appOL`
2998             assign_code dest_regs)
2999
3000   where
3001     arg_size F64 = 8
3002     arg_size F32 = 4
3003     arg_size I64 = 8
3004     arg_size _   = 4
3005
3006     roundTo a x | x `mod` a == 0 = x
3007                 | otherwise = x + a - (x `mod` a)
3008
3009
3010     push_arg :: (CmmExpr,MachHint){-current argument-}
3011                     -> NatM InstrBlock  -- code
3012
3013     push_arg (arg,_hint) -- we don't need the hints on x86
3014       | arg_rep == I64 = do
3015         ChildCode64 code r_lo <- iselExpr64 arg
3016         delta <- getDeltaNat
3017         setDeltaNat (delta - 8)
3018         let
3019             r_hi = getHiVRegFromLo r_lo
3020         -- in
3021         return (       code `appOL`
3022                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3023                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3024                              DELTA (delta-8)]
3025             )
3026
3027       | otherwise = do
3028         (code, reg, sz) <- get_op arg
3029         delta <- getDeltaNat
3030         let size = arg_size sz
3031         setDeltaNat (delta-size)
3032         if (case sz of F64 -> True; F32 -> True; _ -> False)
3033            then return (code `appOL`
3034                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3035                               DELTA (delta-size),
3036                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3037                                                         EAIndexNone
3038                                                         (ImmInt 0))]
3039                        )
3040            else return (code `snocOL`
3041                         PUSH I32 (OpReg reg) `snocOL`
3042                         DELTA (delta-size)
3043                        )
3044       where
3045          arg_rep = cmmExprRep arg
3046
3047     ------------
3048     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3049     get_op op = do
3050         (reg,code) <- getSomeReg op
3051         return (code, reg, cmmExprRep op)
3052
3053 #endif /* i386_TARGET_ARCH */
3054
3055 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3056
3057 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3058   -> Maybe [GlobalReg] -> NatM InstrBlock
3059 outOfLineFloatOp mop res args vols
3060   | cmmRegRep res == F64
3061   = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3062
3063   | otherwise
3064   = do uq <- getUniqueNat
3065        let
3066          tmp = CmmLocal (LocalReg uq F64)
3067        -- in
3068        code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3069        code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3070        return (code1 `appOL` code2)
3071   where
3072         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3073         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
3074
3075         target = CmmForeignCall (CmmLit lbl) CCallConv
3076         lbl = CmmLabel (mkForeignLabel fn Nothing False)
3077
3078         fn = case mop of
3079               MO_F32_Sqrt  -> FSLIT("sqrt")
3080               MO_F32_Sin   -> FSLIT("sin")
3081               MO_F32_Cos   -> FSLIT("cos")
3082               MO_F32_Tan   -> FSLIT("tan")
3083               MO_F32_Exp   -> FSLIT("exp")
3084               MO_F32_Log   -> FSLIT("log")
3085
3086               MO_F32_Asin  -> FSLIT("asin")
3087               MO_F32_Acos  -> FSLIT("acos")
3088               MO_F32_Atan  -> FSLIT("atan")
3089
3090               MO_F32_Sinh  -> FSLIT("sinh")
3091               MO_F32_Cosh  -> FSLIT("cosh")
3092               MO_F32_Tanh  -> FSLIT("tanh")
3093               MO_F32_Pwr   -> FSLIT("pow")
3094
3095               MO_F64_Sqrt  -> FSLIT("sqrt")
3096               MO_F64_Sin   -> FSLIT("sin")
3097               MO_F64_Cos   -> FSLIT("cos")
3098               MO_F64_Tan   -> FSLIT("tan")
3099               MO_F64_Exp   -> FSLIT("exp")
3100               MO_F64_Log   -> FSLIT("log")
3101
3102               MO_F64_Asin  -> FSLIT("asin")
3103               MO_F64_Acos  -> FSLIT("acos")
3104               MO_F64_Atan  -> FSLIT("atan")
3105
3106               MO_F64_Sinh  -> FSLIT("sinh")
3107               MO_F64_Cosh  -> FSLIT("cosh")
3108               MO_F64_Tanh  -> FSLIT("tanh")
3109               MO_F64_Pwr   -> FSLIT("pow")
3110
3111 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3112
3113 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3114
3115 #if x86_64_TARGET_ARCH
3116
3117 genCCall (CmmPrim op) [(r,_)] args vols =
3118   outOfLineFloatOp op r args vols
3119
3120 genCCall target dest_regs args vols = do
3121
3122         -- load up the register arguments
3123     (stack_args, aregs, fregs, load_args_code)
3124          <- load_args args allArgRegs allFPArgRegs nilOL
3125
3126     let
3127         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3128         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3129         arg_regs = int_regs_used ++ fp_regs_used
3130                 -- for annotating the call instruction with
3131
3132         sse_regs = length fp_regs_used
3133
3134         tot_arg_size = arg_size * length stack_args
3135
3136         -- On entry to the called function, %rsp should be aligned
3137         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3138         -- the return address is 16-byte aligned).  In STG land
3139         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3140         -- need to make sure we push a multiple of 16-bytes of args,
3141         -- plus the return address, to get the correct alignment.
3142         -- Urg, this is hard.  We need to feed the delta back into
3143         -- the arg pushing code.
3144     (real_size, adjust_rsp) <-
3145         if tot_arg_size `rem` 16 == 0
3146             then return (tot_arg_size, nilOL)
3147             else do -- we need to adjust...
3148                 delta <- getDeltaNat
3149                 setDeltaNat (delta-8)
3150                 return (tot_arg_size+8, toOL [
3151                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3152                                 DELTA (delta-8)
3153                         ])
3154
3155         -- push the stack args, right to left
3156     push_code <- push_args (reverse stack_args) nilOL
3157     delta <- getDeltaNat
3158
3159     -- deal with static vs dynamic call targets
3160     (callinsns,cconv) <-
3161       case target of
3162         -- CmmPrim -> ...
3163         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3164            -> -- ToDo: stdcall arg sizes
3165               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3166            where fn_imm = ImmCLbl lbl
3167         CmmForeignCall expr conv
3168            -> do (dyn_r, dyn_c) <- getSomeReg expr
3169                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3170
3171     let
3172         -- The x86_64 ABI requires us to set %al to the number of SSE
3173         -- registers that contain arguments, if the called routine
3174         -- is a varargs function.  We don't know whether it's a
3175         -- varargs function or not, so we have to assume it is.
3176         --
3177         -- It's not safe to omit this assignment, even if the number
3178         -- of SSE regs in use is zero.  If %al is larger than 8
3179         -- on entry to a varargs function, seg faults ensue.
3180         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3181
3182     let call = callinsns `appOL`
3183                toOL (
3184                         -- Deallocate parameters after call for ccall;
3185                         -- but not for stdcall (callee does it)
3186                   (if cconv == StdCallConv || real_size==0 then [] else
3187                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3188                   ++
3189                   [DELTA (delta + real_size)]
3190                )
3191     -- in
3192     setDeltaNat (delta + real_size)
3193
3194     let
3195         -- assign the results, if necessary
3196         assign_code []     = nilOL
3197         assign_code [(dest,_hint)] =
3198           case rep of
3199                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3200                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3201                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3202           where
3203                 rep = cmmRegRep dest
3204                 r_dest = getRegisterReg dest
3205         assign_code many = panic "genCCall.assign_code many"
3206
3207     return (load_args_code      `appOL`
3208             adjust_rsp          `appOL`
3209             push_code           `appOL`
3210             assign_eax sse_regs `appOL`
3211             call                `appOL`
3212             assign_code dest_regs)
3213
3214   where
3215     arg_size = 8 -- always, at the mo
3216
3217     load_args :: [(CmmExpr,MachHint)]
3218               -> [Reg]                  -- int regs avail for args
3219               -> [Reg]                  -- FP regs avail for args
3220               -> InstrBlock
3221               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3222     load_args args [] [] code     =  return (args, [], [], code)
3223         -- no more regs to use
3224     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3225         -- no more args to push
3226     load_args ((arg,hint) : rest) aregs fregs code
3227         | isFloatingRep arg_rep =
3228         case fregs of
3229           [] -> push_this_arg
3230           (r:rs) -> do
3231              arg_code <- getAnyReg arg
3232              load_args rest aregs rs (code `appOL` arg_code r)
3233         | otherwise =
3234         case aregs of
3235           [] -> push_this_arg
3236           (r:rs) -> do
3237              arg_code <- getAnyReg arg
3238              load_args rest rs fregs (code `appOL` arg_code r)
3239         where
3240           arg_rep = cmmExprRep arg
3241
3242           push_this_arg = do
3243             (args',ars,frs,code') <- load_args rest aregs fregs code
3244             return ((arg,hint):args', ars, frs, code')
3245
3246     push_args [] code = return code
3247     push_args ((arg,hint):rest) code
3248        | isFloatingRep arg_rep = do
3249          (arg_reg, arg_code) <- getSomeReg arg
3250          delta <- getDeltaNat
3251          setDeltaNat (delta-arg_size)
3252          let code' = code `appOL` toOL [
3253                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3254                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3255                         DELTA (delta-arg_size)]
3256          push_args rest code'
3257
3258        | otherwise = do
3259        -- we only ever generate word-sized function arguments.  Promotion
3260        -- has already happened: our Int8# type is kept sign-extended
3261        -- in an Int#, for example.
3262          ASSERT(arg_rep == I64) return ()
3263          (arg_op, arg_code) <- getOperand arg
3264          delta <- getDeltaNat
3265          setDeltaNat (delta-arg_size)
3266          let code' = code `appOL` toOL [PUSH I64 arg_op,
3267                                         DELTA (delta-arg_size)]
3268          push_args rest code'
3269         where
3270           arg_rep = cmmExprRep arg
3271 #endif
3272
3273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3274
3275 #if sparc_TARGET_ARCH
3276 {-
3277    The SPARC calling convention is an absolute
3278    nightmare.  The first 6x32 bits of arguments are mapped into
3279    %o0 through %o5, and the remaining arguments are dumped to the
3280    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3281
3282    If we have to put args on the stack, move %o6==%sp down by
3283    the number of words to go on the stack, to ensure there's enough space.
3284
3285    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3286    16 words above the stack pointer is a word for the address of
3287    a structure return value.  I use this as a temporary location
3288    for moving values from float to int regs.  Certainly it isn't
3289    safe to put anything in the 16 words starting at %sp, since
3290    this area can get trashed at any time due to window overflows
3291    caused by signal handlers.
3292
3293    A final complication (if the above isn't enough) is that
3294    we can't blithely calculate the arguments one by one into
3295    %o0 .. %o5.  Consider the following nested calls:
3296
3297        fff a (fff b c)
3298
3299    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3300    the inner call will itself use %o0, which trashes the value put there
3301    in preparation for the outer call.  Upshot: we need to calculate the
3302    args into temporary regs, and move those to arg regs or onto the
3303    stack only immediately prior to the call proper.  Sigh.
3304 -}
3305
3306 genCCall target dest_regs argsAndHints vols = do
3307     let
3308         args = map fst argsAndHints
3309     argcode_and_vregs <- mapM arg_to_int_vregs args
3310     let
3311         (argcodes, vregss) = unzip argcode_and_vregs
3312         n_argRegs          = length allArgRegs
3313         n_argRegs_used     = min (length vregs) n_argRegs
3314         vregs              = concat vregss
3315     -- deal with static vs dynamic call targets
3316     callinsns <- (case target of
3317         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3318                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3319         CmmForeignCall expr conv -> do
3320                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3321                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3322         CmmPrim mop -> do
3323                   (res, reduce) <- outOfLineFloatOp mop
3324                   lblOrMopExpr <- case res of
3325                        Left lbl -> do
3326                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3327                        Right mopExpr -> do
3328                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3329                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3330                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3331
3332       )
3333     let
3334         argcode = concatOL argcodes
3335         (move_sp_down, move_sp_up)
3336            = let diff = length vregs - n_argRegs
3337                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3338              in  if   nn <= 0
3339                  then (nilOL, nilOL)
3340                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3341         transfer_code
3342            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3343     return (argcode       `appOL`
3344             move_sp_down  `appOL`
3345             transfer_code `appOL`
3346             callinsns     `appOL`
3347             unitOL NOP    `appOL`
3348             move_sp_up)
3349   where
3350      -- move args from the integer vregs into which they have been
3351      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3352      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3353
3354      move_final [] _ offset          -- all args done
3355         = []
3356
3357      move_final (v:vs) [] offset     -- out of aregs; move to stack
3358         = ST I32 v (spRel offset)
3359           : move_final vs [] (offset+1)
3360
3361      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3362         = OR False g0 (RIReg v) a
3363           : move_final vs az offset
3364
3365      -- generate code to calculate an argument, and move it into one
3366      -- or two integer vregs.
3367      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3368      arg_to_int_vregs arg
3369         | (cmmExprRep arg) == I64
3370         = do
3371           (ChildCode64 code r_lo) <- iselExpr64 arg
3372           let
3373               r_hi = getHiVRegFromLo r_lo
3374           return (code, [r_hi, r_lo])
3375         | otherwise
3376         = do
3377           (src, code) <- getSomeReg arg
3378           tmp <- getNewRegNat (cmmExprRep arg)
3379           let
3380               pk   = cmmExprRep arg
3381           case pk of
3382              F64 -> do
3383                       v1 <- getNewRegNat I32
3384                       v2 <- getNewRegNat I32
3385                       return (
3386                         code                          `snocOL`
3387                         FMOV F64 src f0                `snocOL`
3388                         ST   F32  f0 (spRel 16)         `snocOL`
3389                         LD   I32  (spRel 16) v1         `snocOL`
3390                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3391                         LD   I32  (spRel 16) v2
3392                         ,
3393                         [v1,v2]
3394                        )
3395              F32 -> do
3396                       v1 <- getNewRegNat I32
3397                       return (
3398                         code                    `snocOL`
3399                         ST   F32  src (spRel 16)  `snocOL`
3400                         LD   I32  (spRel 16) v1
3401                         ,
3402                         [v1]
3403                        )
3404              other -> do
3405                         v1 <- getNewRegNat I32
3406                         return (
3407                           code `snocOL` OR False g0 (RIReg src) v1
3408                           ,
3409                           [v1]
3410                          )
3411 outOfLineFloatOp mop =
3412     do
3413       mopExpr <- cmmMakeDynamicReference addImportNat True $
3414                   mkForeignLabel functionName Nothing True
3415       let mopLabelOrExpr = case mopExpr of
3416                         CmmLit (CmmLabel lbl) -> Left lbl
3417                         _ -> Right mopExpr
3418       return (mopLabelOrExpr, reduce)
3419             where
3420                 (reduce, functionName) = case mop of
3421                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3422                   MO_F32_Log    -> (True,  FSLIT("log"))
3423                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3424
3425                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3426                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3427                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3428
3429                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3430                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3431                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3432
3433                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3434                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3435                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3436
3437                   MO_F64_Exp    -> (False, FSLIT("exp"))
3438                   MO_F64_Log    -> (False, FSLIT("log"))
3439                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3440
3441                   MO_F64_Sin    -> (False, FSLIT("sin"))
3442                   MO_F64_Cos    -> (False, FSLIT("cos"))
3443                   MO_F64_Tan    -> (False, FSLIT("tan"))
3444
3445                   MO_F64_Asin   -> (False, FSLIT("asin"))
3446                   MO_F64_Acos   -> (False, FSLIT("acos"))
3447                   MO_F64_Atan   -> (False, FSLIT("atan"))
3448
3449                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3450                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3451                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3452
3453                   other -> pprPanic "outOfLineFloatOp(sparc) "
3454                                 (pprCallishMachOp mop)
3455
3456 #endif /* sparc_TARGET_ARCH */
3457
3458 #if powerpc_TARGET_ARCH
3459
3460 #if darwin_TARGET_OS || linux_TARGET_OS
3461 {-
3462     The PowerPC calling convention for Darwin/Mac OS X
3463     is described in Apple's document
3464     "Inside Mac OS X - Mach-O Runtime Architecture".
3465
3466     PowerPC Linux uses the System V Release 4 Calling Convention
3467     for PowerPC. It is described in the
3468     "System V Application Binary Interface PowerPC Processor Supplement".
3469
3470     Both conventions are similar:
3471     Parameters may be passed in general-purpose registers starting at r3, in
3472     floating point registers starting at f1, or on the stack.
3473
3474     But there are substantial differences:
3475     * The number of registers used for parameter passing and the exact set of
3476       nonvolatile registers differs (see MachRegs.lhs).
3477     * On Darwin, stack space is always reserved for parameters, even if they are
3478       passed in registers. The called routine may choose to save parameters from
3479       registers to the corresponding space on the stack.
3480     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3481       parameter is passed in an FPR.
3482     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3483       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3484       Darwin just treats an I64 like two separate I32s (high word first).
3485     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3486       4-byte aligned like everything else on Darwin.
3487     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3488       PowerPC Linux does not agree, so neither do we.
3489
3490     According to both conventions, The parameter area should be part of the
3491     caller's stack frame, allocated in the caller's prologue code (large enough
3492     to hold the parameter lists for all called routines). The NCG already
3493     uses the stack for register spilling, leaving 64 bytes free at the top.
3494     If we need a larger parameter area than that, we just allocate a new stack
3495     frame just before ccalling.
3496 -}
3497
3498 genCCall target dest_regs argsAndHints vols
3499   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3500         -- we rely on argument promotion in the codeGen
3501     do
3502         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3503                                                         (zip args argReps)
3504                                                         allArgRegs allFPArgRegs
3505                                                         initialStackOffset
3506                                                         (toOL []) []
3507
3508         (labelOrExpr, reduceToF32) <- case target of
3509             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3510             CmmForeignCall expr conv -> return  (Right expr, False)
3511             CmmPrim mop -> outOfLineFloatOp mop
3512
3513         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3514             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3515
3516         case labelOrExpr of
3517             Left lbl -> do
3518                 return (         codeBefore
3519                         `snocOL` BL lbl usedRegs
3520                         `appOL`  codeAfter)
3521             Right dyn -> do
3522                 (dynReg, dynCode) <- getSomeReg dyn
3523                 return (         dynCode
3524                         `snocOL` MTCTR dynReg
3525                         `appOL`  codeBefore
3526                         `snocOL` BCTRL usedRegs
3527                         `appOL`  codeAfter)
3528     where
3529 #if darwin_TARGET_OS
3530         initialStackOffset = 24
3531             -- size of linkage area + size of arguments, in bytes
3532         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3533                                        map machRepByteWidth argReps
3534 #elif linux_TARGET_OS
3535         initialStackOffset = 8
3536         stackDelta finalStack = roundTo 16 finalStack
3537 #endif
3538         args = map fst argsAndHints
3539         argReps = map cmmExprRep args
3540
3541         roundTo a x | x `mod` a == 0 = x
3542                     | otherwise = x + a - (x `mod` a)
3543
3544         move_sp_down finalStack
3545                | delta > 64 =
3546                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3547                               DELTA (-delta)]
3548                | otherwise = nilOL
3549                where delta = stackDelta finalStack
3550         move_sp_up finalStack
3551                | delta > 64 =
3552                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3553                               DELTA 0]
3554                | otherwise = nilOL
3555                where delta = stackDelta finalStack
3556
3557
3558         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3559         passArguments ((arg,I64):args) gprs fprs stackOffset
3560                accumCode accumUsed =
3561             do
3562                 ChildCode64 code vr_lo <- iselExpr64 arg
3563                 let vr_hi = getHiVRegFromLo vr_lo
3564
3565 #if darwin_TARGET_OS
3566                 passArguments args
3567                               (drop 2 gprs)
3568                               fprs
3569                               (stackOffset+8)
3570                               (accumCode `appOL` code
3571                                     `snocOL` storeWord vr_hi gprs stackOffset
3572                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3573                               ((take 2 gprs) ++ accumUsed)
3574             where
3575                 storeWord vr (gpr:_) offset = MR gpr vr
3576                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3577
3578 #elif linux_TARGET_OS
3579                 let stackOffset' = roundTo 8 stackOffset
3580                     stackCode = accumCode `appOL` code
3581                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3582                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3583                     regCode hireg loreg =
3584                         accumCode `appOL` code
3585                             `snocOL` MR hireg vr_hi
3586                             `snocOL` MR loreg vr_lo
3587
3588                 case gprs of
3589                     hireg : loreg : regs | even (length gprs) ->
3590                         passArguments args regs fprs stackOffset
3591                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3592                     _skipped : hireg : loreg : regs ->
3593                         passArguments args regs fprs stackOffset
3594                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3595                     _ -> -- only one or no regs left
3596                         passArguments args [] fprs (stackOffset'+8)
3597                                       stackCode accumUsed
3598 #endif
3599
3600         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3601             | reg : _ <- regs = do
3602                 register <- getRegister arg
3603                 let code = case register of
3604                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3605                             Any _ acode -> acode reg
3606                 passArguments args
3607                               (drop nGprs gprs)
3608                               (drop nFprs fprs)
3609 #if darwin_TARGET_OS
3610         -- The Darwin ABI requires that we reserve stack slots for register parameters
3611                               (stackOffset + stackBytes)
3612 #elif linux_TARGET_OS
3613         -- ... the SysV ABI doesn't.
3614                               stackOffset
3615 #endif
3616                               (accumCode `appOL` code)
3617                               (reg : accumUsed)
3618             | otherwise = do
3619                 (vr, code) <- getSomeReg arg
3620                 passArguments args
3621                               (drop nGprs gprs)
3622                               (drop nFprs fprs)
3623                               (stackOffset' + stackBytes)
3624                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3625                               accumUsed
3626             where
3627 #if darwin_TARGET_OS
3628         -- stackOffset is at least 4-byte aligned
3629         -- The Darwin ABI is happy with that.
3630                 stackOffset' = stackOffset
3631 #else
3632         -- ... the SysV ABI requires 8-byte alignment for doubles.
3633                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3634                              | otherwise  =           stackOffset
3635 #endif
3636                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3637                 (nGprs, nFprs, stackBytes, regs) = case rep of
3638                     I32 -> (1, 0, 4, gprs)
3639 #if darwin_TARGET_OS
3640         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3641         -- we use the FPRs.
3642                     F32 -> (1, 1, 4, fprs)
3643                     F64 -> (2, 1, 8, fprs)
3644 #elif linux_TARGET_OS
3645         -- ... the SysV ABI doesn't.
3646                     F32 -> (0, 1, 4, fprs)
3647                     F64 -> (0, 1, 8, fprs)
3648 #endif
3649
3650         moveResult reduceToF32 =
3651             case dest_regs of
3652                 [] -> nilOL
3653                 [(dest, _hint)]
3654                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3655                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3656                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3657                                           MR r_dest r4]
3658                     | otherwise -> unitOL (MR r_dest r3)
3659                     where rep = cmmRegRep dest
3660                           r_dest = getRegisterReg dest
3661
3662         outOfLineFloatOp mop =
3663             do
3664                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3665                               mkForeignLabel functionName Nothing True
3666                 let mopLabelOrExpr = case mopExpr of
3667                         CmmLit (CmmLabel lbl) -> Left lbl
3668                         _ -> Right mopExpr
3669                 return (mopLabelOrExpr, reduce)
3670             where
3671                 (functionName, reduce) = case mop of
3672                     MO_F32_Exp   -> (FSLIT("exp"), True)
3673                     MO_F32_Log   -> (FSLIT("log"), True)
3674                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3675
3676                     MO_F32_Sin   -> (FSLIT("sin"), True)
3677                     MO_F32_Cos   -> (FSLIT("cos"), True)
3678                     MO_F32_Tan   -> (FSLIT("tan"), True)
3679
3680                     MO_F32_Asin  -> (FSLIT("asin"), True)
3681                     MO_F32_Acos  -> (FSLIT("acos"), True)
3682                     MO_F32_Atan  -> (FSLIT("atan"), True)
3683
3684                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3685                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3686                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3687                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3688
3689                     MO_F64_Exp   -> (FSLIT("exp"), False)
3690                     MO_F64_Log   -> (FSLIT("log"), False)
3691                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3692
3693                     MO_F64_Sin   -> (FSLIT("sin"), False)
3694                     MO_F64_Cos   -> (FSLIT("cos"), False)
3695                     MO_F64_Tan   -> (FSLIT("tan"), False)
3696
3697                     MO_F64_Asin  -> (FSLIT("asin"), False)
3698                     MO_F64_Acos  -> (FSLIT("acos"), False)
3699                     MO_F64_Atan  -> (FSLIT("atan"), False)
3700
3701                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3702                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3703                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3704                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3705                     other -> pprPanic "genCCall(ppc): unknown callish op"
3706                                     (pprCallishMachOp other)
3707
3708 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3709
3710 #endif /* powerpc_TARGET_ARCH */
3711
3712
3713 -- -----------------------------------------------------------------------------
3714 -- Generating a table-branch
3715
3716 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3717
3718 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3719 genSwitch expr ids = do
3720   (reg,e_code) <- getSomeReg expr
3721   lbl <- getNewLabelNat
3722   let
3723         jumpTable = map jumpTableEntry ids
3724         op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3725         code = e_code `appOL` toOL [
3726                 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3727                 JMP_TBL op [ id | Just id <- ids ]
3728              ]
3729   -- in
3730   return code
3731 #elif powerpc_TARGET_ARCH
3732 genSwitch expr ids
3733   | opt_PIC
3734   = do
3735         (reg,e_code) <- getSomeReg expr
3736         tmp <- getNewRegNat I32
3737         lbl <- getNewLabelNat
3738         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3739         (tableReg,t_code) <- getSomeReg $ dynRef
3740         let
3741             jumpTable = map jumpTableEntryRel ids
3742
3743             jumpTableEntryRel Nothing
3744                 = CmmStaticLit (CmmInt 0 wordRep)
3745             jumpTableEntryRel (Just (BlockId id))
3746                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3747                 where blockLabel = mkAsmTempLabel id
3748
3749             code = e_code `appOL` t_code `appOL` toOL [
3750                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3751                             SLW tmp reg (RIImm (ImmInt 2)),
3752                             LD I32 tmp (AddrRegReg tableReg tmp),
3753                             ADD tmp tmp (RIReg tableReg),
3754                             MTCTR tmp,
3755                             BCTR [ id | Just id <- ids ]
3756                     ]
3757         return code
3758   | otherwise
3759   = do
3760         (reg,e_code) <- getSomeReg expr
3761         tmp <- getNewRegNat I32
3762         lbl <- getNewLabelNat
3763         let
3764             jumpTable = map jumpTableEntry ids
3765
3766             code = e_code `appOL` toOL [
3767                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3768                             SLW tmp reg (RIImm (ImmInt 2)),
3769                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3770                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3771                             MTCTR tmp,
3772                             BCTR [ id | Just id <- ids ]
3773                     ]
3774         return code
3775 #else
3776 genSwitch expr ids = panic "ToDo: genSwitch"
3777 #endif
3778
3779 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3780 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3781     where blockLabel = mkAsmTempLabel id
3782
3783 -- -----------------------------------------------------------------------------
3784 -- Support bits
3785 -- -----------------------------------------------------------------------------
3786
3787
3788 -- -----------------------------------------------------------------------------
3789 -- 'condIntReg' and 'condFltReg': condition codes into registers
3790
3791 -- Turn those condition codes into integers now (when they appear on
3792 -- the right hand side of an assignment).
3793 --
3794 -- (If applicable) Do not fill the delay slots here; you will confuse the
3795 -- register allocator.
3796
3797 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3798
3799 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3800
3801 #if alpha_TARGET_ARCH
3802 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3803 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3804 #endif /* alpha_TARGET_ARCH */
3805
3806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3807
3808 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3809
3810 condIntReg cond x y = do
3811   CondCode _ cond cond_code <- condIntCode cond x y
3812   tmp <- getNewRegNat I8
3813   let
3814         code dst = cond_code `appOL` toOL [
3815                     SETCC cond (OpReg tmp),
3816                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3817                   ]
3818   -- in
3819   return (Any I32 code)
3820
3821 #endif
3822
3823 #if i386_TARGET_ARCH
3824
3825 condFltReg cond x y = do
3826   CondCode _ cond cond_code <- condFltCode cond x y
3827   tmp <- getNewRegNat I8
3828   let
3829         code dst = cond_code `appOL` toOL [
3830                     SETCC cond (OpReg tmp),
3831                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3832                   ]
3833   -- in
3834   return (Any I32 code)
3835
3836 #endif
3837
3838 #if x86_64_TARGET_ARCH
3839
3840 condFltReg cond x y = do
3841   CondCode _ cond cond_code <- condFltCode cond x y
3842   tmp1 <- getNewRegNat wordRep
3843   tmp2 <- getNewRegNat wordRep
3844   let
3845         -- We have to worry about unordered operands (eg. comparisons
3846         -- against NaN).  If the operands are unordered, the comparison
3847         -- sets the parity flag, carry flag and zero flag.
3848         -- All comparisons are supposed to return false for unordered
3849         -- operands except for !=, which returns true.
3850         --
3851         -- Optimisation: we don't have to test the parity flag if we
3852         -- know the test has already excluded the unordered case: eg >
3853         -- and >= test for a zero carry flag, which can only occur for
3854         -- ordered operands.
3855         --
3856         -- ToDo: by reversing comparisons we could avoid testing the
3857         -- parity flag in more cases.
3858
3859         code dst =
3860            cond_code `appOL`
3861              (case cond of
3862                 NE  -> or_unordered dst
3863                 GU  -> plain_test   dst
3864                 GEU -> plain_test   dst
3865                 _   -> and_ordered  dst)
3866
3867         plain_test dst = toOL [
3868                     SETCC cond (OpReg tmp1),
3869                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3870                  ]
3871         or_unordered dst = toOL [
3872                     SETCC cond (OpReg tmp1),
3873                     SETCC PARITY (OpReg tmp2),
3874                     OR I8 (OpReg tmp1) (OpReg tmp2),
3875                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3876                   ]
3877         and_ordered dst = toOL [
3878                     SETCC cond (OpReg tmp1),
3879                     SETCC NOTPARITY (OpReg tmp2),
3880                     AND I8 (OpReg tmp1) (OpReg tmp2),
3881                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3882                   ]
3883   -- in
3884   return (Any I32 code)
3885
3886 #endif
3887
3888 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3889
3890 #if sparc_TARGET_ARCH
3891
3892 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3893     (src, code) <- getSomeReg x
3894     tmp <- getNewRegNat I32
3895     let
3896         code__2 dst = code `appOL` toOL [
3897             SUB False True g0 (RIReg src) g0,
3898             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3899     return (Any I32 code__2)
3900
3901 condIntReg EQQ x y = do
3902     (src1, code1) <- getSomeReg x
3903     (src2, code2) <- getSomeReg y
3904     tmp1 <- getNewRegNat I32
3905     tmp2 <- getNewRegNat I32
3906     let
3907         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3908             XOR False src1 (RIReg src2) dst,
3909             SUB False True g0 (RIReg dst) g0,
3910             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3911     return (Any I32 code__2)
3912
3913 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3914     (src, code) <- getSomeReg x
3915     tmp <- getNewRegNat I32
3916     let
3917         code__2 dst = code `appOL` toOL [
3918             SUB False True g0 (RIReg src) g0,
3919             ADD True False g0 (RIImm (ImmInt 0)) dst]
3920     return (Any I32 code__2)
3921
3922 condIntReg NE x y = do
3923     (src1, code1) <- getSomeReg x
3924     (src2, code2) <- getSomeReg y
3925     tmp1 <- getNewRegNat I32
3926     tmp2 <- getNewRegNat I32
3927     let
3928         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3929             XOR False src1 (RIReg src2) dst,
3930             SUB False True g0 (RIReg dst) g0,
3931             ADD True False g0 (RIImm (ImmInt 0)) dst]
3932     return (Any I32 code__2)
3933
3934 condIntReg cond x y = do
3935     BlockId lbl1 <- getBlockIdNat
3936     BlockId lbl2 <- getBlockIdNat
3937     CondCode _ cond cond_code <- condIntCode cond x y
3938     let
3939         code__2 dst = cond_code `appOL` toOL [
3940             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3941             OR False g0 (RIImm (ImmInt 0)) dst,
3942             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3943             NEWBLOCK (BlockId lbl1),
3944             OR False g0 (RIImm (ImmInt 1)) dst,
3945             NEWBLOCK (BlockId lbl2)]
3946     return (Any I32 code__2)
3947
3948 condFltReg cond x y = do
3949     BlockId lbl1 <- getBlockIdNat
3950     BlockId lbl2 <- getBlockIdNat
3951     CondCode _ cond cond_code <- condFltCode cond x y
3952     let
3953         code__2 dst = cond_code `appOL` toOL [
3954             NOP,
3955             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3956             OR False g0 (RIImm (ImmInt 0)) dst,
3957             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3958             NEWBLOCK (BlockId lbl1),
3959             OR False g0 (RIImm (ImmInt 1)) dst,
3960             NEWBLOCK (BlockId lbl2)]
3961     return (Any I32 code__2)
3962
3963 #endif /* sparc_TARGET_ARCH */
3964
3965 #if powerpc_TARGET_ARCH
3966 condReg getCond = do
3967     lbl1 <- getBlockIdNat
3968     lbl2 <- getBlockIdNat
3969     CondCode _ cond cond_code <- getCond
3970     let
3971 {-        code dst = cond_code `appOL` toOL [
3972                 BCC cond lbl1,
3973                 LI dst (ImmInt 0),
3974                 BCC ALWAYS lbl2,
3975                 NEWBLOCK lbl1,
3976                 LI dst (ImmInt 1),
3977                 BCC ALWAYS lbl2,
3978                 NEWBLOCK lbl2
3979             ]-}
3980         code dst = cond_code
3981             `appOL` negate_code
3982             `appOL` toOL [
3983                 MFCR dst,
3984                 RLWINM dst dst (bit + 1) 31 31
3985             ]
3986
3987         negate_code | do_negate = unitOL (CRNOR bit bit bit)
3988                     | otherwise = nilOL
3989
3990         (bit, do_negate) = case cond of
3991             LTT -> (0, False)
3992             LE  -> (1, True)
3993             EQQ -> (2, False)
3994             GE  -> (0, True)
3995             GTT -> (1, False)
3996
3997             NE  -> (2, True)
3998
3999             LU  -> (0, False)
4000             LEU -> (1, True)
4001             GEU -> (0, True)
4002             GU  -> (1, False)
4003
4004     return (Any I32 code)
4005
4006 condIntReg cond x y = condReg (condIntCode cond x y)
4007 condFltReg cond x y = condReg (condFltCode cond x y)
4008 #endif /* powerpc_TARGET_ARCH */
4009
4010
4011 -- -----------------------------------------------------------------------------
4012 -- 'trivial*Code': deal with trivial instructions
4013
4014 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4015 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4016 -- Only look for constants on the right hand side, because that's
4017 -- where the generic optimizer will have put them.
4018
4019 -- Similarly, for unary instructions, we don't have to worry about
4020 -- matching an StInt as the argument, because genericOpt will already
4021 -- have handled the constant-folding.
4022
4023 trivialCode
4024     :: MachRep
4025     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4026       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4027                      -> Maybe (Operand -> Operand -> Instr)
4028       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4029                      -> Maybe (Operand -> Operand -> Instr)
4030       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4031       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4032       ,)))))
4033     -> CmmExpr -> CmmExpr -- the two arguments
4034     -> NatM Register
4035
4036 #ifndef powerpc_TARGET_ARCH
4037 trivialFCode
4038     :: MachRep
4039     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4040       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4041       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4042       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4043       ,))))
4044     -> CmmExpr -> CmmExpr -- the two arguments
4045     -> NatM Register
4046 #endif
4047
4048 trivialUCode
4049     :: MachRep
4050     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4051       ,IF_ARCH_i386 ((Operand -> Instr)
4052       ,IF_ARCH_x86_64 ((Operand -> Instr)
4053       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4054       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4055       ,)))))
4056     -> CmmExpr  -- the one argument
4057     -> NatM Register
4058
4059 #ifndef powerpc_TARGET_ARCH
4060 trivialUFCode
4061     :: MachRep
4062     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4063       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4064       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4065       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4066       ,))))
4067     -> CmmExpr -- the one argument
4068     -> NatM Register
4069 #endif
4070
4071 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4072
4073 #if alpha_TARGET_ARCH
4074
4075 trivialCode instr x (StInt y)
4076   | fits8Bits y
4077   = getRegister x               `thenNat` \ register ->
4078     getNewRegNat IntRep         `thenNat` \ tmp ->
4079     let
4080         code = registerCode register tmp
4081         src1 = registerName register tmp
4082         src2 = ImmInt (fromInteger y)
4083         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4084     in
4085     return (Any IntRep code__2)
4086
4087 trivialCode instr x y
4088   = getRegister x               `thenNat` \ register1 ->
4089     getRegister y               `thenNat` \ register2 ->
4090     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4091     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4092     let
4093         code1 = registerCode register1 tmp1 []
4094         src1  = registerName register1 tmp1
4095         code2 = registerCode register2 tmp2 []
4096         src2  = registerName register2 tmp2
4097         code__2 dst = asmSeqThen [code1, code2] .
4098                      mkSeqInstr (instr src1 (RIReg src2) dst)
4099     in
4100     return (Any IntRep code__2)
4101
4102 ------------
4103 trivialUCode instr x
4104   = getRegister x               `thenNat` \ register ->
4105     getNewRegNat IntRep         `thenNat` \ tmp ->
4106     let
4107         code = registerCode register tmp
4108         src  = registerName register tmp
4109         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4110     in
4111     return (Any IntRep code__2)
4112
4113 ------------
4114 trivialFCode _ instr x y
4115   = getRegister x               `thenNat` \ register1 ->
4116     getRegister y               `thenNat` \ register2 ->
4117     getNewRegNat F64    `thenNat` \ tmp1 ->
4118     getNewRegNat F64    `thenNat` \ tmp2 ->
4119     let
4120         code1 = registerCode register1 tmp1
4121         src1  = registerName register1 tmp1
4122
4123         code2 = registerCode register2 tmp2
4124         src2  = registerName register2 tmp2
4125
4126         code__2 dst = asmSeqThen [code1 [], code2 []] .
4127                       mkSeqInstr (instr src1 src2 dst)
4128     in
4129     return (Any F64 code__2)
4130
4131 trivialUFCode _ instr x
4132   = getRegister x               `thenNat` \ register ->
4133     getNewRegNat F64    `thenNat` \ tmp ->
4134     let
4135         code = registerCode register tmp
4136         src  = registerName register tmp
4137         code__2 dst = code . mkSeqInstr (instr src dst)
4138     in
4139     return (Any F64 code__2)
4140
4141 #endif /* alpha_TARGET_ARCH */
4142
4143 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4144
4145 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4146
4147 {-
4148 The Rules of the Game are:
4149
4150 * You cannot assume anything about the destination register dst;
4151   it may be anything, including a fixed reg.
4152
4153 * You may compute an operand into a fixed reg, but you may not
4154   subsequently change the contents of that fixed reg.  If you
4155   want to do so, first copy the value either to a temporary
4156   or into dst.  You are free to modify dst even if it happens
4157   to be a fixed reg -- that's not your problem.
4158
4159 * You cannot assume that a fixed reg will stay live over an
4160   arbitrary computation.  The same applies to the dst reg.
4161
4162 * Temporary regs obtained from getNewRegNat are distinct from
4163   each other and from all other regs, and stay live over
4164   arbitrary computations.
4165
4166 --------------------
4167
4168 SDM's version of The Rules:
4169
4170 * If getRegister returns Any, that means it can generate correct
4171   code which places the result in any register, period.  Even if that
4172   register happens to be read during the computation.
4173
4174   Corollary #1: this means that if you are generating code for an
4175   operation with two arbitrary operands, you cannot assign the result
4176   of the first operand into the destination register before computing
4177   the second operand.  The second operand might require the old value
4178   of the destination register.
4179
4180   Corollary #2: A function might be able to generate more efficient
4181   code if it knows the destination register is a new temporary (and
4182   therefore not read by any of the sub-computations).
4183
4184 * If getRegister returns Any, then the code it generates may modify only:
4185         (a) fresh temporaries
4186         (b) the destination register
4187         (c) known registers (eg. %ecx is used by shifts)
4188   In particular, it may *not* modify global registers, unless the global
4189   register happens to be the destination register.
4190 -}
4191
4192 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4193   | not (is64BitLit lit_a) = do
4194   b_code <- getAnyReg b
4195   let
4196        code dst
4197          = b_code dst `snocOL`
4198            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4199   -- in
4200   return (Any rep code)
4201
4202 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4203
4204 -- This is re-used for floating pt instructions too.
4205 genTrivialCode rep instr a b = do
4206   (b_op, b_code) <- getNonClobberedOperand b
4207   a_code <- getAnyReg a
4208   tmp <- getNewRegNat rep
4209   let
4210      -- We want the value of b to stay alive across the computation of a.
4211      -- But, we want to calculate a straight into the destination register,
4212      -- because the instruction only has two operands (dst := dst `op` src).
4213      -- The troublesome case is when the result of b is in the same register
4214      -- as the destination reg.  In this case, we have to save b in a
4215      -- new temporary across the computation of a.
4216      code dst
4217         | dst `regClashesWithOp` b_op =
4218                 b_code `appOL`
4219                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4220                 a_code dst `snocOL`
4221                 instr (OpReg tmp) (OpReg dst)
4222         | otherwise =
4223                 b_code `appOL`
4224                 a_code dst `snocOL`
4225                 instr b_op (OpReg dst)
4226   -- in
4227   return (Any rep code)
4228
4229 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4230 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4231 reg `regClashesWithOp` _            = False
4232
4233 -----------
4234
4235 trivialUCode rep instr x = do
4236   x_code <- getAnyReg x
4237   let
4238      code dst =
4239         x_code dst `snocOL`
4240         instr (OpReg dst)
4241   -- in
4242   return (Any rep code)
4243
4244 -----------
4245
4246 #if i386_TARGET_ARCH
4247
4248 trivialFCode pk instr x y = do
4249   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4250   (y_reg, y_code) <- getSomeReg y
4251   let
4252      code dst =
4253         x_code `appOL`
4254         y_code `snocOL`
4255         instr pk x_reg y_reg dst
4256   -- in
4257   return (Any pk code)
4258
4259 #endif
4260
4261 #if x86_64_TARGET_ARCH
4262
4263 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4264
4265 #endif
4266
4267 -------------
4268
4269 trivialUFCode rep instr x = do
4270   (x_reg, x_code) <- getSomeReg x
4271   let
4272      code dst =
4273         x_code `snocOL`
4274         instr x_reg dst
4275   -- in
4276   return (Any rep code)
4277
4278 #endif /* i386_TARGET_ARCH */
4279
4280 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4281
4282 #if sparc_TARGET_ARCH
4283
4284 trivialCode pk instr x (CmmLit (CmmInt y d))
4285   | fits13Bits y
4286   = do
4287       (src1, code) <- getSomeReg x
4288       tmp <- getNewRegNat I32
4289       let
4290         src2 = ImmInt (fromInteger y)
4291         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4292       return (Any I32 code__2)
4293
4294 trivialCode pk instr x y = do
4295     (src1, code1) <- getSomeReg x
4296     (src2, code2) <- getSomeReg y
4297     tmp1 <- getNewRegNat I32
4298     tmp2 <- getNewRegNat I32
4299     let
4300         code__2 dst = code1 `appOL` code2 `snocOL`
4301                       instr src1 (RIReg src2) dst
4302     return (Any I32 code__2)
4303
4304 ------------
4305 trivialFCode pk instr x y = do
4306     (src1, code1) <- getSomeReg x
4307     (src2, code2) <- getSomeReg y
4308     tmp1 <- getNewRegNat (cmmExprRep x)
4309     tmp2 <- getNewRegNat (cmmExprRep y)
4310     tmp <- getNewRegNat F64
4311     let
4312         promote x = FxTOy F32 F64 x tmp
4313
4314         pk1   = cmmExprRep x
4315         pk2   = cmmExprRep y
4316
4317         code__2 dst =
4318                 if pk1 == pk2 then
4319                     code1 `appOL` code2 `snocOL`
4320                     instr pk src1 src2 dst
4321                 else if pk1 == F32 then
4322                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4323                     instr F64 tmp src2 dst
4324                 else
4325                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4326                     instr F64 src1 tmp dst
4327     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4328
4329 ------------
4330 trivialUCode pk instr x = do
4331     (src, code) <- getSomeReg x
4332     tmp <- getNewRegNat pk
4333     let
4334         code__2 dst = code `snocOL` instr (RIReg src) dst
4335     return (Any pk code__2)
4336
4337 -------------
4338 trivialUFCode pk instr x = do
4339     (src, code) <- getSomeReg x
4340     tmp <- getNewRegNat pk
4341     let
4342         code__2 dst = code `snocOL` instr src dst
4343     return (Any pk code__2)
4344
4345 #endif /* sparc_TARGET_ARCH */
4346
4347 #if powerpc_TARGET_ARCH
4348
4349 {-
4350 Wolfgang's PowerPC version of The Rules:
4351
4352 A slightly modified version of The Rules to take advantage of the fact
4353 that PowerPC instructions work on all registers and don't implicitly
4354 clobber any fixed registers.
4355
4356 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4357
4358 * If getRegister returns Any, then the code it generates may modify only:
4359         (a) fresh temporaries
4360         (b) the destination register
4361   It may *not* modify global registers, unless the global
4362   register happens to be the destination register.
4363   It may not clobber any other registers. In fact, only ccalls clobber any
4364   fixed registers.
4365   Also, it may not modify the counter register (used by genCCall).
4366
4367   Corollary: If a getRegister for a subexpression returns Fixed, you need
4368   not move it to a fresh temporary before evaluating the next subexpression.
4369   The Fixed register won't be modified.
4370   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4371
4372 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4373   the value of the destination register.
4374 -}
4375
4376 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4377     | Just imm <- makeImmediate rep signed y
4378     = do
4379         (src1, code1) <- getSomeReg x
4380         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4381         return (Any rep code)
4382
4383 trivialCode rep signed instr x y = do
4384     (src1, code1) <- getSomeReg x
4385     (src2, code2) <- getSomeReg y
4386     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4387     return (Any rep code)
4388
4389 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4390     -> CmmExpr -> CmmExpr -> NatM Register
4391 trivialCodeNoImm rep instr x y = do
4392     (src1, code1) <- getSomeReg x
4393     (src2, code2) <- getSomeReg y
4394     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4395     return (Any rep code)
4396
4397 trivialUCode rep instr x = do
4398     (src, code) <- getSomeReg x
4399     let code' dst = code `snocOL` instr dst src
4400     return (Any rep code')
4401
4402 -- There is no "remainder" instruction on the PPC, so we have to do
4403 -- it the hard way.
4404 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4405
4406 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4407     -> CmmExpr -> CmmExpr -> NatM Register
4408 remainderCode rep div x y = do
4409     (src1, code1) <- getSomeReg x
4410     (src2, code2) <- getSomeReg y
4411     let code dst = code1 `appOL` code2 `appOL` toOL [
4412                 div dst src1 src2,
4413                 MULLW dst dst (RIReg src2),
4414                 SUBF dst dst src1
4415             ]
4416     return (Any rep code)
4417
4418 #endif /* powerpc_TARGET_ARCH */
4419
4420
4421 -- -----------------------------------------------------------------------------
4422 --  Coercing to/from integer/floating-point...
4423
4424 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4425 -- conversions.  We have to store temporaries in memory to move
4426 -- between the integer and the floating point register sets.
4427
4428 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4429 -- pretend, on sparc at least, that double and float regs are seperate
4430 -- kinds, so the value has to be computed into one kind before being
4431 -- explicitly "converted" to live in the other kind.
4432
4433 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4434 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4435
4436 #if sparc_TARGET_ARCH
4437 coerceDbl2Flt :: CmmExpr -> NatM Register
4438 coerceFlt2Dbl :: CmmExpr -> NatM Register
4439 #endif
4440
4441 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4442
4443 #if alpha_TARGET_ARCH
4444
4445 coerceInt2FP _ x
4446   = getRegister x               `thenNat` \ register ->
4447     getNewRegNat IntRep         `thenNat` \ reg ->
4448     let
4449         code = registerCode register reg
4450         src  = registerName register reg
4451
4452         code__2 dst = code . mkSeqInstrs [
4453             ST Q src (spRel 0),
4454             LD TF dst (spRel 0),
4455             CVTxy Q TF dst dst]
4456     in
4457     return (Any F64 code__2)
4458
4459 -------------
4460 coerceFP2Int x
4461   = getRegister x               `thenNat` \ register ->
4462     getNewRegNat F64    `thenNat` \ tmp ->
4463     let
4464         code = registerCode register tmp
4465         src  = registerName register tmp
4466
4467         code__2 dst = code . mkSeqInstrs [
4468             CVTxy TF Q src tmp,
4469             ST TF tmp (spRel 0),
4470             LD Q dst (spRel 0)]
4471     in
4472     return (Any IntRep code__2)
4473
4474 #endif /* alpha_TARGET_ARCH */
4475
4476 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4477
4478 #if i386_TARGET_ARCH
4479
4480 coerceInt2FP from to x = do
4481   (x_reg, x_code) <- getSomeReg x
4482   let
4483         opc  = case to of F32 -> GITOF; F64 -> GITOD
4484         code dst = x_code `snocOL` opc x_reg dst
4485         -- ToDo: works for non-I32 reps?
4486   -- in
4487   return (Any to code)
4488
4489 ------------
4490
4491 coerceFP2Int from to x = do
4492   (x_reg, x_code) <- getSomeReg x
4493   let
4494         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4495         code dst = x_code `snocOL` opc x_reg dst
4496         -- ToDo: works for non-I32 reps?
4497   -- in
4498   return (Any to code)
4499
4500 #endif /* i386_TARGET_ARCH */
4501
4502 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4503
4504 #if x86_64_TARGET_ARCH
4505
4506 coerceFP2Int from to x = do
4507   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4508   let
4509         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4510         code dst = x_code `snocOL` opc x_op dst
4511   -- in
4512   return (Any to code) -- works even if the destination rep is <I32
4513
4514 coerceInt2FP from to x = do
4515   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4516   let
4517         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4518         code dst = x_code `snocOL` opc x_op dst
4519   -- in
4520   return (Any to code) -- works even if the destination rep is <I32
4521
4522 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4523 coerceFP2FP to x = do
4524   (x_reg, x_code) <- getSomeReg x
4525   let
4526         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4527         code dst = x_code `snocOL` opc x_reg dst
4528   -- in
4529   return (Any to code)
4530
4531 #endif
4532
4533 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4534
4535 #if sparc_TARGET_ARCH
4536
4537 coerceInt2FP pk1 pk2 x = do
4538     (src, code) <- getSomeReg x
4539     let
4540         code__2 dst = code `appOL` toOL [
4541             ST pk1 src (spRel (-2)),
4542             LD pk1 (spRel (-2)) dst,
4543             FxTOy pk1 pk2 dst dst]
4544     return (Any pk2 code__2)
4545
4546 ------------
4547 coerceFP2Int pk fprep x = do
4548     (src, code) <- getSomeReg x
4549     reg <- getNewRegNat fprep
4550     tmp <- getNewRegNat pk
4551     let
4552         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4553             code `appOL` toOL [
4554             FxTOy fprep pk src tmp,
4555             ST pk tmp (spRel (-2)),
4556             LD pk (spRel (-2)) dst]
4557     return (Any pk code__2)
4558
4559 ------------
4560 coerceDbl2Flt x = do
4561     (src, code) <- getSomeReg x
4562     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4563
4564 ------------
4565 coerceFlt2Dbl x = do
4566     (src, code) <- getSomeReg x
4567     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4568
4569 #endif /* sparc_TARGET_ARCH */
4570
4571 #if powerpc_TARGET_ARCH
4572 coerceInt2FP fromRep toRep x = do
4573     (src, code) <- getSomeReg x
4574     lbl <- getNewLabelNat
4575     itmp <- getNewRegNat I32
4576     ftmp <- getNewRegNat F64
4577     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4578     Amode addr addr_code <- getAmode dynRef
4579     let
4580         code' dst = code `appOL` maybe_exts `appOL` toOL [
4581                 LDATA ReadOnlyData
4582                                 [CmmDataLabel lbl,
4583                                  CmmStaticLit (CmmInt 0x43300000 I32),
4584                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4585                 XORIS itmp src (ImmInt 0x8000),
4586                 ST I32 itmp (spRel 3),
4587                 LIS itmp (ImmInt 0x4330),
4588                 ST I32 itmp (spRel 2),
4589                 LD F64 ftmp (spRel 2)
4590             ] `appOL` addr_code `appOL` toOL [
4591                 LD F64 dst addr,
4592                 FSUB F64 dst ftmp dst
4593             ] `appOL` maybe_frsp dst
4594
4595         maybe_exts = case fromRep of
4596                         I8 ->  unitOL $ EXTS I8 src src
4597                         I16 -> unitOL $ EXTS I16 src src
4598                         I32 -> nilOL
4599         maybe_frsp dst = case toRep of
4600                         F32 -> unitOL $ FRSP dst dst
4601                         F64 -> nilOL
4602     return (Any toRep code')
4603
4604 coerceFP2Int fromRep toRep x = do
4605     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4606     (src, code) <- getSomeReg x
4607     tmp <- getNewRegNat F64
4608     let
4609         code' dst = code `appOL` toOL [
4610                 -- convert to int in FP reg
4611             FCTIWZ tmp src,
4612                 -- store value (64bit) from FP to stack
4613             ST F64 tmp (spRel 2),
4614                 -- read low word of value (high word is undefined)
4615             LD I32 dst (spRel 3)]
4616     return (Any toRep code')
4617 #endif /* powerpc_TARGET_ARCH */
4618
4619
4620 -- -----------------------------------------------------------------------------
4621 -- eXTRA_STK_ARGS_HERE
4622
4623 -- We (allegedly) put the first six C-call arguments in registers;
4624 -- where do we start putting the rest of them?
4625
4626 -- Moved from MachInstrs (SDM):
4627
4628 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4629 eXTRA_STK_ARGS_HERE :: Int
4630 eXTRA_STK_ARGS_HERE
4631   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4632 #endif
4633