ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25
  26 -- Our intermediate code:
  27 import PprCmm           ( pprExpr )
  28 import Cmm
  29 import MachOp
  30 import CLabel
  31
  32 -- The rest:
  33 import StaticFlags      ( opt_PIC )
  34 import ForeignCall      ( CCallConv(..) )
  35 import OrdList
  36 import Pretty
  37 import Outputable
  38 import FastString
  39 import FastTypes        ( isFastTrue )
  40 import Constants        ( wORD_SIZE )
  41
  42 #ifdef DEBUG
  43 import Outputable       ( assertPanic )
  44 import TRACE            ( trace )
  45 #endif
  46
  47 import Control.Monad    ( mapAndUnzipM )
  48 import Maybe            ( fromJust )
  49 import DATA_BITS
  50 import DATA_WORD
  51
  52 -- -----------------------------------------------------------------------------
  53 -- Top-level of the instruction selector
  54
  55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  56 -- They are really trees of insns to facilitate fast appending, where a
  57 -- left-to-right traversal (pre-order?) yields the insns in the correct
  58 -- order.
  59
  60 type InstrBlock = OrdList Instr
  61
  62 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  63 cmmTopCodeGen (CmmProc info lab params blocks) = do
  64   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  65   picBaseMb <- getPicBaseMaybeNat
  66   let proc = CmmProc info lab params (concat nat_blocks)
  67       tops = proc : concat statics
  68   case picBaseMb of
  69       Just picBase -> initializePicBase picBase tops
  70       Nothing -> return tops
  71
  72 cmmTopCodeGen (CmmData sec dat) = do
  73   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  74
  75 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  76 basicBlockCodeGen (BasicBlock id stmts) = do
  77   instrs <- stmtsToInstrs stmts
  78   -- code generation may introduce new basic block boundaries, which
  79   -- are indicated by the NEWBLOCK instruction.  We must split up the
  80   -- instruction stream into basic blocks again.  Also, we extract
  81   -- LDATAs here too.
  82   let
  83         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  84
  85         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  86           = ([], BasicBlock id instrs : blocks, statics)
  87         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  88           = (instrs, blocks, CmmData sec dat:statics)
  89         mkBlocks instr (instrs,blocks,statics)
  90           = (instr:instrs, blocks, statics)
  91   -- in
  92   return (BasicBlock id top : other_blocks, statics)
  93
  94 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  95 stmtsToInstrs stmts
  96    = do instrss <- mapM stmtToInstrs stmts
  97         return (concatOL instrss)
  98
  99 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 100 stmtToInstrs stmt = case stmt of
 101     CmmNop         -> return nilOL
 102     CmmComment s   -> return (unitOL (COMMENT s))
 103
 104     CmmAssign reg src
 105       | isFloatingRep kind -> assignReg_FltCode kind reg src
 106 #if WORD_SIZE_IN_BITS==32
 107       | kind == I64        -> assignReg_I64Code      reg src
 108 #endif
 109       | otherwise          -> assignReg_IntCode kind reg src
 110         where kind = cmmRegRep reg
 111
 112     CmmStore addr src
 113       | isFloatingRep kind -> assignMem_FltCode kind addr src
 114 #if WORD_SIZE_IN_BITS==32
 115       | kind == I64      -> assignMem_I64Code      addr src
 116 #endif
 117       | otherwise        -> assignMem_IntCode kind addr src
 118         where kind = cmmExprRep src
 119
 120     CmmCall target result_regs args vols
 121        -> genCCall target result_regs args vols
 122
 123     CmmBranch id          -> genBranch id
 124     CmmCondBranch arg id  -> genCondJump id arg
 125     CmmSwitch arg ids     -> genSwitch arg ids
 126     CmmJump arg params    -> genJump arg
 127
 128 -- -----------------------------------------------------------------------------
 129 -- General things for putting together code sequences
 130
 131 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 132 -- CmmExprs into CmmRegOff?
 133 mangleIndexTree :: CmmExpr -> CmmExpr
 134 mangleIndexTree (CmmRegOff reg off)
 135   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 136   where rep = cmmRegRep reg
 137
 138 -- -----------------------------------------------------------------------------
 139 --  Code gen for 64-bit arithmetic on 32-bit platforms
 140
 141 {-
 142 Simple support for generating 64-bit code (ie, 64 bit values and 64
 143 bit assignments) on 32-bit platforms.  Unlike the main code generator
 144 we merely shoot for generating working code as simply as possible, and
 145 pay little attention to code quality.  Specifically, there is no
 146 attempt to deal cleverly with the fixed-vs-floating register
 147 distinction; all values are generated into (pairs of) floating
 148 registers, even if this would mean some redundant reg-reg moves as a
 149 result.  Only one of the VRegUniques is returned, since it will be
 150 of the VRegUniqueLo form, and the upper-half VReg can be determined
 151 by applying getHiVRegFromLo to it.
 152 -}
 153
 154 data ChildCode64        -- a.k.a "Register64"
 155    = ChildCode64
 156         InstrBlock      -- code
 157         Reg             -- the lower 32-bit temporary which contains the
 158                         -- result; use getHiVRegFromLo to find the other
 159                         -- VRegUnique.  Rules of this simplified insn
 160                         -- selection game are therefore that the returned
 161                         -- Reg may be modified
 162
 163 #if WORD_SIZE_IN_BITS==32
 164 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 165 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 166 #endif
 167
 168 #ifndef x86_64_TARGET_ARCH
 169 iselExpr64        :: CmmExpr -> NatM ChildCode64
 170 #endif
 171
 172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 173
 174 #if i386_TARGET_ARCH
 175
 176 assignMem_I64Code addrTree valueTree = do
 177   Amode addr addr_code <- getAmode addrTree
 178   ChildCode64 vcode rlo <- iselExpr64 valueTree
 179   let
 180         rhi = getHiVRegFromLo rlo
 181
 182         -- Little-endian store
 183         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 184         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 185   -- in
 186   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 187
 188
 189 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 190    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 191    let
 192          r_dst_lo = mkVReg u_dst I32
 193          r_dst_hi = getHiVRegFromLo r_dst_lo
 194          r_src_hi = getHiVRegFromLo r_src_lo
 195          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 196          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 197    -- in
 198    return (
 199         vcode `snocOL` mov_lo `snocOL` mov_hi
 200      )
 201
 202 assignReg_I64Code lvalue valueTree
 203    = panic "assignReg_I64Code(i386): invalid lvalue"
 204
 205 ------------
 206
 207 iselExpr64 (CmmLit (CmmInt i _)) = do
 208   (rlo,rhi) <- getNewRegPairNat I32
 209   let
 210         r = fromIntegral (fromIntegral i :: Word32)
 211         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 212         code = toOL [
 213                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 214                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 215                 ]
 216   -- in
 217   return (ChildCode64 code rlo)
 218
 219 iselExpr64 (CmmLoad addrTree I64) = do
 220    Amode addr addr_code <- getAmode addrTree
 221    (rlo,rhi) <- getNewRegPairNat I32
 222    let
 223         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 224         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 225    -- in
 226    return (
 227             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 228                         rlo
 229      )
 230
 231 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 232    = return (ChildCode64 nilOL (mkVReg vu I32))
 233
 234 -- we handle addition, but rather badly
 235 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 236    ChildCode64 code1 r1lo <- iselExpr64 e1
 237    (rlo,rhi) <- getNewRegPairNat I32
 238    let
 239         r = fromIntegral (fromIntegral i :: Word32)
 240         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 241         r1hi = getHiVRegFromLo r1lo
 242         code =  code1 `appOL`
 243                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 244                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 245                        MOV I32 (OpReg r1hi) (OpReg rhi),
 246                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 247    -- in
 248    return (ChildCode64 code rlo)
 249
 250 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 251    ChildCode64 code1 r1lo <- iselExpr64 e1
 252    ChildCode64 code2 r2lo <- iselExpr64 e2
 253    (rlo,rhi) <- getNewRegPairNat I32
 254    let
 255         r1hi = getHiVRegFromLo r1lo
 256         r2hi = getHiVRegFromLo r2lo
 257         code =  code1 `appOL`
 258                 code2 `appOL`
 259                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 260                        ADD I32 (OpReg r2lo) (OpReg rlo),
 261                        MOV I32 (OpReg r1hi) (OpReg rhi),
 262                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 263    -- in
 264    return (ChildCode64 code rlo)
 265
 266 iselExpr64 expr
 267    = pprPanic "iselExpr64(i386)" (ppr expr)
 268
 269 #endif /* i386_TARGET_ARCH */
 270
 271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 272
 273 #if sparc_TARGET_ARCH
 274
 275 assignMem_I64Code addrTree valueTree = do
 276      Amode addr addr_code <- getAmode addrTree
 277      ChildCode64 vcode rlo <- iselExpr64 valueTree
 278      (src, code) <- getSomeReg addrTree
 279      let
 280          rhi = getHiVRegFromLo rlo
 281          -- Big-endian store
 282          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 283          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 284      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 285
 286 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 287      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 288      let
 289          r_dst_lo = mkVReg u_dst pk
 290          r_dst_hi = getHiVRegFromLo r_dst_lo
 291          r_src_hi = getHiVRegFromLo r_src_lo
 292          mov_lo = mkMOV r_src_lo r_dst_lo
 293          mov_hi = mkMOV r_src_hi r_dst_hi
 294          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 295      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 296 assignReg_I64Code lvalue valueTree
 297    = panic "assignReg_I64Code(sparc): invalid lvalue"
 298
 299
 300 -- Don't delete this -- it's very handy for debugging.
 301 --iselExpr64 expr
 302 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 303 --   = panic "iselExpr64(???)"
 304
 305 iselExpr64 (CmmLoad addrTree I64) = do
 306      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 307      rlo <- getNewRegNat I32
 308      let rhi = getHiVRegFromLo rlo
 309          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 310          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 311      return (
 312             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 313                          rlo
 314           )
 315
 316 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 317      r_dst_lo <-  getNewRegNat I32
 318      let r_dst_hi = getHiVRegFromLo r_dst_lo
 319          r_src_lo = mkVReg uq I32
 320          r_src_hi = getHiVRegFromLo r_src_lo
 321          mov_lo = mkMOV r_src_lo r_dst_lo
 322          mov_hi = mkMOV r_src_hi r_dst_hi
 323          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 324      return (
 325             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 326          )
 327
 328 iselExpr64 expr
 329    = pprPanic "iselExpr64(sparc)" (ppr expr)
 330
 331 #endif /* sparc_TARGET_ARCH */
 332
 333 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 334
 335 #if powerpc_TARGET_ARCH
 336
 337 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 338 getI64Amodes addrTree = do
 339     Amode hi_addr addr_code <- getAmode addrTree
 340     case addrOffset hi_addr 4 of
 341         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 342         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 343                            return (AddrRegImm hi_ptr (ImmInt 0),
 344                                    AddrRegImm hi_ptr (ImmInt 4),
 345                                    code)
 346
 347 assignMem_I64Code addrTree valueTree = do
 348         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 349         ChildCode64 vcode rlo <- iselExpr64 valueTree
 350         let
 351                 rhi = getHiVRegFromLo rlo
 352
 353                 -- Big-endian store
 354                 mov_hi = ST I32 rhi hi_addr
 355                 mov_lo = ST I32 rlo lo_addr
 356         -- in
 357         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 358
 359 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 360    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 361    let
 362          r_dst_lo = mkVReg u_dst I32
 363          r_dst_hi = getHiVRegFromLo r_dst_lo
 364          r_src_hi = getHiVRegFromLo r_src_lo
 365          mov_lo = MR r_dst_lo r_src_lo
 366          mov_hi = MR r_dst_hi r_src_hi
 367    -- in
 368    return (
 369         vcode `snocOL` mov_lo `snocOL` mov_hi
 370      )
 371
 372 assignReg_I64Code lvalue valueTree
 373    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 374
 375
 376 -- Don't delete this -- it's very handy for debugging.
 377 --iselExpr64 expr
 378 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 379 --   = panic "iselExpr64(???)"
 380
 381 iselExpr64 (CmmLoad addrTree I64) = do
 382     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 383     (rlo, rhi) <- getNewRegPairNat I32
 384     let mov_hi = LD I32 rhi hi_addr
 385         mov_lo = LD I32 rlo lo_addr
 386     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 387                          rlo
 388
 389 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 390    = return (ChildCode64 nilOL (mkVReg vu I32))
 391
 392 iselExpr64 (CmmLit (CmmInt i _)) = do
 393   (rlo,rhi) <- getNewRegPairNat I32
 394   let
 395         half0 = fromIntegral (fromIntegral i :: Word16)
 396         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 397         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 398         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 399
 400         code = toOL [
 401                 LIS rlo (ImmInt half1),
 402                 OR rlo rlo (RIImm $ ImmInt half0),
 403                 LIS rhi (ImmInt half3),
 404                 OR rlo rlo (RIImm $ ImmInt half2)
 405                 ]
 406   -- in
 407   return (ChildCode64 code rlo)
 408
 409 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 410    ChildCode64 code1 r1lo <- iselExpr64 e1
 411    ChildCode64 code2 r2lo <- iselExpr64 e2
 412    (rlo,rhi) <- getNewRegPairNat I32
 413    let
 414         r1hi = getHiVRegFromLo r1lo
 415         r2hi = getHiVRegFromLo r2lo
 416         code =  code1 `appOL`
 417                 code2 `appOL`
 418                 toOL [ ADDC rlo r1lo r2lo,
 419                        ADDE rhi r1hi r2hi ]
 420    -- in
 421    return (ChildCode64 code rlo)
 422
 423 iselExpr64 expr
 424    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 425
 426 #endif /* powerpc_TARGET_ARCH */
 427
 428
 429 -- -----------------------------------------------------------------------------
 430 -- The 'Register' type
 431
 432 -- 'Register's passed up the tree.  If the stix code forces the register
 433 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 434 -- otherwise, it comes out as @Any@, and the parent can decide which
 435 -- register to put it in.
 436
 437 data Register
 438   = Fixed   MachRep Reg InstrBlock
 439   | Any     MachRep (Reg -> InstrBlock)
 440
 441 swizzleRegisterRep :: Register -> MachRep -> Register
 442 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 443 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 444
 445
 446 -- -----------------------------------------------------------------------------
 447 -- Utils based on getRegister, below
 448
 449 -- The dual to getAnyReg: compute an expression into a register, but
 450 -- we don't mind which one it is.
 451 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 452 getSomeReg expr = do
 453   r <- getRegister expr
 454   case r of
 455     Any rep code -> do
 456         tmp <- getNewRegNat rep
 457         return (tmp, code tmp)
 458     Fixed _ reg code ->
 459         return (reg, code)
 460
 461 -- -----------------------------------------------------------------------------
 462 -- Grab the Reg for a CmmReg
 463
 464 getRegisterReg :: CmmReg -> Reg
 465
 466 getRegisterReg (CmmLocal (LocalReg u pk))
 467   = mkVReg u pk
 468
 469 getRegisterReg (CmmGlobal mid)
 470   = case get_GlobalReg_reg_or_addr mid of
 471        Left (RealReg rrno) -> RealReg rrno
 472        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 473           -- By this stage, the only MagicIds remaining should be the
 474           -- ones which map to a real machine register on this
 475           -- platform.  Hence ...
 476
 477
 478 -- -----------------------------------------------------------------------------
 479 -- Generate code to get a subtree into a Register
 480
 481 -- Don't delete this -- it's very handy for debugging.
 482 --getRegister expr
 483 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 484 --   = panic "getRegister(???)"
 485
 486 getRegister :: CmmExpr -> NatM Register
 487
 488 getRegister (CmmReg (CmmGlobal PicBaseReg))
 489   = do
 490       reg <- getPicBaseNat wordRep
 491       return (Fixed wordRep reg nilOL)
 492
 493 getRegister (CmmReg reg)
 494   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 495
 496 getRegister tree@(CmmRegOff _ _)
 497   = getRegister (mangleIndexTree tree)
 498
 499 -- end of machine-"independent" bit; here we go on the rest...
 500
 501 #if alpha_TARGET_ARCH
 502
 503 getRegister (StDouble d)
 504   = getBlockIdNat                   `thenNat` \ lbl ->
 505     getNewRegNat PtrRep             `thenNat` \ tmp ->
 506     let code dst = mkSeqInstrs [
 507             LDATA RoDataSegment lbl [
 508                     DATA TF [ImmLab (rational d)]
 509                 ],
 510             LDA tmp (AddrImm (ImmCLbl lbl)),
 511             LD TF dst (AddrReg tmp)]
 512     in
 513         return (Any F64 code)
 514
 515 getRegister (StPrim primop [x]) -- unary PrimOps
 516   = case primop of
 517       IntNegOp -> trivialUCode (NEG Q False) x
 518
 519       NotOp    -> trivialUCode NOT x
 520
 521       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 522       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 523
 524       OrdOp -> coerceIntCode IntRep x
 525       ChrOp -> chrCode x
 526
 527       Float2IntOp  -> coerceFP2Int    x
 528       Int2FloatOp  -> coerceInt2FP pr x
 529       Double2IntOp -> coerceFP2Int    x
 530       Int2DoubleOp -> coerceInt2FP pr x
 531
 532       Double2FloatOp -> coerceFltCode x
 533       Float2DoubleOp -> coerceFltCode x
 534
 535       other_op -> getRegister (StCall fn CCallConv F64 [x])
 536         where
 537           fn = case other_op of
 538                  FloatExpOp    -> FSLIT("exp")
 539                  FloatLogOp    -> FSLIT("log")
 540                  FloatSqrtOp   -> FSLIT("sqrt")
 541                  FloatSinOp    -> FSLIT("sin")
 542                  FloatCosOp    -> FSLIT("cos")
 543                  FloatTanOp    -> FSLIT("tan")
 544                  FloatAsinOp   -> FSLIT("asin")
 545                  FloatAcosOp   -> FSLIT("acos")
 546                  FloatAtanOp   -> FSLIT("atan")
 547                  FloatSinhOp   -> FSLIT("sinh")
 548                  FloatCoshOp   -> FSLIT("cosh")
 549                  FloatTanhOp   -> FSLIT("tanh")
 550                  DoubleExpOp   -> FSLIT("exp")
 551                  DoubleLogOp   -> FSLIT("log")
 552                  DoubleSqrtOp  -> FSLIT("sqrt")
 553                  DoubleSinOp   -> FSLIT("sin")
 554                  DoubleCosOp   -> FSLIT("cos")
 555                  DoubleTanOp   -> FSLIT("tan")
 556                  DoubleAsinOp  -> FSLIT("asin")
 557                  DoubleAcosOp  -> FSLIT("acos")
 558                  DoubleAtanOp  -> FSLIT("atan")
 559                  DoubleSinhOp  -> FSLIT("sinh")
 560                  DoubleCoshOp  -> FSLIT("cosh")
 561                  DoubleTanhOp  -> FSLIT("tanh")
 562   where
 563     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 564
 565 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 566   = case primop of
 567       CharGtOp -> trivialCode (CMP LTT) y x
 568       CharGeOp -> trivialCode (CMP LE) y x
 569       CharEqOp -> trivialCode (CMP EQQ) x y
 570       CharNeOp -> int_NE_code x y
 571       CharLtOp -> trivialCode (CMP LTT) x y
 572       CharLeOp -> trivialCode (CMP LE) x y
 573
 574       IntGtOp  -> trivialCode (CMP LTT) y x
 575       IntGeOp  -> trivialCode (CMP LE) y x
 576       IntEqOp  -> trivialCode (CMP EQQ) x y
 577       IntNeOp  -> int_NE_code x y
 578       IntLtOp  -> trivialCode (CMP LTT) x y
 579       IntLeOp  -> trivialCode (CMP LE) x y
 580
 581       WordGtOp -> trivialCode (CMP ULT) y x
 582       WordGeOp -> trivialCode (CMP ULE) x y
 583       WordEqOp -> trivialCode (CMP EQQ)  x y
 584       WordNeOp -> int_NE_code x y
 585       WordLtOp -> trivialCode (CMP ULT) x y
 586       WordLeOp -> trivialCode (CMP ULE) x y
 587
 588       AddrGtOp -> trivialCode (CMP ULT) y x
 589       AddrGeOp -> trivialCode (CMP ULE) y x
 590       AddrEqOp -> trivialCode (CMP EQQ)  x y
 591       AddrNeOp -> int_NE_code x y
 592       AddrLtOp -> trivialCode (CMP ULT) x y
 593       AddrLeOp -> trivialCode (CMP ULE) x y
 594
 595       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 596       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 597       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 598       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 599       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 600       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 601
 602       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 603       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 604       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 605       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 606       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 607       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 608
 609       IntAddOp  -> trivialCode (ADD Q False) x y
 610       IntSubOp  -> trivialCode (SUB Q False) x y
 611       IntMulOp  -> trivialCode (MUL Q False) x y
 612       IntQuotOp -> trivialCode (DIV Q False) x y
 613       IntRemOp  -> trivialCode (REM Q False) x y
 614
 615       WordAddOp  -> trivialCode (ADD Q False) x y
 616       WordSubOp  -> trivialCode (SUB Q False) x y
 617       WordMulOp  -> trivialCode (MUL Q False) x y
 618       WordQuotOp -> trivialCode (DIV Q True) x y
 619       WordRemOp  -> trivialCode (REM Q True) x y
 620
 621       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 622       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 623       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 624       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 625
 626       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 627       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 628       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 629       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 630
 631       AddrAddOp  -> trivialCode (ADD Q False) x y
 632       AddrSubOp  -> trivialCode (SUB Q False) x y
 633       AddrRemOp  -> trivialCode (REM Q True) x y
 634
 635       AndOp  -> trivialCode AND x y
 636       OrOp   -> trivialCode OR  x y
 637       XorOp  -> trivialCode XOR x y
 638       SllOp  -> trivialCode SLL x y
 639       SrlOp  -> trivialCode SRL x y
 640
 641       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 642       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 643       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 644
 645       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 646       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 647   where
 648     {- ------------------------------------------------------------
 649         Some bizarre special code for getting condition codes into
 650         registers.  Integer non-equality is a test for equality
 651         followed by an XOR with 1.  (Integer comparisons always set
 652         the result register to 0 or 1.)  Floating point comparisons of
 653         any kind leave the result in a floating point register, so we
 654         need to wrangle an integer register out of things.
 655     -}
 656     int_NE_code :: StixTree -> StixTree -> NatM Register
 657
 658     int_NE_code x y
 659       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 660         getNewRegNat IntRep             `thenNat` \ tmp ->
 661         let
 662             code = registerCode register tmp
 663             src  = registerName register tmp
 664             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 665         in
 666         return (Any IntRep code__2)
 667
 668     {- ------------------------------------------------------------
 669         Comments for int_NE_code also apply to cmpF_code
 670     -}
 671     cmpF_code
 672         :: (Reg -> Reg -> Reg -> Instr)
 673         -> Cond
 674         -> StixTree -> StixTree
 675         -> NatM Register
 676
 677     cmpF_code instr cond x y
 678       = trivialFCode pr instr x y       `thenNat` \ register ->
 679         getNewRegNat F64                `thenNat` \ tmp ->
 680         getBlockIdNat                   `thenNat` \ lbl ->
 681         let
 682             code = registerCode register tmp
 683             result  = registerName register tmp
 684
 685             code__2 dst = code . mkSeqInstrs [
 686                 OR zeroh (RIImm (ImmInt 1)) dst,
 687                 BF cond  result (ImmCLbl lbl),
 688                 OR zeroh (RIReg zeroh) dst,
 689                 NEWBLOCK lbl]
 690         in
 691         return (Any IntRep code__2)
 692       where
 693         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 694       ------------------------------------------------------------
 695
 696 getRegister (CmmLoad pk mem)
 697   = getAmode mem                    `thenNat` \ amode ->
 698     let
 699         code = amodeCode amode
 700         src   = amodeAddr amode
 701         size = primRepToSize pk
 702         code__2 dst = code . mkSeqInstr (LD size dst src)
 703     in
 704     return (Any pk code__2)
 705
 706 getRegister (StInt i)
 707   | fits8Bits i
 708   = let
 709         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 710     in
 711     return (Any IntRep code)
 712   | otherwise
 713   = let
 714         code dst = mkSeqInstr (LDI Q dst src)
 715     in
 716     return (Any IntRep code)
 717   where
 718     src = ImmInt (fromInteger i)
 719
 720 getRegister leaf
 721   | isJust imm
 722   = let
 723         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 724     in
 725     return (Any PtrRep code)
 726   where
 727     imm = maybeImm leaf
 728     imm__2 = case imm of Just x -> x
 729
 730 #endif /* alpha_TARGET_ARCH */
 731
 732 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 733
 734 #if i386_TARGET_ARCH
 735
 736 getRegister (CmmLit (CmmFloat f F32)) = do
 737     lbl <- getNewLabelNat
 738     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 739     Amode addr addr_code <- getAmode dynRef
 740     let code dst =
 741             LDATA ReadOnlyData
 742                         [CmmDataLabel lbl,
 743                          CmmStaticLit (CmmFloat f F32)]
 744             `consOL` (addr_code `snocOL`
 745             GLD F32 addr dst)
 746     -- in
 747     return (Any F32 code)
 748
 749
 750 getRegister (CmmLit (CmmFloat d F64))
 751   | d == 0.0
 752   = let code dst = unitOL (GLDZ dst)
 753     in  return (Any F64 code)
 754
 755   | d == 1.0
 756   = let code dst = unitOL (GLD1 dst)
 757     in  return (Any F64 code)
 758
 759   | otherwise = do
 760     lbl <- getNewLabelNat
 761     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 762     Amode addr addr_code <- getAmode dynRef
 763     let code dst =
 764             LDATA ReadOnlyData
 765                         [CmmDataLabel lbl,
 766                          CmmStaticLit (CmmFloat d F64)]
 767             `consOL` (addr_code `snocOL`
 768             GLD F64 addr dst)
 769     -- in
 770     return (Any F64 code)
 771
 772 #endif /* i386_TARGET_ARCH */
 773
 774 #if x86_64_TARGET_ARCH
 775
 776 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 777    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 778         -- I don't know why there are xorpd, xorps, and pxor instructions.
 779         -- They all appear to do the same thing --SDM
 780    return (Any rep code)
 781
 782 getRegister (CmmLit (CmmFloat f rep)) = do
 783     lbl <- getNewLabelNat
 784     let code dst = toOL [
 785             LDATA ReadOnlyData
 786                         [CmmDataLabel lbl,
 787                          CmmStaticLit (CmmFloat f rep)],
 788             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 789             ]
 790     -- in
 791     return (Any rep code)
 792
 793 #endif /* x86_64_TARGET_ARCH */
 794
 795 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 796
 797 -- catch simple cases of zero- or sign-extended load
 798 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 799   code <- intLoadCode (MOVZxL I8) addr
 800   return (Any I32 code)
 801
 802 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 803   code <- intLoadCode (MOVSxL I8) addr
 804   return (Any I32 code)
 805
 806 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 807   code <- intLoadCode (MOVZxL I16) addr
 808   return (Any I32 code)
 809
 810 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 811   code <- intLoadCode (MOVSxL I16) addr
 812   return (Any I32 code)
 813
 814 #endif
 815
 816 #if x86_64_TARGET_ARCH
 817
 818 -- catch simple cases of zero- or sign-extended load
 819 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 820   code <- intLoadCode (MOVZxL I8) addr
 821   return (Any I64 code)
 822
 823 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 824   code <- intLoadCode (MOVSxL I8) addr
 825   return (Any I64 code)
 826
 827 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 828   code <- intLoadCode (MOVZxL I16) addr
 829   return (Any I64 code)
 830
 831 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 832   code <- intLoadCode (MOVSxL I16) addr
 833   return (Any I64 code)
 834
 835 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 836   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 837   return (Any I64 code)
 838
 839 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 840   code <- intLoadCode (MOVSxL I32) addr
 841   return (Any I64 code)
 842
 843 #endif
 844
 845 #if x86_64_TARGET_ARCH
 846 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 847   x_code <- getAnyReg x
 848   lbl <- getNewLabelNat
 849   let
 850     code dst = x_code dst `appOL` toOL [
 851         -- This is how gcc does it, so it can't be that bad:
 852         LDATA ReadOnlyData16 [
 853                 CmmAlign 16,
 854                 CmmDataLabel lbl,
 855                 CmmStaticLit (CmmInt 0x80000000 I32),
 856                 CmmStaticLit (CmmInt 0 I32),
 857                 CmmStaticLit (CmmInt 0 I32),
 858                 CmmStaticLit (CmmInt 0 I32)
 859         ],
 860         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 861                 -- xorps, so we need the 128-bit constant
 862                 -- ToDo: rip-relative
 863         ]
 864   --
 865   return (Any F32 code)
 866
 867 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 868   x_code <- getAnyReg x
 869   lbl <- getNewLabelNat
 870   let
 871         -- This is how gcc does it, so it can't be that bad:
 872     code dst = x_code dst `appOL` toOL [
 873         LDATA ReadOnlyData16 [
 874                 CmmAlign 16,
 875                 CmmDataLabel lbl,
 876                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 877                 CmmStaticLit (CmmInt 0 I64)
 878         ],
 879                 -- gcc puts an unpck here.  Wonder if we need it.
 880         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 881                 -- xorpd, so we need the 128-bit constant
 882         ]
 883   --
 884   return (Any F64 code)
 885 #endif
 886
 887 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 888
 889 getRegister (CmmMachOp mop [x]) -- unary MachOps
 890   = case mop of
 891 #if i386_TARGET_ARCH
 892       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 893       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 894 #endif
 895
 896       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 897       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 898
 899       -- Nop conversions
 900       -- TODO: these are only nops if the arg is not a fixed register that
 901       -- can't be byte-addressed.
 902       MO_U_Conv I32 I8  -> conversionNop I32 x
 903       MO_S_Conv I32 I8  -> conversionNop I32 x
 904       MO_U_Conv I16 I8  -> conversionNop I16 x
 905       MO_S_Conv I16 I8  -> conversionNop I16 x
 906       MO_U_Conv I32 I16 -> conversionNop I32 x
 907       MO_S_Conv I32 I16 -> conversionNop I32 x
 908 #if x86_64_TARGET_ARCH
 909       MO_U_Conv I64 I32 -> conversionNop I64 x
 910       MO_S_Conv I64 I32 -> conversionNop I64 x
 911       MO_U_Conv I64 I16 -> conversionNop I64 x
 912       MO_S_Conv I64 I16 -> conversionNop I64 x
 913       MO_U_Conv I64 I8  -> conversionNop I64 x
 914       MO_S_Conv I64 I8  -> conversionNop I64 x
 915 #endif
 916
 917       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 918       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 919
 920       -- widenings
 921       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 922       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 923       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 924
 925       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 926       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 927       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 928
 929 #if x86_64_TARGET_ARCH
 930       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 931       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 932       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 933       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 934       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 935       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 936         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 937         -- However, we don't want the register allocator to throw it
 938         -- away as an unnecessary reg-to-reg move, so we keep it in
 939         -- the form of a movzl and print it as a movl later.
 940 #endif
 941
 942 #if i386_TARGET_ARCH
 943       MO_S_Conv F32 F64 -> conversionNop F64 x
 944       MO_S_Conv F64 F32 -> conversionNop F32 x
 945 #else
 946       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 947       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 948 #endif
 949
 950       MO_S_Conv from to
 951         | isFloatingRep from -> coerceFP2Int from to x
 952         | isFloatingRep to   -> coerceInt2FP from to x
 953
 954       other -> pprPanic "getRegister" (pprMachOp mop)
 955    where
 956         -- signed or unsigned extension.
 957         integerExtend from to instr expr = do
 958             (reg,e_code) <- if from == I8 then getByteReg expr
 959                                           else getSomeReg expr
 960             let
 961                 code dst =
 962                   e_code `snocOL`
 963                   instr from (OpReg reg) (OpReg dst)
 964             return (Any to code)
 965
 966         conversionNop new_rep expr
 967             = do e_code <- getRegister expr
 968                  return (swizzleRegisterRep e_code new_rep)
 969
 970
 971 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 972   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 973     case mop of
 974       MO_Eq F32   -> condFltReg EQQ x y
 975       MO_Ne F32   -> condFltReg NE x y
 976       MO_S_Gt F32 -> condFltReg GTT x y
 977       MO_S_Ge F32 -> condFltReg GE x y
 978       MO_S_Lt F32 -> condFltReg LTT x y
 979       MO_S_Le F32 -> condFltReg LE x y
 980
 981       MO_Eq F64   -> condFltReg EQQ x y
 982       MO_Ne F64   -> condFltReg NE x y
 983       MO_S_Gt F64 -> condFltReg GTT x y
 984       MO_S_Ge F64 -> condFltReg GE x y
 985       MO_S_Lt F64 -> condFltReg LTT x y
 986       MO_S_Le F64 -> condFltReg LE x y
 987
 988       MO_Eq rep   -> condIntReg EQQ x y
 989       MO_Ne rep   -> condIntReg NE x y
 990
 991       MO_S_Gt rep -> condIntReg GTT x y
 992       MO_S_Ge rep -> condIntReg GE x y
 993       MO_S_Lt rep -> condIntReg LTT x y
 994       MO_S_Le rep -> condIntReg LE x y
 995
 996       MO_U_Gt rep -> condIntReg GU  x y
 997       MO_U_Ge rep -> condIntReg GEU x y
 998       MO_U_Lt rep -> condIntReg LU  x y
 999       MO_U_Le rep -> condIntReg LEU x y
1000
1001 #if i386_TARGET_ARCH
1002       MO_Add F32 -> trivialFCode F32 GADD x y
1003       MO_Sub F32 -> trivialFCode F32 GSUB x y
1004
1005       MO_Add F64 -> trivialFCode F64 GADD x y
1006       MO_Sub F64 -> trivialFCode F64 GSUB x y
1007
1008       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1009       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1010 #endif
1011
1012 #if x86_64_TARGET_ARCH
1013       MO_Add F32 -> trivialFCode F32 ADD x y
1014       MO_Sub F32 -> trivialFCode F32 SUB x y
1015
1016       MO_Add F64 -> trivialFCode F64 ADD x y
1017       MO_Sub F64 -> trivialFCode F64 SUB x y
1018
1019       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1020       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1021 #endif
1022
1023       MO_Add rep -> add_code rep x y
1024       MO_Sub rep -> sub_code rep x y
1025
1026       MO_S_Quot rep -> div_code rep True  True  x y
1027       MO_S_Rem  rep -> div_code rep True  False x y
1028       MO_U_Quot rep -> div_code rep False True  x y
1029       MO_U_Rem  rep -> div_code rep False False x y
1030
1031 #if i386_TARGET_ARCH
1032       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1033       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1034 #endif
1035
1036 #if x86_64_TARGET_ARCH
1037       MO_Mul   F32 -> trivialFCode F32 MUL x y
1038       MO_Mul   F64 -> trivialFCode F64 MUL x y
1039 #endif
1040
1041       MO_Mul   rep -> let op = IMUL rep in
1042                       trivialCode rep op (Just op) x y
1043
1044       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1045
1046       MO_And rep -> let op = AND rep in
1047                     trivialCode rep op (Just op) x y
1048       MO_Or  rep -> let op = OR  rep in
1049                     trivialCode rep op (Just op) x y
1050       MO_Xor rep -> let op = XOR rep in
1051                     trivialCode rep op (Just op) x y
1052
1053         {- Shift ops on x86s have constraints on their source, it
1054            either has to be Imm, CL or 1
1055             => trivialCode is not restrictive enough (sigh.)
1056         -}
1057       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1058       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1059       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1060
1061       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1062   where
1063     --------------------
1064     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1065     imulMayOflo rep a b = do
1066          (a_reg, a_code) <- getNonClobberedReg a
1067          b_code <- getAnyReg b
1068          let
1069              shift_amt  = case rep of
1070                            I32 -> 31
1071                            I64 -> 63
1072                            _ -> panic "shift_amt"
1073
1074              code = a_code `appOL` b_code eax `appOL`
1075                         toOL [
1076                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1077                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1078                                 -- sign extend lower part
1079                            SUB rep (OpReg edx) (OpReg eax)
1080                                 -- compare against upper
1081                            -- eax==0 if high part == sign extended low part
1082                         ]
1083          -- in
1084          return (Fixed rep eax code)
1085
1086     --------------------
1087     shift_code :: MachRep
1088                -> (Operand -> Operand -> Instr)
1089                -> CmmExpr
1090                -> CmmExpr
1091                -> NatM Register
1092
1093     {- Case1: shift length as immediate -}
1094     shift_code rep instr x y@(CmmLit lit) = do
1095           x_code <- getAnyReg x
1096           let
1097                code dst
1098                   = x_code dst `snocOL`
1099                     instr (OpImm (litToImm lit)) (OpReg dst)
1100           -- in
1101           return (Any rep code)
1102
1103     {- Case2: shift length is complex (non-immediate) -}
1104     shift_code rep instr x y{-amount-} = do
1105         (x_reg, x_code) <- getNonClobberedReg x
1106         y_code <- getAnyReg y
1107         let
1108            code = x_code `appOL`
1109                   y_code ecx `snocOL`
1110                   instr (OpReg ecx) (OpReg x_reg)
1111         -- in
1112         return (Fixed rep x_reg code)
1113
1114     --------------------
1115     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1116     add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1117     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1118
1119     --------------------
1120     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1121     sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1122     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1123
1124     -- our three-operand add instruction:
1125     add_int rep x y = do
1126         (x_reg, x_code) <- getSomeReg x
1127         let
1128             imm = ImmInt (fromInteger y)
1129             code dst
1130                = x_code `snocOL`
1131                  LEA rep
1132                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1133                         (OpReg dst)
1134         --
1135         return (Any rep code)
1136
1137     ----------------------
1138     div_code rep signed quotient x y = do
1139            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1140            x_code <- getAnyReg x
1141            let
1142              widen | signed    = CLTD rep
1143                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1144
1145              instr | signed    = IDIV
1146                    | otherwise = DIV
1147
1148              code = y_code `appOL`
1149                     x_code eax `appOL`
1150                     toOL [widen, instr rep y_op]
1151
1152              result | quotient  = eax
1153                     | otherwise = edx
1154
1155            -- in
1156            return (Fixed rep result code)
1157
1158
1159 getRegister (CmmLoad mem pk)
1160   | isFloatingRep pk
1161   = do
1162     Amode src mem_code <- getAmode mem
1163     let
1164         code dst = mem_code `snocOL`
1165                    IF_ARCH_i386(GLD pk src dst,
1166                                 MOV pk (OpAddr src) (OpReg dst))
1167     --
1168     return (Any pk code)
1169
1170 #if i386_TARGET_ARCH
1171 getRegister (CmmLoad mem pk)
1172   | pk /= I64
1173   = do
1174     code <- intLoadCode (instr pk) mem
1175     return (Any pk code)
1176   where
1177         instr I8  = MOVZxL pk
1178         instr I16 = MOV I16
1179         instr I32 = MOV I32
1180         -- we always zero-extend 8-bit loads, if we
1181         -- can't think of anything better.  This is because
1182         -- we can't guarantee access to an 8-bit variant of every register
1183         -- (esi and edi don't have 8-bit variants), so to make things
1184         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1185 #endif
1186
1187 #if x86_64_TARGET_ARCH
1188 -- Simpler memory load code on x86_64
1189 getRegister (CmmLoad mem pk)
1190   = do
1191     code <- intLoadCode (MOV pk) mem
1192     return (Any pk code)
1193 #endif
1194
1195 getRegister (CmmLit (CmmInt 0 rep))
1196   = let
1197         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1198         adj_rep = case rep of I64 -> I32; _ -> rep
1199         rep1 = IF_ARCH_i386( rep, adj_rep )
1200         code dst
1201            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1202     in
1203         return (Any rep code)
1204
1205 #if x86_64_TARGET_ARCH
1206   -- optimisation for loading small literals on x86_64: take advantage
1207   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1208   -- instruction forms are shorter.
1209 getRegister (CmmLit lit)
1210   | I64 <- cmmLitRep lit, not (isBigLit lit)
1211   = let
1212         imm = litToImm lit
1213         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1214     in
1215         return (Any I64 code)
1216   where
1217    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1218    isBigLit _ = False
1219         -- note1: not the same as is64BitLit, because that checks for
1220         -- signed literals that fit in 32 bits, but we want unsigned
1221         -- literals here.
1222         -- note2: all labels are small, because we're assuming the
1223         -- small memory model (see gcc docs, -mcmodel=small).
1224 #endif
1225
1226 getRegister (CmmLit lit)
1227   = let
1228         rep = cmmLitRep lit
1229         imm = litToImm lit
1230         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1231     in
1232         return (Any rep code)
1233
1234 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1235
1236
1237 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1238    -> NatM (Reg -> InstrBlock)
1239 intLoadCode instr mem = do
1240   Amode src mem_code <- getAmode mem
1241   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1242
1243 -- Compute an expression into *any* register, adding the appropriate
1244 -- move instruction if necessary.
1245 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1246 getAnyReg expr = do
1247   r <- getRegister expr
1248   anyReg r
1249
1250 anyReg :: Register -> NatM (Reg -> InstrBlock)
1251 anyReg (Any _ code)          = return code
1252 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1253
1254 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1255 -- Fixed registers might not be byte-addressable, so we make sure we've
1256 -- got a temporary, inserting an extra reg copy if necessary.
1257 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1258 #if x86_64_TARGET_ARCH
1259 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1260 #else
1261 getByteReg expr = do
1262   r <- getRegister expr
1263   case r of
1264     Any rep code -> do
1265         tmp <- getNewRegNat rep
1266         return (tmp, code tmp)
1267     Fixed rep reg code
1268         | isVirtualReg reg -> return (reg,code)
1269         | otherwise -> do
1270             tmp <- getNewRegNat rep
1271             return (tmp, code `snocOL` reg2reg rep reg tmp)
1272         -- ToDo: could optimise slightly by checking for byte-addressable
1273         -- real registers, but that will happen very rarely if at all.
1274 #endif
1275
1276 -- Another variant: this time we want the result in a register that cannot
1277 -- be modified by code to evaluate an arbitrary expression.
1278 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1279 getNonClobberedReg expr = do
1280   r <- getRegister expr
1281   case r of
1282     Any rep code -> do
1283         tmp <- getNewRegNat rep
1284         return (tmp, code tmp)
1285     Fixed rep reg code
1286         -- only free regs can be clobbered
1287         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1288                 tmp <- getNewRegNat rep
1289                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1290         | otherwise ->
1291                 return (reg, code)
1292
1293 reg2reg :: MachRep -> Reg -> Reg -> Instr
1294 reg2reg rep src dst
1295 #if i386_TARGET_ARCH
1296   | isFloatingRep rep = GMOV src dst
1297 #endif
1298   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1299
1300 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1301
1302 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1303
1304 #if sparc_TARGET_ARCH
1305
1306 getRegister (CmmLit (CmmFloat f F32)) = do
1307     lbl <- getNewLabelNat
1308     let code dst = toOL [
1309             LDATA ReadOnlyData
1310                         [CmmDataLabel lbl,
1311                          CmmStaticLit (CmmFloat f F32)],
1312             SETHI (HI (ImmCLbl lbl)) dst,
1313             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1314     return (Any F32 code)
1315
1316 getRegister (CmmLit (CmmFloat d F64)) = do
1317     lbl <- getNewLabelNat
1318     let code dst = toOL [
1319             LDATA ReadOnlyData
1320                         [CmmDataLabel lbl,
1321                          CmmStaticLit (CmmFloat d F64)],
1322             SETHI (HI (ImmCLbl lbl)) dst,
1323             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1324     return (Any F64 code)
1325
1326 getRegister (CmmMachOp mop [x]) -- unary MachOps
1327   = case mop of
1328       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1329       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1330
1331       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1332       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1333
1334       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1335
1336       MO_U_Conv F64 F32-> coerceDbl2Flt x
1337       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1338
1339       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1340       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1341       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1342       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1343
1344       -- Conversions which are a nop on sparc
1345       MO_U_Conv from to
1346         | from == to   -> conversionNop to   x
1347       MO_U_Conv I32 to -> conversionNop to   x
1348       MO_S_Conv I32 to -> conversionNop to   x
1349
1350       -- widenings
1351       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1352       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1353       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1354       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1355
1356       other_op -> panic "Unknown unary mach op"
1357     where
1358         -- XXX SLL/SRL?
1359         integerExtend signed from to expr = do
1360            (reg, e_code) <- getSomeReg expr
1361            let
1362                code dst =
1363                    e_code `snocOL`
1364                    ((if signed then SRA else SRL)
1365                           reg (RIImm (ImmInt 0)) dst)
1366            return (Any to code)
1367         conversionNop new_rep expr
1368             = do e_code <- getRegister expr
1369                  return (swizzleRegisterRep e_code new_rep)
1370
1371 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1372   = case mop of
1373       MO_Eq F32 -> condFltReg EQQ x y
1374       MO_Ne F32 -> condFltReg NE x y
1375
1376       MO_S_Gt F32 -> condFltReg GTT x y
1377       MO_S_Ge F32 -> condFltReg GE x y
1378       MO_S_Lt F32 -> condFltReg LTT x y
1379       MO_S_Le F32 -> condFltReg LE x y
1380
1381       MO_Eq F64 -> condFltReg EQQ x y
1382       MO_Ne F64 -> condFltReg NE x y
1383
1384       MO_S_Gt F64 -> condFltReg GTT x y
1385       MO_S_Ge F64 -> condFltReg GE x y
1386       MO_S_Lt F64 -> condFltReg LTT x y
1387       MO_S_Le F64 -> condFltReg LE x y
1388
1389       MO_Eq rep -> condIntReg EQQ x y
1390       MO_Ne rep -> condIntReg NE x y
1391
1392       MO_S_Gt rep -> condIntReg GTT x y
1393       MO_S_Ge rep -> condIntReg GE x y
1394       MO_S_Lt rep -> condIntReg LTT x y
1395       MO_S_Le rep -> condIntReg LE x y
1396
1397       MO_U_Gt I32  -> condIntReg GTT x y
1398       MO_U_Ge I32  -> condIntReg GE x y
1399       MO_U_Lt I32  -> condIntReg LTT x y
1400       MO_U_Le I32  -> condIntReg LE x y
1401
1402       MO_U_Gt I16 -> condIntReg GU  x y
1403       MO_U_Ge I16 -> condIntReg GEU x y
1404       MO_U_Lt I16 -> condIntReg LU  x y
1405       MO_U_Le I16 -> condIntReg LEU x y
1406
1407       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1408       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1409
1410       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1411 {-
1412       -- ToDo: teach about V8+ SPARC div instructions
1413       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1414       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1415       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1416       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1417 -}
1418       MO_Add F32  -> trivialFCode F32 FADD  x y
1419       MO_Sub F32   -> trivialFCode F32  FSUB x y
1420       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1421       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1422
1423       MO_Add F64   -> trivialFCode F64 FADD  x y
1424       MO_Sub F64   -> trivialFCode F64  FSUB x y
1425       MO_Mul F64   -> trivialFCode F64  FMUL x y
1426       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1427
1428       MO_And rep   -> trivialCode rep (AND False) x y
1429       MO_Or rep    -> trivialCode rep (OR  False) x y
1430       MO_Xor rep   -> trivialCode rep (XOR False) x y
1431
1432       MO_Mul rep -> trivialCode rep (SMUL False) x y
1433
1434       MO_Shl rep   -> trivialCode rep SLL  x y
1435       MO_U_Shr rep   -> trivialCode rep SRL x y
1436       MO_S_Shr rep   -> trivialCode rep SRA x y
1437
1438 {-
1439       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1440                                          [promote x, promote y])
1441                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1442       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1443                                         [x, y])
1444 -}
1445       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1446   where
1447     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1448
1449     --------------------
1450     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1451     imulMayOflo rep a b = do
1452          (a_reg, a_code) <- getSomeReg a
1453          (b_reg, b_code) <- getSomeReg b
1454          res_lo <- getNewRegNat I32
1455          res_hi <- getNewRegNat I32
1456          let
1457             shift_amt  = case rep of
1458                           I32 -> 31
1459                           I64 -> 63
1460                           _ -> panic "shift_amt"
1461             code dst = a_code `appOL` b_code `appOL`
1462                        toOL [
1463                            SMUL False a_reg (RIReg b_reg) res_lo,
1464                            RDY res_hi,
1465                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1466                            SUB False False res_lo (RIReg res_hi) dst
1467                         ]
1468          return (Any I32 code)
1469
1470 getRegister (CmmLoad mem pk) = do
1471     Amode src code <- getAmode mem
1472     let
1473         code__2 dst = code `snocOL` LD pk src dst
1474     return (Any pk code__2)
1475
1476 getRegister (CmmLit (CmmInt i _))
1477   | fits13Bits i
1478   = let
1479         src = ImmInt (fromInteger i)
1480         code dst = unitOL (OR False g0 (RIImm src) dst)
1481     in
1482         return (Any I32 code)
1483
1484 getRegister (CmmLit lit)
1485   = let rep = cmmLitRep lit
1486         imm = litToImm lit
1487         code dst = toOL [
1488             SETHI (HI imm) dst,
1489             OR False dst (RIImm (LO imm)) dst]
1490     in return (Any I32 code)
1491
1492 #endif /* sparc_TARGET_ARCH */
1493
1494 #if powerpc_TARGET_ARCH
1495 getRegister (CmmLoad mem pk)
1496   | pk /= I64
1497   = do
1498         Amode addr addr_code <- getAmode mem
1499         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1500                        addr_code `snocOL` LD pk dst addr
1501         return (Any pk code)
1502
1503 -- catch simple cases of zero- or sign-extended load
1504 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1505     Amode addr addr_code <- getAmode mem
1506     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1507
1508 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1509
1510 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1511     Amode addr addr_code <- getAmode mem
1512     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1513
1514 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1515     Amode addr addr_code <- getAmode mem
1516     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1517
1518 getRegister (CmmMachOp mop [x]) -- unary MachOps
1519   = case mop of
1520       MO_Not rep   -> trivialUCode rep NOT x
1521
1522       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1523       MO_S_Conv F32 F64 -> conversionNop F64 x
1524
1525       MO_S_Conv from to
1526         | from == to         -> conversionNop to x
1527         | isFloatingRep from -> coerceFP2Int from to x
1528         | isFloatingRep to   -> coerceInt2FP from to x
1529
1530         -- narrowing is a nop: we treat the high bits as undefined
1531       MO_S_Conv I32 to -> conversionNop to x
1532       MO_S_Conv I16 I8 -> conversionNop I8 x
1533       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1534       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1535
1536       MO_U_Conv from to
1537         | from == to -> conversionNop to x
1538         -- narrowing is a nop: we treat the high bits as undefined
1539       MO_U_Conv I32 to -> conversionNop to x
1540       MO_U_Conv I16 I8 -> conversionNop I8 x
1541       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1542       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1543
1544       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1545       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1546       MO_S_Neg rep      -> trivialUCode rep NEG x
1547
1548     where
1549         conversionNop new_rep expr
1550             = do e_code <- getRegister expr
1551                  return (swizzleRegisterRep e_code new_rep)
1552
1553 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1554   = case mop of
1555       MO_Eq F32 -> condFltReg EQQ x y
1556       MO_Ne F32 -> condFltReg NE  x y
1557
1558       MO_S_Gt F32 -> condFltReg GTT x y
1559       MO_S_Ge F32 -> condFltReg GE  x y
1560       MO_S_Lt F32 -> condFltReg LTT x y
1561       MO_S_Le F32 -> condFltReg LE  x y
1562
1563       MO_Eq F64 -> condFltReg EQQ x y
1564       MO_Ne F64 -> condFltReg NE  x y
1565
1566       MO_S_Gt F64 -> condFltReg GTT x y
1567       MO_S_Ge F64 -> condFltReg GE  x y
1568       MO_S_Lt F64 -> condFltReg LTT x y
1569       MO_S_Le F64 -> condFltReg LE  x y
1570
1571       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1572       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1573
1574       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1575       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1576       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1577       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1578
1579       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1580       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1581       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1582       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1583
1584       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1585       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1586       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1587       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1588
1589       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1590       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1591       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1592       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1593
1594          -- optimize addition with 32-bit immediate
1595          -- (needed for PIC)
1596       MO_Add I32 ->
1597         case y of
1598           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1599             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1600           CmmLit lit
1601             -> do
1602                 (src, srcCode) <- getSomeReg x
1603                 let imm = litToImm lit
1604                     code dst = srcCode `appOL` toOL [
1605                                     ADDIS dst src (HA imm),
1606                                     ADD dst dst (RIImm (LO imm))
1607                                 ]
1608                 return (Any I32 code)
1609           _ -> trivialCode I32 True ADD x y
1610
1611       MO_Add rep -> trivialCode rep True ADD x y
1612       MO_Sub rep ->
1613         case y of    -- subfi ('substract from' with immediate) doesn't exist
1614           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1615             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1616           _ -> trivialCodeNoImm rep SUBF y x
1617
1618       MO_Mul rep -> trivialCode rep True MULLW x y
1619
1620       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1621
1622       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1623       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1624
1625       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1626       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1627
1628       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1629       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1630
1631       MO_And rep   -> trivialCode rep False AND x y
1632       MO_Or rep    -> trivialCode rep False OR x y
1633       MO_Xor rep   -> trivialCode rep False XOR x y
1634
1635       MO_Shl rep   -> trivialCode rep False SLW x y
1636       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1637       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1638
1639 getRegister (CmmLit (CmmInt i rep))
1640   | Just imm <- makeImmediate rep True i
1641   = let
1642         code dst = unitOL (LI dst imm)
1643     in
1644         return (Any rep code)
1645
1646 getRegister (CmmLit (CmmFloat f frep)) = do
1647     lbl <- getNewLabelNat
1648     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1649     Amode addr addr_code <- getAmode dynRef
1650     let code dst =
1651             LDATA ReadOnlyData  [CmmDataLabel lbl,
1652                                  CmmStaticLit (CmmFloat f frep)]
1653             `consOL` (addr_code `snocOL` LD frep dst addr)
1654     return (Any frep code)
1655
1656 getRegister (CmmLit lit)
1657   = let rep = cmmLitRep lit
1658         imm = litToImm lit
1659         code dst = toOL [
1660               LIS dst (HI imm),
1661               OR dst dst (RIImm (LO imm))
1662           ]
1663     in return (Any rep code)
1664
1665 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1666
1667     -- extend?Rep: wrap integer expression of type rep
1668     -- in a conversion to I32
1669 extendSExpr I32 x = x
1670 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1671 extendUExpr I32 x = x
1672 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1673
1674 #endif /* powerpc_TARGET_ARCH */
1675
1676
1677 -- -----------------------------------------------------------------------------
1678 --  The 'Amode' type: Memory addressing modes passed up the tree.
1679
1680 data Amode = Amode AddrMode InstrBlock
1681
1682 {-
1683 Now, given a tree (the argument to an CmmLoad) that references memory,
1684 produce a suitable addressing mode.
1685
1686 A Rule of the Game (tm) for Amodes: use of the addr bit must
1687 immediately follow use of the code part, since the code part puts
1688 values in registers which the addr then refers to.  So you can't put
1689 anything in between, lest it overwrite some of those registers.  If
1690 you need to do some other computation between the code part and use of
1691 the addr bit, first store the effective address from the amode in a
1692 temporary, then do the other computation, and then use the temporary:
1693
1694     code
1695     LEA amode, tmp
1696     ... other computation ...
1697     ... (tmp) ...
1698 -}
1699
1700 getAmode :: CmmExpr -> NatM Amode
1701 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1702
1703 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1704
1705 #if alpha_TARGET_ARCH
1706
1707 getAmode (StPrim IntSubOp [x, StInt i])
1708   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1709     getRegister x               `thenNat` \ register ->
1710     let
1711         code = registerCode register tmp
1712         reg  = registerName register tmp
1713         off  = ImmInt (-(fromInteger i))
1714     in
1715     return (Amode (AddrRegImm reg off) code)
1716
1717 getAmode (StPrim IntAddOp [x, StInt i])
1718   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1719     getRegister x               `thenNat` \ register ->
1720     let
1721         code = registerCode register tmp
1722         reg  = registerName register tmp
1723         off  = ImmInt (fromInteger i)
1724     in
1725     return (Amode (AddrRegImm reg off) code)
1726
1727 getAmode leaf
1728   | isJust imm
1729   = return (Amode (AddrImm imm__2) id)
1730   where
1731     imm = maybeImm leaf
1732     imm__2 = case imm of Just x -> x
1733
1734 getAmode other
1735   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1736     getRegister other           `thenNat` \ register ->
1737     let
1738         code = registerCode register tmp
1739         reg  = registerName register tmp
1740     in
1741     return (Amode (AddrReg reg) code)
1742
1743 #endif /* alpha_TARGET_ARCH */
1744
1745 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1746
1747 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1748
1749 -- This is all just ridiculous, since it carefully undoes
1750 -- what mangleIndexTree has just done.
1751 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1752   | not (is64BitLit lit)
1753   -- ASSERT(rep == I32)???
1754   = do (x_reg, x_code) <- getSomeReg x
1755        let off = ImmInt (-(fromInteger i))
1756        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1757
1758 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1759   | not (is64BitLit lit)
1760   -- ASSERT(rep == I32)???
1761   = do (x_reg, x_code) <- getSomeReg x
1762        let off = ImmInt (fromInteger i)
1763        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1764
1765 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1766 -- recognised by the next rule.
1767 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1768                                   b@(CmmLit _)])
1769   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1770
1771 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1772                                         [y, CmmLit (CmmInt shift _)]])
1773   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1774   = do (x_reg, x_code) <- getNonClobberedReg x
1775         -- x must be in a temp, because it has to stay live over y_code
1776         -- we could compre x_reg and y_reg and do something better here...
1777        (y_reg, y_code) <- getSomeReg y
1778        let
1779            code = x_code `appOL` y_code
1780            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1781        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1782                code)
1783
1784 getAmode (CmmLit lit) | not (is64BitLit lit)
1785   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1786
1787 getAmode expr = do
1788   (reg,code) <- getSomeReg expr
1789   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1790
1791 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1792
1793 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1794
1795 #if sparc_TARGET_ARCH
1796
1797 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1798   | fits13Bits (-i)
1799   = do
1800        (reg, code) <- getSomeReg x
1801        let
1802          off  = ImmInt (-(fromInteger i))
1803        return (Amode (AddrRegImm reg off) code)
1804
1805
1806 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1807   | fits13Bits i
1808   = do
1809        (reg, code) <- getSomeReg x
1810        let
1811          off  = ImmInt (fromInteger i)
1812        return (Amode (AddrRegImm reg off) code)
1813
1814 getAmode (CmmMachOp (MO_Add rep) [x, y])
1815   = do
1816     (regX, codeX) <- getSomeReg x
1817     (regY, codeY) <- getSomeReg y
1818     let
1819         code = codeX `appOL` codeY
1820     return (Amode (AddrRegReg regX regY) code)
1821
1822 -- XXX Is this same as "leaf" in Stix?
1823 getAmode (CmmLit lit)
1824   = do
1825       tmp <- getNewRegNat I32
1826       let
1827         code = unitOL (SETHI (HI imm__2) tmp)
1828       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1829       where
1830          imm__2 = litToImm lit
1831
1832 getAmode other
1833   = do
1834        (reg, code) <- getSomeReg other
1835        let
1836             off  = ImmInt 0
1837        return (Amode (AddrRegImm reg off) code)
1838
1839 #endif /* sparc_TARGET_ARCH */
1840
1841 #ifdef powerpc_TARGET_ARCH
1842 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1843   | Just off <- makeImmediate I32 True (-i)
1844   = do
1845         (reg, code) <- getSomeReg x
1846         return (Amode (AddrRegImm reg off) code)
1847
1848
1849 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1850   | Just off <- makeImmediate I32 True i
1851   = do
1852         (reg, code) <- getSomeReg x
1853         return (Amode (AddrRegImm reg off) code)
1854
1855    -- optimize addition with 32-bit immediate
1856    -- (needed for PIC)
1857 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1858   = do
1859         tmp <- getNewRegNat I32
1860         (src, srcCode) <- getSomeReg x
1861         let imm = litToImm lit
1862             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1863         return (Amode (AddrRegImm tmp (LO imm)) code)
1864
1865 getAmode (CmmLit lit)
1866   = do
1867         tmp <- getNewRegNat I32
1868         let imm = litToImm lit
1869             code = unitOL (LIS tmp (HA imm))
1870         return (Amode (AddrRegImm tmp (LO imm)) code)
1871
1872 getAmode (CmmMachOp (MO_Add I32) [x, y])
1873   = do
1874         (regX, codeX) <- getSomeReg x
1875         (regY, codeY) <- getSomeReg y
1876         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1877
1878 getAmode other
1879   = do
1880         (reg, code) <- getSomeReg other
1881         let
1882             off  = ImmInt 0
1883         return (Amode (AddrRegImm reg off) code)
1884 #endif /* powerpc_TARGET_ARCH */
1885
1886 -- -----------------------------------------------------------------------------
1887 -- getOperand: sometimes any operand will do.
1888
1889 -- getNonClobberedOperand: the value of the operand will remain valid across
1890 -- the computation of an arbitrary expression, unless the expression
1891 -- is computed directly into a register which the operand refers to
1892 -- (see trivialCode where this function is used for an example).
1893
1894 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1895
1896 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1897 #if x86_64_TARGET_ARCH
1898 getNonClobberedOperand (CmmLit lit)
1899   | isSuitableFloatingPointLit lit = do
1900     lbl <- getNewLabelNat
1901     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1902                                            CmmStaticLit lit])
1903     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1904 #endif
1905 getNonClobberedOperand (CmmLit lit)
1906   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1907     return (OpImm (litToImm lit), nilOL)
1908 getNonClobberedOperand (CmmLoad mem pk)
1909   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1910     Amode src mem_code <- getAmode mem
1911     (src',save_code) <-
1912         if (amodeCouldBeClobbered src)
1913                 then do
1914                    tmp <- getNewRegNat wordRep
1915                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1916                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1917                 else
1918                    return (src, nilOL)
1919     return (OpAddr src', save_code `appOL` mem_code)
1920 getNonClobberedOperand e = do
1921     (reg, code) <- getNonClobberedReg e
1922     return (OpReg reg, code)
1923
1924 amodeCouldBeClobbered :: AddrMode -> Bool
1925 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1926
1927 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1928 regClobbered _ = False
1929
1930 -- getOperand: the operand is not required to remain valid across the
1931 -- computation of an arbitrary expression.
1932 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1933 #if x86_64_TARGET_ARCH
1934 getOperand (CmmLit lit)
1935   | isSuitableFloatingPointLit lit = do
1936     lbl <- getNewLabelNat
1937     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1938                                            CmmStaticLit lit])
1939     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1940 #endif
1941 getOperand (CmmLit lit)
1942   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1943     return (OpImm (litToImm lit), nilOL)
1944 getOperand (CmmLoad mem pk)
1945   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1946     Amode src mem_code <- getAmode mem
1947     return (OpAddr src, mem_code)
1948 getOperand e = do
1949     (reg, code) <- getSomeReg e
1950     return (OpReg reg, code)
1951
1952 isOperand :: CmmExpr -> Bool
1953 isOperand (CmmLoad _ _) = True
1954 isOperand (CmmLit lit)  = not (is64BitLit lit)
1955                           || isSuitableFloatingPointLit lit
1956 isOperand _             = False
1957
1958 -- if we want a floating-point literal as an operand, we can
1959 -- use it directly from memory.  However, if the literal is
1960 -- zero, we're better off generating it into a register using
1961 -- xor.
1962 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1963 isSuitableFloatingPointLit _ = False
1964
1965 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1966 getRegOrMem (CmmLoad mem pk)
1967   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1968     Amode src mem_code <- getAmode mem
1969     return (OpAddr src, mem_code)
1970 getRegOrMem e = do
1971     (reg, code) <- getNonClobberedReg e
1972     return (OpReg reg, code)
1973
1974 #if x86_64_TARGET_ARCH
1975 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
1976    -- assume that labels are in the range 0-2^31-1: this assumes the
1977    -- small memory model (see gcc docs, -mcmodel=small).
1978 #endif
1979 is64BitLit x = False
1980 #endif
1981
1982 -- -----------------------------------------------------------------------------
1983 --  The 'CondCode' type:  Condition codes passed up the tree.
1984
1985 data CondCode = CondCode Bool Cond InstrBlock
1986
1987 -- Set up a condition code for a conditional branch.
1988
1989 getCondCode :: CmmExpr -> NatM CondCode
1990
1991 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1992
1993 #if alpha_TARGET_ARCH
1994 getCondCode = panic "MachCode.getCondCode: not on Alphas"
1995 #endif /* alpha_TARGET_ARCH */
1996
1997 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1998
1999 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2000 -- yes, they really do seem to want exactly the same!
2001
2002 getCondCode (CmmMachOp mop [x, y])
2003   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2004     case mop of
2005       MO_Eq F32 -> condFltCode EQQ x y
2006       MO_Ne F32 -> condFltCode NE  x y
2007
2008       MO_S_Gt F32 -> condFltCode GTT x y
2009       MO_S_Ge F32 -> condFltCode GE  x y
2010       MO_S_Lt F32 -> condFltCode LTT x y
2011       MO_S_Le F32 -> condFltCode LE  x y
2012
2013       MO_Eq F64 -> condFltCode EQQ x y
2014       MO_Ne F64 -> condFltCode NE  x y
2015
2016       MO_S_Gt F64 -> condFltCode GTT x y
2017       MO_S_Ge F64 -> condFltCode GE  x y
2018       MO_S_Lt F64 -> condFltCode LTT x y
2019       MO_S_Le F64 -> condFltCode LE  x y
2020
2021       MO_Eq rep -> condIntCode EQQ  x y
2022       MO_Ne rep -> condIntCode NE   x y
2023
2024       MO_S_Gt rep -> condIntCode GTT  x y
2025       MO_S_Ge rep -> condIntCode GE   x y
2026       MO_S_Lt rep -> condIntCode LTT  x y
2027       MO_S_Le rep -> condIntCode LE   x y
2028
2029       MO_U_Gt rep -> condIntCode GU   x y
2030       MO_U_Ge rep -> condIntCode GEU  x y
2031       MO_U_Lt rep -> condIntCode LU   x y
2032       MO_U_Le rep -> condIntCode LEU  x y
2033
2034       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2035
2036 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2037
2038 #elif powerpc_TARGET_ARCH
2039
2040 -- almost the same as everywhere else - but we need to
2041 -- extend small integers to 32 bit first
2042
2043 getCondCode (CmmMachOp mop [x, y])
2044   = case mop of
2045       MO_Eq F32 -> condFltCode EQQ x y
2046       MO_Ne F32 -> condFltCode NE  x y
2047
2048       MO_S_Gt F32 -> condFltCode GTT x y
2049       MO_S_Ge F32 -> condFltCode GE  x y
2050       MO_S_Lt F32 -> condFltCode LTT x y
2051       MO_S_Le F32 -> condFltCode LE  x y
2052
2053       MO_Eq F64 -> condFltCode EQQ x y
2054       MO_Ne F64 -> condFltCode NE  x y
2055
2056       MO_S_Gt F64 -> condFltCode GTT x y
2057       MO_S_Ge F64 -> condFltCode GE  x y
2058       MO_S_Lt F64 -> condFltCode LTT x y
2059       MO_S_Le F64 -> condFltCode LE  x y
2060
2061       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2062       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2063
2064       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2065       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2066       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2067       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2068
2069       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2070       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2071       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2072       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2073
2074       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2075
2076 getCondCode other =  panic "getCondCode(2)(powerpc)"
2077
2078
2079 #endif
2080
2081
2082 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2083 -- passed back up the tree.
2084
2085 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2086
2087 #if alpha_TARGET_ARCH
2088 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2089 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2090 #endif /* alpha_TARGET_ARCH */
2091
2092 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2093 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2094
2095 -- memory vs immediate
2096 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2097     Amode x_addr x_code <- getAmode x
2098     let
2099         imm  = litToImm lit
2100         code = x_code `snocOL`
2101                   CMP pk (OpImm imm) (OpAddr x_addr)
2102     --
2103     return (CondCode False cond code)
2104
2105 -- anything vs zero
2106 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2107     (x_reg, x_code) <- getSomeReg x
2108     let
2109         code = x_code `snocOL`
2110                   TEST pk (OpReg x_reg) (OpReg x_reg)
2111     --
2112     return (CondCode False cond code)
2113
2114 -- anything vs operand
2115 condIntCode cond x y | isOperand y = do
2116     (x_reg, x_code) <- getNonClobberedReg x
2117     (y_op,  y_code) <- getOperand y
2118     let
2119         code = x_code `appOL` y_code `snocOL`
2120                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2121     -- in
2122     return (CondCode False cond code)
2123
2124 -- anything vs anything
2125 condIntCode cond x y = do
2126   (y_reg, y_code) <- getNonClobberedReg y
2127   (x_op, x_code) <- getRegOrMem x
2128   let
2129         code = y_code `appOL`
2130                x_code `snocOL`
2131                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2132   -- in
2133   return (CondCode False cond code)
2134 #endif
2135
2136 #if i386_TARGET_ARCH
2137 condFltCode cond x y
2138   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2139   (x_reg, x_code) <- getNonClobberedReg x
2140   (y_reg, y_code) <- getSomeReg y
2141   let
2142         code = x_code `appOL` y_code `snocOL`
2143                 GCMP cond x_reg y_reg
2144   -- The GCMP insn does the test and sets the zero flag if comparable
2145   -- and true.  Hence we always supply EQQ as the condition to test.
2146   return (CondCode True EQQ code)
2147 #endif /* i386_TARGET_ARCH */
2148
2149 #if x86_64_TARGET_ARCH
2150 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2151 -- an operand, but the right must be a reg.  We can probably do better
2152 -- than this general case...
2153 condFltCode cond x y = do
2154   (x_reg, x_code) <- getNonClobberedReg x
2155   (y_op, y_code) <- getOperand y
2156   let
2157         code = x_code `appOL`
2158                y_code `snocOL`
2159                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2160         -- NB(1): we need to use the unsigned comparison operators on the
2161         -- result of this comparison.
2162   -- in
2163   return (CondCode True (condToUnsigned cond) code)
2164 #endif
2165
2166 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2167
2168 #if sparc_TARGET_ARCH
2169
2170 condIntCode cond x (CmmLit (CmmInt y rep))
2171   | fits13Bits y
2172   = do
2173        (src1, code) <- getSomeReg x
2174        let
2175            src2 = ImmInt (fromInteger y)
2176            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2177        return (CondCode False cond code')
2178
2179 condIntCode cond x y = do
2180     (src1, code1) <- getSomeReg x
2181     (src2, code2) <- getSomeReg y
2182     let
2183         code__2 = code1 `appOL` code2 `snocOL`
2184                   SUB False True src1 (RIReg src2) g0
2185     return (CondCode False cond code__2)
2186
2187 -----------
2188 condFltCode cond x y = do
2189     (src1, code1) <- getSomeReg x
2190     (src2, code2) <- getSomeReg y
2191     tmp <- getNewRegNat F64
2192     let
2193         promote x = FxTOy F32 F64 x tmp
2194
2195         pk1   = cmmExprRep x
2196         pk2   = cmmExprRep y
2197
2198         code__2 =
2199                 if pk1 == pk2 then
2200                     code1 `appOL` code2 `snocOL`
2201                     FCMP True pk1 src1 src2
2202                 else if pk1 == F32 then
2203                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2204                     FCMP True F64 tmp src2
2205                 else
2206                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2207                     FCMP True F64 src1 tmp
2208     return (CondCode True cond code__2)
2209
2210 #endif /* sparc_TARGET_ARCH */
2211
2212 #if powerpc_TARGET_ARCH
2213 --  ###FIXME: I16 and I8!
2214 condIntCode cond x (CmmLit (CmmInt y rep))
2215   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2216   = do
2217         (src1, code) <- getSomeReg x
2218         let
2219             code' = code `snocOL`
2220                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2221         return (CondCode False cond code')
2222
2223 condIntCode cond x y = do
2224     (src1, code1) <- getSomeReg x
2225     (src2, code2) <- getSomeReg y
2226     let
2227         code' = code1 `appOL` code2 `snocOL`
2228                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2229     return (CondCode False cond code')
2230
2231 condFltCode cond x y = do
2232     (src1, code1) <- getSomeReg x
2233     (src2, code2) <- getSomeReg y
2234     let
2235         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2236         code'' = case cond of -- twiddle CR to handle unordered case
2237                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2238                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2239                     _ -> code'
2240                  where
2241                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2242     return (CondCode True cond code'')
2243
2244 #endif /* powerpc_TARGET_ARCH */
2245
2246 -- -----------------------------------------------------------------------------
2247 -- Generating assignments
2248
2249 -- Assignments are really at the heart of the whole code generation
2250 -- business.  Almost all top-level nodes of any real importance are
2251 -- assignments, which correspond to loads, stores, or register
2252 -- transfers.  If we're really lucky, some of the register transfers
2253 -- will go away, because we can use the destination register to
2254 -- complete the code generation for the right hand side.  This only
2255 -- fails when the right hand side is forced into a fixed register
2256 -- (e.g. the result of a call).
2257
2258 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2259 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2260
2261 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2262 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2263
2264 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2265
2266 #if alpha_TARGET_ARCH
2267
2268 assignIntCode pk (CmmLoad dst _) src
2269   = getNewRegNat IntRep             `thenNat` \ tmp ->
2270     getAmode dst                    `thenNat` \ amode ->
2271     getRegister src                 `thenNat` \ register ->
2272     let
2273         code1   = amodeCode amode []
2274         dst__2  = amodeAddr amode
2275         code2   = registerCode register tmp []
2276         src__2  = registerName register tmp
2277         sz      = primRepToSize pk
2278         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2279     in
2280     return code__2
2281
2282 assignIntCode pk dst src
2283   = getRegister dst                         `thenNat` \ register1 ->
2284     getRegister src                         `thenNat` \ register2 ->
2285     let
2286         dst__2  = registerName register1 zeroh
2287         code    = registerCode register2 dst__2
2288         src__2  = registerName register2 dst__2
2289         code__2 = if isFixed register2
2290                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2291                   else code
2292     in
2293     return code__2
2294
2295 #endif /* alpha_TARGET_ARCH */
2296
2297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2298
2299 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2300
2301 -- integer assignment to memory
2302 assignMem_IntCode pk addr src = do
2303     Amode addr code_addr <- getAmode addr
2304     (code_src, op_src)   <- get_op_RI src
2305     let
2306         code = code_src `appOL`
2307                code_addr `snocOL`
2308                   MOV pk op_src (OpAddr addr)
2309         -- NOTE: op_src is stable, so it will still be valid
2310         -- after code_addr.  This may involve the introduction
2311         -- of an extra MOV to a temporary register, but we hope
2312         -- the register allocator will get rid of it.
2313     --
2314     return code
2315   where
2316     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2317     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2318       = return (nilOL, OpImm (litToImm lit))
2319     get_op_RI op
2320       = do (reg,code) <- getNonClobberedReg op
2321            return (code, OpReg reg)
2322
2323
2324 -- Assign; dst is a reg, rhs is mem
2325 assignReg_IntCode pk reg (CmmLoad src _) = do
2326   load_code <- intLoadCode (MOV pk) src
2327   return (load_code (getRegisterReg reg))
2328
2329 -- dst is a reg, but src could be anything
2330 assignReg_IntCode pk reg src = do
2331   code <- getAnyReg src
2332   return (code (getRegisterReg reg))
2333
2334 #endif /* i386_TARGET_ARCH */
2335
2336 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2337
2338 #if sparc_TARGET_ARCH
2339
2340 assignMem_IntCode pk addr src = do
2341     (srcReg, code) <- getSomeReg src
2342     Amode dstAddr addr_code <- getAmode addr
2343     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2344
2345 assignReg_IntCode pk reg src = do
2346     r <- getRegister src
2347     return $ case r of
2348         Any _ code         -> code dst
2349         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2350     where
2351       dst = getRegisterReg reg
2352
2353
2354 #endif /* sparc_TARGET_ARCH */
2355
2356 #if powerpc_TARGET_ARCH
2357
2358 assignMem_IntCode pk addr src = do
2359     (srcReg, code) <- getSomeReg src
2360     Amode dstAddr addr_code <- getAmode addr
2361     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2362
2363 -- dst is a reg, but src could be anything
2364 assignReg_IntCode pk reg src
2365     = do
2366         r <- getRegister src
2367         return $ case r of
2368             Any _ code         -> code dst
2369             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2370     where
2371         dst = getRegisterReg reg
2372
2373 #endif /* powerpc_TARGET_ARCH */
2374
2375
2376 -- -----------------------------------------------------------------------------
2377 -- Floating-point assignments
2378
2379 #if alpha_TARGET_ARCH
2380
2381 assignFltCode pk (CmmLoad dst _) src
2382   = getNewRegNat pk                 `thenNat` \ tmp ->
2383     getAmode dst                    `thenNat` \ amode ->
2384     getRegister src                         `thenNat` \ register ->
2385     let
2386         code1   = amodeCode amode []
2387         dst__2  = amodeAddr amode
2388         code2   = registerCode register tmp []
2389         src__2  = registerName register tmp
2390         sz      = primRepToSize pk
2391         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2392     in
2393     return code__2
2394
2395 assignFltCode pk dst src
2396   = getRegister dst                         `thenNat` \ register1 ->
2397     getRegister src                         `thenNat` \ register2 ->
2398     let
2399         dst__2  = registerName register1 zeroh
2400         code    = registerCode register2 dst__2
2401         src__2  = registerName register2 dst__2
2402         code__2 = if isFixed register2
2403                   then code . mkSeqInstr (FMOV src__2 dst__2)
2404                   else code
2405     in
2406     return code__2
2407
2408 #endif /* alpha_TARGET_ARCH */
2409
2410 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2411
2412 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2413
2414 -- Floating point assignment to memory
2415 assignMem_FltCode pk addr src = do
2416   (src_reg, src_code) <- getNonClobberedReg src
2417   Amode addr addr_code <- getAmode addr
2418   let
2419         code = src_code `appOL`
2420                addr_code `snocOL`
2421                 IF_ARCH_i386(GST pk src_reg addr,
2422                              MOV pk (OpReg src_reg) (OpAddr addr))
2423   return code
2424
2425 -- Floating point assignment to a register/temporary
2426 assignReg_FltCode pk reg src = do
2427   src_code <- getAnyReg src
2428   return (src_code (getRegisterReg reg))
2429
2430 #endif /* i386_TARGET_ARCH */
2431
2432 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2433
2434 #if sparc_TARGET_ARCH
2435
2436 -- Floating point assignment to memory
2437 assignMem_FltCode pk addr src = do
2438     Amode dst__2 code1 <- getAmode addr
2439     (src__2, code2) <- getSomeReg src
2440     tmp1 <- getNewRegNat pk
2441     let
2442         pk__2   = cmmExprRep src
2443         code__2 = code1 `appOL` code2 `appOL`
2444             if   pk == pk__2
2445             then unitOL (ST pk src__2 dst__2)
2446             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2447     return code__2
2448
2449 -- Floating point assignment to a register/temporary
2450 -- ToDo: Verify correctness
2451 assignReg_FltCode pk reg src = do
2452     r <- getRegister src
2453     v1 <- getNewRegNat pk
2454     return $ case r of
2455         Any _ code         -> code dst
2456         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2457     where
2458       dst = getRegisterReg reg
2459
2460 #endif /* sparc_TARGET_ARCH */
2461
2462 #if powerpc_TARGET_ARCH
2463
2464 -- Easy, isn't it?
2465 assignMem_FltCode = assignMem_IntCode
2466 assignReg_FltCode = assignReg_IntCode
2467
2468 #endif /* powerpc_TARGET_ARCH */
2469
2470
2471 -- -----------------------------------------------------------------------------
2472 -- Generating an non-local jump
2473
2474 -- (If applicable) Do not fill the delay slots here; you will confuse the
2475 -- register allocator.
2476
2477 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2478
2479 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2480
2481 #if alpha_TARGET_ARCH
2482
2483 genJump (CmmLabel lbl)
2484   | isAsmTemp lbl = returnInstr (BR target)
2485   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2486   where
2487     target = ImmCLbl lbl
2488
2489 genJump tree
2490   = getRegister tree                `thenNat` \ register ->
2491     getNewRegNat PtrRep             `thenNat` \ tmp ->
2492     let
2493         dst    = registerName register pv
2494         code   = registerCode register pv
2495         target = registerName register pv
2496     in
2497     if isFixed register then
2498         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2499     else
2500     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2501
2502 #endif /* alpha_TARGET_ARCH */
2503
2504 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2505
2506 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2507
2508 genJump (CmmLoad mem pk) = do
2509   Amode target code <- getAmode mem
2510   return (code `snocOL` JMP (OpAddr target))
2511
2512 genJump (CmmLit lit) = do
2513   return (unitOL (JMP (OpImm (litToImm lit))))
2514
2515 genJump expr = do
2516   (reg,code) <- getSomeReg expr
2517   return (code `snocOL` JMP (OpReg reg))
2518
2519 #endif /* i386_TARGET_ARCH */
2520
2521 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2522
2523 #if sparc_TARGET_ARCH
2524
2525 genJump (CmmLit (CmmLabel lbl))
2526   = return (toOL [CALL (Left target) 0 True, NOP])
2527   where
2528     target = ImmCLbl lbl
2529
2530 genJump tree
2531   = do
2532         (target, code) <- getSomeReg tree
2533         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2534
2535 #endif /* sparc_TARGET_ARCH */
2536
2537 #if powerpc_TARGET_ARCH
2538 genJump (CmmLit (CmmLabel lbl))
2539   = return (unitOL $ JMP lbl)
2540
2541 genJump tree
2542   = do
2543         (target,code) <- getSomeReg tree
2544         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2545 #endif /* powerpc_TARGET_ARCH */
2546
2547
2548 -- -----------------------------------------------------------------------------
2549 --  Unconditional branches
2550
2551 genBranch :: BlockId -> NatM InstrBlock
2552
2553 #if alpha_TARGET_ARCH
2554 genBranch id = return (unitOL (BR id))
2555 #endif
2556
2557 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2558 genBranch id = return (unitOL (JXX ALWAYS id))
2559 #endif
2560
2561 #if sparc_TARGET_ARCH
2562 genBranch (BlockId id) = return (toOL [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP])
2563 #endif
2564
2565 #if powerpc_TARGET_ARCH
2566 genBranch id = return (unitOL (BCC ALWAYS id))
2567 #endif
2568
2569
2570 -- -----------------------------------------------------------------------------
2571 --  Conditional jumps
2572
2573 {-
2574 Conditional jumps are always to local labels, so we can use branch
2575 instructions.  We peek at the arguments to decide what kind of
2576 comparison to do.
2577
2578 ALPHA: For comparisons with 0, we're laughing, because we can just do
2579 the desired conditional branch.
2580
2581 I386: First, we have to ensure that the condition
2582 codes are set according to the supplied comparison operation.
2583
2584 SPARC: First, we have to ensure that the condition codes are set
2585 according to the supplied comparison operation.  We generate slightly
2586 different code for floating point comparisons, because a floating
2587 point operation cannot directly precede a @BF@.  We assume the worst
2588 and fill that slot with a @NOP@.
2589
2590 SPARC: Do not fill the delay slots here; you will confuse the register
2591 allocator.
2592 -}
2593
2594
2595 genCondJump
2596     :: BlockId      -- the branch target
2597     -> CmmExpr      -- the condition on which to branch
2598     -> NatM InstrBlock
2599
2600 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2601
2602 #if alpha_TARGET_ARCH
2603
2604 genCondJump id (StPrim op [x, StInt 0])
2605   = getRegister x                           `thenNat` \ register ->
2606     getNewRegNat (registerRep register)
2607                                     `thenNat` \ tmp ->
2608     let
2609         code   = registerCode register tmp
2610         value  = registerName register tmp
2611         pk     = registerRep register
2612         target = ImmCLbl lbl
2613     in
2614     returnSeq code [BI (cmpOp op) value target]
2615   where
2616     cmpOp CharGtOp = GTT
2617     cmpOp CharGeOp = GE
2618     cmpOp CharEqOp = EQQ
2619     cmpOp CharNeOp = NE
2620     cmpOp CharLtOp = LTT
2621     cmpOp CharLeOp = LE
2622     cmpOp IntGtOp = GTT
2623     cmpOp IntGeOp = GE
2624     cmpOp IntEqOp = EQQ
2625     cmpOp IntNeOp = NE
2626     cmpOp IntLtOp = LTT
2627     cmpOp IntLeOp = LE
2628     cmpOp WordGtOp = NE
2629     cmpOp WordGeOp = ALWAYS
2630     cmpOp WordEqOp = EQQ
2631     cmpOp WordNeOp = NE
2632     cmpOp WordLtOp = NEVER
2633     cmpOp WordLeOp = EQQ
2634     cmpOp AddrGtOp = NE
2635     cmpOp AddrGeOp = ALWAYS
2636     cmpOp AddrEqOp = EQQ
2637     cmpOp AddrNeOp = NE
2638     cmpOp AddrLtOp = NEVER
2639     cmpOp AddrLeOp = EQQ
2640
2641 genCondJump lbl (StPrim op [x, StDouble 0.0])
2642   = getRegister x                           `thenNat` \ register ->
2643     getNewRegNat (registerRep register)
2644                                     `thenNat` \ tmp ->
2645     let
2646         code   = registerCode register tmp
2647         value  = registerName register tmp
2648         pk     = registerRep register
2649         target = ImmCLbl lbl
2650     in
2651     return (code . mkSeqInstr (BF (cmpOp op) value target))
2652   where
2653     cmpOp FloatGtOp = GTT
2654     cmpOp FloatGeOp = GE
2655     cmpOp FloatEqOp = EQQ
2656     cmpOp FloatNeOp = NE
2657     cmpOp FloatLtOp = LTT
2658     cmpOp FloatLeOp = LE
2659     cmpOp DoubleGtOp = GTT
2660     cmpOp DoubleGeOp = GE
2661     cmpOp DoubleEqOp = EQQ
2662     cmpOp DoubleNeOp = NE
2663     cmpOp DoubleLtOp = LTT
2664     cmpOp DoubleLeOp = LE
2665
2666 genCondJump lbl (StPrim op [x, y])
2667   | fltCmpOp op
2668   = trivialFCode pr instr x y       `thenNat` \ register ->
2669     getNewRegNat F64                `thenNat` \ tmp ->
2670     let
2671         code   = registerCode register tmp
2672         result = registerName register tmp
2673         target = ImmCLbl lbl
2674     in
2675     return (code . mkSeqInstr (BF cond result target))
2676   where
2677     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2678
2679     fltCmpOp op = case op of
2680         FloatGtOp -> True
2681         FloatGeOp -> True
2682         FloatEqOp -> True
2683         FloatNeOp -> True
2684         FloatLtOp -> True
2685         FloatLeOp -> True
2686         DoubleGtOp -> True
2687         DoubleGeOp -> True
2688         DoubleEqOp -> True
2689         DoubleNeOp -> True
2690         DoubleLtOp -> True
2691         DoubleLeOp -> True
2692         _ -> False
2693     (instr, cond) = case op of
2694         FloatGtOp -> (FCMP TF LE, EQQ)
2695         FloatGeOp -> (FCMP TF LTT, EQQ)
2696         FloatEqOp -> (FCMP TF EQQ, NE)
2697         FloatNeOp -> (FCMP TF EQQ, EQQ)
2698         FloatLtOp -> (FCMP TF LTT, NE)
2699         FloatLeOp -> (FCMP TF LE, NE)
2700         DoubleGtOp -> (FCMP TF LE, EQQ)
2701         DoubleGeOp -> (FCMP TF LTT, EQQ)
2702         DoubleEqOp -> (FCMP TF EQQ, NE)
2703         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2704         DoubleLtOp -> (FCMP TF LTT, NE)
2705         DoubleLeOp -> (FCMP TF LE, NE)
2706
2707 genCondJump lbl (StPrim op [x, y])
2708   = trivialCode instr x y           `thenNat` \ register ->
2709     getNewRegNat IntRep             `thenNat` \ tmp ->
2710     let
2711         code   = registerCode register tmp
2712         result = registerName register tmp
2713         target = ImmCLbl lbl
2714     in
2715     return (code . mkSeqInstr (BI cond result target))
2716   where
2717     (instr, cond) = case op of
2718         CharGtOp -> (CMP LE, EQQ)
2719         CharGeOp -> (CMP LTT, EQQ)
2720         CharEqOp -> (CMP EQQ, NE)
2721         CharNeOp -> (CMP EQQ, EQQ)
2722         CharLtOp -> (CMP LTT, NE)
2723         CharLeOp -> (CMP LE, NE)
2724         IntGtOp -> (CMP LE, EQQ)
2725         IntGeOp -> (CMP LTT, EQQ)
2726         IntEqOp -> (CMP EQQ, NE)
2727         IntNeOp -> (CMP EQQ, EQQ)
2728         IntLtOp -> (CMP LTT, NE)
2729         IntLeOp -> (CMP LE, NE)
2730         WordGtOp -> (CMP ULE, EQQ)
2731         WordGeOp -> (CMP ULT, EQQ)
2732         WordEqOp -> (CMP EQQ, NE)
2733         WordNeOp -> (CMP EQQ, EQQ)
2734         WordLtOp -> (CMP ULT, NE)
2735         WordLeOp -> (CMP ULE, NE)
2736         AddrGtOp -> (CMP ULE, EQQ)
2737         AddrGeOp -> (CMP ULT, EQQ)
2738         AddrEqOp -> (CMP EQQ, NE)
2739         AddrNeOp -> (CMP EQQ, EQQ)
2740         AddrLtOp -> (CMP ULT, NE)
2741         AddrLeOp -> (CMP ULE, NE)
2742
2743 #endif /* alpha_TARGET_ARCH */
2744
2745 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2746
2747 #if i386_TARGET_ARCH
2748
2749 genCondJump id bool = do
2750   CondCode _ cond code <- getCondCode bool
2751   return (code `snocOL` JXX cond id)
2752
2753 #endif
2754
2755 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2756
2757 #if x86_64_TARGET_ARCH
2758
2759 genCondJump id bool = do
2760   CondCode is_float cond cond_code <- getCondCode bool
2761   if not is_float
2762     then
2763         return (cond_code `snocOL` JXX cond id)
2764     else do
2765         lbl <- getBlockIdNat
2766
2767         -- see comment with condFltReg
2768         let code = case cond of
2769                         NE  -> or_unordered
2770                         GU  -> plain_test
2771                         GEU -> plain_test
2772                         _   -> and_ordered
2773
2774             plain_test = unitOL (
2775                   JXX cond id
2776                 )
2777             or_unordered = toOL [
2778                   JXX cond id,
2779                   JXX PARITY id
2780                 ]
2781             and_ordered = toOL [
2782                   JXX PARITY lbl,
2783                   JXX cond id,
2784                   JXX ALWAYS lbl,
2785                   NEWBLOCK lbl
2786                 ]
2787         return (cond_code `appOL` code)
2788
2789 #endif
2790
2791 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2792
2793 #if sparc_TARGET_ARCH
2794
2795 genCondJump (BlockId id) bool = do
2796   CondCode is_float cond code <- getCondCode bool
2797   return (
2798        code `appOL`
2799        toOL (
2800          if   is_float
2801          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2802          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2803        )
2804     )
2805
2806 #endif /* sparc_TARGET_ARCH */
2807
2808
2809 #if powerpc_TARGET_ARCH
2810
2811 genCondJump id bool = do
2812   CondCode is_float cond code <- getCondCode bool
2813   return (code `snocOL` BCC cond id)
2814
2815 #endif /* powerpc_TARGET_ARCH */
2816
2817
2818 -- -----------------------------------------------------------------------------
2819 --  Generating C calls
2820
2821 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2822 -- @get_arg@, which moves the arguments to the correct registers/stack
2823 -- locations.  Apart from that, the code is easy.
2824 --
2825 -- (If applicable) Do not fill the delay slots here; you will confuse the
2826 -- register allocator.
2827
2828 genCCall
2829     :: CmmCallTarget            -- function to call
2830     -> [(CmmReg,MachHint)]      -- where to put the result
2831     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2832     -> Maybe [GlobalReg]        -- volatile regs to save
2833     -> NatM InstrBlock
2834
2835 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2836
2837 #if alpha_TARGET_ARCH
2838
2839 ccallResultRegs =
2840
2841 genCCall fn cconv result_regs args
2842   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2843                           `thenNat` \ ((unused,_), argCode) ->
2844     let
2845         nRegs = length allArgRegs - length unused
2846         code = asmSeqThen (map ($ []) argCode)
2847     in
2848         returnSeq code [
2849             LDA pv (AddrImm (ImmLab (ptext fn))),
2850             JSR ra (AddrReg pv) nRegs,
2851             LDGP gp (AddrReg ra)]
2852   where
2853     ------------------------
2854     {-  Try to get a value into a specific register (or registers) for
2855         a call.  The first 6 arguments go into the appropriate
2856         argument register (separate registers for integer and floating
2857         point arguments, but used in lock-step), and the remaining
2858         arguments are dumped to the stack, beginning at 0(sp).  Our
2859         first argument is a pair of the list of remaining argument
2860         registers to be assigned for this call and the next stack
2861         offset to use for overflowing arguments.  This way,
2862         @get_Arg@ can be applied to all of a call's arguments using
2863         @mapAccumLNat@.
2864     -}
2865     get_arg
2866         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2867         -> StixTree             -- Current argument
2868         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2869
2870     -- We have to use up all of our argument registers first...
2871
2872     get_arg ((iDst,fDst):dsts, offset) arg
2873       = getRegister arg                     `thenNat` \ register ->
2874         let
2875             reg  = if isFloatingRep pk then fDst else iDst
2876             code = registerCode register reg
2877             src  = registerName register reg
2878             pk   = registerRep register
2879         in
2880         return (
2881             if isFloatingRep pk then
2882                 ((dsts, offset), if isFixed register then
2883                     code . mkSeqInstr (FMOV src fDst)
2884                     else code)
2885             else
2886                 ((dsts, offset), if isFixed register then
2887                     code . mkSeqInstr (OR src (RIReg src) iDst)
2888                     else code))
2889
2890     -- Once we have run out of argument registers, we move to the
2891     -- stack...
2892
2893     get_arg ([], offset) arg
2894       = getRegister arg                 `thenNat` \ register ->
2895         getNewRegNat (registerRep register)
2896                                         `thenNat` \ tmp ->
2897         let
2898             code = registerCode register tmp
2899             src  = registerName register tmp
2900             pk   = registerRep register
2901             sz   = primRepToSize pk
2902         in
2903         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2904
2905 #endif /* alpha_TARGET_ARCH */
2906
2907 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2908
2909 #if i386_TARGET_ARCH
2910
2911 -- we only cope with a single result for foreign calls
2912 genCCall (CmmPrim op) [(r,_)] args vols = do
2913   case op of
2914         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2915         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2916
2917         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2918         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2919
2920         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2921         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2922
2923         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2924         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2925
2926         other_op    -> outOfLineFloatOp op r args vols
2927  where
2928   actuallyInlineFloatOp rep instr [(x,_)]
2929         = do res <- trivialUFCode rep instr x
2930              any <- anyReg res
2931              return (any (getRegisterReg r))
2932
2933 genCCall target dest_regs args vols = do
2934     let
2935         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
2936 #if !darwin_TARGET_OS
2937         tot_arg_size        = sum sizes
2938 #else
2939         raw_arg_size        = sum sizes
2940         tot_arg_size        = roundTo 16 raw_arg_size
2941         arg_pad_size        = tot_arg_size - raw_arg_size
2942     delta0 <- getDeltaNat
2943     setDeltaNat (delta0 - arg_pad_size)
2944 #endif
2945
2946     push_codes <- mapM push_arg (reverse args)
2947     delta <- getDeltaNat
2948
2949     -- in
2950     -- deal with static vs dynamic call targets
2951     (callinsns,cconv) <-
2952       case target of
2953         -- CmmPrim -> ...
2954         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2955            -> -- ToDo: stdcall arg sizes
2956               return (unitOL (CALL (Left fn_imm) []), conv)
2957            where fn_imm = ImmCLbl lbl
2958         CmmForeignCall expr conv
2959            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2960                  ASSERT(dyn_rep == I32)
2961                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2962
2963     let push_code
2964 #if darwin_TARGET_OS
2965             | arg_pad_size /= 0
2966             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2967                     DELTA (delta0 - arg_pad_size)]
2968               `appOL` concatOL push_codes
2969             | otherwise
2970 #endif
2971             = concatOL push_codes
2972         call = callinsns `appOL`
2973                toOL (
2974                         -- Deallocate parameters after call for ccall;
2975                         -- but not for stdcall (callee does it)
2976                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
2977                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2978                   ++
2979                   [DELTA (delta + tot_arg_size)]
2980                )
2981     -- in
2982     setDeltaNat (delta + tot_arg_size)
2983
2984     let
2985         -- assign the results, if necessary
2986         assign_code []     = nilOL
2987         assign_code [(dest,_hint)] =
2988           case rep of
2989                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2990                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2991                 F32 -> unitOL (GMOV fake0 r_dest)
2992                 F64 -> unitOL (GMOV fake0 r_dest)
2993                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2994           where
2995                 r_dest_hi = getHiVRegFromLo r_dest
2996                 rep = cmmRegRep dest
2997                 r_dest = getRegisterReg dest
2998         assign_code many = panic "genCCall.assign_code many"
2999
3000     return (push_code `appOL`
3001             call `appOL`
3002             assign_code dest_regs)
3003
3004   where
3005     arg_size F64 = 8
3006     arg_size F32 = 4
3007     arg_size I64 = 8
3008     arg_size _   = 4
3009
3010     roundTo a x | x `mod` a == 0 = x
3011                 | otherwise = x + a - (x `mod` a)
3012
3013
3014     push_arg :: (CmmExpr,MachHint){-current argument-}
3015                     -> NatM InstrBlock  -- code
3016
3017     push_arg (arg,_hint) -- we don't need the hints on x86
3018       | arg_rep == I64 = do
3019         ChildCode64 code r_lo <- iselExpr64 arg
3020         delta <- getDeltaNat
3021         setDeltaNat (delta - 8)
3022         let
3023             r_hi = getHiVRegFromLo r_lo
3024         -- in
3025         return (       code `appOL`
3026                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3027                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3028                              DELTA (delta-8)]
3029             )
3030
3031       | otherwise = do
3032         (code, reg, sz) <- get_op arg
3033         delta <- getDeltaNat
3034         let size = arg_size sz
3035         setDeltaNat (delta-size)
3036         if (case sz of F64 -> True; F32 -> True; _ -> False)
3037            then return (code `appOL`
3038                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3039                               DELTA (delta-size),
3040                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3041                                                         EAIndexNone
3042                                                         (ImmInt 0))]
3043                        )
3044            else return (code `snocOL`
3045                         PUSH I32 (OpReg reg) `snocOL`
3046                         DELTA (delta-size)
3047                        )
3048       where
3049          arg_rep = cmmExprRep arg
3050
3051     ------------
3052     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3053     get_op op = do
3054         (reg,code) <- getSomeReg op
3055         return (code, reg, cmmExprRep op)
3056
3057 #endif /* i386_TARGET_ARCH */
3058
3059 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3060
3061 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3062   -> Maybe [GlobalReg] -> NatM InstrBlock
3063 outOfLineFloatOp mop res args vols
3064   = do
3065       targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3066       let target = CmmForeignCall targetExpr CCallConv
3067
3068       if cmmRegRep res == F64
3069         then
3070           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3071         else do
3072           uq <- getUniqueNat
3073           let
3074             tmp = CmmLocal (LocalReg uq F64)
3075           -- in
3076           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)]
3077                                          (map promote args) vols)
3078           code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3079           return (code1 `appOL` code2)
3080   where
3081         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3082         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
3083
3084         lbl = mkForeignLabel fn Nothing True
3085
3086         fn = case mop of
3087               MO_F32_Sqrt  -> FSLIT("sqrt")
3088               MO_F32_Sin   -> FSLIT("sin")
3089               MO_F32_Cos   -> FSLIT("cos")
3090               MO_F32_Tan   -> FSLIT("tan")
3091               MO_F32_Exp   -> FSLIT("exp")
3092               MO_F32_Log   -> FSLIT("log")
3093
3094               MO_F32_Asin  -> FSLIT("asin")
3095               MO_F32_Acos  -> FSLIT("acos")
3096               MO_F32_Atan  -> FSLIT("atan")
3097
3098               MO_F32_Sinh  -> FSLIT("sinh")
3099               MO_F32_Cosh  -> FSLIT("cosh")
3100               MO_F32_Tanh  -> FSLIT("tanh")
3101               MO_F32_Pwr   -> FSLIT("pow")
3102
3103               MO_F64_Sqrt  -> FSLIT("sqrt")
3104               MO_F64_Sin   -> FSLIT("sin")
3105               MO_F64_Cos   -> FSLIT("cos")
3106               MO_F64_Tan   -> FSLIT("tan")
3107               MO_F64_Exp   -> FSLIT("exp")
3108               MO_F64_Log   -> FSLIT("log")
3109
3110               MO_F64_Asin  -> FSLIT("asin")
3111               MO_F64_Acos  -> FSLIT("acos")
3112               MO_F64_Atan  -> FSLIT("atan")
3113
3114               MO_F64_Sinh  -> FSLIT("sinh")
3115               MO_F64_Cosh  -> FSLIT("cosh")
3116               MO_F64_Tanh  -> FSLIT("tanh")
3117               MO_F64_Pwr   -> FSLIT("pow")
3118
3119 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3120
3121 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3122
3123 #if x86_64_TARGET_ARCH
3124
3125 genCCall (CmmPrim op) [(r,_)] args vols =
3126   outOfLineFloatOp op r args vols
3127
3128 genCCall target dest_regs args vols = do
3129
3130         -- load up the register arguments
3131     (stack_args, aregs, fregs, load_args_code)
3132          <- load_args args allArgRegs allFPArgRegs nilOL
3133
3134     let
3135         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3136         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3137         arg_regs = int_regs_used ++ fp_regs_used
3138                 -- for annotating the call instruction with
3139
3140         sse_regs = length fp_regs_used
3141
3142         tot_arg_size = arg_size * length stack_args
3143
3144         -- On entry to the called function, %rsp should be aligned
3145         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3146         -- the return address is 16-byte aligned).  In STG land
3147         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3148         -- need to make sure we push a multiple of 16-bytes of args,
3149         -- plus the return address, to get the correct alignment.
3150         -- Urg, this is hard.  We need to feed the delta back into
3151         -- the arg pushing code.
3152     (real_size, adjust_rsp) <-
3153         if tot_arg_size `rem` 16 == 0
3154             then return (tot_arg_size, nilOL)
3155             else do -- we need to adjust...
3156                 delta <- getDeltaNat
3157                 setDeltaNat (delta-8)
3158                 return (tot_arg_size+8, toOL [
3159                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3160                                 DELTA (delta-8)
3161                         ])
3162
3163         -- push the stack args, right to left
3164     push_code <- push_args (reverse stack_args) nilOL
3165     delta <- getDeltaNat
3166
3167     -- deal with static vs dynamic call targets
3168     (callinsns,cconv) <-
3169       case target of
3170         -- CmmPrim -> ...
3171         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3172            -> -- ToDo: stdcall arg sizes
3173               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3174            where fn_imm = ImmCLbl lbl
3175         CmmForeignCall expr conv
3176            -> do (dyn_r, dyn_c) <- getSomeReg expr
3177                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3178
3179     let
3180         -- The x86_64 ABI requires us to set %al to the number of SSE
3181         -- registers that contain arguments, if the called routine
3182         -- is a varargs function.  We don't know whether it's a
3183         -- varargs function or not, so we have to assume it is.
3184         --
3185         -- It's not safe to omit this assignment, even if the number
3186         -- of SSE regs in use is zero.  If %al is larger than 8
3187         -- on entry to a varargs function, seg faults ensue.
3188         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3189
3190     let call = callinsns `appOL`
3191                toOL (
3192                         -- Deallocate parameters after call for ccall;
3193                         -- but not for stdcall (callee does it)
3194                   (if cconv == StdCallConv || real_size==0 then [] else
3195                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3196                   ++
3197                   [DELTA (delta + real_size)]
3198                )
3199     -- in
3200     setDeltaNat (delta + real_size)
3201
3202     let
3203         -- assign the results, if necessary
3204         assign_code []     = nilOL
3205         assign_code [(dest,_hint)] =
3206           case rep of
3207                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3208                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3209                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3210           where
3211                 rep = cmmRegRep dest
3212                 r_dest = getRegisterReg dest
3213         assign_code many = panic "genCCall.assign_code many"
3214
3215     return (load_args_code      `appOL`
3216             adjust_rsp          `appOL`
3217             push_code           `appOL`
3218             assign_eax sse_regs `appOL`
3219             call                `appOL`
3220             assign_code dest_regs)
3221
3222   where
3223     arg_size = 8 -- always, at the mo
3224
3225     load_args :: [(CmmExpr,MachHint)]
3226               -> [Reg]                  -- int regs avail for args
3227               -> [Reg]                  -- FP regs avail for args
3228               -> InstrBlock
3229               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3230     load_args args [] [] code     =  return (args, [], [], code)
3231         -- no more regs to use
3232     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3233         -- no more args to push
3234     load_args ((arg,hint) : rest) aregs fregs code
3235         | isFloatingRep arg_rep =
3236         case fregs of
3237           [] -> push_this_arg
3238           (r:rs) -> do
3239              arg_code <- getAnyReg arg
3240              load_args rest aregs rs (code `appOL` arg_code r)
3241         | otherwise =
3242         case aregs of
3243           [] -> push_this_arg
3244           (r:rs) -> do
3245              arg_code <- getAnyReg arg
3246              load_args rest rs fregs (code `appOL` arg_code r)
3247         where
3248           arg_rep = cmmExprRep arg
3249
3250           push_this_arg = do
3251             (args',ars,frs,code') <- load_args rest aregs fregs code
3252             return ((arg,hint):args', ars, frs, code')
3253
3254     push_args [] code = return code
3255     push_args ((arg,hint):rest) code
3256        | isFloatingRep arg_rep = do
3257          (arg_reg, arg_code) <- getSomeReg arg
3258          delta <- getDeltaNat
3259          setDeltaNat (delta-arg_size)
3260          let code' = code `appOL` toOL [
3261                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3262                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3263                         DELTA (delta-arg_size)]
3264          push_args rest code'
3265
3266        | otherwise = do
3267        -- we only ever generate word-sized function arguments.  Promotion
3268        -- has already happened: our Int8# type is kept sign-extended
3269        -- in an Int#, for example.
3270          ASSERT(arg_rep == I64) return ()
3271          (arg_op, arg_code) <- getOperand arg
3272          delta <- getDeltaNat
3273          setDeltaNat (delta-arg_size)
3274          let code' = code `appOL` toOL [PUSH I64 arg_op,
3275                                         DELTA (delta-arg_size)]
3276          push_args rest code'
3277         where
3278           arg_rep = cmmExprRep arg
3279 #endif
3280
3281 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3282
3283 #if sparc_TARGET_ARCH
3284 {-
3285    The SPARC calling convention is an absolute
3286    nightmare.  The first 6x32 bits of arguments are mapped into
3287    %o0 through %o5, and the remaining arguments are dumped to the
3288    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3289
3290    If we have to put args on the stack, move %o6==%sp down by
3291    the number of words to go on the stack, to ensure there's enough space.
3292
3293    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3294    16 words above the stack pointer is a word for the address of
3295    a structure return value.  I use this as a temporary location
3296    for moving values from float to int regs.  Certainly it isn't
3297    safe to put anything in the 16 words starting at %sp, since
3298    this area can get trashed at any time due to window overflows
3299    caused by signal handlers.
3300
3301    A final complication (if the above isn't enough) is that
3302    we can't blithely calculate the arguments one by one into
3303    %o0 .. %o5.  Consider the following nested calls:
3304
3305        fff a (fff b c)
3306
3307    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3308    the inner call will itself use %o0, which trashes the value put there
3309    in preparation for the outer call.  Upshot: we need to calculate the
3310    args into temporary regs, and move those to arg regs or onto the
3311    stack only immediately prior to the call proper.  Sigh.
3312 -}
3313
3314 genCCall target dest_regs argsAndHints vols = do
3315     let
3316         args = map fst argsAndHints
3317     argcode_and_vregs <- mapM arg_to_int_vregs args
3318     let
3319         (argcodes, vregss) = unzip argcode_and_vregs
3320         n_argRegs          = length allArgRegs
3321         n_argRegs_used     = min (length vregs) n_argRegs
3322         vregs              = concat vregss
3323     -- deal with static vs dynamic call targets
3324     callinsns <- (case target of
3325         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3326                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3327         CmmForeignCall expr conv -> do
3328                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3329                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3330         CmmPrim mop -> do
3331                   (res, reduce) <- outOfLineFloatOp mop
3332                   lblOrMopExpr <- case res of
3333                        Left lbl -> do
3334                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3335                        Right mopExpr -> do
3336                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3337                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3338                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3339
3340       )
3341     let
3342         argcode = concatOL argcodes
3343         (move_sp_down, move_sp_up)
3344            = let diff = length vregs - n_argRegs
3345                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3346              in  if   nn <= 0
3347                  then (nilOL, nilOL)
3348                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3349         transfer_code
3350            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3351     return (argcode       `appOL`
3352             move_sp_down  `appOL`
3353             transfer_code `appOL`
3354             callinsns     `appOL`
3355             unitOL NOP    `appOL`
3356             move_sp_up)
3357   where
3358      -- move args from the integer vregs into which they have been
3359      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3360      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3361
3362      move_final [] _ offset          -- all args done
3363         = []
3364
3365      move_final (v:vs) [] offset     -- out of aregs; move to stack
3366         = ST I32 v (spRel offset)
3367           : move_final vs [] (offset+1)
3368
3369      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3370         = OR False g0 (RIReg v) a
3371           : move_final vs az offset
3372
3373      -- generate code to calculate an argument, and move it into one
3374      -- or two integer vregs.
3375      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3376      arg_to_int_vregs arg
3377         | (cmmExprRep arg) == I64
3378         = do
3379           (ChildCode64 code r_lo) <- iselExpr64 arg
3380           let
3381               r_hi = getHiVRegFromLo r_lo
3382           return (code, [r_hi, r_lo])
3383         | otherwise
3384         = do
3385           (src, code) <- getSomeReg arg
3386           tmp <- getNewRegNat (cmmExprRep arg)
3387           let
3388               pk   = cmmExprRep arg
3389           case pk of
3390              F64 -> do
3391                       v1 <- getNewRegNat I32
3392                       v2 <- getNewRegNat I32
3393                       return (
3394                         code                          `snocOL`
3395                         FMOV F64 src f0                `snocOL`
3396                         ST   F32  f0 (spRel 16)         `snocOL`
3397                         LD   I32  (spRel 16) v1         `snocOL`
3398                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3399                         LD   I32  (spRel 16) v2
3400                         ,
3401                         [v1,v2]
3402                        )
3403              F32 -> do
3404                       v1 <- getNewRegNat I32
3405                       return (
3406                         code                    `snocOL`
3407                         ST   F32  src (spRel 16)  `snocOL`
3408                         LD   I32  (spRel 16) v1
3409                         ,
3410                         [v1]
3411                        )
3412              other -> do
3413                         v1 <- getNewRegNat I32
3414                         return (
3415                           code `snocOL` OR False g0 (RIReg src) v1
3416                           ,
3417                           [v1]
3418                          )
3419 outOfLineFloatOp mop =
3420     do
3421       mopExpr <- cmmMakeDynamicReference addImportNat True $
3422                   mkForeignLabel functionName Nothing True
3423       let mopLabelOrExpr = case mopExpr of
3424                         CmmLit (CmmLabel lbl) -> Left lbl
3425                         _ -> Right mopExpr
3426       return (mopLabelOrExpr, reduce)
3427             where
3428                 (reduce, functionName) = case mop of
3429                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3430                   MO_F32_Log    -> (True,  FSLIT("log"))
3431                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3432
3433                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3434                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3435                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3436
3437                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3438                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3439                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3440
3441                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3442                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3443                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3444
3445                   MO_F64_Exp    -> (False, FSLIT("exp"))
3446                   MO_F64_Log    -> (False, FSLIT("log"))
3447                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3448
3449                   MO_F64_Sin    -> (False, FSLIT("sin"))
3450                   MO_F64_Cos    -> (False, FSLIT("cos"))
3451                   MO_F64_Tan    -> (False, FSLIT("tan"))
3452
3453                   MO_F64_Asin   -> (False, FSLIT("asin"))
3454                   MO_F64_Acos   -> (False, FSLIT("acos"))
3455                   MO_F64_Atan   -> (False, FSLIT("atan"))
3456
3457                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3458                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3459                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3460
3461                   other -> pprPanic "outOfLineFloatOp(sparc) "
3462                                 (pprCallishMachOp mop)
3463
3464 #endif /* sparc_TARGET_ARCH */
3465
3466 #if powerpc_TARGET_ARCH
3467
3468 #if darwin_TARGET_OS || linux_TARGET_OS
3469 {-
3470     The PowerPC calling convention for Darwin/Mac OS X
3471     is described in Apple's document
3472     "Inside Mac OS X - Mach-O Runtime Architecture".
3473
3474     PowerPC Linux uses the System V Release 4 Calling Convention
3475     for PowerPC. It is described in the
3476     "System V Application Binary Interface PowerPC Processor Supplement".
3477
3478     Both conventions are similar:
3479     Parameters may be passed in general-purpose registers starting at r3, in
3480     floating point registers starting at f1, or on the stack.
3481
3482     But there are substantial differences:
3483     * The number of registers used for parameter passing and the exact set of
3484       nonvolatile registers differs (see MachRegs.lhs).
3485     * On Darwin, stack space is always reserved for parameters, even if they are
3486       passed in registers. The called routine may choose to save parameters from
3487       registers to the corresponding space on the stack.
3488     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3489       parameter is passed in an FPR.
3490     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3491       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3492       Darwin just treats an I64 like two separate I32s (high word first).
3493     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3494       4-byte aligned like everything else on Darwin.
3495     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3496       PowerPC Linux does not agree, so neither do we.
3497
3498     According to both conventions, The parameter area should be part of the
3499     caller's stack frame, allocated in the caller's prologue code (large enough
3500     to hold the parameter lists for all called routines). The NCG already
3501     uses the stack for register spilling, leaving 64 bytes free at the top.
3502     If we need a larger parameter area than that, we just allocate a new stack
3503     frame just before ccalling.
3504 -}
3505
3506 genCCall target dest_regs argsAndHints vols
3507   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3508         -- we rely on argument promotion in the codeGen
3509     do
3510         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3511                                                         (zip args argReps)
3512                                                         allArgRegs allFPArgRegs
3513                                                         initialStackOffset
3514                                                         (toOL []) []
3515
3516         (labelOrExpr, reduceToF32) <- case target of
3517             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3518             CmmForeignCall expr conv -> return  (Right expr, False)
3519             CmmPrim mop -> outOfLineFloatOp mop
3520
3521         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3522             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3523
3524         case labelOrExpr of
3525             Left lbl -> do
3526                 return (         codeBefore
3527                         `snocOL` BL lbl usedRegs
3528                         `appOL`  codeAfter)
3529             Right dyn -> do
3530                 (dynReg, dynCode) <- getSomeReg dyn
3531                 return (         dynCode
3532                         `snocOL` MTCTR dynReg
3533                         `appOL`  codeBefore
3534                         `snocOL` BCTRL usedRegs
3535                         `appOL`  codeAfter)
3536     where
3537 #if darwin_TARGET_OS
3538         initialStackOffset = 24
3539             -- size of linkage area + size of arguments, in bytes
3540         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3541                                        map machRepByteWidth argReps
3542 #elif linux_TARGET_OS
3543         initialStackOffset = 8
3544         stackDelta finalStack = roundTo 16 finalStack
3545 #endif
3546         args = map fst argsAndHints
3547         argReps = map cmmExprRep args
3548
3549         roundTo a x | x `mod` a == 0 = x
3550                     | otherwise = x + a - (x `mod` a)
3551
3552         move_sp_down finalStack
3553                | delta > 64 =
3554                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3555                               DELTA (-delta)]
3556                | otherwise = nilOL
3557                where delta = stackDelta finalStack
3558         move_sp_up finalStack
3559                | delta > 64 =
3560                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3561                               DELTA 0]
3562                | otherwise = nilOL
3563                where delta = stackDelta finalStack
3564
3565
3566         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3567         passArguments ((arg,I64):args) gprs fprs stackOffset
3568                accumCode accumUsed =
3569             do
3570                 ChildCode64 code vr_lo <- iselExpr64 arg
3571                 let vr_hi = getHiVRegFromLo vr_lo
3572
3573 #if darwin_TARGET_OS
3574                 passArguments args
3575                               (drop 2 gprs)
3576                               fprs
3577                               (stackOffset+8)
3578                               (accumCode `appOL` code
3579                                     `snocOL` storeWord vr_hi gprs stackOffset
3580                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3581                               ((take 2 gprs) ++ accumUsed)
3582             where
3583                 storeWord vr (gpr:_) offset = MR gpr vr
3584                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3585
3586 #elif linux_TARGET_OS
3587                 let stackOffset' = roundTo 8 stackOffset
3588                     stackCode = accumCode `appOL` code
3589                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3590                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3591                     regCode hireg loreg =
3592                         accumCode `appOL` code
3593                             `snocOL` MR hireg vr_hi
3594                             `snocOL` MR loreg vr_lo
3595
3596                 case gprs of
3597                     hireg : loreg : regs | even (length gprs) ->
3598                         passArguments args regs fprs stackOffset
3599                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3600                     _skipped : hireg : loreg : regs ->
3601                         passArguments args regs fprs stackOffset
3602                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3603                     _ -> -- only one or no regs left
3604                         passArguments args [] fprs (stackOffset'+8)
3605                                       stackCode accumUsed
3606 #endif
3607
3608         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3609             | reg : _ <- regs = do
3610                 register <- getRegister arg
3611                 let code = case register of
3612                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3613                             Any _ acode -> acode reg
3614                 passArguments args
3615                               (drop nGprs gprs)
3616                               (drop nFprs fprs)
3617 #if darwin_TARGET_OS
3618         -- The Darwin ABI requires that we reserve stack slots for register parameters
3619                               (stackOffset + stackBytes)
3620 #elif linux_TARGET_OS
3621         -- ... the SysV ABI doesn't.
3622                               stackOffset
3623 #endif
3624                               (accumCode `appOL` code)
3625                               (reg : accumUsed)
3626             | otherwise = do
3627                 (vr, code) <- getSomeReg arg
3628                 passArguments args
3629                               (drop nGprs gprs)
3630                               (drop nFprs fprs)
3631                               (stackOffset' + stackBytes)
3632                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3633                               accumUsed
3634             where
3635 #if darwin_TARGET_OS
3636         -- stackOffset is at least 4-byte aligned
3637         -- The Darwin ABI is happy with that.
3638                 stackOffset' = stackOffset
3639 #else
3640         -- ... the SysV ABI requires 8-byte alignment for doubles.
3641                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3642                              | otherwise  =           stackOffset
3643 #endif
3644                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3645                 (nGprs, nFprs, stackBytes, regs) = case rep of
3646                     I32 -> (1, 0, 4, gprs)
3647 #if darwin_TARGET_OS
3648         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3649         -- we use the FPRs.
3650                     F32 -> (1, 1, 4, fprs)
3651                     F64 -> (2, 1, 8, fprs)
3652 #elif linux_TARGET_OS
3653         -- ... the SysV ABI doesn't.
3654                     F32 -> (0, 1, 4, fprs)
3655                     F64 -> (0, 1, 8, fprs)
3656 #endif
3657
3658         moveResult reduceToF32 =
3659             case dest_regs of
3660                 [] -> nilOL
3661                 [(dest, _hint)]
3662                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3663                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3664                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3665                                           MR r_dest r4]
3666                     | otherwise -> unitOL (MR r_dest r3)
3667                     where rep = cmmRegRep dest
3668                           r_dest = getRegisterReg dest
3669
3670         outOfLineFloatOp mop =
3671             do
3672                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3673                               mkForeignLabel functionName Nothing True
3674                 let mopLabelOrExpr = case mopExpr of
3675                         CmmLit (CmmLabel lbl) -> Left lbl
3676                         _ -> Right mopExpr
3677                 return (mopLabelOrExpr, reduce)
3678             where
3679                 (functionName, reduce) = case mop of
3680                     MO_F32_Exp   -> (FSLIT("exp"), True)
3681                     MO_F32_Log   -> (FSLIT("log"), True)
3682                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3683
3684                     MO_F32_Sin   -> (FSLIT("sin"), True)
3685                     MO_F32_Cos   -> (FSLIT("cos"), True)
3686                     MO_F32_Tan   -> (FSLIT("tan"), True)
3687
3688                     MO_F32_Asin  -> (FSLIT("asin"), True)
3689                     MO_F32_Acos  -> (FSLIT("acos"), True)
3690                     MO_F32_Atan  -> (FSLIT("atan"), True)
3691
3692                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3693                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3694                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3695                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3696
3697                     MO_F64_Exp   -> (FSLIT("exp"), False)
3698                     MO_F64_Log   -> (FSLIT("log"), False)
3699                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3700
3701                     MO_F64_Sin   -> (FSLIT("sin"), False)
3702                     MO_F64_Cos   -> (FSLIT("cos"), False)
3703                     MO_F64_Tan   -> (FSLIT("tan"), False)
3704
3705                     MO_F64_Asin  -> (FSLIT("asin"), False)
3706                     MO_F64_Acos  -> (FSLIT("acos"), False)
3707                     MO_F64_Atan  -> (FSLIT("atan"), False)
3708
3709                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3710                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3711                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3712                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3713                     other -> pprPanic "genCCall(ppc): unknown callish op"
3714                                     (pprCallishMachOp other)
3715
3716 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3717
3718 #endif /* powerpc_TARGET_ARCH */
3719
3720
3721 -- -----------------------------------------------------------------------------
3722 -- Generating a table-branch
3723
3724 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3725
3726 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3727 genSwitch expr ids
3728   | opt_PIC
3729   = do
3730         (reg,e_code) <- getSomeReg expr
3731         lbl <- getNewLabelNat
3732         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3733         (tableReg,t_code) <- getSomeReg $ dynRef
3734         let
3735             jumpTable = map jumpTableEntryRel ids
3736
3737             jumpTableEntryRel Nothing
3738                 = CmmStaticLit (CmmInt 0 wordRep)
3739             jumpTableEntryRel (Just (BlockId id))
3740                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3741                 where blockLabel = mkAsmTempLabel id
3742
3743             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3744                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3745
3746             code = e_code `appOL` t_code `appOL` toOL [
3747                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3748                             ADD wordRep op (OpReg tableReg),
3749                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3750                     ]
3751         return code
3752   | otherwise
3753   = do
3754         (reg,e_code) <- getSomeReg expr
3755         lbl <- getNewLabelNat
3756         let
3757             jumpTable = map jumpTableEntry ids
3758             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3759             code = e_code `appOL` toOL [
3760                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3761                     JMP_TBL op [ id | Just id <- ids ]
3762                  ]
3763         -- in
3764         return code
3765 #elif powerpc_TARGET_ARCH
3766 genSwitch expr ids
3767   | opt_PIC
3768   = do
3769         (reg,e_code) <- getSomeReg expr
3770         tmp <- getNewRegNat I32
3771         lbl <- getNewLabelNat
3772         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3773         (tableReg,t_code) <- getSomeReg $ dynRef
3774         let
3775             jumpTable = map jumpTableEntryRel ids
3776
3777             jumpTableEntryRel Nothing
3778                 = CmmStaticLit (CmmInt 0 wordRep)
3779             jumpTableEntryRel (Just (BlockId id))
3780                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3781                 where blockLabel = mkAsmTempLabel id
3782
3783             code = e_code `appOL` t_code `appOL` toOL [
3784                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3785                             SLW tmp reg (RIImm (ImmInt 2)),
3786                             LD I32 tmp (AddrRegReg tableReg tmp),
3787                             ADD tmp tmp (RIReg tableReg),
3788                             MTCTR tmp,
3789                             BCTR [ id | Just id <- ids ]
3790                     ]
3791         return code
3792   | otherwise
3793   = do
3794         (reg,e_code) <- getSomeReg expr
3795         tmp <- getNewRegNat I32
3796         lbl <- getNewLabelNat
3797         let
3798             jumpTable = map jumpTableEntry ids
3799
3800             code = e_code `appOL` toOL [
3801                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3802                             SLW tmp reg (RIImm (ImmInt 2)),
3803                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3804                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3805                             MTCTR tmp,
3806                             BCTR [ id | Just id <- ids ]
3807                     ]
3808         return code
3809 #else
3810 genSwitch expr ids = panic "ToDo: genSwitch"
3811 #endif
3812
3813 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3814 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3815     where blockLabel = mkAsmTempLabel id
3816
3817 -- -----------------------------------------------------------------------------
3818 -- Support bits
3819 -- -----------------------------------------------------------------------------
3820
3821
3822 -- -----------------------------------------------------------------------------
3823 -- 'condIntReg' and 'condFltReg': condition codes into registers
3824
3825 -- Turn those condition codes into integers now (when they appear on
3826 -- the right hand side of an assignment).
3827 --
3828 -- (If applicable) Do not fill the delay slots here; you will confuse the
3829 -- register allocator.
3830
3831 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3832
3833 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3834
3835 #if alpha_TARGET_ARCH
3836 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3837 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3838 #endif /* alpha_TARGET_ARCH */
3839
3840 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3841
3842 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3843
3844 condIntReg cond x y = do
3845   CondCode _ cond cond_code <- condIntCode cond x y
3846   tmp <- getNewRegNat I8
3847   let
3848         code dst = cond_code `appOL` toOL [
3849                     SETCC cond (OpReg tmp),
3850                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3851                   ]
3852   -- in
3853   return (Any I32 code)
3854
3855 #endif
3856
3857 #if i386_TARGET_ARCH
3858
3859 condFltReg cond x y = do
3860   CondCode _ cond cond_code <- condFltCode cond x y
3861   tmp <- getNewRegNat I8
3862   let
3863         code dst = cond_code `appOL` toOL [
3864                     SETCC cond (OpReg tmp),
3865                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3866                   ]
3867   -- in
3868   return (Any I32 code)
3869
3870 #endif
3871
3872 #if x86_64_TARGET_ARCH
3873
3874 condFltReg cond x y = do
3875   CondCode _ cond cond_code <- condFltCode cond x y
3876   tmp1 <- getNewRegNat wordRep
3877   tmp2 <- getNewRegNat wordRep
3878   let
3879         -- We have to worry about unordered operands (eg. comparisons
3880         -- against NaN).  If the operands are unordered, the comparison
3881         -- sets the parity flag, carry flag and zero flag.
3882         -- All comparisons are supposed to return false for unordered
3883         -- operands except for !=, which returns true.
3884         --
3885         -- Optimisation: we don't have to test the parity flag if we
3886         -- know the test has already excluded the unordered case: eg >
3887         -- and >= test for a zero carry flag, which can only occur for
3888         -- ordered operands.
3889         --
3890         -- ToDo: by reversing comparisons we could avoid testing the
3891         -- parity flag in more cases.
3892
3893         code dst =
3894            cond_code `appOL`
3895              (case cond of
3896                 NE  -> or_unordered dst
3897                 GU  -> plain_test   dst
3898                 GEU -> plain_test   dst
3899                 _   -> and_ordered  dst)
3900
3901         plain_test dst = toOL [
3902                     SETCC cond (OpReg tmp1),
3903                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3904                  ]
3905         or_unordered dst = toOL [
3906                     SETCC cond (OpReg tmp1),
3907                     SETCC PARITY (OpReg tmp2),
3908                     OR I8 (OpReg tmp1) (OpReg tmp2),
3909                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3910                   ]
3911         and_ordered dst = toOL [
3912                     SETCC cond (OpReg tmp1),
3913                     SETCC NOTPARITY (OpReg tmp2),
3914                     AND I8 (OpReg tmp1) (OpReg tmp2),
3915                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3916                   ]
3917   -- in
3918   return (Any I32 code)
3919
3920 #endif
3921
3922 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3923
3924 #if sparc_TARGET_ARCH
3925
3926 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3927     (src, code) <- getSomeReg x
3928     tmp <- getNewRegNat I32
3929     let
3930         code__2 dst = code `appOL` toOL [
3931             SUB False True g0 (RIReg src) g0,
3932             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3933     return (Any I32 code__2)
3934
3935 condIntReg EQQ x y = do
3936     (src1, code1) <- getSomeReg x
3937     (src2, code2) <- getSomeReg y
3938     tmp1 <- getNewRegNat I32
3939     tmp2 <- getNewRegNat I32
3940     let
3941         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3942             XOR False src1 (RIReg src2) dst,
3943             SUB False True g0 (RIReg dst) g0,
3944             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3945     return (Any I32 code__2)
3946
3947 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3948     (src, code) <- getSomeReg x
3949     tmp <- getNewRegNat I32
3950     let
3951         code__2 dst = code `appOL` toOL [
3952             SUB False True g0 (RIReg src) g0,
3953             ADD True False g0 (RIImm (ImmInt 0)) dst]
3954     return (Any I32 code__2)
3955
3956 condIntReg NE x y = do
3957     (src1, code1) <- getSomeReg x
3958     (src2, code2) <- getSomeReg y
3959     tmp1 <- getNewRegNat I32
3960     tmp2 <- getNewRegNat I32
3961     let
3962         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3963             XOR False src1 (RIReg src2) dst,
3964             SUB False True g0 (RIReg dst) g0,
3965             ADD True False g0 (RIImm (ImmInt 0)) dst]
3966     return (Any I32 code__2)
3967
3968 condIntReg cond x y = do
3969     BlockId lbl1 <- getBlockIdNat
3970     BlockId lbl2 <- getBlockIdNat
3971     CondCode _ cond cond_code <- condIntCode cond x y
3972     let
3973         code__2 dst = cond_code `appOL` toOL [
3974             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3975             OR False g0 (RIImm (ImmInt 0)) dst,
3976             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3977             NEWBLOCK (BlockId lbl1),
3978             OR False g0 (RIImm (ImmInt 1)) dst,
3979             NEWBLOCK (BlockId lbl2)]
3980     return (Any I32 code__2)
3981
3982 condFltReg cond x y = do
3983     BlockId lbl1 <- getBlockIdNat
3984     BlockId lbl2 <- getBlockIdNat
3985     CondCode _ cond cond_code <- condFltCode cond x y
3986     let
3987         code__2 dst = cond_code `appOL` toOL [
3988             NOP,
3989             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3990             OR False g0 (RIImm (ImmInt 0)) dst,
3991             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3992             NEWBLOCK (BlockId lbl1),
3993             OR False g0 (RIImm (ImmInt 1)) dst,
3994             NEWBLOCK (BlockId lbl2)]
3995     return (Any I32 code__2)
3996
3997 #endif /* sparc_TARGET_ARCH */
3998
3999 #if powerpc_TARGET_ARCH
4000 condReg getCond = do
4001     lbl1 <- getBlockIdNat
4002     lbl2 <- getBlockIdNat
4003     CondCode _ cond cond_code <- getCond
4004     let
4005 {-        code dst = cond_code `appOL` toOL [
4006                 BCC cond lbl1,
4007                 LI dst (ImmInt 0),
4008                 BCC ALWAYS lbl2,
4009                 NEWBLOCK lbl1,
4010                 LI dst (ImmInt 1),
4011                 BCC ALWAYS lbl2,
4012                 NEWBLOCK lbl2
4013             ]-}
4014         code dst = cond_code
4015             `appOL` negate_code
4016             `appOL` toOL [
4017                 MFCR dst,
4018                 RLWINM dst dst (bit + 1) 31 31
4019             ]
4020
4021         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4022                     | otherwise = nilOL
4023
4024         (bit, do_negate) = case cond of
4025             LTT -> (0, False)
4026             LE  -> (1, True)
4027             EQQ -> (2, False)
4028             GE  -> (0, True)
4029             GTT -> (1, False)
4030
4031             NE  -> (2, True)
4032
4033             LU  -> (0, False)
4034             LEU -> (1, True)
4035             GEU -> (0, True)
4036             GU  -> (1, False)
4037
4038     return (Any I32 code)
4039
4040 condIntReg cond x y = condReg (condIntCode cond x y)
4041 condFltReg cond x y = condReg (condFltCode cond x y)
4042 #endif /* powerpc_TARGET_ARCH */
4043
4044
4045 -- -----------------------------------------------------------------------------
4046 -- 'trivial*Code': deal with trivial instructions
4047
4048 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4049 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4050 -- Only look for constants on the right hand side, because that's
4051 -- where the generic optimizer will have put them.
4052
4053 -- Similarly, for unary instructions, we don't have to worry about
4054 -- matching an StInt as the argument, because genericOpt will already
4055 -- have handled the constant-folding.
4056
4057 trivialCode
4058     :: MachRep
4059     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4060       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4061                      -> Maybe (Operand -> Operand -> Instr)
4062       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4063                      -> Maybe (Operand -> Operand -> Instr)
4064       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4065       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4066       ,)))))
4067     -> CmmExpr -> CmmExpr -- the two arguments
4068     -> NatM Register
4069
4070 #ifndef powerpc_TARGET_ARCH
4071 trivialFCode
4072     :: MachRep
4073     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4074       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4075       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4076       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4077       ,))))
4078     -> CmmExpr -> CmmExpr -- the two arguments
4079     -> NatM Register
4080 #endif
4081
4082 trivialUCode
4083     :: MachRep
4084     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4085       ,IF_ARCH_i386 ((Operand -> Instr)
4086       ,IF_ARCH_x86_64 ((Operand -> Instr)
4087       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4088       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4089       ,)))))
4090     -> CmmExpr  -- the one argument
4091     -> NatM Register
4092
4093 #ifndef powerpc_TARGET_ARCH
4094 trivialUFCode
4095     :: MachRep
4096     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4097       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4098       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4099       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4100       ,))))
4101     -> CmmExpr -- the one argument
4102     -> NatM Register
4103 #endif
4104
4105 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4106
4107 #if alpha_TARGET_ARCH
4108
4109 trivialCode instr x (StInt y)
4110   | fits8Bits y
4111   = getRegister x               `thenNat` \ register ->
4112     getNewRegNat IntRep         `thenNat` \ tmp ->
4113     let
4114         code = registerCode register tmp
4115         src1 = registerName register tmp
4116         src2 = ImmInt (fromInteger y)
4117         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4118     in
4119     return (Any IntRep code__2)
4120
4121 trivialCode instr x y
4122   = getRegister x               `thenNat` \ register1 ->
4123     getRegister y               `thenNat` \ register2 ->
4124     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4125     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4126     let
4127         code1 = registerCode register1 tmp1 []
4128         src1  = registerName register1 tmp1
4129         code2 = registerCode register2 tmp2 []
4130         src2  = registerName register2 tmp2
4131         code__2 dst = asmSeqThen [code1, code2] .
4132                      mkSeqInstr (instr src1 (RIReg src2) dst)
4133     in
4134     return (Any IntRep code__2)
4135
4136 ------------
4137 trivialUCode instr x
4138   = getRegister x               `thenNat` \ register ->
4139     getNewRegNat IntRep         `thenNat` \ tmp ->
4140     let
4141         code = registerCode register tmp
4142         src  = registerName register tmp
4143         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4144     in
4145     return (Any IntRep code__2)
4146
4147 ------------
4148 trivialFCode _ instr x y
4149   = getRegister x               `thenNat` \ register1 ->
4150     getRegister y               `thenNat` \ register2 ->
4151     getNewRegNat F64    `thenNat` \ tmp1 ->
4152     getNewRegNat F64    `thenNat` \ tmp2 ->
4153     let
4154         code1 = registerCode register1 tmp1
4155         src1  = registerName register1 tmp1
4156
4157         code2 = registerCode register2 tmp2
4158         src2  = registerName register2 tmp2
4159
4160         code__2 dst = asmSeqThen [code1 [], code2 []] .
4161                       mkSeqInstr (instr src1 src2 dst)
4162     in
4163     return (Any F64 code__2)
4164
4165 trivialUFCode _ instr x
4166   = getRegister x               `thenNat` \ register ->
4167     getNewRegNat F64    `thenNat` \ tmp ->
4168     let
4169         code = registerCode register tmp
4170         src  = registerName register tmp
4171         code__2 dst = code . mkSeqInstr (instr src dst)
4172     in
4173     return (Any F64 code__2)
4174
4175 #endif /* alpha_TARGET_ARCH */
4176
4177 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4178
4179 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4180
4181 {-
4182 The Rules of the Game are:
4183
4184 * You cannot assume anything about the destination register dst;
4185   it may be anything, including a fixed reg.
4186
4187 * You may compute an operand into a fixed reg, but you may not
4188   subsequently change the contents of that fixed reg.  If you
4189   want to do so, first copy the value either to a temporary
4190   or into dst.  You are free to modify dst even if it happens
4191   to be a fixed reg -- that's not your problem.
4192
4193 * You cannot assume that a fixed reg will stay live over an
4194   arbitrary computation.  The same applies to the dst reg.
4195
4196 * Temporary regs obtained from getNewRegNat are distinct from
4197   each other and from all other regs, and stay live over
4198   arbitrary computations.
4199
4200 --------------------
4201
4202 SDM's version of The Rules:
4203
4204 * If getRegister returns Any, that means it can generate correct
4205   code which places the result in any register, period.  Even if that
4206   register happens to be read during the computation.
4207
4208   Corollary #1: this means that if you are generating code for an
4209   operation with two arbitrary operands, you cannot assign the result
4210   of the first operand into the destination register before computing
4211   the second operand.  The second operand might require the old value
4212   of the destination register.
4213
4214   Corollary #2: A function might be able to generate more efficient
4215   code if it knows the destination register is a new temporary (and
4216   therefore not read by any of the sub-computations).
4217
4218 * If getRegister returns Any, then the code it generates may modify only:
4219         (a) fresh temporaries
4220         (b) the destination register
4221         (c) known registers (eg. %ecx is used by shifts)
4222   In particular, it may *not* modify global registers, unless the global
4223   register happens to be the destination register.
4224 -}
4225
4226 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4227   | not (is64BitLit lit_a) = do
4228   b_code <- getAnyReg b
4229   let
4230        code dst
4231          = b_code dst `snocOL`
4232            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4233   -- in
4234   return (Any rep code)
4235
4236 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4237
4238 -- This is re-used for floating pt instructions too.
4239 genTrivialCode rep instr a b = do
4240   (b_op, b_code) <- getNonClobberedOperand b
4241   a_code <- getAnyReg a
4242   tmp <- getNewRegNat rep
4243   let
4244      -- We want the value of b to stay alive across the computation of a.
4245      -- But, we want to calculate a straight into the destination register,
4246      -- because the instruction only has two operands (dst := dst `op` src).
4247      -- The troublesome case is when the result of b is in the same register
4248      -- as the destination reg.  In this case, we have to save b in a
4249      -- new temporary across the computation of a.
4250      code dst
4251         | dst `regClashesWithOp` b_op =
4252                 b_code `appOL`
4253                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4254                 a_code dst `snocOL`
4255                 instr (OpReg tmp) (OpReg dst)
4256         | otherwise =
4257                 b_code `appOL`
4258                 a_code dst `snocOL`
4259                 instr b_op (OpReg dst)
4260   -- in
4261   return (Any rep code)
4262
4263 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4264 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4265 reg `regClashesWithOp` _            = False
4266
4267 -----------
4268
4269 trivialUCode rep instr x = do
4270   x_code <- getAnyReg x
4271   let
4272      code dst =
4273         x_code dst `snocOL`
4274         instr (OpReg dst)
4275   -- in
4276   return (Any rep code)
4277
4278 -----------
4279
4280 #if i386_TARGET_ARCH
4281
4282 trivialFCode pk instr x y = do
4283   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4284   (y_reg, y_code) <- getSomeReg y
4285   let
4286      code dst =
4287         x_code `appOL`
4288         y_code `snocOL`
4289         instr pk x_reg y_reg dst
4290   -- in
4291   return (Any pk code)
4292
4293 #endif
4294
4295 #if x86_64_TARGET_ARCH
4296
4297 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4298
4299 #endif
4300
4301 -------------
4302
4303 trivialUFCode rep instr x = do
4304   (x_reg, x_code) <- getSomeReg x
4305   let
4306      code dst =
4307         x_code `snocOL`
4308         instr x_reg dst
4309   -- in
4310   return (Any rep code)
4311
4312 #endif /* i386_TARGET_ARCH */
4313
4314 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4315
4316 #if sparc_TARGET_ARCH
4317
4318 trivialCode pk instr x (CmmLit (CmmInt y d))
4319   | fits13Bits y
4320   = do
4321       (src1, code) <- getSomeReg x
4322       tmp <- getNewRegNat I32
4323       let
4324         src2 = ImmInt (fromInteger y)
4325         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4326       return (Any I32 code__2)
4327
4328 trivialCode pk instr x y = do
4329     (src1, code1) <- getSomeReg x
4330     (src2, code2) <- getSomeReg y
4331     tmp1 <- getNewRegNat I32
4332     tmp2 <- getNewRegNat I32
4333     let
4334         code__2 dst = code1 `appOL` code2 `snocOL`
4335                       instr src1 (RIReg src2) dst
4336     return (Any I32 code__2)
4337
4338 ------------
4339 trivialFCode pk instr x y = do
4340     (src1, code1) <- getSomeReg x
4341     (src2, code2) <- getSomeReg y
4342     tmp1 <- getNewRegNat (cmmExprRep x)
4343     tmp2 <- getNewRegNat (cmmExprRep y)
4344     tmp <- getNewRegNat F64
4345     let
4346         promote x = FxTOy F32 F64 x tmp
4347
4348         pk1   = cmmExprRep x
4349         pk2   = cmmExprRep y
4350
4351         code__2 dst =
4352                 if pk1 == pk2 then
4353                     code1 `appOL` code2 `snocOL`
4354                     instr pk src1 src2 dst
4355                 else if pk1 == F32 then
4356                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4357                     instr F64 tmp src2 dst
4358                 else
4359                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4360                     instr F64 src1 tmp dst
4361     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4362
4363 ------------
4364 trivialUCode pk instr x = do
4365     (src, code) <- getSomeReg x
4366     tmp <- getNewRegNat pk
4367     let
4368         code__2 dst = code `snocOL` instr (RIReg src) dst
4369     return (Any pk code__2)
4370
4371 -------------
4372 trivialUFCode pk instr x = do
4373     (src, code) <- getSomeReg x
4374     tmp <- getNewRegNat pk
4375     let
4376         code__2 dst = code `snocOL` instr src dst
4377     return (Any pk code__2)
4378
4379 #endif /* sparc_TARGET_ARCH */
4380
4381 #if powerpc_TARGET_ARCH
4382
4383 {-
4384 Wolfgang's PowerPC version of The Rules:
4385
4386 A slightly modified version of The Rules to take advantage of the fact
4387 that PowerPC instructions work on all registers and don't implicitly
4388 clobber any fixed registers.
4389
4390 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4391
4392 * If getRegister returns Any, then the code it generates may modify only:
4393         (a) fresh temporaries
4394         (b) the destination register
4395   It may *not* modify global registers, unless the global
4396   register happens to be the destination register.
4397   It may not clobber any other registers. In fact, only ccalls clobber any
4398   fixed registers.
4399   Also, it may not modify the counter register (used by genCCall).
4400
4401   Corollary: If a getRegister for a subexpression returns Fixed, you need
4402   not move it to a fresh temporary before evaluating the next subexpression.
4403   The Fixed register won't be modified.
4404   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4405
4406 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4407   the value of the destination register.
4408 -}
4409
4410 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4411     | Just imm <- makeImmediate rep signed y
4412     = do
4413         (src1, code1) <- getSomeReg x
4414         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4415         return (Any rep code)
4416
4417 trivialCode rep signed instr x y = do
4418     (src1, code1) <- getSomeReg x
4419     (src2, code2) <- getSomeReg y
4420     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4421     return (Any rep code)
4422
4423 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4424     -> CmmExpr -> CmmExpr -> NatM Register
4425 trivialCodeNoImm rep instr x y = do
4426     (src1, code1) <- getSomeReg x
4427     (src2, code2) <- getSomeReg y
4428     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4429     return (Any rep code)
4430
4431 trivialUCode rep instr x = do
4432     (src, code) <- getSomeReg x
4433     let code' dst = code `snocOL` instr dst src
4434     return (Any rep code')
4435
4436 -- There is no "remainder" instruction on the PPC, so we have to do
4437 -- it the hard way.
4438 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4439
4440 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4441     -> CmmExpr -> CmmExpr -> NatM Register
4442 remainderCode rep div x y = do
4443     (src1, code1) <- getSomeReg x
4444     (src2, code2) <- getSomeReg y
4445     let code dst = code1 `appOL` code2 `appOL` toOL [
4446                 div dst src1 src2,
4447                 MULLW dst dst (RIReg src2),
4448                 SUBF dst dst src1
4449             ]
4450     return (Any rep code)
4451
4452 #endif /* powerpc_TARGET_ARCH */
4453
4454
4455 -- -----------------------------------------------------------------------------
4456 --  Coercing to/from integer/floating-point...
4457
4458 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4459 -- conversions.  We have to store temporaries in memory to move
4460 -- between the integer and the floating point register sets.
4461
4462 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4463 -- pretend, on sparc at least, that double and float regs are seperate
4464 -- kinds, so the value has to be computed into one kind before being
4465 -- explicitly "converted" to live in the other kind.
4466
4467 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4468 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4469
4470 #if sparc_TARGET_ARCH
4471 coerceDbl2Flt :: CmmExpr -> NatM Register
4472 coerceFlt2Dbl :: CmmExpr -> NatM Register
4473 #endif
4474
4475 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4476
4477 #if alpha_TARGET_ARCH
4478
4479 coerceInt2FP _ x
4480   = getRegister x               `thenNat` \ register ->
4481     getNewRegNat IntRep         `thenNat` \ reg ->
4482     let
4483         code = registerCode register reg
4484         src  = registerName register reg
4485
4486         code__2 dst = code . mkSeqInstrs [
4487             ST Q src (spRel 0),
4488             LD TF dst (spRel 0),
4489             CVTxy Q TF dst dst]
4490     in
4491     return (Any F64 code__2)
4492
4493 -------------
4494 coerceFP2Int x
4495   = getRegister x               `thenNat` \ register ->
4496     getNewRegNat F64    `thenNat` \ tmp ->
4497     let
4498         code = registerCode register tmp
4499         src  = registerName register tmp
4500
4501         code__2 dst = code . mkSeqInstrs [
4502             CVTxy TF Q src tmp,
4503             ST TF tmp (spRel 0),
4504             LD Q dst (spRel 0)]
4505     in
4506     return (Any IntRep code__2)
4507
4508 #endif /* alpha_TARGET_ARCH */
4509
4510 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4511
4512 #if i386_TARGET_ARCH
4513
4514 coerceInt2FP from to x = do
4515   (x_reg, x_code) <- getSomeReg x
4516   let
4517         opc  = case to of F32 -> GITOF; F64 -> GITOD
4518         code dst = x_code `snocOL` opc x_reg dst
4519         -- ToDo: works for non-I32 reps?
4520   -- in
4521   return (Any to code)
4522
4523 ------------
4524
4525 coerceFP2Int from to x = do
4526   (x_reg, x_code) <- getSomeReg x
4527   let
4528         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4529         code dst = x_code `snocOL` opc x_reg dst
4530         -- ToDo: works for non-I32 reps?
4531   -- in
4532   return (Any to code)
4533
4534 #endif /* i386_TARGET_ARCH */
4535
4536 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4537
4538 #if x86_64_TARGET_ARCH
4539
4540 coerceFP2Int from to x = do
4541   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4542   let
4543         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4544         code dst = x_code `snocOL` opc x_op dst
4545   -- in
4546   return (Any to code) -- works even if the destination rep is <I32
4547
4548 coerceInt2FP from to x = do
4549   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4550   let
4551         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4552         code dst = x_code `snocOL` opc x_op dst
4553   -- in
4554   return (Any to code) -- works even if the destination rep is <I32
4555
4556 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4557 coerceFP2FP to x = do
4558   (x_reg, x_code) <- getSomeReg x
4559   let
4560         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4561         code dst = x_code `snocOL` opc x_reg dst
4562   -- in
4563   return (Any to code)
4564
4565 #endif
4566
4567 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4568
4569 #if sparc_TARGET_ARCH
4570
4571 coerceInt2FP pk1 pk2 x = do
4572     (src, code) <- getSomeReg x
4573     let
4574         code__2 dst = code `appOL` toOL [
4575             ST pk1 src (spRel (-2)),
4576             LD pk1 (spRel (-2)) dst,
4577             FxTOy pk1 pk2 dst dst]
4578     return (Any pk2 code__2)
4579
4580 ------------
4581 coerceFP2Int pk fprep x = do
4582     (src, code) <- getSomeReg x
4583     reg <- getNewRegNat fprep
4584     tmp <- getNewRegNat pk
4585     let
4586         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4587             code `appOL` toOL [
4588             FxTOy fprep pk src tmp,
4589             ST pk tmp (spRel (-2)),
4590             LD pk (spRel (-2)) dst]
4591     return (Any pk code__2)
4592
4593 ------------
4594 coerceDbl2Flt x = do
4595     (src, code) <- getSomeReg x
4596     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4597
4598 ------------
4599 coerceFlt2Dbl x = do
4600     (src, code) <- getSomeReg x
4601     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4602
4603 #endif /* sparc_TARGET_ARCH */
4604
4605 #if powerpc_TARGET_ARCH
4606 coerceInt2FP fromRep toRep x = do
4607     (src, code) <- getSomeReg x
4608     lbl <- getNewLabelNat
4609     itmp <- getNewRegNat I32
4610     ftmp <- getNewRegNat F64
4611     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4612     Amode addr addr_code <- getAmode dynRef
4613     let
4614         code' dst = code `appOL` maybe_exts `appOL` toOL [
4615                 LDATA ReadOnlyData
4616                                 [CmmDataLabel lbl,
4617                                  CmmStaticLit (CmmInt 0x43300000 I32),
4618                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4619                 XORIS itmp src (ImmInt 0x8000),
4620                 ST I32 itmp (spRel 3),
4621                 LIS itmp (ImmInt 0x4330),
4622                 ST I32 itmp (spRel 2),
4623                 LD F64 ftmp (spRel 2)
4624             ] `appOL` addr_code `appOL` toOL [
4625                 LD F64 dst addr,
4626                 FSUB F64 dst ftmp dst
4627             ] `appOL` maybe_frsp dst
4628
4629         maybe_exts = case fromRep of
4630                         I8 ->  unitOL $ EXTS I8 src src
4631                         I16 -> unitOL $ EXTS I16 src src
4632                         I32 -> nilOL
4633         maybe_frsp dst = case toRep of
4634                         F32 -> unitOL $ FRSP dst dst
4635                         F64 -> nilOL
4636     return (Any toRep code')
4637
4638 coerceFP2Int fromRep toRep x = do
4639     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4640     (src, code) <- getSomeReg x
4641     tmp <- getNewRegNat F64
4642     let
4643         code' dst = code `appOL` toOL [
4644                 -- convert to int in FP reg
4645             FCTIWZ tmp src,
4646                 -- store value (64bit) from FP to stack
4647             ST F64 tmp (spRel 2),
4648                 -- read low word of value (high word is undefined)
4649             LD I32 dst (spRel 3)]
4650     return (Any toRep code')
4651 #endif /* powerpc_TARGET_ARCH */
4652
4653
4654 -- -----------------------------------------------------------------------------
4655 -- eXTRA_STK_ARGS_HERE
4656
4657 -- We (allegedly) put the first six C-call arguments in registers;
4658 -- where do we start putting the rest of them?
4659
4660 -- Moved from MachInstrs (SDM):
4661
4662 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4663 eXTRA_STK_ARGS_HERE :: Int
4664 eXTRA_STK_ARGS_HERE
4665   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4666 #endif
4667