ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25
  26 -- Our intermediate code:
  27 import PprCmm           ( pprExpr )
  28 import Cmm
  29 import MachOp
  30 import CLabel
  31
  32 -- The rest:
  33 import StaticFlags      ( opt_PIC )
  34 import ForeignCall      ( CCallConv(..) )
  35 import OrdList
  36 import Pretty
  37 import Outputable
  38 import FastString
  39 import FastTypes        ( isFastTrue )
  40 import Constants        ( wORD_SIZE )
  41
  42 #ifdef DEBUG
  43 import Outputable       ( assertPanic )
  44 import TRACE            ( trace )
  45 #endif
  46
  47 import Control.Monad    ( mapAndUnzipM )
  48 import Maybe            ( fromJust )
  49 import DATA_BITS
  50 import DATA_WORD
  51
  52 -- -----------------------------------------------------------------------------
  53 -- Top-level of the instruction selector
  54
  55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  56 -- They are really trees of insns to facilitate fast appending, where a
  57 -- left-to-right traversal (pre-order?) yields the insns in the correct
  58 -- order.
  59
  60 type InstrBlock = OrdList Instr
  61
  62 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  63 cmmTopCodeGen (CmmProc info lab params blocks) = do
  64   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  65   picBaseMb <- getPicBaseMaybeNat
  66   let proc = CmmProc info lab params (concat nat_blocks)
  67       tops = proc : concat statics
  68   case picBaseMb of
  69       Just picBase -> initializePicBase picBase tops
  70       Nothing -> return tops
  71
  72 cmmTopCodeGen (CmmData sec dat) = do
  73   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  74
  75 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  76 basicBlockCodeGen (BasicBlock id stmts) = do
  77   instrs <- stmtsToInstrs stmts
  78   -- code generation may introduce new basic block boundaries, which
  79   -- are indicated by the NEWBLOCK instruction.  We must split up the
  80   -- instruction stream into basic blocks again.  Also, we extract
  81   -- LDATAs here too.
  82   let
  83         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  84
  85         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  86           = ([], BasicBlock id instrs : blocks, statics)
  87         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  88           = (instrs, blocks, CmmData sec dat:statics)
  89         mkBlocks instr (instrs,blocks,statics)
  90           = (instr:instrs, blocks, statics)
  91   -- in
  92   return (BasicBlock id top : other_blocks, statics)
  93
  94 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  95 stmtsToInstrs stmts
  96    = do instrss <- mapM stmtToInstrs stmts
  97         return (concatOL instrss)
  98
  99 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 100 stmtToInstrs stmt = case stmt of
 101     CmmNop         -> return nilOL
 102     CmmComment s   -> return (unitOL (COMMENT s))
 103
 104     CmmAssign reg src
 105       | isFloatingRep kind -> assignReg_FltCode kind reg src
 106 #if WORD_SIZE_IN_BITS==32
 107       | kind == I64        -> assignReg_I64Code      reg src
 108 #endif
 109       | otherwise          -> assignReg_IntCode kind reg src
 110         where kind = cmmRegRep reg
 111
 112     CmmStore addr src
 113       | isFloatingRep kind -> assignMem_FltCode kind addr src
 114 #if WORD_SIZE_IN_BITS==32
 115       | kind == I64      -> assignMem_I64Code      addr src
 116 #endif
 117       | otherwise        -> assignMem_IntCode kind addr src
 118         where kind = cmmExprRep src
 119
 120     CmmCall target result_regs args vols
 121        -> genCCall target result_regs args vols
 122
 123     CmmBranch id          -> genBranch id
 124     CmmCondBranch arg id  -> genCondJump id arg
 125     CmmSwitch arg ids     -> genSwitch arg ids
 126     CmmJump arg params    -> genJump arg
 127
 128 -- -----------------------------------------------------------------------------
 129 -- General things for putting together code sequences
 130
 131 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 132 -- CmmExprs into CmmRegOff?
 133 mangleIndexTree :: CmmExpr -> CmmExpr
 134 mangleIndexTree (CmmRegOff reg off)
 135   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 136   where rep = cmmRegRep reg
 137
 138 -- -----------------------------------------------------------------------------
 139 --  Code gen for 64-bit arithmetic on 32-bit platforms
 140
 141 {-
 142 Simple support for generating 64-bit code (ie, 64 bit values and 64
 143 bit assignments) on 32-bit platforms.  Unlike the main code generator
 144 we merely shoot for generating working code as simply as possible, and
 145 pay little attention to code quality.  Specifically, there is no
 146 attempt to deal cleverly with the fixed-vs-floating register
 147 distinction; all values are generated into (pairs of) floating
 148 registers, even if this would mean some redundant reg-reg moves as a
 149 result.  Only one of the VRegUniques is returned, since it will be
 150 of the VRegUniqueLo form, and the upper-half VReg can be determined
 151 by applying getHiVRegFromLo to it.
 152 -}
 153
 154 data ChildCode64        -- a.k.a "Register64"
 155    = ChildCode64
 156         InstrBlock      -- code
 157         Reg             -- the lower 32-bit temporary which contains the
 158                         -- result; use getHiVRegFromLo to find the other
 159                         -- VRegUnique.  Rules of this simplified insn
 160                         -- selection game are therefore that the returned
 161                         -- Reg may be modified
 162
 163 #if WORD_SIZE_IN_BITS==32
 164 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 165 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 166 #endif
 167
 168 #ifndef x86_64_TARGET_ARCH
 169 iselExpr64        :: CmmExpr -> NatM ChildCode64
 170 #endif
 171
 172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 173
 174 #if i386_TARGET_ARCH
 175
 176 assignMem_I64Code addrTree valueTree = do
 177   Amode addr addr_code <- getAmode addrTree
 178   ChildCode64 vcode rlo <- iselExpr64 valueTree
 179   let
 180         rhi = getHiVRegFromLo rlo
 181
 182         -- Little-endian store
 183         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 184         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 185   -- in
 186   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 187
 188
 189 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 190    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 191    let
 192          r_dst_lo = mkVReg u_dst I32
 193          r_dst_hi = getHiVRegFromLo r_dst_lo
 194          r_src_hi = getHiVRegFromLo r_src_lo
 195          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 196          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 197    -- in
 198    return (
 199         vcode `snocOL` mov_lo `snocOL` mov_hi
 200      )
 201
 202 assignReg_I64Code lvalue valueTree
 203    = panic "assignReg_I64Code(i386): invalid lvalue"
 204
 205 ------------
 206
 207 iselExpr64 (CmmLit (CmmInt i _)) = do
 208   (rlo,rhi) <- getNewRegPairNat I32
 209   let
 210         r = fromIntegral (fromIntegral i :: Word32)
 211         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 212         code = toOL [
 213                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 214                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 215                 ]
 216   -- in
 217   return (ChildCode64 code rlo)
 218
 219 iselExpr64 (CmmLoad addrTree I64) = do
 220    Amode addr addr_code <- getAmode addrTree
 221    (rlo,rhi) <- getNewRegPairNat I32
 222    let
 223         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 224         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 225    -- in
 226    return (
 227             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 228                         rlo
 229      )
 230
 231 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 232    = return (ChildCode64 nilOL (mkVReg vu I32))
 233
 234 -- we handle addition, but rather badly
 235 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 236    ChildCode64 code1 r1lo <- iselExpr64 e1
 237    (rlo,rhi) <- getNewRegPairNat I32
 238    let
 239         r = fromIntegral (fromIntegral i :: Word32)
 240         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 241         r1hi = getHiVRegFromLo r1lo
 242         code =  code1 `appOL`
 243                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 244                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 245                        MOV I32 (OpReg r1hi) (OpReg rhi),
 246                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 247    -- in
 248    return (ChildCode64 code rlo)
 249
 250 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 251    ChildCode64 code1 r1lo <- iselExpr64 e1
 252    ChildCode64 code2 r2lo <- iselExpr64 e2
 253    (rlo,rhi) <- getNewRegPairNat I32
 254    let
 255         r1hi = getHiVRegFromLo r1lo
 256         r2hi = getHiVRegFromLo r2lo
 257         code =  code1 `appOL`
 258                 code2 `appOL`
 259                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 260                        ADD I32 (OpReg r2lo) (OpReg rlo),
 261                        MOV I32 (OpReg r1hi) (OpReg rhi),
 262                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 263    -- in
 264    return (ChildCode64 code rlo)
 265
 266 iselExpr64 expr
 267    = pprPanic "iselExpr64(i386)" (ppr expr)
 268
 269 #endif /* i386_TARGET_ARCH */
 270
 271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 272
 273 #if sparc_TARGET_ARCH
 274
 275 assignMem_I64Code addrTree valueTree = do
 276      Amode addr addr_code <- getAmode addrTree
 277      ChildCode64 vcode rlo <- iselExpr64 valueTree
 278      (src, code) <- getSomeReg addrTree
 279      let
 280          rhi = getHiVRegFromLo rlo
 281          -- Big-endian store
 282          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 283          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 284      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 285
 286 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 287      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 288      let
 289          r_dst_lo = mkVReg u_dst pk
 290          r_dst_hi = getHiVRegFromLo r_dst_lo
 291          r_src_hi = getHiVRegFromLo r_src_lo
 292          mov_lo = mkMOV r_src_lo r_dst_lo
 293          mov_hi = mkMOV r_src_hi r_dst_hi
 294          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 295      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 296 assignReg_I64Code lvalue valueTree
 297    = panic "assignReg_I64Code(sparc): invalid lvalue"
 298
 299
 300 -- Don't delete this -- it's very handy for debugging.
 301 --iselExpr64 expr
 302 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 303 --   = panic "iselExpr64(???)"
 304
 305 iselExpr64 (CmmLoad addrTree I64) = do
 306      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 307      rlo <- getNewRegNat I32
 308      let rhi = getHiVRegFromLo rlo
 309          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 310          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 311      return (
 312             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 313                          rlo
 314           )
 315
 316 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 317      r_dst_lo <-  getNewRegNat I32
 318      let r_dst_hi = getHiVRegFromLo r_dst_lo
 319          r_src_lo = mkVReg uq I32
 320          r_src_hi = getHiVRegFromLo r_src_lo
 321          mov_lo = mkMOV r_src_lo r_dst_lo
 322          mov_hi = mkMOV r_src_hi r_dst_hi
 323          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 324      return (
 325             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 326          )
 327
 328 iselExpr64 expr
 329    = pprPanic "iselExpr64(sparc)" (ppr expr)
 330
 331 #endif /* sparc_TARGET_ARCH */
 332
 333 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 334
 335 #if powerpc_TARGET_ARCH
 336
 337 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 338 getI64Amodes addrTree = do
 339     Amode hi_addr addr_code <- getAmode addrTree
 340     case addrOffset hi_addr 4 of
 341         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 342         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 343                            return (AddrRegImm hi_ptr (ImmInt 0),
 344                                    AddrRegImm hi_ptr (ImmInt 4),
 345                                    code)
 346
 347 assignMem_I64Code addrTree valueTree = do
 348         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 349         ChildCode64 vcode rlo <- iselExpr64 valueTree
 350         let
 351                 rhi = getHiVRegFromLo rlo
 352
 353                 -- Big-endian store
 354                 mov_hi = ST I32 rhi hi_addr
 355                 mov_lo = ST I32 rlo lo_addr
 356         -- in
 357         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 358
 359 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 360    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 361    let
 362          r_dst_lo = mkVReg u_dst I32
 363          r_dst_hi = getHiVRegFromLo r_dst_lo
 364          r_src_hi = getHiVRegFromLo r_src_lo
 365          mov_lo = MR r_dst_lo r_src_lo
 366          mov_hi = MR r_dst_hi r_src_hi
 367    -- in
 368    return (
 369         vcode `snocOL` mov_lo `snocOL` mov_hi
 370      )
 371
 372 assignReg_I64Code lvalue valueTree
 373    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 374
 375
 376 -- Don't delete this -- it's very handy for debugging.
 377 --iselExpr64 expr
 378 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 379 --   = panic "iselExpr64(???)"
 380
 381 iselExpr64 (CmmLoad addrTree I64) = do
 382     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 383     (rlo, rhi) <- getNewRegPairNat I32
 384     let mov_hi = LD I32 rhi hi_addr
 385         mov_lo = LD I32 rlo lo_addr
 386     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 387                          rlo
 388
 389 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 390    = return (ChildCode64 nilOL (mkVReg vu I32))
 391
 392 iselExpr64 (CmmLit (CmmInt i _)) = do
 393   (rlo,rhi) <- getNewRegPairNat I32
 394   let
 395         half0 = fromIntegral (fromIntegral i :: Word16)
 396         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 397         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 398         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 399
 400         code = toOL [
 401                 LIS rlo (ImmInt half1),
 402                 OR rlo rlo (RIImm $ ImmInt half0),
 403                 LIS rhi (ImmInt half3),
 404                 OR rlo rlo (RIImm $ ImmInt half2)
 405                 ]
 406   -- in
 407   return (ChildCode64 code rlo)
 408
 409 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 410    ChildCode64 code1 r1lo <- iselExpr64 e1
 411    ChildCode64 code2 r2lo <- iselExpr64 e2
 412    (rlo,rhi) <- getNewRegPairNat I32
 413    let
 414         r1hi = getHiVRegFromLo r1lo
 415         r2hi = getHiVRegFromLo r2lo
 416         code =  code1 `appOL`
 417                 code2 `appOL`
 418                 toOL [ ADDC rlo r1lo r2lo,
 419                        ADDE rhi r1hi r2hi ]
 420    -- in
 421    return (ChildCode64 code rlo)
 422
 423 iselExpr64 expr
 424    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 425
 426 #endif /* powerpc_TARGET_ARCH */
 427
 428
 429 -- -----------------------------------------------------------------------------
 430 -- The 'Register' type
 431
 432 -- 'Register's passed up the tree.  If the stix code forces the register
 433 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 434 -- otherwise, it comes out as @Any@, and the parent can decide which
 435 -- register to put it in.
 436
 437 data Register
 438   = Fixed   MachRep Reg InstrBlock
 439   | Any     MachRep (Reg -> InstrBlock)
 440
 441 swizzleRegisterRep :: Register -> MachRep -> Register
 442 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 443 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 444
 445
 446 -- -----------------------------------------------------------------------------
 447 -- Utils based on getRegister, below
 448
 449 -- The dual to getAnyReg: compute an expression into a register, but
 450 -- we don't mind which one it is.
 451 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 452 getSomeReg expr = do
 453   r <- getRegister expr
 454   case r of
 455     Any rep code -> do
 456         tmp <- getNewRegNat rep
 457         return (tmp, code tmp)
 458     Fixed _ reg code ->
 459         return (reg, code)
 460
 461 -- -----------------------------------------------------------------------------
 462 -- Grab the Reg for a CmmReg
 463
 464 getRegisterReg :: CmmReg -> Reg
 465
 466 getRegisterReg (CmmLocal (LocalReg u pk))
 467   = mkVReg u pk
 468
 469 getRegisterReg (CmmGlobal mid)
 470   = case get_GlobalReg_reg_or_addr mid of
 471        Left (RealReg rrno) -> RealReg rrno
 472        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 473           -- By this stage, the only MagicIds remaining should be the
 474           -- ones which map to a real machine register on this
 475           -- platform.  Hence ...
 476
 477
 478 -- -----------------------------------------------------------------------------
 479 -- Generate code to get a subtree into a Register
 480
 481 -- Don't delete this -- it's very handy for debugging.
 482 --getRegister expr
 483 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 484 --   = panic "getRegister(???)"
 485
 486 getRegister :: CmmExpr -> NatM Register
 487
 488 getRegister (CmmReg (CmmGlobal PicBaseReg))
 489   = do
 490       reg <- getPicBaseNat wordRep
 491       return (Fixed wordRep reg nilOL)
 492
 493 getRegister (CmmReg reg)
 494   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 495
 496 getRegister tree@(CmmRegOff _ _)
 497   = getRegister (mangleIndexTree tree)
 498
 499 -- end of machine-"independent" bit; here we go on the rest...
 500
 501 #if alpha_TARGET_ARCH
 502
 503 getRegister (StDouble d)
 504   = getBlockIdNat                   `thenNat` \ lbl ->
 505     getNewRegNat PtrRep             `thenNat` \ tmp ->
 506     let code dst = mkSeqInstrs [
 507             LDATA RoDataSegment lbl [
 508                     DATA TF [ImmLab (rational d)]
 509                 ],
 510             LDA tmp (AddrImm (ImmCLbl lbl)),
 511             LD TF dst (AddrReg tmp)]
 512     in
 513         return (Any F64 code)
 514
 515 getRegister (StPrim primop [x]) -- unary PrimOps
 516   = case primop of
 517       IntNegOp -> trivialUCode (NEG Q False) x
 518
 519       NotOp    -> trivialUCode NOT x
 520
 521       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 522       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 523
 524       OrdOp -> coerceIntCode IntRep x
 525       ChrOp -> chrCode x
 526
 527       Float2IntOp  -> coerceFP2Int    x
 528       Int2FloatOp  -> coerceInt2FP pr x
 529       Double2IntOp -> coerceFP2Int    x
 530       Int2DoubleOp -> coerceInt2FP pr x
 531
 532       Double2FloatOp -> coerceFltCode x
 533       Float2DoubleOp -> coerceFltCode x
 534
 535       other_op -> getRegister (StCall fn CCallConv F64 [x])
 536         where
 537           fn = case other_op of
 538                  FloatExpOp    -> FSLIT("exp")
 539                  FloatLogOp    -> FSLIT("log")
 540                  FloatSqrtOp   -> FSLIT("sqrt")
 541                  FloatSinOp    -> FSLIT("sin")
 542                  FloatCosOp    -> FSLIT("cos")
 543                  FloatTanOp    -> FSLIT("tan")
 544                  FloatAsinOp   -> FSLIT("asin")
 545                  FloatAcosOp   -> FSLIT("acos")
 546                  FloatAtanOp   -> FSLIT("atan")
 547                  FloatSinhOp   -> FSLIT("sinh")
 548                  FloatCoshOp   -> FSLIT("cosh")
 549                  FloatTanhOp   -> FSLIT("tanh")
 550                  DoubleExpOp   -> FSLIT("exp")
 551                  DoubleLogOp   -> FSLIT("log")
 552                  DoubleSqrtOp  -> FSLIT("sqrt")
 553                  DoubleSinOp   -> FSLIT("sin")
 554                  DoubleCosOp   -> FSLIT("cos")
 555                  DoubleTanOp   -> FSLIT("tan")
 556                  DoubleAsinOp  -> FSLIT("asin")
 557                  DoubleAcosOp  -> FSLIT("acos")
 558                  DoubleAtanOp  -> FSLIT("atan")
 559                  DoubleSinhOp  -> FSLIT("sinh")
 560                  DoubleCoshOp  -> FSLIT("cosh")
 561                  DoubleTanhOp  -> FSLIT("tanh")
 562   where
 563     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 564
 565 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 566   = case primop of
 567       CharGtOp -> trivialCode (CMP LTT) y x
 568       CharGeOp -> trivialCode (CMP LE) y x
 569       CharEqOp -> trivialCode (CMP EQQ) x y
 570       CharNeOp -> int_NE_code x y
 571       CharLtOp -> trivialCode (CMP LTT) x y
 572       CharLeOp -> trivialCode (CMP LE) x y
 573
 574       IntGtOp  -> trivialCode (CMP LTT) y x
 575       IntGeOp  -> trivialCode (CMP LE) y x
 576       IntEqOp  -> trivialCode (CMP EQQ) x y
 577       IntNeOp  -> int_NE_code x y
 578       IntLtOp  -> trivialCode (CMP LTT) x y
 579       IntLeOp  -> trivialCode (CMP LE) x y
 580
 581       WordGtOp -> trivialCode (CMP ULT) y x
 582       WordGeOp -> trivialCode (CMP ULE) x y
 583       WordEqOp -> trivialCode (CMP EQQ)  x y
 584       WordNeOp -> int_NE_code x y
 585       WordLtOp -> trivialCode (CMP ULT) x y
 586       WordLeOp -> trivialCode (CMP ULE) x y
 587
 588       AddrGtOp -> trivialCode (CMP ULT) y x
 589       AddrGeOp -> trivialCode (CMP ULE) y x
 590       AddrEqOp -> trivialCode (CMP EQQ)  x y
 591       AddrNeOp -> int_NE_code x y
 592       AddrLtOp -> trivialCode (CMP ULT) x y
 593       AddrLeOp -> trivialCode (CMP ULE) x y
 594
 595       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 596       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 597       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 598       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 599       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 600       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 601
 602       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 603       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 604       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 605       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 606       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 607       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 608
 609       IntAddOp  -> trivialCode (ADD Q False) x y
 610       IntSubOp  -> trivialCode (SUB Q False) x y
 611       IntMulOp  -> trivialCode (MUL Q False) x y
 612       IntQuotOp -> trivialCode (DIV Q False) x y
 613       IntRemOp  -> trivialCode (REM Q False) x y
 614
 615       WordAddOp  -> trivialCode (ADD Q False) x y
 616       WordSubOp  -> trivialCode (SUB Q False) x y
 617       WordMulOp  -> trivialCode (MUL Q False) x y
 618       WordQuotOp -> trivialCode (DIV Q True) x y
 619       WordRemOp  -> trivialCode (REM Q True) x y
 620
 621       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 622       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 623       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 624       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 625
 626       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 627       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 628       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 629       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 630
 631       AddrAddOp  -> trivialCode (ADD Q False) x y
 632       AddrSubOp  -> trivialCode (SUB Q False) x y
 633       AddrRemOp  -> trivialCode (REM Q True) x y
 634
 635       AndOp  -> trivialCode AND x y
 636       OrOp   -> trivialCode OR  x y
 637       XorOp  -> trivialCode XOR x y
 638       SllOp  -> trivialCode SLL x y
 639       SrlOp  -> trivialCode SRL x y
 640
 641       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 642       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 643       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 644
 645       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 646       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 647   where
 648     {- ------------------------------------------------------------
 649         Some bizarre special code for getting condition codes into
 650         registers.  Integer non-equality is a test for equality
 651         followed by an XOR with 1.  (Integer comparisons always set
 652         the result register to 0 or 1.)  Floating point comparisons of
 653         any kind leave the result in a floating point register, so we
 654         need to wrangle an integer register out of things.
 655     -}
 656     int_NE_code :: StixTree -> StixTree -> NatM Register
 657
 658     int_NE_code x y
 659       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 660         getNewRegNat IntRep             `thenNat` \ tmp ->
 661         let
 662             code = registerCode register tmp
 663             src  = registerName register tmp
 664             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 665         in
 666         return (Any IntRep code__2)
 667
 668     {- ------------------------------------------------------------
 669         Comments for int_NE_code also apply to cmpF_code
 670     -}
 671     cmpF_code
 672         :: (Reg -> Reg -> Reg -> Instr)
 673         -> Cond
 674         -> StixTree -> StixTree
 675         -> NatM Register
 676
 677     cmpF_code instr cond x y
 678       = trivialFCode pr instr x y       `thenNat` \ register ->
 679         getNewRegNat F64                `thenNat` \ tmp ->
 680         getBlockIdNat                   `thenNat` \ lbl ->
 681         let
 682             code = registerCode register tmp
 683             result  = registerName register tmp
 684
 685             code__2 dst = code . mkSeqInstrs [
 686                 OR zeroh (RIImm (ImmInt 1)) dst,
 687                 BF cond  result (ImmCLbl lbl),
 688                 OR zeroh (RIReg zeroh) dst,
 689                 NEWBLOCK lbl]
 690         in
 691         return (Any IntRep code__2)
 692       where
 693         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 694       ------------------------------------------------------------
 695
 696 getRegister (CmmLoad pk mem)
 697   = getAmode mem                    `thenNat` \ amode ->
 698     let
 699         code = amodeCode amode
 700         src   = amodeAddr amode
 701         size = primRepToSize pk
 702         code__2 dst = code . mkSeqInstr (LD size dst src)
 703     in
 704     return (Any pk code__2)
 705
 706 getRegister (StInt i)
 707   | fits8Bits i
 708   = let
 709         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 710     in
 711     return (Any IntRep code)
 712   | otherwise
 713   = let
 714         code dst = mkSeqInstr (LDI Q dst src)
 715     in
 716     return (Any IntRep code)
 717   where
 718     src = ImmInt (fromInteger i)
 719
 720 getRegister leaf
 721   | isJust imm
 722   = let
 723         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 724     in
 725     return (Any PtrRep code)
 726   where
 727     imm = maybeImm leaf
 728     imm__2 = case imm of Just x -> x
 729
 730 #endif /* alpha_TARGET_ARCH */
 731
 732 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 733
 734 #if i386_TARGET_ARCH
 735
 736 getRegister (CmmLit (CmmFloat f F32)) = do
 737     lbl <- getNewLabelNat
 738     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 739     Amode addr addr_code <- getAmode dynRef
 740     let code dst =
 741             LDATA ReadOnlyData
 742                         [CmmDataLabel lbl,
 743                          CmmStaticLit (CmmFloat f F32)]
 744             `consOL` (addr_code `snocOL`
 745             GLD F32 addr dst)
 746     -- in
 747     return (Any F32 code)
 748
 749
 750 getRegister (CmmLit (CmmFloat d F64))
 751   | d == 0.0
 752   = let code dst = unitOL (GLDZ dst)
 753     in  return (Any F64 code)
 754
 755   | d == 1.0
 756   = let code dst = unitOL (GLD1 dst)
 757     in  return (Any F64 code)
 758
 759   | otherwise = do
 760     lbl <- getNewLabelNat
 761     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 762     Amode addr addr_code <- getAmode dynRef
 763     let code dst =
 764             LDATA ReadOnlyData
 765                         [CmmDataLabel lbl,
 766                          CmmStaticLit (CmmFloat d F64)]
 767             `consOL` (addr_code `snocOL`
 768             GLD F64 addr dst)
 769     -- in
 770     return (Any F64 code)
 771
 772 #endif /* i386_TARGET_ARCH */
 773
 774 #if x86_64_TARGET_ARCH
 775
 776 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 777    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 778         -- I don't know why there are xorpd, xorps, and pxor instructions.
 779         -- They all appear to do the same thing --SDM
 780    return (Any rep code)
 781
 782 getRegister (CmmLit (CmmFloat f rep)) = do
 783     lbl <- getNewLabelNat
 784     let code dst = toOL [
 785             LDATA ReadOnlyData
 786                         [CmmDataLabel lbl,
 787                          CmmStaticLit (CmmFloat f rep)],
 788             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 789             ]
 790     -- in
 791     return (Any rep code)
 792
 793 #endif /* x86_64_TARGET_ARCH */
 794
 795 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 796
 797 -- catch simple cases of zero- or sign-extended load
 798 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 799   code <- intLoadCode (MOVZxL I8) addr
 800   return (Any I32 code)
 801
 802 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 803   code <- intLoadCode (MOVSxL I8) addr
 804   return (Any I32 code)
 805
 806 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 807   code <- intLoadCode (MOVZxL I16) addr
 808   return (Any I32 code)
 809
 810 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 811   code <- intLoadCode (MOVSxL I16) addr
 812   return (Any I32 code)
 813
 814 #endif
 815
 816 #if x86_64_TARGET_ARCH
 817
 818 -- catch simple cases of zero- or sign-extended load
 819 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 820   code <- intLoadCode (MOVZxL I8) addr
 821   return (Any I64 code)
 822
 823 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 824   code <- intLoadCode (MOVSxL I8) addr
 825   return (Any I64 code)
 826
 827 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 828   code <- intLoadCode (MOVZxL I16) addr
 829   return (Any I64 code)
 830
 831 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 832   code <- intLoadCode (MOVSxL I16) addr
 833   return (Any I64 code)
 834
 835 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 836   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 837   return (Any I64 code)
 838
 839 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 840   code <- intLoadCode (MOVSxL I32) addr
 841   return (Any I64 code)
 842
 843 #endif
 844
 845 #if x86_64_TARGET_ARCH
 846 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 847   x_code <- getAnyReg x
 848   lbl <- getNewLabelNat
 849   let
 850     code dst = x_code dst `appOL` toOL [
 851         -- This is how gcc does it, so it can't be that bad:
 852         LDATA ReadOnlyData16 [
 853                 CmmAlign 16,
 854                 CmmDataLabel lbl,
 855                 CmmStaticLit (CmmInt 0x80000000 I32),
 856                 CmmStaticLit (CmmInt 0 I32),
 857                 CmmStaticLit (CmmInt 0 I32),
 858                 CmmStaticLit (CmmInt 0 I32)
 859         ],
 860         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 861                 -- xorps, so we need the 128-bit constant
 862                 -- ToDo: rip-relative
 863         ]
 864   --
 865   return (Any F32 code)
 866
 867 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 868   x_code <- getAnyReg x
 869   lbl <- getNewLabelNat
 870   let
 871         -- This is how gcc does it, so it can't be that bad:
 872     code dst = x_code dst `appOL` toOL [
 873         LDATA ReadOnlyData16 [
 874                 CmmAlign 16,
 875                 CmmDataLabel lbl,
 876                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 877                 CmmStaticLit (CmmInt 0 I64)
 878         ],
 879                 -- gcc puts an unpck here.  Wonder if we need it.
 880         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 881                 -- xorpd, so we need the 128-bit constant
 882         ]
 883   --
 884   return (Any F64 code)
 885 #endif
 886
 887 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 888
 889 getRegister (CmmMachOp mop [x]) -- unary MachOps
 890   = case mop of
 891 #if i386_TARGET_ARCH
 892       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 893       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 894 #endif
 895
 896       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 897       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 898
 899       -- Nop conversions
 900       -- TODO: these are only nops if the arg is not a fixed register that
 901       -- can't be byte-addressed.
 902       MO_U_Conv I32 I8  -> conversionNop I32 x
 903       MO_S_Conv I32 I8  -> conversionNop I32 x
 904       MO_U_Conv I16 I8  -> conversionNop I16 x
 905       MO_S_Conv I16 I8  -> conversionNop I16 x
 906       MO_U_Conv I32 I16 -> conversionNop I32 x
 907       MO_S_Conv I32 I16 -> conversionNop I32 x
 908 #if x86_64_TARGET_ARCH
 909       MO_U_Conv I64 I32 -> conversionNop I64 x
 910       MO_S_Conv I64 I32 -> conversionNop I64 x
 911       MO_U_Conv I64 I16 -> conversionNop I64 x
 912       MO_S_Conv I64 I16 -> conversionNop I64 x
 913       MO_U_Conv I64 I8  -> conversionNop I64 x
 914       MO_S_Conv I64 I8  -> conversionNop I64 x
 915 #endif
 916
 917       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 918       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 919
 920       -- widenings
 921       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 922       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 923       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 924
 925       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 926       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 927       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 928
 929 #if x86_64_TARGET_ARCH
 930       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 931       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 932       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 933       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 934       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 935       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 936         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 937         -- However, we don't want the register allocator to throw it
 938         -- away as an unnecessary reg-to-reg move, so we keep it in
 939         -- the form of a movzl and print it as a movl later.
 940 #endif
 941
 942 #if i386_TARGET_ARCH
 943       MO_S_Conv F32 F64 -> conversionNop F64 x
 944       MO_S_Conv F64 F32 -> conversionNop F32 x
 945 #else
 946       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 947       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 948 #endif
 949
 950       MO_S_Conv from to
 951         | isFloatingRep from -> coerceFP2Int from to x
 952         | isFloatingRep to   -> coerceInt2FP from to x
 953
 954       other -> pprPanic "getRegister" (pprMachOp mop)
 955    where
 956         -- signed or unsigned extension.
 957         integerExtend from to instr expr = do
 958             (reg,e_code) <- if from == I8 then getByteReg expr
 959                                           else getSomeReg expr
 960             let
 961                 code dst =
 962                   e_code `snocOL`
 963                   instr from (OpReg reg) (OpReg dst)
 964             return (Any to code)
 965
 966         conversionNop new_rep expr
 967             = do e_code <- getRegister expr
 968                  return (swizzleRegisterRep e_code new_rep)
 969
 970
 971 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 972   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 973     case mop of
 974       MO_Eq F32   -> condFltReg EQQ x y
 975       MO_Ne F32   -> condFltReg NE x y
 976       MO_S_Gt F32 -> condFltReg GTT x y
 977       MO_S_Ge F32 -> condFltReg GE x y
 978       MO_S_Lt F32 -> condFltReg LTT x y
 979       MO_S_Le F32 -> condFltReg LE x y
 980
 981       MO_Eq F64   -> condFltReg EQQ x y
 982       MO_Ne F64   -> condFltReg NE x y
 983       MO_S_Gt F64 -> condFltReg GTT x y
 984       MO_S_Ge F64 -> condFltReg GE x y
 985       MO_S_Lt F64 -> condFltReg LTT x y
 986       MO_S_Le F64 -> condFltReg LE x y
 987
 988       MO_Eq rep   -> condIntReg EQQ x y
 989       MO_Ne rep   -> condIntReg NE x y
 990
 991       MO_S_Gt rep -> condIntReg GTT x y
 992       MO_S_Ge rep -> condIntReg GE x y
 993       MO_S_Lt rep -> condIntReg LTT x y
 994       MO_S_Le rep -> condIntReg LE x y
 995
 996       MO_U_Gt rep -> condIntReg GU  x y
 997       MO_U_Ge rep -> condIntReg GEU x y
 998       MO_U_Lt rep -> condIntReg LU  x y
 999       MO_U_Le rep -> condIntReg LEU x y
1000
1001 #if i386_TARGET_ARCH
1002       MO_Add F32 -> trivialFCode F32 GADD x y
1003       MO_Sub F32 -> trivialFCode F32 GSUB x y
1004
1005       MO_Add F64 -> trivialFCode F64 GADD x y
1006       MO_Sub F64 -> trivialFCode F64 GSUB x y
1007
1008       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1009       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1010 #endif
1011
1012 #if x86_64_TARGET_ARCH
1013       MO_Add F32 -> trivialFCode F32 ADD x y
1014       MO_Sub F32 -> trivialFCode F32 SUB x y
1015
1016       MO_Add F64 -> trivialFCode F64 ADD x y
1017       MO_Sub F64 -> trivialFCode F64 SUB x y
1018
1019       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1020       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1021 #endif
1022
1023       MO_Add rep -> add_code rep x y
1024       MO_Sub rep -> sub_code rep x y
1025
1026       MO_S_Quot rep -> div_code rep True  True  x y
1027       MO_S_Rem  rep -> div_code rep True  False x y
1028       MO_U_Quot rep -> div_code rep False True  x y
1029       MO_U_Rem  rep -> div_code rep False False x y
1030
1031 #if i386_TARGET_ARCH
1032       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1033       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1034 #endif
1035
1036 #if x86_64_TARGET_ARCH
1037       MO_Mul   F32 -> trivialFCode F32 MUL x y
1038       MO_Mul   F64 -> trivialFCode F64 MUL x y
1039 #endif
1040
1041       MO_Mul   rep -> let op = IMUL rep in
1042                       trivialCode rep op (Just op) x y
1043
1044       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1045
1046       MO_And rep -> let op = AND rep in
1047                     trivialCode rep op (Just op) x y
1048       MO_Or  rep -> let op = OR  rep in
1049                     trivialCode rep op (Just op) x y
1050       MO_Xor rep -> let op = XOR rep in
1051                     trivialCode rep op (Just op) x y
1052
1053         {- Shift ops on x86s have constraints on their source, it
1054            either has to be Imm, CL or 1
1055             => trivialCode is not restrictive enough (sigh.)
1056         -}
1057       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1058       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1059       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1060
1061       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1062   where
1063     --------------------
1064     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1065     imulMayOflo rep a b = do
1066          (a_reg, a_code) <- getNonClobberedReg a
1067          b_code <- getAnyReg b
1068          let
1069              shift_amt  = case rep of
1070                            I32 -> 31
1071                            I64 -> 63
1072                            _ -> panic "shift_amt"
1073
1074              code = a_code `appOL` b_code eax `appOL`
1075                         toOL [
1076                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1077                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1078                                 -- sign extend lower part
1079                            SUB rep (OpReg edx) (OpReg eax)
1080                                 -- compare against upper
1081                            -- eax==0 if high part == sign extended low part
1082                         ]
1083          -- in
1084          return (Fixed rep eax code)
1085
1086     --------------------
1087     shift_code :: MachRep
1088                -> (Operand -> Operand -> Instr)
1089                -> CmmExpr
1090                -> CmmExpr
1091                -> NatM Register
1092
1093     {- Case1: shift length as immediate -}
1094     shift_code rep instr x y@(CmmLit lit) = do
1095           x_code <- getAnyReg x
1096           let
1097                code dst
1098                   = x_code dst `snocOL`
1099                     instr (OpImm (litToImm lit)) (OpReg dst)
1100           -- in
1101           return (Any rep code)
1102
1103     {- Case2: shift length is complex (non-immediate) -}
1104     shift_code rep instr x y{-amount-} = do
1105         (x_reg, x_code) <- getNonClobberedReg x
1106         y_code <- getAnyReg y
1107         let
1108            code = x_code `appOL`
1109                   y_code ecx `snocOL`
1110                   instr (OpReg ecx) (OpReg x_reg)
1111         -- in
1112         return (Fixed rep x_reg code)
1113
1114     --------------------
1115     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1116     add_code rep x (CmmLit (CmmInt y _))
1117         | not (is64BitInteger y) = add_int rep x y
1118     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1119
1120     --------------------
1121     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1122     sub_code rep x (CmmLit (CmmInt y _))
1123         | not (is64BitInteger (-y)) = add_int rep x (-y)
1124     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1125
1126     -- our three-operand add instruction:
1127     add_int rep x y = do
1128         (x_reg, x_code) <- getSomeReg x
1129         let
1130             imm = ImmInt (fromInteger y)
1131             code dst
1132                = x_code `snocOL`
1133                  LEA rep
1134                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1135                         (OpReg dst)
1136         --
1137         return (Any rep code)
1138
1139     ----------------------
1140     div_code rep signed quotient x y = do
1141            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1142            x_code <- getAnyReg x
1143            let
1144              widen | signed    = CLTD rep
1145                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1146
1147              instr | signed    = IDIV
1148                    | otherwise = DIV
1149
1150              code = y_code `appOL`
1151                     x_code eax `appOL`
1152                     toOL [widen, instr rep y_op]
1153
1154              result | quotient  = eax
1155                     | otherwise = edx
1156
1157            -- in
1158            return (Fixed rep result code)
1159
1160
1161 getRegister (CmmLoad mem pk)
1162   | isFloatingRep pk
1163   = do
1164     Amode src mem_code <- getAmode mem
1165     let
1166         code dst = mem_code `snocOL`
1167                    IF_ARCH_i386(GLD pk src dst,
1168                                 MOV pk (OpAddr src) (OpReg dst))
1169     --
1170     return (Any pk code)
1171
1172 #if i386_TARGET_ARCH
1173 getRegister (CmmLoad mem pk)
1174   | pk /= I64
1175   = do
1176     code <- intLoadCode (instr pk) mem
1177     return (Any pk code)
1178   where
1179         instr I8  = MOVZxL pk
1180         instr I16 = MOV I16
1181         instr I32 = MOV I32
1182         -- we always zero-extend 8-bit loads, if we
1183         -- can't think of anything better.  This is because
1184         -- we can't guarantee access to an 8-bit variant of every register
1185         -- (esi and edi don't have 8-bit variants), so to make things
1186         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1187 #endif
1188
1189 #if x86_64_TARGET_ARCH
1190 -- Simpler memory load code on x86_64
1191 getRegister (CmmLoad mem pk)
1192   = do
1193     code <- intLoadCode (MOV pk) mem
1194     return (Any pk code)
1195 #endif
1196
1197 getRegister (CmmLit (CmmInt 0 rep))
1198   = let
1199         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1200         adj_rep = case rep of I64 -> I32; _ -> rep
1201         rep1 = IF_ARCH_i386( rep, adj_rep )
1202         code dst
1203            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1204     in
1205         return (Any rep code)
1206
1207 #if x86_64_TARGET_ARCH
1208   -- optimisation for loading small literals on x86_64: take advantage
1209   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1210   -- instruction forms are shorter.
1211 getRegister (CmmLit lit)
1212   | I64 <- cmmLitRep lit, not (isBigLit lit)
1213   = let
1214         imm = litToImm lit
1215         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1216     in
1217         return (Any I64 code)
1218   where
1219    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1220    isBigLit _ = False
1221         -- note1: not the same as is64BitLit, because that checks for
1222         -- signed literals that fit in 32 bits, but we want unsigned
1223         -- literals here.
1224         -- note2: all labels are small, because we're assuming the
1225         -- small memory model (see gcc docs, -mcmodel=small).
1226 #endif
1227
1228 getRegister (CmmLit lit)
1229   = let
1230         rep = cmmLitRep lit
1231         imm = litToImm lit
1232         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1233     in
1234         return (Any rep code)
1235
1236 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1237
1238
1239 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1240    -> NatM (Reg -> InstrBlock)
1241 intLoadCode instr mem = do
1242   Amode src mem_code <- getAmode mem
1243   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1244
1245 -- Compute an expression into *any* register, adding the appropriate
1246 -- move instruction if necessary.
1247 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1248 getAnyReg expr = do
1249   r <- getRegister expr
1250   anyReg r
1251
1252 anyReg :: Register -> NatM (Reg -> InstrBlock)
1253 anyReg (Any _ code)          = return code
1254 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1255
1256 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1257 -- Fixed registers might not be byte-addressable, so we make sure we've
1258 -- got a temporary, inserting an extra reg copy if necessary.
1259 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1260 #if x86_64_TARGET_ARCH
1261 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1262 #else
1263 getByteReg expr = do
1264   r <- getRegister expr
1265   case r of
1266     Any rep code -> do
1267         tmp <- getNewRegNat rep
1268         return (tmp, code tmp)
1269     Fixed rep reg code
1270         | isVirtualReg reg -> return (reg,code)
1271         | otherwise -> do
1272             tmp <- getNewRegNat rep
1273             return (tmp, code `snocOL` reg2reg rep reg tmp)
1274         -- ToDo: could optimise slightly by checking for byte-addressable
1275         -- real registers, but that will happen very rarely if at all.
1276 #endif
1277
1278 -- Another variant: this time we want the result in a register that cannot
1279 -- be modified by code to evaluate an arbitrary expression.
1280 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1281 getNonClobberedReg expr = do
1282   r <- getRegister expr
1283   case r of
1284     Any rep code -> do
1285         tmp <- getNewRegNat rep
1286         return (tmp, code tmp)
1287     Fixed rep reg code
1288         -- only free regs can be clobbered
1289         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1290                 tmp <- getNewRegNat rep
1291                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1292         | otherwise ->
1293                 return (reg, code)
1294
1295 reg2reg :: MachRep -> Reg -> Reg -> Instr
1296 reg2reg rep src dst
1297 #if i386_TARGET_ARCH
1298   | isFloatingRep rep = GMOV src dst
1299 #endif
1300   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1301
1302 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1303
1304 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1305
1306 #if sparc_TARGET_ARCH
1307
1308 getRegister (CmmLit (CmmFloat f F32)) = do
1309     lbl <- getNewLabelNat
1310     let code dst = toOL [
1311             LDATA ReadOnlyData
1312                         [CmmDataLabel lbl,
1313                          CmmStaticLit (CmmFloat f F32)],
1314             SETHI (HI (ImmCLbl lbl)) dst,
1315             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1316     return (Any F32 code)
1317
1318 getRegister (CmmLit (CmmFloat d F64)) = do
1319     lbl <- getNewLabelNat
1320     let code dst = toOL [
1321             LDATA ReadOnlyData
1322                         [CmmDataLabel lbl,
1323                          CmmStaticLit (CmmFloat d F64)],
1324             SETHI (HI (ImmCLbl lbl)) dst,
1325             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1326     return (Any F64 code)
1327
1328 getRegister (CmmMachOp mop [x]) -- unary MachOps
1329   = case mop of
1330       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1331       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1332
1333       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1334       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1335
1336       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1337
1338       MO_U_Conv F64 F32-> coerceDbl2Flt x
1339       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1340
1341       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1342       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1343       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1344       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1345
1346       -- Conversions which are a nop on sparc
1347       MO_U_Conv from to
1348         | from == to   -> conversionNop to   x
1349       MO_U_Conv I32 to -> conversionNop to   x
1350       MO_S_Conv I32 to -> conversionNop to   x
1351
1352       -- widenings
1353       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1354       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1355       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1356       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1357
1358       other_op -> panic "Unknown unary mach op"
1359     where
1360         -- XXX SLL/SRL?
1361         integerExtend signed from to expr = do
1362            (reg, e_code) <- getSomeReg expr
1363            let
1364                code dst =
1365                    e_code `snocOL`
1366                    ((if signed then SRA else SRL)
1367                           reg (RIImm (ImmInt 0)) dst)
1368            return (Any to code)
1369         conversionNop new_rep expr
1370             = do e_code <- getRegister expr
1371                  return (swizzleRegisterRep e_code new_rep)
1372
1373 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1374   = case mop of
1375       MO_Eq F32 -> condFltReg EQQ x y
1376       MO_Ne F32 -> condFltReg NE x y
1377
1378       MO_S_Gt F32 -> condFltReg GTT x y
1379       MO_S_Ge F32 -> condFltReg GE x y
1380       MO_S_Lt F32 -> condFltReg LTT x y
1381       MO_S_Le F32 -> condFltReg LE x y
1382
1383       MO_Eq F64 -> condFltReg EQQ x y
1384       MO_Ne F64 -> condFltReg NE x y
1385
1386       MO_S_Gt F64 -> condFltReg GTT x y
1387       MO_S_Ge F64 -> condFltReg GE x y
1388       MO_S_Lt F64 -> condFltReg LTT x y
1389       MO_S_Le F64 -> condFltReg LE x y
1390
1391       MO_Eq rep -> condIntReg EQQ x y
1392       MO_Ne rep -> condIntReg NE x y
1393
1394       MO_S_Gt rep -> condIntReg GTT x y
1395       MO_S_Ge rep -> condIntReg GE x y
1396       MO_S_Lt rep -> condIntReg LTT x y
1397       MO_S_Le rep -> condIntReg LE x y
1398
1399       MO_U_Gt I32  -> condIntReg GTT x y
1400       MO_U_Ge I32  -> condIntReg GE x y
1401       MO_U_Lt I32  -> condIntReg LTT x y
1402       MO_U_Le I32  -> condIntReg LE x y
1403
1404       MO_U_Gt I16 -> condIntReg GU  x y
1405       MO_U_Ge I16 -> condIntReg GEU x y
1406       MO_U_Lt I16 -> condIntReg LU  x y
1407       MO_U_Le I16 -> condIntReg LEU x y
1408
1409       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1410       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1411
1412       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1413 {-
1414       -- ToDo: teach about V8+ SPARC div instructions
1415       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1416       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1417       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1418       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1419 -}
1420       MO_Add F32  -> trivialFCode F32 FADD  x y
1421       MO_Sub F32   -> trivialFCode F32  FSUB x y
1422       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1423       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1424
1425       MO_Add F64   -> trivialFCode F64 FADD  x y
1426       MO_Sub F64   -> trivialFCode F64  FSUB x y
1427       MO_Mul F64   -> trivialFCode F64  FMUL x y
1428       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1429
1430       MO_And rep   -> trivialCode rep (AND False) x y
1431       MO_Or rep    -> trivialCode rep (OR  False) x y
1432       MO_Xor rep   -> trivialCode rep (XOR False) x y
1433
1434       MO_Mul rep -> trivialCode rep (SMUL False) x y
1435
1436       MO_Shl rep   -> trivialCode rep SLL  x y
1437       MO_U_Shr rep   -> trivialCode rep SRL x y
1438       MO_S_Shr rep   -> trivialCode rep SRA x y
1439
1440 {-
1441       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1442                                          [promote x, promote y])
1443                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1444       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1445                                         [x, y])
1446 -}
1447       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1448   where
1449     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1450
1451     --------------------
1452     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1453     imulMayOflo rep a b = do
1454          (a_reg, a_code) <- getSomeReg a
1455          (b_reg, b_code) <- getSomeReg b
1456          res_lo <- getNewRegNat I32
1457          res_hi <- getNewRegNat I32
1458          let
1459             shift_amt  = case rep of
1460                           I32 -> 31
1461                           I64 -> 63
1462                           _ -> panic "shift_amt"
1463             code dst = a_code `appOL` b_code `appOL`
1464                        toOL [
1465                            SMUL False a_reg (RIReg b_reg) res_lo,
1466                            RDY res_hi,
1467                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1468                            SUB False False res_lo (RIReg res_hi) dst
1469                         ]
1470          return (Any I32 code)
1471
1472 getRegister (CmmLoad mem pk) = do
1473     Amode src code <- getAmode mem
1474     let
1475         code__2 dst = code `snocOL` LD pk src dst
1476     return (Any pk code__2)
1477
1478 getRegister (CmmLit (CmmInt i _))
1479   | fits13Bits i
1480   = let
1481         src = ImmInt (fromInteger i)
1482         code dst = unitOL (OR False g0 (RIImm src) dst)
1483     in
1484         return (Any I32 code)
1485
1486 getRegister (CmmLit lit)
1487   = let rep = cmmLitRep lit
1488         imm = litToImm lit
1489         code dst = toOL [
1490             SETHI (HI imm) dst,
1491             OR False dst (RIImm (LO imm)) dst]
1492     in return (Any I32 code)
1493
1494 #endif /* sparc_TARGET_ARCH */
1495
1496 #if powerpc_TARGET_ARCH
1497 getRegister (CmmLoad mem pk)
1498   | pk /= I64
1499   = do
1500         Amode addr addr_code <- getAmode mem
1501         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1502                        addr_code `snocOL` LD pk dst addr
1503         return (Any pk code)
1504
1505 -- catch simple cases of zero- or sign-extended load
1506 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1507     Amode addr addr_code <- getAmode mem
1508     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1509
1510 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1511
1512 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1513     Amode addr addr_code <- getAmode mem
1514     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1515
1516 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1517     Amode addr addr_code <- getAmode mem
1518     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1519
1520 getRegister (CmmMachOp mop [x]) -- unary MachOps
1521   = case mop of
1522       MO_Not rep   -> trivialUCode rep NOT x
1523
1524       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1525       MO_S_Conv F32 F64 -> conversionNop F64 x
1526
1527       MO_S_Conv from to
1528         | from == to         -> conversionNop to x
1529         | isFloatingRep from -> coerceFP2Int from to x
1530         | isFloatingRep to   -> coerceInt2FP from to x
1531
1532         -- narrowing is a nop: we treat the high bits as undefined
1533       MO_S_Conv I32 to -> conversionNop to x
1534       MO_S_Conv I16 I8 -> conversionNop I8 x
1535       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1536       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1537
1538       MO_U_Conv from to
1539         | from == to -> conversionNop to x
1540         -- narrowing is a nop: we treat the high bits as undefined
1541       MO_U_Conv I32 to -> conversionNop to x
1542       MO_U_Conv I16 I8 -> conversionNop I8 x
1543       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1544       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1545
1546       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1547       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1548       MO_S_Neg rep      -> trivialUCode rep NEG x
1549
1550     where
1551         conversionNop new_rep expr
1552             = do e_code <- getRegister expr
1553                  return (swizzleRegisterRep e_code new_rep)
1554
1555 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1556   = case mop of
1557       MO_Eq F32 -> condFltReg EQQ x y
1558       MO_Ne F32 -> condFltReg NE  x y
1559
1560       MO_S_Gt F32 -> condFltReg GTT x y
1561       MO_S_Ge F32 -> condFltReg GE  x y
1562       MO_S_Lt F32 -> condFltReg LTT x y
1563       MO_S_Le F32 -> condFltReg LE  x y
1564
1565       MO_Eq F64 -> condFltReg EQQ x y
1566       MO_Ne F64 -> condFltReg NE  x y
1567
1568       MO_S_Gt F64 -> condFltReg GTT x y
1569       MO_S_Ge F64 -> condFltReg GE  x y
1570       MO_S_Lt F64 -> condFltReg LTT x y
1571       MO_S_Le F64 -> condFltReg LE  x y
1572
1573       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1574       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1575
1576       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1577       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1578       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1579       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1580
1581       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1582       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1583       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1584       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1585
1586       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1587       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1588       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1589       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1590
1591       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1592       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1593       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1594       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1595
1596          -- optimize addition with 32-bit immediate
1597          -- (needed for PIC)
1598       MO_Add I32 ->
1599         case y of
1600           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1601             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1602           CmmLit lit
1603             -> do
1604                 (src, srcCode) <- getSomeReg x
1605                 let imm = litToImm lit
1606                     code dst = srcCode `appOL` toOL [
1607                                     ADDIS dst src (HA imm),
1608                                     ADD dst dst (RIImm (LO imm))
1609                                 ]
1610                 return (Any I32 code)
1611           _ -> trivialCode I32 True ADD x y
1612
1613       MO_Add rep -> trivialCode rep True ADD x y
1614       MO_Sub rep ->
1615         case y of    -- subfi ('substract from' with immediate) doesn't exist
1616           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1617             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1618           _ -> trivialCodeNoImm rep SUBF y x
1619
1620       MO_Mul rep -> trivialCode rep True MULLW x y
1621
1622       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1623
1624       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1625       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1626
1627       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1628       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1629
1630       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1631       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1632
1633       MO_And rep   -> trivialCode rep False AND x y
1634       MO_Or rep    -> trivialCode rep False OR x y
1635       MO_Xor rep   -> trivialCode rep False XOR x y
1636
1637       MO_Shl rep   -> trivialCode rep False SLW x y
1638       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1639       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1640
1641 getRegister (CmmLit (CmmInt i rep))
1642   | Just imm <- makeImmediate rep True i
1643   = let
1644         code dst = unitOL (LI dst imm)
1645     in
1646         return (Any rep code)
1647
1648 getRegister (CmmLit (CmmFloat f frep)) = do
1649     lbl <- getNewLabelNat
1650     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1651     Amode addr addr_code <- getAmode dynRef
1652     let code dst =
1653             LDATA ReadOnlyData  [CmmDataLabel lbl,
1654                                  CmmStaticLit (CmmFloat f frep)]
1655             `consOL` (addr_code `snocOL` LD frep dst addr)
1656     return (Any frep code)
1657
1658 getRegister (CmmLit lit)
1659   = let rep = cmmLitRep lit
1660         imm = litToImm lit
1661         code dst = toOL [
1662               LIS dst (HI imm),
1663               OR dst dst (RIImm (LO imm))
1664           ]
1665     in return (Any rep code)
1666
1667 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1668
1669     -- extend?Rep: wrap integer expression of type rep
1670     -- in a conversion to I32
1671 extendSExpr I32 x = x
1672 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1673 extendUExpr I32 x = x
1674 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1675
1676 #endif /* powerpc_TARGET_ARCH */
1677
1678
1679 -- -----------------------------------------------------------------------------
1680 --  The 'Amode' type: Memory addressing modes passed up the tree.
1681
1682 data Amode = Amode AddrMode InstrBlock
1683
1684 {-
1685 Now, given a tree (the argument to an CmmLoad) that references memory,
1686 produce a suitable addressing mode.
1687
1688 A Rule of the Game (tm) for Amodes: use of the addr bit must
1689 immediately follow use of the code part, since the code part puts
1690 values in registers which the addr then refers to.  So you can't put
1691 anything in between, lest it overwrite some of those registers.  If
1692 you need to do some other computation between the code part and use of
1693 the addr bit, first store the effective address from the amode in a
1694 temporary, then do the other computation, and then use the temporary:
1695
1696     code
1697     LEA amode, tmp
1698     ... other computation ...
1699     ... (tmp) ...
1700 -}
1701
1702 getAmode :: CmmExpr -> NatM Amode
1703 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1704
1705 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1706
1707 #if alpha_TARGET_ARCH
1708
1709 getAmode (StPrim IntSubOp [x, StInt i])
1710   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1711     getRegister x               `thenNat` \ register ->
1712     let
1713         code = registerCode register tmp
1714         reg  = registerName register tmp
1715         off  = ImmInt (-(fromInteger i))
1716     in
1717     return (Amode (AddrRegImm reg off) code)
1718
1719 getAmode (StPrim IntAddOp [x, StInt i])
1720   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1721     getRegister x               `thenNat` \ register ->
1722     let
1723         code = registerCode register tmp
1724         reg  = registerName register tmp
1725         off  = ImmInt (fromInteger i)
1726     in
1727     return (Amode (AddrRegImm reg off) code)
1728
1729 getAmode leaf
1730   | isJust imm
1731   = return (Amode (AddrImm imm__2) id)
1732   where
1733     imm = maybeImm leaf
1734     imm__2 = case imm of Just x -> x
1735
1736 getAmode other
1737   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1738     getRegister other           `thenNat` \ register ->
1739     let
1740         code = registerCode register tmp
1741         reg  = registerName register tmp
1742     in
1743     return (Amode (AddrReg reg) code)
1744
1745 #endif /* alpha_TARGET_ARCH */
1746
1747 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1748
1749 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1750
1751 -- This is all just ridiculous, since it carefully undoes
1752 -- what mangleIndexTree has just done.
1753 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1754   | not (is64BitLit lit)
1755   -- ASSERT(rep == I32)???
1756   = do (x_reg, x_code) <- getSomeReg x
1757        let off = ImmInt (-(fromInteger i))
1758        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1759
1760 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1761   | not (is64BitLit lit)
1762   -- ASSERT(rep == I32)???
1763   = do (x_reg, x_code) <- getSomeReg x
1764        let off = ImmInt (fromInteger i)
1765        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1766
1767 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1768 -- recognised by the next rule.
1769 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1770                                   b@(CmmLit _)])
1771   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1772
1773 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1774                                         [y, CmmLit (CmmInt shift _)]])
1775   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1776   = do (x_reg, x_code) <- getNonClobberedReg x
1777         -- x must be in a temp, because it has to stay live over y_code
1778         -- we could compre x_reg and y_reg and do something better here...
1779        (y_reg, y_code) <- getSomeReg y
1780        let
1781            code = x_code `appOL` y_code
1782            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1783        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1784                code)
1785
1786 getAmode (CmmLit lit) | not (is64BitLit lit)
1787   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1788
1789 getAmode expr = do
1790   (reg,code) <- getSomeReg expr
1791   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1792
1793 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1794
1795 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1796
1797 #if sparc_TARGET_ARCH
1798
1799 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1800   | fits13Bits (-i)
1801   = do
1802        (reg, code) <- getSomeReg x
1803        let
1804          off  = ImmInt (-(fromInteger i))
1805        return (Amode (AddrRegImm reg off) code)
1806
1807
1808 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1809   | fits13Bits i
1810   = do
1811        (reg, code) <- getSomeReg x
1812        let
1813          off  = ImmInt (fromInteger i)
1814        return (Amode (AddrRegImm reg off) code)
1815
1816 getAmode (CmmMachOp (MO_Add rep) [x, y])
1817   = do
1818     (regX, codeX) <- getSomeReg x
1819     (regY, codeY) <- getSomeReg y
1820     let
1821         code = codeX `appOL` codeY
1822     return (Amode (AddrRegReg regX regY) code)
1823
1824 -- XXX Is this same as "leaf" in Stix?
1825 getAmode (CmmLit lit)
1826   = do
1827       tmp <- getNewRegNat I32
1828       let
1829         code = unitOL (SETHI (HI imm__2) tmp)
1830       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1831       where
1832          imm__2 = litToImm lit
1833
1834 getAmode other
1835   = do
1836        (reg, code) <- getSomeReg other
1837        let
1838             off  = ImmInt 0
1839        return (Amode (AddrRegImm reg off) code)
1840
1841 #endif /* sparc_TARGET_ARCH */
1842
1843 #ifdef powerpc_TARGET_ARCH
1844 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1845   | Just off <- makeImmediate I32 True (-i)
1846   = do
1847         (reg, code) <- getSomeReg x
1848         return (Amode (AddrRegImm reg off) code)
1849
1850
1851 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1852   | Just off <- makeImmediate I32 True i
1853   = do
1854         (reg, code) <- getSomeReg x
1855         return (Amode (AddrRegImm reg off) code)
1856
1857    -- optimize addition with 32-bit immediate
1858    -- (needed for PIC)
1859 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1860   = do
1861         tmp <- getNewRegNat I32
1862         (src, srcCode) <- getSomeReg x
1863         let imm = litToImm lit
1864             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1865         return (Amode (AddrRegImm tmp (LO imm)) code)
1866
1867 getAmode (CmmLit lit)
1868   = do
1869         tmp <- getNewRegNat I32
1870         let imm = litToImm lit
1871             code = unitOL (LIS tmp (HA imm))
1872         return (Amode (AddrRegImm tmp (LO imm)) code)
1873
1874 getAmode (CmmMachOp (MO_Add I32) [x, y])
1875   = do
1876         (regX, codeX) <- getSomeReg x
1877         (regY, codeY) <- getSomeReg y
1878         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1879
1880 getAmode other
1881   = do
1882         (reg, code) <- getSomeReg other
1883         let
1884             off  = ImmInt 0
1885         return (Amode (AddrRegImm reg off) code)
1886 #endif /* powerpc_TARGET_ARCH */
1887
1888 -- -----------------------------------------------------------------------------
1889 -- getOperand: sometimes any operand will do.
1890
1891 -- getNonClobberedOperand: the value of the operand will remain valid across
1892 -- the computation of an arbitrary expression, unless the expression
1893 -- is computed directly into a register which the operand refers to
1894 -- (see trivialCode where this function is used for an example).
1895
1896 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1897
1898 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1899 #if x86_64_TARGET_ARCH
1900 getNonClobberedOperand (CmmLit lit)
1901   | isSuitableFloatingPointLit lit = do
1902     lbl <- getNewLabelNat
1903     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1904                                            CmmStaticLit lit])
1905     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1906 #endif
1907 getNonClobberedOperand (CmmLit lit)
1908   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1909     return (OpImm (litToImm lit), nilOL)
1910 getNonClobberedOperand (CmmLoad mem pk)
1911   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1912     Amode src mem_code <- getAmode mem
1913     (src',save_code) <-
1914         if (amodeCouldBeClobbered src)
1915                 then do
1916                    tmp <- getNewRegNat wordRep
1917                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1918                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1919                 else
1920                    return (src, nilOL)
1921     return (OpAddr src', save_code `appOL` mem_code)
1922 getNonClobberedOperand e = do
1923     (reg, code) <- getNonClobberedReg e
1924     return (OpReg reg, code)
1925
1926 amodeCouldBeClobbered :: AddrMode -> Bool
1927 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1928
1929 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1930 regClobbered _ = False
1931
1932 -- getOperand: the operand is not required to remain valid across the
1933 -- computation of an arbitrary expression.
1934 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1935 #if x86_64_TARGET_ARCH
1936 getOperand (CmmLit lit)
1937   | isSuitableFloatingPointLit lit = do
1938     lbl <- getNewLabelNat
1939     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1940                                            CmmStaticLit lit])
1941     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1942 #endif
1943 getOperand (CmmLit lit)
1944   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1945     return (OpImm (litToImm lit), nilOL)
1946 getOperand (CmmLoad mem pk)
1947   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1948     Amode src mem_code <- getAmode mem
1949     return (OpAddr src, mem_code)
1950 getOperand e = do
1951     (reg, code) <- getSomeReg e
1952     return (OpReg reg, code)
1953
1954 isOperand :: CmmExpr -> Bool
1955 isOperand (CmmLoad _ _) = True
1956 isOperand (CmmLit lit)  = not (is64BitLit lit)
1957                           || isSuitableFloatingPointLit lit
1958 isOperand _             = False
1959
1960 -- if we want a floating-point literal as an operand, we can
1961 -- use it directly from memory.  However, if the literal is
1962 -- zero, we're better off generating it into a register using
1963 -- xor.
1964 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1965 isSuitableFloatingPointLit _ = False
1966
1967 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1968 getRegOrMem (CmmLoad mem pk)
1969   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1970     Amode src mem_code <- getAmode mem
1971     return (OpAddr src, mem_code)
1972 getRegOrMem e = do
1973     (reg, code) <- getNonClobberedReg e
1974     return (OpReg reg, code)
1975
1976 #if x86_64_TARGET_ARCH
1977 is64BitLit (CmmInt i I64) = is64BitInteger i
1978    -- assume that labels are in the range 0-2^31-1: this assumes the
1979    -- small memory model (see gcc docs, -mcmodel=small).
1980 #endif
1981 is64BitLit x = False
1982 #endif
1983
1984 is64BitInteger :: Integer -> Bool
1985 is64BitInteger i = i > 0x7fffffff || i < -0x80000000
1986
1987 -- -----------------------------------------------------------------------------
1988 --  The 'CondCode' type:  Condition codes passed up the tree.
1989
1990 data CondCode = CondCode Bool Cond InstrBlock
1991
1992 -- Set up a condition code for a conditional branch.
1993
1994 getCondCode :: CmmExpr -> NatM CondCode
1995
1996 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1997
1998 #if alpha_TARGET_ARCH
1999 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2000 #endif /* alpha_TARGET_ARCH */
2001
2002 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2003
2004 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2005 -- yes, they really do seem to want exactly the same!
2006
2007 getCondCode (CmmMachOp mop [x, y])
2008   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2009     case mop of
2010       MO_Eq F32 -> condFltCode EQQ x y
2011       MO_Ne F32 -> condFltCode NE  x y
2012
2013       MO_S_Gt F32 -> condFltCode GTT x y
2014       MO_S_Ge F32 -> condFltCode GE  x y
2015       MO_S_Lt F32 -> condFltCode LTT x y
2016       MO_S_Le F32 -> condFltCode LE  x y
2017
2018       MO_Eq F64 -> condFltCode EQQ x y
2019       MO_Ne F64 -> condFltCode NE  x y
2020
2021       MO_S_Gt F64 -> condFltCode GTT x y
2022       MO_S_Ge F64 -> condFltCode GE  x y
2023       MO_S_Lt F64 -> condFltCode LTT x y
2024       MO_S_Le F64 -> condFltCode LE  x y
2025
2026       MO_Eq rep -> condIntCode EQQ  x y
2027       MO_Ne rep -> condIntCode NE   x y
2028
2029       MO_S_Gt rep -> condIntCode GTT  x y
2030       MO_S_Ge rep -> condIntCode GE   x y
2031       MO_S_Lt rep -> condIntCode LTT  x y
2032       MO_S_Le rep -> condIntCode LE   x y
2033
2034       MO_U_Gt rep -> condIntCode GU   x y
2035       MO_U_Ge rep -> condIntCode GEU  x y
2036       MO_U_Lt rep -> condIntCode LU   x y
2037       MO_U_Le rep -> condIntCode LEU  x y
2038
2039       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2040
2041 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2042
2043 #elif powerpc_TARGET_ARCH
2044
2045 -- almost the same as everywhere else - but we need to
2046 -- extend small integers to 32 bit first
2047
2048 getCondCode (CmmMachOp mop [x, y])
2049   = case mop of
2050       MO_Eq F32 -> condFltCode EQQ x y
2051       MO_Ne F32 -> condFltCode NE  x y
2052
2053       MO_S_Gt F32 -> condFltCode GTT x y
2054       MO_S_Ge F32 -> condFltCode GE  x y
2055       MO_S_Lt F32 -> condFltCode LTT x y
2056       MO_S_Le F32 -> condFltCode LE  x y
2057
2058       MO_Eq F64 -> condFltCode EQQ x y
2059       MO_Ne F64 -> condFltCode NE  x y
2060
2061       MO_S_Gt F64 -> condFltCode GTT x y
2062       MO_S_Ge F64 -> condFltCode GE  x y
2063       MO_S_Lt F64 -> condFltCode LTT x y
2064       MO_S_Le F64 -> condFltCode LE  x y
2065
2066       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2067       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2068
2069       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2070       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2071       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2072       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2073
2074       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2075       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2076       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2077       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2078
2079       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2080
2081 getCondCode other =  panic "getCondCode(2)(powerpc)"
2082
2083
2084 #endif
2085
2086
2087 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2088 -- passed back up the tree.
2089
2090 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2091
2092 #if alpha_TARGET_ARCH
2093 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2094 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2095 #endif /* alpha_TARGET_ARCH */
2096
2097 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2098 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2099
2100 -- memory vs immediate
2101 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2102     Amode x_addr x_code <- getAmode x
2103     let
2104         imm  = litToImm lit
2105         code = x_code `snocOL`
2106                   CMP pk (OpImm imm) (OpAddr x_addr)
2107     --
2108     return (CondCode False cond code)
2109
2110 -- anything vs zero
2111 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2112     (x_reg, x_code) <- getSomeReg x
2113     let
2114         code = x_code `snocOL`
2115                   TEST pk (OpReg x_reg) (OpReg x_reg)
2116     --
2117     return (CondCode False cond code)
2118
2119 -- anything vs operand
2120 condIntCode cond x y | isOperand y = do
2121     (x_reg, x_code) <- getNonClobberedReg x
2122     (y_op,  y_code) <- getOperand y
2123     let
2124         code = x_code `appOL` y_code `snocOL`
2125                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2126     -- in
2127     return (CondCode False cond code)
2128
2129 -- anything vs anything
2130 condIntCode cond x y = do
2131   (y_reg, y_code) <- getNonClobberedReg y
2132   (x_op, x_code) <- getRegOrMem x
2133   let
2134         code = y_code `appOL`
2135                x_code `snocOL`
2136                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2137   -- in
2138   return (CondCode False cond code)
2139 #endif
2140
2141 #if i386_TARGET_ARCH
2142 condFltCode cond x y
2143   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2144   (x_reg, x_code) <- getNonClobberedReg x
2145   (y_reg, y_code) <- getSomeReg y
2146   let
2147         code = x_code `appOL` y_code `snocOL`
2148                 GCMP cond x_reg y_reg
2149   -- The GCMP insn does the test and sets the zero flag if comparable
2150   -- and true.  Hence we always supply EQQ as the condition to test.
2151   return (CondCode True EQQ code)
2152 #endif /* i386_TARGET_ARCH */
2153
2154 #if x86_64_TARGET_ARCH
2155 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2156 -- an operand, but the right must be a reg.  We can probably do better
2157 -- than this general case...
2158 condFltCode cond x y = do
2159   (x_reg, x_code) <- getNonClobberedReg x
2160   (y_op, y_code) <- getOperand y
2161   let
2162         code = x_code `appOL`
2163                y_code `snocOL`
2164                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2165         -- NB(1): we need to use the unsigned comparison operators on the
2166         -- result of this comparison.
2167   -- in
2168   return (CondCode True (condToUnsigned cond) code)
2169 #endif
2170
2171 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2172
2173 #if sparc_TARGET_ARCH
2174
2175 condIntCode cond x (CmmLit (CmmInt y rep))
2176   | fits13Bits y
2177   = do
2178        (src1, code) <- getSomeReg x
2179        let
2180            src2 = ImmInt (fromInteger y)
2181            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2182        return (CondCode False cond code')
2183
2184 condIntCode cond x y = do
2185     (src1, code1) <- getSomeReg x
2186     (src2, code2) <- getSomeReg y
2187     let
2188         code__2 = code1 `appOL` code2 `snocOL`
2189                   SUB False True src1 (RIReg src2) g0
2190     return (CondCode False cond code__2)
2191
2192 -----------
2193 condFltCode cond x y = do
2194     (src1, code1) <- getSomeReg x
2195     (src2, code2) <- getSomeReg y
2196     tmp <- getNewRegNat F64
2197     let
2198         promote x = FxTOy F32 F64 x tmp
2199
2200         pk1   = cmmExprRep x
2201         pk2   = cmmExprRep y
2202
2203         code__2 =
2204                 if pk1 == pk2 then
2205                     code1 `appOL` code2 `snocOL`
2206                     FCMP True pk1 src1 src2
2207                 else if pk1 == F32 then
2208                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2209                     FCMP True F64 tmp src2
2210                 else
2211                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2212                     FCMP True F64 src1 tmp
2213     return (CondCode True cond code__2)
2214
2215 #endif /* sparc_TARGET_ARCH */
2216
2217 #if powerpc_TARGET_ARCH
2218 --  ###FIXME: I16 and I8!
2219 condIntCode cond x (CmmLit (CmmInt y rep))
2220   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2221   = do
2222         (src1, code) <- getSomeReg x
2223         let
2224             code' = code `snocOL`
2225                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2226         return (CondCode False cond code')
2227
2228 condIntCode cond x y = do
2229     (src1, code1) <- getSomeReg x
2230     (src2, code2) <- getSomeReg y
2231     let
2232         code' = code1 `appOL` code2 `snocOL`
2233                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2234     return (CondCode False cond code')
2235
2236 condFltCode cond x y = do
2237     (src1, code1) <- getSomeReg x
2238     (src2, code2) <- getSomeReg y
2239     let
2240         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2241         code'' = case cond of -- twiddle CR to handle unordered case
2242                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2243                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2244                     _ -> code'
2245                  where
2246                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2247     return (CondCode True cond code'')
2248
2249 #endif /* powerpc_TARGET_ARCH */
2250
2251 -- -----------------------------------------------------------------------------
2252 -- Generating assignments
2253
2254 -- Assignments are really at the heart of the whole code generation
2255 -- business.  Almost all top-level nodes of any real importance are
2256 -- assignments, which correspond to loads, stores, or register
2257 -- transfers.  If we're really lucky, some of the register transfers
2258 -- will go away, because we can use the destination register to
2259 -- complete the code generation for the right hand side.  This only
2260 -- fails when the right hand side is forced into a fixed register
2261 -- (e.g. the result of a call).
2262
2263 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2264 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2265
2266 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2267 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2268
2269 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2270
2271 #if alpha_TARGET_ARCH
2272
2273 assignIntCode pk (CmmLoad dst _) src
2274   = getNewRegNat IntRep             `thenNat` \ tmp ->
2275     getAmode dst                    `thenNat` \ amode ->
2276     getRegister src                 `thenNat` \ register ->
2277     let
2278         code1   = amodeCode amode []
2279         dst__2  = amodeAddr amode
2280         code2   = registerCode register tmp []
2281         src__2  = registerName register tmp
2282         sz      = primRepToSize pk
2283         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2284     in
2285     return code__2
2286
2287 assignIntCode pk dst src
2288   = getRegister dst                         `thenNat` \ register1 ->
2289     getRegister src                         `thenNat` \ register2 ->
2290     let
2291         dst__2  = registerName register1 zeroh
2292         code    = registerCode register2 dst__2
2293         src__2  = registerName register2 dst__2
2294         code__2 = if isFixed register2
2295                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2296                   else code
2297     in
2298     return code__2
2299
2300 #endif /* alpha_TARGET_ARCH */
2301
2302 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2303
2304 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2305
2306 -- integer assignment to memory
2307 assignMem_IntCode pk addr src = do
2308     Amode addr code_addr <- getAmode addr
2309     (code_src, op_src)   <- get_op_RI src
2310     let
2311         code = code_src `appOL`
2312                code_addr `snocOL`
2313                   MOV pk op_src (OpAddr addr)
2314         -- NOTE: op_src is stable, so it will still be valid
2315         -- after code_addr.  This may involve the introduction
2316         -- of an extra MOV to a temporary register, but we hope
2317         -- the register allocator will get rid of it.
2318     --
2319     return code
2320   where
2321     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2322     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2323       = return (nilOL, OpImm (litToImm lit))
2324     get_op_RI op
2325       = do (reg,code) <- getNonClobberedReg op
2326            return (code, OpReg reg)
2327
2328
2329 -- Assign; dst is a reg, rhs is mem
2330 assignReg_IntCode pk reg (CmmLoad src _) = do
2331   load_code <- intLoadCode (MOV pk) src
2332   return (load_code (getRegisterReg reg))
2333
2334 -- dst is a reg, but src could be anything
2335 assignReg_IntCode pk reg src = do
2336   code <- getAnyReg src
2337   return (code (getRegisterReg reg))
2338
2339 #endif /* i386_TARGET_ARCH */
2340
2341 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2342
2343 #if sparc_TARGET_ARCH
2344
2345 assignMem_IntCode pk addr src = do
2346     (srcReg, code) <- getSomeReg src
2347     Amode dstAddr addr_code <- getAmode addr
2348     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2349
2350 assignReg_IntCode pk reg src = do
2351     r <- getRegister src
2352     return $ case r of
2353         Any _ code         -> code dst
2354         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2355     where
2356       dst = getRegisterReg reg
2357
2358
2359 #endif /* sparc_TARGET_ARCH */
2360
2361 #if powerpc_TARGET_ARCH
2362
2363 assignMem_IntCode pk addr src = do
2364     (srcReg, code) <- getSomeReg src
2365     Amode dstAddr addr_code <- getAmode addr
2366     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2367
2368 -- dst is a reg, but src could be anything
2369 assignReg_IntCode pk reg src
2370     = do
2371         r <- getRegister src
2372         return $ case r of
2373             Any _ code         -> code dst
2374             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2375     where
2376         dst = getRegisterReg reg
2377
2378 #endif /* powerpc_TARGET_ARCH */
2379
2380
2381 -- -----------------------------------------------------------------------------
2382 -- Floating-point assignments
2383
2384 #if alpha_TARGET_ARCH
2385
2386 assignFltCode pk (CmmLoad dst _) src
2387   = getNewRegNat pk                 `thenNat` \ tmp ->
2388     getAmode dst                    `thenNat` \ amode ->
2389     getRegister src                         `thenNat` \ register ->
2390     let
2391         code1   = amodeCode amode []
2392         dst__2  = amodeAddr amode
2393         code2   = registerCode register tmp []
2394         src__2  = registerName register tmp
2395         sz      = primRepToSize pk
2396         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2397     in
2398     return code__2
2399
2400 assignFltCode pk dst src
2401   = getRegister dst                         `thenNat` \ register1 ->
2402     getRegister src                         `thenNat` \ register2 ->
2403     let
2404         dst__2  = registerName register1 zeroh
2405         code    = registerCode register2 dst__2
2406         src__2  = registerName register2 dst__2
2407         code__2 = if isFixed register2
2408                   then code . mkSeqInstr (FMOV src__2 dst__2)
2409                   else code
2410     in
2411     return code__2
2412
2413 #endif /* alpha_TARGET_ARCH */
2414
2415 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2416
2417 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2418
2419 -- Floating point assignment to memory
2420 assignMem_FltCode pk addr src = do
2421   (src_reg, src_code) <- getNonClobberedReg src
2422   Amode addr addr_code <- getAmode addr
2423   let
2424         code = src_code `appOL`
2425                addr_code `snocOL`
2426                 IF_ARCH_i386(GST pk src_reg addr,
2427                              MOV pk (OpReg src_reg) (OpAddr addr))
2428   return code
2429
2430 -- Floating point assignment to a register/temporary
2431 assignReg_FltCode pk reg src = do
2432   src_code <- getAnyReg src
2433   return (src_code (getRegisterReg reg))
2434
2435 #endif /* i386_TARGET_ARCH */
2436
2437 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2438
2439 #if sparc_TARGET_ARCH
2440
2441 -- Floating point assignment to memory
2442 assignMem_FltCode pk addr src = do
2443     Amode dst__2 code1 <- getAmode addr
2444     (src__2, code2) <- getSomeReg src
2445     tmp1 <- getNewRegNat pk
2446     let
2447         pk__2   = cmmExprRep src
2448         code__2 = code1 `appOL` code2 `appOL`
2449             if   pk == pk__2
2450             then unitOL (ST pk src__2 dst__2)
2451             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2452     return code__2
2453
2454 -- Floating point assignment to a register/temporary
2455 -- ToDo: Verify correctness
2456 assignReg_FltCode pk reg src = do
2457     r <- getRegister src
2458     v1 <- getNewRegNat pk
2459     return $ case r of
2460         Any _ code         -> code dst
2461         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2462     where
2463       dst = getRegisterReg reg
2464
2465 #endif /* sparc_TARGET_ARCH */
2466
2467 #if powerpc_TARGET_ARCH
2468
2469 -- Easy, isn't it?
2470 assignMem_FltCode = assignMem_IntCode
2471 assignReg_FltCode = assignReg_IntCode
2472
2473 #endif /* powerpc_TARGET_ARCH */
2474
2475
2476 -- -----------------------------------------------------------------------------
2477 -- Generating an non-local jump
2478
2479 -- (If applicable) Do not fill the delay slots here; you will confuse the
2480 -- register allocator.
2481
2482 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2483
2484 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2485
2486 #if alpha_TARGET_ARCH
2487
2488 genJump (CmmLabel lbl)
2489   | isAsmTemp lbl = returnInstr (BR target)
2490   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2491   where
2492     target = ImmCLbl lbl
2493
2494 genJump tree
2495   = getRegister tree                `thenNat` \ register ->
2496     getNewRegNat PtrRep             `thenNat` \ tmp ->
2497     let
2498         dst    = registerName register pv
2499         code   = registerCode register pv
2500         target = registerName register pv
2501     in
2502     if isFixed register then
2503         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2504     else
2505     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2506
2507 #endif /* alpha_TARGET_ARCH */
2508
2509 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2510
2511 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2512
2513 genJump (CmmLoad mem pk) = do
2514   Amode target code <- getAmode mem
2515   return (code `snocOL` JMP (OpAddr target))
2516
2517 genJump (CmmLit lit) = do
2518   return (unitOL (JMP (OpImm (litToImm lit))))
2519
2520 genJump expr = do
2521   (reg,code) <- getSomeReg expr
2522   return (code `snocOL` JMP (OpReg reg))
2523
2524 #endif /* i386_TARGET_ARCH */
2525
2526 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2527
2528 #if sparc_TARGET_ARCH
2529
2530 genJump (CmmLit (CmmLabel lbl))
2531   = return (toOL [CALL (Left target) 0 True, NOP])
2532   where
2533     target = ImmCLbl lbl
2534
2535 genJump tree
2536   = do
2537         (target, code) <- getSomeReg tree
2538         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2539
2540 #endif /* sparc_TARGET_ARCH */
2541
2542 #if powerpc_TARGET_ARCH
2543 genJump (CmmLit (CmmLabel lbl))
2544   = return (unitOL $ JMP lbl)
2545
2546 genJump tree
2547   = do
2548         (target,code) <- getSomeReg tree
2549         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2550 #endif /* powerpc_TARGET_ARCH */
2551
2552
2553 -- -----------------------------------------------------------------------------
2554 --  Unconditional branches
2555
2556 genBranch :: BlockId -> NatM InstrBlock
2557
2558 #if alpha_TARGET_ARCH
2559 genBranch id = return (unitOL (BR id))
2560 #endif
2561
2562 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2563 genBranch id = return (unitOL (JXX ALWAYS id))
2564 #endif
2565
2566 #if sparc_TARGET_ARCH
2567 genBranch (BlockId id) = return (toOL [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP])
2568 #endif
2569
2570 #if powerpc_TARGET_ARCH
2571 genBranch id = return (unitOL (BCC ALWAYS id))
2572 #endif
2573
2574
2575 -- -----------------------------------------------------------------------------
2576 --  Conditional jumps
2577
2578 {-
2579 Conditional jumps are always to local labels, so we can use branch
2580 instructions.  We peek at the arguments to decide what kind of
2581 comparison to do.
2582
2583 ALPHA: For comparisons with 0, we're laughing, because we can just do
2584 the desired conditional branch.
2585
2586 I386: First, we have to ensure that the condition
2587 codes are set according to the supplied comparison operation.
2588
2589 SPARC: First, we have to ensure that the condition codes are set
2590 according to the supplied comparison operation.  We generate slightly
2591 different code for floating point comparisons, because a floating
2592 point operation cannot directly precede a @BF@.  We assume the worst
2593 and fill that slot with a @NOP@.
2594
2595 SPARC: Do not fill the delay slots here; you will confuse the register
2596 allocator.
2597 -}
2598
2599
2600 genCondJump
2601     :: BlockId      -- the branch target
2602     -> CmmExpr      -- the condition on which to branch
2603     -> NatM InstrBlock
2604
2605 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2606
2607 #if alpha_TARGET_ARCH
2608
2609 genCondJump id (StPrim op [x, StInt 0])
2610   = getRegister x                           `thenNat` \ register ->
2611     getNewRegNat (registerRep register)
2612                                     `thenNat` \ tmp ->
2613     let
2614         code   = registerCode register tmp
2615         value  = registerName register tmp
2616         pk     = registerRep register
2617         target = ImmCLbl lbl
2618     in
2619     returnSeq code [BI (cmpOp op) value target]
2620   where
2621     cmpOp CharGtOp = GTT
2622     cmpOp CharGeOp = GE
2623     cmpOp CharEqOp = EQQ
2624     cmpOp CharNeOp = NE
2625     cmpOp CharLtOp = LTT
2626     cmpOp CharLeOp = LE
2627     cmpOp IntGtOp = GTT
2628     cmpOp IntGeOp = GE
2629     cmpOp IntEqOp = EQQ
2630     cmpOp IntNeOp = NE
2631     cmpOp IntLtOp = LTT
2632     cmpOp IntLeOp = LE
2633     cmpOp WordGtOp = NE
2634     cmpOp WordGeOp = ALWAYS
2635     cmpOp WordEqOp = EQQ
2636     cmpOp WordNeOp = NE
2637     cmpOp WordLtOp = NEVER
2638     cmpOp WordLeOp = EQQ
2639     cmpOp AddrGtOp = NE
2640     cmpOp AddrGeOp = ALWAYS
2641     cmpOp AddrEqOp = EQQ
2642     cmpOp AddrNeOp = NE
2643     cmpOp AddrLtOp = NEVER
2644     cmpOp AddrLeOp = EQQ
2645
2646 genCondJump lbl (StPrim op [x, StDouble 0.0])
2647   = getRegister x                           `thenNat` \ register ->
2648     getNewRegNat (registerRep register)
2649                                     `thenNat` \ tmp ->
2650     let
2651         code   = registerCode register tmp
2652         value  = registerName register tmp
2653         pk     = registerRep register
2654         target = ImmCLbl lbl
2655     in
2656     return (code . mkSeqInstr (BF (cmpOp op) value target))
2657   where
2658     cmpOp FloatGtOp = GTT
2659     cmpOp FloatGeOp = GE
2660     cmpOp FloatEqOp = EQQ
2661     cmpOp FloatNeOp = NE
2662     cmpOp FloatLtOp = LTT
2663     cmpOp FloatLeOp = LE
2664     cmpOp DoubleGtOp = GTT
2665     cmpOp DoubleGeOp = GE
2666     cmpOp DoubleEqOp = EQQ
2667     cmpOp DoubleNeOp = NE
2668     cmpOp DoubleLtOp = LTT
2669     cmpOp DoubleLeOp = LE
2670
2671 genCondJump lbl (StPrim op [x, y])
2672   | fltCmpOp op
2673   = trivialFCode pr instr x y       `thenNat` \ register ->
2674     getNewRegNat F64                `thenNat` \ tmp ->
2675     let
2676         code   = registerCode register tmp
2677         result = registerName register tmp
2678         target = ImmCLbl lbl
2679     in
2680     return (code . mkSeqInstr (BF cond result target))
2681   where
2682     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2683
2684     fltCmpOp op = case op of
2685         FloatGtOp -> True
2686         FloatGeOp -> True
2687         FloatEqOp -> True
2688         FloatNeOp -> True
2689         FloatLtOp -> True
2690         FloatLeOp -> True
2691         DoubleGtOp -> True
2692         DoubleGeOp -> True
2693         DoubleEqOp -> True
2694         DoubleNeOp -> True
2695         DoubleLtOp -> True
2696         DoubleLeOp -> True
2697         _ -> False
2698     (instr, cond) = case op of
2699         FloatGtOp -> (FCMP TF LE, EQQ)
2700         FloatGeOp -> (FCMP TF LTT, EQQ)
2701         FloatEqOp -> (FCMP TF EQQ, NE)
2702         FloatNeOp -> (FCMP TF EQQ, EQQ)
2703         FloatLtOp -> (FCMP TF LTT, NE)
2704         FloatLeOp -> (FCMP TF LE, NE)
2705         DoubleGtOp -> (FCMP TF LE, EQQ)
2706         DoubleGeOp -> (FCMP TF LTT, EQQ)
2707         DoubleEqOp -> (FCMP TF EQQ, NE)
2708         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2709         DoubleLtOp -> (FCMP TF LTT, NE)
2710         DoubleLeOp -> (FCMP TF LE, NE)
2711
2712 genCondJump lbl (StPrim op [x, y])
2713   = trivialCode instr x y           `thenNat` \ register ->
2714     getNewRegNat IntRep             `thenNat` \ tmp ->
2715     let
2716         code   = registerCode register tmp
2717         result = registerName register tmp
2718         target = ImmCLbl lbl
2719     in
2720     return (code . mkSeqInstr (BI cond result target))
2721   where
2722     (instr, cond) = case op of
2723         CharGtOp -> (CMP LE, EQQ)
2724         CharGeOp -> (CMP LTT, EQQ)
2725         CharEqOp -> (CMP EQQ, NE)
2726         CharNeOp -> (CMP EQQ, EQQ)
2727         CharLtOp -> (CMP LTT, NE)
2728         CharLeOp -> (CMP LE, NE)
2729         IntGtOp -> (CMP LE, EQQ)
2730         IntGeOp -> (CMP LTT, EQQ)
2731         IntEqOp -> (CMP EQQ, NE)
2732         IntNeOp -> (CMP EQQ, EQQ)
2733         IntLtOp -> (CMP LTT, NE)
2734         IntLeOp -> (CMP LE, NE)
2735         WordGtOp -> (CMP ULE, EQQ)
2736         WordGeOp -> (CMP ULT, EQQ)
2737         WordEqOp -> (CMP EQQ, NE)
2738         WordNeOp -> (CMP EQQ, EQQ)
2739         WordLtOp -> (CMP ULT, NE)
2740         WordLeOp -> (CMP ULE, NE)
2741         AddrGtOp -> (CMP ULE, EQQ)
2742         AddrGeOp -> (CMP ULT, EQQ)
2743         AddrEqOp -> (CMP EQQ, NE)
2744         AddrNeOp -> (CMP EQQ, EQQ)
2745         AddrLtOp -> (CMP ULT, NE)
2746         AddrLeOp -> (CMP ULE, NE)
2747
2748 #endif /* alpha_TARGET_ARCH */
2749
2750 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2751
2752 #if i386_TARGET_ARCH
2753
2754 genCondJump id bool = do
2755   CondCode _ cond code <- getCondCode bool
2756   return (code `snocOL` JXX cond id)
2757
2758 #endif
2759
2760 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2761
2762 #if x86_64_TARGET_ARCH
2763
2764 genCondJump id bool = do
2765   CondCode is_float cond cond_code <- getCondCode bool
2766   if not is_float
2767     then
2768         return (cond_code `snocOL` JXX cond id)
2769     else do
2770         lbl <- getBlockIdNat
2771
2772         -- see comment with condFltReg
2773         let code = case cond of
2774                         NE  -> or_unordered
2775                         GU  -> plain_test
2776                         GEU -> plain_test
2777                         _   -> and_ordered
2778
2779             plain_test = unitOL (
2780                   JXX cond id
2781                 )
2782             or_unordered = toOL [
2783                   JXX cond id,
2784                   JXX PARITY id
2785                 ]
2786             and_ordered = toOL [
2787                   JXX PARITY lbl,
2788                   JXX cond id,
2789                   JXX ALWAYS lbl,
2790                   NEWBLOCK lbl
2791                 ]
2792         return (cond_code `appOL` code)
2793
2794 #endif
2795
2796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2797
2798 #if sparc_TARGET_ARCH
2799
2800 genCondJump (BlockId id) bool = do
2801   CondCode is_float cond code <- getCondCode bool
2802   return (
2803        code `appOL`
2804        toOL (
2805          if   is_float
2806          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2807          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2808        )
2809     )
2810
2811 #endif /* sparc_TARGET_ARCH */
2812
2813
2814 #if powerpc_TARGET_ARCH
2815
2816 genCondJump id bool = do
2817   CondCode is_float cond code <- getCondCode bool
2818   return (code `snocOL` BCC cond id)
2819
2820 #endif /* powerpc_TARGET_ARCH */
2821
2822
2823 -- -----------------------------------------------------------------------------
2824 --  Generating C calls
2825
2826 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2827 -- @get_arg@, which moves the arguments to the correct registers/stack
2828 -- locations.  Apart from that, the code is easy.
2829 --
2830 -- (If applicable) Do not fill the delay slots here; you will confuse the
2831 -- register allocator.
2832
2833 genCCall
2834     :: CmmCallTarget            -- function to call
2835     -> [(CmmReg,MachHint)]      -- where to put the result
2836     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2837     -> Maybe [GlobalReg]        -- volatile regs to save
2838     -> NatM InstrBlock
2839
2840 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2841
2842 #if alpha_TARGET_ARCH
2843
2844 ccallResultRegs =
2845
2846 genCCall fn cconv result_regs args
2847   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2848                           `thenNat` \ ((unused,_), argCode) ->
2849     let
2850         nRegs = length allArgRegs - length unused
2851         code = asmSeqThen (map ($ []) argCode)
2852     in
2853         returnSeq code [
2854             LDA pv (AddrImm (ImmLab (ptext fn))),
2855             JSR ra (AddrReg pv) nRegs,
2856             LDGP gp (AddrReg ra)]
2857   where
2858     ------------------------
2859     {-  Try to get a value into a specific register (or registers) for
2860         a call.  The first 6 arguments go into the appropriate
2861         argument register (separate registers for integer and floating
2862         point arguments, but used in lock-step), and the remaining
2863         arguments are dumped to the stack, beginning at 0(sp).  Our
2864         first argument is a pair of the list of remaining argument
2865         registers to be assigned for this call and the next stack
2866         offset to use for overflowing arguments.  This way,
2867         @get_Arg@ can be applied to all of a call's arguments using
2868         @mapAccumLNat@.
2869     -}
2870     get_arg
2871         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2872         -> StixTree             -- Current argument
2873         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2874
2875     -- We have to use up all of our argument registers first...
2876
2877     get_arg ((iDst,fDst):dsts, offset) arg
2878       = getRegister arg                     `thenNat` \ register ->
2879         let
2880             reg  = if isFloatingRep pk then fDst else iDst
2881             code = registerCode register reg
2882             src  = registerName register reg
2883             pk   = registerRep register
2884         in
2885         return (
2886             if isFloatingRep pk then
2887                 ((dsts, offset), if isFixed register then
2888                     code . mkSeqInstr (FMOV src fDst)
2889                     else code)
2890             else
2891                 ((dsts, offset), if isFixed register then
2892                     code . mkSeqInstr (OR src (RIReg src) iDst)
2893                     else code))
2894
2895     -- Once we have run out of argument registers, we move to the
2896     -- stack...
2897
2898     get_arg ([], offset) arg
2899       = getRegister arg                 `thenNat` \ register ->
2900         getNewRegNat (registerRep register)
2901                                         `thenNat` \ tmp ->
2902         let
2903             code = registerCode register tmp
2904             src  = registerName register tmp
2905             pk   = registerRep register
2906             sz   = primRepToSize pk
2907         in
2908         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2909
2910 #endif /* alpha_TARGET_ARCH */
2911
2912 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2913
2914 #if i386_TARGET_ARCH
2915
2916 -- we only cope with a single result for foreign calls
2917 genCCall (CmmPrim op) [(r,_)] args vols = do
2918   case op of
2919         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2920         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2921
2922         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2923         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2924
2925         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2926         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2927
2928         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2929         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2930
2931         other_op    -> outOfLineFloatOp op r args vols
2932  where
2933   actuallyInlineFloatOp rep instr [(x,_)]
2934         = do res <- trivialUFCode rep instr x
2935              any <- anyReg res
2936              return (any (getRegisterReg r))
2937
2938 genCCall target dest_regs args vols = do
2939     let
2940         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
2941 #if !darwin_TARGET_OS
2942         tot_arg_size        = sum sizes
2943 #else
2944         raw_arg_size        = sum sizes
2945         tot_arg_size        = roundTo 16 raw_arg_size
2946         arg_pad_size        = tot_arg_size - raw_arg_size
2947     delta0 <- getDeltaNat
2948     setDeltaNat (delta0 - arg_pad_size)
2949 #endif
2950
2951     push_codes <- mapM push_arg (reverse args)
2952     delta <- getDeltaNat
2953
2954     -- in
2955     -- deal with static vs dynamic call targets
2956     (callinsns,cconv) <-
2957       case target of
2958         -- CmmPrim -> ...
2959         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2960            -> -- ToDo: stdcall arg sizes
2961               return (unitOL (CALL (Left fn_imm) []), conv)
2962            where fn_imm = ImmCLbl lbl
2963         CmmForeignCall expr conv
2964            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2965                  ASSERT(dyn_rep == I32)
2966                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2967
2968     let push_code
2969 #if darwin_TARGET_OS
2970             | arg_pad_size /= 0
2971             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2972                     DELTA (delta0 - arg_pad_size)]
2973               `appOL` concatOL push_codes
2974             | otherwise
2975 #endif
2976             = concatOL push_codes
2977         call = callinsns `appOL`
2978                toOL (
2979                         -- Deallocate parameters after call for ccall;
2980                         -- but not for stdcall (callee does it)
2981                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
2982                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2983                   ++
2984                   [DELTA (delta + tot_arg_size)]
2985                )
2986     -- in
2987     setDeltaNat (delta + tot_arg_size)
2988
2989     let
2990         -- assign the results, if necessary
2991         assign_code []     = nilOL
2992         assign_code [(dest,_hint)] =
2993           case rep of
2994                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2995                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2996                 F32 -> unitOL (GMOV fake0 r_dest)
2997                 F64 -> unitOL (GMOV fake0 r_dest)
2998                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2999           where
3000                 r_dest_hi = getHiVRegFromLo r_dest
3001                 rep = cmmRegRep dest
3002                 r_dest = getRegisterReg dest
3003         assign_code many = panic "genCCall.assign_code many"
3004
3005     return (push_code `appOL`
3006             call `appOL`
3007             assign_code dest_regs)
3008
3009   where
3010     arg_size F64 = 8
3011     arg_size F32 = 4
3012     arg_size I64 = 8
3013     arg_size _   = 4
3014
3015     roundTo a x | x `mod` a == 0 = x
3016                 | otherwise = x + a - (x `mod` a)
3017
3018
3019     push_arg :: (CmmExpr,MachHint){-current argument-}
3020                     -> NatM InstrBlock  -- code
3021
3022     push_arg (arg,_hint) -- we don't need the hints on x86
3023       | arg_rep == I64 = do
3024         ChildCode64 code r_lo <- iselExpr64 arg
3025         delta <- getDeltaNat
3026         setDeltaNat (delta - 8)
3027         let
3028             r_hi = getHiVRegFromLo r_lo
3029         -- in
3030         return (       code `appOL`
3031                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3032                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3033                              DELTA (delta-8)]
3034             )
3035
3036       | otherwise = do
3037         (code, reg, sz) <- get_op arg
3038         delta <- getDeltaNat
3039         let size = arg_size sz
3040         setDeltaNat (delta-size)
3041         if (case sz of F64 -> True; F32 -> True; _ -> False)
3042            then return (code `appOL`
3043                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3044                               DELTA (delta-size),
3045                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3046                                                         EAIndexNone
3047                                                         (ImmInt 0))]
3048                        )
3049            else return (code `snocOL`
3050                         PUSH I32 (OpReg reg) `snocOL`
3051                         DELTA (delta-size)
3052                        )
3053       where
3054          arg_rep = cmmExprRep arg
3055
3056     ------------
3057     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3058     get_op op = do
3059         (reg,code) <- getSomeReg op
3060         return (code, reg, cmmExprRep op)
3061
3062 #endif /* i386_TARGET_ARCH */
3063
3064 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3065
3066 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3067   -> Maybe [GlobalReg] -> NatM InstrBlock
3068 outOfLineFloatOp mop res args vols
3069   = do
3070       targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3071       let target = CmmForeignCall targetExpr CCallConv
3072
3073       if cmmRegRep res == F64
3074         then
3075           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3076         else do
3077           uq <- getUniqueNat
3078           let
3079             tmp = CmmLocal (LocalReg uq F64)
3080           -- in
3081           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3082           code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3083           return (code1 `appOL` code2)
3084   where
3085         lbl = mkForeignLabel fn Nothing True
3086
3087         fn = case mop of
3088               MO_F32_Sqrt  -> FSLIT("sqrtf")
3089               MO_F32_Sin   -> FSLIT("sinf")
3090               MO_F32_Cos   -> FSLIT("cosf")
3091               MO_F32_Tan   -> FSLIT("tanf")
3092               MO_F32_Exp   -> FSLIT("expf")
3093               MO_F32_Log   -> FSLIT("logf")
3094
3095               MO_F32_Asin  -> FSLIT("asinf")
3096               MO_F32_Acos  -> FSLIT("acosf")
3097               MO_F32_Atan  -> FSLIT("atanf")
3098
3099               MO_F32_Sinh  -> FSLIT("sinhf")
3100               MO_F32_Cosh  -> FSLIT("coshf")
3101               MO_F32_Tanh  -> FSLIT("tanhf")
3102               MO_F32_Pwr   -> FSLIT("powf")
3103
3104               MO_F64_Sqrt  -> FSLIT("sqrt")
3105               MO_F64_Sin   -> FSLIT("sin")
3106               MO_F64_Cos   -> FSLIT("cos")
3107               MO_F64_Tan   -> FSLIT("tan")
3108               MO_F64_Exp   -> FSLIT("exp")
3109               MO_F64_Log   -> FSLIT("log")
3110
3111               MO_F64_Asin  -> FSLIT("asin")
3112               MO_F64_Acos  -> FSLIT("acos")
3113               MO_F64_Atan  -> FSLIT("atan")
3114
3115               MO_F64_Sinh  -> FSLIT("sinh")
3116               MO_F64_Cosh  -> FSLIT("cosh")
3117               MO_F64_Tanh  -> FSLIT("tanh")
3118               MO_F64_Pwr   -> FSLIT("pow")
3119
3120 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3121
3122 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3123
3124 #if x86_64_TARGET_ARCH
3125
3126 genCCall (CmmPrim op) [(r,_)] args vols =
3127   outOfLineFloatOp op r args vols
3128
3129 genCCall target dest_regs args vols = do
3130
3131         -- load up the register arguments
3132     (stack_args, aregs, fregs, load_args_code)
3133          <- load_args args allArgRegs allFPArgRegs nilOL
3134
3135     let
3136         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3137         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3138         arg_regs = int_regs_used ++ fp_regs_used
3139                 -- for annotating the call instruction with
3140
3141         sse_regs = length fp_regs_used
3142
3143         tot_arg_size = arg_size * length stack_args
3144
3145         -- On entry to the called function, %rsp should be aligned
3146         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3147         -- the return address is 16-byte aligned).  In STG land
3148         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3149         -- need to make sure we push a multiple of 16-bytes of args,
3150         -- plus the return address, to get the correct alignment.
3151         -- Urg, this is hard.  We need to feed the delta back into
3152         -- the arg pushing code.
3153     (real_size, adjust_rsp) <-
3154         if tot_arg_size `rem` 16 == 0
3155             then return (tot_arg_size, nilOL)
3156             else do -- we need to adjust...
3157                 delta <- getDeltaNat
3158                 setDeltaNat (delta-8)
3159                 return (tot_arg_size+8, toOL [
3160                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3161                                 DELTA (delta-8)
3162                         ])
3163
3164         -- push the stack args, right to left
3165     push_code <- push_args (reverse stack_args) nilOL
3166     delta <- getDeltaNat
3167
3168     -- deal with static vs dynamic call targets
3169     (callinsns,cconv) <-
3170       case target of
3171         -- CmmPrim -> ...
3172         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3173            -> -- ToDo: stdcall arg sizes
3174               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3175            where fn_imm = ImmCLbl lbl
3176         CmmForeignCall expr conv
3177            -> do (dyn_r, dyn_c) <- getSomeReg expr
3178                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3179
3180     let
3181         -- The x86_64 ABI requires us to set %al to the number of SSE
3182         -- registers that contain arguments, if the called routine
3183         -- is a varargs function.  We don't know whether it's a
3184         -- varargs function or not, so we have to assume it is.
3185         --
3186         -- It's not safe to omit this assignment, even if the number
3187         -- of SSE regs in use is zero.  If %al is larger than 8
3188         -- on entry to a varargs function, seg faults ensue.
3189         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3190
3191     let call = callinsns `appOL`
3192                toOL (
3193                         -- Deallocate parameters after call for ccall;
3194                         -- but not for stdcall (callee does it)
3195                   (if cconv == StdCallConv || real_size==0 then [] else
3196                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3197                   ++
3198                   [DELTA (delta + real_size)]
3199                )
3200     -- in
3201     setDeltaNat (delta + real_size)
3202
3203     let
3204         -- assign the results, if necessary
3205         assign_code []     = nilOL
3206         assign_code [(dest,_hint)] =
3207           case rep of
3208                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3209                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3210                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3211           where
3212                 rep = cmmRegRep dest
3213                 r_dest = getRegisterReg dest
3214         assign_code many = panic "genCCall.assign_code many"
3215
3216     return (load_args_code      `appOL`
3217             adjust_rsp          `appOL`
3218             push_code           `appOL`
3219             assign_eax sse_regs `appOL`
3220             call                `appOL`
3221             assign_code dest_regs)
3222
3223   where
3224     arg_size = 8 -- always, at the mo
3225
3226     load_args :: [(CmmExpr,MachHint)]
3227               -> [Reg]                  -- int regs avail for args
3228               -> [Reg]                  -- FP regs avail for args
3229               -> InstrBlock
3230               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3231     load_args args [] [] code     =  return (args, [], [], code)
3232         -- no more regs to use
3233     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3234         -- no more args to push
3235     load_args ((arg,hint) : rest) aregs fregs code
3236         | isFloatingRep arg_rep =
3237         case fregs of
3238           [] -> push_this_arg
3239           (r:rs) -> do
3240              arg_code <- getAnyReg arg
3241              load_args rest aregs rs (code `appOL` arg_code r)
3242         | otherwise =
3243         case aregs of
3244           [] -> push_this_arg
3245           (r:rs) -> do
3246              arg_code <- getAnyReg arg
3247              load_args rest rs fregs (code `appOL` arg_code r)
3248         where
3249           arg_rep = cmmExprRep arg
3250
3251           push_this_arg = do
3252             (args',ars,frs,code') <- load_args rest aregs fregs code
3253             return ((arg,hint):args', ars, frs, code')
3254
3255     push_args [] code = return code
3256     push_args ((arg,hint):rest) code
3257        | isFloatingRep arg_rep = do
3258          (arg_reg, arg_code) <- getSomeReg arg
3259          delta <- getDeltaNat
3260          setDeltaNat (delta-arg_size)
3261          let code' = code `appOL` toOL [
3262                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3263                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3264                         DELTA (delta-arg_size)]
3265          push_args rest code'
3266
3267        | otherwise = do
3268        -- we only ever generate word-sized function arguments.  Promotion
3269        -- has already happened: our Int8# type is kept sign-extended
3270        -- in an Int#, for example.
3271          ASSERT(arg_rep == I64) return ()
3272          (arg_op, arg_code) <- getOperand arg
3273          delta <- getDeltaNat
3274          setDeltaNat (delta-arg_size)
3275          let code' = code `appOL` toOL [PUSH I64 arg_op,
3276                                         DELTA (delta-arg_size)]
3277          push_args rest code'
3278         where
3279           arg_rep = cmmExprRep arg
3280 #endif
3281
3282 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3283
3284 #if sparc_TARGET_ARCH
3285 {-
3286    The SPARC calling convention is an absolute
3287    nightmare.  The first 6x32 bits of arguments are mapped into
3288    %o0 through %o5, and the remaining arguments are dumped to the
3289    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3290
3291    If we have to put args on the stack, move %o6==%sp down by
3292    the number of words to go on the stack, to ensure there's enough space.
3293
3294    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3295    16 words above the stack pointer is a word for the address of
3296    a structure return value.  I use this as a temporary location
3297    for moving values from float to int regs.  Certainly it isn't
3298    safe to put anything in the 16 words starting at %sp, since
3299    this area can get trashed at any time due to window overflows
3300    caused by signal handlers.
3301
3302    A final complication (if the above isn't enough) is that
3303    we can't blithely calculate the arguments one by one into
3304    %o0 .. %o5.  Consider the following nested calls:
3305
3306        fff a (fff b c)
3307
3308    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3309    the inner call will itself use %o0, which trashes the value put there
3310    in preparation for the outer call.  Upshot: we need to calculate the
3311    args into temporary regs, and move those to arg regs or onto the
3312    stack only immediately prior to the call proper.  Sigh.
3313 -}
3314
3315 genCCall target dest_regs argsAndHints vols = do
3316     let
3317         args = map fst argsAndHints
3318     argcode_and_vregs <- mapM arg_to_int_vregs args
3319     let
3320         (argcodes, vregss) = unzip argcode_and_vregs
3321         n_argRegs          = length allArgRegs
3322         n_argRegs_used     = min (length vregs) n_argRegs
3323         vregs              = concat vregss
3324     -- deal with static vs dynamic call targets
3325     callinsns <- (case target of
3326         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3327                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3328         CmmForeignCall expr conv -> do
3329                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3330                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3331         CmmPrim mop -> do
3332                   (res, reduce) <- outOfLineFloatOp mop
3333                   lblOrMopExpr <- case res of
3334                        Left lbl -> do
3335                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3336                        Right mopExpr -> do
3337                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3338                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3339                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3340
3341       )
3342     let
3343         argcode = concatOL argcodes
3344         (move_sp_down, move_sp_up)
3345            = let diff = length vregs - n_argRegs
3346                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3347              in  if   nn <= 0
3348                  then (nilOL, nilOL)
3349                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3350         transfer_code
3351            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3352     return (argcode       `appOL`
3353             move_sp_down  `appOL`
3354             transfer_code `appOL`
3355             callinsns     `appOL`
3356             unitOL NOP    `appOL`
3357             move_sp_up)
3358   where
3359      -- move args from the integer vregs into which they have been
3360      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3361      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3362
3363      move_final [] _ offset          -- all args done
3364         = []
3365
3366      move_final (v:vs) [] offset     -- out of aregs; move to stack
3367         = ST I32 v (spRel offset)
3368           : move_final vs [] (offset+1)
3369
3370      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3371         = OR False g0 (RIReg v) a
3372           : move_final vs az offset
3373
3374      -- generate code to calculate an argument, and move it into one
3375      -- or two integer vregs.
3376      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3377      arg_to_int_vregs arg
3378         | (cmmExprRep arg) == I64
3379         = do
3380           (ChildCode64 code r_lo) <- iselExpr64 arg
3381           let
3382               r_hi = getHiVRegFromLo r_lo
3383           return (code, [r_hi, r_lo])
3384         | otherwise
3385         = do
3386           (src, code) <- getSomeReg arg
3387           tmp <- getNewRegNat (cmmExprRep arg)
3388           let
3389               pk   = cmmExprRep arg
3390           case pk of
3391              F64 -> do
3392                       v1 <- getNewRegNat I32
3393                       v2 <- getNewRegNat I32
3394                       return (
3395                         code                          `snocOL`
3396                         FMOV F64 src f0                `snocOL`
3397                         ST   F32  f0 (spRel 16)         `snocOL`
3398                         LD   I32  (spRel 16) v1         `snocOL`
3399                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3400                         LD   I32  (spRel 16) v2
3401                         ,
3402                         [v1,v2]
3403                        )
3404              F32 -> do
3405                       v1 <- getNewRegNat I32
3406                       return (
3407                         code                    `snocOL`
3408                         ST   F32  src (spRel 16)  `snocOL`
3409                         LD   I32  (spRel 16) v1
3410                         ,
3411                         [v1]
3412                        )
3413              other -> do
3414                         v1 <- getNewRegNat I32
3415                         return (
3416                           code `snocOL` OR False g0 (RIReg src) v1
3417                           ,
3418                           [v1]
3419                          )
3420 outOfLineFloatOp mop =
3421     do
3422       mopExpr <- cmmMakeDynamicReference addImportNat True $
3423                   mkForeignLabel functionName Nothing True
3424       let mopLabelOrExpr = case mopExpr of
3425                         CmmLit (CmmLabel lbl) -> Left lbl
3426                         _ -> Right mopExpr
3427       return (mopLabelOrExpr, reduce)
3428             where
3429                 (reduce, functionName) = case mop of
3430                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3431                   MO_F32_Log    -> (True,  FSLIT("log"))
3432                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3433
3434                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3435                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3436                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3437
3438                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3439                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3440                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3441
3442                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3443                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3444                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3445
3446                   MO_F64_Exp    -> (False, FSLIT("exp"))
3447                   MO_F64_Log    -> (False, FSLIT("log"))
3448                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3449
3450                   MO_F64_Sin    -> (False, FSLIT("sin"))
3451                   MO_F64_Cos    -> (False, FSLIT("cos"))
3452                   MO_F64_Tan    -> (False, FSLIT("tan"))
3453
3454                   MO_F64_Asin   -> (False, FSLIT("asin"))
3455                   MO_F64_Acos   -> (False, FSLIT("acos"))
3456                   MO_F64_Atan   -> (False, FSLIT("atan"))
3457
3458                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3459                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3460                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3461
3462                   other -> pprPanic "outOfLineFloatOp(sparc) "
3463                                 (pprCallishMachOp mop)
3464
3465 #endif /* sparc_TARGET_ARCH */
3466
3467 #if powerpc_TARGET_ARCH
3468
3469 #if darwin_TARGET_OS || linux_TARGET_OS
3470 {-
3471     The PowerPC calling convention for Darwin/Mac OS X
3472     is described in Apple's document
3473     "Inside Mac OS X - Mach-O Runtime Architecture".
3474
3475     PowerPC Linux uses the System V Release 4 Calling Convention
3476     for PowerPC. It is described in the
3477     "System V Application Binary Interface PowerPC Processor Supplement".
3478
3479     Both conventions are similar:
3480     Parameters may be passed in general-purpose registers starting at r3, in
3481     floating point registers starting at f1, or on the stack.
3482
3483     But there are substantial differences:
3484     * The number of registers used for parameter passing and the exact set of
3485       nonvolatile registers differs (see MachRegs.lhs).
3486     * On Darwin, stack space is always reserved for parameters, even if they are
3487       passed in registers. The called routine may choose to save parameters from
3488       registers to the corresponding space on the stack.
3489     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3490       parameter is passed in an FPR.
3491     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3492       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3493       Darwin just treats an I64 like two separate I32s (high word first).
3494     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3495       4-byte aligned like everything else on Darwin.
3496     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3497       PowerPC Linux does not agree, so neither do we.
3498
3499     According to both conventions, The parameter area should be part of the
3500     caller's stack frame, allocated in the caller's prologue code (large enough
3501     to hold the parameter lists for all called routines). The NCG already
3502     uses the stack for register spilling, leaving 64 bytes free at the top.
3503     If we need a larger parameter area than that, we just allocate a new stack
3504     frame just before ccalling.
3505 -}
3506
3507 genCCall target dest_regs argsAndHints vols
3508   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3509         -- we rely on argument promotion in the codeGen
3510     do
3511         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3512                                                         (zip args argReps)
3513                                                         allArgRegs allFPArgRegs
3514                                                         initialStackOffset
3515                                                         (toOL []) []
3516
3517         (labelOrExpr, reduceToF32) <- case target of
3518             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3519             CmmForeignCall expr conv -> return  (Right expr, False)
3520             CmmPrim mop -> outOfLineFloatOp mop
3521
3522         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3523             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3524
3525         case labelOrExpr of
3526             Left lbl -> do
3527                 return (         codeBefore
3528                         `snocOL` BL lbl usedRegs
3529                         `appOL`  codeAfter)
3530             Right dyn -> do
3531                 (dynReg, dynCode) <- getSomeReg dyn
3532                 return (         dynCode
3533                         `snocOL` MTCTR dynReg
3534                         `appOL`  codeBefore
3535                         `snocOL` BCTRL usedRegs
3536                         `appOL`  codeAfter)
3537     where
3538 #if darwin_TARGET_OS
3539         initialStackOffset = 24
3540             -- size of linkage area + size of arguments, in bytes
3541         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3542                                        map machRepByteWidth argReps
3543 #elif linux_TARGET_OS
3544         initialStackOffset = 8
3545         stackDelta finalStack = roundTo 16 finalStack
3546 #endif
3547         args = map fst argsAndHints
3548         argReps = map cmmExprRep args
3549
3550         roundTo a x | x `mod` a == 0 = x
3551                     | otherwise = x + a - (x `mod` a)
3552
3553         move_sp_down finalStack
3554                | delta > 64 =
3555                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3556                               DELTA (-delta)]
3557                | otherwise = nilOL
3558                where delta = stackDelta finalStack
3559         move_sp_up finalStack
3560                | delta > 64 =
3561                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3562                               DELTA 0]
3563                | otherwise = nilOL
3564                where delta = stackDelta finalStack
3565
3566
3567         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3568         passArguments ((arg,I64):args) gprs fprs stackOffset
3569                accumCode accumUsed =
3570             do
3571                 ChildCode64 code vr_lo <- iselExpr64 arg
3572                 let vr_hi = getHiVRegFromLo vr_lo
3573
3574 #if darwin_TARGET_OS
3575                 passArguments args
3576                               (drop 2 gprs)
3577                               fprs
3578                               (stackOffset+8)
3579                               (accumCode `appOL` code
3580                                     `snocOL` storeWord vr_hi gprs stackOffset
3581                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3582                               ((take 2 gprs) ++ accumUsed)
3583             where
3584                 storeWord vr (gpr:_) offset = MR gpr vr
3585                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3586
3587 #elif linux_TARGET_OS
3588                 let stackOffset' = roundTo 8 stackOffset
3589                     stackCode = accumCode `appOL` code
3590                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3591                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3592                     regCode hireg loreg =
3593                         accumCode `appOL` code
3594                             `snocOL` MR hireg vr_hi
3595                             `snocOL` MR loreg vr_lo
3596
3597                 case gprs of
3598                     hireg : loreg : regs | even (length gprs) ->
3599                         passArguments args regs fprs stackOffset
3600                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3601                     _skipped : hireg : loreg : regs ->
3602                         passArguments args regs fprs stackOffset
3603                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3604                     _ -> -- only one or no regs left
3605                         passArguments args [] fprs (stackOffset'+8)
3606                                       stackCode accumUsed
3607 #endif
3608
3609         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3610             | reg : _ <- regs = do
3611                 register <- getRegister arg
3612                 let code = case register of
3613                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3614                             Any _ acode -> acode reg
3615                 passArguments args
3616                               (drop nGprs gprs)
3617                               (drop nFprs fprs)
3618 #if darwin_TARGET_OS
3619         -- The Darwin ABI requires that we reserve stack slots for register parameters
3620                               (stackOffset + stackBytes)
3621 #elif linux_TARGET_OS
3622         -- ... the SysV ABI doesn't.
3623                               stackOffset
3624 #endif
3625                               (accumCode `appOL` code)
3626                               (reg : accumUsed)
3627             | otherwise = do
3628                 (vr, code) <- getSomeReg arg
3629                 passArguments args
3630                               (drop nGprs gprs)
3631                               (drop nFprs fprs)
3632                               (stackOffset' + stackBytes)
3633                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3634                               accumUsed
3635             where
3636 #if darwin_TARGET_OS
3637         -- stackOffset is at least 4-byte aligned
3638         -- The Darwin ABI is happy with that.
3639                 stackOffset' = stackOffset
3640 #else
3641         -- ... the SysV ABI requires 8-byte alignment for doubles.
3642                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3643                              | otherwise  =           stackOffset
3644 #endif
3645                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3646                 (nGprs, nFprs, stackBytes, regs) = case rep of
3647                     I32 -> (1, 0, 4, gprs)
3648 #if darwin_TARGET_OS
3649         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3650         -- we use the FPRs.
3651                     F32 -> (1, 1, 4, fprs)
3652                     F64 -> (2, 1, 8, fprs)
3653 #elif linux_TARGET_OS
3654         -- ... the SysV ABI doesn't.
3655                     F32 -> (0, 1, 4, fprs)
3656                     F64 -> (0, 1, 8, fprs)
3657 #endif
3658
3659         moveResult reduceToF32 =
3660             case dest_regs of
3661                 [] -> nilOL
3662                 [(dest, _hint)]
3663                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3664                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3665                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3666                                           MR r_dest r4]
3667                     | otherwise -> unitOL (MR r_dest r3)
3668                     where rep = cmmRegRep dest
3669                           r_dest = getRegisterReg dest
3670
3671         outOfLineFloatOp mop =
3672             do
3673                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3674                               mkForeignLabel functionName Nothing True
3675                 let mopLabelOrExpr = case mopExpr of
3676                         CmmLit (CmmLabel lbl) -> Left lbl
3677                         _ -> Right mopExpr
3678                 return (mopLabelOrExpr, reduce)
3679             where
3680                 (functionName, reduce) = case mop of
3681                     MO_F32_Exp   -> (FSLIT("exp"), True)
3682                     MO_F32_Log   -> (FSLIT("log"), True)
3683                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3684
3685                     MO_F32_Sin   -> (FSLIT("sin"), True)
3686                     MO_F32_Cos   -> (FSLIT("cos"), True)
3687                     MO_F32_Tan   -> (FSLIT("tan"), True)
3688
3689                     MO_F32_Asin  -> (FSLIT("asin"), True)
3690                     MO_F32_Acos  -> (FSLIT("acos"), True)
3691                     MO_F32_Atan  -> (FSLIT("atan"), True)
3692
3693                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3694                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3695                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3696                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3697
3698                     MO_F64_Exp   -> (FSLIT("exp"), False)
3699                     MO_F64_Log   -> (FSLIT("log"), False)
3700                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3701
3702                     MO_F64_Sin   -> (FSLIT("sin"), False)
3703                     MO_F64_Cos   -> (FSLIT("cos"), False)
3704                     MO_F64_Tan   -> (FSLIT("tan"), False)
3705
3706                     MO_F64_Asin  -> (FSLIT("asin"), False)
3707                     MO_F64_Acos  -> (FSLIT("acos"), False)
3708                     MO_F64_Atan  -> (FSLIT("atan"), False)
3709
3710                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3711                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3712                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3713                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3714                     other -> pprPanic "genCCall(ppc): unknown callish op"
3715                                     (pprCallishMachOp other)
3716
3717 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3718
3719 #endif /* powerpc_TARGET_ARCH */
3720
3721
3722 -- -----------------------------------------------------------------------------
3723 -- Generating a table-branch
3724
3725 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3726
3727 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3728 genSwitch expr ids
3729   | opt_PIC
3730   = do
3731         (reg,e_code) <- getSomeReg expr
3732         lbl <- getNewLabelNat
3733         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3734         (tableReg,t_code) <- getSomeReg $ dynRef
3735         let
3736             jumpTable = map jumpTableEntryRel ids
3737
3738             jumpTableEntryRel Nothing
3739                 = CmmStaticLit (CmmInt 0 wordRep)
3740             jumpTableEntryRel (Just (BlockId id))
3741                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3742                 where blockLabel = mkAsmTempLabel id
3743
3744             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3745                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3746
3747             code = e_code `appOL` t_code `appOL` toOL [
3748                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3749                             ADD wordRep op (OpReg tableReg),
3750                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3751                     ]
3752         return code
3753   | otherwise
3754   = do
3755         (reg,e_code) <- getSomeReg expr
3756         lbl <- getNewLabelNat
3757         let
3758             jumpTable = map jumpTableEntry ids
3759             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3760             code = e_code `appOL` toOL [
3761                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3762                     JMP_TBL op [ id | Just id <- ids ]
3763                  ]
3764         -- in
3765         return code
3766 #elif powerpc_TARGET_ARCH
3767 genSwitch expr ids
3768   | opt_PIC
3769   = do
3770         (reg,e_code) <- getSomeReg expr
3771         tmp <- getNewRegNat I32
3772         lbl <- getNewLabelNat
3773         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3774         (tableReg,t_code) <- getSomeReg $ dynRef
3775         let
3776             jumpTable = map jumpTableEntryRel ids
3777
3778             jumpTableEntryRel Nothing
3779                 = CmmStaticLit (CmmInt 0 wordRep)
3780             jumpTableEntryRel (Just (BlockId id))
3781                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3782                 where blockLabel = mkAsmTempLabel id
3783
3784             code = e_code `appOL` t_code `appOL` toOL [
3785                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3786                             SLW tmp reg (RIImm (ImmInt 2)),
3787                             LD I32 tmp (AddrRegReg tableReg tmp),
3788                             ADD tmp tmp (RIReg tableReg),
3789                             MTCTR tmp,
3790                             BCTR [ id | Just id <- ids ]
3791                     ]
3792         return code
3793   | otherwise
3794   = do
3795         (reg,e_code) <- getSomeReg expr
3796         tmp <- getNewRegNat I32
3797         lbl <- getNewLabelNat
3798         let
3799             jumpTable = map jumpTableEntry ids
3800
3801             code = e_code `appOL` toOL [
3802                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3803                             SLW tmp reg (RIImm (ImmInt 2)),
3804                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3805                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3806                             MTCTR tmp,
3807                             BCTR [ id | Just id <- ids ]
3808                     ]
3809         return code
3810 #else
3811 genSwitch expr ids = panic "ToDo: genSwitch"
3812 #endif
3813
3814 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3815 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3816     where blockLabel = mkAsmTempLabel id
3817
3818 -- -----------------------------------------------------------------------------
3819 -- Support bits
3820 -- -----------------------------------------------------------------------------
3821
3822
3823 -- -----------------------------------------------------------------------------
3824 -- 'condIntReg' and 'condFltReg': condition codes into registers
3825
3826 -- Turn those condition codes into integers now (when they appear on
3827 -- the right hand side of an assignment).
3828 --
3829 -- (If applicable) Do not fill the delay slots here; you will confuse the
3830 -- register allocator.
3831
3832 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3833
3834 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3835
3836 #if alpha_TARGET_ARCH
3837 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3838 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3839 #endif /* alpha_TARGET_ARCH */
3840
3841 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3842
3843 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3844
3845 condIntReg cond x y = do
3846   CondCode _ cond cond_code <- condIntCode cond x y
3847   tmp <- getNewRegNat I8
3848   let
3849         code dst = cond_code `appOL` toOL [
3850                     SETCC cond (OpReg tmp),
3851                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3852                   ]
3853   -- in
3854   return (Any I32 code)
3855
3856 #endif
3857
3858 #if i386_TARGET_ARCH
3859
3860 condFltReg cond x y = do
3861   CondCode _ cond cond_code <- condFltCode cond x y
3862   tmp <- getNewRegNat I8
3863   let
3864         code dst = cond_code `appOL` toOL [
3865                     SETCC cond (OpReg tmp),
3866                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3867                   ]
3868   -- in
3869   return (Any I32 code)
3870
3871 #endif
3872
3873 #if x86_64_TARGET_ARCH
3874
3875 condFltReg cond x y = do
3876   CondCode _ cond cond_code <- condFltCode cond x y
3877   tmp1 <- getNewRegNat wordRep
3878   tmp2 <- getNewRegNat wordRep
3879   let
3880         -- We have to worry about unordered operands (eg. comparisons
3881         -- against NaN).  If the operands are unordered, the comparison
3882         -- sets the parity flag, carry flag and zero flag.
3883         -- All comparisons are supposed to return false for unordered
3884         -- operands except for !=, which returns true.
3885         --
3886         -- Optimisation: we don't have to test the parity flag if we
3887         -- know the test has already excluded the unordered case: eg >
3888         -- and >= test for a zero carry flag, which can only occur for
3889         -- ordered operands.
3890         --
3891         -- ToDo: by reversing comparisons we could avoid testing the
3892         -- parity flag in more cases.
3893
3894         code dst =
3895            cond_code `appOL`
3896              (case cond of
3897                 NE  -> or_unordered dst
3898                 GU  -> plain_test   dst
3899                 GEU -> plain_test   dst
3900                 _   -> and_ordered  dst)
3901
3902         plain_test dst = toOL [
3903                     SETCC cond (OpReg tmp1),
3904                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3905                  ]
3906         or_unordered dst = toOL [
3907                     SETCC cond (OpReg tmp1),
3908                     SETCC PARITY (OpReg tmp2),
3909                     OR I8 (OpReg tmp1) (OpReg tmp2),
3910                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3911                   ]
3912         and_ordered dst = toOL [
3913                     SETCC cond (OpReg tmp1),
3914                     SETCC NOTPARITY (OpReg tmp2),
3915                     AND I8 (OpReg tmp1) (OpReg tmp2),
3916                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3917                   ]
3918   -- in
3919   return (Any I32 code)
3920
3921 #endif
3922
3923 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3924
3925 #if sparc_TARGET_ARCH
3926
3927 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3928     (src, code) <- getSomeReg x
3929     tmp <- getNewRegNat I32
3930     let
3931         code__2 dst = code `appOL` toOL [
3932             SUB False True g0 (RIReg src) g0,
3933             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3934     return (Any I32 code__2)
3935
3936 condIntReg EQQ x y = do
3937     (src1, code1) <- getSomeReg x
3938     (src2, code2) <- getSomeReg y
3939     tmp1 <- getNewRegNat I32
3940     tmp2 <- getNewRegNat I32
3941     let
3942         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3943             XOR False src1 (RIReg src2) dst,
3944             SUB False True g0 (RIReg dst) g0,
3945             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3946     return (Any I32 code__2)
3947
3948 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3949     (src, code) <- getSomeReg x
3950     tmp <- getNewRegNat I32
3951     let
3952         code__2 dst = code `appOL` toOL [
3953             SUB False True g0 (RIReg src) g0,
3954             ADD True False g0 (RIImm (ImmInt 0)) dst]
3955     return (Any I32 code__2)
3956
3957 condIntReg NE x y = do
3958     (src1, code1) <- getSomeReg x
3959     (src2, code2) <- getSomeReg y
3960     tmp1 <- getNewRegNat I32
3961     tmp2 <- getNewRegNat I32
3962     let
3963         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3964             XOR False src1 (RIReg src2) dst,
3965             SUB False True g0 (RIReg dst) g0,
3966             ADD True False g0 (RIImm (ImmInt 0)) dst]
3967     return (Any I32 code__2)
3968
3969 condIntReg cond x y = do
3970     BlockId lbl1 <- getBlockIdNat
3971     BlockId lbl2 <- getBlockIdNat
3972     CondCode _ cond cond_code <- condIntCode cond x y
3973     let
3974         code__2 dst = cond_code `appOL` toOL [
3975             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3976             OR False g0 (RIImm (ImmInt 0)) dst,
3977             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3978             NEWBLOCK (BlockId lbl1),
3979             OR False g0 (RIImm (ImmInt 1)) dst,
3980             NEWBLOCK (BlockId lbl2)]
3981     return (Any I32 code__2)
3982
3983 condFltReg cond x y = do
3984     BlockId lbl1 <- getBlockIdNat
3985     BlockId lbl2 <- getBlockIdNat
3986     CondCode _ cond cond_code <- condFltCode cond x y
3987     let
3988         code__2 dst = cond_code `appOL` toOL [
3989             NOP,
3990             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3991             OR False g0 (RIImm (ImmInt 0)) dst,
3992             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3993             NEWBLOCK (BlockId lbl1),
3994             OR False g0 (RIImm (ImmInt 1)) dst,
3995             NEWBLOCK (BlockId lbl2)]
3996     return (Any I32 code__2)
3997
3998 #endif /* sparc_TARGET_ARCH */
3999
4000 #if powerpc_TARGET_ARCH
4001 condReg getCond = do
4002     lbl1 <- getBlockIdNat
4003     lbl2 <- getBlockIdNat
4004     CondCode _ cond cond_code <- getCond
4005     let
4006 {-        code dst = cond_code `appOL` toOL [
4007                 BCC cond lbl1,
4008                 LI dst (ImmInt 0),
4009                 BCC ALWAYS lbl2,
4010                 NEWBLOCK lbl1,
4011                 LI dst (ImmInt 1),
4012                 BCC ALWAYS lbl2,
4013                 NEWBLOCK lbl2
4014             ]-}
4015         code dst = cond_code
4016             `appOL` negate_code
4017             `appOL` toOL [
4018                 MFCR dst,
4019                 RLWINM dst dst (bit + 1) 31 31
4020             ]
4021
4022         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4023                     | otherwise = nilOL
4024
4025         (bit, do_negate) = case cond of
4026             LTT -> (0, False)
4027             LE  -> (1, True)
4028             EQQ -> (2, False)
4029             GE  -> (0, True)
4030             GTT -> (1, False)
4031
4032             NE  -> (2, True)
4033
4034             LU  -> (0, False)
4035             LEU -> (1, True)
4036             GEU -> (0, True)
4037             GU  -> (1, False)
4038
4039     return (Any I32 code)
4040
4041 condIntReg cond x y = condReg (condIntCode cond x y)
4042 condFltReg cond x y = condReg (condFltCode cond x y)
4043 #endif /* powerpc_TARGET_ARCH */
4044
4045
4046 -- -----------------------------------------------------------------------------
4047 -- 'trivial*Code': deal with trivial instructions
4048
4049 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4050 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4051 -- Only look for constants on the right hand side, because that's
4052 -- where the generic optimizer will have put them.
4053
4054 -- Similarly, for unary instructions, we don't have to worry about
4055 -- matching an StInt as the argument, because genericOpt will already
4056 -- have handled the constant-folding.
4057
4058 trivialCode
4059     :: MachRep
4060     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4061       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4062                      -> Maybe (Operand -> Operand -> Instr)
4063       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4064                      -> Maybe (Operand -> Operand -> Instr)
4065       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4066       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4067       ,)))))
4068     -> CmmExpr -> CmmExpr -- the two arguments
4069     -> NatM Register
4070
4071 #ifndef powerpc_TARGET_ARCH
4072 trivialFCode
4073     :: MachRep
4074     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4075       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4076       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4077       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4078       ,))))
4079     -> CmmExpr -> CmmExpr -- the two arguments
4080     -> NatM Register
4081 #endif
4082
4083 trivialUCode
4084     :: MachRep
4085     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4086       ,IF_ARCH_i386 ((Operand -> Instr)
4087       ,IF_ARCH_x86_64 ((Operand -> Instr)
4088       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4089       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4090       ,)))))
4091     -> CmmExpr  -- the one argument
4092     -> NatM Register
4093
4094 #ifndef powerpc_TARGET_ARCH
4095 trivialUFCode
4096     :: MachRep
4097     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4098       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4099       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4100       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4101       ,))))
4102     -> CmmExpr -- the one argument
4103     -> NatM Register
4104 #endif
4105
4106 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4107
4108 #if alpha_TARGET_ARCH
4109
4110 trivialCode instr x (StInt y)
4111   | fits8Bits y
4112   = getRegister x               `thenNat` \ register ->
4113     getNewRegNat IntRep         `thenNat` \ tmp ->
4114     let
4115         code = registerCode register tmp
4116         src1 = registerName register tmp
4117         src2 = ImmInt (fromInteger y)
4118         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4119     in
4120     return (Any IntRep code__2)
4121
4122 trivialCode instr x y
4123   = getRegister x               `thenNat` \ register1 ->
4124     getRegister y               `thenNat` \ register2 ->
4125     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4126     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4127     let
4128         code1 = registerCode register1 tmp1 []
4129         src1  = registerName register1 tmp1
4130         code2 = registerCode register2 tmp2 []
4131         src2  = registerName register2 tmp2
4132         code__2 dst = asmSeqThen [code1, code2] .
4133                      mkSeqInstr (instr src1 (RIReg src2) dst)
4134     in
4135     return (Any IntRep code__2)
4136
4137 ------------
4138 trivialUCode instr x
4139   = getRegister x               `thenNat` \ register ->
4140     getNewRegNat IntRep         `thenNat` \ tmp ->
4141     let
4142         code = registerCode register tmp
4143         src  = registerName register tmp
4144         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4145     in
4146     return (Any IntRep code__2)
4147
4148 ------------
4149 trivialFCode _ instr x y
4150   = getRegister x               `thenNat` \ register1 ->
4151     getRegister y               `thenNat` \ register2 ->
4152     getNewRegNat F64    `thenNat` \ tmp1 ->
4153     getNewRegNat F64    `thenNat` \ tmp2 ->
4154     let
4155         code1 = registerCode register1 tmp1
4156         src1  = registerName register1 tmp1
4157
4158         code2 = registerCode register2 tmp2
4159         src2  = registerName register2 tmp2
4160
4161         code__2 dst = asmSeqThen [code1 [], code2 []] .
4162                       mkSeqInstr (instr src1 src2 dst)
4163     in
4164     return (Any F64 code__2)
4165
4166 trivialUFCode _ instr x
4167   = getRegister x               `thenNat` \ register ->
4168     getNewRegNat F64    `thenNat` \ tmp ->
4169     let
4170         code = registerCode register tmp
4171         src  = registerName register tmp
4172         code__2 dst = code . mkSeqInstr (instr src dst)
4173     in
4174     return (Any F64 code__2)
4175
4176 #endif /* alpha_TARGET_ARCH */
4177
4178 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4179
4180 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4181
4182 {-
4183 The Rules of the Game are:
4184
4185 * You cannot assume anything about the destination register dst;
4186   it may be anything, including a fixed reg.
4187
4188 * You may compute an operand into a fixed reg, but you may not
4189   subsequently change the contents of that fixed reg.  If you
4190   want to do so, first copy the value either to a temporary
4191   or into dst.  You are free to modify dst even if it happens
4192   to be a fixed reg -- that's not your problem.
4193
4194 * You cannot assume that a fixed reg will stay live over an
4195   arbitrary computation.  The same applies to the dst reg.
4196
4197 * Temporary regs obtained from getNewRegNat are distinct from
4198   each other and from all other regs, and stay live over
4199   arbitrary computations.
4200
4201 --------------------
4202
4203 SDM's version of The Rules:
4204
4205 * If getRegister returns Any, that means it can generate correct
4206   code which places the result in any register, period.  Even if that
4207   register happens to be read during the computation.
4208
4209   Corollary #1: this means that if you are generating code for an
4210   operation with two arbitrary operands, you cannot assign the result
4211   of the first operand into the destination register before computing
4212   the second operand.  The second operand might require the old value
4213   of the destination register.
4214
4215   Corollary #2: A function might be able to generate more efficient
4216   code if it knows the destination register is a new temporary (and
4217   therefore not read by any of the sub-computations).
4218
4219 * If getRegister returns Any, then the code it generates may modify only:
4220         (a) fresh temporaries
4221         (b) the destination register
4222         (c) known registers (eg. %ecx is used by shifts)
4223   In particular, it may *not* modify global registers, unless the global
4224   register happens to be the destination register.
4225 -}
4226
4227 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4228   | not (is64BitLit lit_a) = do
4229   b_code <- getAnyReg b
4230   let
4231        code dst
4232          = b_code dst `snocOL`
4233            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4234   -- in
4235   return (Any rep code)
4236
4237 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4238
4239 -- This is re-used for floating pt instructions too.
4240 genTrivialCode rep instr a b = do
4241   (b_op, b_code) <- getNonClobberedOperand b
4242   a_code <- getAnyReg a
4243   tmp <- getNewRegNat rep
4244   let
4245      -- We want the value of b to stay alive across the computation of a.
4246      -- But, we want to calculate a straight into the destination register,
4247      -- because the instruction only has two operands (dst := dst `op` src).
4248      -- The troublesome case is when the result of b is in the same register
4249      -- as the destination reg.  In this case, we have to save b in a
4250      -- new temporary across the computation of a.
4251      code dst
4252         | dst `regClashesWithOp` b_op =
4253                 b_code `appOL`
4254                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4255                 a_code dst `snocOL`
4256                 instr (OpReg tmp) (OpReg dst)
4257         | otherwise =
4258                 b_code `appOL`
4259                 a_code dst `snocOL`
4260                 instr b_op (OpReg dst)
4261   -- in
4262   return (Any rep code)
4263
4264 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4265 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4266 reg `regClashesWithOp` _            = False
4267
4268 -----------
4269
4270 trivialUCode rep instr x = do
4271   x_code <- getAnyReg x
4272   let
4273      code dst =
4274         x_code dst `snocOL`
4275         instr (OpReg dst)
4276   -- in
4277   return (Any rep code)
4278
4279 -----------
4280
4281 #if i386_TARGET_ARCH
4282
4283 trivialFCode pk instr x y = do
4284   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4285   (y_reg, y_code) <- getSomeReg y
4286   let
4287      code dst =
4288         x_code `appOL`
4289         y_code `snocOL`
4290         instr pk x_reg y_reg dst
4291   -- in
4292   return (Any pk code)
4293
4294 #endif
4295
4296 #if x86_64_TARGET_ARCH
4297
4298 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4299
4300 #endif
4301
4302 -------------
4303
4304 trivialUFCode rep instr x = do
4305   (x_reg, x_code) <- getSomeReg x
4306   let
4307      code dst =
4308         x_code `snocOL`
4309         instr x_reg dst
4310   -- in
4311   return (Any rep code)
4312
4313 #endif /* i386_TARGET_ARCH */
4314
4315 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4316
4317 #if sparc_TARGET_ARCH
4318
4319 trivialCode pk instr x (CmmLit (CmmInt y d))
4320   | fits13Bits y
4321   = do
4322       (src1, code) <- getSomeReg x
4323       tmp <- getNewRegNat I32
4324       let
4325         src2 = ImmInt (fromInteger y)
4326         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4327       return (Any I32 code__2)
4328
4329 trivialCode pk instr x y = do
4330     (src1, code1) <- getSomeReg x
4331     (src2, code2) <- getSomeReg y
4332     tmp1 <- getNewRegNat I32
4333     tmp2 <- getNewRegNat I32
4334     let
4335         code__2 dst = code1 `appOL` code2 `snocOL`
4336                       instr src1 (RIReg src2) dst
4337     return (Any I32 code__2)
4338
4339 ------------
4340 trivialFCode pk instr x y = do
4341     (src1, code1) <- getSomeReg x
4342     (src2, code2) <- getSomeReg y
4343     tmp1 <- getNewRegNat (cmmExprRep x)
4344     tmp2 <- getNewRegNat (cmmExprRep y)
4345     tmp <- getNewRegNat F64
4346     let
4347         promote x = FxTOy F32 F64 x tmp
4348
4349         pk1   = cmmExprRep x
4350         pk2   = cmmExprRep y
4351
4352         code__2 dst =
4353                 if pk1 == pk2 then
4354                     code1 `appOL` code2 `snocOL`
4355                     instr pk src1 src2 dst
4356                 else if pk1 == F32 then
4357                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4358                     instr F64 tmp src2 dst
4359                 else
4360                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4361                     instr F64 src1 tmp dst
4362     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4363
4364 ------------
4365 trivialUCode pk instr x = do
4366     (src, code) <- getSomeReg x
4367     tmp <- getNewRegNat pk
4368     let
4369         code__2 dst = code `snocOL` instr (RIReg src) dst
4370     return (Any pk code__2)
4371
4372 -------------
4373 trivialUFCode pk instr x = do
4374     (src, code) <- getSomeReg x
4375     tmp <- getNewRegNat pk
4376     let
4377         code__2 dst = code `snocOL` instr src dst
4378     return (Any pk code__2)
4379
4380 #endif /* sparc_TARGET_ARCH */
4381
4382 #if powerpc_TARGET_ARCH
4383
4384 {-
4385 Wolfgang's PowerPC version of The Rules:
4386
4387 A slightly modified version of The Rules to take advantage of the fact
4388 that PowerPC instructions work on all registers and don't implicitly
4389 clobber any fixed registers.
4390
4391 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4392
4393 * If getRegister returns Any, then the code it generates may modify only:
4394         (a) fresh temporaries
4395         (b) the destination register
4396   It may *not* modify global registers, unless the global
4397   register happens to be the destination register.
4398   It may not clobber any other registers. In fact, only ccalls clobber any
4399   fixed registers.
4400   Also, it may not modify the counter register (used by genCCall).
4401
4402   Corollary: If a getRegister for a subexpression returns Fixed, you need
4403   not move it to a fresh temporary before evaluating the next subexpression.
4404   The Fixed register won't be modified.
4405   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4406
4407 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4408   the value of the destination register.
4409 -}
4410
4411 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4412     | Just imm <- makeImmediate rep signed y
4413     = do
4414         (src1, code1) <- getSomeReg x
4415         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4416         return (Any rep code)
4417
4418 trivialCode rep signed instr x y = do
4419     (src1, code1) <- getSomeReg x
4420     (src2, code2) <- getSomeReg y
4421     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4422     return (Any rep code)
4423
4424 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4425     -> CmmExpr -> CmmExpr -> NatM Register
4426 trivialCodeNoImm rep instr x y = do
4427     (src1, code1) <- getSomeReg x
4428     (src2, code2) <- getSomeReg y
4429     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4430     return (Any rep code)
4431
4432 trivialUCode rep instr x = do
4433     (src, code) <- getSomeReg x
4434     let code' dst = code `snocOL` instr dst src
4435     return (Any rep code')
4436
4437 -- There is no "remainder" instruction on the PPC, so we have to do
4438 -- it the hard way.
4439 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4440
4441 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4442     -> CmmExpr -> CmmExpr -> NatM Register
4443 remainderCode rep div x y = do
4444     (src1, code1) <- getSomeReg x
4445     (src2, code2) <- getSomeReg y
4446     let code dst = code1 `appOL` code2 `appOL` toOL [
4447                 div dst src1 src2,
4448                 MULLW dst dst (RIReg src2),
4449                 SUBF dst dst src1
4450             ]
4451     return (Any rep code)
4452
4453 #endif /* powerpc_TARGET_ARCH */
4454
4455
4456 -- -----------------------------------------------------------------------------
4457 --  Coercing to/from integer/floating-point...
4458
4459 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4460 -- conversions.  We have to store temporaries in memory to move
4461 -- between the integer and the floating point register sets.
4462
4463 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4464 -- pretend, on sparc at least, that double and float regs are seperate
4465 -- kinds, so the value has to be computed into one kind before being
4466 -- explicitly "converted" to live in the other kind.
4467
4468 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4469 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4470
4471 #if sparc_TARGET_ARCH
4472 coerceDbl2Flt :: CmmExpr -> NatM Register
4473 coerceFlt2Dbl :: CmmExpr -> NatM Register
4474 #endif
4475
4476 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4477
4478 #if alpha_TARGET_ARCH
4479
4480 coerceInt2FP _ x
4481   = getRegister x               `thenNat` \ register ->
4482     getNewRegNat IntRep         `thenNat` \ reg ->
4483     let
4484         code = registerCode register reg
4485         src  = registerName register reg
4486
4487         code__2 dst = code . mkSeqInstrs [
4488             ST Q src (spRel 0),
4489             LD TF dst (spRel 0),
4490             CVTxy Q TF dst dst]
4491     in
4492     return (Any F64 code__2)
4493
4494 -------------
4495 coerceFP2Int x
4496   = getRegister x               `thenNat` \ register ->
4497     getNewRegNat F64    `thenNat` \ tmp ->
4498     let
4499         code = registerCode register tmp
4500         src  = registerName register tmp
4501
4502         code__2 dst = code . mkSeqInstrs [
4503             CVTxy TF Q src tmp,
4504             ST TF tmp (spRel 0),
4505             LD Q dst (spRel 0)]
4506     in
4507     return (Any IntRep code__2)
4508
4509 #endif /* alpha_TARGET_ARCH */
4510
4511 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4512
4513 #if i386_TARGET_ARCH
4514
4515 coerceInt2FP from to x = do
4516   (x_reg, x_code) <- getSomeReg x
4517   let
4518         opc  = case to of F32 -> GITOF; F64 -> GITOD
4519         code dst = x_code `snocOL` opc x_reg dst
4520         -- ToDo: works for non-I32 reps?
4521   -- in
4522   return (Any to code)
4523
4524 ------------
4525
4526 coerceFP2Int from to x = do
4527   (x_reg, x_code) <- getSomeReg x
4528   let
4529         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4530         code dst = x_code `snocOL` opc x_reg dst
4531         -- ToDo: works for non-I32 reps?
4532   -- in
4533   return (Any to code)
4534
4535 #endif /* i386_TARGET_ARCH */
4536
4537 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4538
4539 #if x86_64_TARGET_ARCH
4540
4541 coerceFP2Int from to x = do
4542   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4543   let
4544         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4545         code dst = x_code `snocOL` opc x_op dst
4546   -- in
4547   return (Any to code) -- works even if the destination rep is <I32
4548
4549 coerceInt2FP from to x = do
4550   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4551   let
4552         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4553         code dst = x_code `snocOL` opc x_op dst
4554   -- in
4555   return (Any to code) -- works even if the destination rep is <I32
4556
4557 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4558 coerceFP2FP to x = do
4559   (x_reg, x_code) <- getSomeReg x
4560   let
4561         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4562         code dst = x_code `snocOL` opc x_reg dst
4563   -- in
4564   return (Any to code)
4565
4566 #endif
4567
4568 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4569
4570 #if sparc_TARGET_ARCH
4571
4572 coerceInt2FP pk1 pk2 x = do
4573     (src, code) <- getSomeReg x
4574     let
4575         code__2 dst = code `appOL` toOL [
4576             ST pk1 src (spRel (-2)),
4577             LD pk1 (spRel (-2)) dst,
4578             FxTOy pk1 pk2 dst dst]
4579     return (Any pk2 code__2)
4580
4581 ------------
4582 coerceFP2Int pk fprep x = do
4583     (src, code) <- getSomeReg x
4584     reg <- getNewRegNat fprep
4585     tmp <- getNewRegNat pk
4586     let
4587         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4588             code `appOL` toOL [
4589             FxTOy fprep pk src tmp,
4590             ST pk tmp (spRel (-2)),
4591             LD pk (spRel (-2)) dst]
4592     return (Any pk code__2)
4593
4594 ------------
4595 coerceDbl2Flt x = do
4596     (src, code) <- getSomeReg x
4597     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4598
4599 ------------
4600 coerceFlt2Dbl x = do
4601     (src, code) <- getSomeReg x
4602     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4603
4604 #endif /* sparc_TARGET_ARCH */
4605
4606 #if powerpc_TARGET_ARCH
4607 coerceInt2FP fromRep toRep x = do
4608     (src, code) <- getSomeReg x
4609     lbl <- getNewLabelNat
4610     itmp <- getNewRegNat I32
4611     ftmp <- getNewRegNat F64
4612     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4613     Amode addr addr_code <- getAmode dynRef
4614     let
4615         code' dst = code `appOL` maybe_exts `appOL` toOL [
4616                 LDATA ReadOnlyData
4617                                 [CmmDataLabel lbl,
4618                                  CmmStaticLit (CmmInt 0x43300000 I32),
4619                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4620                 XORIS itmp src (ImmInt 0x8000),
4621                 ST I32 itmp (spRel 3),
4622                 LIS itmp (ImmInt 0x4330),
4623                 ST I32 itmp (spRel 2),
4624                 LD F64 ftmp (spRel 2)
4625             ] `appOL` addr_code `appOL` toOL [
4626                 LD F64 dst addr,
4627                 FSUB F64 dst ftmp dst
4628             ] `appOL` maybe_frsp dst
4629
4630         maybe_exts = case fromRep of
4631                         I8 ->  unitOL $ EXTS I8 src src
4632                         I16 -> unitOL $ EXTS I16 src src
4633                         I32 -> nilOL
4634         maybe_frsp dst = case toRep of
4635                         F32 -> unitOL $ FRSP dst dst
4636                         F64 -> nilOL
4637     return (Any toRep code')
4638
4639 coerceFP2Int fromRep toRep x = do
4640     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4641     (src, code) <- getSomeReg x
4642     tmp <- getNewRegNat F64
4643     let
4644         code' dst = code `appOL` toOL [
4645                 -- convert to int in FP reg
4646             FCTIWZ tmp src,
4647                 -- store value (64bit) from FP to stack
4648             ST F64 tmp (spRel 2),
4649                 -- read low word of value (high word is undefined)
4650             LD I32 dst (spRel 3)]
4651     return (Any toRep code')
4652 #endif /* powerpc_TARGET_ARCH */
4653
4654
4655 -- -----------------------------------------------------------------------------
4656 -- eXTRA_STK_ARGS_HERE
4657
4658 -- We (allegedly) put the first six C-call arguments in registers;
4659 -- where do we start putting the rest of them?
4660
4661 -- Moved from MachInstrs (SDM):
4662
4663 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4664 eXTRA_STK_ARGS_HERE :: Int
4665 eXTRA_STK_ARGS_HERE
4666   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4667 #endif
4668