compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32
  33 -- The rest:
  34 import StaticFlags      ( opt_PIC )
  35 import ForeignCall      ( CCallConv(..) )
  36 import OrdList
  37 import Pretty
  38 import Outputable
  39 import FastString
  40 import FastTypes        ( isFastTrue )
  41 import Constants        ( wORD_SIZE )
  42
  43 #ifdef DEBUG
  44 import Outputable       ( assertPanic )
  45 import Debug.Trace      ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Data.Maybe       ( fromJust )
  50 import Data.Bits
  51 import Data.Word
  52
  53 -- -----------------------------------------------------------------------------
  54 -- Top-level of the instruction selector
  55
  56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  57 -- They are really trees of insns to facilitate fast appending, where a
  58 -- left-to-right traversal (pre-order?) yields the insns in the correct
  59 -- order.
  60
  61 type InstrBlock = OrdList Instr
  62
  63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  64 cmmTopCodeGen (CmmProc info lab params blocks) = do
  65   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  66   picBaseMb <- getPicBaseMaybeNat
  67   let proc = CmmProc info lab params (concat nat_blocks)
  68       tops = proc : concat statics
  69   case picBaseMb of
  70       Just picBase -> initializePicBase picBase tops
  71       Nothing -> return tops
  72
  73 cmmTopCodeGen (CmmData sec dat) = do
  74   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  75
  76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  77 basicBlockCodeGen (BasicBlock id stmts) = do
  78   instrs <- stmtsToInstrs stmts
  79   -- code generation may introduce new basic block boundaries, which
  80   -- are indicated by the NEWBLOCK instruction.  We must split up the
  81   -- instruction stream into basic blocks again.  Also, we extract
  82   -- LDATAs here too.
  83   let
  84         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  85
  86         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  87           = ([], BasicBlock id instrs : blocks, statics)
  88         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  89           = (instrs, blocks, CmmData sec dat:statics)
  90         mkBlocks instr (instrs,blocks,statics)
  91           = (instr:instrs, blocks, statics)
  92   -- in
  93   return (BasicBlock id top : other_blocks, statics)
  94
  95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  96 stmtsToInstrs stmts
  97    = do instrss <- mapM stmtToInstrs stmts
  98         return (concatOL instrss)
  99
 100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 101 stmtToInstrs stmt = case stmt of
 102     CmmNop         -> return nilOL
 103     CmmComment s   -> return (unitOL (COMMENT s))
 104
 105     CmmAssign reg src
 106       | isFloatingRep kind -> assignReg_FltCode kind reg src
 107 #if WORD_SIZE_IN_BITS==32
 108       | kind == I64        -> assignReg_I64Code      reg src
 109 #endif
 110       | otherwise          -> assignReg_IntCode kind reg src
 111         where kind = cmmRegRep reg
 112
 113     CmmStore addr src
 114       | isFloatingRep kind -> assignMem_FltCode kind addr src
 115 #if WORD_SIZE_IN_BITS==32
 116       | kind == I64      -> assignMem_I64Code      addr src
 117 #endif
 118       | otherwise        -> assignMem_IntCode kind addr src
 119         where kind = cmmExprRep src
 120
 121     CmmCall target result_regs args vols
 122        -> genCCall target result_regs args vols
 123
 124     CmmBranch id          -> genBranch id
 125     CmmCondBranch arg id  -> genCondJump id arg
 126     CmmSwitch arg ids     -> genSwitch arg ids
 127     CmmJump arg params    -> genJump arg
 128
 129 -- -----------------------------------------------------------------------------
 130 -- General things for putting together code sequences
 131
 132 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 133 -- CmmExprs into CmmRegOff?
 134 mangleIndexTree :: CmmExpr -> CmmExpr
 135 mangleIndexTree (CmmRegOff reg off)
 136   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 137   where rep = cmmRegRep reg
 138
 139 -- -----------------------------------------------------------------------------
 140 --  Code gen for 64-bit arithmetic on 32-bit platforms
 141
 142 {-
 143 Simple support for generating 64-bit code (ie, 64 bit values and 64
 144 bit assignments) on 32-bit platforms.  Unlike the main code generator
 145 we merely shoot for generating working code as simply as possible, and
 146 pay little attention to code quality.  Specifically, there is no
 147 attempt to deal cleverly with the fixed-vs-floating register
 148 distinction; all values are generated into (pairs of) floating
 149 registers, even if this would mean some redundant reg-reg moves as a
 150 result.  Only one of the VRegUniques is returned, since it will be
 151 of the VRegUniqueLo form, and the upper-half VReg can be determined
 152 by applying getHiVRegFromLo to it.
 153 -}
 154
 155 data ChildCode64        -- a.k.a "Register64"
 156    = ChildCode64
 157         InstrBlock      -- code
 158         Reg             -- the lower 32-bit temporary which contains the
 159                         -- result; use getHiVRegFromLo to find the other
 160                         -- VRegUnique.  Rules of this simplified insn
 161                         -- selection game are therefore that the returned
 162                         -- Reg may be modified
 163
 164 #if WORD_SIZE_IN_BITS==32
 165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 166 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 167 #endif
 168
 169 #ifndef x86_64_TARGET_ARCH
 170 iselExpr64        :: CmmExpr -> NatM ChildCode64
 171 #endif
 172
 173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 174
 175 #if i386_TARGET_ARCH
 176
 177 assignMem_I64Code addrTree valueTree = do
 178   Amode addr addr_code <- getAmode addrTree
 179   ChildCode64 vcode rlo <- iselExpr64 valueTree
 180   let
 181         rhi = getHiVRegFromLo rlo
 182
 183         -- Little-endian store
 184         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 185         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 186   -- in
 187   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 188
 189
 190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 191    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 192    let
 193          r_dst_lo = mkVReg u_dst I32
 194          r_dst_hi = getHiVRegFromLo r_dst_lo
 195          r_src_hi = getHiVRegFromLo r_src_lo
 196          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 197          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 198    -- in
 199    return (
 200         vcode `snocOL` mov_lo `snocOL` mov_hi
 201      )
 202
 203 assignReg_I64Code lvalue valueTree
 204    = panic "assignReg_I64Code(i386): invalid lvalue"
 205
 206 ------------
 207
 208 iselExpr64 (CmmLit (CmmInt i _)) = do
 209   (rlo,rhi) <- getNewRegPairNat I32
 210   let
 211         r = fromIntegral (fromIntegral i :: Word32)
 212         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 213         code = toOL [
 214                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 215                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 216                 ]
 217   -- in
 218   return (ChildCode64 code rlo)
 219
 220 iselExpr64 (CmmLoad addrTree I64) = do
 221    Amode addr addr_code <- getAmode addrTree
 222    (rlo,rhi) <- getNewRegPairNat I32
 223    let
 224         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 225         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 226    -- in
 227    return (
 228             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 229                         rlo
 230      )
 231
 232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 233    = return (ChildCode64 nilOL (mkVReg vu I32))
 234
 235 -- we handle addition, but rather badly
 236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 237    ChildCode64 code1 r1lo <- iselExpr64 e1
 238    (rlo,rhi) <- getNewRegPairNat I32
 239    let
 240         r = fromIntegral (fromIntegral i :: Word32)
 241         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 242         r1hi = getHiVRegFromLo r1lo
 243         code =  code1 `appOL`
 244                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 245                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 246                        MOV I32 (OpReg r1hi) (OpReg rhi),
 247                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 248    -- in
 249    return (ChildCode64 code rlo)
 250
 251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 252    ChildCode64 code1 r1lo <- iselExpr64 e1
 253    ChildCode64 code2 r2lo <- iselExpr64 e2
 254    (rlo,rhi) <- getNewRegPairNat I32
 255    let
 256         r1hi = getHiVRegFromLo r1lo
 257         r2hi = getHiVRegFromLo r2lo
 258         code =  code1 `appOL`
 259                 code2 `appOL`
 260                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 261                        ADD I32 (OpReg r2lo) (OpReg rlo),
 262                        MOV I32 (OpReg r1hi) (OpReg rhi),
 263                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 264    -- in
 265    return (ChildCode64 code rlo)
 266
 267 iselExpr64 expr
 268    = pprPanic "iselExpr64(i386)" (ppr expr)
 269
 270 #endif /* i386_TARGET_ARCH */
 271
 272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 273
 274 #if sparc_TARGET_ARCH
 275
 276 assignMem_I64Code addrTree valueTree = do
 277      Amode addr addr_code <- getAmode addrTree
 278      ChildCode64 vcode rlo <- iselExpr64 valueTree
 279      (src, code) <- getSomeReg addrTree
 280      let
 281          rhi = getHiVRegFromLo rlo
 282          -- Big-endian store
 283          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 284          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 285      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 286
 287 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 288      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 289      let
 290          r_dst_lo = mkVReg u_dst pk
 291          r_dst_hi = getHiVRegFromLo r_dst_lo
 292          r_src_hi = getHiVRegFromLo r_src_lo
 293          mov_lo = mkMOV r_src_lo r_dst_lo
 294          mov_hi = mkMOV r_src_hi r_dst_hi
 295          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 296      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 297 assignReg_I64Code lvalue valueTree
 298    = panic "assignReg_I64Code(sparc): invalid lvalue"
 299
 300
 301 -- Don't delete this -- it's very handy for debugging.
 302 --iselExpr64 expr
 303 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 304 --   = panic "iselExpr64(???)"
 305
 306 iselExpr64 (CmmLoad addrTree I64) = do
 307      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 308      rlo <- getNewRegNat I32
 309      let rhi = getHiVRegFromLo rlo
 310          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 311          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 312      return (
 313             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 314                          rlo
 315           )
 316
 317 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 318      r_dst_lo <-  getNewRegNat I32
 319      let r_dst_hi = getHiVRegFromLo r_dst_lo
 320          r_src_lo = mkVReg uq I32
 321          r_src_hi = getHiVRegFromLo r_src_lo
 322          mov_lo = mkMOV r_src_lo r_dst_lo
 323          mov_hi = mkMOV r_src_hi r_dst_hi
 324          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 325      return (
 326             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 327          )
 328
 329 iselExpr64 expr
 330    = pprPanic "iselExpr64(sparc)" (ppr expr)
 331
 332 #endif /* sparc_TARGET_ARCH */
 333
 334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 335
 336 #if powerpc_TARGET_ARCH
 337
 338 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 339 getI64Amodes addrTree = do
 340     Amode hi_addr addr_code <- getAmode addrTree
 341     case addrOffset hi_addr 4 of
 342         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 343         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 344                            return (AddrRegImm hi_ptr (ImmInt 0),
 345                                    AddrRegImm hi_ptr (ImmInt 4),
 346                                    code)
 347
 348 assignMem_I64Code addrTree valueTree = do
 349         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 350         ChildCode64 vcode rlo <- iselExpr64 valueTree
 351         let
 352                 rhi = getHiVRegFromLo rlo
 353
 354                 -- Big-endian store
 355                 mov_hi = ST I32 rhi hi_addr
 356                 mov_lo = ST I32 rlo lo_addr
 357         -- in
 358         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 359
 360 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 361    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 362    let
 363          r_dst_lo = mkVReg u_dst I32
 364          r_dst_hi = getHiVRegFromLo r_dst_lo
 365          r_src_hi = getHiVRegFromLo r_src_lo
 366          mov_lo = MR r_dst_lo r_src_lo
 367          mov_hi = MR r_dst_hi r_src_hi
 368    -- in
 369    return (
 370         vcode `snocOL` mov_lo `snocOL` mov_hi
 371      )
 372
 373 assignReg_I64Code lvalue valueTree
 374    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 375
 376
 377 -- Don't delete this -- it's very handy for debugging.
 378 --iselExpr64 expr
 379 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 380 --   = panic "iselExpr64(???)"
 381
 382 iselExpr64 (CmmLoad addrTree I64) = do
 383     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 384     (rlo, rhi) <- getNewRegPairNat I32
 385     let mov_hi = LD I32 rhi hi_addr
 386         mov_lo = LD I32 rlo lo_addr
 387     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 388                          rlo
 389
 390 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 391    = return (ChildCode64 nilOL (mkVReg vu I32))
 392
 393 iselExpr64 (CmmLit (CmmInt i _)) = do
 394   (rlo,rhi) <- getNewRegPairNat I32
 395   let
 396         half0 = fromIntegral (fromIntegral i :: Word16)
 397         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 398         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 399         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 400
 401         code = toOL [
 402                 LIS rlo (ImmInt half1),
 403                 OR rlo rlo (RIImm $ ImmInt half0),
 404                 LIS rhi (ImmInt half3),
 405                 OR rlo rlo (RIImm $ ImmInt half2)
 406                 ]
 407   -- in
 408   return (ChildCode64 code rlo)
 409
 410 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 411    ChildCode64 code1 r1lo <- iselExpr64 e1
 412    ChildCode64 code2 r2lo <- iselExpr64 e2
 413    (rlo,rhi) <- getNewRegPairNat I32
 414    let
 415         r1hi = getHiVRegFromLo r1lo
 416         r2hi = getHiVRegFromLo r2lo
 417         code =  code1 `appOL`
 418                 code2 `appOL`
 419                 toOL [ ADDC rlo r1lo r2lo,
 420                        ADDE rhi r1hi r2hi ]
 421    -- in
 422    return (ChildCode64 code rlo)
 423
 424 iselExpr64 expr
 425    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 426
 427 #endif /* powerpc_TARGET_ARCH */
 428
 429
 430 -- -----------------------------------------------------------------------------
 431 -- The 'Register' type
 432
 433 -- 'Register's passed up the tree.  If the stix code forces the register
 434 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 435 -- otherwise, it comes out as @Any@, and the parent can decide which
 436 -- register to put it in.
 437
 438 data Register
 439   = Fixed   MachRep Reg InstrBlock
 440   | Any     MachRep (Reg -> InstrBlock)
 441
 442 swizzleRegisterRep :: Register -> MachRep -> Register
 443 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 444 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 445
 446
 447 -- -----------------------------------------------------------------------------
 448 -- Utils based on getRegister, below
 449
 450 -- The dual to getAnyReg: compute an expression into a register, but
 451 -- we don't mind which one it is.
 452 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 453 getSomeReg expr = do
 454   r <- getRegister expr
 455   case r of
 456     Any rep code -> do
 457         tmp <- getNewRegNat rep
 458         return (tmp, code tmp)
 459     Fixed _ reg code ->
 460         return (reg, code)
 461
 462 -- -----------------------------------------------------------------------------
 463 -- Grab the Reg for a CmmReg
 464
 465 getRegisterReg :: CmmReg -> Reg
 466
 467 getRegisterReg (CmmLocal (LocalReg u pk))
 468   = mkVReg u pk
 469
 470 getRegisterReg (CmmGlobal mid)
 471   = case get_GlobalReg_reg_or_addr mid of
 472        Left (RealReg rrno) -> RealReg rrno
 473        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 474           -- By this stage, the only MagicIds remaining should be the
 475           -- ones which map to a real machine register on this
 476           -- platform.  Hence ...
 477
 478
 479 -- -----------------------------------------------------------------------------
 480 -- Generate code to get a subtree into a Register
 481
 482 -- Don't delete this -- it's very handy for debugging.
 483 --getRegister expr
 484 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 485 --   = panic "getRegister(???)"
 486
 487 getRegister :: CmmExpr -> NatM Register
 488
 489 #if !x86_64_TARGET_ARCH
 490     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 491     -- register, it can only be used for rip-relative addressing.
 492 getRegister (CmmReg (CmmGlobal PicBaseReg))
 493   = do
 494       reg <- getPicBaseNat wordRep
 495       return (Fixed wordRep reg nilOL)
 496 #endif
 497
 498 getRegister (CmmReg reg)
 499   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 500
 501 getRegister tree@(CmmRegOff _ _)
 502   = getRegister (mangleIndexTree tree)
 503
 504
 505 #if WORD_SIZE_IN_BITS==32
 506     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 507     -- TO_W_(x), TO_W_(x >> 32)
 508
 509 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 510              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 511   ChildCode64 code rlo <- iselExpr64 x
 512   return $ Fixed I32 (getHiVRegFromLo rlo) code
 513
 514 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 515              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 516   ChildCode64 code rlo <- iselExpr64 x
 517   return $ Fixed I32 (getHiVRegFromLo rlo) code
 518
 519 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 520   ChildCode64 code rlo <- iselExpr64 x
 521   return $ Fixed I32 rlo code
 522
 523 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 524   ChildCode64 code rlo <- iselExpr64 x
 525   return $ Fixed I32 rlo code
 526
 527 #endif
 528
 529 -- end of machine-"independent" bit; here we go on the rest...
 530
 531 #if alpha_TARGET_ARCH
 532
 533 getRegister (StDouble d)
 534   = getBlockIdNat                   `thenNat` \ lbl ->
 535     getNewRegNat PtrRep             `thenNat` \ tmp ->
 536     let code dst = mkSeqInstrs [
 537             LDATA RoDataSegment lbl [
 538                     DATA TF [ImmLab (rational d)]
 539                 ],
 540             LDA tmp (AddrImm (ImmCLbl lbl)),
 541             LD TF dst (AddrReg tmp)]
 542     in
 543         return (Any F64 code)
 544
 545 getRegister (StPrim primop [x]) -- unary PrimOps
 546   = case primop of
 547       IntNegOp -> trivialUCode (NEG Q False) x
 548
 549       NotOp    -> trivialUCode NOT x
 550
 551       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 552       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 553
 554       OrdOp -> coerceIntCode IntRep x
 555       ChrOp -> chrCode x
 556
 557       Float2IntOp  -> coerceFP2Int    x
 558       Int2FloatOp  -> coerceInt2FP pr x
 559       Double2IntOp -> coerceFP2Int    x
 560       Int2DoubleOp -> coerceInt2FP pr x
 561
 562       Double2FloatOp -> coerceFltCode x
 563       Float2DoubleOp -> coerceFltCode x
 564
 565       other_op -> getRegister (StCall fn CCallConv F64 [x])
 566         where
 567           fn = case other_op of
 568                  FloatExpOp    -> FSLIT("exp")
 569                  FloatLogOp    -> FSLIT("log")
 570                  FloatSqrtOp   -> FSLIT("sqrt")
 571                  FloatSinOp    -> FSLIT("sin")
 572                  FloatCosOp    -> FSLIT("cos")
 573                  FloatTanOp    -> FSLIT("tan")
 574                  FloatAsinOp   -> FSLIT("asin")
 575                  FloatAcosOp   -> FSLIT("acos")
 576                  FloatAtanOp   -> FSLIT("atan")
 577                  FloatSinhOp   -> FSLIT("sinh")
 578                  FloatCoshOp   -> FSLIT("cosh")
 579                  FloatTanhOp   -> FSLIT("tanh")
 580                  DoubleExpOp   -> FSLIT("exp")
 581                  DoubleLogOp   -> FSLIT("log")
 582                  DoubleSqrtOp  -> FSLIT("sqrt")
 583                  DoubleSinOp   -> FSLIT("sin")
 584                  DoubleCosOp   -> FSLIT("cos")
 585                  DoubleTanOp   -> FSLIT("tan")
 586                  DoubleAsinOp  -> FSLIT("asin")
 587                  DoubleAcosOp  -> FSLIT("acos")
 588                  DoubleAtanOp  -> FSLIT("atan")
 589                  DoubleSinhOp  -> FSLIT("sinh")
 590                  DoubleCoshOp  -> FSLIT("cosh")
 591                  DoubleTanhOp  -> FSLIT("tanh")
 592   where
 593     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 594
 595 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 596   = case primop of
 597       CharGtOp -> trivialCode (CMP LTT) y x
 598       CharGeOp -> trivialCode (CMP LE) y x
 599       CharEqOp -> trivialCode (CMP EQQ) x y
 600       CharNeOp -> int_NE_code x y
 601       CharLtOp -> trivialCode (CMP LTT) x y
 602       CharLeOp -> trivialCode (CMP LE) x y
 603
 604       IntGtOp  -> trivialCode (CMP LTT) y x
 605       IntGeOp  -> trivialCode (CMP LE) y x
 606       IntEqOp  -> trivialCode (CMP EQQ) x y
 607       IntNeOp  -> int_NE_code x y
 608       IntLtOp  -> trivialCode (CMP LTT) x y
 609       IntLeOp  -> trivialCode (CMP LE) x y
 610
 611       WordGtOp -> trivialCode (CMP ULT) y x
 612       WordGeOp -> trivialCode (CMP ULE) x y
 613       WordEqOp -> trivialCode (CMP EQQ)  x y
 614       WordNeOp -> int_NE_code x y
 615       WordLtOp -> trivialCode (CMP ULT) x y
 616       WordLeOp -> trivialCode (CMP ULE) x y
 617
 618       AddrGtOp -> trivialCode (CMP ULT) y x
 619       AddrGeOp -> trivialCode (CMP ULE) y x
 620       AddrEqOp -> trivialCode (CMP EQQ)  x y
 621       AddrNeOp -> int_NE_code x y
 622       AddrLtOp -> trivialCode (CMP ULT) x y
 623       AddrLeOp -> trivialCode (CMP ULE) x y
 624
 625       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 626       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 627       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 628       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 629       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 630       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 631
 632       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 633       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 634       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 635       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 636       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 637       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 638
 639       IntAddOp  -> trivialCode (ADD Q False) x y
 640       IntSubOp  -> trivialCode (SUB Q False) x y
 641       IntMulOp  -> trivialCode (MUL Q False) x y
 642       IntQuotOp -> trivialCode (DIV Q False) x y
 643       IntRemOp  -> trivialCode (REM Q False) x y
 644
 645       WordAddOp  -> trivialCode (ADD Q False) x y
 646       WordSubOp  -> trivialCode (SUB Q False) x y
 647       WordMulOp  -> trivialCode (MUL Q False) x y
 648       WordQuotOp -> trivialCode (DIV Q True) x y
 649       WordRemOp  -> trivialCode (REM Q True) x y
 650
 651       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 652       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 653       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 654       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 655
 656       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 657       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 658       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 659       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 660
 661       AddrAddOp  -> trivialCode (ADD Q False) x y
 662       AddrSubOp  -> trivialCode (SUB Q False) x y
 663       AddrRemOp  -> trivialCode (REM Q True) x y
 664
 665       AndOp  -> trivialCode AND x y
 666       OrOp   -> trivialCode OR  x y
 667       XorOp  -> trivialCode XOR x y
 668       SllOp  -> trivialCode SLL x y
 669       SrlOp  -> trivialCode SRL x y
 670
 671       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 672       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 673       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 674
 675       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 676       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 677   where
 678     {- ------------------------------------------------------------
 679         Some bizarre special code for getting condition codes into
 680         registers.  Integer non-equality is a test for equality
 681         followed by an XOR with 1.  (Integer comparisons always set
 682         the result register to 0 or 1.)  Floating point comparisons of
 683         any kind leave the result in a floating point register, so we
 684         need to wrangle an integer register out of things.
 685     -}
 686     int_NE_code :: StixTree -> StixTree -> NatM Register
 687
 688     int_NE_code x y
 689       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 690         getNewRegNat IntRep             `thenNat` \ tmp ->
 691         let
 692             code = registerCode register tmp
 693             src  = registerName register tmp
 694             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 695         in
 696         return (Any IntRep code__2)
 697
 698     {- ------------------------------------------------------------
 699         Comments for int_NE_code also apply to cmpF_code
 700     -}
 701     cmpF_code
 702         :: (Reg -> Reg -> Reg -> Instr)
 703         -> Cond
 704         -> StixTree -> StixTree
 705         -> NatM Register
 706
 707     cmpF_code instr cond x y
 708       = trivialFCode pr instr x y       `thenNat` \ register ->
 709         getNewRegNat F64                `thenNat` \ tmp ->
 710         getBlockIdNat                   `thenNat` \ lbl ->
 711         let
 712             code = registerCode register tmp
 713             result  = registerName register tmp
 714
 715             code__2 dst = code . mkSeqInstrs [
 716                 OR zeroh (RIImm (ImmInt 1)) dst,
 717                 BF cond  result (ImmCLbl lbl),
 718                 OR zeroh (RIReg zeroh) dst,
 719                 NEWBLOCK lbl]
 720         in
 721         return (Any IntRep code__2)
 722       where
 723         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 724       ------------------------------------------------------------
 725
 726 getRegister (CmmLoad pk mem)
 727   = getAmode mem                    `thenNat` \ amode ->
 728     let
 729         code = amodeCode amode
 730         src   = amodeAddr amode
 731         size = primRepToSize pk
 732         code__2 dst = code . mkSeqInstr (LD size dst src)
 733     in
 734     return (Any pk code__2)
 735
 736 getRegister (StInt i)
 737   | fits8Bits i
 738   = let
 739         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 740     in
 741     return (Any IntRep code)
 742   | otherwise
 743   = let
 744         code dst = mkSeqInstr (LDI Q dst src)
 745     in
 746     return (Any IntRep code)
 747   where
 748     src = ImmInt (fromInteger i)
 749
 750 getRegister leaf
 751   | isJust imm
 752   = let
 753         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 754     in
 755     return (Any PtrRep code)
 756   where
 757     imm = maybeImm leaf
 758     imm__2 = case imm of Just x -> x
 759
 760 #endif /* alpha_TARGET_ARCH */
 761
 762 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 763
 764 #if i386_TARGET_ARCH
 765
 766 getRegister (CmmLit (CmmFloat f F32)) = do
 767     lbl <- getNewLabelNat
 768     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 769     Amode addr addr_code <- getAmode dynRef
 770     let code dst =
 771             LDATA ReadOnlyData
 772                         [CmmDataLabel lbl,
 773                          CmmStaticLit (CmmFloat f F32)]
 774             `consOL` (addr_code `snocOL`
 775             GLD F32 addr dst)
 776     -- in
 777     return (Any F32 code)
 778
 779
 780 getRegister (CmmLit (CmmFloat d F64))
 781   | d == 0.0
 782   = let code dst = unitOL (GLDZ dst)
 783     in  return (Any F64 code)
 784
 785   | d == 1.0
 786   = let code dst = unitOL (GLD1 dst)
 787     in  return (Any F64 code)
 788
 789   | otherwise = do
 790     lbl <- getNewLabelNat
 791     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 792     Amode addr addr_code <- getAmode dynRef
 793     let code dst =
 794             LDATA ReadOnlyData
 795                         [CmmDataLabel lbl,
 796                          CmmStaticLit (CmmFloat d F64)]
 797             `consOL` (addr_code `snocOL`
 798             GLD F64 addr dst)
 799     -- in
 800     return (Any F64 code)
 801
 802 #endif /* i386_TARGET_ARCH */
 803
 804 #if x86_64_TARGET_ARCH
 805
 806 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 807    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 808         -- I don't know why there are xorpd, xorps, and pxor instructions.
 809         -- They all appear to do the same thing --SDM
 810    return (Any rep code)
 811
 812 getRegister (CmmLit (CmmFloat f rep)) = do
 813     lbl <- getNewLabelNat
 814     let code dst = toOL [
 815             LDATA ReadOnlyData
 816                         [CmmDataLabel lbl,
 817                          CmmStaticLit (CmmFloat f rep)],
 818             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 819             ]
 820     -- in
 821     return (Any rep code)
 822
 823 #endif /* x86_64_TARGET_ARCH */
 824
 825 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 826
 827 -- catch simple cases of zero- or sign-extended load
 828 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 829   code <- intLoadCode (MOVZxL I8) addr
 830   return (Any I32 code)
 831
 832 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 833   code <- intLoadCode (MOVSxL I8) addr
 834   return (Any I32 code)
 835
 836 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 837   code <- intLoadCode (MOVZxL I16) addr
 838   return (Any I32 code)
 839
 840 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 841   code <- intLoadCode (MOVSxL I16) addr
 842   return (Any I32 code)
 843
 844 #endif
 845
 846 #if x86_64_TARGET_ARCH
 847
 848 -- catch simple cases of zero- or sign-extended load
 849 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 850   code <- intLoadCode (MOVZxL I8) addr
 851   return (Any I64 code)
 852
 853 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 854   code <- intLoadCode (MOVSxL I8) addr
 855   return (Any I64 code)
 856
 857 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 858   code <- intLoadCode (MOVZxL I16) addr
 859   return (Any I64 code)
 860
 861 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 862   code <- intLoadCode (MOVSxL I16) addr
 863   return (Any I64 code)
 864
 865 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 866   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 867   return (Any I64 code)
 868
 869 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 870   code <- intLoadCode (MOVSxL I32) addr
 871   return (Any I64 code)
 872
 873 #endif
 874
 875 #if x86_64_TARGET_ARCH
 876 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 877                                      CmmLit displacement])
 878     = return $ Any I64 (\dst -> unitOL $
 879         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 880 #endif
 881
 882 #if x86_64_TARGET_ARCH
 883 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 884   x_code <- getAnyReg x
 885   lbl <- getNewLabelNat
 886   let
 887     code dst = x_code dst `appOL` toOL [
 888         -- This is how gcc does it, so it can't be that bad:
 889         LDATA ReadOnlyData16 [
 890                 CmmAlign 16,
 891                 CmmDataLabel lbl,
 892                 CmmStaticLit (CmmInt 0x80000000 I32),
 893                 CmmStaticLit (CmmInt 0 I32),
 894                 CmmStaticLit (CmmInt 0 I32),
 895                 CmmStaticLit (CmmInt 0 I32)
 896         ],
 897         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 898                 -- xorps, so we need the 128-bit constant
 899                 -- ToDo: rip-relative
 900         ]
 901   --
 902   return (Any F32 code)
 903
 904 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 905   x_code <- getAnyReg x
 906   lbl <- getNewLabelNat
 907   let
 908         -- This is how gcc does it, so it can't be that bad:
 909     code dst = x_code dst `appOL` toOL [
 910         LDATA ReadOnlyData16 [
 911                 CmmAlign 16,
 912                 CmmDataLabel lbl,
 913                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 914                 CmmStaticLit (CmmInt 0 I64)
 915         ],
 916                 -- gcc puts an unpck here.  Wonder if we need it.
 917         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 918                 -- xorpd, so we need the 128-bit constant
 919         ]
 920   --
 921   return (Any F64 code)
 922 #endif
 923
 924 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 925
 926 getRegister (CmmMachOp mop [x]) -- unary MachOps
 927   = case mop of
 928 #if i386_TARGET_ARCH
 929       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 930       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 931 #endif
 932
 933       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 934       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 935
 936       -- Nop conversions
 937       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 938       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 939       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 940       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 941       MO_U_Conv I32 I16 -> toI16Reg I32 x
 942       MO_S_Conv I32 I16 -> toI16Reg I32 x
 943 #if x86_64_TARGET_ARCH
 944       MO_U_Conv I64 I32 -> conversionNop I64 x
 945       MO_S_Conv I64 I32 -> conversionNop I64 x
 946       MO_U_Conv I64 I16 -> toI16Reg I64 x
 947       MO_S_Conv I64 I16 -> toI16Reg I64 x
 948       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 949       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 950 #endif
 951
 952       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 953       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 954
 955       -- widenings
 956       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 957       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 958       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 959
 960       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 961       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 962       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 963
 964 #if x86_64_TARGET_ARCH
 965       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 966       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 967       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 968       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 969       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 970       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 971         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 972         -- However, we don't want the register allocator to throw it
 973         -- away as an unnecessary reg-to-reg move, so we keep it in
 974         -- the form of a movzl and print it as a movl later.
 975 #endif
 976
 977 #if i386_TARGET_ARCH
 978       MO_S_Conv F32 F64 -> conversionNop F64 x
 979       MO_S_Conv F64 F32 -> conversionNop F32 x
 980 #else
 981       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 982       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 983 #endif
 984
 985       MO_S_Conv from to
 986         | isFloatingRep from -> coerceFP2Int from to x
 987         | isFloatingRep to   -> coerceInt2FP from to x
 988
 989       other -> pprPanic "getRegister" (pprMachOp mop)
 990    where
 991         -- signed or unsigned extension.
 992         integerExtend from to instr expr = do
 993             (reg,e_code) <- if from == I8 then getByteReg expr
 994                                           else getSomeReg expr
 995             let
 996                 code dst =
 997                   e_code `snocOL`
 998                   instr from (OpReg reg) (OpReg dst)
 999             return (Any to code)
1000
1001         toI8Reg new_rep expr
1002             = do codefn <- getAnyReg expr
1003                  return (Any new_rep codefn)
1004                 -- HACK: use getAnyReg to get a byte-addressable register.
1005                 -- If the source was a Fixed register, this will add the
1006                 -- mov instruction to put it into the desired destination.
1007                 -- We're assuming that the destination won't be a fixed
1008                 -- non-byte-addressable register; it won't be, because all
1009                 -- fixed registers are word-sized.
1010
1011         toI16Reg = toI8Reg -- for now
1012
1013         conversionNop new_rep expr
1014             = do e_code <- getRegister expr
1015                  return (swizzleRegisterRep e_code new_rep)
1016
1017
1018 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1019   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1020     case mop of
1021       MO_Eq F32   -> condFltReg EQQ x y
1022       MO_Ne F32   -> condFltReg NE x y
1023       MO_S_Gt F32 -> condFltReg GTT x y
1024       MO_S_Ge F32 -> condFltReg GE x y
1025       MO_S_Lt F32 -> condFltReg LTT x y
1026       MO_S_Le F32 -> condFltReg LE x y
1027
1028       MO_Eq F64   -> condFltReg EQQ x y
1029       MO_Ne F64   -> condFltReg NE x y
1030       MO_S_Gt F64 -> condFltReg GTT x y
1031       MO_S_Ge F64 -> condFltReg GE x y
1032       MO_S_Lt F64 -> condFltReg LTT x y
1033       MO_S_Le F64 -> condFltReg LE x y
1034
1035       MO_Eq rep   -> condIntReg EQQ x y
1036       MO_Ne rep   -> condIntReg NE x y
1037
1038       MO_S_Gt rep -> condIntReg GTT x y
1039       MO_S_Ge rep -> condIntReg GE x y
1040       MO_S_Lt rep -> condIntReg LTT x y
1041       MO_S_Le rep -> condIntReg LE x y
1042
1043       MO_U_Gt rep -> condIntReg GU  x y
1044       MO_U_Ge rep -> condIntReg GEU x y
1045       MO_U_Lt rep -> condIntReg LU  x y
1046       MO_U_Le rep -> condIntReg LEU x y
1047
1048 #if i386_TARGET_ARCH
1049       MO_Add F32 -> trivialFCode F32 GADD x y
1050       MO_Sub F32 -> trivialFCode F32 GSUB x y
1051
1052       MO_Add F64 -> trivialFCode F64 GADD x y
1053       MO_Sub F64 -> trivialFCode F64 GSUB x y
1054
1055       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1056       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1057 #endif
1058
1059 #if x86_64_TARGET_ARCH
1060       MO_Add F32 -> trivialFCode F32 ADD x y
1061       MO_Sub F32 -> trivialFCode F32 SUB x y
1062
1063       MO_Add F64 -> trivialFCode F64 ADD x y
1064       MO_Sub F64 -> trivialFCode F64 SUB x y
1065
1066       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1067       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1068 #endif
1069
1070       MO_Add rep -> add_code rep x y
1071       MO_Sub rep -> sub_code rep x y
1072
1073       MO_S_Quot rep -> div_code rep True  True  x y
1074       MO_S_Rem  rep -> div_code rep True  False x y
1075       MO_U_Quot rep -> div_code rep False True  x y
1076       MO_U_Rem  rep -> div_code rep False False x y
1077
1078 #if i386_TARGET_ARCH
1079       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1080       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1081 #endif
1082
1083 #if x86_64_TARGET_ARCH
1084       MO_Mul   F32 -> trivialFCode F32 MUL x y
1085       MO_Mul   F64 -> trivialFCode F64 MUL x y
1086 #endif
1087
1088       MO_Mul   rep -> let op = IMUL rep in
1089                       trivialCode rep op (Just op) x y
1090
1091       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1092
1093       MO_And rep -> let op = AND rep in
1094                     trivialCode rep op (Just op) x y
1095       MO_Or  rep -> let op = OR  rep in
1096                     trivialCode rep op (Just op) x y
1097       MO_Xor rep -> let op = XOR rep in
1098                     trivialCode rep op (Just op) x y
1099
1100         {- Shift ops on x86s have constraints on their source, it
1101            either has to be Imm, CL or 1
1102             => trivialCode is not restrictive enough (sigh.)
1103         -}
1104       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1105       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1106       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1107
1108       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1109   where
1110     --------------------
1111     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1112     imulMayOflo rep a b = do
1113          (a_reg, a_code) <- getNonClobberedReg a
1114          b_code <- getAnyReg b
1115          let
1116              shift_amt  = case rep of
1117                            I32 -> 31
1118                            I64 -> 63
1119                            _ -> panic "shift_amt"
1120
1121              code = a_code `appOL` b_code eax `appOL`
1122                         toOL [
1123                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1124                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1125                                 -- sign extend lower part
1126                            SUB rep (OpReg edx) (OpReg eax)
1127                                 -- compare against upper
1128                            -- eax==0 if high part == sign extended low part
1129                         ]
1130          -- in
1131          return (Fixed rep eax code)
1132
1133     --------------------
1134     shift_code :: MachRep
1135                -> (Operand -> Operand -> Instr)
1136                -> CmmExpr
1137                -> CmmExpr
1138                -> NatM Register
1139
1140     {- Case1: shift length as immediate -}
1141     shift_code rep instr x y@(CmmLit lit) = do
1142           x_code <- getAnyReg x
1143           let
1144                code dst
1145                   = x_code dst `snocOL`
1146                     instr (OpImm (litToImm lit)) (OpReg dst)
1147           -- in
1148           return (Any rep code)
1149
1150     {- Case2: shift length is complex (non-immediate) -}
1151     shift_code rep instr x y{-amount-} = do
1152         (x_reg, x_code) <- getNonClobberedReg x
1153         y_code <- getAnyReg y
1154         let
1155            code = x_code `appOL`
1156                   y_code ecx `snocOL`
1157                   instr (OpReg ecx) (OpReg x_reg)
1158         -- in
1159         return (Fixed rep x_reg code)
1160
1161     --------------------
1162     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1163     add_code rep x (CmmLit (CmmInt y _))
1164         | not (is64BitInteger y) = add_int rep x y
1165     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1166
1167     --------------------
1168     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1169     sub_code rep x (CmmLit (CmmInt y _))
1170         | not (is64BitInteger (-y)) = add_int rep x (-y)
1171     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1172
1173     -- our three-operand add instruction:
1174     add_int rep x y = do
1175         (x_reg, x_code) <- getSomeReg x
1176         let
1177             imm = ImmInt (fromInteger y)
1178             code dst
1179                = x_code `snocOL`
1180                  LEA rep
1181                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1182                         (OpReg dst)
1183         --
1184         return (Any rep code)
1185
1186     ----------------------
1187     div_code rep signed quotient x y = do
1188            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1189            x_code <- getAnyReg x
1190            let
1191              widen | signed    = CLTD rep
1192                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1193
1194              instr | signed    = IDIV
1195                    | otherwise = DIV
1196
1197              code = y_code `appOL`
1198                     x_code eax `appOL`
1199                     toOL [widen, instr rep y_op]
1200
1201              result | quotient  = eax
1202                     | otherwise = edx
1203
1204            -- in
1205            return (Fixed rep result code)
1206
1207
1208 getRegister (CmmLoad mem pk)
1209   | isFloatingRep pk
1210   = do
1211     Amode src mem_code <- getAmode mem
1212     let
1213         code dst = mem_code `snocOL`
1214                    IF_ARCH_i386(GLD pk src dst,
1215                                 MOV pk (OpAddr src) (OpReg dst))
1216     --
1217     return (Any pk code)
1218
1219 #if i386_TARGET_ARCH
1220 getRegister (CmmLoad mem pk)
1221   | pk /= I64
1222   = do
1223     code <- intLoadCode (instr pk) mem
1224     return (Any pk code)
1225   where
1226         instr I8  = MOVZxL pk
1227         instr I16 = MOV I16
1228         instr I32 = MOV I32
1229         -- we always zero-extend 8-bit loads, if we
1230         -- can't think of anything better.  This is because
1231         -- we can't guarantee access to an 8-bit variant of every register
1232         -- (esi and edi don't have 8-bit variants), so to make things
1233         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1234 #endif
1235
1236 #if x86_64_TARGET_ARCH
1237 -- Simpler memory load code on x86_64
1238 getRegister (CmmLoad mem pk)
1239   = do
1240     code <- intLoadCode (MOV pk) mem
1241     return (Any pk code)
1242 #endif
1243
1244 getRegister (CmmLit (CmmInt 0 rep))
1245   = let
1246         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1247         adj_rep = case rep of I64 -> I32; _ -> rep
1248         rep1 = IF_ARCH_i386( rep, adj_rep )
1249         code dst
1250            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1251     in
1252         return (Any rep code)
1253
1254 #if x86_64_TARGET_ARCH
1255   -- optimisation for loading small literals on x86_64: take advantage
1256   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1257   -- instruction forms are shorter.
1258 getRegister (CmmLit lit)
1259   | I64 <- cmmLitRep lit, not (isBigLit lit)
1260   = let
1261         imm = litToImm lit
1262         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1263     in
1264         return (Any I64 code)
1265   where
1266    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1267    isBigLit _ = False
1268         -- note1: not the same as is64BitLit, because that checks for
1269         -- signed literals that fit in 32 bits, but we want unsigned
1270         -- literals here.
1271         -- note2: all labels are small, because we're assuming the
1272         -- small memory model (see gcc docs, -mcmodel=small).
1273 #endif
1274
1275 getRegister (CmmLit lit)
1276   = let
1277         rep = cmmLitRep lit
1278         imm = litToImm lit
1279         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1280     in
1281         return (Any rep code)
1282
1283 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1284
1285
1286 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1287    -> NatM (Reg -> InstrBlock)
1288 intLoadCode instr mem = do
1289   Amode src mem_code <- getAmode mem
1290   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1291
1292 -- Compute an expression into *any* register, adding the appropriate
1293 -- move instruction if necessary.
1294 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1295 getAnyReg expr = do
1296   r <- getRegister expr
1297   anyReg r
1298
1299 anyReg :: Register -> NatM (Reg -> InstrBlock)
1300 anyReg (Any _ code)          = return code
1301 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1302
1303 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1304 -- Fixed registers might not be byte-addressable, so we make sure we've
1305 -- got a temporary, inserting an extra reg copy if necessary.
1306 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1307 #if x86_64_TARGET_ARCH
1308 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1309 #else
1310 getByteReg expr = do
1311   r <- getRegister expr
1312   case r of
1313     Any rep code -> do
1314         tmp <- getNewRegNat rep
1315         return (tmp, code tmp)
1316     Fixed rep reg code
1317         | isVirtualReg reg -> return (reg,code)
1318         | otherwise -> do
1319             tmp <- getNewRegNat rep
1320             return (tmp, code `snocOL` reg2reg rep reg tmp)
1321         -- ToDo: could optimise slightly by checking for byte-addressable
1322         -- real registers, but that will happen very rarely if at all.
1323 #endif
1324
1325 -- Another variant: this time we want the result in a register that cannot
1326 -- be modified by code to evaluate an arbitrary expression.
1327 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1328 getNonClobberedReg expr = do
1329   r <- getRegister expr
1330   case r of
1331     Any rep code -> do
1332         tmp <- getNewRegNat rep
1333         return (tmp, code tmp)
1334     Fixed rep reg code
1335         -- only free regs can be clobbered
1336         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1337                 tmp <- getNewRegNat rep
1338                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1339         | otherwise ->
1340                 return (reg, code)
1341
1342 reg2reg :: MachRep -> Reg -> Reg -> Instr
1343 reg2reg rep src dst
1344 #if i386_TARGET_ARCH
1345   | isFloatingRep rep = GMOV src dst
1346 #endif
1347   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1348
1349 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1350
1351 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1352
1353 #if sparc_TARGET_ARCH
1354
1355 getRegister (CmmLit (CmmFloat f F32)) = do
1356     lbl <- getNewLabelNat
1357     let code dst = toOL [
1358             LDATA ReadOnlyData
1359                         [CmmDataLabel lbl,
1360                          CmmStaticLit (CmmFloat f F32)],
1361             SETHI (HI (ImmCLbl lbl)) dst,
1362             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1363     return (Any F32 code)
1364
1365 getRegister (CmmLit (CmmFloat d F64)) = do
1366     lbl <- getNewLabelNat
1367     let code dst = toOL [
1368             LDATA ReadOnlyData
1369                         [CmmDataLabel lbl,
1370                          CmmStaticLit (CmmFloat d F64)],
1371             SETHI (HI (ImmCLbl lbl)) dst,
1372             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1373     return (Any F64 code)
1374
1375 getRegister (CmmMachOp mop [x]) -- unary MachOps
1376   = case mop of
1377       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1378       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1379
1380       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1381       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1382
1383       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1384
1385       MO_U_Conv F64 F32-> coerceDbl2Flt x
1386       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1387
1388       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1389       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1390       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1391       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1392
1393       -- Conversions which are a nop on sparc
1394       MO_U_Conv from to
1395         | from == to   -> conversionNop to   x
1396       MO_U_Conv I32 to -> conversionNop to   x
1397       MO_S_Conv I32 to -> conversionNop to   x
1398
1399       -- widenings
1400       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1401       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1402       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1403       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1404
1405       other_op -> panic "Unknown unary mach op"
1406     where
1407         -- XXX SLL/SRL?
1408         integerExtend signed from to expr = do
1409            (reg, e_code) <- getSomeReg expr
1410            let
1411                code dst =
1412                    e_code `snocOL`
1413                    ((if signed then SRA else SRL)
1414                           reg (RIImm (ImmInt 0)) dst)
1415            return (Any to code)
1416         conversionNop new_rep expr
1417             = do e_code <- getRegister expr
1418                  return (swizzleRegisterRep e_code new_rep)
1419
1420 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1421   = case mop of
1422       MO_Eq F32 -> condFltReg EQQ x y
1423       MO_Ne F32 -> condFltReg NE x y
1424
1425       MO_S_Gt F32 -> condFltReg GTT x y
1426       MO_S_Ge F32 -> condFltReg GE x y
1427       MO_S_Lt F32 -> condFltReg LTT x y
1428       MO_S_Le F32 -> condFltReg LE x y
1429
1430       MO_Eq F64 -> condFltReg EQQ x y
1431       MO_Ne F64 -> condFltReg NE x y
1432
1433       MO_S_Gt F64 -> condFltReg GTT x y
1434       MO_S_Ge F64 -> condFltReg GE x y
1435       MO_S_Lt F64 -> condFltReg LTT x y
1436       MO_S_Le F64 -> condFltReg LE x y
1437
1438       MO_Eq rep -> condIntReg EQQ x y
1439       MO_Ne rep -> condIntReg NE x y
1440
1441       MO_S_Gt rep -> condIntReg GTT x y
1442       MO_S_Ge rep -> condIntReg GE x y
1443       MO_S_Lt rep -> condIntReg LTT x y
1444       MO_S_Le rep -> condIntReg LE x y
1445
1446       MO_U_Gt I32  -> condIntReg GTT x y
1447       MO_U_Ge I32  -> condIntReg GE x y
1448       MO_U_Lt I32  -> condIntReg LTT x y
1449       MO_U_Le I32  -> condIntReg LE x y
1450
1451       MO_U_Gt I16 -> condIntReg GU  x y
1452       MO_U_Ge I16 -> condIntReg GEU x y
1453       MO_U_Lt I16 -> condIntReg LU  x y
1454       MO_U_Le I16 -> condIntReg LEU x y
1455
1456       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1457       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1458
1459       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1460 {-
1461       -- ToDo: teach about V8+ SPARC div instructions
1462       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1463       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1464       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1465       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1466 -}
1467       MO_Add F32  -> trivialFCode F32 FADD  x y
1468       MO_Sub F32   -> trivialFCode F32  FSUB x y
1469       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1470       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1471
1472       MO_Add F64   -> trivialFCode F64 FADD  x y
1473       MO_Sub F64   -> trivialFCode F64  FSUB x y
1474       MO_Mul F64   -> trivialFCode F64  FMUL x y
1475       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1476
1477       MO_And rep   -> trivialCode rep (AND False) x y
1478       MO_Or rep    -> trivialCode rep (OR  False) x y
1479       MO_Xor rep   -> trivialCode rep (XOR False) x y
1480
1481       MO_Mul rep -> trivialCode rep (SMUL False) x y
1482
1483       MO_Shl rep   -> trivialCode rep SLL  x y
1484       MO_U_Shr rep   -> trivialCode rep SRL x y
1485       MO_S_Shr rep   -> trivialCode rep SRA x y
1486
1487 {-
1488       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1489                                          [promote x, promote y])
1490                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1491       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1492                                         [x, y])
1493 -}
1494       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1495   where
1496     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1497
1498     --------------------
1499     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1500     imulMayOflo rep a b = do
1501          (a_reg, a_code) <- getSomeReg a
1502          (b_reg, b_code) <- getSomeReg b
1503          res_lo <- getNewRegNat I32
1504          res_hi <- getNewRegNat I32
1505          let
1506             shift_amt  = case rep of
1507                           I32 -> 31
1508                           I64 -> 63
1509                           _ -> panic "shift_amt"
1510             code dst = a_code `appOL` b_code `appOL`
1511                        toOL [
1512                            SMUL False a_reg (RIReg b_reg) res_lo,
1513                            RDY res_hi,
1514                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1515                            SUB False False res_lo (RIReg res_hi) dst
1516                         ]
1517          return (Any I32 code)
1518
1519 getRegister (CmmLoad mem pk) = do
1520     Amode src code <- getAmode mem
1521     let
1522         code__2 dst = code `snocOL` LD pk src dst
1523     return (Any pk code__2)
1524
1525 getRegister (CmmLit (CmmInt i _))
1526   | fits13Bits i
1527   = let
1528         src = ImmInt (fromInteger i)
1529         code dst = unitOL (OR False g0 (RIImm src) dst)
1530     in
1531         return (Any I32 code)
1532
1533 getRegister (CmmLit lit)
1534   = let rep = cmmLitRep lit
1535         imm = litToImm lit
1536         code dst = toOL [
1537             SETHI (HI imm) dst,
1538             OR False dst (RIImm (LO imm)) dst]
1539     in return (Any I32 code)
1540
1541 #endif /* sparc_TARGET_ARCH */
1542
1543 #if powerpc_TARGET_ARCH
1544 getRegister (CmmLoad mem pk)
1545   | pk /= I64
1546   = do
1547         Amode addr addr_code <- getAmode mem
1548         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1549                        addr_code `snocOL` LD pk dst addr
1550         return (Any pk code)
1551
1552 -- catch simple cases of zero- or sign-extended load
1553 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1554     Amode addr addr_code <- getAmode mem
1555     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1556
1557 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1558
1559 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1560     Amode addr addr_code <- getAmode mem
1561     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1562
1563 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1564     Amode addr addr_code <- getAmode mem
1565     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1566
1567 getRegister (CmmMachOp mop [x]) -- unary MachOps
1568   = case mop of
1569       MO_Not rep   -> trivialUCode rep NOT x
1570
1571       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1572       MO_S_Conv F32 F64 -> conversionNop F64 x
1573
1574       MO_S_Conv from to
1575         | from == to         -> conversionNop to x
1576         | isFloatingRep from -> coerceFP2Int from to x
1577         | isFloatingRep to   -> coerceInt2FP from to x
1578
1579         -- narrowing is a nop: we treat the high bits as undefined
1580       MO_S_Conv I32 to -> conversionNop to x
1581       MO_S_Conv I16 I8 -> conversionNop I8 x
1582       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1583       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1584
1585       MO_U_Conv from to
1586         | from == to -> conversionNop to x
1587         -- narrowing is a nop: we treat the high bits as undefined
1588       MO_U_Conv I32 to -> conversionNop to x
1589       MO_U_Conv I16 I8 -> conversionNop I8 x
1590       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1591       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1592
1593       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1594       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1595       MO_S_Neg rep      -> trivialUCode rep NEG x
1596
1597     where
1598         conversionNop new_rep expr
1599             = do e_code <- getRegister expr
1600                  return (swizzleRegisterRep e_code new_rep)
1601
1602 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1603   = case mop of
1604       MO_Eq F32 -> condFltReg EQQ x y
1605       MO_Ne F32 -> condFltReg NE  x y
1606
1607       MO_S_Gt F32 -> condFltReg GTT x y
1608       MO_S_Ge F32 -> condFltReg GE  x y
1609       MO_S_Lt F32 -> condFltReg LTT x y
1610       MO_S_Le F32 -> condFltReg LE  x y
1611
1612       MO_Eq F64 -> condFltReg EQQ x y
1613       MO_Ne F64 -> condFltReg NE  x y
1614
1615       MO_S_Gt F64 -> condFltReg GTT x y
1616       MO_S_Ge F64 -> condFltReg GE  x y
1617       MO_S_Lt F64 -> condFltReg LTT x y
1618       MO_S_Le F64 -> condFltReg LE  x y
1619
1620       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1621       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1622
1623       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1624       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1625       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1626       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1627
1628       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1629       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1630       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1631       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1632
1633       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1634       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1635       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1636       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1637
1638       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1639       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1640       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1641       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1642
1643          -- optimize addition with 32-bit immediate
1644          -- (needed for PIC)
1645       MO_Add I32 ->
1646         case y of
1647           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1648             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1649           CmmLit lit
1650             -> do
1651                 (src, srcCode) <- getSomeReg x
1652                 let imm = litToImm lit
1653                     code dst = srcCode `appOL` toOL [
1654                                     ADDIS dst src (HA imm),
1655                                     ADD dst dst (RIImm (LO imm))
1656                                 ]
1657                 return (Any I32 code)
1658           _ -> trivialCode I32 True ADD x y
1659
1660       MO_Add rep -> trivialCode rep True ADD x y
1661       MO_Sub rep ->
1662         case y of    -- subfi ('substract from' with immediate) doesn't exist
1663           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1664             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1665           _ -> trivialCodeNoImm rep SUBF y x
1666
1667       MO_Mul rep -> trivialCode rep True MULLW x y
1668
1669       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1670
1671       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1672       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1673
1674       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1675       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1676
1677       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1678       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1679
1680       MO_And rep   -> trivialCode rep False AND x y
1681       MO_Or rep    -> trivialCode rep False OR x y
1682       MO_Xor rep   -> trivialCode rep False XOR x y
1683
1684       MO_Shl rep   -> trivialCode rep False SLW x y
1685       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1686       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1687
1688 getRegister (CmmLit (CmmInt i rep))
1689   | Just imm <- makeImmediate rep True i
1690   = let
1691         code dst = unitOL (LI dst imm)
1692     in
1693         return (Any rep code)
1694
1695 getRegister (CmmLit (CmmFloat f frep)) = do
1696     lbl <- getNewLabelNat
1697     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1698     Amode addr addr_code <- getAmode dynRef
1699     let code dst =
1700             LDATA ReadOnlyData  [CmmDataLabel lbl,
1701                                  CmmStaticLit (CmmFloat f frep)]
1702             `consOL` (addr_code `snocOL` LD frep dst addr)
1703     return (Any frep code)
1704
1705 getRegister (CmmLit lit)
1706   = let rep = cmmLitRep lit
1707         imm = litToImm lit
1708         code dst = toOL [
1709               LIS dst (HI imm),
1710               OR dst dst (RIImm (LO imm))
1711           ]
1712     in return (Any rep code)
1713
1714 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1715
1716     -- extend?Rep: wrap integer expression of type rep
1717     -- in a conversion to I32
1718 extendSExpr I32 x = x
1719 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1720 extendUExpr I32 x = x
1721 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1722
1723 #endif /* powerpc_TARGET_ARCH */
1724
1725
1726 -- -----------------------------------------------------------------------------
1727 --  The 'Amode' type: Memory addressing modes passed up the tree.
1728
1729 data Amode = Amode AddrMode InstrBlock
1730
1731 {-
1732 Now, given a tree (the argument to an CmmLoad) that references memory,
1733 produce a suitable addressing mode.
1734
1735 A Rule of the Game (tm) for Amodes: use of the addr bit must
1736 immediately follow use of the code part, since the code part puts
1737 values in registers which the addr then refers to.  So you can't put
1738 anything in between, lest it overwrite some of those registers.  If
1739 you need to do some other computation between the code part and use of
1740 the addr bit, first store the effective address from the amode in a
1741 temporary, then do the other computation, and then use the temporary:
1742
1743     code
1744     LEA amode, tmp
1745     ... other computation ...
1746     ... (tmp) ...
1747 -}
1748
1749 getAmode :: CmmExpr -> NatM Amode
1750 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1751
1752 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1753
1754 #if alpha_TARGET_ARCH
1755
1756 getAmode (StPrim IntSubOp [x, StInt i])
1757   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1758     getRegister x               `thenNat` \ register ->
1759     let
1760         code = registerCode register tmp
1761         reg  = registerName register tmp
1762         off  = ImmInt (-(fromInteger i))
1763     in
1764     return (Amode (AddrRegImm reg off) code)
1765
1766 getAmode (StPrim IntAddOp [x, StInt i])
1767   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1768     getRegister x               `thenNat` \ register ->
1769     let
1770         code = registerCode register tmp
1771         reg  = registerName register tmp
1772         off  = ImmInt (fromInteger i)
1773     in
1774     return (Amode (AddrRegImm reg off) code)
1775
1776 getAmode leaf
1777   | isJust imm
1778   = return (Amode (AddrImm imm__2) id)
1779   where
1780     imm = maybeImm leaf
1781     imm__2 = case imm of Just x -> x
1782
1783 getAmode other
1784   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1785     getRegister other           `thenNat` \ register ->
1786     let
1787         code = registerCode register tmp
1788         reg  = registerName register tmp
1789     in
1790     return (Amode (AddrReg reg) code)
1791
1792 #endif /* alpha_TARGET_ARCH */
1793
1794 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1795
1796 #if x86_64_TARGET_ARCH
1797
1798 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1799                                      CmmLit displacement])
1800     = return $ Amode (ripRel (litToImm displacement)) nilOL
1801
1802 #endif
1803
1804 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1805
1806 -- This is all just ridiculous, since it carefully undoes
1807 -- what mangleIndexTree has just done.
1808 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1809   | not (is64BitLit lit)
1810   -- ASSERT(rep == I32)???
1811   = do (x_reg, x_code) <- getSomeReg x
1812        let off = ImmInt (-(fromInteger i))
1813        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1814
1815 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1816   | not (is64BitLit lit)
1817   -- ASSERT(rep == I32)???
1818   = do (x_reg, x_code) <- getSomeReg x
1819        let off = ImmInt (fromInteger i)
1820        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1821
1822 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1823 -- recognised by the next rule.
1824 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1825                                   b@(CmmLit _)])
1826   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1827
1828 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1829                                         [y, CmmLit (CmmInt shift _)]])
1830   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1831   = do (x_reg, x_code) <- getNonClobberedReg x
1832         -- x must be in a temp, because it has to stay live over y_code
1833         -- we could compre x_reg and y_reg and do something better here...
1834        (y_reg, y_code) <- getSomeReg y
1835        let
1836            code = x_code `appOL` y_code
1837            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1838        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1839                code)
1840
1841 getAmode (CmmLit lit) | not (is64BitLit lit)
1842   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1843
1844 getAmode expr = do
1845   (reg,code) <- getSomeReg expr
1846   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1847
1848 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1849
1850 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1851
1852 #if sparc_TARGET_ARCH
1853
1854 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1855   | fits13Bits (-i)
1856   = do
1857        (reg, code) <- getSomeReg x
1858        let
1859          off  = ImmInt (-(fromInteger i))
1860        return (Amode (AddrRegImm reg off) code)
1861
1862
1863 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1864   | fits13Bits i
1865   = do
1866        (reg, code) <- getSomeReg x
1867        let
1868          off  = ImmInt (fromInteger i)
1869        return (Amode (AddrRegImm reg off) code)
1870
1871 getAmode (CmmMachOp (MO_Add rep) [x, y])
1872   = do
1873     (regX, codeX) <- getSomeReg x
1874     (regY, codeY) <- getSomeReg y
1875     let
1876         code = codeX `appOL` codeY
1877     return (Amode (AddrRegReg regX regY) code)
1878
1879 -- XXX Is this same as "leaf" in Stix?
1880 getAmode (CmmLit lit)
1881   = do
1882       tmp <- getNewRegNat I32
1883       let
1884         code = unitOL (SETHI (HI imm__2) tmp)
1885       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1886       where
1887          imm__2 = litToImm lit
1888
1889 getAmode other
1890   = do
1891        (reg, code) <- getSomeReg other
1892        let
1893             off  = ImmInt 0
1894        return (Amode (AddrRegImm reg off) code)
1895
1896 #endif /* sparc_TARGET_ARCH */
1897
1898 #ifdef powerpc_TARGET_ARCH
1899 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1900   | Just off <- makeImmediate I32 True (-i)
1901   = do
1902         (reg, code) <- getSomeReg x
1903         return (Amode (AddrRegImm reg off) code)
1904
1905
1906 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1907   | Just off <- makeImmediate I32 True i
1908   = do
1909         (reg, code) <- getSomeReg x
1910         return (Amode (AddrRegImm reg off) code)
1911
1912    -- optimize addition with 32-bit immediate
1913    -- (needed for PIC)
1914 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1915   = do
1916         tmp <- getNewRegNat I32
1917         (src, srcCode) <- getSomeReg x
1918         let imm = litToImm lit
1919             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1920         return (Amode (AddrRegImm tmp (LO imm)) code)
1921
1922 getAmode (CmmLit lit)
1923   = do
1924         tmp <- getNewRegNat I32
1925         let imm = litToImm lit
1926             code = unitOL (LIS tmp (HA imm))
1927         return (Amode (AddrRegImm tmp (LO imm)) code)
1928
1929 getAmode (CmmMachOp (MO_Add I32) [x, y])
1930   = do
1931         (regX, codeX) <- getSomeReg x
1932         (regY, codeY) <- getSomeReg y
1933         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1934
1935 getAmode other
1936   = do
1937         (reg, code) <- getSomeReg other
1938         let
1939             off  = ImmInt 0
1940         return (Amode (AddrRegImm reg off) code)
1941 #endif /* powerpc_TARGET_ARCH */
1942
1943 -- -----------------------------------------------------------------------------
1944 -- getOperand: sometimes any operand will do.
1945
1946 -- getNonClobberedOperand: the value of the operand will remain valid across
1947 -- the computation of an arbitrary expression, unless the expression
1948 -- is computed directly into a register which the operand refers to
1949 -- (see trivialCode where this function is used for an example).
1950
1951 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1952
1953 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1954 #if x86_64_TARGET_ARCH
1955 getNonClobberedOperand (CmmLit lit)
1956   | isSuitableFloatingPointLit lit = do
1957     lbl <- getNewLabelNat
1958     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1959                                            CmmStaticLit lit])
1960     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1961 #endif
1962 getNonClobberedOperand (CmmLit lit)
1963   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1964     return (OpImm (litToImm lit), nilOL)
1965 getNonClobberedOperand (CmmLoad mem pk)
1966   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1967     Amode src mem_code <- getAmode mem
1968     (src',save_code) <-
1969         if (amodeCouldBeClobbered src)
1970                 then do
1971                    tmp <- getNewRegNat wordRep
1972                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1973                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1974                 else
1975                    return (src, nilOL)
1976     return (OpAddr src', save_code `appOL` mem_code)
1977 getNonClobberedOperand e = do
1978     (reg, code) <- getNonClobberedReg e
1979     return (OpReg reg, code)
1980
1981 amodeCouldBeClobbered :: AddrMode -> Bool
1982 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1983
1984 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1985 regClobbered _ = False
1986
1987 -- getOperand: the operand is not required to remain valid across the
1988 -- computation of an arbitrary expression.
1989 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1990 #if x86_64_TARGET_ARCH
1991 getOperand (CmmLit lit)
1992   | isSuitableFloatingPointLit lit = do
1993     lbl <- getNewLabelNat
1994     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1995                                            CmmStaticLit lit])
1996     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1997 #endif
1998 getOperand (CmmLit lit)
1999   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2000     return (OpImm (litToImm lit), nilOL)
2001 getOperand (CmmLoad mem pk)
2002   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2003     Amode src mem_code <- getAmode mem
2004     return (OpAddr src, mem_code)
2005 getOperand e = do
2006     (reg, code) <- getSomeReg e
2007     return (OpReg reg, code)
2008
2009 isOperand :: CmmExpr -> Bool
2010 isOperand (CmmLoad _ _) = True
2011 isOperand (CmmLit lit)  = not (is64BitLit lit)
2012                           || isSuitableFloatingPointLit lit
2013 isOperand _             = False
2014
2015 -- if we want a floating-point literal as an operand, we can
2016 -- use it directly from memory.  However, if the literal is
2017 -- zero, we're better off generating it into a register using
2018 -- xor.
2019 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2020 isSuitableFloatingPointLit _ = False
2021
2022 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2023 getRegOrMem (CmmLoad mem pk)
2024   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2025     Amode src mem_code <- getAmode mem
2026     return (OpAddr src, mem_code)
2027 getRegOrMem e = do
2028     (reg, code) <- getNonClobberedReg e
2029     return (OpReg reg, code)
2030
2031 #if x86_64_TARGET_ARCH
2032 is64BitLit (CmmInt i I64) = is64BitInteger i
2033    -- assume that labels are in the range 0-2^31-1: this assumes the
2034    -- small memory model (see gcc docs, -mcmodel=small).
2035 #endif
2036 is64BitLit x = False
2037 #endif
2038
2039 is64BitInteger :: Integer -> Bool
2040 is64BitInteger i = i > 0x7fffffff || i < -0x80000000
2041
2042 -- -----------------------------------------------------------------------------
2043 --  The 'CondCode' type:  Condition codes passed up the tree.
2044
2045 data CondCode = CondCode Bool Cond InstrBlock
2046
2047 -- Set up a condition code for a conditional branch.
2048
2049 getCondCode :: CmmExpr -> NatM CondCode
2050
2051 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2052
2053 #if alpha_TARGET_ARCH
2054 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2055 #endif /* alpha_TARGET_ARCH */
2056
2057 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2058
2059 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2060 -- yes, they really do seem to want exactly the same!
2061
2062 getCondCode (CmmMachOp mop [x, y])
2063   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2064     case mop of
2065       MO_Eq F32 -> condFltCode EQQ x y
2066       MO_Ne F32 -> condFltCode NE  x y
2067
2068       MO_S_Gt F32 -> condFltCode GTT x y
2069       MO_S_Ge F32 -> condFltCode GE  x y
2070       MO_S_Lt F32 -> condFltCode LTT x y
2071       MO_S_Le F32 -> condFltCode LE  x y
2072
2073       MO_Eq F64 -> condFltCode EQQ x y
2074       MO_Ne F64 -> condFltCode NE  x y
2075
2076       MO_S_Gt F64 -> condFltCode GTT x y
2077       MO_S_Ge F64 -> condFltCode GE  x y
2078       MO_S_Lt F64 -> condFltCode LTT x y
2079       MO_S_Le F64 -> condFltCode LE  x y
2080
2081       MO_Eq rep -> condIntCode EQQ  x y
2082       MO_Ne rep -> condIntCode NE   x y
2083
2084       MO_S_Gt rep -> condIntCode GTT  x y
2085       MO_S_Ge rep -> condIntCode GE   x y
2086       MO_S_Lt rep -> condIntCode LTT  x y
2087       MO_S_Le rep -> condIntCode LE   x y
2088
2089       MO_U_Gt rep -> condIntCode GU   x y
2090       MO_U_Ge rep -> condIntCode GEU  x y
2091       MO_U_Lt rep -> condIntCode LU   x y
2092       MO_U_Le rep -> condIntCode LEU  x y
2093
2094       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2095
2096 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2097
2098 #elif powerpc_TARGET_ARCH
2099
2100 -- almost the same as everywhere else - but we need to
2101 -- extend small integers to 32 bit first
2102
2103 getCondCode (CmmMachOp mop [x, y])
2104   = case mop of
2105       MO_Eq F32 -> condFltCode EQQ x y
2106       MO_Ne F32 -> condFltCode NE  x y
2107
2108       MO_S_Gt F32 -> condFltCode GTT x y
2109       MO_S_Ge F32 -> condFltCode GE  x y
2110       MO_S_Lt F32 -> condFltCode LTT x y
2111       MO_S_Le F32 -> condFltCode LE  x y
2112
2113       MO_Eq F64 -> condFltCode EQQ x y
2114       MO_Ne F64 -> condFltCode NE  x y
2115
2116       MO_S_Gt F64 -> condFltCode GTT x y
2117       MO_S_Ge F64 -> condFltCode GE  x y
2118       MO_S_Lt F64 -> condFltCode LTT x y
2119       MO_S_Le F64 -> condFltCode LE  x y
2120
2121       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2122       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2123
2124       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2125       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2126       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2127       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2128
2129       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2130       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2131       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2132       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2133
2134       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2135
2136 getCondCode other =  panic "getCondCode(2)(powerpc)"
2137
2138
2139 #endif
2140
2141
2142 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2143 -- passed back up the tree.
2144
2145 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2146
2147 #if alpha_TARGET_ARCH
2148 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2149 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2150 #endif /* alpha_TARGET_ARCH */
2151
2152 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2153 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2154
2155 -- memory vs immediate
2156 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2157     Amode x_addr x_code <- getAmode x
2158     let
2159         imm  = litToImm lit
2160         code = x_code `snocOL`
2161                   CMP pk (OpImm imm) (OpAddr x_addr)
2162     --
2163     return (CondCode False cond code)
2164
2165 -- anything vs zero
2166 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2167     (x_reg, x_code) <- getSomeReg x
2168     let
2169         code = x_code `snocOL`
2170                   TEST pk (OpReg x_reg) (OpReg x_reg)
2171     --
2172     return (CondCode False cond code)
2173
2174 -- anything vs operand
2175 condIntCode cond x y | isOperand y = do
2176     (x_reg, x_code) <- getNonClobberedReg x
2177     (y_op,  y_code) <- getOperand y
2178     let
2179         code = x_code `appOL` y_code `snocOL`
2180                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2181     -- in
2182     return (CondCode False cond code)
2183
2184 -- anything vs anything
2185 condIntCode cond x y = do
2186   (y_reg, y_code) <- getNonClobberedReg y
2187   (x_op, x_code) <- getRegOrMem x
2188   let
2189         code = y_code `appOL`
2190                x_code `snocOL`
2191                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2192   -- in
2193   return (CondCode False cond code)
2194 #endif
2195
2196 #if i386_TARGET_ARCH
2197 condFltCode cond x y
2198   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2199   (x_reg, x_code) <- getNonClobberedReg x
2200   (y_reg, y_code) <- getSomeReg y
2201   let
2202         code = x_code `appOL` y_code `snocOL`
2203                 GCMP cond x_reg y_reg
2204   -- The GCMP insn does the test and sets the zero flag if comparable
2205   -- and true.  Hence we always supply EQQ as the condition to test.
2206   return (CondCode True EQQ code)
2207 #endif /* i386_TARGET_ARCH */
2208
2209 #if x86_64_TARGET_ARCH
2210 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2211 -- an operand, but the right must be a reg.  We can probably do better
2212 -- than this general case...
2213 condFltCode cond x y = do
2214   (x_reg, x_code) <- getNonClobberedReg x
2215   (y_op, y_code) <- getOperand y
2216   let
2217         code = x_code `appOL`
2218                y_code `snocOL`
2219                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2220         -- NB(1): we need to use the unsigned comparison operators on the
2221         -- result of this comparison.
2222   -- in
2223   return (CondCode True (condToUnsigned cond) code)
2224 #endif
2225
2226 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2227
2228 #if sparc_TARGET_ARCH
2229
2230 condIntCode cond x (CmmLit (CmmInt y rep))
2231   | fits13Bits y
2232   = do
2233        (src1, code) <- getSomeReg x
2234        let
2235            src2 = ImmInt (fromInteger y)
2236            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2237        return (CondCode False cond code')
2238
2239 condIntCode cond x y = do
2240     (src1, code1) <- getSomeReg x
2241     (src2, code2) <- getSomeReg y
2242     let
2243         code__2 = code1 `appOL` code2 `snocOL`
2244                   SUB False True src1 (RIReg src2) g0
2245     return (CondCode False cond code__2)
2246
2247 -----------
2248 condFltCode cond x y = do
2249     (src1, code1) <- getSomeReg x
2250     (src2, code2) <- getSomeReg y
2251     tmp <- getNewRegNat F64
2252     let
2253         promote x = FxTOy F32 F64 x tmp
2254
2255         pk1   = cmmExprRep x
2256         pk2   = cmmExprRep y
2257
2258         code__2 =
2259                 if pk1 == pk2 then
2260                     code1 `appOL` code2 `snocOL`
2261                     FCMP True pk1 src1 src2
2262                 else if pk1 == F32 then
2263                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2264                     FCMP True F64 tmp src2
2265                 else
2266                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2267                     FCMP True F64 src1 tmp
2268     return (CondCode True cond code__2)
2269
2270 #endif /* sparc_TARGET_ARCH */
2271
2272 #if powerpc_TARGET_ARCH
2273 --  ###FIXME: I16 and I8!
2274 condIntCode cond x (CmmLit (CmmInt y rep))
2275   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2276   = do
2277         (src1, code) <- getSomeReg x
2278         let
2279             code' = code `snocOL`
2280                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2281         return (CondCode False cond code')
2282
2283 condIntCode cond x y = do
2284     (src1, code1) <- getSomeReg x
2285     (src2, code2) <- getSomeReg y
2286     let
2287         code' = code1 `appOL` code2 `snocOL`
2288                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2289     return (CondCode False cond code')
2290
2291 condFltCode cond x y = do
2292     (src1, code1) <- getSomeReg x
2293     (src2, code2) <- getSomeReg y
2294     let
2295         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2296         code'' = case cond of -- twiddle CR to handle unordered case
2297                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2298                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2299                     _ -> code'
2300                  where
2301                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2302     return (CondCode True cond code'')
2303
2304 #endif /* powerpc_TARGET_ARCH */
2305
2306 -- -----------------------------------------------------------------------------
2307 -- Generating assignments
2308
2309 -- Assignments are really at the heart of the whole code generation
2310 -- business.  Almost all top-level nodes of any real importance are
2311 -- assignments, which correspond to loads, stores, or register
2312 -- transfers.  If we're really lucky, some of the register transfers
2313 -- will go away, because we can use the destination register to
2314 -- complete the code generation for the right hand side.  This only
2315 -- fails when the right hand side is forced into a fixed register
2316 -- (e.g. the result of a call).
2317
2318 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2319 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2320
2321 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2322 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2323
2324 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2325
2326 #if alpha_TARGET_ARCH
2327
2328 assignIntCode pk (CmmLoad dst _) src
2329   = getNewRegNat IntRep             `thenNat` \ tmp ->
2330     getAmode dst                    `thenNat` \ amode ->
2331     getRegister src                 `thenNat` \ register ->
2332     let
2333         code1   = amodeCode amode []
2334         dst__2  = amodeAddr amode
2335         code2   = registerCode register tmp []
2336         src__2  = registerName register tmp
2337         sz      = primRepToSize pk
2338         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2339     in
2340     return code__2
2341
2342 assignIntCode pk dst src
2343   = getRegister dst                         `thenNat` \ register1 ->
2344     getRegister src                         `thenNat` \ register2 ->
2345     let
2346         dst__2  = registerName register1 zeroh
2347         code    = registerCode register2 dst__2
2348         src__2  = registerName register2 dst__2
2349         code__2 = if isFixed register2
2350                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2351                   else code
2352     in
2353     return code__2
2354
2355 #endif /* alpha_TARGET_ARCH */
2356
2357 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2358
2359 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2360
2361 -- integer assignment to memory
2362 assignMem_IntCode pk addr src = do
2363     Amode addr code_addr <- getAmode addr
2364     (code_src, op_src)   <- get_op_RI src
2365     let
2366         code = code_src `appOL`
2367                code_addr `snocOL`
2368                   MOV pk op_src (OpAddr addr)
2369         -- NOTE: op_src is stable, so it will still be valid
2370         -- after code_addr.  This may involve the introduction
2371         -- of an extra MOV to a temporary register, but we hope
2372         -- the register allocator will get rid of it.
2373     --
2374     return code
2375   where
2376     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2377     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2378       = return (nilOL, OpImm (litToImm lit))
2379     get_op_RI op
2380       = do (reg,code) <- getNonClobberedReg op
2381            return (code, OpReg reg)
2382
2383
2384 -- Assign; dst is a reg, rhs is mem
2385 assignReg_IntCode pk reg (CmmLoad src _) = do
2386   load_code <- intLoadCode (MOV pk) src
2387   return (load_code (getRegisterReg reg))
2388
2389 -- dst is a reg, but src could be anything
2390 assignReg_IntCode pk reg src = do
2391   code <- getAnyReg src
2392   return (code (getRegisterReg reg))
2393
2394 #endif /* i386_TARGET_ARCH */
2395
2396 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2397
2398 #if sparc_TARGET_ARCH
2399
2400 assignMem_IntCode pk addr src = do
2401     (srcReg, code) <- getSomeReg src
2402     Amode dstAddr addr_code <- getAmode addr
2403     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2404
2405 assignReg_IntCode pk reg src = do
2406     r <- getRegister src
2407     return $ case r of
2408         Any _ code         -> code dst
2409         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2410     where
2411       dst = getRegisterReg reg
2412
2413
2414 #endif /* sparc_TARGET_ARCH */
2415
2416 #if powerpc_TARGET_ARCH
2417
2418 assignMem_IntCode pk addr src = do
2419     (srcReg, code) <- getSomeReg src
2420     Amode dstAddr addr_code <- getAmode addr
2421     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2422
2423 -- dst is a reg, but src could be anything
2424 assignReg_IntCode pk reg src
2425     = do
2426         r <- getRegister src
2427         return $ case r of
2428             Any _ code         -> code dst
2429             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2430     where
2431         dst = getRegisterReg reg
2432
2433 #endif /* powerpc_TARGET_ARCH */
2434
2435
2436 -- -----------------------------------------------------------------------------
2437 -- Floating-point assignments
2438
2439 #if alpha_TARGET_ARCH
2440
2441 assignFltCode pk (CmmLoad dst _) src
2442   = getNewRegNat pk                 `thenNat` \ tmp ->
2443     getAmode dst                    `thenNat` \ amode ->
2444     getRegister src                         `thenNat` \ register ->
2445     let
2446         code1   = amodeCode amode []
2447         dst__2  = amodeAddr amode
2448         code2   = registerCode register tmp []
2449         src__2  = registerName register tmp
2450         sz      = primRepToSize pk
2451         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2452     in
2453     return code__2
2454
2455 assignFltCode pk dst src
2456   = getRegister dst                         `thenNat` \ register1 ->
2457     getRegister src                         `thenNat` \ register2 ->
2458     let
2459         dst__2  = registerName register1 zeroh
2460         code    = registerCode register2 dst__2
2461         src__2  = registerName register2 dst__2
2462         code__2 = if isFixed register2
2463                   then code . mkSeqInstr (FMOV src__2 dst__2)
2464                   else code
2465     in
2466     return code__2
2467
2468 #endif /* alpha_TARGET_ARCH */
2469
2470 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2471
2472 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2473
2474 -- Floating point assignment to memory
2475 assignMem_FltCode pk addr src = do
2476   (src_reg, src_code) <- getNonClobberedReg src
2477   Amode addr addr_code <- getAmode addr
2478   let
2479         code = src_code `appOL`
2480                addr_code `snocOL`
2481                 IF_ARCH_i386(GST pk src_reg addr,
2482                              MOV pk (OpReg src_reg) (OpAddr addr))
2483   return code
2484
2485 -- Floating point assignment to a register/temporary
2486 assignReg_FltCode pk reg src = do
2487   src_code <- getAnyReg src
2488   return (src_code (getRegisterReg reg))
2489
2490 #endif /* i386_TARGET_ARCH */
2491
2492 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2493
2494 #if sparc_TARGET_ARCH
2495
2496 -- Floating point assignment to memory
2497 assignMem_FltCode pk addr src = do
2498     Amode dst__2 code1 <- getAmode addr
2499     (src__2, code2) <- getSomeReg src
2500     tmp1 <- getNewRegNat pk
2501     let
2502         pk__2   = cmmExprRep src
2503         code__2 = code1 `appOL` code2 `appOL`
2504             if   pk == pk__2
2505             then unitOL (ST pk src__2 dst__2)
2506             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2507     return code__2
2508
2509 -- Floating point assignment to a register/temporary
2510 -- ToDo: Verify correctness
2511 assignReg_FltCode pk reg src = do
2512     r <- getRegister src
2513     v1 <- getNewRegNat pk
2514     return $ case r of
2515         Any _ code         -> code dst
2516         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2517     where
2518       dst = getRegisterReg reg
2519
2520 #endif /* sparc_TARGET_ARCH */
2521
2522 #if powerpc_TARGET_ARCH
2523
2524 -- Easy, isn't it?
2525 assignMem_FltCode = assignMem_IntCode
2526 assignReg_FltCode = assignReg_IntCode
2527
2528 #endif /* powerpc_TARGET_ARCH */
2529
2530
2531 -- -----------------------------------------------------------------------------
2532 -- Generating an non-local jump
2533
2534 -- (If applicable) Do not fill the delay slots here; you will confuse the
2535 -- register allocator.
2536
2537 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2538
2539 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2540
2541 #if alpha_TARGET_ARCH
2542
2543 genJump (CmmLabel lbl)
2544   | isAsmTemp lbl = returnInstr (BR target)
2545   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2546   where
2547     target = ImmCLbl lbl
2548
2549 genJump tree
2550   = getRegister tree                `thenNat` \ register ->
2551     getNewRegNat PtrRep             `thenNat` \ tmp ->
2552     let
2553         dst    = registerName register pv
2554         code   = registerCode register pv
2555         target = registerName register pv
2556     in
2557     if isFixed register then
2558         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2559     else
2560     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2561
2562 #endif /* alpha_TARGET_ARCH */
2563
2564 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2565
2566 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2567
2568 genJump (CmmLoad mem pk) = do
2569   Amode target code <- getAmode mem
2570   return (code `snocOL` JMP (OpAddr target))
2571
2572 genJump (CmmLit lit) = do
2573   return (unitOL (JMP (OpImm (litToImm lit))))
2574
2575 genJump expr = do
2576   (reg,code) <- getSomeReg expr
2577   return (code `snocOL` JMP (OpReg reg))
2578
2579 #endif /* i386_TARGET_ARCH */
2580
2581 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2582
2583 #if sparc_TARGET_ARCH
2584
2585 genJump (CmmLit (CmmLabel lbl))
2586   = return (toOL [CALL (Left target) 0 True, NOP])
2587   where
2588     target = ImmCLbl lbl
2589
2590 genJump tree
2591   = do
2592         (target, code) <- getSomeReg tree
2593         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2594
2595 #endif /* sparc_TARGET_ARCH */
2596
2597 #if powerpc_TARGET_ARCH
2598 genJump (CmmLit (CmmLabel lbl))
2599   = return (unitOL $ JMP lbl)
2600
2601 genJump tree
2602   = do
2603         (target,code) <- getSomeReg tree
2604         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2605 #endif /* powerpc_TARGET_ARCH */
2606
2607
2608 -- -----------------------------------------------------------------------------
2609 --  Unconditional branches
2610
2611 genBranch :: BlockId -> NatM InstrBlock
2612
2613 genBranch = return . toOL . mkBranchInstr
2614
2615 -- -----------------------------------------------------------------------------
2616 --  Conditional jumps
2617
2618 {-
2619 Conditional jumps are always to local labels, so we can use branch
2620 instructions.  We peek at the arguments to decide what kind of
2621 comparison to do.
2622
2623 ALPHA: For comparisons with 0, we're laughing, because we can just do
2624 the desired conditional branch.
2625
2626 I386: First, we have to ensure that the condition
2627 codes are set according to the supplied comparison operation.
2628
2629 SPARC: First, we have to ensure that the condition codes are set
2630 according to the supplied comparison operation.  We generate slightly
2631 different code for floating point comparisons, because a floating
2632 point operation cannot directly precede a @BF@.  We assume the worst
2633 and fill that slot with a @NOP@.
2634
2635 SPARC: Do not fill the delay slots here; you will confuse the register
2636 allocator.
2637 -}
2638
2639
2640 genCondJump
2641     :: BlockId      -- the branch target
2642     -> CmmExpr      -- the condition on which to branch
2643     -> NatM InstrBlock
2644
2645 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2646
2647 #if alpha_TARGET_ARCH
2648
2649 genCondJump id (StPrim op [x, StInt 0])
2650   = getRegister x                           `thenNat` \ register ->
2651     getNewRegNat (registerRep register)
2652                                     `thenNat` \ tmp ->
2653     let
2654         code   = registerCode register tmp
2655         value  = registerName register tmp
2656         pk     = registerRep register
2657         target = ImmCLbl lbl
2658     in
2659     returnSeq code [BI (cmpOp op) value target]
2660   where
2661     cmpOp CharGtOp = GTT
2662     cmpOp CharGeOp = GE
2663     cmpOp CharEqOp = EQQ
2664     cmpOp CharNeOp = NE
2665     cmpOp CharLtOp = LTT
2666     cmpOp CharLeOp = LE
2667     cmpOp IntGtOp = GTT
2668     cmpOp IntGeOp = GE
2669     cmpOp IntEqOp = EQQ
2670     cmpOp IntNeOp = NE
2671     cmpOp IntLtOp = LTT
2672     cmpOp IntLeOp = LE
2673     cmpOp WordGtOp = NE
2674     cmpOp WordGeOp = ALWAYS
2675     cmpOp WordEqOp = EQQ
2676     cmpOp WordNeOp = NE
2677     cmpOp WordLtOp = NEVER
2678     cmpOp WordLeOp = EQQ
2679     cmpOp AddrGtOp = NE
2680     cmpOp AddrGeOp = ALWAYS
2681     cmpOp AddrEqOp = EQQ
2682     cmpOp AddrNeOp = NE
2683     cmpOp AddrLtOp = NEVER
2684     cmpOp AddrLeOp = EQQ
2685
2686 genCondJump lbl (StPrim op [x, StDouble 0.0])
2687   = getRegister x                           `thenNat` \ register ->
2688     getNewRegNat (registerRep register)
2689                                     `thenNat` \ tmp ->
2690     let
2691         code   = registerCode register tmp
2692         value  = registerName register tmp
2693         pk     = registerRep register
2694         target = ImmCLbl lbl
2695     in
2696     return (code . mkSeqInstr (BF (cmpOp op) value target))
2697   where
2698     cmpOp FloatGtOp = GTT
2699     cmpOp FloatGeOp = GE
2700     cmpOp FloatEqOp = EQQ
2701     cmpOp FloatNeOp = NE
2702     cmpOp FloatLtOp = LTT
2703     cmpOp FloatLeOp = LE
2704     cmpOp DoubleGtOp = GTT
2705     cmpOp DoubleGeOp = GE
2706     cmpOp DoubleEqOp = EQQ
2707     cmpOp DoubleNeOp = NE
2708     cmpOp DoubleLtOp = LTT
2709     cmpOp DoubleLeOp = LE
2710
2711 genCondJump lbl (StPrim op [x, y])
2712   | fltCmpOp op
2713   = trivialFCode pr instr x y       `thenNat` \ register ->
2714     getNewRegNat F64                `thenNat` \ tmp ->
2715     let
2716         code   = registerCode register tmp
2717         result = registerName register tmp
2718         target = ImmCLbl lbl
2719     in
2720     return (code . mkSeqInstr (BF cond result target))
2721   where
2722     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2723
2724     fltCmpOp op = case op of
2725         FloatGtOp -> True
2726         FloatGeOp -> True
2727         FloatEqOp -> True
2728         FloatNeOp -> True
2729         FloatLtOp -> True
2730         FloatLeOp -> True
2731         DoubleGtOp -> True
2732         DoubleGeOp -> True
2733         DoubleEqOp -> True
2734         DoubleNeOp -> True
2735         DoubleLtOp -> True
2736         DoubleLeOp -> True
2737         _ -> False
2738     (instr, cond) = case op of
2739         FloatGtOp -> (FCMP TF LE, EQQ)
2740         FloatGeOp -> (FCMP TF LTT, EQQ)
2741         FloatEqOp -> (FCMP TF EQQ, NE)
2742         FloatNeOp -> (FCMP TF EQQ, EQQ)
2743         FloatLtOp -> (FCMP TF LTT, NE)
2744         FloatLeOp -> (FCMP TF LE, NE)
2745         DoubleGtOp -> (FCMP TF LE, EQQ)
2746         DoubleGeOp -> (FCMP TF LTT, EQQ)
2747         DoubleEqOp -> (FCMP TF EQQ, NE)
2748         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2749         DoubleLtOp -> (FCMP TF LTT, NE)
2750         DoubleLeOp -> (FCMP TF LE, NE)
2751
2752 genCondJump lbl (StPrim op [x, y])
2753   = trivialCode instr x y           `thenNat` \ register ->
2754     getNewRegNat IntRep             `thenNat` \ tmp ->
2755     let
2756         code   = registerCode register tmp
2757         result = registerName register tmp
2758         target = ImmCLbl lbl
2759     in
2760     return (code . mkSeqInstr (BI cond result target))
2761   where
2762     (instr, cond) = case op of
2763         CharGtOp -> (CMP LE, EQQ)
2764         CharGeOp -> (CMP LTT, EQQ)
2765         CharEqOp -> (CMP EQQ, NE)
2766         CharNeOp -> (CMP EQQ, EQQ)
2767         CharLtOp -> (CMP LTT, NE)
2768         CharLeOp -> (CMP LE, NE)
2769         IntGtOp -> (CMP LE, EQQ)
2770         IntGeOp -> (CMP LTT, EQQ)
2771         IntEqOp -> (CMP EQQ, NE)
2772         IntNeOp -> (CMP EQQ, EQQ)
2773         IntLtOp -> (CMP LTT, NE)
2774         IntLeOp -> (CMP LE, NE)
2775         WordGtOp -> (CMP ULE, EQQ)
2776         WordGeOp -> (CMP ULT, EQQ)
2777         WordEqOp -> (CMP EQQ, NE)
2778         WordNeOp -> (CMP EQQ, EQQ)
2779         WordLtOp -> (CMP ULT, NE)
2780         WordLeOp -> (CMP ULE, NE)
2781         AddrGtOp -> (CMP ULE, EQQ)
2782         AddrGeOp -> (CMP ULT, EQQ)
2783         AddrEqOp -> (CMP EQQ, NE)
2784         AddrNeOp -> (CMP EQQ, EQQ)
2785         AddrLtOp -> (CMP ULT, NE)
2786         AddrLeOp -> (CMP ULE, NE)
2787
2788 #endif /* alpha_TARGET_ARCH */
2789
2790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2791
2792 #if i386_TARGET_ARCH
2793
2794 genCondJump id bool = do
2795   CondCode _ cond code <- getCondCode bool
2796   return (code `snocOL` JXX cond id)
2797
2798 #endif
2799
2800 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2801
2802 #if x86_64_TARGET_ARCH
2803
2804 genCondJump id bool = do
2805   CondCode is_float cond cond_code <- getCondCode bool
2806   if not is_float
2807     then
2808         return (cond_code `snocOL` JXX cond id)
2809     else do
2810         lbl <- getBlockIdNat
2811
2812         -- see comment with condFltReg
2813         let code = case cond of
2814                         NE  -> or_unordered
2815                         GU  -> plain_test
2816                         GEU -> plain_test
2817                         _   -> and_ordered
2818
2819             plain_test = unitOL (
2820                   JXX cond id
2821                 )
2822             or_unordered = toOL [
2823                   JXX cond id,
2824                   JXX PARITY id
2825                 ]
2826             and_ordered = toOL [
2827                   JXX PARITY lbl,
2828                   JXX cond id,
2829                   JXX ALWAYS lbl,
2830                   NEWBLOCK lbl
2831                 ]
2832         return (cond_code `appOL` code)
2833
2834 #endif
2835
2836 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2837
2838 #if sparc_TARGET_ARCH
2839
2840 genCondJump (BlockId id) bool = do
2841   CondCode is_float cond code <- getCondCode bool
2842   return (
2843        code `appOL`
2844        toOL (
2845          if   is_float
2846          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2847          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2848        )
2849     )
2850
2851 #endif /* sparc_TARGET_ARCH */
2852
2853
2854 #if powerpc_TARGET_ARCH
2855
2856 genCondJump id bool = do
2857   CondCode is_float cond code <- getCondCode bool
2858   return (code `snocOL` BCC cond id)
2859
2860 #endif /* powerpc_TARGET_ARCH */
2861
2862
2863 -- -----------------------------------------------------------------------------
2864 --  Generating C calls
2865
2866 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2867 -- @get_arg@, which moves the arguments to the correct registers/stack
2868 -- locations.  Apart from that, the code is easy.
2869 --
2870 -- (If applicable) Do not fill the delay slots here; you will confuse the
2871 -- register allocator.
2872
2873 genCCall
2874     :: CmmCallTarget            -- function to call
2875     -> [(CmmReg,MachHint)]      -- where to put the result
2876     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2877     -> Maybe [GlobalReg]        -- volatile regs to save
2878     -> NatM InstrBlock
2879
2880 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2881
2882 #if alpha_TARGET_ARCH
2883
2884 ccallResultRegs =
2885
2886 genCCall fn cconv result_regs args
2887   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2888                           `thenNat` \ ((unused,_), argCode) ->
2889     let
2890         nRegs = length allArgRegs - length unused
2891         code = asmSeqThen (map ($ []) argCode)
2892     in
2893         returnSeq code [
2894             LDA pv (AddrImm (ImmLab (ptext fn))),
2895             JSR ra (AddrReg pv) nRegs,
2896             LDGP gp (AddrReg ra)]
2897   where
2898     ------------------------
2899     {-  Try to get a value into a specific register (or registers) for
2900         a call.  The first 6 arguments go into the appropriate
2901         argument register (separate registers for integer and floating
2902         point arguments, but used in lock-step), and the remaining
2903         arguments are dumped to the stack, beginning at 0(sp).  Our
2904         first argument is a pair of the list of remaining argument
2905         registers to be assigned for this call and the next stack
2906         offset to use for overflowing arguments.  This way,
2907         @get_Arg@ can be applied to all of a call's arguments using
2908         @mapAccumLNat@.
2909     -}
2910     get_arg
2911         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2912         -> StixTree             -- Current argument
2913         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2914
2915     -- We have to use up all of our argument registers first...
2916
2917     get_arg ((iDst,fDst):dsts, offset) arg
2918       = getRegister arg                     `thenNat` \ register ->
2919         let
2920             reg  = if isFloatingRep pk then fDst else iDst
2921             code = registerCode register reg
2922             src  = registerName register reg
2923             pk   = registerRep register
2924         in
2925         return (
2926             if isFloatingRep pk then
2927                 ((dsts, offset), if isFixed register then
2928                     code . mkSeqInstr (FMOV src fDst)
2929                     else code)
2930             else
2931                 ((dsts, offset), if isFixed register then
2932                     code . mkSeqInstr (OR src (RIReg src) iDst)
2933                     else code))
2934
2935     -- Once we have run out of argument registers, we move to the
2936     -- stack...
2937
2938     get_arg ([], offset) arg
2939       = getRegister arg                 `thenNat` \ register ->
2940         getNewRegNat (registerRep register)
2941                                         `thenNat` \ tmp ->
2942         let
2943             code = registerCode register tmp
2944             src  = registerName register tmp
2945             pk   = registerRep register
2946             sz   = primRepToSize pk
2947         in
2948         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2949
2950 #endif /* alpha_TARGET_ARCH */
2951
2952 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2953
2954 #if i386_TARGET_ARCH
2955
2956 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
2957         -- write barrier compiles to no code on x86/x86-64;
2958         -- we keep it this long in order to prevent earlier optimisations.
2959
2960 -- we only cope with a single result for foreign calls
2961 genCCall (CmmPrim op) [(r,_)] args vols = do
2962   case op of
2963         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2964         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2965
2966         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2967         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2968
2969         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2970         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2971
2972         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2973         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2974
2975         other_op    -> outOfLineFloatOp op r args vols
2976  where
2977   actuallyInlineFloatOp rep instr [(x,_)]
2978         = do res <- trivialUFCode rep instr x
2979              any <- anyReg res
2980              return (any (getRegisterReg r))
2981
2982 genCCall target dest_regs args vols = do
2983     let
2984         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
2985 #if !darwin_TARGET_OS
2986         tot_arg_size        = sum sizes
2987 #else
2988         raw_arg_size        = sum sizes
2989         tot_arg_size        = roundTo 16 raw_arg_size
2990         arg_pad_size        = tot_arg_size - raw_arg_size
2991     delta0 <- getDeltaNat
2992     setDeltaNat (delta0 - arg_pad_size)
2993 #endif
2994
2995     push_codes <- mapM push_arg (reverse args)
2996     delta <- getDeltaNat
2997
2998     -- in
2999     -- deal with static vs dynamic call targets
3000     (callinsns,cconv) <-
3001       case target of
3002         -- CmmPrim -> ...
3003         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3004            -> -- ToDo: stdcall arg sizes
3005               return (unitOL (CALL (Left fn_imm) []), conv)
3006            where fn_imm = ImmCLbl lbl
3007         CmmForeignCall expr conv
3008            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3009                  ASSERT(dyn_rep == I32)
3010                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3011
3012     let push_code
3013 #if darwin_TARGET_OS
3014             | arg_pad_size /= 0
3015             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3016                     DELTA (delta0 - arg_pad_size)]
3017               `appOL` concatOL push_codes
3018             | otherwise
3019 #endif
3020             = concatOL push_codes
3021         call = callinsns `appOL`
3022                toOL (
3023                         -- Deallocate parameters after call for ccall;
3024                         -- but not for stdcall (callee does it)
3025                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3026                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3027                   ++
3028                   [DELTA (delta + tot_arg_size)]
3029                )
3030     -- in
3031     setDeltaNat (delta + tot_arg_size)
3032
3033     let
3034         -- assign the results, if necessary
3035         assign_code []     = nilOL
3036         assign_code [(dest,_hint)] =
3037           case rep of
3038                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3039                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3040                 F32 -> unitOL (GMOV fake0 r_dest)
3041                 F64 -> unitOL (GMOV fake0 r_dest)
3042                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3043           where
3044                 r_dest_hi = getHiVRegFromLo r_dest
3045                 rep = cmmRegRep dest
3046                 r_dest = getRegisterReg dest
3047         assign_code many = panic "genCCall.assign_code many"
3048
3049     return (push_code `appOL`
3050             call `appOL`
3051             assign_code dest_regs)
3052
3053   where
3054     arg_size F64 = 8
3055     arg_size F32 = 4
3056     arg_size I64 = 8
3057     arg_size _   = 4
3058
3059     roundTo a x | x `mod` a == 0 = x
3060                 | otherwise = x + a - (x `mod` a)
3061
3062
3063     push_arg :: (CmmExpr,MachHint){-current argument-}
3064                     -> NatM InstrBlock  -- code
3065
3066     push_arg (arg,_hint) -- we don't need the hints on x86
3067       | arg_rep == I64 = do
3068         ChildCode64 code r_lo <- iselExpr64 arg
3069         delta <- getDeltaNat
3070         setDeltaNat (delta - 8)
3071         let
3072             r_hi = getHiVRegFromLo r_lo
3073         -- in
3074         return (       code `appOL`
3075                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3076                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3077                              DELTA (delta-8)]
3078             )
3079
3080       | otherwise = do
3081         (code, reg, sz) <- get_op arg
3082         delta <- getDeltaNat
3083         let size = arg_size sz
3084         setDeltaNat (delta-size)
3085         if (case sz of F64 -> True; F32 -> True; _ -> False)
3086            then return (code `appOL`
3087                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3088                               DELTA (delta-size),
3089                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3090                                                         EAIndexNone
3091                                                         (ImmInt 0))]
3092                        )
3093            else return (code `snocOL`
3094                         PUSH I32 (OpReg reg) `snocOL`
3095                         DELTA (delta-size)
3096                        )
3097       where
3098          arg_rep = cmmExprRep arg
3099
3100     ------------
3101     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3102     get_op op = do
3103         (reg,code) <- getSomeReg op
3104         return (code, reg, cmmExprRep op)
3105
3106 #endif /* i386_TARGET_ARCH */
3107
3108 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3109
3110 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3111   -> Maybe [GlobalReg] -> NatM InstrBlock
3112 outOfLineFloatOp mop res args vols
3113   = do
3114       targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3115       let target = CmmForeignCall targetExpr CCallConv
3116
3117       if cmmRegRep res == F64
3118         then
3119           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3120         else do
3121           uq <- getUniqueNat
3122           let
3123             tmp = CmmLocal (LocalReg uq F64)
3124           -- in
3125           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3126           code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3127           return (code1 `appOL` code2)
3128   where
3129         lbl = mkForeignLabel fn Nothing False
3130
3131         fn = case mop of
3132               MO_F32_Sqrt  -> FSLIT("sqrtf")
3133               MO_F32_Sin   -> FSLIT("sinf")
3134               MO_F32_Cos   -> FSLIT("cosf")
3135               MO_F32_Tan   -> FSLIT("tanf")
3136               MO_F32_Exp   -> FSLIT("expf")
3137               MO_F32_Log   -> FSLIT("logf")
3138
3139               MO_F32_Asin  -> FSLIT("asinf")
3140               MO_F32_Acos  -> FSLIT("acosf")
3141               MO_F32_Atan  -> FSLIT("atanf")
3142
3143               MO_F32_Sinh  -> FSLIT("sinhf")
3144               MO_F32_Cosh  -> FSLIT("coshf")
3145               MO_F32_Tanh  -> FSLIT("tanhf")
3146               MO_F32_Pwr   -> FSLIT("powf")
3147
3148               MO_F64_Sqrt  -> FSLIT("sqrt")
3149               MO_F64_Sin   -> FSLIT("sin")
3150               MO_F64_Cos   -> FSLIT("cos")
3151               MO_F64_Tan   -> FSLIT("tan")
3152               MO_F64_Exp   -> FSLIT("exp")
3153               MO_F64_Log   -> FSLIT("log")
3154
3155               MO_F64_Asin  -> FSLIT("asin")
3156               MO_F64_Acos  -> FSLIT("acos")
3157               MO_F64_Atan  -> FSLIT("atan")
3158
3159               MO_F64_Sinh  -> FSLIT("sinh")
3160               MO_F64_Cosh  -> FSLIT("cosh")
3161               MO_F64_Tanh  -> FSLIT("tanh")
3162               MO_F64_Pwr   -> FSLIT("pow")
3163
3164 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3165
3166 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3167
3168 #if x86_64_TARGET_ARCH
3169
3170 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3171         -- write barrier compiles to no code on x86/x86-64;
3172         -- we keep it this long in order to prevent earlier optimisations.
3173
3174 genCCall (CmmPrim op) [(r,_)] args vols =
3175   outOfLineFloatOp op r args vols
3176
3177 genCCall target dest_regs args vols = do
3178
3179         -- load up the register arguments
3180     (stack_args, aregs, fregs, load_args_code)
3181          <- load_args args allArgRegs allFPArgRegs nilOL
3182
3183     let
3184         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3185         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3186         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3187                 -- for annotating the call instruction with
3188
3189         sse_regs = length fp_regs_used
3190
3191         tot_arg_size = arg_size * length stack_args
3192
3193         -- On entry to the called function, %rsp should be aligned
3194         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3195         -- the return address is 16-byte aligned).  In STG land
3196         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3197         -- need to make sure we push a multiple of 16-bytes of args,
3198         -- plus the return address, to get the correct alignment.
3199         -- Urg, this is hard.  We need to feed the delta back into
3200         -- the arg pushing code.
3201     (real_size, adjust_rsp) <-
3202         if tot_arg_size `rem` 16 == 0
3203             then return (tot_arg_size, nilOL)
3204             else do -- we need to adjust...
3205                 delta <- getDeltaNat
3206                 setDeltaNat (delta-8)
3207                 return (tot_arg_size+8, toOL [
3208                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3209                                 DELTA (delta-8)
3210                         ])
3211
3212         -- push the stack args, right to left
3213     push_code <- push_args (reverse stack_args) nilOL
3214     delta <- getDeltaNat
3215
3216     -- deal with static vs dynamic call targets
3217     (callinsns,cconv) <-
3218       case target of
3219         -- CmmPrim -> ...
3220         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3221            -> -- ToDo: stdcall arg sizes
3222               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3223            where fn_imm = ImmCLbl lbl
3224         CmmForeignCall expr conv
3225            -> do (dyn_r, dyn_c) <- getSomeReg expr
3226                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3227
3228     let
3229         -- The x86_64 ABI requires us to set %al to the number of SSE
3230         -- registers that contain arguments, if the called routine
3231         -- is a varargs function.  We don't know whether it's a
3232         -- varargs function or not, so we have to assume it is.
3233         --
3234         -- It's not safe to omit this assignment, even if the number
3235         -- of SSE regs in use is zero.  If %al is larger than 8
3236         -- on entry to a varargs function, seg faults ensue.
3237         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3238
3239     let call = callinsns `appOL`
3240                toOL (
3241                         -- Deallocate parameters after call for ccall;
3242                         -- but not for stdcall (callee does it)
3243                   (if cconv == StdCallConv || real_size==0 then [] else
3244                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3245                   ++
3246                   [DELTA (delta + real_size)]
3247                )
3248     -- in
3249     setDeltaNat (delta + real_size)
3250
3251     let
3252         -- assign the results, if necessary
3253         assign_code []     = nilOL
3254         assign_code [(dest,_hint)] =
3255           case rep of
3256                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3257                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3258                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3259           where
3260                 rep = cmmRegRep dest
3261                 r_dest = getRegisterReg dest
3262         assign_code many = panic "genCCall.assign_code many"
3263
3264     return (load_args_code      `appOL`
3265             adjust_rsp          `appOL`
3266             push_code           `appOL`
3267             assign_eax sse_regs `appOL`
3268             call                `appOL`
3269             assign_code dest_regs)
3270
3271   where
3272     arg_size = 8 -- always, at the mo
3273
3274     load_args :: [(CmmExpr,MachHint)]
3275               -> [Reg]                  -- int regs avail for args
3276               -> [Reg]                  -- FP regs avail for args
3277               -> InstrBlock
3278               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3279     load_args args [] [] code     =  return (args, [], [], code)
3280         -- no more regs to use
3281     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3282         -- no more args to push
3283     load_args ((arg,hint) : rest) aregs fregs code
3284         | isFloatingRep arg_rep =
3285         case fregs of
3286           [] -> push_this_arg
3287           (r:rs) -> do
3288              arg_code <- getAnyReg arg
3289              load_args rest aregs rs (code `appOL` arg_code r)
3290         | otherwise =
3291         case aregs of
3292           [] -> push_this_arg
3293           (r:rs) -> do
3294              arg_code <- getAnyReg arg
3295              load_args rest rs fregs (code `appOL` arg_code r)
3296         where
3297           arg_rep = cmmExprRep arg
3298
3299           push_this_arg = do
3300             (args',ars,frs,code') <- load_args rest aregs fregs code
3301             return ((arg,hint):args', ars, frs, code')
3302
3303     push_args [] code = return code
3304     push_args ((arg,hint):rest) code
3305        | isFloatingRep arg_rep = do
3306          (arg_reg, arg_code) <- getSomeReg arg
3307          delta <- getDeltaNat
3308          setDeltaNat (delta-arg_size)
3309          let code' = code `appOL` toOL [
3310                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3311                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3312                         DELTA (delta-arg_size)]
3313          push_args rest code'
3314
3315        | otherwise = do
3316        -- we only ever generate word-sized function arguments.  Promotion
3317        -- has already happened: our Int8# type is kept sign-extended
3318        -- in an Int#, for example.
3319          ASSERT(arg_rep == I64) return ()
3320          (arg_op, arg_code) <- getOperand arg
3321          delta <- getDeltaNat
3322          setDeltaNat (delta-arg_size)
3323          let code' = code `appOL` toOL [PUSH I64 arg_op,
3324                                         DELTA (delta-arg_size)]
3325          push_args rest code'
3326         where
3327           arg_rep = cmmExprRep arg
3328 #endif
3329
3330 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3331
3332 #if sparc_TARGET_ARCH
3333 {-
3334    The SPARC calling convention is an absolute
3335    nightmare.  The first 6x32 bits of arguments are mapped into
3336    %o0 through %o5, and the remaining arguments are dumped to the
3337    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3338
3339    If we have to put args on the stack, move %o6==%sp down by
3340    the number of words to go on the stack, to ensure there's enough space.
3341
3342    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3343    16 words above the stack pointer is a word for the address of
3344    a structure return value.  I use this as a temporary location
3345    for moving values from float to int regs.  Certainly it isn't
3346    safe to put anything in the 16 words starting at %sp, since
3347    this area can get trashed at any time due to window overflows
3348    caused by signal handlers.
3349
3350    A final complication (if the above isn't enough) is that
3351    we can't blithely calculate the arguments one by one into
3352    %o0 .. %o5.  Consider the following nested calls:
3353
3354        fff a (fff b c)
3355
3356    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3357    the inner call will itself use %o0, which trashes the value put there
3358    in preparation for the outer call.  Upshot: we need to calculate the
3359    args into temporary regs, and move those to arg regs or onto the
3360    stack only immediately prior to the call proper.  Sigh.
3361 -}
3362
3363 genCCall target dest_regs argsAndHints vols = do
3364     let
3365         args = map fst argsAndHints
3366     argcode_and_vregs <- mapM arg_to_int_vregs args
3367     let
3368         (argcodes, vregss) = unzip argcode_and_vregs
3369         n_argRegs          = length allArgRegs
3370         n_argRegs_used     = min (length vregs) n_argRegs
3371         vregs              = concat vregss
3372     -- deal with static vs dynamic call targets
3373     callinsns <- (case target of
3374         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3375                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3376         CmmForeignCall expr conv -> do
3377                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3378                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3379         CmmPrim mop -> do
3380                   (res, reduce) <- outOfLineFloatOp mop
3381                   lblOrMopExpr <- case res of
3382                        Left lbl -> do
3383                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3384                        Right mopExpr -> do
3385                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3386                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3387                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3388
3389       )
3390     let
3391         argcode = concatOL argcodes
3392         (move_sp_down, move_sp_up)
3393            = let diff = length vregs - n_argRegs
3394                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3395              in  if   nn <= 0
3396                  then (nilOL, nilOL)
3397                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3398         transfer_code
3399            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3400     return (argcode       `appOL`
3401             move_sp_down  `appOL`
3402             transfer_code `appOL`
3403             callinsns     `appOL`
3404             unitOL NOP    `appOL`
3405             move_sp_up)
3406   where
3407      -- move args from the integer vregs into which they have been
3408      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3409      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3410
3411      move_final [] _ offset          -- all args done
3412         = []
3413
3414      move_final (v:vs) [] offset     -- out of aregs; move to stack
3415         = ST I32 v (spRel offset)
3416           : move_final vs [] (offset+1)
3417
3418      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3419         = OR False g0 (RIReg v) a
3420           : move_final vs az offset
3421
3422      -- generate code to calculate an argument, and move it into one
3423      -- or two integer vregs.
3424      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3425      arg_to_int_vregs arg
3426         | (cmmExprRep arg) == I64
3427         = do
3428           (ChildCode64 code r_lo) <- iselExpr64 arg
3429           let
3430               r_hi = getHiVRegFromLo r_lo
3431           return (code, [r_hi, r_lo])
3432         | otherwise
3433         = do
3434           (src, code) <- getSomeReg arg
3435           tmp <- getNewRegNat (cmmExprRep arg)
3436           let
3437               pk   = cmmExprRep arg
3438           case pk of
3439              F64 -> do
3440                       v1 <- getNewRegNat I32
3441                       v2 <- getNewRegNat I32
3442                       return (
3443                         code                          `snocOL`
3444                         FMOV F64 src f0                `snocOL`
3445                         ST   F32  f0 (spRel 16)         `snocOL`
3446                         LD   I32  (spRel 16) v1         `snocOL`
3447                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3448                         LD   I32  (spRel 16) v2
3449                         ,
3450                         [v1,v2]
3451                        )
3452              F32 -> do
3453                       v1 <- getNewRegNat I32
3454                       return (
3455                         code                    `snocOL`
3456                         ST   F32  src (spRel 16)  `snocOL`
3457                         LD   I32  (spRel 16) v1
3458                         ,
3459                         [v1]
3460                        )
3461              other -> do
3462                         v1 <- getNewRegNat I32
3463                         return (
3464                           code `snocOL` OR False g0 (RIReg src) v1
3465                           ,
3466                           [v1]
3467                          )
3468 outOfLineFloatOp mop =
3469     do
3470       mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3471                   mkForeignLabel functionName Nothing True
3472       let mopLabelOrExpr = case mopExpr of
3473                         CmmLit (CmmLabel lbl) -> Left lbl
3474                         _ -> Right mopExpr
3475       return (mopLabelOrExpr, reduce)
3476             where
3477                 (reduce, functionName) = case mop of
3478                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3479                   MO_F32_Log    -> (True,  FSLIT("log"))
3480                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3481
3482                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3483                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3484                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3485
3486                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3487                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3488                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3489
3490                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3491                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3492                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3493
3494                   MO_F64_Exp    -> (False, FSLIT("exp"))
3495                   MO_F64_Log    -> (False, FSLIT("log"))
3496                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3497
3498                   MO_F64_Sin    -> (False, FSLIT("sin"))
3499                   MO_F64_Cos    -> (False, FSLIT("cos"))
3500                   MO_F64_Tan    -> (False, FSLIT("tan"))
3501
3502                   MO_F64_Asin   -> (False, FSLIT("asin"))
3503                   MO_F64_Acos   -> (False, FSLIT("acos"))
3504                   MO_F64_Atan   -> (False, FSLIT("atan"))
3505
3506                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3507                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3508                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3509
3510                   other -> pprPanic "outOfLineFloatOp(sparc) "
3511                                 (pprCallishMachOp mop)
3512
3513 #endif /* sparc_TARGET_ARCH */
3514
3515 #if powerpc_TARGET_ARCH
3516
3517 #if darwin_TARGET_OS || linux_TARGET_OS
3518 {-
3519     The PowerPC calling convention for Darwin/Mac OS X
3520     is described in Apple's document
3521     "Inside Mac OS X - Mach-O Runtime Architecture".
3522
3523     PowerPC Linux uses the System V Release 4 Calling Convention
3524     for PowerPC. It is described in the
3525     "System V Application Binary Interface PowerPC Processor Supplement".
3526
3527     Both conventions are similar:
3528     Parameters may be passed in general-purpose registers starting at r3, in
3529     floating point registers starting at f1, or on the stack.
3530
3531     But there are substantial differences:
3532     * The number of registers used for parameter passing and the exact set of
3533       nonvolatile registers differs (see MachRegs.lhs).
3534     * On Darwin, stack space is always reserved for parameters, even if they are
3535       passed in registers. The called routine may choose to save parameters from
3536       registers to the corresponding space on the stack.
3537     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3538       parameter is passed in an FPR.
3539     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3540       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3541       Darwin just treats an I64 like two separate I32s (high word first).
3542     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3543       4-byte aligned like everything else on Darwin.
3544     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3545       PowerPC Linux does not agree, so neither do we.
3546
3547     According to both conventions, The parameter area should be part of the
3548     caller's stack frame, allocated in the caller's prologue code (large enough
3549     to hold the parameter lists for all called routines). The NCG already
3550     uses the stack for register spilling, leaving 64 bytes free at the top.
3551     If we need a larger parameter area than that, we just allocate a new stack
3552     frame just before ccalling.
3553 -}
3554
3555
3556 genCCall (CmmPrim MO_WriteBarrier) _ _ _
3557  = return $ unitOL LWSYNC
3558
3559 genCCall target dest_regs argsAndHints vols
3560   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3561         -- we rely on argument promotion in the codeGen
3562     do
3563         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3564                                                         (zip args argReps)
3565                                                         allArgRegs allFPArgRegs
3566                                                         initialStackOffset
3567                                                         (toOL []) []
3568
3569         (labelOrExpr, reduceToF32) <- case target of
3570             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3571             CmmForeignCall expr conv -> return  (Right expr, False)
3572             CmmPrim mop -> outOfLineFloatOp mop
3573
3574         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3575             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3576
3577         case labelOrExpr of
3578             Left lbl -> do
3579                 return (         codeBefore
3580                         `snocOL` BL lbl usedRegs
3581                         `appOL`  codeAfter)
3582             Right dyn -> do
3583                 (dynReg, dynCode) <- getSomeReg dyn
3584                 return (         dynCode
3585                         `snocOL` MTCTR dynReg
3586                         `appOL`  codeBefore
3587                         `snocOL` BCTRL usedRegs
3588                         `appOL`  codeAfter)
3589     where
3590 #if darwin_TARGET_OS
3591         initialStackOffset = 24
3592             -- size of linkage area + size of arguments, in bytes
3593         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3594                                        map machRepByteWidth argReps
3595 #elif linux_TARGET_OS
3596         initialStackOffset = 8
3597         stackDelta finalStack = roundTo 16 finalStack
3598 #endif
3599         args = map fst argsAndHints
3600         argReps = map cmmExprRep args
3601
3602         roundTo a x | x `mod` a == 0 = x
3603                     | otherwise = x + a - (x `mod` a)
3604
3605         move_sp_down finalStack
3606                | delta > 64 =
3607                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3608                               DELTA (-delta)]
3609                | otherwise = nilOL
3610                where delta = stackDelta finalStack
3611         move_sp_up finalStack
3612                | delta > 64 =
3613                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3614                               DELTA 0]
3615                | otherwise = nilOL
3616                where delta = stackDelta finalStack
3617
3618
3619         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3620         passArguments ((arg,I64):args) gprs fprs stackOffset
3621                accumCode accumUsed =
3622             do
3623                 ChildCode64 code vr_lo <- iselExpr64 arg
3624                 let vr_hi = getHiVRegFromLo vr_lo
3625
3626 #if darwin_TARGET_OS
3627                 passArguments args
3628                               (drop 2 gprs)
3629                               fprs
3630                               (stackOffset+8)
3631                               (accumCode `appOL` code
3632                                     `snocOL` storeWord vr_hi gprs stackOffset
3633                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3634                               ((take 2 gprs) ++ accumUsed)
3635             where
3636                 storeWord vr (gpr:_) offset = MR gpr vr
3637                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3638
3639 #elif linux_TARGET_OS
3640                 let stackOffset' = roundTo 8 stackOffset
3641                     stackCode = accumCode `appOL` code
3642                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3643                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3644                     regCode hireg loreg =
3645                         accumCode `appOL` code
3646                             `snocOL` MR hireg vr_hi
3647                             `snocOL` MR loreg vr_lo
3648
3649                 case gprs of
3650                     hireg : loreg : regs | even (length gprs) ->
3651                         passArguments args regs fprs stackOffset
3652                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3653                     _skipped : hireg : loreg : regs ->
3654                         passArguments args regs fprs stackOffset
3655                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3656                     _ -> -- only one or no regs left
3657                         passArguments args [] fprs (stackOffset'+8)
3658                                       stackCode accumUsed
3659 #endif
3660
3661         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3662             | reg : _ <- regs = do
3663                 register <- getRegister arg
3664                 let code = case register of
3665                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3666                             Any _ acode -> acode reg
3667                 passArguments args
3668                               (drop nGprs gprs)
3669                               (drop nFprs fprs)
3670 #if darwin_TARGET_OS
3671         -- The Darwin ABI requires that we reserve stack slots for register parameters
3672                               (stackOffset + stackBytes)
3673 #elif linux_TARGET_OS
3674         -- ... the SysV ABI doesn't.
3675                               stackOffset
3676 #endif
3677                               (accumCode `appOL` code)
3678                               (reg : accumUsed)
3679             | otherwise = do
3680                 (vr, code) <- getSomeReg arg
3681                 passArguments args
3682                               (drop nGprs gprs)
3683                               (drop nFprs fprs)
3684                               (stackOffset' + stackBytes)
3685                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3686                               accumUsed
3687             where
3688 #if darwin_TARGET_OS
3689         -- stackOffset is at least 4-byte aligned
3690         -- The Darwin ABI is happy with that.
3691                 stackOffset' = stackOffset
3692 #else
3693         -- ... the SysV ABI requires 8-byte alignment for doubles.
3694                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3695                              | otherwise  =           stackOffset
3696 #endif
3697                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3698                 (nGprs, nFprs, stackBytes, regs) = case rep of
3699                     I32 -> (1, 0, 4, gprs)
3700 #if darwin_TARGET_OS
3701         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3702         -- we use the FPRs.
3703                     F32 -> (1, 1, 4, fprs)
3704                     F64 -> (2, 1, 8, fprs)
3705 #elif linux_TARGET_OS
3706         -- ... the SysV ABI doesn't.
3707                     F32 -> (0, 1, 4, fprs)
3708                     F64 -> (0, 1, 8, fprs)
3709 #endif
3710
3711         moveResult reduceToF32 =
3712             case dest_regs of
3713                 [] -> nilOL
3714                 [(dest, _hint)]
3715                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3716                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3717                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3718                                           MR r_dest r4]
3719                     | otherwise -> unitOL (MR r_dest r3)
3720                     where rep = cmmRegRep dest
3721                           r_dest = getRegisterReg dest
3722
3723         outOfLineFloatOp mop =
3724             do
3725                 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3726                               mkForeignLabel functionName Nothing True
3727                 let mopLabelOrExpr = case mopExpr of
3728                         CmmLit (CmmLabel lbl) -> Left lbl
3729                         _ -> Right mopExpr
3730                 return (mopLabelOrExpr, reduce)
3731             where
3732                 (functionName, reduce) = case mop of
3733                     MO_F32_Exp   -> (FSLIT("exp"), True)
3734                     MO_F32_Log   -> (FSLIT("log"), True)
3735                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3736
3737                     MO_F32_Sin   -> (FSLIT("sin"), True)
3738                     MO_F32_Cos   -> (FSLIT("cos"), True)
3739                     MO_F32_Tan   -> (FSLIT("tan"), True)
3740
3741                     MO_F32_Asin  -> (FSLIT("asin"), True)
3742                     MO_F32_Acos  -> (FSLIT("acos"), True)
3743                     MO_F32_Atan  -> (FSLIT("atan"), True)
3744
3745                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3746                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3747                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3748                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3749
3750                     MO_F64_Exp   -> (FSLIT("exp"), False)
3751                     MO_F64_Log   -> (FSLIT("log"), False)
3752                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3753
3754                     MO_F64_Sin   -> (FSLIT("sin"), False)
3755                     MO_F64_Cos   -> (FSLIT("cos"), False)
3756                     MO_F64_Tan   -> (FSLIT("tan"), False)
3757
3758                     MO_F64_Asin  -> (FSLIT("asin"), False)
3759                     MO_F64_Acos  -> (FSLIT("acos"), False)
3760                     MO_F64_Atan  -> (FSLIT("atan"), False)
3761
3762                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3763                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3764                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3765                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3766                     other -> pprPanic "genCCall(ppc): unknown callish op"
3767                                     (pprCallishMachOp other)
3768
3769 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3770
3771 #endif /* powerpc_TARGET_ARCH */
3772
3773
3774 -- -----------------------------------------------------------------------------
3775 -- Generating a table-branch
3776
3777 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3778
3779 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3780 genSwitch expr ids
3781   | opt_PIC
3782   = do
3783         (reg,e_code) <- getSomeReg expr
3784         lbl <- getNewLabelNat
3785         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3786         (tableReg,t_code) <- getSomeReg $ dynRef
3787         let
3788             jumpTable = map jumpTableEntryRel ids
3789
3790             jumpTableEntryRel Nothing
3791                 = CmmStaticLit (CmmInt 0 wordRep)
3792             jumpTableEntryRel (Just (BlockId id))
3793                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3794                 where blockLabel = mkAsmTempLabel id
3795
3796             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3797                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3798
3799 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3800     -- on Mac OS X/x86_64, put the jump table in the text section
3801     -- to work around a limitation of the linker.
3802     -- ld64 is unable to handle the relocations for
3803     --     .quad L1 - L0
3804     -- if L0 is not preceded by a non-anonymous label in its section.
3805
3806             code = e_code `appOL` t_code `appOL` toOL [
3807                             ADD wordRep op (OpReg tableReg),
3808                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3809                             LDATA Text (CmmDataLabel lbl : jumpTable)
3810                     ]
3811 #else
3812             code = e_code `appOL` t_code `appOL` toOL [
3813                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3814                             ADD wordRep op (OpReg tableReg),
3815                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3816                     ]
3817 #endif
3818         return code
3819   | otherwise
3820   = do
3821         (reg,e_code) <- getSomeReg expr
3822         lbl <- getNewLabelNat
3823         let
3824             jumpTable = map jumpTableEntry ids
3825             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3826             code = e_code `appOL` toOL [
3827                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3828                     JMP_TBL op [ id | Just id <- ids ]
3829                  ]
3830         -- in
3831         return code
3832 #elif powerpc_TARGET_ARCH
3833 genSwitch expr ids
3834   | opt_PIC
3835   = do
3836         (reg,e_code) <- getSomeReg expr
3837         tmp <- getNewRegNat I32
3838         lbl <- getNewLabelNat
3839         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3840         (tableReg,t_code) <- getSomeReg $ dynRef
3841         let
3842             jumpTable = map jumpTableEntryRel ids
3843
3844             jumpTableEntryRel Nothing
3845                 = CmmStaticLit (CmmInt 0 wordRep)
3846             jumpTableEntryRel (Just (BlockId id))
3847                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3848                 where blockLabel = mkAsmTempLabel id
3849
3850             code = e_code `appOL` t_code `appOL` toOL [
3851                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3852                             SLW tmp reg (RIImm (ImmInt 2)),
3853                             LD I32 tmp (AddrRegReg tableReg tmp),
3854                             ADD tmp tmp (RIReg tableReg),
3855                             MTCTR tmp,
3856                             BCTR [ id | Just id <- ids ]
3857                     ]
3858         return code
3859   | otherwise
3860   = do
3861         (reg,e_code) <- getSomeReg expr
3862         tmp <- getNewRegNat I32
3863         lbl <- getNewLabelNat
3864         let
3865             jumpTable = map jumpTableEntry ids
3866
3867             code = e_code `appOL` toOL [
3868                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3869                             SLW tmp reg (RIImm (ImmInt 2)),
3870                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3871                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3872                             MTCTR tmp,
3873                             BCTR [ id | Just id <- ids ]
3874                     ]
3875         return code
3876 #else
3877 genSwitch expr ids = panic "ToDo: genSwitch"
3878 #endif
3879
3880 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3881 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3882     where blockLabel = mkAsmTempLabel id
3883
3884 -- -----------------------------------------------------------------------------
3885 -- Support bits
3886 -- -----------------------------------------------------------------------------
3887
3888
3889 -- -----------------------------------------------------------------------------
3890 -- 'condIntReg' and 'condFltReg': condition codes into registers
3891
3892 -- Turn those condition codes into integers now (when they appear on
3893 -- the right hand side of an assignment).
3894 --
3895 -- (If applicable) Do not fill the delay slots here; you will confuse the
3896 -- register allocator.
3897
3898 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3899
3900 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3901
3902 #if alpha_TARGET_ARCH
3903 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3904 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3905 #endif /* alpha_TARGET_ARCH */
3906
3907 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3908
3909 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3910
3911 condIntReg cond x y = do
3912   CondCode _ cond cond_code <- condIntCode cond x y
3913   tmp <- getNewRegNat I8
3914   let
3915         code dst = cond_code `appOL` toOL [
3916                     SETCC cond (OpReg tmp),
3917                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3918                   ]
3919   -- in
3920   return (Any I32 code)
3921
3922 #endif
3923
3924 #if i386_TARGET_ARCH
3925
3926 condFltReg cond x y = do
3927   CondCode _ cond cond_code <- condFltCode cond x y
3928   tmp <- getNewRegNat I8
3929   let
3930         code dst = cond_code `appOL` toOL [
3931                     SETCC cond (OpReg tmp),
3932                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3933                   ]
3934   -- in
3935   return (Any I32 code)
3936
3937 #endif
3938
3939 #if x86_64_TARGET_ARCH
3940
3941 condFltReg cond x y = do
3942   CondCode _ cond cond_code <- condFltCode cond x y
3943   tmp1 <- getNewRegNat wordRep
3944   tmp2 <- getNewRegNat wordRep
3945   let
3946         -- We have to worry about unordered operands (eg. comparisons
3947         -- against NaN).  If the operands are unordered, the comparison
3948         -- sets the parity flag, carry flag and zero flag.
3949         -- All comparisons are supposed to return false for unordered
3950         -- operands except for !=, which returns true.
3951         --
3952         -- Optimisation: we don't have to test the parity flag if we
3953         -- know the test has already excluded the unordered case: eg >
3954         -- and >= test for a zero carry flag, which can only occur for
3955         -- ordered operands.
3956         --
3957         -- ToDo: by reversing comparisons we could avoid testing the
3958         -- parity flag in more cases.
3959
3960         code dst =
3961            cond_code `appOL`
3962              (case cond of
3963                 NE  -> or_unordered dst
3964                 GU  -> plain_test   dst
3965                 GEU -> plain_test   dst
3966                 _   -> and_ordered  dst)
3967
3968         plain_test dst = toOL [
3969                     SETCC cond (OpReg tmp1),
3970                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3971                  ]
3972         or_unordered dst = toOL [
3973                     SETCC cond (OpReg tmp1),
3974                     SETCC PARITY (OpReg tmp2),
3975                     OR I8 (OpReg tmp1) (OpReg tmp2),
3976                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3977                   ]
3978         and_ordered dst = toOL [
3979                     SETCC cond (OpReg tmp1),
3980                     SETCC NOTPARITY (OpReg tmp2),
3981                     AND I8 (OpReg tmp1) (OpReg tmp2),
3982                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3983                   ]
3984   -- in
3985   return (Any I32 code)
3986
3987 #endif
3988
3989 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3990
3991 #if sparc_TARGET_ARCH
3992
3993 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3994     (src, code) <- getSomeReg x
3995     tmp <- getNewRegNat I32
3996     let
3997         code__2 dst = code `appOL` toOL [
3998             SUB False True g0 (RIReg src) g0,
3999             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4000     return (Any I32 code__2)
4001
4002 condIntReg EQQ x y = do
4003     (src1, code1) <- getSomeReg x
4004     (src2, code2) <- getSomeReg y
4005     tmp1 <- getNewRegNat I32
4006     tmp2 <- getNewRegNat I32
4007     let
4008         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4009             XOR False src1 (RIReg src2) dst,
4010             SUB False True g0 (RIReg dst) g0,
4011             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4012     return (Any I32 code__2)
4013
4014 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4015     (src, code) <- getSomeReg x
4016     tmp <- getNewRegNat I32
4017     let
4018         code__2 dst = code `appOL` toOL [
4019             SUB False True g0 (RIReg src) g0,
4020             ADD True False g0 (RIImm (ImmInt 0)) dst]
4021     return (Any I32 code__2)
4022
4023 condIntReg NE x y = do
4024     (src1, code1) <- getSomeReg x
4025     (src2, code2) <- getSomeReg y
4026     tmp1 <- getNewRegNat I32
4027     tmp2 <- getNewRegNat I32
4028     let
4029         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4030             XOR False src1 (RIReg src2) dst,
4031             SUB False True g0 (RIReg dst) g0,
4032             ADD True False g0 (RIImm (ImmInt 0)) dst]
4033     return (Any I32 code__2)
4034
4035 condIntReg cond x y = do
4036     BlockId lbl1 <- getBlockIdNat
4037     BlockId lbl2 <- getBlockIdNat
4038     CondCode _ cond cond_code <- condIntCode cond x y
4039     let
4040         code__2 dst = cond_code `appOL` toOL [
4041             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4042             OR False g0 (RIImm (ImmInt 0)) dst,
4043             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4044             NEWBLOCK (BlockId lbl1),
4045             OR False g0 (RIImm (ImmInt 1)) dst,
4046             NEWBLOCK (BlockId lbl2)]
4047     return (Any I32 code__2)
4048
4049 condFltReg cond x y = do
4050     BlockId lbl1 <- getBlockIdNat
4051     BlockId lbl2 <- getBlockIdNat
4052     CondCode _ cond cond_code <- condFltCode cond x y
4053     let
4054         code__2 dst = cond_code `appOL` toOL [
4055             NOP,
4056             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4057             OR False g0 (RIImm (ImmInt 0)) dst,
4058             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4059             NEWBLOCK (BlockId lbl1),
4060             OR False g0 (RIImm (ImmInt 1)) dst,
4061             NEWBLOCK (BlockId lbl2)]
4062     return (Any I32 code__2)
4063
4064 #endif /* sparc_TARGET_ARCH */
4065
4066 #if powerpc_TARGET_ARCH
4067 condReg getCond = do
4068     lbl1 <- getBlockIdNat
4069     lbl2 <- getBlockIdNat
4070     CondCode _ cond cond_code <- getCond
4071     let
4072 {-        code dst = cond_code `appOL` toOL [
4073                 BCC cond lbl1,
4074                 LI dst (ImmInt 0),
4075                 BCC ALWAYS lbl2,
4076                 NEWBLOCK lbl1,
4077                 LI dst (ImmInt 1),
4078                 BCC ALWAYS lbl2,
4079                 NEWBLOCK lbl2
4080             ]-}
4081         code dst = cond_code
4082             `appOL` negate_code
4083             `appOL` toOL [
4084                 MFCR dst,
4085                 RLWINM dst dst (bit + 1) 31 31
4086             ]
4087
4088         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4089                     | otherwise = nilOL
4090
4091         (bit, do_negate) = case cond of
4092             LTT -> (0, False)
4093             LE  -> (1, True)
4094             EQQ -> (2, False)
4095             GE  -> (0, True)
4096             GTT -> (1, False)
4097
4098             NE  -> (2, True)
4099
4100             LU  -> (0, False)
4101             LEU -> (1, True)
4102             GEU -> (0, True)
4103             GU  -> (1, False)
4104
4105     return (Any I32 code)
4106
4107 condIntReg cond x y = condReg (condIntCode cond x y)
4108 condFltReg cond x y = condReg (condFltCode cond x y)
4109 #endif /* powerpc_TARGET_ARCH */
4110
4111
4112 -- -----------------------------------------------------------------------------
4113 -- 'trivial*Code': deal with trivial instructions
4114
4115 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4116 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4117 -- Only look for constants on the right hand side, because that's
4118 -- where the generic optimizer will have put them.
4119
4120 -- Similarly, for unary instructions, we don't have to worry about
4121 -- matching an StInt as the argument, because genericOpt will already
4122 -- have handled the constant-folding.
4123
4124 trivialCode
4125     :: MachRep
4126     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4127       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4128                      -> Maybe (Operand -> Operand -> Instr)
4129       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4130                      -> Maybe (Operand -> Operand -> Instr)
4131       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4132       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4133       ,)))))
4134     -> CmmExpr -> CmmExpr -- the two arguments
4135     -> NatM Register
4136
4137 #ifndef powerpc_TARGET_ARCH
4138 trivialFCode
4139     :: MachRep
4140     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4141       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4142       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4143       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4144       ,))))
4145     -> CmmExpr -> CmmExpr -- the two arguments
4146     -> NatM Register
4147 #endif
4148
4149 trivialUCode
4150     :: MachRep
4151     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4152       ,IF_ARCH_i386 ((Operand -> Instr)
4153       ,IF_ARCH_x86_64 ((Operand -> Instr)
4154       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4155       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4156       ,)))))
4157     -> CmmExpr  -- the one argument
4158     -> NatM Register
4159
4160 #ifndef powerpc_TARGET_ARCH
4161 trivialUFCode
4162     :: MachRep
4163     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4164       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4165       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4166       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4167       ,))))
4168     -> CmmExpr -- the one argument
4169     -> NatM Register
4170 #endif
4171
4172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4173
4174 #if alpha_TARGET_ARCH
4175
4176 trivialCode instr x (StInt y)
4177   | fits8Bits y
4178   = getRegister x               `thenNat` \ register ->
4179     getNewRegNat IntRep         `thenNat` \ tmp ->
4180     let
4181         code = registerCode register tmp
4182         src1 = registerName register tmp
4183         src2 = ImmInt (fromInteger y)
4184         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4185     in
4186     return (Any IntRep code__2)
4187
4188 trivialCode instr x y
4189   = getRegister x               `thenNat` \ register1 ->
4190     getRegister y               `thenNat` \ register2 ->
4191     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4192     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4193     let
4194         code1 = registerCode register1 tmp1 []
4195         src1  = registerName register1 tmp1
4196         code2 = registerCode register2 tmp2 []
4197         src2  = registerName register2 tmp2
4198         code__2 dst = asmSeqThen [code1, code2] .
4199                      mkSeqInstr (instr src1 (RIReg src2) dst)
4200     in
4201     return (Any IntRep code__2)
4202
4203 ------------
4204 trivialUCode instr x
4205   = getRegister x               `thenNat` \ register ->
4206     getNewRegNat IntRep         `thenNat` \ tmp ->
4207     let
4208         code = registerCode register tmp
4209         src  = registerName register tmp
4210         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4211     in
4212     return (Any IntRep code__2)
4213
4214 ------------
4215 trivialFCode _ instr x y
4216   = getRegister x               `thenNat` \ register1 ->
4217     getRegister y               `thenNat` \ register2 ->
4218     getNewRegNat F64    `thenNat` \ tmp1 ->
4219     getNewRegNat F64    `thenNat` \ tmp2 ->
4220     let
4221         code1 = registerCode register1 tmp1
4222         src1  = registerName register1 tmp1
4223
4224         code2 = registerCode register2 tmp2
4225         src2  = registerName register2 tmp2
4226
4227         code__2 dst = asmSeqThen [code1 [], code2 []] .
4228                       mkSeqInstr (instr src1 src2 dst)
4229     in
4230     return (Any F64 code__2)
4231
4232 trivialUFCode _ instr x
4233   = getRegister x               `thenNat` \ register ->
4234     getNewRegNat F64    `thenNat` \ tmp ->
4235     let
4236         code = registerCode register tmp
4237         src  = registerName register tmp
4238         code__2 dst = code . mkSeqInstr (instr src dst)
4239     in
4240     return (Any F64 code__2)
4241
4242 #endif /* alpha_TARGET_ARCH */
4243
4244 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4245
4246 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4247
4248 {-
4249 The Rules of the Game are:
4250
4251 * You cannot assume anything about the destination register dst;
4252   it may be anything, including a fixed reg.
4253
4254 * You may compute an operand into a fixed reg, but you may not
4255   subsequently change the contents of that fixed reg.  If you
4256   want to do so, first copy the value either to a temporary
4257   or into dst.  You are free to modify dst even if it happens
4258   to be a fixed reg -- that's not your problem.
4259
4260 * You cannot assume that a fixed reg will stay live over an
4261   arbitrary computation.  The same applies to the dst reg.
4262
4263 * Temporary regs obtained from getNewRegNat are distinct from
4264   each other and from all other regs, and stay live over
4265   arbitrary computations.
4266
4267 --------------------
4268
4269 SDM's version of The Rules:
4270
4271 * If getRegister returns Any, that means it can generate correct
4272   code which places the result in any register, period.  Even if that
4273   register happens to be read during the computation.
4274
4275   Corollary #1: this means that if you are generating code for an
4276   operation with two arbitrary operands, you cannot assign the result
4277   of the first operand into the destination register before computing
4278   the second operand.  The second operand might require the old value
4279   of the destination register.
4280
4281   Corollary #2: A function might be able to generate more efficient
4282   code if it knows the destination register is a new temporary (and
4283   therefore not read by any of the sub-computations).
4284
4285 * If getRegister returns Any, then the code it generates may modify only:
4286         (a) fresh temporaries
4287         (b) the destination register
4288         (c) known registers (eg. %ecx is used by shifts)
4289   In particular, it may *not* modify global registers, unless the global
4290   register happens to be the destination register.
4291 -}
4292
4293 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4294   | not (is64BitLit lit_a) = do
4295   b_code <- getAnyReg b
4296   let
4297        code dst
4298          = b_code dst `snocOL`
4299            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4300   -- in
4301   return (Any rep code)
4302
4303 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4304
4305 -- This is re-used for floating pt instructions too.
4306 genTrivialCode rep instr a b = do
4307   (b_op, b_code) <- getNonClobberedOperand b
4308   a_code <- getAnyReg a
4309   tmp <- getNewRegNat rep
4310   let
4311      -- We want the value of b to stay alive across the computation of a.
4312      -- But, we want to calculate a straight into the destination register,
4313      -- because the instruction only has two operands (dst := dst `op` src).
4314      -- The troublesome case is when the result of b is in the same register
4315      -- as the destination reg.  In this case, we have to save b in a
4316      -- new temporary across the computation of a.
4317      code dst
4318         | dst `regClashesWithOp` b_op =
4319                 b_code `appOL`
4320                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4321                 a_code dst `snocOL`
4322                 instr (OpReg tmp) (OpReg dst)
4323         | otherwise =
4324                 b_code `appOL`
4325                 a_code dst `snocOL`
4326                 instr b_op (OpReg dst)
4327   -- in
4328   return (Any rep code)
4329
4330 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4331 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4332 reg `regClashesWithOp` _            = False
4333
4334 -----------
4335
4336 trivialUCode rep instr x = do
4337   x_code <- getAnyReg x
4338   let
4339      code dst =
4340         x_code dst `snocOL`
4341         instr (OpReg dst)
4342   -- in
4343   return (Any rep code)
4344
4345 -----------
4346
4347 #if i386_TARGET_ARCH
4348
4349 trivialFCode pk instr x y = do
4350   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4351   (y_reg, y_code) <- getSomeReg y
4352   let
4353      code dst =
4354         x_code `appOL`
4355         y_code `snocOL`
4356         instr pk x_reg y_reg dst
4357   -- in
4358   return (Any pk code)
4359
4360 #endif
4361
4362 #if x86_64_TARGET_ARCH
4363
4364 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4365
4366 #endif
4367
4368 -------------
4369
4370 trivialUFCode rep instr x = do
4371   (x_reg, x_code) <- getSomeReg x
4372   let
4373      code dst =
4374         x_code `snocOL`
4375         instr x_reg dst
4376   -- in
4377   return (Any rep code)
4378
4379 #endif /* i386_TARGET_ARCH */
4380
4381 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4382
4383 #if sparc_TARGET_ARCH
4384
4385 trivialCode pk instr x (CmmLit (CmmInt y d))
4386   | fits13Bits y
4387   = do
4388       (src1, code) <- getSomeReg x
4389       tmp <- getNewRegNat I32
4390       let
4391         src2 = ImmInt (fromInteger y)
4392         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4393       return (Any I32 code__2)
4394
4395 trivialCode pk instr x y = do
4396     (src1, code1) <- getSomeReg x
4397     (src2, code2) <- getSomeReg y
4398     tmp1 <- getNewRegNat I32
4399     tmp2 <- getNewRegNat I32
4400     let
4401         code__2 dst = code1 `appOL` code2 `snocOL`
4402                       instr src1 (RIReg src2) dst
4403     return (Any I32 code__2)
4404
4405 ------------
4406 trivialFCode pk instr x y = do
4407     (src1, code1) <- getSomeReg x
4408     (src2, code2) <- getSomeReg y
4409     tmp1 <- getNewRegNat (cmmExprRep x)
4410     tmp2 <- getNewRegNat (cmmExprRep y)
4411     tmp <- getNewRegNat F64
4412     let
4413         promote x = FxTOy F32 F64 x tmp
4414
4415         pk1   = cmmExprRep x
4416         pk2   = cmmExprRep y
4417
4418         code__2 dst =
4419                 if pk1 == pk2 then
4420                     code1 `appOL` code2 `snocOL`
4421                     instr pk src1 src2 dst
4422                 else if pk1 == F32 then
4423                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4424                     instr F64 tmp src2 dst
4425                 else
4426                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4427                     instr F64 src1 tmp dst
4428     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4429
4430 ------------
4431 trivialUCode pk instr x = do
4432     (src, code) <- getSomeReg x
4433     tmp <- getNewRegNat pk
4434     let
4435         code__2 dst = code `snocOL` instr (RIReg src) dst
4436     return (Any pk code__2)
4437
4438 -------------
4439 trivialUFCode pk instr x = do
4440     (src, code) <- getSomeReg x
4441     tmp <- getNewRegNat pk
4442     let
4443         code__2 dst = code `snocOL` instr src dst
4444     return (Any pk code__2)
4445
4446 #endif /* sparc_TARGET_ARCH */
4447
4448 #if powerpc_TARGET_ARCH
4449
4450 {-
4451 Wolfgang's PowerPC version of The Rules:
4452
4453 A slightly modified version of The Rules to take advantage of the fact
4454 that PowerPC instructions work on all registers and don't implicitly
4455 clobber any fixed registers.
4456
4457 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4458
4459 * If getRegister returns Any, then the code it generates may modify only:
4460         (a) fresh temporaries
4461         (b) the destination register
4462   It may *not* modify global registers, unless the global
4463   register happens to be the destination register.
4464   It may not clobber any other registers. In fact, only ccalls clobber any
4465   fixed registers.
4466   Also, it may not modify the counter register (used by genCCall).
4467
4468   Corollary: If a getRegister for a subexpression returns Fixed, you need
4469   not move it to a fresh temporary before evaluating the next subexpression.
4470   The Fixed register won't be modified.
4471   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4472
4473 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4474   the value of the destination register.
4475 -}
4476
4477 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4478     | Just imm <- makeImmediate rep signed y
4479     = do
4480         (src1, code1) <- getSomeReg x
4481         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4482         return (Any rep code)
4483
4484 trivialCode rep signed instr x y = do
4485     (src1, code1) <- getSomeReg x
4486     (src2, code2) <- getSomeReg y
4487     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4488     return (Any rep code)
4489
4490 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4491     -> CmmExpr -> CmmExpr -> NatM Register
4492 trivialCodeNoImm rep instr x y = do
4493     (src1, code1) <- getSomeReg x
4494     (src2, code2) <- getSomeReg y
4495     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4496     return (Any rep code)
4497
4498 trivialUCode rep instr x = do
4499     (src, code) <- getSomeReg x
4500     let code' dst = code `snocOL` instr dst src
4501     return (Any rep code')
4502
4503 -- There is no "remainder" instruction on the PPC, so we have to do
4504 -- it the hard way.
4505 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4506
4507 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4508     -> CmmExpr -> CmmExpr -> NatM Register
4509 remainderCode rep div x y = do
4510     (src1, code1) <- getSomeReg x
4511     (src2, code2) <- getSomeReg y
4512     let code dst = code1 `appOL` code2 `appOL` toOL [
4513                 div dst src1 src2,
4514                 MULLW dst dst (RIReg src2),
4515                 SUBF dst dst src1
4516             ]
4517     return (Any rep code)
4518
4519 #endif /* powerpc_TARGET_ARCH */
4520
4521
4522 -- -----------------------------------------------------------------------------
4523 --  Coercing to/from integer/floating-point...
4524
4525 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4526 -- conversions.  We have to store temporaries in memory to move
4527 -- between the integer and the floating point register sets.
4528
4529 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4530 -- pretend, on sparc at least, that double and float regs are seperate
4531 -- kinds, so the value has to be computed into one kind before being
4532 -- explicitly "converted" to live in the other kind.
4533
4534 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4535 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4536
4537 #if sparc_TARGET_ARCH
4538 coerceDbl2Flt :: CmmExpr -> NatM Register
4539 coerceFlt2Dbl :: CmmExpr -> NatM Register
4540 #endif
4541
4542 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4543
4544 #if alpha_TARGET_ARCH
4545
4546 coerceInt2FP _ x
4547   = getRegister x               `thenNat` \ register ->
4548     getNewRegNat IntRep         `thenNat` \ reg ->
4549     let
4550         code = registerCode register reg
4551         src  = registerName register reg
4552
4553         code__2 dst = code . mkSeqInstrs [
4554             ST Q src (spRel 0),
4555             LD TF dst (spRel 0),
4556             CVTxy Q TF dst dst]
4557     in
4558     return (Any F64 code__2)
4559
4560 -------------
4561 coerceFP2Int x
4562   = getRegister x               `thenNat` \ register ->
4563     getNewRegNat F64    `thenNat` \ tmp ->
4564     let
4565         code = registerCode register tmp
4566         src  = registerName register tmp
4567
4568         code__2 dst = code . mkSeqInstrs [
4569             CVTxy TF Q src tmp,
4570             ST TF tmp (spRel 0),
4571             LD Q dst (spRel 0)]
4572     in
4573     return (Any IntRep code__2)
4574
4575 #endif /* alpha_TARGET_ARCH */
4576
4577 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4578
4579 #if i386_TARGET_ARCH
4580
4581 coerceInt2FP from to x = do
4582   (x_reg, x_code) <- getSomeReg x
4583   let
4584         opc  = case to of F32 -> GITOF; F64 -> GITOD
4585         code dst = x_code `snocOL` opc x_reg dst
4586         -- ToDo: works for non-I32 reps?
4587   -- in
4588   return (Any to code)
4589
4590 ------------
4591
4592 coerceFP2Int from to x = do
4593   (x_reg, x_code) <- getSomeReg x
4594   let
4595         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4596         code dst = x_code `snocOL` opc x_reg dst
4597         -- ToDo: works for non-I32 reps?
4598   -- in
4599   return (Any to code)
4600
4601 #endif /* i386_TARGET_ARCH */
4602
4603 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4604
4605 #if x86_64_TARGET_ARCH
4606
4607 coerceFP2Int from to x = do
4608   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4609   let
4610         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4611         code dst = x_code `snocOL` opc x_op dst
4612   -- in
4613   return (Any to code) -- works even if the destination rep is <I32
4614
4615 coerceInt2FP from to x = do
4616   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4617   let
4618         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4619         code dst = x_code `snocOL` opc x_op dst
4620   -- in
4621   return (Any to code) -- works even if the destination rep is <I32
4622
4623 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4624 coerceFP2FP to x = do
4625   (x_reg, x_code) <- getSomeReg x
4626   let
4627         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4628         code dst = x_code `snocOL` opc x_reg dst
4629   -- in
4630   return (Any to code)
4631
4632 #endif
4633
4634 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4635
4636 #if sparc_TARGET_ARCH
4637
4638 coerceInt2FP pk1 pk2 x = do
4639     (src, code) <- getSomeReg x
4640     let
4641         code__2 dst = code `appOL` toOL [
4642             ST pk1 src (spRel (-2)),
4643             LD pk1 (spRel (-2)) dst,
4644             FxTOy pk1 pk2 dst dst]
4645     return (Any pk2 code__2)
4646
4647 ------------
4648 coerceFP2Int pk fprep x = do
4649     (src, code) <- getSomeReg x
4650     reg <- getNewRegNat fprep
4651     tmp <- getNewRegNat pk
4652     let
4653         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4654             code `appOL` toOL [
4655             FxTOy fprep pk src tmp,
4656             ST pk tmp (spRel (-2)),
4657             LD pk (spRel (-2)) dst]
4658     return (Any pk code__2)
4659
4660 ------------
4661 coerceDbl2Flt x = do
4662     (src, code) <- getSomeReg x
4663     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4664
4665 ------------
4666 coerceFlt2Dbl x = do
4667     (src, code) <- getSomeReg x
4668     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4669
4670 #endif /* sparc_TARGET_ARCH */
4671
4672 #if powerpc_TARGET_ARCH
4673 coerceInt2FP fromRep toRep x = do
4674     (src, code) <- getSomeReg x
4675     lbl <- getNewLabelNat
4676     itmp <- getNewRegNat I32
4677     ftmp <- getNewRegNat F64
4678     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4679     Amode addr addr_code <- getAmode dynRef
4680     let
4681         code' dst = code `appOL` maybe_exts `appOL` toOL [
4682                 LDATA ReadOnlyData
4683                                 [CmmDataLabel lbl,
4684                                  CmmStaticLit (CmmInt 0x43300000 I32),
4685                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4686                 XORIS itmp src (ImmInt 0x8000),
4687                 ST I32 itmp (spRel 3),
4688                 LIS itmp (ImmInt 0x4330),
4689                 ST I32 itmp (spRel 2),
4690                 LD F64 ftmp (spRel 2)
4691             ] `appOL` addr_code `appOL` toOL [
4692                 LD F64 dst addr,
4693                 FSUB F64 dst ftmp dst
4694             ] `appOL` maybe_frsp dst
4695
4696         maybe_exts = case fromRep of
4697                         I8 ->  unitOL $ EXTS I8 src src
4698                         I16 -> unitOL $ EXTS I16 src src
4699                         I32 -> nilOL
4700         maybe_frsp dst = case toRep of
4701                         F32 -> unitOL $ FRSP dst dst
4702                         F64 -> nilOL
4703     return (Any toRep code')
4704
4705 coerceFP2Int fromRep toRep x = do
4706     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4707     (src, code) <- getSomeReg x
4708     tmp <- getNewRegNat F64
4709     let
4710         code' dst = code `appOL` toOL [
4711                 -- convert to int in FP reg
4712             FCTIWZ tmp src,
4713                 -- store value (64bit) from FP to stack
4714             ST F64 tmp (spRel 2),
4715                 -- read low word of value (high word is undefined)
4716             LD I32 dst (spRel 3)]
4717     return (Any toRep code')
4718 #endif /* powerpc_TARGET_ARCH */
4719
4720
4721 -- -----------------------------------------------------------------------------
4722 -- eXTRA_STK_ARGS_HERE
4723
4724 -- We (allegedly) put the first six C-call arguments in registers;
4725 -- where do we start putting the rest of them?
4726
4727 -- Moved from MachInstrs (SDM):
4728
4729 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4730 eXTRA_STK_ARGS_HERE :: Int
4731 eXTRA_STK_ARGS_HERE
4732   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4733 #endif
4734