compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32
  33 -- The rest:
  34 import StaticFlags      ( opt_PIC )
  35 import ForeignCall      ( CCallConv(..) )
  36 import OrdList
  37 import Pretty
  38 import Outputable
  39 import FastString
  40 import FastTypes        ( isFastTrue )
  41 import Constants        ( wORD_SIZE )
  42
  43 #ifdef DEBUG
  44 import Outputable       ( assertPanic )
  45 import Debug.Trace      ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Data.Maybe       ( fromJust )
  50 import Data.Bits
  51 import Data.Word
  52 import Data.Int
  53
  54 -- -----------------------------------------------------------------------------
  55 -- Top-level of the instruction selector
  56
  57 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  58 -- They are really trees of insns to facilitate fast appending, where a
  59 -- left-to-right traversal (pre-order?) yields the insns in the correct
  60 -- order.
  61
  62 type InstrBlock = OrdList Instr
  63
  64 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  65 cmmTopCodeGen (CmmProc info lab params blocks) = do
  66   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  67   picBaseMb <- getPicBaseMaybeNat
  68   let proc = CmmProc info lab params (concat nat_blocks)
  69       tops = proc : concat statics
  70   case picBaseMb of
  71       Just picBase -> initializePicBase picBase tops
  72       Nothing -> return tops
  73
  74 cmmTopCodeGen (CmmData sec dat) = do
  75   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  76
  77 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  78 basicBlockCodeGen (BasicBlock id stmts) = do
  79   instrs <- stmtsToInstrs stmts
  80   -- code generation may introduce new basic block boundaries, which
  81   -- are indicated by the NEWBLOCK instruction.  We must split up the
  82   -- instruction stream into basic blocks again.  Also, we extract
  83   -- LDATAs here too.
  84   let
  85         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  86
  87         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  88           = ([], BasicBlock id instrs : blocks, statics)
  89         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  90           = (instrs, blocks, CmmData sec dat:statics)
  91         mkBlocks instr (instrs,blocks,statics)
  92           = (instr:instrs, blocks, statics)
  93   -- in
  94   return (BasicBlock id top : other_blocks, statics)
  95
  96 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  97 stmtsToInstrs stmts
  98    = do instrss <- mapM stmtToInstrs stmts
  99         return (concatOL instrss)
 100
 101 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 102 stmtToInstrs stmt = case stmt of
 103     CmmNop         -> return nilOL
 104     CmmComment s   -> return (unitOL (COMMENT s))
 105
 106     CmmAssign reg src
 107       | isFloatingRep kind -> assignReg_FltCode kind reg src
 108 #if WORD_SIZE_IN_BITS==32
 109       | kind == I64        -> assignReg_I64Code      reg src
 110 #endif
 111       | otherwise          -> assignReg_IntCode kind reg src
 112         where kind = cmmRegRep reg
 113
 114     CmmStore addr src
 115       | isFloatingRep kind -> assignMem_FltCode kind addr src
 116 #if WORD_SIZE_IN_BITS==32
 117       | kind == I64      -> assignMem_I64Code      addr src
 118 #endif
 119       | otherwise        -> assignMem_IntCode kind addr src
 120         where kind = cmmExprRep src
 121
 122     CmmCall target result_regs args vols
 123        -> genCCall target result_regs args vols
 124
 125     CmmBranch id          -> genBranch id
 126     CmmCondBranch arg id  -> genCondJump id arg
 127     CmmSwitch arg ids     -> genSwitch arg ids
 128     CmmJump arg params    -> genJump arg
 129
 130 -- -----------------------------------------------------------------------------
 131 -- General things for putting together code sequences
 132
 133 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 134 -- CmmExprs into CmmRegOff?
 135 mangleIndexTree :: CmmExpr -> CmmExpr
 136 mangleIndexTree (CmmRegOff reg off)
 137   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 138   where rep = cmmRegRep reg
 139
 140 -- -----------------------------------------------------------------------------
 141 --  Code gen for 64-bit arithmetic on 32-bit platforms
 142
 143 {-
 144 Simple support for generating 64-bit code (ie, 64 bit values and 64
 145 bit assignments) on 32-bit platforms.  Unlike the main code generator
 146 we merely shoot for generating working code as simply as possible, and
 147 pay little attention to code quality.  Specifically, there is no
 148 attempt to deal cleverly with the fixed-vs-floating register
 149 distinction; all values are generated into (pairs of) floating
 150 registers, even if this would mean some redundant reg-reg moves as a
 151 result.  Only one of the VRegUniques is returned, since it will be
 152 of the VRegUniqueLo form, and the upper-half VReg can be determined
 153 by applying getHiVRegFromLo to it.
 154 -}
 155
 156 data ChildCode64        -- a.k.a "Register64"
 157    = ChildCode64
 158         InstrBlock      -- code
 159         Reg             -- the lower 32-bit temporary which contains the
 160                         -- result; use getHiVRegFromLo to find the other
 161                         -- VRegUnique.  Rules of this simplified insn
 162                         -- selection game are therefore that the returned
 163                         -- Reg may be modified
 164
 165 #if WORD_SIZE_IN_BITS==32
 166 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 167 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 168 #endif
 169
 170 #ifndef x86_64_TARGET_ARCH
 171 iselExpr64        :: CmmExpr -> NatM ChildCode64
 172 #endif
 173
 174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 175
 176 #if i386_TARGET_ARCH
 177
 178 assignMem_I64Code addrTree valueTree = do
 179   Amode addr addr_code <- getAmode addrTree
 180   ChildCode64 vcode rlo <- iselExpr64 valueTree
 181   let
 182         rhi = getHiVRegFromLo rlo
 183
 184         -- Little-endian store
 185         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 186         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 187   -- in
 188   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 189
 190
 191 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 192    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 193    let
 194          r_dst_lo = mkVReg u_dst I32
 195          r_dst_hi = getHiVRegFromLo r_dst_lo
 196          r_src_hi = getHiVRegFromLo r_src_lo
 197          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 198          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 199    -- in
 200    return (
 201         vcode `snocOL` mov_lo `snocOL` mov_hi
 202      )
 203
 204 assignReg_I64Code lvalue valueTree
 205    = panic "assignReg_I64Code(i386): invalid lvalue"
 206
 207 ------------
 208
 209 iselExpr64 (CmmLit (CmmInt i _)) = do
 210   (rlo,rhi) <- getNewRegPairNat I32
 211   let
 212         r = fromIntegral (fromIntegral i :: Word32)
 213         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 214         code = toOL [
 215                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 216                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 217                 ]
 218   -- in
 219   return (ChildCode64 code rlo)
 220
 221 iselExpr64 (CmmLoad addrTree I64) = do
 222    Amode addr addr_code <- getAmode addrTree
 223    (rlo,rhi) <- getNewRegPairNat I32
 224    let
 225         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 226         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 227    -- in
 228    return (
 229             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 230                         rlo
 231      )
 232
 233 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 234    = return (ChildCode64 nilOL (mkVReg vu I32))
 235
 236 -- we handle addition, but rather badly
 237 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 238    ChildCode64 code1 r1lo <- iselExpr64 e1
 239    (rlo,rhi) <- getNewRegPairNat I32
 240    let
 241         r = fromIntegral (fromIntegral i :: Word32)
 242         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 243         r1hi = getHiVRegFromLo r1lo
 244         code =  code1 `appOL`
 245                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 246                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 247                        MOV I32 (OpReg r1hi) (OpReg rhi),
 248                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 249    -- in
 250    return (ChildCode64 code rlo)
 251
 252 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 253    ChildCode64 code1 r1lo <- iselExpr64 e1
 254    ChildCode64 code2 r2lo <- iselExpr64 e2
 255    (rlo,rhi) <- getNewRegPairNat I32
 256    let
 257         r1hi = getHiVRegFromLo r1lo
 258         r2hi = getHiVRegFromLo r2lo
 259         code =  code1 `appOL`
 260                 code2 `appOL`
 261                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 262                        ADD I32 (OpReg r2lo) (OpReg rlo),
 263                        MOV I32 (OpReg r1hi) (OpReg rhi),
 264                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 265    -- in
 266    return (ChildCode64 code rlo)
 267
 268 iselExpr64 expr
 269    = pprPanic "iselExpr64(i386)" (ppr expr)
 270
 271 #endif /* i386_TARGET_ARCH */
 272
 273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 274
 275 #if sparc_TARGET_ARCH
 276
 277 assignMem_I64Code addrTree valueTree = do
 278      Amode addr addr_code <- getAmode addrTree
 279      ChildCode64 vcode rlo <- iselExpr64 valueTree
 280      (src, code) <- getSomeReg addrTree
 281      let
 282          rhi = getHiVRegFromLo rlo
 283          -- Big-endian store
 284          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 285          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 286      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 287
 288 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 289      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 290      let
 291          r_dst_lo = mkVReg u_dst pk
 292          r_dst_hi = getHiVRegFromLo r_dst_lo
 293          r_src_hi = getHiVRegFromLo r_src_lo
 294          mov_lo = mkMOV r_src_lo r_dst_lo
 295          mov_hi = mkMOV r_src_hi r_dst_hi
 296          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 297      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 298 assignReg_I64Code lvalue valueTree
 299    = panic "assignReg_I64Code(sparc): invalid lvalue"
 300
 301
 302 -- Don't delete this -- it's very handy for debugging.
 303 --iselExpr64 expr
 304 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 305 --   = panic "iselExpr64(???)"
 306
 307 iselExpr64 (CmmLoad addrTree I64) = do
 308      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 309      rlo <- getNewRegNat I32
 310      let rhi = getHiVRegFromLo rlo
 311          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 312          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 313      return (
 314             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 315                          rlo
 316           )
 317
 318 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 319      r_dst_lo <-  getNewRegNat I32
 320      let r_dst_hi = getHiVRegFromLo r_dst_lo
 321          r_src_lo = mkVReg uq I32
 322          r_src_hi = getHiVRegFromLo r_src_lo
 323          mov_lo = mkMOV r_src_lo r_dst_lo
 324          mov_hi = mkMOV r_src_hi r_dst_hi
 325          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 326      return (
 327             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 328          )
 329
 330 iselExpr64 expr
 331    = pprPanic "iselExpr64(sparc)" (ppr expr)
 332
 333 #endif /* sparc_TARGET_ARCH */
 334
 335 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 336
 337 #if powerpc_TARGET_ARCH
 338
 339 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 340 getI64Amodes addrTree = do
 341     Amode hi_addr addr_code <- getAmode addrTree
 342     case addrOffset hi_addr 4 of
 343         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 344         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 345                            return (AddrRegImm hi_ptr (ImmInt 0),
 346                                    AddrRegImm hi_ptr (ImmInt 4),
 347                                    code)
 348
 349 assignMem_I64Code addrTree valueTree = do
 350         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 351         ChildCode64 vcode rlo <- iselExpr64 valueTree
 352         let
 353                 rhi = getHiVRegFromLo rlo
 354
 355                 -- Big-endian store
 356                 mov_hi = ST I32 rhi hi_addr
 357                 mov_lo = ST I32 rlo lo_addr
 358         -- in
 359         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 360
 361 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 362    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 363    let
 364          r_dst_lo = mkVReg u_dst I32
 365          r_dst_hi = getHiVRegFromLo r_dst_lo
 366          r_src_hi = getHiVRegFromLo r_src_lo
 367          mov_lo = MR r_dst_lo r_src_lo
 368          mov_hi = MR r_dst_hi r_src_hi
 369    -- in
 370    return (
 371         vcode `snocOL` mov_lo `snocOL` mov_hi
 372      )
 373
 374 assignReg_I64Code lvalue valueTree
 375    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 376
 377
 378 -- Don't delete this -- it's very handy for debugging.
 379 --iselExpr64 expr
 380 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 381 --   = panic "iselExpr64(???)"
 382
 383 iselExpr64 (CmmLoad addrTree I64) = do
 384     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 385     (rlo, rhi) <- getNewRegPairNat I32
 386     let mov_hi = LD I32 rhi hi_addr
 387         mov_lo = LD I32 rlo lo_addr
 388     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 389                          rlo
 390
 391 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 392    = return (ChildCode64 nilOL (mkVReg vu I32))
 393
 394 iselExpr64 (CmmLit (CmmInt i _)) = do
 395   (rlo,rhi) <- getNewRegPairNat I32
 396   let
 397         half0 = fromIntegral (fromIntegral i :: Word16)
 398         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 399         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 400         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 401
 402         code = toOL [
 403                 LIS rlo (ImmInt half1),
 404                 OR rlo rlo (RIImm $ ImmInt half0),
 405                 LIS rhi (ImmInt half3),
 406                 OR rlo rlo (RIImm $ ImmInt half2)
 407                 ]
 408   -- in
 409   return (ChildCode64 code rlo)
 410
 411 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 412    ChildCode64 code1 r1lo <- iselExpr64 e1
 413    ChildCode64 code2 r2lo <- iselExpr64 e2
 414    (rlo,rhi) <- getNewRegPairNat I32
 415    let
 416         r1hi = getHiVRegFromLo r1lo
 417         r2hi = getHiVRegFromLo r2lo
 418         code =  code1 `appOL`
 419                 code2 `appOL`
 420                 toOL [ ADDC rlo r1lo r2lo,
 421                        ADDE rhi r1hi r2hi ]
 422    -- in
 423    return (ChildCode64 code rlo)
 424
 425 iselExpr64 expr
 426    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 427
 428 #endif /* powerpc_TARGET_ARCH */
 429
 430
 431 -- -----------------------------------------------------------------------------
 432 -- The 'Register' type
 433
 434 -- 'Register's passed up the tree.  If the stix code forces the register
 435 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 436 -- otherwise, it comes out as @Any@, and the parent can decide which
 437 -- register to put it in.
 438
 439 data Register
 440   = Fixed   MachRep Reg InstrBlock
 441   | Any     MachRep (Reg -> InstrBlock)
 442
 443 swizzleRegisterRep :: Register -> MachRep -> Register
 444 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 445 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 446
 447
 448 -- -----------------------------------------------------------------------------
 449 -- Utils based on getRegister, below
 450
 451 -- The dual to getAnyReg: compute an expression into a register, but
 452 -- we don't mind which one it is.
 453 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 454 getSomeReg expr = do
 455   r <- getRegister expr
 456   case r of
 457     Any rep code -> do
 458         tmp <- getNewRegNat rep
 459         return (tmp, code tmp)
 460     Fixed _ reg code ->
 461         return (reg, code)
 462
 463 -- -----------------------------------------------------------------------------
 464 -- Grab the Reg for a CmmReg
 465
 466 getRegisterReg :: CmmReg -> Reg
 467
 468 getRegisterReg (CmmLocal (LocalReg u pk))
 469   = mkVReg u pk
 470
 471 getRegisterReg (CmmGlobal mid)
 472   = case get_GlobalReg_reg_or_addr mid of
 473        Left (RealReg rrno) -> RealReg rrno
 474        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 475           -- By this stage, the only MagicIds remaining should be the
 476           -- ones which map to a real machine register on this
 477           -- platform.  Hence ...
 478
 479
 480 -- -----------------------------------------------------------------------------
 481 -- Generate code to get a subtree into a Register
 482
 483 -- Don't delete this -- it's very handy for debugging.
 484 --getRegister expr
 485 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 486 --   = panic "getRegister(???)"
 487
 488 getRegister :: CmmExpr -> NatM Register
 489
 490 #if !x86_64_TARGET_ARCH
 491     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 492     -- register, it can only be used for rip-relative addressing.
 493 getRegister (CmmReg (CmmGlobal PicBaseReg))
 494   = do
 495       reg <- getPicBaseNat wordRep
 496       return (Fixed wordRep reg nilOL)
 497 #endif
 498
 499 getRegister (CmmReg reg)
 500   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 501
 502 getRegister tree@(CmmRegOff _ _)
 503   = getRegister (mangleIndexTree tree)
 504
 505
 506 #if WORD_SIZE_IN_BITS==32
 507     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 508     -- TO_W_(x), TO_W_(x >> 32)
 509
 510 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 511              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 512   ChildCode64 code rlo <- iselExpr64 x
 513   return $ Fixed I32 (getHiVRegFromLo rlo) code
 514
 515 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 516              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 517   ChildCode64 code rlo <- iselExpr64 x
 518   return $ Fixed I32 (getHiVRegFromLo rlo) code
 519
 520 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 521   ChildCode64 code rlo <- iselExpr64 x
 522   return $ Fixed I32 rlo code
 523
 524 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 525   ChildCode64 code rlo <- iselExpr64 x
 526   return $ Fixed I32 rlo code
 527
 528 #endif
 529
 530 -- end of machine-"independent" bit; here we go on the rest...
 531
 532 #if alpha_TARGET_ARCH
 533
 534 getRegister (StDouble d)
 535   = getBlockIdNat                   `thenNat` \ lbl ->
 536     getNewRegNat PtrRep             `thenNat` \ tmp ->
 537     let code dst = mkSeqInstrs [
 538             LDATA RoDataSegment lbl [
 539                     DATA TF [ImmLab (rational d)]
 540                 ],
 541             LDA tmp (AddrImm (ImmCLbl lbl)),
 542             LD TF dst (AddrReg tmp)]
 543     in
 544         return (Any F64 code)
 545
 546 getRegister (StPrim primop [x]) -- unary PrimOps
 547   = case primop of
 548       IntNegOp -> trivialUCode (NEG Q False) x
 549
 550       NotOp    -> trivialUCode NOT x
 551
 552       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 553       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 554
 555       OrdOp -> coerceIntCode IntRep x
 556       ChrOp -> chrCode x
 557
 558       Float2IntOp  -> coerceFP2Int    x
 559       Int2FloatOp  -> coerceInt2FP pr x
 560       Double2IntOp -> coerceFP2Int    x
 561       Int2DoubleOp -> coerceInt2FP pr x
 562
 563       Double2FloatOp -> coerceFltCode x
 564       Float2DoubleOp -> coerceFltCode x
 565
 566       other_op -> getRegister (StCall fn CCallConv F64 [x])
 567         where
 568           fn = case other_op of
 569                  FloatExpOp    -> FSLIT("exp")
 570                  FloatLogOp    -> FSLIT("log")
 571                  FloatSqrtOp   -> FSLIT("sqrt")
 572                  FloatSinOp    -> FSLIT("sin")
 573                  FloatCosOp    -> FSLIT("cos")
 574                  FloatTanOp    -> FSLIT("tan")
 575                  FloatAsinOp   -> FSLIT("asin")
 576                  FloatAcosOp   -> FSLIT("acos")
 577                  FloatAtanOp   -> FSLIT("atan")
 578                  FloatSinhOp   -> FSLIT("sinh")
 579                  FloatCoshOp   -> FSLIT("cosh")
 580                  FloatTanhOp   -> FSLIT("tanh")
 581                  DoubleExpOp   -> FSLIT("exp")
 582                  DoubleLogOp   -> FSLIT("log")
 583                  DoubleSqrtOp  -> FSLIT("sqrt")
 584                  DoubleSinOp   -> FSLIT("sin")
 585                  DoubleCosOp   -> FSLIT("cos")
 586                  DoubleTanOp   -> FSLIT("tan")
 587                  DoubleAsinOp  -> FSLIT("asin")
 588                  DoubleAcosOp  -> FSLIT("acos")
 589                  DoubleAtanOp  -> FSLIT("atan")
 590                  DoubleSinhOp  -> FSLIT("sinh")
 591                  DoubleCoshOp  -> FSLIT("cosh")
 592                  DoubleTanhOp  -> FSLIT("tanh")
 593   where
 594     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 595
 596 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 597   = case primop of
 598       CharGtOp -> trivialCode (CMP LTT) y x
 599       CharGeOp -> trivialCode (CMP LE) y x
 600       CharEqOp -> trivialCode (CMP EQQ) x y
 601       CharNeOp -> int_NE_code x y
 602       CharLtOp -> trivialCode (CMP LTT) x y
 603       CharLeOp -> trivialCode (CMP LE) x y
 604
 605       IntGtOp  -> trivialCode (CMP LTT) y x
 606       IntGeOp  -> trivialCode (CMP LE) y x
 607       IntEqOp  -> trivialCode (CMP EQQ) x y
 608       IntNeOp  -> int_NE_code x y
 609       IntLtOp  -> trivialCode (CMP LTT) x y
 610       IntLeOp  -> trivialCode (CMP LE) x y
 611
 612       WordGtOp -> trivialCode (CMP ULT) y x
 613       WordGeOp -> trivialCode (CMP ULE) x y
 614       WordEqOp -> trivialCode (CMP EQQ)  x y
 615       WordNeOp -> int_NE_code x y
 616       WordLtOp -> trivialCode (CMP ULT) x y
 617       WordLeOp -> trivialCode (CMP ULE) x y
 618
 619       AddrGtOp -> trivialCode (CMP ULT) y x
 620       AddrGeOp -> trivialCode (CMP ULE) y x
 621       AddrEqOp -> trivialCode (CMP EQQ)  x y
 622       AddrNeOp -> int_NE_code x y
 623       AddrLtOp -> trivialCode (CMP ULT) x y
 624       AddrLeOp -> trivialCode (CMP ULE) x y
 625
 626       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 627       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 628       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 629       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 630       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 631       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 632
 633       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 634       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 635       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 636       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 637       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 638       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 639
 640       IntAddOp  -> trivialCode (ADD Q False) x y
 641       IntSubOp  -> trivialCode (SUB Q False) x y
 642       IntMulOp  -> trivialCode (MUL Q False) x y
 643       IntQuotOp -> trivialCode (DIV Q False) x y
 644       IntRemOp  -> trivialCode (REM Q False) x y
 645
 646       WordAddOp  -> trivialCode (ADD Q False) x y
 647       WordSubOp  -> trivialCode (SUB Q False) x y
 648       WordMulOp  -> trivialCode (MUL Q False) x y
 649       WordQuotOp -> trivialCode (DIV Q True) x y
 650       WordRemOp  -> trivialCode (REM Q True) x y
 651
 652       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 653       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 654       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 655       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 656
 657       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 658       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 659       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 660       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 661
 662       AddrAddOp  -> trivialCode (ADD Q False) x y
 663       AddrSubOp  -> trivialCode (SUB Q False) x y
 664       AddrRemOp  -> trivialCode (REM Q True) x y
 665
 666       AndOp  -> trivialCode AND x y
 667       OrOp   -> trivialCode OR  x y
 668       XorOp  -> trivialCode XOR x y
 669       SllOp  -> trivialCode SLL x y
 670       SrlOp  -> trivialCode SRL x y
 671
 672       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 673       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 674       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 675
 676       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 677       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 678   where
 679     {- ------------------------------------------------------------
 680         Some bizarre special code for getting condition codes into
 681         registers.  Integer non-equality is a test for equality
 682         followed by an XOR with 1.  (Integer comparisons always set
 683         the result register to 0 or 1.)  Floating point comparisons of
 684         any kind leave the result in a floating point register, so we
 685         need to wrangle an integer register out of things.
 686     -}
 687     int_NE_code :: StixTree -> StixTree -> NatM Register
 688
 689     int_NE_code x y
 690       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 691         getNewRegNat IntRep             `thenNat` \ tmp ->
 692         let
 693             code = registerCode register tmp
 694             src  = registerName register tmp
 695             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 696         in
 697         return (Any IntRep code__2)
 698
 699     {- ------------------------------------------------------------
 700         Comments for int_NE_code also apply to cmpF_code
 701     -}
 702     cmpF_code
 703         :: (Reg -> Reg -> Reg -> Instr)
 704         -> Cond
 705         -> StixTree -> StixTree
 706         -> NatM Register
 707
 708     cmpF_code instr cond x y
 709       = trivialFCode pr instr x y       `thenNat` \ register ->
 710         getNewRegNat F64                `thenNat` \ tmp ->
 711         getBlockIdNat                   `thenNat` \ lbl ->
 712         let
 713             code = registerCode register tmp
 714             result  = registerName register tmp
 715
 716             code__2 dst = code . mkSeqInstrs [
 717                 OR zeroh (RIImm (ImmInt 1)) dst,
 718                 BF cond  result (ImmCLbl lbl),
 719                 OR zeroh (RIReg zeroh) dst,
 720                 NEWBLOCK lbl]
 721         in
 722         return (Any IntRep code__2)
 723       where
 724         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 725       ------------------------------------------------------------
 726
 727 getRegister (CmmLoad pk mem)
 728   = getAmode mem                    `thenNat` \ amode ->
 729     let
 730         code = amodeCode amode
 731         src   = amodeAddr amode
 732         size = primRepToSize pk
 733         code__2 dst = code . mkSeqInstr (LD size dst src)
 734     in
 735     return (Any pk code__2)
 736
 737 getRegister (StInt i)
 738   | fits8Bits i
 739   = let
 740         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 741     in
 742     return (Any IntRep code)
 743   | otherwise
 744   = let
 745         code dst = mkSeqInstr (LDI Q dst src)
 746     in
 747     return (Any IntRep code)
 748   where
 749     src = ImmInt (fromInteger i)
 750
 751 getRegister leaf
 752   | isJust imm
 753   = let
 754         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 755     in
 756     return (Any PtrRep code)
 757   where
 758     imm = maybeImm leaf
 759     imm__2 = case imm of Just x -> x
 760
 761 #endif /* alpha_TARGET_ARCH */
 762
 763 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 764
 765 #if i386_TARGET_ARCH
 766
 767 getRegister (CmmLit (CmmFloat f F32)) = do
 768     lbl <- getNewLabelNat
 769     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 770     Amode addr addr_code <- getAmode dynRef
 771     let code dst =
 772             LDATA ReadOnlyData
 773                         [CmmDataLabel lbl,
 774                          CmmStaticLit (CmmFloat f F32)]
 775             `consOL` (addr_code `snocOL`
 776             GLD F32 addr dst)
 777     -- in
 778     return (Any F32 code)
 779
 780
 781 getRegister (CmmLit (CmmFloat d F64))
 782   | d == 0.0
 783   = let code dst = unitOL (GLDZ dst)
 784     in  return (Any F64 code)
 785
 786   | d == 1.0
 787   = let code dst = unitOL (GLD1 dst)
 788     in  return (Any F64 code)
 789
 790   | otherwise = do
 791     lbl <- getNewLabelNat
 792     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 793     Amode addr addr_code <- getAmode dynRef
 794     let code dst =
 795             LDATA ReadOnlyData
 796                         [CmmDataLabel lbl,
 797                          CmmStaticLit (CmmFloat d F64)]
 798             `consOL` (addr_code `snocOL`
 799             GLD F64 addr dst)
 800     -- in
 801     return (Any F64 code)
 802
 803 #endif /* i386_TARGET_ARCH */
 804
 805 #if x86_64_TARGET_ARCH
 806
 807 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 808    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 809         -- I don't know why there are xorpd, xorps, and pxor instructions.
 810         -- They all appear to do the same thing --SDM
 811    return (Any rep code)
 812
 813 getRegister (CmmLit (CmmFloat f rep)) = do
 814     lbl <- getNewLabelNat
 815     let code dst = toOL [
 816             LDATA ReadOnlyData
 817                         [CmmDataLabel lbl,
 818                          CmmStaticLit (CmmFloat f rep)],
 819             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 820             ]
 821     -- in
 822     return (Any rep code)
 823
 824 #endif /* x86_64_TARGET_ARCH */
 825
 826 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 827
 828 -- catch simple cases of zero- or sign-extended load
 829 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 830   code <- intLoadCode (MOVZxL I8) addr
 831   return (Any I32 code)
 832
 833 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 834   code <- intLoadCode (MOVSxL I8) addr
 835   return (Any I32 code)
 836
 837 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 838   code <- intLoadCode (MOVZxL I16) addr
 839   return (Any I32 code)
 840
 841 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 842   code <- intLoadCode (MOVSxL I16) addr
 843   return (Any I32 code)
 844
 845 #endif
 846
 847 #if x86_64_TARGET_ARCH
 848
 849 -- catch simple cases of zero- or sign-extended load
 850 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 851   code <- intLoadCode (MOVZxL I8) addr
 852   return (Any I64 code)
 853
 854 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 855   code <- intLoadCode (MOVSxL I8) addr
 856   return (Any I64 code)
 857
 858 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 859   code <- intLoadCode (MOVZxL I16) addr
 860   return (Any I64 code)
 861
 862 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 863   code <- intLoadCode (MOVSxL I16) addr
 864   return (Any I64 code)
 865
 866 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 867   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 868   return (Any I64 code)
 869
 870 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 871   code <- intLoadCode (MOVSxL I32) addr
 872   return (Any I64 code)
 873
 874 #endif
 875
 876 #if x86_64_TARGET_ARCH
 877 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 878                                      CmmLit displacement])
 879     = return $ Any I64 (\dst -> unitOL $
 880         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 881 #endif
 882
 883 #if x86_64_TARGET_ARCH
 884 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 885   x_code <- getAnyReg x
 886   lbl <- getNewLabelNat
 887   let
 888     code dst = x_code dst `appOL` toOL [
 889         -- This is how gcc does it, so it can't be that bad:
 890         LDATA ReadOnlyData16 [
 891                 CmmAlign 16,
 892                 CmmDataLabel lbl,
 893                 CmmStaticLit (CmmInt 0x80000000 I32),
 894                 CmmStaticLit (CmmInt 0 I32),
 895                 CmmStaticLit (CmmInt 0 I32),
 896                 CmmStaticLit (CmmInt 0 I32)
 897         ],
 898         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 899                 -- xorps, so we need the 128-bit constant
 900                 -- ToDo: rip-relative
 901         ]
 902   --
 903   return (Any F32 code)
 904
 905 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 906   x_code <- getAnyReg x
 907   lbl <- getNewLabelNat
 908   let
 909         -- This is how gcc does it, so it can't be that bad:
 910     code dst = x_code dst `appOL` toOL [
 911         LDATA ReadOnlyData16 [
 912                 CmmAlign 16,
 913                 CmmDataLabel lbl,
 914                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 915                 CmmStaticLit (CmmInt 0 I64)
 916         ],
 917                 -- gcc puts an unpck here.  Wonder if we need it.
 918         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 919                 -- xorpd, so we need the 128-bit constant
 920         ]
 921   --
 922   return (Any F64 code)
 923 #endif
 924
 925 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 926
 927 getRegister (CmmMachOp mop [x]) -- unary MachOps
 928   = case mop of
 929 #if i386_TARGET_ARCH
 930       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 931       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 932 #endif
 933
 934       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 935       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 936
 937       -- Nop conversions
 938       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 939       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 940       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 941       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 942       MO_U_Conv I32 I16 -> toI16Reg I32 x
 943       MO_S_Conv I32 I16 -> toI16Reg I32 x
 944 #if x86_64_TARGET_ARCH
 945       MO_U_Conv I64 I32 -> conversionNop I64 x
 946       MO_S_Conv I64 I32 -> conversionNop I64 x
 947       MO_U_Conv I64 I16 -> toI16Reg I64 x
 948       MO_S_Conv I64 I16 -> toI16Reg I64 x
 949       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 950       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 951 #endif
 952
 953       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 954       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 955
 956       -- widenings
 957       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 958       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 959       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 960
 961       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 962       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 963       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 964
 965 #if x86_64_TARGET_ARCH
 966       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 967       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 968       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 969       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 970       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 971       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 972         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 973         -- However, we don't want the register allocator to throw it
 974         -- away as an unnecessary reg-to-reg move, so we keep it in
 975         -- the form of a movzl and print it as a movl later.
 976 #endif
 977
 978 #if i386_TARGET_ARCH
 979       MO_S_Conv F32 F64 -> conversionNop F64 x
 980       MO_S_Conv F64 F32 -> conversionNop F32 x
 981 #else
 982       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 983       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 984 #endif
 985
 986       MO_S_Conv from to
 987         | isFloatingRep from -> coerceFP2Int from to x
 988         | isFloatingRep to   -> coerceInt2FP from to x
 989
 990       other -> pprPanic "getRegister" (pprMachOp mop)
 991    where
 992         -- signed or unsigned extension.
 993         integerExtend from to instr expr = do
 994             (reg,e_code) <- if from == I8 then getByteReg expr
 995                                           else getSomeReg expr
 996             let
 997                 code dst =
 998                   e_code `snocOL`
 999                   instr from (OpReg reg) (OpReg dst)
1000             return (Any to code)
1001
1002         toI8Reg new_rep expr
1003             = do codefn <- getAnyReg expr
1004                  return (Any new_rep codefn)
1005                 -- HACK: use getAnyReg to get a byte-addressable register.
1006                 -- If the source was a Fixed register, this will add the
1007                 -- mov instruction to put it into the desired destination.
1008                 -- We're assuming that the destination won't be a fixed
1009                 -- non-byte-addressable register; it won't be, because all
1010                 -- fixed registers are word-sized.
1011
1012         toI16Reg = toI8Reg -- for now
1013
1014         conversionNop new_rep expr
1015             = do e_code <- getRegister expr
1016                  return (swizzleRegisterRep e_code new_rep)
1017
1018
1019 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1020   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1021     case mop of
1022       MO_Eq F32   -> condFltReg EQQ x y
1023       MO_Ne F32   -> condFltReg NE x y
1024       MO_S_Gt F32 -> condFltReg GTT x y
1025       MO_S_Ge F32 -> condFltReg GE x y
1026       MO_S_Lt F32 -> condFltReg LTT x y
1027       MO_S_Le F32 -> condFltReg LE x y
1028
1029       MO_Eq F64   -> condFltReg EQQ x y
1030       MO_Ne F64   -> condFltReg NE x y
1031       MO_S_Gt F64 -> condFltReg GTT x y
1032       MO_S_Ge F64 -> condFltReg GE x y
1033       MO_S_Lt F64 -> condFltReg LTT x y
1034       MO_S_Le F64 -> condFltReg LE x y
1035
1036       MO_Eq rep   -> condIntReg EQQ x y
1037       MO_Ne rep   -> condIntReg NE x y
1038
1039       MO_S_Gt rep -> condIntReg GTT x y
1040       MO_S_Ge rep -> condIntReg GE x y
1041       MO_S_Lt rep -> condIntReg LTT x y
1042       MO_S_Le rep -> condIntReg LE x y
1043
1044       MO_U_Gt rep -> condIntReg GU  x y
1045       MO_U_Ge rep -> condIntReg GEU x y
1046       MO_U_Lt rep -> condIntReg LU  x y
1047       MO_U_Le rep -> condIntReg LEU x y
1048
1049 #if i386_TARGET_ARCH
1050       MO_Add F32 -> trivialFCode F32 GADD x y
1051       MO_Sub F32 -> trivialFCode F32 GSUB x y
1052
1053       MO_Add F64 -> trivialFCode F64 GADD x y
1054       MO_Sub F64 -> trivialFCode F64 GSUB x y
1055
1056       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1057       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1058 #endif
1059
1060 #if x86_64_TARGET_ARCH
1061       MO_Add F32 -> trivialFCode F32 ADD x y
1062       MO_Sub F32 -> trivialFCode F32 SUB x y
1063
1064       MO_Add F64 -> trivialFCode F64 ADD x y
1065       MO_Sub F64 -> trivialFCode F64 SUB x y
1066
1067       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1068       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1069 #endif
1070
1071       MO_Add rep -> add_code rep x y
1072       MO_Sub rep -> sub_code rep x y
1073
1074       MO_S_Quot rep -> div_code rep True  True  x y
1075       MO_S_Rem  rep -> div_code rep True  False x y
1076       MO_U_Quot rep -> div_code rep False True  x y
1077       MO_U_Rem  rep -> div_code rep False False x y
1078
1079 #if i386_TARGET_ARCH
1080       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1081       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1082 #endif
1083
1084 #if x86_64_TARGET_ARCH
1085       MO_Mul   F32 -> trivialFCode F32 MUL x y
1086       MO_Mul   F64 -> trivialFCode F64 MUL x y
1087 #endif
1088
1089       MO_Mul   rep -> let op = IMUL rep in
1090                       trivialCode rep op (Just op) x y
1091
1092       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1093
1094       MO_And rep -> let op = AND rep in
1095                     trivialCode rep op (Just op) x y
1096       MO_Or  rep -> let op = OR  rep in
1097                     trivialCode rep op (Just op) x y
1098       MO_Xor rep -> let op = XOR rep in
1099                     trivialCode rep op (Just op) x y
1100
1101         {- Shift ops on x86s have constraints on their source, it
1102            either has to be Imm, CL or 1
1103             => trivialCode is not restrictive enough (sigh.)
1104         -}
1105       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1106       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1107       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1108
1109       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1110   where
1111     --------------------
1112     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1113     imulMayOflo rep a b = do
1114          (a_reg, a_code) <- getNonClobberedReg a
1115          b_code <- getAnyReg b
1116          let
1117              shift_amt  = case rep of
1118                            I32 -> 31
1119                            I64 -> 63
1120                            _ -> panic "shift_amt"
1121
1122              code = a_code `appOL` b_code eax `appOL`
1123                         toOL [
1124                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1125                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1126                                 -- sign extend lower part
1127                            SUB rep (OpReg edx) (OpReg eax)
1128                                 -- compare against upper
1129                            -- eax==0 if high part == sign extended low part
1130                         ]
1131          -- in
1132          return (Fixed rep eax code)
1133
1134     --------------------
1135     shift_code :: MachRep
1136                -> (Operand -> Operand -> Instr)
1137                -> CmmExpr
1138                -> CmmExpr
1139                -> NatM Register
1140
1141     {- Case1: shift length as immediate -}
1142     shift_code rep instr x y@(CmmLit lit) = do
1143           x_code <- getAnyReg x
1144           let
1145                code dst
1146                   = x_code dst `snocOL`
1147                     instr (OpImm (litToImm lit)) (OpReg dst)
1148           -- in
1149           return (Any rep code)
1150
1151     {- Case2: shift length is complex (non-immediate)
1152       * y must go in %ecx.
1153       * we cannot do y first *and* put its result in %ecx, because
1154         %ecx might be clobbered by x.
1155       * if we do y second, then x cannot be
1156         in a clobbered reg.  Also, we cannot clobber x's reg
1157         with the instruction itself.
1158       * so we can either:
1159         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1160         - do y second and put its result into %ecx.  x gets placed in a fresh
1161           tmp.  This is likely to be better, becuase the reg alloc can
1162           eliminate this reg->reg move here (it won't eliminate the other one,
1163           because the move is into the fixed %ecx).
1164     -}
1165     shift_code rep instr x y{-amount-} = do
1166         x_code <- getAnyReg x
1167         tmp <- getNewRegNat rep
1168         y_code <- getAnyReg y
1169         let
1170            code = x_code tmp `appOL`
1171                   y_code ecx `snocOL`
1172                   instr (OpReg ecx) (OpReg tmp)
1173         -- in
1174         return (Fixed rep tmp code)
1175
1176     --------------------
1177     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1178     add_code rep x (CmmLit (CmmInt y _))
1179         | not (is64BitInteger y) = add_int rep x y
1180     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1181
1182     --------------------
1183     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1184     sub_code rep x (CmmLit (CmmInt y _))
1185         | not (is64BitInteger (-y)) = add_int rep x (-y)
1186     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1187
1188     -- our three-operand add instruction:
1189     add_int rep x y = do
1190         (x_reg, x_code) <- getSomeReg x
1191         let
1192             imm = ImmInt (fromInteger y)
1193             code dst
1194                = x_code `snocOL`
1195                  LEA rep
1196                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1197                         (OpReg dst)
1198         --
1199         return (Any rep code)
1200
1201     ----------------------
1202     div_code rep signed quotient x y = do
1203            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1204            x_code <- getAnyReg x
1205            let
1206              widen | signed    = CLTD rep
1207                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1208
1209              instr | signed    = IDIV
1210                    | otherwise = DIV
1211
1212              code = y_code `appOL`
1213                     x_code eax `appOL`
1214                     toOL [widen, instr rep y_op]
1215
1216              result | quotient  = eax
1217                     | otherwise = edx
1218
1219            -- in
1220            return (Fixed rep result code)
1221
1222
1223 getRegister (CmmLoad mem pk)
1224   | isFloatingRep pk
1225   = do
1226     Amode src mem_code <- getAmode mem
1227     let
1228         code dst = mem_code `snocOL`
1229                    IF_ARCH_i386(GLD pk src dst,
1230                                 MOV pk (OpAddr src) (OpReg dst))
1231     --
1232     return (Any pk code)
1233
1234 #if i386_TARGET_ARCH
1235 getRegister (CmmLoad mem pk)
1236   | pk /= I64
1237   = do
1238     code <- intLoadCode (instr pk) mem
1239     return (Any pk code)
1240   where
1241         instr I8  = MOVZxL pk
1242         instr I16 = MOV I16
1243         instr I32 = MOV I32
1244         -- we always zero-extend 8-bit loads, if we
1245         -- can't think of anything better.  This is because
1246         -- we can't guarantee access to an 8-bit variant of every register
1247         -- (esi and edi don't have 8-bit variants), so to make things
1248         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1249 #endif
1250
1251 #if x86_64_TARGET_ARCH
1252 -- Simpler memory load code on x86_64
1253 getRegister (CmmLoad mem pk)
1254   = do
1255     code <- intLoadCode (MOV pk) mem
1256     return (Any pk code)
1257 #endif
1258
1259 getRegister (CmmLit (CmmInt 0 rep))
1260   = let
1261         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1262         adj_rep = case rep of I64 -> I32; _ -> rep
1263         rep1 = IF_ARCH_i386( rep, adj_rep )
1264         code dst
1265            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1266     in
1267         return (Any rep code)
1268
1269 #if x86_64_TARGET_ARCH
1270   -- optimisation for loading small literals on x86_64: take advantage
1271   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1272   -- instruction forms are shorter.
1273 getRegister (CmmLit lit)
1274   | I64 <- cmmLitRep lit, not (isBigLit lit)
1275   = let
1276         imm = litToImm lit
1277         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1278     in
1279         return (Any I64 code)
1280   where
1281    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1282    isBigLit _ = False
1283         -- note1: not the same as is64BitLit, because that checks for
1284         -- signed literals that fit in 32 bits, but we want unsigned
1285         -- literals here.
1286         -- note2: all labels are small, because we're assuming the
1287         -- small memory model (see gcc docs, -mcmodel=small).
1288 #endif
1289
1290 getRegister (CmmLit lit)
1291   = let
1292         rep = cmmLitRep lit
1293         imm = litToImm lit
1294         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1295     in
1296         return (Any rep code)
1297
1298 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1299
1300
1301 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1302    -> NatM (Reg -> InstrBlock)
1303 intLoadCode instr mem = do
1304   Amode src mem_code <- getAmode mem
1305   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1306
1307 -- Compute an expression into *any* register, adding the appropriate
1308 -- move instruction if necessary.
1309 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1310 getAnyReg expr = do
1311   r <- getRegister expr
1312   anyReg r
1313
1314 anyReg :: Register -> NatM (Reg -> InstrBlock)
1315 anyReg (Any _ code)          = return code
1316 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1317
1318 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1319 -- Fixed registers might not be byte-addressable, so we make sure we've
1320 -- got a temporary, inserting an extra reg copy if necessary.
1321 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1322 #if x86_64_TARGET_ARCH
1323 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1324 #else
1325 getByteReg expr = do
1326   r <- getRegister expr
1327   case r of
1328     Any rep code -> do
1329         tmp <- getNewRegNat rep
1330         return (tmp, code tmp)
1331     Fixed rep reg code
1332         | isVirtualReg reg -> return (reg,code)
1333         | otherwise -> do
1334             tmp <- getNewRegNat rep
1335             return (tmp, code `snocOL` reg2reg rep reg tmp)
1336         -- ToDo: could optimise slightly by checking for byte-addressable
1337         -- real registers, but that will happen very rarely if at all.
1338 #endif
1339
1340 -- Another variant: this time we want the result in a register that cannot
1341 -- be modified by code to evaluate an arbitrary expression.
1342 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1343 getNonClobberedReg expr = do
1344   r <- getRegister expr
1345   case r of
1346     Any rep code -> do
1347         tmp <- getNewRegNat rep
1348         return (tmp, code tmp)
1349     Fixed rep reg code
1350         -- only free regs can be clobbered
1351         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1352                 tmp <- getNewRegNat rep
1353                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1354         | otherwise ->
1355                 return (reg, code)
1356
1357 reg2reg :: MachRep -> Reg -> Reg -> Instr
1358 reg2reg rep src dst
1359 #if i386_TARGET_ARCH
1360   | isFloatingRep rep = GMOV src dst
1361 #endif
1362   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1363
1364 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1365
1366 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1367
1368 #if sparc_TARGET_ARCH
1369
1370 getRegister (CmmLit (CmmFloat f F32)) = do
1371     lbl <- getNewLabelNat
1372     let code dst = toOL [
1373             LDATA ReadOnlyData
1374                         [CmmDataLabel lbl,
1375                          CmmStaticLit (CmmFloat f F32)],
1376             SETHI (HI (ImmCLbl lbl)) dst,
1377             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1378     return (Any F32 code)
1379
1380 getRegister (CmmLit (CmmFloat d F64)) = do
1381     lbl <- getNewLabelNat
1382     let code dst = toOL [
1383             LDATA ReadOnlyData
1384                         [CmmDataLabel lbl,
1385                          CmmStaticLit (CmmFloat d F64)],
1386             SETHI (HI (ImmCLbl lbl)) dst,
1387             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1388     return (Any F64 code)
1389
1390 getRegister (CmmMachOp mop [x]) -- unary MachOps
1391   = case mop of
1392       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1393       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1394
1395       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1396       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1397
1398       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1399
1400       MO_U_Conv F64 F32-> coerceDbl2Flt x
1401       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1402
1403       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1404       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1405       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1406       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1407
1408       -- Conversions which are a nop on sparc
1409       MO_U_Conv from to
1410         | from == to   -> conversionNop to   x
1411       MO_U_Conv I32 to -> conversionNop to   x
1412       MO_S_Conv I32 to -> conversionNop to   x
1413
1414       -- widenings
1415       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1416       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1417       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1418       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1419
1420       other_op -> panic "Unknown unary mach op"
1421     where
1422         -- XXX SLL/SRL?
1423         integerExtend signed from to expr = do
1424            (reg, e_code) <- getSomeReg expr
1425            let
1426                code dst =
1427                    e_code `snocOL`
1428                    ((if signed then SRA else SRL)
1429                           reg (RIImm (ImmInt 0)) dst)
1430            return (Any to code)
1431         conversionNop new_rep expr
1432             = do e_code <- getRegister expr
1433                  return (swizzleRegisterRep e_code new_rep)
1434
1435 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1436   = case mop of
1437       MO_Eq F32 -> condFltReg EQQ x y
1438       MO_Ne F32 -> condFltReg NE x y
1439
1440       MO_S_Gt F32 -> condFltReg GTT x y
1441       MO_S_Ge F32 -> condFltReg GE x y
1442       MO_S_Lt F32 -> condFltReg LTT x y
1443       MO_S_Le F32 -> condFltReg LE x y
1444
1445       MO_Eq F64 -> condFltReg EQQ x y
1446       MO_Ne F64 -> condFltReg NE x y
1447
1448       MO_S_Gt F64 -> condFltReg GTT x y
1449       MO_S_Ge F64 -> condFltReg GE x y
1450       MO_S_Lt F64 -> condFltReg LTT x y
1451       MO_S_Le F64 -> condFltReg LE x y
1452
1453       MO_Eq rep -> condIntReg EQQ x y
1454       MO_Ne rep -> condIntReg NE x y
1455
1456       MO_S_Gt rep -> condIntReg GTT x y
1457       MO_S_Ge rep -> condIntReg GE x y
1458       MO_S_Lt rep -> condIntReg LTT x y
1459       MO_S_Le rep -> condIntReg LE x y
1460
1461       MO_U_Gt I32  -> condIntReg GTT x y
1462       MO_U_Ge I32  -> condIntReg GE x y
1463       MO_U_Lt I32  -> condIntReg LTT x y
1464       MO_U_Le I32  -> condIntReg LE x y
1465
1466       MO_U_Gt I16 -> condIntReg GU  x y
1467       MO_U_Ge I16 -> condIntReg GEU x y
1468       MO_U_Lt I16 -> condIntReg LU  x y
1469       MO_U_Le I16 -> condIntReg LEU x y
1470
1471       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1472       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1473
1474       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1475 {-
1476       -- ToDo: teach about V8+ SPARC div instructions
1477       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1478       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1479       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1480       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1481 -}
1482       MO_Add F32  -> trivialFCode F32 FADD  x y
1483       MO_Sub F32   -> trivialFCode F32  FSUB x y
1484       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1485       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1486
1487       MO_Add F64   -> trivialFCode F64 FADD  x y
1488       MO_Sub F64   -> trivialFCode F64  FSUB x y
1489       MO_Mul F64   -> trivialFCode F64  FMUL x y
1490       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1491
1492       MO_And rep   -> trivialCode rep (AND False) x y
1493       MO_Or rep    -> trivialCode rep (OR  False) x y
1494       MO_Xor rep   -> trivialCode rep (XOR False) x y
1495
1496       MO_Mul rep -> trivialCode rep (SMUL False) x y
1497
1498       MO_Shl rep   -> trivialCode rep SLL  x y
1499       MO_U_Shr rep   -> trivialCode rep SRL x y
1500       MO_S_Shr rep   -> trivialCode rep SRA x y
1501
1502 {-
1503       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1504                                          [promote x, promote y])
1505                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1506       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1507                                         [x, y])
1508 -}
1509       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1510   where
1511     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1512
1513     --------------------
1514     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1515     imulMayOflo rep a b = do
1516          (a_reg, a_code) <- getSomeReg a
1517          (b_reg, b_code) <- getSomeReg b
1518          res_lo <- getNewRegNat I32
1519          res_hi <- getNewRegNat I32
1520          let
1521             shift_amt  = case rep of
1522                           I32 -> 31
1523                           I64 -> 63
1524                           _ -> panic "shift_amt"
1525             code dst = a_code `appOL` b_code `appOL`
1526                        toOL [
1527                            SMUL False a_reg (RIReg b_reg) res_lo,
1528                            RDY res_hi,
1529                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1530                            SUB False False res_lo (RIReg res_hi) dst
1531                         ]
1532          return (Any I32 code)
1533
1534 getRegister (CmmLoad mem pk) = do
1535     Amode src code <- getAmode mem
1536     let
1537         code__2 dst = code `snocOL` LD pk src dst
1538     return (Any pk code__2)
1539
1540 getRegister (CmmLit (CmmInt i _))
1541   | fits13Bits i
1542   = let
1543         src = ImmInt (fromInteger i)
1544         code dst = unitOL (OR False g0 (RIImm src) dst)
1545     in
1546         return (Any I32 code)
1547
1548 getRegister (CmmLit lit)
1549   = let rep = cmmLitRep lit
1550         imm = litToImm lit
1551         code dst = toOL [
1552             SETHI (HI imm) dst,
1553             OR False dst (RIImm (LO imm)) dst]
1554     in return (Any I32 code)
1555
1556 #endif /* sparc_TARGET_ARCH */
1557
1558 #if powerpc_TARGET_ARCH
1559 getRegister (CmmLoad mem pk)
1560   | pk /= I64
1561   = do
1562         Amode addr addr_code <- getAmode mem
1563         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1564                        addr_code `snocOL` LD pk dst addr
1565         return (Any pk code)
1566
1567 -- catch simple cases of zero- or sign-extended load
1568 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1569     Amode addr addr_code <- getAmode mem
1570     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1571
1572 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1573
1574 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1575     Amode addr addr_code <- getAmode mem
1576     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1577
1578 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1579     Amode addr addr_code <- getAmode mem
1580     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1581
1582 getRegister (CmmMachOp mop [x]) -- unary MachOps
1583   = case mop of
1584       MO_Not rep   -> trivialUCode rep NOT x
1585
1586       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1587       MO_S_Conv F32 F64 -> conversionNop F64 x
1588
1589       MO_S_Conv from to
1590         | from == to         -> conversionNop to x
1591         | isFloatingRep from -> coerceFP2Int from to x
1592         | isFloatingRep to   -> coerceInt2FP from to x
1593
1594         -- narrowing is a nop: we treat the high bits as undefined
1595       MO_S_Conv I32 to -> conversionNop to x
1596       MO_S_Conv I16 I8 -> conversionNop I8 x
1597       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1598       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1599
1600       MO_U_Conv from to
1601         | from == to -> conversionNop to x
1602         -- narrowing is a nop: we treat the high bits as undefined
1603       MO_U_Conv I32 to -> conversionNop to x
1604       MO_U_Conv I16 I8 -> conversionNop I8 x
1605       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1606       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1607
1608       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1609       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1610       MO_S_Neg rep      -> trivialUCode rep NEG x
1611
1612     where
1613         conversionNop new_rep expr
1614             = do e_code <- getRegister expr
1615                  return (swizzleRegisterRep e_code new_rep)
1616
1617 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1618   = case mop of
1619       MO_Eq F32 -> condFltReg EQQ x y
1620       MO_Ne F32 -> condFltReg NE  x y
1621
1622       MO_S_Gt F32 -> condFltReg GTT x y
1623       MO_S_Ge F32 -> condFltReg GE  x y
1624       MO_S_Lt F32 -> condFltReg LTT x y
1625       MO_S_Le F32 -> condFltReg LE  x y
1626
1627       MO_Eq F64 -> condFltReg EQQ x y
1628       MO_Ne F64 -> condFltReg NE  x y
1629
1630       MO_S_Gt F64 -> condFltReg GTT x y
1631       MO_S_Ge F64 -> condFltReg GE  x y
1632       MO_S_Lt F64 -> condFltReg LTT x y
1633       MO_S_Le F64 -> condFltReg LE  x y
1634
1635       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1636       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1637
1638       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1639       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1640       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1641       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1642
1643       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1644       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1645       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1646       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1647
1648       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1649       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1650       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1651       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1652
1653       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1654       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1655       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1656       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1657
1658          -- optimize addition with 32-bit immediate
1659          -- (needed for PIC)
1660       MO_Add I32 ->
1661         case y of
1662           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1663             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1664           CmmLit lit
1665             -> do
1666                 (src, srcCode) <- getSomeReg x
1667                 let imm = litToImm lit
1668                     code dst = srcCode `appOL` toOL [
1669                                     ADDIS dst src (HA imm),
1670                                     ADD dst dst (RIImm (LO imm))
1671                                 ]
1672                 return (Any I32 code)
1673           _ -> trivialCode I32 True ADD x y
1674
1675       MO_Add rep -> trivialCode rep True ADD x y
1676       MO_Sub rep ->
1677         case y of    -- subfi ('substract from' with immediate) doesn't exist
1678           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1679             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1680           _ -> trivialCodeNoImm rep SUBF y x
1681
1682       MO_Mul rep -> trivialCode rep True MULLW x y
1683
1684       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1685
1686       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1687       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1688
1689       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1690       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1691
1692       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1693       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1694
1695       MO_And rep   -> trivialCode rep False AND x y
1696       MO_Or rep    -> trivialCode rep False OR x y
1697       MO_Xor rep   -> trivialCode rep False XOR x y
1698
1699       MO_Shl rep   -> trivialCode rep False SLW x y
1700       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1701       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1702
1703 getRegister (CmmLit (CmmInt i rep))
1704   | Just imm <- makeImmediate rep True i
1705   = let
1706         code dst = unitOL (LI dst imm)
1707     in
1708         return (Any rep code)
1709
1710 getRegister (CmmLit (CmmFloat f frep)) = do
1711     lbl <- getNewLabelNat
1712     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1713     Amode addr addr_code <- getAmode dynRef
1714     let code dst =
1715             LDATA ReadOnlyData  [CmmDataLabel lbl,
1716                                  CmmStaticLit (CmmFloat f frep)]
1717             `consOL` (addr_code `snocOL` LD frep dst addr)
1718     return (Any frep code)
1719
1720 getRegister (CmmLit lit)
1721   = let rep = cmmLitRep lit
1722         imm = litToImm lit
1723         code dst = toOL [
1724               LIS dst (HI imm),
1725               OR dst dst (RIImm (LO imm))
1726           ]
1727     in return (Any rep code)
1728
1729 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1730
1731     -- extend?Rep: wrap integer expression of type rep
1732     -- in a conversion to I32
1733 extendSExpr I32 x = x
1734 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1735 extendUExpr I32 x = x
1736 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1737
1738 #endif /* powerpc_TARGET_ARCH */
1739
1740
1741 -- -----------------------------------------------------------------------------
1742 --  The 'Amode' type: Memory addressing modes passed up the tree.
1743
1744 data Amode = Amode AddrMode InstrBlock
1745
1746 {-
1747 Now, given a tree (the argument to an CmmLoad) that references memory,
1748 produce a suitable addressing mode.
1749
1750 A Rule of the Game (tm) for Amodes: use of the addr bit must
1751 immediately follow use of the code part, since the code part puts
1752 values in registers which the addr then refers to.  So you can't put
1753 anything in between, lest it overwrite some of those registers.  If
1754 you need to do some other computation between the code part and use of
1755 the addr bit, first store the effective address from the amode in a
1756 temporary, then do the other computation, and then use the temporary:
1757
1758     code
1759     LEA amode, tmp
1760     ... other computation ...
1761     ... (tmp) ...
1762 -}
1763
1764 getAmode :: CmmExpr -> NatM Amode
1765 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1766
1767 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1768
1769 #if alpha_TARGET_ARCH
1770
1771 getAmode (StPrim IntSubOp [x, StInt i])
1772   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1773     getRegister x               `thenNat` \ register ->
1774     let
1775         code = registerCode register tmp
1776         reg  = registerName register tmp
1777         off  = ImmInt (-(fromInteger i))
1778     in
1779     return (Amode (AddrRegImm reg off) code)
1780
1781 getAmode (StPrim IntAddOp [x, StInt i])
1782   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1783     getRegister x               `thenNat` \ register ->
1784     let
1785         code = registerCode register tmp
1786         reg  = registerName register tmp
1787         off  = ImmInt (fromInteger i)
1788     in
1789     return (Amode (AddrRegImm reg off) code)
1790
1791 getAmode leaf
1792   | isJust imm
1793   = return (Amode (AddrImm imm__2) id)
1794   where
1795     imm = maybeImm leaf
1796     imm__2 = case imm of Just x -> x
1797
1798 getAmode other
1799   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1800     getRegister other           `thenNat` \ register ->
1801     let
1802         code = registerCode register tmp
1803         reg  = registerName register tmp
1804     in
1805     return (Amode (AddrReg reg) code)
1806
1807 #endif /* alpha_TARGET_ARCH */
1808
1809 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1810
1811 #if x86_64_TARGET_ARCH
1812
1813 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1814                                      CmmLit displacement])
1815     = return $ Amode (ripRel (litToImm displacement)) nilOL
1816
1817 #endif
1818
1819 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1820
1821 -- This is all just ridiculous, since it carefully undoes
1822 -- what mangleIndexTree has just done.
1823 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1824   | not (is64BitLit lit)
1825   -- ASSERT(rep == I32)???
1826   = do (x_reg, x_code) <- getSomeReg x
1827        let off = ImmInt (-(fromInteger i))
1828        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1829
1830 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1831   | not (is64BitLit lit)
1832   -- ASSERT(rep == I32)???
1833   = do (x_reg, x_code) <- getSomeReg x
1834        let off = ImmInt (fromInteger i)
1835        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1836
1837 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1838 -- recognised by the next rule.
1839 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1840                                   b@(CmmLit _)])
1841   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1842
1843 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1844                                         [y, CmmLit (CmmInt shift _)]])
1845   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1846   = x86_complex_amode x y shift 0
1847
1848 getAmode (CmmMachOp (MO_Add rep)
1849                 [x, CmmMachOp (MO_Add _)
1850                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1851                          CmmLit (CmmInt offset _)]])
1852   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1853   && not (is64BitInteger offset)
1854   = x86_complex_amode x y shift offset
1855
1856 getAmode (CmmMachOp (MO_Add rep) [x,y])
1857   = x86_complex_amode x y 0 0
1858
1859 getAmode (CmmLit lit) | not (is64BitLit lit)
1860   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1861
1862 getAmode expr = do
1863   (reg,code) <- getSomeReg expr
1864   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1865
1866
1867 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1868 x86_complex_amode base index shift offset
1869   = do (x_reg, x_code) <- getNonClobberedReg base
1870         -- x must be in a temp, because it has to stay live over y_code
1871         -- we could compre x_reg and y_reg and do something better here...
1872        (y_reg, y_code) <- getSomeReg index
1873        let
1874            code = x_code `appOL` y_code
1875            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1876        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1877                code)
1878
1879 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1880
1881 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1882
1883 #if sparc_TARGET_ARCH
1884
1885 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1886   | fits13Bits (-i)
1887   = do
1888        (reg, code) <- getSomeReg x
1889        let
1890          off  = ImmInt (-(fromInteger i))
1891        return (Amode (AddrRegImm reg off) code)
1892
1893
1894 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1895   | fits13Bits i
1896   = do
1897        (reg, code) <- getSomeReg x
1898        let
1899          off  = ImmInt (fromInteger i)
1900        return (Amode (AddrRegImm reg off) code)
1901
1902 getAmode (CmmMachOp (MO_Add rep) [x, y])
1903   = do
1904     (regX, codeX) <- getSomeReg x
1905     (regY, codeY) <- getSomeReg y
1906     let
1907         code = codeX `appOL` codeY
1908     return (Amode (AddrRegReg regX regY) code)
1909
1910 -- XXX Is this same as "leaf" in Stix?
1911 getAmode (CmmLit lit)
1912   = do
1913       tmp <- getNewRegNat I32
1914       let
1915         code = unitOL (SETHI (HI imm__2) tmp)
1916       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1917       where
1918          imm__2 = litToImm lit
1919
1920 getAmode other
1921   = do
1922        (reg, code) <- getSomeReg other
1923        let
1924             off  = ImmInt 0
1925        return (Amode (AddrRegImm reg off) code)
1926
1927 #endif /* sparc_TARGET_ARCH */
1928
1929 #ifdef powerpc_TARGET_ARCH
1930 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1931   | Just off <- makeImmediate I32 True (-i)
1932   = do
1933         (reg, code) <- getSomeReg x
1934         return (Amode (AddrRegImm reg off) code)
1935
1936
1937 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1938   | Just off <- makeImmediate I32 True i
1939   = do
1940         (reg, code) <- getSomeReg x
1941         return (Amode (AddrRegImm reg off) code)
1942
1943    -- optimize addition with 32-bit immediate
1944    -- (needed for PIC)
1945 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1946   = do
1947         tmp <- getNewRegNat I32
1948         (src, srcCode) <- getSomeReg x
1949         let imm = litToImm lit
1950             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1951         return (Amode (AddrRegImm tmp (LO imm)) code)
1952
1953 getAmode (CmmLit lit)
1954   = do
1955         tmp <- getNewRegNat I32
1956         let imm = litToImm lit
1957             code = unitOL (LIS tmp (HA imm))
1958         return (Amode (AddrRegImm tmp (LO imm)) code)
1959
1960 getAmode (CmmMachOp (MO_Add I32) [x, y])
1961   = do
1962         (regX, codeX) <- getSomeReg x
1963         (regY, codeY) <- getSomeReg y
1964         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1965
1966 getAmode other
1967   = do
1968         (reg, code) <- getSomeReg other
1969         let
1970             off  = ImmInt 0
1971         return (Amode (AddrRegImm reg off) code)
1972 #endif /* powerpc_TARGET_ARCH */
1973
1974 -- -----------------------------------------------------------------------------
1975 -- getOperand: sometimes any operand will do.
1976
1977 -- getNonClobberedOperand: the value of the operand will remain valid across
1978 -- the computation of an arbitrary expression, unless the expression
1979 -- is computed directly into a register which the operand refers to
1980 -- (see trivialCode where this function is used for an example).
1981
1982 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1983
1984 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1985 #if x86_64_TARGET_ARCH
1986 getNonClobberedOperand (CmmLit lit)
1987   | isSuitableFloatingPointLit lit = do
1988     lbl <- getNewLabelNat
1989     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1990                                            CmmStaticLit lit])
1991     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1992 #endif
1993 getNonClobberedOperand (CmmLit lit)
1994   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1995     return (OpImm (litToImm lit), nilOL)
1996 getNonClobberedOperand (CmmLoad mem pk)
1997   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1998     Amode src mem_code <- getAmode mem
1999     (src',save_code) <-
2000         if (amodeCouldBeClobbered src)
2001                 then do
2002                    tmp <- getNewRegNat wordRep
2003                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2004                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2005                 else
2006                    return (src, nilOL)
2007     return (OpAddr src', save_code `appOL` mem_code)
2008 getNonClobberedOperand e = do
2009     (reg, code) <- getNonClobberedReg e
2010     return (OpReg reg, code)
2011
2012 amodeCouldBeClobbered :: AddrMode -> Bool
2013 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2014
2015 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2016 regClobbered _ = False
2017
2018 -- getOperand: the operand is not required to remain valid across the
2019 -- computation of an arbitrary expression.
2020 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2021 #if x86_64_TARGET_ARCH
2022 getOperand (CmmLit lit)
2023   | isSuitableFloatingPointLit lit = do
2024     lbl <- getNewLabelNat
2025     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2026                                            CmmStaticLit lit])
2027     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2028 #endif
2029 getOperand (CmmLit lit)
2030   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2031     return (OpImm (litToImm lit), nilOL)
2032 getOperand (CmmLoad mem pk)
2033   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2034     Amode src mem_code <- getAmode mem
2035     return (OpAddr src, mem_code)
2036 getOperand e = do
2037     (reg, code) <- getSomeReg e
2038     return (OpReg reg, code)
2039
2040 isOperand :: CmmExpr -> Bool
2041 isOperand (CmmLoad _ _) = True
2042 isOperand (CmmLit lit)  = not (is64BitLit lit)
2043                           || isSuitableFloatingPointLit lit
2044 isOperand _             = False
2045
2046 -- if we want a floating-point literal as an operand, we can
2047 -- use it directly from memory.  However, if the literal is
2048 -- zero, we're better off generating it into a register using
2049 -- xor.
2050 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2051 isSuitableFloatingPointLit _ = False
2052
2053 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2054 getRegOrMem (CmmLoad mem pk)
2055   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2056     Amode src mem_code <- getAmode mem
2057     return (OpAddr src, mem_code)
2058 getRegOrMem e = do
2059     (reg, code) <- getNonClobberedReg e
2060     return (OpReg reg, code)
2061
2062 #if x86_64_TARGET_ARCH
2063 is64BitLit (CmmInt i I64) = is64BitInteger i
2064    -- assume that labels are in the range 0-2^31-1: this assumes the
2065    -- small memory model (see gcc docs, -mcmodel=small).
2066 #endif
2067 is64BitLit x = False
2068 #endif
2069
2070 is64BitInteger :: Integer -> Bool
2071 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2072   where i64 = fromIntegral i :: Int64
2073   -- a CmmInt is intended to be truncated to the appropriate
2074   -- number of bits, so here we truncate it to Int64.  This is
2075   -- important because e.g. -1 as a CmmInt might be either
2076   -- -1 or 18446744073709551615.
2077
2078 -- -----------------------------------------------------------------------------
2079 --  The 'CondCode' type:  Condition codes passed up the tree.
2080
2081 data CondCode = CondCode Bool Cond InstrBlock
2082
2083 -- Set up a condition code for a conditional branch.
2084
2085 getCondCode :: CmmExpr -> NatM CondCode
2086
2087 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2088
2089 #if alpha_TARGET_ARCH
2090 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2091 #endif /* alpha_TARGET_ARCH */
2092
2093 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2094
2095 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2096 -- yes, they really do seem to want exactly the same!
2097
2098 getCondCode (CmmMachOp mop [x, y])
2099   =
2100     case mop of
2101       MO_Eq F32 -> condFltCode EQQ x y
2102       MO_Ne F32 -> condFltCode NE  x y
2103
2104       MO_S_Gt F32 -> condFltCode GTT x y
2105       MO_S_Ge F32 -> condFltCode GE  x y
2106       MO_S_Lt F32 -> condFltCode LTT x y
2107       MO_S_Le F32 -> condFltCode LE  x y
2108
2109       MO_Eq F64 -> condFltCode EQQ x y
2110       MO_Ne F64 -> condFltCode NE  x y
2111
2112       MO_S_Gt F64 -> condFltCode GTT x y
2113       MO_S_Ge F64 -> condFltCode GE  x y
2114       MO_S_Lt F64 -> condFltCode LTT x y
2115       MO_S_Le F64 -> condFltCode LE  x y
2116
2117       MO_Eq rep -> condIntCode EQQ  x y
2118       MO_Ne rep -> condIntCode NE   x y
2119
2120       MO_S_Gt rep -> condIntCode GTT  x y
2121       MO_S_Ge rep -> condIntCode GE   x y
2122       MO_S_Lt rep -> condIntCode LTT  x y
2123       MO_S_Le rep -> condIntCode LE   x y
2124
2125       MO_U_Gt rep -> condIntCode GU   x y
2126       MO_U_Ge rep -> condIntCode GEU  x y
2127       MO_U_Lt rep -> condIntCode LU   x y
2128       MO_U_Le rep -> condIntCode LEU  x y
2129
2130       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2131
2132 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2133
2134 #elif powerpc_TARGET_ARCH
2135
2136 -- almost the same as everywhere else - but we need to
2137 -- extend small integers to 32 bit first
2138
2139 getCondCode (CmmMachOp mop [x, y])
2140   = case mop of
2141       MO_Eq F32 -> condFltCode EQQ x y
2142       MO_Ne F32 -> condFltCode NE  x y
2143
2144       MO_S_Gt F32 -> condFltCode GTT x y
2145       MO_S_Ge F32 -> condFltCode GE  x y
2146       MO_S_Lt F32 -> condFltCode LTT x y
2147       MO_S_Le F32 -> condFltCode LE  x y
2148
2149       MO_Eq F64 -> condFltCode EQQ x y
2150       MO_Ne F64 -> condFltCode NE  x y
2151
2152       MO_S_Gt F64 -> condFltCode GTT x y
2153       MO_S_Ge F64 -> condFltCode GE  x y
2154       MO_S_Lt F64 -> condFltCode LTT x y
2155       MO_S_Le F64 -> condFltCode LE  x y
2156
2157       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2158       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2159
2160       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2161       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2162       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2163       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2164
2165       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2166       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2167       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2168       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2169
2170       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2171
2172 getCondCode other =  panic "getCondCode(2)(powerpc)"
2173
2174
2175 #endif
2176
2177
2178 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2179 -- passed back up the tree.
2180
2181 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2182
2183 #if alpha_TARGET_ARCH
2184 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2185 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2186 #endif /* alpha_TARGET_ARCH */
2187
2188 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2189 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2190
2191 -- memory vs immediate
2192 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2193     Amode x_addr x_code <- getAmode x
2194     let
2195         imm  = litToImm lit
2196         code = x_code `snocOL`
2197                   CMP pk (OpImm imm) (OpAddr x_addr)
2198     --
2199     return (CondCode False cond code)
2200
2201 -- anything vs zero
2202 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2203     (x_reg, x_code) <- getSomeReg x
2204     let
2205         code = x_code `snocOL`
2206                   TEST pk (OpReg x_reg) (OpReg x_reg)
2207     --
2208     return (CondCode False cond code)
2209
2210 -- anything vs operand
2211 condIntCode cond x y | isOperand y = do
2212     (x_reg, x_code) <- getNonClobberedReg x
2213     (y_op,  y_code) <- getOperand y
2214     let
2215         code = x_code `appOL` y_code `snocOL`
2216                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2217     -- in
2218     return (CondCode False cond code)
2219
2220 -- anything vs anything
2221 condIntCode cond x y = do
2222   (y_reg, y_code) <- getNonClobberedReg y
2223   (x_op, x_code) <- getRegOrMem x
2224   let
2225         code = y_code `appOL`
2226                x_code `snocOL`
2227                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2228   -- in
2229   return (CondCode False cond code)
2230 #endif
2231
2232 #if i386_TARGET_ARCH
2233 condFltCode cond x y
2234   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2235   (x_reg, x_code) <- getNonClobberedReg x
2236   (y_reg, y_code) <- getSomeReg y
2237   let
2238         code = x_code `appOL` y_code `snocOL`
2239                 GCMP cond x_reg y_reg
2240   -- The GCMP insn does the test and sets the zero flag if comparable
2241   -- and true.  Hence we always supply EQQ as the condition to test.
2242   return (CondCode True EQQ code)
2243 #endif /* i386_TARGET_ARCH */
2244
2245 #if x86_64_TARGET_ARCH
2246 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2247 -- an operand, but the right must be a reg.  We can probably do better
2248 -- than this general case...
2249 condFltCode cond x y = do
2250   (x_reg, x_code) <- getNonClobberedReg x
2251   (y_op, y_code) <- getOperand y
2252   let
2253         code = x_code `appOL`
2254                y_code `snocOL`
2255                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2256         -- NB(1): we need to use the unsigned comparison operators on the
2257         -- result of this comparison.
2258   -- in
2259   return (CondCode True (condToUnsigned cond) code)
2260 #endif
2261
2262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2263
2264 #if sparc_TARGET_ARCH
2265
2266 condIntCode cond x (CmmLit (CmmInt y rep))
2267   | fits13Bits y
2268   = do
2269        (src1, code) <- getSomeReg x
2270        let
2271            src2 = ImmInt (fromInteger y)
2272            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2273        return (CondCode False cond code')
2274
2275 condIntCode cond x y = do
2276     (src1, code1) <- getSomeReg x
2277     (src2, code2) <- getSomeReg y
2278     let
2279         code__2 = code1 `appOL` code2 `snocOL`
2280                   SUB False True src1 (RIReg src2) g0
2281     return (CondCode False cond code__2)
2282
2283 -----------
2284 condFltCode cond x y = do
2285     (src1, code1) <- getSomeReg x
2286     (src2, code2) <- getSomeReg y
2287     tmp <- getNewRegNat F64
2288     let
2289         promote x = FxTOy F32 F64 x tmp
2290
2291         pk1   = cmmExprRep x
2292         pk2   = cmmExprRep y
2293
2294         code__2 =
2295                 if pk1 == pk2 then
2296                     code1 `appOL` code2 `snocOL`
2297                     FCMP True pk1 src1 src2
2298                 else if pk1 == F32 then
2299                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2300                     FCMP True F64 tmp src2
2301                 else
2302                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2303                     FCMP True F64 src1 tmp
2304     return (CondCode True cond code__2)
2305
2306 #endif /* sparc_TARGET_ARCH */
2307
2308 #if powerpc_TARGET_ARCH
2309 --  ###FIXME: I16 and I8!
2310 condIntCode cond x (CmmLit (CmmInt y rep))
2311   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2312   = do
2313         (src1, code) <- getSomeReg x
2314         let
2315             code' = code `snocOL`
2316                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2317         return (CondCode False cond code')
2318
2319 condIntCode cond x y = do
2320     (src1, code1) <- getSomeReg x
2321     (src2, code2) <- getSomeReg y
2322     let
2323         code' = code1 `appOL` code2 `snocOL`
2324                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2325     return (CondCode False cond code')
2326
2327 condFltCode cond x y = do
2328     (src1, code1) <- getSomeReg x
2329     (src2, code2) <- getSomeReg y
2330     let
2331         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2332         code'' = case cond of -- twiddle CR to handle unordered case
2333                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2334                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2335                     _ -> code'
2336                  where
2337                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2338     return (CondCode True cond code'')
2339
2340 #endif /* powerpc_TARGET_ARCH */
2341
2342 -- -----------------------------------------------------------------------------
2343 -- Generating assignments
2344
2345 -- Assignments are really at the heart of the whole code generation
2346 -- business.  Almost all top-level nodes of any real importance are
2347 -- assignments, which correspond to loads, stores, or register
2348 -- transfers.  If we're really lucky, some of the register transfers
2349 -- will go away, because we can use the destination register to
2350 -- complete the code generation for the right hand side.  This only
2351 -- fails when the right hand side is forced into a fixed register
2352 -- (e.g. the result of a call).
2353
2354 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2355 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2356
2357 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2358 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2359
2360 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2361
2362 #if alpha_TARGET_ARCH
2363
2364 assignIntCode pk (CmmLoad dst _) src
2365   = getNewRegNat IntRep             `thenNat` \ tmp ->
2366     getAmode dst                    `thenNat` \ amode ->
2367     getRegister src                 `thenNat` \ register ->
2368     let
2369         code1   = amodeCode amode []
2370         dst__2  = amodeAddr amode
2371         code2   = registerCode register tmp []
2372         src__2  = registerName register tmp
2373         sz      = primRepToSize pk
2374         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2375     in
2376     return code__2
2377
2378 assignIntCode pk dst src
2379   = getRegister dst                         `thenNat` \ register1 ->
2380     getRegister src                         `thenNat` \ register2 ->
2381     let
2382         dst__2  = registerName register1 zeroh
2383         code    = registerCode register2 dst__2
2384         src__2  = registerName register2 dst__2
2385         code__2 = if isFixed register2
2386                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2387                   else code
2388     in
2389     return code__2
2390
2391 #endif /* alpha_TARGET_ARCH */
2392
2393 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2394
2395 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2396
2397 -- integer assignment to memory
2398
2399 -- specific case of adding/subtracting an integer to a particular address.
2400 -- ToDo: catch other cases where we can use an operation directly on a memory
2401 -- address.
2402 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2403                                                  CmmLit (CmmInt i _)])
2404    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2405      Just instr <- check op
2406    = do Amode amode code_addr <- getAmode addr
2407         let code = code_addr `snocOL`
2408                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2409         return code
2410    where
2411         check (MO_Add _) = Just ADD
2412         check (MO_Sub _) = Just SUB
2413         check _ = Nothing
2414         -- ToDo: more?
2415
2416 -- general case
2417 assignMem_IntCode pk addr src = do
2418     Amode addr code_addr <- getAmode addr
2419     (code_src, op_src)   <- get_op_RI src
2420     let
2421         code = code_src `appOL`
2422                code_addr `snocOL`
2423                   MOV pk op_src (OpAddr addr)
2424         -- NOTE: op_src is stable, so it will still be valid
2425         -- after code_addr.  This may involve the introduction
2426         -- of an extra MOV to a temporary register, but we hope
2427         -- the register allocator will get rid of it.
2428     --
2429     return code
2430   where
2431     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2432     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2433       = return (nilOL, OpImm (litToImm lit))
2434     get_op_RI op
2435       = do (reg,code) <- getNonClobberedReg op
2436            return (code, OpReg reg)
2437
2438
2439 -- Assign; dst is a reg, rhs is mem
2440 assignReg_IntCode pk reg (CmmLoad src _) = do
2441   load_code <- intLoadCode (MOV pk) src
2442   return (load_code (getRegisterReg reg))
2443
2444 -- dst is a reg, but src could be anything
2445 assignReg_IntCode pk reg src = do
2446   code <- getAnyReg src
2447   return (code (getRegisterReg reg))
2448
2449 #endif /* i386_TARGET_ARCH */
2450
2451 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2452
2453 #if sparc_TARGET_ARCH
2454
2455 assignMem_IntCode pk addr src = do
2456     (srcReg, code) <- getSomeReg src
2457     Amode dstAddr addr_code <- getAmode addr
2458     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2459
2460 assignReg_IntCode pk reg src = do
2461     r <- getRegister src
2462     return $ case r of
2463         Any _ code         -> code dst
2464         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2465     where
2466       dst = getRegisterReg reg
2467
2468
2469 #endif /* sparc_TARGET_ARCH */
2470
2471 #if powerpc_TARGET_ARCH
2472
2473 assignMem_IntCode pk addr src = do
2474     (srcReg, code) <- getSomeReg src
2475     Amode dstAddr addr_code <- getAmode addr
2476     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2477
2478 -- dst is a reg, but src could be anything
2479 assignReg_IntCode pk reg src
2480     = do
2481         r <- getRegister src
2482         return $ case r of
2483             Any _ code         -> code dst
2484             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2485     where
2486         dst = getRegisterReg reg
2487
2488 #endif /* powerpc_TARGET_ARCH */
2489
2490
2491 -- -----------------------------------------------------------------------------
2492 -- Floating-point assignments
2493
2494 #if alpha_TARGET_ARCH
2495
2496 assignFltCode pk (CmmLoad dst _) src
2497   = getNewRegNat pk                 `thenNat` \ tmp ->
2498     getAmode dst                    `thenNat` \ amode ->
2499     getRegister src                         `thenNat` \ register ->
2500     let
2501         code1   = amodeCode amode []
2502         dst__2  = amodeAddr amode
2503         code2   = registerCode register tmp []
2504         src__2  = registerName register tmp
2505         sz      = primRepToSize pk
2506         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2507     in
2508     return code__2
2509
2510 assignFltCode pk dst src
2511   = getRegister dst                         `thenNat` \ register1 ->
2512     getRegister src                         `thenNat` \ register2 ->
2513     let
2514         dst__2  = registerName register1 zeroh
2515         code    = registerCode register2 dst__2
2516         src__2  = registerName register2 dst__2
2517         code__2 = if isFixed register2
2518                   then code . mkSeqInstr (FMOV src__2 dst__2)
2519                   else code
2520     in
2521     return code__2
2522
2523 #endif /* alpha_TARGET_ARCH */
2524
2525 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2526
2527 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2528
2529 -- Floating point assignment to memory
2530 assignMem_FltCode pk addr src = do
2531   (src_reg, src_code) <- getNonClobberedReg src
2532   Amode addr addr_code <- getAmode addr
2533   let
2534         code = src_code `appOL`
2535                addr_code `snocOL`
2536                 IF_ARCH_i386(GST pk src_reg addr,
2537                              MOV pk (OpReg src_reg) (OpAddr addr))
2538   return code
2539
2540 -- Floating point assignment to a register/temporary
2541 assignReg_FltCode pk reg src = do
2542   src_code <- getAnyReg src
2543   return (src_code (getRegisterReg reg))
2544
2545 #endif /* i386_TARGET_ARCH */
2546
2547 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2548
2549 #if sparc_TARGET_ARCH
2550
2551 -- Floating point assignment to memory
2552 assignMem_FltCode pk addr src = do
2553     Amode dst__2 code1 <- getAmode addr
2554     (src__2, code2) <- getSomeReg src
2555     tmp1 <- getNewRegNat pk
2556     let
2557         pk__2   = cmmExprRep src
2558         code__2 = code1 `appOL` code2 `appOL`
2559             if   pk == pk__2
2560             then unitOL (ST pk src__2 dst__2)
2561             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2562     return code__2
2563
2564 -- Floating point assignment to a register/temporary
2565 -- ToDo: Verify correctness
2566 assignReg_FltCode pk reg src = do
2567     r <- getRegister src
2568     v1 <- getNewRegNat pk
2569     return $ case r of
2570         Any _ code         -> code dst
2571         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2572     where
2573       dst = getRegisterReg reg
2574
2575 #endif /* sparc_TARGET_ARCH */
2576
2577 #if powerpc_TARGET_ARCH
2578
2579 -- Easy, isn't it?
2580 assignMem_FltCode = assignMem_IntCode
2581 assignReg_FltCode = assignReg_IntCode
2582
2583 #endif /* powerpc_TARGET_ARCH */
2584
2585
2586 -- -----------------------------------------------------------------------------
2587 -- Generating an non-local jump
2588
2589 -- (If applicable) Do not fill the delay slots here; you will confuse the
2590 -- register allocator.
2591
2592 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2593
2594 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2595
2596 #if alpha_TARGET_ARCH
2597
2598 genJump (CmmLabel lbl)
2599   | isAsmTemp lbl = returnInstr (BR target)
2600   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2601   where
2602     target = ImmCLbl lbl
2603
2604 genJump tree
2605   = getRegister tree                `thenNat` \ register ->
2606     getNewRegNat PtrRep             `thenNat` \ tmp ->
2607     let
2608         dst    = registerName register pv
2609         code   = registerCode register pv
2610         target = registerName register pv
2611     in
2612     if isFixed register then
2613         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2614     else
2615     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2616
2617 #endif /* alpha_TARGET_ARCH */
2618
2619 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2620
2621 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2622
2623 genJump (CmmLoad mem pk) = do
2624   Amode target code <- getAmode mem
2625   return (code `snocOL` JMP (OpAddr target))
2626
2627 genJump (CmmLit lit) = do
2628   return (unitOL (JMP (OpImm (litToImm lit))))
2629
2630 genJump expr = do
2631   (reg,code) <- getSomeReg expr
2632   return (code `snocOL` JMP (OpReg reg))
2633
2634 #endif /* i386_TARGET_ARCH */
2635
2636 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2637
2638 #if sparc_TARGET_ARCH
2639
2640 genJump (CmmLit (CmmLabel lbl))
2641   = return (toOL [CALL (Left target) 0 True, NOP])
2642   where
2643     target = ImmCLbl lbl
2644
2645 genJump tree
2646   = do
2647         (target, code) <- getSomeReg tree
2648         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2649
2650 #endif /* sparc_TARGET_ARCH */
2651
2652 #if powerpc_TARGET_ARCH
2653 genJump (CmmLit (CmmLabel lbl))
2654   = return (unitOL $ JMP lbl)
2655
2656 genJump tree
2657   = do
2658         (target,code) <- getSomeReg tree
2659         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2660 #endif /* powerpc_TARGET_ARCH */
2661
2662
2663 -- -----------------------------------------------------------------------------
2664 --  Unconditional branches
2665
2666 genBranch :: BlockId -> NatM InstrBlock
2667
2668 genBranch = return . toOL . mkBranchInstr
2669
2670 -- -----------------------------------------------------------------------------
2671 --  Conditional jumps
2672
2673 {-
2674 Conditional jumps are always to local labels, so we can use branch
2675 instructions.  We peek at the arguments to decide what kind of
2676 comparison to do.
2677
2678 ALPHA: For comparisons with 0, we're laughing, because we can just do
2679 the desired conditional branch.
2680
2681 I386: First, we have to ensure that the condition
2682 codes are set according to the supplied comparison operation.
2683
2684 SPARC: First, we have to ensure that the condition codes are set
2685 according to the supplied comparison operation.  We generate slightly
2686 different code for floating point comparisons, because a floating
2687 point operation cannot directly precede a @BF@.  We assume the worst
2688 and fill that slot with a @NOP@.
2689
2690 SPARC: Do not fill the delay slots here; you will confuse the register
2691 allocator.
2692 -}
2693
2694
2695 genCondJump
2696     :: BlockId      -- the branch target
2697     -> CmmExpr      -- the condition on which to branch
2698     -> NatM InstrBlock
2699
2700 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2701
2702 #if alpha_TARGET_ARCH
2703
2704 genCondJump id (StPrim op [x, StInt 0])
2705   = getRegister x                           `thenNat` \ register ->
2706     getNewRegNat (registerRep register)
2707                                     `thenNat` \ tmp ->
2708     let
2709         code   = registerCode register tmp
2710         value  = registerName register tmp
2711         pk     = registerRep register
2712         target = ImmCLbl lbl
2713     in
2714     returnSeq code [BI (cmpOp op) value target]
2715   where
2716     cmpOp CharGtOp = GTT
2717     cmpOp CharGeOp = GE
2718     cmpOp CharEqOp = EQQ
2719     cmpOp CharNeOp = NE
2720     cmpOp CharLtOp = LTT
2721     cmpOp CharLeOp = LE
2722     cmpOp IntGtOp = GTT
2723     cmpOp IntGeOp = GE
2724     cmpOp IntEqOp = EQQ
2725     cmpOp IntNeOp = NE
2726     cmpOp IntLtOp = LTT
2727     cmpOp IntLeOp = LE
2728     cmpOp WordGtOp = NE
2729     cmpOp WordGeOp = ALWAYS
2730     cmpOp WordEqOp = EQQ
2731     cmpOp WordNeOp = NE
2732     cmpOp WordLtOp = NEVER
2733     cmpOp WordLeOp = EQQ
2734     cmpOp AddrGtOp = NE
2735     cmpOp AddrGeOp = ALWAYS
2736     cmpOp AddrEqOp = EQQ
2737     cmpOp AddrNeOp = NE
2738     cmpOp AddrLtOp = NEVER
2739     cmpOp AddrLeOp = EQQ
2740
2741 genCondJump lbl (StPrim op [x, StDouble 0.0])
2742   = getRegister x                           `thenNat` \ register ->
2743     getNewRegNat (registerRep register)
2744                                     `thenNat` \ tmp ->
2745     let
2746         code   = registerCode register tmp
2747         value  = registerName register tmp
2748         pk     = registerRep register
2749         target = ImmCLbl lbl
2750     in
2751     return (code . mkSeqInstr (BF (cmpOp op) value target))
2752   where
2753     cmpOp FloatGtOp = GTT
2754     cmpOp FloatGeOp = GE
2755     cmpOp FloatEqOp = EQQ
2756     cmpOp FloatNeOp = NE
2757     cmpOp FloatLtOp = LTT
2758     cmpOp FloatLeOp = LE
2759     cmpOp DoubleGtOp = GTT
2760     cmpOp DoubleGeOp = GE
2761     cmpOp DoubleEqOp = EQQ
2762     cmpOp DoubleNeOp = NE
2763     cmpOp DoubleLtOp = LTT
2764     cmpOp DoubleLeOp = LE
2765
2766 genCondJump lbl (StPrim op [x, y])
2767   | fltCmpOp op
2768   = trivialFCode pr instr x y       `thenNat` \ register ->
2769     getNewRegNat F64                `thenNat` \ tmp ->
2770     let
2771         code   = registerCode register tmp
2772         result = registerName register tmp
2773         target = ImmCLbl lbl
2774     in
2775     return (code . mkSeqInstr (BF cond result target))
2776   where
2777     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2778
2779     fltCmpOp op = case op of
2780         FloatGtOp -> True
2781         FloatGeOp -> True
2782         FloatEqOp -> True
2783         FloatNeOp -> True
2784         FloatLtOp -> True
2785         FloatLeOp -> True
2786         DoubleGtOp -> True
2787         DoubleGeOp -> True
2788         DoubleEqOp -> True
2789         DoubleNeOp -> True
2790         DoubleLtOp -> True
2791         DoubleLeOp -> True
2792         _ -> False
2793     (instr, cond) = case op of
2794         FloatGtOp -> (FCMP TF LE, EQQ)
2795         FloatGeOp -> (FCMP TF LTT, EQQ)
2796         FloatEqOp -> (FCMP TF EQQ, NE)
2797         FloatNeOp -> (FCMP TF EQQ, EQQ)
2798         FloatLtOp -> (FCMP TF LTT, NE)
2799         FloatLeOp -> (FCMP TF LE, NE)
2800         DoubleGtOp -> (FCMP TF LE, EQQ)
2801         DoubleGeOp -> (FCMP TF LTT, EQQ)
2802         DoubleEqOp -> (FCMP TF EQQ, NE)
2803         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2804         DoubleLtOp -> (FCMP TF LTT, NE)
2805         DoubleLeOp -> (FCMP TF LE, NE)
2806
2807 genCondJump lbl (StPrim op [x, y])
2808   = trivialCode instr x y           `thenNat` \ register ->
2809     getNewRegNat IntRep             `thenNat` \ tmp ->
2810     let
2811         code   = registerCode register tmp
2812         result = registerName register tmp
2813         target = ImmCLbl lbl
2814     in
2815     return (code . mkSeqInstr (BI cond result target))
2816   where
2817     (instr, cond) = case op of
2818         CharGtOp -> (CMP LE, EQQ)
2819         CharGeOp -> (CMP LTT, EQQ)
2820         CharEqOp -> (CMP EQQ, NE)
2821         CharNeOp -> (CMP EQQ, EQQ)
2822         CharLtOp -> (CMP LTT, NE)
2823         CharLeOp -> (CMP LE, NE)
2824         IntGtOp -> (CMP LE, EQQ)
2825         IntGeOp -> (CMP LTT, EQQ)
2826         IntEqOp -> (CMP EQQ, NE)
2827         IntNeOp -> (CMP EQQ, EQQ)
2828         IntLtOp -> (CMP LTT, NE)
2829         IntLeOp -> (CMP LE, NE)
2830         WordGtOp -> (CMP ULE, EQQ)
2831         WordGeOp -> (CMP ULT, EQQ)
2832         WordEqOp -> (CMP EQQ, NE)
2833         WordNeOp -> (CMP EQQ, EQQ)
2834         WordLtOp -> (CMP ULT, NE)
2835         WordLeOp -> (CMP ULE, NE)
2836         AddrGtOp -> (CMP ULE, EQQ)
2837         AddrGeOp -> (CMP ULT, EQQ)
2838         AddrEqOp -> (CMP EQQ, NE)
2839         AddrNeOp -> (CMP EQQ, EQQ)
2840         AddrLtOp -> (CMP ULT, NE)
2841         AddrLeOp -> (CMP ULE, NE)
2842
2843 #endif /* alpha_TARGET_ARCH */
2844
2845 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2846
2847 #if i386_TARGET_ARCH
2848
2849 genCondJump id bool = do
2850   CondCode _ cond code <- getCondCode bool
2851   return (code `snocOL` JXX cond id)
2852
2853 #endif
2854
2855 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2856
2857 #if x86_64_TARGET_ARCH
2858
2859 genCondJump id bool = do
2860   CondCode is_float cond cond_code <- getCondCode bool
2861   if not is_float
2862     then
2863         return (cond_code `snocOL` JXX cond id)
2864     else do
2865         lbl <- getBlockIdNat
2866
2867         -- see comment with condFltReg
2868         let code = case cond of
2869                         NE  -> or_unordered
2870                         GU  -> plain_test
2871                         GEU -> plain_test
2872                         _   -> and_ordered
2873
2874             plain_test = unitOL (
2875                   JXX cond id
2876                 )
2877             or_unordered = toOL [
2878                   JXX cond id,
2879                   JXX PARITY id
2880                 ]
2881             and_ordered = toOL [
2882                   JXX PARITY lbl,
2883                   JXX cond id,
2884                   JXX ALWAYS lbl,
2885                   NEWBLOCK lbl
2886                 ]
2887         return (cond_code `appOL` code)
2888
2889 #endif
2890
2891 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2892
2893 #if sparc_TARGET_ARCH
2894
2895 genCondJump (BlockId id) bool = do
2896   CondCode is_float cond code <- getCondCode bool
2897   return (
2898        code `appOL`
2899        toOL (
2900          if   is_float
2901          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2902          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2903        )
2904     )
2905
2906 #endif /* sparc_TARGET_ARCH */
2907
2908
2909 #if powerpc_TARGET_ARCH
2910
2911 genCondJump id bool = do
2912   CondCode is_float cond code <- getCondCode bool
2913   return (code `snocOL` BCC cond id)
2914
2915 #endif /* powerpc_TARGET_ARCH */
2916
2917
2918 -- -----------------------------------------------------------------------------
2919 --  Generating C calls
2920
2921 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2922 -- @get_arg@, which moves the arguments to the correct registers/stack
2923 -- locations.  Apart from that, the code is easy.
2924 --
2925 -- (If applicable) Do not fill the delay slots here; you will confuse the
2926 -- register allocator.
2927
2928 genCCall
2929     :: CmmCallTarget            -- function to call
2930     -> [(CmmReg,MachHint)]      -- where to put the result
2931     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2932     -> Maybe [GlobalReg]        -- volatile regs to save
2933     -> NatM InstrBlock
2934
2935 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2936
2937 #if alpha_TARGET_ARCH
2938
2939 ccallResultRegs =
2940
2941 genCCall fn cconv result_regs args
2942   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2943                           `thenNat` \ ((unused,_), argCode) ->
2944     let
2945         nRegs = length allArgRegs - length unused
2946         code = asmSeqThen (map ($ []) argCode)
2947     in
2948         returnSeq code [
2949             LDA pv (AddrImm (ImmLab (ptext fn))),
2950             JSR ra (AddrReg pv) nRegs,
2951             LDGP gp (AddrReg ra)]
2952   where
2953     ------------------------
2954     {-  Try to get a value into a specific register (or registers) for
2955         a call.  The first 6 arguments go into the appropriate
2956         argument register (separate registers for integer and floating
2957         point arguments, but used in lock-step), and the remaining
2958         arguments are dumped to the stack, beginning at 0(sp).  Our
2959         first argument is a pair of the list of remaining argument
2960         registers to be assigned for this call and the next stack
2961         offset to use for overflowing arguments.  This way,
2962         @get_Arg@ can be applied to all of a call's arguments using
2963         @mapAccumLNat@.
2964     -}
2965     get_arg
2966         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2967         -> StixTree             -- Current argument
2968         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2969
2970     -- We have to use up all of our argument registers first...
2971
2972     get_arg ((iDst,fDst):dsts, offset) arg
2973       = getRegister arg                     `thenNat` \ register ->
2974         let
2975             reg  = if isFloatingRep pk then fDst else iDst
2976             code = registerCode register reg
2977             src  = registerName register reg
2978             pk   = registerRep register
2979         in
2980         return (
2981             if isFloatingRep pk then
2982                 ((dsts, offset), if isFixed register then
2983                     code . mkSeqInstr (FMOV src fDst)
2984                     else code)
2985             else
2986                 ((dsts, offset), if isFixed register then
2987                     code . mkSeqInstr (OR src (RIReg src) iDst)
2988                     else code))
2989
2990     -- Once we have run out of argument registers, we move to the
2991     -- stack...
2992
2993     get_arg ([], offset) arg
2994       = getRegister arg                 `thenNat` \ register ->
2995         getNewRegNat (registerRep register)
2996                                         `thenNat` \ tmp ->
2997         let
2998             code = registerCode register tmp
2999             src  = registerName register tmp
3000             pk   = registerRep register
3001             sz   = primRepToSize pk
3002         in
3003         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3004
3005 #endif /* alpha_TARGET_ARCH */
3006
3007 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3008
3009 #if i386_TARGET_ARCH
3010
3011 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3012         -- write barrier compiles to no code on x86/x86-64;
3013         -- we keep it this long in order to prevent earlier optimisations.
3014
3015 -- we only cope with a single result for foreign calls
3016 genCCall (CmmPrim op) [(r,_)] args vols = do
3017   case op of
3018         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3019         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3020
3021         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3022         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3023
3024         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3025         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3026
3027         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3028         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3029
3030         other_op    -> outOfLineFloatOp op r args vols
3031  where
3032   actuallyInlineFloatOp rep instr [(x,_)]
3033         = do res <- trivialUFCode rep instr x
3034              any <- anyReg res
3035              return (any (getRegisterReg r))
3036
3037 genCCall target dest_regs args vols = do
3038     let
3039         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
3040 #if !darwin_TARGET_OS
3041         tot_arg_size        = sum sizes
3042 #else
3043         raw_arg_size        = sum sizes
3044         tot_arg_size        = roundTo 16 raw_arg_size
3045         arg_pad_size        = tot_arg_size - raw_arg_size
3046     delta0 <- getDeltaNat
3047     setDeltaNat (delta0 - arg_pad_size)
3048 #endif
3049
3050     push_codes <- mapM push_arg (reverse args)
3051     delta <- getDeltaNat
3052
3053     -- in
3054     -- deal with static vs dynamic call targets
3055     (callinsns,cconv) <-
3056       case target of
3057         -- CmmPrim -> ...
3058         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3059            -> -- ToDo: stdcall arg sizes
3060               return (unitOL (CALL (Left fn_imm) []), conv)
3061            where fn_imm = ImmCLbl lbl
3062         CmmForeignCall expr conv
3063            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3064                  ASSERT(dyn_rep == I32)
3065                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3066
3067     let push_code
3068 #if darwin_TARGET_OS
3069             | arg_pad_size /= 0
3070             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3071                     DELTA (delta0 - arg_pad_size)]
3072               `appOL` concatOL push_codes
3073             | otherwise
3074 #endif
3075             = concatOL push_codes
3076         call = callinsns `appOL`
3077                toOL (
3078                         -- Deallocate parameters after call for ccall;
3079                         -- but not for stdcall (callee does it)
3080                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3081                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3082                   ++
3083                   [DELTA (delta + tot_arg_size)]
3084                )
3085     -- in
3086     setDeltaNat (delta + tot_arg_size)
3087
3088     let
3089         -- assign the results, if necessary
3090         assign_code []     = nilOL
3091         assign_code [(dest,_hint)] =
3092           case rep of
3093                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3094                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3095                 F32 -> unitOL (GMOV fake0 r_dest)
3096                 F64 -> unitOL (GMOV fake0 r_dest)
3097                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3098           where
3099                 r_dest_hi = getHiVRegFromLo r_dest
3100                 rep = cmmRegRep dest
3101                 r_dest = getRegisterReg dest
3102         assign_code many = panic "genCCall.assign_code many"
3103
3104     return (push_code `appOL`
3105             call `appOL`
3106             assign_code dest_regs)
3107
3108   where
3109     arg_size F64 = 8
3110     arg_size F32 = 4
3111     arg_size I64 = 8
3112     arg_size _   = 4
3113
3114     roundTo a x | x `mod` a == 0 = x
3115                 | otherwise = x + a - (x `mod` a)
3116
3117
3118     push_arg :: (CmmExpr,MachHint){-current argument-}
3119                     -> NatM InstrBlock  -- code
3120
3121     push_arg (arg,_hint) -- we don't need the hints on x86
3122       | arg_rep == I64 = do
3123         ChildCode64 code r_lo <- iselExpr64 arg
3124         delta <- getDeltaNat
3125         setDeltaNat (delta - 8)
3126         let
3127             r_hi = getHiVRegFromLo r_lo
3128         -- in
3129         return (       code `appOL`
3130                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3131                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3132                              DELTA (delta-8)]
3133             )
3134
3135       | otherwise = do
3136         (code, reg, sz) <- get_op arg
3137         delta <- getDeltaNat
3138         let size = arg_size sz
3139         setDeltaNat (delta-size)
3140         if (case sz of F64 -> True; F32 -> True; _ -> False)
3141            then return (code `appOL`
3142                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3143                               DELTA (delta-size),
3144                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3145                                                         EAIndexNone
3146                                                         (ImmInt 0))]
3147                        )
3148            else return (code `snocOL`
3149                         PUSH I32 (OpReg reg) `snocOL`
3150                         DELTA (delta-size)
3151                        )
3152       where
3153          arg_rep = cmmExprRep arg
3154
3155     ------------
3156     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3157     get_op op = do
3158         (reg,code) <- getSomeReg op
3159         return (code, reg, cmmExprRep op)
3160
3161 #endif /* i386_TARGET_ARCH */
3162
3163 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3164
3165 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3166   -> Maybe [GlobalReg] -> NatM InstrBlock
3167 outOfLineFloatOp mop res args vols
3168   = do
3169       targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3170       let target = CmmForeignCall targetExpr CCallConv
3171
3172       if cmmRegRep res == F64
3173         then
3174           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3175         else do
3176           uq <- getUniqueNat
3177           let
3178             tmp = CmmLocal (LocalReg uq F64)
3179           -- in
3180           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3181           code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3182           return (code1 `appOL` code2)
3183   where
3184         lbl = mkForeignLabel fn Nothing False
3185
3186         fn = case mop of
3187               MO_F32_Sqrt  -> FSLIT("sqrtf")
3188               MO_F32_Sin   -> FSLIT("sinf")
3189               MO_F32_Cos   -> FSLIT("cosf")
3190               MO_F32_Tan   -> FSLIT("tanf")
3191               MO_F32_Exp   -> FSLIT("expf")
3192               MO_F32_Log   -> FSLIT("logf")
3193
3194               MO_F32_Asin  -> FSLIT("asinf")
3195               MO_F32_Acos  -> FSLIT("acosf")
3196               MO_F32_Atan  -> FSLIT("atanf")
3197
3198               MO_F32_Sinh  -> FSLIT("sinhf")
3199               MO_F32_Cosh  -> FSLIT("coshf")
3200               MO_F32_Tanh  -> FSLIT("tanhf")
3201               MO_F32_Pwr   -> FSLIT("powf")
3202
3203               MO_F64_Sqrt  -> FSLIT("sqrt")
3204               MO_F64_Sin   -> FSLIT("sin")
3205               MO_F64_Cos   -> FSLIT("cos")
3206               MO_F64_Tan   -> FSLIT("tan")
3207               MO_F64_Exp   -> FSLIT("exp")
3208               MO_F64_Log   -> FSLIT("log")
3209
3210               MO_F64_Asin  -> FSLIT("asin")
3211               MO_F64_Acos  -> FSLIT("acos")
3212               MO_F64_Atan  -> FSLIT("atan")
3213
3214               MO_F64_Sinh  -> FSLIT("sinh")
3215               MO_F64_Cosh  -> FSLIT("cosh")
3216               MO_F64_Tanh  -> FSLIT("tanh")
3217               MO_F64_Pwr   -> FSLIT("pow")
3218
3219 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3220
3221 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3222
3223 #if x86_64_TARGET_ARCH
3224
3225 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3226         -- write barrier compiles to no code on x86/x86-64;
3227         -- we keep it this long in order to prevent earlier optimisations.
3228
3229 genCCall (CmmPrim op) [(r,_)] args vols =
3230   outOfLineFloatOp op r args vols
3231
3232 genCCall target dest_regs args vols = do
3233
3234         -- load up the register arguments
3235     (stack_args, aregs, fregs, load_args_code)
3236          <- load_args args allArgRegs allFPArgRegs nilOL
3237
3238     let
3239         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3240         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3241         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3242                 -- for annotating the call instruction with
3243
3244         sse_regs = length fp_regs_used
3245
3246         tot_arg_size = arg_size * length stack_args
3247
3248         -- On entry to the called function, %rsp should be aligned
3249         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3250         -- the return address is 16-byte aligned).  In STG land
3251         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3252         -- need to make sure we push a multiple of 16-bytes of args,
3253         -- plus the return address, to get the correct alignment.
3254         -- Urg, this is hard.  We need to feed the delta back into
3255         -- the arg pushing code.
3256     (real_size, adjust_rsp) <-
3257         if tot_arg_size `rem` 16 == 0
3258             then return (tot_arg_size, nilOL)
3259             else do -- we need to adjust...
3260                 delta <- getDeltaNat
3261                 setDeltaNat (delta-8)
3262                 return (tot_arg_size+8, toOL [
3263                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3264                                 DELTA (delta-8)
3265                         ])
3266
3267         -- push the stack args, right to left
3268     push_code <- push_args (reverse stack_args) nilOL
3269     delta <- getDeltaNat
3270
3271     -- deal with static vs dynamic call targets
3272     (callinsns,cconv) <-
3273       case target of
3274         -- CmmPrim -> ...
3275         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3276            -> -- ToDo: stdcall arg sizes
3277               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3278            where fn_imm = ImmCLbl lbl
3279         CmmForeignCall expr conv
3280            -> do (dyn_r, dyn_c) <- getSomeReg expr
3281                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3282
3283     let
3284         -- The x86_64 ABI requires us to set %al to the number of SSE
3285         -- registers that contain arguments, if the called routine
3286         -- is a varargs function.  We don't know whether it's a
3287         -- varargs function or not, so we have to assume it is.
3288         --
3289         -- It's not safe to omit this assignment, even if the number
3290         -- of SSE regs in use is zero.  If %al is larger than 8
3291         -- on entry to a varargs function, seg faults ensue.
3292         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3293
3294     let call = callinsns `appOL`
3295                toOL (
3296                         -- Deallocate parameters after call for ccall;
3297                         -- but not for stdcall (callee does it)
3298                   (if cconv == StdCallConv || real_size==0 then [] else
3299                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3300                   ++
3301                   [DELTA (delta + real_size)]
3302                )
3303     -- in
3304     setDeltaNat (delta + real_size)
3305
3306     let
3307         -- assign the results, if necessary
3308         assign_code []     = nilOL
3309         assign_code [(dest,_hint)] =
3310           case rep of
3311                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3312                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3313                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3314           where
3315                 rep = cmmRegRep dest
3316                 r_dest = getRegisterReg dest
3317         assign_code many = panic "genCCall.assign_code many"
3318
3319     return (load_args_code      `appOL`
3320             adjust_rsp          `appOL`
3321             push_code           `appOL`
3322             assign_eax sse_regs `appOL`
3323             call                `appOL`
3324             assign_code dest_regs)
3325
3326   where
3327     arg_size = 8 -- always, at the mo
3328
3329     load_args :: [(CmmExpr,MachHint)]
3330               -> [Reg]                  -- int regs avail for args
3331               -> [Reg]                  -- FP regs avail for args
3332               -> InstrBlock
3333               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3334     load_args args [] [] code     =  return (args, [], [], code)
3335         -- no more regs to use
3336     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3337         -- no more args to push
3338     load_args ((arg,hint) : rest) aregs fregs code
3339         | isFloatingRep arg_rep =
3340         case fregs of
3341           [] -> push_this_arg
3342           (r:rs) -> do
3343              arg_code <- getAnyReg arg
3344              load_args rest aregs rs (code `appOL` arg_code r)
3345         | otherwise =
3346         case aregs of
3347           [] -> push_this_arg
3348           (r:rs) -> do
3349              arg_code <- getAnyReg arg
3350              load_args rest rs fregs (code `appOL` arg_code r)
3351         where
3352           arg_rep = cmmExprRep arg
3353
3354           push_this_arg = do
3355             (args',ars,frs,code') <- load_args rest aregs fregs code
3356             return ((arg,hint):args', ars, frs, code')
3357
3358     push_args [] code = return code
3359     push_args ((arg,hint):rest) code
3360        | isFloatingRep arg_rep = do
3361          (arg_reg, arg_code) <- getSomeReg arg
3362          delta <- getDeltaNat
3363          setDeltaNat (delta-arg_size)
3364          let code' = code `appOL` toOL [
3365                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3366                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3367                         DELTA (delta-arg_size)]
3368          push_args rest code'
3369
3370        | otherwise = do
3371        -- we only ever generate word-sized function arguments.  Promotion
3372        -- has already happened: our Int8# type is kept sign-extended
3373        -- in an Int#, for example.
3374          ASSERT(arg_rep == I64) return ()
3375          (arg_op, arg_code) <- getOperand arg
3376          delta <- getDeltaNat
3377          setDeltaNat (delta-arg_size)
3378          let code' = code `appOL` toOL [PUSH I64 arg_op,
3379                                         DELTA (delta-arg_size)]
3380          push_args rest code'
3381         where
3382           arg_rep = cmmExprRep arg
3383 #endif
3384
3385 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3386
3387 #if sparc_TARGET_ARCH
3388 {-
3389    The SPARC calling convention is an absolute
3390    nightmare.  The first 6x32 bits of arguments are mapped into
3391    %o0 through %o5, and the remaining arguments are dumped to the
3392    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3393
3394    If we have to put args on the stack, move %o6==%sp down by
3395    the number of words to go on the stack, to ensure there's enough space.
3396
3397    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3398    16 words above the stack pointer is a word for the address of
3399    a structure return value.  I use this as a temporary location
3400    for moving values from float to int regs.  Certainly it isn't
3401    safe to put anything in the 16 words starting at %sp, since
3402    this area can get trashed at any time due to window overflows
3403    caused by signal handlers.
3404
3405    A final complication (if the above isn't enough) is that
3406    we can't blithely calculate the arguments one by one into
3407    %o0 .. %o5.  Consider the following nested calls:
3408
3409        fff a (fff b c)
3410
3411    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3412    the inner call will itself use %o0, which trashes the value put there
3413    in preparation for the outer call.  Upshot: we need to calculate the
3414    args into temporary regs, and move those to arg regs or onto the
3415    stack only immediately prior to the call proper.  Sigh.
3416 -}
3417
3418 genCCall target dest_regs argsAndHints vols = do
3419     let
3420         args = map fst argsAndHints
3421     argcode_and_vregs <- mapM arg_to_int_vregs args
3422     let
3423         (argcodes, vregss) = unzip argcode_and_vregs
3424         n_argRegs          = length allArgRegs
3425         n_argRegs_used     = min (length vregs) n_argRegs
3426         vregs              = concat vregss
3427     -- deal with static vs dynamic call targets
3428     callinsns <- (case target of
3429         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3430                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3431         CmmForeignCall expr conv -> do
3432                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3433                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3434         CmmPrim mop -> do
3435                   (res, reduce) <- outOfLineFloatOp mop
3436                   lblOrMopExpr <- case res of
3437                        Left lbl -> do
3438                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3439                        Right mopExpr -> do
3440                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3441                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3442                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3443
3444       )
3445     let
3446         argcode = concatOL argcodes
3447         (move_sp_down, move_sp_up)
3448            = let diff = length vregs - n_argRegs
3449                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3450              in  if   nn <= 0
3451                  then (nilOL, nilOL)
3452                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3453         transfer_code
3454            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3455     return (argcode       `appOL`
3456             move_sp_down  `appOL`
3457             transfer_code `appOL`
3458             callinsns     `appOL`
3459             unitOL NOP    `appOL`
3460             move_sp_up)
3461   where
3462      -- move args from the integer vregs into which they have been
3463      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3464      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3465
3466      move_final [] _ offset          -- all args done
3467         = []
3468
3469      move_final (v:vs) [] offset     -- out of aregs; move to stack
3470         = ST I32 v (spRel offset)
3471           : move_final vs [] (offset+1)
3472
3473      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3474         = OR False g0 (RIReg v) a
3475           : move_final vs az offset
3476
3477      -- generate code to calculate an argument, and move it into one
3478      -- or two integer vregs.
3479      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3480      arg_to_int_vregs arg
3481         | (cmmExprRep arg) == I64
3482         = do
3483           (ChildCode64 code r_lo) <- iselExpr64 arg
3484           let
3485               r_hi = getHiVRegFromLo r_lo
3486           return (code, [r_hi, r_lo])
3487         | otherwise
3488         = do
3489           (src, code) <- getSomeReg arg
3490           tmp <- getNewRegNat (cmmExprRep arg)
3491           let
3492               pk   = cmmExprRep arg
3493           case pk of
3494              F64 -> do
3495                       v1 <- getNewRegNat I32
3496                       v2 <- getNewRegNat I32
3497                       return (
3498                         code                          `snocOL`
3499                         FMOV F64 src f0                `snocOL`
3500                         ST   F32  f0 (spRel 16)         `snocOL`
3501                         LD   I32  (spRel 16) v1         `snocOL`
3502                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3503                         LD   I32  (spRel 16) v2
3504                         ,
3505                         [v1,v2]
3506                        )
3507              F32 -> do
3508                       v1 <- getNewRegNat I32
3509                       return (
3510                         code                    `snocOL`
3511                         ST   F32  src (spRel 16)  `snocOL`
3512                         LD   I32  (spRel 16) v1
3513                         ,
3514                         [v1]
3515                        )
3516              other -> do
3517                         v1 <- getNewRegNat I32
3518                         return (
3519                           code `snocOL` OR False g0 (RIReg src) v1
3520                           ,
3521                           [v1]
3522                          )
3523 outOfLineFloatOp mop =
3524     do
3525       mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3526                   mkForeignLabel functionName Nothing True
3527       let mopLabelOrExpr = case mopExpr of
3528                         CmmLit (CmmLabel lbl) -> Left lbl
3529                         _ -> Right mopExpr
3530       return (mopLabelOrExpr, reduce)
3531             where
3532                 (reduce, functionName) = case mop of
3533                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3534                   MO_F32_Log    -> (True,  FSLIT("log"))
3535                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3536
3537                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3538                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3539                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3540
3541                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3542                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3543                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3544
3545                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3546                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3547                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3548
3549                   MO_F64_Exp    -> (False, FSLIT("exp"))
3550                   MO_F64_Log    -> (False, FSLIT("log"))
3551                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3552
3553                   MO_F64_Sin    -> (False, FSLIT("sin"))
3554                   MO_F64_Cos    -> (False, FSLIT("cos"))
3555                   MO_F64_Tan    -> (False, FSLIT("tan"))
3556
3557                   MO_F64_Asin   -> (False, FSLIT("asin"))
3558                   MO_F64_Acos   -> (False, FSLIT("acos"))
3559                   MO_F64_Atan   -> (False, FSLIT("atan"))
3560
3561                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3562                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3563                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3564
3565                   other -> pprPanic "outOfLineFloatOp(sparc) "
3566                                 (pprCallishMachOp mop)
3567
3568 #endif /* sparc_TARGET_ARCH */
3569
3570 #if powerpc_TARGET_ARCH
3571
3572 #if darwin_TARGET_OS || linux_TARGET_OS
3573 {-
3574     The PowerPC calling convention for Darwin/Mac OS X
3575     is described in Apple's document
3576     "Inside Mac OS X - Mach-O Runtime Architecture".
3577
3578     PowerPC Linux uses the System V Release 4 Calling Convention
3579     for PowerPC. It is described in the
3580     "System V Application Binary Interface PowerPC Processor Supplement".
3581
3582     Both conventions are similar:
3583     Parameters may be passed in general-purpose registers starting at r3, in
3584     floating point registers starting at f1, or on the stack.
3585
3586     But there are substantial differences:
3587     * The number of registers used for parameter passing and the exact set of
3588       nonvolatile registers differs (see MachRegs.lhs).
3589     * On Darwin, stack space is always reserved for parameters, even if they are
3590       passed in registers. The called routine may choose to save parameters from
3591       registers to the corresponding space on the stack.
3592     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3593       parameter is passed in an FPR.
3594     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3595       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3596       Darwin just treats an I64 like two separate I32s (high word first).
3597     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3598       4-byte aligned like everything else on Darwin.
3599     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3600       PowerPC Linux does not agree, so neither do we.
3601
3602     According to both conventions, The parameter area should be part of the
3603     caller's stack frame, allocated in the caller's prologue code (large enough
3604     to hold the parameter lists for all called routines). The NCG already
3605     uses the stack for register spilling, leaving 64 bytes free at the top.
3606     If we need a larger parameter area than that, we just allocate a new stack
3607     frame just before ccalling.
3608 -}
3609
3610
3611 genCCall (CmmPrim MO_WriteBarrier) _ _ _
3612  = return $ unitOL LWSYNC
3613
3614 genCCall target dest_regs argsAndHints vols
3615   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3616         -- we rely on argument promotion in the codeGen
3617     do
3618         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3619                                                         (zip args argReps)
3620                                                         allArgRegs allFPArgRegs
3621                                                         initialStackOffset
3622                                                         (toOL []) []
3623
3624         (labelOrExpr, reduceToF32) <- case target of
3625             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3626             CmmForeignCall expr conv -> return  (Right expr, False)
3627             CmmPrim mop -> outOfLineFloatOp mop
3628
3629         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3630             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3631
3632         case labelOrExpr of
3633             Left lbl -> do
3634                 return (         codeBefore
3635                         `snocOL` BL lbl usedRegs
3636                         `appOL`  codeAfter)
3637             Right dyn -> do
3638                 (dynReg, dynCode) <- getSomeReg dyn
3639                 return (         dynCode
3640                         `snocOL` MTCTR dynReg
3641                         `appOL`  codeBefore
3642                         `snocOL` BCTRL usedRegs
3643                         `appOL`  codeAfter)
3644     where
3645 #if darwin_TARGET_OS
3646         initialStackOffset = 24
3647             -- size of linkage area + size of arguments, in bytes
3648         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3649                                        map machRepByteWidth argReps
3650 #elif linux_TARGET_OS
3651         initialStackOffset = 8
3652         stackDelta finalStack = roundTo 16 finalStack
3653 #endif
3654         args = map fst argsAndHints
3655         argReps = map cmmExprRep args
3656
3657         roundTo a x | x `mod` a == 0 = x
3658                     | otherwise = x + a - (x `mod` a)
3659
3660         move_sp_down finalStack
3661                | delta > 64 =
3662                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3663                               DELTA (-delta)]
3664                | otherwise = nilOL
3665                where delta = stackDelta finalStack
3666         move_sp_up finalStack
3667                | delta > 64 =
3668                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3669                               DELTA 0]
3670                | otherwise = nilOL
3671                where delta = stackDelta finalStack
3672
3673
3674         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3675         passArguments ((arg,I64):args) gprs fprs stackOffset
3676                accumCode accumUsed =
3677             do
3678                 ChildCode64 code vr_lo <- iselExpr64 arg
3679                 let vr_hi = getHiVRegFromLo vr_lo
3680
3681 #if darwin_TARGET_OS
3682                 passArguments args
3683                               (drop 2 gprs)
3684                               fprs
3685                               (stackOffset+8)
3686                               (accumCode `appOL` code
3687                                     `snocOL` storeWord vr_hi gprs stackOffset
3688                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3689                               ((take 2 gprs) ++ accumUsed)
3690             where
3691                 storeWord vr (gpr:_) offset = MR gpr vr
3692                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3693
3694 #elif linux_TARGET_OS
3695                 let stackOffset' = roundTo 8 stackOffset
3696                     stackCode = accumCode `appOL` code
3697                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3698                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3699                     regCode hireg loreg =
3700                         accumCode `appOL` code
3701                             `snocOL` MR hireg vr_hi
3702                             `snocOL` MR loreg vr_lo
3703
3704                 case gprs of
3705                     hireg : loreg : regs | even (length gprs) ->
3706                         passArguments args regs fprs stackOffset
3707                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3708                     _skipped : hireg : loreg : regs ->
3709                         passArguments args regs fprs stackOffset
3710                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3711                     _ -> -- only one or no regs left
3712                         passArguments args [] fprs (stackOffset'+8)
3713                                       stackCode accumUsed
3714 #endif
3715
3716         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3717             | reg : _ <- regs = do
3718                 register <- getRegister arg
3719                 let code = case register of
3720                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3721                             Any _ acode -> acode reg
3722                 passArguments args
3723                               (drop nGprs gprs)
3724                               (drop nFprs fprs)
3725 #if darwin_TARGET_OS
3726         -- The Darwin ABI requires that we reserve stack slots for register parameters
3727                               (stackOffset + stackBytes)
3728 #elif linux_TARGET_OS
3729         -- ... the SysV ABI doesn't.
3730                               stackOffset
3731 #endif
3732                               (accumCode `appOL` code)
3733                               (reg : accumUsed)
3734             | otherwise = do
3735                 (vr, code) <- getSomeReg arg
3736                 passArguments args
3737                               (drop nGprs gprs)
3738                               (drop nFprs fprs)
3739                               (stackOffset' + stackBytes)
3740                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3741                               accumUsed
3742             where
3743 #if darwin_TARGET_OS
3744         -- stackOffset is at least 4-byte aligned
3745         -- The Darwin ABI is happy with that.
3746                 stackOffset' = stackOffset
3747 #else
3748         -- ... the SysV ABI requires 8-byte alignment for doubles.
3749                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3750                              | otherwise  =           stackOffset
3751 #endif
3752                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3753                 (nGprs, nFprs, stackBytes, regs) = case rep of
3754                     I32 -> (1, 0, 4, gprs)
3755 #if darwin_TARGET_OS
3756         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3757         -- we use the FPRs.
3758                     F32 -> (1, 1, 4, fprs)
3759                     F64 -> (2, 1, 8, fprs)
3760 #elif linux_TARGET_OS
3761         -- ... the SysV ABI doesn't.
3762                     F32 -> (0, 1, 4, fprs)
3763                     F64 -> (0, 1, 8, fprs)
3764 #endif
3765
3766         moveResult reduceToF32 =
3767             case dest_regs of
3768                 [] -> nilOL
3769                 [(dest, _hint)]
3770                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3771                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3772                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3773                                           MR r_dest r4]
3774                     | otherwise -> unitOL (MR r_dest r3)
3775                     where rep = cmmRegRep dest
3776                           r_dest = getRegisterReg dest
3777
3778         outOfLineFloatOp mop =
3779             do
3780                 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3781                               mkForeignLabel functionName Nothing True
3782                 let mopLabelOrExpr = case mopExpr of
3783                         CmmLit (CmmLabel lbl) -> Left lbl
3784                         _ -> Right mopExpr
3785                 return (mopLabelOrExpr, reduce)
3786             where
3787                 (functionName, reduce) = case mop of
3788                     MO_F32_Exp   -> (FSLIT("exp"), True)
3789                     MO_F32_Log   -> (FSLIT("log"), True)
3790                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3791
3792                     MO_F32_Sin   -> (FSLIT("sin"), True)
3793                     MO_F32_Cos   -> (FSLIT("cos"), True)
3794                     MO_F32_Tan   -> (FSLIT("tan"), True)
3795
3796                     MO_F32_Asin  -> (FSLIT("asin"), True)
3797                     MO_F32_Acos  -> (FSLIT("acos"), True)
3798                     MO_F32_Atan  -> (FSLIT("atan"), True)
3799
3800                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3801                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3802                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3803                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3804
3805                     MO_F64_Exp   -> (FSLIT("exp"), False)
3806                     MO_F64_Log   -> (FSLIT("log"), False)
3807                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3808
3809                     MO_F64_Sin   -> (FSLIT("sin"), False)
3810                     MO_F64_Cos   -> (FSLIT("cos"), False)
3811                     MO_F64_Tan   -> (FSLIT("tan"), False)
3812
3813                     MO_F64_Asin  -> (FSLIT("asin"), False)
3814                     MO_F64_Acos  -> (FSLIT("acos"), False)
3815                     MO_F64_Atan  -> (FSLIT("atan"), False)
3816
3817                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3818                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3819                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3820                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3821                     other -> pprPanic "genCCall(ppc): unknown callish op"
3822                                     (pprCallishMachOp other)
3823
3824 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3825
3826 #endif /* powerpc_TARGET_ARCH */
3827
3828
3829 -- -----------------------------------------------------------------------------
3830 -- Generating a table-branch
3831
3832 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3833
3834 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3835 genSwitch expr ids
3836   | opt_PIC
3837   = do
3838         (reg,e_code) <- getSomeReg expr
3839         lbl <- getNewLabelNat
3840         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3841         (tableReg,t_code) <- getSomeReg $ dynRef
3842         let
3843             jumpTable = map jumpTableEntryRel ids
3844
3845             jumpTableEntryRel Nothing
3846                 = CmmStaticLit (CmmInt 0 wordRep)
3847             jumpTableEntryRel (Just (BlockId id))
3848                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3849                 where blockLabel = mkAsmTempLabel id
3850
3851             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3852                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3853
3854 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3855     -- on Mac OS X/x86_64, put the jump table in the text section
3856     -- to work around a limitation of the linker.
3857     -- ld64 is unable to handle the relocations for
3858     --     .quad L1 - L0
3859     -- if L0 is not preceded by a non-anonymous label in its section.
3860
3861             code = e_code `appOL` t_code `appOL` toOL [
3862                             ADD wordRep op (OpReg tableReg),
3863                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3864                             LDATA Text (CmmDataLabel lbl : jumpTable)
3865                     ]
3866 #else
3867             code = e_code `appOL` t_code `appOL` toOL [
3868                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3869                             ADD wordRep op (OpReg tableReg),
3870                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3871                     ]
3872 #endif
3873         return code
3874   | otherwise
3875   = do
3876         (reg,e_code) <- getSomeReg expr
3877         lbl <- getNewLabelNat
3878         let
3879             jumpTable = map jumpTableEntry ids
3880             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3881             code = e_code `appOL` toOL [
3882                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3883                     JMP_TBL op [ id | Just id <- ids ]
3884                  ]
3885         -- in
3886         return code
3887 #elif powerpc_TARGET_ARCH
3888 genSwitch expr ids
3889   | opt_PIC
3890   = do
3891         (reg,e_code) <- getSomeReg expr
3892         tmp <- getNewRegNat I32
3893         lbl <- getNewLabelNat
3894         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3895         (tableReg,t_code) <- getSomeReg $ dynRef
3896         let
3897             jumpTable = map jumpTableEntryRel ids
3898
3899             jumpTableEntryRel Nothing
3900                 = CmmStaticLit (CmmInt 0 wordRep)
3901             jumpTableEntryRel (Just (BlockId id))
3902                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3903                 where blockLabel = mkAsmTempLabel id
3904
3905             code = e_code `appOL` t_code `appOL` toOL [
3906                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3907                             SLW tmp reg (RIImm (ImmInt 2)),
3908                             LD I32 tmp (AddrRegReg tableReg tmp),
3909                             ADD tmp tmp (RIReg tableReg),
3910                             MTCTR tmp,
3911                             BCTR [ id | Just id <- ids ]
3912                     ]
3913         return code
3914   | otherwise
3915   = do
3916         (reg,e_code) <- getSomeReg expr
3917         tmp <- getNewRegNat I32
3918         lbl <- getNewLabelNat
3919         let
3920             jumpTable = map jumpTableEntry ids
3921
3922             code = e_code `appOL` toOL [
3923                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3924                             SLW tmp reg (RIImm (ImmInt 2)),
3925                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3926                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3927                             MTCTR tmp,
3928                             BCTR [ id | Just id <- ids ]
3929                     ]
3930         return code
3931 #else
3932 genSwitch expr ids = panic "ToDo: genSwitch"
3933 #endif
3934
3935 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3936 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3937     where blockLabel = mkAsmTempLabel id
3938
3939 -- -----------------------------------------------------------------------------
3940 -- Support bits
3941 -- -----------------------------------------------------------------------------
3942
3943
3944 -- -----------------------------------------------------------------------------
3945 -- 'condIntReg' and 'condFltReg': condition codes into registers
3946
3947 -- Turn those condition codes into integers now (when they appear on
3948 -- the right hand side of an assignment).
3949 --
3950 -- (If applicable) Do not fill the delay slots here; you will confuse the
3951 -- register allocator.
3952
3953 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3954
3955 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3956
3957 #if alpha_TARGET_ARCH
3958 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3959 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3960 #endif /* alpha_TARGET_ARCH */
3961
3962 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3963
3964 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3965
3966 condIntReg cond x y = do
3967   CondCode _ cond cond_code <- condIntCode cond x y
3968   tmp <- getNewRegNat I8
3969   let
3970         code dst = cond_code `appOL` toOL [
3971                     SETCC cond (OpReg tmp),
3972                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3973                   ]
3974   -- in
3975   return (Any I32 code)
3976
3977 #endif
3978
3979 #if i386_TARGET_ARCH
3980
3981 condFltReg cond x y = do
3982   CondCode _ cond cond_code <- condFltCode cond x y
3983   tmp <- getNewRegNat I8
3984   let
3985         code dst = cond_code `appOL` toOL [
3986                     SETCC cond (OpReg tmp),
3987                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3988                   ]
3989   -- in
3990   return (Any I32 code)
3991
3992 #endif
3993
3994 #if x86_64_TARGET_ARCH
3995
3996 condFltReg cond x y = do
3997   CondCode _ cond cond_code <- condFltCode cond x y
3998   tmp1 <- getNewRegNat wordRep
3999   tmp2 <- getNewRegNat wordRep
4000   let
4001         -- We have to worry about unordered operands (eg. comparisons
4002         -- against NaN).  If the operands are unordered, the comparison
4003         -- sets the parity flag, carry flag and zero flag.
4004         -- All comparisons are supposed to return false for unordered
4005         -- operands except for !=, which returns true.
4006         --
4007         -- Optimisation: we don't have to test the parity flag if we
4008         -- know the test has already excluded the unordered case: eg >
4009         -- and >= test for a zero carry flag, which can only occur for
4010         -- ordered operands.
4011         --
4012         -- ToDo: by reversing comparisons we could avoid testing the
4013         -- parity flag in more cases.
4014
4015         code dst =
4016            cond_code `appOL`
4017              (case cond of
4018                 NE  -> or_unordered dst
4019                 GU  -> plain_test   dst
4020                 GEU -> plain_test   dst
4021                 _   -> and_ordered  dst)
4022
4023         plain_test dst = toOL [
4024                     SETCC cond (OpReg tmp1),
4025                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4026                  ]
4027         or_unordered dst = toOL [
4028                     SETCC cond (OpReg tmp1),
4029                     SETCC PARITY (OpReg tmp2),
4030                     OR I8 (OpReg tmp1) (OpReg tmp2),
4031                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4032                   ]
4033         and_ordered dst = toOL [
4034                     SETCC cond (OpReg tmp1),
4035                     SETCC NOTPARITY (OpReg tmp2),
4036                     AND I8 (OpReg tmp1) (OpReg tmp2),
4037                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4038                   ]
4039   -- in
4040   return (Any I32 code)
4041
4042 #endif
4043
4044 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4045
4046 #if sparc_TARGET_ARCH
4047
4048 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4049     (src, code) <- getSomeReg x
4050     tmp <- getNewRegNat I32
4051     let
4052         code__2 dst = code `appOL` toOL [
4053             SUB False True g0 (RIReg src) g0,
4054             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4055     return (Any I32 code__2)
4056
4057 condIntReg EQQ x y = do
4058     (src1, code1) <- getSomeReg x
4059     (src2, code2) <- getSomeReg y
4060     tmp1 <- getNewRegNat I32
4061     tmp2 <- getNewRegNat I32
4062     let
4063         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4064             XOR False src1 (RIReg src2) dst,
4065             SUB False True g0 (RIReg dst) g0,
4066             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4067     return (Any I32 code__2)
4068
4069 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4070     (src, code) <- getSomeReg x
4071     tmp <- getNewRegNat I32
4072     let
4073         code__2 dst = code `appOL` toOL [
4074             SUB False True g0 (RIReg src) g0,
4075             ADD True False g0 (RIImm (ImmInt 0)) dst]
4076     return (Any I32 code__2)
4077
4078 condIntReg NE x y = do
4079     (src1, code1) <- getSomeReg x
4080     (src2, code2) <- getSomeReg y
4081     tmp1 <- getNewRegNat I32
4082     tmp2 <- getNewRegNat I32
4083     let
4084         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4085             XOR False src1 (RIReg src2) dst,
4086             SUB False True g0 (RIReg dst) g0,
4087             ADD True False g0 (RIImm (ImmInt 0)) dst]
4088     return (Any I32 code__2)
4089
4090 condIntReg cond x y = do
4091     BlockId lbl1 <- getBlockIdNat
4092     BlockId lbl2 <- getBlockIdNat
4093     CondCode _ cond cond_code <- condIntCode cond x y
4094     let
4095         code__2 dst = cond_code `appOL` toOL [
4096             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4097             OR False g0 (RIImm (ImmInt 0)) dst,
4098             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4099             NEWBLOCK (BlockId lbl1),
4100             OR False g0 (RIImm (ImmInt 1)) dst,
4101             NEWBLOCK (BlockId lbl2)]
4102     return (Any I32 code__2)
4103
4104 condFltReg cond x y = do
4105     BlockId lbl1 <- getBlockIdNat
4106     BlockId lbl2 <- getBlockIdNat
4107     CondCode _ cond cond_code <- condFltCode cond x y
4108     let
4109         code__2 dst = cond_code `appOL` toOL [
4110             NOP,
4111             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4112             OR False g0 (RIImm (ImmInt 0)) dst,
4113             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4114             NEWBLOCK (BlockId lbl1),
4115             OR False g0 (RIImm (ImmInt 1)) dst,
4116             NEWBLOCK (BlockId lbl2)]
4117     return (Any I32 code__2)
4118
4119 #endif /* sparc_TARGET_ARCH */
4120
4121 #if powerpc_TARGET_ARCH
4122 condReg getCond = do
4123     lbl1 <- getBlockIdNat
4124     lbl2 <- getBlockIdNat
4125     CondCode _ cond cond_code <- getCond
4126     let
4127 {-        code dst = cond_code `appOL` toOL [
4128                 BCC cond lbl1,
4129                 LI dst (ImmInt 0),
4130                 BCC ALWAYS lbl2,
4131                 NEWBLOCK lbl1,
4132                 LI dst (ImmInt 1),
4133                 BCC ALWAYS lbl2,
4134                 NEWBLOCK lbl2
4135             ]-}
4136         code dst = cond_code
4137             `appOL` negate_code
4138             `appOL` toOL [
4139                 MFCR dst,
4140                 RLWINM dst dst (bit + 1) 31 31
4141             ]
4142
4143         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4144                     | otherwise = nilOL
4145
4146         (bit, do_negate) = case cond of
4147             LTT -> (0, False)
4148             LE  -> (1, True)
4149             EQQ -> (2, False)
4150             GE  -> (0, True)
4151             GTT -> (1, False)
4152
4153             NE  -> (2, True)
4154
4155             LU  -> (0, False)
4156             LEU -> (1, True)
4157             GEU -> (0, True)
4158             GU  -> (1, False)
4159
4160     return (Any I32 code)
4161
4162 condIntReg cond x y = condReg (condIntCode cond x y)
4163 condFltReg cond x y = condReg (condFltCode cond x y)
4164 #endif /* powerpc_TARGET_ARCH */
4165
4166
4167 -- -----------------------------------------------------------------------------
4168 -- 'trivial*Code': deal with trivial instructions
4169
4170 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4171 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4172 -- Only look for constants on the right hand side, because that's
4173 -- where the generic optimizer will have put them.
4174
4175 -- Similarly, for unary instructions, we don't have to worry about
4176 -- matching an StInt as the argument, because genericOpt will already
4177 -- have handled the constant-folding.
4178
4179 trivialCode
4180     :: MachRep
4181     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4182       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4183                      -> Maybe (Operand -> Operand -> Instr)
4184       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4185                      -> Maybe (Operand -> Operand -> Instr)
4186       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4187       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4188       ,)))))
4189     -> CmmExpr -> CmmExpr -- the two arguments
4190     -> NatM Register
4191
4192 #ifndef powerpc_TARGET_ARCH
4193 trivialFCode
4194     :: MachRep
4195     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4196       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4197       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4198       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4199       ,))))
4200     -> CmmExpr -> CmmExpr -- the two arguments
4201     -> NatM Register
4202 #endif
4203
4204 trivialUCode
4205     :: MachRep
4206     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4207       ,IF_ARCH_i386 ((Operand -> Instr)
4208       ,IF_ARCH_x86_64 ((Operand -> Instr)
4209       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4210       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4211       ,)))))
4212     -> CmmExpr  -- the one argument
4213     -> NatM Register
4214
4215 #ifndef powerpc_TARGET_ARCH
4216 trivialUFCode
4217     :: MachRep
4218     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4219       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4220       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4221       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4222       ,))))
4223     -> CmmExpr -- the one argument
4224     -> NatM Register
4225 #endif
4226
4227 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4228
4229 #if alpha_TARGET_ARCH
4230
4231 trivialCode instr x (StInt y)
4232   | fits8Bits y
4233   = getRegister x               `thenNat` \ register ->
4234     getNewRegNat IntRep         `thenNat` \ tmp ->
4235     let
4236         code = registerCode register tmp
4237         src1 = registerName register tmp
4238         src2 = ImmInt (fromInteger y)
4239         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4240     in
4241     return (Any IntRep code__2)
4242
4243 trivialCode instr x y
4244   = getRegister x               `thenNat` \ register1 ->
4245     getRegister y               `thenNat` \ register2 ->
4246     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4247     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4248     let
4249         code1 = registerCode register1 tmp1 []
4250         src1  = registerName register1 tmp1
4251         code2 = registerCode register2 tmp2 []
4252         src2  = registerName register2 tmp2
4253         code__2 dst = asmSeqThen [code1, code2] .
4254                      mkSeqInstr (instr src1 (RIReg src2) dst)
4255     in
4256     return (Any IntRep code__2)
4257
4258 ------------
4259 trivialUCode instr x
4260   = getRegister x               `thenNat` \ register ->
4261     getNewRegNat IntRep         `thenNat` \ tmp ->
4262     let
4263         code = registerCode register tmp
4264         src  = registerName register tmp
4265         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4266     in
4267     return (Any IntRep code__2)
4268
4269 ------------
4270 trivialFCode _ instr x y
4271   = getRegister x               `thenNat` \ register1 ->
4272     getRegister y               `thenNat` \ register2 ->
4273     getNewRegNat F64    `thenNat` \ tmp1 ->
4274     getNewRegNat F64    `thenNat` \ tmp2 ->
4275     let
4276         code1 = registerCode register1 tmp1
4277         src1  = registerName register1 tmp1
4278
4279         code2 = registerCode register2 tmp2
4280         src2  = registerName register2 tmp2
4281
4282         code__2 dst = asmSeqThen [code1 [], code2 []] .
4283                       mkSeqInstr (instr src1 src2 dst)
4284     in
4285     return (Any F64 code__2)
4286
4287 trivialUFCode _ instr x
4288   = getRegister x               `thenNat` \ register ->
4289     getNewRegNat F64    `thenNat` \ tmp ->
4290     let
4291         code = registerCode register tmp
4292         src  = registerName register tmp
4293         code__2 dst = code . mkSeqInstr (instr src dst)
4294     in
4295     return (Any F64 code__2)
4296
4297 #endif /* alpha_TARGET_ARCH */
4298
4299 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4300
4301 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4302
4303 {-
4304 The Rules of the Game are:
4305
4306 * You cannot assume anything about the destination register dst;
4307   it may be anything, including a fixed reg.
4308
4309 * You may compute an operand into a fixed reg, but you may not
4310   subsequently change the contents of that fixed reg.  If you
4311   want to do so, first copy the value either to a temporary
4312   or into dst.  You are free to modify dst even if it happens
4313   to be a fixed reg -- that's not your problem.
4314
4315 * You cannot assume that a fixed reg will stay live over an
4316   arbitrary computation.  The same applies to the dst reg.
4317
4318 * Temporary regs obtained from getNewRegNat are distinct from
4319   each other and from all other regs, and stay live over
4320   arbitrary computations.
4321
4322 --------------------
4323
4324 SDM's version of The Rules:
4325
4326 * If getRegister returns Any, that means it can generate correct
4327   code which places the result in any register, period.  Even if that
4328   register happens to be read during the computation.
4329
4330   Corollary #1: this means that if you are generating code for an
4331   operation with two arbitrary operands, you cannot assign the result
4332   of the first operand into the destination register before computing
4333   the second operand.  The second operand might require the old value
4334   of the destination register.
4335
4336   Corollary #2: A function might be able to generate more efficient
4337   code if it knows the destination register is a new temporary (and
4338   therefore not read by any of the sub-computations).
4339
4340 * If getRegister returns Any, then the code it generates may modify only:
4341         (a) fresh temporaries
4342         (b) the destination register
4343         (c) known registers (eg. %ecx is used by shifts)
4344   In particular, it may *not* modify global registers, unless the global
4345   register happens to be the destination register.
4346 -}
4347
4348 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4349   | not (is64BitLit lit_a) = do
4350   b_code <- getAnyReg b
4351   let
4352        code dst
4353          = b_code dst `snocOL`
4354            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4355   -- in
4356   return (Any rep code)
4357
4358 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4359
4360 -- This is re-used for floating pt instructions too.
4361 genTrivialCode rep instr a b = do
4362   (b_op, b_code) <- getNonClobberedOperand b
4363   a_code <- getAnyReg a
4364   tmp <- getNewRegNat rep
4365   let
4366      -- We want the value of b to stay alive across the computation of a.
4367      -- But, we want to calculate a straight into the destination register,
4368      -- because the instruction only has two operands (dst := dst `op` src).
4369      -- The troublesome case is when the result of b is in the same register
4370      -- as the destination reg.  In this case, we have to save b in a
4371      -- new temporary across the computation of a.
4372      code dst
4373         | dst `regClashesWithOp` b_op =
4374                 b_code `appOL`
4375                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4376                 a_code dst `snocOL`
4377                 instr (OpReg tmp) (OpReg dst)
4378         | otherwise =
4379                 b_code `appOL`
4380                 a_code dst `snocOL`
4381                 instr b_op (OpReg dst)
4382   -- in
4383   return (Any rep code)
4384
4385 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4386 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4387 reg `regClashesWithOp` _            = False
4388
4389 -----------
4390
4391 trivialUCode rep instr x = do
4392   x_code <- getAnyReg x
4393   let
4394      code dst =
4395         x_code dst `snocOL`
4396         instr (OpReg dst)
4397   -- in
4398   return (Any rep code)
4399
4400 -----------
4401
4402 #if i386_TARGET_ARCH
4403
4404 trivialFCode pk instr x y = do
4405   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4406   (y_reg, y_code) <- getSomeReg y
4407   let
4408      code dst =
4409         x_code `appOL`
4410         y_code `snocOL`
4411         instr pk x_reg y_reg dst
4412   -- in
4413   return (Any pk code)
4414
4415 #endif
4416
4417 #if x86_64_TARGET_ARCH
4418
4419 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4420
4421 #endif
4422
4423 -------------
4424
4425 trivialUFCode rep instr x = do
4426   (x_reg, x_code) <- getSomeReg x
4427   let
4428      code dst =
4429         x_code `snocOL`
4430         instr x_reg dst
4431   -- in
4432   return (Any rep code)
4433
4434 #endif /* i386_TARGET_ARCH */
4435
4436 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4437
4438 #if sparc_TARGET_ARCH
4439
4440 trivialCode pk instr x (CmmLit (CmmInt y d))
4441   | fits13Bits y
4442   = do
4443       (src1, code) <- getSomeReg x
4444       tmp <- getNewRegNat I32
4445       let
4446         src2 = ImmInt (fromInteger y)
4447         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4448       return (Any I32 code__2)
4449
4450 trivialCode pk instr x y = do
4451     (src1, code1) <- getSomeReg x
4452     (src2, code2) <- getSomeReg y
4453     tmp1 <- getNewRegNat I32
4454     tmp2 <- getNewRegNat I32
4455     let
4456         code__2 dst = code1 `appOL` code2 `snocOL`
4457                       instr src1 (RIReg src2) dst
4458     return (Any I32 code__2)
4459
4460 ------------
4461 trivialFCode pk instr x y = do
4462     (src1, code1) <- getSomeReg x
4463     (src2, code2) <- getSomeReg y
4464     tmp1 <- getNewRegNat (cmmExprRep x)
4465     tmp2 <- getNewRegNat (cmmExprRep y)
4466     tmp <- getNewRegNat F64
4467     let
4468         promote x = FxTOy F32 F64 x tmp
4469
4470         pk1   = cmmExprRep x
4471         pk2   = cmmExprRep y
4472
4473         code__2 dst =
4474                 if pk1 == pk2 then
4475                     code1 `appOL` code2 `snocOL`
4476                     instr pk src1 src2 dst
4477                 else if pk1 == F32 then
4478                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4479                     instr F64 tmp src2 dst
4480                 else
4481                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4482                     instr F64 src1 tmp dst
4483     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4484
4485 ------------
4486 trivialUCode pk instr x = do
4487     (src, code) <- getSomeReg x
4488     tmp <- getNewRegNat pk
4489     let
4490         code__2 dst = code `snocOL` instr (RIReg src) dst
4491     return (Any pk code__2)
4492
4493 -------------
4494 trivialUFCode pk instr x = do
4495     (src, code) <- getSomeReg x
4496     tmp <- getNewRegNat pk
4497     let
4498         code__2 dst = code `snocOL` instr src dst
4499     return (Any pk code__2)
4500
4501 #endif /* sparc_TARGET_ARCH */
4502
4503 #if powerpc_TARGET_ARCH
4504
4505 {-
4506 Wolfgang's PowerPC version of The Rules:
4507
4508 A slightly modified version of The Rules to take advantage of the fact
4509 that PowerPC instructions work on all registers and don't implicitly
4510 clobber any fixed registers.
4511
4512 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4513
4514 * If getRegister returns Any, then the code it generates may modify only:
4515         (a) fresh temporaries
4516         (b) the destination register
4517   It may *not* modify global registers, unless the global
4518   register happens to be the destination register.
4519   It may not clobber any other registers. In fact, only ccalls clobber any
4520   fixed registers.
4521   Also, it may not modify the counter register (used by genCCall).
4522
4523   Corollary: If a getRegister for a subexpression returns Fixed, you need
4524   not move it to a fresh temporary before evaluating the next subexpression.
4525   The Fixed register won't be modified.
4526   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4527
4528 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4529   the value of the destination register.
4530 -}
4531
4532 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4533     | Just imm <- makeImmediate rep signed y
4534     = do
4535         (src1, code1) <- getSomeReg x
4536         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4537         return (Any rep code)
4538
4539 trivialCode rep signed instr x y = do
4540     (src1, code1) <- getSomeReg x
4541     (src2, code2) <- getSomeReg y
4542     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4543     return (Any rep code)
4544
4545 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4546     -> CmmExpr -> CmmExpr -> NatM Register
4547 trivialCodeNoImm rep instr x y = do
4548     (src1, code1) <- getSomeReg x
4549     (src2, code2) <- getSomeReg y
4550     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4551     return (Any rep code)
4552
4553 trivialUCode rep instr x = do
4554     (src, code) <- getSomeReg x
4555     let code' dst = code `snocOL` instr dst src
4556     return (Any rep code')
4557
4558 -- There is no "remainder" instruction on the PPC, so we have to do
4559 -- it the hard way.
4560 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4561
4562 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4563     -> CmmExpr -> CmmExpr -> NatM Register
4564 remainderCode rep div x y = do
4565     (src1, code1) <- getSomeReg x
4566     (src2, code2) <- getSomeReg y
4567     let code dst = code1 `appOL` code2 `appOL` toOL [
4568                 div dst src1 src2,
4569                 MULLW dst dst (RIReg src2),
4570                 SUBF dst dst src1
4571             ]
4572     return (Any rep code)
4573
4574 #endif /* powerpc_TARGET_ARCH */
4575
4576
4577 -- -----------------------------------------------------------------------------
4578 --  Coercing to/from integer/floating-point...
4579
4580 -- When going to integer, we truncate (round towards 0).
4581
4582 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4583 -- conversions.  We have to store temporaries in memory to move
4584 -- between the integer and the floating point register sets.
4585
4586 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4587 -- pretend, on sparc at least, that double and float regs are seperate
4588 -- kinds, so the value has to be computed into one kind before being
4589 -- explicitly "converted" to live in the other kind.
4590
4591 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4592 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4593
4594 #if sparc_TARGET_ARCH
4595 coerceDbl2Flt :: CmmExpr -> NatM Register
4596 coerceFlt2Dbl :: CmmExpr -> NatM Register
4597 #endif
4598
4599 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4600
4601 #if alpha_TARGET_ARCH
4602
4603 coerceInt2FP _ x
4604   = getRegister x               `thenNat` \ register ->
4605     getNewRegNat IntRep         `thenNat` \ reg ->
4606     let
4607         code = registerCode register reg
4608         src  = registerName register reg
4609
4610         code__2 dst = code . mkSeqInstrs [
4611             ST Q src (spRel 0),
4612             LD TF dst (spRel 0),
4613             CVTxy Q TF dst dst]
4614     in
4615     return (Any F64 code__2)
4616
4617 -------------
4618 coerceFP2Int x
4619   = getRegister x               `thenNat` \ register ->
4620     getNewRegNat F64    `thenNat` \ tmp ->
4621     let
4622         code = registerCode register tmp
4623         src  = registerName register tmp
4624
4625         code__2 dst = code . mkSeqInstrs [
4626             CVTxy TF Q src tmp,
4627             ST TF tmp (spRel 0),
4628             LD Q dst (spRel 0)]
4629     in
4630     return (Any IntRep code__2)
4631
4632 #endif /* alpha_TARGET_ARCH */
4633
4634 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4635
4636 #if i386_TARGET_ARCH
4637
4638 coerceInt2FP from to x = do
4639   (x_reg, x_code) <- getSomeReg x
4640   let
4641         opc  = case to of F32 -> GITOF; F64 -> GITOD
4642         code dst = x_code `snocOL` opc x_reg dst
4643         -- ToDo: works for non-I32 reps?
4644   -- in
4645   return (Any to code)
4646
4647 ------------
4648
4649 coerceFP2Int from to x = do
4650   (x_reg, x_code) <- getSomeReg x
4651   let
4652         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4653         code dst = x_code `snocOL` opc x_reg dst
4654         -- ToDo: works for non-I32 reps?
4655   -- in
4656   return (Any to code)
4657
4658 #endif /* i386_TARGET_ARCH */
4659
4660 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4661
4662 #if x86_64_TARGET_ARCH
4663
4664 coerceFP2Int from to x = do
4665   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4666   let
4667         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4668         code dst = x_code `snocOL` opc x_op dst
4669   -- in
4670   return (Any to code) -- works even if the destination rep is <I32
4671
4672 coerceInt2FP from to x = do
4673   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4674   let
4675         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4676         code dst = x_code `snocOL` opc x_op dst
4677   -- in
4678   return (Any to code) -- works even if the destination rep is <I32
4679
4680 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4681 coerceFP2FP to x = do
4682   (x_reg, x_code) <- getSomeReg x
4683   let
4684         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4685         code dst = x_code `snocOL` opc x_reg dst
4686   -- in
4687   return (Any to code)
4688
4689 #endif
4690
4691 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4692
4693 #if sparc_TARGET_ARCH
4694
4695 coerceInt2FP pk1 pk2 x = do
4696     (src, code) <- getSomeReg x
4697     let
4698         code__2 dst = code `appOL` toOL [
4699             ST pk1 src (spRel (-2)),
4700             LD pk1 (spRel (-2)) dst,
4701             FxTOy pk1 pk2 dst dst]
4702     return (Any pk2 code__2)
4703
4704 ------------
4705 coerceFP2Int pk fprep x = do
4706     (src, code) <- getSomeReg x
4707     reg <- getNewRegNat fprep
4708     tmp <- getNewRegNat pk
4709     let
4710         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4711             code `appOL` toOL [
4712             FxTOy fprep pk src tmp,
4713             ST pk tmp (spRel (-2)),
4714             LD pk (spRel (-2)) dst]
4715     return (Any pk code__2)
4716
4717 ------------
4718 coerceDbl2Flt x = do
4719     (src, code) <- getSomeReg x
4720     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4721
4722 ------------
4723 coerceFlt2Dbl x = do
4724     (src, code) <- getSomeReg x
4725     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4726
4727 #endif /* sparc_TARGET_ARCH */
4728
4729 #if powerpc_TARGET_ARCH
4730 coerceInt2FP fromRep toRep x = do
4731     (src, code) <- getSomeReg x
4732     lbl <- getNewLabelNat
4733     itmp <- getNewRegNat I32
4734     ftmp <- getNewRegNat F64
4735     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4736     Amode addr addr_code <- getAmode dynRef
4737     let
4738         code' dst = code `appOL` maybe_exts `appOL` toOL [
4739                 LDATA ReadOnlyData
4740                                 [CmmDataLabel lbl,
4741                                  CmmStaticLit (CmmInt 0x43300000 I32),
4742                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4743                 XORIS itmp src (ImmInt 0x8000),
4744                 ST I32 itmp (spRel 3),
4745                 LIS itmp (ImmInt 0x4330),
4746                 ST I32 itmp (spRel 2),
4747                 LD F64 ftmp (spRel 2)
4748             ] `appOL` addr_code `appOL` toOL [
4749                 LD F64 dst addr,
4750                 FSUB F64 dst ftmp dst
4751             ] `appOL` maybe_frsp dst
4752
4753         maybe_exts = case fromRep of
4754                         I8 ->  unitOL $ EXTS I8 src src
4755                         I16 -> unitOL $ EXTS I16 src src
4756                         I32 -> nilOL
4757         maybe_frsp dst = case toRep of
4758                         F32 -> unitOL $ FRSP dst dst
4759                         F64 -> nilOL
4760     return (Any toRep code')
4761
4762 coerceFP2Int fromRep toRep x = do
4763     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4764     (src, code) <- getSomeReg x
4765     tmp <- getNewRegNat F64
4766     let
4767         code' dst = code `appOL` toOL [
4768                 -- convert to int in FP reg
4769             FCTIWZ tmp src,
4770                 -- store value (64bit) from FP to stack
4771             ST F64 tmp (spRel 2),
4772                 -- read low word of value (high word is undefined)
4773             LD I32 dst (spRel 3)]
4774     return (Any toRep code')
4775 #endif /* powerpc_TARGET_ARCH */
4776
4777
4778 -- -----------------------------------------------------------------------------
4779 -- eXTRA_STK_ARGS_HERE
4780
4781 -- We (allegedly) put the first six C-call arguments in registers;
4782 -- where do we start putting the rest of them?
4783
4784 -- Moved from MachInstrs (SDM):
4785
4786 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4787 eXTRA_STK_ARGS_HERE :: Int
4788 eXTRA_STK_ARGS_HERE
4789   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4790 #endif
4791