1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
32 import ClosureInfo ( C_SRT(..) )
35 import StaticFlags ( opt_PIC )
36 import ForeignCall ( CCallConv(..) )
41 import FastTypes ( isFastTrue )
42 import Constants ( wORD_SIZE )
45 import Debug.Trace ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Data.Maybe ( fromJust )
54 -- -----------------------------------------------------------------------------
55 -- Top-level of the instruction selector
57 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
58 -- They are really trees of insns to facilitate fast appending, where a
59 -- left-to-right traversal (pre-order?) yields the insns in the correct order.
-- OrdList gives O(1) append/snoc/cons, which matters because instruction
-- sequences are built by repeated concatenation.
62 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm item.
--
-- For a procedure: translate each basic block (which may itself split into
-- several native blocks plus out-of-line data), then, if a PIC base register
-- is in use on this platform, patch the proc to initialise it.
-- For data: passed through untranslated, since CmmStatic is used directly.
64 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
65 cmmTopCodeGen (CmmProc info lab params blocks) = do
66 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
67 picBaseMb <- getPicBaseMaybeNat
68 let proc = CmmProc info lab params (concat nat_blocks)
69 tops = proc : concat statics
-- scrutinee is picBaseMb: only PIC builds get the initialisation prologue
71 Just picBase -> initializePicBase picBase tops
72 Nothing -> return tops
74 cmmTopCodeGen (CmmData sec dat) = do
75 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Translate one Cmm basic block to native code.
--
-- Returns the (possibly several) native basic blocks it expands to, plus
-- any out-of-line data (literal tables etc.) the selector emitted inline
-- via LDATA pseudo-instructions.
77 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
78 basicBlockCodeGen (BasicBlock id stmts) = do
79 instrs <- stmtsToInstrs stmts
80 -- code generation may introduce new basic block boundaries, which
81 -- are indicated by the NEWBLOCK instruction. We must split up the
82 -- instruction stream into basic blocks again. Also, we extract
-- ...LDATA pseudo-instructions into a separate list of data items.
-- foldrOL walks the instruction tree right-to-left, so each NEWBLOCK
-- closes off the instructions accumulated so far into a new block.
85 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
87 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
88 = ([], BasicBlock id instrs : blocks, statics)
89 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
90 = (instrs, blocks, CmmData sec dat:statics)
91 mkBlocks instr (instrs,blocks,statics)
92 = (instr:instrs, blocks, statics)
-- 'top' keeps the original block id; blocks introduced by NEWBLOCK follow
94 return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements, concatenating the per-statement
-- instruction sequences in order.
96 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
98 = do instrss <- mapM stmtToInstrs stmts
99 return (concatOL instrss)
-- | Translate a single Cmm statement to native instructions.
--
-- Assignments dispatch on the MachRep of the target: floating-point,
-- 64-bit-on-32-bit (handled by the I64 helpers below), or ordinary integer.
101 stmtToInstrs :: CmmStmt -> NatM InstrBlock
102 stmtToInstrs stmt = case stmt of
103 CmmNop -> return nilOL
104 CmmComment s -> return (unitOL (COMMENT s))
-- register assignment: rep taken from the destination register
107 | isFloatingRep kind -> assignReg_FltCode kind reg src
108 #if WORD_SIZE_IN_BITS==32
109 | kind == I64 -> assignReg_I64Code reg src
111 | otherwise -> assignReg_IntCode kind reg src
112 where kind = cmmRegRep reg
-- memory store: rep taken from the source expression
115 | isFloatingRep kind -> assignMem_FltCode kind addr src
116 #if WORD_SIZE_IN_BITS==32
117 | kind == I64 -> assignMem_I64Code addr src
119 | otherwise -> assignMem_IntCode kind addr src
120 where kind = cmmExprRep src
122 CmmCall target result_regs args _
123 -> genCCall target result_regs args
125 CmmBranch id -> genBranch id
126 CmmCondBranch arg id -> genCondJump id arg
127 CmmSwitch arg ids -> genSwitch arg ids
-- 'params' are ignored here; register-passing is handled elsewhere
128 CmmJump arg params -> genJump arg
130 -- -----------------------------------------------------------------------------
131 -- General things for putting together code sequences
133 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
134 -- CmmExprs into CmmRegOff?
-- | Rewrite a CmmRegOff node into an explicit (reg + literal-offset)
-- addition at the register's own rep, so the generic MachOp selection
-- code below can handle it. Only the CmmRegOff case is visible here;
-- other constructors are presumably handled (or unreachable) elsewhere.
135 mangleIndexTree :: CmmExpr -> CmmExpr
136 mangleIndexTree (CmmRegOff reg off)
137 = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
138 where rep = cmmRegRep reg
140 -- -----------------------------------------------------------------------------
141 -- Code gen for 64-bit arithmetic on 32-bit platforms
144 Simple support for generating 64-bit code (ie, 64 bit values and 64
145 bit assignments) on 32-bit platforms. Unlike the main code generator
146 we merely shoot for generating working code as simply as possible, and
147 pay little attention to code quality. Specifically, there is no
148 attempt to deal cleverly with the fixed-vs-floating register
149 distinction; all values are generated into (pairs of) floating
150 registers, even if this would mean some redundant reg-reg moves as a
151 result. Only one of the VRegUniques is returned, since it will be
152 of the VRegUniqueLo form, and the upper-half VReg can be determined
153 by applying getHiVRegFromLo to it.
-- | Result of selecting code for a 64-bit value on a 32-bit platform:
-- an instruction sequence plus the LOW-half virtual register. The high
-- half is always the paired register obtained via getHiVRegFromLo, so
-- only one Reg needs to be returned.
156 data ChildCode64 -- a.k.a "Register64"
159 Reg -- the lower 32-bit temporary which contains the
160 -- result; use getHiVRegFromLo to find the other
161 -- VRegUnique. Rules of this simplified insn
162 -- selection game are therefore that the returned
163 -- Reg may be modified
-- 64-bit helpers for 32-bit platforms. The i386 versions follow; each
-- arch section provides the same three entry points.
165 #if WORD_SIZE_IN_BITS==32
166 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
167 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
170 #ifndef x86_64_TARGET_ARCH
171 iselExpr64 :: CmmExpr -> NatM ChildCode64
174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- Store a 64-bit value: low word at addr, high word at addr+4
-- (x86 is little-endian).
178 assignMem_I64Code addrTree valueTree = do
179 Amode addr addr_code <- getAmode addrTree
180 ChildCode64 vcode rlo <- iselExpr64 valueTree
182 rhi = getHiVRegFromLo rlo
184 -- Little-endian store
185 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
186 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
-- fromJust is safe here only if addrOffset always succeeds for +4;
-- assumed but not visible in this listing — TODO confirm
188 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Assign to a local 64-bit register pair via two reg-reg moves.
191 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
192 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
194 r_dst_lo = mkVReg u_dst I32
195 r_dst_hi = getHiVRegFromLo r_dst_lo
196 r_src_hi = getHiVRegFromLo r_src_lo
197 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
198 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
201 vcode `snocOL` mov_lo `snocOL` mov_hi
204 assignReg_I64Code lvalue valueTree
205 = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: split into two 32-bit immediates.
209 iselExpr64 (CmmLit (CmmInt i _)) = do
210 (rlo,rhi) <- getNewRegPairNat I32
212 r = fromIntegral (fromIntegral i :: Word32)
213 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
215 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
216 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
219 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, low at addr, high at addr+4.
221 iselExpr64 (CmmLoad addrTree I64) = do
222 Amode addr addr_code <- getAmode addrTree
223 (rlo,rhi) <- getNewRegPairNat I32
225 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
226 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
229 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Reading a local 64-bit register: no code, just name the low vreg.
233 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
234 = return (ChildCode64 nilOL (mkVReg vu I32))
236 -- we handle addition, but rather badly
-- add-with-constant: ADD on the low words sets carry, ADC folds it
-- into the high words.
237 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
238 ChildCode64 code1 r1lo <- iselExpr64 e1
239 (rlo,rhi) <- getNewRegPairNat I32
241 r = fromIntegral (fromIntegral i :: Word32)
242 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
243 r1hi = getHiVRegFromLo r1lo
245 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
246 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
247 MOV I32 (OpReg r1hi) (OpReg rhi),
248 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
250 return (ChildCode64 code rlo)
-- general 64-bit add: same ADD/ADC carry-chain pattern.
-- (the 'code = code1 `appOL` code2 `appOL` ...' line is elided from
-- this listing; code2 is presumably appended there)
252 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
253 ChildCode64 code1 r1lo <- iselExpr64 e1
254 ChildCode64 code2 r2lo <- iselExpr64 e2
255 (rlo,rhi) <- getNewRegPairNat I32
257 r1hi = getHiVRegFromLo r1lo
258 r2hi = getHiVRegFromLo r2lo
261 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
262 ADD I32 (OpReg r2lo) (OpReg rlo),
263 MOV I32 (OpReg r1hi) (OpReg rhi),
264 ADC I32 (OpReg r2hi) (OpReg rhi) ]
266 return (ChildCode64 code rlo)
-- zero-extension to 64 bits: compute the 32-bit value into the low
-- register, then clear the high register.
268 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
270 r_dst_lo <- getNewRegNat I32
271 let r_dst_hi = getHiVRegFromLo r_dst_lo
274 ChildCode64 (code `snocOL`
275 MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
280 = pprPanic "iselExpr64(i386)" (ppr expr)
282 #endif /* i386_TARGET_ARCH */
284 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
286 #if sparc_TARGET_ARCH
-- SPARC is big-endian: high word at offset 0, low word at offset 4.
288 assignMem_I64Code addrTree valueTree = do
289 Amode addr addr_code <- getAmode addrTree
290 ChildCode64 vcode rlo <- iselExpr64 valueTree
291 (src, code) <- getSomeReg addrTree
293 rhi = getHiVRegFromLo rlo
-- NOTE(review): addr/addr_code from getAmode appear unused — the
-- address is recomputed via getSomeReg; possibly dead code, verify.
295 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
296 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
297 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- moves via OR with %g0 (the SPARC idiom for reg-reg move)
299 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
300 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
302 r_dst_lo = mkVReg u_dst pk
303 r_dst_hi = getHiVRegFromLo r_dst_lo
304 r_src_hi = getHiVRegFromLo r_src_lo
305 mov_lo = mkMOV r_src_lo r_dst_lo
306 mov_hi = mkMOV r_src_hi r_dst_hi
307 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
308 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
309 assignReg_I64Code lvalue valueTree
310 = panic "assignReg_I64Code(sparc): invalid lvalue"
313 -- Don't delete this -- it's very handy for debugging.
315 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
316 -- = panic "iselExpr64(???)"
-- 64-bit load; only matches an AddrRegReg amode here, then loads
-- relative to r1 (big-endian: high first).
318 iselExpr64 (CmmLoad addrTree I64) = do
319 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
320 rlo <- getNewRegNat I32
321 let rhi = getHiVRegFromLo rlo
322 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
323 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
325 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- copy a local 64-bit register pair into fresh temporaries
329 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
330 r_dst_lo <- getNewRegNat I32
331 let r_dst_hi = getHiVRegFromLo r_dst_lo
332 r_src_lo = mkVReg uq I32
333 r_src_hi = getHiVRegFromLo r_src_lo
334 mov_lo = mkMOV r_src_lo r_dst_lo
335 mov_hi = mkMOV r_src_hi r_dst_hi
336 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
338 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
342 = pprPanic "iselExpr64(sparc)" (ppr expr)
344 #endif /* sparc_TARGET_ARCH */
346 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
348 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for both halves of a 64-bit memory operand
-- on PowerPC (big-endian: high word first, low word at +4). If the +4
-- offset cannot be folded into the amode, fall back to materialising the
-- base address in a register and using reg+imm amodes.
350 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
351 getI64Amodes addrTree = do
352 Amode hi_addr addr_code <- getAmode addrTree
353 case addrOffset hi_addr 4 of
354 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
355 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
356 return (AddrRegImm hi_ptr (ImmInt 0),
357 AddrRegImm hi_ptr (ImmInt 4),
-- PowerPC: store a 64-bit value as two 32-bit stores via getI64Amodes.
360 assignMem_I64Code addrTree valueTree = do
361 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
362 ChildCode64 vcode rlo <- iselExpr64 valueTree
364 rhi = getHiVRegFromLo rlo
367 mov_hi = ST I32 rhi hi_addr
368 mov_lo = ST I32 rlo lo_addr
370 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- assign to a local 64-bit register pair with two MR (move-register) insns
372 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
373 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
375 r_dst_lo = mkVReg u_dst I32
376 r_dst_hi = getHiVRegFromLo r_dst_lo
377 r_src_hi = getHiVRegFromLo r_src_lo
378 mov_lo = MR r_dst_lo r_src_lo
379 mov_hi = MR r_dst_hi r_src_hi
382 vcode `snocOL` mov_lo `snocOL` mov_hi
385 assignReg_I64Code lvalue valueTree
386 = panic "assignReg_I64Code(powerpc): invalid lvalue"
389 -- Don't delete this -- it's very handy for debugging.
391 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
392 -- = panic "iselExpr64(???)"
-- PowerPC 64-bit load: two 32-bit loads through the paired amodes.
394 iselExpr64 (CmmLoad addrTree I64) = do
395 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
396 (rlo, rhi) <- getNewRegPairNat I32
397 let mov_hi = LD I32 rhi hi_addr
398 mov_lo = LD I32 rlo lo_addr
399 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- reading a local 64-bit register: no code needed
402 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
403 = return (ChildCode64 nilOL (mkVReg vu I32))
-- PowerPC 64-bit integer literal: split the constant into four 16-bit
-- halves. Each 32-bit register is built with LIS (load-immediate-shifted,
-- which sets the HIGH halfword) followed by an OR-immediate that fills in
-- the LOW halfword. half0/half1 form the low word (rlo), half2/half3 the
-- high word (rhi).
405 iselExpr64 (CmmLit (CmmInt i _)) = do
406 (rlo,rhi) <- getNewRegPairNat I32
408 half0 = fromIntegral (fromIntegral i :: Word16)
409 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
410 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
411 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
414 LIS rlo (ImmInt half1),
415 OR rlo rlo (RIImm $ ImmInt half0),
416 LIS rhi (ImmInt half3),
-- BUG FIX: half2 is the low halfword of the HIGH word, so it must be
-- OR'd into rhi (just loaded with half3 via LIS). The previous code
-- OR'd it into rlo, corrupting the low word and leaving rhi's low
-- halfword zero.
417 OR rhi rhi (RIImm $ ImmInt half2)
420 return (ChildCode64 code rlo)
-- PowerPC 64-bit add: ADDC sets the carry from the low words; the
-- (elided) following instruction presumably adds the high words with
-- carry (ADDE). Catch-all panics on unsupported 64-bit expressions.
422 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
423 ChildCode64 code1 r1lo <- iselExpr64 e1
424 ChildCode64 code2 r2lo <- iselExpr64 e2
425 (rlo,rhi) <- getNewRegPairNat I32
427 r1hi = getHiVRegFromLo r1lo
428 r2hi = getHiVRegFromLo r2lo
431 toOL [ ADDC rlo r1lo r2lo,
434 return (ChildCode64 code rlo)
437 = pprPanic "iselExpr64(powerpc)" (ppr expr)
439 #endif /* powerpc_TARGET_ARCH */
442 -- -----------------------------------------------------------------------------
443 -- The 'Register' type
445 -- 'Register's passed up the tree. If the stix code forces the register
446 -- to live in a pre-decided machine register, it comes out as @Fixed@;
447 -- otherwise, it comes out as @Any@, and the parent can decide which
448 -- register to put it in.
-- Fixed: the value lives in a pre-decided register; code is complete.
-- Any: the parent chooses the destination; the function produces the
-- code once given that register.
451 = Fixed MachRep Reg InstrBlock
452 | Any MachRep (Reg -> InstrBlock)
-- | Change only the MachRep of a Register, keeping its code/register.
454 swizzleRegisterRep :: Register -> MachRep -> Register
455 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
456 swizzleRegisterRep (Any _ codefn) rep = Any rep codefn
459 -- -----------------------------------------------------------------------------
460 -- Utils based on getRegister, below
462 -- The dual to getAnyReg: compute an expression into a register, but
463 -- we don't mind which one it is.
-- | Compute an expression into some register — caller doesn't care which.
-- For an 'Any' result a fresh temporary is allocated; the 'Fixed'
-- alternative (elided in this listing) presumably returns its register
-- directly.
464 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
466 r <- getRegister expr
469 tmp <- getNewRegNat rep
470 return (tmp, code tmp)
474 -- -----------------------------------------------------------------------------
475 -- Grab the Reg for a CmmReg
-- | Map a CmmReg to the machine/virtual register holding it. Globals
-- that live in memory (rather than a real register) are an error at
-- this stage.
477 getRegisterReg :: CmmReg -> Reg
479 getRegisterReg (CmmLocal (LocalReg u pk _))
482 getRegisterReg (CmmGlobal mid)
483 = case get_GlobalReg_reg_or_addr mid of
484 Left (RealReg rrno) -> RealReg rrno
485 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
486 -- By this stage, the only MagicIds remaining should be the
487 -- ones which map to a real machine register on this
488 -- platform. Hence ...
491 -- -----------------------------------------------------------------------------
492 -- Generate code to get a subtree into a Register
494 -- Don't delete this -- it's very handy for debugging.
496 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
497 -- = panic "getRegister(???)"
-- | The main instruction selector: compute a CmmExpr into a Register.
499 getRegister :: CmmExpr -> NatM Register
501 #if !x86_64_TARGET_ARCH
502 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
503 -- register, it can only be used for rip-relative addressing.
504 getRegister (CmmReg (CmmGlobal PicBaseReg))
506 reg <- getPicBaseNat wordRep
507 return (Fixed wordRep reg nilOL)
-- a plain register read needs no code at all
510 getRegister (CmmReg reg)
511 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
-- reg+offset is normalised to an explicit add first
513 getRegister tree@(CmmRegOff _ _)
514 = getRegister (mangleIndexTree tree)
517 #if WORD_SIZE_IN_BITS==32
518 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
519 -- TO_W_(x), TO_W_(x >> 32)
-- TO_W_(x >> 32) is just "take the high half": select the 64-bit value
-- and return its paired high register, no shift instructions needed.
521 getRegister (CmmMachOp (MO_U_Conv I64 I32)
522 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
523 ChildCode64 code rlo <- iselExpr64 x
524 return $ Fixed I32 (getHiVRegFromLo rlo) code
526 getRegister (CmmMachOp (MO_S_Conv I64 I32)
527 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
528 ChildCode64 code rlo <- iselExpr64 x
529 return $ Fixed I32 (getHiVRegFromLo rlo) code
-- plain 64->32 truncation: just use the low half
531 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
532 ChildCode64 code rlo <- iselExpr64 x
533 return $ Fixed I32 rlo code
535 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
536 ChildCode64 code rlo <- iselExpr64 x
537 return $ Fixed I32 rlo code
541 -- end of machine-"independent" bit; here we go on the rest...
-- Alpha target section. NOTE(review): this code predates the Cmm
-- conversion — it still pattern-matches on Stix trees (StDouble, StPrim,
-- StInt, StCall) and is almost certainly bit-rotted dead code kept
-- behind the #if.
543 #if alpha_TARGET_ARCH
-- double literal: emit it into a read-only data segment and load it
545 getRegister (StDouble d)
546 = getBlockIdNat `thenNat` \ lbl ->
547 getNewRegNat PtrRep `thenNat` \ tmp ->
548 let code dst = mkSeqInstrs [
549 LDATA RoDataSegment lbl [
550 DATA TF [ImmLab (rational d)]
552 LDA tmp (AddrImm (ImmCLbl lbl)),
553 LD TF dst (AddrReg tmp)]
555 return (Any F64 code)
557 getRegister (StPrim primop [x]) -- unary PrimOps
559 IntNegOp -> trivialUCode (NEG Q False) x
561 NotOp -> trivialUCode NOT x
563 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
564 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
566 OrdOp -> coerceIntCode IntRep x
569 Float2IntOp -> coerceFP2Int x
570 Int2FloatOp -> coerceInt2FP pr x
571 Double2IntOp -> coerceFP2Int x
572 Int2DoubleOp -> coerceInt2FP pr x
574 Double2FloatOp -> coerceFltCode x
575 Float2DoubleOp -> coerceFltCode x
-- transcendental float ops become out-of-line C calls to libm
577 other_op -> getRegister (StCall fn CCallConv F64 [x])
579 fn = case other_op of
580 FloatExpOp -> FSLIT("exp")
581 FloatLogOp -> FSLIT("log")
582 FloatSqrtOp -> FSLIT("sqrt")
583 FloatSinOp -> FSLIT("sin")
584 FloatCosOp -> FSLIT("cos")
585 FloatTanOp -> FSLIT("tan")
586 FloatAsinOp -> FSLIT("asin")
587 FloatAcosOp -> FSLIT("acos")
588 FloatAtanOp -> FSLIT("atan")
589 FloatSinhOp -> FSLIT("sinh")
590 FloatCoshOp -> FSLIT("cosh")
591 FloatTanhOp -> FSLIT("tanh")
592 DoubleExpOp -> FSLIT("exp")
593 DoubleLogOp -> FSLIT("log")
594 DoubleSqrtOp -> FSLIT("sqrt")
595 DoubleSinOp -> FSLIT("sin")
596 DoubleCosOp -> FSLIT("cos")
597 DoubleTanOp -> FSLIT("tan")
598 DoubleAsinOp -> FSLIT("asin")
599 DoubleAcosOp -> FSLIT("acos")
600 DoubleAtanOp -> FSLIT("atan")
601 DoubleSinhOp -> FSLIT("sinh")
602 DoubleCoshOp -> FSLIT("cosh")
603 DoubleTanhOp -> FSLIT("tanh")
605 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
607 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
-- Alpha has no Gt/Ge compares: x>y is encoded as y<x, x>=y as y<=x
609 CharGtOp -> trivialCode (CMP LTT) y x
610 CharGeOp -> trivialCode (CMP LE) y x
611 CharEqOp -> trivialCode (CMP EQQ) x y
612 CharNeOp -> int_NE_code x y
613 CharLtOp -> trivialCode (CMP LTT) x y
614 CharLeOp -> trivialCode (CMP LE) x y
616 IntGtOp -> trivialCode (CMP LTT) y x
617 IntGeOp -> trivialCode (CMP LE) y x
618 IntEqOp -> trivialCode (CMP EQQ) x y
619 IntNeOp -> int_NE_code x y
620 IntLtOp -> trivialCode (CMP LTT) x y
621 IntLeOp -> trivialCode (CMP LE) x y
623 WordGtOp -> trivialCode (CMP ULT) y x
-- NOTE(review): WordGeOp uses '(CMP ULE) x y', identical to WordLeOp
-- below and inconsistent with AddrGeOp's '(CMP ULE) y x'. Looks like
-- a latent bug in this (dead) Alpha code — the operands should
-- probably be swapped. Left as-is; verify before resurrecting.
624 WordGeOp -> trivialCode (CMP ULE) x y
625 WordEqOp -> trivialCode (CMP EQQ) x y
626 WordNeOp -> int_NE_code x y
627 WordLtOp -> trivialCode (CMP ULT) x y
628 WordLeOp -> trivialCode (CMP ULE) x y
630 AddrGtOp -> trivialCode (CMP ULT) y x
631 AddrGeOp -> trivialCode (CMP ULE) y x
632 AddrEqOp -> trivialCode (CMP EQQ) x y
633 AddrNeOp -> int_NE_code x y
634 AddrLtOp -> trivialCode (CMP ULT) x y
635 AddrLeOp -> trivialCode (CMP ULE) x y
637 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
638 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
639 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
640 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
641 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
642 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
644 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
645 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
646 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
647 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
648 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
649 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
651 IntAddOp -> trivialCode (ADD Q False) x y
652 IntSubOp -> trivialCode (SUB Q False) x y
653 IntMulOp -> trivialCode (MUL Q False) x y
654 IntQuotOp -> trivialCode (DIV Q False) x y
655 IntRemOp -> trivialCode (REM Q False) x y
657 WordAddOp -> trivialCode (ADD Q False) x y
658 WordSubOp -> trivialCode (SUB Q False) x y
659 WordMulOp -> trivialCode (MUL Q False) x y
660 WordQuotOp -> trivialCode (DIV Q True) x y
661 WordRemOp -> trivialCode (REM Q True) x y
663 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
664 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
665 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
666 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
668 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
669 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
670 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
671 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
673 AddrAddOp -> trivialCode (ADD Q False) x y
674 AddrSubOp -> trivialCode (SUB Q False) x y
675 AddrRemOp -> trivialCode (REM Q True) x y
677 AndOp -> trivialCode AND x y
678 OrOp -> trivialCode OR x y
679 XorOp -> trivialCode XOR x y
680 SllOp -> trivialCode SLL x y
681 SrlOp -> trivialCode SRL x y
683 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
684 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
685 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
687 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
688 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
690 {- ------------------------------------------------------------
691 Some bizarre special code for getting condition codes into
692 registers. Integer non-equality is a test for equality
693 followed by an XOR with 1. (Integer comparisons always set
694 the result register to 0 or 1.) Floating point comparisons of
695 any kind leave the result in a floating point register, so we
696 need to wrangle an integer register out of things.
698 int_NE_code :: StixTree -> StixTree -> NatM Register
701 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
702 getNewRegNat IntRep `thenNat` \ tmp ->
704 code = registerCode register tmp
705 src = registerName register tmp
706 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
708 return (Any IntRep code__2)
710 {- ------------------------------------------------------------
711 Comments for int_NE_code also apply to cmpF_code
714 :: (Reg -> Reg -> Reg -> Instr)
716 -> StixTree -> StixTree
719 cmpF_code instr cond x y
720 = trivialFCode pr instr x y `thenNat` \ register ->
721 getNewRegNat F64 `thenNat` \ tmp ->
722 getBlockIdNat `thenNat` \ lbl ->
724 code = registerCode register tmp
725 result = registerName register tmp
-- set dst to 1, branch over the reset if the FP condition holds,
-- otherwise fall through and zero dst (OR of zeroh with itself)
727 code__2 dst = code . mkSeqInstrs [
728 OR zeroh (RIImm (ImmInt 1)) dst,
729 BF cond result (ImmCLbl lbl),
730 OR zeroh (RIReg zeroh) dst,
733 return (Any IntRep code__2)
735 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
736 ------------------------------------------------------------
738 getRegister (CmmLoad pk mem)
739 = getAmode mem `thenNat` \ amode ->
741 code = amodeCode amode
742 src = amodeAddr amode
743 size = primRepToSize pk
744 code__2 dst = code . mkSeqInstr (LD size dst src)
746 return (Any pk code__2)
-- small integer literals via OR-immediate, larger via LDI
748 getRegister (StInt i)
751 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
753 return (Any IntRep code)
756 code dst = mkSeqInstr (LDI Q dst src)
758 return (Any IntRep code)
760 src = ImmInt (fromInteger i)
765 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
767 return (Any PtrRep code)
770 imm__2 = case imm of Just x -> x
772 #endif /* alpha_TARGET_ARCH */
774 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386 float literals: emitted as out-of-line data, then loaded.
-- The label is referenced through cmmMakeDynamicReference so PIC
-- builds get the right addressing.
778 getRegister (CmmLit (CmmFloat f F32)) = do
779 lbl <- getNewLabelNat
780 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
781 Amode addr addr_code <- getAmode dynRef
785 CmmStaticLit (CmmFloat f F32)]
786 `consOL` (addr_code `snocOL`
789 return (Any F32 code)
-- special-cased doubles: the x87 has dedicated load-zero (GLDZ) and
-- load-one (GLD1) instructions, so 0.0 and 1.0 need no data segment
792 getRegister (CmmLit (CmmFloat d F64))
794 = let code dst = unitOL (GLDZ dst)
795 in return (Any F64 code)
798 = let code dst = unitOL (GLD1 dst)
799 in return (Any F64 code)
-- general double literal: same data-segment scheme as F32 above
802 lbl <- getNewLabelNat
803 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
804 Amode addr addr_code <- getAmode dynRef
808 CmmStaticLit (CmmFloat d F64)]
809 `consOL` (addr_code `snocOL`
812 return (Any F64 code)
814 #endif /* i386_TARGET_ARCH */
816 #if x86_64_TARGET_ARCH
-- SSE: 0.0 is cheapest as xor-with-self (also breaks dependencies)
818 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
819 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
820 -- I don't know why there are xorpd, xorps, and pxor instructions.
821 -- They all appear to do the same thing --SDM
822 return (Any rep code)
-- other float literals: data segment + rip-relative load
824 getRegister (CmmLit (CmmFloat f rep)) = do
825 lbl <- getNewLabelNat
826 let code dst = toOL [
829 CmmStaticLit (CmmFloat f rep)],
830 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
833 return (Any rep code)
835 #endif /* x86_64_TARGET_ARCH */
837 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
839 -- catch simple cases of zero- or sign-extended load
-- these fuse the load and the extension into one movzx/movsx,
-- instead of a plain load followed by a separate extend
840 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
841 code <- intLoadCode (MOVZxL I8) addr
842 return (Any I32 code)
844 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
845 code <- intLoadCode (MOVSxL I8) addr
846 return (Any I32 code)
848 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
849 code <- intLoadCode (MOVZxL I16) addr
850 return (Any I32 code)
852 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
853 code <- intLoadCode (MOVSxL I16) addr
854 return (Any I32 code)
858 #if x86_64_TARGET_ARCH
860 -- catch simple cases of zero- or sign-extended load
861 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
862 code <- intLoadCode (MOVZxL I8) addr
863 return (Any I64 code)
865 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
866 code <- intLoadCode (MOVSxL I8) addr
867 return (Any I64 code)
869 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
870 code <- intLoadCode (MOVZxL I16) addr
871 return (Any I64 code)
873 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
874 code <- intLoadCode (MOVSxL I16) addr
875 return (Any I64 code)
877 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
878 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
879 return (Any I64 code)
881 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
882 code <- intLoadCode (MOVSxL I32) addr
883 return (Any I64 code)
887 #if x86_64_TARGET_ARCH
-- PicBaseReg + literal on x86_64 is exactly rip-relative addressing,
-- so it becomes a single LEA
888 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
889 CmmLit displacement])
890 = return $ Any I64 (\dst -> unitOL $
891 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
894 #if x86_64_TARGET_ARCH
-- SSE has no FP negate instruction; flip the sign bit by XORing with
-- a sign-bit mask held in a 16-byte-aligned read-only constant
895 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
896 x_code <- getAnyReg x
897 lbl <- getNewLabelNat
899 code dst = x_code dst `appOL` toOL [
900 -- This is how gcc does it, so it can't be that bad:
901 LDATA ReadOnlyData16 [
904 CmmStaticLit (CmmInt 0x80000000 I32),
905 CmmStaticLit (CmmInt 0 I32),
906 CmmStaticLit (CmmInt 0 I32),
907 CmmStaticLit (CmmInt 0 I32)
909 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
910 -- xorps, so we need the 128-bit constant
911 -- ToDo: rip-relative
914 return (Any F32 code)
916 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
917 x_code <- getAnyReg x
918 lbl <- getNewLabelNat
920 -- This is how gcc does it, so it can't be that bad:
921 code dst = x_code dst `appOL` toOL [
922 LDATA ReadOnlyData16 [
925 CmmStaticLit (CmmInt 0x8000000000000000 I64),
926 CmmStaticLit (CmmInt 0 I64)
928 -- gcc puts an unpck here. Wonder if we need it.
929 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
930 -- xorpd, so we need the 128-bit constant
933 return (Any F64 code)
936 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- unary MachOps: negation, complement, and the conversion matrix.
-- Same-rep conversions are no-ops; narrowing to I8/I16 goes through a
-- byte-addressable register; widening uses movzx/movsx.
938 getRegister (CmmMachOp mop [x]) -- unary MachOps
941 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
942 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
945 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
946 MO_Not rep -> trivialUCode rep (NOT rep) x
-- conversions to smaller reps: value already fits, just ensure the
-- register is byte/word addressable
949 MO_U_Conv I32 I8 -> toI8Reg I32 x
950 MO_S_Conv I32 I8 -> toI8Reg I32 x
951 MO_U_Conv I16 I8 -> toI8Reg I16 x
952 MO_S_Conv I16 I8 -> toI8Reg I16 x
953 MO_U_Conv I32 I16 -> toI16Reg I32 x
954 MO_S_Conv I32 I16 -> toI16Reg I32 x
955 #if x86_64_TARGET_ARCH
956 MO_U_Conv I64 I32 -> conversionNop I64 x
957 MO_S_Conv I64 I32 -> conversionNop I64 x
958 MO_U_Conv I64 I16 -> toI16Reg I64 x
959 MO_S_Conv I64 I16 -> toI16Reg I64 x
960 MO_U_Conv I64 I8 -> toI8Reg I64 x
961 MO_S_Conv I64 I8 -> toI8Reg I64 x
964 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
965 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
-- widening: movzx (zero-extend) / movsx (sign-extend)
968 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
969 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
970 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
972 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
973 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
974 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
976 #if x86_64_TARGET_ARCH
977 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
978 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
979 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
980 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
981 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
982 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
983 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
984 -- However, we don't want the register allocator to throw it
985 -- away as an unnecessary reg-to-reg move, so we keep it in
986 -- the form of a movzl and print it as a movl later.
-- FP conversions: no-ops on x87 (everything is 80-bit internally),
-- real coercions on SSE
990 MO_S_Conv F32 F64 -> conversionNop F64 x
991 MO_S_Conv F64 F32 -> conversionNop F32 x
993 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
994 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
998 | isFloatingRep from -> coerceFP2Int from to x
999 | isFloatingRep to -> coerceInt2FP from to x
1001 other -> pprPanic "getRegister" (pprMachOp mop)
1003 -- signed or unsigned extension.
1004 integerExtend from to instr expr = do
1005 (reg,e_code) <- if from == I8 then getByteReg expr
1006 else getSomeReg expr
1010 instr from (OpReg reg) (OpReg dst)
1011 return (Any to code)
1013 toI8Reg new_rep expr
1014 = do codefn <- getAnyReg expr
1015 return (Any new_rep codefn)
1016 -- HACK: use getAnyReg to get a byte-addressable register.
1017 -- If the source was a Fixed register, this will add the
1018 -- mov instruction to put it into the desired destination.
1019 -- We're assuming that the destination won't be a fixed
1020 -- non-byte-addressable register; it won't be, because all
1021 -- fixed registers are word-sized.
1023 toI16Reg = toI8Reg -- for now
1025 conversionNop new_rep expr
1026 = do e_code <- getRegister expr
1027 return (swizzleRegisterRep e_code new_rep)
-- dyadic MachOps: comparisons produce a condition-flag-based Register;
-- arithmetic dispatches to the trivial*Code helpers. The ASSERT guards
-- against I8 operands, which x86 condition-code handling can't take here.
1030 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1031 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1033 MO_Eq F32 -> condFltReg EQQ x y
1034 MO_Ne F32 -> condFltReg NE x y
1035 MO_S_Gt F32 -> condFltReg GTT x y
1036 MO_S_Ge F32 -> condFltReg GE x y
1037 MO_S_Lt F32 -> condFltReg LTT x y
1038 MO_S_Le F32 -> condFltReg LE x y
1040 MO_Eq F64 -> condFltReg EQQ x y
1041 MO_Ne F64 -> condFltReg NE x y
1042 MO_S_Gt F64 -> condFltReg GTT x y
1043 MO_S_Ge F64 -> condFltReg GE x y
1044 MO_S_Lt F64 -> condFltReg LTT x y
1045 MO_S_Le F64 -> condFltReg LE x y
1047 MO_Eq rep -> condIntReg EQQ x y
1048 MO_Ne rep -> condIntReg NE x y
1050 MO_S_Gt rep -> condIntReg GTT x y
1051 MO_S_Ge rep -> condIntReg GE x y
1052 MO_S_Lt rep -> condIntReg LTT x y
1053 MO_S_Le rep -> condIntReg LE x y
-- unsigned comparisons use the unsigned condition codes
1055 MO_U_Gt rep -> condIntReg GU x y
1056 MO_U_Ge rep -> condIntReg GEU x y
1057 MO_U_Lt rep -> condIntReg LU x y
1058 MO_U_Le rep -> condIntReg LEU x y
-- FP arithmetic: x87 G* instructions on i386, SSE on x86_64
1060 #if i386_TARGET_ARCH
1061 MO_Add F32 -> trivialFCode F32 GADD x y
1062 MO_Sub F32 -> trivialFCode F32 GSUB x y
1064 MO_Add F64 -> trivialFCode F64 GADD x y
1065 MO_Sub F64 -> trivialFCode F64 GSUB x y
1067 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1068 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1071 #if x86_64_TARGET_ARCH
1072 MO_Add F32 -> trivialFCode F32 ADD x y
1073 MO_Sub F32 -> trivialFCode F32 SUB x y
1075 MO_Add F64 -> trivialFCode F64 ADD x y
1076 MO_Sub F64 -> trivialFCode F64 SUB x y
1078 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1079 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1082 MO_Add rep -> add_code rep x y
1083 MO_Sub rep -> sub_code rep x y
-- div/mod share div_code; the Bools select signed-ness and
-- quotient-vs-remainder
1085 MO_S_Quot rep -> div_code rep True True x y
1086 MO_S_Rem rep -> div_code rep True False x y
1087 MO_U_Quot rep -> div_code rep False True x y
1088 MO_U_Rem rep -> div_code rep False False x y
1090 #if i386_TARGET_ARCH
1091 MO_Mul F32 -> trivialFCode F32 GMUL x y
1092 MO_Mul F64 -> trivialFCode F64 GMUL x y
1095 #if x86_64_TARGET_ARCH
1096 MO_Mul F32 -> trivialFCode F32 MUL x y
1097 MO_Mul F64 -> trivialFCode F64 MUL x y
1100 MO_Mul rep -> let op = IMUL rep in
1101 trivialCode rep op (Just op) x y
1103 MO_S_MulMayOflo rep -> imulMayOflo rep x y
-- bitwise ops are commutative, hence the (Just op) reverse form
1105 MO_And rep -> let op = AND rep in
1106 trivialCode rep op (Just op) x y
1107 MO_Or rep -> let op = OR rep in
1108 trivialCode rep op (Just op) x y
1109 MO_Xor rep -> let op = XOR rep in
1110 trivialCode rep op (Just op) x y
1112 {- Shift ops on x86s have constraints on their source, it
1113 either has to be Imm, CL or 1
1114 => trivialCode is not restrictive enough (sigh.)
1116 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1117 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1118 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1120 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1122 --------------------
1123 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1124 imulMayOflo rep a b = do
1125 (a_reg, a_code) <- getNonClobberedReg a
1126 b_code <- getAnyReg b
1128 shift_amt = case rep of
1131 _ -> panic "shift_amt"
1133 code = a_code `appOL` b_code eax `appOL`
1135 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1136 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1137 -- sign extend lower part
1138 SUB rep (OpReg edx) (OpReg eax)
1139 -- compare against upper
1140 -- eax==0 if high part == sign extended low part
1143 return (Fixed rep eax code)
1145 --------------------
1146 shift_code :: MachRep
1147 -> (Operand -> Operand -> Instr)
1152 {- Case1: shift length as immediate -}
1153 shift_code rep instr x y@(CmmLit lit) = do
1154 x_code <- getAnyReg x
1157 = x_code dst `snocOL`
1158 instr (OpImm (litToImm lit)) (OpReg dst)
1160 return (Any rep code)
1162 {- Case2: shift length is complex (non-immediate)
1163 * y must go in %ecx.
1164 * we cannot do y first *and* put its result in %ecx, because
1165 %ecx might be clobbered by x.
1166 * if we do y second, then x cannot be
1167 in a clobbered reg. Also, we cannot clobber x's reg
1168 with the instruction itself.
1170 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1171 - do y second and put its result into %ecx. x gets placed in a fresh
1172 tmp. This is likely to be better, because the reg alloc can
1173 eliminate this reg->reg move here (it won't eliminate the other one,
1174 because the move is into the fixed %ecx).
1176 shift_code rep instr x y{-amount-} = do
1177 x_code <- getAnyReg x
1178 tmp <- getNewRegNat rep
1179 y_code <- getAnyReg y
1181 code = x_code tmp `appOL`
1183 instr (OpReg ecx) (OpReg tmp)
1185 return (Fixed rep tmp code)
1187 --------------------
1188 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1189 add_code rep x (CmmLit (CmmInt y _))
1190 | not (is64BitInteger y) = add_int rep x y
1191 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1193 --------------------
1194 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1195 sub_code rep x (CmmLit (CmmInt y _))
1196 | not (is64BitInteger (-y)) = add_int rep x (-y)
1197 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1199 -- our three-operand add instruction:
1200 add_int rep x y = do
1201 (x_reg, x_code) <- getSomeReg x
1203 imm = ImmInt (fromInteger y)
1207 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1210 return (Any rep code)
1212 ----------------------
1213 div_code rep signed quotient x y = do
1214 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1215 x_code <- getAnyReg x
1217 widen | signed = CLTD rep
1218 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1220 instr | signed = IDIV
1223 code = y_code `appOL`
1225 toOL [widen, instr rep y_op]
1227 result | quotient = eax
1231 return (Fixed rep result code)
1234 getRegister (CmmLoad mem pk)
1237 Amode src mem_code <- getAmode mem
1239 code dst = mem_code `snocOL`
1240 IF_ARCH_i386(GLD pk src dst,
1241 MOV pk (OpAddr src) (OpReg dst))
1243 return (Any pk code)
1245 #if i386_TARGET_ARCH
1246 getRegister (CmmLoad mem pk)
1249 code <- intLoadCode (instr pk) mem
1250 return (Any pk code)
1252 instr I8 = MOVZxL pk
1255 -- we always zero-extend 8-bit loads, if we
1256 -- can't think of anything better. This is because
1257 -- we can't guarantee access to an 8-bit variant of every register
1258 -- (esi and edi don't have 8-bit variants), so to make things
1259 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1262 #if x86_64_TARGET_ARCH
1263 -- Simpler memory load code on x86_64
1264 getRegister (CmmLoad mem pk)
1266 code <- intLoadCode (MOV pk) mem
1267 return (Any pk code)
1270 getRegister (CmmLit (CmmInt 0 rep))
1272 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1273 adj_rep = case rep of I64 -> I32; _ -> rep
1274 rep1 = IF_ARCH_i386( rep, adj_rep )
1276 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1278 return (Any rep code)
1280 #if x86_64_TARGET_ARCH
1281 -- optimisation for loading small literals on x86_64: take advantage
1282 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1283 -- instruction forms are shorter.
1284 getRegister (CmmLit lit)
1285 | I64 <- cmmLitRep lit, not (isBigLit lit)
1288 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1290 return (Any I64 code)
1292 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1294 -- note1: not the same as is64BitLit, because that checks for
1295 -- signed literals that fit in 32 bits, but we want unsigned
1297 -- note2: all labels are small, because we're assuming the
1298 -- small memory model (see gcc docs, -mcmodel=small).
1301 getRegister (CmmLit lit)
1305 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1307 return (Any rep code)
1309 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build loader code for an integer load: compute the addressing mode
-- for 'mem', then apply the supplied load instruction to move the
-- addressed value into whatever destination register the caller picks.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  let load dst = instr (OpAddr src) (OpReg dst)
  return (\dst -> mem_code `snocOL` load dst)
1318 -- Compute an expression into *any* register, adding the appropriate
1319 -- move instruction if necessary.
1320 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1322 r <- getRegister expr
-- | Turn a 'Register' into a dst-parameterised instruction sequence.
-- An 'Any' already is one; a 'Fixed' gets an extra reg-to-reg move
-- appended so the value ends up in the requested destination.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
  Any _ code          -> return code
  Fixed rep reg fcode -> return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1329 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1330 -- Fixed registers might not be byte-addressable, so we make sure we've
1331 -- got a temporary, inserting an extra reg copy if necessary.
1332 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1333 #if x86_64_TARGET_ARCH
1334 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1336 getByteReg expr = do
1337 r <- getRegister expr
1340 tmp <- getNewRegNat rep
1341 return (tmp, code tmp)
1343 | isVirtualReg reg -> return (reg,code)
1345 tmp <- getNewRegNat rep
1346 return (tmp, code `snocOL` reg2reg rep reg tmp)
1347 -- ToDo: could optimise slightly by checking for byte-addressable
1348 -- real registers, but that will happen very rarely if at all.
1351 -- Another variant: this time we want the result in a register that cannot
1352 -- be modified by code to evaluate an arbitrary expression.
1353 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1354 getNonClobberedReg expr = do
1355 r <- getRegister expr
1358 tmp <- getNewRegNat rep
1359 return (tmp, code tmp)
1361 -- only free regs can be clobbered
1362 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1363 tmp <- getNewRegNat rep
1364 return (tmp, code `snocOL` reg2reg rep reg tmp)
1368 reg2reg :: MachRep -> Reg -> Reg -> Instr
1370 #if i386_TARGET_ARCH
1371 | isFloatingRep rep = GMOV src dst
1373 | otherwise = MOV rep (OpReg src) (OpReg dst)
1375 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1377 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1379 #if sparc_TARGET_ARCH
1381 getRegister (CmmLit (CmmFloat f F32)) = do
1382 lbl <- getNewLabelNat
1383 let code dst = toOL [
1386 CmmStaticLit (CmmFloat f F32)],
1387 SETHI (HI (ImmCLbl lbl)) dst,
1388 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1389 return (Any F32 code)
1391 getRegister (CmmLit (CmmFloat d F64)) = do
1392 lbl <- getNewLabelNat
1393 let code dst = toOL [
1396 CmmStaticLit (CmmFloat d F64)],
1397 SETHI (HI (ImmCLbl lbl)) dst,
1398 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1399 return (Any F64 code)
1401 getRegister (CmmMachOp mop [x]) -- unary MachOps
1403 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1404 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1406 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1407 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1409 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1411 MO_U_Conv F64 F32-> coerceDbl2Flt x
1412 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1414 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1415 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1416 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1417 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1419 -- Conversions which are a nop on sparc
1421 | from == to -> conversionNop to x
1422 MO_U_Conv I32 to -> conversionNop to x
1423 MO_S_Conv I32 to -> conversionNop to x
1426 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1427 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1428 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1429 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1431 other_op -> panic "Unknown unary mach op"
1434 integerExtend signed from to expr = do
1435 (reg, e_code) <- getSomeReg expr
1439 ((if signed then SRA else SRL)
1440 reg (RIImm (ImmInt 0)) dst)
1441 return (Any to code)
1442 conversionNop new_rep expr
1443 = do e_code <- getRegister expr
1444 return (swizzleRegisterRep e_code new_rep)
1446 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1448 MO_Eq F32 -> condFltReg EQQ x y
1449 MO_Ne F32 -> condFltReg NE x y
1451 MO_S_Gt F32 -> condFltReg GTT x y
1452 MO_S_Ge F32 -> condFltReg GE x y
1453 MO_S_Lt F32 -> condFltReg LTT x y
1454 MO_S_Le F32 -> condFltReg LE x y
1456 MO_Eq F64 -> condFltReg EQQ x y
1457 MO_Ne F64 -> condFltReg NE x y
1459 MO_S_Gt F64 -> condFltReg GTT x y
1460 MO_S_Ge F64 -> condFltReg GE x y
1461 MO_S_Lt F64 -> condFltReg LTT x y
1462 MO_S_Le F64 -> condFltReg LE x y
1464 MO_Eq rep -> condIntReg EQQ x y
1465 MO_Ne rep -> condIntReg NE x y
1467 MO_S_Gt rep -> condIntReg GTT x y
1468 MO_S_Ge rep -> condIntReg GE x y
1469 MO_S_Lt rep -> condIntReg LTT x y
1470 MO_S_Le rep -> condIntReg LE x y
1472 MO_U_Gt I32 -> condIntReg GTT x y
1473 MO_U_Ge I32 -> condIntReg GE x y
1474 MO_U_Lt I32 -> condIntReg LTT x y
1475 MO_U_Le I32 -> condIntReg LE x y
1477 MO_U_Gt I16 -> condIntReg GU x y
1478 MO_U_Ge I16 -> condIntReg GEU x y
1479 MO_U_Lt I16 -> condIntReg LU x y
1480 MO_U_Le I16 -> condIntReg LEU x y
1482 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1483 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1485 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1487 -- ToDo: teach about V8+ SPARC div instructions
1488 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1489 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1490 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1491 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1493 MO_Add F32 -> trivialFCode F32 FADD x y
1494 MO_Sub F32 -> trivialFCode F32 FSUB x y
1495 MO_Mul F32 -> trivialFCode F32 FMUL x y
1496 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1498 MO_Add F64 -> trivialFCode F64 FADD x y
1499 MO_Sub F64 -> trivialFCode F64 FSUB x y
1500 MO_Mul F64 -> trivialFCode F64 FMUL x y
1501 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1503 MO_And rep -> trivialCode rep (AND False) x y
1504 MO_Or rep -> trivialCode rep (OR False) x y
1505 MO_Xor rep -> trivialCode rep (XOR False) x y
1507 MO_Mul rep -> trivialCode rep (SMUL False) x y
1509 MO_Shl rep -> trivialCode rep SLL x y
1510 MO_U_Shr rep -> trivialCode rep SRL x y
1511 MO_S_Shr rep -> trivialCode rep SRA x y
1514 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1515 [promote x, promote y])
1516 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1517 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1520 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1522 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1524 --------------------
1525 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1526 imulMayOflo rep a b = do
1527 (a_reg, a_code) <- getSomeReg a
1528 (b_reg, b_code) <- getSomeReg b
1529 res_lo <- getNewRegNat I32
1530 res_hi <- getNewRegNat I32
1532 shift_amt = case rep of
1535 _ -> panic "shift_amt"
1536 code dst = a_code `appOL` b_code `appOL`
1538 SMUL False a_reg (RIReg b_reg) res_lo,
1540 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1541 SUB False False res_lo (RIReg res_hi) dst
1543 return (Any I32 code)
1545 getRegister (CmmLoad mem pk) = do
1546 Amode src code <- getAmode mem
1548 code__2 dst = code `snocOL` LD pk src dst
1549 return (Any pk code__2)
1551 getRegister (CmmLit (CmmInt i _))
1554 src = ImmInt (fromInteger i)
1555 code dst = unitOL (OR False g0 (RIImm src) dst)
1557 return (Any I32 code)
1559 getRegister (CmmLit lit)
1560 = let rep = cmmLitRep lit
1564 OR False dst (RIImm (LO imm)) dst]
1565 in return (Any I32 code)
1567 #endif /* sparc_TARGET_ARCH */
1569 #if powerpc_TARGET_ARCH
1570 getRegister (CmmLoad mem pk)
1573 Amode addr addr_code <- getAmode mem
1574 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1575 addr_code `snocOL` LD pk dst addr
1576 return (Any pk code)
1578 -- catch simple cases of zero- or sign-extended load
1579 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1580 Amode addr addr_code <- getAmode mem
1581 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1583 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1585 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1586 Amode addr addr_code <- getAmode mem
1587 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1589 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1590 Amode addr addr_code <- getAmode mem
1591 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1593 getRegister (CmmMachOp mop [x]) -- unary MachOps
1595 MO_Not rep -> trivialUCode rep NOT x
1597 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1598 MO_S_Conv F32 F64 -> conversionNop F64 x
1601 | from == to -> conversionNop to x
1602 | isFloatingRep from -> coerceFP2Int from to x
1603 | isFloatingRep to -> coerceInt2FP from to x
1605 -- narrowing is a nop: we treat the high bits as undefined
1606 MO_S_Conv I32 to -> conversionNop to x
1607 MO_S_Conv I16 I8 -> conversionNop I8 x
1608 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1609 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1612 | from == to -> conversionNop to x
1613 -- narrowing is a nop: we treat the high bits as undefined
1614 MO_U_Conv I32 to -> conversionNop to x
1615 MO_U_Conv I16 I8 -> conversionNop I8 x
1616 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1617 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1619 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1620 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1621 MO_S_Neg rep -> trivialUCode rep NEG x
1624 conversionNop new_rep expr
1625 = do e_code <- getRegister expr
1626 return (swizzleRegisterRep e_code new_rep)
1628 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1630 MO_Eq F32 -> condFltReg EQQ x y
1631 MO_Ne F32 -> condFltReg NE x y
1633 MO_S_Gt F32 -> condFltReg GTT x y
1634 MO_S_Ge F32 -> condFltReg GE x y
1635 MO_S_Lt F32 -> condFltReg LTT x y
1636 MO_S_Le F32 -> condFltReg LE x y
1638 MO_Eq F64 -> condFltReg EQQ x y
1639 MO_Ne F64 -> condFltReg NE x y
1641 MO_S_Gt F64 -> condFltReg GTT x y
1642 MO_S_Ge F64 -> condFltReg GE x y
1643 MO_S_Lt F64 -> condFltReg LTT x y
1644 MO_S_Le F64 -> condFltReg LE x y
1646 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1647 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1649 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1650 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1651 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1652 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1654 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1655 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1656 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1657 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1659 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1660 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1661 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1662 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1664 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1665 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1666 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1667 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1669 -- optimize addition with 32-bit immediate
1673 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1674 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1677 (src, srcCode) <- getSomeReg x
1678 let imm = litToImm lit
1679 code dst = srcCode `appOL` toOL [
1680 ADDIS dst src (HA imm),
1681 ADD dst dst (RIImm (LO imm))
1683 return (Any I32 code)
1684 _ -> trivialCode I32 True ADD x y
1686 MO_Add rep -> trivialCode rep True ADD x y
1688 case y of -- subfi ('substract from' with immediate) doesn't exist
1689 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1690 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1691 _ -> trivialCodeNoImm rep SUBF y x
1693 MO_Mul rep -> trivialCode rep True MULLW x y
1695 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1697 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1698 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1700 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1701 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1703 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1704 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1706 MO_And rep -> trivialCode rep False AND x y
1707 MO_Or rep -> trivialCode rep False OR x y
1708 MO_Xor rep -> trivialCode rep False XOR x y
1710 MO_Shl rep -> trivialCode rep False SLW x y
1711 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1712 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1714 getRegister (CmmLit (CmmInt i rep))
1715 | Just imm <- makeImmediate rep True i
1717 code dst = unitOL (LI dst imm)
1719 return (Any rep code)
1721 getRegister (CmmLit (CmmFloat f frep)) = do
1722 lbl <- getNewLabelNat
1723 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1724 Amode addr addr_code <- getAmode dynRef
1726 LDATA ReadOnlyData [CmmDataLabel lbl,
1727 CmmStaticLit (CmmFloat f frep)]
1728 `consOL` (addr_code `snocOL` LD frep dst addr)
1729 return (Any frep code)
1731 getRegister (CmmLit lit)
1732 = let rep = cmmLitRep lit
1736 OR dst dst (RIImm (LO imm))
1738 in return (Any rep code)
1740 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extend(S|U)Expr: wrap an integer expression of the given rep in a
-- sign- or zero-extending conversion to I32; a no-op when the rep is
-- already I32.  PowerPC arithmetic and comparisons operate on full
-- registers, so narrower operands must be widened first.
extendSExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]
extendUExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1749 #endif /* powerpc_TARGET_ARCH */
1752 -- -----------------------------------------------------------------------------
1753 -- The 'Amode' type: Memory addressing modes passed up the tree.
1755 data Amode = Amode AddrMode InstrBlock
1758 Now, given a tree (the argument to an CmmLoad) that references memory,
1759 produce a suitable addressing mode.
1761 A Rule of the Game (tm) for Amodes: use of the addr bit must
1762 immediately follow use of the code part, since the code part puts
1763 values in registers which the addr then refers to. So you can't put
1764 anything in between, lest it overwrite some of those registers. If
1765 you need to do some other computation between the code part and use of
1766 the addr bit, first store the effective address from the amode in a
1767 temporary, then do the other computation, and then use the temporary:
1771 ... other computation ...
1775 getAmode :: CmmExpr -> NatM Amode
1776 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1778 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1780 #if alpha_TARGET_ARCH
1782 getAmode (StPrim IntSubOp [x, StInt i])
1783 = getNewRegNat PtrRep `thenNat` \ tmp ->
1784 getRegister x `thenNat` \ register ->
1786 code = registerCode register tmp
1787 reg = registerName register tmp
1788 off = ImmInt (-(fromInteger i))
1790 return (Amode (AddrRegImm reg off) code)
1792 getAmode (StPrim IntAddOp [x, StInt i])
1793 = getNewRegNat PtrRep `thenNat` \ tmp ->
1794 getRegister x `thenNat` \ register ->
1796 code = registerCode register tmp
1797 reg = registerName register tmp
1798 off = ImmInt (fromInteger i)
1800 return (Amode (AddrRegImm reg off) code)
1804 = return (Amode (AddrImm imm__2) id)
1807 imm__2 = case imm of Just x -> x
1810 = getNewRegNat PtrRep `thenNat` \ tmp ->
1811 getRegister other `thenNat` \ register ->
1813 code = registerCode register tmp
1814 reg = registerName register tmp
1816 return (Amode (AddrReg reg) code)
1818 #endif /* alpha_TARGET_ARCH */
1820 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1822 #if x86_64_TARGET_ARCH
1824 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1825 CmmLit displacement])
1826 = return $ Amode (ripRel (litToImm displacement)) nilOL
1830 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1832 -- This is all just ridiculous, since it carefully undoes
1833 -- what mangleIndexTree has just done.
1834 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1835 | not (is64BitLit lit)
1836 -- ASSERT(rep == I32)???
1837 = do (x_reg, x_code) <- getSomeReg x
1838 let off = ImmInt (-(fromInteger i))
1839 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1841 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1842 | not (is64BitLit lit)
1843 -- ASSERT(rep == I32)???
1844 = do (x_reg, x_code) <- getSomeReg x
1845 let off = ImmInt (fromInteger i)
1846 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1848 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1849 -- recognised by the next rule.
1850 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1852 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1854 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1855 [y, CmmLit (CmmInt shift _)]])
1856 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1857 = x86_complex_amode x y shift 0
1859 getAmode (CmmMachOp (MO_Add rep)
1860 [x, CmmMachOp (MO_Add _)
1861 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1862 CmmLit (CmmInt offset _)]])
1863 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1864 && not (is64BitInteger offset)
1865 = x86_complex_amode x y shift offset
1867 getAmode (CmmMachOp (MO_Add rep) [x,y])
1868 = x86_complex_amode x y 0 0
1870 getAmode (CmmLit lit) | not (is64BitLit lit)
1871 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1874 (reg,code) <- getSomeReg expr
1875 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1878 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1879 x86_complex_amode base index shift offset
1880 = do (x_reg, x_code) <- getNonClobberedReg base
1881 -- x must be in a temp, because it has to stay live over y_code
1882 -- we could compare x_reg and y_reg and do something better here...
1883 (y_reg, y_code) <- getSomeReg index
1885 code = x_code `appOL` y_code
1886 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1887 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1890 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1892 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1894 #if sparc_TARGET_ARCH
1896 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1899 (reg, code) <- getSomeReg x
1901 off = ImmInt (-(fromInteger i))
1902 return (Amode (AddrRegImm reg off) code)
1905 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1908 (reg, code) <- getSomeReg x
1910 off = ImmInt (fromInteger i)
1911 return (Amode (AddrRegImm reg off) code)
1913 getAmode (CmmMachOp (MO_Add rep) [x, y])
1915 (regX, codeX) <- getSomeReg x
1916 (regY, codeY) <- getSomeReg y
1918 code = codeX `appOL` codeY
1919 return (Amode (AddrRegReg regX regY) code)
1921 -- XXX Is this same as "leaf" in Stix?
1922 getAmode (CmmLit lit)
1924 tmp <- getNewRegNat I32
1926 code = unitOL (SETHI (HI imm__2) tmp)
1927 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1929 imm__2 = litToImm lit
1933 (reg, code) <- getSomeReg other
1936 return (Amode (AddrRegImm reg off) code)
1938 #endif /* sparc_TARGET_ARCH */
1940 #ifdef powerpc_TARGET_ARCH
1941 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1942 | Just off <- makeImmediate I32 True (-i)
1944 (reg, code) <- getSomeReg x
1945 return (Amode (AddrRegImm reg off) code)
1948 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1949 | Just off <- makeImmediate I32 True i
1951 (reg, code) <- getSomeReg x
1952 return (Amode (AddrRegImm reg off) code)
1954 -- optimize addition with 32-bit immediate
1956 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1958 tmp <- getNewRegNat I32
1959 (src, srcCode) <- getSomeReg x
1960 let imm = litToImm lit
1961 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1962 return (Amode (AddrRegImm tmp (LO imm)) code)
1964 getAmode (CmmLit lit)
1966 tmp <- getNewRegNat I32
1967 let imm = litToImm lit
1968 code = unitOL (LIS tmp (HA imm))
1969 return (Amode (AddrRegImm tmp (LO imm)) code)
1971 getAmode (CmmMachOp (MO_Add I32) [x, y])
1973 (regX, codeX) <- getSomeReg x
1974 (regY, codeY) <- getSomeReg y
1975 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1979 (reg, code) <- getSomeReg other
1982 return (Amode (AddrRegImm reg off) code)
1983 #endif /* powerpc_TARGET_ARCH */
1985 -- -----------------------------------------------------------------------------
1986 -- getOperand: sometimes any operand will do.
1988 -- getNonClobberedOperand: the value of the operand will remain valid across
1989 -- the computation of an arbitrary expression, unless the expression
1990 -- is computed directly into a register which the operand refers to
1991 -- (see trivialCode where this function is used for an example).
1993 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1995 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1996 #if x86_64_TARGET_ARCH
1997 getNonClobberedOperand (CmmLit lit)
1998 | isSuitableFloatingPointLit lit = do
1999 lbl <- getNewLabelNat
2000 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2002 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2004 getNonClobberedOperand (CmmLit lit)
2005 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2006 return (OpImm (litToImm lit), nilOL)
2007 getNonClobberedOperand (CmmLoad mem pk)
2008 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2009 Amode src mem_code <- getAmode mem
2011 if (amodeCouldBeClobbered src)
2013 tmp <- getNewRegNat wordRep
2014 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2015 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2018 return (OpAddr src', save_code `appOL` mem_code)
2019 getNonClobberedOperand e = do
2020 (reg, code) <- getNonClobberedReg e
2021 return (OpReg reg, code)
2023 amodeCouldBeClobbered :: AddrMode -> Bool
2024 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2026 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2027 regClobbered _ = False
2029 -- getOperand: the operand is not required to remain valid across the
2030 -- computation of an arbitrary expression.
2031 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2032 #if x86_64_TARGET_ARCH
2033 getOperand (CmmLit lit)
2034 | isSuitableFloatingPointLit lit = do
2035 lbl <- getNewLabelNat
2036 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2038 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2040 getOperand (CmmLit lit)
2041 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2042 return (OpImm (litToImm lit), nilOL)
2043 getOperand (CmmLoad mem pk)
2044 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2045 Amode src mem_code <- getAmode mem
2046 return (OpAddr src, mem_code)
2048 (reg, code) <- getSomeReg e
2049 return (OpReg reg, code)
2051 isOperand :: CmmExpr -> Bool
2052 isOperand (CmmLoad _ _) = True
2053 isOperand (CmmLit lit) = not (is64BitLit lit)
2054 || isSuitableFloatingPointLit lit
-- A floating-point literal can be used as a memory operand directly,
-- with one exception: zero.  Generating 0.0 into a register (e.g. via
-- xor) is better than loading it from memory, so we reject zero here
-- and let the register path handle it.
isSuitableFloatingPointLit lit = case lit of
  CmmFloat f _ -> f /= 0.0
  _            -> False
2064 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2065 getRegOrMem (CmmLoad mem pk)
2066 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2067 Amode src mem_code <- getAmode mem
2068 return (OpAddr src, mem_code)
2070 (reg, code) <- getNonClobberedReg e
2071 return (OpReg reg, code)
2073 #if x86_64_TARGET_ARCH
2074 is64BitLit (CmmInt i I64) = is64BitInteger i
2075 -- assume that labels are in the range 0-2^31-1: this assumes the
2076 -- small memory model (see gcc docs, -mcmodel=small).
2078 is64BitLit x = False
-- | True iff the value, truncated to 64 bits, does not fit in a signed
-- 32-bit immediate.  We truncate via Int64 first because a CmmInt is
-- intended to be truncated to the appropriate number of bits: e.g. -1
-- as a CmmInt might arrive as either -1 or 18446744073709551615.
is64BitInteger :: Integer -> Bool
is64BitInteger i = i64 < lo32 || i64 > hi32
  where
    i64  = fromIntegral i :: Int64
    lo32 = -0x80000000
    hi32 = 0x7fffffff
2089 -- -----------------------------------------------------------------------------
2090 -- The 'CondCode' type: Condition codes passed up the tree.
2092 data CondCode = CondCode Bool Cond InstrBlock
2094 -- Set up a condition code for a conditional branch.
-- | Turn a boolean-valued comparison CmmExpr into a CondCode that a
-- conditional branch can consume.  Only two-argument comparison
-- MachOps are handled; anything else panics.
2096 getCondCode :: CmmExpr -> NatM CondCode
2098 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100 #if alpha_TARGET_ARCH
2101 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2102 #endif /* alpha_TARGET_ARCH */
2104 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2106 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2107 -- yes, they really do seem to want exactly the same!
2109 getCondCode (CmmMachOp mop [x, y])
-- F32/F64 comparisons route to condFltCode; everything else is an
-- integer comparison via condIntCode (signed GTT/GE/LTT/LE vs
-- unsigned GU/GEU/LU/LEU).
2112 MO_Eq F32 -> condFltCode EQQ x y
2113 MO_Ne F32 -> condFltCode NE x y
2115 MO_S_Gt F32 -> condFltCode GTT x y
2116 MO_S_Ge F32 -> condFltCode GE x y
2117 MO_S_Lt F32 -> condFltCode LTT x y
2118 MO_S_Le F32 -> condFltCode LE x y
2120 MO_Eq F64 -> condFltCode EQQ x y
2121 MO_Ne F64 -> condFltCode NE x y
2123 MO_S_Gt F64 -> condFltCode GTT x y
2124 MO_S_Ge F64 -> condFltCode GE x y
2125 MO_S_Lt F64 -> condFltCode LTT x y
2126 MO_S_Le F64 -> condFltCode LE x y
2128 MO_Eq rep -> condIntCode EQQ x y
2129 MO_Ne rep -> condIntCode NE x y
2131 MO_S_Gt rep -> condIntCode GTT x y
2132 MO_S_Ge rep -> condIntCode GE x y
2133 MO_S_Lt rep -> condIntCode LTT x y
2134 MO_S_Le rep -> condIntCode LE x y
2136 MO_U_Gt rep -> condIntCode GU x y
2137 MO_U_Ge rep -> condIntCode GEU x y
2138 MO_U_Lt rep -> condIntCode LU x y
2139 MO_U_Le rep -> condIntCode LEU x y
2141 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2143 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2145 #elif powerpc_TARGET_ARCH
2147 -- almost the same as everywhere else - but we need to
2148 -- extend small integers to 32 bit first
2150 getCondCode (CmmMachOp mop [x, y])
2152 MO_Eq F32 -> condFltCode EQQ x y
2153 MO_Ne F32 -> condFltCode NE x y
2155 MO_S_Gt F32 -> condFltCode GTT x y
2156 MO_S_Ge F32 -> condFltCode GE x y
2157 MO_S_Lt F32 -> condFltCode LTT x y
2158 MO_S_Le F32 -> condFltCode LE x y
2160 MO_Eq F64 -> condFltCode EQQ x y
2161 MO_Ne F64 -> condFltCode NE x y
2163 MO_S_Gt F64 -> condFltCode GTT x y
2164 MO_S_Ge F64 -> condFltCode GE x y
2165 MO_S_Lt F64 -> condFltCode LTT x y
2166 MO_S_Le F64 -> condFltCode LE x y
-- PPC integer compares operate at I32 (see condIntCode below), so
-- operands are widened first: zero-extended (extendUExpr) for equality
-- and unsigned orderings, sign-extended (extendSExpr) for signed ones.
2168 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2169 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2171 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2172 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2173 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2174 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2176 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2177 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2178 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2179 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2181 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2183 getCondCode other = panic "getCondCode(2)(powerpc)"
2189 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2190 -- passed back up the tree.
2192 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2194 #if alpha_TARGET_ARCH
2195 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2196 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2197 #endif /* alpha_TARGET_ARCH */
2199 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2200 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2202 -- memory vs immediate
-- CMP directly against memory avoids a load into a register; the
-- is64BitLit guard keeps immediates encodable on x86_64.
2203 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2204 Amode x_addr x_code <- getAmode x
2207 code = x_code `snocOL`
2208 CMP pk (OpImm imm) (OpAddr x_addr)
2210 return (CondCode False cond code)
-- comparison against zero: TEST reg,reg sets the flags without an
-- immediate operand.
2213 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2214 (x_reg, x_code) <- getSomeReg x
2216 code = x_code `snocOL`
2217 TEST pk (OpReg x_reg) (OpReg x_reg)
2219 return (CondCode False cond code)
2221 -- anything vs operand
2222 condIntCode cond x y | isOperand y = do
2223 (x_reg, x_code) <- getNonClobberedReg x
2224 (y_op, y_code) <- getOperand y
2226 code = x_code `appOL` y_code `snocOL`
2227 CMP (cmmExprRep x) y_op (OpReg x_reg)
2229 return (CondCode False cond code)
2231 -- anything vs anything
2232 condIntCode cond x y = do
2233 (y_reg, y_code) <- getNonClobberedReg y
2234 (x_op, x_code) <- getRegOrMem x
2236 code = y_code `appOL`
2238 CMP (cmmExprRep x) (OpReg y_reg) x_op
2240 return (CondCode False cond code)
2243 #if i386_TARGET_ARCH
2244 condFltCode cond x y
2245 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2246 (x_reg, x_code) <- getNonClobberedReg x
2247 (y_reg, y_code) <- getSomeReg y
2249 code = x_code `appOL` y_code `snocOL`
2250 GCMP cond x_reg y_reg
2251 -- The GCMP insn does the test and sets the zero flag if comparable
2252 -- and true. Hence we always supply EQQ as the condition to test.
2253 return (CondCode True EQQ code)
2254 #endif /* i386_TARGET_ARCH */
2256 #if x86_64_TARGET_ARCH
2257 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2258 -- an operand, but the right must be a reg. We can probably do better
2259 -- than this general case...
2260 condFltCode cond x y = do
2261 (x_reg, x_code) <- getNonClobberedReg x
2262 (y_op, y_code) <- getOperand y
2264 code = x_code `appOL`
2266 CMP (cmmExprRep x) y_op (OpReg x_reg)
2267 -- NB(1): we need to use the unsigned comparison operators on the
2268 -- result of this comparison.
2270 return (CondCode True (condToUnsigned cond) code)
2273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2275 #if sparc_TARGET_ARCH
-- NOTE(review): SUB ... g0 presumably performs a flag-setting subtract
-- whose result is discarded into %g0 -- confirm against the SPARC
-- Instr definition.
2277 condIntCode cond x (CmmLit (CmmInt y rep))
2280 (src1, code) <- getSomeReg x
2282 src2 = ImmInt (fromInteger y)
2283 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2284 return (CondCode False cond code')
2286 condIntCode cond x y = do
2287 (src1, code1) <- getSomeReg x
2288 (src2, code2) <- getSomeReg y
2290 code__2 = code1 `appOL` code2 `snocOL`
2291 SUB False True src1 (RIReg src2) g0
2292 return (CondCode False cond code__2)
-- Mixed-width float compare: if exactly one operand is F32 it is
-- promoted to F64 (FxTOy) into a fresh temp before the FCMP.
2295 condFltCode cond x y = do
2296 (src1, code1) <- getSomeReg x
2297 (src2, code2) <- getSomeReg y
2298 tmp <- getNewRegNat F64
2300 promote x = FxTOy F32 F64 x tmp
2307 code1 `appOL` code2 `snocOL`
2308 FCMP True pk1 src1 src2
2309 else if pk1 == F32 then
2310 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2311 FCMP True F64 tmp src2
2313 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2314 FCMP True F64 src1 tmp
2315 return (CondCode True cond code__2)
2317 #endif /* sparc_TARGET_ARCH */
2319 #if powerpc_TARGET_ARCH
2320 -- ###FIXME: I16 and I8!
-- All integer compares are done at I32; CMPL for unsigned conditions,
-- CMP for signed.  Sub-word operands must be widened by the caller
-- (see getCondCode's extendSExpr/extendUExpr).
2321 condIntCode cond x (CmmLit (CmmInt y rep))
2322 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2324 (src1, code) <- getSomeReg x
2326 code' = code `snocOL`
2327 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2328 return (CondCode False cond code')
2330 condIntCode cond x y = do
2331 (src1, code1) <- getSomeReg x
2332 (src2, code2) <- getSomeReg y
2334 code' = code1 `appOL` code2 `snocOL`
2335 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2336 return (CondCode False cond code')
2338 condFltCode cond x y = do
2339 (src1, code1) <- getSomeReg x
2340 (src2, code2) <- getSomeReg y
2342 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2343 code'' = case cond of -- twiddle CR to handle unordered case
2344 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2345 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- bit indices into the CR field; this matches the usual PowerPC
-- layout LT=0, GT=1, EQ=2 -- note eqbit/gtbit are listed out of
-- numeric order here.
2348 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2349 return (CondCode True cond code'')
2351 #endif /* powerpc_TARGET_ARCH */
2353 -- -----------------------------------------------------------------------------
2354 -- Generating assignments
2356 -- Assignments are really at the heart of the whole code generation
2357 -- business. Almost all top-level nodes of any real importance are
2358 -- assignments, which correspond to loads, stores, or register
2359 -- transfers. If we're really lucky, some of the register transfers
2360 -- will go away, because we can use the destination register to
2361 -- complete the code generation for the right hand side. This only
2362 -- fails when the right hand side is forced into a fixed register
2363 -- (e.g. the result of a call).
-- Mem variants store an rhs CmmExpr to an address CmmExpr; Reg
-- variants move an rhs into a CmmReg.  The MachRep gives the width
-- of the assignment.
2365 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2366 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2368 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2369 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2371 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2373 #if alpha_TARGET_ARCH
-- Alpha backend, written in the older thenNat / Register style.
-- Store case: evaluate the address and the source, then ST.
2375 assignIntCode pk (CmmLoad dst _) src
2376 = getNewRegNat IntRep `thenNat` \ tmp ->
2377 getAmode dst `thenNat` \ amode ->
2378 getRegister src `thenNat` \ register ->
2380 code1 = amodeCode amode []
2381 dst__2 = amodeAddr amode
2382 code2 = registerCode register tmp []
2383 src__2 = registerName register tmp
2384 sz = primRepToSize pk
2385 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register case: if the rhs landed in a fixed register, copy it into
-- the destination with OR (register move); otherwise the rhs was
-- generated directly into the destination.
2389 assignIntCode pk dst src
2390 = getRegister dst `thenNat` \ register1 ->
2391 getRegister src `thenNat` \ register2 ->
2393 dst__2 = registerName register1 zeroh
2394 code = registerCode register2 dst__2
2395 src__2 = registerName register2 dst__2
2396 code__2 = if isFixed register2
2397 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2402 #endif /* alpha_TARGET_ARCH */
2404 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2406 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2408 -- integer assignment to memory
2410 -- specific case of adding/subtracting an integer to a particular address.
2411 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Peephole: mem[addr] := mem[addr] `op` imm becomes a single
-- read-modify-write ADD/SUB on the address.  Guarded so a 64-bit-wide
-- immediate is never emitted.
2413 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2414 CmmLit (CmmInt i _)])
2415 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2416 Just instr <- check op
2417 = do Amode amode code_addr <- getAmode addr
2418 let code = code_addr `snocOL`
2419 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2422 check (MO_Add _) = Just ADD
2423 check (MO_Sub _) = Just SUB
-- General store: source as register-or-immediate, then MOV to memory.
2428 assignMem_IntCode pk addr src = do
2429 Amode addr code_addr <- getAmode addr
2430 (code_src, op_src) <- get_op_RI src
2432 code = code_src `appOL`
2434 MOV pk op_src (OpAddr addr)
2435 -- NOTE: op_src is stable, so it will still be valid
2436 -- after code_addr. This may involve the introduction
2437 -- of an extra MOV to a temporary register, but we hope
2438 -- the register allocator will get rid of it.
2442 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2443 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2444 = return (nilOL, OpImm (litToImm lit))
2446 = do (reg,code) <- getNonClobberedReg op
2447 return (code, OpReg reg)
2450 -- Assign; dst is a reg, rhs is mem
2451 assignReg_IntCode pk reg (CmmLoad src _) = do
2452 load_code <- intLoadCode (MOV pk) src
2453 return (load_code (getRegisterReg reg))
2455 -- dst is a reg, but src could be anything
2456 assignReg_IntCode pk reg src = do
2457 code <- getAnyReg src
2458 return (code (getRegisterReg reg))
2460 #endif /* i386_TARGET_ARCH */
2462 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2464 #if sparc_TARGET_ARCH
-- Store: evaluate source into a register, then ST to the amode.
2466 assignMem_IntCode pk addr src = do
2467 (srcReg, code) <- getSomeReg src
2468 Amode dstAddr addr_code <- getAmode addr
2469 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Register assign: Any lets the rhs target dst directly; a Fixed rhs
-- needs an explicit move (OR with %g0 acts as a register copy).
2471 assignReg_IntCode pk reg src = do
2472 r <- getRegister src
2474 Any _ code -> code dst
2475 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2477 dst = getRegisterReg reg
2480 #endif /* sparc_TARGET_ARCH */
2482 #if powerpc_TARGET_ARCH
2484 assignMem_IntCode pk addr src = do
2485 (srcReg, code) <- getSomeReg src
2486 Amode dstAddr addr_code <- getAmode addr
2487 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2489 -- dst is a reg, but src could be anything
-- Same shape as sparc, but the register copy is the MR instruction.
2490 assignReg_IntCode pk reg src
2492 r <- getRegister src
2494 Any _ code -> code dst
2495 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2497 dst = getRegisterReg reg
2499 #endif /* powerpc_TARGET_ARCH */
2502 -- -----------------------------------------------------------------------------
2503 -- Floating-point assignments
2505 #if alpha_TARGET_ARCH
-- Alpha: identical structure to assignIntCode above, but the
-- register-to-register move is FMOV.
2507 assignFltCode pk (CmmLoad dst _) src
2508 = getNewRegNat pk `thenNat` \ tmp ->
2509 getAmode dst `thenNat` \ amode ->
2510 getRegister src `thenNat` \ register ->
2512 code1 = amodeCode amode []
2513 dst__2 = amodeAddr amode
2514 code2 = registerCode register tmp []
2515 src__2 = registerName register tmp
2516 sz = primRepToSize pk
2517 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2521 assignFltCode pk dst src
2522 = getRegister dst `thenNat` \ register1 ->
2523 getRegister src `thenNat` \ register2 ->
2525 dst__2 = registerName register1 zeroh
2526 code = registerCode register2 dst__2
2527 src__2 = registerName register2 dst__2
2528 code__2 = if isFixed register2
2529 then code . mkSeqInstr (FMOV src__2 dst__2)
2534 #endif /* alpha_TARGET_ARCH */
2536 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2538 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2540 -- Floating point assignment to memory
-- i386 uses the x87 store (GST); x86_64 stores an SSE register with a
-- plain MOV.
2541 assignMem_FltCode pk addr src = do
2542 (src_reg, src_code) <- getNonClobberedReg src
2543 Amode addr addr_code <- getAmode addr
2545 code = src_code `appOL`
2547 IF_ARCH_i386(GST pk src_reg addr,
2548 MOV pk (OpReg src_reg) (OpAddr addr))
2551 -- Floating point assignment to a register/temporary
2552 assignReg_FltCode pk reg src = do
2553 src_code <- getAnyReg src
2554 return (src_code (getRegisterReg reg))
2556 #endif /* i386_TARGET_ARCH */
2558 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2560 #if sparc_TARGET_ARCH
2562 -- Floating point assignment to memory
-- If the source's rep differs from the store width, convert via
-- FxTOy into a temp before storing.
2563 assignMem_FltCode pk addr src = do
2564 Amode dst__2 code1 <- getAmode addr
2565 (src__2, code2) <- getSomeReg src
2566 tmp1 <- getNewRegNat pk
2568 pk__2 = cmmExprRep src
2569 code__2 = code1 `appOL` code2 `appOL`
2571 then unitOL (ST pk src__2 dst__2)
2572 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2575 -- Floating point assignment to a register/temporary
2576 -- ToDo: Verify correctness
2577 assignReg_FltCode pk reg src = do
2578 r <- getRegister src
2579 v1 <- getNewRegNat pk
2581 Any _ code -> code dst
2582 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2584 dst = getRegisterReg reg
2586 #endif /* sparc_TARGET_ARCH */
2588 #if powerpc_TARGET_ARCH
-- PPC float moves use the same ST / MR-style code paths as integers.
2591 assignMem_FltCode = assignMem_IntCode
2592 assignReg_FltCode = assignReg_IntCode
2594 #endif /* powerpc_TARGET_ARCH */
2597 -- -----------------------------------------------------------------------------
2598 -- Generating an non-local jump
2600 -- (If applicable) Do not fill the delay slots here; you will confuse the
2601 -- register allocator.
2603 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2605 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2607 #if alpha_TARGET_ARCH
2609 genJump (CmmLabel lbl)
2610 | isAsmTemp lbl = returnInstr (BR target)
2611 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2613 target = ImmCLbl lbl
2616 = getRegister tree `thenNat` \ register ->
2617 getNewRegNat PtrRep `thenNat` \ tmp ->
2619 dst = registerName register pv
2620 code = registerCode register pv
2621 target = registerName register pv
2623 if isFixed register then
2624 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2626 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2628 #endif /* alpha_TARGET_ARCH */
2630 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2632 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump through memory, to an immediate label, or via a register.
2634 genJump (CmmLoad mem pk) = do
2635 Amode target code <- getAmode mem
2636 return (code `snocOL` JMP (OpAddr target))
2638 genJump (CmmLit lit) = do
2639 return (unitOL (JMP (OpImm (litToImm lit))))
2642 (reg,code) <- getSomeReg expr
2643 return (code `snocOL` JMP (OpReg reg))
2645 #endif /* i386_TARGET_ARCH */
2647 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2649 #if sparc_TARGET_ARCH
-- sparc: trailing NOPs fill the branch delay slot (see note above).
2651 genJump (CmmLit (CmmLabel lbl))
2652 = return (toOL [CALL (Left target) 0 True, NOP])
2654 target = ImmCLbl lbl
2658 (target, code) <- getSomeReg tree
2659 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2661 #endif /* sparc_TARGET_ARCH */
2663 #if powerpc_TARGET_ARCH
-- ppc: computed jumps go via the count register (MTCTR then BCTR).
2664 genJump (CmmLit (CmmLabel lbl))
2665 = return (unitOL $ JMP lbl)
2669 (target,code) <- getSomeReg tree
2670 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2671 #endif /* powerpc_TARGET_ARCH */
2674 -- -----------------------------------------------------------------------------
2675 -- Unconditional branches
-- | Unconditional branch to a local basic block, expanded by the
-- target-specific 'mkBranchInstr'.
genBranch :: BlockId -> NatM InstrBlock
genBranch block_id = return (toOL (mkBranchInstr block_id))
2681 -- -----------------------------------------------------------------------------
2682 -- Conditional jumps
2685 Conditional jumps are always to local labels, so we can use branch
2686 instructions. We peek at the arguments to decide what kind of
2689 ALPHA: For comparisons with 0, we're laughing, because we can just do
2690 the desired conditional branch.
2692 I386: First, we have to ensure that the condition
2693 codes are set according to the supplied comparison operation.
2695 SPARC: First, we have to ensure that the condition codes are set
2696 according to the supplied comparison operation. We generate slightly
2697 different code for floating point comparisons, because a floating
2698 point operation cannot directly precede a @BF@. We assume the worst
2699 and fill that slot with a @NOP@.
2701 SPARC: Do not fill the delay slots here; you will confuse the register
2707 :: BlockId -- the branch target
2708 -> CmmExpr -- the condition on which to branch
2711 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2713 #if alpha_TARGET_ARCH
-- Alpha, pre-Cmm (StixTree) code.  Special-cases comparison against
-- an integer or double literal zero so BI/BF can branch directly;
-- otherwise materialises the comparison result first.
2715 genCondJump id (StPrim op [x, StInt 0])
2716 = getRegister x `thenNat` \ register ->
2717 getNewRegNat (registerRep register)
2720 code = registerCode register tmp
2721 value = registerName register tmp
2722 pk = registerRep register
2723 target = ImmCLbl lbl
2725 returnSeq code [BI (cmpOp op) value target]
2727 cmpOp CharGtOp = GTT
2729 cmpOp CharEqOp = EQQ
2731 cmpOp CharLtOp = LTT
-- NOTE(review): ALWAYS/NEVER for the unsigned-vs-zero cases encode
-- that an unsigned value is always >= 0 and never < 0.
2740 cmpOp WordGeOp = ALWAYS
2741 cmpOp WordEqOp = EQQ
2743 cmpOp WordLtOp = NEVER
2744 cmpOp WordLeOp = EQQ
2746 cmpOp AddrGeOp = ALWAYS
2747 cmpOp AddrEqOp = EQQ
2749 cmpOp AddrLtOp = NEVER
2750 cmpOp AddrLeOp = EQQ
2752 genCondJump lbl (StPrim op [x, StDouble 0.0])
2753 = getRegister x `thenNat` \ register ->
2754 getNewRegNat (registerRep register)
2757 code = registerCode register tmp
2758 value = registerName register tmp
2759 pk = registerRep register
2760 target = ImmCLbl lbl
2762 return (code . mkSeqInstr (BF (cmpOp op) value target))
2764 cmpOp FloatGtOp = GTT
2765 cmpOp FloatGeOp = GE
2766 cmpOp FloatEqOp = EQQ
2767 cmpOp FloatNeOp = NE
2768 cmpOp FloatLtOp = LTT
2769 cmpOp FloatLeOp = LE
2770 cmpOp DoubleGtOp = GTT
2771 cmpOp DoubleGeOp = GE
2772 cmpOp DoubleEqOp = EQQ
2773 cmpOp DoubleNeOp = NE
2774 cmpOp DoubleLtOp = LTT
2775 cmpOp DoubleLeOp = LE
-- General float comparison: each op maps to an inverted FCMP plus the
-- condition to branch on afterwards.
2777 genCondJump lbl (StPrim op [x, y])
2779 = trivialFCode pr instr x y `thenNat` \ register ->
2780 getNewRegNat F64 `thenNat` \ tmp ->
2782 code = registerCode register tmp
2783 result = registerName register tmp
2784 target = ImmCLbl lbl
2786 return (code . mkSeqInstr (BF cond result target))
2788 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2790 fltCmpOp op = case op of
2804 (instr, cond) = case op of
2805 FloatGtOp -> (FCMP TF LE, EQQ)
2806 FloatGeOp -> (FCMP TF LTT, EQQ)
2807 FloatEqOp -> (FCMP TF EQQ, NE)
2808 FloatNeOp -> (FCMP TF EQQ, EQQ)
2809 FloatLtOp -> (FCMP TF LTT, NE)
2810 FloatLeOp -> (FCMP TF LE, NE)
2811 DoubleGtOp -> (FCMP TF LE, EQQ)
2812 DoubleGeOp -> (FCMP TF LTT, EQQ)
2813 DoubleEqOp -> (FCMP TF EQQ, NE)
2814 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2815 DoubleLtOp -> (FCMP TF LTT, NE)
2816 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same inverted-compare scheme.
2818 genCondJump lbl (StPrim op [x, y])
2819 = trivialCode instr x y `thenNat` \ register ->
2820 getNewRegNat IntRep `thenNat` \ tmp ->
2822 code = registerCode register tmp
2823 result = registerName register tmp
2824 target = ImmCLbl lbl
2826 return (code . mkSeqInstr (BI cond result target))
2828 (instr, cond) = case op of
2829 CharGtOp -> (CMP LE, EQQ)
2830 CharGeOp -> (CMP LTT, EQQ)
2831 CharEqOp -> (CMP EQQ, NE)
2832 CharNeOp -> (CMP EQQ, EQQ)
2833 CharLtOp -> (CMP LTT, NE)
2834 CharLeOp -> (CMP LE, NE)
2835 IntGtOp -> (CMP LE, EQQ)
2836 IntGeOp -> (CMP LTT, EQQ)
2837 IntEqOp -> (CMP EQQ, NE)
2838 IntNeOp -> (CMP EQQ, EQQ)
2839 IntLtOp -> (CMP LTT, NE)
2840 IntLeOp -> (CMP LE, NE)
2841 WordGtOp -> (CMP ULE, EQQ)
2842 WordGeOp -> (CMP ULT, EQQ)
2843 WordEqOp -> (CMP EQQ, NE)
2844 WordNeOp -> (CMP EQQ, EQQ)
2845 WordLtOp -> (CMP ULT, NE)
2846 WordLeOp -> (CMP ULE, NE)
2847 AddrGtOp -> (CMP ULE, EQQ)
2848 AddrGeOp -> (CMP ULT, EQQ)
2849 AddrEqOp -> (CMP EQQ, NE)
2850 AddrNeOp -> (CMP EQQ, EQQ)
2851 AddrLtOp -> (CMP ULT, NE)
2852 AddrLeOp -> (CMP ULE, NE)
2854 #endif /* alpha_TARGET_ARCH */
2856 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2858 #if i386_TARGET_ARCH
-- i386: the CondCode's code sets the flags; JXX does the branch.
2860 genCondJump id bool = do
2861 CondCode _ cond code <- getCondCode bool
2862 return (code `snocOL` JXX cond id)
2866 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2868 #if x86_64_TARGET_ARCH
-- x86_64: float comparisons need extra care for the unordered (NaN)
-- case -- see the comment with condFltReg.  An extra local label is
-- used to combine the parity and condition tests.
2870 genCondJump id bool = do
2871 CondCode is_float cond cond_code <- getCondCode bool
2874 return (cond_code `snocOL` JXX cond id)
2876 lbl <- getBlockIdNat
2878 -- see comment with condFltReg
2879 let code = case cond of
2885 plain_test = unitOL (
2888 or_unordered = toOL [
2892 and_ordered = toOL [
2898 return (cond_code `appOL` code)
2902 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2904 #if sparc_TARGET_ARCH
-- sparc: BF for float conditions, BI for integer; NOPs fill the
-- branch delay slots.
2906 genCondJump (BlockId id) bool = do
2907 CondCode is_float cond code <- getCondCode bool
2912 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2913 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2917 #endif /* sparc_TARGET_ARCH */
2920 #if powerpc_TARGET_ARCH
2922 genCondJump id bool = do
2923 CondCode is_float cond code <- getCondCode bool
2924 return (code `snocOL` BCC cond id)
2926 #endif /* powerpc_TARGET_ARCH */
2929 -- -----------------------------------------------------------------------------
2930 -- Generating C calls
2932 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2933 -- @get_arg@, which moves the arguments to the correct registers/stack
2934 -- locations. Apart from that, the code is easy.
2936 -- (If applicable) Do not fill the delay slots here; you will confuse the
2937 -- register allocator.
2940 :: CmmCallTarget -- function to call
2941 -> CmmHintFormals -- where to put the result
2942 -> CmmActuals -- arguments (of mixed type)
2945 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2947 #if alpha_TARGET_ARCH
-- Alpha C call: marshal args into registers/stack, load the callee
-- address into pv, JSR, then reload gp.
2951 genCCall fn cconv result_regs args
2952 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2953 `thenNat` \ ((unused,_), argCode) ->
2955 nRegs = length allArgRegs - length unused
2956 code = asmSeqThen (map ($ []) argCode)
2959 LDA pv (AddrImm (ImmLab (ptext fn))),
2960 JSR ra (AddrReg pv) nRegs,
2961 LDGP gp (AddrReg ra)]
2963 ------------------------
2964 {- Try to get a value into a specific register (or registers) for
2965 a call. The first 6 arguments go into the appropriate
2966 argument register (separate registers for integer and floating
2967 point arguments, but used in lock-step), and the remaining
2968 arguments are dumped to the stack, beginning at 0(sp). Our
2969 first argument is a pair of the list of remaining argument
2970 registers to be assigned for this call and the next stack
2971 offset to use for overflowing arguments. This way,
2972 @get_Arg@ can be applied to all of a call's arguments using
2976 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2977 -> StixTree -- Current argument
2978 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2980 -- We have to use up all of our argument registers first...
2982 get_arg ((iDst,fDst):dsts, offset) arg
2983 = getRegister arg `thenNat` \ register ->
2985 reg = if isFloatingRep pk then fDst else iDst
2986 code = registerCode register reg
2987 src = registerName register reg
2988 pk = registerRep register
2991 if isFloatingRep pk then
2992 ((dsts, offset), if isFixed register then
2993 code . mkSeqInstr (FMOV src fDst)
2996 ((dsts, offset), if isFixed register then
2997 code . mkSeqInstr (OR src (RIReg src) iDst)
3000 -- Once we have run out of argument registers, we move to the
3003 get_arg ([], offset) arg
3004 = getRegister arg `thenNat` \ register ->
3005 getNewRegNat (registerRep register)
3008 code = registerCode register tmp
3009 src = registerName register tmp
3010 pk = registerRep register
3011 sz = primRepToSize pk
3013 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3015 #endif /* alpha_TARGET_ARCH */
3017 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3019 #if i386_TARGET_ARCH
3021 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3022 -- write barrier compiles to no code on x86/x86-64;
3023 -- we keep it this long in order to prevent earlier optimisations.
3025 -- we only cope with a single result for foreign calls
-- sqrt/sin/cos/tan are inlined as x87 instructions; every other
-- CallishMachOp becomes an out-of-line libm call.
3026 genCCall (CmmPrim op) [(r,_)] args = do
3028 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3029 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3031 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3032 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3034 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3035 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3037 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3038 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3040 other_op -> outOfLineFloatOp op r args
3042 actuallyInlineFloatOp rep instr [(x,_)]
3043 = do res <- trivialUFCode rep instr x
3045 return (any (getRegisterReg (CmmLocal r)))
-- General i386 C call: push args right-to-left, tracking the stack
-- delta; on Darwin, pad first so the outgoing area is 16-byte aligned.
3047 genCCall target dest_regs args = do
3049 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
3050 #if !darwin_TARGET_OS
3051 tot_arg_size = sum sizes
3053 raw_arg_size = sum sizes
3054 tot_arg_size = roundTo 16 raw_arg_size
3055 arg_pad_size = tot_arg_size - raw_arg_size
3056 delta0 <- getDeltaNat
3057 setDeltaNat (delta0 - arg_pad_size)
3060 push_codes <- mapM push_arg (reverse args)
3061 delta <- getDeltaNat
3064 -- deal with static vs dynamic call targets
3065 (callinsns,cconv) <-
3068 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3069 -> -- ToDo: stdcall arg sizes
3070 return (unitOL (CALL (Left fn_imm) []), conv)
3071 where fn_imm = ImmCLbl lbl
3072 CmmForeignCall expr conv
3073 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3074 ASSERT(dyn_rep == I32)
3075 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3078 #if darwin_TARGET_OS
3080 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3081 DELTA (delta0 - arg_pad_size)]
3082 `appOL` concatOL push_codes
3085 = concatOL push_codes
3086 call = callinsns `appOL`
3088 -- Deallocate parameters after call for ccall;
3089 -- but not for stdcall (callee does it)
3090 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3091 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3093 [DELTA (delta + tot_arg_size)]
3096 setDeltaNat (delta + tot_arg_size)
3099 -- assign the results, if necessary
-- Results come back in eax (ints; edx:eax for I64) or x87 fake0
-- (floats) and are moved into the destination virtual register.
3100 assign_code [] = nilOL
3101 assign_code [(dest,_hint)] =
3103 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3104 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3105 F32 -> unitOL (GMOV fake0 r_dest)
3106 F64 -> unitOL (GMOV fake0 r_dest)
3107 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3109 r_dest_hi = getHiVRegFromLo r_dest
3110 rep = localRegRep dest
3111 r_dest = getRegisterReg (CmmLocal dest)
3112 assign_code many = panic "genCCall.assign_code many"
3114 return (push_code `appOL`
3116 assign_code dest_regs)
-- round x up to the next multiple of a
3124 roundTo a x | x `mod` a == 0 = x
3125 | otherwise = x + a - (x `mod` a)
3128 push_arg :: (CmmExpr,MachHint){-current argument-}
3129 -> NatM InstrBlock -- code
3131 push_arg (arg,_hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit words, high word first.
3132 | arg_rep == I64 = do
3133 ChildCode64 code r_lo <- iselExpr64 arg
3134 delta <- getDeltaNat
3135 setDeltaNat (delta - 8)
3137 r_hi = getHiVRegFromLo r_lo
3139 return ( code `appOL`
3140 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3141 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Floats can't be PUSHed from x87 registers: make room on the
-- stack with SUB and store with GST instead.
3146 (code, reg, sz) <- get_op arg
3147 delta <- getDeltaNat
3148 let size = arg_size sz
3149 setDeltaNat (delta-size)
3150 if (case sz of F64 -> True; F32 -> True; _ -> False)
3151 then return (code `appOL`
3152 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3154 GST sz reg (AddrBaseIndex (EABaseReg esp)
3158 else return (code `snocOL`
3159 PUSH I32 (OpReg reg) `snocOL`
3163 arg_rep = cmmExprRep arg
3166 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3168 (reg,code) <- getSomeReg op
3169 return (code, reg, cmmExprRep op)
3171 #endif /* i386_TARGET_ARCH */
3173 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3175 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
-- Compile a CallishMachOp as an unsafe foreign call to the matching
-- C math-library routine.  An F64 result is assigned directly; other
-- result reps go via a fresh F64 temporary and a second assignment.
3177 outOfLineFloatOp mop res args
3179 targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3180 let target = CmmForeignCall targetExpr CCallConv
3182 if localRegRep res == F64
3184 stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe)
3188 tmp = LocalReg uq F64 KindNonPtr
3190 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe)
3191 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3192 return (code1 `appOL` code2)
-- libm entry points: the F32 ops call the C99 float variants
-- (sqrtf, sinf, ...), the F64 ops the double variants.
3194 lbl = mkForeignLabel fn Nothing False
3197 MO_F32_Sqrt -> FSLIT("sqrtf")
3198 MO_F32_Sin -> FSLIT("sinf")
3199 MO_F32_Cos -> FSLIT("cosf")
3200 MO_F32_Tan -> FSLIT("tanf")
3201 MO_F32_Exp -> FSLIT("expf")
3202 MO_F32_Log -> FSLIT("logf")
3204 MO_F32_Asin -> FSLIT("asinf")
3205 MO_F32_Acos -> FSLIT("acosf")
3206 MO_F32_Atan -> FSLIT("atanf")
3208 MO_F32_Sinh -> FSLIT("sinhf")
3209 MO_F32_Cosh -> FSLIT("coshf")
3210 MO_F32_Tanh -> FSLIT("tanhf")
3211 MO_F32_Pwr -> FSLIT("powf")
3213 MO_F64_Sqrt -> FSLIT("sqrt")
3214 MO_F64_Sin -> FSLIT("sin")
3215 MO_F64_Cos -> FSLIT("cos")
3216 MO_F64_Tan -> FSLIT("tan")
3217 MO_F64_Exp -> FSLIT("exp")
3218 MO_F64_Log -> FSLIT("log")
3220 MO_F64_Asin -> FSLIT("asin")
3221 MO_F64_Acos -> FSLIT("acos")
3222 MO_F64_Atan -> FSLIT("atan")
3224 MO_F64_Sinh -> FSLIT("sinh")
3225 MO_F64_Cosh -> FSLIT("cosh")
3226 MO_F64_Tanh -> FSLIT("tanh")
3227 MO_F64_Pwr -> FSLIT("pow")
3229 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3231 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3233 #if x86_64_TARGET_ARCH
3235 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3236 -- write barrier compiles to no code on x86/x86-64;
3237 -- we keep it this long in order to prevent earlier optimisations.
3239 genCCall (CmmPrim op) [(r,_)] args =
3240 outOfLineFloatOp op r args
3242 genCCall target dest_regs args = do
3244 -- load up the register arguments
3245 (stack_args, aregs, fregs, load_args_code)
3246 <- load_args args allArgRegs allFPArgRegs nilOL
3249 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3250 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3251 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3252 -- for annotating the call instruction with
3254 sse_regs = length fp_regs_used
3256 tot_arg_size = arg_size * length stack_args
3258 -- On entry to the called function, %rsp should be aligned
3259 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3260 -- the return address is 16-byte aligned). In STG land
3261 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3262 -- need to make sure we push a multiple of 16-bytes of args,
3263 -- plus the return address, to get the correct alignment.
3264 -- Urg, this is hard. We need to feed the delta back into
3265 -- the arg pushing code.
3266 (real_size, adjust_rsp) <-
3267 if tot_arg_size `rem` 16 == 0
3268 then return (tot_arg_size, nilOL)
3269 else do -- we need to adjust...
3270 delta <- getDeltaNat
3271 setDeltaNat (delta-8)
3272 return (tot_arg_size+8, toOL [
3273 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3277 -- push the stack args, right to left
3278 push_code <- push_args (reverse stack_args) nilOL
3279 delta <- getDeltaNat
3281 -- deal with static vs dynamic call targets
3282 (callinsns,cconv) <-
3285 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3286 -> -- ToDo: stdcall arg sizes
3287 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3288 where fn_imm = ImmCLbl lbl
3289 CmmForeignCall expr conv
3290 -> do (dyn_r, dyn_c) <- getSomeReg expr
3291 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3294 -- The x86_64 ABI requires us to set %al to the number of SSE
3295 -- registers that contain arguments, if the called routine
3296 -- is a varargs function. We don't know whether it's a
3297 -- varargs function or not, so we have to assume it is.
3299 -- It's not safe to omit this assignment, even if the number
3300 -- of SSE regs in use is zero. If %al is larger than 8
3301 -- on entry to a varargs function, seg faults ensue.
3302 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3304 let call = callinsns `appOL`
3306 -- Deallocate parameters after call for ccall;
3307 -- but not for stdcall (callee does it)
3308 (if cconv == StdCallConv || real_size==0 then [] else
3309 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3311 [DELTA (delta + real_size)]
3314 setDeltaNat (delta + real_size)
3317 -- assign the results, if necessary
3318 assign_code [] = nilOL
3319 assign_code [(dest,_hint)] =
3321 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3322 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3323 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3325 rep = localRegRep dest
3326 r_dest = getRegisterReg (CmmLocal dest)
3327 assign_code many = panic "genCCall.assign_code many"
3329 return (load_args_code `appOL`
3332 assign_eax sse_regs `appOL`
3334 assign_code dest_regs)
3337 arg_size = 8 -- always, at the mo
3339 load_args :: [(CmmExpr,MachHint)]
3340 -> [Reg] -- int regs avail for args
3341 -> [Reg] -- FP regs avail for args
3343 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3344 load_args args [] [] code = return (args, [], [], code)
3345 -- no more regs to use
3346 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3347 -- no more args to push
3348 load_args ((arg,hint) : rest) aregs fregs code
3349 | isFloatingRep arg_rep =
3353 arg_code <- getAnyReg arg
3354 load_args rest aregs rs (code `appOL` arg_code r)
3359 arg_code <- getAnyReg arg
3360 load_args rest rs fregs (code `appOL` arg_code r)
3362 arg_rep = cmmExprRep arg
3365 (args',ars,frs,code') <- load_args rest aregs fregs code
3366 return ((arg,hint):args', ars, frs, code')
3368 push_args [] code = return code
3369 push_args ((arg,hint):rest) code
3370 | isFloatingRep arg_rep = do
3371 (arg_reg, arg_code) <- getSomeReg arg
3372 delta <- getDeltaNat
3373 setDeltaNat (delta-arg_size)
3374 let code' = code `appOL` arg_code `appOL` toOL [
3375 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3376 DELTA (delta-arg_size),
3377 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3378 push_args rest code'
3381 -- we only ever generate word-sized function arguments. Promotion
3382 -- has already happened: our Int8# type is kept sign-extended
3383 -- in an Int#, for example.
3384 ASSERT(arg_rep == I64) return ()
3385 (arg_op, arg_code) <- getOperand arg
3386 delta <- getDeltaNat
3387 setDeltaNat (delta-arg_size)
3388 let code' = code `appOL` toOL [PUSH I64 arg_op,
3389 DELTA (delta-arg_size)]
3390 push_args rest code'
3392 arg_rep = cmmExprRep arg
3395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3397 #if sparc_TARGET_ARCH
3399 The SPARC calling convention is an absolute
3400 nightmare. The first 6x32 bits of arguments are mapped into
3401 %o0 through %o5, and the remaining arguments are dumped to the
3402 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3404 If we have to put args on the stack, move %o6==%sp down by
3405 the number of words to go on the stack, to ensure there's enough space.
3407 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3408 16 words above the stack pointer is a word for the address of
3409 a structure return value. I use this as a temporary location
3410 for moving values from float to int regs. Certainly it isn't
3411 safe to put anything in the 16 words starting at %sp, since
3412 this area can get trashed at any time due to window overflows
3413 caused by signal handlers.
3415 A final complication (if the above isn't enough) is that
3416 we can't blithely calculate the arguments one by one into
3417 %o0 .. %o5. Consider the following nested calls:
3421 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3422 the inner call will itself use %o0, which trashes the value put there
3423 in preparation for the outer call. Upshot: we need to calculate the
3424 args into temporary regs, and move those to arg regs or onto the
3425 stack only immediately prior to the call proper. Sigh.
-- | Generate code for a C call on SPARC.
--
-- Strategy (see the large comment above): every argument is first
-- computed into one or two fresh integer vregs ('arg_to_int_vregs');
-- only immediately before the call proper are those vregs moved into
-- the outgoing-argument registers %o0..%o5 or onto the stack
-- ('move_final').  This avoids a nested call clobbering %o0..%o5
-- while the outer call's arguments are being computed.
genCCall target dest_regs argsAndHints = do
    -- drop the MachHints; everything goes through integer vregs here
    args = map fst argsAndHints
    -- one (code, vregs) pair per argument
    argcode_and_vregs <- mapM arg_to_int_vregs args
    (argcodes, vregss) = unzip argcode_and_vregs
    -- how many of %o0..%o5 actually get filled
    n_argRegs = length allArgRegs
    n_argRegs_used = min (length vregs) n_argRegs
    vregs = concat vregss
    -- deal with static vs dynamic call targets
    callinsns <- (case target of
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
                -- static target: call the label directly
                return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
        CmmForeignCall expr conv -> do
                -- dynamic target: compute the address into a vreg first
                (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
                return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
                -- callish mach-op: route to an out-of-line libm routine
                (res, reduce) <- outOfLineFloatOp mop
                lblOrMopExpr <- case res of
                     return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
                     (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
                     return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
                -- 'reduce' (the F32 variants) is not supported on this path
                if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
    argcode = concatOL argcodes
    -- if there are more args than arg registers, move %sp down to make
    -- room, rounding to an even number of words to keep 8-byte alignment
    (move_sp_down, move_sp_up)
       = let diff = length vregs - n_argRegs
             nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
         else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
    -- move the marshalled vregs into %o0..%o5 / onto the stack
    = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
    -- argument marshalling, stack adjust, register transfer, then the
    -- call itself, then the stack restore
    return (argcode `appOL`
            move_sp_down `appOL`
            transfer_code `appOL`

-- move args from the integer vregs into which they have been
-- marshalled, into %o0 .. %o5, and the rest onto the stack.
move_final :: [Reg] -> [Reg] -> Int -> [Instr]
move_final [] _ offset -- all args done
move_final (v:vs) [] offset -- out of aregs; move to stack
   = ST I32 v (spRel offset)
   : move_final vs [] (offset+1)
move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
   -- %g0 is hardwired zero on SPARC, so OR with it is a register move
   = OR False g0 (RIReg v) a
   : move_final vs az offset
-- | Map a floating-point 'CallishMachOp' to an out-of-line C routine
-- (the libm function of the same name).  Returns either the routine's
-- static label or a dynamic expression for its address, plus a flag:
-- True means the F32 variant was requested, which calls the
-- double-precision routine and so needs its result reduced to single
-- precision afterwards.
--
-- NOTE(review): unlike the PowerPC version below, MO_F32_Pwr and
-- MO_F64_Pwr are not handled here and fall into the panic — confirm
-- that is intended on SPARC.
outOfLineFloatOp mop =
    mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                mkForeignLabel functionName Nothing True
    -- a plain label can be called directly; anything else is a
    -- dynamic reference (PIC)
    let mopLabelOrExpr = case mopExpr of
            CmmLit (CmmLabel lbl) -> Left lbl
    return (mopLabelOrExpr, reduce)
    -- F32 ops use the F64 libm entry points (reduce = True);
    -- F64 ops call them directly (reduce = False)
    (reduce, functionName) = case mop of
        MO_F32_Exp -> (True, FSLIT("exp"))
        MO_F32_Log -> (True, FSLIT("log"))
        MO_F32_Sqrt -> (True, FSLIT("sqrt"))
        MO_F32_Sin -> (True, FSLIT("sin"))
        MO_F32_Cos -> (True, FSLIT("cos"))
        MO_F32_Tan -> (True, FSLIT("tan"))
        MO_F32_Asin -> (True, FSLIT("asin"))
        MO_F32_Acos -> (True, FSLIT("acos"))
        MO_F32_Atan -> (True, FSLIT("atan"))
        MO_F32_Sinh -> (True, FSLIT("sinh"))
        MO_F32_Cosh -> (True, FSLIT("cosh"))
        MO_F32_Tanh -> (True, FSLIT("tanh"))
        MO_F64_Exp -> (False, FSLIT("exp"))
        MO_F64_Log -> (False, FSLIT("log"))
        MO_F64_Sqrt -> (False, FSLIT("sqrt"))
        MO_F64_Sin -> (False, FSLIT("sin"))
        MO_F64_Cos -> (False, FSLIT("cos"))
        MO_F64_Tan -> (False, FSLIT("tan"))
        MO_F64_Asin -> (False, FSLIT("asin"))
        MO_F64_Acos -> (False, FSLIT("acos"))
        MO_F64_Atan -> (False, FSLIT("atan"))
        MO_F64_Sinh -> (False, FSLIT("sinh"))
        MO_F64_Cosh -> (False, FSLIT("cosh"))
        MO_F64_Tanh -> (False, FSLIT("tanh"))
        other -> pprPanic "outOfLineFloatOp(sparc) "
                          (pprCallishMachOp mop)
3578 #endif /* sparc_TARGET_ARCH */
3580 #if powerpc_TARGET_ARCH
3582 #if darwin_TARGET_OS || linux_TARGET_OS
3584 The PowerPC calling convention for Darwin/Mac OS X
3585 is described in Apple's document
3586 "Inside Mac OS X - Mach-O Runtime Architecture".
3588 PowerPC Linux uses the System V Release 4 Calling Convention
3589 for PowerPC. It is described in the
3590 "System V Application Binary Interface PowerPC Processor Supplement".
3592 Both conventions are similar:
3593 Parameters may be passed in general-purpose registers starting at r3, in
3594 floating point registers starting at f1, or on the stack.
3596 But there are substantial differences:
3597 * The number of registers used for parameter passing and the exact set of
3598 nonvolatile registers differs (see MachRegs.lhs).
3599 * On Darwin, stack space is always reserved for parameters, even if they are
3600 passed in registers. The called routine may choose to save parameters from
3601 registers to the corresponding space on the stack.
3602 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3603 parameter is passed in an FPR.
3604 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3605 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3606 Darwin just treats an I64 like two separate I32s (high word first).
3607 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3608 4-byte aligned like everything else on Darwin.
3609 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3610 PowerPC Linux does not agree, so neither do we.
3612 According to both conventions, The parameter area should be part of the
3613 caller's stack frame, allocated in the caller's prologue code (large enough
3614 to hold the parameter lists for all called routines). The NCG already
3615 uses the stack for register spilling, leaving 64 bytes free at the top.
3616 If we need a larger parameter area than that, we just allocate a new stack
3617 frame just before ccalling.
-- | A memory write barrier on PowerPC is a single lwsync instruction;
--   it consumes no arguments and assigns no results.
genCCall (CmmPrim MO_WriteBarrier) _ _ =
    return (unitOL LWSYNC)
-- | Generate code for a C call on PowerPC (Darwin or Linux/SysV; see
-- the convention comparison in the comment above).  Arguments are
-- placed in GPRs from r3, FPRs from f1, or on the stack, per the
-- platform ABI; the two ABIs differ in alignment, GPR skipping and
-- reserved stack slots, handled by the CPP sections below.
genCCall target dest_regs argsAndHints
  -- sub-word arguments must already have been promoted by the code
  -- generator; this NCG only handles word-sized (or larger) args
  = ASSERT (not $ any (`elem` [I8,I16]) argReps)
    -- we rely on argument promotion in the codeGen
    (finalStack,passArgumentsCode,usedRegs) <- passArguments
        allArgRegs allFPArgRegs
    -- resolve the call target: static label, dynamic address, or an
    -- out-of-line floating-point primitive
    (labelOrExpr, reduceToF32) <- case target of
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
        CmmForeignCall expr conv -> return (Right expr, False)
        CmmPrim mop -> outOfLineFloatOp mop
    let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
        codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
        -- static call: branch-and-link to the label
        `snocOL` BL lbl usedRegs
        -- dynamic call: load the target into CTR and branch there
        (dynReg, dynCode) <- getSomeReg dyn
        `snocOL` MTCTR dynReg
        `snocOL` BCTRL usedRegs
#if darwin_TARGET_OS
        initialStackOffset = 24
        -- size of linkage area + size of arguments, in bytes
        stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
                                 map machRepByteWidth argReps
#elif linux_TARGET_OS
        initialStackOffset = 8
        stackDelta finalStack = roundTo 16 finalStack
        args = map fst argsAndHints
        argReps = map cmmExprRep args
        -- round x up to the next multiple of a
        roundTo a x | x `mod` a == 0 = x
                    | otherwise = x + a - (x `mod` a)
        -- allocate a fresh stack frame for the parameter area
        -- (store-with-update keeps the back-chain pointer intact)
        move_sp_down finalStack
            toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
          where delta = stackDelta finalStack
        -- pop the parameter frame again after the call
        move_sp_up finalStack
            toOL [ADD sp sp (RIImm (ImmInt delta)),
          where delta = stackDelta finalStack

        -- recurse over the arguments, tracking remaining GPRs/FPRs,
        -- the running stack offset, accumulated code and used regs
        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
        -- I64 arguments: split into hi/lo 32-bit halves
        passArguments ((arg,I64):args) gprs fprs stackOffset
               accumCode accumUsed =
            ChildCode64 code vr_lo <- iselExpr64 arg
            let vr_hi = getHiVRegFromLo vr_lo
#if darwin_TARGET_OS
            -- Darwin: an I64 is simply two I32s, high word first;
            -- stack slots are reserved even when regs are used
                (accumCode `appOL` code
                    `snocOL` storeWord vr_hi gprs stackOffset
                    `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
                ((take 2 gprs) ++ accumUsed)
                -- move to a reg if one is left, else store to the slot
                storeWord vr (gpr:_) offset = MR gpr vr
                storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
#elif linux_TARGET_OS
            -- SysV: I64 goes in an odd/even GPR pair (skipping a GPR
            -- if needed) or in an 8-byte-aligned stack slot
            let stackOffset' = roundTo 8 stackOffset
                stackCode = accumCode `appOL` code
                    `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
                    `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
                regCode hireg loreg =
                    accumCode `appOL` code
                        `snocOL` MR hireg vr_hi
                        `snocOL` MR loreg vr_lo
                hireg : loreg : regs | even (length gprs) ->
                    passArguments args regs fprs stackOffset
                                  (regCode hireg loreg) (hireg : loreg : accumUsed)
                _skipped : hireg : loreg : regs ->
                    passArguments args regs fprs stackOffset
                                  (regCode hireg loreg) (hireg : loreg : accumUsed)
                _ -> -- only one or no regs left
                    passArguments args [] fprs (stackOffset'+8)
        -- non-I64 arguments
        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
          | reg : _ <- regs = do
            -- a register of the right class is available: move into it
            register <- getRegister arg
            let code = case register of
                    Fixed _ freg fcode -> fcode `snocOL` MR reg freg
                    Any _ acode -> acode reg
#if darwin_TARGET_OS
            -- The Darwin ABI requires that we reserve stack slots for register parameters
                (stackOffset + stackBytes)
#elif linux_TARGET_OS
            -- ... the SysV ABI doesn't.
                (accumCode `appOL` code)
            -- no register left: store to the stack slot
            (vr, code) <- getSomeReg arg
                (stackOffset' + stackBytes)
                (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
#if darwin_TARGET_OS
            -- stackOffset is at least 4-byte aligned
            -- The Darwin ABI is happy with that.
                stackOffset' = stackOffset
            -- ... the SysV ABI requires 8-byte alignment for doubles.
                stackOffset' | rep == F64 = roundTo 8 stackOffset
                             | otherwise  = stackOffset
                stackSlot = AddrRegImm sp (ImmInt stackOffset')
                -- regs consumed / stack bytes used per argument kind
                (nGprs, nFprs, stackBytes, regs) = case rep of
                    I32 -> (1, 0, 4, gprs)
#if darwin_TARGET_OS
            -- The Darwin ABI requires that we skip a corresponding number of GPRs when
                    F32 -> (1, 1, 4, fprs)
                    F64 -> (2, 1, 8, fprs)
#elif linux_TARGET_OS
            -- ... the SysV ABI doesn't.
                    F32 -> (0, 1, 4, fprs)
                    F64 -> (0, 1, 8, fprs)

        -- move the call result from the ABI return registers (r3/r4,
        -- f1) into the destination local register
        moveResult reduceToF32 =
            -- F32 result of a 'reduce' libm call: round f1 to single
            | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
            | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
            | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
            | otherwise -> unitOL (MR r_dest r3)
          where rep = cmmRegRep (CmmLocal dest)
                r_dest = getRegisterReg (CmmLocal dest)
-- | Map a floating-point 'CallishMachOp' to an out-of-line C routine
-- (the libm function of the same name), returning either its static
-- label or a dynamic address expression, plus a flag: True means the
-- F32 variant was requested, so the double-precision result must be
-- rounded back to single precision (see 'moveResult' above).
outOfLineFloatOp mop =
    mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                mkForeignLabel functionName Nothing True
    -- a plain label can be branched to directly; anything else is a
    -- dynamic reference (PIC)
    let mopLabelOrExpr = case mopExpr of
            CmmLit (CmmLabel lbl) -> Left lbl
    return (mopLabelOrExpr, reduce)
    -- F32 ops call the F64 libm entry points and reduce afterwards
    (functionName, reduce) = case mop of
        MO_F32_Exp -> (FSLIT("exp"), True)
        MO_F32_Log -> (FSLIT("log"), True)
        MO_F32_Sqrt -> (FSLIT("sqrt"), True)
        MO_F32_Sin -> (FSLIT("sin"), True)
        MO_F32_Cos -> (FSLIT("cos"), True)
        MO_F32_Tan -> (FSLIT("tan"), True)
        MO_F32_Asin -> (FSLIT("asin"), True)
        MO_F32_Acos -> (FSLIT("acos"), True)
        MO_F32_Atan -> (FSLIT("atan"), True)
        MO_F32_Sinh -> (FSLIT("sinh"), True)
        MO_F32_Cosh -> (FSLIT("cosh"), True)
        MO_F32_Tanh -> (FSLIT("tanh"), True)
        MO_F32_Pwr -> (FSLIT("pow"), True)
        MO_F64_Exp -> (FSLIT("exp"), False)
        MO_F64_Log -> (FSLIT("log"), False)
        MO_F64_Sqrt -> (FSLIT("sqrt"), False)
        MO_F64_Sin -> (FSLIT("sin"), False)
        MO_F64_Cos -> (FSLIT("cos"), False)
        MO_F64_Tan -> (FSLIT("tan"), False)
        MO_F64_Asin -> (FSLIT("asin"), False)
        MO_F64_Acos -> (FSLIT("acos"), False)
        MO_F64_Atan -> (FSLIT("atan"), False)
        MO_F64_Sinh -> (FSLIT("sinh"), False)
        MO_F64_Cosh -> (FSLIT("cosh"), False)
        MO_F64_Tanh -> (FSLIT("tanh"), False)
        MO_F64_Pwr -> (FSLIT("pow"), False)
        other -> pprPanic "genCCall(ppc): unknown callish op"
                          (pprCallishMachOp other)
3834 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3836 #endif /* powerpc_TARGET_ARCH */
3839 -- -----------------------------------------------------------------------------
3840 -- Generating a table-branch
-- | Generate a table-based branch ('switch') for the given index
-- expression.  Each entry of @ids@ is a possible target block;
-- Nothing entries become zero words (holes) in the jump table.
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
    -- PIC case: the jump table holds label-relative offsets, so the
    -- code is position independent; the table's own address is
    -- obtained via a dynamic reference
    (reg,e_code) <- getSomeReg expr
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    (tableReg,t_code) <- getSomeReg $ dynRef
        jumpTable = map jumpTableEntryRel ids
        -- table entries are offsets of the target block label from
        -- the table label (zero word for a missing target)
        jumpTableEntryRel Nothing
            = CmmStaticLit (CmmInt 0 wordRep)
        jumpTableEntryRel (Just (BlockId id))
            = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
            where blockLabel = mkAsmTempLabel id
        -- index into the table: table base + index * word size
        op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                   (EAIndex reg wORD_SIZE) (ImmInt 0))
#if x86_64_TARGET_ARCH && darwin_TARGET_OS
    -- on Mac OS X/x86_64, put the jump table in the text section
    -- to work around a limitation of the linker.
    -- ld64 is unable to handle the relocations for
    --     .quad L1 - L0
    -- if L0 is not preceded by a non-anonymous label in its section.
        code = e_code `appOL` t_code `appOL` toOL [
                ADD wordRep op (OpReg tableReg),
                JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
                LDATA Text (CmmDataLabel lbl : jumpTable)
        -- non-Darwin: table lives in read-only data as usual
        code = e_code `appOL` t_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                ADD wordRep op (OpReg tableReg),
                JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
    -- non-PIC case: table entries are absolute labels, and the jump
    -- goes through a scaled-index addressing mode directly
    (reg,e_code) <- getSomeReg expr
    lbl <- getNewLabelNat
        jumpTable = map jumpTableEntry ids
        op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
        code = e_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                JMP_TBL op [ id | Just id <- ids ]
#elif powerpc_TARGET_ARCH
    -- PIC case: label-relative table entries; load the offset, add
    -- the table base, and branch via the count register
    (reg,e_code) <- getSomeReg expr
    tmp <- getNewRegNat I32
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    (tableReg,t_code) <- getSomeReg $ dynRef
        jumpTable = map jumpTableEntryRel ids
        -- entries are offsets of the target from the table label
        jumpTableEntryRel Nothing
            = CmmStaticLit (CmmInt 0 wordRep)
        jumpTableEntryRel (Just (BlockId id))
            = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
            where blockLabel = mkAsmTempLabel id
        code = e_code `appOL` t_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                -- scale the index by 4 (shift left by 2) to get a
                -- byte offset into the table of 32-bit entries
                SLW tmp reg (RIImm (ImmInt 2)),
                LD I32 tmp (AddrRegReg tableReg tmp),
                ADD tmp tmp (RIReg tableReg),
                BCTR [ id | Just id <- ids ]
    -- non-PIC case: absolute entries, addressed via the high/low
    -- halves of the table label
    (reg,e_code) <- getSomeReg expr
    tmp <- getNewRegNat I32
    lbl <- getNewLabelNat
        jumpTable = map jumpTableEntry ids
        code = e_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                SLW tmp reg (RIImm (ImmInt 2)),
                ADDIS tmp tmp (HA (ImmCLbl lbl)),
                LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
                BCTR [ id | Just id <- ids ]
-- other architectures: not yet implemented
genSwitch expr ids = panic "ToDo: genSwitch"
-- | One entry of an absolute jump table: the label of the target
--   block, or a zero word where the table has no target for that index.
jumpTableEntry mbid = case mbid of
    Nothing            -> CmmStaticLit (CmmInt 0 wordRep)
    Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
3949 -- -----------------------------------------------------------------------------
3951 -- -----------------------------------------------------------------------------
3954 -- -----------------------------------------------------------------------------
3955 -- 'condIntReg' and 'condFltReg': condition codes into registers
3957 -- Turn those condition codes into integers now (when they appear on
3958 -- the right hand side of an assignment).
3960 -- (If applicable) Do not fill the delay slots here; you will confuse the
3961 -- register allocator.
-- | Materialise a comparison result (condition codes) into a register,
-- for when a comparison appears on the right-hand side of an
-- assignment.  'condIntReg' handles integer comparisons, 'condFltReg'
-- floating-point ones.
condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if alpha_TARGET_ARCH
-- Not implemented on Alpha.
condIntReg = panic "MachCode.condIntReg (not on Alpha)"
condFltReg = panic "MachCode.condFltReg (not on Alpha)"
#endif /* alpha_TARGET_ARCH */
3972 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- SETcc writes a single byte, so set a byte-sized temporary from the
-- condition flags and zero-extend it into the full destination reg.
condIntReg cond x y = do
    CondCode _ cond cond_code <- condIntCode cond x y
    tmp <- getNewRegNat I8
    code dst = cond_code `appOL` toOL [
                SETCC cond (OpReg tmp),            -- tmp := 0 or 1
                MOVZxL I8 (OpReg tmp) (OpReg dst)  -- zero-extend into dst
    return (Any I32 code)
#if i386_TARGET_ARCH

-- Same SETcc + zero-extend scheme as condIntReg, driven by the
-- floating-point comparison's condition code.
condFltReg cond x y = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp <- getNewRegNat I8
    code dst = cond_code `appOL` toOL [
                SETCC cond (OpReg tmp),
                MOVZxL I8 (OpReg tmp) (OpReg dst)
    return (Any I32 code)
#if x86_64_TARGET_ARCH

-- SSE comparisons: must also account for unordered (NaN) operands via
-- the parity flag, see the comment below.
condFltReg cond x y = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp1 <- getNewRegNat wordRep
    tmp2 <- getNewRegNat wordRep
    -- We have to worry about unordered operands (eg. comparisons
    -- against NaN).  If the operands are unordered, the comparison
    -- sets the parity flag, carry flag and zero flag.
    -- All comparisons are supposed to return false for unordered
    -- operands except for !=, which returns true.
    --
    -- Optimisation: we don't have to test the parity flag if we
    -- know the test has already excluded the unordered case: eg >
    -- and >= test for a zero carry flag, which can only occur for
    -- ordered operands.
    --
    -- ToDo: by reversing comparisons we could avoid testing the
    -- parity flag in more cases.
        NE  -> or_unordered dst
        GU  -> plain_test dst
        GEU -> plain_test dst
        _   -> and_ordered dst)
    -- unordered case already excluded: plain SETcc
    plain_test dst = toOL [
        SETCC cond (OpReg tmp1),
        MOVZxL I8 (OpReg tmp1) (OpReg dst)
    -- != : true if condition holds OR operands were unordered
    or_unordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC PARITY (OpReg tmp2),
        OR I8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL I8 (OpReg tmp2) (OpReg dst)
    -- others: true only if condition holds AND operands were ordered
    and_ordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC NOTPARITY (OpReg tmp2),
        AND I8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL I8 (OpReg tmp2) (OpReg dst)
    return (Any I32 code)
4054 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

-- x == 0 : subtract-with-carry tricks turn the zero test into a 0/1
-- value without branching (SUBs set the condition codes)
condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat I32
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any I32 code__2)

-- x == y : XOR the operands, then the same carry trick on the result
condIntReg EQQ x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat I32
    tmp2 <- getNewRegNat I32
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any I32 code__2)

-- x /= 0 : carry-based 0/1 materialisation, inverted sense
condIntReg NE x (CmmLit (CmmInt 0 d)) = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat I32
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any I32 code__2)

-- x /= y : XOR then the inverted carry trick
condIntReg NE x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat I32
    tmp2 <- getNewRegNat I32
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any I32 code__2)

-- general case: branch over dst := 0 / dst := 1
-- (delay slots deliberately left as NOPs; see comment above)
condIntReg cond x y = do
    BlockId lbl1 <- getBlockIdNat
    BlockId lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- condIntCode cond x y
    code__2 dst = cond_code `appOL` toOL [
        BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
        NEWBLOCK (BlockId lbl1),
        OR False g0 (RIImm (ImmInt 1)) dst,
        NEWBLOCK (BlockId lbl2)]
    return (Any I32 code__2)

-- floating point: same branch-over scheme, but using the FP branch
condFltReg cond x y = do
    BlockId lbl1 <- getBlockIdNat
    BlockId lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- condFltCode cond x y
    code__2 dst = cond_code `appOL` toOL [
        NOP,
        BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
        NEWBLOCK (BlockId lbl1),
        OR False g0 (RIImm (ImmInt 1)) dst,
        NEWBLOCK (BlockId lbl2)]
    return (Any I32 code__2)

#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
-- Shared worker for condIntReg/condFltReg: extract the relevant bit
-- of the condition register into a GPR (no branches needed).
condReg getCond = do
    lbl1 <- getBlockIdNat
    lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- getCond
    {- code dst = cond_code `appOL` toOL [
    code dst = cond_code
        -- rotate the wanted CR bit into the low bit and mask it
        RLWINM dst dst (bit + 1) 31 31
    -- some conditions are only available negated; invert the CR bit
    negate_code | do_negate = unitOL (CRNOR bit bit bit)
    -- which CR bit encodes this condition, and whether to negate it
    (bit, do_negate) = case cond of
    return (Any I32 code)

condIntReg cond x y = condReg (condIntCode cond x y)
condFltReg cond x y = condReg (condFltCode cond x y)
#endif /* powerpc_TARGET_ARCH */
4177 -- -----------------------------------------------------------------------------
4178 -- 'trivial*Code': deal with trivial instructions
4180 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4181 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4182 -- Only look for constants on the right hand side, because that's
4183 -- where the generic optimizer will have put them.
4185 -- Similarly, for unary instructions, we don't have to worry about
4186 -- matching an StInt as the argument, because genericOpt will already
4187 -- have handled the constant-folding.
4191 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4192 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4193 -> Maybe (Operand -> Operand -> Instr)
4194 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4195 -> Maybe (Operand -> Operand -> Instr)
4196 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4197 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4199 -> CmmExpr -> CmmExpr -- the two arguments
4202 #ifndef powerpc_TARGET_ARCH
4205 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4206 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4207 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4208 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4210 -> CmmExpr -> CmmExpr -- the two arguments
4216 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4217 ,IF_ARCH_i386 ((Operand -> Instr)
4218 ,IF_ARCH_x86_64 ((Operand -> Instr)
4219 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4220 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4222 -> CmmExpr -- the one argument
4225 #ifndef powerpc_TARGET_ARCH
4228 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4229 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4230 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4231 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4233 -> CmmExpr -- the one argument
4237 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH

-- literal second operand: use the reg-immediate form of the instruction
trivialCode instr x (StInt y)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src1 = registerName register tmp
    src2 = ImmInt (fromInteger y)
    code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
    return (Any IntRep code__2)

-- general case: both operands in registers
trivialCode instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1 []
    src1  = registerName register1 tmp1
    code2 = registerCode register2 tmp2 []
    src2  = registerName register2 tmp2
    code__2 dst = asmSeqThen [code1, code2] .
                  mkSeqInstr (instr src1 (RIReg src2) dst)
    return (Any IntRep code__2)

-- unary integer op
trivialUCode instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src  = registerName register tmp
    code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
    return (Any IntRep code__2)

-- dyadic floating-point op (the first argument, the MachRep, is ignored)
trivialFCode _ instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat F64 `thenNat` \ tmp1 ->
    getNewRegNat F64 `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1
    src1  = registerName register1 tmp1
    code2 = registerCode register2 tmp2
    src2  = registerName register2 tmp2
    code__2 dst = asmSeqThen [code1 [], code2 []] .
                  mkSeqInstr (instr src1 src2 dst)
    return (Any F64 code__2)

-- unary floating-point op
trivialUFCode _ instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
    code = registerCode register tmp
    src  = registerName register tmp
    code__2 dst = code . mkSeqInstr (instr src dst)
    return (Any F64 code__2)

#endif /* alpha_TARGET_ARCH */
4309 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4311 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4314 The Rules of the Game are:
4316 * You cannot assume anything about the destination register dst;
4317 it may be anything, including a fixed reg.
4319 * You may compute an operand into a fixed reg, but you may not
4320 subsequently change the contents of that fixed reg. If you
4321 want to do so, first copy the value either to a temporary
4322 or into dst. You are free to modify dst even if it happens
4323 to be a fixed reg -- that's not your problem.
4325 * You cannot assume that a fixed reg will stay live over an
4326 arbitrary computation. The same applies to the dst reg.
4328 * Temporary regs obtained from getNewRegNat are distinct from
4329 each other and from all other regs, and stay live over
4330 arbitrary computations.
4332 --------------------
4334 SDM's version of The Rules:
4336 * If getRegister returns Any, that means it can generate correct
4337 code which places the result in any register, period. Even if that
4338 register happens to be read during the computation.
4340 Corollary #1: this means that if you are generating code for an
4341 operation with two arbitrary operands, you cannot assign the result
4342 of the first operand into the destination register before computing
4343 the second operand. The second operand might require the old value
4344 of the destination register.
4346 Corollary #2: A function might be able to generate more efficient
4347 code if it knows the destination register is a new temporary (and
4348 therefore not read by any of the sub-computations).
4350 * If getRegister returns Any, then the code it generates may modify only:
4351 (a) fresh temporaries
4352 (b) the destination register
4353 (c) known registers (eg. %ecx is used by shifts)
4354 In particular, it may *not* modify global registers, unless the global
4355 register happens to be the destination register.
4358 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4359 | not (is64BitLit lit_a) = do
4360 b_code <- getAnyReg b
4363 = b_code dst `snocOL`
4364 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4366 return (Any rep code)
4368 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
-- This is re-used for floating pt instructions too.
-- Two-operand code generation (dst := dst `op` src): b is evaluated to
-- a non-clobberable operand, a is computed straight into the
-- destination register, then `instr` combines them.
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep
  -- We want the value of b to stay alive across the computation of a.
  -- But, we want to calculate a straight into the destination register,
  -- because the instruction only has two operands (dst := dst `op` src).
  -- The troublesome case is when the result of b is in the same register
  -- as the destination reg.  In this case, we have to save b in a
  -- new temporary across the computation of a.
  -- NOTE(review): the `let code dst` line and the clash-free branch's
  -- header are elided in this excerpt.
    | dst `regClashesWithOp` b_op =
        -- save b into the fresh temporary before a overwrites dst
        unitOL (MOV rep b_op (OpReg tmp)) `appOL`
        instr (OpReg tmp) (OpReg dst)
        -- no clash: b's operand can be used directly
        instr b_op (OpReg dst)
  return (Any rep code)
-- | Does the register clash with the operand — i.e. is it the operand
-- register itself, or does it appear in the operand's address mode?
regClashesWithOp reg op = case op of
    OpReg reg'   -> reg == reg'
    OpAddr amode -> reg `elem` addrModeRegs amode
    _            -> False
-- Unary operation: compute the operand straight into the destination
-- register and apply `instr` to it in place.
-- NOTE(review): the `let code dst = ...` binding is elided in this
-- excerpt.
trivialUCode rep instr x = do
  x_code <- getAnyReg x
  return (Any rep code)
4412 #if i386_TARGET_ARCH
-- i386 (x87) floating-point two-operand code: both arguments are
-- evaluated into registers for the three-operand GXXX pseudo-instrs.
-- NOTE(review): the `code dst =` binding is elided in this excerpt.
trivialFCode pk instr x y = do
  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
  (y_reg, y_code) <- getSomeReg y
    instr pk x_reg y_reg dst
  return (Any pk code)
4427 #if x86_64_TARGET_ARCH
4429 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- Unary floating-point operation: operand in a register, result placed
-- in the destination register.
-- NOTE(review): the `let code dst = ...` binding is elided in this
-- excerpt.
trivialUFCode rep instr x = do
  (x_reg, x_code) <- getSomeReg x
  return (Any rep code)
4444 #endif /* i386_TARGET_ARCH */
4446 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4448 #if sparc_TARGET_ARCH
-- SPARC: operator with an integer-literal second operand, using the
-- register/immediate (RIImm) form of the instruction.
-- NOTE(review): the guard on the literal's range, the `= do`, and the
-- `let` header are elided in this excerpt; `tmp` is not referenced in
-- the visible lines.
trivialCode pk instr x (CmmLit (CmmInt y d))
  (src1, code) <- getSomeReg x
  tmp <- getNewRegNat I32
    src2 = ImmInt (fromInteger y)
    code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
  return (Any I32 code__2)
-- SPARC: general register/register case — evaluate both operands,
-- then dst := src1 `op` src2.
-- NOTE(review): the `let` header is elided in this excerpt; tmp1/tmp2
-- are not referenced in the visible lines.
trivialCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat I32
    tmp2 <- getNewRegNat I32
    code__2 dst = code1 `appOL` code2 `snocOL`
                  instr src1 (RIReg src2) dst
    return (Any I32 code__2)
-- SPARC floating point: when the two operand widths differ, the F32
-- side is promoted to F64 (via FxTOy into the scratch register `tmp`)
-- before the operation; the result rep is F64 unless both sides agree.
-- NOTE(review): the `let` header and the `if pk1 == pk2` line are
-- elided in this excerpt; tmp1/tmp2 are not referenced in the visible
-- lines.
trivialFCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat (cmmExprRep x)
    tmp2 <- getNewRegNat (cmmExprRep y)
    tmp <- getNewRegNat F64
    -- widen an F32 value into the F64 scratch register
    promote x = FxTOy F32 F64 x tmp
        -- same-rep case: operate directly
        code1 `appOL` code2 `snocOL`
        instr pk src1 src2 dst
      else if pk1 == F32 then
        -- left operand is F32: widen it, then operate at F64
        code1 `snocOL` promote src1 `appOL` code2 `snocOL`
        instr F64 tmp src2 dst
        -- right operand is F32: widen that one instead
        code1 `appOL` code2 `snocOL` promote src2 `snocOL`
        instr F64 src1 tmp dst
    return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- SPARC unary operation, register/immediate (RIReg) source form.
-- NOTE(review): the `let` header is elided in this excerpt; `tmp` is
-- not referenced in the visible lines.
trivialUCode pk instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat pk
    code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any pk code__2)
-- SPARC unary floating-point operation: plain register source.
-- NOTE(review): the `let` header is elided in this excerpt; `tmp` is
-- not referenced in the visible lines.
trivialUFCode pk instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat pk
    code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)
4511 #endif /* sparc_TARGET_ARCH */
4513 #if powerpc_TARGET_ARCH
4516 Wolfgang's PowerPC version of The Rules:
4518 A slightly modified version of The Rules to take advantage of the fact
4519 that PowerPC instructions work on all registers and don't implicitly
4520 clobber any fixed registers.
4522 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4524 * If getRegister returns Any, then the code it generates may modify only:
4525 (a) fresh temporaries
4526 (b) the destination register
4527 It may *not* modify global registers, unless the global
4528 register happens to be the destination register.
4529 It may not clobber any other registers. In fact, only ccalls clobber any
4531 Also, it may not modify the counter register (used by genCCall).
4533 Corollary: If a getRegister for a subexpression returns Fixed, you need
4534 not move it to a fresh temporary before evaluating the next subexpression.
4535 The Fixed register won't be modified.
4536 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4538 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4539 the value of the destination register.
-- PowerPC: if the literal fits an immediate field for this rep and
-- signedness (checked by makeImmediate), use the register/immediate
-- form of the instruction.
-- NOTE(review): the `= do` line of this equation is elided in this
-- excerpt.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
    (src1, code1) <- getSomeReg x
    let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
    return (Any rep code)
-- General PowerPC case: both operands evaluated into registers, then
-- dst := src1 `op` src2.  (The signedness flag only matters for the
-- immediate form.)
trivialCode rep signed instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    return $ Any rep (\dst -> xCode `appOL` yCode `snocOL` instr dst xReg (RIReg yReg))
-- | Operation with no immediate form: both operands must be brought
-- into registers; emits @instr dst src1 src2@.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    return $ Any rep (\dst -> xCode `appOL` yCode `snocOL` instr dst xReg yReg)
-- | PowerPC unary operation: evaluate the operand, then dst := op src.
trivialUCode rep instr x = do
    (srcReg, srcCode) <- getSomeReg x
    return $ Any rep (\dst -> srcCode `snocOL` instr dst srcReg)
-- There is no "remainder" instruction on the PPC, so we have to do
-- it ourselves: divide, multiply the quotient back, and subtract.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- NOTE(review): the division and subtraction instructions of the
-- emitted sequence are elided in this excerpt; only the multiply-back
-- step is visible.
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
              -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
            MULLW dst dst (RIReg src2),
    return (Any rep code)
4584 #endif /* powerpc_TARGET_ARCH */
4587 -- -----------------------------------------------------------------------------
4588 -- Coercing to/from integer/floating-point...
4590 -- When going to integer, we truncate (round towards 0).
4592 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4593 -- conversions. We have to store temporaries in memory to move
4594 -- between the integer and the floating point register sets.
4596 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4598 -- kinds, so the value has to be computed into one kind before being
4599 -- explicitly "converted" to live in the other kind.
4601 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4602 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4604 #if sparc_TARGET_ARCH
4605 coerceDbl2Flt :: CmmExpr -> NatM Register
4606 coerceFlt2Dbl :: CmmExpr -> NatM Register
4609 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4611 #if alpha_TARGET_ARCH
-- NOTE(review): Alpha int->float coercion fragment — the equation's
-- left-hand side and several emitted instructions are elided in this
-- excerpt.  The value moves between register banks through the stack
-- (spRel 0); the result rep is F64.
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ reg ->
    code = registerCode register reg
    src = registerName register reg
    code__2 dst = code . mkSeqInstrs [
        LD TF dst (spRel 0),
  return (Any F64 code__2)
-- NOTE(review): Alpha float->int coercion fragment — the equation's
-- left-hand side and several emitted instructions are elided in this
-- excerpt.  The float temporary is stored to the stack (spRel 0) to
-- cross register banks; the result rep is IntRep.
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code . mkSeqInstrs [
        ST TF tmp (spRel 0),
  return (Any IntRep code__2)
4642 #endif /* alpha_TARGET_ARCH */
4644 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4646 #if i386_TARGET_ARCH
-- i386 (x87): integer -> float via the GITOF/GITOD pseudo-instructions.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceInt2FP from to x = do
   (x_reg, x_code) <- getSomeReg x
        -- choose the pseudo-op for the target float width
        opc  = case to of F32 -> GITOF; F64 -> GITOD
        code dst = x_code `snocOL` opc x_reg dst
        -- ToDo: works for non-I32 reps?
   return (Any to code)
-- i386 (x87): float -> integer (truncating, per the section comment)
-- via the GFTOI/GDTOI pseudo-instructions.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceFP2Int from to x = do
   (x_reg, x_code) <- getSomeReg x
        -- choose the pseudo-op for the source float width
        opc  = case from of F32 -> GFTOI; F64 -> GDTOI
        code dst = x_code `snocOL` opc x_reg dst
        -- ToDo: works for non-I32 reps?
   return (Any to code)
4668 #endif /* i386_TARGET_ARCH */
4670 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4672 #if x86_64_TARGET_ARCH
-- x86_64 (SSE2): truncating float -> int via CVTTSS2SIQ/CVTTSD2SIQ.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
        opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- x86_64 (SSE2): int -> float via CVTSI2SS/CVTSI2SD.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceInt2FP from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
        opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- Float <-> double conversion (SSE2): CVTSD2SS narrows to F32,
-- CVTSS2SD widens to F64, selected by the target rep.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
    (x_reg, x_code) <- getSomeReg x
        opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
        code dst = x_code `snocOL` opc x_reg dst
    return (Any to code)
4701 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4703 #if sparc_TARGET_ARCH
-- SPARC: move the integer across register banks through the stack
-- (just below the stack pointer, spRel (-2)), then convert in the FP
-- bank with FxTOy.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceInt2FP pk1 pk2 x = do
    (src, code) <- getSomeReg x
    code__2 dst = code `appOL` toOL [
        ST pk1 src (spRel (-2)),
        LD pk1 (spRel (-2)) dst,
        FxTOy pk1 pk2 dst dst]
    return (Any pk2 code__2)
-- SPARC: convert inside the FP bank with FxTOy, then move the result
-- to an integer register through the stack (spRel (-2)).
-- NOTE(review): the `let` header and the start of the instruction list
-- are elided in this excerpt; `reg` is not referenced in the visible
-- lines.
coerceFP2Int pk fprep x = do
    (src, code) <- getSomeReg x
    reg <- getNewRegNat fprep
    tmp <- getNewRegNat pk
    code__2 dst = ASSERT(fprep == F64 || fprep == F32)
        FxTOy fprep pk src tmp,
        ST pk tmp (spRel (-2)),
        LD pk (spRel (-2)) dst]
    return (Any pk code__2)
-- | Narrow a double to a float with a single FxTOy conversion.
coerceDbl2Flt x = do
    (srcReg, srcCode) <- getSomeReg x
    let code dst = srcCode `snocOL` FxTOy F64 F32 srcReg dst
    return (Any F32 code)
-- | Widen a float to a double with a single FxTOy conversion.
coerceFlt2Dbl x = do
    (srcReg, srcCode) <- getSomeReg x
    let code dst = srcCode `snocOL` FxTOy F32 F64 srcReg dst
    return (Any F64 code)
4737 #endif /* sparc_TARGET_ARCH */
4739 #if powerpc_TARGET_ARCH
-- PowerPC int -> float: no direct conversion instruction, so the
-- classic bias-constant trick is used.  The double image
-- 0x43300000_(src XOR 0x80000000) is assembled on the stack and
-- loaded, then the constant 0x43300000_80000000 (emitted at `lbl`,
-- addressed via `dynRef` so it also works under PIC) is subtracted,
-- leaving the true value.
-- NOTE(review): the `let` header, the LDATA line for `lbl`, the load
-- of the constant, and the nilOL default cases of maybe_exts /
-- maybe_frsp are elided in this excerpt.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat I32
    ftmp <- getNewRegNat F64
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
        code' dst = code `appOL` maybe_exts `appOL` toOL [
            -- in-memory image of the bias constant
            CmmStaticLit (CmmInt 0x43300000 I32),
            CmmStaticLit (CmmInt 0x80000000 I32)],
            -- flip the integer's sign bit...
            XORIS itmp src (ImmInt 0x8000),
            ST I32 itmp (spRel 3),
            -- ...and pair it with the 0x4330 high word on the stack
            LIS itmp (ImmInt 0x4330),
            ST I32 itmp (spRel 2),
            LD F64 ftmp (spRel 2)
            ] `appOL` addr_code `appOL` toOL [
            -- subtract the bias to recover the true value
            FSUB F64 dst ftmp dst
            ] `appOL` maybe_frsp dst
        -- narrow source reps must be sign-extended first
        maybe_exts = case fromRep of
                        I8 ->  unitOL $ EXTS I8 src src
                        I16 -> unitOL $ EXTS I16 src src
        -- round down to single precision when the target rep is F32
        maybe_frsp dst = case toRep of
                        F32 -> unitOL $ FRSP dst dst
    return (Any toRep code')
-- PowerPC float -> int: convert to an integer image inside an FP
-- register, spill the 64-bit result to the stack, and reload its low
-- word as the integer value.
-- NOTE(review): the `let` header and the actual convert instruction
-- (presumably FCTIWZ — confirm against the full source) are elided in
-- this excerpt.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
        code' dst = code `appOL` toOL [
            -- convert to int in FP reg
            -- store value (64bit) from FP to stack
            ST F64 tmp (spRel 2),
            -- read low word of value (high word is undefined)
            LD I32 dst (spRel 3)]
    return (Any toRep code')
4785 #endif /* powerpc_TARGET_ARCH */
4788 -- -----------------------------------------------------------------------------
4789 -- eXTRA_STK_ARGS_HERE
4791 -- We (allegedly) put the first six C-call arguments in registers;
4792 -- where do we start putting the rest of them?
4794 -- Moved from MachInstrs (SDM):
4796 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4797 eXTRA_STK_ARGS_HERE :: Int
4799 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))