compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32
  33 -- The rest:
  34 import StaticFlags      ( opt_PIC )
  35 import ForeignCall      ( CCallConv(..) )
  36 import OrdList
  37 import Pretty
  38 import Outputable
  39 import FastString
  40 import FastTypes        ( isFastTrue )
  41 import Constants        ( wORD_SIZE )
  42
  43 #ifdef DEBUG
  44 import Outputable       ( assertPanic )
  45 import Debug.Trace      ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Data.Maybe       ( fromJust )
  50 import Data.Bits
  51 import Data.Word
  52 import Data.Int
  53
  54 -- -----------------------------------------------------------------------------
  55 -- Top-level of the instruction selector
  56
  57 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  58 -- They are really trees of insns to facilitate fast appending, where a
  59 -- left-to-right traversal (pre-order?) yields the insns in the correct
  60 -- order.
  61
  62 type InstrBlock = OrdList Instr
  63
  64 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  65 cmmTopCodeGen (CmmProc info lab params blocks) = do
  66   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  67   picBaseMb <- getPicBaseMaybeNat
  68   let proc = CmmProc info lab params (concat nat_blocks)
  69       tops = proc : concat statics
  70   case picBaseMb of
  71       Just picBase -> initializePicBase picBase tops
  72       Nothing -> return tops
  73
  74 cmmTopCodeGen (CmmData sec dat) = do
  75   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  76
  77 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  78 basicBlockCodeGen (BasicBlock id stmts) = do
  79   instrs <- stmtsToInstrs stmts
  80   -- code generation may introduce new basic block boundaries, which
  81   -- are indicated by the NEWBLOCK instruction.  We must split up the
  82   -- instruction stream into basic blocks again.  Also, we extract
  83   -- LDATAs here too.
  84   let
  85         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  86
  87         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  88           = ([], BasicBlock id instrs : blocks, statics)
  89         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  90           = (instrs, blocks, CmmData sec dat:statics)
  91         mkBlocks instr (instrs,blocks,statics)
  92           = (instr:instrs, blocks, statics)
  93   -- in
  94   return (BasicBlock id top : other_blocks, statics)
  95
  96 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  97 stmtsToInstrs stmts
  98    = do instrss <- mapM stmtToInstrs stmts
  99         return (concatOL instrss)
 100
 101 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 102 stmtToInstrs stmt = case stmt of
 103     CmmNop         -> return nilOL
 104     CmmComment s   -> return (unitOL (COMMENT s))
 105
 106     CmmAssign reg src
 107       | isFloatingRep kind -> assignReg_FltCode kind reg src
 108 #if WORD_SIZE_IN_BITS==32
 109       | kind == I64        -> assignReg_I64Code      reg src
 110 #endif
 111       | otherwise          -> assignReg_IntCode kind reg src
 112         where kind = cmmRegRep reg
 113
 114     CmmStore addr src
 115       | isFloatingRep kind -> assignMem_FltCode kind addr src
 116 #if WORD_SIZE_IN_BITS==32
 117       | kind == I64      -> assignMem_I64Code      addr src
 118 #endif
 119       | otherwise        -> assignMem_IntCode kind addr src
 120         where kind = cmmExprRep src
 121
 122     CmmCall target result_regs args vols
 123        -> genCCall target result_regs args vols
 124
 125     CmmBranch id          -> genBranch id
 126     CmmCondBranch arg id  -> genCondJump id arg
 127     CmmSwitch arg ids     -> genSwitch arg ids
 128     CmmJump arg params    -> genJump arg
 129
 130 -- -----------------------------------------------------------------------------
 131 -- General things for putting together code sequences
 132
 133 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 134 -- CmmExprs into CmmRegOff?
 135 mangleIndexTree :: CmmExpr -> CmmExpr
 136 mangleIndexTree (CmmRegOff reg off)
 137   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 138   where rep = cmmRegRep reg
 139
 140 -- -----------------------------------------------------------------------------
 141 --  Code gen for 64-bit arithmetic on 32-bit platforms
 142
 143 {-
 144 Simple support for generating 64-bit code (ie, 64 bit values and 64
 145 bit assignments) on 32-bit platforms.  Unlike the main code generator
 146 we merely shoot for generating working code as simply as possible, and
 147 pay little attention to code quality.  Specifically, there is no
 148 attempt to deal cleverly with the fixed-vs-floating register
 149 distinction; all values are generated into (pairs of) floating
 150 registers, even if this would mean some redundant reg-reg moves as a
 151 result.  Only one of the VRegUniques is returned, since it will be
 152 of the VRegUniqueLo form, and the upper-half VReg can be determined
 153 by applying getHiVRegFromLo to it.
 154 -}
 155
 156 data ChildCode64        -- a.k.a "Register64"
 157    = ChildCode64
 158         InstrBlock      -- code
 159         Reg             -- the lower 32-bit temporary which contains the
 160                         -- result; use getHiVRegFromLo to find the other
 161                         -- VRegUnique.  Rules of this simplified insn
 162                         -- selection game are therefore that the returned
 163                         -- Reg may be modified
 164
 165 #if WORD_SIZE_IN_BITS==32
 166 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 167 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 168 #endif
 169
 170 #ifndef x86_64_TARGET_ARCH
 171 iselExpr64        :: CmmExpr -> NatM ChildCode64
 172 #endif
 173
 174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 175
 176 #if i386_TARGET_ARCH
 177
 178 assignMem_I64Code addrTree valueTree = do
 179   Amode addr addr_code <- getAmode addrTree
 180   ChildCode64 vcode rlo <- iselExpr64 valueTree
 181   let
 182         rhi = getHiVRegFromLo rlo
 183
 184         -- Little-endian store
 185         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 186         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 187   -- in
 188   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 189
 190
 191 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 192    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 193    let
 194          r_dst_lo = mkVReg u_dst I32
 195          r_dst_hi = getHiVRegFromLo r_dst_lo
 196          r_src_hi = getHiVRegFromLo r_src_lo
 197          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 198          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 199    -- in
 200    return (
 201         vcode `snocOL` mov_lo `snocOL` mov_hi
 202      )
 203
 204 assignReg_I64Code lvalue valueTree
 205    = panic "assignReg_I64Code(i386): invalid lvalue"
 206
 207 ------------
 208
 209 iselExpr64 (CmmLit (CmmInt i _)) = do
 210   (rlo,rhi) <- getNewRegPairNat I32
 211   let
 212         r = fromIntegral (fromIntegral i :: Word32)
 213         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 214         code = toOL [
 215                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 216                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 217                 ]
 218   -- in
 219   return (ChildCode64 code rlo)
 220
 221 iselExpr64 (CmmLoad addrTree I64) = do
 222    Amode addr addr_code <- getAmode addrTree
 223    (rlo,rhi) <- getNewRegPairNat I32
 224    let
 225         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 226         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 227    -- in
 228    return (
 229             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 230                         rlo
 231      )
 232
 233 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 234    = return (ChildCode64 nilOL (mkVReg vu I32))
 235
 236 -- we handle addition, but rather badly
 237 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 238    ChildCode64 code1 r1lo <- iselExpr64 e1
 239    (rlo,rhi) <- getNewRegPairNat I32
 240    let
 241         r = fromIntegral (fromIntegral i :: Word32)
 242         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 243         r1hi = getHiVRegFromLo r1lo
 244         code =  code1 `appOL`
 245                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 246                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 247                        MOV I32 (OpReg r1hi) (OpReg rhi),
 248                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 249    -- in
 250    return (ChildCode64 code rlo)
 251
 252 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 253    ChildCode64 code1 r1lo <- iselExpr64 e1
 254    ChildCode64 code2 r2lo <- iselExpr64 e2
 255    (rlo,rhi) <- getNewRegPairNat I32
 256    let
 257         r1hi = getHiVRegFromLo r1lo
 258         r2hi = getHiVRegFromLo r2lo
 259         code =  code1 `appOL`
 260                 code2 `appOL`
 261                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 262                        ADD I32 (OpReg r2lo) (OpReg rlo),
 263                        MOV I32 (OpReg r1hi) (OpReg rhi),
 264                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 265    -- in
 266    return (ChildCode64 code rlo)
 267
 268 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 269      fn <- getAnyReg expr
 270      r_dst_lo <-  getNewRegNat I32
 271      let r_dst_hi = getHiVRegFromLo r_dst_lo
 272          code = fn r_dst_lo
 273      return (
 274              ChildCode64 (code `snocOL`
 275                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 276                           r_dst_lo
 277             )
 278
 279 iselExpr64 expr
 280    = pprPanic "iselExpr64(i386)" (ppr expr)
 281
 282 #endif /* i386_TARGET_ARCH */
 283
 284 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 285
 286 #if sparc_TARGET_ARCH
 287
 288 assignMem_I64Code addrTree valueTree = do
 289      Amode addr addr_code <- getAmode addrTree
 290      ChildCode64 vcode rlo <- iselExpr64 valueTree
 291      (src, code) <- getSomeReg addrTree
 292      let
 293          rhi = getHiVRegFromLo rlo
 294          -- Big-endian store
 295          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 296          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 297      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 298
 299 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 300      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 301      let
 302          r_dst_lo = mkVReg u_dst pk
 303          r_dst_hi = getHiVRegFromLo r_dst_lo
 304          r_src_hi = getHiVRegFromLo r_src_lo
 305          mov_lo = mkMOV r_src_lo r_dst_lo
 306          mov_hi = mkMOV r_src_hi r_dst_hi
 307          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 308      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 309 assignReg_I64Code lvalue valueTree
 310    = panic "assignReg_I64Code(sparc): invalid lvalue"
 311
 312
 313 -- Don't delete this -- it's very handy for debugging.
 314 --iselExpr64 expr
 315 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 316 --   = panic "iselExpr64(???)"
 317
 318 iselExpr64 (CmmLoad addrTree I64) = do
 319      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 320      rlo <- getNewRegNat I32
 321      let rhi = getHiVRegFromLo rlo
 322          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 323          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 324      return (
 325             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 326                          rlo
 327           )
 328
 329 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 330      r_dst_lo <-  getNewRegNat I32
 331      let r_dst_hi = getHiVRegFromLo r_dst_lo
 332          r_src_lo = mkVReg uq I32
 333          r_src_hi = getHiVRegFromLo r_src_lo
 334          mov_lo = mkMOV r_src_lo r_dst_lo
 335          mov_hi = mkMOV r_src_hi r_dst_hi
 336          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 337      return (
 338             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 339          )
 340
 341 iselExpr64 expr
 342    = pprPanic "iselExpr64(sparc)" (ppr expr)
 343
 344 #endif /* sparc_TARGET_ARCH */
 345
 346 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 347
 348 #if powerpc_TARGET_ARCH
 349
 350 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 351 getI64Amodes addrTree = do
 352     Amode hi_addr addr_code <- getAmode addrTree
 353     case addrOffset hi_addr 4 of
 354         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 355         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 356                            return (AddrRegImm hi_ptr (ImmInt 0),
 357                                    AddrRegImm hi_ptr (ImmInt 4),
 358                                    code)
 359
 360 assignMem_I64Code addrTree valueTree = do
 361         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 362         ChildCode64 vcode rlo <- iselExpr64 valueTree
 363         let
 364                 rhi = getHiVRegFromLo rlo
 365
 366                 -- Big-endian store
 367                 mov_hi = ST I32 rhi hi_addr
 368                 mov_lo = ST I32 rlo lo_addr
 369         -- in
 370         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 371
 372 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 373    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 374    let
 375          r_dst_lo = mkVReg u_dst I32
 376          r_dst_hi = getHiVRegFromLo r_dst_lo
 377          r_src_hi = getHiVRegFromLo r_src_lo
 378          mov_lo = MR r_dst_lo r_src_lo
 379          mov_hi = MR r_dst_hi r_src_hi
 380    -- in
 381    return (
 382         vcode `snocOL` mov_lo `snocOL` mov_hi
 383      )
 384
 385 assignReg_I64Code lvalue valueTree
 386    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 387
 388
 389 -- Don't delete this -- it's very handy for debugging.
 390 --iselExpr64 expr
 391 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 392 --   = panic "iselExpr64(???)"
 393
 394 iselExpr64 (CmmLoad addrTree I64) = do
 395     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 396     (rlo, rhi) <- getNewRegPairNat I32
 397     let mov_hi = LD I32 rhi hi_addr
 398         mov_lo = LD I32 rlo lo_addr
 399     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 400                          rlo
 401
 402 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 403    = return (ChildCode64 nilOL (mkVReg vu I32))
 404
 405 iselExpr64 (CmmLit (CmmInt i _)) = do
 406   (rlo,rhi) <- getNewRegPairNat I32
 407   let
 408         half0 = fromIntegral (fromIntegral i :: Word16)
 409         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 410         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 411         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 412
 413         code = toOL [
 414                 LIS rlo (ImmInt half1),
 415                 OR rlo rlo (RIImm $ ImmInt half0),
 416                 LIS rhi (ImmInt half3),
 417                 OR rlo rlo (RIImm $ ImmInt half2)
 418                 ]
 419   -- in
 420   return (ChildCode64 code rlo)
 421
 422 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 423    ChildCode64 code1 r1lo <- iselExpr64 e1
 424    ChildCode64 code2 r2lo <- iselExpr64 e2
 425    (rlo,rhi) <- getNewRegPairNat I32
 426    let
 427         r1hi = getHiVRegFromLo r1lo
 428         r2hi = getHiVRegFromLo r2lo
 429         code =  code1 `appOL`
 430                 code2 `appOL`
 431                 toOL [ ADDC rlo r1lo r2lo,
 432                        ADDE rhi r1hi r2hi ]
 433    -- in
 434    return (ChildCode64 code rlo)
 435
 436 iselExpr64 expr
 437    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 438
 439 #endif /* powerpc_TARGET_ARCH */
 440
 441
 442 -- -----------------------------------------------------------------------------
 443 -- The 'Register' type
 444
 445 -- 'Register's passed up the tree.  If the stix code forces the register
 446 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 447 -- otherwise, it comes out as @Any@, and the parent can decide which
 448 -- register to put it in.
 449
 450 data Register
 451   = Fixed   MachRep Reg InstrBlock
 452   | Any     MachRep (Reg -> InstrBlock)
 453
 454 swizzleRegisterRep :: Register -> MachRep -> Register
 455 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 456 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 457
 458
 459 -- -----------------------------------------------------------------------------
 460 -- Utils based on getRegister, below
 461
 462 -- The dual to getAnyReg: compute an expression into a register, but
 463 -- we don't mind which one it is.
 464 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 465 getSomeReg expr = do
 466   r <- getRegister expr
 467   case r of
 468     Any rep code -> do
 469         tmp <- getNewRegNat rep
 470         return (tmp, code tmp)
 471     Fixed _ reg code ->
 472         return (reg, code)
 473
 474 -- -----------------------------------------------------------------------------
 475 -- Grab the Reg for a CmmReg
 476
 477 getRegisterReg :: CmmReg -> Reg
 478
 479 getRegisterReg (CmmLocal (LocalReg u pk))
 480   = mkVReg u pk
 481
 482 getRegisterReg (CmmGlobal mid)
 483   = case get_GlobalReg_reg_or_addr mid of
 484        Left (RealReg rrno) -> RealReg rrno
 485        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 486           -- By this stage, the only MagicIds remaining should be the
 487           -- ones which map to a real machine register on this
 488           -- platform.  Hence ...
 489
 490
 491 -- -----------------------------------------------------------------------------
 492 -- Generate code to get a subtree into a Register
 493
 494 -- Don't delete this -- it's very handy for debugging.
 495 --getRegister expr
 496 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 497 --   = panic "getRegister(???)"
 498
 499 getRegister :: CmmExpr -> NatM Register
 500
 501 #if !x86_64_TARGET_ARCH
 502     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 503     -- register, it can only be used for rip-relative addressing.
 504 getRegister (CmmReg (CmmGlobal PicBaseReg))
 505   = do
 506       reg <- getPicBaseNat wordRep
 507       return (Fixed wordRep reg nilOL)
 508 #endif
 509
 510 getRegister (CmmReg reg)
 511   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 512
 513 getRegister tree@(CmmRegOff _ _)
 514   = getRegister (mangleIndexTree tree)
 515
 516
 517 #if WORD_SIZE_IN_BITS==32
 518     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 519     -- TO_W_(x), TO_W_(x >> 32)
 520
 521 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 522              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 523   ChildCode64 code rlo <- iselExpr64 x
 524   return $ Fixed I32 (getHiVRegFromLo rlo) code
 525
 526 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 527              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 528   ChildCode64 code rlo <- iselExpr64 x
 529   return $ Fixed I32 (getHiVRegFromLo rlo) code
 530
 531 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 532   ChildCode64 code rlo <- iselExpr64 x
 533   return $ Fixed I32 rlo code
 534
 535 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 536   ChildCode64 code rlo <- iselExpr64 x
 537   return $ Fixed I32 rlo code
 538
 539 #endif
 540
 541 -- end of machine-"independent" bit; here we go on the rest...
 542
 543 #if alpha_TARGET_ARCH
 544
 545 getRegister (StDouble d)
 546   = getBlockIdNat                   `thenNat` \ lbl ->
 547     getNewRegNat PtrRep             `thenNat` \ tmp ->
 548     let code dst = mkSeqInstrs [
 549             LDATA RoDataSegment lbl [
 550                     DATA TF [ImmLab (rational d)]
 551                 ],
 552             LDA tmp (AddrImm (ImmCLbl lbl)),
 553             LD TF dst (AddrReg tmp)]
 554     in
 555         return (Any F64 code)
 556
 557 getRegister (StPrim primop [x]) -- unary PrimOps
 558   = case primop of
 559       IntNegOp -> trivialUCode (NEG Q False) x
 560
 561       NotOp    -> trivialUCode NOT x
 562
 563       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 564       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 565
 566       OrdOp -> coerceIntCode IntRep x
 567       ChrOp -> chrCode x
 568
 569       Float2IntOp  -> coerceFP2Int    x
 570       Int2FloatOp  -> coerceInt2FP pr x
 571       Double2IntOp -> coerceFP2Int    x
 572       Int2DoubleOp -> coerceInt2FP pr x
 573
 574       Double2FloatOp -> coerceFltCode x
 575       Float2DoubleOp -> coerceFltCode x
 576
 577       other_op -> getRegister (StCall fn CCallConv F64 [x])
 578         where
 579           fn = case other_op of
 580                  FloatExpOp    -> FSLIT("exp")
 581                  FloatLogOp    -> FSLIT("log")
 582                  FloatSqrtOp   -> FSLIT("sqrt")
 583                  FloatSinOp    -> FSLIT("sin")
 584                  FloatCosOp    -> FSLIT("cos")
 585                  FloatTanOp    -> FSLIT("tan")
 586                  FloatAsinOp   -> FSLIT("asin")
 587                  FloatAcosOp   -> FSLIT("acos")
 588                  FloatAtanOp   -> FSLIT("atan")
 589                  FloatSinhOp   -> FSLIT("sinh")
 590                  FloatCoshOp   -> FSLIT("cosh")
 591                  FloatTanhOp   -> FSLIT("tanh")
 592                  DoubleExpOp   -> FSLIT("exp")
 593                  DoubleLogOp   -> FSLIT("log")
 594                  DoubleSqrtOp  -> FSLIT("sqrt")
 595                  DoubleSinOp   -> FSLIT("sin")
 596                  DoubleCosOp   -> FSLIT("cos")
 597                  DoubleTanOp   -> FSLIT("tan")
 598                  DoubleAsinOp  -> FSLIT("asin")
 599                  DoubleAcosOp  -> FSLIT("acos")
 600                  DoubleAtanOp  -> FSLIT("atan")
 601                  DoubleSinhOp  -> FSLIT("sinh")
 602                  DoubleCoshOp  -> FSLIT("cosh")
 603                  DoubleTanhOp  -> FSLIT("tanh")
 604   where
 605     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 606
 607 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 608   = case primop of
 609       CharGtOp -> trivialCode (CMP LTT) y x
 610       CharGeOp -> trivialCode (CMP LE) y x
 611       CharEqOp -> trivialCode (CMP EQQ) x y
 612       CharNeOp -> int_NE_code x y
 613       CharLtOp -> trivialCode (CMP LTT) x y
 614       CharLeOp -> trivialCode (CMP LE) x y
 615
 616       IntGtOp  -> trivialCode (CMP LTT) y x
 617       IntGeOp  -> trivialCode (CMP LE) y x
 618       IntEqOp  -> trivialCode (CMP EQQ) x y
 619       IntNeOp  -> int_NE_code x y
 620       IntLtOp  -> trivialCode (CMP LTT) x y
 621       IntLeOp  -> trivialCode (CMP LE) x y
 622
 623       WordGtOp -> trivialCode (CMP ULT) y x
 624       WordGeOp -> trivialCode (CMP ULE) x y
 625       WordEqOp -> trivialCode (CMP EQQ)  x y
 626       WordNeOp -> int_NE_code x y
 627       WordLtOp -> trivialCode (CMP ULT) x y
 628       WordLeOp -> trivialCode (CMP ULE) x y
 629
 630       AddrGtOp -> trivialCode (CMP ULT) y x
 631       AddrGeOp -> trivialCode (CMP ULE) y x
 632       AddrEqOp -> trivialCode (CMP EQQ)  x y
 633       AddrNeOp -> int_NE_code x y
 634       AddrLtOp -> trivialCode (CMP ULT) x y
 635       AddrLeOp -> trivialCode (CMP ULE) x y
 636
 637       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 638       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 639       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 640       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 641       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 642       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 643
 644       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 645       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 646       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 647       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 648       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 649       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 650
 651       IntAddOp  -> trivialCode (ADD Q False) x y
 652       IntSubOp  -> trivialCode (SUB Q False) x y
 653       IntMulOp  -> trivialCode (MUL Q False) x y
 654       IntQuotOp -> trivialCode (DIV Q False) x y
 655       IntRemOp  -> trivialCode (REM Q False) x y
 656
 657       WordAddOp  -> trivialCode (ADD Q False) x y
 658       WordSubOp  -> trivialCode (SUB Q False) x y
 659       WordMulOp  -> trivialCode (MUL Q False) x y
 660       WordQuotOp -> trivialCode (DIV Q True) x y
 661       WordRemOp  -> trivialCode (REM Q True) x y
 662
 663       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 664       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 665       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 666       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 667
 668       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 669       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 670       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 671       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 672
 673       AddrAddOp  -> trivialCode (ADD Q False) x y
 674       AddrSubOp  -> trivialCode (SUB Q False) x y
 675       AddrRemOp  -> trivialCode (REM Q True) x y
 676
 677       AndOp  -> trivialCode AND x y
 678       OrOp   -> trivialCode OR  x y
 679       XorOp  -> trivialCode XOR x y
 680       SllOp  -> trivialCode SLL x y
 681       SrlOp  -> trivialCode SRL x y
 682
 683       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 684       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 685       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 686
 687       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 688       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 689   where
 690     {- ------------------------------------------------------------
 691         Some bizarre special code for getting condition codes into
 692         registers.  Integer non-equality is a test for equality
 693         followed by an XOR with 1.  (Integer comparisons always set
 694         the result register to 0 or 1.)  Floating point comparisons of
 695         any kind leave the result in a floating point register, so we
 696         need to wrangle an integer register out of things.
 697     -}
 698     int_NE_code :: StixTree -> StixTree -> NatM Register
 699
 700     int_NE_code x y
 701       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 702         getNewRegNat IntRep             `thenNat` \ tmp ->
 703         let
 704             code = registerCode register tmp
 705             src  = registerName register tmp
 706             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 707         in
 708         return (Any IntRep code__2)
 709
 710     {- ------------------------------------------------------------
 711         Comments for int_NE_code also apply to cmpF_code
 712     -}
 713     cmpF_code
 714         :: (Reg -> Reg -> Reg -> Instr)
 715         -> Cond
 716         -> StixTree -> StixTree
 717         -> NatM Register
 718
 719     cmpF_code instr cond x y
 720       = trivialFCode pr instr x y       `thenNat` \ register ->
 721         getNewRegNat F64                `thenNat` \ tmp ->
 722         getBlockIdNat                   `thenNat` \ lbl ->
 723         let
 724             code = registerCode register tmp
 725             result  = registerName register tmp
 726
 727             code__2 dst = code . mkSeqInstrs [
 728                 OR zeroh (RIImm (ImmInt 1)) dst,
 729                 BF cond  result (ImmCLbl lbl),
 730                 OR zeroh (RIReg zeroh) dst,
 731                 NEWBLOCK lbl]
 732         in
 733         return (Any IntRep code__2)
 734       where
 735         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 736       ------------------------------------------------------------
 737
 738 getRegister (CmmLoad pk mem)
 739   = getAmode mem                    `thenNat` \ amode ->
 740     let
 741         code = amodeCode amode
 742         src   = amodeAddr amode
 743         size = primRepToSize pk
 744         code__2 dst = code . mkSeqInstr (LD size dst src)
 745     in
 746     return (Any pk code__2)
 747
 748 getRegister (StInt i)
 749   | fits8Bits i
 750   = let
 751         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 752     in
 753     return (Any IntRep code)
 754   | otherwise
 755   = let
 756         code dst = mkSeqInstr (LDI Q dst src)
 757     in
 758     return (Any IntRep code)
 759   where
 760     src = ImmInt (fromInteger i)
 761
 762 getRegister leaf
 763   | isJust imm
 764   = let
 765         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 766     in
 767     return (Any PtrRep code)
 768   where
 769     imm = maybeImm leaf
 770     imm__2 = case imm of Just x -> x
 771
 772 #endif /* alpha_TARGET_ARCH */
 773
 774 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 775
 776 #if i386_TARGET_ARCH
 777
 778 getRegister (CmmLit (CmmFloat f F32)) = do
 779     lbl <- getNewLabelNat
 780     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 781     Amode addr addr_code <- getAmode dynRef
 782     let code dst =
 783             LDATA ReadOnlyData
 784                         [CmmDataLabel lbl,
 785                          CmmStaticLit (CmmFloat f F32)]
 786             `consOL` (addr_code `snocOL`
 787             GLD F32 addr dst)
 788     -- in
 789     return (Any F32 code)
 790
 791
 792 getRegister (CmmLit (CmmFloat d F64))
 793   | d == 0.0
 794   = let code dst = unitOL (GLDZ dst)
 795     in  return (Any F64 code)
 796
 797   | d == 1.0
 798   = let code dst = unitOL (GLD1 dst)
 799     in  return (Any F64 code)
 800
 801   | otherwise = do
 802     lbl <- getNewLabelNat
 803     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 804     Amode addr addr_code <- getAmode dynRef
 805     let code dst =
 806             LDATA ReadOnlyData
 807                         [CmmDataLabel lbl,
 808                          CmmStaticLit (CmmFloat d F64)]
 809             `consOL` (addr_code `snocOL`
 810             GLD F64 addr dst)
 811     -- in
 812     return (Any F64 code)
 813
 814 #endif /* i386_TARGET_ARCH */
 815
 816 #if x86_64_TARGET_ARCH
 817
 818 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 819    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 820         -- I don't know why there are xorpd, xorps, and pxor instructions.
 821         -- They all appear to do the same thing --SDM
 822    return (Any rep code)
 823
 824 getRegister (CmmLit (CmmFloat f rep)) = do
 825     lbl <- getNewLabelNat
 826     let code dst = toOL [
 827             LDATA ReadOnlyData
 828                         [CmmDataLabel lbl,
 829                          CmmStaticLit (CmmFloat f rep)],
 830             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 831             ]
 832     -- in
 833     return (Any rep code)
 834
 835 #endif /* x86_64_TARGET_ARCH */
 836
 837 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 838
 839 -- catch simple cases of zero- or sign-extended load
 840 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 841   code <- intLoadCode (MOVZxL I8) addr
 842   return (Any I32 code)
 843
 844 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 845   code <- intLoadCode (MOVSxL I8) addr
 846   return (Any I32 code)
 847
 848 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 849   code <- intLoadCode (MOVZxL I16) addr
 850   return (Any I32 code)
 851
 852 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 853   code <- intLoadCode (MOVSxL I16) addr
 854   return (Any I32 code)
 855
 856 #endif
 857
 858 #if x86_64_TARGET_ARCH
 859
 860 -- catch simple cases of zero- or sign-extended load
 861 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 862   code <- intLoadCode (MOVZxL I8) addr
 863   return (Any I64 code)
 864
 865 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 866   code <- intLoadCode (MOVSxL I8) addr
 867   return (Any I64 code)
 868
 869 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 870   code <- intLoadCode (MOVZxL I16) addr
 871   return (Any I64 code)
 872
 873 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 874   code <- intLoadCode (MOVSxL I16) addr
 875   return (Any I64 code)
 876
 877 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 878   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 879   return (Any I64 code)
 880
 881 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 882   code <- intLoadCode (MOVSxL I32) addr
 883   return (Any I64 code)
 884
 885 #endif
 886
 887 #if x86_64_TARGET_ARCH
 888 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 889                                      CmmLit displacement])
 890     = return $ Any I64 (\dst -> unitOL $
 891         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 892 #endif
 893
 894 #if x86_64_TARGET_ARCH
 895 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 896   x_code <- getAnyReg x
 897   lbl <- getNewLabelNat
 898   let
 899     code dst = x_code dst `appOL` toOL [
 900         -- This is how gcc does it, so it can't be that bad:
 901         LDATA ReadOnlyData16 [
 902                 CmmAlign 16,
 903                 CmmDataLabel lbl,
 904                 CmmStaticLit (CmmInt 0x80000000 I32),
 905                 CmmStaticLit (CmmInt 0 I32),
 906                 CmmStaticLit (CmmInt 0 I32),
 907                 CmmStaticLit (CmmInt 0 I32)
 908         ],
 909         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 910                 -- xorps, so we need the 128-bit constant
 911                 -- ToDo: rip-relative
 912         ]
 913   --
 914   return (Any F32 code)
 915
 916 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 917   x_code <- getAnyReg x
 918   lbl <- getNewLabelNat
 919   let
 920         -- This is how gcc does it, so it can't be that bad:
 921     code dst = x_code dst `appOL` toOL [
 922         LDATA ReadOnlyData16 [
 923                 CmmAlign 16,
 924                 CmmDataLabel lbl,
 925                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 926                 CmmStaticLit (CmmInt 0 I64)
 927         ],
 928                 -- gcc puts an unpck here.  Wonder if we need it.
 929         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 930                 -- xorpd, so we need the 128-bit constant
 931         ]
 932   --
 933   return (Any F64 code)
 934 #endif
 935
 936 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 937
 938 getRegister (CmmMachOp mop [x]) -- unary MachOps
 939   = case mop of
 940 #if i386_TARGET_ARCH
 941       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 942       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 943 #endif
 944
 945       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 946       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 947
 948       -- Nop conversions
 949       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 950       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 951       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 952       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 953       MO_U_Conv I32 I16 -> toI16Reg I32 x
 954       MO_S_Conv I32 I16 -> toI16Reg I32 x
 955 #if x86_64_TARGET_ARCH
 956       MO_U_Conv I64 I32 -> conversionNop I64 x
 957       MO_S_Conv I64 I32 -> conversionNop I64 x
 958       MO_U_Conv I64 I16 -> toI16Reg I64 x
 959       MO_S_Conv I64 I16 -> toI16Reg I64 x
 960       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 961       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 962 #endif
 963
 964       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 965       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 966
 967       -- widenings
 968       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 969       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 970       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 971
 972       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 973       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 974       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 975
 976 #if x86_64_TARGET_ARCH
 977       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 978       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 979       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 980       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 981       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 982       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 983         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 984         -- However, we don't want the register allocator to throw it
 985         -- away as an unnecessary reg-to-reg move, so we keep it in
 986         -- the form of a movzl and print it as a movl later.
 987 #endif
 988
 989 #if i386_TARGET_ARCH
 990       MO_S_Conv F32 F64 -> conversionNop F64 x
 991       MO_S_Conv F64 F32 -> conversionNop F32 x
 992 #else
 993       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 994       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 995 #endif
 996
 997       MO_S_Conv from to
 998         | isFloatingRep from -> coerceFP2Int from to x
 999         | isFloatingRep to   -> coerceInt2FP from to x
1000
1001       other -> pprPanic "getRegister" (pprMachOp mop)
1002    where
1003         -- signed or unsigned extension.
1004         integerExtend from to instr expr = do
1005             (reg,e_code) <- if from == I8 then getByteReg expr
1006                                           else getSomeReg expr
1007             let
1008                 code dst =
1009                   e_code `snocOL`
1010                   instr from (OpReg reg) (OpReg dst)
1011             return (Any to code)
1012
1013         toI8Reg new_rep expr
1014             = do codefn <- getAnyReg expr
1015                  return (Any new_rep codefn)
1016                 -- HACK: use getAnyReg to get a byte-addressable register.
1017                 -- If the source was a Fixed register, this will add the
1018                 -- mov instruction to put it into the desired destination.
1019                 -- We're assuming that the destination won't be a fixed
1020                 -- non-byte-addressable register; it won't be, because all
1021                 -- fixed registers are word-sized.
1022
1023         toI16Reg = toI8Reg -- for now
1024
1025         conversionNop new_rep expr
1026             = do e_code <- getRegister expr
1027                  return (swizzleRegisterRep e_code new_rep)
1028
1029
1030 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1031   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1032     case mop of
1033       MO_Eq F32   -> condFltReg EQQ x y
1034       MO_Ne F32   -> condFltReg NE x y
1035       MO_S_Gt F32 -> condFltReg GTT x y
1036       MO_S_Ge F32 -> condFltReg GE x y
1037       MO_S_Lt F32 -> condFltReg LTT x y
1038       MO_S_Le F32 -> condFltReg LE x y
1039
1040       MO_Eq F64   -> condFltReg EQQ x y
1041       MO_Ne F64   -> condFltReg NE x y
1042       MO_S_Gt F64 -> condFltReg GTT x y
1043       MO_S_Ge F64 -> condFltReg GE x y
1044       MO_S_Lt F64 -> condFltReg LTT x y
1045       MO_S_Le F64 -> condFltReg LE x y
1046
1047       MO_Eq rep   -> condIntReg EQQ x y
1048       MO_Ne rep   -> condIntReg NE x y
1049
1050       MO_S_Gt rep -> condIntReg GTT x y
1051       MO_S_Ge rep -> condIntReg GE x y
1052       MO_S_Lt rep -> condIntReg LTT x y
1053       MO_S_Le rep -> condIntReg LE x y
1054
1055       MO_U_Gt rep -> condIntReg GU  x y
1056       MO_U_Ge rep -> condIntReg GEU x y
1057       MO_U_Lt rep -> condIntReg LU  x y
1058       MO_U_Le rep -> condIntReg LEU x y
1059
1060 #if i386_TARGET_ARCH
1061       MO_Add F32 -> trivialFCode F32 GADD x y
1062       MO_Sub F32 -> trivialFCode F32 GSUB x y
1063
1064       MO_Add F64 -> trivialFCode F64 GADD x y
1065       MO_Sub F64 -> trivialFCode F64 GSUB x y
1066
1067       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1068       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1069 #endif
1070
1071 #if x86_64_TARGET_ARCH
1072       MO_Add F32 -> trivialFCode F32 ADD x y
1073       MO_Sub F32 -> trivialFCode F32 SUB x y
1074
1075       MO_Add F64 -> trivialFCode F64 ADD x y
1076       MO_Sub F64 -> trivialFCode F64 SUB x y
1077
1078       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1079       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1080 #endif
1081
1082       MO_Add rep -> add_code rep x y
1083       MO_Sub rep -> sub_code rep x y
1084
1085       MO_S_Quot rep -> div_code rep True  True  x y
1086       MO_S_Rem  rep -> div_code rep True  False x y
1087       MO_U_Quot rep -> div_code rep False True  x y
1088       MO_U_Rem  rep -> div_code rep False False x y
1089
1090 #if i386_TARGET_ARCH
1091       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1092       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1093 #endif
1094
1095 #if x86_64_TARGET_ARCH
1096       MO_Mul   F32 -> trivialFCode F32 MUL x y
1097       MO_Mul   F64 -> trivialFCode F64 MUL x y
1098 #endif
1099
1100       MO_Mul   rep -> let op = IMUL rep in
1101                       trivialCode rep op (Just op) x y
1102
1103       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1104
1105       MO_And rep -> let op = AND rep in
1106                     trivialCode rep op (Just op) x y
1107       MO_Or  rep -> let op = OR  rep in
1108                     trivialCode rep op (Just op) x y
1109       MO_Xor rep -> let op = XOR rep in
1110                     trivialCode rep op (Just op) x y
1111
1112         {- Shift ops on x86s have constraints on their source, it
1113            either has to be Imm, CL or 1
1114             => trivialCode is not restrictive enough (sigh.)
1115         -}
1116       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1117       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1118       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1119
1120       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1121   where
1122     --------------------
1123     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1124     imulMayOflo rep a b = do
1125          (a_reg, a_code) <- getNonClobberedReg a
1126          b_code <- getAnyReg b
1127          let
1128              shift_amt  = case rep of
1129                            I32 -> 31
1130                            I64 -> 63
1131                            _ -> panic "shift_amt"
1132
1133              code = a_code `appOL` b_code eax `appOL`
1134                         toOL [
1135                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1136                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1137                                 -- sign extend lower part
1138                            SUB rep (OpReg edx) (OpReg eax)
1139                                 -- compare against upper
1140                            -- eax==0 if high part == sign extended low part
1141                         ]
1142          -- in
1143          return (Fixed rep eax code)
1144
1145     --------------------
1146     shift_code :: MachRep
1147                -> (Operand -> Operand -> Instr)
1148                -> CmmExpr
1149                -> CmmExpr
1150                -> NatM Register
1151
1152     {- Case1: shift length as immediate -}
1153     shift_code rep instr x y@(CmmLit lit) = do
1154           x_code <- getAnyReg x
1155           let
1156                code dst
1157                   = x_code dst `snocOL`
1158                     instr (OpImm (litToImm lit)) (OpReg dst)
1159           -- in
1160           return (Any rep code)
1161
1162     {- Case2: shift length is complex (non-immediate)
1163       * y must go in %ecx.
1164       * we cannot do y first *and* put its result in %ecx, because
1165         %ecx might be clobbered by x.
1166       * if we do y second, then x cannot be
1167         in a clobbered reg.  Also, we cannot clobber x's reg
1168         with the instruction itself.
1169       * so we can either:
1170         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1171         - do y second and put its result into %ecx.  x gets placed in a fresh
1172           tmp.  This is likely to be better, becuase the reg alloc can
1173           eliminate this reg->reg move here (it won't eliminate the other one,
1174           because the move is into the fixed %ecx).
1175     -}
1176     shift_code rep instr x y{-amount-} = do
1177         x_code <- getAnyReg x
1178         tmp <- getNewRegNat rep
1179         y_code <- getAnyReg y
1180         let
1181            code = x_code tmp `appOL`
1182                   y_code ecx `snocOL`
1183                   instr (OpReg ecx) (OpReg tmp)
1184         -- in
1185         return (Fixed rep tmp code)
1186
1187     --------------------
1188     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1189     add_code rep x (CmmLit (CmmInt y _))
1190         | not (is64BitInteger y) = add_int rep x y
1191     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1192
1193     --------------------
1194     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1195     sub_code rep x (CmmLit (CmmInt y _))
1196         | not (is64BitInteger (-y)) = add_int rep x (-y)
1197     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1198
1199     -- our three-operand add instruction:
1200     add_int rep x y = do
1201         (x_reg, x_code) <- getSomeReg x
1202         let
1203             imm = ImmInt (fromInteger y)
1204             code dst
1205                = x_code `snocOL`
1206                  LEA rep
1207                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1208                         (OpReg dst)
1209         --
1210         return (Any rep code)
1211
1212     ----------------------
1213     div_code rep signed quotient x y = do
1214            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1215            x_code <- getAnyReg x
1216            let
1217              widen | signed    = CLTD rep
1218                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1219
1220              instr | signed    = IDIV
1221                    | otherwise = DIV
1222
1223              code = y_code `appOL`
1224                     x_code eax `appOL`
1225                     toOL [widen, instr rep y_op]
1226
1227              result | quotient  = eax
1228                     | otherwise = edx
1229
1230            -- in
1231            return (Fixed rep result code)
1232
1233
1234 getRegister (CmmLoad mem pk)
1235   | isFloatingRep pk
1236   = do
1237     Amode src mem_code <- getAmode mem
1238     let
1239         code dst = mem_code `snocOL`
1240                    IF_ARCH_i386(GLD pk src dst,
1241                                 MOV pk (OpAddr src) (OpReg dst))
1242     --
1243     return (Any pk code)
1244
1245 #if i386_TARGET_ARCH
1246 getRegister (CmmLoad mem pk)
1247   | pk /= I64
1248   = do
1249     code <- intLoadCode (instr pk) mem
1250     return (Any pk code)
1251   where
1252         instr I8  = MOVZxL pk
1253         instr I16 = MOV I16
1254         instr I32 = MOV I32
1255         -- we always zero-extend 8-bit loads, if we
1256         -- can't think of anything better.  This is because
1257         -- we can't guarantee access to an 8-bit variant of every register
1258         -- (esi and edi don't have 8-bit variants), so to make things
1259         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1260 #endif
1261
1262 #if x86_64_TARGET_ARCH
1263 -- Simpler memory load code on x86_64
1264 getRegister (CmmLoad mem pk)
1265   = do
1266     code <- intLoadCode (MOV pk) mem
1267     return (Any pk code)
1268 #endif
1269
1270 getRegister (CmmLit (CmmInt 0 rep))
1271   = let
1272         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1273         adj_rep = case rep of I64 -> I32; _ -> rep
1274         rep1 = IF_ARCH_i386( rep, adj_rep )
1275         code dst
1276            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1277     in
1278         return (Any rep code)
1279
1280 #if x86_64_TARGET_ARCH
1281   -- optimisation for loading small literals on x86_64: take advantage
1282   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1283   -- instruction forms are shorter.
1284 getRegister (CmmLit lit)
1285   | I64 <- cmmLitRep lit, not (isBigLit lit)
1286   = let
1287         imm = litToImm lit
1288         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1289     in
1290         return (Any I64 code)
1291   where
1292    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1293    isBigLit _ = False
1294         -- note1: not the same as is64BitLit, because that checks for
1295         -- signed literals that fit in 32 bits, but we want unsigned
1296         -- literals here.
1297         -- note2: all labels are small, because we're assuming the
1298         -- small memory model (see gcc docs, -mcmodel=small).
1299 #endif
1300
1301 getRegister (CmmLit lit)
1302   = let
1303         rep = cmmLitRep lit
1304         imm = litToImm lit
1305         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1306     in
1307         return (Any rep code)
1308
1309 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1310
1311
1312 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1313    -> NatM (Reg -> InstrBlock)
1314 intLoadCode instr mem = do
1315   Amode src mem_code <- getAmode mem
1316   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1317
1318 -- Compute an expression into *any* register, adding the appropriate
1319 -- move instruction if necessary.
1320 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1321 getAnyReg expr = do
1322   r <- getRegister expr
1323   anyReg r
1324
1325 anyReg :: Register -> NatM (Reg -> InstrBlock)
1326 anyReg (Any _ code)          = return code
1327 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1328
1329 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1330 -- Fixed registers might not be byte-addressable, so we make sure we've
1331 -- got a temporary, inserting an extra reg copy if necessary.
1332 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1333 #if x86_64_TARGET_ARCH
1334 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1335 #else
1336 getByteReg expr = do
1337   r <- getRegister expr
1338   case r of
1339     Any rep code -> do
1340         tmp <- getNewRegNat rep
1341         return (tmp, code tmp)
1342     Fixed rep reg code
1343         | isVirtualReg reg -> return (reg,code)
1344         | otherwise -> do
1345             tmp <- getNewRegNat rep
1346             return (tmp, code `snocOL` reg2reg rep reg tmp)
1347         -- ToDo: could optimise slightly by checking for byte-addressable
1348         -- real registers, but that will happen very rarely if at all.
1349 #endif
1350
1351 -- Another variant: this time we want the result in a register that cannot
1352 -- be modified by code to evaluate an arbitrary expression.
1353 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1354 getNonClobberedReg expr = do
1355   r <- getRegister expr
1356   case r of
1357     Any rep code -> do
1358         tmp <- getNewRegNat rep
1359         return (tmp, code tmp)
1360     Fixed rep reg code
1361         -- only free regs can be clobbered
1362         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1363                 tmp <- getNewRegNat rep
1364                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1365         | otherwise ->
1366                 return (reg, code)
1367
1368 reg2reg :: MachRep -> Reg -> Reg -> Instr
1369 reg2reg rep src dst
1370 #if i386_TARGET_ARCH
1371   | isFloatingRep rep = GMOV src dst
1372 #endif
1373   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1374
1375 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1376
1377 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1378
1379 #if sparc_TARGET_ARCH
1380
1381 getRegister (CmmLit (CmmFloat f F32)) = do
1382     lbl <- getNewLabelNat
1383     let code dst = toOL [
1384             LDATA ReadOnlyData
1385                         [CmmDataLabel lbl,
1386                          CmmStaticLit (CmmFloat f F32)],
1387             SETHI (HI (ImmCLbl lbl)) dst,
1388             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1389     return (Any F32 code)
1390
1391 getRegister (CmmLit (CmmFloat d F64)) = do
1392     lbl <- getNewLabelNat
1393     let code dst = toOL [
1394             LDATA ReadOnlyData
1395                         [CmmDataLabel lbl,
1396                          CmmStaticLit (CmmFloat d F64)],
1397             SETHI (HI (ImmCLbl lbl)) dst,
1398             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1399     return (Any F64 code)
1400
1401 getRegister (CmmMachOp mop [x]) -- unary MachOps
1402   = case mop of
1403       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1404       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1405
1406       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1407       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1408
1409       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1410
1411       MO_U_Conv F64 F32-> coerceDbl2Flt x
1412       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1413
1414       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1415       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1416       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1417       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1418
1419       -- Conversions which are a nop on sparc
1420       MO_U_Conv from to
1421         | from == to   -> conversionNop to   x
1422       MO_U_Conv I32 to -> conversionNop to   x
1423       MO_S_Conv I32 to -> conversionNop to   x
1424
1425       -- widenings
1426       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1427       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1428       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1429       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1430
1431       other_op -> panic "Unknown unary mach op"
1432     where
1433         -- XXX SLL/SRL?
1434         integerExtend signed from to expr = do
1435            (reg, e_code) <- getSomeReg expr
1436            let
1437                code dst =
1438                    e_code `snocOL`
1439                    ((if signed then SRA else SRL)
1440                           reg (RIImm (ImmInt 0)) dst)
1441            return (Any to code)
1442         conversionNop new_rep expr
1443             = do e_code <- getRegister expr
1444                  return (swizzleRegisterRep e_code new_rep)
1445
1446 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1447   = case mop of
1448       MO_Eq F32 -> condFltReg EQQ x y
1449       MO_Ne F32 -> condFltReg NE x y
1450
1451       MO_S_Gt F32 -> condFltReg GTT x y
1452       MO_S_Ge F32 -> condFltReg GE x y
1453       MO_S_Lt F32 -> condFltReg LTT x y
1454       MO_S_Le F32 -> condFltReg LE x y
1455
1456       MO_Eq F64 -> condFltReg EQQ x y
1457       MO_Ne F64 -> condFltReg NE x y
1458
1459       MO_S_Gt F64 -> condFltReg GTT x y
1460       MO_S_Ge F64 -> condFltReg GE x y
1461       MO_S_Lt F64 -> condFltReg LTT x y
1462       MO_S_Le F64 -> condFltReg LE x y
1463
1464       MO_Eq rep -> condIntReg EQQ x y
1465       MO_Ne rep -> condIntReg NE x y
1466
1467       MO_S_Gt rep -> condIntReg GTT x y
1468       MO_S_Ge rep -> condIntReg GE x y
1469       MO_S_Lt rep -> condIntReg LTT x y
1470       MO_S_Le rep -> condIntReg LE x y
1471
1472       MO_U_Gt I32  -> condIntReg GTT x y
1473       MO_U_Ge I32  -> condIntReg GE x y
1474       MO_U_Lt I32  -> condIntReg LTT x y
1475       MO_U_Le I32  -> condIntReg LE x y
1476
1477       MO_U_Gt I16 -> condIntReg GU  x y
1478       MO_U_Ge I16 -> condIntReg GEU x y
1479       MO_U_Lt I16 -> condIntReg LU  x y
1480       MO_U_Le I16 -> condIntReg LEU x y
1481
1482       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1483       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1484
1485       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1486 {-
1487       -- ToDo: teach about V8+ SPARC div instructions
1488       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1489       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1490       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1491       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1492 -}
1493       MO_Add F32  -> trivialFCode F32 FADD  x y
1494       MO_Sub F32   -> trivialFCode F32  FSUB x y
1495       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1496       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1497
1498       MO_Add F64   -> trivialFCode F64 FADD  x y
1499       MO_Sub F64   -> trivialFCode F64  FSUB x y
1500       MO_Mul F64   -> trivialFCode F64  FMUL x y
1501       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1502
1503       MO_And rep   -> trivialCode rep (AND False) x y
1504       MO_Or rep    -> trivialCode rep (OR  False) x y
1505       MO_Xor rep   -> trivialCode rep (XOR False) x y
1506
1507       MO_Mul rep -> trivialCode rep (SMUL False) x y
1508
1509       MO_Shl rep   -> trivialCode rep SLL  x y
1510       MO_U_Shr rep   -> trivialCode rep SRL x y
1511       MO_S_Shr rep   -> trivialCode rep SRA x y
1512
1513 {-
1514       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1515                                          [promote x, promote y])
1516                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1517       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1518                                         [x, y])
1519 -}
1520       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1521   where
1522     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1523
1524     --------------------
1525     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1526     imulMayOflo rep a b = do
1527          (a_reg, a_code) <- getSomeReg a
1528          (b_reg, b_code) <- getSomeReg b
1529          res_lo <- getNewRegNat I32
1530          res_hi <- getNewRegNat I32
1531          let
1532             shift_amt  = case rep of
1533                           I32 -> 31
1534                           I64 -> 63
1535                           _ -> panic "shift_amt"
1536             code dst = a_code `appOL` b_code `appOL`
1537                        toOL [
1538                            SMUL False a_reg (RIReg b_reg) res_lo,
1539                            RDY res_hi,
1540                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1541                            SUB False False res_lo (RIReg res_hi) dst
1542                         ]
1543          return (Any I32 code)
1544
1545 getRegister (CmmLoad mem pk) = do
1546     Amode src code <- getAmode mem
1547     let
1548         code__2 dst = code `snocOL` LD pk src dst
1549     return (Any pk code__2)
1550
1551 getRegister (CmmLit (CmmInt i _))
1552   | fits13Bits i
1553   = let
1554         src = ImmInt (fromInteger i)
1555         code dst = unitOL (OR False g0 (RIImm src) dst)
1556     in
1557         return (Any I32 code)
1558
1559 getRegister (CmmLit lit)
1560   = let rep = cmmLitRep lit
1561         imm = litToImm lit
1562         code dst = toOL [
1563             SETHI (HI imm) dst,
1564             OR False dst (RIImm (LO imm)) dst]
1565     in return (Any I32 code)
1566
1567 #endif /* sparc_TARGET_ARCH */
1568
1569 #if powerpc_TARGET_ARCH
1570 getRegister (CmmLoad mem pk)
1571   | pk /= I64
1572   = do
1573         Amode addr addr_code <- getAmode mem
1574         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1575                        addr_code `snocOL` LD pk dst addr
1576         return (Any pk code)
1577
1578 -- catch simple cases of zero- or sign-extended load
1579 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1580     Amode addr addr_code <- getAmode mem
1581     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1582
1583 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1584
1585 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1586     Amode addr addr_code <- getAmode mem
1587     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1588
1589 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1590     Amode addr addr_code <- getAmode mem
1591     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1592
1593 getRegister (CmmMachOp mop [x]) -- unary MachOps
1594   = case mop of
1595       MO_Not rep   -> trivialUCode rep NOT x
1596
1597       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1598       MO_S_Conv F32 F64 -> conversionNop F64 x
1599
1600       MO_S_Conv from to
1601         | from == to         -> conversionNop to x
1602         | isFloatingRep from -> coerceFP2Int from to x
1603         | isFloatingRep to   -> coerceInt2FP from to x
1604
1605         -- narrowing is a nop: we treat the high bits as undefined
1606       MO_S_Conv I32 to -> conversionNop to x
1607       MO_S_Conv I16 I8 -> conversionNop I8 x
1608       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1609       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1610
1611       MO_U_Conv from to
1612         | from == to -> conversionNop to x
1613         -- narrowing is a nop: we treat the high bits as undefined
1614       MO_U_Conv I32 to -> conversionNop to x
1615       MO_U_Conv I16 I8 -> conversionNop I8 x
1616       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1617       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1618
1619       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1620       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1621       MO_S_Neg rep      -> trivialUCode rep NEG x
1622
1623     where
1624         conversionNop new_rep expr
1625             = do e_code <- getRegister expr
1626                  return (swizzleRegisterRep e_code new_rep)
1627
1628 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1629   = case mop of
1630       MO_Eq F32 -> condFltReg EQQ x y
1631       MO_Ne F32 -> condFltReg NE  x y
1632
1633       MO_S_Gt F32 -> condFltReg GTT x y
1634       MO_S_Ge F32 -> condFltReg GE  x y
1635       MO_S_Lt F32 -> condFltReg LTT x y
1636       MO_S_Le F32 -> condFltReg LE  x y
1637
1638       MO_Eq F64 -> condFltReg EQQ x y
1639       MO_Ne F64 -> condFltReg NE  x y
1640
1641       MO_S_Gt F64 -> condFltReg GTT x y
1642       MO_S_Ge F64 -> condFltReg GE  x y
1643       MO_S_Lt F64 -> condFltReg LTT x y
1644       MO_S_Le F64 -> condFltReg LE  x y
1645
1646       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1647       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1648
1649       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1650       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1651       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1652       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1653
1654       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1655       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1656       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1657       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1658
1659       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1660       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1661       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1662       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1663
1664       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1665       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1666       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1667       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1668
1669          -- optimize addition with 32-bit immediate
1670          -- (needed for PIC)
1671       MO_Add I32 ->
1672         case y of
1673           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1674             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1675           CmmLit lit
1676             -> do
1677                 (src, srcCode) <- getSomeReg x
1678                 let imm = litToImm lit
1679                     code dst = srcCode `appOL` toOL [
1680                                     ADDIS dst src (HA imm),
1681                                     ADD dst dst (RIImm (LO imm))
1682                                 ]
1683                 return (Any I32 code)
1684           _ -> trivialCode I32 True ADD x y
1685
1686       MO_Add rep -> trivialCode rep True ADD x y
1687       MO_Sub rep ->
1688         case y of    -- subfi ('substract from' with immediate) doesn't exist
1689           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1690             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1691           _ -> trivialCodeNoImm rep SUBF y x
1692
1693       MO_Mul rep -> trivialCode rep True MULLW x y
1694
1695       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1696
1697       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1698       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1699
1700       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1701       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1702
1703       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1704       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1705
1706       MO_And rep   -> trivialCode rep False AND x y
1707       MO_Or rep    -> trivialCode rep False OR x y
1708       MO_Xor rep   -> trivialCode rep False XOR x y
1709
1710       MO_Shl rep   -> trivialCode rep False SLW x y
1711       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1712       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1713
1714 getRegister (CmmLit (CmmInt i rep))
1715   | Just imm <- makeImmediate rep True i
1716   = let
1717         code dst = unitOL (LI dst imm)
1718     in
1719         return (Any rep code)
1720
1721 getRegister (CmmLit (CmmFloat f frep)) = do
1722     lbl <- getNewLabelNat
1723     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1724     Amode addr addr_code <- getAmode dynRef
1725     let code dst =
1726             LDATA ReadOnlyData  [CmmDataLabel lbl,
1727                                  CmmStaticLit (CmmFloat f frep)]
1728             `consOL` (addr_code `snocOL` LD frep dst addr)
1729     return (Any frep code)
1730
1731 getRegister (CmmLit lit)
1732   = let rep = cmmLitRep lit
1733         imm = litToImm lit
1734         code dst = toOL [
1735               LIS dst (HI imm),
1736               OR dst dst (RIImm (LO imm))
1737           ]
1738     in return (Any rep code)
1739
1740 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1741
1742     -- extend?Rep: wrap integer expression of type rep
1743     -- in a conversion to I32
1744 extendSExpr I32 x = x
1745 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1746 extendUExpr I32 x = x
1747 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1748
1749 #endif /* powerpc_TARGET_ARCH */
1750
1751
1752 -- -----------------------------------------------------------------------------
1753 --  The 'Amode' type: Memory addressing modes passed up the tree.
1754
1755 data Amode = Amode AddrMode InstrBlock
1756
1757 {-
1758 Now, given a tree (the argument to an CmmLoad) that references memory,
1759 produce a suitable addressing mode.
1760
1761 A Rule of the Game (tm) for Amodes: use of the addr bit must
1762 immediately follow use of the code part, since the code part puts
1763 values in registers which the addr then refers to.  So you can't put
1764 anything in between, lest it overwrite some of those registers.  If
1765 you need to do some other computation between the code part and use of
1766 the addr bit, first store the effective address from the amode in a
1767 temporary, then do the other computation, and then use the temporary:
1768
1769     code
1770     LEA amode, tmp
1771     ... other computation ...
1772     ... (tmp) ...
1773 -}
1774
1775 getAmode :: CmmExpr -> NatM Amode
1776 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1777
1778 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1779
1780 #if alpha_TARGET_ARCH
1781
1782 getAmode (StPrim IntSubOp [x, StInt i])
1783   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1784     getRegister x               `thenNat` \ register ->
1785     let
1786         code = registerCode register tmp
1787         reg  = registerName register tmp
1788         off  = ImmInt (-(fromInteger i))
1789     in
1790     return (Amode (AddrRegImm reg off) code)
1791
1792 getAmode (StPrim IntAddOp [x, StInt i])
1793   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1794     getRegister x               `thenNat` \ register ->
1795     let
1796         code = registerCode register tmp
1797         reg  = registerName register tmp
1798         off  = ImmInt (fromInteger i)
1799     in
1800     return (Amode (AddrRegImm reg off) code)
1801
1802 getAmode leaf
1803   | isJust imm
1804   = return (Amode (AddrImm imm__2) id)
1805   where
1806     imm = maybeImm leaf
1807     imm__2 = case imm of Just x -> x
1808
1809 getAmode other
1810   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1811     getRegister other           `thenNat` \ register ->
1812     let
1813         code = registerCode register tmp
1814         reg  = registerName register tmp
1815     in
1816     return (Amode (AddrReg reg) code)
1817
1818 #endif /* alpha_TARGET_ARCH */
1819
1820 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1821
1822 #if x86_64_TARGET_ARCH
1823
1824 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1825                                      CmmLit displacement])
1826     = return $ Amode (ripRel (litToImm displacement)) nilOL
1827
1828 #endif
1829
1830 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1831
1832 -- This is all just ridiculous, since it carefully undoes
1833 -- what mangleIndexTree has just done.
1834 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1835   | not (is64BitLit lit)
1836   -- ASSERT(rep == I32)???
1837   = do (x_reg, x_code) <- getSomeReg x
1838        let off = ImmInt (-(fromInteger i))
1839        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1840
1841 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1842   | not (is64BitLit lit)
1843   -- ASSERT(rep == I32)???
1844   = do (x_reg, x_code) <- getSomeReg x
1845        let off = ImmInt (fromInteger i)
1846        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1847
1848 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1849 -- recognised by the next rule.
1850 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1851                                   b@(CmmLit _)])
1852   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1853
1854 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1855                                         [y, CmmLit (CmmInt shift _)]])
1856   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1857   = x86_complex_amode x y shift 0
1858
1859 getAmode (CmmMachOp (MO_Add rep)
1860                 [x, CmmMachOp (MO_Add _)
1861                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1862                          CmmLit (CmmInt offset _)]])
1863   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1864   && not (is64BitInteger offset)
1865   = x86_complex_amode x y shift offset
1866
1867 getAmode (CmmMachOp (MO_Add rep) [x,y])
1868   = x86_complex_amode x y 0 0
1869
1870 getAmode (CmmLit lit) | not (is64BitLit lit)
1871   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1872
1873 getAmode expr = do
1874   (reg,code) <- getSomeReg expr
1875   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1876
1877
1878 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1879 x86_complex_amode base index shift offset
1880   = do (x_reg, x_code) <- getNonClobberedReg base
1881         -- x must be in a temp, because it has to stay live over y_code
1882         -- we could compre x_reg and y_reg and do something better here...
1883        (y_reg, y_code) <- getSomeReg index
1884        let
1885            code = x_code `appOL` y_code
1886            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1887        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1888                code)
1889
1890 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1891
1892 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1893
1894 #if sparc_TARGET_ARCH
1895
1896 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1897   | fits13Bits (-i)
1898   = do
1899        (reg, code) <- getSomeReg x
1900        let
1901          off  = ImmInt (-(fromInteger i))
1902        return (Amode (AddrRegImm reg off) code)
1903
1904
1905 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1906   | fits13Bits i
1907   = do
1908        (reg, code) <- getSomeReg x
1909        let
1910          off  = ImmInt (fromInteger i)
1911        return (Amode (AddrRegImm reg off) code)
1912
1913 getAmode (CmmMachOp (MO_Add rep) [x, y])
1914   = do
1915     (regX, codeX) <- getSomeReg x
1916     (regY, codeY) <- getSomeReg y
1917     let
1918         code = codeX `appOL` codeY
1919     return (Amode (AddrRegReg regX regY) code)
1920
1921 -- XXX Is this same as "leaf" in Stix?
1922 getAmode (CmmLit lit)
1923   = do
1924       tmp <- getNewRegNat I32
1925       let
1926         code = unitOL (SETHI (HI imm__2) tmp)
1927       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1928       where
1929          imm__2 = litToImm lit
1930
1931 getAmode other
1932   = do
1933        (reg, code) <- getSomeReg other
1934        let
1935             off  = ImmInt 0
1936        return (Amode (AddrRegImm reg off) code)
1937
1938 #endif /* sparc_TARGET_ARCH */
1939
1940 #ifdef powerpc_TARGET_ARCH
1941 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1942   | Just off <- makeImmediate I32 True (-i)
1943   = do
1944         (reg, code) <- getSomeReg x
1945         return (Amode (AddrRegImm reg off) code)
1946
1947
1948 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1949   | Just off <- makeImmediate I32 True i
1950   = do
1951         (reg, code) <- getSomeReg x
1952         return (Amode (AddrRegImm reg off) code)
1953
1954    -- optimize addition with 32-bit immediate
1955    -- (needed for PIC)
1956 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1957   = do
1958         tmp <- getNewRegNat I32
1959         (src, srcCode) <- getSomeReg x
1960         let imm = litToImm lit
1961             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1962         return (Amode (AddrRegImm tmp (LO imm)) code)
1963
1964 getAmode (CmmLit lit)
1965   = do
1966         tmp <- getNewRegNat I32
1967         let imm = litToImm lit
1968             code = unitOL (LIS tmp (HA imm))
1969         return (Amode (AddrRegImm tmp (LO imm)) code)
1970
1971 getAmode (CmmMachOp (MO_Add I32) [x, y])
1972   = do
1973         (regX, codeX) <- getSomeReg x
1974         (regY, codeY) <- getSomeReg y
1975         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1976
1977 getAmode other
1978   = do
1979         (reg, code) <- getSomeReg other
1980         let
1981             off  = ImmInt 0
1982         return (Amode (AddrRegImm reg off) code)
1983 #endif /* powerpc_TARGET_ARCH */
1984
1985 -- -----------------------------------------------------------------------------
1986 -- getOperand: sometimes any operand will do.
1987
1988 -- getNonClobberedOperand: the value of the operand will remain valid across
1989 -- the computation of an arbitrary expression, unless the expression
1990 -- is computed directly into a register which the operand refers to
1991 -- (see trivialCode where this function is used for an example).
1992
1993 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1994
1995 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1996 #if x86_64_TARGET_ARCH
1997 getNonClobberedOperand (CmmLit lit)
1998   | isSuitableFloatingPointLit lit = do
1999     lbl <- getNewLabelNat
2000     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2001                                            CmmStaticLit lit])
2002     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2003 #endif
2004 getNonClobberedOperand (CmmLit lit)
2005   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2006     return (OpImm (litToImm lit), nilOL)
2007 getNonClobberedOperand (CmmLoad mem pk)
2008   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2009     Amode src mem_code <- getAmode mem
2010     (src',save_code) <-
2011         if (amodeCouldBeClobbered src)
2012                 then do
2013                    tmp <- getNewRegNat wordRep
2014                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2015                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2016                 else
2017                    return (src, nilOL)
2018     return (OpAddr src', save_code `appOL` mem_code)
2019 getNonClobberedOperand e = do
2020     (reg, code) <- getNonClobberedReg e
2021     return (OpReg reg, code)
2022
2023 amodeCouldBeClobbered :: AddrMode -> Bool
2024 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2025
2026 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2027 regClobbered _ = False
2028
2029 -- getOperand: the operand is not required to remain valid across the
2030 -- computation of an arbitrary expression.
2031 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2032 #if x86_64_TARGET_ARCH
2033 getOperand (CmmLit lit)
2034   | isSuitableFloatingPointLit lit = do
2035     lbl <- getNewLabelNat
2036     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2037                                            CmmStaticLit lit])
2038     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2039 #endif
2040 getOperand (CmmLit lit)
2041   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2042     return (OpImm (litToImm lit), nilOL)
2043 getOperand (CmmLoad mem pk)
2044   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2045     Amode src mem_code <- getAmode mem
2046     return (OpAddr src, mem_code)
2047 getOperand e = do
2048     (reg, code) <- getSomeReg e
2049     return (OpReg reg, code)
2050
2051 isOperand :: CmmExpr -> Bool
2052 isOperand (CmmLoad _ _) = True
2053 isOperand (CmmLit lit)  = not (is64BitLit lit)
2054                           || isSuitableFloatingPointLit lit
2055 isOperand _             = False
2056
2057 -- if we want a floating-point literal as an operand, we can
2058 -- use it directly from memory.  However, if the literal is
2059 -- zero, we're better off generating it into a register using
2060 -- xor.
2061 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2062 isSuitableFloatingPointLit _ = False
2063
2064 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2065 getRegOrMem (CmmLoad mem pk)
2066   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2067     Amode src mem_code <- getAmode mem
2068     return (OpAddr src, mem_code)
2069 getRegOrMem e = do
2070     (reg, code) <- getNonClobberedReg e
2071     return (OpReg reg, code)
2072
2073 #if x86_64_TARGET_ARCH
2074 is64BitLit (CmmInt i I64) = is64BitInteger i
2075    -- assume that labels are in the range 0-2^31-1: this assumes the
2076    -- small memory model (see gcc docs, -mcmodel=small).
2077 #endif
2078 is64BitLit x = False
2079 #endif
2080
2081 is64BitInteger :: Integer -> Bool
2082 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2083   where i64 = fromIntegral i :: Int64
2084   -- a CmmInt is intended to be truncated to the appropriate
2085   -- number of bits, so here we truncate it to Int64.  This is
2086   -- important because e.g. -1 as a CmmInt might be either
2087   -- -1 or 18446744073709551615.
2088
2089 -- -----------------------------------------------------------------------------
2090 --  The 'CondCode' type:  Condition codes passed up the tree.
2091
2092 data CondCode = CondCode Bool Cond InstrBlock
2093
2094 -- Set up a condition code for a conditional branch.
2095
2096 getCondCode :: CmmExpr -> NatM CondCode
2097
2098 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2099
2100 #if alpha_TARGET_ARCH
2101 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2102 #endif /* alpha_TARGET_ARCH */
2103
2104 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2105
2106 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2107 -- yes, they really do seem to want exactly the same!
2108
2109 getCondCode (CmmMachOp mop [x, y])
2110   =
2111     case mop of
2112       MO_Eq F32 -> condFltCode EQQ x y
2113       MO_Ne F32 -> condFltCode NE  x y
2114
2115       MO_S_Gt F32 -> condFltCode GTT x y
2116       MO_S_Ge F32 -> condFltCode GE  x y
2117       MO_S_Lt F32 -> condFltCode LTT x y
2118       MO_S_Le F32 -> condFltCode LE  x y
2119
2120       MO_Eq F64 -> condFltCode EQQ x y
2121       MO_Ne F64 -> condFltCode NE  x y
2122
2123       MO_S_Gt F64 -> condFltCode GTT x y
2124       MO_S_Ge F64 -> condFltCode GE  x y
2125       MO_S_Lt F64 -> condFltCode LTT x y
2126       MO_S_Le F64 -> condFltCode LE  x y
2127
2128       MO_Eq rep -> condIntCode EQQ  x y
2129       MO_Ne rep -> condIntCode NE   x y
2130
2131       MO_S_Gt rep -> condIntCode GTT  x y
2132       MO_S_Ge rep -> condIntCode GE   x y
2133       MO_S_Lt rep -> condIntCode LTT  x y
2134       MO_S_Le rep -> condIntCode LE   x y
2135
2136       MO_U_Gt rep -> condIntCode GU   x y
2137       MO_U_Ge rep -> condIntCode GEU  x y
2138       MO_U_Lt rep -> condIntCode LU   x y
2139       MO_U_Le rep -> condIntCode LEU  x y
2140
2141       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2142
2143 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2144
2145 #elif powerpc_TARGET_ARCH
2146
2147 -- almost the same as everywhere else - but we need to
2148 -- extend small integers to 32 bit first
2149
2150 getCondCode (CmmMachOp mop [x, y])
2151   = case mop of
2152       MO_Eq F32 -> condFltCode EQQ x y
2153       MO_Ne F32 -> condFltCode NE  x y
2154
2155       MO_S_Gt F32 -> condFltCode GTT x y
2156       MO_S_Ge F32 -> condFltCode GE  x y
2157       MO_S_Lt F32 -> condFltCode LTT x y
2158       MO_S_Le F32 -> condFltCode LE  x y
2159
2160       MO_Eq F64 -> condFltCode EQQ x y
2161       MO_Ne F64 -> condFltCode NE  x y
2162
2163       MO_S_Gt F64 -> condFltCode GTT x y
2164       MO_S_Ge F64 -> condFltCode GE  x y
2165       MO_S_Lt F64 -> condFltCode LTT x y
2166       MO_S_Le F64 -> condFltCode LE  x y
2167
2168       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2169       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2170
2171       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2172       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2173       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2174       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2175
2176       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2177       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2178       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2179       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2180
2181       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2182
2183 getCondCode other =  panic "getCondCode(2)(powerpc)"
2184
2185
2186 #endif
2187
2188
2189 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2190 -- passed back up the tree.
2191
2192 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2193
2194 #if alpha_TARGET_ARCH
2195 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2196 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2197 #endif /* alpha_TARGET_ARCH */
2198
2199 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2200 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2201
2202 -- memory vs immediate
2203 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2204     Amode x_addr x_code <- getAmode x
2205     let
2206         imm  = litToImm lit
2207         code = x_code `snocOL`
2208                   CMP pk (OpImm imm) (OpAddr x_addr)
2209     --
2210     return (CondCode False cond code)
2211
2212 -- anything vs zero
2213 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2214     (x_reg, x_code) <- getSomeReg x
2215     let
2216         code = x_code `snocOL`
2217                   TEST pk (OpReg x_reg) (OpReg x_reg)
2218     --
2219     return (CondCode False cond code)
2220
2221 -- anything vs operand
2222 condIntCode cond x y | isOperand y = do
2223     (x_reg, x_code) <- getNonClobberedReg x
2224     (y_op,  y_code) <- getOperand y
2225     let
2226         code = x_code `appOL` y_code `snocOL`
2227                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2228     -- in
2229     return (CondCode False cond code)
2230
2231 -- anything vs anything
2232 condIntCode cond x y = do
2233   (y_reg, y_code) <- getNonClobberedReg y
2234   (x_op, x_code) <- getRegOrMem x
2235   let
2236         code = y_code `appOL`
2237                x_code `snocOL`
2238                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2239   -- in
2240   return (CondCode False cond code)
2241 #endif
2242
2243 #if i386_TARGET_ARCH
2244 condFltCode cond x y
2245   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2246   (x_reg, x_code) <- getNonClobberedReg x
2247   (y_reg, y_code) <- getSomeReg y
2248   let
2249         code = x_code `appOL` y_code `snocOL`
2250                 GCMP cond x_reg y_reg
2251   -- The GCMP insn does the test and sets the zero flag if comparable
2252   -- and true.  Hence we always supply EQQ as the condition to test.
2253   return (CondCode True EQQ code)
2254 #endif /* i386_TARGET_ARCH */
2255
2256 #if x86_64_TARGET_ARCH
2257 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2258 -- an operand, but the right must be a reg.  We can probably do better
2259 -- than this general case...
2260 condFltCode cond x y = do
2261   (x_reg, x_code) <- getNonClobberedReg x
2262   (y_op, y_code) <- getOperand y
2263   let
2264         code = x_code `appOL`
2265                y_code `snocOL`
2266                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2267         -- NB(1): we need to use the unsigned comparison operators on the
2268         -- result of this comparison.
2269   -- in
2270   return (CondCode True (condToUnsigned cond) code)
2271 #endif
2272
2273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2274
2275 #if sparc_TARGET_ARCH
2276
2277 condIntCode cond x (CmmLit (CmmInt y rep))
2278   | fits13Bits y
2279   = do
2280        (src1, code) <- getSomeReg x
2281        let
2282            src2 = ImmInt (fromInteger y)
2283            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2284        return (CondCode False cond code')
2285
2286 condIntCode cond x y = do
2287     (src1, code1) <- getSomeReg x
2288     (src2, code2) <- getSomeReg y
2289     let
2290         code__2 = code1 `appOL` code2 `snocOL`
2291                   SUB False True src1 (RIReg src2) g0
2292     return (CondCode False cond code__2)
2293
2294 -----------
2295 condFltCode cond x y = do
2296     (src1, code1) <- getSomeReg x
2297     (src2, code2) <- getSomeReg y
2298     tmp <- getNewRegNat F64
2299     let
2300         promote x = FxTOy F32 F64 x tmp
2301
2302         pk1   = cmmExprRep x
2303         pk2   = cmmExprRep y
2304
2305         code__2 =
2306                 if pk1 == pk2 then
2307                     code1 `appOL` code2 `snocOL`
2308                     FCMP True pk1 src1 src2
2309                 else if pk1 == F32 then
2310                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2311                     FCMP True F64 tmp src2
2312                 else
2313                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2314                     FCMP True F64 src1 tmp
2315     return (CondCode True cond code__2)
2316
2317 #endif /* sparc_TARGET_ARCH */
2318
2319 #if powerpc_TARGET_ARCH
2320 --  ###FIXME: I16 and I8!
2321 condIntCode cond x (CmmLit (CmmInt y rep))
2322   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2323   = do
2324         (src1, code) <- getSomeReg x
2325         let
2326             code' = code `snocOL`
2327                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2328         return (CondCode False cond code')
2329
2330 condIntCode cond x y = do
2331     (src1, code1) <- getSomeReg x
2332     (src2, code2) <- getSomeReg y
2333     let
2334         code' = code1 `appOL` code2 `snocOL`
2335                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2336     return (CondCode False cond code')
2337
2338 condFltCode cond x y = do
2339     (src1, code1) <- getSomeReg x
2340     (src2, code2) <- getSomeReg y
2341     let
2342         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2343         code'' = case cond of -- twiddle CR to handle unordered case
2344                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2345                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2346                     _ -> code'
2347                  where
2348                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2349     return (CondCode True cond code'')
2350
2351 #endif /* powerpc_TARGET_ARCH */
2352
2353 -- -----------------------------------------------------------------------------
2354 -- Generating assignments
2355
2356 -- Assignments are really at the heart of the whole code generation
2357 -- business.  Almost all top-level nodes of any real importance are
2358 -- assignments, which correspond to loads, stores, or register
2359 -- transfers.  If we're really lucky, some of the register transfers
2360 -- will go away, because we can use the destination register to
2361 -- complete the code generation for the right hand side.  This only
2362 -- fails when the right hand side is forced into a fixed register
2363 -- (e.g. the result of a call).
2364
2365 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2366 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2367
2368 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2369 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2370
2371 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2372
2373 #if alpha_TARGET_ARCH
2374
2375 assignIntCode pk (CmmLoad dst _) src
2376   = getNewRegNat IntRep             `thenNat` \ tmp ->
2377     getAmode dst                    `thenNat` \ amode ->
2378     getRegister src                 `thenNat` \ register ->
2379     let
2380         code1   = amodeCode amode []
2381         dst__2  = amodeAddr amode
2382         code2   = registerCode register tmp []
2383         src__2  = registerName register tmp
2384         sz      = primRepToSize pk
2385         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2386     in
2387     return code__2
2388
2389 assignIntCode pk dst src
2390   = getRegister dst                         `thenNat` \ register1 ->
2391     getRegister src                         `thenNat` \ register2 ->
2392     let
2393         dst__2  = registerName register1 zeroh
2394         code    = registerCode register2 dst__2
2395         src__2  = registerName register2 dst__2
2396         code__2 = if isFixed register2
2397                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2398                   else code
2399     in
2400     return code__2
2401
2402 #endif /* alpha_TARGET_ARCH */
2403
2404 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2405
2406 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2407
2408 -- integer assignment to memory
2409
2410 -- specific case of adding/subtracting an integer to a particular address.
2411 -- ToDo: catch other cases where we can use an operation directly on a memory
2412 -- address.
2413 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2414                                                  CmmLit (CmmInt i _)])
2415    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2416      Just instr <- check op
2417    = do Amode amode code_addr <- getAmode addr
2418         let code = code_addr `snocOL`
2419                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2420         return code
2421    where
2422         check (MO_Add _) = Just ADD
2423         check (MO_Sub _) = Just SUB
2424         check _ = Nothing
2425         -- ToDo: more?
2426
2427 -- general case
2428 assignMem_IntCode pk addr src = do
2429     Amode addr code_addr <- getAmode addr
2430     (code_src, op_src)   <- get_op_RI src
2431     let
2432         code = code_src `appOL`
2433                code_addr `snocOL`
2434                   MOV pk op_src (OpAddr addr)
2435         -- NOTE: op_src is stable, so it will still be valid
2436         -- after code_addr.  This may involve the introduction
2437         -- of an extra MOV to a temporary register, but we hope
2438         -- the register allocator will get rid of it.
2439     --
2440     return code
2441   where
2442     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2443     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2444       = return (nilOL, OpImm (litToImm lit))
2445     get_op_RI op
2446       = do (reg,code) <- getNonClobberedReg op
2447            return (code, OpReg reg)
2448
2449
2450 -- Assign; dst is a reg, rhs is mem
2451 assignReg_IntCode pk reg (CmmLoad src _) = do
2452   load_code <- intLoadCode (MOV pk) src
2453   return (load_code (getRegisterReg reg))
2454
2455 -- dst is a reg, but src could be anything
2456 assignReg_IntCode pk reg src = do
2457   code <- getAnyReg src
2458   return (code (getRegisterReg reg))
2459
2460 #endif /* i386_TARGET_ARCH */
2461
2462 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2463
2464 #if sparc_TARGET_ARCH
2465
2466 assignMem_IntCode pk addr src = do
2467     (srcReg, code) <- getSomeReg src
2468     Amode dstAddr addr_code <- getAmode addr
2469     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2470
2471 assignReg_IntCode pk reg src = do
2472     r <- getRegister src
2473     return $ case r of
2474         Any _ code         -> code dst
2475         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2476     where
2477       dst = getRegisterReg reg
2478
2479
2480 #endif /* sparc_TARGET_ARCH */
2481
2482 #if powerpc_TARGET_ARCH
2483
2484 assignMem_IntCode pk addr src = do
2485     (srcReg, code) <- getSomeReg src
2486     Amode dstAddr addr_code <- getAmode addr
2487     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2488
2489 -- dst is a reg, but src could be anything
2490 assignReg_IntCode pk reg src
2491     = do
2492         r <- getRegister src
2493         return $ case r of
2494             Any _ code         -> code dst
2495             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2496     where
2497         dst = getRegisterReg reg
2498
2499 #endif /* powerpc_TARGET_ARCH */
2500
2501
2502 -- -----------------------------------------------------------------------------
2503 -- Floating-point assignments
2504
2505 #if alpha_TARGET_ARCH
2506
2507 assignFltCode pk (CmmLoad dst _) src
2508   = getNewRegNat pk                 `thenNat` \ tmp ->
2509     getAmode dst                    `thenNat` \ amode ->
2510     getRegister src                         `thenNat` \ register ->
2511     let
2512         code1   = amodeCode amode []
2513         dst__2  = amodeAddr amode
2514         code2   = registerCode register tmp []
2515         src__2  = registerName register tmp
2516         sz      = primRepToSize pk
2517         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2518     in
2519     return code__2
2520
2521 assignFltCode pk dst src
2522   = getRegister dst                         `thenNat` \ register1 ->
2523     getRegister src                         `thenNat` \ register2 ->
2524     let
2525         dst__2  = registerName register1 zeroh
2526         code    = registerCode register2 dst__2
2527         src__2  = registerName register2 dst__2
2528         code__2 = if isFixed register2
2529                   then code . mkSeqInstr (FMOV src__2 dst__2)
2530                   else code
2531     in
2532     return code__2
2533
2534 #endif /* alpha_TARGET_ARCH */
2535
2536 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2537
2538 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2539
2540 -- Floating point assignment to memory
2541 assignMem_FltCode pk addr src = do
2542   (src_reg, src_code) <- getNonClobberedReg src
2543   Amode addr addr_code <- getAmode addr
2544   let
2545         code = src_code `appOL`
2546                addr_code `snocOL`
2547                 IF_ARCH_i386(GST pk src_reg addr,
2548                              MOV pk (OpReg src_reg) (OpAddr addr))
2549   return code
2550
2551 -- Floating point assignment to a register/temporary
2552 assignReg_FltCode pk reg src = do
2553   src_code <- getAnyReg src
2554   return (src_code (getRegisterReg reg))
2555
2556 #endif /* i386_TARGET_ARCH */
2557
2558 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2559
2560 #if sparc_TARGET_ARCH
2561
2562 -- Floating point assignment to memory
2563 assignMem_FltCode pk addr src = do
2564     Amode dst__2 code1 <- getAmode addr
2565     (src__2, code2) <- getSomeReg src
2566     tmp1 <- getNewRegNat pk
2567     let
2568         pk__2   = cmmExprRep src
2569         code__2 = code1 `appOL` code2 `appOL`
2570             if   pk == pk__2
2571             then unitOL (ST pk src__2 dst__2)
2572             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2573     return code__2
2574
2575 -- Floating point assignment to a register/temporary
2576 -- ToDo: Verify correctness
2577 assignReg_FltCode pk reg src = do
2578     r <- getRegister src
2579     v1 <- getNewRegNat pk
2580     return $ case r of
2581         Any _ code         -> code dst
2582         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2583     where
2584       dst = getRegisterReg reg
2585
2586 #endif /* sparc_TARGET_ARCH */
2587
2588 #if powerpc_TARGET_ARCH
2589
2590 -- Easy, isn't it?
2591 assignMem_FltCode = assignMem_IntCode
2592 assignReg_FltCode = assignReg_IntCode
2593
2594 #endif /* powerpc_TARGET_ARCH */
2595
2596
2597 -- -----------------------------------------------------------------------------
2598 -- Generating an non-local jump
2599
2600 -- (If applicable) Do not fill the delay slots here; you will confuse the
2601 -- register allocator.
2602
2603 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2604
2605 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2606
2607 #if alpha_TARGET_ARCH
2608
2609 genJump (CmmLabel lbl)
2610   | isAsmTemp lbl = returnInstr (BR target)
2611   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2612   where
2613     target = ImmCLbl lbl
2614
2615 genJump tree
2616   = getRegister tree                `thenNat` \ register ->
2617     getNewRegNat PtrRep             `thenNat` \ tmp ->
2618     let
2619         dst    = registerName register pv
2620         code   = registerCode register pv
2621         target = registerName register pv
2622     in
2623     if isFixed register then
2624         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2625     else
2626     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2627
2628 #endif /* alpha_TARGET_ARCH */
2629
2630 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2631
2632 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2633
2634 genJump (CmmLoad mem pk) = do
2635   Amode target code <- getAmode mem
2636   return (code `snocOL` JMP (OpAddr target))
2637
2638 genJump (CmmLit lit) = do
2639   return (unitOL (JMP (OpImm (litToImm lit))))
2640
2641 genJump expr = do
2642   (reg,code) <- getSomeReg expr
2643   return (code `snocOL` JMP (OpReg reg))
2644
2645 #endif /* i386_TARGET_ARCH */
2646
2647 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2648
2649 #if sparc_TARGET_ARCH
2650
2651 genJump (CmmLit (CmmLabel lbl))
2652   = return (toOL [CALL (Left target) 0 True, NOP])
2653   where
2654     target = ImmCLbl lbl
2655
2656 genJump tree
2657   = do
2658         (target, code) <- getSomeReg tree
2659         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2660
2661 #endif /* sparc_TARGET_ARCH */
2662
2663 #if powerpc_TARGET_ARCH
2664 genJump (CmmLit (CmmLabel lbl))
2665   = return (unitOL $ JMP lbl)
2666
2667 genJump tree
2668   = do
2669         (target,code) <- getSomeReg tree
2670         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2671 #endif /* powerpc_TARGET_ARCH */
2672
2673
2674 -- -----------------------------------------------------------------------------
2675 --  Unconditional branches
2676
2677 genBranch :: BlockId -> NatM InstrBlock
2678
2679 genBranch = return . toOL . mkBranchInstr
2680
2681 -- -----------------------------------------------------------------------------
2682 --  Conditional jumps
2683
2684 {-
2685 Conditional jumps are always to local labels, so we can use branch
2686 instructions.  We peek at the arguments to decide what kind of
2687 comparison to do.
2688
2689 ALPHA: For comparisons with 0, we're laughing, because we can just do
2690 the desired conditional branch.
2691
2692 I386: First, we have to ensure that the condition
2693 codes are set according to the supplied comparison operation.
2694
2695 SPARC: First, we have to ensure that the condition codes are set
2696 according to the supplied comparison operation.  We generate slightly
2697 different code for floating point comparisons, because a floating
2698 point operation cannot directly precede a @BF@.  We assume the worst
2699 and fill that slot with a @NOP@.
2700
2701 SPARC: Do not fill the delay slots here; you will confuse the register
2702 allocator.
2703 -}
2704
2705
2706 genCondJump
2707     :: BlockId      -- the branch target
2708     -> CmmExpr      -- the condition on which to branch
2709     -> NatM InstrBlock
2710
2711 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2712
2713 #if alpha_TARGET_ARCH
2714
2715 genCondJump id (StPrim op [x, StInt 0])
2716   = getRegister x                           `thenNat` \ register ->
2717     getNewRegNat (registerRep register)
2718                                     `thenNat` \ tmp ->
2719     let
2720         code   = registerCode register tmp
2721         value  = registerName register tmp
2722         pk     = registerRep register
2723         target = ImmCLbl lbl
2724     in
2725     returnSeq code [BI (cmpOp op) value target]
2726   where
2727     cmpOp CharGtOp = GTT
2728     cmpOp CharGeOp = GE
2729     cmpOp CharEqOp = EQQ
2730     cmpOp CharNeOp = NE
2731     cmpOp CharLtOp = LTT
2732     cmpOp CharLeOp = LE
2733     cmpOp IntGtOp = GTT
2734     cmpOp IntGeOp = GE
2735     cmpOp IntEqOp = EQQ
2736     cmpOp IntNeOp = NE
2737     cmpOp IntLtOp = LTT
2738     cmpOp IntLeOp = LE
2739     cmpOp WordGtOp = NE
2740     cmpOp WordGeOp = ALWAYS
2741     cmpOp WordEqOp = EQQ
2742     cmpOp WordNeOp = NE
2743     cmpOp WordLtOp = NEVER
2744     cmpOp WordLeOp = EQQ
2745     cmpOp AddrGtOp = NE
2746     cmpOp AddrGeOp = ALWAYS
2747     cmpOp AddrEqOp = EQQ
2748     cmpOp AddrNeOp = NE
2749     cmpOp AddrLtOp = NEVER
2750     cmpOp AddrLeOp = EQQ
2751
2752 genCondJump lbl (StPrim op [x, StDouble 0.0])
2753   = getRegister x                           `thenNat` \ register ->
2754     getNewRegNat (registerRep register)
2755                                     `thenNat` \ tmp ->
2756     let
2757         code   = registerCode register tmp
2758         value  = registerName register tmp
2759         pk     = registerRep register
2760         target = ImmCLbl lbl
2761     in
2762     return (code . mkSeqInstr (BF (cmpOp op) value target))
2763   where
2764     cmpOp FloatGtOp = GTT
2765     cmpOp FloatGeOp = GE
2766     cmpOp FloatEqOp = EQQ
2767     cmpOp FloatNeOp = NE
2768     cmpOp FloatLtOp = LTT
2769     cmpOp FloatLeOp = LE
2770     cmpOp DoubleGtOp = GTT
2771     cmpOp DoubleGeOp = GE
2772     cmpOp DoubleEqOp = EQQ
2773     cmpOp DoubleNeOp = NE
2774     cmpOp DoubleLtOp = LTT
2775     cmpOp DoubleLeOp = LE
2776
2777 genCondJump lbl (StPrim op [x, y])
2778   | fltCmpOp op
2779   = trivialFCode pr instr x y       `thenNat` \ register ->
2780     getNewRegNat F64                `thenNat` \ tmp ->
2781     let
2782         code   = registerCode register tmp
2783         result = registerName register tmp
2784         target = ImmCLbl lbl
2785     in
2786     return (code . mkSeqInstr (BF cond result target))
2787   where
2788     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2789
2790     fltCmpOp op = case op of
2791         FloatGtOp -> True
2792         FloatGeOp -> True
2793         FloatEqOp -> True
2794         FloatNeOp -> True
2795         FloatLtOp -> True
2796         FloatLeOp -> True
2797         DoubleGtOp -> True
2798         DoubleGeOp -> True
2799         DoubleEqOp -> True
2800         DoubleNeOp -> True
2801         DoubleLtOp -> True
2802         DoubleLeOp -> True
2803         _ -> False
2804     (instr, cond) = case op of
2805         FloatGtOp -> (FCMP TF LE, EQQ)
2806         FloatGeOp -> (FCMP TF LTT, EQQ)
2807         FloatEqOp -> (FCMP TF EQQ, NE)
2808         FloatNeOp -> (FCMP TF EQQ, EQQ)
2809         FloatLtOp -> (FCMP TF LTT, NE)
2810         FloatLeOp -> (FCMP TF LE, NE)
2811         DoubleGtOp -> (FCMP TF LE, EQQ)
2812         DoubleGeOp -> (FCMP TF LTT, EQQ)
2813         DoubleEqOp -> (FCMP TF EQQ, NE)
2814         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2815         DoubleLtOp -> (FCMP TF LTT, NE)
2816         DoubleLeOp -> (FCMP TF LE, NE)
2817
2818 genCondJump lbl (StPrim op [x, y])
2819   = trivialCode instr x y           `thenNat` \ register ->
2820     getNewRegNat IntRep             `thenNat` \ tmp ->
2821     let
2822         code   = registerCode register tmp
2823         result = registerName register tmp
2824         target = ImmCLbl lbl
2825     in
2826     return (code . mkSeqInstr (BI cond result target))
2827   where
2828     (instr, cond) = case op of
2829         CharGtOp -> (CMP LE, EQQ)
2830         CharGeOp -> (CMP LTT, EQQ)
2831         CharEqOp -> (CMP EQQ, NE)
2832         CharNeOp -> (CMP EQQ, EQQ)
2833         CharLtOp -> (CMP LTT, NE)
2834         CharLeOp -> (CMP LE, NE)
2835         IntGtOp -> (CMP LE, EQQ)
2836         IntGeOp -> (CMP LTT, EQQ)
2837         IntEqOp -> (CMP EQQ, NE)
2838         IntNeOp -> (CMP EQQ, EQQ)
2839         IntLtOp -> (CMP LTT, NE)
2840         IntLeOp -> (CMP LE, NE)
2841         WordGtOp -> (CMP ULE, EQQ)
2842         WordGeOp -> (CMP ULT, EQQ)
2843         WordEqOp -> (CMP EQQ, NE)
2844         WordNeOp -> (CMP EQQ, EQQ)
2845         WordLtOp -> (CMP ULT, NE)
2846         WordLeOp -> (CMP ULE, NE)
2847         AddrGtOp -> (CMP ULE, EQQ)
2848         AddrGeOp -> (CMP ULT, EQQ)
2849         AddrEqOp -> (CMP EQQ, NE)
2850         AddrNeOp -> (CMP EQQ, EQQ)
2851         AddrLtOp -> (CMP ULT, NE)
2852         AddrLeOp -> (CMP ULE, NE)
2853
2854 #endif /* alpha_TARGET_ARCH */
2855
2856 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2857
2858 #if i386_TARGET_ARCH
2859
2860 genCondJump id bool = do
2861   CondCode _ cond code <- getCondCode bool
2862   return (code `snocOL` JXX cond id)
2863
2864 #endif
2865
2866 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2867
2868 #if x86_64_TARGET_ARCH
2869
2870 genCondJump id bool = do
2871   CondCode is_float cond cond_code <- getCondCode bool
2872   if not is_float
2873     then
2874         return (cond_code `snocOL` JXX cond id)
2875     else do
2876         lbl <- getBlockIdNat
2877
2878         -- see comment with condFltReg
2879         let code = case cond of
2880                         NE  -> or_unordered
2881                         GU  -> plain_test
2882                         GEU -> plain_test
2883                         _   -> and_ordered
2884
2885             plain_test = unitOL (
2886                   JXX cond id
2887                 )
2888             or_unordered = toOL [
2889                   JXX cond id,
2890                   JXX PARITY id
2891                 ]
2892             and_ordered = toOL [
2893                   JXX PARITY lbl,
2894                   JXX cond id,
2895                   JXX ALWAYS lbl,
2896                   NEWBLOCK lbl
2897                 ]
2898         return (cond_code `appOL` code)
2899
2900 #endif
2901
2902 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2903
2904 #if sparc_TARGET_ARCH
2905
2906 genCondJump (BlockId id) bool = do
2907   CondCode is_float cond code <- getCondCode bool
2908   return (
2909        code `appOL`
2910        toOL (
2911          if   is_float
2912          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2913          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2914        )
2915     )
2916
2917 #endif /* sparc_TARGET_ARCH */
2918
2919
2920 #if powerpc_TARGET_ARCH
2921
2922 genCondJump id bool = do
2923   CondCode is_float cond code <- getCondCode bool
2924   return (code `snocOL` BCC cond id)
2925
2926 #endif /* powerpc_TARGET_ARCH */
2927
2928
2929 -- -----------------------------------------------------------------------------
2930 --  Generating C calls
2931
2932 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2933 -- @get_arg@, which moves the arguments to the correct registers/stack
2934 -- locations.  Apart from that, the code is easy.
2935 --
2936 -- (If applicable) Do not fill the delay slots here; you will confuse the
2937 -- register allocator.
2938
2939 genCCall
2940     :: CmmCallTarget            -- function to call
2941     -> [(CmmReg,MachHint)]      -- where to put the result
2942     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2943     -> Maybe [GlobalReg]        -- volatile regs to save
2944     -> NatM InstrBlock
2945
2946 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2947
2948 #if alpha_TARGET_ARCH
2949
2950 ccallResultRegs =
2951
2952 genCCall fn cconv result_regs args
2953   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2954                           `thenNat` \ ((unused,_), argCode) ->
2955     let
2956         nRegs = length allArgRegs - length unused
2957         code = asmSeqThen (map ($ []) argCode)
2958     in
2959         returnSeq code [
2960             LDA pv (AddrImm (ImmLab (ptext fn))),
2961             JSR ra (AddrReg pv) nRegs,
2962             LDGP gp (AddrReg ra)]
2963   where
2964     ------------------------
2965     {-  Try to get a value into a specific register (or registers) for
2966         a call.  The first 6 arguments go into the appropriate
2967         argument register (separate registers for integer and floating
2968         point arguments, but used in lock-step), and the remaining
2969         arguments are dumped to the stack, beginning at 0(sp).  Our
2970         first argument is a pair of the list of remaining argument
2971         registers to be assigned for this call and the next stack
2972         offset to use for overflowing arguments.  This way,
2973         @get_Arg@ can be applied to all of a call's arguments using
2974         @mapAccumLNat@.
2975     -}
2976     get_arg
2977         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2978         -> StixTree             -- Current argument
2979         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2980
2981     -- We have to use up all of our argument registers first...
2982
2983     get_arg ((iDst,fDst):dsts, offset) arg
2984       = getRegister arg                     `thenNat` \ register ->
2985         let
2986             reg  = if isFloatingRep pk then fDst else iDst
2987             code = registerCode register reg
2988             src  = registerName register reg
2989             pk   = registerRep register
2990         in
2991         return (
2992             if isFloatingRep pk then
2993                 ((dsts, offset), if isFixed register then
2994                     code . mkSeqInstr (FMOV src fDst)
2995                     else code)
2996             else
2997                 ((dsts, offset), if isFixed register then
2998                     code . mkSeqInstr (OR src (RIReg src) iDst)
2999                     else code))
3000
3001     -- Once we have run out of argument registers, we move to the
3002     -- stack...
3003
3004     get_arg ([], offset) arg
3005       = getRegister arg                 `thenNat` \ register ->
3006         getNewRegNat (registerRep register)
3007                                         `thenNat` \ tmp ->
3008         let
3009             code = registerCode register tmp
3010             src  = registerName register tmp
3011             pk   = registerRep register
3012             sz   = primRepToSize pk
3013         in
3014         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3015
3016 #endif /* alpha_TARGET_ARCH */
3017
3018 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3019
3020 #if i386_TARGET_ARCH
3021
3022 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3023         -- write barrier compiles to no code on x86/x86-64;
3024         -- we keep it this long in order to prevent earlier optimisations.
3025
3026 -- we only cope with a single result for foreign calls
3027 genCCall (CmmPrim op) [(r,_)] args vols = do
3028   case op of
3029         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3030         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3031
3032         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3033         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3034
3035         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3036         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3037
3038         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3039         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3040
3041         other_op    -> outOfLineFloatOp op r args vols
3042  where
3043   actuallyInlineFloatOp rep instr [(x,_)]
3044         = do res <- trivialUFCode rep instr x
3045              any <- anyReg res
3046              return (any (getRegisterReg r))
3047
3048 genCCall target dest_regs args vols = do
3049     let
3050         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
3051 #if !darwin_TARGET_OS
3052         tot_arg_size        = sum sizes
3053 #else
3054         raw_arg_size        = sum sizes
3055         tot_arg_size        = roundTo 16 raw_arg_size
3056         arg_pad_size        = tot_arg_size - raw_arg_size
3057     delta0 <- getDeltaNat
3058     setDeltaNat (delta0 - arg_pad_size)
3059 #endif
3060
3061     push_codes <- mapM push_arg (reverse args)
3062     delta <- getDeltaNat
3063
3064     -- in
3065     -- deal with static vs dynamic call targets
3066     (callinsns,cconv) <-
3067       case target of
3068         -- CmmPrim -> ...
3069         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3070            -> -- ToDo: stdcall arg sizes
3071               return (unitOL (CALL (Left fn_imm) []), conv)
3072            where fn_imm = ImmCLbl lbl
3073         CmmForeignCall expr conv
3074            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3075                  ASSERT(dyn_rep == I32)
3076                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3077
3078     let push_code
3079 #if darwin_TARGET_OS
3080             | arg_pad_size /= 0
3081             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3082                     DELTA (delta0 - arg_pad_size)]
3083               `appOL` concatOL push_codes
3084             | otherwise
3085 #endif
3086             = concatOL push_codes
3087         call = callinsns `appOL`
3088                toOL (
3089                         -- Deallocate parameters after call for ccall;
3090                         -- but not for stdcall (callee does it)
3091                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3092                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3093                   ++
3094                   [DELTA (delta + tot_arg_size)]
3095                )
3096     -- in
3097     setDeltaNat (delta + tot_arg_size)
3098
3099     let
3100         -- assign the results, if necessary
3101         assign_code []     = nilOL
3102         assign_code [(dest,_hint)] =
3103           case rep of
3104                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3105                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3106                 F32 -> unitOL (GMOV fake0 r_dest)
3107                 F64 -> unitOL (GMOV fake0 r_dest)
3108                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3109           where
3110                 r_dest_hi = getHiVRegFromLo r_dest
3111                 rep = cmmRegRep dest
3112                 r_dest = getRegisterReg dest
3113         assign_code many = panic "genCCall.assign_code many"
3114
3115     return (push_code `appOL`
3116             call `appOL`
3117             assign_code dest_regs)
3118
3119   where
3120     arg_size F64 = 8
3121     arg_size F32 = 4
3122     arg_size I64 = 8
3123     arg_size _   = 4
3124
3125     roundTo a x | x `mod` a == 0 = x
3126                 | otherwise = x + a - (x `mod` a)
3127
3128
3129     push_arg :: (CmmExpr,MachHint){-current argument-}
3130                     -> NatM InstrBlock  -- code
3131
3132     push_arg (arg,_hint) -- we don't need the hints on x86
3133       | arg_rep == I64 = do
3134         ChildCode64 code r_lo <- iselExpr64 arg
3135         delta <- getDeltaNat
3136         setDeltaNat (delta - 8)
3137         let
3138             r_hi = getHiVRegFromLo r_lo
3139         -- in
3140         return (       code `appOL`
3141                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3142                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3143                              DELTA (delta-8)]
3144             )
3145
3146       | otherwise = do
3147         (code, reg, sz) <- get_op arg
3148         delta <- getDeltaNat
3149         let size = arg_size sz
3150         setDeltaNat (delta-size)
3151         if (case sz of F64 -> True; F32 -> True; _ -> False)
3152            then return (code `appOL`
3153                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3154                               DELTA (delta-size),
3155                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3156                                                         EAIndexNone
3157                                                         (ImmInt 0))]
3158                        )
3159            else return (code `snocOL`
3160                         PUSH I32 (OpReg reg) `snocOL`
3161                         DELTA (delta-size)
3162                        )
3163       where
3164          arg_rep = cmmExprRep arg
3165
3166     ------------
3167     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3168     get_op op = do
3169         (reg,code) <- getSomeReg op
3170         return (code, reg, cmmExprRep op)
3171
3172 #endif /* i386_TARGET_ARCH */
3173
3174 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3175
3176 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3177   -> Maybe [GlobalReg] -> NatM InstrBlock
3178 outOfLineFloatOp mop res args vols
3179   = do
3180       targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3181       let target = CmmForeignCall targetExpr CCallConv
3182
3183       if cmmRegRep res == F64
3184         then
3185           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3186         else do
3187           uq <- getUniqueNat
3188           let
3189             tmp = CmmLocal (LocalReg uq F64)
3190           -- in
3191           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3192           code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3193           return (code1 `appOL` code2)
3194   where
3195         lbl = mkForeignLabel fn Nothing False
3196
3197         fn = case mop of
3198               MO_F32_Sqrt  -> FSLIT("sqrtf")
3199               MO_F32_Sin   -> FSLIT("sinf")
3200               MO_F32_Cos   -> FSLIT("cosf")
3201               MO_F32_Tan   -> FSLIT("tanf")
3202               MO_F32_Exp   -> FSLIT("expf")
3203               MO_F32_Log   -> FSLIT("logf")
3204
3205               MO_F32_Asin  -> FSLIT("asinf")
3206               MO_F32_Acos  -> FSLIT("acosf")
3207               MO_F32_Atan  -> FSLIT("atanf")
3208
3209               MO_F32_Sinh  -> FSLIT("sinhf")
3210               MO_F32_Cosh  -> FSLIT("coshf")
3211               MO_F32_Tanh  -> FSLIT("tanhf")
3212               MO_F32_Pwr   -> FSLIT("powf")
3213
3214               MO_F64_Sqrt  -> FSLIT("sqrt")
3215               MO_F64_Sin   -> FSLIT("sin")
3216               MO_F64_Cos   -> FSLIT("cos")
3217               MO_F64_Tan   -> FSLIT("tan")
3218               MO_F64_Exp   -> FSLIT("exp")
3219               MO_F64_Log   -> FSLIT("log")
3220
3221               MO_F64_Asin  -> FSLIT("asin")
3222               MO_F64_Acos  -> FSLIT("acos")
3223               MO_F64_Atan  -> FSLIT("atan")
3224
3225               MO_F64_Sinh  -> FSLIT("sinh")
3226               MO_F64_Cosh  -> FSLIT("cosh")
3227               MO_F64_Tanh  -> FSLIT("tanh")
3228               MO_F64_Pwr   -> FSLIT("pow")
3229
3230 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3231
3232 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3233
3234 #if x86_64_TARGET_ARCH
3235
3236 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3237         -- write barrier compiles to no code on x86/x86-64;
3238         -- we keep it this long in order to prevent earlier optimisations.
3239
3240 genCCall (CmmPrim op) [(r,_)] args vols =
3241   outOfLineFloatOp op r args vols
3242
3243 genCCall target dest_regs args vols = do
3244
3245         -- load up the register arguments
3246     (stack_args, aregs, fregs, load_args_code)
3247          <- load_args args allArgRegs allFPArgRegs nilOL
3248
3249     let
3250         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3251         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3252         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3253                 -- for annotating the call instruction with
3254
3255         sse_regs = length fp_regs_used
3256
3257         tot_arg_size = arg_size * length stack_args
3258
3259         -- On entry to the called function, %rsp should be aligned
3260         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3261         -- the return address is 16-byte aligned).  In STG land
3262         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3263         -- need to make sure we push a multiple of 16-bytes of args,
3264         -- plus the return address, to get the correct alignment.
3265         -- Urg, this is hard.  We need to feed the delta back into
3266         -- the arg pushing code.
3267     (real_size, adjust_rsp) <-
3268         if tot_arg_size `rem` 16 == 0
3269             then return (tot_arg_size, nilOL)
3270             else do -- we need to adjust...
3271                 delta <- getDeltaNat
3272                 setDeltaNat (delta-8)
3273                 return (tot_arg_size+8, toOL [
3274                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3275                                 DELTA (delta-8)
3276                         ])
3277
3278         -- push the stack args, right to left
3279     push_code <- push_args (reverse stack_args) nilOL
3280     delta <- getDeltaNat
3281
3282     -- deal with static vs dynamic call targets
3283     (callinsns,cconv) <-
3284       case target of
3285         -- CmmPrim -> ...
3286         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3287            -> -- ToDo: stdcall arg sizes
3288               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3289            where fn_imm = ImmCLbl lbl
3290         CmmForeignCall expr conv
3291            -> do (dyn_r, dyn_c) <- getSomeReg expr
3292                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3293
3294     let
3295         -- The x86_64 ABI requires us to set %al to the number of SSE
3296         -- registers that contain arguments, if the called routine
3297         -- is a varargs function.  We don't know whether it's a
3298         -- varargs function or not, so we have to assume it is.
3299         --
3300         -- It's not safe to omit this assignment, even if the number
3301         -- of SSE regs in use is zero.  If %al is larger than 8
3302         -- on entry to a varargs function, seg faults ensue.
3303         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3304
3305     let call = callinsns `appOL`
3306                toOL (
3307                         -- Deallocate parameters after call for ccall;
3308                         -- but not for stdcall (callee does it)
3309                   (if cconv == StdCallConv || real_size==0 then [] else
3310                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3311                   ++
3312                   [DELTA (delta + real_size)]
3313                )
3314     -- in
3315     setDeltaNat (delta + real_size)
3316
3317     let
3318         -- assign the results, if necessary
3319         assign_code []     = nilOL
3320         assign_code [(dest,_hint)] =
3321           case rep of
3322                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3323                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3324                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3325           where
3326                 rep = cmmRegRep dest
3327                 r_dest = getRegisterReg dest
3328         assign_code many = panic "genCCall.assign_code many"
3329
3330     return (load_args_code      `appOL`
3331             adjust_rsp          `appOL`
3332             push_code           `appOL`
3333             assign_eax sse_regs `appOL`
3334             call                `appOL`
3335             assign_code dest_regs)
3336
3337   where
3338     arg_size = 8 -- always, at the mo
3339
3340     load_args :: [(CmmExpr,MachHint)]
3341               -> [Reg]                  -- int regs avail for args
3342               -> [Reg]                  -- FP regs avail for args
3343               -> InstrBlock
3344               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3345     load_args args [] [] code     =  return (args, [], [], code)
3346         -- no more regs to use
3347     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3348         -- no more args to push
3349     load_args ((arg,hint) : rest) aregs fregs code
3350         | isFloatingRep arg_rep =
3351         case fregs of
3352           [] -> push_this_arg
3353           (r:rs) -> do
3354              arg_code <- getAnyReg arg
3355              load_args rest aregs rs (code `appOL` arg_code r)
3356         | otherwise =
3357         case aregs of
3358           [] -> push_this_arg
3359           (r:rs) -> do
3360              arg_code <- getAnyReg arg
3361              load_args rest rs fregs (code `appOL` arg_code r)
3362         where
3363           arg_rep = cmmExprRep arg
3364
3365           push_this_arg = do
3366             (args',ars,frs,code') <- load_args rest aregs fregs code
3367             return ((arg,hint):args', ars, frs, code')
3368
3369     push_args [] code = return code
3370     push_args ((arg,hint):rest) code
3371        | isFloatingRep arg_rep = do
3372          (arg_reg, arg_code) <- getSomeReg arg
3373          delta <- getDeltaNat
3374          setDeltaNat (delta-arg_size)
3375          let code' = code `appOL` arg_code `appOL` toOL [
3376                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3377                         DELTA (delta-arg_size),
3378                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3379          push_args rest code'
3380
3381        | otherwise = do
3382        -- we only ever generate word-sized function arguments.  Promotion
3383        -- has already happened: our Int8# type is kept sign-extended
3384        -- in an Int#, for example.
3385          ASSERT(arg_rep == I64) return ()
3386          (arg_op, arg_code) <- getOperand arg
3387          delta <- getDeltaNat
3388          setDeltaNat (delta-arg_size)
3389          let code' = code `appOL` toOL [PUSH I64 arg_op,
3390                                         DELTA (delta-arg_size)]
3391          push_args rest code'
3392         where
3393           arg_rep = cmmExprRep arg
3394 #endif
3395
3396 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3397
3398 #if sparc_TARGET_ARCH
3399 {-
3400    The SPARC calling convention is an absolute
3401    nightmare.  The first 6x32 bits of arguments are mapped into
3402    %o0 through %o5, and the remaining arguments are dumped to the
3403    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3404
3405    If we have to put args on the stack, move %o6==%sp down by
3406    the number of words to go on the stack, to ensure there's enough space.
3407
3408    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3409    16 words above the stack pointer is a word for the address of
3410    a structure return value.  I use this as a temporary location
3411    for moving values from float to int regs.  Certainly it isn't
3412    safe to put anything in the 16 words starting at %sp, since
3413    this area can get trashed at any time due to window overflows
3414    caused by signal handlers.
3415
3416    A final complication (if the above isn't enough) is that
3417    we can't blithely calculate the arguments one by one into
3418    %o0 .. %o5.  Consider the following nested calls:
3419
3420        fff a (fff b c)
3421
3422    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3423    the inner call will itself use %o0, which trashes the value put there
3424    in preparation for the outer call.  Upshot: we need to calculate the
3425    args into temporary regs, and move those to arg regs or onto the
3426    stack only immediately prior to the call proper.  Sigh.
3427 -}
3428
3429 genCCall target dest_regs argsAndHints vols = do
3430     let
3431         args = map fst argsAndHints
3432     argcode_and_vregs <- mapM arg_to_int_vregs args
3433     let
3434         (argcodes, vregss) = unzip argcode_and_vregs
3435         n_argRegs          = length allArgRegs
3436         n_argRegs_used     = min (length vregs) n_argRegs
3437         vregs              = concat vregss
3438     -- deal with static vs dynamic call targets
3439     callinsns <- (case target of
3440         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3441                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3442         CmmForeignCall expr conv -> do
3443                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3444                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3445         CmmPrim mop -> do
3446                   (res, reduce) <- outOfLineFloatOp mop
3447                   lblOrMopExpr <- case res of
3448                        Left lbl -> do
3449                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3450                        Right mopExpr -> do
3451                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3452                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3453                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3454
3455       )
3456     let
3457         argcode = concatOL argcodes
3458         (move_sp_down, move_sp_up)
3459            = let diff = length vregs - n_argRegs
3460                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3461              in  if   nn <= 0
3462                  then (nilOL, nilOL)
3463                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3464         transfer_code
3465            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3466     return (argcode       `appOL`
3467             move_sp_down  `appOL`
3468             transfer_code `appOL`
3469             callinsns     `appOL`
3470             unitOL NOP    `appOL`
3471             move_sp_up)
3472   where
3473      -- move args from the integer vregs into which they have been
3474      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3475      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3476
3477      move_final [] _ offset          -- all args done
3478         = []
3479
3480      move_final (v:vs) [] offset     -- out of aregs; move to stack
3481         = ST I32 v (spRel offset)
3482           : move_final vs [] (offset+1)
3483
3484      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3485         = OR False g0 (RIReg v) a
3486           : move_final vs az offset
3487
3488      -- generate code to calculate an argument, and move it into one
3489      -- or two integer vregs.
3490      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3491      arg_to_int_vregs arg
3492         | (cmmExprRep arg) == I64
3493         = do
3494           (ChildCode64 code r_lo) <- iselExpr64 arg
3495           let
3496               r_hi = getHiVRegFromLo r_lo
3497           return (code, [r_hi, r_lo])
3498         | otherwise
3499         = do
3500           (src, code) <- getSomeReg arg
3501           tmp <- getNewRegNat (cmmExprRep arg)
3502           let
3503               pk   = cmmExprRep arg
3504           case pk of
3505              F64 -> do
3506                       v1 <- getNewRegNat I32
3507                       v2 <- getNewRegNat I32
3508                       return (
3509                         code                          `snocOL`
3510                         FMOV F64 src f0                `snocOL`
3511                         ST   F32  f0 (spRel 16)         `snocOL`
3512                         LD   I32  (spRel 16) v1         `snocOL`
3513                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3514                         LD   I32  (spRel 16) v2
3515                         ,
3516                         [v1,v2]
3517                        )
3518              F32 -> do
3519                       v1 <- getNewRegNat I32
3520                       return (
3521                         code                    `snocOL`
3522                         ST   F32  src (spRel 16)  `snocOL`
3523                         LD   I32  (spRel 16) v1
3524                         ,
3525                         [v1]
3526                        )
3527              other -> do
3528                         v1 <- getNewRegNat I32
3529                         return (
3530                           code `snocOL` OR False g0 (RIReg src) v1
3531                           ,
3532                           [v1]
3533                          )
3534 outOfLineFloatOp mop =
3535     do
3536       mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3537                   mkForeignLabel functionName Nothing True
3538       let mopLabelOrExpr = case mopExpr of
3539                         CmmLit (CmmLabel lbl) -> Left lbl
3540                         _ -> Right mopExpr
3541       return (mopLabelOrExpr, reduce)
3542             where
3543                 (reduce, functionName) = case mop of
3544                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3545                   MO_F32_Log    -> (True,  FSLIT("log"))
3546                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3547
3548                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3549                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3550                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3551
3552                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3553                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3554                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3555
3556                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3557                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3558                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3559
3560                   MO_F64_Exp    -> (False, FSLIT("exp"))
3561                   MO_F64_Log    -> (False, FSLIT("log"))
3562                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3563
3564                   MO_F64_Sin    -> (False, FSLIT("sin"))
3565                   MO_F64_Cos    -> (False, FSLIT("cos"))
3566                   MO_F64_Tan    -> (False, FSLIT("tan"))
3567
3568                   MO_F64_Asin   -> (False, FSLIT("asin"))
3569                   MO_F64_Acos   -> (False, FSLIT("acos"))
3570                   MO_F64_Atan   -> (False, FSLIT("atan"))
3571
3572                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3573                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3574                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3575
3576                   other -> pprPanic "outOfLineFloatOp(sparc) "
3577                                 (pprCallishMachOp mop)
3578
3579 #endif /* sparc_TARGET_ARCH */
3580
3581 #if powerpc_TARGET_ARCH
3582
3583 #if darwin_TARGET_OS || linux_TARGET_OS
3584 {-
3585     The PowerPC calling convention for Darwin/Mac OS X
3586     is described in Apple's document
3587     "Inside Mac OS X - Mach-O Runtime Architecture".
3588
3589     PowerPC Linux uses the System V Release 4 Calling Convention
3590     for PowerPC. It is described in the
3591     "System V Application Binary Interface PowerPC Processor Supplement".
3592
3593     Both conventions are similar:
3594     Parameters may be passed in general-purpose registers starting at r3, in
3595     floating point registers starting at f1, or on the stack.
3596
3597     But there are substantial differences:
3598     * The number of registers used for parameter passing and the exact set of
3599       nonvolatile registers differs (see MachRegs.lhs).
3600     * On Darwin, stack space is always reserved for parameters, even if they are
3601       passed in registers. The called routine may choose to save parameters from
3602       registers to the corresponding space on the stack.
3603     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3604       parameter is passed in an FPR.
3605     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3606       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3607       Darwin just treats an I64 like two separate I32s (high word first).
3608     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3609       4-byte aligned like everything else on Darwin.
3610     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3611       PowerPC Linux does not agree, so neither do we.
3612
3613     According to both conventions, The parameter area should be part of the
3614     caller's stack frame, allocated in the caller's prologue code (large enough
3615     to hold the parameter lists for all called routines). The NCG already
3616     uses the stack for register spilling, leaving 64 bytes free at the top.
3617     If we need a larger parameter area than that, we just allocate a new stack
3618     frame just before ccalling.
3619 -}
3620
3621
3622 genCCall (CmmPrim MO_WriteBarrier) _ _ _
3623  = return $ unitOL LWSYNC
3624
3625 genCCall target dest_regs argsAndHints vols
3626   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3627         -- we rely on argument promotion in the codeGen
3628     do
3629         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3630                                                         (zip args argReps)
3631                                                         allArgRegs allFPArgRegs
3632                                                         initialStackOffset
3633                                                         (toOL []) []
3634
3635         (labelOrExpr, reduceToF32) <- case target of
3636             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3637             CmmForeignCall expr conv -> return  (Right expr, False)
3638             CmmPrim mop -> outOfLineFloatOp mop
3639
3640         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3641             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3642
3643         case labelOrExpr of
3644             Left lbl -> do
3645                 return (         codeBefore
3646                         `snocOL` BL lbl usedRegs
3647                         `appOL`  codeAfter)
3648             Right dyn -> do
3649                 (dynReg, dynCode) <- getSomeReg dyn
3650                 return (         dynCode
3651                         `snocOL` MTCTR dynReg
3652                         `appOL`  codeBefore
3653                         `snocOL` BCTRL usedRegs
3654                         `appOL`  codeAfter)
3655     where
3656 #if darwin_TARGET_OS
3657         initialStackOffset = 24
3658             -- size of linkage area + size of arguments, in bytes
3659         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3660                                        map machRepByteWidth argReps
3661 #elif linux_TARGET_OS
3662         initialStackOffset = 8
3663         stackDelta finalStack = roundTo 16 finalStack
3664 #endif
3665         args = map fst argsAndHints
3666         argReps = map cmmExprRep args
3667
3668         roundTo a x | x `mod` a == 0 = x
3669                     | otherwise = x + a - (x `mod` a)
3670
3671         move_sp_down finalStack
3672                | delta > 64 =
3673                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3674                               DELTA (-delta)]
3675                | otherwise = nilOL
3676                where delta = stackDelta finalStack
3677         move_sp_up finalStack
3678                | delta > 64 =
3679                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3680                               DELTA 0]
3681                | otherwise = nilOL
3682                where delta = stackDelta finalStack
3683
3684
3685         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3686         passArguments ((arg,I64):args) gprs fprs stackOffset
3687                accumCode accumUsed =
3688             do
3689                 ChildCode64 code vr_lo <- iselExpr64 arg
3690                 let vr_hi = getHiVRegFromLo vr_lo
3691
3692 #if darwin_TARGET_OS
3693                 passArguments args
3694                               (drop 2 gprs)
3695                               fprs
3696                               (stackOffset+8)
3697                               (accumCode `appOL` code
3698                                     `snocOL` storeWord vr_hi gprs stackOffset
3699                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3700                               ((take 2 gprs) ++ accumUsed)
3701             where
3702                 storeWord vr (gpr:_) offset = MR gpr vr
3703                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3704
3705 #elif linux_TARGET_OS
3706                 let stackOffset' = roundTo 8 stackOffset
3707                     stackCode = accumCode `appOL` code
3708                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3709                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3710                     regCode hireg loreg =
3711                         accumCode `appOL` code
3712                             `snocOL` MR hireg vr_hi
3713                             `snocOL` MR loreg vr_lo
3714
3715                 case gprs of
3716                     hireg : loreg : regs | even (length gprs) ->
3717                         passArguments args regs fprs stackOffset
3718                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3719                     _skipped : hireg : loreg : regs ->
3720                         passArguments args regs fprs stackOffset
3721                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3722                     _ -> -- only one or no regs left
3723                         passArguments args [] fprs (stackOffset'+8)
3724                                       stackCode accumUsed
3725 #endif
3726
3727         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3728             | reg : _ <- regs = do
3729                 register <- getRegister arg
3730                 let code = case register of
3731                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3732                             Any _ acode -> acode reg
3733                 passArguments args
3734                               (drop nGprs gprs)
3735                               (drop nFprs fprs)
3736 #if darwin_TARGET_OS
3737         -- The Darwin ABI requires that we reserve stack slots for register parameters
3738                               (stackOffset + stackBytes)
3739 #elif linux_TARGET_OS
3740         -- ... the SysV ABI doesn't.
3741                               stackOffset
3742 #endif
3743                               (accumCode `appOL` code)
3744                               (reg : accumUsed)
3745             | otherwise = do
3746                 (vr, code) <- getSomeReg arg
3747                 passArguments args
3748                               (drop nGprs gprs)
3749                               (drop nFprs fprs)
3750                               (stackOffset' + stackBytes)
3751                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3752                               accumUsed
3753             where
3754 #if darwin_TARGET_OS
3755         -- stackOffset is at least 4-byte aligned
3756         -- The Darwin ABI is happy with that.
3757                 stackOffset' = stackOffset
3758 #else
3759         -- ... the SysV ABI requires 8-byte alignment for doubles.
3760                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3761                              | otherwise  =           stackOffset
3762 #endif
3763                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3764                 (nGprs, nFprs, stackBytes, regs) = case rep of
3765                     I32 -> (1, 0, 4, gprs)
3766 #if darwin_TARGET_OS
3767         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3768         -- we use the FPRs.
3769                     F32 -> (1, 1, 4, fprs)
3770                     F64 -> (2, 1, 8, fprs)
3771 #elif linux_TARGET_OS
3772         -- ... the SysV ABI doesn't.
3773                     F32 -> (0, 1, 4, fprs)
3774                     F64 -> (0, 1, 8, fprs)
3775 #endif
3776
3777         moveResult reduceToF32 =
3778             case dest_regs of
3779                 [] -> nilOL
3780                 [(dest, _hint)]
3781                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3782                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3783                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3784                                           MR r_dest r4]
3785                     | otherwise -> unitOL (MR r_dest r3)
3786                     where rep = cmmRegRep dest
3787                           r_dest = getRegisterReg dest
3788
3789         outOfLineFloatOp mop =
3790             do
3791                 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3792                               mkForeignLabel functionName Nothing True
3793                 let mopLabelOrExpr = case mopExpr of
3794                         CmmLit (CmmLabel lbl) -> Left lbl
3795                         _ -> Right mopExpr
3796                 return (mopLabelOrExpr, reduce)
3797             where
3798                 (functionName, reduce) = case mop of
3799                     MO_F32_Exp   -> (FSLIT("exp"), True)
3800                     MO_F32_Log   -> (FSLIT("log"), True)
3801                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3802
3803                     MO_F32_Sin   -> (FSLIT("sin"), True)
3804                     MO_F32_Cos   -> (FSLIT("cos"), True)
3805                     MO_F32_Tan   -> (FSLIT("tan"), True)
3806
3807                     MO_F32_Asin  -> (FSLIT("asin"), True)
3808                     MO_F32_Acos  -> (FSLIT("acos"), True)
3809                     MO_F32_Atan  -> (FSLIT("atan"), True)
3810
3811                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3812                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3813                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3814                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3815
3816                     MO_F64_Exp   -> (FSLIT("exp"), False)
3817                     MO_F64_Log   -> (FSLIT("log"), False)
3818                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3819
3820                     MO_F64_Sin   -> (FSLIT("sin"), False)
3821                     MO_F64_Cos   -> (FSLIT("cos"), False)
3822                     MO_F64_Tan   -> (FSLIT("tan"), False)
3823
3824                     MO_F64_Asin  -> (FSLIT("asin"), False)
3825                     MO_F64_Acos  -> (FSLIT("acos"), False)
3826                     MO_F64_Atan  -> (FSLIT("atan"), False)
3827
3828                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3829                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3830                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3831                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3832                     other -> pprPanic "genCCall(ppc): unknown callish op"
3833                                     (pprCallishMachOp other)
3834
3835 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3836
3837 #endif /* powerpc_TARGET_ARCH */
3838
3839
3840 -- -----------------------------------------------------------------------------
3841 -- Generating a table-branch
3842
3843 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3844
3845 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3846 genSwitch expr ids
3847   | opt_PIC
3848   = do
3849         (reg,e_code) <- getSomeReg expr
3850         lbl <- getNewLabelNat
3851         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3852         (tableReg,t_code) <- getSomeReg $ dynRef
3853         let
3854             jumpTable = map jumpTableEntryRel ids
3855
3856             jumpTableEntryRel Nothing
3857                 = CmmStaticLit (CmmInt 0 wordRep)
3858             jumpTableEntryRel (Just (BlockId id))
3859                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3860                 where blockLabel = mkAsmTempLabel id
3861
3862             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3863                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3864
3865 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3866     -- on Mac OS X/x86_64, put the jump table in the text section
3867     -- to work around a limitation of the linker.
3868     -- ld64 is unable to handle the relocations for
3869     --     .quad L1 - L0
3870     -- if L0 is not preceded by a non-anonymous label in its section.
3871
3872             code = e_code `appOL` t_code `appOL` toOL [
3873                             ADD wordRep op (OpReg tableReg),
3874                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3875                             LDATA Text (CmmDataLabel lbl : jumpTable)
3876                     ]
3877 #else
3878             code = e_code `appOL` t_code `appOL` toOL [
3879                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3880                             ADD wordRep op (OpReg tableReg),
3881                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3882                     ]
3883 #endif
3884         return code
3885   | otherwise
3886   = do
3887         (reg,e_code) <- getSomeReg expr
3888         lbl <- getNewLabelNat
3889         let
3890             jumpTable = map jumpTableEntry ids
3891             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3892             code = e_code `appOL` toOL [
3893                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3894                     JMP_TBL op [ id | Just id <- ids ]
3895                  ]
3896         -- in
3897         return code
3898 #elif powerpc_TARGET_ARCH
3899 genSwitch expr ids
3900   | opt_PIC
3901   = do
3902         (reg,e_code) <- getSomeReg expr
3903         tmp <- getNewRegNat I32
3904         lbl <- getNewLabelNat
3905         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3906         (tableReg,t_code) <- getSomeReg $ dynRef
3907         let
3908             jumpTable = map jumpTableEntryRel ids
3909
3910             jumpTableEntryRel Nothing
3911                 = CmmStaticLit (CmmInt 0 wordRep)
3912             jumpTableEntryRel (Just (BlockId id))
3913                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3914                 where blockLabel = mkAsmTempLabel id
3915
3916             code = e_code `appOL` t_code `appOL` toOL [
3917                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3918                             SLW tmp reg (RIImm (ImmInt 2)),
3919                             LD I32 tmp (AddrRegReg tableReg tmp),
3920                             ADD tmp tmp (RIReg tableReg),
3921                             MTCTR tmp,
3922                             BCTR [ id | Just id <- ids ]
3923                     ]
3924         return code
3925   | otherwise
3926   = do
3927         (reg,e_code) <- getSomeReg expr
3928         tmp <- getNewRegNat I32
3929         lbl <- getNewLabelNat
3930         let
3931             jumpTable = map jumpTableEntry ids
3932
3933             code = e_code `appOL` toOL [
3934                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3935                             SLW tmp reg (RIImm (ImmInt 2)),
3936                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3937                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3938                             MTCTR tmp,
3939                             BCTR [ id | Just id <- ids ]
3940                     ]
3941         return code
3942 #else
3943 genSwitch expr ids = panic "ToDo: genSwitch"
3944 #endif
3945
3946 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3947 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3948     where blockLabel = mkAsmTempLabel id
3949
3950 -- -----------------------------------------------------------------------------
3951 -- Support bits
3952 -- -----------------------------------------------------------------------------
3953
3954
3955 -- -----------------------------------------------------------------------------
3956 -- 'condIntReg' and 'condFltReg': condition codes into registers
3957
3958 -- Turn those condition codes into integers now (when they appear on
3959 -- the right hand side of an assignment).
3960 --
3961 -- (If applicable) Do not fill the delay slots here; you will confuse the
3962 -- register allocator.
3963
3964 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3965
3966 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3967
3968 #if alpha_TARGET_ARCH
3969 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3970 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3971 #endif /* alpha_TARGET_ARCH */
3972
3973 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3974
3975 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3976
3977 condIntReg cond x y = do
3978   CondCode _ cond cond_code <- condIntCode cond x y
3979   tmp <- getNewRegNat I8
3980   let
3981         code dst = cond_code `appOL` toOL [
3982                     SETCC cond (OpReg tmp),
3983                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3984                   ]
3985   -- in
3986   return (Any I32 code)
3987
3988 #endif
3989
3990 #if i386_TARGET_ARCH
3991
3992 condFltReg cond x y = do
3993   CondCode _ cond cond_code <- condFltCode cond x y
3994   tmp <- getNewRegNat I8
3995   let
3996         code dst = cond_code `appOL` toOL [
3997                     SETCC cond (OpReg tmp),
3998                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3999                   ]
4000   -- in
4001   return (Any I32 code)
4002
4003 #endif
4004
4005 #if x86_64_TARGET_ARCH
4006
4007 condFltReg cond x y = do
4008   CondCode _ cond cond_code <- condFltCode cond x y
4009   tmp1 <- getNewRegNat wordRep
4010   tmp2 <- getNewRegNat wordRep
4011   let
4012         -- We have to worry about unordered operands (eg. comparisons
4013         -- against NaN).  If the operands are unordered, the comparison
4014         -- sets the parity flag, carry flag and zero flag.
4015         -- All comparisons are supposed to return false for unordered
4016         -- operands except for !=, which returns true.
4017         --
4018         -- Optimisation: we don't have to test the parity flag if we
4019         -- know the test has already excluded the unordered case: eg >
4020         -- and >= test for a zero carry flag, which can only occur for
4021         -- ordered operands.
4022         --
4023         -- ToDo: by reversing comparisons we could avoid testing the
4024         -- parity flag in more cases.
4025
4026         code dst =
4027            cond_code `appOL`
4028              (case cond of
4029                 NE  -> or_unordered dst
4030                 GU  -> plain_test   dst
4031                 GEU -> plain_test   dst
4032                 _   -> and_ordered  dst)
4033
4034         plain_test dst = toOL [
4035                     SETCC cond (OpReg tmp1),
4036                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4037                  ]
4038         or_unordered dst = toOL [
4039                     SETCC cond (OpReg tmp1),
4040                     SETCC PARITY (OpReg tmp2),
4041                     OR I8 (OpReg tmp1) (OpReg tmp2),
4042                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4043                   ]
4044         and_ordered dst = toOL [
4045                     SETCC cond (OpReg tmp1),
4046                     SETCC NOTPARITY (OpReg tmp2),
4047                     AND I8 (OpReg tmp1) (OpReg tmp2),
4048                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4049                   ]
4050   -- in
4051   return (Any I32 code)
4052
4053 #endif
4054
4055 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4056
4057 #if sparc_TARGET_ARCH
4058
4059 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4060     (src, code) <- getSomeReg x
4061     tmp <- getNewRegNat I32
4062     let
4063         code__2 dst = code `appOL` toOL [
4064             SUB False True g0 (RIReg src) g0,
4065             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4066     return (Any I32 code__2)
4067
4068 condIntReg EQQ x y = do
4069     (src1, code1) <- getSomeReg x
4070     (src2, code2) <- getSomeReg y
4071     tmp1 <- getNewRegNat I32
4072     tmp2 <- getNewRegNat I32
4073     let
4074         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4075             XOR False src1 (RIReg src2) dst,
4076             SUB False True g0 (RIReg dst) g0,
4077             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4078     return (Any I32 code__2)
4079
4080 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4081     (src, code) <- getSomeReg x
4082     tmp <- getNewRegNat I32
4083     let
4084         code__2 dst = code `appOL` toOL [
4085             SUB False True g0 (RIReg src) g0,
4086             ADD True False g0 (RIImm (ImmInt 0)) dst]
4087     return (Any I32 code__2)
4088
4089 condIntReg NE x y = do
4090     (src1, code1) <- getSomeReg x
4091     (src2, code2) <- getSomeReg y
4092     tmp1 <- getNewRegNat I32
4093     tmp2 <- getNewRegNat I32
4094     let
4095         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4096             XOR False src1 (RIReg src2) dst,
4097             SUB False True g0 (RIReg dst) g0,
4098             ADD True False g0 (RIImm (ImmInt 0)) dst]
4099     return (Any I32 code__2)
4100
4101 condIntReg cond x y = do
4102     BlockId lbl1 <- getBlockIdNat
4103     BlockId lbl2 <- getBlockIdNat
4104     CondCode _ cond cond_code <- condIntCode cond x y
4105     let
4106         code__2 dst = cond_code `appOL` toOL [
4107             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4108             OR False g0 (RIImm (ImmInt 0)) dst,
4109             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4110             NEWBLOCK (BlockId lbl1),
4111             OR False g0 (RIImm (ImmInt 1)) dst,
4112             NEWBLOCK (BlockId lbl2)]
4113     return (Any I32 code__2)
4114
4115 condFltReg cond x y = do
4116     BlockId lbl1 <- getBlockIdNat
4117     BlockId lbl2 <- getBlockIdNat
4118     CondCode _ cond cond_code <- condFltCode cond x y
4119     let
4120         code__2 dst = cond_code `appOL` toOL [
4121             NOP,
4122             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4123             OR False g0 (RIImm (ImmInt 0)) dst,
4124             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4125             NEWBLOCK (BlockId lbl1),
4126             OR False g0 (RIImm (ImmInt 1)) dst,
4127             NEWBLOCK (BlockId lbl2)]
4128     return (Any I32 code__2)
4129
4130 #endif /* sparc_TARGET_ARCH */
4131
4132 #if powerpc_TARGET_ARCH
4133 condReg getCond = do
4134     lbl1 <- getBlockIdNat
4135     lbl2 <- getBlockIdNat
4136     CondCode _ cond cond_code <- getCond
4137     let
4138 {-        code dst = cond_code `appOL` toOL [
4139                 BCC cond lbl1,
4140                 LI dst (ImmInt 0),
4141                 BCC ALWAYS lbl2,
4142                 NEWBLOCK lbl1,
4143                 LI dst (ImmInt 1),
4144                 BCC ALWAYS lbl2,
4145                 NEWBLOCK lbl2
4146             ]-}
4147         code dst = cond_code
4148             `appOL` negate_code
4149             `appOL` toOL [
4150                 MFCR dst,
4151                 RLWINM dst dst (bit + 1) 31 31
4152             ]
4153
4154         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4155                     | otherwise = nilOL
4156
4157         (bit, do_negate) = case cond of
4158             LTT -> (0, False)
4159             LE  -> (1, True)
4160             EQQ -> (2, False)
4161             GE  -> (0, True)
4162             GTT -> (1, False)
4163
4164             NE  -> (2, True)
4165
4166             LU  -> (0, False)
4167             LEU -> (1, True)
4168             GEU -> (0, True)
4169             GU  -> (1, False)
4170
4171     return (Any I32 code)
4172
4173 condIntReg cond x y = condReg (condIntCode cond x y)
4174 condFltReg cond x y = condReg (condFltCode cond x y)
4175 #endif /* powerpc_TARGET_ARCH */
4176
4177
4178 -- -----------------------------------------------------------------------------
4179 -- 'trivial*Code': deal with trivial instructions
4180
4181 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4182 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4183 -- Only look for constants on the right hand side, because that's
4184 -- where the generic optimizer will have put them.
4185
4186 -- Similarly, for unary instructions, we don't have to worry about
4187 -- matching an StInt as the argument, because genericOpt will already
4188 -- have handled the constant-folding.
4189
4190 trivialCode
4191     :: MachRep
4192     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4193       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4194                      -> Maybe (Operand -> Operand -> Instr)
4195       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4196                      -> Maybe (Operand -> Operand -> Instr)
4197       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4198       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4199       ,)))))
4200     -> CmmExpr -> CmmExpr -- the two arguments
4201     -> NatM Register
4202
4203 #ifndef powerpc_TARGET_ARCH
4204 trivialFCode
4205     :: MachRep
4206     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4207       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4208       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4209       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4210       ,))))
4211     -> CmmExpr -> CmmExpr -- the two arguments
4212     -> NatM Register
4213 #endif
4214
4215 trivialUCode
4216     :: MachRep
4217     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4218       ,IF_ARCH_i386 ((Operand -> Instr)
4219       ,IF_ARCH_x86_64 ((Operand -> Instr)
4220       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4221       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4222       ,)))))
4223     -> CmmExpr  -- the one argument
4224     -> NatM Register
4225
4226 #ifndef powerpc_TARGET_ARCH
4227 trivialUFCode
4228     :: MachRep
4229     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4230       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4231       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4232       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4233       ,))))
4234     -> CmmExpr -- the one argument
4235     -> NatM Register
4236 #endif
4237
4238 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4239
4240 #if alpha_TARGET_ARCH
4241
4242 trivialCode instr x (StInt y)
4243   | fits8Bits y
4244   = getRegister x               `thenNat` \ register ->
4245     getNewRegNat IntRep         `thenNat` \ tmp ->
4246     let
4247         code = registerCode register tmp
4248         src1 = registerName register tmp
4249         src2 = ImmInt (fromInteger y)
4250         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4251     in
4252     return (Any IntRep code__2)
4253
4254 trivialCode instr x y
4255   = getRegister x               `thenNat` \ register1 ->
4256     getRegister y               `thenNat` \ register2 ->
4257     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4258     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4259     let
4260         code1 = registerCode register1 tmp1 []
4261         src1  = registerName register1 tmp1
4262         code2 = registerCode register2 tmp2 []
4263         src2  = registerName register2 tmp2
4264         code__2 dst = asmSeqThen [code1, code2] .
4265                      mkSeqInstr (instr src1 (RIReg src2) dst)
4266     in
4267     return (Any IntRep code__2)
4268
4269 ------------
4270 trivialUCode instr x
4271   = getRegister x               `thenNat` \ register ->
4272     getNewRegNat IntRep         `thenNat` \ tmp ->
4273     let
4274         code = registerCode register tmp
4275         src  = registerName register tmp
4276         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4277     in
4278     return (Any IntRep code__2)
4279
4280 ------------
4281 trivialFCode _ instr x y
4282   = getRegister x               `thenNat` \ register1 ->
4283     getRegister y               `thenNat` \ register2 ->
4284     getNewRegNat F64    `thenNat` \ tmp1 ->
4285     getNewRegNat F64    `thenNat` \ tmp2 ->
4286     let
4287         code1 = registerCode register1 tmp1
4288         src1  = registerName register1 tmp1
4289
4290         code2 = registerCode register2 tmp2
4291         src2  = registerName register2 tmp2
4292
4293         code__2 dst = asmSeqThen [code1 [], code2 []] .
4294                       mkSeqInstr (instr src1 src2 dst)
4295     in
4296     return (Any F64 code__2)
4297
4298 trivialUFCode _ instr x
4299   = getRegister x               `thenNat` \ register ->
4300     getNewRegNat F64    `thenNat` \ tmp ->
4301     let
4302         code = registerCode register tmp
4303         src  = registerName register tmp
4304         code__2 dst = code . mkSeqInstr (instr src dst)
4305     in
4306     return (Any F64 code__2)
4307
4308 #endif /* alpha_TARGET_ARCH */
4309
4310 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4311
4312 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4313
4314 {-
4315 The Rules of the Game are:
4316
4317 * You cannot assume anything about the destination register dst;
4318   it may be anything, including a fixed reg.
4319
4320 * You may compute an operand into a fixed reg, but you may not
4321   subsequently change the contents of that fixed reg.  If you
4322   want to do so, first copy the value either to a temporary
4323   or into dst.  You are free to modify dst even if it happens
4324   to be a fixed reg -- that's not your problem.
4325
4326 * You cannot assume that a fixed reg will stay live over an
4327   arbitrary computation.  The same applies to the dst reg.
4328
4329 * Temporary regs obtained from getNewRegNat are distinct from
4330   each other and from all other regs, and stay live over
4331   arbitrary computations.
4332
4333 --------------------
4334
4335 SDM's version of The Rules:
4336
4337 * If getRegister returns Any, that means it can generate correct
4338   code which places the result in any register, period.  Even if that
4339   register happens to be read during the computation.
4340
4341   Corollary #1: this means that if you are generating code for an
4342   operation with two arbitrary operands, you cannot assign the result
4343   of the first operand into the destination register before computing
4344   the second operand.  The second operand might require the old value
4345   of the destination register.
4346
4347   Corollary #2: A function might be able to generate more efficient
4348   code if it knows the destination register is a new temporary (and
4349   therefore not read by any of the sub-computations).
4350
4351 * If getRegister returns Any, then the code it generates may modify only:
4352         (a) fresh temporaries
4353         (b) the destination register
4354         (c) known registers (eg. %ecx is used by shifts)
4355   In particular, it may *not* modify global registers, unless the global
4356   register happens to be the destination register.
4357 -}
4358
4359 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4360   | not (is64BitLit lit_a) = do
4361   b_code <- getAnyReg b
4362   let
4363        code dst
4364          = b_code dst `snocOL`
4365            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4366   -- in
4367   return (Any rep code)
4368
4369 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4370
4371 -- This is re-used for floating pt instructions too.
4372 genTrivialCode rep instr a b = do
4373   (b_op, b_code) <- getNonClobberedOperand b
4374   a_code <- getAnyReg a
4375   tmp <- getNewRegNat rep
4376   let
4377      -- We want the value of b to stay alive across the computation of a.
4378      -- But, we want to calculate a straight into the destination register,
4379      -- because the instruction only has two operands (dst := dst `op` src).
4380      -- The troublesome case is when the result of b is in the same register
4381      -- as the destination reg.  In this case, we have to save b in a
4382      -- new temporary across the computation of a.
4383      code dst
4384         | dst `regClashesWithOp` b_op =
4385                 b_code `appOL`
4386                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4387                 a_code dst `snocOL`
4388                 instr (OpReg tmp) (OpReg dst)
4389         | otherwise =
4390                 b_code `appOL`
4391                 a_code dst `snocOL`
4392                 instr b_op (OpReg dst)
4393   -- in
4394   return (Any rep code)
4395
4396 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4397 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4398 reg `regClashesWithOp` _            = False
4399
4400 -----------
4401
4402 trivialUCode rep instr x = do
4403   x_code <- getAnyReg x
4404   let
4405      code dst =
4406         x_code dst `snocOL`
4407         instr (OpReg dst)
4408   -- in
4409   return (Any rep code)
4410
4411 -----------
4412
4413 #if i386_TARGET_ARCH
4414
4415 trivialFCode pk instr x y = do
4416   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4417   (y_reg, y_code) <- getSomeReg y
4418   let
4419      code dst =
4420         x_code `appOL`
4421         y_code `snocOL`
4422         instr pk x_reg y_reg dst
4423   -- in
4424   return (Any pk code)
4425
4426 #endif
4427
4428 #if x86_64_TARGET_ARCH
4429
4430 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4431
4432 #endif
4433
4434 -------------
4435
4436 trivialUFCode rep instr x = do
4437   (x_reg, x_code) <- getSomeReg x
4438   let
4439      code dst =
4440         x_code `snocOL`
4441         instr x_reg dst
4442   -- in
4443   return (Any rep code)
4444
4445 #endif /* i386_TARGET_ARCH */
4446
4447 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4448
4449 #if sparc_TARGET_ARCH
4450
4451 trivialCode pk instr x (CmmLit (CmmInt y d))
4452   | fits13Bits y
4453   = do
4454       (src1, code) <- getSomeReg x
4455       tmp <- getNewRegNat I32
4456       let
4457         src2 = ImmInt (fromInteger y)
4458         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4459       return (Any I32 code__2)
4460
4461 trivialCode pk instr x y = do
4462     (src1, code1) <- getSomeReg x
4463     (src2, code2) <- getSomeReg y
4464     tmp1 <- getNewRegNat I32
4465     tmp2 <- getNewRegNat I32
4466     let
4467         code__2 dst = code1 `appOL` code2 `snocOL`
4468                       instr src1 (RIReg src2) dst
4469     return (Any I32 code__2)
4470
4471 ------------
4472 trivialFCode pk instr x y = do
4473     (src1, code1) <- getSomeReg x
4474     (src2, code2) <- getSomeReg y
4475     tmp1 <- getNewRegNat (cmmExprRep x)
4476     tmp2 <- getNewRegNat (cmmExprRep y)
4477     tmp <- getNewRegNat F64
4478     let
4479         promote x = FxTOy F32 F64 x tmp
4480
4481         pk1   = cmmExprRep x
4482         pk2   = cmmExprRep y
4483
4484         code__2 dst =
4485                 if pk1 == pk2 then
4486                     code1 `appOL` code2 `snocOL`
4487                     instr pk src1 src2 dst
4488                 else if pk1 == F32 then
4489                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4490                     instr F64 tmp src2 dst
4491                 else
4492                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4493                     instr F64 src1 tmp dst
4494     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4495
4496 ------------
4497 trivialUCode pk instr x = do
4498     (src, code) <- getSomeReg x
4499     tmp <- getNewRegNat pk
4500     let
4501         code__2 dst = code `snocOL` instr (RIReg src) dst
4502     return (Any pk code__2)
4503
4504 -------------
4505 trivialUFCode pk instr x = do
4506     (src, code) <- getSomeReg x
4507     tmp <- getNewRegNat pk
4508     let
4509         code__2 dst = code `snocOL` instr src dst
4510     return (Any pk code__2)
4511
4512 #endif /* sparc_TARGET_ARCH */
4513
4514 #if powerpc_TARGET_ARCH
4515
4516 {-
4517 Wolfgang's PowerPC version of The Rules:
4518
4519 A slightly modified version of The Rules to take advantage of the fact
4520 that PowerPC instructions work on all registers and don't implicitly
4521 clobber any fixed registers.
4522
4523 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4524
4525 * If getRegister returns Any, then the code it generates may modify only:
4526         (a) fresh temporaries
4527         (b) the destination register
4528   It may *not* modify global registers, unless the global
4529   register happens to be the destination register.
4530   It may not clobber any other registers. In fact, only ccalls clobber any
4531   fixed registers.
4532   Also, it may not modify the counter register (used by genCCall).
4533
4534   Corollary: If a getRegister for a subexpression returns Fixed, you need
4535   not move it to a fresh temporary before evaluating the next subexpression.
4536   The Fixed register won't be modified.
4537   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4538
4539 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4540   the value of the destination register.
4541 -}
4542
4543 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4544     | Just imm <- makeImmediate rep signed y
4545     = do
4546         (src1, code1) <- getSomeReg x
4547         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4548         return (Any rep code)
4549
4550 trivialCode rep signed instr x y = do
4551     (src1, code1) <- getSomeReg x
4552     (src2, code2) <- getSomeReg y
4553     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4554     return (Any rep code)
4555
4556 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4557     -> CmmExpr -> CmmExpr -> NatM Register
4558 trivialCodeNoImm rep instr x y = do
4559     (src1, code1) <- getSomeReg x
4560     (src2, code2) <- getSomeReg y
4561     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4562     return (Any rep code)
4563
4564 trivialUCode rep instr x = do
4565     (src, code) <- getSomeReg x
4566     let code' dst = code `snocOL` instr dst src
4567     return (Any rep code')
4568
4569 -- There is no "remainder" instruction on the PPC, so we have to do
4570 -- it the hard way.
4571 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4572
4573 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4574     -> CmmExpr -> CmmExpr -> NatM Register
4575 remainderCode rep div x y = do
4576     (src1, code1) <- getSomeReg x
4577     (src2, code2) <- getSomeReg y
4578     let code dst = code1 `appOL` code2 `appOL` toOL [
4579                 div dst src1 src2,
4580                 MULLW dst dst (RIReg src2),
4581                 SUBF dst dst src1
4582             ]
4583     return (Any rep code)
4584
4585 #endif /* powerpc_TARGET_ARCH */
4586
4587
4588 -- -----------------------------------------------------------------------------
4589 --  Coercing to/from integer/floating-point...
4590
4591 -- When going to integer, we truncate (round towards 0).
4592
4593 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4594 -- conversions.  We have to store temporaries in memory to move
4595 -- between the integer and the floating point register sets.
4596
4597 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4598 -- pretend, on sparc at least, that double and float regs are seperate
4599 -- kinds, so the value has to be computed into one kind before being
4600 -- explicitly "converted" to live in the other kind.
4601
4602 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4603 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4604
4605 #if sparc_TARGET_ARCH
4606 coerceDbl2Flt :: CmmExpr -> NatM Register
4607 coerceFlt2Dbl :: CmmExpr -> NatM Register
4608 #endif
4609
4610 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4611
4612 #if alpha_TARGET_ARCH
4613
4614 coerceInt2FP _ x
4615   = getRegister x               `thenNat` \ register ->
4616     getNewRegNat IntRep         `thenNat` \ reg ->
4617     let
4618         code = registerCode register reg
4619         src  = registerName register reg
4620
4621         code__2 dst = code . mkSeqInstrs [
4622             ST Q src (spRel 0),
4623             LD TF dst (spRel 0),
4624             CVTxy Q TF dst dst]
4625     in
4626     return (Any F64 code__2)
4627
4628 -------------
4629 coerceFP2Int x
4630   = getRegister x               `thenNat` \ register ->
4631     getNewRegNat F64    `thenNat` \ tmp ->
4632     let
4633         code = registerCode register tmp
4634         src  = registerName register tmp
4635
4636         code__2 dst = code . mkSeqInstrs [
4637             CVTxy TF Q src tmp,
4638             ST TF tmp (spRel 0),
4639             LD Q dst (spRel 0)]
4640     in
4641     return (Any IntRep code__2)
4642
4643 #endif /* alpha_TARGET_ARCH */
4644
4645 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4646
4647 #if i386_TARGET_ARCH
4648
4649 coerceInt2FP from to x = do
4650   (x_reg, x_code) <- getSomeReg x
4651   let
4652         opc  = case to of F32 -> GITOF; F64 -> GITOD
4653         code dst = x_code `snocOL` opc x_reg dst
4654         -- ToDo: works for non-I32 reps?
4655   -- in
4656   return (Any to code)
4657
4658 ------------
4659
4660 coerceFP2Int from to x = do
4661   (x_reg, x_code) <- getSomeReg x
4662   let
4663         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4664         code dst = x_code `snocOL` opc x_reg dst
4665         -- ToDo: works for non-I32 reps?
4666   -- in
4667   return (Any to code)
4668
4669 #endif /* i386_TARGET_ARCH */
4670
4671 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4672
4673 #if x86_64_TARGET_ARCH
4674
4675 coerceFP2Int from to x = do
4676   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4677   let
4678         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4679         code dst = x_code `snocOL` opc x_op dst
4680   -- in
4681   return (Any to code) -- works even if the destination rep is <I32
4682
4683 coerceInt2FP from to x = do
4684   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4685   let
4686         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4687         code dst = x_code `snocOL` opc x_op dst
4688   -- in
4689   return (Any to code) -- works even if the destination rep is <I32
4690
4691 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4692 coerceFP2FP to x = do
4693   (x_reg, x_code) <- getSomeReg x
4694   let
4695         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4696         code dst = x_code `snocOL` opc x_reg dst
4697   -- in
4698   return (Any to code)
4699
4700 #endif
4701
4702 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4703
4704 #if sparc_TARGET_ARCH
4705
4706 coerceInt2FP pk1 pk2 x = do
4707     (src, code) <- getSomeReg x
4708     let
4709         code__2 dst = code `appOL` toOL [
4710             ST pk1 src (spRel (-2)),
4711             LD pk1 (spRel (-2)) dst,
4712             FxTOy pk1 pk2 dst dst]
4713     return (Any pk2 code__2)
4714
4715 ------------
4716 coerceFP2Int pk fprep x = do
4717     (src, code) <- getSomeReg x
4718     reg <- getNewRegNat fprep
4719     tmp <- getNewRegNat pk
4720     let
4721         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4722             code `appOL` toOL [
4723             FxTOy fprep pk src tmp,
4724             ST pk tmp (spRel (-2)),
4725             LD pk (spRel (-2)) dst]
4726     return (Any pk code__2)
4727
4728 ------------
4729 coerceDbl2Flt x = do
4730     (src, code) <- getSomeReg x
4731     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4732
4733 ------------
4734 coerceFlt2Dbl x = do
4735     (src, code) <- getSomeReg x
4736     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4737
4738 #endif /* sparc_TARGET_ARCH */
4739
4740 #if powerpc_TARGET_ARCH
4741 coerceInt2FP fromRep toRep x = do
4742     (src, code) <- getSomeReg x
4743     lbl <- getNewLabelNat
4744     itmp <- getNewRegNat I32
4745     ftmp <- getNewRegNat F64
4746     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4747     Amode addr addr_code <- getAmode dynRef
4748     let
4749         code' dst = code `appOL` maybe_exts `appOL` toOL [
4750                 LDATA ReadOnlyData
4751                                 [CmmDataLabel lbl,
4752                                  CmmStaticLit (CmmInt 0x43300000 I32),
4753                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4754                 XORIS itmp src (ImmInt 0x8000),
4755                 ST I32 itmp (spRel 3),
4756                 LIS itmp (ImmInt 0x4330),
4757                 ST I32 itmp (spRel 2),
4758                 LD F64 ftmp (spRel 2)
4759             ] `appOL` addr_code `appOL` toOL [
4760                 LD F64 dst addr,
4761                 FSUB F64 dst ftmp dst
4762             ] `appOL` maybe_frsp dst
4763
4764         maybe_exts = case fromRep of
4765                         I8 ->  unitOL $ EXTS I8 src src
4766                         I16 -> unitOL $ EXTS I16 src src
4767                         I32 -> nilOL
4768         maybe_frsp dst = case toRep of
4769                         F32 -> unitOL $ FRSP dst dst
4770                         F64 -> nilOL
4771     return (Any toRep code')
4772
4773 coerceFP2Int fromRep toRep x = do
4774     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4775     (src, code) <- getSomeReg x
4776     tmp <- getNewRegNat F64
4777     let
4778         code' dst = code `appOL` toOL [
4779                 -- convert to int in FP reg
4780             FCTIWZ tmp src,
4781                 -- store value (64bit) from FP to stack
4782             ST F64 tmp (spRel 2),
4783                 -- read low word of value (high word is undefined)
4784             LD I32 dst (spRel 3)]
4785     return (Any toRep code')
4786 #endif /* powerpc_TARGET_ARCH */
4787
4788
4789 -- -----------------------------------------------------------------------------
4790 -- eXTRA_STK_ARGS_HERE
4791
4792 -- We (allegedly) put the first six C-call arguments in registers;
4793 -- where do we start putting the rest of them?
4794
4795 -- Moved from MachInstrs (SDM):
4796
4797 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4798 eXTRA_STK_ARGS_HERE :: Int
4799 eXTRA_STK_ARGS_HERE
4800   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4801 #endif
4802