compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32 import ClosureInfo      ( C_SRT(..) )
  33
  34 -- The rest:
  35 import StaticFlags      ( opt_PIC )
  36 import ForeignCall      ( CCallConv(..) )
  37 import OrdList
  38 import Pretty
  39 import Outputable
  40 import FastString
  41 import FastTypes        ( isFastTrue )
  42 import Constants        ( wORD_SIZE )
  43
  44 #ifdef DEBUG
  45 import Debug.Trace      ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Data.Maybe       ( fromJust )
  50 import Data.Bits
  51 import Data.Word
  52 import Data.Int
  53
  54 -- -----------------------------------------------------------------------------
  55 -- Top-level of the instruction selector
  56
  57 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  58 -- They are really trees of insns to facilitate fast appending, where a
  59 -- left-to-right traversal (pre-order?) yields the insns in the correct
  60 -- order.
  61
  62 type InstrBlock = OrdList Instr
  63
  64 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  65 cmmTopCodeGen (CmmProc info lab params blocks) = do
  66   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  67   picBaseMb <- getPicBaseMaybeNat
  68   let proc = CmmProc info lab params (concat nat_blocks)
  69       tops = proc : concat statics
  70   case picBaseMb of
  71       Just picBase -> initializePicBase picBase tops
  72       Nothing -> return tops
  73
  74 cmmTopCodeGen (CmmData sec dat) = do
  75   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  76
  77 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  78 basicBlockCodeGen (BasicBlock id stmts) = do
  79   instrs <- stmtsToInstrs stmts
  80   -- code generation may introduce new basic block boundaries, which
  81   -- are indicated by the NEWBLOCK instruction.  We must split up the
  82   -- instruction stream into basic blocks again.  Also, we extract
  83   -- LDATAs here too.
  84   let
  85         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  86
  87         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  88           = ([], BasicBlock id instrs : blocks, statics)
  89         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  90           = (instrs, blocks, CmmData sec dat:statics)
  91         mkBlocks instr (instrs,blocks,statics)
  92           = (instr:instrs, blocks, statics)
  93   -- in
  94   return (BasicBlock id top : other_blocks, statics)
  95
  96 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  97 stmtsToInstrs stmts
  98    = do instrss <- mapM stmtToInstrs stmts
  99         return (concatOL instrss)
 100
 101 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 102 stmtToInstrs stmt = case stmt of
 103     CmmNop         -> return nilOL
 104     CmmComment s   -> return (unitOL (COMMENT s))
 105
 106     CmmAssign reg src
 107       | isFloatingRep kind -> assignReg_FltCode kind reg src
 108 #if WORD_SIZE_IN_BITS==32
 109       | kind == I64        -> assignReg_I64Code      reg src
 110 #endif
 111       | otherwise          -> assignReg_IntCode kind reg src
 112         where kind = cmmRegRep reg
 113
 114     CmmStore addr src
 115       | isFloatingRep kind -> assignMem_FltCode kind addr src
 116 #if WORD_SIZE_IN_BITS==32
 117       | kind == I64      -> assignMem_I64Code      addr src
 118 #endif
 119       | otherwise        -> assignMem_IntCode kind addr src
 120         where kind = cmmExprRep src
 121
 122     CmmCall target result_regs args _
 123        -> genCCall target result_regs args
 124
 125     CmmBranch id          -> genBranch id
 126     CmmCondBranch arg id  -> genCondJump id arg
 127     CmmSwitch arg ids     -> genSwitch arg ids
 128     CmmJump arg params    -> genJump arg
 129
 130 -- -----------------------------------------------------------------------------
 131 -- General things for putting together code sequences
 132
 133 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 134 -- CmmExprs into CmmRegOff?
 135 mangleIndexTree :: CmmExpr -> CmmExpr
 136 mangleIndexTree (CmmRegOff reg off)
 137   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 138   where rep = cmmRegRep reg
 139
 140 -- -----------------------------------------------------------------------------
 141 --  Code gen for 64-bit arithmetic on 32-bit platforms
 142
 143 {-
 144 Simple support for generating 64-bit code (ie, 64 bit values and 64
 145 bit assignments) on 32-bit platforms.  Unlike the main code generator
 146 we merely shoot for generating working code as simply as possible, and
 147 pay little attention to code quality.  Specifically, there is no
 148 attempt to deal cleverly with the fixed-vs-floating register
 149 distinction; all values are generated into (pairs of) floating
 150 registers, even if this would mean some redundant reg-reg moves as a
 151 result.  Only one of the VRegUniques is returned, since it will be
 152 of the VRegUniqueLo form, and the upper-half VReg can be determined
 153 by applying getHiVRegFromLo to it.
 154 -}
 155
 156 data ChildCode64        -- a.k.a "Register64"
 157    = ChildCode64
 158         InstrBlock      -- code
 159         Reg             -- the lower 32-bit temporary which contains the
 160                         -- result; use getHiVRegFromLo to find the other
 161                         -- VRegUnique.  Rules of this simplified insn
 162                         -- selection game are therefore that the returned
 163                         -- Reg may be modified
 164
 165 #if WORD_SIZE_IN_BITS==32
 166 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 167 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 168 #endif
 169
 170 #ifndef x86_64_TARGET_ARCH
 171 iselExpr64        :: CmmExpr -> NatM ChildCode64
 172 #endif
 173
 174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 175
 176 #if i386_TARGET_ARCH
 177
 178 assignMem_I64Code addrTree valueTree = do
 179   Amode addr addr_code <- getAmode addrTree
 180   ChildCode64 vcode rlo <- iselExpr64 valueTree
 181   let
 182         rhi = getHiVRegFromLo rlo
 183
 184         -- Little-endian store
 185         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 186         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 187   -- in
 188   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 189
 190
 191 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 192    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 193    let
 194          r_dst_lo = mkVReg u_dst I32
 195          r_dst_hi = getHiVRegFromLo r_dst_lo
 196          r_src_hi = getHiVRegFromLo r_src_lo
 197          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 198          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 199    -- in
 200    return (
 201         vcode `snocOL` mov_lo `snocOL` mov_hi
 202      )
 203
 204 assignReg_I64Code lvalue valueTree
 205    = panic "assignReg_I64Code(i386): invalid lvalue"
 206
 207 ------------
 208
 209 iselExpr64 (CmmLit (CmmInt i _)) = do
 210   (rlo,rhi) <- getNewRegPairNat I32
 211   let
 212         r = fromIntegral (fromIntegral i :: Word32)
 213         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 214         code = toOL [
 215                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 216                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 217                 ]
 218   -- in
 219   return (ChildCode64 code rlo)
 220
 221 iselExpr64 (CmmLoad addrTree I64) = do
 222    Amode addr addr_code <- getAmode addrTree
 223    (rlo,rhi) <- getNewRegPairNat I32
 224    let
 225         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 226         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 227    -- in
 228    return (
 229             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 230                         rlo
 231      )
 232
 233 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 234    = return (ChildCode64 nilOL (mkVReg vu I32))
 235
 236 -- we handle addition, but rather badly
 237 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 238    ChildCode64 code1 r1lo <- iselExpr64 e1
 239    (rlo,rhi) <- getNewRegPairNat I32
 240    let
 241         r = fromIntegral (fromIntegral i :: Word32)
 242         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 243         r1hi = getHiVRegFromLo r1lo
 244         code =  code1 `appOL`
 245                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 246                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 247                        MOV I32 (OpReg r1hi) (OpReg rhi),
 248                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 249    -- in
 250    return (ChildCode64 code rlo)
 251
 252 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 253    ChildCode64 code1 r1lo <- iselExpr64 e1
 254    ChildCode64 code2 r2lo <- iselExpr64 e2
 255    (rlo,rhi) <- getNewRegPairNat I32
 256    let
 257         r1hi = getHiVRegFromLo r1lo
 258         r2hi = getHiVRegFromLo r2lo
 259         code =  code1 `appOL`
 260                 code2 `appOL`
 261                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 262                        ADD I32 (OpReg r2lo) (OpReg rlo),
 263                        MOV I32 (OpReg r1hi) (OpReg rhi),
 264                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 265    -- in
 266    return (ChildCode64 code rlo)
 267
 268 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 269      fn <- getAnyReg expr
 270      r_dst_lo <-  getNewRegNat I32
 271      let r_dst_hi = getHiVRegFromLo r_dst_lo
 272          code = fn r_dst_lo
 273      return (
 274              ChildCode64 (code `snocOL`
 275                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 276                           r_dst_lo
 277             )
 278
 279 iselExpr64 expr
 280    = pprPanic "iselExpr64(i386)" (ppr expr)
 281
 282 #endif /* i386_TARGET_ARCH */
 283
 284 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 285
 286 #if sparc_TARGET_ARCH
 287
 288 assignMem_I64Code addrTree valueTree = do
 289      Amode addr addr_code <- getAmode addrTree
 290      ChildCode64 vcode rlo <- iselExpr64 valueTree
 291      (src, code) <- getSomeReg addrTree
 292      let
 293          rhi = getHiVRegFromLo rlo
 294          -- Big-endian store
 295          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 296          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 297      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 298
 299 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 300      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 301      let
 302          r_dst_lo = mkVReg u_dst pk
 303          r_dst_hi = getHiVRegFromLo r_dst_lo
 304          r_src_hi = getHiVRegFromLo r_src_lo
 305          mov_lo = mkMOV r_src_lo r_dst_lo
 306          mov_hi = mkMOV r_src_hi r_dst_hi
 307          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 308      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 309 assignReg_I64Code lvalue valueTree
 310    = panic "assignReg_I64Code(sparc): invalid lvalue"
 311
 312
 313 -- Don't delete this -- it's very handy for debugging.
 314 --iselExpr64 expr
 315 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 316 --   = panic "iselExpr64(???)"
 317
 318 iselExpr64 (CmmLoad addrTree I64) = do
 319      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 320      rlo <- getNewRegNat I32
 321      let rhi = getHiVRegFromLo rlo
 322          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 323          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 324      return (
 325             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 326                          rlo
 327           )
 328
 329 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 330      r_dst_lo <-  getNewRegNat I32
 331      let r_dst_hi = getHiVRegFromLo r_dst_lo
 332          r_src_lo = mkVReg uq I32
 333          r_src_hi = getHiVRegFromLo r_src_lo
 334          mov_lo = mkMOV r_src_lo r_dst_lo
 335          mov_hi = mkMOV r_src_hi r_dst_hi
 336          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 337      return (
 338             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 339          )
 340
 341 iselExpr64 expr
 342    = pprPanic "iselExpr64(sparc)" (ppr expr)
 343
 344 #endif /* sparc_TARGET_ARCH */
 345
 346 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 347
 348 #if powerpc_TARGET_ARCH
 349
 350 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 351 getI64Amodes addrTree = do
 352     Amode hi_addr addr_code <- getAmode addrTree
 353     case addrOffset hi_addr 4 of
 354         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 355         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 356                            return (AddrRegImm hi_ptr (ImmInt 0),
 357                                    AddrRegImm hi_ptr (ImmInt 4),
 358                                    code)
 359
 360 assignMem_I64Code addrTree valueTree = do
 361         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 362         ChildCode64 vcode rlo <- iselExpr64 valueTree
 363         let
 364                 rhi = getHiVRegFromLo rlo
 365
 366                 -- Big-endian store
 367                 mov_hi = ST I32 rhi hi_addr
 368                 mov_lo = ST I32 rlo lo_addr
 369         -- in
 370         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 371
 372 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 373    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 374    let
 375          r_dst_lo = mkVReg u_dst I32
 376          r_dst_hi = getHiVRegFromLo r_dst_lo
 377          r_src_hi = getHiVRegFromLo r_src_lo
 378          mov_lo = MR r_dst_lo r_src_lo
 379          mov_hi = MR r_dst_hi r_src_hi
 380    -- in
 381    return (
 382         vcode `snocOL` mov_lo `snocOL` mov_hi
 383      )
 384
 385 assignReg_I64Code lvalue valueTree
 386    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 387
 388
 389 -- Don't delete this -- it's very handy for debugging.
 390 --iselExpr64 expr
 391 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 392 --   = panic "iselExpr64(???)"
 393
 394 iselExpr64 (CmmLoad addrTree I64) = do
 395     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 396     (rlo, rhi) <- getNewRegPairNat I32
 397     let mov_hi = LD I32 rhi hi_addr
 398         mov_lo = LD I32 rlo lo_addr
 399     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 400                          rlo
 401
 402 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 403    = return (ChildCode64 nilOL (mkVReg vu I32))
 404
 405 iselExpr64 (CmmLit (CmmInt i _)) = do
 406   (rlo,rhi) <- getNewRegPairNat I32
 407   let
 408         half0 = fromIntegral (fromIntegral i :: Word16)
 409         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 410         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 411         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 412
 413         code = toOL [
 414                 LIS rlo (ImmInt half1),
 415                 OR rlo rlo (RIImm $ ImmInt half0),
 416                 LIS rhi (ImmInt half3),
 417                 OR rlo rlo (RIImm $ ImmInt half2)
 418                 ]
 419   -- in
 420   return (ChildCode64 code rlo)
 421
 422 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 423    ChildCode64 code1 r1lo <- iselExpr64 e1
 424    ChildCode64 code2 r2lo <- iselExpr64 e2
 425    (rlo,rhi) <- getNewRegPairNat I32
 426    let
 427         r1hi = getHiVRegFromLo r1lo
 428         r2hi = getHiVRegFromLo r2lo
 429         code =  code1 `appOL`
 430                 code2 `appOL`
 431                 toOL [ ADDC rlo r1lo r2lo,
 432                        ADDE rhi r1hi r2hi ]
 433    -- in
 434    return (ChildCode64 code rlo)
 435
 436 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
 437     (expr_reg,expr_code) <- getSomeReg expr
 438     (rlo, rhi) <- getNewRegPairNat I32
 439     let mov_hi = LI rhi (ImmInt 0)
 440         mov_lo = MR rlo expr_reg
 441     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 442                          rlo
 443 iselExpr64 expr
 444    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 445
 446 #endif /* powerpc_TARGET_ARCH */
 447
 448
 449 -- -----------------------------------------------------------------------------
 450 -- The 'Register' type
 451
 452 -- 'Register's passed up the tree.  If the stix code forces the register
 453 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 454 -- otherwise, it comes out as @Any@, and the parent can decide which
 455 -- register to put it in.
 456
 457 data Register
 458   = Fixed   MachRep Reg InstrBlock
 459   | Any     MachRep (Reg -> InstrBlock)
 460
 461 swizzleRegisterRep :: Register -> MachRep -> Register
 462 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 463 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 464
 465
 466 -- -----------------------------------------------------------------------------
 467 -- Utils based on getRegister, below
 468
 469 -- The dual to getAnyReg: compute an expression into a register, but
 470 -- we don't mind which one it is.
 471 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 472 getSomeReg expr = do
 473   r <- getRegister expr
 474   case r of
 475     Any rep code -> do
 476         tmp <- getNewRegNat rep
 477         return (tmp, code tmp)
 478     Fixed _ reg code ->
 479         return (reg, code)
 480
 481 -- -----------------------------------------------------------------------------
 482 -- Grab the Reg for a CmmReg
 483
 484 getRegisterReg :: CmmReg -> Reg
 485
 486 getRegisterReg (CmmLocal (LocalReg u pk _))
 487   = mkVReg u pk
 488
 489 getRegisterReg (CmmGlobal mid)
 490   = case get_GlobalReg_reg_or_addr mid of
 491        Left (RealReg rrno) -> RealReg rrno
 492        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 493           -- By this stage, the only MagicIds remaining should be the
 494           -- ones which map to a real machine register on this
 495           -- platform.  Hence ...
 496
 497
 498 -- -----------------------------------------------------------------------------
 499 -- Generate code to get a subtree into a Register
 500
 501 -- Don't delete this -- it's very handy for debugging.
 502 --getRegister expr
 503 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 504 --   = panic "getRegister(???)"
 505
 506 getRegister :: CmmExpr -> NatM Register
 507
 508 #if !x86_64_TARGET_ARCH
 509     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 510     -- register, it can only be used for rip-relative addressing.
 511 getRegister (CmmReg (CmmGlobal PicBaseReg))
 512   = do
 513       reg <- getPicBaseNat wordRep
 514       return (Fixed wordRep reg nilOL)
 515 #endif
 516
 517 getRegister (CmmReg reg)
 518   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 519
 520 getRegister tree@(CmmRegOff _ _)
 521   = getRegister (mangleIndexTree tree)
 522
 523
 524 #if WORD_SIZE_IN_BITS==32
 525     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 526     -- TO_W_(x), TO_W_(x >> 32)
 527
 528 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 529              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 530   ChildCode64 code rlo <- iselExpr64 x
 531   return $ Fixed I32 (getHiVRegFromLo rlo) code
 532
 533 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 534              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 535   ChildCode64 code rlo <- iselExpr64 x
 536   return $ Fixed I32 (getHiVRegFromLo rlo) code
 537
 538 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 539   ChildCode64 code rlo <- iselExpr64 x
 540   return $ Fixed I32 rlo code
 541
 542 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 543   ChildCode64 code rlo <- iselExpr64 x
 544   return $ Fixed I32 rlo code
 545
 546 #endif
 547
 548 -- end of machine-"independent" bit; here we go on the rest...
 549
 550 #if alpha_TARGET_ARCH
 551
 552 getRegister (StDouble d)
 553   = getBlockIdNat                   `thenNat` \ lbl ->
 554     getNewRegNat PtrRep             `thenNat` \ tmp ->
 555     let code dst = mkSeqInstrs [
 556             LDATA RoDataSegment lbl [
 557                     DATA TF [ImmLab (rational d)]
 558                 ],
 559             LDA tmp (AddrImm (ImmCLbl lbl)),
 560             LD TF dst (AddrReg tmp)]
 561     in
 562         return (Any F64 code)
 563
 564 getRegister (StPrim primop [x]) -- unary PrimOps
 565   = case primop of
 566       IntNegOp -> trivialUCode (NEG Q False) x
 567
 568       NotOp    -> trivialUCode NOT x
 569
 570       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 571       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 572
 573       OrdOp -> coerceIntCode IntRep x
 574       ChrOp -> chrCode x
 575
 576       Float2IntOp  -> coerceFP2Int    x
 577       Int2FloatOp  -> coerceInt2FP pr x
 578       Double2IntOp -> coerceFP2Int    x
 579       Int2DoubleOp -> coerceInt2FP pr x
 580
 581       Double2FloatOp -> coerceFltCode x
 582       Float2DoubleOp -> coerceFltCode x
 583
 584       other_op -> getRegister (StCall fn CCallConv F64 [x])
 585         where
 586           fn = case other_op of
 587                  FloatExpOp    -> FSLIT("exp")
 588                  FloatLogOp    -> FSLIT("log")
 589                  FloatSqrtOp   -> FSLIT("sqrt")
 590                  FloatSinOp    -> FSLIT("sin")
 591                  FloatCosOp    -> FSLIT("cos")
 592                  FloatTanOp    -> FSLIT("tan")
 593                  FloatAsinOp   -> FSLIT("asin")
 594                  FloatAcosOp   -> FSLIT("acos")
 595                  FloatAtanOp   -> FSLIT("atan")
 596                  FloatSinhOp   -> FSLIT("sinh")
 597                  FloatCoshOp   -> FSLIT("cosh")
 598                  FloatTanhOp   -> FSLIT("tanh")
 599                  DoubleExpOp   -> FSLIT("exp")
 600                  DoubleLogOp   -> FSLIT("log")
 601                  DoubleSqrtOp  -> FSLIT("sqrt")
 602                  DoubleSinOp   -> FSLIT("sin")
 603                  DoubleCosOp   -> FSLIT("cos")
 604                  DoubleTanOp   -> FSLIT("tan")
 605                  DoubleAsinOp  -> FSLIT("asin")
 606                  DoubleAcosOp  -> FSLIT("acos")
 607                  DoubleAtanOp  -> FSLIT("atan")
 608                  DoubleSinhOp  -> FSLIT("sinh")
 609                  DoubleCoshOp  -> FSLIT("cosh")
 610                  DoubleTanhOp  -> FSLIT("tanh")
 611   where
 612     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 613
 614 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 615   = case primop of
 616       CharGtOp -> trivialCode (CMP LTT) y x
 617       CharGeOp -> trivialCode (CMP LE) y x
 618       CharEqOp -> trivialCode (CMP EQQ) x y
 619       CharNeOp -> int_NE_code x y
 620       CharLtOp -> trivialCode (CMP LTT) x y
 621       CharLeOp -> trivialCode (CMP LE) x y
 622
 623       IntGtOp  -> trivialCode (CMP LTT) y x
 624       IntGeOp  -> trivialCode (CMP LE) y x
 625       IntEqOp  -> trivialCode (CMP EQQ) x y
 626       IntNeOp  -> int_NE_code x y
 627       IntLtOp  -> trivialCode (CMP LTT) x y
 628       IntLeOp  -> trivialCode (CMP LE) x y
 629
 630       WordGtOp -> trivialCode (CMP ULT) y x
 631       WordGeOp -> trivialCode (CMP ULE) x y
 632       WordEqOp -> trivialCode (CMP EQQ)  x y
 633       WordNeOp -> int_NE_code x y
 634       WordLtOp -> trivialCode (CMP ULT) x y
 635       WordLeOp -> trivialCode (CMP ULE) x y
 636
 637       AddrGtOp -> trivialCode (CMP ULT) y x
 638       AddrGeOp -> trivialCode (CMP ULE) y x
 639       AddrEqOp -> trivialCode (CMP EQQ)  x y
 640       AddrNeOp -> int_NE_code x y
 641       AddrLtOp -> trivialCode (CMP ULT) x y
 642       AddrLeOp -> trivialCode (CMP ULE) x y
 643
 644       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 645       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 646       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 647       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 648       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 649       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 650
 651       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 652       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 653       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 654       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 655       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 656       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 657
 658       IntAddOp  -> trivialCode (ADD Q False) x y
 659       IntSubOp  -> trivialCode (SUB Q False) x y
 660       IntMulOp  -> trivialCode (MUL Q False) x y
 661       IntQuotOp -> trivialCode (DIV Q False) x y
 662       IntRemOp  -> trivialCode (REM Q False) x y
 663
 664       WordAddOp  -> trivialCode (ADD Q False) x y
 665       WordSubOp  -> trivialCode (SUB Q False) x y
 666       WordMulOp  -> trivialCode (MUL Q False) x y
 667       WordQuotOp -> trivialCode (DIV Q True) x y
 668       WordRemOp  -> trivialCode (REM Q True) x y
 669
 670       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 671       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 672       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 673       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 674
 675       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 676       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 677       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 678       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 679
 680       AddrAddOp  -> trivialCode (ADD Q False) x y
 681       AddrSubOp  -> trivialCode (SUB Q False) x y
 682       AddrRemOp  -> trivialCode (REM Q True) x y
 683
 684       AndOp  -> trivialCode AND x y
 685       OrOp   -> trivialCode OR  x y
 686       XorOp  -> trivialCode XOR x y
 687       SllOp  -> trivialCode SLL x y
 688       SrlOp  -> trivialCode SRL x y
 689
 690       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 691       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 692       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 693
 694       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 695       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 696   where
 697     {- ------------------------------------------------------------
 698         Some bizarre special code for getting condition codes into
 699         registers.  Integer non-equality is a test for equality
 700         followed by an XOR with 1.  (Integer comparisons always set
 701         the result register to 0 or 1.)  Floating point comparisons of
 702         any kind leave the result in a floating point register, so we
 703         need to wrangle an integer register out of things.
 704     -}
 705     int_NE_code :: StixTree -> StixTree -> NatM Register
 706
 707     int_NE_code x y
 708       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 709         getNewRegNat IntRep             `thenNat` \ tmp ->
 710         let
 711             code = registerCode register tmp
 712             src  = registerName register tmp
 713             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 714         in
 715         return (Any IntRep code__2)
 716
 717     {- ------------------------------------------------------------
 718         Comments for int_NE_code also apply to cmpF_code
 719     -}
 720     cmpF_code
 721         :: (Reg -> Reg -> Reg -> Instr)
 722         -> Cond
 723         -> StixTree -> StixTree
 724         -> NatM Register
 725
 726     cmpF_code instr cond x y
 727       = trivialFCode pr instr x y       `thenNat` \ register ->
 728         getNewRegNat F64                `thenNat` \ tmp ->
 729         getBlockIdNat                   `thenNat` \ lbl ->
 730         let
 731             code = registerCode register tmp
 732             result  = registerName register tmp
 733
 734             code__2 dst = code . mkSeqInstrs [
 735                 OR zeroh (RIImm (ImmInt 1)) dst,
 736                 BF cond  result (ImmCLbl lbl),
 737                 OR zeroh (RIReg zeroh) dst,
 738                 NEWBLOCK lbl]
 739         in
 740         return (Any IntRep code__2)
 741       where
 742         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 743       ------------------------------------------------------------
 744
 745 getRegister (CmmLoad pk mem)
 746   = getAmode mem                    `thenNat` \ amode ->
 747     let
 748         code = amodeCode amode
 749         src   = amodeAddr amode
 750         size = primRepToSize pk
 751         code__2 dst = code . mkSeqInstr (LD size dst src)
 752     in
 753     return (Any pk code__2)
 754
 755 getRegister (StInt i)
 756   | fits8Bits i
 757   = let
 758         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 759     in
 760     return (Any IntRep code)
 761   | otherwise
 762   = let
 763         code dst = mkSeqInstr (LDI Q dst src)
 764     in
 765     return (Any IntRep code)
 766   where
 767     src = ImmInt (fromInteger i)
 768
 769 getRegister leaf
 770   | isJust imm
 771   = let
 772         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 773     in
 774     return (Any PtrRep code)
 775   where
 776     imm = maybeImm leaf
 777     imm__2 = case imm of Just x -> x
 778
 779 #endif /* alpha_TARGET_ARCH */
 780
 781 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 782
 783 #if i386_TARGET_ARCH
 784
 785 getRegister (CmmLit (CmmFloat f F32)) = do
 786     lbl <- getNewLabelNat
 787     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 788     Amode addr addr_code <- getAmode dynRef
 789     let code dst =
 790             LDATA ReadOnlyData
 791                         [CmmDataLabel lbl,
 792                          CmmStaticLit (CmmFloat f F32)]
 793             `consOL` (addr_code `snocOL`
 794             GLD F32 addr dst)
 795     -- in
 796     return (Any F32 code)
 797
 798
 799 getRegister (CmmLit (CmmFloat d F64))
 800   | d == 0.0
 801   = let code dst = unitOL (GLDZ dst)
 802     in  return (Any F64 code)
 803
 804   | d == 1.0
 805   = let code dst = unitOL (GLD1 dst)
 806     in  return (Any F64 code)
 807
 808   | otherwise = do
 809     lbl <- getNewLabelNat
 810     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 811     Amode addr addr_code <- getAmode dynRef
 812     let code dst =
 813             LDATA ReadOnlyData
 814                         [CmmDataLabel lbl,
 815                          CmmStaticLit (CmmFloat d F64)]
 816             `consOL` (addr_code `snocOL`
 817             GLD F64 addr dst)
 818     -- in
 819     return (Any F64 code)
 820
 821 #endif /* i386_TARGET_ARCH */
 822
 823 #if x86_64_TARGET_ARCH
 824
 825 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 826    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 827         -- I don't know why there are xorpd, xorps, and pxor instructions.
 828         -- They all appear to do the same thing --SDM
 829    return (Any rep code)
 830
 831 getRegister (CmmLit (CmmFloat f rep)) = do
 832     lbl <- getNewLabelNat
 833     let code dst = toOL [
 834             LDATA ReadOnlyData
 835                         [CmmDataLabel lbl,
 836                          CmmStaticLit (CmmFloat f rep)],
 837             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 838             ]
 839     -- in
 840     return (Any rep code)
 841
 842 #endif /* x86_64_TARGET_ARCH */
 843
 844 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 845
 846 -- catch simple cases of zero- or sign-extended load
 847 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 848   code <- intLoadCode (MOVZxL I8) addr
 849   return (Any I32 code)
 850
 851 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 852   code <- intLoadCode (MOVSxL I8) addr
 853   return (Any I32 code)
 854
 855 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 856   code <- intLoadCode (MOVZxL I16) addr
 857   return (Any I32 code)
 858
 859 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 860   code <- intLoadCode (MOVSxL I16) addr
 861   return (Any I32 code)
 862
 863 #endif
 864
 865 #if x86_64_TARGET_ARCH
 866
 867 -- catch simple cases of zero- or sign-extended load
 868 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 869   code <- intLoadCode (MOVZxL I8) addr
 870   return (Any I64 code)
 871
 872 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 873   code <- intLoadCode (MOVSxL I8) addr
 874   return (Any I64 code)
 875
 876 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 877   code <- intLoadCode (MOVZxL I16) addr
 878   return (Any I64 code)
 879
 880 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 881   code <- intLoadCode (MOVSxL I16) addr
 882   return (Any I64 code)
 883
 884 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 885   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 886   return (Any I64 code)
 887
 888 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 889   code <- intLoadCode (MOVSxL I32) addr
 890   return (Any I64 code)
 891
 892 #endif
 893
 894 #if x86_64_TARGET_ARCH
 895 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 896                                      CmmLit displacement])
 897     = return $ Any I64 (\dst -> unitOL $
 898         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 899 #endif
 900
 901 #if x86_64_TARGET_ARCH
 902 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 903   x_code <- getAnyReg x
 904   lbl <- getNewLabelNat
 905   let
 906     code dst = x_code dst `appOL` toOL [
 907         -- This is how gcc does it, so it can't be that bad:
 908         LDATA ReadOnlyData16 [
 909                 CmmAlign 16,
 910                 CmmDataLabel lbl,
 911                 CmmStaticLit (CmmInt 0x80000000 I32),
 912                 CmmStaticLit (CmmInt 0 I32),
 913                 CmmStaticLit (CmmInt 0 I32),
 914                 CmmStaticLit (CmmInt 0 I32)
 915         ],
 916         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 917                 -- xorps, so we need the 128-bit constant
 918                 -- ToDo: rip-relative
 919         ]
 920   --
 921   return (Any F32 code)
 922
 923 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 924   x_code <- getAnyReg x
 925   lbl <- getNewLabelNat
 926   let
 927         -- This is how gcc does it, so it can't be that bad:
 928     code dst = x_code dst `appOL` toOL [
 929         LDATA ReadOnlyData16 [
 930                 CmmAlign 16,
 931                 CmmDataLabel lbl,
 932                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 933                 CmmStaticLit (CmmInt 0 I64)
 934         ],
 935                 -- gcc puts an unpck here.  Wonder if we need it.
 936         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 937                 -- xorpd, so we need the 128-bit constant
 938         ]
 939   --
 940   return (Any F64 code)
 941 #endif
 942
 943 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 944
 945 getRegister (CmmMachOp mop [x]) -- unary MachOps
 946   = case mop of
 947 #if i386_TARGET_ARCH
 948       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 949       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 950 #endif
 951
 952       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 953       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 954
 955       -- Nop conversions
 956       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 957       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 958       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 959       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 960       MO_U_Conv I32 I16 -> toI16Reg I32 x
 961       MO_S_Conv I32 I16 -> toI16Reg I32 x
 962 #if x86_64_TARGET_ARCH
 963       MO_U_Conv I64 I32 -> conversionNop I64 x
 964       MO_S_Conv I64 I32 -> conversionNop I64 x
 965       MO_U_Conv I64 I16 -> toI16Reg I64 x
 966       MO_S_Conv I64 I16 -> toI16Reg I64 x
 967       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 968       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 969 #endif
 970
 971       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 972       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 973
 974       -- widenings
 975       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 976       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 977       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 978
 979       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 980       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 981       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 982
 983 #if x86_64_TARGET_ARCH
 984       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 985       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 986       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 987       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 988       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 989       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 990         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 991         -- However, we don't want the register allocator to throw it
 992         -- away as an unnecessary reg-to-reg move, so we keep it in
 993         -- the form of a movzl and print it as a movl later.
 994 #endif
 995
 996 #if i386_TARGET_ARCH
 997       MO_S_Conv F32 F64 -> conversionNop F64 x
 998       MO_S_Conv F64 F32 -> conversionNop F32 x
 999 #else
1000       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1001       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1002 #endif
1003
1004       MO_S_Conv from to
1005         | isFloatingRep from -> coerceFP2Int from to x
1006         | isFloatingRep to   -> coerceInt2FP from to x
1007
1008       other -> pprPanic "getRegister" (pprMachOp mop)
1009    where
1010         -- signed or unsigned extension.
1011         integerExtend from to instr expr = do
1012             (reg,e_code) <- if from == I8 then getByteReg expr
1013                                           else getSomeReg expr
1014             let
1015                 code dst =
1016                   e_code `snocOL`
1017                   instr from (OpReg reg) (OpReg dst)
1018             return (Any to code)
1019
1020         toI8Reg new_rep expr
1021             = do codefn <- getAnyReg expr
1022                  return (Any new_rep codefn)
1023                 -- HACK: use getAnyReg to get a byte-addressable register.
1024                 -- If the source was a Fixed register, this will add the
1025                 -- mov instruction to put it into the desired destination.
1026                 -- We're assuming that the destination won't be a fixed
1027                 -- non-byte-addressable register; it won't be, because all
1028                 -- fixed registers are word-sized.
1029
1030         toI16Reg = toI8Reg -- for now
1031
1032         conversionNop new_rep expr
1033             = do e_code <- getRegister expr
1034                  return (swizzleRegisterRep e_code new_rep)
1035
1036
1037 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1038   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1039     case mop of
1040       MO_Eq F32   -> condFltReg EQQ x y
1041       MO_Ne F32   -> condFltReg NE x y
1042       MO_S_Gt F32 -> condFltReg GTT x y
1043       MO_S_Ge F32 -> condFltReg GE x y
1044       MO_S_Lt F32 -> condFltReg LTT x y
1045       MO_S_Le F32 -> condFltReg LE x y
1046
1047       MO_Eq F64   -> condFltReg EQQ x y
1048       MO_Ne F64   -> condFltReg NE x y
1049       MO_S_Gt F64 -> condFltReg GTT x y
1050       MO_S_Ge F64 -> condFltReg GE x y
1051       MO_S_Lt F64 -> condFltReg LTT x y
1052       MO_S_Le F64 -> condFltReg LE x y
1053
1054       MO_Eq rep   -> condIntReg EQQ x y
1055       MO_Ne rep   -> condIntReg NE x y
1056
1057       MO_S_Gt rep -> condIntReg GTT x y
1058       MO_S_Ge rep -> condIntReg GE x y
1059       MO_S_Lt rep -> condIntReg LTT x y
1060       MO_S_Le rep -> condIntReg LE x y
1061
1062       MO_U_Gt rep -> condIntReg GU  x y
1063       MO_U_Ge rep -> condIntReg GEU x y
1064       MO_U_Lt rep -> condIntReg LU  x y
1065       MO_U_Le rep -> condIntReg LEU x y
1066
1067 #if i386_TARGET_ARCH
1068       MO_Add F32 -> trivialFCode F32 GADD x y
1069       MO_Sub F32 -> trivialFCode F32 GSUB x y
1070
1071       MO_Add F64 -> trivialFCode F64 GADD x y
1072       MO_Sub F64 -> trivialFCode F64 GSUB x y
1073
1074       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1075       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1076 #endif
1077
1078 #if x86_64_TARGET_ARCH
1079       MO_Add F32 -> trivialFCode F32 ADD x y
1080       MO_Sub F32 -> trivialFCode F32 SUB x y
1081
1082       MO_Add F64 -> trivialFCode F64 ADD x y
1083       MO_Sub F64 -> trivialFCode F64 SUB x y
1084
1085       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1086       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1087 #endif
1088
1089       MO_Add rep -> add_code rep x y
1090       MO_Sub rep -> sub_code rep x y
1091
1092       MO_S_Quot rep -> div_code rep True  True  x y
1093       MO_S_Rem  rep -> div_code rep True  False x y
1094       MO_U_Quot rep -> div_code rep False True  x y
1095       MO_U_Rem  rep -> div_code rep False False x y
1096
1097 #if i386_TARGET_ARCH
1098       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1099       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1100 #endif
1101
1102 #if x86_64_TARGET_ARCH
1103       MO_Mul   F32 -> trivialFCode F32 MUL x y
1104       MO_Mul   F64 -> trivialFCode F64 MUL x y
1105 #endif
1106
1107       MO_Mul   rep -> let op = IMUL rep in
1108                       trivialCode rep op (Just op) x y
1109
1110       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1111
1112       MO_And rep -> let op = AND rep in
1113                     trivialCode rep op (Just op) x y
1114       MO_Or  rep -> let op = OR  rep in
1115                     trivialCode rep op (Just op) x y
1116       MO_Xor rep -> let op = XOR rep in
1117                     trivialCode rep op (Just op) x y
1118
1119         {- Shift ops on x86s have constraints on their source, it
1120            either has to be Imm, CL or 1
1121             => trivialCode is not restrictive enough (sigh.)
1122         -}
1123       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1124       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1125       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1126
1127       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1128   where
1129     --------------------
1130     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1131     imulMayOflo rep a b = do
1132          (a_reg, a_code) <- getNonClobberedReg a
1133          b_code <- getAnyReg b
1134          let
1135              shift_amt  = case rep of
1136                            I32 -> 31
1137                            I64 -> 63
1138                            _ -> panic "shift_amt"
1139
1140              code = a_code `appOL` b_code eax `appOL`
1141                         toOL [
1142                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1143                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1144                                 -- sign extend lower part
1145                            SUB rep (OpReg edx) (OpReg eax)
1146                                 -- compare against upper
1147                            -- eax==0 if high part == sign extended low part
1148                         ]
1149          -- in
1150          return (Fixed rep eax code)
1151
1152     --------------------
1153     shift_code :: MachRep
1154                -> (Operand -> Operand -> Instr)
1155                -> CmmExpr
1156                -> CmmExpr
1157                -> NatM Register
1158
1159     {- Case1: shift length as immediate -}
1160     shift_code rep instr x y@(CmmLit lit) = do
1161           x_code <- getAnyReg x
1162           let
1163                code dst
1164                   = x_code dst `snocOL`
1165                     instr (OpImm (litToImm lit)) (OpReg dst)
1166           -- in
1167           return (Any rep code)
1168
1169     {- Case2: shift length is complex (non-immediate)
1170       * y must go in %ecx.
1171       * we cannot do y first *and* put its result in %ecx, because
1172         %ecx might be clobbered by x.
1173       * if we do y second, then x cannot be
1174         in a clobbered reg.  Also, we cannot clobber x's reg
1175         with the instruction itself.
1176       * so we can either:
1177         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1178         - do y second and put its result into %ecx.  x gets placed in a fresh
1179           tmp.  This is likely to be better, becuase the reg alloc can
1180           eliminate this reg->reg move here (it won't eliminate the other one,
1181           because the move is into the fixed %ecx).
1182     -}
1183     shift_code rep instr x y{-amount-} = do
1184         x_code <- getAnyReg x
1185         tmp <- getNewRegNat rep
1186         y_code <- getAnyReg y
1187         let
1188            code = x_code tmp `appOL`
1189                   y_code ecx `snocOL`
1190                   instr (OpReg ecx) (OpReg tmp)
1191         -- in
1192         return (Fixed rep tmp code)
1193
1194     --------------------
1195     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1196     add_code rep x (CmmLit (CmmInt y _))
1197         | not (is64BitInteger y) = add_int rep x y
1198     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1199
1200     --------------------
1201     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1202     sub_code rep x (CmmLit (CmmInt y _))
1203         | not (is64BitInteger (-y)) = add_int rep x (-y)
1204     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1205
1206     -- our three-operand add instruction:
1207     add_int rep x y = do
1208         (x_reg, x_code) <- getSomeReg x
1209         let
1210             imm = ImmInt (fromInteger y)
1211             code dst
1212                = x_code `snocOL`
1213                  LEA rep
1214                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1215                         (OpReg dst)
1216         --
1217         return (Any rep code)
1218
1219     ----------------------
1220     div_code rep signed quotient x y = do
1221            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1222            x_code <- getAnyReg x
1223            let
1224              widen | signed    = CLTD rep
1225                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1226
1227              instr | signed    = IDIV
1228                    | otherwise = DIV
1229
1230              code = y_code `appOL`
1231                     x_code eax `appOL`
1232                     toOL [widen, instr rep y_op]
1233
1234              result | quotient  = eax
1235                     | otherwise = edx
1236
1237            -- in
1238            return (Fixed rep result code)
1239
1240
1241 getRegister (CmmLoad mem pk)
1242   | isFloatingRep pk
1243   = do
1244     Amode src mem_code <- getAmode mem
1245     let
1246         code dst = mem_code `snocOL`
1247                    IF_ARCH_i386(GLD pk src dst,
1248                                 MOV pk (OpAddr src) (OpReg dst))
1249     --
1250     return (Any pk code)
1251
1252 #if i386_TARGET_ARCH
1253 getRegister (CmmLoad mem pk)
1254   | pk /= I64
1255   = do
1256     code <- intLoadCode (instr pk) mem
1257     return (Any pk code)
1258   where
1259         instr I8  = MOVZxL pk
1260         instr I16 = MOV I16
1261         instr I32 = MOV I32
1262         -- we always zero-extend 8-bit loads, if we
1263         -- can't think of anything better.  This is because
1264         -- we can't guarantee access to an 8-bit variant of every register
1265         -- (esi and edi don't have 8-bit variants), so to make things
1266         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1267 #endif
1268
1269 #if x86_64_TARGET_ARCH
1270 -- Simpler memory load code on x86_64
1271 getRegister (CmmLoad mem pk)
1272   = do
1273     code <- intLoadCode (MOV pk) mem
1274     return (Any pk code)
1275 #endif
1276
1277 getRegister (CmmLit (CmmInt 0 rep))
1278   = let
1279         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1280         adj_rep = case rep of I64 -> I32; _ -> rep
1281         rep1 = IF_ARCH_i386( rep, adj_rep )
1282         code dst
1283            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1284     in
1285         return (Any rep code)
1286
1287 #if x86_64_TARGET_ARCH
1288   -- optimisation for loading small literals on x86_64: take advantage
1289   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1290   -- instruction forms are shorter.
1291 getRegister (CmmLit lit)
1292   | I64 <- cmmLitRep lit, not (isBigLit lit)
1293   = let
1294         imm = litToImm lit
1295         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1296     in
1297         return (Any I64 code)
1298   where
1299    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1300    isBigLit _ = False
1301         -- note1: not the same as is64BitLit, because that checks for
1302         -- signed literals that fit in 32 bits, but we want unsigned
1303         -- literals here.
1304         -- note2: all labels are small, because we're assuming the
1305         -- small memory model (see gcc docs, -mcmodel=small).
1306 #endif
1307
1308 getRegister (CmmLit lit)
1309   = let
1310         rep = cmmLitRep lit
1311         imm = litToImm lit
1312         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1313     in
1314         return (Any rep code)
1315
1316 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1317
1318
1319 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1320    -> NatM (Reg -> InstrBlock)
1321 intLoadCode instr mem = do
1322   Amode src mem_code <- getAmode mem
1323   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1324
1325 -- Compute an expression into *any* register, adding the appropriate
1326 -- move instruction if necessary.
1327 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1328 getAnyReg expr = do
1329   r <- getRegister expr
1330   anyReg r
1331
1332 anyReg :: Register -> NatM (Reg -> InstrBlock)
1333 anyReg (Any _ code)          = return code
1334 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1335
1336 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1337 -- Fixed registers might not be byte-addressable, so we make sure we've
1338 -- got a temporary, inserting an extra reg copy if necessary.
1339 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1340 #if x86_64_TARGET_ARCH
1341 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1342 #else
1343 getByteReg expr = do
1344   r <- getRegister expr
1345   case r of
1346     Any rep code -> do
1347         tmp <- getNewRegNat rep
1348         return (tmp, code tmp)
1349     Fixed rep reg code
1350         | isVirtualReg reg -> return (reg,code)
1351         | otherwise -> do
1352             tmp <- getNewRegNat rep
1353             return (tmp, code `snocOL` reg2reg rep reg tmp)
1354         -- ToDo: could optimise slightly by checking for byte-addressable
1355         -- real registers, but that will happen very rarely if at all.
1356 #endif
1357
1358 -- Another variant: this time we want the result in a register that cannot
1359 -- be modified by code to evaluate an arbitrary expression.
1360 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1361 getNonClobberedReg expr = do
1362   r <- getRegister expr
1363   case r of
1364     Any rep code -> do
1365         tmp <- getNewRegNat rep
1366         return (tmp, code tmp)
1367     Fixed rep reg code
1368         -- only free regs can be clobbered
1369         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1370                 tmp <- getNewRegNat rep
1371                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1372         | otherwise ->
1373                 return (reg, code)
1374
1375 reg2reg :: MachRep -> Reg -> Reg -> Instr
1376 reg2reg rep src dst
1377 #if i386_TARGET_ARCH
1378   | isFloatingRep rep = GMOV src dst
1379 #endif
1380   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1381
1382 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1383
1384 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1385
1386 #if sparc_TARGET_ARCH
1387
1388 getRegister (CmmLit (CmmFloat f F32)) = do
1389     lbl <- getNewLabelNat
1390     let code dst = toOL [
1391             LDATA ReadOnlyData
1392                         [CmmDataLabel lbl,
1393                          CmmStaticLit (CmmFloat f F32)],
1394             SETHI (HI (ImmCLbl lbl)) dst,
1395             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1396     return (Any F32 code)
1397
1398 getRegister (CmmLit (CmmFloat d F64)) = do
1399     lbl <- getNewLabelNat
1400     let code dst = toOL [
1401             LDATA ReadOnlyData
1402                         [CmmDataLabel lbl,
1403                          CmmStaticLit (CmmFloat d F64)],
1404             SETHI (HI (ImmCLbl lbl)) dst,
1405             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1406     return (Any F64 code)
1407
1408 getRegister (CmmMachOp mop [x]) -- unary MachOps
1409   = case mop of
1410       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1411       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1412
1413       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1414       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1415
1416       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1417
1418       MO_U_Conv F64 F32-> coerceDbl2Flt x
1419       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1420
1421       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1422       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1423       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1424       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1425
1426       -- Conversions which are a nop on sparc
1427       MO_U_Conv from to
1428         | from == to   -> conversionNop to   x
1429       MO_U_Conv I32 to -> conversionNop to   x
1430       MO_S_Conv I32 to -> conversionNop to   x
1431
1432       -- widenings
1433       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1434       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1435       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1436       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1437
1438       other_op -> panic "Unknown unary mach op"
1439     where
1440         -- XXX SLL/SRL?
1441         integerExtend signed from to expr = do
1442            (reg, e_code) <- getSomeReg expr
1443            let
1444                code dst =
1445                    e_code `snocOL`
1446                    ((if signed then SRA else SRL)
1447                           reg (RIImm (ImmInt 0)) dst)
1448            return (Any to code)
1449         conversionNop new_rep expr
1450             = do e_code <- getRegister expr
1451                  return (swizzleRegisterRep e_code new_rep)
1452
1453 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1454   = case mop of
1455       MO_Eq F32 -> condFltReg EQQ x y
1456       MO_Ne F32 -> condFltReg NE x y
1457
1458       MO_S_Gt F32 -> condFltReg GTT x y
1459       MO_S_Ge F32 -> condFltReg GE x y
1460       MO_S_Lt F32 -> condFltReg LTT x y
1461       MO_S_Le F32 -> condFltReg LE x y
1462
1463       MO_Eq F64 -> condFltReg EQQ x y
1464       MO_Ne F64 -> condFltReg NE x y
1465
1466       MO_S_Gt F64 -> condFltReg GTT x y
1467       MO_S_Ge F64 -> condFltReg GE x y
1468       MO_S_Lt F64 -> condFltReg LTT x y
1469       MO_S_Le F64 -> condFltReg LE x y
1470
1471       MO_Eq rep -> condIntReg EQQ x y
1472       MO_Ne rep -> condIntReg NE x y
1473
1474       MO_S_Gt rep -> condIntReg GTT x y
1475       MO_S_Ge rep -> condIntReg GE x y
1476       MO_S_Lt rep -> condIntReg LTT x y
1477       MO_S_Le rep -> condIntReg LE x y
1478
1479       MO_U_Gt I32  -> condIntReg GTT x y
1480       MO_U_Ge I32  -> condIntReg GE x y
1481       MO_U_Lt I32  -> condIntReg LTT x y
1482       MO_U_Le I32  -> condIntReg LE x y
1483
1484       MO_U_Gt I16 -> condIntReg GU  x y
1485       MO_U_Ge I16 -> condIntReg GEU x y
1486       MO_U_Lt I16 -> condIntReg LU  x y
1487       MO_U_Le I16 -> condIntReg LEU x y
1488
1489       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1490       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1491
1492       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1493 {-
1494       -- ToDo: teach about V8+ SPARC div instructions
1495       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1496       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1497       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1498       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1499 -}
1500       MO_Add F32  -> trivialFCode F32 FADD  x y
1501       MO_Sub F32   -> trivialFCode F32  FSUB x y
1502       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1503       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1504
1505       MO_Add F64   -> trivialFCode F64 FADD  x y
1506       MO_Sub F64   -> trivialFCode F64  FSUB x y
1507       MO_Mul F64   -> trivialFCode F64  FMUL x y
1508       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1509
1510       MO_And rep   -> trivialCode rep (AND False) x y
1511       MO_Or rep    -> trivialCode rep (OR  False) x y
1512       MO_Xor rep   -> trivialCode rep (XOR False) x y
1513
1514       MO_Mul rep -> trivialCode rep (SMUL False) x y
1515
1516       MO_Shl rep   -> trivialCode rep SLL  x y
1517       MO_U_Shr rep   -> trivialCode rep SRL x y
1518       MO_S_Shr rep   -> trivialCode rep SRA x y
1519
1520 {-
1521       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1522                                          [promote x, promote y])
1523                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1524       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1525                                         [x, y])
1526 -}
1527       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1528   where
1529     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1530
1531     --------------------
1532     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1533     imulMayOflo rep a b = do
1534          (a_reg, a_code) <- getSomeReg a
1535          (b_reg, b_code) <- getSomeReg b
1536          res_lo <- getNewRegNat I32
1537          res_hi <- getNewRegNat I32
1538          let
1539             shift_amt  = case rep of
1540                           I32 -> 31
1541                           I64 -> 63
1542                           _ -> panic "shift_amt"
1543             code dst = a_code `appOL` b_code `appOL`
1544                        toOL [
1545                            SMUL False a_reg (RIReg b_reg) res_lo,
1546                            RDY res_hi,
1547                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1548                            SUB False False res_lo (RIReg res_hi) dst
1549                         ]
1550          return (Any I32 code)
1551
1552 getRegister (CmmLoad mem pk) = do
1553     Amode src code <- getAmode mem
1554     let
1555         code__2 dst = code `snocOL` LD pk src dst
1556     return (Any pk code__2)
1557
1558 getRegister (CmmLit (CmmInt i _))
1559   | fits13Bits i
1560   = let
1561         src = ImmInt (fromInteger i)
1562         code dst = unitOL (OR False g0 (RIImm src) dst)
1563     in
1564         return (Any I32 code)
1565
1566 getRegister (CmmLit lit)
1567   = let rep = cmmLitRep lit
1568         imm = litToImm lit
1569         code dst = toOL [
1570             SETHI (HI imm) dst,
1571             OR False dst (RIImm (LO imm)) dst]
1572     in return (Any I32 code)
1573
1574 #endif /* sparc_TARGET_ARCH */
1575
1576 #if powerpc_TARGET_ARCH
1577 getRegister (CmmLoad mem pk)
1578   | pk /= I64
1579   = do
1580         Amode addr addr_code <- getAmode mem
1581         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1582                        addr_code `snocOL` LD pk dst addr
1583         return (Any pk code)
1584
1585 -- catch simple cases of zero- or sign-extended load
1586 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1587     Amode addr addr_code <- getAmode mem
1588     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1589
1590 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1591
1592 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1593     Amode addr addr_code <- getAmode mem
1594     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1595
1596 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1597     Amode addr addr_code <- getAmode mem
1598     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1599
1600 getRegister (CmmMachOp mop [x]) -- unary MachOps
1601   = case mop of
1602       MO_Not rep   -> trivialUCode rep NOT x
1603
1604       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1605       MO_S_Conv F32 F64 -> conversionNop F64 x
1606
1607       MO_S_Conv from to
1608         | from == to         -> conversionNop to x
1609         | isFloatingRep from -> coerceFP2Int from to x
1610         | isFloatingRep to   -> coerceInt2FP from to x
1611
1612         -- narrowing is a nop: we treat the high bits as undefined
1613       MO_S_Conv I32 to -> conversionNop to x
1614       MO_S_Conv I16 I8 -> conversionNop I8 x
1615       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1616       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1617
1618       MO_U_Conv from to
1619         | from == to -> conversionNop to x
1620         -- narrowing is a nop: we treat the high bits as undefined
1621       MO_U_Conv I32 to -> conversionNop to x
1622       MO_U_Conv I16 I8 -> conversionNop I8 x
1623       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1624       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1625
1626       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1627       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1628       MO_S_Neg rep      -> trivialUCode rep NEG x
1629
1630     where
1631         conversionNop new_rep expr
1632             = do e_code <- getRegister expr
1633                  return (swizzleRegisterRep e_code new_rep)
1634
1635 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1636   = case mop of
1637       MO_Eq F32 -> condFltReg EQQ x y
1638       MO_Ne F32 -> condFltReg NE  x y
1639
1640       MO_S_Gt F32 -> condFltReg GTT x y
1641       MO_S_Ge F32 -> condFltReg GE  x y
1642       MO_S_Lt F32 -> condFltReg LTT x y
1643       MO_S_Le F32 -> condFltReg LE  x y
1644
1645       MO_Eq F64 -> condFltReg EQQ x y
1646       MO_Ne F64 -> condFltReg NE  x y
1647
1648       MO_S_Gt F64 -> condFltReg GTT x y
1649       MO_S_Ge F64 -> condFltReg GE  x y
1650       MO_S_Lt F64 -> condFltReg LTT x y
1651       MO_S_Le F64 -> condFltReg LE  x y
1652
1653       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1654       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1655
1656       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1657       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1658       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1659       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1660
1661       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1662       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1663       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1664       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1665
1666       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1667       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1668       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1669       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1670
1671       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1672       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1673       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1674       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1675
1676          -- optimize addition with 32-bit immediate
1677          -- (needed for PIC)
1678       MO_Add I32 ->
1679         case y of
1680           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1681             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1682           CmmLit lit
1683             -> do
1684                 (src, srcCode) <- getSomeReg x
1685                 let imm = litToImm lit
1686                     code dst = srcCode `appOL` toOL [
1687                                     ADDIS dst src (HA imm),
1688                                     ADD dst dst (RIImm (LO imm))
1689                                 ]
1690                 return (Any I32 code)
1691           _ -> trivialCode I32 True ADD x y
1692
1693       MO_Add rep -> trivialCode rep True ADD x y
1694       MO_Sub rep ->
1695         case y of    -- subfi ('substract from' with immediate) doesn't exist
1696           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1697             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1698           _ -> trivialCodeNoImm rep SUBF y x
1699
1700       MO_Mul rep -> trivialCode rep True MULLW x y
1701
1702       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1703
1704       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1705       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1706
1707       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1708       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1709
1710       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1711       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1712
1713       MO_And rep   -> trivialCode rep False AND x y
1714       MO_Or rep    -> trivialCode rep False OR x y
1715       MO_Xor rep   -> trivialCode rep False XOR x y
1716
1717       MO_Shl rep   -> trivialCode rep False SLW x y
1718       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1719       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1720
1721 getRegister (CmmLit (CmmInt i rep))
1722   | Just imm <- makeImmediate rep True i
1723   = let
1724         code dst = unitOL (LI dst imm)
1725     in
1726         return (Any rep code)
1727
1728 getRegister (CmmLit (CmmFloat f frep)) = do
1729     lbl <- getNewLabelNat
1730     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1731     Amode addr addr_code <- getAmode dynRef
1732     let code dst =
1733             LDATA ReadOnlyData  [CmmDataLabel lbl,
1734                                  CmmStaticLit (CmmFloat f frep)]
1735             `consOL` (addr_code `snocOL` LD frep dst addr)
1736     return (Any frep code)
1737
1738 getRegister (CmmLit lit)
1739   = let rep = cmmLitRep lit
1740         imm = litToImm lit
1741         code dst = toOL [
1742               LIS dst (HI imm),
1743               OR dst dst (RIImm (LO imm))
1744           ]
1745     in return (Any rep code)
1746
1747 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1748
1749     -- extend?Rep: wrap integer expression of type rep
1750     -- in a conversion to I32
1751 extendSExpr I32 x = x
1752 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1753 extendUExpr I32 x = x
1754 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1755
1756 #endif /* powerpc_TARGET_ARCH */
1757
1758
1759 -- -----------------------------------------------------------------------------
1760 --  The 'Amode' type: Memory addressing modes passed up the tree.
1761
1762 data Amode = Amode AddrMode InstrBlock
1763
1764 {-
1765 Now, given a tree (the argument to an CmmLoad) that references memory,
1766 produce a suitable addressing mode.
1767
1768 A Rule of the Game (tm) for Amodes: use of the addr bit must
1769 immediately follow use of the code part, since the code part puts
1770 values in registers which the addr then refers to.  So you can't put
1771 anything in between, lest it overwrite some of those registers.  If
1772 you need to do some other computation between the code part and use of
1773 the addr bit, first store the effective address from the amode in a
1774 temporary, then do the other computation, and then use the temporary:
1775
1776     code
1777     LEA amode, tmp
1778     ... other computation ...
1779     ... (tmp) ...
1780 -}
1781
1782 getAmode :: CmmExpr -> NatM Amode
1783 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1784
1785 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1786
1787 #if alpha_TARGET_ARCH
1788
1789 getAmode (StPrim IntSubOp [x, StInt i])
1790   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1791     getRegister x               `thenNat` \ register ->
1792     let
1793         code = registerCode register tmp
1794         reg  = registerName register tmp
1795         off  = ImmInt (-(fromInteger i))
1796     in
1797     return (Amode (AddrRegImm reg off) code)
1798
1799 getAmode (StPrim IntAddOp [x, StInt i])
1800   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1801     getRegister x               `thenNat` \ register ->
1802     let
1803         code = registerCode register tmp
1804         reg  = registerName register tmp
1805         off  = ImmInt (fromInteger i)
1806     in
1807     return (Amode (AddrRegImm reg off) code)
1808
1809 getAmode leaf
1810   | isJust imm
1811   = return (Amode (AddrImm imm__2) id)
1812   where
1813     imm = maybeImm leaf
1814     imm__2 = case imm of Just x -> x
1815
1816 getAmode other
1817   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1818     getRegister other           `thenNat` \ register ->
1819     let
1820         code = registerCode register tmp
1821         reg  = registerName register tmp
1822     in
1823     return (Amode (AddrReg reg) code)
1824
1825 #endif /* alpha_TARGET_ARCH */
1826
1827 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1828
1829 #if x86_64_TARGET_ARCH
1830
1831 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1832                                      CmmLit displacement])
1833     = return $ Amode (ripRel (litToImm displacement)) nilOL
1834
1835 #endif
1836
1837 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1838
1839 -- This is all just ridiculous, since it carefully undoes
1840 -- what mangleIndexTree has just done.
1841 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1842   | not (is64BitLit lit)
1843   -- ASSERT(rep == I32)???
1844   = do (x_reg, x_code) <- getSomeReg x
1845        let off = ImmInt (-(fromInteger i))
1846        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1847
1848 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1849   | not (is64BitLit lit)
1850   -- ASSERT(rep == I32)???
1851   = do (x_reg, x_code) <- getSomeReg x
1852        let off = ImmInt (fromInteger i)
1853        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1854
1855 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1856 -- recognised by the next rule.
1857 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1858                                   b@(CmmLit _)])
1859   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1860
1861 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1862                                         [y, CmmLit (CmmInt shift _)]])
1863   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1864   = x86_complex_amode x y shift 0
1865
1866 getAmode (CmmMachOp (MO_Add rep)
1867                 [x, CmmMachOp (MO_Add _)
1868                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1869                          CmmLit (CmmInt offset _)]])
1870   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1871   && not (is64BitInteger offset)
1872   = x86_complex_amode x y shift offset
1873
1874 getAmode (CmmMachOp (MO_Add rep) [x,y])
1875   = x86_complex_amode x y 0 0
1876
1877 getAmode (CmmLit lit) | not (is64BitLit lit)
1878   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1879
1880 getAmode expr = do
1881   (reg,code) <- getSomeReg expr
1882   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1883
1884
1885 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1886 x86_complex_amode base index shift offset
1887   = do (x_reg, x_code) <- getNonClobberedReg base
1888         -- x must be in a temp, because it has to stay live over y_code
1889         -- we could compre x_reg and y_reg and do something better here...
1890        (y_reg, y_code) <- getSomeReg index
1891        let
1892            code = x_code `appOL` y_code
1893            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1894        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1895                code)
1896
1897 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1898
1899 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1900
1901 #if sparc_TARGET_ARCH
1902
1903 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1904   | fits13Bits (-i)
1905   = do
1906        (reg, code) <- getSomeReg x
1907        let
1908          off  = ImmInt (-(fromInteger i))
1909        return (Amode (AddrRegImm reg off) code)
1910
1911
1912 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1913   | fits13Bits i
1914   = do
1915        (reg, code) <- getSomeReg x
1916        let
1917          off  = ImmInt (fromInteger i)
1918        return (Amode (AddrRegImm reg off) code)
1919
1920 getAmode (CmmMachOp (MO_Add rep) [x, y])
1921   = do
1922     (regX, codeX) <- getSomeReg x
1923     (regY, codeY) <- getSomeReg y
1924     let
1925         code = codeX `appOL` codeY
1926     return (Amode (AddrRegReg regX regY) code)
1927
1928 -- XXX Is this same as "leaf" in Stix?
1929 getAmode (CmmLit lit)
1930   = do
1931       tmp <- getNewRegNat I32
1932       let
1933         code = unitOL (SETHI (HI imm__2) tmp)
1934       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1935       where
1936          imm__2 = litToImm lit
1937
1938 getAmode other
1939   = do
1940        (reg, code) <- getSomeReg other
1941        let
1942             off  = ImmInt 0
1943        return (Amode (AddrRegImm reg off) code)
1944
1945 #endif /* sparc_TARGET_ARCH */
1946
1947 #ifdef powerpc_TARGET_ARCH
1948 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1949   | Just off <- makeImmediate I32 True (-i)
1950   = do
1951         (reg, code) <- getSomeReg x
1952         return (Amode (AddrRegImm reg off) code)
1953
1954
1955 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1956   | Just off <- makeImmediate I32 True i
1957   = do
1958         (reg, code) <- getSomeReg x
1959         return (Amode (AddrRegImm reg off) code)
1960
1961    -- optimize addition with 32-bit immediate
1962    -- (needed for PIC)
1963 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1964   = do
1965         tmp <- getNewRegNat I32
1966         (src, srcCode) <- getSomeReg x
1967         let imm = litToImm lit
1968             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1969         return (Amode (AddrRegImm tmp (LO imm)) code)
1970
1971 getAmode (CmmLit lit)
1972   = do
1973         tmp <- getNewRegNat I32
1974         let imm = litToImm lit
1975             code = unitOL (LIS tmp (HA imm))
1976         return (Amode (AddrRegImm tmp (LO imm)) code)
1977
1978 getAmode (CmmMachOp (MO_Add I32) [x, y])
1979   = do
1980         (regX, codeX) <- getSomeReg x
1981         (regY, codeY) <- getSomeReg y
1982         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1983
1984 getAmode other
1985   = do
1986         (reg, code) <- getSomeReg other
1987         let
1988             off  = ImmInt 0
1989         return (Amode (AddrRegImm reg off) code)
1990 #endif /* powerpc_TARGET_ARCH */
1991
1992 -- -----------------------------------------------------------------------------
1993 -- getOperand: sometimes any operand will do.
1994
1995 -- getNonClobberedOperand: the value of the operand will remain valid across
1996 -- the computation of an arbitrary expression, unless the expression
1997 -- is computed directly into a register which the operand refers to
1998 -- (see trivialCode where this function is used for an example).
1999
2000 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2001
2002 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2003 #if x86_64_TARGET_ARCH
2004 getNonClobberedOperand (CmmLit lit)
2005   | isSuitableFloatingPointLit lit = do
2006     lbl <- getNewLabelNat
2007     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2008                                            CmmStaticLit lit])
2009     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2010 #endif
2011 getNonClobberedOperand (CmmLit lit)
2012   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2013     return (OpImm (litToImm lit), nilOL)
2014 getNonClobberedOperand (CmmLoad mem pk)
2015   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2016     Amode src mem_code <- getAmode mem
2017     (src',save_code) <-
2018         if (amodeCouldBeClobbered src)
2019                 then do
2020                    tmp <- getNewRegNat wordRep
2021                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2022                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2023                 else
2024                    return (src, nilOL)
2025     return (OpAddr src', save_code `appOL` mem_code)
2026 getNonClobberedOperand e = do
2027     (reg, code) <- getNonClobberedReg e
2028     return (OpReg reg, code)
2029
2030 amodeCouldBeClobbered :: AddrMode -> Bool
2031 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2032
2033 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2034 regClobbered _ = False
2035
2036 -- getOperand: the operand is not required to remain valid across the
2037 -- computation of an arbitrary expression.
2038 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2039 #if x86_64_TARGET_ARCH
2040 getOperand (CmmLit lit)
2041   | isSuitableFloatingPointLit lit = do
2042     lbl <- getNewLabelNat
2043     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2044                                            CmmStaticLit lit])
2045     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2046 #endif
2047 getOperand (CmmLit lit)
2048   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2049     return (OpImm (litToImm lit), nilOL)
2050 getOperand (CmmLoad mem pk)
2051   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2052     Amode src mem_code <- getAmode mem
2053     return (OpAddr src, mem_code)
2054 getOperand e = do
2055     (reg, code) <- getSomeReg e
2056     return (OpReg reg, code)
2057
2058 isOperand :: CmmExpr -> Bool
2059 isOperand (CmmLoad _ _) = True
2060 isOperand (CmmLit lit)  = not (is64BitLit lit)
2061                           || isSuitableFloatingPointLit lit
2062 isOperand _             = False
2063
2064 -- if we want a floating-point literal as an operand, we can
2065 -- use it directly from memory.  However, if the literal is
2066 -- zero, we're better off generating it into a register using
2067 -- xor.
2068 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2069 isSuitableFloatingPointLit _ = False
2070
2071 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2072 getRegOrMem (CmmLoad mem pk)
2073   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2074     Amode src mem_code <- getAmode mem
2075     return (OpAddr src, mem_code)
2076 getRegOrMem e = do
2077     (reg, code) <- getNonClobberedReg e
2078     return (OpReg reg, code)
2079
2080 #if x86_64_TARGET_ARCH
2081 is64BitLit (CmmInt i I64) = is64BitInteger i
2082    -- assume that labels are in the range 0-2^31-1: this assumes the
2083    -- small memory model (see gcc docs, -mcmodel=small).
2084 #endif
2085 is64BitLit x = False
2086 #endif
2087
2088 is64BitInteger :: Integer -> Bool
2089 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2090   where i64 = fromIntegral i :: Int64
2091   -- a CmmInt is intended to be truncated to the appropriate
2092   -- number of bits, so here we truncate it to Int64.  This is
2093   -- important because e.g. -1 as a CmmInt might be either
2094   -- -1 or 18446744073709551615.
2095
2096 -- -----------------------------------------------------------------------------
2097 --  The 'CondCode' type:  Condition codes passed up the tree.
2098
2099 data CondCode = CondCode Bool Cond InstrBlock
2100
2101 -- Set up a condition code for a conditional branch.
2102
2103 getCondCode :: CmmExpr -> NatM CondCode
2104
2105 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2106
2107 #if alpha_TARGET_ARCH
2108 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2109 #endif /* alpha_TARGET_ARCH */
2110
2111 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2112
2113 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2114 -- yes, they really do seem to want exactly the same!
2115
2116 getCondCode (CmmMachOp mop [x, y])
2117   =
2118     case mop of
2119       MO_Eq F32 -> condFltCode EQQ x y
2120       MO_Ne F32 -> condFltCode NE  x y
2121
2122       MO_S_Gt F32 -> condFltCode GTT x y
2123       MO_S_Ge F32 -> condFltCode GE  x y
2124       MO_S_Lt F32 -> condFltCode LTT x y
2125       MO_S_Le F32 -> condFltCode LE  x y
2126
2127       MO_Eq F64 -> condFltCode EQQ x y
2128       MO_Ne F64 -> condFltCode NE  x y
2129
2130       MO_S_Gt F64 -> condFltCode GTT x y
2131       MO_S_Ge F64 -> condFltCode GE  x y
2132       MO_S_Lt F64 -> condFltCode LTT x y
2133       MO_S_Le F64 -> condFltCode LE  x y
2134
2135       MO_Eq rep -> condIntCode EQQ  x y
2136       MO_Ne rep -> condIntCode NE   x y
2137
2138       MO_S_Gt rep -> condIntCode GTT  x y
2139       MO_S_Ge rep -> condIntCode GE   x y
2140       MO_S_Lt rep -> condIntCode LTT  x y
2141       MO_S_Le rep -> condIntCode LE   x y
2142
2143       MO_U_Gt rep -> condIntCode GU   x y
2144       MO_U_Ge rep -> condIntCode GEU  x y
2145       MO_U_Lt rep -> condIntCode LU   x y
2146       MO_U_Le rep -> condIntCode LEU  x y
2147
2148       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2149
2150 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2151
2152 #elif powerpc_TARGET_ARCH
2153
2154 -- almost the same as everywhere else - but we need to
2155 -- extend small integers to 32 bit first
2156
2157 getCondCode (CmmMachOp mop [x, y])
2158   = case mop of
2159       MO_Eq F32 -> condFltCode EQQ x y
2160       MO_Ne F32 -> condFltCode NE  x y
2161
2162       MO_S_Gt F32 -> condFltCode GTT x y
2163       MO_S_Ge F32 -> condFltCode GE  x y
2164       MO_S_Lt F32 -> condFltCode LTT x y
2165       MO_S_Le F32 -> condFltCode LE  x y
2166
2167       MO_Eq F64 -> condFltCode EQQ x y
2168       MO_Ne F64 -> condFltCode NE  x y
2169
2170       MO_S_Gt F64 -> condFltCode GTT x y
2171       MO_S_Ge F64 -> condFltCode GE  x y
2172       MO_S_Lt F64 -> condFltCode LTT x y
2173       MO_S_Le F64 -> condFltCode LE  x y
2174
2175       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2176       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2177
2178       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2179       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2180       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2181       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2182
2183       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2184       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2185       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2186       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2187
2188       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2189
2190 getCondCode other =  panic "getCondCode(2)(powerpc)"
2191
2192
2193 #endif
2194
2195
2196 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2197 -- passed back up the tree.
2198
2199 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2200
2201 #if alpha_TARGET_ARCH
2202 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2203 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2204 #endif /* alpha_TARGET_ARCH */
2205
2206 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2207 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2208
2209 -- memory vs immediate
2210 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2211     Amode x_addr x_code <- getAmode x
2212     let
2213         imm  = litToImm lit
2214         code = x_code `snocOL`
2215                   CMP pk (OpImm imm) (OpAddr x_addr)
2216     --
2217     return (CondCode False cond code)
2218
2219 -- anything vs zero, using a mask
2220 -- TODO: Add some sanity checking!!!!
2221 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2222     | (CmmLit (CmmInt mask pk2)) <- o2
2223     = do
2224       (x_reg, x_code) <- getSomeReg x
2225       let
2226          code = x_code `snocOL`
2227                 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2228       --
2229       return (CondCode False cond code)
2230
2231 -- anything vs zero
2232 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2233     (x_reg, x_code) <- getSomeReg x
2234     let
2235         code = x_code `snocOL`
2236                   TEST pk (OpReg x_reg) (OpReg x_reg)
2237     --
2238     return (CondCode False cond code)
2239
2240 -- anything vs operand
2241 condIntCode cond x y | isOperand y = do
2242     (x_reg, x_code) <- getNonClobberedReg x
2243     (y_op,  y_code) <- getOperand y
2244     let
2245         code = x_code `appOL` y_code `snocOL`
2246                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2247     -- in
2248     return (CondCode False cond code)
2249
2250 -- anything vs anything
2251 condIntCode cond x y = do
2252   (y_reg, y_code) <- getNonClobberedReg y
2253   (x_op, x_code) <- getRegOrMem x
2254   let
2255         code = y_code `appOL`
2256                x_code `snocOL`
2257                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2258   -- in
2259   return (CondCode False cond code)
2260 #endif
2261
2262 #if i386_TARGET_ARCH
2263 condFltCode cond x y
2264   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2265   (x_reg, x_code) <- getNonClobberedReg x
2266   (y_reg, y_code) <- getSomeReg y
2267   let
2268         code = x_code `appOL` y_code `snocOL`
2269                 GCMP cond x_reg y_reg
2270   -- The GCMP insn does the test and sets the zero flag if comparable
2271   -- and true.  Hence we always supply EQQ as the condition to test.
2272   return (CondCode True EQQ code)
2273 #endif /* i386_TARGET_ARCH */
2274
2275 #if x86_64_TARGET_ARCH
2276 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2277 -- an operand, but the right must be a reg.  We can probably do better
2278 -- than this general case...
2279 condFltCode cond x y = do
2280   (x_reg, x_code) <- getNonClobberedReg x
2281   (y_op, y_code) <- getOperand y
2282   let
2283         code = x_code `appOL`
2284                y_code `snocOL`
2285                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2286         -- NB(1): we need to use the unsigned comparison operators on the
2287         -- result of this comparison.
2288   -- in
2289   return (CondCode True (condToUnsigned cond) code)
2290 #endif
2291
2292 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2293
2294 #if sparc_TARGET_ARCH
2295
2296 condIntCode cond x (CmmLit (CmmInt y rep))
2297   | fits13Bits y
2298   = do
2299        (src1, code) <- getSomeReg x
2300        let
2301            src2 = ImmInt (fromInteger y)
2302            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2303        return (CondCode False cond code')
2304
2305 condIntCode cond x y = do
2306     (src1, code1) <- getSomeReg x
2307     (src2, code2) <- getSomeReg y
2308     let
2309         code__2 = code1 `appOL` code2 `snocOL`
2310                   SUB False True src1 (RIReg src2) g0
2311     return (CondCode False cond code__2)
2312
2313 -----------
2314 condFltCode cond x y = do
2315     (src1, code1) <- getSomeReg x
2316     (src2, code2) <- getSomeReg y
2317     tmp <- getNewRegNat F64
2318     let
2319         promote x = FxTOy F32 F64 x tmp
2320
2321         pk1   = cmmExprRep x
2322         pk2   = cmmExprRep y
2323
2324         code__2 =
2325                 if pk1 == pk2 then
2326                     code1 `appOL` code2 `snocOL`
2327                     FCMP True pk1 src1 src2
2328                 else if pk1 == F32 then
2329                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2330                     FCMP True F64 tmp src2
2331                 else
2332                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2333                     FCMP True F64 src1 tmp
2334     return (CondCode True cond code__2)
2335
2336 #endif /* sparc_TARGET_ARCH */
2337
2338 #if powerpc_TARGET_ARCH
2339 --  ###FIXME: I16 and I8!
2340 condIntCode cond x (CmmLit (CmmInt y rep))
2341   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2342   = do
2343         (src1, code) <- getSomeReg x
2344         let
2345             code' = code `snocOL`
2346                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2347         return (CondCode False cond code')
2348
2349 condIntCode cond x y = do
2350     (src1, code1) <- getSomeReg x
2351     (src2, code2) <- getSomeReg y
2352     let
2353         code' = code1 `appOL` code2 `snocOL`
2354                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2355     return (CondCode False cond code')
2356
2357 condFltCode cond x y = do
2358     (src1, code1) <- getSomeReg x
2359     (src2, code2) <- getSomeReg y
2360     let
2361         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2362         code'' = case cond of -- twiddle CR to handle unordered case
2363                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2364                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2365                     _ -> code'
2366                  where
2367                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2368     return (CondCode True cond code'')
2369
2370 #endif /* powerpc_TARGET_ARCH */
2371
2372 -- -----------------------------------------------------------------------------
2373 -- Generating assignments
2374
2375 -- Assignments are really at the heart of the whole code generation
2376 -- business.  Almost all top-level nodes of any real importance are
2377 -- assignments, which correspond to loads, stores, or register
2378 -- transfers.  If we're really lucky, some of the register transfers
2379 -- will go away, because we can use the destination register to
2380 -- complete the code generation for the right hand side.  This only
2381 -- fails when the right hand side is forced into a fixed register
2382 -- (e.g. the result of a call).
2383
2384 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2385 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2386
2387 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2388 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2389
2390 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2391
2392 #if alpha_TARGET_ARCH
2393
2394 assignIntCode pk (CmmLoad dst _) src
2395   = getNewRegNat IntRep             `thenNat` \ tmp ->
2396     getAmode dst                    `thenNat` \ amode ->
2397     getRegister src                 `thenNat` \ register ->
2398     let
2399         code1   = amodeCode amode []
2400         dst__2  = amodeAddr amode
2401         code2   = registerCode register tmp []
2402         src__2  = registerName register tmp
2403         sz      = primRepToSize pk
2404         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2405     in
2406     return code__2
2407
2408 assignIntCode pk dst src
2409   = getRegister dst                         `thenNat` \ register1 ->
2410     getRegister src                         `thenNat` \ register2 ->
2411     let
2412         dst__2  = registerName register1 zeroh
2413         code    = registerCode register2 dst__2
2414         src__2  = registerName register2 dst__2
2415         code__2 = if isFixed register2
2416                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2417                   else code
2418     in
2419     return code__2
2420
2421 #endif /* alpha_TARGET_ARCH */
2422
2423 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2424
2425 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2426
2427 -- integer assignment to memory
2428
2429 -- specific case of adding/subtracting an integer to a particular address.
2430 -- ToDo: catch other cases where we can use an operation directly on a memory
2431 -- address.
2432 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2433                                                  CmmLit (CmmInt i _)])
2434    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2435      Just instr <- check op
2436    = do Amode amode code_addr <- getAmode addr
2437         let code = code_addr `snocOL`
2438                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2439         return code
2440    where
2441         check (MO_Add _) = Just ADD
2442         check (MO_Sub _) = Just SUB
2443         check _ = Nothing
2444         -- ToDo: more?
2445
2446 -- general case
2447 assignMem_IntCode pk addr src = do
2448     Amode addr code_addr <- getAmode addr
2449     (code_src, op_src)   <- get_op_RI src
2450     let
2451         code = code_src `appOL`
2452                code_addr `snocOL`
2453                   MOV pk op_src (OpAddr addr)
2454         -- NOTE: op_src is stable, so it will still be valid
2455         -- after code_addr.  This may involve the introduction
2456         -- of an extra MOV to a temporary register, but we hope
2457         -- the register allocator will get rid of it.
2458     --
2459     return code
2460   where
2461     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2462     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2463       = return (nilOL, OpImm (litToImm lit))
2464     get_op_RI op
2465       = do (reg,code) <- getNonClobberedReg op
2466            return (code, OpReg reg)
2467
2468
2469 -- Assign; dst is a reg, rhs is mem
2470 assignReg_IntCode pk reg (CmmLoad src _) = do
2471   load_code <- intLoadCode (MOV pk) src
2472   return (load_code (getRegisterReg reg))
2473
2474 -- dst is a reg, but src could be anything
2475 assignReg_IntCode pk reg src = do
2476   code <- getAnyReg src
2477   return (code (getRegisterReg reg))
2478
2479 #endif /* i386_TARGET_ARCH */
2480
2481 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2482
2483 #if sparc_TARGET_ARCH
2484
2485 assignMem_IntCode pk addr src = do
2486     (srcReg, code) <- getSomeReg src
2487     Amode dstAddr addr_code <- getAmode addr
2488     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2489
2490 assignReg_IntCode pk reg src = do
2491     r <- getRegister src
2492     return $ case r of
2493         Any _ code         -> code dst
2494         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2495     where
2496       dst = getRegisterReg reg
2497
2498
2499 #endif /* sparc_TARGET_ARCH */
2500
2501 #if powerpc_TARGET_ARCH
2502
2503 assignMem_IntCode pk addr src = do
2504     (srcReg, code) <- getSomeReg src
2505     Amode dstAddr addr_code <- getAmode addr
2506     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2507
2508 -- dst is a reg, but src could be anything
2509 assignReg_IntCode pk reg src
2510     = do
2511         r <- getRegister src
2512         return $ case r of
2513             Any _ code         -> code dst
2514             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2515     where
2516         dst = getRegisterReg reg
2517
2518 #endif /* powerpc_TARGET_ARCH */
2519
2520
2521 -- -----------------------------------------------------------------------------
2522 -- Floating-point assignments
2523
2524 #if alpha_TARGET_ARCH
2525
2526 assignFltCode pk (CmmLoad dst _) src
2527   = getNewRegNat pk                 `thenNat` \ tmp ->
2528     getAmode dst                    `thenNat` \ amode ->
2529     getRegister src                         `thenNat` \ register ->
2530     let
2531         code1   = amodeCode amode []
2532         dst__2  = amodeAddr amode
2533         code2   = registerCode register tmp []
2534         src__2  = registerName register tmp
2535         sz      = primRepToSize pk
2536         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2537     in
2538     return code__2
2539
2540 assignFltCode pk dst src
2541   = getRegister dst                         `thenNat` \ register1 ->
2542     getRegister src                         `thenNat` \ register2 ->
2543     let
2544         dst__2  = registerName register1 zeroh
2545         code    = registerCode register2 dst__2
2546         src__2  = registerName register2 dst__2
2547         code__2 = if isFixed register2
2548                   then code . mkSeqInstr (FMOV src__2 dst__2)
2549                   else code
2550     in
2551     return code__2
2552
2553 #endif /* alpha_TARGET_ARCH */
2554
2555 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2556
2557 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2558
2559 -- Floating point assignment to memory
2560 assignMem_FltCode pk addr src = do
2561   (src_reg, src_code) <- getNonClobberedReg src
2562   Amode addr addr_code <- getAmode addr
2563   let
2564         code = src_code `appOL`
2565                addr_code `snocOL`
2566                 IF_ARCH_i386(GST pk src_reg addr,
2567                              MOV pk (OpReg src_reg) (OpAddr addr))
2568   return code
2569
2570 -- Floating point assignment to a register/temporary
2571 assignReg_FltCode pk reg src = do
2572   src_code <- getAnyReg src
2573   return (src_code (getRegisterReg reg))
2574
2575 #endif /* i386_TARGET_ARCH */
2576
2577 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2578
2579 #if sparc_TARGET_ARCH
2580
2581 -- Floating point assignment to memory
2582 assignMem_FltCode pk addr src = do
2583     Amode dst__2 code1 <- getAmode addr
2584     (src__2, code2) <- getSomeReg src
2585     tmp1 <- getNewRegNat pk
2586     let
2587         pk__2   = cmmExprRep src
2588         code__2 = code1 `appOL` code2 `appOL`
2589             if   pk == pk__2
2590             then unitOL (ST pk src__2 dst__2)
2591             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2592     return code__2
2593
2594 -- Floating point assignment to a register/temporary
2595 -- ToDo: Verify correctness
2596 assignReg_FltCode pk reg src = do
2597     r <- getRegister src
2598     v1 <- getNewRegNat pk
2599     return $ case r of
2600         Any _ code         -> code dst
2601         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2602     where
2603       dst = getRegisterReg reg
2604
2605 #endif /* sparc_TARGET_ARCH */
2606
2607 #if powerpc_TARGET_ARCH
2608
2609 -- Easy, isn't it?
2610 assignMem_FltCode = assignMem_IntCode
2611 assignReg_FltCode = assignReg_IntCode
2612
2613 #endif /* powerpc_TARGET_ARCH */
2614
2615
2616 -- -----------------------------------------------------------------------------
2617 -- Generating an non-local jump
2618
2619 -- (If applicable) Do not fill the delay slots here; you will confuse the
2620 -- register allocator.
2621
2622 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2623
2624 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2625
2626 #if alpha_TARGET_ARCH
2627
2628 genJump (CmmLabel lbl)
2629   | isAsmTemp lbl = returnInstr (BR target)
2630   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2631   where
2632     target = ImmCLbl lbl
2633
2634 genJump tree
2635   = getRegister tree                `thenNat` \ register ->
2636     getNewRegNat PtrRep             `thenNat` \ tmp ->
2637     let
2638         dst    = registerName register pv
2639         code   = registerCode register pv
2640         target = registerName register pv
2641     in
2642     if isFixed register then
2643         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2644     else
2645     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2646
2647 #endif /* alpha_TARGET_ARCH */
2648
2649 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2650
2651 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2652
2653 genJump (CmmLoad mem pk) = do
2654   Amode target code <- getAmode mem
2655   return (code `snocOL` JMP (OpAddr target))
2656
2657 genJump (CmmLit lit) = do
2658   return (unitOL (JMP (OpImm (litToImm lit))))
2659
2660 genJump expr = do
2661   (reg,code) <- getSomeReg expr
2662   return (code `snocOL` JMP (OpReg reg))
2663
2664 #endif /* i386_TARGET_ARCH */
2665
2666 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2667
2668 #if sparc_TARGET_ARCH
2669
2670 genJump (CmmLit (CmmLabel lbl))
2671   = return (toOL [CALL (Left target) 0 True, NOP])
2672   where
2673     target = ImmCLbl lbl
2674
2675 genJump tree
2676   = do
2677         (target, code) <- getSomeReg tree
2678         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2679
2680 #endif /* sparc_TARGET_ARCH */
2681
2682 #if powerpc_TARGET_ARCH
2683 genJump (CmmLit (CmmLabel lbl))
2684   = return (unitOL $ JMP lbl)
2685
2686 genJump tree
2687   = do
2688         (target,code) <- getSomeReg tree
2689         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2690 #endif /* powerpc_TARGET_ARCH */
2691
2692
2693 -- -----------------------------------------------------------------------------
2694 --  Unconditional branches
2695
2696 genBranch :: BlockId -> NatM InstrBlock
2697
2698 genBranch = return . toOL . mkBranchInstr
2699
2700 -- -----------------------------------------------------------------------------
2701 --  Conditional jumps
2702
2703 {-
2704 Conditional jumps are always to local labels, so we can use branch
2705 instructions.  We peek at the arguments to decide what kind of
2706 comparison to do.
2707
2708 ALPHA: For comparisons with 0, we're laughing, because we can just do
2709 the desired conditional branch.
2710
2711 I386: First, we have to ensure that the condition
2712 codes are set according to the supplied comparison operation.
2713
2714 SPARC: First, we have to ensure that the condition codes are set
2715 according to the supplied comparison operation.  We generate slightly
2716 different code for floating point comparisons, because a floating
2717 point operation cannot directly precede a @BF@.  We assume the worst
2718 and fill that slot with a @NOP@.
2719
2720 SPARC: Do not fill the delay slots here; you will confuse the register
2721 allocator.
2722 -}
2723
2724
2725 genCondJump
2726     :: BlockId      -- the branch target
2727     -> CmmExpr      -- the condition on which to branch
2728     -> NatM InstrBlock
2729
2730 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2731
2732 #if alpha_TARGET_ARCH
2733
2734 genCondJump id (StPrim op [x, StInt 0])
2735   = getRegister x                           `thenNat` \ register ->
2736     getNewRegNat (registerRep register)
2737                                     `thenNat` \ tmp ->
2738     let
2739         code   = registerCode register tmp
2740         value  = registerName register tmp
2741         pk     = registerRep register
2742         target = ImmCLbl lbl
2743     in
2744     returnSeq code [BI (cmpOp op) value target]
2745   where
2746     cmpOp CharGtOp = GTT
2747     cmpOp CharGeOp = GE
2748     cmpOp CharEqOp = EQQ
2749     cmpOp CharNeOp = NE
2750     cmpOp CharLtOp = LTT
2751     cmpOp CharLeOp = LE
2752     cmpOp IntGtOp = GTT
2753     cmpOp IntGeOp = GE
2754     cmpOp IntEqOp = EQQ
2755     cmpOp IntNeOp = NE
2756     cmpOp IntLtOp = LTT
2757     cmpOp IntLeOp = LE
2758     cmpOp WordGtOp = NE
2759     cmpOp WordGeOp = ALWAYS
2760     cmpOp WordEqOp = EQQ
2761     cmpOp WordNeOp = NE
2762     cmpOp WordLtOp = NEVER
2763     cmpOp WordLeOp = EQQ
2764     cmpOp AddrGtOp = NE
2765     cmpOp AddrGeOp = ALWAYS
2766     cmpOp AddrEqOp = EQQ
2767     cmpOp AddrNeOp = NE
2768     cmpOp AddrLtOp = NEVER
2769     cmpOp AddrLeOp = EQQ
2770
2771 genCondJump lbl (StPrim op [x, StDouble 0.0])
2772   = getRegister x                           `thenNat` \ register ->
2773     getNewRegNat (registerRep register)
2774                                     `thenNat` \ tmp ->
2775     let
2776         code   = registerCode register tmp
2777         value  = registerName register tmp
2778         pk     = registerRep register
2779         target = ImmCLbl lbl
2780     in
2781     return (code . mkSeqInstr (BF (cmpOp op) value target))
2782   where
2783     cmpOp FloatGtOp = GTT
2784     cmpOp FloatGeOp = GE
2785     cmpOp FloatEqOp = EQQ
2786     cmpOp FloatNeOp = NE
2787     cmpOp FloatLtOp = LTT
2788     cmpOp FloatLeOp = LE
2789     cmpOp DoubleGtOp = GTT
2790     cmpOp DoubleGeOp = GE
2791     cmpOp DoubleEqOp = EQQ
2792     cmpOp DoubleNeOp = NE
2793     cmpOp DoubleLtOp = LTT
2794     cmpOp DoubleLeOp = LE
2795
2796 genCondJump lbl (StPrim op [x, y])
2797   | fltCmpOp op
2798   = trivialFCode pr instr x y       `thenNat` \ register ->
2799     getNewRegNat F64                `thenNat` \ tmp ->
2800     let
2801         code   = registerCode register tmp
2802         result = registerName register tmp
2803         target = ImmCLbl lbl
2804     in
2805     return (code . mkSeqInstr (BF cond result target))
2806   where
2807     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2808
2809     fltCmpOp op = case op of
2810         FloatGtOp -> True
2811         FloatGeOp -> True
2812         FloatEqOp -> True
2813         FloatNeOp -> True
2814         FloatLtOp -> True
2815         FloatLeOp -> True
2816         DoubleGtOp -> True
2817         DoubleGeOp -> True
2818         DoubleEqOp -> True
2819         DoubleNeOp -> True
2820         DoubleLtOp -> True
2821         DoubleLeOp -> True
2822         _ -> False
2823     (instr, cond) = case op of
2824         FloatGtOp -> (FCMP TF LE, EQQ)
2825         FloatGeOp -> (FCMP TF LTT, EQQ)
2826         FloatEqOp -> (FCMP TF EQQ, NE)
2827         FloatNeOp -> (FCMP TF EQQ, EQQ)
2828         FloatLtOp -> (FCMP TF LTT, NE)
2829         FloatLeOp -> (FCMP TF LE, NE)
2830         DoubleGtOp -> (FCMP TF LE, EQQ)
2831         DoubleGeOp -> (FCMP TF LTT, EQQ)
2832         DoubleEqOp -> (FCMP TF EQQ, NE)
2833         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2834         DoubleLtOp -> (FCMP TF LTT, NE)
2835         DoubleLeOp -> (FCMP TF LE, NE)
2836
2837 genCondJump lbl (StPrim op [x, y])
2838   = trivialCode instr x y           `thenNat` \ register ->
2839     getNewRegNat IntRep             `thenNat` \ tmp ->
2840     let
2841         code   = registerCode register tmp
2842         result = registerName register tmp
2843         target = ImmCLbl lbl
2844     in
2845     return (code . mkSeqInstr (BI cond result target))
2846   where
2847     (instr, cond) = case op of
2848         CharGtOp -> (CMP LE, EQQ)
2849         CharGeOp -> (CMP LTT, EQQ)
2850         CharEqOp -> (CMP EQQ, NE)
2851         CharNeOp -> (CMP EQQ, EQQ)
2852         CharLtOp -> (CMP LTT, NE)
2853         CharLeOp -> (CMP LE, NE)
2854         IntGtOp -> (CMP LE, EQQ)
2855         IntGeOp -> (CMP LTT, EQQ)
2856         IntEqOp -> (CMP EQQ, NE)
2857         IntNeOp -> (CMP EQQ, EQQ)
2858         IntLtOp -> (CMP LTT, NE)
2859         IntLeOp -> (CMP LE, NE)
2860         WordGtOp -> (CMP ULE, EQQ)
2861         WordGeOp -> (CMP ULT, EQQ)
2862         WordEqOp -> (CMP EQQ, NE)
2863         WordNeOp -> (CMP EQQ, EQQ)
2864         WordLtOp -> (CMP ULT, NE)
2865         WordLeOp -> (CMP ULE, NE)
2866         AddrGtOp -> (CMP ULE, EQQ)
2867         AddrGeOp -> (CMP ULT, EQQ)
2868         AddrEqOp -> (CMP EQQ, NE)
2869         AddrNeOp -> (CMP EQQ, EQQ)
2870         AddrLtOp -> (CMP ULT, NE)
2871         AddrLeOp -> (CMP ULE, NE)
2872
2873 #endif /* alpha_TARGET_ARCH */
2874
2875 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2876
2877 #if i386_TARGET_ARCH
2878
2879 genCondJump id bool = do
2880   CondCode _ cond code <- getCondCode bool
2881   return (code `snocOL` JXX cond id)
2882
2883 #endif
2884
2885 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2886
2887 #if x86_64_TARGET_ARCH
2888
2889 genCondJump id bool = do
2890   CondCode is_float cond cond_code <- getCondCode bool
2891   if not is_float
2892     then
2893         return (cond_code `snocOL` JXX cond id)
2894     else do
2895         lbl <- getBlockIdNat
2896
2897         -- see comment with condFltReg
2898         let code = case cond of
2899                         NE  -> or_unordered
2900                         GU  -> plain_test
2901                         GEU -> plain_test
2902                         _   -> and_ordered
2903
2904             plain_test = unitOL (
2905                   JXX cond id
2906                 )
2907             or_unordered = toOL [
2908                   JXX cond id,
2909                   JXX PARITY id
2910                 ]
2911             and_ordered = toOL [
2912                   JXX PARITY lbl,
2913                   JXX cond id,
2914                   JXX ALWAYS lbl,
2915                   NEWBLOCK lbl
2916                 ]
2917         return (cond_code `appOL` code)
2918
2919 #endif
2920
2921 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2922
2923 #if sparc_TARGET_ARCH
2924
2925 genCondJump (BlockId id) bool = do
2926   CondCode is_float cond code <- getCondCode bool
2927   return (
2928        code `appOL`
2929        toOL (
2930          if   is_float
2931          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2932          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2933        )
2934     )
2935
2936 #endif /* sparc_TARGET_ARCH */
2937
2938
2939 #if powerpc_TARGET_ARCH
2940
2941 genCondJump id bool = do
2942   CondCode is_float cond code <- getCondCode bool
2943   return (code `snocOL` BCC cond id)
2944
2945 #endif /* powerpc_TARGET_ARCH */
2946
2947
2948 -- -----------------------------------------------------------------------------
2949 --  Generating C calls
2950
2951 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2952 -- @get_arg@, which moves the arguments to the correct registers/stack
2953 -- locations.  Apart from that, the code is easy.
2954 --
2955 -- (If applicable) Do not fill the delay slots here; you will confuse the
2956 -- register allocator.
2957
2958 genCCall
2959     :: CmmCallTarget            -- function to call
2960     -> CmmHintFormals           -- where to put the result
2961     -> CmmActuals               -- arguments (of mixed type)
2962     -> NatM InstrBlock
2963
2964 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2965
2966 #if alpha_TARGET_ARCH
2967
2968 ccallResultRegs =
2969
2970 genCCall fn cconv result_regs args
2971   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2972                           `thenNat` \ ((unused,_), argCode) ->
2973     let
2974         nRegs = length allArgRegs - length unused
2975         code = asmSeqThen (map ($ []) argCode)
2976     in
2977         returnSeq code [
2978             LDA pv (AddrImm (ImmLab (ptext fn))),
2979             JSR ra (AddrReg pv) nRegs,
2980             LDGP gp (AddrReg ra)]
2981   where
2982     ------------------------
2983     {-  Try to get a value into a specific register (or registers) for
2984         a call.  The first 6 arguments go into the appropriate
2985         argument register (separate registers for integer and floating
2986         point arguments, but used in lock-step), and the remaining
2987         arguments are dumped to the stack, beginning at 0(sp).  Our
2988         first argument is a pair of the list of remaining argument
2989         registers to be assigned for this call and the next stack
2990         offset to use for overflowing arguments.  This way,
2991         @get_Arg@ can be applied to all of a call's arguments using
2992         @mapAccumLNat@.
2993     -}
2994     get_arg
2995         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2996         -> StixTree             -- Current argument
2997         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2998
2999     -- We have to use up all of our argument registers first...
3000
3001     get_arg ((iDst,fDst):dsts, offset) arg
3002       = getRegister arg                     `thenNat` \ register ->
3003         let
3004             reg  = if isFloatingRep pk then fDst else iDst
3005             code = registerCode register reg
3006             src  = registerName register reg
3007             pk   = registerRep register
3008         in
3009         return (
3010             if isFloatingRep pk then
3011                 ((dsts, offset), if isFixed register then
3012                     code . mkSeqInstr (FMOV src fDst)
3013                     else code)
3014             else
3015                 ((dsts, offset), if isFixed register then
3016                     code . mkSeqInstr (OR src (RIReg src) iDst)
3017                     else code))
3018
3019     -- Once we have run out of argument registers, we move to the
3020     -- stack...
3021
3022     get_arg ([], offset) arg
3023       = getRegister arg                 `thenNat` \ register ->
3024         getNewRegNat (registerRep register)
3025                                         `thenNat` \ tmp ->
3026         let
3027             code = registerCode register tmp
3028             src  = registerName register tmp
3029             pk   = registerRep register
3030             sz   = primRepToSize pk
3031         in
3032         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3033
3034 #endif /* alpha_TARGET_ARCH */
3035
3036 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3037
3038 #if i386_TARGET_ARCH
3039
3040 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3041         -- write barrier compiles to no code on x86/x86-64;
3042         -- we keep it this long in order to prevent earlier optimisations.
3043
3044 -- we only cope with a single result for foreign calls
3045 genCCall (CmmPrim op) [(r,_)] args = do
3046   case op of
3047         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3048         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3049
3050         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3051         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3052
3053         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3054         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3055
3056         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3057         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3058
3059         other_op    -> outOfLineFloatOp op r args
3060  where
3061   actuallyInlineFloatOp rep instr [(x,_)]
3062         = do res <- trivialUFCode rep instr x
3063              any <- anyReg res
3064              return (any (getRegisterReg (CmmLocal r)))
3065
3066 genCCall target dest_regs args = do
3067     let
3068         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
3069 #if !darwin_TARGET_OS
3070         tot_arg_size        = sum sizes
3071 #else
3072         raw_arg_size        = sum sizes
3073         tot_arg_size        = roundTo 16 raw_arg_size
3074         arg_pad_size        = tot_arg_size - raw_arg_size
3075     delta0 <- getDeltaNat
3076     setDeltaNat (delta0 - arg_pad_size)
3077 #endif
3078
3079     push_codes <- mapM push_arg (reverse args)
3080     delta <- getDeltaNat
3081
3082     -- in
3083     -- deal with static vs dynamic call targets
3084     (callinsns,cconv) <-
3085       case target of
3086         -- CmmPrim -> ...
3087         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3088            -> -- ToDo: stdcall arg sizes
3089               return (unitOL (CALL (Left fn_imm) []), conv)
3090            where fn_imm = ImmCLbl lbl
3091         CmmForeignCall expr conv
3092            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3093                  ASSERT(dyn_rep == I32)
3094                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3095
3096     let push_code
3097 #if darwin_TARGET_OS
3098             | arg_pad_size /= 0
3099             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3100                     DELTA (delta0 - arg_pad_size)]
3101               `appOL` concatOL push_codes
3102             | otherwise
3103 #endif
3104             = concatOL push_codes
3105         call = callinsns `appOL`
3106                toOL (
3107                         -- Deallocate parameters after call for ccall;
3108                         -- but not for stdcall (callee does it)
3109                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3110                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3111                   ++
3112                   [DELTA (delta + tot_arg_size)]
3113                )
3114     -- in
3115     setDeltaNat (delta + tot_arg_size)
3116
3117     let
3118         -- assign the results, if necessary
3119         assign_code []     = nilOL
3120         assign_code [(dest,_hint)] =
3121           case rep of
3122                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3123                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3124                 F32 -> unitOL (GMOV fake0 r_dest)
3125                 F64 -> unitOL (GMOV fake0 r_dest)
3126                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3127           where
3128                 r_dest_hi = getHiVRegFromLo r_dest
3129                 rep = localRegRep dest
3130                 r_dest = getRegisterReg (CmmLocal dest)
3131         assign_code many = panic "genCCall.assign_code many"
3132
3133     return (push_code `appOL`
3134             call `appOL`
3135             assign_code dest_regs)
3136
3137   where
3138     arg_size F64 = 8
3139     arg_size F32 = 4
3140     arg_size I64 = 8
3141     arg_size _   = 4
3142
3143     roundTo a x | x `mod` a == 0 = x
3144                 | otherwise = x + a - (x `mod` a)
3145
3146
3147     push_arg :: (CmmExpr,MachHint){-current argument-}
3148                     -> NatM InstrBlock  -- code
3149
3150     push_arg (arg,_hint) -- we don't need the hints on x86
3151       | arg_rep == I64 = do
3152         ChildCode64 code r_lo <- iselExpr64 arg
3153         delta <- getDeltaNat
3154         setDeltaNat (delta - 8)
3155         let
3156             r_hi = getHiVRegFromLo r_lo
3157         -- in
3158         return (       code `appOL`
3159                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3160                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3161                              DELTA (delta-8)]
3162             )
3163
3164       | otherwise = do
3165         (code, reg, sz) <- get_op arg
3166         delta <- getDeltaNat
3167         let size = arg_size sz
3168         setDeltaNat (delta-size)
3169         if (case sz of F64 -> True; F32 -> True; _ -> False)
3170            then return (code `appOL`
3171                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3172                               DELTA (delta-size),
3173                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3174                                                         EAIndexNone
3175                                                         (ImmInt 0))]
3176                        )
3177            else return (code `snocOL`
3178                         PUSH I32 (OpReg reg) `snocOL`
3179                         DELTA (delta-size)
3180                        )
3181       where
3182          arg_rep = cmmExprRep arg
3183
3184     ------------
3185     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3186     get_op op = do
3187         (reg,code) <- getSomeReg op
3188         return (code, reg, cmmExprRep op)
3189
3190 #endif /* i386_TARGET_ARCH */
3191
3192 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3193
3194 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
3195   -> NatM InstrBlock
3196 outOfLineFloatOp mop res args
3197   = do
3198       targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3199       let target = CmmForeignCall targetExpr CCallConv
3200
3201       if localRegRep res == F64
3202         then
3203           stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe)
3204         else do
3205           uq <- getUniqueNat
3206           let
3207             tmp = LocalReg uq F64 KindNonPtr
3208           -- in
3209           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe)
3210           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3211           return (code1 `appOL` code2)
3212   where
3213         lbl = mkForeignLabel fn Nothing False
3214
3215         fn = case mop of
3216               MO_F32_Sqrt  -> FSLIT("sqrtf")
3217               MO_F32_Sin   -> FSLIT("sinf")
3218               MO_F32_Cos   -> FSLIT("cosf")
3219               MO_F32_Tan   -> FSLIT("tanf")
3220               MO_F32_Exp   -> FSLIT("expf")
3221               MO_F32_Log   -> FSLIT("logf")
3222
3223               MO_F32_Asin  -> FSLIT("asinf")
3224               MO_F32_Acos  -> FSLIT("acosf")
3225               MO_F32_Atan  -> FSLIT("atanf")
3226
3227               MO_F32_Sinh  -> FSLIT("sinhf")
3228               MO_F32_Cosh  -> FSLIT("coshf")
3229               MO_F32_Tanh  -> FSLIT("tanhf")
3230               MO_F32_Pwr   -> FSLIT("powf")
3231
3232               MO_F64_Sqrt  -> FSLIT("sqrt")
3233               MO_F64_Sin   -> FSLIT("sin")
3234               MO_F64_Cos   -> FSLIT("cos")
3235               MO_F64_Tan   -> FSLIT("tan")
3236               MO_F64_Exp   -> FSLIT("exp")
3237               MO_F64_Log   -> FSLIT("log")
3238
3239               MO_F64_Asin  -> FSLIT("asin")
3240               MO_F64_Acos  -> FSLIT("acos")
3241               MO_F64_Atan  -> FSLIT("atan")
3242
3243               MO_F64_Sinh  -> FSLIT("sinh")
3244               MO_F64_Cosh  -> FSLIT("cosh")
3245               MO_F64_Tanh  -> FSLIT("tanh")
3246               MO_F64_Pwr   -> FSLIT("pow")
3247
3248 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3249
3250 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3251
3252 #if x86_64_TARGET_ARCH
3253
3254 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3255         -- write barrier compiles to no code on x86/x86-64;
3256         -- we keep it this long in order to prevent earlier optimisations.
3257
3258 genCCall (CmmPrim op) [(r,_)] args =
3259   outOfLineFloatOp op r args
3260
3261 genCCall target dest_regs args = do
3262
3263         -- load up the register arguments
3264     (stack_args, aregs, fregs, load_args_code)
3265          <- load_args args allArgRegs allFPArgRegs nilOL
3266
3267     let
3268         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3269         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3270         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3271                 -- for annotating the call instruction with
3272
3273         sse_regs = length fp_regs_used
3274
3275         tot_arg_size = arg_size * length stack_args
3276
3277         -- On entry to the called function, %rsp should be aligned
3278         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3279         -- the return address is 16-byte aligned).  In STG land
3280         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3281         -- need to make sure we push a multiple of 16-bytes of args,
3282         -- plus the return address, to get the correct alignment.
3283         -- Urg, this is hard.  We need to feed the delta back into
3284         -- the arg pushing code.
3285     (real_size, adjust_rsp) <-
3286         if tot_arg_size `rem` 16 == 0
3287             then return (tot_arg_size, nilOL)
3288             else do -- we need to adjust...
3289                 delta <- getDeltaNat
3290                 setDeltaNat (delta-8)
3291                 return (tot_arg_size+8, toOL [
3292                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3293                                 DELTA (delta-8)
3294                         ])
3295
3296         -- push the stack args, right to left
3297     push_code <- push_args (reverse stack_args) nilOL
3298     delta <- getDeltaNat
3299
3300     -- deal with static vs dynamic call targets
3301     (callinsns,cconv) <-
3302       case target of
3303         -- CmmPrim -> ...
3304         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3305            -> -- ToDo: stdcall arg sizes
3306               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3307            where fn_imm = ImmCLbl lbl
3308         CmmForeignCall expr conv
3309            -> do (dyn_r, dyn_c) <- getSomeReg expr
3310                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3311
3312     let
3313         -- The x86_64 ABI requires us to set %al to the number of SSE
3314         -- registers that contain arguments, if the called routine
3315         -- is a varargs function.  We don't know whether it's a
3316         -- varargs function or not, so we have to assume it is.
3317         --
3318         -- It's not safe to omit this assignment, even if the number
3319         -- of SSE regs in use is zero.  If %al is larger than 8
3320         -- on entry to a varargs function, seg faults ensue.
3321         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3322
3323     let call = callinsns `appOL`
3324                toOL (
3325                         -- Deallocate parameters after call for ccall;
3326                         -- but not for stdcall (callee does it)
3327                   (if cconv == StdCallConv || real_size==0 then [] else
3328                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3329                   ++
3330                   [DELTA (delta + real_size)]
3331                )
3332     -- in
3333     setDeltaNat (delta + real_size)
3334
3335     let
3336         -- assign the results, if necessary
3337         assign_code []     = nilOL
3338         assign_code [(dest,_hint)] =
3339           case rep of
3340                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3341                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3342                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3343           where
3344                 rep = localRegRep dest
3345                 r_dest = getRegisterReg (CmmLocal dest)
3346         assign_code many = panic "genCCall.assign_code many"
3347
3348     return (load_args_code      `appOL`
3349             adjust_rsp          `appOL`
3350             push_code           `appOL`
3351             assign_eax sse_regs `appOL`
3352             call                `appOL`
3353             assign_code dest_regs)
3354
3355   where
3356     arg_size = 8 -- always, at the mo
3357
3358     load_args :: [(CmmExpr,MachHint)]
3359               -> [Reg]                  -- int regs avail for args
3360               -> [Reg]                  -- FP regs avail for args
3361               -> InstrBlock
3362               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3363     load_args args [] [] code     =  return (args, [], [], code)
3364         -- no more regs to use
3365     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3366         -- no more args to push
3367     load_args ((arg,hint) : rest) aregs fregs code
3368         | isFloatingRep arg_rep =
3369         case fregs of
3370           [] -> push_this_arg
3371           (r:rs) -> do
3372              arg_code <- getAnyReg arg
3373              load_args rest aregs rs (code `appOL` arg_code r)
3374         | otherwise =
3375         case aregs of
3376           [] -> push_this_arg
3377           (r:rs) -> do
3378              arg_code <- getAnyReg arg
3379              load_args rest rs fregs (code `appOL` arg_code r)
3380         where
3381           arg_rep = cmmExprRep arg
3382
3383           push_this_arg = do
3384             (args',ars,frs,code') <- load_args rest aregs fregs code
3385             return ((arg,hint):args', ars, frs, code')
3386
3387     push_args [] code = return code
3388     push_args ((arg,hint):rest) code
3389        | isFloatingRep arg_rep = do
3390          (arg_reg, arg_code) <- getSomeReg arg
3391          delta <- getDeltaNat
3392          setDeltaNat (delta-arg_size)
3393          let code' = code `appOL` arg_code `appOL` toOL [
3394                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3395                         DELTA (delta-arg_size),
3396                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3397          push_args rest code'
3398
3399        | otherwise = do
3400        -- we only ever generate word-sized function arguments.  Promotion
3401        -- has already happened: our Int8# type is kept sign-extended
3402        -- in an Int#, for example.
3403          ASSERT(arg_rep == I64) return ()
3404          (arg_op, arg_code) <- getOperand arg
3405          delta <- getDeltaNat
3406          setDeltaNat (delta-arg_size)
3407          let code' = code `appOL` toOL [PUSH I64 arg_op,
3408                                         DELTA (delta-arg_size)]
3409          push_args rest code'
3410         where
3411           arg_rep = cmmExprRep arg
3412 #endif
3413
3414 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3415
3416 #if sparc_TARGET_ARCH
3417 {-
3418    The SPARC calling convention is an absolute
3419    nightmare.  The first 6x32 bits of arguments are mapped into
3420    %o0 through %o5, and the remaining arguments are dumped to the
3421    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3422
3423    If we have to put args on the stack, move %o6==%sp down by
3424    the number of words to go on the stack, to ensure there's enough space.
3425
3426    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3427    16 words above the stack pointer is a word for the address of
3428    a structure return value.  I use this as a temporary location
3429    for moving values from float to int regs.  Certainly it isn't
3430    safe to put anything in the 16 words starting at %sp, since
3431    this area can get trashed at any time due to window overflows
3432    caused by signal handlers.
3433
3434    A final complication (if the above isn't enough) is that
3435    we can't blithely calculate the arguments one by one into
3436    %o0 .. %o5.  Consider the following nested calls:
3437
3438        fff a (fff b c)
3439
3440    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3441    the inner call will itself use %o0, which trashes the value put there
3442    in preparation for the outer call.  Upshot: we need to calculate the
3443    args into temporary regs, and move those to arg regs or onto the
3444    stack only immediately prior to the call proper.  Sigh.
3445 -}
3446
3447 genCCall target dest_regs argsAndHints = do
3448     let
3449         args = map fst argsAndHints
3450     argcode_and_vregs <- mapM arg_to_int_vregs args
3451     let
3452         (argcodes, vregss) = unzip argcode_and_vregs
3453         n_argRegs          = length allArgRegs
3454         n_argRegs_used     = min (length vregs) n_argRegs
3455         vregs              = concat vregss
3456     -- deal with static vs dynamic call targets
3457     callinsns <- (case target of
3458         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3459                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3460         CmmForeignCall expr conv -> do
3461                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3462                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3463         CmmPrim mop -> do
3464                   (res, reduce) <- outOfLineFloatOp mop
3465                   lblOrMopExpr <- case res of
3466                        Left lbl -> do
3467                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3468                        Right mopExpr -> do
3469                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3470                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3471                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3472
3473       )
3474     let
3475         argcode = concatOL argcodes
3476         (move_sp_down, move_sp_up)
3477            = let diff = length vregs - n_argRegs
3478                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3479              in  if   nn <= 0
3480                  then (nilOL, nilOL)
3481                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3482         transfer_code
3483            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3484     return (argcode       `appOL`
3485             move_sp_down  `appOL`
3486             transfer_code `appOL`
3487             callinsns     `appOL`
3488             unitOL NOP    `appOL`
3489             move_sp_up)
3490   where
3491      -- move args from the integer vregs into which they have been
3492      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3493      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3494
3495      move_final [] _ offset          -- all args done
3496         = []
3497
3498      move_final (v:vs) [] offset     -- out of aregs; move to stack
3499         = ST I32 v (spRel offset)
3500           : move_final vs [] (offset+1)
3501
3502      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3503         = OR False g0 (RIReg v) a
3504           : move_final vs az offset
3505
3506      -- generate code to calculate an argument, and move it into one
3507      -- or two integer vregs.
3508      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3509      arg_to_int_vregs arg
3510         | (cmmExprRep arg) == I64
3511         = do
3512           (ChildCode64 code r_lo) <- iselExpr64 arg
3513           let
3514               r_hi = getHiVRegFromLo r_lo
3515           return (code, [r_hi, r_lo])
3516         | otherwise
3517         = do
3518           (src, code) <- getSomeReg arg
3519           tmp <- getNewRegNat (cmmExprRep arg)
3520           let
3521               pk   = cmmExprRep arg
3522           case pk of
3523              F64 -> do
3524                       v1 <- getNewRegNat I32
3525                       v2 <- getNewRegNat I32
3526                       return (
3527                         code                          `snocOL`
3528                         FMOV F64 src f0                `snocOL`
3529                         ST   F32  f0 (spRel 16)         `snocOL`
3530                         LD   I32  (spRel 16) v1         `snocOL`
3531                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3532                         LD   I32  (spRel 16) v2
3533                         ,
3534                         [v1,v2]
3535                        )
3536              F32 -> do
3537                       v1 <- getNewRegNat I32
3538                       return (
3539                         code                    `snocOL`
3540                         ST   F32  src (spRel 16)  `snocOL`
3541                         LD   I32  (spRel 16) v1
3542                         ,
3543                         [v1]
3544                        )
3545              other -> do
3546                         v1 <- getNewRegNat I32
3547                         return (
3548                           code `snocOL` OR False g0 (RIReg src) v1
3549                           ,
3550                           [v1]
3551                          )
3552 outOfLineFloatOp mop =
3553     do
3554       mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3555                   mkForeignLabel functionName Nothing True
3556       let mopLabelOrExpr = case mopExpr of
3557                         CmmLit (CmmLabel lbl) -> Left lbl
3558                         _ -> Right mopExpr
3559       return (mopLabelOrExpr, reduce)
3560             where
3561                 (reduce, functionName) = case mop of
3562                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3563                   MO_F32_Log    -> (True,  FSLIT("log"))
3564                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3565
3566                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3567                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3568                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3569
3570                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3571                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3572                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3573
3574                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3575                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3576                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3577
3578                   MO_F64_Exp    -> (False, FSLIT("exp"))
3579                   MO_F64_Log    -> (False, FSLIT("log"))
3580                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3581
3582                   MO_F64_Sin    -> (False, FSLIT("sin"))
3583                   MO_F64_Cos    -> (False, FSLIT("cos"))
3584                   MO_F64_Tan    -> (False, FSLIT("tan"))
3585
3586                   MO_F64_Asin   -> (False, FSLIT("asin"))
3587                   MO_F64_Acos   -> (False, FSLIT("acos"))
3588                   MO_F64_Atan   -> (False, FSLIT("atan"))
3589
3590                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3591                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3592                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3593
3594                   other -> pprPanic "outOfLineFloatOp(sparc) "
3595                                 (pprCallishMachOp mop)
3596
3597 #endif /* sparc_TARGET_ARCH */
3598
3599 #if powerpc_TARGET_ARCH
3600
3601 #if darwin_TARGET_OS || linux_TARGET_OS
3602 {-
3603     The PowerPC calling convention for Darwin/Mac OS X
3604     is described in Apple's document
3605     "Inside Mac OS X - Mach-O Runtime Architecture".
3606
3607     PowerPC Linux uses the System V Release 4 Calling Convention
3608     for PowerPC. It is described in the
3609     "System V Application Binary Interface PowerPC Processor Supplement".
3610
3611     Both conventions are similar:
3612     Parameters may be passed in general-purpose registers starting at r3, in
3613     floating point registers starting at f1, or on the stack.
3614
3615     But there are substantial differences:
3616     * The number of registers used for parameter passing and the exact set of
3617       nonvolatile registers differs (see MachRegs.lhs).
3618     * On Darwin, stack space is always reserved for parameters, even if they are
3619       passed in registers. The called routine may choose to save parameters from
3620       registers to the corresponding space on the stack.
3621     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3622       parameter is passed in an FPR.
3623     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3624       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3625       Darwin just treats an I64 like two separate I32s (high word first).
3626     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3627       4-byte aligned like everything else on Darwin.
3628     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3629       PowerPC Linux does not agree, so neither do we.
3630
3631     According to both conventions, The parameter area should be part of the
3632     caller's stack frame, allocated in the caller's prologue code (large enough
3633     to hold the parameter lists for all called routines). The NCG already
3634     uses the stack for register spilling, leaving 64 bytes free at the top.
3635     If we need a larger parameter area than that, we just allocate a new stack
3636     frame just before ccalling.
3637 -}
3638
3639
3640 genCCall (CmmPrim MO_WriteBarrier) _ _
3641  = return $ unitOL LWSYNC
3642
3643 genCCall target dest_regs argsAndHints
3644   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3645         -- we rely on argument promotion in the codeGen
3646     do
3647         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3648                                                         (zip args argReps)
3649                                                         allArgRegs allFPArgRegs
3650                                                         initialStackOffset
3651                                                         (toOL []) []
3652
3653         (labelOrExpr, reduceToF32) <- case target of
3654             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3655             CmmForeignCall expr conv -> return  (Right expr, False)
3656             CmmPrim mop -> outOfLineFloatOp mop
3657
3658         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3659             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3660
3661         case labelOrExpr of
3662             Left lbl -> do
3663                 return (         codeBefore
3664                         `snocOL` BL lbl usedRegs
3665                         `appOL`  codeAfter)
3666             Right dyn -> do
3667                 (dynReg, dynCode) <- getSomeReg dyn
3668                 return (         dynCode
3669                         `snocOL` MTCTR dynReg
3670                         `appOL`  codeBefore
3671                         `snocOL` BCTRL usedRegs
3672                         `appOL`  codeAfter)
3673     where
3674 #if darwin_TARGET_OS
3675         initialStackOffset = 24
3676             -- size of linkage area + size of arguments, in bytes
3677         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3678                                        map machRepByteWidth argReps
3679 #elif linux_TARGET_OS
3680         initialStackOffset = 8
3681         stackDelta finalStack = roundTo 16 finalStack
3682 #endif
3683         args = map fst argsAndHints
3684         argReps = map cmmExprRep args
3685
3686         roundTo a x | x `mod` a == 0 = x
3687                     | otherwise = x + a - (x `mod` a)
3688
3689         move_sp_down finalStack
3690                | delta > 64 =
3691                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3692                               DELTA (-delta)]
3693                | otherwise = nilOL
3694                where delta = stackDelta finalStack
3695         move_sp_up finalStack
3696                | delta > 64 =
3697                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3698                               DELTA 0]
3699                | otherwise = nilOL
3700                where delta = stackDelta finalStack
3701
3702
3703         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3704         passArguments ((arg,I64):args) gprs fprs stackOffset
3705                accumCode accumUsed =
3706             do
3707                 ChildCode64 code vr_lo <- iselExpr64 arg
3708                 let vr_hi = getHiVRegFromLo vr_lo
3709
3710 #if darwin_TARGET_OS
3711                 passArguments args
3712                               (drop 2 gprs)
3713                               fprs
3714                               (stackOffset+8)
3715                               (accumCode `appOL` code
3716                                     `snocOL` storeWord vr_hi gprs stackOffset
3717                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3718                               ((take 2 gprs) ++ accumUsed)
3719             where
3720                 storeWord vr (gpr:_) offset = MR gpr vr
3721                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3722
3723 #elif linux_TARGET_OS
3724                 let stackOffset' = roundTo 8 stackOffset
3725                     stackCode = accumCode `appOL` code
3726                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3727                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3728                     regCode hireg loreg =
3729                         accumCode `appOL` code
3730                             `snocOL` MR hireg vr_hi
3731                             `snocOL` MR loreg vr_lo
3732
3733                 case gprs of
3734                     hireg : loreg : regs | even (length gprs) ->
3735                         passArguments args regs fprs stackOffset
3736                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3737                     _skipped : hireg : loreg : regs ->
3738                         passArguments args regs fprs stackOffset
3739                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3740                     _ -> -- only one or no regs left
3741                         passArguments args [] fprs (stackOffset'+8)
3742                                       stackCode accumUsed
3743 #endif
3744
3745         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3746             | reg : _ <- regs = do
3747                 register <- getRegister arg
3748                 let code = case register of
3749                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3750                             Any _ acode -> acode reg
3751                 passArguments args
3752                               (drop nGprs gprs)
3753                               (drop nFprs fprs)
3754 #if darwin_TARGET_OS
3755         -- The Darwin ABI requires that we reserve stack slots for register parameters
3756                               (stackOffset + stackBytes)
3757 #elif linux_TARGET_OS
3758         -- ... the SysV ABI doesn't.
3759                               stackOffset
3760 #endif
3761                               (accumCode `appOL` code)
3762                               (reg : accumUsed)
3763             | otherwise = do
3764                 (vr, code) <- getSomeReg arg
3765                 passArguments args
3766                               (drop nGprs gprs)
3767                               (drop nFprs fprs)
3768                               (stackOffset' + stackBytes)
3769                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3770                               accumUsed
3771             where
3772 #if darwin_TARGET_OS
3773         -- stackOffset is at least 4-byte aligned
3774         -- The Darwin ABI is happy with that.
3775                 stackOffset' = stackOffset
3776 #else
3777         -- ... the SysV ABI requires 8-byte alignment for doubles.
3778                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3779                              | otherwise  =           stackOffset
3780 #endif
3781                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3782                 (nGprs, nFprs, stackBytes, regs) = case rep of
3783                     I32 -> (1, 0, 4, gprs)
3784 #if darwin_TARGET_OS
3785         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3786         -- we use the FPRs.
3787                     F32 -> (1, 1, 4, fprs)
3788                     F64 -> (2, 1, 8, fprs)
3789 #elif linux_TARGET_OS
3790         -- ... the SysV ABI doesn't.
3791                     F32 -> (0, 1, 4, fprs)
3792                     F64 -> (0, 1, 8, fprs)
3793 #endif
3794
3795         moveResult reduceToF32 =
3796             case dest_regs of
3797                 [] -> nilOL
3798                 [(dest, _hint)]
3799                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3800                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3801                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3802                                           MR r_dest r4]
3803                     | otherwise -> unitOL (MR r_dest r3)
3804                     where rep = cmmRegRep (CmmLocal dest)
3805                           r_dest = getRegisterReg (CmmLocal dest)
3806
3807         outOfLineFloatOp mop =
3808             do
3809                 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3810                               mkForeignLabel functionName Nothing True
3811                 let mopLabelOrExpr = case mopExpr of
3812                         CmmLit (CmmLabel lbl) -> Left lbl
3813                         _ -> Right mopExpr
3814                 return (mopLabelOrExpr, reduce)
3815             where
3816                 (functionName, reduce) = case mop of
3817                     MO_F32_Exp   -> (FSLIT("exp"), True)
3818                     MO_F32_Log   -> (FSLIT("log"), True)
3819                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3820
3821                     MO_F32_Sin   -> (FSLIT("sin"), True)
3822                     MO_F32_Cos   -> (FSLIT("cos"), True)
3823                     MO_F32_Tan   -> (FSLIT("tan"), True)
3824
3825                     MO_F32_Asin  -> (FSLIT("asin"), True)
3826                     MO_F32_Acos  -> (FSLIT("acos"), True)
3827                     MO_F32_Atan  -> (FSLIT("atan"), True)
3828
3829                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3830                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3831                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3832                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3833
3834                     MO_F64_Exp   -> (FSLIT("exp"), False)
3835                     MO_F64_Log   -> (FSLIT("log"), False)
3836                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3837
3838                     MO_F64_Sin   -> (FSLIT("sin"), False)
3839                     MO_F64_Cos   -> (FSLIT("cos"), False)
3840                     MO_F64_Tan   -> (FSLIT("tan"), False)
3841
3842                     MO_F64_Asin  -> (FSLIT("asin"), False)
3843                     MO_F64_Acos  -> (FSLIT("acos"), False)
3844                     MO_F64_Atan  -> (FSLIT("atan"), False)
3845
3846                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3847                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3848                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3849                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3850                     other -> pprPanic "genCCall(ppc): unknown callish op"
3851                                     (pprCallishMachOp other)
3852
3853 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3854
3855 #endif /* powerpc_TARGET_ARCH */
3856
3857
3858 -- -----------------------------------------------------------------------------
3859 -- Generating a table-branch
3860
3861 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3862
3863 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3864 genSwitch expr ids
3865   | opt_PIC
3866   = do
3867         (reg,e_code) <- getSomeReg expr
3868         lbl <- getNewLabelNat
3869         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3870         (tableReg,t_code) <- getSomeReg $ dynRef
3871         let
3872             jumpTable = map jumpTableEntryRel ids
3873
3874             jumpTableEntryRel Nothing
3875                 = CmmStaticLit (CmmInt 0 wordRep)
3876             jumpTableEntryRel (Just (BlockId id))
3877                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3878                 where blockLabel = mkAsmTempLabel id
3879
3880             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3881                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3882
3883 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3884     -- on Mac OS X/x86_64, put the jump table in the text section
3885     -- to work around a limitation of the linker.
3886     -- ld64 is unable to handle the relocations for
3887     --     .quad L1 - L0
3888     -- if L0 is not preceded by a non-anonymous label in its section.
3889
3890             code = e_code `appOL` t_code `appOL` toOL [
3891                             ADD wordRep op (OpReg tableReg),
3892                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3893                             LDATA Text (CmmDataLabel lbl : jumpTable)
3894                     ]
3895 #else
3896             code = e_code `appOL` t_code `appOL` toOL [
3897                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3898                             ADD wordRep op (OpReg tableReg),
3899                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3900                     ]
3901 #endif
3902         return code
3903   | otherwise
3904   = do
3905         (reg,e_code) <- getSomeReg expr
3906         lbl <- getNewLabelNat
3907         let
3908             jumpTable = map jumpTableEntry ids
3909             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3910             code = e_code `appOL` toOL [
3911                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3912                     JMP_TBL op [ id | Just id <- ids ]
3913                  ]
3914         -- in
3915         return code
3916 #elif powerpc_TARGET_ARCH
3917 genSwitch expr ids
3918   | opt_PIC
3919   = do
3920         (reg,e_code) <- getSomeReg expr
3921         tmp <- getNewRegNat I32
3922         lbl <- getNewLabelNat
3923         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3924         (tableReg,t_code) <- getSomeReg $ dynRef
3925         let
3926             jumpTable = map jumpTableEntryRel ids
3927
3928             jumpTableEntryRel Nothing
3929                 = CmmStaticLit (CmmInt 0 wordRep)
3930             jumpTableEntryRel (Just (BlockId id))
3931                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3932                 where blockLabel = mkAsmTempLabel id
3933
3934             code = e_code `appOL` t_code `appOL` toOL [
3935                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3936                             SLW tmp reg (RIImm (ImmInt 2)),
3937                             LD I32 tmp (AddrRegReg tableReg tmp),
3938                             ADD tmp tmp (RIReg tableReg),
3939                             MTCTR tmp,
3940                             BCTR [ id | Just id <- ids ]
3941                     ]
3942         return code
3943   | otherwise
3944   = do
3945         (reg,e_code) <- getSomeReg expr
3946         tmp <- getNewRegNat I32
3947         lbl <- getNewLabelNat
3948         let
3949             jumpTable = map jumpTableEntry ids
3950
3951             code = e_code `appOL` toOL [
3952                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3953                             SLW tmp reg (RIImm (ImmInt 2)),
3954                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3955                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3956                             MTCTR tmp,
3957                             BCTR [ id | Just id <- ids ]
3958                     ]
3959         return code
3960 #else
3961 genSwitch expr ids = panic "ToDo: genSwitch"
3962 #endif
3963
3964 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3965 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3966     where blockLabel = mkAsmTempLabel id
3967
3968 -- -----------------------------------------------------------------------------
3969 -- Support bits
3970 -- -----------------------------------------------------------------------------
3971
3972
3973 -- -----------------------------------------------------------------------------
3974 -- 'condIntReg' and 'condFltReg': condition codes into registers
3975
3976 -- Turn those condition codes into integers now (when they appear on
3977 -- the right hand side of an assignment).
3978 --
3979 -- (If applicable) Do not fill the delay slots here; you will confuse the
3980 -- register allocator.
3981
3982 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3983
3984 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3985
3986 #if alpha_TARGET_ARCH
3987 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3988 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3989 #endif /* alpha_TARGET_ARCH */
3990
3991 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3992
3993 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3994
3995 condIntReg cond x y = do
3996   CondCode _ cond cond_code <- condIntCode cond x y
3997   tmp <- getNewRegNat I8
3998   let
3999         code dst = cond_code `appOL` toOL [
4000                     SETCC cond (OpReg tmp),
4001                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4002                   ]
4003   -- in
4004   return (Any I32 code)
4005
4006 #endif
4007
4008 #if i386_TARGET_ARCH
4009
4010 condFltReg cond x y = do
4011   CondCode _ cond cond_code <- condFltCode cond x y
4012   tmp <- getNewRegNat I8
4013   let
4014         code dst = cond_code `appOL` toOL [
4015                     SETCC cond (OpReg tmp),
4016                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4017                   ]
4018   -- in
4019   return (Any I32 code)
4020
4021 #endif
4022
4023 #if x86_64_TARGET_ARCH
4024
4025 condFltReg cond x y = do
4026   CondCode _ cond cond_code <- condFltCode cond x y
4027   tmp1 <- getNewRegNat wordRep
4028   tmp2 <- getNewRegNat wordRep
4029   let
4030         -- We have to worry about unordered operands (eg. comparisons
4031         -- against NaN).  If the operands are unordered, the comparison
4032         -- sets the parity flag, carry flag and zero flag.
4033         -- All comparisons are supposed to return false for unordered
4034         -- operands except for !=, which returns true.
4035         --
4036         -- Optimisation: we don't have to test the parity flag if we
4037         -- know the test has already excluded the unordered case: eg >
4038         -- and >= test for a zero carry flag, which can only occur for
4039         -- ordered operands.
4040         --
4041         -- ToDo: by reversing comparisons we could avoid testing the
4042         -- parity flag in more cases.
4043
4044         code dst =
4045            cond_code `appOL`
4046              (case cond of
4047                 NE  -> or_unordered dst
4048                 GU  -> plain_test   dst
4049                 GEU -> plain_test   dst
4050                 _   -> and_ordered  dst)
4051
4052         plain_test dst = toOL [
4053                     SETCC cond (OpReg tmp1),
4054                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4055                  ]
4056         or_unordered dst = toOL [
4057                     SETCC cond (OpReg tmp1),
4058                     SETCC PARITY (OpReg tmp2),
4059                     OR I8 (OpReg tmp1) (OpReg tmp2),
4060                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4061                   ]
4062         and_ordered dst = toOL [
4063                     SETCC cond (OpReg tmp1),
4064                     SETCC NOTPARITY (OpReg tmp2),
4065                     AND I8 (OpReg tmp1) (OpReg tmp2),
4066                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4067                   ]
4068   -- in
4069   return (Any I32 code)
4070
4071 #endif
4072
4073 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4074
4075 #if sparc_TARGET_ARCH
4076
4077 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4078     (src, code) <- getSomeReg x
4079     tmp <- getNewRegNat I32
4080     let
4081         code__2 dst = code `appOL` toOL [
4082             SUB False True g0 (RIReg src) g0,
4083             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4084     return (Any I32 code__2)
4085
4086 condIntReg EQQ x y = do
4087     (src1, code1) <- getSomeReg x
4088     (src2, code2) <- getSomeReg y
4089     tmp1 <- getNewRegNat I32
4090     tmp2 <- getNewRegNat I32
4091     let
4092         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4093             XOR False src1 (RIReg src2) dst,
4094             SUB False True g0 (RIReg dst) g0,
4095             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4096     return (Any I32 code__2)
4097
4098 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4099     (src, code) <- getSomeReg x
4100     tmp <- getNewRegNat I32
4101     let
4102         code__2 dst = code `appOL` toOL [
4103             SUB False True g0 (RIReg src) g0,
4104             ADD True False g0 (RIImm (ImmInt 0)) dst]
4105     return (Any I32 code__2)
4106
4107 condIntReg NE x y = do
4108     (src1, code1) <- getSomeReg x
4109     (src2, code2) <- getSomeReg y
4110     tmp1 <- getNewRegNat I32
4111     tmp2 <- getNewRegNat I32
4112     let
4113         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4114             XOR False src1 (RIReg src2) dst,
4115             SUB False True g0 (RIReg dst) g0,
4116             ADD True False g0 (RIImm (ImmInt 0)) dst]
4117     return (Any I32 code__2)
4118
4119 condIntReg cond x y = do
4120     BlockId lbl1 <- getBlockIdNat
4121     BlockId lbl2 <- getBlockIdNat
4122     CondCode _ cond cond_code <- condIntCode cond x y
4123     let
4124         code__2 dst = cond_code `appOL` toOL [
4125             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4126             OR False g0 (RIImm (ImmInt 0)) dst,
4127             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4128             NEWBLOCK (BlockId lbl1),
4129             OR False g0 (RIImm (ImmInt 1)) dst,
4130             NEWBLOCK (BlockId lbl2)]
4131     return (Any I32 code__2)
4132
4133 condFltReg cond x y = do
4134     BlockId lbl1 <- getBlockIdNat
4135     BlockId lbl2 <- getBlockIdNat
4136     CondCode _ cond cond_code <- condFltCode cond x y
4137     let
4138         code__2 dst = cond_code `appOL` toOL [
4139             NOP,
4140             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4141             OR False g0 (RIImm (ImmInt 0)) dst,
4142             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4143             NEWBLOCK (BlockId lbl1),
4144             OR False g0 (RIImm (ImmInt 1)) dst,
4145             NEWBLOCK (BlockId lbl2)]
4146     return (Any I32 code__2)
4147
4148 #endif /* sparc_TARGET_ARCH */
4149
4150 #if powerpc_TARGET_ARCH
4151 condReg getCond = do
4152     lbl1 <- getBlockIdNat
4153     lbl2 <- getBlockIdNat
4154     CondCode _ cond cond_code <- getCond
4155     let
4156 {-        code dst = cond_code `appOL` toOL [
4157                 BCC cond lbl1,
4158                 LI dst (ImmInt 0),
4159                 BCC ALWAYS lbl2,
4160                 NEWBLOCK lbl1,
4161                 LI dst (ImmInt 1),
4162                 BCC ALWAYS lbl2,
4163                 NEWBLOCK lbl2
4164             ]-}
4165         code dst = cond_code
4166             `appOL` negate_code
4167             `appOL` toOL [
4168                 MFCR dst,
4169                 RLWINM dst dst (bit + 1) 31 31
4170             ]
4171
4172         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4173                     | otherwise = nilOL
4174
4175         (bit, do_negate) = case cond of
4176             LTT -> (0, False)
4177             LE  -> (1, True)
4178             EQQ -> (2, False)
4179             GE  -> (0, True)
4180             GTT -> (1, False)
4181
4182             NE  -> (2, True)
4183
4184             LU  -> (0, False)
4185             LEU -> (1, True)
4186             GEU -> (0, True)
4187             GU  -> (1, False)
4188
4189     return (Any I32 code)
4190
4191 condIntReg cond x y = condReg (condIntCode cond x y)
4192 condFltReg cond x y = condReg (condFltCode cond x y)
4193 #endif /* powerpc_TARGET_ARCH */
4194
4195
4196 -- -----------------------------------------------------------------------------
4197 -- 'trivial*Code': deal with trivial instructions
4198
4199 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4200 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4201 -- Only look for constants on the right hand side, because that's
4202 -- where the generic optimizer will have put them.
4203
4204 -- Similarly, for unary instructions, we don't have to worry about
4205 -- matching an StInt as the argument, because genericOpt will already
4206 -- have handled the constant-folding.
4207
4208 trivialCode
4209     :: MachRep
4210     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4211       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4212                      -> Maybe (Operand -> Operand -> Instr)
4213       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4214                      -> Maybe (Operand -> Operand -> Instr)
4215       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4216       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4217       ,)))))
4218     -> CmmExpr -> CmmExpr -- the two arguments
4219     -> NatM Register
4220
4221 #ifndef powerpc_TARGET_ARCH
4222 trivialFCode
4223     :: MachRep
4224     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4225       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4226       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4227       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4228       ,))))
4229     -> CmmExpr -> CmmExpr -- the two arguments
4230     -> NatM Register
4231 #endif
4232
4233 trivialUCode
4234     :: MachRep
4235     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4236       ,IF_ARCH_i386 ((Operand -> Instr)
4237       ,IF_ARCH_x86_64 ((Operand -> Instr)
4238       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4239       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4240       ,)))))
4241     -> CmmExpr  -- the one argument
4242     -> NatM Register
4243
4244 #ifndef powerpc_TARGET_ARCH
4245 trivialUFCode
4246     :: MachRep
4247     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4248       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4249       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4250       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4251       ,))))
4252     -> CmmExpr -- the one argument
4253     -> NatM Register
4254 #endif
4255
4256 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4257
4258 #if alpha_TARGET_ARCH
4259
4260 trivialCode instr x (StInt y)
4261   | fits8Bits y
4262   = getRegister x               `thenNat` \ register ->
4263     getNewRegNat IntRep         `thenNat` \ tmp ->
4264     let
4265         code = registerCode register tmp
4266         src1 = registerName register tmp
4267         src2 = ImmInt (fromInteger y)
4268         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4269     in
4270     return (Any IntRep code__2)
4271
4272 trivialCode instr x y
4273   = getRegister x               `thenNat` \ register1 ->
4274     getRegister y               `thenNat` \ register2 ->
4275     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4276     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4277     let
4278         code1 = registerCode register1 tmp1 []
4279         src1  = registerName register1 tmp1
4280         code2 = registerCode register2 tmp2 []
4281         src2  = registerName register2 tmp2
4282         code__2 dst = asmSeqThen [code1, code2] .
4283                      mkSeqInstr (instr src1 (RIReg src2) dst)
4284     in
4285     return (Any IntRep code__2)
4286
4287 ------------
4288 trivialUCode instr x
4289   = getRegister x               `thenNat` \ register ->
4290     getNewRegNat IntRep         `thenNat` \ tmp ->
4291     let
4292         code = registerCode register tmp
4293         src  = registerName register tmp
4294         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4295     in
4296     return (Any IntRep code__2)
4297
4298 ------------
4299 trivialFCode _ instr x y
4300   = getRegister x               `thenNat` \ register1 ->
4301     getRegister y               `thenNat` \ register2 ->
4302     getNewRegNat F64    `thenNat` \ tmp1 ->
4303     getNewRegNat F64    `thenNat` \ tmp2 ->
4304     let
4305         code1 = registerCode register1 tmp1
4306         src1  = registerName register1 tmp1
4307
4308         code2 = registerCode register2 tmp2
4309         src2  = registerName register2 tmp2
4310
4311         code__2 dst = asmSeqThen [code1 [], code2 []] .
4312                       mkSeqInstr (instr src1 src2 dst)
4313     in
4314     return (Any F64 code__2)
4315
4316 trivialUFCode _ instr x
4317   = getRegister x               `thenNat` \ register ->
4318     getNewRegNat F64    `thenNat` \ tmp ->
4319     let
4320         code = registerCode register tmp
4321         src  = registerName register tmp
4322         code__2 dst = code . mkSeqInstr (instr src dst)
4323     in
4324     return (Any F64 code__2)
4325
4326 #endif /* alpha_TARGET_ARCH */
4327
4328 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4329
4330 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4331
4332 {-
4333 The Rules of the Game are:
4334
4335 * You cannot assume anything about the destination register dst;
4336   it may be anything, including a fixed reg.
4337
4338 * You may compute an operand into a fixed reg, but you may not
4339   subsequently change the contents of that fixed reg.  If you
4340   want to do so, first copy the value either to a temporary
4341   or into dst.  You are free to modify dst even if it happens
4342   to be a fixed reg -- that's not your problem.
4343
4344 * You cannot assume that a fixed reg will stay live over an
4345   arbitrary computation.  The same applies to the dst reg.
4346
4347 * Temporary regs obtained from getNewRegNat are distinct from
4348   each other and from all other regs, and stay live over
4349   arbitrary computations.
4350
4351 --------------------
4352
4353 SDM's version of The Rules:
4354
4355 * If getRegister returns Any, that means it can generate correct
4356   code which places the result in any register, period.  Even if that
4357   register happens to be read during the computation.
4358
4359   Corollary #1: this means that if you are generating code for an
4360   operation with two arbitrary operands, you cannot assign the result
4361   of the first operand into the destination register before computing
4362   the second operand.  The second operand might require the old value
4363   of the destination register.
4364
4365   Corollary #2: A function might be able to generate more efficient
4366   code if it knows the destination register is a new temporary (and
4367   therefore not read by any of the sub-computations).
4368
4369 * If getRegister returns Any, then the code it generates may modify only:
4370         (a) fresh temporaries
4371         (b) the destination register
4372         (c) known registers (eg. %ecx is used by shifts)
4373   In particular, it may *not* modify global registers, unless the global
4374   register happens to be the destination register.
4375 -}
4376
4377 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4378   | not (is64BitLit lit_a) = do
4379   b_code <- getAnyReg b
4380   let
4381        code dst
4382          = b_code dst `snocOL`
4383            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4384   -- in
4385   return (Any rep code)
4386
4387 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4388
4389 -- This is re-used for floating pt instructions too.
4390 genTrivialCode rep instr a b = do
4391   (b_op, b_code) <- getNonClobberedOperand b
4392   a_code <- getAnyReg a
4393   tmp <- getNewRegNat rep
4394   let
4395      -- We want the value of b to stay alive across the computation of a.
4396      -- But, we want to calculate a straight into the destination register,
4397      -- because the instruction only has two operands (dst := dst `op` src).
4398      -- The troublesome case is when the result of b is in the same register
4399      -- as the destination reg.  In this case, we have to save b in a
4400      -- new temporary across the computation of a.
4401      code dst
4402         | dst `regClashesWithOp` b_op =
4403                 b_code `appOL`
4404                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4405                 a_code dst `snocOL`
4406                 instr (OpReg tmp) (OpReg dst)
4407         | otherwise =
4408                 b_code `appOL`
4409                 a_code dst `snocOL`
4410                 instr b_op (OpReg dst)
4411   -- in
4412   return (Any rep code)
4413
4414 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4415 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4416 reg `regClashesWithOp` _            = False
4417
4418 -----------
4419
4420 trivialUCode rep instr x = do
4421   x_code <- getAnyReg x
4422   let
4423      code dst =
4424         x_code dst `snocOL`
4425         instr (OpReg dst)
4426   -- in
4427   return (Any rep code)
4428
4429 -----------
4430
4431 #if i386_TARGET_ARCH
4432
4433 trivialFCode pk instr x y = do
4434   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4435   (y_reg, y_code) <- getSomeReg y
4436   let
4437      code dst =
4438         x_code `appOL`
4439         y_code `snocOL`
4440         instr pk x_reg y_reg dst
4441   -- in
4442   return (Any pk code)
4443
4444 #endif
4445
4446 #if x86_64_TARGET_ARCH
4447
4448 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4449
4450 #endif
4451
4452 -------------
4453
4454 trivialUFCode rep instr x = do
4455   (x_reg, x_code) <- getSomeReg x
4456   let
4457      code dst =
4458         x_code `snocOL`
4459         instr x_reg dst
4460   -- in
4461   return (Any rep code)
4462
4463 #endif /* i386_TARGET_ARCH */
4464
4465 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4466
4467 #if sparc_TARGET_ARCH
4468
4469 trivialCode pk instr x (CmmLit (CmmInt y d))
4470   | fits13Bits y
4471   = do
4472       (src1, code) <- getSomeReg x
4473       tmp <- getNewRegNat I32
4474       let
4475         src2 = ImmInt (fromInteger y)
4476         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4477       return (Any I32 code__2)
4478
4479 trivialCode pk instr x y = do
4480     (src1, code1) <- getSomeReg x
4481     (src2, code2) <- getSomeReg y
4482     tmp1 <- getNewRegNat I32
4483     tmp2 <- getNewRegNat I32
4484     let
4485         code__2 dst = code1 `appOL` code2 `snocOL`
4486                       instr src1 (RIReg src2) dst
4487     return (Any I32 code__2)
4488
4489 ------------
4490 trivialFCode pk instr x y = do
4491     (src1, code1) <- getSomeReg x
4492     (src2, code2) <- getSomeReg y
4493     tmp1 <- getNewRegNat (cmmExprRep x)
4494     tmp2 <- getNewRegNat (cmmExprRep y)
4495     tmp <- getNewRegNat F64
4496     let
4497         promote x = FxTOy F32 F64 x tmp
4498
4499         pk1   = cmmExprRep x
4500         pk2   = cmmExprRep y
4501
4502         code__2 dst =
4503                 if pk1 == pk2 then
4504                     code1 `appOL` code2 `snocOL`
4505                     instr pk src1 src2 dst
4506                 else if pk1 == F32 then
4507                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4508                     instr F64 tmp src2 dst
4509                 else
4510                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4511                     instr F64 src1 tmp dst
4512     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4513
4514 ------------
4515 trivialUCode pk instr x = do
4516     (src, code) <- getSomeReg x
4517     tmp <- getNewRegNat pk
4518     let
4519         code__2 dst = code `snocOL` instr (RIReg src) dst
4520     return (Any pk code__2)
4521
4522 -------------
4523 trivialUFCode pk instr x = do
4524     (src, code) <- getSomeReg x
4525     tmp <- getNewRegNat pk
4526     let
4527         code__2 dst = code `snocOL` instr src dst
4528     return (Any pk code__2)
4529
4530 #endif /* sparc_TARGET_ARCH */
4531
4532 #if powerpc_TARGET_ARCH
4533
4534 {-
4535 Wolfgang's PowerPC version of The Rules:
4536
4537 A slightly modified version of The Rules to take advantage of the fact
4538 that PowerPC instructions work on all registers and don't implicitly
4539 clobber any fixed registers.
4540
4541 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4542
4543 * If getRegister returns Any, then the code it generates may modify only:
4544         (a) fresh temporaries
4545         (b) the destination register
4546   It may *not* modify global registers, unless the global
4547   register happens to be the destination register.
4548   It may not clobber any other registers. In fact, only ccalls clobber any
4549   fixed registers.
4550   Also, it may not modify the counter register (used by genCCall).
4551
4552   Corollary: If a getRegister for a subexpression returns Fixed, you need
4553   not move it to a fresh temporary before evaluating the next subexpression.
4554   The Fixed register won't be modified.
4555   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4556
4557 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4558   the value of the destination register.
4559 -}
4560
4561 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4562     | Just imm <- makeImmediate rep signed y
4563     = do
4564         (src1, code1) <- getSomeReg x
4565         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4566         return (Any rep code)
4567
4568 trivialCode rep signed instr x y = do
4569     (src1, code1) <- getSomeReg x
4570     (src2, code2) <- getSomeReg y
4571     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4572     return (Any rep code)
4573
4574 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4575     -> CmmExpr -> CmmExpr -> NatM Register
4576 trivialCodeNoImm rep instr x y = do
4577     (src1, code1) <- getSomeReg x
4578     (src2, code2) <- getSomeReg y
4579     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4580     return (Any rep code)
4581
4582 trivialUCode rep instr x = do
4583     (src, code) <- getSomeReg x
4584     let code' dst = code `snocOL` instr dst src
4585     return (Any rep code')
4586
4587 -- There is no "remainder" instruction on the PPC, so we have to do
4588 -- it the hard way.
4589 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4590
4591 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4592     -> CmmExpr -> CmmExpr -> NatM Register
4593 remainderCode rep div x y = do
4594     (src1, code1) <- getSomeReg x
4595     (src2, code2) <- getSomeReg y
4596     let code dst = code1 `appOL` code2 `appOL` toOL [
4597                 div dst src1 src2,
4598                 MULLW dst dst (RIReg src2),
4599                 SUBF dst dst src1
4600             ]
4601     return (Any rep code)
4602
4603 #endif /* powerpc_TARGET_ARCH */
4604
4605
4606 -- -----------------------------------------------------------------------------
4607 --  Coercing to/from integer/floating-point...
4608
4609 -- When going to integer, we truncate (round towards 0).
4610
4611 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4612 -- conversions.  We have to store temporaries in memory to move
4613 -- between the integer and the floating point register sets.
4614
4615 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4616 -- pretend, on sparc at least, that double and float regs are seperate
4617 -- kinds, so the value has to be computed into one kind before being
4618 -- explicitly "converted" to live in the other kind.
4619
4620 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4621 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4622
4623 #if sparc_TARGET_ARCH
4624 coerceDbl2Flt :: CmmExpr -> NatM Register
4625 coerceFlt2Dbl :: CmmExpr -> NatM Register
4626 #endif
4627
4628 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4629
4630 #if alpha_TARGET_ARCH
4631
4632 coerceInt2FP _ x
4633   = getRegister x               `thenNat` \ register ->
4634     getNewRegNat IntRep         `thenNat` \ reg ->
4635     let
4636         code = registerCode register reg
4637         src  = registerName register reg
4638
4639         code__2 dst = code . mkSeqInstrs [
4640             ST Q src (spRel 0),
4641             LD TF dst (spRel 0),
4642             CVTxy Q TF dst dst]
4643     in
4644     return (Any F64 code__2)
4645
4646 -------------
4647 coerceFP2Int x
4648   = getRegister x               `thenNat` \ register ->
4649     getNewRegNat F64    `thenNat` \ tmp ->
4650     let
4651         code = registerCode register tmp
4652         src  = registerName register tmp
4653
4654         code__2 dst = code . mkSeqInstrs [
4655             CVTxy TF Q src tmp,
4656             ST TF tmp (spRel 0),
4657             LD Q dst (spRel 0)]
4658     in
4659     return (Any IntRep code__2)
4660
4661 #endif /* alpha_TARGET_ARCH */
4662
4663 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4664
4665 #if i386_TARGET_ARCH
4666
4667 coerceInt2FP from to x = do
4668   (x_reg, x_code) <- getSomeReg x
4669   let
4670         opc  = case to of F32 -> GITOF; F64 -> GITOD
4671         code dst = x_code `snocOL` opc x_reg dst
4672         -- ToDo: works for non-I32 reps?
4673   -- in
4674   return (Any to code)
4675
4676 ------------
4677
4678 coerceFP2Int from to x = do
4679   (x_reg, x_code) <- getSomeReg x
4680   let
4681         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4682         code dst = x_code `snocOL` opc x_reg dst
4683         -- ToDo: works for non-I32 reps?
4684   -- in
4685   return (Any to code)
4686
4687 #endif /* i386_TARGET_ARCH */
4688
4689 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4690
4691 #if x86_64_TARGET_ARCH
4692
4693 coerceFP2Int from to x = do
4694   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4695   let
4696         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4697         code dst = x_code `snocOL` opc x_op dst
4698   -- in
4699   return (Any to code) -- works even if the destination rep is <I32
4700
4701 coerceInt2FP from to x = do
4702   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4703   let
4704         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4705         code dst = x_code `snocOL` opc x_op dst
4706   -- in
4707   return (Any to code) -- works even if the destination rep is <I32
4708
4709 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4710 coerceFP2FP to x = do
4711   (x_reg, x_code) <- getSomeReg x
4712   let
4713         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4714         code dst = x_code `snocOL` opc x_reg dst
4715   -- in
4716   return (Any to code)
4717
4718 #endif
4719
4720 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4721
4722 #if sparc_TARGET_ARCH
4723
4724 coerceInt2FP pk1 pk2 x = do
4725     (src, code) <- getSomeReg x
4726     let
4727         code__2 dst = code `appOL` toOL [
4728             ST pk1 src (spRel (-2)),
4729             LD pk1 (spRel (-2)) dst,
4730             FxTOy pk1 pk2 dst dst]
4731     return (Any pk2 code__2)
4732
4733 ------------
4734 coerceFP2Int pk fprep x = do
4735     (src, code) <- getSomeReg x
4736     reg <- getNewRegNat fprep
4737     tmp <- getNewRegNat pk
4738     let
4739         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4740             code `appOL` toOL [
4741             FxTOy fprep pk src tmp,
4742             ST pk tmp (spRel (-2)),
4743             LD pk (spRel (-2)) dst]
4744     return (Any pk code__2)
4745
4746 ------------
4747 coerceDbl2Flt x = do
4748     (src, code) <- getSomeReg x
4749     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4750
4751 ------------
4752 coerceFlt2Dbl x = do
4753     (src, code) <- getSomeReg x
4754     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4755
4756 #endif /* sparc_TARGET_ARCH */
4757
4758 #if powerpc_TARGET_ARCH
4759 coerceInt2FP fromRep toRep x = do
4760     (src, code) <- getSomeReg x
4761     lbl <- getNewLabelNat
4762     itmp <- getNewRegNat I32
4763     ftmp <- getNewRegNat F64
4764     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4765     Amode addr addr_code <- getAmode dynRef
4766     let
4767         code' dst = code `appOL` maybe_exts `appOL` toOL [
4768                 LDATA ReadOnlyData
4769                                 [CmmDataLabel lbl,
4770                                  CmmStaticLit (CmmInt 0x43300000 I32),
4771                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4772                 XORIS itmp src (ImmInt 0x8000),
4773                 ST I32 itmp (spRel 3),
4774                 LIS itmp (ImmInt 0x4330),
4775                 ST I32 itmp (spRel 2),
4776                 LD F64 ftmp (spRel 2)
4777             ] `appOL` addr_code `appOL` toOL [
4778                 LD F64 dst addr,
4779                 FSUB F64 dst ftmp dst
4780             ] `appOL` maybe_frsp dst
4781
4782         maybe_exts = case fromRep of
4783                         I8 ->  unitOL $ EXTS I8 src src
4784                         I16 -> unitOL $ EXTS I16 src src
4785                         I32 -> nilOL
4786         maybe_frsp dst = case toRep of
4787                         F32 -> unitOL $ FRSP dst dst
4788                         F64 -> nilOL
4789     return (Any toRep code')
4790
4791 coerceFP2Int fromRep toRep x = do
4792     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4793     (src, code) <- getSomeReg x
4794     tmp <- getNewRegNat F64
4795     let
4796         code' dst = code `appOL` toOL [
4797                 -- convert to int in FP reg
4798             FCTIWZ tmp src,
4799                 -- store value (64bit) from FP to stack
4800             ST F64 tmp (spRel 2),
4801                 -- read low word of value (high word is undefined)
4802             LD I32 dst (spRel 3)]
4803     return (Any toRep code')
4804 #endif /* powerpc_TARGET_ARCH */
4805
4806
4807 -- -----------------------------------------------------------------------------
4808 -- eXTRA_STK_ARGS_HERE
4809
4810 -- We (allegedly) put the first six C-call arguments in registers;
4811 -- where do we start putting the rest of them?
4812
4813 -- Moved from MachInstrs (SDM):
4814
4815 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4816 eXTRA_STK_ARGS_HERE :: Int
4817 eXTRA_STK_ARGS_HERE
4818   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4819 #endif
4820