compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32 import ClosureInfo      ( C_SRT(..) )
  33
  34 -- The rest:
  35 import StaticFlags      ( opt_PIC )
  36 import ForeignCall      ( CCallConv(..) )
  37 import OrdList
  38 import Pretty
  39 import Outputable
  40 import FastString
  41 import FastTypes        ( isFastTrue )
  42 import Constants        ( wORD_SIZE )
  43
  44 #ifdef DEBUG
  45 import Outputable       ( assertPanic )
  46 import Debug.Trace      ( trace )
  47 #endif
  48 import Debug.Trace      ( trace )
  49
  50 import Control.Monad    ( mapAndUnzipM )
  51 import Data.Maybe       ( fromJust )
  52 import Data.Bits
  53 import Data.Word
  54 import Data.Int
  55
  56 -- -----------------------------------------------------------------------------
  57 -- Top-level of the instruction selector
  58
  59 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  60 -- They are really trees of insns to facilitate fast appending, where a
  61 -- left-to-right traversal (pre-order?) yields the insns in the correct
  62 -- order.
  63
  64 type InstrBlock = OrdList Instr
  65
  66 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  67 cmmTopCodeGen (CmmProc info lab params blocks) = do
  68   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  69   picBaseMb <- getPicBaseMaybeNat
  70   let proc = CmmProc info lab params (concat nat_blocks)
  71       tops = proc : concat statics
  72   case picBaseMb of
  73       Just picBase -> initializePicBase picBase tops
  74       Nothing -> return tops
  75
  76 cmmTopCodeGen (CmmData sec dat) = do
  77   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  78
  79 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  80 basicBlockCodeGen (BasicBlock id stmts) = do
  81   instrs <- stmtsToInstrs stmts
  82   -- code generation may introduce new basic block boundaries, which
  83   -- are indicated by the NEWBLOCK instruction.  We must split up the
  84   -- instruction stream into basic blocks again.  Also, we extract
  85   -- LDATAs here too.
  86   let
  87         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  88
  89         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  90           = ([], BasicBlock id instrs : blocks, statics)
  91         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  92           = (instrs, blocks, CmmData sec dat:statics)
  93         mkBlocks instr (instrs,blocks,statics)
  94           = (instr:instrs, blocks, statics)
  95   -- in
  96   return (BasicBlock id top : other_blocks, statics)
  97
  98 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  99 stmtsToInstrs stmts
 100    = do instrss <- mapM stmtToInstrs stmts
 101         return (concatOL instrss)
 102
 103 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 104 stmtToInstrs stmt = case stmt of
 105     CmmNop         -> return nilOL
 106     CmmComment s   -> return (unitOL (COMMENT s))
 107
 108     CmmAssign reg src
 109       | isFloatingRep kind -> assignReg_FltCode kind reg src
 110 #if WORD_SIZE_IN_BITS==32
 111       | kind == I64        -> assignReg_I64Code      reg src
 112 #endif
 113       | otherwise          -> assignReg_IntCode kind reg src
 114         where kind = cmmRegRep reg
 115
 116     CmmStore addr src
 117       | isFloatingRep kind -> assignMem_FltCode kind addr src
 118 #if WORD_SIZE_IN_BITS==32
 119       | kind == I64      -> assignMem_I64Code      addr src
 120 #endif
 121       | otherwise        -> assignMem_IntCode kind addr src
 122         where kind = cmmExprRep src
 123
 124     CmmCall target result_regs args _ _
 125        -> genCCall target result_regs args
 126
 127     CmmBranch id          -> genBranch id
 128     CmmCondBranch arg id  -> genCondJump id arg
 129     CmmSwitch arg ids     -> genSwitch arg ids
 130     CmmJump arg params    -> genJump arg
 131
 132 -- -----------------------------------------------------------------------------
 133 -- General things for putting together code sequences
 134
 135 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 136 -- CmmExprs into CmmRegOff?
 137 mangleIndexTree :: CmmExpr -> CmmExpr
 138 mangleIndexTree (CmmRegOff reg off)
 139   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 140   where rep = cmmRegRep reg
 141
 142 -- -----------------------------------------------------------------------------
 143 --  Code gen for 64-bit arithmetic on 32-bit platforms
 144
 145 {-
 146 Simple support for generating 64-bit code (ie, 64 bit values and 64
 147 bit assignments) on 32-bit platforms.  Unlike the main code generator
 148 we merely shoot for generating working code as simply as possible, and
 149 pay little attention to code quality.  Specifically, there is no
 150 attempt to deal cleverly with the fixed-vs-floating register
 151 distinction; all values are generated into (pairs of) floating
 152 registers, even if this would mean some redundant reg-reg moves as a
 153 result.  Only one of the VRegUniques is returned, since it will be
 154 of the VRegUniqueLo form, and the upper-half VReg can be determined
 155 by applying getHiVRegFromLo to it.
 156 -}
 157
 158 data ChildCode64        -- a.k.a "Register64"
 159    = ChildCode64
 160         InstrBlock      -- code
 161         Reg             -- the lower 32-bit temporary which contains the
 162                         -- result; use getHiVRegFromLo to find the other
 163                         -- VRegUnique.  Rules of this simplified insn
 164                         -- selection game are therefore that the returned
 165                         -- Reg may be modified
 166
 167 #if WORD_SIZE_IN_BITS==32
 168 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 169 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 170 #endif
 171
 172 #ifndef x86_64_TARGET_ARCH
 173 iselExpr64        :: CmmExpr -> NatM ChildCode64
 174 #endif
 175
 176 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 177
 178 #if i386_TARGET_ARCH
 179
 180 assignMem_I64Code addrTree valueTree = do
 181   Amode addr addr_code <- getAmode addrTree
 182   ChildCode64 vcode rlo <- iselExpr64 valueTree
 183   let
 184         rhi = getHiVRegFromLo rlo
 185
 186         -- Little-endian store
 187         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 188         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 189   -- in
 190   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 191
 192
 193 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 194    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 195    let
 196          r_dst_lo = mkVReg u_dst I32
 197          r_dst_hi = getHiVRegFromLo r_dst_lo
 198          r_src_hi = getHiVRegFromLo r_src_lo
 199          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 200          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 201    -- in
 202    return (
 203         vcode `snocOL` mov_lo `snocOL` mov_hi
 204      )
 205
 206 assignReg_I64Code lvalue valueTree
 207    = panic "assignReg_I64Code(i386): invalid lvalue"
 208
 209 ------------
 210
 211 iselExpr64 (CmmLit (CmmInt i _)) = do
 212   (rlo,rhi) <- getNewRegPairNat I32
 213   let
 214         r = fromIntegral (fromIntegral i :: Word32)
 215         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 216         code = toOL [
 217                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 218                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 219                 ]
 220   -- in
 221   return (ChildCode64 code rlo)
 222
 223 iselExpr64 (CmmLoad addrTree I64) = do
 224    Amode addr addr_code <- getAmode addrTree
 225    (rlo,rhi) <- getNewRegPairNat I32
 226    let
 227         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 228         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 229    -- in
 230    return (
 231             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 232                         rlo
 233      )
 234
 235 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 236    = return (ChildCode64 nilOL (mkVReg vu I32))
 237
 238 -- we handle addition, but rather badly
 239 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 240    ChildCode64 code1 r1lo <- iselExpr64 e1
 241    (rlo,rhi) <- getNewRegPairNat I32
 242    let
 243         r = fromIntegral (fromIntegral i :: Word32)
 244         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 245         r1hi = getHiVRegFromLo r1lo
 246         code =  code1 `appOL`
 247                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 248                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 249                        MOV I32 (OpReg r1hi) (OpReg rhi),
 250                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 251    -- in
 252    return (ChildCode64 code rlo)
 253
 254 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 255    ChildCode64 code1 r1lo <- iselExpr64 e1
 256    ChildCode64 code2 r2lo <- iselExpr64 e2
 257    (rlo,rhi) <- getNewRegPairNat I32
 258    let
 259         r1hi = getHiVRegFromLo r1lo
 260         r2hi = getHiVRegFromLo r2lo
 261         code =  code1 `appOL`
 262                 code2 `appOL`
 263                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 264                        ADD I32 (OpReg r2lo) (OpReg rlo),
 265                        MOV I32 (OpReg r1hi) (OpReg rhi),
 266                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 267    -- in
 268    return (ChildCode64 code rlo)
 269
 270 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 271      fn <- getAnyReg expr
 272      r_dst_lo <-  getNewRegNat I32
 273      let r_dst_hi = getHiVRegFromLo r_dst_lo
 274          code = fn r_dst_lo
 275      return (
 276              ChildCode64 (code `snocOL`
 277                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 278                           r_dst_lo
 279             )
 280
 281 iselExpr64 expr
 282    = pprPanic "iselExpr64(i386)" (ppr expr)
 283
 284 #endif /* i386_TARGET_ARCH */
 285
 286 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 287
 288 #if sparc_TARGET_ARCH
 289
 290 assignMem_I64Code addrTree valueTree = do
 291      Amode addr addr_code <- getAmode addrTree
 292      ChildCode64 vcode rlo <- iselExpr64 valueTree
 293      (src, code) <- getSomeReg addrTree
 294      let
 295          rhi = getHiVRegFromLo rlo
 296          -- Big-endian store
 297          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 298          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 299      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 300
 301 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 302      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 303      let
 304          r_dst_lo = mkVReg u_dst pk
 305          r_dst_hi = getHiVRegFromLo r_dst_lo
 306          r_src_hi = getHiVRegFromLo r_src_lo
 307          mov_lo = mkMOV r_src_lo r_dst_lo
 308          mov_hi = mkMOV r_src_hi r_dst_hi
 309          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 310      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 311 assignReg_I64Code lvalue valueTree
 312    = panic "assignReg_I64Code(sparc): invalid lvalue"
 313
 314
 315 -- Don't delete this -- it's very handy for debugging.
 316 --iselExpr64 expr
 317 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 318 --   = panic "iselExpr64(???)"
 319
 320 iselExpr64 (CmmLoad addrTree I64) = do
 321      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 322      rlo <- getNewRegNat I32
 323      let rhi = getHiVRegFromLo rlo
 324          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 325          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 326      return (
 327             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 328                          rlo
 329           )
 330
 331 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 332      r_dst_lo <-  getNewRegNat I32
 333      let r_dst_hi = getHiVRegFromLo r_dst_lo
 334          r_src_lo = mkVReg uq I32
 335          r_src_hi = getHiVRegFromLo r_src_lo
 336          mov_lo = mkMOV r_src_lo r_dst_lo
 337          mov_hi = mkMOV r_src_hi r_dst_hi
 338          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 339      return (
 340             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 341          )
 342
 343 iselExpr64 expr
 344    = pprPanic "iselExpr64(sparc)" (ppr expr)
 345
 346 #endif /* sparc_TARGET_ARCH */
 347
 348 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 349
 350 #if powerpc_TARGET_ARCH
 351
 352 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 353 getI64Amodes addrTree = do
 354     Amode hi_addr addr_code <- getAmode addrTree
 355     case addrOffset hi_addr 4 of
 356         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 357         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 358                            return (AddrRegImm hi_ptr (ImmInt 0),
 359                                    AddrRegImm hi_ptr (ImmInt 4),
 360                                    code)
 361
 362 assignMem_I64Code addrTree valueTree = do
 363         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 364         ChildCode64 vcode rlo <- iselExpr64 valueTree
 365         let
 366                 rhi = getHiVRegFromLo rlo
 367
 368                 -- Big-endian store
 369                 mov_hi = ST I32 rhi hi_addr
 370                 mov_lo = ST I32 rlo lo_addr
 371         -- in
 372         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 373
 374 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 375    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 376    let
 377          r_dst_lo = mkVReg u_dst I32
 378          r_dst_hi = getHiVRegFromLo r_dst_lo
 379          r_src_hi = getHiVRegFromLo r_src_lo
 380          mov_lo = MR r_dst_lo r_src_lo
 381          mov_hi = MR r_dst_hi r_src_hi
 382    -- in
 383    return (
 384         vcode `snocOL` mov_lo `snocOL` mov_hi
 385      )
 386
 387 assignReg_I64Code lvalue valueTree
 388    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 389
 390
 391 -- Don't delete this -- it's very handy for debugging.
 392 --iselExpr64 expr
 393 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 394 --   = panic "iselExpr64(???)"
 395
 396 iselExpr64 (CmmLoad addrTree I64) = do
 397     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 398     (rlo, rhi) <- getNewRegPairNat I32
 399     let mov_hi = LD I32 rhi hi_addr
 400         mov_lo = LD I32 rlo lo_addr
 401     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 402                          rlo
 403
 404 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 405    = return (ChildCode64 nilOL (mkVReg vu I32))
 406
 407 iselExpr64 (CmmLit (CmmInt i _)) = do
 408   (rlo,rhi) <- getNewRegPairNat I32
 409   let
 410         half0 = fromIntegral (fromIntegral i :: Word16)
 411         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 412         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 413         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 414
 415         code = toOL [
 416                 LIS rlo (ImmInt half1),
 417                 OR rlo rlo (RIImm $ ImmInt half0),
 418                 LIS rhi (ImmInt half3),
 419                 OR rlo rlo (RIImm $ ImmInt half2)
 420                 ]
 421   -- in
 422   return (ChildCode64 code rlo)
 423
 424 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 425    ChildCode64 code1 r1lo <- iselExpr64 e1
 426    ChildCode64 code2 r2lo <- iselExpr64 e2
 427    (rlo,rhi) <- getNewRegPairNat I32
 428    let
 429         r1hi = getHiVRegFromLo r1lo
 430         r2hi = getHiVRegFromLo r2lo
 431         code =  code1 `appOL`
 432                 code2 `appOL`
 433                 toOL [ ADDC rlo r1lo r2lo,
 434                        ADDE rhi r1hi r2hi ]
 435    -- in
 436    return (ChildCode64 code rlo)
 437
 438 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
 439     (expr_reg,expr_code) <- getSomeReg expr
 440     (rlo, rhi) <- getNewRegPairNat I32
 441     let mov_hi = LI rhi (ImmInt 0)
 442         mov_lo = MR rlo expr_reg
 443     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 444                          rlo
 445 iselExpr64 expr
 446    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 447
 448 #endif /* powerpc_TARGET_ARCH */
 449
 450
 451 -- -----------------------------------------------------------------------------
 452 -- The 'Register' type
 453
 454 -- 'Register's passed up the tree.  If the stix code forces the register
 455 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 456 -- otherwise, it comes out as @Any@, and the parent can decide which
 457 -- register to put it in.
 458
 459 data Register
 460   = Fixed   MachRep Reg InstrBlock
 461   | Any     MachRep (Reg -> InstrBlock)
 462
 463 swizzleRegisterRep :: Register -> MachRep -> Register
 464 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 465 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 466
 467
 468 -- -----------------------------------------------------------------------------
 469 -- Utils based on getRegister, below
 470
 471 -- The dual to getAnyReg: compute an expression into a register, but
 472 -- we don't mind which one it is.
 473 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 474 getSomeReg expr = do
 475   r <- getRegister expr
 476   case r of
 477     Any rep code -> do
 478         tmp <- getNewRegNat rep
 479         return (tmp, code tmp)
 480     Fixed _ reg code ->
 481         return (reg, code)
 482
 483 -- -----------------------------------------------------------------------------
 484 -- Grab the Reg for a CmmReg
 485
 486 getRegisterReg :: CmmReg -> Reg
 487
 488 getRegisterReg (CmmLocal (LocalReg u pk _))
 489   = mkVReg u pk
 490
 491 getRegisterReg (CmmGlobal mid)
 492   = case get_GlobalReg_reg_or_addr mid of
 493        Left (RealReg rrno) -> RealReg rrno
 494        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 495           -- By this stage, the only MagicIds remaining should be the
 496           -- ones which map to a real machine register on this
 497           -- platform.  Hence ...
 498
 499
 500 -- -----------------------------------------------------------------------------
 501 -- Generate code to get a subtree into a Register
 502
 503 -- Don't delete this -- it's very handy for debugging.
 504 --getRegister expr
 505 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 506 --   = panic "getRegister(???)"
 507
 508 getRegister :: CmmExpr -> NatM Register
 509
 510 #if !x86_64_TARGET_ARCH
 511     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 512     -- register, it can only be used for rip-relative addressing.
 513 getRegister (CmmReg (CmmGlobal PicBaseReg))
 514   = do
 515       reg <- getPicBaseNat wordRep
 516       return (Fixed wordRep reg nilOL)
 517 #endif
 518
 519 getRegister (CmmReg reg)
 520   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 521
 522 getRegister tree@(CmmRegOff _ _)
 523   = getRegister (mangleIndexTree tree)
 524
 525
 526 #if WORD_SIZE_IN_BITS==32
 527     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 528     -- TO_W_(x), TO_W_(x >> 32)
 529
 530 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 531              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 532   ChildCode64 code rlo <- iselExpr64 x
 533   return $ Fixed I32 (getHiVRegFromLo rlo) code
 534
 535 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 536              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 537   ChildCode64 code rlo <- iselExpr64 x
 538   return $ Fixed I32 (getHiVRegFromLo rlo) code
 539
 540 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 541   ChildCode64 code rlo <- iselExpr64 x
 542   return $ Fixed I32 rlo code
 543
 544 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 545   ChildCode64 code rlo <- iselExpr64 x
 546   return $ Fixed I32 rlo code
 547
 548 #endif
 549
 550 -- end of machine-"independent" bit; here we go on the rest...
 551
 552 #if alpha_TARGET_ARCH
 553
 554 getRegister (StDouble d)
 555   = getBlockIdNat                   `thenNat` \ lbl ->
 556     getNewRegNat PtrRep             `thenNat` \ tmp ->
 557     let code dst = mkSeqInstrs [
 558             LDATA RoDataSegment lbl [
 559                     DATA TF [ImmLab (rational d)]
 560                 ],
 561             LDA tmp (AddrImm (ImmCLbl lbl)),
 562             LD TF dst (AddrReg tmp)]
 563     in
 564         return (Any F64 code)
 565
 566 getRegister (StPrim primop [x]) -- unary PrimOps
 567   = case primop of
 568       IntNegOp -> trivialUCode (NEG Q False) x
 569
 570       NotOp    -> trivialUCode NOT x
 571
 572       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 573       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 574
 575       OrdOp -> coerceIntCode IntRep x
 576       ChrOp -> chrCode x
 577
 578       Float2IntOp  -> coerceFP2Int    x
 579       Int2FloatOp  -> coerceInt2FP pr x
 580       Double2IntOp -> coerceFP2Int    x
 581       Int2DoubleOp -> coerceInt2FP pr x
 582
 583       Double2FloatOp -> coerceFltCode x
 584       Float2DoubleOp -> coerceFltCode x
 585
 586       other_op -> getRegister (StCall fn CCallConv F64 [x])
 587         where
 588           fn = case other_op of
 589                  FloatExpOp    -> FSLIT("exp")
 590                  FloatLogOp    -> FSLIT("log")
 591                  FloatSqrtOp   -> FSLIT("sqrt")
 592                  FloatSinOp    -> FSLIT("sin")
 593                  FloatCosOp    -> FSLIT("cos")
 594                  FloatTanOp    -> FSLIT("tan")
 595                  FloatAsinOp   -> FSLIT("asin")
 596                  FloatAcosOp   -> FSLIT("acos")
 597                  FloatAtanOp   -> FSLIT("atan")
 598                  FloatSinhOp   -> FSLIT("sinh")
 599                  FloatCoshOp   -> FSLIT("cosh")
 600                  FloatTanhOp   -> FSLIT("tanh")
 601                  DoubleExpOp   -> FSLIT("exp")
 602                  DoubleLogOp   -> FSLIT("log")
 603                  DoubleSqrtOp  -> FSLIT("sqrt")
 604                  DoubleSinOp   -> FSLIT("sin")
 605                  DoubleCosOp   -> FSLIT("cos")
 606                  DoubleTanOp   -> FSLIT("tan")
 607                  DoubleAsinOp  -> FSLIT("asin")
 608                  DoubleAcosOp  -> FSLIT("acos")
 609                  DoubleAtanOp  -> FSLIT("atan")
 610                  DoubleSinhOp  -> FSLIT("sinh")
 611                  DoubleCoshOp  -> FSLIT("cosh")
 612                  DoubleTanhOp  -> FSLIT("tanh")
 613   where
 614     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 615
 616 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 617   = case primop of
 618       CharGtOp -> trivialCode (CMP LTT) y x
 619       CharGeOp -> trivialCode (CMP LE) y x
 620       CharEqOp -> trivialCode (CMP EQQ) x y
 621       CharNeOp -> int_NE_code x y
 622       CharLtOp -> trivialCode (CMP LTT) x y
 623       CharLeOp -> trivialCode (CMP LE) x y
 624
 625       IntGtOp  -> trivialCode (CMP LTT) y x
 626       IntGeOp  -> trivialCode (CMP LE) y x
 627       IntEqOp  -> trivialCode (CMP EQQ) x y
 628       IntNeOp  -> int_NE_code x y
 629       IntLtOp  -> trivialCode (CMP LTT) x y
 630       IntLeOp  -> trivialCode (CMP LE) x y
 631
 632       WordGtOp -> trivialCode (CMP ULT) y x
 633       WordGeOp -> trivialCode (CMP ULE) x y
 634       WordEqOp -> trivialCode (CMP EQQ)  x y
 635       WordNeOp -> int_NE_code x y
 636       WordLtOp -> trivialCode (CMP ULT) x y
 637       WordLeOp -> trivialCode (CMP ULE) x y
 638
 639       AddrGtOp -> trivialCode (CMP ULT) y x
 640       AddrGeOp -> trivialCode (CMP ULE) y x
 641       AddrEqOp -> trivialCode (CMP EQQ)  x y
 642       AddrNeOp -> int_NE_code x y
 643       AddrLtOp -> trivialCode (CMP ULT) x y
 644       AddrLeOp -> trivialCode (CMP ULE) x y
 645
 646       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 647       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 648       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 649       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 650       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 651       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 652
 653       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 654       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 655       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 656       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 657       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 658       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 659
 660       IntAddOp  -> trivialCode (ADD Q False) x y
 661       IntSubOp  -> trivialCode (SUB Q False) x y
 662       IntMulOp  -> trivialCode (MUL Q False) x y
 663       IntQuotOp -> trivialCode (DIV Q False) x y
 664       IntRemOp  -> trivialCode (REM Q False) x y
 665
 666       WordAddOp  -> trivialCode (ADD Q False) x y
 667       WordSubOp  -> trivialCode (SUB Q False) x y
 668       WordMulOp  -> trivialCode (MUL Q False) x y
 669       WordQuotOp -> trivialCode (DIV Q True) x y
 670       WordRemOp  -> trivialCode (REM Q True) x y
 671
 672       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 673       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 674       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 675       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 676
 677       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 678       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 679       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 680       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 681
 682       AddrAddOp  -> trivialCode (ADD Q False) x y
 683       AddrSubOp  -> trivialCode (SUB Q False) x y
 684       AddrRemOp  -> trivialCode (REM Q True) x y
 685
 686       AndOp  -> trivialCode AND x y
 687       OrOp   -> trivialCode OR  x y
 688       XorOp  -> trivialCode XOR x y
 689       SllOp  -> trivialCode SLL x y
 690       SrlOp  -> trivialCode SRL x y
 691
 692       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 693       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 694       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 695
 696       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 697       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 698   where
 699     {- ------------------------------------------------------------
 700         Some bizarre special code for getting condition codes into
 701         registers.  Integer non-equality is a test for equality
 702         followed by an XOR with 1.  (Integer comparisons always set
 703         the result register to 0 or 1.)  Floating point comparisons of
 704         any kind leave the result in a floating point register, so we
 705         need to wrangle an integer register out of things.
 706     -}
 707     int_NE_code :: StixTree -> StixTree -> NatM Register
 708
 709     int_NE_code x y
 710       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 711         getNewRegNat IntRep             `thenNat` \ tmp ->
 712         let
 713             code = registerCode register tmp
 714             src  = registerName register tmp
 715             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 716         in
 717         return (Any IntRep code__2)
 718
 719     {- ------------------------------------------------------------
 720         Comments for int_NE_code also apply to cmpF_code
 721     -}
 722     cmpF_code
 723         :: (Reg -> Reg -> Reg -> Instr)
 724         -> Cond
 725         -> StixTree -> StixTree
 726         -> NatM Register
 727
 728     cmpF_code instr cond x y
 729       = trivialFCode pr instr x y       `thenNat` \ register ->
 730         getNewRegNat F64                `thenNat` \ tmp ->
 731         getBlockIdNat                   `thenNat` \ lbl ->
 732         let
 733             code = registerCode register tmp
 734             result  = registerName register tmp
 735
 736             code__2 dst = code . mkSeqInstrs [
 737                 OR zeroh (RIImm (ImmInt 1)) dst,
 738                 BF cond  result (ImmCLbl lbl),
 739                 OR zeroh (RIReg zeroh) dst,
 740                 NEWBLOCK lbl]
 741         in
 742         return (Any IntRep code__2)
 743       where
 744         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 745       ------------------------------------------------------------
 746
 747 getRegister (CmmLoad pk mem)
 748   = getAmode mem                    `thenNat` \ amode ->
 749     let
 750         code = amodeCode amode
 751         src   = amodeAddr amode
 752         size = primRepToSize pk
 753         code__2 dst = code . mkSeqInstr (LD size dst src)
 754     in
 755     return (Any pk code__2)
 756
 757 getRegister (StInt i)
 758   | fits8Bits i
 759   = let
 760         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 761     in
 762     return (Any IntRep code)
 763   | otherwise
 764   = let
 765         code dst = mkSeqInstr (LDI Q dst src)
 766     in
 767     return (Any IntRep code)
 768   where
 769     src = ImmInt (fromInteger i)
 770
 771 getRegister leaf
 772   | isJust imm
 773   = let
 774         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 775     in
 776     return (Any PtrRep code)
 777   where
 778     imm = maybeImm leaf
 779     imm__2 = case imm of Just x -> x
 780
 781 #endif /* alpha_TARGET_ARCH */
 782
 783 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 784
 785 #if i386_TARGET_ARCH
 786
 787 getRegister (CmmLit (CmmFloat f F32)) = do
 788     lbl <- getNewLabelNat
 789     dflags <- getDynFlagsNat
 790     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 791     Amode addr addr_code <- getAmode dynRef
 792     let code dst =
 793             LDATA ReadOnlyData
 794                         [CmmDataLabel lbl,
 795                          CmmStaticLit (CmmFloat f F32)]
 796             `consOL` (addr_code `snocOL`
 797             GLD F32 addr dst)
 798     -- in
 799     return (Any F32 code)
 800
 801
 802 getRegister (CmmLit (CmmFloat d F64))
 803   | d == 0.0
 804   = let code dst = unitOL (GLDZ dst)
 805     in  return (Any F64 code)
 806
 807   | d == 1.0
 808   = let code dst = unitOL (GLD1 dst)
 809     in  return (Any F64 code)
 810
 811   | otherwise = do
 812     lbl <- getNewLabelNat
 813     dflags <- getDynFlagsNat
 814     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 815     Amode addr addr_code <- getAmode dynRef
 816     let code dst =
 817             LDATA ReadOnlyData
 818                         [CmmDataLabel lbl,
 819                          CmmStaticLit (CmmFloat d F64)]
 820             `consOL` (addr_code `snocOL`
 821             GLD F64 addr dst)
 822     -- in
 823     return (Any F64 code)
 824
 825 #endif /* i386_TARGET_ARCH */
 826
 827 #if x86_64_TARGET_ARCH
 828
 829 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 830    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 831         -- I don't know why there are xorpd, xorps, and pxor instructions.
 832         -- They all appear to do the same thing --SDM
 833    return (Any rep code)
 834
 835 getRegister (CmmLit (CmmFloat f rep)) = do
 836     lbl <- getNewLabelNat
 837     let code dst = toOL [
 838             LDATA ReadOnlyData
 839                         [CmmDataLabel lbl,
 840                          CmmStaticLit (CmmFloat f rep)],
 841             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 842             ]
 843     -- in
 844     return (Any rep code)
 845
 846 #endif /* x86_64_TARGET_ARCH */
 847
 848 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 849
 850 -- catch simple cases of zero- or sign-extended load
 851 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 852   code <- intLoadCode (MOVZxL I8) addr
 853   return (Any I32 code)
 854
 855 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 856   code <- intLoadCode (MOVSxL I8) addr
 857   return (Any I32 code)
 858
 859 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 860   code <- intLoadCode (MOVZxL I16) addr
 861   return (Any I32 code)
 862
 863 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 864   code <- intLoadCode (MOVSxL I16) addr
 865   return (Any I32 code)
 866
 867 #endif
 868
 869 #if x86_64_TARGET_ARCH
 870
 871 -- catch simple cases of zero- or sign-extended load
 872 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 873   code <- intLoadCode (MOVZxL I8) addr
 874   return (Any I64 code)
 875
 876 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 877   code <- intLoadCode (MOVSxL I8) addr
 878   return (Any I64 code)
 879
 880 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 881   code <- intLoadCode (MOVZxL I16) addr
 882   return (Any I64 code)
 883
 884 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 885   code <- intLoadCode (MOVSxL I16) addr
 886   return (Any I64 code)
 887
 888 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 889   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 890   return (Any I64 code)
 891
 892 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 893   code <- intLoadCode (MOVSxL I32) addr
 894   return (Any I64 code)
 895
 896 #endif
 897
 898 #if x86_64_TARGET_ARCH
 899 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 900                                      CmmLit displacement])
 901     = return $ Any I64 (\dst -> unitOL $
 902         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 903 #endif
 904
 905 #if x86_64_TARGET_ARCH
 906 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 907   x_code <- getAnyReg x
 908   lbl <- getNewLabelNat
 909   let
 910     code dst = x_code dst `appOL` toOL [
 911         -- This is how gcc does it, so it can't be that bad:
 912         LDATA ReadOnlyData16 [
 913                 CmmAlign 16,
 914                 CmmDataLabel lbl,
 915                 CmmStaticLit (CmmInt 0x80000000 I32),
 916                 CmmStaticLit (CmmInt 0 I32),
 917                 CmmStaticLit (CmmInt 0 I32),
 918                 CmmStaticLit (CmmInt 0 I32)
 919         ],
 920         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 921                 -- xorps, so we need the 128-bit constant
 922                 -- ToDo: rip-relative
 923         ]
 924   --
 925   return (Any F32 code)
 926
 927 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 928   x_code <- getAnyReg x
 929   lbl <- getNewLabelNat
 930   let
 931         -- This is how gcc does it, so it can't be that bad:
 932     code dst = x_code dst `appOL` toOL [
 933         LDATA ReadOnlyData16 [
 934                 CmmAlign 16,
 935                 CmmDataLabel lbl,
 936                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 937                 CmmStaticLit (CmmInt 0 I64)
 938         ],
 939                 -- gcc puts an unpck here.  Wonder if we need it.
 940         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 941                 -- xorpd, so we need the 128-bit constant
 942         ]
 943   --
 944   return (Any F64 code)
 945 #endif
 946
 947 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 948
 949 getRegister (CmmMachOp mop [x]) -- unary MachOps
 950   = case mop of
 951 #if i386_TARGET_ARCH
 952       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 953       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 954 #endif
 955
 956       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 957       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 958
 959       -- Nop conversions
 960       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 961       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 962       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 963       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 964       MO_U_Conv I32 I16 -> toI16Reg I32 x
 965       MO_S_Conv I32 I16 -> toI16Reg I32 x
 966 #if x86_64_TARGET_ARCH
 967       MO_U_Conv I64 I32 -> conversionNop I64 x
 968       MO_S_Conv I64 I32 -> conversionNop I64 x
 969       MO_U_Conv I64 I16 -> toI16Reg I64 x
 970       MO_S_Conv I64 I16 -> toI16Reg I64 x
 971       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 972       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 973 #endif
 974
 975       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 976       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 977
 978       -- widenings
 979       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 980       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 981       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 982
 983       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 984       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 985       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 986
 987 #if x86_64_TARGET_ARCH
 988       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 989       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 990       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 991       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 992       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 993       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 994         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 995         -- However, we don't want the register allocator to throw it
 996         -- away as an unnecessary reg-to-reg move, so we keep it in
 997         -- the form of a movzl and print it as a movl later.
 998 #endif
 999
1000 #if i386_TARGET_ARCH
1001       MO_S_Conv F32 F64 -> conversionNop F64 x
1002       MO_S_Conv F64 F32 -> conversionNop F32 x
1003 #else
1004       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1005       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1006 #endif
1007
1008       MO_S_Conv from to
1009         | isFloatingRep from -> coerceFP2Int from to x
1010         | isFloatingRep to   -> coerceInt2FP from to x
1011
1012       other -> pprPanic "getRegister" (pprMachOp mop)
1013    where
1014         -- signed or unsigned extension.
1015         integerExtend from to instr expr = do
1016             (reg,e_code) <- if from == I8 then getByteReg expr
1017                                           else getSomeReg expr
1018             let
1019                 code dst =
1020                   e_code `snocOL`
1021                   instr from (OpReg reg) (OpReg dst)
1022             return (Any to code)
1023
1024         toI8Reg new_rep expr
1025             = do codefn <- getAnyReg expr
1026                  return (Any new_rep codefn)
1027                 -- HACK: use getAnyReg to get a byte-addressable register.
1028                 -- If the source was a Fixed register, this will add the
1029                 -- mov instruction to put it into the desired destination.
1030                 -- We're assuming that the destination won't be a fixed
1031                 -- non-byte-addressable register; it won't be, because all
1032                 -- fixed registers are word-sized.
1033
1034         toI16Reg = toI8Reg -- for now
1035
1036         conversionNop new_rep expr
1037             = do e_code <- getRegister expr
1038                  return (swizzleRegisterRep e_code new_rep)
1039
1040
1041 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1042   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1043     case mop of
1044       MO_Eq F32   -> condFltReg EQQ x y
1045       MO_Ne F32   -> condFltReg NE x y
1046       MO_S_Gt F32 -> condFltReg GTT x y
1047       MO_S_Ge F32 -> condFltReg GE x y
1048       MO_S_Lt F32 -> condFltReg LTT x y
1049       MO_S_Le F32 -> condFltReg LE x y
1050
1051       MO_Eq F64   -> condFltReg EQQ x y
1052       MO_Ne F64   -> condFltReg NE x y
1053       MO_S_Gt F64 -> condFltReg GTT x y
1054       MO_S_Ge F64 -> condFltReg GE x y
1055       MO_S_Lt F64 -> condFltReg LTT x y
1056       MO_S_Le F64 -> condFltReg LE x y
1057
1058       MO_Eq rep   -> condIntReg EQQ x y
1059       MO_Ne rep   -> condIntReg NE x y
1060
1061       MO_S_Gt rep -> condIntReg GTT x y
1062       MO_S_Ge rep -> condIntReg GE x y
1063       MO_S_Lt rep -> condIntReg LTT x y
1064       MO_S_Le rep -> condIntReg LE x y
1065
1066       MO_U_Gt rep -> condIntReg GU  x y
1067       MO_U_Ge rep -> condIntReg GEU x y
1068       MO_U_Lt rep -> condIntReg LU  x y
1069       MO_U_Le rep -> condIntReg LEU x y
1070
1071 #if i386_TARGET_ARCH
1072       MO_Add F32 -> trivialFCode F32 GADD x y
1073       MO_Sub F32 -> trivialFCode F32 GSUB x y
1074
1075       MO_Add F64 -> trivialFCode F64 GADD x y
1076       MO_Sub F64 -> trivialFCode F64 GSUB x y
1077
1078       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1079       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1080 #endif
1081
1082 #if x86_64_TARGET_ARCH
1083       MO_Add F32 -> trivialFCode F32 ADD x y
1084       MO_Sub F32 -> trivialFCode F32 SUB x y
1085
1086       MO_Add F64 -> trivialFCode F64 ADD x y
1087       MO_Sub F64 -> trivialFCode F64 SUB x y
1088
1089       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1090       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1091 #endif
1092
1093       MO_Add rep -> add_code rep x y
1094       MO_Sub rep -> sub_code rep x y
1095
1096       MO_S_Quot rep -> div_code rep True  True  x y
1097       MO_S_Rem  rep -> div_code rep True  False x y
1098       MO_U_Quot rep -> div_code rep False True  x y
1099       MO_U_Rem  rep -> div_code rep False False x y
1100
1101 #if i386_TARGET_ARCH
1102       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1103       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1104 #endif
1105
1106 #if x86_64_TARGET_ARCH
1107       MO_Mul   F32 -> trivialFCode F32 MUL x y
1108       MO_Mul   F64 -> trivialFCode F64 MUL x y
1109 #endif
1110
1111       MO_Mul   rep -> let op = IMUL rep in
1112                       trivialCode rep op (Just op) x y
1113
1114       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1115
1116       MO_And rep -> let op = AND rep in
1117                     trivialCode rep op (Just op) x y
1118       MO_Or  rep -> let op = OR  rep in
1119                     trivialCode rep op (Just op) x y
1120       MO_Xor rep -> let op = XOR rep in
1121                     trivialCode rep op (Just op) x y
1122
1123         {- Shift ops on x86s have constraints on their source, it
1124            either has to be Imm, CL or 1
1125             => trivialCode is not restrictive enough (sigh.)
1126         -}
1127       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1128       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1129       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1130
1131       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1132   where
1133     --------------------
1134     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1135     imulMayOflo rep a b = do
1136          (a_reg, a_code) <- getNonClobberedReg a
1137          b_code <- getAnyReg b
1138          let
1139              shift_amt  = case rep of
1140                            I32 -> 31
1141                            I64 -> 63
1142                            _ -> panic "shift_amt"
1143
1144              code = a_code `appOL` b_code eax `appOL`
1145                         toOL [
1146                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1147                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1148                                 -- sign extend lower part
1149                            SUB rep (OpReg edx) (OpReg eax)
1150                                 -- compare against upper
1151                            -- eax==0 if high part == sign extended low part
1152                         ]
1153          -- in
1154          return (Fixed rep eax code)
1155
1156     --------------------
1157     shift_code :: MachRep
1158                -> (Operand -> Operand -> Instr)
1159                -> CmmExpr
1160                -> CmmExpr
1161                -> NatM Register
1162
1163     {- Case1: shift length as immediate -}
1164     shift_code rep instr x y@(CmmLit lit) = do
1165           x_code <- getAnyReg x
1166           let
1167                code dst
1168                   = x_code dst `snocOL`
1169                     instr (OpImm (litToImm lit)) (OpReg dst)
1170           -- in
1171           return (Any rep code)
1172
1173     {- Case2: shift length is complex (non-immediate)
1174       * y must go in %ecx.
1175       * we cannot do y first *and* put its result in %ecx, because
1176         %ecx might be clobbered by x.
1177       * if we do y second, then x cannot be
1178         in a clobbered reg.  Also, we cannot clobber x's reg
1179         with the instruction itself.
1180       * so we can either:
1181         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1182         - do y second and put its result into %ecx.  x gets placed in a fresh
1183           tmp.  This is likely to be better, becuase the reg alloc can
1184           eliminate this reg->reg move here (it won't eliminate the other one,
1185           because the move is into the fixed %ecx).
1186     -}
1187     shift_code rep instr x y{-amount-} = do
1188         x_code <- getAnyReg x
1189         tmp <- getNewRegNat rep
1190         y_code <- getAnyReg y
1191         let
1192            code = x_code tmp `appOL`
1193                   y_code ecx `snocOL`
1194                   instr (OpReg ecx) (OpReg tmp)
1195         -- in
1196         return (Fixed rep tmp code)
1197
1198     --------------------
1199     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1200     add_code rep x (CmmLit (CmmInt y _))
1201         | not (is64BitInteger y) = add_int rep x y
1202     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1203
1204     --------------------
1205     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1206     sub_code rep x (CmmLit (CmmInt y _))
1207         | not (is64BitInteger (-y)) = add_int rep x (-y)
1208     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1209
1210     -- our three-operand add instruction:
1211     add_int rep x y = do
1212         (x_reg, x_code) <- getSomeReg x
1213         let
1214             imm = ImmInt (fromInteger y)
1215             code dst
1216                = x_code `snocOL`
1217                  LEA rep
1218                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1219                         (OpReg dst)
1220         --
1221         return (Any rep code)
1222
1223     ----------------------
1224     div_code rep signed quotient x y = do
1225            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1226            x_code <- getAnyReg x
1227            let
1228              widen | signed    = CLTD rep
1229                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1230
1231              instr | signed    = IDIV
1232                    | otherwise = DIV
1233
1234              code = y_code `appOL`
1235                     x_code eax `appOL`
1236                     toOL [widen, instr rep y_op]
1237
1238              result | quotient  = eax
1239                     | otherwise = edx
1240
1241            -- in
1242            return (Fixed rep result code)
1243
1244
1245 getRegister (CmmLoad mem pk)
1246   | isFloatingRep pk
1247   = do
1248     Amode src mem_code <- getAmode mem
1249     let
1250         code dst = mem_code `snocOL`
1251                    IF_ARCH_i386(GLD pk src dst,
1252                                 MOV pk (OpAddr src) (OpReg dst))
1253     --
1254     return (Any pk code)
1255
1256 #if i386_TARGET_ARCH
1257 getRegister (CmmLoad mem pk)
1258   | pk /= I64
1259   = do
1260     code <- intLoadCode (instr pk) mem
1261     return (Any pk code)
1262   where
1263         instr I8  = MOVZxL pk
1264         instr I16 = MOV I16
1265         instr I32 = MOV I32
1266         -- we always zero-extend 8-bit loads, if we
1267         -- can't think of anything better.  This is because
1268         -- we can't guarantee access to an 8-bit variant of every register
1269         -- (esi and edi don't have 8-bit variants), so to make things
1270         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1271 #endif
1272
1273 #if x86_64_TARGET_ARCH
1274 -- Simpler memory load code on x86_64
1275 getRegister (CmmLoad mem pk)
1276   = do
1277     code <- intLoadCode (MOV pk) mem
1278     return (Any pk code)
1279 #endif
1280
1281 getRegister (CmmLit (CmmInt 0 rep))
1282   = let
1283         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1284         adj_rep = case rep of I64 -> I32; _ -> rep
1285         rep1 = IF_ARCH_i386( rep, adj_rep )
1286         code dst
1287            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1288     in
1289         return (Any rep code)
1290
1291 #if x86_64_TARGET_ARCH
1292   -- optimisation for loading small literals on x86_64: take advantage
1293   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1294   -- instruction forms are shorter.
1295 getRegister (CmmLit lit)
1296   | I64 <- cmmLitRep lit, not (isBigLit lit)
1297   = let
1298         imm = litToImm lit
1299         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1300     in
1301         return (Any I64 code)
1302   where
1303    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1304    isBigLit _ = False
1305         -- note1: not the same as is64BitLit, because that checks for
1306         -- signed literals that fit in 32 bits, but we want unsigned
1307         -- literals here.
1308         -- note2: all labels are small, because we're assuming the
1309         -- small memory model (see gcc docs, -mcmodel=small).
1310 #endif
1311
1312 getRegister (CmmLit lit)
1313   = let
1314         rep = cmmLitRep lit
1315         imm = litToImm lit
1316         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1317     in
1318         return (Any rep code)
1319
1320 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1321
1322
1323 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1324    -> NatM (Reg -> InstrBlock)
1325 intLoadCode instr mem = do
1326   Amode src mem_code <- getAmode mem
1327   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1328
1329 -- Compute an expression into *any* register, adding the appropriate
1330 -- move instruction if necessary.
1331 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1332 getAnyReg expr = do
1333   r <- getRegister expr
1334   anyReg r
1335
1336 anyReg :: Register -> NatM (Reg -> InstrBlock)
1337 anyReg (Any _ code)          = return code
1338 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1339
1340 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1341 -- Fixed registers might not be byte-addressable, so we make sure we've
1342 -- got a temporary, inserting an extra reg copy if necessary.
1343 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1344 #if x86_64_TARGET_ARCH
1345 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1346 #else
1347 getByteReg expr = do
1348   r <- getRegister expr
1349   case r of
1350     Any rep code -> do
1351         tmp <- getNewRegNat rep
1352         return (tmp, code tmp)
1353     Fixed rep reg code
1354         | isVirtualReg reg -> return (reg,code)
1355         | otherwise -> do
1356             tmp <- getNewRegNat rep
1357             return (tmp, code `snocOL` reg2reg rep reg tmp)
1358         -- ToDo: could optimise slightly by checking for byte-addressable
1359         -- real registers, but that will happen very rarely if at all.
1360 #endif
1361
1362 -- Another variant: this time we want the result in a register that cannot
1363 -- be modified by code to evaluate an arbitrary expression.
1364 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1365 getNonClobberedReg expr = do
1366   r <- getRegister expr
1367   case r of
1368     Any rep code -> do
1369         tmp <- getNewRegNat rep
1370         return (tmp, code tmp)
1371     Fixed rep reg code
1372         -- only free regs can be clobbered
1373         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1374                 tmp <- getNewRegNat rep
1375                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1376         | otherwise ->
1377                 return (reg, code)
1378
1379 reg2reg :: MachRep -> Reg -> Reg -> Instr
1380 reg2reg rep src dst
1381 #if i386_TARGET_ARCH
1382   | isFloatingRep rep = GMOV src dst
1383 #endif
1384   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1385
1386 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1387
1388 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1389
1390 #if sparc_TARGET_ARCH
1391
1392 getRegister (CmmLit (CmmFloat f F32)) = do
1393     lbl <- getNewLabelNat
1394     let code dst = toOL [
1395             LDATA ReadOnlyData
1396                         [CmmDataLabel lbl,
1397                          CmmStaticLit (CmmFloat f F32)],
1398             SETHI (HI (ImmCLbl lbl)) dst,
1399             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1400     return (Any F32 code)
1401
1402 getRegister (CmmLit (CmmFloat d F64)) = do
1403     lbl <- getNewLabelNat
1404     let code dst = toOL [
1405             LDATA ReadOnlyData
1406                         [CmmDataLabel lbl,
1407                          CmmStaticLit (CmmFloat d F64)],
1408             SETHI (HI (ImmCLbl lbl)) dst,
1409             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1410     return (Any F64 code)
1411
1412 getRegister (CmmMachOp mop [x]) -- unary MachOps
1413   = case mop of
1414       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1415       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1416
1417       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1418       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1419
1420       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1421
1422       MO_U_Conv F64 F32-> coerceDbl2Flt x
1423       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1424
1425       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1426       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1427       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1428       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1429
1430       -- Conversions which are a nop on sparc
1431       MO_U_Conv from to
1432         | from == to   -> conversionNop to   x
1433       MO_U_Conv I32 to -> conversionNop to   x
1434       MO_S_Conv I32 to -> conversionNop to   x
1435
1436       -- widenings
1437       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1438       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1439       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1440       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1441
1442       other_op -> panic "Unknown unary mach op"
1443     where
1444         -- XXX SLL/SRL?
1445         integerExtend signed from to expr = do
1446            (reg, e_code) <- getSomeReg expr
1447            let
1448                code dst =
1449                    e_code `snocOL`
1450                    ((if signed then SRA else SRL)
1451                           reg (RIImm (ImmInt 0)) dst)
1452            return (Any to code)
1453         conversionNop new_rep expr
1454             = do e_code <- getRegister expr
1455                  return (swizzleRegisterRep e_code new_rep)
1456
1457 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1458   = case mop of
1459       MO_Eq F32 -> condFltReg EQQ x y
1460       MO_Ne F32 -> condFltReg NE x y
1461
1462       MO_S_Gt F32 -> condFltReg GTT x y
1463       MO_S_Ge F32 -> condFltReg GE x y
1464       MO_S_Lt F32 -> condFltReg LTT x y
1465       MO_S_Le F32 -> condFltReg LE x y
1466
1467       MO_Eq F64 -> condFltReg EQQ x y
1468       MO_Ne F64 -> condFltReg NE x y
1469
1470       MO_S_Gt F64 -> condFltReg GTT x y
1471       MO_S_Ge F64 -> condFltReg GE x y
1472       MO_S_Lt F64 -> condFltReg LTT x y
1473       MO_S_Le F64 -> condFltReg LE x y
1474
1475       MO_Eq rep -> condIntReg EQQ x y
1476       MO_Ne rep -> condIntReg NE x y
1477
1478       MO_S_Gt rep -> condIntReg GTT x y
1479       MO_S_Ge rep -> condIntReg GE x y
1480       MO_S_Lt rep -> condIntReg LTT x y
1481       MO_S_Le rep -> condIntReg LE x y
1482
1483       MO_U_Gt I32  -> condIntReg GTT x y
1484       MO_U_Ge I32  -> condIntReg GE x y
1485       MO_U_Lt I32  -> condIntReg LTT x y
1486       MO_U_Le I32  -> condIntReg LE x y
1487
1488       MO_U_Gt I16 -> condIntReg GU  x y
1489       MO_U_Ge I16 -> condIntReg GEU x y
1490       MO_U_Lt I16 -> condIntReg LU  x y
1491       MO_U_Le I16 -> condIntReg LEU x y
1492
1493       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1494       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1495
1496       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1497 {-
1498       -- ToDo: teach about V8+ SPARC div instructions
1499       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1500       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1501       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1502       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1503 -}
1504       MO_Add F32  -> trivialFCode F32 FADD  x y
1505       MO_Sub F32   -> trivialFCode F32  FSUB x y
1506       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1507       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1508
1509       MO_Add F64   -> trivialFCode F64 FADD  x y
1510       MO_Sub F64   -> trivialFCode F64  FSUB x y
1511       MO_Mul F64   -> trivialFCode F64  FMUL x y
1512       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1513
1514       MO_And rep   -> trivialCode rep (AND False) x y
1515       MO_Or rep    -> trivialCode rep (OR  False) x y
1516       MO_Xor rep   -> trivialCode rep (XOR False) x y
1517
1518       MO_Mul rep -> trivialCode rep (SMUL False) x y
1519
1520       MO_Shl rep   -> trivialCode rep SLL  x y
1521       MO_U_Shr rep   -> trivialCode rep SRL x y
1522       MO_S_Shr rep   -> trivialCode rep SRA x y
1523
1524 {-
1525       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1526                                          [promote x, promote y])
1527                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1528       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1529                                         [x, y])
1530 -}
1531       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1532   where
1533     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1534
1535     --------------------
1536     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1537     imulMayOflo rep a b = do
1538          (a_reg, a_code) <- getSomeReg a
1539          (b_reg, b_code) <- getSomeReg b
1540          res_lo <- getNewRegNat I32
1541          res_hi <- getNewRegNat I32
1542          let
1543             shift_amt  = case rep of
1544                           I32 -> 31
1545                           I64 -> 63
1546                           _ -> panic "shift_amt"
1547             code dst = a_code `appOL` b_code `appOL`
1548                        toOL [
1549                            SMUL False a_reg (RIReg b_reg) res_lo,
1550                            RDY res_hi,
1551                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1552                            SUB False False res_lo (RIReg res_hi) dst
1553                         ]
1554          return (Any I32 code)
1555
1556 getRegister (CmmLoad mem pk) = do
1557     Amode src code <- getAmode mem
1558     let
1559         code__2 dst = code `snocOL` LD pk src dst
1560     return (Any pk code__2)
1561
1562 getRegister (CmmLit (CmmInt i _))
1563   | fits13Bits i
1564   = let
1565         src = ImmInt (fromInteger i)
1566         code dst = unitOL (OR False g0 (RIImm src) dst)
1567     in
1568         return (Any I32 code)
1569
1570 getRegister (CmmLit lit)
1571   = let rep = cmmLitRep lit
1572         imm = litToImm lit
1573         code dst = toOL [
1574             SETHI (HI imm) dst,
1575             OR False dst (RIImm (LO imm)) dst]
1576     in return (Any I32 code)
1577
1578 #endif /* sparc_TARGET_ARCH */
1579
1580 #if powerpc_TARGET_ARCH
1581 getRegister (CmmLoad mem pk)
1582   | pk /= I64
1583   = do
1584         Amode addr addr_code <- getAmode mem
1585         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1586                        addr_code `snocOL` LD pk dst addr
1587         return (Any pk code)
1588
1589 -- catch simple cases of zero- or sign-extended load
1590 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1591     Amode addr addr_code <- getAmode mem
1592     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1593
1594 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1595
1596 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1597     Amode addr addr_code <- getAmode mem
1598     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1599
1600 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1601     Amode addr addr_code <- getAmode mem
1602     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1603
1604 getRegister (CmmMachOp mop [x]) -- unary MachOps
1605   = case mop of
1606       MO_Not rep   -> trivialUCode rep NOT x
1607
1608       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1609       MO_S_Conv F32 F64 -> conversionNop F64 x
1610
1611       MO_S_Conv from to
1612         | from == to         -> conversionNop to x
1613         | isFloatingRep from -> coerceFP2Int from to x
1614         | isFloatingRep to   -> coerceInt2FP from to x
1615
1616         -- narrowing is a nop: we treat the high bits as undefined
1617       MO_S_Conv I32 to -> conversionNop to x
1618       MO_S_Conv I16 I8 -> conversionNop I8 x
1619       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1620       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1621
1622       MO_U_Conv from to
1623         | from == to -> conversionNop to x
1624         -- narrowing is a nop: we treat the high bits as undefined
1625       MO_U_Conv I32 to -> conversionNop to x
1626       MO_U_Conv I16 I8 -> conversionNop I8 x
1627       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1628       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1629
1630       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1631       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1632       MO_S_Neg rep      -> trivialUCode rep NEG x
1633
1634     where
1635         conversionNop new_rep expr
1636             = do e_code <- getRegister expr
1637                  return (swizzleRegisterRep e_code new_rep)
1638
1639 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1640   = case mop of
1641       MO_Eq F32 -> condFltReg EQQ x y
1642       MO_Ne F32 -> condFltReg NE  x y
1643
1644       MO_S_Gt F32 -> condFltReg GTT x y
1645       MO_S_Ge F32 -> condFltReg GE  x y
1646       MO_S_Lt F32 -> condFltReg LTT x y
1647       MO_S_Le F32 -> condFltReg LE  x y
1648
1649       MO_Eq F64 -> condFltReg EQQ x y
1650       MO_Ne F64 -> condFltReg NE  x y
1651
1652       MO_S_Gt F64 -> condFltReg GTT x y
1653       MO_S_Ge F64 -> condFltReg GE  x y
1654       MO_S_Lt F64 -> condFltReg LTT x y
1655       MO_S_Le F64 -> condFltReg LE  x y
1656
1657       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1658       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1659
1660       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1661       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1662       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1663       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1664
1665       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1666       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1667       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1668       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1669
1670       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1671       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1672       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1673       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1674
1675       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1676       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1677       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1678       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1679
1680          -- optimize addition with 32-bit immediate
1681          -- (needed for PIC)
1682       MO_Add I32 ->
1683         case y of
1684           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1685             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1686           CmmLit lit
1687             -> do
1688                 (src, srcCode) <- getSomeReg x
1689                 let imm = litToImm lit
1690                     code dst = srcCode `appOL` toOL [
1691                                     ADDIS dst src (HA imm),
1692                                     ADD dst dst (RIImm (LO imm))
1693                                 ]
1694                 return (Any I32 code)
1695           _ -> trivialCode I32 True ADD x y
1696
1697       MO_Add rep -> trivialCode rep True ADD x y
1698       MO_Sub rep ->
1699         case y of    -- subfi ('substract from' with immediate) doesn't exist
1700           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1701             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1702           _ -> trivialCodeNoImm rep SUBF y x
1703
1704       MO_Mul rep -> trivialCode rep True MULLW x y
1705
1706       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1707
1708       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1709       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1710
1711       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1712       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1713
1714       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1715       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1716
1717       MO_And rep   -> trivialCode rep False AND x y
1718       MO_Or rep    -> trivialCode rep False OR x y
1719       MO_Xor rep   -> trivialCode rep False XOR x y
1720
1721       MO_Shl rep   -> trivialCode rep False SLW x y
1722       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1723       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1724
1725 getRegister (CmmLit (CmmInt i rep))
1726   | Just imm <- makeImmediate rep True i
1727   = let
1728         code dst = unitOL (LI dst imm)
1729     in
1730         return (Any rep code)
1731
1732 getRegister (CmmLit (CmmFloat f frep)) = do
1733     lbl <- getNewLabelNat
1734     dflags <- getDynFlagsNat
1735     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1736     Amode addr addr_code <- getAmode dynRef
1737     let code dst =
1738             LDATA ReadOnlyData  [CmmDataLabel lbl,
1739                                  CmmStaticLit (CmmFloat f frep)]
1740             `consOL` (addr_code `snocOL` LD frep dst addr)
1741     return (Any frep code)
1742
1743 getRegister (CmmLit lit)
1744   = let rep = cmmLitRep lit
1745         imm = litToImm lit
1746         code dst = toOL [
1747               LIS dst (HI imm),
1748               OR dst dst (RIImm (LO imm))
1749           ]
1750     in return (Any rep code)
1751
1752 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1753
1754     -- extend?Rep: wrap integer expression of type rep
1755     -- in a conversion to I32
1756 extendSExpr I32 x = x
1757 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1758 extendUExpr I32 x = x
1759 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1760
1761 #endif /* powerpc_TARGET_ARCH */
1762
1763
1764 -- -----------------------------------------------------------------------------
1765 --  The 'Amode' type: Memory addressing modes passed up the tree.
1766
1767 data Amode = Amode AddrMode InstrBlock
1768
1769 {-
1770 Now, given a tree (the argument to an CmmLoad) that references memory,
1771 produce a suitable addressing mode.
1772
1773 A Rule of the Game (tm) for Amodes: use of the addr bit must
1774 immediately follow use of the code part, since the code part puts
1775 values in registers which the addr then refers to.  So you can't put
1776 anything in between, lest it overwrite some of those registers.  If
1777 you need to do some other computation between the code part and use of
1778 the addr bit, first store the effective address from the amode in a
1779 temporary, then do the other computation, and then use the temporary:
1780
1781     code
1782     LEA amode, tmp
1783     ... other computation ...
1784     ... (tmp) ...
1785 -}
1786
1787 getAmode :: CmmExpr -> NatM Amode
1788 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1789
1790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1791
1792 #if alpha_TARGET_ARCH
1793
1794 getAmode (StPrim IntSubOp [x, StInt i])
1795   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1796     getRegister x               `thenNat` \ register ->
1797     let
1798         code = registerCode register tmp
1799         reg  = registerName register tmp
1800         off  = ImmInt (-(fromInteger i))
1801     in
1802     return (Amode (AddrRegImm reg off) code)
1803
1804 getAmode (StPrim IntAddOp [x, StInt i])
1805   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1806     getRegister x               `thenNat` \ register ->
1807     let
1808         code = registerCode register tmp
1809         reg  = registerName register tmp
1810         off  = ImmInt (fromInteger i)
1811     in
1812     return (Amode (AddrRegImm reg off) code)
1813
1814 getAmode leaf
1815   | isJust imm
1816   = return (Amode (AddrImm imm__2) id)
1817   where
1818     imm = maybeImm leaf
1819     imm__2 = case imm of Just x -> x
1820
1821 getAmode other
1822   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1823     getRegister other           `thenNat` \ register ->
1824     let
1825         code = registerCode register tmp
1826         reg  = registerName register tmp
1827     in
1828     return (Amode (AddrReg reg) code)
1829
1830 #endif /* alpha_TARGET_ARCH */
1831
1832 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1833
1834 #if x86_64_TARGET_ARCH
1835
1836 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1837                                      CmmLit displacement])
1838     = return $ Amode (ripRel (litToImm displacement)) nilOL
1839
1840 #endif
1841
1842 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1843
1844 -- This is all just ridiculous, since it carefully undoes
1845 -- what mangleIndexTree has just done.
1846 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1847   | not (is64BitLit lit)
1848   -- ASSERT(rep == I32)???
1849   = do (x_reg, x_code) <- getSomeReg x
1850        let off = ImmInt (-(fromInteger i))
1851        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1852
1853 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1854   | not (is64BitLit lit)
1855   -- ASSERT(rep == I32)???
1856   = do (x_reg, x_code) <- getSomeReg x
1857        let off = ImmInt (fromInteger i)
1858        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1859
1860 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1861 -- recognised by the next rule.
1862 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1863                                   b@(CmmLit _)])
1864   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1865
1866 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1867                                         [y, CmmLit (CmmInt shift _)]])
1868   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1869   = x86_complex_amode x y shift 0
1870
1871 getAmode (CmmMachOp (MO_Add rep)
1872                 [x, CmmMachOp (MO_Add _)
1873                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1874                          CmmLit (CmmInt offset _)]])
1875   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1876   && not (is64BitInteger offset)
1877   = x86_complex_amode x y shift offset
1878
1879 getAmode (CmmMachOp (MO_Add rep) [x,y])
1880   = x86_complex_amode x y 0 0
1881
1882 getAmode (CmmLit lit) | not (is64BitLit lit)
1883   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1884
1885 getAmode expr = do
1886   (reg,code) <- getSomeReg expr
1887   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1888
1889
1890 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1891 x86_complex_amode base index shift offset
1892   = do (x_reg, x_code) <- getNonClobberedReg base
1893         -- x must be in a temp, because it has to stay live over y_code
1894         -- we could compre x_reg and y_reg and do something better here...
1895        (y_reg, y_code) <- getSomeReg index
1896        let
1897            code = x_code `appOL` y_code
1898            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1899        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1900                code)
1901
1902 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1903
1904 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1905
1906 #if sparc_TARGET_ARCH
1907
1908 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1909   | fits13Bits (-i)
1910   = do
1911        (reg, code) <- getSomeReg x
1912        let
1913          off  = ImmInt (-(fromInteger i))
1914        return (Amode (AddrRegImm reg off) code)
1915
1916
1917 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1918   | fits13Bits i
1919   = do
1920        (reg, code) <- getSomeReg x
1921        let
1922          off  = ImmInt (fromInteger i)
1923        return (Amode (AddrRegImm reg off) code)
1924
1925 getAmode (CmmMachOp (MO_Add rep) [x, y])
1926   = do
1927     (regX, codeX) <- getSomeReg x
1928     (regY, codeY) <- getSomeReg y
1929     let
1930         code = codeX `appOL` codeY
1931     return (Amode (AddrRegReg regX regY) code)
1932
1933 -- XXX Is this same as "leaf" in Stix?
1934 getAmode (CmmLit lit)
1935   = do
1936       tmp <- getNewRegNat I32
1937       let
1938         code = unitOL (SETHI (HI imm__2) tmp)
1939       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1940       where
1941          imm__2 = litToImm lit
1942
1943 getAmode other
1944   = do
1945        (reg, code) <- getSomeReg other
1946        let
1947             off  = ImmInt 0
1948        return (Amode (AddrRegImm reg off) code)
1949
1950 #endif /* sparc_TARGET_ARCH */
1951
1952 #ifdef powerpc_TARGET_ARCH
1953 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1954   | Just off <- makeImmediate I32 True (-i)
1955   = do
1956         (reg, code) <- getSomeReg x
1957         return (Amode (AddrRegImm reg off) code)
1958
1959
1960 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1961   | Just off <- makeImmediate I32 True i
1962   = do
1963         (reg, code) <- getSomeReg x
1964         return (Amode (AddrRegImm reg off) code)
1965
1966    -- optimize addition with 32-bit immediate
1967    -- (needed for PIC)
1968 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1969   = do
1970         tmp <- getNewRegNat I32
1971         (src, srcCode) <- getSomeReg x
1972         let imm = litToImm lit
1973             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1974         return (Amode (AddrRegImm tmp (LO imm)) code)
1975
1976 getAmode (CmmLit lit)
1977   = do
1978         tmp <- getNewRegNat I32
1979         let imm = litToImm lit
1980             code = unitOL (LIS tmp (HA imm))
1981         return (Amode (AddrRegImm tmp (LO imm)) code)
1982
1983 getAmode (CmmMachOp (MO_Add I32) [x, y])
1984   = do
1985         (regX, codeX) <- getSomeReg x
1986         (regY, codeY) <- getSomeReg y
1987         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1988
1989 getAmode other
1990   = do
1991         (reg, code) <- getSomeReg other
1992         let
1993             off  = ImmInt 0
1994         return (Amode (AddrRegImm reg off) code)
1995 #endif /* powerpc_TARGET_ARCH */
1996
1997 -- -----------------------------------------------------------------------------
1998 -- getOperand: sometimes any operand will do.
1999
2000 -- getNonClobberedOperand: the value of the operand will remain valid across
2001 -- the computation of an arbitrary expression, unless the expression
2002 -- is computed directly into a register which the operand refers to
2003 -- (see trivialCode where this function is used for an example).
2004
2005 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2006
2007 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2008 #if x86_64_TARGET_ARCH
2009 getNonClobberedOperand (CmmLit lit)
2010   | isSuitableFloatingPointLit lit = do
2011     lbl <- getNewLabelNat
2012     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2013                                            CmmStaticLit lit])
2014     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2015 #endif
2016 getNonClobberedOperand (CmmLit lit)
2017   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2018     return (OpImm (litToImm lit), nilOL)
2019 getNonClobberedOperand (CmmLoad mem pk)
2020   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2021     Amode src mem_code <- getAmode mem
2022     (src',save_code) <-
2023         if (amodeCouldBeClobbered src)
2024                 then do
2025                    tmp <- getNewRegNat wordRep
2026                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2027                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2028                 else
2029                    return (src, nilOL)
2030     return (OpAddr src', save_code `appOL` mem_code)
2031 getNonClobberedOperand e = do
2032     (reg, code) <- getNonClobberedReg e
2033     return (OpReg reg, code)
2034
2035 amodeCouldBeClobbered :: AddrMode -> Bool
2036 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2037
2038 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2039 regClobbered _ = False
2040
2041 -- getOperand: the operand is not required to remain valid across the
2042 -- computation of an arbitrary expression.
2043 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2044 #if x86_64_TARGET_ARCH
2045 getOperand (CmmLit lit)
2046   | isSuitableFloatingPointLit lit = do
2047     lbl <- getNewLabelNat
2048     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2049                                            CmmStaticLit lit])
2050     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2051 #endif
2052 getOperand (CmmLit lit)
2053   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2054     return (OpImm (litToImm lit), nilOL)
2055 getOperand (CmmLoad mem pk)
2056   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2057     Amode src mem_code <- getAmode mem
2058     return (OpAddr src, mem_code)
2059 getOperand e = do
2060     (reg, code) <- getSomeReg e
2061     return (OpReg reg, code)
2062
2063 isOperand :: CmmExpr -> Bool
2064 isOperand (CmmLoad _ _) = True
2065 isOperand (CmmLit lit)  = not (is64BitLit lit)
2066                           || isSuitableFloatingPointLit lit
2067 isOperand _             = False
2068
2069 -- if we want a floating-point literal as an operand, we can
2070 -- use it directly from memory.  However, if the literal is
2071 -- zero, we're better off generating it into a register using
2072 -- xor.
2073 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2074 isSuitableFloatingPointLit _ = False
2075
2076 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2077 getRegOrMem (CmmLoad mem pk)
2078   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2079     Amode src mem_code <- getAmode mem
2080     return (OpAddr src, mem_code)
2081 getRegOrMem e = do
2082     (reg, code) <- getNonClobberedReg e
2083     return (OpReg reg, code)
2084
2085 #if x86_64_TARGET_ARCH
2086 is64BitLit (CmmInt i I64) = is64BitInteger i
2087    -- assume that labels are in the range 0-2^31-1: this assumes the
2088    -- small memory model (see gcc docs, -mcmodel=small).
2089 #endif
2090 is64BitLit x = False
2091 #endif
2092
2093 is64BitInteger :: Integer -> Bool
2094 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2095   where i64 = fromIntegral i :: Int64
2096   -- a CmmInt is intended to be truncated to the appropriate
2097   -- number of bits, so here we truncate it to Int64.  This is
2098   -- important because e.g. -1 as a CmmInt might be either
2099   -- -1 or 18446744073709551615.
2100
2101 -- -----------------------------------------------------------------------------
2102 --  The 'CondCode' type:  Condition codes passed up the tree.
2103
2104 data CondCode = CondCode Bool Cond InstrBlock
2105
2106 -- Set up a condition code for a conditional branch.
2107
2108 getCondCode :: CmmExpr -> NatM CondCode
2109
2110 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2111
2112 #if alpha_TARGET_ARCH
2113 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2114 #endif /* alpha_TARGET_ARCH */
2115
2116 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2117
2118 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2119 -- yes, they really do seem to want exactly the same!
2120
2121 getCondCode (CmmMachOp mop [x, y])
2122   =
2123     case mop of
2124       MO_Eq F32 -> condFltCode EQQ x y
2125       MO_Ne F32 -> condFltCode NE  x y
2126
2127       MO_S_Gt F32 -> condFltCode GTT x y
2128       MO_S_Ge F32 -> condFltCode GE  x y
2129       MO_S_Lt F32 -> condFltCode LTT x y
2130       MO_S_Le F32 -> condFltCode LE  x y
2131
2132       MO_Eq F64 -> condFltCode EQQ x y
2133       MO_Ne F64 -> condFltCode NE  x y
2134
2135       MO_S_Gt F64 -> condFltCode GTT x y
2136       MO_S_Ge F64 -> condFltCode GE  x y
2137       MO_S_Lt F64 -> condFltCode LTT x y
2138       MO_S_Le F64 -> condFltCode LE  x y
2139
2140       MO_Eq rep -> condIntCode EQQ  x y
2141       MO_Ne rep -> condIntCode NE   x y
2142
2143       MO_S_Gt rep -> condIntCode GTT  x y
2144       MO_S_Ge rep -> condIntCode GE   x y
2145       MO_S_Lt rep -> condIntCode LTT  x y
2146       MO_S_Le rep -> condIntCode LE   x y
2147
2148       MO_U_Gt rep -> condIntCode GU   x y
2149       MO_U_Ge rep -> condIntCode GEU  x y
2150       MO_U_Lt rep -> condIntCode LU   x y
2151       MO_U_Le rep -> condIntCode LEU  x y
2152
2153       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2154
2155 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2156
2157 #elif powerpc_TARGET_ARCH
2158
2159 -- almost the same as everywhere else - but we need to
2160 -- extend small integers to 32 bit first
2161
2162 getCondCode (CmmMachOp mop [x, y])
2163   = case mop of
2164       MO_Eq F32 -> condFltCode EQQ x y
2165       MO_Ne F32 -> condFltCode NE  x y
2166
2167       MO_S_Gt F32 -> condFltCode GTT x y
2168       MO_S_Ge F32 -> condFltCode GE  x y
2169       MO_S_Lt F32 -> condFltCode LTT x y
2170       MO_S_Le F32 -> condFltCode LE  x y
2171
2172       MO_Eq F64 -> condFltCode EQQ x y
2173       MO_Ne F64 -> condFltCode NE  x y
2174
2175       MO_S_Gt F64 -> condFltCode GTT x y
2176       MO_S_Ge F64 -> condFltCode GE  x y
2177       MO_S_Lt F64 -> condFltCode LTT x y
2178       MO_S_Le F64 -> condFltCode LE  x y
2179
2180       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2181       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2182
2183       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2184       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2185       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2186       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2187
2188       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2189       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2190       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2191       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2192
2193       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2194
2195 getCondCode other =  panic "getCondCode(2)(powerpc)"
2196
2197
2198 #endif
2199
2200
2201 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2202 -- passed back up the tree.
2203
2204 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2205
2206 #if alpha_TARGET_ARCH
2207 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2208 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2209 #endif /* alpha_TARGET_ARCH */
2210
2211 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2212 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2213
2214 -- memory vs immediate
2215 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2216     Amode x_addr x_code <- getAmode x
2217     let
2218         imm  = litToImm lit
2219         code = x_code `snocOL`
2220                   CMP pk (OpImm imm) (OpAddr x_addr)
2221     --
2222     return (CondCode False cond code)
2223
2224 -- anything vs zero, using a mask
2225 -- TODO: Add some sanity checking!!!!
2226 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2227     | (CmmLit (CmmInt mask pk2)) <- o2
2228     = do
2229       (x_reg, x_code) <- getSomeReg x
2230       let
2231          code = x_code `snocOL`
2232                 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2233       --
2234       return (CondCode False cond code)
2235
2236 -- anything vs zero
2237 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2238     (x_reg, x_code) <- getSomeReg x
2239     let
2240         code = x_code `snocOL`
2241                   TEST pk (OpReg x_reg) (OpReg x_reg)
2242     --
2243     return (CondCode False cond code)
2244
2245 -- anything vs operand
2246 condIntCode cond x y | isOperand y = do
2247     (x_reg, x_code) <- getNonClobberedReg x
2248     (y_op,  y_code) <- getOperand y
2249     let
2250         code = x_code `appOL` y_code `snocOL`
2251                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2252     -- in
2253     return (CondCode False cond code)
2254
2255 -- anything vs anything
2256 condIntCode cond x y = do
2257   (y_reg, y_code) <- getNonClobberedReg y
2258   (x_op, x_code) <- getRegOrMem x
2259   let
2260         code = y_code `appOL`
2261                x_code `snocOL`
2262                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2263   -- in
2264   return (CondCode False cond code)
2265 #endif
2266
2267 #if i386_TARGET_ARCH
2268 condFltCode cond x y
2269   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2270   (x_reg, x_code) <- getNonClobberedReg x
2271   (y_reg, y_code) <- getSomeReg y
2272   let
2273         code = x_code `appOL` y_code `snocOL`
2274                 GCMP cond x_reg y_reg
2275   -- The GCMP insn does the test and sets the zero flag if comparable
2276   -- and true.  Hence we always supply EQQ as the condition to test.
2277   return (CondCode True EQQ code)
2278 #endif /* i386_TARGET_ARCH */
2279
2280 #if x86_64_TARGET_ARCH
2281 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2282 -- an operand, but the right must be a reg.  We can probably do better
2283 -- than this general case...
2284 condFltCode cond x y = do
2285   (x_reg, x_code) <- getNonClobberedReg x
2286   (y_op, y_code) <- getOperand y
2287   let
2288         code = x_code `appOL`
2289                y_code `snocOL`
2290                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2291         -- NB(1): we need to use the unsigned comparison operators on the
2292         -- result of this comparison.
2293   -- in
2294   return (CondCode True (condToUnsigned cond) code)
2295 #endif
2296
2297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2298
2299 #if sparc_TARGET_ARCH
2300
2301 condIntCode cond x (CmmLit (CmmInt y rep))
2302   | fits13Bits y
2303   = do
2304        (src1, code) <- getSomeReg x
2305        let
2306            src2 = ImmInt (fromInteger y)
2307            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2308        return (CondCode False cond code')
2309
2310 condIntCode cond x y = do
2311     (src1, code1) <- getSomeReg x
2312     (src2, code2) <- getSomeReg y
2313     let
2314         code__2 = code1 `appOL` code2 `snocOL`
2315                   SUB False True src1 (RIReg src2) g0
2316     return (CondCode False cond code__2)
2317
2318 -----------
2319 condFltCode cond x y = do
2320     (src1, code1) <- getSomeReg x
2321     (src2, code2) <- getSomeReg y
2322     tmp <- getNewRegNat F64
2323     let
2324         promote x = FxTOy F32 F64 x tmp
2325
2326         pk1   = cmmExprRep x
2327         pk2   = cmmExprRep y
2328
2329         code__2 =
2330                 if pk1 == pk2 then
2331                     code1 `appOL` code2 `snocOL`
2332                     FCMP True pk1 src1 src2
2333                 else if pk1 == F32 then
2334                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2335                     FCMP True F64 tmp src2
2336                 else
2337                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2338                     FCMP True F64 src1 tmp
2339     return (CondCode True cond code__2)
2340
2341 #endif /* sparc_TARGET_ARCH */
2342
2343 #if powerpc_TARGET_ARCH
2344 --  ###FIXME: I16 and I8!
2345 condIntCode cond x (CmmLit (CmmInt y rep))
2346   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2347   = do
2348         (src1, code) <- getSomeReg x
2349         let
2350             code' = code `snocOL`
2351                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2352         return (CondCode False cond code')
2353
2354 condIntCode cond x y = do
2355     (src1, code1) <- getSomeReg x
2356     (src2, code2) <- getSomeReg y
2357     let
2358         code' = code1 `appOL` code2 `snocOL`
2359                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2360     return (CondCode False cond code')
2361
2362 condFltCode cond x y = do
2363     (src1, code1) <- getSomeReg x
2364     (src2, code2) <- getSomeReg y
2365     let
2366         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2367         code'' = case cond of -- twiddle CR to handle unordered case
2368                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2369                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2370                     _ -> code'
2371                  where
2372                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2373     return (CondCode True cond code'')
2374
2375 #endif /* powerpc_TARGET_ARCH */
2376
2377 -- -----------------------------------------------------------------------------
2378 -- Generating assignments
2379
2380 -- Assignments are really at the heart of the whole code generation
2381 -- business.  Almost all top-level nodes of any real importance are
2382 -- assignments, which correspond to loads, stores, or register
2383 -- transfers.  If we're really lucky, some of the register transfers
2384 -- will go away, because we can use the destination register to
2385 -- complete the code generation for the right hand side.  This only
2386 -- fails when the right hand side is forced into a fixed register
2387 -- (e.g. the result of a call).
2388
2389 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2390 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2391
2392 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2393 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2394
2395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2396
2397 #if alpha_TARGET_ARCH
2398
2399 assignIntCode pk (CmmLoad dst _) src
2400   = getNewRegNat IntRep             `thenNat` \ tmp ->
2401     getAmode dst                    `thenNat` \ amode ->
2402     getRegister src                 `thenNat` \ register ->
2403     let
2404         code1   = amodeCode amode []
2405         dst__2  = amodeAddr amode
2406         code2   = registerCode register tmp []
2407         src__2  = registerName register tmp
2408         sz      = primRepToSize pk
2409         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2410     in
2411     return code__2
2412
2413 assignIntCode pk dst src
2414   = getRegister dst                         `thenNat` \ register1 ->
2415     getRegister src                         `thenNat` \ register2 ->
2416     let
2417         dst__2  = registerName register1 zeroh
2418         code    = registerCode register2 dst__2
2419         src__2  = registerName register2 dst__2
2420         code__2 = if isFixed register2
2421                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2422                   else code
2423     in
2424     return code__2
2425
2426 #endif /* alpha_TARGET_ARCH */
2427
2428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2429
2430 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2431
2432 -- integer assignment to memory
2433
2434 -- specific case of adding/subtracting an integer to a particular address.
2435 -- ToDo: catch other cases where we can use an operation directly on a memory
2436 -- address.
2437 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2438                                                  CmmLit (CmmInt i _)])
2439    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2440      Just instr <- check op
2441    = do Amode amode code_addr <- getAmode addr
2442         let code = code_addr `snocOL`
2443                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2444         return code
2445    where
2446         check (MO_Add _) = Just ADD
2447         check (MO_Sub _) = Just SUB
2448         check _ = Nothing
2449         -- ToDo: more?
2450
2451 -- general case
2452 assignMem_IntCode pk addr src = do
2453     Amode addr code_addr <- getAmode addr
2454     (code_src, op_src)   <- get_op_RI src
2455     let
2456         code = code_src `appOL`
2457                code_addr `snocOL`
2458                   MOV pk op_src (OpAddr addr)
2459         -- NOTE: op_src is stable, so it will still be valid
2460         -- after code_addr.  This may involve the introduction
2461         -- of an extra MOV to a temporary register, but we hope
2462         -- the register allocator will get rid of it.
2463     --
2464     return code
2465   where
2466     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2467     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2468       = return (nilOL, OpImm (litToImm lit))
2469     get_op_RI op
2470       = do (reg,code) <- getNonClobberedReg op
2471            return (code, OpReg reg)
2472
2473
2474 -- Assign; dst is a reg, rhs is mem
2475 assignReg_IntCode pk reg (CmmLoad src _) = do
2476   load_code <- intLoadCode (MOV pk) src
2477   return (load_code (getRegisterReg reg))
2478
2479 -- dst is a reg, but src could be anything
2480 assignReg_IntCode pk reg src = do
2481   code <- getAnyReg src
2482   return (code (getRegisterReg reg))
2483
2484 #endif /* i386_TARGET_ARCH */
2485
2486 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2487
2488 #if sparc_TARGET_ARCH
2489
2490 assignMem_IntCode pk addr src = do
2491     (srcReg, code) <- getSomeReg src
2492     Amode dstAddr addr_code <- getAmode addr
2493     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2494
2495 assignReg_IntCode pk reg src = do
2496     r <- getRegister src
2497     return $ case r of
2498         Any _ code         -> code dst
2499         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2500     where
2501       dst = getRegisterReg reg
2502
2503
2504 #endif /* sparc_TARGET_ARCH */
2505
2506 #if powerpc_TARGET_ARCH
2507
2508 assignMem_IntCode pk addr src = do
2509     (srcReg, code) <- getSomeReg src
2510     Amode dstAddr addr_code <- getAmode addr
2511     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2512
2513 -- dst is a reg, but src could be anything
2514 assignReg_IntCode pk reg src
2515     = do
2516         r <- getRegister src
2517         return $ case r of
2518             Any _ code         -> code dst
2519             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2520     where
2521         dst = getRegisterReg reg
2522
2523 #endif /* powerpc_TARGET_ARCH */
2524
2525
2526 -- -----------------------------------------------------------------------------
2527 -- Floating-point assignments
2528
2529 #if alpha_TARGET_ARCH
2530
2531 assignFltCode pk (CmmLoad dst _) src
2532   = getNewRegNat pk                 `thenNat` \ tmp ->
2533     getAmode dst                    `thenNat` \ amode ->
2534     getRegister src                         `thenNat` \ register ->
2535     let
2536         code1   = amodeCode amode []
2537         dst__2  = amodeAddr amode
2538         code2   = registerCode register tmp []
2539         src__2  = registerName register tmp
2540         sz      = primRepToSize pk
2541         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2542     in
2543     return code__2
2544
2545 assignFltCode pk dst src
2546   = getRegister dst                         `thenNat` \ register1 ->
2547     getRegister src                         `thenNat` \ register2 ->
2548     let
2549         dst__2  = registerName register1 zeroh
2550         code    = registerCode register2 dst__2
2551         src__2  = registerName register2 dst__2
2552         code__2 = if isFixed register2
2553                   then code . mkSeqInstr (FMOV src__2 dst__2)
2554                   else code
2555     in
2556     return code__2
2557
2558 #endif /* alpha_TARGET_ARCH */
2559
2560 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2561
2562 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2563
2564 -- Floating point assignment to memory
2565 assignMem_FltCode pk addr src = do
2566   (src_reg, src_code) <- getNonClobberedReg src
2567   Amode addr addr_code <- getAmode addr
2568   let
2569         code = src_code `appOL`
2570                addr_code `snocOL`
2571                 IF_ARCH_i386(GST pk src_reg addr,
2572                              MOV pk (OpReg src_reg) (OpAddr addr))
2573   return code
2574
2575 -- Floating point assignment to a register/temporary
2576 assignReg_FltCode pk reg src = do
2577   src_code <- getAnyReg src
2578   return (src_code (getRegisterReg reg))
2579
2580 #endif /* i386_TARGET_ARCH */
2581
2582 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2583
2584 #if sparc_TARGET_ARCH
2585
2586 -- Floating point assignment to memory
2587 assignMem_FltCode pk addr src = do
2588     Amode dst__2 code1 <- getAmode addr
2589     (src__2, code2) <- getSomeReg src
2590     tmp1 <- getNewRegNat pk
2591     let
2592         pk__2   = cmmExprRep src
2593         code__2 = code1 `appOL` code2 `appOL`
2594             if   pk == pk__2
2595             then unitOL (ST pk src__2 dst__2)
2596             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2597     return code__2
2598
2599 -- Floating point assignment to a register/temporary
2600 -- ToDo: Verify correctness
2601 assignReg_FltCode pk reg src = do
2602     r <- getRegister src
2603     v1 <- getNewRegNat pk
2604     return $ case r of
2605         Any _ code         -> code dst
2606         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2607     where
2608       dst = getRegisterReg reg
2609
2610 #endif /* sparc_TARGET_ARCH */
2611
2612 #if powerpc_TARGET_ARCH
2613
2614 -- Easy, isn't it?
2615 assignMem_FltCode = assignMem_IntCode
2616 assignReg_FltCode = assignReg_IntCode
2617
2618 #endif /* powerpc_TARGET_ARCH */
2619
2620
2621 -- -----------------------------------------------------------------------------
2622 -- Generating an non-local jump
2623
2624 -- (If applicable) Do not fill the delay slots here; you will confuse the
2625 -- register allocator.
2626
2627 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2628
2629 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2630
2631 #if alpha_TARGET_ARCH
2632
2633 genJump (CmmLabel lbl)
2634   | isAsmTemp lbl = returnInstr (BR target)
2635   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2636   where
2637     target = ImmCLbl lbl
2638
2639 genJump tree
2640   = getRegister tree                `thenNat` \ register ->
2641     getNewRegNat PtrRep             `thenNat` \ tmp ->
2642     let
2643         dst    = registerName register pv
2644         code   = registerCode register pv
2645         target = registerName register pv
2646     in
2647     if isFixed register then
2648         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2649     else
2650     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2651
2652 #endif /* alpha_TARGET_ARCH */
2653
2654 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2655
2656 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2657
2658 genJump (CmmLoad mem pk) = do
2659   Amode target code <- getAmode mem
2660   return (code `snocOL` JMP (OpAddr target))
2661
2662 genJump (CmmLit lit) = do
2663   return (unitOL (JMP (OpImm (litToImm lit))))
2664
2665 genJump expr = do
2666   (reg,code) <- getSomeReg expr
2667   return (code `snocOL` JMP (OpReg reg))
2668
2669 #endif /* i386_TARGET_ARCH */
2670
2671 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2672
2673 #if sparc_TARGET_ARCH
2674
2675 genJump (CmmLit (CmmLabel lbl))
2676   = return (toOL [CALL (Left target) 0 True, NOP])
2677   where
2678     target = ImmCLbl lbl
2679
2680 genJump tree
2681   = do
2682         (target, code) <- getSomeReg tree
2683         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2684
2685 #endif /* sparc_TARGET_ARCH */
2686
2687 #if powerpc_TARGET_ARCH
2688 genJump (CmmLit (CmmLabel lbl))
2689   = return (unitOL $ JMP lbl)
2690
2691 genJump tree
2692   = do
2693         (target,code) <- getSomeReg tree
2694         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2695 #endif /* powerpc_TARGET_ARCH */
2696
2697
2698 -- -----------------------------------------------------------------------------
2699 --  Unconditional branches
2700
2701 genBranch :: BlockId -> NatM InstrBlock
2702
2703 genBranch = return . toOL . mkBranchInstr
2704
2705 -- -----------------------------------------------------------------------------
2706 --  Conditional jumps
2707
2708 {-
2709 Conditional jumps are always to local labels, so we can use branch
2710 instructions.  We peek at the arguments to decide what kind of
2711 comparison to do.
2712
2713 ALPHA: For comparisons with 0, we're laughing, because we can just do
2714 the desired conditional branch.
2715
2716 I386: First, we have to ensure that the condition
2717 codes are set according to the supplied comparison operation.
2718
2719 SPARC: First, we have to ensure that the condition codes are set
2720 according to the supplied comparison operation.  We generate slightly
2721 different code for floating point comparisons, because a floating
2722 point operation cannot directly precede a @BF@.  We assume the worst
2723 and fill that slot with a @NOP@.
2724
2725 SPARC: Do not fill the delay slots here; you will confuse the register
2726 allocator.
2727 -}
2728
2729
2730 genCondJump
2731     :: BlockId      -- the branch target
2732     -> CmmExpr      -- the condition on which to branch
2733     -> NatM InstrBlock
2734
2735 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2736
2737 #if alpha_TARGET_ARCH
2738
2739 genCondJump id (StPrim op [x, StInt 0])
2740   = getRegister x                           `thenNat` \ register ->
2741     getNewRegNat (registerRep register)
2742                                     `thenNat` \ tmp ->
2743     let
2744         code   = registerCode register tmp
2745         value  = registerName register tmp
2746         pk     = registerRep register
2747         target = ImmCLbl lbl
2748     in
2749     returnSeq code [BI (cmpOp op) value target]
2750   where
2751     cmpOp CharGtOp = GTT
2752     cmpOp CharGeOp = GE
2753     cmpOp CharEqOp = EQQ
2754     cmpOp CharNeOp = NE
2755     cmpOp CharLtOp = LTT
2756     cmpOp CharLeOp = LE
2757     cmpOp IntGtOp = GTT
2758     cmpOp IntGeOp = GE
2759     cmpOp IntEqOp = EQQ
2760     cmpOp IntNeOp = NE
2761     cmpOp IntLtOp = LTT
2762     cmpOp IntLeOp = LE
2763     cmpOp WordGtOp = NE
2764     cmpOp WordGeOp = ALWAYS
2765     cmpOp WordEqOp = EQQ
2766     cmpOp WordNeOp = NE
2767     cmpOp WordLtOp = NEVER
2768     cmpOp WordLeOp = EQQ
2769     cmpOp AddrGtOp = NE
2770     cmpOp AddrGeOp = ALWAYS
2771     cmpOp AddrEqOp = EQQ
2772     cmpOp AddrNeOp = NE
2773     cmpOp AddrLtOp = NEVER
2774     cmpOp AddrLeOp = EQQ
2775
2776 genCondJump lbl (StPrim op [x, StDouble 0.0])
2777   = getRegister x                           `thenNat` \ register ->
2778     getNewRegNat (registerRep register)
2779                                     `thenNat` \ tmp ->
2780     let
2781         code   = registerCode register tmp
2782         value  = registerName register tmp
2783         pk     = registerRep register
2784         target = ImmCLbl lbl
2785     in
2786     return (code . mkSeqInstr (BF (cmpOp op) value target))
2787   where
2788     cmpOp FloatGtOp = GTT
2789     cmpOp FloatGeOp = GE
2790     cmpOp FloatEqOp = EQQ
2791     cmpOp FloatNeOp = NE
2792     cmpOp FloatLtOp = LTT
2793     cmpOp FloatLeOp = LE
2794     cmpOp DoubleGtOp = GTT
2795     cmpOp DoubleGeOp = GE
2796     cmpOp DoubleEqOp = EQQ
2797     cmpOp DoubleNeOp = NE
2798     cmpOp DoubleLtOp = LTT
2799     cmpOp DoubleLeOp = LE
2800
2801 genCondJump lbl (StPrim op [x, y])
2802   | fltCmpOp op
2803   = trivialFCode pr instr x y       `thenNat` \ register ->
2804     getNewRegNat F64                `thenNat` \ tmp ->
2805     let
2806         code   = registerCode register tmp
2807         result = registerName register tmp
2808         target = ImmCLbl lbl
2809     in
2810     return (code . mkSeqInstr (BF cond result target))
2811   where
2812     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2813
2814     fltCmpOp op = case op of
2815         FloatGtOp -> True
2816         FloatGeOp -> True
2817         FloatEqOp -> True
2818         FloatNeOp -> True
2819         FloatLtOp -> True
2820         FloatLeOp -> True
2821         DoubleGtOp -> True
2822         DoubleGeOp -> True
2823         DoubleEqOp -> True
2824         DoubleNeOp -> True
2825         DoubleLtOp -> True
2826         DoubleLeOp -> True
2827         _ -> False
2828     (instr, cond) = case op of
2829         FloatGtOp -> (FCMP TF LE, EQQ)
2830         FloatGeOp -> (FCMP TF LTT, EQQ)
2831         FloatEqOp -> (FCMP TF EQQ, NE)
2832         FloatNeOp -> (FCMP TF EQQ, EQQ)
2833         FloatLtOp -> (FCMP TF LTT, NE)
2834         FloatLeOp -> (FCMP TF LE, NE)
2835         DoubleGtOp -> (FCMP TF LE, EQQ)
2836         DoubleGeOp -> (FCMP TF LTT, EQQ)
2837         DoubleEqOp -> (FCMP TF EQQ, NE)
2838         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2839         DoubleLtOp -> (FCMP TF LTT, NE)
2840         DoubleLeOp -> (FCMP TF LE, NE)
2841
2842 genCondJump lbl (StPrim op [x, y])
2843   = trivialCode instr x y           `thenNat` \ register ->
2844     getNewRegNat IntRep             `thenNat` \ tmp ->
2845     let
2846         code   = registerCode register tmp
2847         result = registerName register tmp
2848         target = ImmCLbl lbl
2849     in
2850     return (code . mkSeqInstr (BI cond result target))
2851   where
2852     (instr, cond) = case op of
2853         CharGtOp -> (CMP LE, EQQ)
2854         CharGeOp -> (CMP LTT, EQQ)
2855         CharEqOp -> (CMP EQQ, NE)
2856         CharNeOp -> (CMP EQQ, EQQ)
2857         CharLtOp -> (CMP LTT, NE)
2858         CharLeOp -> (CMP LE, NE)
2859         IntGtOp -> (CMP LE, EQQ)
2860         IntGeOp -> (CMP LTT, EQQ)
2861         IntEqOp -> (CMP EQQ, NE)
2862         IntNeOp -> (CMP EQQ, EQQ)
2863         IntLtOp -> (CMP LTT, NE)
2864         IntLeOp -> (CMP LE, NE)
2865         WordGtOp -> (CMP ULE, EQQ)
2866         WordGeOp -> (CMP ULT, EQQ)
2867         WordEqOp -> (CMP EQQ, NE)
2868         WordNeOp -> (CMP EQQ, EQQ)
2869         WordLtOp -> (CMP ULT, NE)
2870         WordLeOp -> (CMP ULE, NE)
2871         AddrGtOp -> (CMP ULE, EQQ)
2872         AddrGeOp -> (CMP ULT, EQQ)
2873         AddrEqOp -> (CMP EQQ, NE)
2874         AddrNeOp -> (CMP EQQ, EQQ)
2875         AddrLtOp -> (CMP ULT, NE)
2876         AddrLeOp -> (CMP ULE, NE)
2877
2878 #endif /* alpha_TARGET_ARCH */
2879
2880 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2881
2882 #if i386_TARGET_ARCH
2883
2884 genCondJump id bool = do
2885   CondCode _ cond code <- getCondCode bool
2886   return (code `snocOL` JXX cond id)
2887
2888 #endif
2889
2890 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2891
2892 #if x86_64_TARGET_ARCH
2893
2894 genCondJump id bool = do
2895   CondCode is_float cond cond_code <- getCondCode bool
2896   if not is_float
2897     then
2898         return (cond_code `snocOL` JXX cond id)
2899     else do
2900         lbl <- getBlockIdNat
2901
2902         -- see comment with condFltReg
2903         let code = case cond of
2904                         NE  -> or_unordered
2905                         GU  -> plain_test
2906                         GEU -> plain_test
2907                         _   -> and_ordered
2908
2909             plain_test = unitOL (
2910                   JXX cond id
2911                 )
2912             or_unordered = toOL [
2913                   JXX cond id,
2914                   JXX PARITY id
2915                 ]
2916             and_ordered = toOL [
2917                   JXX PARITY lbl,
2918                   JXX cond id,
2919                   JXX ALWAYS lbl,
2920                   NEWBLOCK lbl
2921                 ]
2922         return (cond_code `appOL` code)
2923
2924 #endif
2925
2926 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2927
2928 #if sparc_TARGET_ARCH
2929
2930 genCondJump (BlockId id) bool = do
2931   CondCode is_float cond code <- getCondCode bool
2932   return (
2933        code `appOL`
2934        toOL (
2935          if   is_float
2936          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2937          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2938        )
2939     )
2940
2941 #endif /* sparc_TARGET_ARCH */
2942
2943
2944 #if powerpc_TARGET_ARCH
2945
2946 genCondJump id bool = do
2947   CondCode is_float cond code <- getCondCode bool
2948   return (code `snocOL` BCC cond id)
2949
2950 #endif /* powerpc_TARGET_ARCH */
2951
2952
2953 -- -----------------------------------------------------------------------------
2954 --  Generating C calls
2955
2956 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2957 -- @get_arg@, which moves the arguments to the correct registers/stack
2958 -- locations.  Apart from that, the code is easy.
2959 --
2960 -- (If applicable) Do not fill the delay slots here; you will confuse the
2961 -- register allocator.
2962
2963 genCCall
2964     :: CmmCallTarget            -- function to call
2965     -> CmmHintFormals           -- where to put the result
2966     -> CmmActuals               -- arguments (of mixed type)
2967     -> NatM InstrBlock
2968
2969 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2970
2971 #if alpha_TARGET_ARCH
2972
2973 ccallResultRegs =
2974
2975 genCCall fn cconv result_regs args
2976   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2977                           `thenNat` \ ((unused,_), argCode) ->
2978     let
2979         nRegs = length allArgRegs - length unused
2980         code = asmSeqThen (map ($ []) argCode)
2981     in
2982         returnSeq code [
2983             LDA pv (AddrImm (ImmLab (ptext fn))),
2984             JSR ra (AddrReg pv) nRegs,
2985             LDGP gp (AddrReg ra)]
2986   where
2987     ------------------------
2988     {-  Try to get a value into a specific register (or registers) for
2989         a call.  The first 6 arguments go into the appropriate
2990         argument register (separate registers for integer and floating
2991         point arguments, but used in lock-step), and the remaining
2992         arguments are dumped to the stack, beginning at 0(sp).  Our
2993         first argument is a pair of the list of remaining argument
2994         registers to be assigned for this call and the next stack
2995         offset to use for overflowing arguments.  This way,
2996         @get_Arg@ can be applied to all of a call's arguments using
2997         @mapAccumLNat@.
2998     -}
2999     get_arg
3000         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3001         -> StixTree             -- Current argument
3002         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3003
3004     -- We have to use up all of our argument registers first...
3005
3006     get_arg ((iDst,fDst):dsts, offset) arg
3007       = getRegister arg                     `thenNat` \ register ->
3008         let
3009             reg  = if isFloatingRep pk then fDst else iDst
3010             code = registerCode register reg
3011             src  = registerName register reg
3012             pk   = registerRep register
3013         in
3014         return (
3015             if isFloatingRep pk then
3016                 ((dsts, offset), if isFixed register then
3017                     code . mkSeqInstr (FMOV src fDst)
3018                     else code)
3019             else
3020                 ((dsts, offset), if isFixed register then
3021                     code . mkSeqInstr (OR src (RIReg src) iDst)
3022                     else code))
3023
3024     -- Once we have run out of argument registers, we move to the
3025     -- stack...
3026
3027     get_arg ([], offset) arg
3028       = getRegister arg                 `thenNat` \ register ->
3029         getNewRegNat (registerRep register)
3030                                         `thenNat` \ tmp ->
3031         let
3032             code = registerCode register tmp
3033             src  = registerName register tmp
3034             pk   = registerRep register
3035             sz   = primRepToSize pk
3036         in
3037         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3038
3039 #endif /* alpha_TARGET_ARCH */
3040
3041 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3042
3043 #if i386_TARGET_ARCH
3044
3045 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3046         -- write barrier compiles to no code on x86/x86-64;
3047         -- we keep it this long in order to prevent earlier optimisations.
3048
3049 -- we only cope with a single result for foreign calls
3050 genCCall (CmmPrim op) [(r,_)] args = do
3051   case op of
3052         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3053         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3054
3055         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3056         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3057
3058         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3059         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3060
3061         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3062         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3063
3064         other_op    -> outOfLineFloatOp op r args
3065  where
3066   actuallyInlineFloatOp rep instr [(x,_)]
3067         = do res <- trivialUFCode rep instr x
3068              any <- anyReg res
3069              return (any (getRegisterReg (CmmLocal r)))
3070
3071 genCCall target dest_regs args = do
3072     let
3073         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
3074 #if !darwin_TARGET_OS
3075         tot_arg_size        = sum sizes
3076 #else
3077         raw_arg_size        = sum sizes
3078         tot_arg_size        = roundTo 16 raw_arg_size
3079         arg_pad_size        = tot_arg_size - raw_arg_size
3080     delta0 <- getDeltaNat
3081     setDeltaNat (delta0 - arg_pad_size)
3082 #endif
3083
3084     push_codes <- mapM push_arg (reverse args)
3085     delta <- getDeltaNat
3086
3087     -- in
3088     -- deal with static vs dynamic call targets
3089     (callinsns,cconv) <-
3090       case target of
3091         -- CmmPrim -> ...
3092         CmmCallee (CmmLit (CmmLabel lbl)) conv
3093            -> -- ToDo: stdcall arg sizes
3094               return (unitOL (CALL (Left fn_imm) []), conv)
3095            where fn_imm = ImmCLbl lbl
3096         CmmCallee expr conv
3097            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3098                  ASSERT(dyn_rep == I32)
3099                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3100
3101     let push_code
3102 #if darwin_TARGET_OS
3103             | arg_pad_size /= 0
3104             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3105                     DELTA (delta0 - arg_pad_size)]
3106               `appOL` concatOL push_codes
3107             | otherwise
3108 #endif
3109             = concatOL push_codes
3110         call = callinsns `appOL`
3111                toOL (
3112                         -- Deallocate parameters after call for ccall;
3113                         -- but not for stdcall (callee does it)
3114                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3115                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3116                   ++
3117                   [DELTA (delta + tot_arg_size)]
3118                )
3119     -- in
3120     setDeltaNat (delta + tot_arg_size)
3121
3122     let
3123         -- assign the results, if necessary
3124         assign_code []     = nilOL
3125         assign_code [(dest,_hint)] =
3126           case rep of
3127                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3128                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3129                 F32 -> unitOL (GMOV fake0 r_dest)
3130                 F64 -> unitOL (GMOV fake0 r_dest)
3131                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3132           where
3133                 r_dest_hi = getHiVRegFromLo r_dest
3134                 rep = localRegRep dest
3135                 r_dest = getRegisterReg (CmmLocal dest)
3136         assign_code many = panic "genCCall.assign_code many"
3137
3138     return (push_code `appOL`
3139             call `appOL`
3140             assign_code dest_regs)
3141
3142   where
3143     arg_size F64 = 8
3144     arg_size F32 = 4
3145     arg_size I64 = 8
3146     arg_size _   = 4
3147
3148     roundTo a x | x `mod` a == 0 = x
3149                 | otherwise = x + a - (x `mod` a)
3150
3151
3152     push_arg :: (CmmExpr,MachHint){-current argument-}
3153                     -> NatM InstrBlock  -- code
3154
3155     push_arg (arg,_hint) -- we don't need the hints on x86
3156       | arg_rep == I64 = do
3157         ChildCode64 code r_lo <- iselExpr64 arg
3158         delta <- getDeltaNat
3159         setDeltaNat (delta - 8)
3160         let
3161             r_hi = getHiVRegFromLo r_lo
3162         -- in
3163         return (       code `appOL`
3164                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3165                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3166                              DELTA (delta-8)]
3167             )
3168
3169       | otherwise = do
3170         (code, reg, sz) <- get_op arg
3171         delta <- getDeltaNat
3172         let size = arg_size sz
3173         setDeltaNat (delta-size)
3174         if (case sz of F64 -> True; F32 -> True; _ -> False)
3175            then return (code `appOL`
3176                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3177                               DELTA (delta-size),
3178                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3179                                                         EAIndexNone
3180                                                         (ImmInt 0))]
3181                        )
3182            else return (code `snocOL`
3183                         PUSH I32 (OpReg reg) `snocOL`
3184                         DELTA (delta-size)
3185                        )
3186       where
3187          arg_rep = cmmExprRep arg
3188
3189     ------------
3190     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3191     get_op op = do
3192         (reg,code) <- getSomeReg op
3193         return (code, reg, cmmExprRep op)
3194
3195 #endif /* i386_TARGET_ARCH */
3196
3197 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3198
3199 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
3200   -> NatM InstrBlock
3201 outOfLineFloatOp mop res args
3202   = do
3203       dflags <- getDynFlagsNat
3204       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3205       let target = CmmCallee targetExpr CCallConv
3206
3207       if localRegRep res == F64
3208         then
3209           stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe CmmMayReturn)
3210         else do
3211           uq <- getUniqueNat
3212           let
3213             tmp = LocalReg uq F64 KindNonPtr
3214           -- in
3215           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe CmmMayReturn)
3216           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3217           return (code1 `appOL` code2)
3218   where
3219         lbl = mkForeignLabel fn Nothing False
3220
3221         fn = case mop of
3222               MO_F32_Sqrt  -> FSLIT("sqrtf")
3223               MO_F32_Sin   -> FSLIT("sinf")
3224               MO_F32_Cos   -> FSLIT("cosf")
3225               MO_F32_Tan   -> FSLIT("tanf")
3226               MO_F32_Exp   -> FSLIT("expf")
3227               MO_F32_Log   -> FSLIT("logf")
3228
3229               MO_F32_Asin  -> FSLIT("asinf")
3230               MO_F32_Acos  -> FSLIT("acosf")
3231               MO_F32_Atan  -> FSLIT("atanf")
3232
3233               MO_F32_Sinh  -> FSLIT("sinhf")
3234               MO_F32_Cosh  -> FSLIT("coshf")
3235               MO_F32_Tanh  -> FSLIT("tanhf")
3236               MO_F32_Pwr   -> FSLIT("powf")
3237
3238               MO_F64_Sqrt  -> FSLIT("sqrt")
3239               MO_F64_Sin   -> FSLIT("sin")
3240               MO_F64_Cos   -> FSLIT("cos")
3241               MO_F64_Tan   -> FSLIT("tan")
3242               MO_F64_Exp   -> FSLIT("exp")
3243               MO_F64_Log   -> FSLIT("log")
3244
3245               MO_F64_Asin  -> FSLIT("asin")
3246               MO_F64_Acos  -> FSLIT("acos")
3247               MO_F64_Atan  -> FSLIT("atan")
3248
3249               MO_F64_Sinh  -> FSLIT("sinh")
3250               MO_F64_Cosh  -> FSLIT("cosh")
3251               MO_F64_Tanh  -> FSLIT("tanh")
3252               MO_F64_Pwr   -> FSLIT("pow")
3253
3254 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3255
3256 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3257
3258 #if x86_64_TARGET_ARCH
3259
3260 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3261         -- write barrier compiles to no code on x86/x86-64;
3262         -- we keep it this long in order to prevent earlier optimisations.
3263
3264 genCCall (CmmPrim op) [(r,_)] args =
3265   outOfLineFloatOp op r args
3266
3267 genCCall target dest_regs args = do
3268
3269         -- load up the register arguments
3270     (stack_args, aregs, fregs, load_args_code)
3271          <- load_args args allArgRegs allFPArgRegs nilOL
3272
3273     let
3274         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3275         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3276         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3277                 -- for annotating the call instruction with
3278
3279         sse_regs = length fp_regs_used
3280
3281         tot_arg_size = arg_size * length stack_args
3282
3283         -- On entry to the called function, %rsp should be aligned
3284         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3285         -- the return address is 16-byte aligned).  In STG land
3286         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3287         -- need to make sure we push a multiple of 16-bytes of args,
3288         -- plus the return address, to get the correct alignment.
3289         -- Urg, this is hard.  We need to feed the delta back into
3290         -- the arg pushing code.
3291     (real_size, adjust_rsp) <-
3292         if tot_arg_size `rem` 16 == 0
3293             then return (tot_arg_size, nilOL)
3294             else do -- we need to adjust...
3295                 delta <- getDeltaNat
3296                 setDeltaNat (delta-8)
3297                 return (tot_arg_size+8, toOL [
3298                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3299                                 DELTA (delta-8)
3300                         ])
3301
3302         -- push the stack args, right to left
3303     push_code <- push_args (reverse stack_args) nilOL
3304     delta <- getDeltaNat
3305
3306     -- deal with static vs dynamic call targets
3307     (callinsns,cconv) <-
3308       case target of
3309         -- CmmPrim -> ...
3310         CmmCallee (CmmLit (CmmLabel lbl)) conv
3311            -> -- ToDo: stdcall arg sizes
3312               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3313            where fn_imm = ImmCLbl lbl
3314         CmmCallee expr conv
3315            -> do (dyn_r, dyn_c) <- getSomeReg expr
3316                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3317
3318     let
3319         -- The x86_64 ABI requires us to set %al to the number of SSE
3320         -- registers that contain arguments, if the called routine
3321         -- is a varargs function.  We don't know whether it's a
3322         -- varargs function or not, so we have to assume it is.
3323         --
3324         -- It's not safe to omit this assignment, even if the number
3325         -- of SSE regs in use is zero.  If %al is larger than 8
3326         -- on entry to a varargs function, seg faults ensue.
3327         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3328
3329     let call = callinsns `appOL`
3330                toOL (
3331                         -- Deallocate parameters after call for ccall;
3332                         -- but not for stdcall (callee does it)
3333                   (if cconv == StdCallConv || real_size==0 then [] else
3334                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3335                   ++
3336                   [DELTA (delta + real_size)]
3337                )
3338     -- in
3339     setDeltaNat (delta + real_size)
3340
3341     let
3342         -- assign the results, if necessary
3343         assign_code []     = nilOL
3344         assign_code [(dest,_hint)] =
3345           case rep of
3346                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3347                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3348                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3349           where
3350                 rep = localRegRep dest
3351                 r_dest = getRegisterReg (CmmLocal dest)
3352         assign_code many = panic "genCCall.assign_code many"
3353
3354     return (load_args_code      `appOL`
3355             adjust_rsp          `appOL`
3356             push_code           `appOL`
3357             assign_eax sse_regs `appOL`
3358             call                `appOL`
3359             assign_code dest_regs)
3360
3361   where
3362     arg_size = 8 -- always, at the mo
3363
3364     load_args :: [(CmmExpr,MachHint)]
3365               -> [Reg]                  -- int regs avail for args
3366               -> [Reg]                  -- FP regs avail for args
3367               -> InstrBlock
3368               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3369     load_args args [] [] code     =  return (args, [], [], code)
3370         -- no more regs to use
3371     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3372         -- no more args to push
3373     load_args ((arg,hint) : rest) aregs fregs code
3374         | isFloatingRep arg_rep =
3375         case fregs of
3376           [] -> push_this_arg
3377           (r:rs) -> do
3378              arg_code <- getAnyReg arg
3379              load_args rest aregs rs (code `appOL` arg_code r)
3380         | otherwise =
3381         case aregs of
3382           [] -> push_this_arg
3383           (r:rs) -> do
3384              arg_code <- getAnyReg arg
3385              load_args rest rs fregs (code `appOL` arg_code r)
3386         where
3387           arg_rep = cmmExprRep arg
3388
3389           push_this_arg = do
3390             (args',ars,frs,code') <- load_args rest aregs fregs code
3391             return ((arg,hint):args', ars, frs, code')
3392
3393     push_args [] code = return code
3394     push_args ((arg,hint):rest) code
3395        | isFloatingRep arg_rep = do
3396          (arg_reg, arg_code) <- getSomeReg arg
3397          delta <- getDeltaNat
3398          setDeltaNat (delta-arg_size)
3399          let code' = code `appOL` arg_code `appOL` toOL [
3400                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3401                         DELTA (delta-arg_size),
3402                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3403          push_args rest code'
3404
3405        | otherwise = do
3406        -- we only ever generate word-sized function arguments.  Promotion
3407        -- has already happened: our Int8# type is kept sign-extended
3408        -- in an Int#, for example.
3409          ASSERT(arg_rep == I64) return ()
3410          (arg_op, arg_code) <- getOperand arg
3411          delta <- getDeltaNat
3412          setDeltaNat (delta-arg_size)
3413          let code' = code `appOL` toOL [PUSH I64 arg_op,
3414                                         DELTA (delta-arg_size)]
3415          push_args rest code'
3416         where
3417           arg_rep = cmmExprRep arg
3418 #endif
3419
3420 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3421
3422 #if sparc_TARGET_ARCH
3423 {-
3424    The SPARC calling convention is an absolute
3425    nightmare.  The first 6x32 bits of arguments are mapped into
3426    %o0 through %o5, and the remaining arguments are dumped to the
3427    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3428
3429    If we have to put args on the stack, move %o6==%sp down by
3430    the number of words to go on the stack, to ensure there's enough space.
3431
3432    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3433    16 words above the stack pointer is a word for the address of
3434    a structure return value.  I use this as a temporary location
3435    for moving values from float to int regs.  Certainly it isn't
3436    safe to put anything in the 16 words starting at %sp, since
3437    this area can get trashed at any time due to window overflows
3438    caused by signal handlers.
3439
3440    A final complication (if the above isn't enough) is that
3441    we can't blithely calculate the arguments one by one into
3442    %o0 .. %o5.  Consider the following nested calls:
3443
3444        fff a (fff b c)
3445
3446    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3447    the inner call will itself use %o0, which trashes the value put there
3448    in preparation for the outer call.  Upshot: we need to calculate the
3449    args into temporary regs, and move those to arg regs or onto the
3450    stack only immediately prior to the call proper.  Sigh.
3451 -}
3452
3453 genCCall target dest_regs argsAndHints = do
3454     let
3455         args = map fst argsAndHints
3456     argcode_and_vregs <- mapM arg_to_int_vregs args
3457     let
3458         (argcodes, vregss) = unzip argcode_and_vregs
3459         n_argRegs          = length allArgRegs
3460         n_argRegs_used     = min (length vregs) n_argRegs
3461         vregs              = concat vregss
3462     -- deal with static vs dynamic call targets
3463     callinsns <- (case target of
3464         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3465                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3466         CmmCallee expr conv -> do
3467                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3468                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3469         CmmPrim mop -> do
3470                   (res, reduce) <- outOfLineFloatOp mop
3471                   lblOrMopExpr <- case res of
3472                        Left lbl -> do
3473                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3474                        Right mopExpr -> do
3475                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3476                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3477                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3478
3479       )
3480     let
3481         argcode = concatOL argcodes
3482         (move_sp_down, move_sp_up)
3483            = let diff = length vregs - n_argRegs
3484                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3485              in  if   nn <= 0
3486                  then (nilOL, nilOL)
3487                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3488         transfer_code
3489            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3490     return (argcode       `appOL`
3491             move_sp_down  `appOL`
3492             transfer_code `appOL`
3493             callinsns     `appOL`
3494             unitOL NOP    `appOL`
3495             move_sp_up)
3496   where
3497      -- move args from the integer vregs into which they have been
3498      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3499      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3500
3501      move_final [] _ offset          -- all args done
3502         = []
3503
3504      move_final (v:vs) [] offset     -- out of aregs; move to stack
3505         = ST I32 v (spRel offset)
3506           : move_final vs [] (offset+1)
3507
3508      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3509         = OR False g0 (RIReg v) a
3510           : move_final vs az offset
3511
3512      -- generate code to calculate an argument, and move it into one
3513      -- or two integer vregs.
3514      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3515      arg_to_int_vregs arg
3516         | (cmmExprRep arg) == I64
3517         = do
3518           (ChildCode64 code r_lo) <- iselExpr64 arg
3519           let
3520               r_hi = getHiVRegFromLo r_lo
3521           return (code, [r_hi, r_lo])
3522         | otherwise
3523         = do
3524           (src, code) <- getSomeReg arg
3525           tmp <- getNewRegNat (cmmExprRep arg)
3526           let
3527               pk   = cmmExprRep arg
3528           case pk of
3529              F64 -> do
3530                       v1 <- getNewRegNat I32
3531                       v2 <- getNewRegNat I32
3532                       return (
3533                         code                          `snocOL`
3534                         FMOV F64 src f0                `snocOL`
3535                         ST   F32  f0 (spRel 16)         `snocOL`
3536                         LD   I32  (spRel 16) v1         `snocOL`
3537                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3538                         LD   I32  (spRel 16) v2
3539                         ,
3540                         [v1,v2]
3541                        )
3542              F32 -> do
3543                       v1 <- getNewRegNat I32
3544                       return (
3545                         code                    `snocOL`
3546                         ST   F32  src (spRel 16)  `snocOL`
3547                         LD   I32  (spRel 16) v1
3548                         ,
3549                         [v1]
3550                        )
3551              other -> do
3552                         v1 <- getNewRegNat I32
3553                         return (
3554                           code `snocOL` OR False g0 (RIReg src) v1
3555                           ,
3556                           [v1]
3557                          )
3558 outOfLineFloatOp mop =
3559     do
3560       dflags <- getDynFlagsNat
3561       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3562                   mkForeignLabel functionName Nothing True
3563       let mopLabelOrExpr = case mopExpr of
3564                         CmmLit (CmmLabel lbl) -> Left lbl
3565                         _ -> Right mopExpr
3566       return (mopLabelOrExpr, reduce)
3567             where
3568                 (reduce, functionName) = case mop of
3569                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3570                   MO_F32_Log    -> (True,  FSLIT("log"))
3571                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3572
3573                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3574                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3575                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3576
3577                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3578                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3579                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3580
3581                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3582                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3583                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3584
3585                   MO_F64_Exp    -> (False, FSLIT("exp"))
3586                   MO_F64_Log    -> (False, FSLIT("log"))
3587                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3588
3589                   MO_F64_Sin    -> (False, FSLIT("sin"))
3590                   MO_F64_Cos    -> (False, FSLIT("cos"))
3591                   MO_F64_Tan    -> (False, FSLIT("tan"))
3592
3593                   MO_F64_Asin   -> (False, FSLIT("asin"))
3594                   MO_F64_Acos   -> (False, FSLIT("acos"))
3595                   MO_F64_Atan   -> (False, FSLIT("atan"))
3596
3597                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3598                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3599                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3600
3601                   other -> pprPanic "outOfLineFloatOp(sparc) "
3602                                 (pprCallishMachOp mop)
3603
3604 #endif /* sparc_TARGET_ARCH */
3605
3606 #if powerpc_TARGET_ARCH
3607
3608 #if darwin_TARGET_OS || linux_TARGET_OS
3609 {-
3610     The PowerPC calling convention for Darwin/Mac OS X
3611     is described in Apple's document
3612     "Inside Mac OS X - Mach-O Runtime Architecture".
3613
3614     PowerPC Linux uses the System V Release 4 Calling Convention
3615     for PowerPC. It is described in the
3616     "System V Application Binary Interface PowerPC Processor Supplement".
3617
3618     Both conventions are similar:
3619     Parameters may be passed in general-purpose registers starting at r3, in
3620     floating point registers starting at f1, or on the stack.
3621
3622     But there are substantial differences:
3623     * The number of registers used for parameter passing and the exact set of
3624       nonvolatile registers differs (see MachRegs.lhs).
3625     * On Darwin, stack space is always reserved for parameters, even if they are
3626       passed in registers. The called routine may choose to save parameters from
3627       registers to the corresponding space on the stack.
3628     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3629       parameter is passed in an FPR.
3630     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3631       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3632       Darwin just treats an I64 like two separate I32s (high word first).
3633     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3634       4-byte aligned like everything else on Darwin.
3635     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3636       PowerPC Linux does not agree, so neither do we.
3637
3638     According to both conventions, The parameter area should be part of the
3639     caller's stack frame, allocated in the caller's prologue code (large enough
3640     to hold the parameter lists for all called routines). The NCG already
3641     uses the stack for register spilling, leaving 64 bytes free at the top.
3642     If we need a larger parameter area than that, we just allocate a new stack
3643     frame just before ccalling.
3644 -}
3645
3646
3647 genCCall (CmmPrim MO_WriteBarrier) _ _
3648  = return $ unitOL LWSYNC
3649
3650 genCCall target dest_regs argsAndHints
3651   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3652         -- we rely on argument promotion in the codeGen
3653     do
3654         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3655                                                         (zip args argReps)
3656                                                         allArgRegs allFPArgRegs
3657                                                         initialStackOffset
3658                                                         (toOL []) []
3659
3660         (labelOrExpr, reduceToF32) <- case target of
3661             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3662             CmmCallee expr conv -> return  (Right expr, False)
3663             CmmPrim mop -> outOfLineFloatOp mop
3664
3665         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3666             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3667
3668         case labelOrExpr of
3669             Left lbl -> do
3670                 return (         codeBefore
3671                         `snocOL` BL lbl usedRegs
3672                         `appOL`  codeAfter)
3673             Right dyn -> do
3674                 (dynReg, dynCode) <- getSomeReg dyn
3675                 return (         dynCode
3676                         `snocOL` MTCTR dynReg
3677                         `appOL`  codeBefore
3678                         `snocOL` BCTRL usedRegs
3679                         `appOL`  codeAfter)
3680     where
3681 #if darwin_TARGET_OS
3682         initialStackOffset = 24
3683             -- size of linkage area + size of arguments, in bytes
3684         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3685                                        map machRepByteWidth argReps
3686 #elif linux_TARGET_OS
3687         initialStackOffset = 8
3688         stackDelta finalStack = roundTo 16 finalStack
3689 #endif
3690         args = map fst argsAndHints
3691         argReps = map cmmExprRep args
3692
3693         roundTo a x | x `mod` a == 0 = x
3694                     | otherwise = x + a - (x `mod` a)
3695
3696         move_sp_down finalStack
3697                | delta > 64 =
3698                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3699                               DELTA (-delta)]
3700                | otherwise = nilOL
3701                where delta = stackDelta finalStack
3702         move_sp_up finalStack
3703                | delta > 64 =
3704                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3705                               DELTA 0]
3706                | otherwise = nilOL
3707                where delta = stackDelta finalStack
3708
3709
3710         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3711         passArguments ((arg,I64):args) gprs fprs stackOffset
3712                accumCode accumUsed =
3713             do
3714                 ChildCode64 code vr_lo <- iselExpr64 arg
3715                 let vr_hi = getHiVRegFromLo vr_lo
3716
3717 #if darwin_TARGET_OS
3718                 passArguments args
3719                               (drop 2 gprs)
3720                               fprs
3721                               (stackOffset+8)
3722                               (accumCode `appOL` code
3723                                     `snocOL` storeWord vr_hi gprs stackOffset
3724                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3725                               ((take 2 gprs) ++ accumUsed)
3726             where
3727                 storeWord vr (gpr:_) offset = MR gpr vr
3728                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3729
3730 #elif linux_TARGET_OS
3731                 let stackOffset' = roundTo 8 stackOffset
3732                     stackCode = accumCode `appOL` code
3733                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3734                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3735                     regCode hireg loreg =
3736                         accumCode `appOL` code
3737                             `snocOL` MR hireg vr_hi
3738                             `snocOL` MR loreg vr_lo
3739
3740                 case gprs of
3741                     hireg : loreg : regs | even (length gprs) ->
3742                         passArguments args regs fprs stackOffset
3743                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3744                     _skipped : hireg : loreg : regs ->
3745                         passArguments args regs fprs stackOffset
3746                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3747                     _ -> -- only one or no regs left
3748                         passArguments args [] fprs (stackOffset'+8)
3749                                       stackCode accumUsed
3750 #endif
3751
3752         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3753             | reg : _ <- regs = do
3754                 register <- getRegister arg
3755                 let code = case register of
3756                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3757                             Any _ acode -> acode reg
3758                 passArguments args
3759                               (drop nGprs gprs)
3760                               (drop nFprs fprs)
3761 #if darwin_TARGET_OS
3762         -- The Darwin ABI requires that we reserve stack slots for register parameters
3763                               (stackOffset + stackBytes)
3764 #elif linux_TARGET_OS
3765         -- ... the SysV ABI doesn't.
3766                               stackOffset
3767 #endif
3768                               (accumCode `appOL` code)
3769                               (reg : accumUsed)
3770             | otherwise = do
3771                 (vr, code) <- getSomeReg arg
3772                 passArguments args
3773                               (drop nGprs gprs)
3774                               (drop nFprs fprs)
3775                               (stackOffset' + stackBytes)
3776                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3777                               accumUsed
3778             where
3779 #if darwin_TARGET_OS
3780         -- stackOffset is at least 4-byte aligned
3781         -- The Darwin ABI is happy with that.
3782                 stackOffset' = stackOffset
3783 #else
3784         -- ... the SysV ABI requires 8-byte alignment for doubles.
3785                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3786                              | otherwise  =           stackOffset
3787 #endif
3788                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3789                 (nGprs, nFprs, stackBytes, regs) = case rep of
3790                     I32 -> (1, 0, 4, gprs)
3791 #if darwin_TARGET_OS
3792         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3793         -- we use the FPRs.
3794                     F32 -> (1, 1, 4, fprs)
3795                     F64 -> (2, 1, 8, fprs)
3796 #elif linux_TARGET_OS
3797         -- ... the SysV ABI doesn't.
3798                     F32 -> (0, 1, 4, fprs)
3799                     F64 -> (0, 1, 8, fprs)
3800 #endif
3801
3802         moveResult reduceToF32 =
3803             case dest_regs of
3804                 [] -> nilOL
3805                 [(dest, _hint)]
3806                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3807                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3808                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3809                                           MR r_dest r4]
3810                     | otherwise -> unitOL (MR r_dest r3)
3811                     where rep = cmmRegRep (CmmLocal dest)
3812                           r_dest = getRegisterReg (CmmLocal dest)
3813
3814         outOfLineFloatOp mop =
3815             do
3816                 dflags <- getDynFlagsNat
3817                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3818                               mkForeignLabel functionName Nothing True
3819                 let mopLabelOrExpr = case mopExpr of
3820                         CmmLit (CmmLabel lbl) -> Left lbl
3821                         _ -> Right mopExpr
3822                 return (mopLabelOrExpr, reduce)
3823             where
3824                 (functionName, reduce) = case mop of
3825                     MO_F32_Exp   -> (FSLIT("exp"), True)
3826                     MO_F32_Log   -> (FSLIT("log"), True)
3827                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3828
3829                     MO_F32_Sin   -> (FSLIT("sin"), True)
3830                     MO_F32_Cos   -> (FSLIT("cos"), True)
3831                     MO_F32_Tan   -> (FSLIT("tan"), True)
3832
3833                     MO_F32_Asin  -> (FSLIT("asin"), True)
3834                     MO_F32_Acos  -> (FSLIT("acos"), True)
3835                     MO_F32_Atan  -> (FSLIT("atan"), True)
3836
3837                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3838                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3839                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3840                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3841
3842                     MO_F64_Exp   -> (FSLIT("exp"), False)
3843                     MO_F64_Log   -> (FSLIT("log"), False)
3844                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3845
3846                     MO_F64_Sin   -> (FSLIT("sin"), False)
3847                     MO_F64_Cos   -> (FSLIT("cos"), False)
3848                     MO_F64_Tan   -> (FSLIT("tan"), False)
3849
3850                     MO_F64_Asin  -> (FSLIT("asin"), False)
3851                     MO_F64_Acos  -> (FSLIT("acos"), False)
3852                     MO_F64_Atan  -> (FSLIT("atan"), False)
3853
3854                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3855                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3856                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3857                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3858                     other -> pprPanic "genCCall(ppc): unknown callish op"
3859                                     (pprCallishMachOp other)
3860
3861 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3862
3863 #endif /* powerpc_TARGET_ARCH */
3864
3865
3866 -- -----------------------------------------------------------------------------
3867 -- Generating a table-branch
3868
3869 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3870
3871 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3872 genSwitch expr ids
3873   | opt_PIC
3874   = do
3875         (reg,e_code) <- getSomeReg expr
3876         lbl <- getNewLabelNat
3877         dflags <- getDynFlagsNat
3878         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3879         (tableReg,t_code) <- getSomeReg $ dynRef
3880         let
3881             jumpTable = map jumpTableEntryRel ids
3882
3883             jumpTableEntryRel Nothing
3884                 = CmmStaticLit (CmmInt 0 wordRep)
3885             jumpTableEntryRel (Just (BlockId id))
3886                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3887                 where blockLabel = mkAsmTempLabel id
3888
3889             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3890                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3891
3892 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3893     -- on Mac OS X/x86_64, put the jump table in the text section
3894     -- to work around a limitation of the linker.
3895     -- ld64 is unable to handle the relocations for
3896     --     .quad L1 - L0
3897     -- if L0 is not preceded by a non-anonymous label in its section.
3898
3899             code = e_code `appOL` t_code `appOL` toOL [
3900                             ADD wordRep op (OpReg tableReg),
3901                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3902                             LDATA Text (CmmDataLabel lbl : jumpTable)
3903                     ]
3904 #else
3905             code = e_code `appOL` t_code `appOL` toOL [
3906                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3907                             ADD wordRep op (OpReg tableReg),
3908                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3909                     ]
3910 #endif
3911         return code
3912   | otherwise
3913   = do
3914         (reg,e_code) <- getSomeReg expr
3915         lbl <- getNewLabelNat
3916         let
3917             jumpTable = map jumpTableEntry ids
3918             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3919             code = e_code `appOL` toOL [
3920                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3921                     JMP_TBL op [ id | Just id <- ids ]
3922                  ]
3923         -- in
3924         return code
3925 #elif powerpc_TARGET_ARCH
3926 genSwitch expr ids
3927   | opt_PIC
3928   = do
3929         (reg,e_code) <- getSomeReg expr
3930         tmp <- getNewRegNat I32
3931         lbl <- getNewLabelNat
3932         dflags <- getDynFlagsNat
3933         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3934         (tableReg,t_code) <- getSomeReg $ dynRef
3935         let
3936             jumpTable = map jumpTableEntryRel ids
3937
3938             jumpTableEntryRel Nothing
3939                 = CmmStaticLit (CmmInt 0 wordRep)
3940             jumpTableEntryRel (Just (BlockId id))
3941                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3942                 where blockLabel = mkAsmTempLabel id
3943
3944             code = e_code `appOL` t_code `appOL` toOL [
3945                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946                             SLW tmp reg (RIImm (ImmInt 2)),
3947                             LD I32 tmp (AddrRegReg tableReg tmp),
3948                             ADD tmp tmp (RIReg tableReg),
3949                             MTCTR tmp,
3950                             BCTR [ id | Just id <- ids ]
3951                     ]
3952         return code
3953   | otherwise
3954   = do
3955         (reg,e_code) <- getSomeReg expr
3956         tmp <- getNewRegNat I32
3957         lbl <- getNewLabelNat
3958         let
3959             jumpTable = map jumpTableEntry ids
3960
3961             code = e_code `appOL` toOL [
3962                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3963                             SLW tmp reg (RIImm (ImmInt 2)),
3964                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3965                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3966                             MTCTR tmp,
3967                             BCTR [ id | Just id <- ids ]
3968                     ]
3969         return code
3970 #else
3971 genSwitch expr ids = panic "ToDo: genSwitch"
3972 #endif
3973
3974 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3975 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3976     where blockLabel = mkAsmTempLabel id
3977
3978 -- -----------------------------------------------------------------------------
3979 -- Support bits
3980 -- -----------------------------------------------------------------------------
3981
3982
3983 -- -----------------------------------------------------------------------------
3984 -- 'condIntReg' and 'condFltReg': condition codes into registers
3985
3986 -- Turn those condition codes into integers now (when they appear on
3987 -- the right hand side of an assignment).
3988 --
3989 -- (If applicable) Do not fill the delay slots here; you will confuse the
3990 -- register allocator.
3991
3992 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3993
3994 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3995
3996 #if alpha_TARGET_ARCH
3997 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3998 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3999 #endif /* alpha_TARGET_ARCH */
4000
4001 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4002
4003 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4004
4005 condIntReg cond x y = do
4006   CondCode _ cond cond_code <- condIntCode cond x y
4007   tmp <- getNewRegNat I8
4008   let
4009         code dst = cond_code `appOL` toOL [
4010                     SETCC cond (OpReg tmp),
4011                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4012                   ]
4013   -- in
4014   return (Any I32 code)
4015
4016 #endif
4017
4018 #if i386_TARGET_ARCH
4019
4020 condFltReg cond x y = do
4021   CondCode _ cond cond_code <- condFltCode cond x y
4022   tmp <- getNewRegNat I8
4023   let
4024         code dst = cond_code `appOL` toOL [
4025                     SETCC cond (OpReg tmp),
4026                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4027                   ]
4028   -- in
4029   return (Any I32 code)
4030
4031 #endif
4032
4033 #if x86_64_TARGET_ARCH
4034
4035 condFltReg cond x y = do
4036   CondCode _ cond cond_code <- condFltCode cond x y
4037   tmp1 <- getNewRegNat wordRep
4038   tmp2 <- getNewRegNat wordRep
4039   let
4040         -- We have to worry about unordered operands (eg. comparisons
4041         -- against NaN).  If the operands are unordered, the comparison
4042         -- sets the parity flag, carry flag and zero flag.
4043         -- All comparisons are supposed to return false for unordered
4044         -- operands except for !=, which returns true.
4045         --
4046         -- Optimisation: we don't have to test the parity flag if we
4047         -- know the test has already excluded the unordered case: eg >
4048         -- and >= test for a zero carry flag, which can only occur for
4049         -- ordered operands.
4050         --
4051         -- ToDo: by reversing comparisons we could avoid testing the
4052         -- parity flag in more cases.
4053
4054         code dst =
4055            cond_code `appOL`
4056              (case cond of
4057                 NE  -> or_unordered dst
4058                 GU  -> plain_test   dst
4059                 GEU -> plain_test   dst
4060                 _   -> and_ordered  dst)
4061
4062         plain_test dst = toOL [
4063                     SETCC cond (OpReg tmp1),
4064                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4065                  ]
4066         or_unordered dst = toOL [
4067                     SETCC cond (OpReg tmp1),
4068                     SETCC PARITY (OpReg tmp2),
4069                     OR I8 (OpReg tmp1) (OpReg tmp2),
4070                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4071                   ]
4072         and_ordered dst = toOL [
4073                     SETCC cond (OpReg tmp1),
4074                     SETCC NOTPARITY (OpReg tmp2),
4075                     AND I8 (OpReg tmp1) (OpReg tmp2),
4076                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4077                   ]
4078   -- in
4079   return (Any I32 code)
4080
4081 #endif
4082
4083 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4084
4085 #if sparc_TARGET_ARCH
4086
4087 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4088     (src, code) <- getSomeReg x
4089     tmp <- getNewRegNat I32
4090     let
4091         code__2 dst = code `appOL` toOL [
4092             SUB False True g0 (RIReg src) g0,
4093             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4094     return (Any I32 code__2)
4095
4096 condIntReg EQQ x y = do
4097     (src1, code1) <- getSomeReg x
4098     (src2, code2) <- getSomeReg y
4099     tmp1 <- getNewRegNat I32
4100     tmp2 <- getNewRegNat I32
4101     let
4102         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4103             XOR False src1 (RIReg src2) dst,
4104             SUB False True g0 (RIReg dst) g0,
4105             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4106     return (Any I32 code__2)
4107
4108 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4109     (src, code) <- getSomeReg x
4110     tmp <- getNewRegNat I32
4111     let
4112         code__2 dst = code `appOL` toOL [
4113             SUB False True g0 (RIReg src) g0,
4114             ADD True False g0 (RIImm (ImmInt 0)) dst]
4115     return (Any I32 code__2)
4116
4117 condIntReg NE x y = do
4118     (src1, code1) <- getSomeReg x
4119     (src2, code2) <- getSomeReg y
4120     tmp1 <- getNewRegNat I32
4121     tmp2 <- getNewRegNat I32
4122     let
4123         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4124             XOR False src1 (RIReg src2) dst,
4125             SUB False True g0 (RIReg dst) g0,
4126             ADD True False g0 (RIImm (ImmInt 0)) dst]
4127     return (Any I32 code__2)
4128
4129 condIntReg cond x y = do
4130     BlockId lbl1 <- getBlockIdNat
4131     BlockId lbl2 <- getBlockIdNat
4132     CondCode _ cond cond_code <- condIntCode cond x y
4133     let
4134         code__2 dst = cond_code `appOL` toOL [
4135             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4136             OR False g0 (RIImm (ImmInt 0)) dst,
4137             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4138             NEWBLOCK (BlockId lbl1),
4139             OR False g0 (RIImm (ImmInt 1)) dst,
4140             NEWBLOCK (BlockId lbl2)]
4141     return (Any I32 code__2)
4142
4143 condFltReg cond x y = do
4144     BlockId lbl1 <- getBlockIdNat
4145     BlockId lbl2 <- getBlockIdNat
4146     CondCode _ cond cond_code <- condFltCode cond x y
4147     let
4148         code__2 dst = cond_code `appOL` toOL [
4149             NOP,
4150             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4151             OR False g0 (RIImm (ImmInt 0)) dst,
4152             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4153             NEWBLOCK (BlockId lbl1),
4154             OR False g0 (RIImm (ImmInt 1)) dst,
4155             NEWBLOCK (BlockId lbl2)]
4156     return (Any I32 code__2)
4157
4158 #endif /* sparc_TARGET_ARCH */
4159
4160 #if powerpc_TARGET_ARCH
4161 condReg getCond = do
4162     lbl1 <- getBlockIdNat
4163     lbl2 <- getBlockIdNat
4164     CondCode _ cond cond_code <- getCond
4165     let
4166 {-        code dst = cond_code `appOL` toOL [
4167                 BCC cond lbl1,
4168                 LI dst (ImmInt 0),
4169                 BCC ALWAYS lbl2,
4170                 NEWBLOCK lbl1,
4171                 LI dst (ImmInt 1),
4172                 BCC ALWAYS lbl2,
4173                 NEWBLOCK lbl2
4174             ]-}
4175         code dst = cond_code
4176             `appOL` negate_code
4177             `appOL` toOL [
4178                 MFCR dst,
4179                 RLWINM dst dst (bit + 1) 31 31
4180             ]
4181
4182         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4183                     | otherwise = nilOL
4184
4185         (bit, do_negate) = case cond of
4186             LTT -> (0, False)
4187             LE  -> (1, True)
4188             EQQ -> (2, False)
4189             GE  -> (0, True)
4190             GTT -> (1, False)
4191
4192             NE  -> (2, True)
4193
4194             LU  -> (0, False)
4195             LEU -> (1, True)
4196             GEU -> (0, True)
4197             GU  -> (1, False)
4198
4199     return (Any I32 code)
4200
4201 condIntReg cond x y = condReg (condIntCode cond x y)
4202 condFltReg cond x y = condReg (condFltCode cond x y)
4203 #endif /* powerpc_TARGET_ARCH */
4204
4205
4206 -- -----------------------------------------------------------------------------
4207 -- 'trivial*Code': deal with trivial instructions
4208
4209 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4210 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4211 -- Only look for constants on the right hand side, because that's
4212 -- where the generic optimizer will have put them.
4213
4214 -- Similarly, for unary instructions, we don't have to worry about
4215 -- matching an StInt as the argument, because genericOpt will already
4216 -- have handled the constant-folding.
4217
4218 trivialCode
4219     :: MachRep
4220     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4221       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4222                      -> Maybe (Operand -> Operand -> Instr)
4223       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4224                      -> Maybe (Operand -> Operand -> Instr)
4225       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4226       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4227       ,)))))
4228     -> CmmExpr -> CmmExpr -- the two arguments
4229     -> NatM Register
4230
4231 #ifndef powerpc_TARGET_ARCH
4232 trivialFCode
4233     :: MachRep
4234     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4235       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4236       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4237       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4238       ,))))
4239     -> CmmExpr -> CmmExpr -- the two arguments
4240     -> NatM Register
4241 #endif
4242
4243 trivialUCode
4244     :: MachRep
4245     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4246       ,IF_ARCH_i386 ((Operand -> Instr)
4247       ,IF_ARCH_x86_64 ((Operand -> Instr)
4248       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4249       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4250       ,)))))
4251     -> CmmExpr  -- the one argument
4252     -> NatM Register
4253
4254 #ifndef powerpc_TARGET_ARCH
4255 trivialUFCode
4256     :: MachRep
4257     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4258       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4259       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4260       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4261       ,))))
4262     -> CmmExpr -- the one argument
4263     -> NatM Register
4264 #endif
4265
4266 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4267
4268 #if alpha_TARGET_ARCH
4269
4270 trivialCode instr x (StInt y)
4271   | fits8Bits y
4272   = getRegister x               `thenNat` \ register ->
4273     getNewRegNat IntRep         `thenNat` \ tmp ->
4274     let
4275         code = registerCode register tmp
4276         src1 = registerName register tmp
4277         src2 = ImmInt (fromInteger y)
4278         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4279     in
4280     return (Any IntRep code__2)
4281
4282 trivialCode instr x y
4283   = getRegister x               `thenNat` \ register1 ->
4284     getRegister y               `thenNat` \ register2 ->
4285     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4286     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4287     let
4288         code1 = registerCode register1 tmp1 []
4289         src1  = registerName register1 tmp1
4290         code2 = registerCode register2 tmp2 []
4291         src2  = registerName register2 tmp2
4292         code__2 dst = asmSeqThen [code1, code2] .
4293                      mkSeqInstr (instr src1 (RIReg src2) dst)
4294     in
4295     return (Any IntRep code__2)
4296
4297 ------------
4298 trivialUCode instr x
4299   = getRegister x               `thenNat` \ register ->
4300     getNewRegNat IntRep         `thenNat` \ tmp ->
4301     let
4302         code = registerCode register tmp
4303         src  = registerName register tmp
4304         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4305     in
4306     return (Any IntRep code__2)
4307
4308 ------------
4309 trivialFCode _ instr x y
4310   = getRegister x               `thenNat` \ register1 ->
4311     getRegister y               `thenNat` \ register2 ->
4312     getNewRegNat F64    `thenNat` \ tmp1 ->
4313     getNewRegNat F64    `thenNat` \ tmp2 ->
4314     let
4315         code1 = registerCode register1 tmp1
4316         src1  = registerName register1 tmp1
4317
4318         code2 = registerCode register2 tmp2
4319         src2  = registerName register2 tmp2
4320
4321         code__2 dst = asmSeqThen [code1 [], code2 []] .
4322                       mkSeqInstr (instr src1 src2 dst)
4323     in
4324     return (Any F64 code__2)
4325
4326 trivialUFCode _ instr x
4327   = getRegister x               `thenNat` \ register ->
4328     getNewRegNat F64    `thenNat` \ tmp ->
4329     let
4330         code = registerCode register tmp
4331         src  = registerName register tmp
4332         code__2 dst = code . mkSeqInstr (instr src dst)
4333     in
4334     return (Any F64 code__2)
4335
4336 #endif /* alpha_TARGET_ARCH */
4337
4338 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4339
4340 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4341
4342 {-
4343 The Rules of the Game are:
4344
4345 * You cannot assume anything about the destination register dst;
4346   it may be anything, including a fixed reg.
4347
4348 * You may compute an operand into a fixed reg, but you may not
4349   subsequently change the contents of that fixed reg.  If you
4350   want to do so, first copy the value either to a temporary
4351   or into dst.  You are free to modify dst even if it happens
4352   to be a fixed reg -- that's not your problem.
4353
4354 * You cannot assume that a fixed reg will stay live over an
4355   arbitrary computation.  The same applies to the dst reg.
4356
4357 * Temporary regs obtained from getNewRegNat are distinct from
4358   each other and from all other regs, and stay live over
4359   arbitrary computations.
4360
4361 --------------------
4362
4363 SDM's version of The Rules:
4364
4365 * If getRegister returns Any, that means it can generate correct
4366   code which places the result in any register, period.  Even if that
4367   register happens to be read during the computation.
4368
4369   Corollary #1: this means that if you are generating code for an
4370   operation with two arbitrary operands, you cannot assign the result
4371   of the first operand into the destination register before computing
4372   the second operand.  The second operand might require the old value
4373   of the destination register.
4374
4375   Corollary #2: A function might be able to generate more efficient
4376   code if it knows the destination register is a new temporary (and
4377   therefore not read by any of the sub-computations).
4378
4379 * If getRegister returns Any, then the code it generates may modify only:
4380         (a) fresh temporaries
4381         (b) the destination register
4382         (c) known registers (eg. %ecx is used by shifts)
4383   In particular, it may *not* modify global registers, unless the global
4384   register happens to be the destination register.
4385 -}
4386
4387 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4388   | not (is64BitLit lit_a) = do
4389   b_code <- getAnyReg b
4390   let
4391        code dst
4392          = b_code dst `snocOL`
4393            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4394   -- in
4395   return (Any rep code)
4396
4397 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4398
4399 -- This is re-used for floating pt instructions too.
4400 genTrivialCode rep instr a b = do
4401   (b_op, b_code) <- getNonClobberedOperand b
4402   a_code <- getAnyReg a
4403   tmp <- getNewRegNat rep
4404   let
4405      -- We want the value of b to stay alive across the computation of a.
4406      -- But, we want to calculate a straight into the destination register,
4407      -- because the instruction only has two operands (dst := dst `op` src).
4408      -- The troublesome case is when the result of b is in the same register
4409      -- as the destination reg.  In this case, we have to save b in a
4410      -- new temporary across the computation of a.
4411      code dst
4412         | dst `regClashesWithOp` b_op =
4413                 b_code `appOL`
4414                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4415                 a_code dst `snocOL`
4416                 instr (OpReg tmp) (OpReg dst)
4417         | otherwise =
4418                 b_code `appOL`
4419                 a_code dst `snocOL`
4420                 instr b_op (OpReg dst)
4421   -- in
4422   return (Any rep code)
4423
4424 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4425 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4426 reg `regClashesWithOp` _            = False
4427
4428 -----------
4429
4430 trivialUCode rep instr x = do
4431   x_code <- getAnyReg x
4432   let
4433      code dst =
4434         x_code dst `snocOL`
4435         instr (OpReg dst)
4436   -- in
4437   return (Any rep code)
4438
4439 -----------
4440
4441 #if i386_TARGET_ARCH
4442
4443 trivialFCode pk instr x y = do
4444   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4445   (y_reg, y_code) <- getSomeReg y
4446   let
4447      code dst =
4448         x_code `appOL`
4449         y_code `snocOL`
4450         instr pk x_reg y_reg dst
4451   -- in
4452   return (Any pk code)
4453
4454 #endif
4455
4456 #if x86_64_TARGET_ARCH
4457
4458 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4459
4460 #endif
4461
4462 -------------
4463
4464 trivialUFCode rep instr x = do
4465   (x_reg, x_code) <- getSomeReg x
4466   let
4467      code dst =
4468         x_code `snocOL`
4469         instr x_reg dst
4470   -- in
4471   return (Any rep code)
4472
4473 #endif /* i386_TARGET_ARCH */
4474
4475 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4476
4477 #if sparc_TARGET_ARCH
4478
4479 trivialCode pk instr x (CmmLit (CmmInt y d))
4480   | fits13Bits y
4481   = do
4482       (src1, code) <- getSomeReg x
4483       tmp <- getNewRegNat I32
4484       let
4485         src2 = ImmInt (fromInteger y)
4486         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4487       return (Any I32 code__2)
4488
4489 trivialCode pk instr x y = do
4490     (src1, code1) <- getSomeReg x
4491     (src2, code2) <- getSomeReg y
4492     tmp1 <- getNewRegNat I32
4493     tmp2 <- getNewRegNat I32
4494     let
4495         code__2 dst = code1 `appOL` code2 `snocOL`
4496                       instr src1 (RIReg src2) dst
4497     return (Any I32 code__2)
4498
4499 ------------
4500 trivialFCode pk instr x y = do
4501     (src1, code1) <- getSomeReg x
4502     (src2, code2) <- getSomeReg y
4503     tmp1 <- getNewRegNat (cmmExprRep x)
4504     tmp2 <- getNewRegNat (cmmExprRep y)
4505     tmp <- getNewRegNat F64
4506     let
4507         promote x = FxTOy F32 F64 x tmp
4508
4509         pk1   = cmmExprRep x
4510         pk2   = cmmExprRep y
4511
4512         code__2 dst =
4513                 if pk1 == pk2 then
4514                     code1 `appOL` code2 `snocOL`
4515                     instr pk src1 src2 dst
4516                 else if pk1 == F32 then
4517                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4518                     instr F64 tmp src2 dst
4519                 else
4520                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4521                     instr F64 src1 tmp dst
4522     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4523
4524 ------------
4525 trivialUCode pk instr x = do
4526     (src, code) <- getSomeReg x
4527     tmp <- getNewRegNat pk
4528     let
4529         code__2 dst = code `snocOL` instr (RIReg src) dst
4530     return (Any pk code__2)
4531
4532 -------------
4533 trivialUFCode pk instr x = do
4534     (src, code) <- getSomeReg x
4535     tmp <- getNewRegNat pk
4536     let
4537         code__2 dst = code `snocOL` instr src dst
4538     return (Any pk code__2)
4539
4540 #endif /* sparc_TARGET_ARCH */
4541
4542 #if powerpc_TARGET_ARCH
4543
4544 {-
4545 Wolfgang's PowerPC version of The Rules:
4546
4547 A slightly modified version of The Rules to take advantage of the fact
4548 that PowerPC instructions work on all registers and don't implicitly
4549 clobber any fixed registers.
4550
4551 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4552
4553 * If getRegister returns Any, then the code it generates may modify only:
4554         (a) fresh temporaries
4555         (b) the destination register
4556   It may *not* modify global registers, unless the global
4557   register happens to be the destination register.
4558   It may not clobber any other registers. In fact, only ccalls clobber any
4559   fixed registers.
4560   Also, it may not modify the counter register (used by genCCall).
4561
4562   Corollary: If a getRegister for a subexpression returns Fixed, you need
4563   not move it to a fresh temporary before evaluating the next subexpression.
4564   The Fixed register won't be modified.
4565   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4566
4567 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4568   the value of the destination register.
4569 -}
4570
4571 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4572     | Just imm <- makeImmediate rep signed y
4573     = do
4574         (src1, code1) <- getSomeReg x
4575         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4576         return (Any rep code)
4577
4578 trivialCode rep signed instr x y = do
4579     (src1, code1) <- getSomeReg x
4580     (src2, code2) <- getSomeReg y
4581     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4582     return (Any rep code)
4583
4584 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4585     -> CmmExpr -> CmmExpr -> NatM Register
4586 trivialCodeNoImm rep instr x y = do
4587     (src1, code1) <- getSomeReg x
4588     (src2, code2) <- getSomeReg y
4589     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4590     return (Any rep code)
4591
4592 trivialUCode rep instr x = do
4593     (src, code) <- getSomeReg x
4594     let code' dst = code `snocOL` instr dst src
4595     return (Any rep code')
4596
4597 -- There is no "remainder" instruction on the PPC, so we have to do
4598 -- it the hard way.
4599 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4600
4601 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4602     -> CmmExpr -> CmmExpr -> NatM Register
4603 remainderCode rep div x y = do
4604     (src1, code1) <- getSomeReg x
4605     (src2, code2) <- getSomeReg y
4606     let code dst = code1 `appOL` code2 `appOL` toOL [
4607                 div dst src1 src2,
4608                 MULLW dst dst (RIReg src2),
4609                 SUBF dst dst src1
4610             ]
4611     return (Any rep code)
4612
4613 #endif /* powerpc_TARGET_ARCH */
4614
4615
4616 -- -----------------------------------------------------------------------------
4617 --  Coercing to/from integer/floating-point...
4618
4619 -- When going to integer, we truncate (round towards 0).
4620
4621 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4622 -- conversions.  We have to store temporaries in memory to move
4623 -- between the integer and the floating point register sets.
4624
4625 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4626 -- pretend, on sparc at least, that double and float regs are seperate
4627 -- kinds, so the value has to be computed into one kind before being
4628 -- explicitly "converted" to live in the other kind.
4629
4630 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4631 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4632
4633 #if sparc_TARGET_ARCH
4634 coerceDbl2Flt :: CmmExpr -> NatM Register
4635 coerceFlt2Dbl :: CmmExpr -> NatM Register
4636 #endif
4637
4638 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4639
4640 #if alpha_TARGET_ARCH
4641
4642 coerceInt2FP _ x
4643   = getRegister x               `thenNat` \ register ->
4644     getNewRegNat IntRep         `thenNat` \ reg ->
4645     let
4646         code = registerCode register reg
4647         src  = registerName register reg
4648
4649         code__2 dst = code . mkSeqInstrs [
4650             ST Q src (spRel 0),
4651             LD TF dst (spRel 0),
4652             CVTxy Q TF dst dst]
4653     in
4654     return (Any F64 code__2)
4655
4656 -------------
4657 coerceFP2Int x
4658   = getRegister x               `thenNat` \ register ->
4659     getNewRegNat F64    `thenNat` \ tmp ->
4660     let
4661         code = registerCode register tmp
4662         src  = registerName register tmp
4663
4664         code__2 dst = code . mkSeqInstrs [
4665             CVTxy TF Q src tmp,
4666             ST TF tmp (spRel 0),
4667             LD Q dst (spRel 0)]
4668     in
4669     return (Any IntRep code__2)
4670
4671 #endif /* alpha_TARGET_ARCH */
4672
4673 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4674
4675 #if i386_TARGET_ARCH
4676
4677 coerceInt2FP from to x = do
4678   (x_reg, x_code) <- getSomeReg x
4679   let
4680         opc  = case to of F32 -> GITOF; F64 -> GITOD
4681         code dst = x_code `snocOL` opc x_reg dst
4682         -- ToDo: works for non-I32 reps?
4683   -- in
4684   return (Any to code)
4685
4686 ------------
4687
4688 coerceFP2Int from to x = do
4689   (x_reg, x_code) <- getSomeReg x
4690   let
4691         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4692         code dst = x_code `snocOL` opc x_reg dst
4693         -- ToDo: works for non-I32 reps?
4694   -- in
4695   return (Any to code)
4696
4697 #endif /* i386_TARGET_ARCH */
4698
4699 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4700
4701 #if x86_64_TARGET_ARCH
4702
4703 coerceFP2Int from to x = do
4704   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4705   let
4706         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4707         code dst = x_code `snocOL` opc x_op dst
4708   -- in
4709   return (Any to code) -- works even if the destination rep is <I32
4710
4711 coerceInt2FP from to x = do
4712   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4713   let
4714         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4715         code dst = x_code `snocOL` opc x_op dst
4716   -- in
4717   return (Any to code) -- works even if the destination rep is <I32
4718
4719 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4720 coerceFP2FP to x = do
4721   (x_reg, x_code) <- getSomeReg x
4722   let
4723         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4724         code dst = x_code `snocOL` opc x_reg dst
4725   -- in
4726   return (Any to code)
4727
4728 #endif
4729
4730 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4731
4732 #if sparc_TARGET_ARCH
4733
4734 coerceInt2FP pk1 pk2 x = do
4735     (src, code) <- getSomeReg x
4736     let
4737         code__2 dst = code `appOL` toOL [
4738             ST pk1 src (spRel (-2)),
4739             LD pk1 (spRel (-2)) dst,
4740             FxTOy pk1 pk2 dst dst]
4741     return (Any pk2 code__2)
4742
4743 ------------
4744 coerceFP2Int pk fprep x = do
4745     (src, code) <- getSomeReg x
4746     reg <- getNewRegNat fprep
4747     tmp <- getNewRegNat pk
4748     let
4749         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4750             code `appOL` toOL [
4751             FxTOy fprep pk src tmp,
4752             ST pk tmp (spRel (-2)),
4753             LD pk (spRel (-2)) dst]
4754     return (Any pk code__2)
4755
4756 ------------
4757 coerceDbl2Flt x = do
4758     (src, code) <- getSomeReg x
4759     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4760
4761 ------------
4762 coerceFlt2Dbl x = do
4763     (src, code) <- getSomeReg x
4764     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4765
4766 #endif /* sparc_TARGET_ARCH */
4767
4768 #if powerpc_TARGET_ARCH
4769 coerceInt2FP fromRep toRep x = do
4770     (src, code) <- getSomeReg x
4771     lbl <- getNewLabelNat
4772     itmp <- getNewRegNat I32
4773     ftmp <- getNewRegNat F64
4774     dflags <- getDynFlagsNat
4775     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4776     Amode addr addr_code <- getAmode dynRef
4777     let
4778         code' dst = code `appOL` maybe_exts `appOL` toOL [
4779                 LDATA ReadOnlyData
4780                                 [CmmDataLabel lbl,
4781                                  CmmStaticLit (CmmInt 0x43300000 I32),
4782                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4783                 XORIS itmp src (ImmInt 0x8000),
4784                 ST I32 itmp (spRel 3),
4785                 LIS itmp (ImmInt 0x4330),
4786                 ST I32 itmp (spRel 2),
4787                 LD F64 ftmp (spRel 2)
4788             ] `appOL` addr_code `appOL` toOL [
4789                 LD F64 dst addr,
4790                 FSUB F64 dst ftmp dst
4791             ] `appOL` maybe_frsp dst
4792
4793         maybe_exts = case fromRep of
4794                         I8 ->  unitOL $ EXTS I8 src src
4795                         I16 -> unitOL $ EXTS I16 src src
4796                         I32 -> nilOL
4797         maybe_frsp dst = case toRep of
4798                         F32 -> unitOL $ FRSP dst dst
4799                         F64 -> nilOL
4800     return (Any toRep code')
4801
4802 coerceFP2Int fromRep toRep x = do
4803     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4804     (src, code) <- getSomeReg x
4805     tmp <- getNewRegNat F64
4806     let
4807         code' dst = code `appOL` toOL [
4808                 -- convert to int in FP reg
4809             FCTIWZ tmp src,
4810                 -- store value (64bit) from FP to stack
4811             ST F64 tmp (spRel 2),
4812                 -- read low word of value (high word is undefined)
4813             LD I32 dst (spRel 3)]
4814     return (Any toRep code')
4815 #endif /* powerpc_TARGET_ARCH */
4816
4817
4818 -- -----------------------------------------------------------------------------
4819 -- eXTRA_STK_ARGS_HERE
4820
4821 -- We (allegedly) put the first six C-call arguments in registers;
4822 -- where do we start putting the rest of them?
4823
4824 -- Moved from MachInstrs (SDM):
4825
4826 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4827 eXTRA_STK_ARGS_HERE :: Int
4828 eXTRA_STK_ARGS_HERE
4829   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4830 #endif
4831