compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo     ( mkBranchInstr, mkRegRegMoveInstr )
  33 import MachRegs
  34 import PprMach
  35
  36 -- Our intermediate code:
  37 import BlockId
  38 import PprCmm           ( pprExpr )
  39 import Cmm
  40 import CLabel
  41 import ClosureInfo      ( C_SRT(..) )
  42
  43 -- The rest:
  44 import StaticFlags      ( opt_PIC )
  45 import ForeignCall      ( CCallConv(..) )
  46 import OrdList
  47 import Pretty
  48 import qualified Outputable as O
  49 import Outputable
  50 import FastString
  51 import FastBool         ( isFastTrue )
  52 import Constants        ( wORD_SIZE )
  53
  54 import Debug.Trace      ( trace )
  55
  56 import Control.Monad    ( mapAndUnzipM )
  57 import Data.Maybe       ( fromJust )
  58 import Data.Bits
  59 import Data.Word
  60 import Data.Int
  61
  62
  63 -- -----------------------------------------------------------------------------
  64 -- Top-level of the instruction selector
  65
  66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  67 -- They are really trees of insns to facilitate fast appending, where a
  68 -- left-to-right traversal (pre-order?) yields the insns in the correct
  69 -- order.
  70
  71 type InstrBlock = OrdList Instr
  72
  73 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  74 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  75   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  76   picBaseMb <- getPicBaseMaybeNat
  77   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  78       tops = proc : concat statics
  79   case picBaseMb of
  80       Just picBase -> initializePicBase picBase tops
  81       Nothing -> return tops
  82
  83 cmmTopCodeGen (CmmData sec dat) = do
  84   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  85
  86 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  87 basicBlockCodeGen (BasicBlock id stmts) = do
  88   instrs <- stmtsToInstrs stmts
  89   -- code generation may introduce new basic block boundaries, which
  90   -- are indicated by the NEWBLOCK instruction.  We must split up the
  91   -- instruction stream into basic blocks again.  Also, we extract
  92   -- LDATAs here too.
  93   let
  94         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  95
  96         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  97           = ([], BasicBlock id instrs : blocks, statics)
  98         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  99           = (instrs, blocks, CmmData sec dat:statics)
 100         mkBlocks instr (instrs,blocks,statics)
 101           = (instr:instrs, blocks, statics)
 102   -- in
 103   return (BasicBlock id top : other_blocks, statics)
 104
 105 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 106 stmtsToInstrs stmts
 107    = do instrss <- mapM stmtToInstrs stmts
 108         return (concatOL instrss)
 109
 110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 111 stmtToInstrs stmt = case stmt of
 112     CmmNop         -> return nilOL
 113     CmmComment s   -> return (unitOL (COMMENT s))
 114
 115     CmmAssign reg src
 116       | isFloatType ty -> assignReg_FltCode size reg src
 117 #if WORD_SIZE_IN_BITS==32
 118       | isWord64 ty    -> assignReg_I64Code      reg src
 119 #endif
 120       | otherwise        -> assignReg_IntCode size reg src
 121         where ty = cmmRegType reg
 122               size = cmmTypeSize ty
 123
 124     CmmStore addr src
 125       | isFloatType ty -> assignMem_FltCode size addr src
 126 #if WORD_SIZE_IN_BITS==32
 127       | isWord64 ty      -> assignMem_I64Code      addr src
 128 #endif
 129       | otherwise        -> assignMem_IntCode size addr src
 130         where ty = cmmExprType src
 131               size = cmmTypeSize ty
 132
 133     CmmCall target result_regs args _ _
 134        -> genCCall target result_regs args
 135
 136     CmmBranch id          -> genBranch id
 137     CmmCondBranch arg id  -> genCondJump id arg
 138     CmmSwitch arg ids     -> genSwitch arg ids
 139     CmmJump arg params    -> genJump arg
 140     CmmReturn params      ->
 141       panic "stmtToInstrs: return statement should have been cps'd away"
 142
 143 -- -----------------------------------------------------------------------------
 144 -- General things for putting together code sequences
 145
 146 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 147 -- CmmExprs into CmmRegOff?
 148 mangleIndexTree :: CmmExpr -> CmmExpr
 149 mangleIndexTree (CmmRegOff reg off)
 150   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 151   where width = typeWidth (cmmRegType reg)
 152
 153 -- -----------------------------------------------------------------------------
 154 --  Code gen for 64-bit arithmetic on 32-bit platforms
 155
 156 {-
 157 Simple support for generating 64-bit code (ie, 64 bit values and 64
 158 bit assignments) on 32-bit platforms.  Unlike the main code generator
 159 we merely shoot for generating working code as simply as possible, and
 160 pay little attention to code quality.  Specifically, there is no
 161 attempt to deal cleverly with the fixed-vs-floating register
 162 distinction; all values are generated into (pairs of) floating
 163 registers, even if this would mean some redundant reg-reg moves as a
 164 result.  Only one of the VRegUniques is returned, since it will be
 165 of the VRegUniqueLo form, and the upper-half VReg can be determined
 166 by applying getHiVRegFromLo to it.
 167 -}
 168
 169 data ChildCode64        -- a.k.a "Register64"
 170    = ChildCode64
 171         InstrBlock      -- code
 172         Reg             -- the lower 32-bit temporary which contains the
 173                         -- result; use getHiVRegFromLo to find the other
 174                         -- VRegUnique.  Rules of this simplified insn
 175                         -- selection game are therefore that the returned
 176                         -- Reg may be modified
 177
 178 #if WORD_SIZE_IN_BITS==32
 179 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 180 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 181 #endif
 182
 183 #ifndef x86_64_TARGET_ARCH
 184 iselExpr64        :: CmmExpr -> NatM ChildCode64
 185 #endif
 186
 187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 188
 189 #if i386_TARGET_ARCH
 190
 191 assignMem_I64Code addrTree valueTree = do
 192   Amode addr addr_code <- getAmode addrTree
 193   ChildCode64 vcode rlo <- iselExpr64 valueTree
 194   let
 195         rhi = getHiVRegFromLo rlo
 196
 197         -- Little-endian store
 198         mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
 199         mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 200   -- in
 201   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 202
 203
 204 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 205    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 206    let
 207          r_dst_lo = mkVReg u_dst II32
 208          r_dst_hi = getHiVRegFromLo r_dst_lo
 209          r_src_hi = getHiVRegFromLo r_src_lo
 210          mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
 211          mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
 212    -- in
 213    return (
 214         vcode `snocOL` mov_lo `snocOL` mov_hi
 215      )
 216
 217 assignReg_I64Code lvalue valueTree
 218    = panic "assignReg_I64Code(i386): invalid lvalue"
 219
 220 ------------
 221
 222 iselExpr64 (CmmLit (CmmInt i _)) = do
 223   (rlo,rhi) <- getNewRegPairNat II32
 224   let
 225         r = fromIntegral (fromIntegral i :: Word32)
 226         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 227         code = toOL [
 228                 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
 229                 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
 230                 ]
 231   -- in
 232   return (ChildCode64 code rlo)
 233
 234 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 235    Amode addr addr_code <- getAmode addrTree
 236    (rlo,rhi) <- getNewRegPairNat II32
 237    let
 238         mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
 239         mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 240    -- in
 241    return (
 242             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 243                         rlo
 244      )
 245
 246 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 247    = return (ChildCode64 nilOL (mkVReg vu II32))
 248
 249 -- we handle addition, but rather badly
 250 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 251    ChildCode64 code1 r1lo <- iselExpr64 e1
 252    (rlo,rhi) <- getNewRegPairNat II32
 253    let
 254         r = fromIntegral (fromIntegral i :: Word32)
 255         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 256         r1hi = getHiVRegFromLo r1lo
 257         code =  code1 `appOL`
 258                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 259                        ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
 260                        MOV II32 (OpReg r1hi) (OpReg rhi),
 261                        ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 262    -- in
 263    return (ChildCode64 code rlo)
 264
 265 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 266    ChildCode64 code1 r1lo <- iselExpr64 e1
 267    ChildCode64 code2 r2lo <- iselExpr64 e2
 268    (rlo,rhi) <- getNewRegPairNat II32
 269    let
 270         r1hi = getHiVRegFromLo r1lo
 271         r2hi = getHiVRegFromLo r2lo
 272         code =  code1 `appOL`
 273                 code2 `appOL`
 274                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 275                        ADD II32 (OpReg r2lo) (OpReg rlo),
 276                        MOV II32 (OpReg r1hi) (OpReg rhi),
 277                        ADC II32 (OpReg r2hi) (OpReg rhi) ]
 278    -- in
 279    return (ChildCode64 code rlo)
 280
 281 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
 282      fn <- getAnyReg expr
 283      r_dst_lo <-  getNewRegNat II32
 284      let r_dst_hi = getHiVRegFromLo r_dst_lo
 285          code = fn r_dst_lo
 286      return (
 287              ChildCode64 (code `snocOL`
 288                           MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 289                           r_dst_lo
 290             )
 291
 292 iselExpr64 expr
 293    = pprPanic "iselExpr64(i386)" (ppr expr)
 294
 295 #endif /* i386_TARGET_ARCH */
 296
 297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 298
 299 #if sparc_TARGET_ARCH
 300
 301 assignMem_I64Code addrTree valueTree = do
 302      Amode addr addr_code <- getAmode addrTree
 303      ChildCode64 vcode rlo <- iselExpr64 valueTree
 304      (src, code) <- getSomeReg addrTree
 305      let
 306          rhi = getHiVRegFromLo rlo
 307          -- Big-endian store
 308          mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
 309          mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
 310      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 311
 312 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 313      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 314      let
 315          r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
 316          r_dst_hi = getHiVRegFromLo r_dst_lo
 317          r_src_hi = getHiVRegFromLo r_src_lo
 318          mov_lo = mkMOV r_src_lo r_dst_lo
 319          mov_hi = mkMOV r_src_hi r_dst_hi
 320          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 321      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 322 assignReg_I64Code lvalue valueTree
 323    = panic "assignReg_I64Code(sparc): invalid lvalue"
 324
 325
 326 -- Load a 64 bit word
 327 iselExpr64 (CmmLoad addrTree ty)
 328  | isWord64 ty
 329  = do   Amode amode addr_code   <- getAmode addrTree
 330         let result
 331
 332                 | AddrRegReg r1 r2      <- amode
 333                 = do    rlo     <- getNewRegNat II32
 334                         tmp     <- getNewRegNat II32
 335                         let rhi = getHiVRegFromLo rlo
 336
 337                         return  $ ChildCode64
 338                                 (        addr_code
 339                                 `appOL`  toOL
 340                                          [ ADD False False r1 (RIReg r2) tmp
 341                                          , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
 342                                          , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
 343                                 rlo
 344
 345                 | AddrRegImm r1 (ImmInt i) <- amode
 346                 = do    rlo     <- getNewRegNat II32
 347                         let rhi = getHiVRegFromLo rlo
 348
 349                         return  $ ChildCode64
 350                                 (        addr_code
 351                                 `appOL`  toOL
 352                                          [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
 353                                          , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
 354                                 rlo
 355
 356         result
 357
 358
 359 -- Add a literal to a 64 bit integer
 360 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
 361  = do   ChildCode64 code1 r1_lo <- iselExpr64 e1
 362         let r1_hi       = getHiVRegFromLo r1_lo
 363
 364         r_dst_lo        <- getNewRegNat II32
 365         let r_dst_hi    =  getHiVRegFromLo r_dst_lo
 366
 367         return  $ ChildCode64
 368                         ( toOL
 369                         [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
 370                         , ADD True  False r1_hi (RIReg g0)         r_dst_hi ])
 371                         r_dst_lo
 372
 373
 374 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
 375      r_dst_lo <-  getNewRegNat II32
 376      let r_dst_hi = getHiVRegFromLo r_dst_lo
 377          r_src_lo = mkVReg uq II32
 378          r_src_hi = getHiVRegFromLo r_src_lo
 379          mov_lo = mkMOV r_src_lo r_dst_lo
 380          mov_hi = mkMOV r_src_hi r_dst_hi
 381          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 382      return (
 383             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 384          )
 385
 386 iselExpr64 expr
 387    = pprPanic "iselExpr64(sparc)" (ppr expr)
 388
 389 #endif /* sparc_TARGET_ARCH */
 390
 391 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 392
 393 #if powerpc_TARGET_ARCH
 394
 395 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 396 getI64Amodes addrTree = do
 397     Amode hi_addr addr_code <- getAmode addrTree
 398     case addrOffset hi_addr 4 of
 399         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 400         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 401                            return (AddrRegImm hi_ptr (ImmInt 0),
 402                                    AddrRegImm hi_ptr (ImmInt 4),
 403                                    code)
 404
 405 assignMem_I64Code addrTree valueTree = do
 406         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 407         ChildCode64 vcode rlo <- iselExpr64 valueTree
 408         let
 409                 rhi = getHiVRegFromLo rlo
 410
 411                 -- Big-endian store
 412                 mov_hi = ST II32 rhi hi_addr
 413                 mov_lo = ST II32 rlo lo_addr
 414         -- in
 415         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 416
 417 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 418    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 419    let
 420          r_dst_lo = mkVReg u_dst II32
 421          r_dst_hi = getHiVRegFromLo r_dst_lo
 422          r_src_hi = getHiVRegFromLo r_src_lo
 423          mov_lo = MR r_dst_lo r_src_lo
 424          mov_hi = MR r_dst_hi r_src_hi
 425    -- in
 426    return (
 427         vcode `snocOL` mov_lo `snocOL` mov_hi
 428      )
 429
 430 assignReg_I64Code lvalue valueTree
 431    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 432
 433
 434 -- Don't delete this -- it's very handy for debugging.
 435 --iselExpr64 expr
 436 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 437 --   = panic "iselExpr64(???)"
 438
 439 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 440     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 441     (rlo, rhi) <- getNewRegPairNat II32
 442     let mov_hi = LD II32 rhi hi_addr
 443         mov_lo = LD II32 rlo lo_addr
 444     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 445                          rlo
 446
 447 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 448    = return (ChildCode64 nilOL (mkVReg vu II32))
 449
 450 iselExpr64 (CmmLit (CmmInt i _)) = do
 451   (rlo,rhi) <- getNewRegPairNat II32
 452   let
 453         half0 = fromIntegral (fromIntegral i :: Word16)
 454         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 455         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 456         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 457
 458         code = toOL [
 459                 LIS rlo (ImmInt half1),
 460                 OR rlo rlo (RIImm $ ImmInt half0),
 461                 LIS rhi (ImmInt half3),
 462                 OR rlo rlo (RIImm $ ImmInt half2)
 463                 ]
 464   -- in
 465   return (ChildCode64 code rlo)
 466
 467 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 468    ChildCode64 code1 r1lo <- iselExpr64 e1
 469    ChildCode64 code2 r2lo <- iselExpr64 e2
 470    (rlo,rhi) <- getNewRegPairNat II32
 471    let
 472         r1hi = getHiVRegFromLo r1lo
 473         r2hi = getHiVRegFromLo r2lo
 474         code =  code1 `appOL`
 475                 code2 `appOL`
 476                 toOL [ ADDC rlo r1lo r2lo,
 477                        ADDE rhi r1hi r2hi ]
 478    -- in
 479    return (ChildCode64 code rlo)
 480
 481 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
 482     (expr_reg,expr_code) <- getSomeReg expr
 483     (rlo, rhi) <- getNewRegPairNat II32
 484     let mov_hi = LI rhi (ImmInt 0)
 485         mov_lo = MR rlo expr_reg
 486     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 487                          rlo
 488 iselExpr64 expr
 489    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 490
 491 #endif /* powerpc_TARGET_ARCH */
 492
 493
 494 -- -----------------------------------------------------------------------------
 495 -- The 'Register' type
 496
 497 -- 'Register's passed up the tree.  If the stix code forces the register
 498 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 499 -- otherwise, it comes out as @Any@, and the parent can decide which
 500 -- register to put it in.
 501
 502 data Register
 503   = Fixed   Size Reg InstrBlock
 504   | Any     Size (Reg -> InstrBlock)
 505
 506 swizzleRegisterRep :: Register -> Size -> Register
 507 -- Change the width; it's a no-op
 508 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 509 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 510
 511
 512 -- -----------------------------------------------------------------------------
 513 -- Utils based on getRegister, below
 514
 515 -- The dual to getAnyReg: compute an expression into a register, but
 516 -- we don't mind which one it is.
 517 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 518 getSomeReg expr = do
 519   r <- getRegister expr
 520   case r of
 521     Any rep code -> do
 522         tmp <- getNewRegNat rep
 523         return (tmp, code tmp)
 524     Fixed _ reg code ->
 525         return (reg, code)
 526
 527 -- -----------------------------------------------------------------------------
 528 -- Grab the Reg for a CmmReg
 529
 530 getRegisterReg :: CmmReg -> Reg
 531
 532 getRegisterReg (CmmLocal (LocalReg u pk))
 533   = mkVReg u (cmmTypeSize pk)
 534
 535 getRegisterReg (CmmGlobal mid)
 536   = case get_GlobalReg_reg_or_addr mid of
 537        Left (RealReg rrno) -> RealReg rrno
 538        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 539           -- By this stage, the only MagicIds remaining should be the
 540           -- ones which map to a real machine register on this
 541           -- platform.  Hence ...
 542
 543
 544 -- -----------------------------------------------------------------------------
 545 -- Generate code to get a subtree into a Register
 546
 547 -- Don't delete this -- it's very handy for debugging.
 548 --getRegister expr
 549 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 550 --   = panic "getRegister(???)"
 551
 552 getRegister :: CmmExpr -> NatM Register
 553
 554 #if !x86_64_TARGET_ARCH
 555     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 556     -- register, it can only be used for rip-relative addressing.
 557 getRegister (CmmReg (CmmGlobal PicBaseReg))
 558   = do
 559       reg <- getPicBaseNat wordSize
 560       return (Fixed wordSize reg nilOL)
 561 #endif
 562
 563 getRegister (CmmReg reg)
 564   = return (Fixed (cmmTypeSize (cmmRegType reg))
 565                   (getRegisterReg reg) nilOL)
 566
 567 getRegister tree@(CmmRegOff _ _)
 568   = getRegister (mangleIndexTree tree)
 569
 570
 571 #if WORD_SIZE_IN_BITS==32
 572     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 573     -- TO_W_(x), TO_W_(x >> 32)
 574
 575 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 576              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 577   ChildCode64 code rlo <- iselExpr64 x
 578   return $ Fixed II32 (getHiVRegFromLo rlo) code
 579
 580 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 581              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 582   ChildCode64 code rlo <- iselExpr64 x
 583   return $ Fixed II32 (getHiVRegFromLo rlo) code
 584
 585 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 586   ChildCode64 code rlo <- iselExpr64 x
 587   return $ Fixed II32 rlo code
 588
 589 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 590   ChildCode64 code rlo <- iselExpr64 x
 591   return $ Fixed II32 rlo code
 592
 593 #endif
 594
 595 -- end of machine-"independent" bit; here we go on the rest...
 596
 597 #if alpha_TARGET_ARCH
 598
 599 getRegister (StDouble d)
 600   = getBlockIdNat                   `thenNat` \ lbl ->
 601     getNewRegNat PtrRep             `thenNat` \ tmp ->
 602     let code dst = mkSeqInstrs [
 603             LDATA RoDataSegment lbl [
 604                     DATA TF [ImmLab (rational d)]
 605                 ],
 606             LDA tmp (AddrImm (ImmCLbl lbl)),
 607             LD TF dst (AddrReg tmp)]
 608     in
 609         return (Any FF64 code)
 610
 611 getRegister (StPrim primop [x]) -- unary PrimOps
 612   = case primop of
 613       IntNegOp -> trivialUCode (NEG Q False) x
 614
 615       NotOp    -> trivialUCode NOT x
 616
 617       FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
 618       DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
 619
 620       OrdOp -> coerceIntCode IntRep x
 621       ChrOp -> chrCode x
 622
 623       Float2IntOp  -> coerceFP2Int    x
 624       Int2FloatOp  -> coerceInt2FP pr x
 625       Double2IntOp -> coerceFP2Int    x
 626       Int2DoubleOp -> coerceInt2FP pr x
 627
 628       Double2FloatOp -> coerceFltCode x
 629       Float2DoubleOp -> coerceFltCode x
 630
 631       other_op -> getRegister (StCall fn CCallConv FF64 [x])
 632         where
 633           fn = case other_op of
 634                  FloatExpOp    -> fsLit "exp"
 635                  FloatLogOp    -> fsLit "log"
 636                  FloatSqrtOp   -> fsLit "sqrt"
 637                  FloatSinOp    -> fsLit "sin"
 638                  FloatCosOp    -> fsLit "cos"
 639                  FloatTanOp    -> fsLit "tan"
 640                  FloatAsinOp   -> fsLit "asin"
 641                  FloatAcosOp   -> fsLit "acos"
 642                  FloatAtanOp   -> fsLit "atan"
 643                  FloatSinhOp   -> fsLit "sinh"
 644                  FloatCoshOp   -> fsLit "cosh"
 645                  FloatTanhOp   -> fsLit "tanh"
 646                  DoubleExpOp   -> fsLit "exp"
 647                  DoubleLogOp   -> fsLit "log"
 648                  DoubleSqrtOp  -> fsLit "sqrt"
 649                  DoubleSinOp   -> fsLit "sin"
 650                  DoubleCosOp   -> fsLit "cos"
 651                  DoubleTanOp   -> fsLit "tan"
 652                  DoubleAsinOp  -> fsLit "asin"
 653                  DoubleAcosOp  -> fsLit "acos"
 654                  DoubleAtanOp  -> fsLit "atan"
 655                  DoubleSinhOp  -> fsLit "sinh"
 656                  DoubleCoshOp  -> fsLit "cosh"
 657                  DoubleTanhOp  -> fsLit "tanh"
 658   where
 659     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 660
 661 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 662   = case primop of
 663       CharGtOp -> trivialCode (CMP LTT) y x
 664       CharGeOp -> trivialCode (CMP LE) y x
 665       CharEqOp -> trivialCode (CMP EQQ) x y
 666       CharNeOp -> int_NE_code x y
 667       CharLtOp -> trivialCode (CMP LTT) x y
 668       CharLeOp -> trivialCode (CMP LE) x y
 669
 670       IntGtOp  -> trivialCode (CMP LTT) y x
 671       IntGeOp  -> trivialCode (CMP LE) y x
 672       IntEqOp  -> trivialCode (CMP EQQ) x y
 673       IntNeOp  -> int_NE_code x y
 674       IntLtOp  -> trivialCode (CMP LTT) x y
 675       IntLeOp  -> trivialCode (CMP LE) x y
 676
 677       WordGtOp -> trivialCode (CMP ULT) y x
 678       WordGeOp -> trivialCode (CMP ULE) x y
 679       WordEqOp -> trivialCode (CMP EQQ)  x y
 680       WordNeOp -> int_NE_code x y
 681       WordLtOp -> trivialCode (CMP ULT) x y
 682       WordLeOp -> trivialCode (CMP ULE) x y
 683
 684       AddrGtOp -> trivialCode (CMP ULT) y x
 685       AddrGeOp -> trivialCode (CMP ULE) y x
 686       AddrEqOp -> trivialCode (CMP EQQ)  x y
 687       AddrNeOp -> int_NE_code x y
 688       AddrLtOp -> trivialCode (CMP ULT) x y
 689       AddrLeOp -> trivialCode (CMP ULE) x y
 690
 691       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 692       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 693       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 694       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 695       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 696       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 697
 698       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 699       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 700       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 701       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 702       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 703       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 704
 705       IntAddOp  -> trivialCode (ADD Q False) x y
 706       IntSubOp  -> trivialCode (SUB Q False) x y
 707       IntMulOp  -> trivialCode (MUL Q False) x y
 708       IntQuotOp -> trivialCode (DIV Q False) x y
 709       IntRemOp  -> trivialCode (REM Q False) x y
 710
 711       WordAddOp  -> trivialCode (ADD Q False) x y
 712       WordSubOp  -> trivialCode (SUB Q False) x y
 713       WordMulOp  -> trivialCode (MUL Q False) x y
 714       WordQuotOp -> trivialCode (DIV Q True) x y
 715       WordRemOp  -> trivialCode (REM Q True) x y
 716
 717       FloatAddOp -> trivialFCode  W32 (FADD TF) x y
 718       FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
 719       FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
 720       FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
 721
 722       DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
 723       DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
 724       DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
 725       DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
 726
 727       AddrAddOp  -> trivialCode (ADD Q False) x y
 728       AddrSubOp  -> trivialCode (SUB Q False) x y
 729       AddrRemOp  -> trivialCode (REM Q True) x y
 730
 731       AndOp  -> trivialCode AND x y
 732       OrOp   -> trivialCode OR  x y
 733       XorOp  -> trivialCode XOR x y
 734       SllOp  -> trivialCode SLL x y
 735       SrlOp  -> trivialCode SRL x y
 736
 737       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 738       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 739       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 740
 741       FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
 742       DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
 743   where
 744     {- ------------------------------------------------------------
 745         Some bizarre special code for getting condition codes into
 746         registers.  Integer non-equality is a test for equality
 747         followed by an XOR with 1.  (Integer comparisons always set
 748         the result register to 0 or 1.)  Floating point comparisons of
 749         any kind leave the result in a floating point register, so we
 750         need to wrangle an integer register out of things.
 751     -}
 752     int_NE_code :: StixTree -> StixTree -> NatM Register
 753
 754     int_NE_code x y
 755       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 756         getNewRegNat IntRep             `thenNat` \ tmp ->
 757         let
 758             code = registerCode register tmp
 759             src  = registerName register tmp
 760             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 761         in
 762         return (Any IntRep code__2)
 763
 764     {- ------------------------------------------------------------
 765         Comments for int_NE_code also apply to cmpF_code
 766     -}
 767     cmpF_code
 768         :: (Reg -> Reg -> Reg -> Instr)
 769         -> Cond
 770         -> StixTree -> StixTree
 771         -> NatM Register
 772
 773     cmpF_code instr cond x y
 774       = trivialFCode pr instr x y       `thenNat` \ register ->
 775         getNewRegNat FF64               `thenNat` \ tmp ->
 776         getBlockIdNat                   `thenNat` \ lbl ->
 777         let
 778             code = registerCode register tmp
 779             result  = registerName register tmp
 780
 781             code__2 dst = code . mkSeqInstrs [
 782                 OR zeroh (RIImm (ImmInt 1)) dst,
 783                 BF cond  result (ImmCLbl lbl),
 784                 OR zeroh (RIReg zeroh) dst,
 785                 NEWBLOCK lbl]
 786         in
 787         return (Any IntRep code__2)
 788       where
 789         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 790       ------------------------------------------------------------
 791
 792 getRegister (CmmLoad pk mem)
 793   = getAmode mem                    `thenNat` \ amode ->
 794     let
 795         code = amodeCode amode
 796         src   = amodeAddr amode
 797         size = primRepToSize pk
 798         code__2 dst = code . mkSeqInstr (LD size dst src)
 799     in
 800     return (Any pk code__2)
 801
 802 getRegister (StInt i)
 803   | fits8Bits i
 804   = let
 805         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 806     in
 807     return (Any IntRep code)
 808   | otherwise
 809   = let
 810         code dst = mkSeqInstr (LDI Q dst src)
 811     in
 812     return (Any IntRep code)
 813   where
 814     src = ImmInt (fromInteger i)
 815
 816 getRegister leaf
 817   | isJust imm
 818   = let
 819         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 820     in
 821     return (Any PtrRep code)
 822   where
 823     imm = maybeImm leaf
 824     imm__2 = case imm of Just x -> x
 825
 826 #endif /* alpha_TARGET_ARCH */
 827
 828 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 829
 830 #if i386_TARGET_ARCH
 831
 832 getRegister (CmmLit (CmmFloat f W32)) = do
 833     lbl <- getNewLabelNat
 834     dflags <- getDynFlagsNat
 835     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 836     Amode addr addr_code <- getAmode dynRef
 837     let code dst =
 838             LDATA ReadOnlyData
 839                         [CmmDataLabel lbl,
 840                          CmmStaticLit (CmmFloat f W32)]
 841             `consOL` (addr_code `snocOL`
 842             GLD FF32 addr dst)
 843     -- in
 844     return (Any FF32 code)
 845
 846
 847 getRegister (CmmLit (CmmFloat d W64))
 848   | d == 0.0
 849   = let code dst = unitOL (GLDZ dst)
 850     in  return (Any FF64 code)
 851
 852   | d == 1.0
 853   = let code dst = unitOL (GLD1 dst)
 854     in  return (Any FF64 code)
 855
 856   | otherwise = do
 857     lbl <- getNewLabelNat
 858     dflags <- getDynFlagsNat
 859     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 860     Amode addr addr_code <- getAmode dynRef
 861     let code dst =
 862             LDATA ReadOnlyData
 863                         [CmmDataLabel lbl,
 864                          CmmStaticLit (CmmFloat d W64)]
 865             `consOL` (addr_code `snocOL`
 866             GLD FF64 addr dst)
 867     -- in
 868     return (Any FF64 code)
 869
 870 #endif /* i386_TARGET_ARCH */
 871
 872 #if x86_64_TARGET_ARCH
 873
 874 getRegister (CmmLit (CmmFloat 0.0 w)) = do
 875    let size = floatSize w
 876        code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
 877         -- I don't know why there are xorpd, xorps, and pxor instructions.
 878         -- They all appear to do the same thing --SDM
 879    return (Any size code)
 880
 881 getRegister (CmmLit (CmmFloat f w)) = do
 882     lbl <- getNewLabelNat
 883     let code dst = toOL [
 884             LDATA ReadOnlyData
 885                         [CmmDataLabel lbl,
 886                          CmmStaticLit (CmmFloat f w)],
 887             MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 888             ]
 889     -- in
 890     return (Any size code)
 891   where size = floatSize w
 892
 893 #endif /* x86_64_TARGET_ARCH */
 894
 895 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 896
 897 -- catch simple cases of zero- or sign-extended load
 898 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
 899   code <- intLoadCode (MOVZxL II8) addr
 900   return (Any II32 code)
 901
 902 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
 903   code <- intLoadCode (MOVSxL II8) addr
 904   return (Any II32 code)
 905
 906 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
 907   code <- intLoadCode (MOVZxL II16) addr
 908   return (Any II32 code)
 909
 910 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
 911   code <- intLoadCode (MOVSxL II16) addr
 912   return (Any II32 code)
 913
 914 #endif
 915
 916 #if x86_64_TARGET_ARCH
 917
 918 -- catch simple cases of zero- or sign-extended load
 919 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
 920   code <- intLoadCode (MOVZxL II8) addr
 921   return (Any II64 code)
 922
 923 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
 924   code <- intLoadCode (MOVSxL II8) addr
 925   return (Any II64 code)
 926
 927 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
 928   code <- intLoadCode (MOVZxL II16) addr
 929   return (Any II64 code)
 930
 931 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
 932   code <- intLoadCode (MOVSxL II16) addr
 933   return (Any II64 code)
 934
 935 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
 936   code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
 937   return (Any II64 code)
 938
 939 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
 940   code <- intLoadCode (MOVSxL II32) addr
 941   return (Any II64 code)
 942
 943 #endif
 944
 945 #if x86_64_TARGET_ARCH
 946 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
 947                                      CmmLit displacement])
 948     = return $ Any II64 (\dst -> unitOL $
 949         LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 950 #endif
 951
 952 #if x86_64_TARGET_ARCH
 953 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
 954   x_code <- getAnyReg x
 955   lbl <- getNewLabelNat
 956   let
 957     code dst = x_code dst `appOL` toOL [
 958         -- This is how gcc does it, so it can't be that bad:
 959         LDATA ReadOnlyData16 [
 960                 CmmAlign 16,
 961                 CmmDataLabel lbl,
 962                 CmmStaticLit (CmmInt 0x80000000 W32),
 963                 CmmStaticLit (CmmInt 0 W32),
 964                 CmmStaticLit (CmmInt 0 W32),
 965                 CmmStaticLit (CmmInt 0 W32)
 966         ],
 967         XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 968                 -- xorps, so we need the 128-bit constant
 969                 -- ToDo: rip-relative
 970         ]
 971   --
 972   return (Any FF32 code)
 973
 974 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
 975   x_code <- getAnyReg x
 976   lbl <- getNewLabelNat
 977   let
 978         -- This is how gcc does it, so it can't be that bad:
 979     code dst = x_code dst `appOL` toOL [
 980         LDATA ReadOnlyData16 [
 981                 CmmAlign 16,
 982                 CmmDataLabel lbl,
 983                 CmmStaticLit (CmmInt 0x8000000000000000 W64),
 984                 CmmStaticLit (CmmInt 0 W64)
 985         ],
 986                 -- gcc puts an unpck here.  Wonder if we need it.
 987         XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 988                 -- xorpd, so we need the 128-bit constant
 989         ]
 990   --
 991   return (Any FF64 code)
 992 #endif
 993
 994 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 995
 996 getRegister (CmmMachOp mop [x]) -- unary MachOps
 997   = case mop of
 998 #if i386_TARGET_ARCH
 999       MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
1000       MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
1001 #endif
1002
1003       MO_S_Neg w -> triv_ucode NEGI (intSize w)
1004       MO_F_Neg w -> triv_ucode NEGI (floatSize w)
1005       MO_Not w   -> triv_ucode NOT  (intSize w)
1006
1007       -- Nop conversions
1008       MO_UU_Conv W32 W8  -> toI8Reg  W32 x
1009       MO_SS_Conv W32 W8  -> toI8Reg  W32 x
1010       MO_UU_Conv W16 W8  -> toI8Reg  W16 x
1011       MO_SS_Conv W16 W8  -> toI8Reg  W16 x
1012       MO_UU_Conv W32 W16 -> toI16Reg W32 x
1013       MO_SS_Conv W32 W16 -> toI16Reg W32 x
1014
1015 #if x86_64_TARGET_ARCH
1016       MO_UU_Conv W64 W32 -> conversionNop II64 x
1017       MO_SS_Conv W64 W32 -> conversionNop II64 x
1018       MO_UU_Conv W64 W16 -> toI16Reg W64 x
1019       MO_SS_Conv W64 W16 -> toI16Reg W64 x
1020       MO_UU_Conv W64 W8  -> toI8Reg  W64 x
1021       MO_SS_Conv W64 W8  -> toI8Reg  W64 x
1022 #endif
1023
1024       MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1025       MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1026
1027       -- widenings
1028       MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
1029       MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
1030       MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
1031
1032       MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
1033       MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
1034       MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
1035
1036 #if x86_64_TARGET_ARCH
1037       MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
1038       MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1039       MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1040       MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
1041       MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1042       MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1043         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1044         -- However, we don't want the register allocator to throw it
1045         -- away as an unnecessary reg-to-reg move, so we keep it in
1046         -- the form of a movzl and print it as a movl later.
1047 #endif
1048
1049 #if i386_TARGET_ARCH
1050       MO_FF_Conv W32 W64 -> conversionNop FF64 x
1051       MO_FF_Conv W64 W32 -> conversionNop FF32 x
1052 #else
1053       MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1054       MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1055 #endif
1056
1057       MO_FS_Conv from to -> coerceFP2Int from to x
1058       MO_SF_Conv from to -> coerceInt2FP from to x
1059
1060       other -> pprPanic "getRegister" (pprMachOp mop)
1061    where
1062         triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1063         triv_ucode instr size = trivialUCode size (instr size) x
1064
1065         -- signed or unsigned extension.
1066         integerExtend :: Width -> Width
1067                       -> (Size -> Operand -> Operand -> Instr)
1068                       -> CmmExpr -> NatM Register
1069         integerExtend from to instr expr = do
1070             (reg,e_code) <- if from == W8 then getByteReg expr
1071                                           else getSomeReg expr
1072             let
1073                 code dst =
1074                   e_code `snocOL`
1075                   instr (intSize from) (OpReg reg) (OpReg dst)
1076             return (Any (intSize to) code)
1077
1078         toI8Reg :: Width -> CmmExpr -> NatM Register
1079         toI8Reg new_rep expr
1080             = do codefn <- getAnyReg expr
1081                  return (Any (intSize new_rep) codefn)
1082                 -- HACK: use getAnyReg to get a byte-addressable register.
1083                 -- If the source was a Fixed register, this will add the
1084                 -- mov instruction to put it into the desired destination.
1085                 -- We're assuming that the destination won't be a fixed
1086                 -- non-byte-addressable register; it won't be, because all
1087                 -- fixed registers are word-sized.
1088
1089         toI16Reg = toI8Reg -- for now
1090
1091         conversionNop :: Size -> CmmExpr -> NatM Register
1092         conversionNop new_size expr
1093             = do e_code <- getRegister expr
1094                  return (swizzleRegisterRep e_code new_size)
1095
1096
1097 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1098   = case mop of
1099       MO_F_Eq w -> condFltReg EQQ x y
1100       MO_F_Ne w -> condFltReg NE x y
1101       MO_F_Gt w -> condFltReg GTT x y
1102       MO_F_Ge w -> condFltReg GE x y
1103       MO_F_Lt w -> condFltReg LTT x y
1104       MO_F_Le w -> condFltReg LE x y
1105
1106       MO_Eq rep   -> condIntReg EQQ x y
1107       MO_Ne rep   -> condIntReg NE x y
1108
1109       MO_S_Gt rep -> condIntReg GTT x y
1110       MO_S_Ge rep -> condIntReg GE x y
1111       MO_S_Lt rep -> condIntReg LTT x y
1112       MO_S_Le rep -> condIntReg LE x y
1113
1114       MO_U_Gt rep -> condIntReg GU  x y
1115       MO_U_Ge rep -> condIntReg GEU x y
1116       MO_U_Lt rep -> condIntReg LU  x y
1117       MO_U_Le rep -> condIntReg LEU x y
1118
1119 #if i386_TARGET_ARCH
1120       MO_F_Add w -> trivialFCode w GADD x y
1121       MO_F_Sub w -> trivialFCode w GSUB x y
1122       MO_F_Quot w -> trivialFCode w GDIV x y
1123       MO_F_Mul w -> trivialFCode w GMUL x y
1124 #endif
1125
1126 #if x86_64_TARGET_ARCH
1127       MO_F_Add w -> trivialFCode w ADD x y
1128       MO_F_Sub w -> trivialFCode w SUB x y
1129       MO_F_Quot w -> trivialFCode w FDIV x y
1130       MO_F_Mul w -> trivialFCode w MUL x y
1131 #endif
1132
1133       MO_Add rep -> add_code rep x y
1134       MO_Sub rep -> sub_code rep x y
1135
1136       MO_S_Quot rep -> div_code rep True  True  x y
1137       MO_S_Rem  rep -> div_code rep True  False x y
1138       MO_U_Quot rep -> div_code rep False True  x y
1139       MO_U_Rem  rep -> div_code rep False False x y
1140
1141       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1142
1143       MO_Mul rep -> triv_op rep IMUL
1144       MO_And rep -> triv_op rep AND
1145       MO_Or  rep -> triv_op rep OR
1146       MO_Xor rep -> triv_op rep XOR
1147
1148         {- Shift ops on x86s have constraints on their source, it
1149            either has to be Imm, CL or 1
1150             => trivialCode is not restrictive enough (sigh.)
1151         -}
1152       MO_Shl rep   -> shift_code rep SHL x y {-False-}
1153       MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1154       MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1155
1156       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1157   where
1158     --------------------
1159     triv_op width instr = trivialCode width op (Just op) x y
1160                         where op   = instr (intSize width)
1161
1162     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1163     imulMayOflo rep a b = do
1164          (a_reg, a_code) <- getNonClobberedReg a
1165          b_code <- getAnyReg b
1166          let
1167              shift_amt  = case rep of
1168                            W32 -> 31
1169                            W64 -> 63
1170                            _ -> panic "shift_amt"
1171
1172              size = intSize rep
1173              code = a_code `appOL` b_code eax `appOL`
1174                         toOL [
1175                            IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
1176                            SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1177                                 -- sign extend lower part
1178                            SUB size (OpReg edx) (OpReg eax)
1179                                 -- compare against upper
1180                            -- eax==0 if high part == sign extended low part
1181                         ]
1182          -- in
1183          return (Fixed size eax code)
1184
1185     --------------------
1186     shift_code :: Width
1187                -> (Size -> Operand -> Operand -> Instr)
1188                -> CmmExpr
1189                -> CmmExpr
1190                -> NatM Register
1191
1192     {- Case1: shift length as immediate -}
1193     shift_code width instr x y@(CmmLit lit) = do
1194           x_code <- getAnyReg x
1195           let
1196                size = intSize width
1197                code dst
1198                   = x_code dst `snocOL`
1199                     instr size (OpImm (litToImm lit)) (OpReg dst)
1200           -- in
1201           return (Any size code)
1202
1203     {- Case2: shift length is complex (non-immediate)
1204       * y must go in %ecx.
1205       * we cannot do y first *and* put its result in %ecx, because
1206         %ecx might be clobbered by x.
1207       * if we do y second, then x cannot be
1208         in a clobbered reg.  Also, we cannot clobber x's reg
1209         with the instruction itself.
1210       * so we can either:
1211         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1212         - do y second and put its result into %ecx.  x gets placed in a fresh
1213           tmp.  This is likely to be better, becuase the reg alloc can
1214           eliminate this reg->reg move here (it won't eliminate the other one,
1215           because the move is into the fixed %ecx).
1216     -}
1217     shift_code width instr x y{-amount-} = do
1218         x_code <- getAnyReg x
1219         let size = intSize width
1220         tmp <- getNewRegNat size
1221         y_code <- getAnyReg y
1222         let
1223            code = x_code tmp `appOL`
1224                   y_code ecx `snocOL`
1225                   instr size (OpReg ecx) (OpReg tmp)
1226         -- in
1227         return (Fixed size tmp code)
1228
1229     --------------------
1230     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1231     add_code rep x (CmmLit (CmmInt y _))
1232         | is32BitInteger y = add_int rep x y
1233     add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1234       where size = intSize rep
1235
1236     --------------------
1237     sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1238     sub_code rep x (CmmLit (CmmInt y _))
1239         | is32BitInteger (-y) = add_int rep x (-y)
1240     sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1241
1242     -- our three-operand add instruction:
1243     add_int width x y = do
1244         (x_reg, x_code) <- getSomeReg x
1245         let
1246             size = intSize width
1247             imm = ImmInt (fromInteger y)
1248             code dst
1249                = x_code `snocOL`
1250                  LEA size
1251                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1252                         (OpReg dst)
1253         --
1254         return (Any size code)
1255
1256     ----------------------
1257     div_code width signed quotient x y = do
1258            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1259            x_code <- getAnyReg x
1260            let
1261              size = intSize width
1262              widen | signed    = CLTD size
1263                    | otherwise = XOR size (OpReg edx) (OpReg edx)
1264
1265              instr | signed    = IDIV
1266                    | otherwise = DIV
1267
1268              code = y_code `appOL`
1269                     x_code eax `appOL`
1270                     toOL [widen, instr size y_op]
1271
1272              result | quotient  = eax
1273                     | otherwise = edx
1274
1275            -- in
1276            return (Fixed size result code)
1277
1278
1279 getRegister (CmmLoad mem pk)
1280   | isFloatType pk
1281   = do
1282     Amode src mem_code <- getAmode mem
1283     let
1284         size = cmmTypeSize pk
1285         code dst = mem_code `snocOL`
1286                    IF_ARCH_i386(GLD size src dst,
1287                                 MOV size (OpAddr src) (OpReg dst))
1288     return (Any size code)
1289
1290 #if i386_TARGET_ARCH
1291 getRegister (CmmLoad mem pk)
1292   | not (isWord64 pk)
1293   = do
1294     code <- intLoadCode instr mem
1295     return (Any size code)
1296   where
1297     width = typeWidth pk
1298     size = intSize width
1299     instr = case width of
1300                 W8     -> MOVZxL II8
1301                 _other -> MOV size
1302         -- We always zero-extend 8-bit loads, if we
1303         -- can't think of anything better.  This is because
1304         -- we can't guarantee access to an 8-bit variant of every register
1305         -- (esi and edi don't have 8-bit variants), so to make things
1306         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1307 #endif
1308
1309 #if x86_64_TARGET_ARCH
1310 -- Simpler memory load code on x86_64
1311 getRegister (CmmLoad mem pk)
1312   = do
1313     code <- intLoadCode (MOV size) mem
1314     return (Any size code)
1315   where size = intSize $ typeWidth pk
1316 #endif
1317
1318 getRegister (CmmLit (CmmInt 0 width))
1319   = let
1320         size = intSize width
1321
1322         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1323         adj_size = case size of II64 -> II32; _ -> size
1324         size1 = IF_ARCH_i386( size, adj_size )
1325         code dst
1326            = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1327     in
1328         return (Any size code)
1329
1330 #if x86_64_TARGET_ARCH
1331   -- optimisation for loading small literals on x86_64: take advantage
1332   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1333   -- instruction forms are shorter.
1334 getRegister (CmmLit lit)
1335   | isWord64 (cmmLitType lit), not (isBigLit lit)
1336   = let
1337         imm = litToImm lit
1338         code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1339     in
1340         return (Any II64 code)
1341   where
1342    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1343    isBigLit _ = False
1344         -- note1: not the same as (not.is32BitLit), because that checks for
1345         -- signed literals that fit in 32 bits, but we want unsigned
1346         -- literals here.
1347         -- note2: all labels are small, because we're assuming the
1348         -- small memory model (see gcc docs, -mcmodel=small).
1349 #endif
1350
1351 getRegister (CmmLit lit)
1352   = let
1353         size = cmmTypeSize (cmmLitType lit)
1354         imm = litToImm lit
1355         code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1356     in
1357         return (Any size code)
1358
1359 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1360
1361
1362 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1363    -> NatM (Reg -> InstrBlock)
1364 intLoadCode instr mem = do
1365   Amode src mem_code <- getAmode mem
1366   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1367
1368 -- Compute an expression into *any* register, adding the appropriate
1369 -- move instruction if necessary.
1370 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1371 getAnyReg expr = do
1372   r <- getRegister expr
1373   anyReg r
1374
1375 anyReg :: Register -> NatM (Reg -> InstrBlock)
1376 anyReg (Any _ code)          = return code
1377 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1378
1379 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1380 -- Fixed registers might not be byte-addressable, so we make sure we've
1381 -- got a temporary, inserting an extra reg copy if necessary.
1382 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1383 #if x86_64_TARGET_ARCH
1384 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1385 #else
1386 getByteReg expr = do
1387   r <- getRegister expr
1388   case r of
1389     Any rep code -> do
1390         tmp <- getNewRegNat rep
1391         return (tmp, code tmp)
1392     Fixed rep reg code
1393         | isVirtualReg reg -> return (reg,code)
1394         | otherwise -> do
1395             tmp <- getNewRegNat rep
1396             return (tmp, code `snocOL` reg2reg rep reg tmp)
1397         -- ToDo: could optimise slightly by checking for byte-addressable
1398         -- real registers, but that will happen very rarely if at all.
1399 #endif
1400
1401 -- Another variant: this time we want the result in a register that cannot
1402 -- be modified by code to evaluate an arbitrary expression.
1403 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1404 getNonClobberedReg expr = do
1405   r <- getRegister expr
1406   case r of
1407     Any rep code -> do
1408         tmp <- getNewRegNat rep
1409         return (tmp, code tmp)
1410     Fixed rep reg code
1411         -- only free regs can be clobbered
1412         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1413                 tmp <- getNewRegNat rep
1414                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1415         | otherwise ->
1416                 return (reg, code)
1417
1418 reg2reg :: Size -> Reg -> Reg -> Instr
1419 reg2reg size src dst
1420 #if i386_TARGET_ARCH
1421   | isFloatSize size = GMOV src dst
1422 #endif
1423   | otherwise        = MOV size (OpReg src) (OpReg dst)
1424
1425 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1426
1427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1428
1429 #if sparc_TARGET_ARCH
1430
1431 -- getRegister :: CmmExpr -> NatM Register
1432
1433 -- Load a literal float into a float register.
1434 --      The actual literal is stored in a new data area, and we load it
1435 --      at runtime.
1436 getRegister (CmmLit (CmmFloat f W32)) = do
1437
1438     -- a label for the new data area
1439     lbl <- getNewLabelNat
1440     tmp <- getNewRegNat II32
1441
1442     let code dst = toOL [
1443             -- the data area
1444             LDATA ReadOnlyData
1445                         [CmmDataLabel lbl,
1446                          CmmStaticLit (CmmFloat f W32)],
1447
1448             -- load the literal
1449             SETHI (HI (ImmCLbl lbl)) tmp,
1450             LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1451
1452     return (Any FF32 code)
1453
1454 getRegister (CmmLit (CmmFloat d W64)) = do
1455     lbl <- getNewLabelNat
1456     tmp <- getNewRegNat II32
1457     let code dst = toOL [
1458             LDATA ReadOnlyData
1459                         [CmmDataLabel lbl,
1460                          CmmStaticLit (CmmFloat d W64)],
1461             SETHI (HI (ImmCLbl lbl)) tmp,
1462             LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1463     return (Any FF64 code)
1464
1465 getRegister (CmmMachOp mop [x]) -- unary MachOps
1466   = case mop of
1467       MO_F_Neg W32     -> trivialUFCode FF32 (FNEG FF32) x
1468       MO_F_Neg W64     -> trivialUFCode FF64 (FNEG FF64) x
1469
1470       MO_S_Neg rep     -> trivialUCode (intSize rep) (SUB False False g0) x
1471       MO_Not rep       -> trivialUCode (intSize rep) (XNOR False g0) x
1472
1473       MO_FF_Conv W64 W32-> coerceDbl2Flt x
1474       MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1475
1476       MO_FS_Conv from to -> coerceFP2Int from to x
1477       MO_SF_Conv from to -> coerceInt2FP from to x
1478
1479       -- Conversions which are a nop on sparc
1480       MO_UU_Conv from to
1481         | from == to    -> conversionNop (intSize to)  x
1482       MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1483       MO_UU_Conv W32 to -> conversionNop (intSize to) x
1484       MO_SS_Conv W32 to -> conversionNop (intSize to) x
1485
1486       MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
1487       MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
1488       MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
1489
1490       -- sign extension
1491       MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
1492       MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
1493       MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
1494
1495       other_op -> panic ("Unknown unary mach op: " ++ show mop)
1496     where
1497
1498         -- | sign extend and widen
1499         integerExtend
1500                 :: Width                -- ^ width of source expression
1501                 -> Width                -- ^ width of result
1502                 -> CmmExpr              -- ^ source expression
1503                 -> NatM Register
1504
1505         integerExtend from to expr
1506          = do   -- load the expr into some register
1507                 (reg, e_code)   <- getSomeReg expr
1508                 tmp             <- getNewRegNat II32
1509                 let bitCount
1510                         = case (from, to) of
1511                                 (W8,  W32)      -> 24
1512                                 (W16, W32)      -> 16
1513                                 (W8,  W16)      -> 24
1514                 let code dst
1515                         = e_code
1516
1517                         -- local shift word left to load the sign bit
1518                         `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
1519
1520                         -- arithmetic shift right to sign extend
1521                         `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
1522
1523                 return (Any (intSize to) code)
1524
1525
1526         conversionNop new_rep expr
1527             = do e_code <- getRegister expr
1528                  return (swizzleRegisterRep e_code new_rep)
1529
1530 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1531   = case mop of
1532       MO_Eq rep -> condIntReg EQQ x y
1533       MO_Ne rep -> condIntReg NE x y
1534
1535       MO_S_Gt rep -> condIntReg GTT x y
1536       MO_S_Ge rep -> condIntReg GE x y
1537       MO_S_Lt rep -> condIntReg LTT x y
1538       MO_S_Le rep -> condIntReg LE x y
1539
1540       MO_U_Gt W32  -> condIntReg GTT x y
1541       MO_U_Ge W32  -> condIntReg GE x y
1542       MO_U_Lt W32  -> condIntReg LTT x y
1543       MO_U_Le W32  -> condIntReg LE x y
1544
1545       MO_U_Gt W16 -> condIntReg GU  x y
1546       MO_U_Ge W16 -> condIntReg GEU x y
1547       MO_U_Lt W16 -> condIntReg LU  x y
1548       MO_U_Le W16 -> condIntReg LEU x y
1549
1550       MO_Add W32 -> trivialCode W32 (ADD False False) x y
1551       MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1552
1553       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1554
1555       MO_S_Quot W32     -> idiv True  False x y
1556       MO_U_Quot W32     -> idiv False False x y
1557
1558       MO_S_Rem  W32     -> irem True  x y
1559       MO_U_Rem  W32     -> irem False x y
1560
1561       MO_F_Eq w -> condFltReg EQQ x y
1562       MO_F_Ne w -> condFltReg NE x y
1563
1564       MO_F_Gt w -> condFltReg GTT x y
1565       MO_F_Ge w -> condFltReg GE x y
1566       MO_F_Lt w -> condFltReg LTT x y
1567       MO_F_Le w -> condFltReg LE x y
1568
1569       MO_F_Add  w  -> trivialFCode w FADD x y
1570       MO_F_Sub  w  -> trivialFCode w FSUB x y
1571       MO_F_Mul  w  -> trivialFCode w FMUL x y
1572       MO_F_Quot w  -> trivialFCode w FDIV x y
1573
1574       MO_And rep   -> trivialCode rep (AND False) x y
1575       MO_Or rep    -> trivialCode rep (OR  False) x y
1576       MO_Xor rep   -> trivialCode rep (XOR False) x y
1577
1578       MO_Mul rep -> trivialCode rep (SMUL False) x y
1579
1580       MO_Shl rep   -> trivialCode rep SLL  x y
1581       MO_U_Shr rep   -> trivialCode rep SRL x y
1582       MO_S_Shr rep   -> trivialCode rep SRA x y
1583
1584 {-
1585       MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1586                                          [promote x, promote y])
1587                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1588       MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1589                                         [x, y])
1590 -}
1591       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1592   where
1593     -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1594
1595
1596     -- | Generate an integer division instruction.
1597     idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
1598
1599     -- For unsigned division with a 32 bit numerator,
1600     --          we can just clear the Y register.
1601     idiv False cc x y = do
1602         (a_reg, a_code)         <- getSomeReg x
1603         (b_reg, b_code)         <- getSomeReg y
1604
1605         let code dst
1606                 =       a_code
1607                 `appOL` b_code
1608                 `appOL` toOL
1609                         [ WRY  g0 g0
1610                         , UDIV cc a_reg (RIReg b_reg) dst]
1611
1612         return (Any II32 code)
1613
1614
1615     -- For _signed_ division with a 32 bit numerator,
1616     --          we have to sign extend the numerator into the Y register.
1617     idiv True cc x y = do
1618         (a_reg, a_code)         <- getSomeReg x
1619         (b_reg, b_code)         <- getSomeReg y
1620
1621         tmp                     <- getNewRegNat II32
1622
1623         let code dst
1624                 =       a_code
1625                 `appOL` b_code
1626                 `appOL` toOL
1627                         [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
1628                         , SRA  tmp   (RIImm (ImmInt 16)) tmp
1629
1630                         , WRY  tmp g0
1631                         , SDIV cc a_reg (RIReg b_reg) dst]
1632
1633         return (Any II32 code)
1634
1635
1636     -- | Do an integer remainder.
1637     --
1638     --   NOTE:  The SPARC v8 architecture manual says that integer division
1639     --          instructions _may_ generate a remainder, depending on the implementation.
1640     --          If so it is _recommended_ that the remainder is placed in the Y register.
1641     --
1642     --          The UltraSparc 2007 manual says Y is _undefined_ after division.
1643     --
1644     --          The SPARC T2 doesn't store the remainder, not sure about the others.
1645     --          It's probably best not to worry about it, and just generate our own
1646     --          remainders.
1647     --
1648     irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
1649
1650     -- For unsigned operands:
1651     --          Division is between a 64 bit numerator and a 32 bit denominator,
1652     --          so we still have to clear the Y register.
1653     irem False x y = do
1654         (a_reg, a_code) <- getSomeReg x
1655         (b_reg, b_code) <- getSomeReg y
1656
1657         tmp_reg         <- getNewRegNat II32
1658
1659         let code dst
1660                 =       a_code
1661                 `appOL` b_code
1662                 `appOL` toOL
1663                         [ WRY   g0 g0
1664                         , UDIV  False         a_reg (RIReg b_reg) tmp_reg
1665                         , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
1666                         , SUB   False False   a_reg (RIReg tmp_reg) dst]
1667
1668         return  (Any II32 code)
1669
1670
1671     -- For signed operands:
1672     --          Make sure to sign extend into the Y register, or the remainder
1673     --          will have the wrong sign when the numerator is negative.
1674     --
1675     --  TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
1676     --          not the full 32. Not sure why this is, something to do with overflow?
1677     --          If anyone cares enough about the speed of signed remainder they
1678     --          can work it out themselves (then tell me). -- BL 2009/01/20
1679
1680     irem True x y = do
1681         (a_reg, a_code) <- getSomeReg x
1682         (b_reg, b_code) <- getSomeReg y
1683
1684         tmp1_reg        <- getNewRegNat II32
1685         tmp2_reg        <- getNewRegNat II32
1686
1687         let code dst
1688                 =       a_code
1689                 `appOL` b_code
1690                 `appOL` toOL
1691                         [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1692                         , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1693                         , WRY   tmp1_reg g0
1694
1695                         , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg
1696                         , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
1697                         , SUB   False False    a_reg (RIReg tmp2_reg) dst]
1698
1699         return (Any II32 code)
1700
1701
1702     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1703     imulMayOflo rep a b = do
1704          (a_reg, a_code) <- getSomeReg a
1705          (b_reg, b_code) <- getSomeReg b
1706          res_lo <- getNewRegNat II32
1707          res_hi <- getNewRegNat II32
1708          let
1709             shift_amt  = case rep of
1710                           W32 -> 31
1711                           W64 -> 63
1712                           _ -> panic "shift_amt"
1713             code dst = a_code `appOL` b_code `appOL`
1714                        toOL [
1715                            SMUL False a_reg (RIReg b_reg) res_lo,
1716                            RDY res_hi,
1717                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1718                            SUB False False res_lo (RIReg res_hi) dst
1719                         ]
1720          return (Any II32 code)
1721
1722 getRegister (CmmLoad mem pk) = do
1723     Amode src code <- getAmode mem
1724     let
1725         code__2 dst     = code `snocOL` LD (cmmTypeSize pk) src dst
1726     return (Any (cmmTypeSize pk) code__2)
1727
1728 getRegister (CmmLit (CmmInt i _))
1729   | fits13Bits i
1730   = let
1731         src = ImmInt (fromInteger i)
1732         code dst = unitOL (OR False g0 (RIImm src) dst)
1733     in
1734         return (Any II32 code)
1735
1736 getRegister (CmmLit lit)
1737   = let rep = cmmLitType lit
1738         imm = litToImm lit
1739         code dst = toOL [
1740             SETHI (HI imm) dst,
1741             OR False dst (RIImm (LO imm)) dst]
1742     in return (Any II32 code)
1743
1744 #endif /* sparc_TARGET_ARCH */
1745
1746 #if powerpc_TARGET_ARCH
1747 getRegister (CmmLoad mem pk)
1748   | not (isWord64 pk)
1749   = do
1750         Amode addr addr_code <- getAmode mem
1751         let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1752                        addr_code `snocOL` LD size dst addr
1753         return (Any size code)
1754           where size = cmmTypeSize pk
1755
1756 -- catch simple cases of zero- or sign-extended load
1757 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1758     Amode addr addr_code <- getAmode mem
1759     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1760
1761 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1762
1763 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1764     Amode addr addr_code <- getAmode mem
1765     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
1766
1767 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1768     Amode addr addr_code <- getAmode mem
1769     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1770
1771 getRegister (CmmMachOp mop [x]) -- unary MachOps
1772   = case mop of
1773       MO_Not rep   -> triv_ucode_int rep NOT
1774
1775       MO_F_Neg w   -> triv_ucode_float w FNEG
1776       MO_S_Neg w   -> triv_ucode_int   w NEG
1777
1778       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
1779       MO_FF_Conv W32 W64 -> conversionNop FF64 x
1780
1781       MO_FS_Conv from to -> coerceFP2Int from to x
1782       MO_SF_Conv from to -> coerceInt2FP from to x
1783
1784       MO_SS_Conv from to
1785         | from == to    -> conversionNop (intSize to) x
1786
1787         -- narrowing is a nop: we treat the high bits as undefined
1788       MO_SS_Conv W32 to -> conversionNop (intSize to) x
1789       MO_SS_Conv W16 W8 -> conversionNop II8 x
1790       MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
1791       MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1792
1793       MO_UU_Conv from to
1794         | from == to -> conversionNop (intSize to) x
1795         -- narrowing is a nop: we treat the high bits as undefined
1796       MO_UU_Conv W32 to -> conversionNop (intSize to) x
1797       MO_UU_Conv W16 W8 -> conversionNop II8 x
1798       MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1799       MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1800
1801     where
1802         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
1803         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1804
1805         conversionNop new_size expr
1806             = do e_code <- getRegister expr
1807                  return (swizzleRegisterRep e_code new_size)
1808
1809 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1810   = case mop of
1811       MO_F_Eq w -> condFltReg EQQ x y
1812       MO_F_Ne w -> condFltReg NE  x y
1813       MO_F_Gt w -> condFltReg GTT x y
1814       MO_F_Ge w -> condFltReg GE  x y
1815       MO_F_Lt w -> condFltReg LTT x y
1816       MO_F_Le w -> condFltReg LE  x y
1817
1818       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1819       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1820
1821       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1822       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1823       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1824       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1825
1826       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1827       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1828       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1829       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1830
1831       MO_F_Add w  -> triv_float w FADD
1832       MO_F_Sub w  -> triv_float w FSUB
1833       MO_F_Mul w  -> triv_float w FMUL
1834       MO_F_Quot w -> triv_float w FDIV
1835
1836          -- optimize addition with 32-bit immediate
1837          -- (needed for PIC)
1838       MO_Add W32 ->
1839         case y of
1840           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1841             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1842           CmmLit lit
1843             -> do
1844                 (src, srcCode) <- getSomeReg x
1845                 let imm = litToImm lit
1846                     code dst = srcCode `appOL` toOL [
1847                                     ADDIS dst src (HA imm),
1848                                     ADD dst dst (RIImm (LO imm))
1849                                 ]
1850                 return (Any II32 code)
1851           _ -> trivialCode W32 True ADD x y
1852
1853       MO_Add rep -> trivialCode rep True ADD x y
1854       MO_Sub rep ->
1855         case y of    -- subfi ('substract from' with immediate) doesn't exist
1856           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1857             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1858           _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1859
1860       MO_Mul rep -> trivialCode rep True MULLW x y
1861
1862       MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1863
1864       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1865       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1866
1867       MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1868       MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1869
1870       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1871       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1872
1873       MO_And rep   -> trivialCode rep False AND x y
1874       MO_Or rep    -> trivialCode rep False OR x y
1875       MO_Xor rep   -> trivialCode rep False XOR x y
1876
1877       MO_Shl rep   -> trivialCode rep False SLW x y
1878       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1879       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1880   where
1881     triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1882     triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1883
1884 getRegister (CmmLit (CmmInt i rep))
1885   | Just imm <- makeImmediate rep True i
1886   = let
1887         code dst = unitOL (LI dst imm)
1888     in
1889         return (Any (intSize rep) code)
1890
1891 getRegister (CmmLit (CmmFloat f frep)) = do
1892     lbl <- getNewLabelNat
1893     dflags <- getDynFlagsNat
1894     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1895     Amode addr addr_code <- getAmode dynRef
1896     let size = floatSize frep
1897         code dst =
1898             LDATA ReadOnlyData  [CmmDataLabel lbl,
1899                                  CmmStaticLit (CmmFloat f frep)]
1900             `consOL` (addr_code `snocOL` LD size dst addr)
1901     return (Any size code)
1902
1903 getRegister (CmmLit lit)
1904   = let rep = cmmLitType lit
1905         imm = litToImm lit
1906         code dst = toOL [
1907               LIS dst (HA imm),
1908               ADD dst dst (RIImm (LO imm))
1909           ]
1910     in return (Any (cmmTypeSize rep) code)
1911
1912 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1913
1914     -- extend?Rep: wrap integer expression of type rep
1915     -- in a conversion to II32
1916 extendSExpr W32 x = x
1917 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
1918 extendUExpr W32 x = x
1919 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1920
1921 #endif /* powerpc_TARGET_ARCH */
1922
1923
1924 -- -----------------------------------------------------------------------------
1925 --  The 'Amode' type: Memory addressing modes passed up the tree.
1926
1927 data Amode = Amode AddrMode InstrBlock
1928
1929 {-
1930 Now, given a tree (the argument to an CmmLoad) that references memory,
1931 produce a suitable addressing mode.
1932
1933 A Rule of the Game (tm) for Amodes: use of the addr bit must
1934 immediately follow use of the code part, since the code part puts
1935 values in registers which the addr then refers to.  So you can't put
1936 anything in between, lest it overwrite some of those registers.  If
1937 you need to do some other computation between the code part and use of
1938 the addr bit, first store the effective address from the amode in a
1939 temporary, then do the other computation, and then use the temporary:
1940
1941     code
1942     LEA amode, tmp
1943     ... other computation ...
1944     ... (tmp) ...
1945 -}
1946
1947 getAmode :: CmmExpr -> NatM Amode
1948 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1949
1950 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1951
1952 #if alpha_TARGET_ARCH
1953
1954 getAmode (StPrim IntSubOp [x, StInt i])
1955   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1956     getRegister x               `thenNat` \ register ->
1957     let
1958         code = registerCode register tmp
1959         reg  = registerName register tmp
1960         off  = ImmInt (-(fromInteger i))
1961     in
1962     return (Amode (AddrRegImm reg off) code)
1963
1964 getAmode (StPrim IntAddOp [x, StInt i])
1965   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1966     getRegister x               `thenNat` \ register ->
1967     let
1968         code = registerCode register tmp
1969         reg  = registerName register tmp
1970         off  = ImmInt (fromInteger i)
1971     in
1972     return (Amode (AddrRegImm reg off) code)
1973
1974 getAmode leaf
1975   | isJust imm
1976   = return (Amode (AddrImm imm__2) id)
1977   where
1978     imm = maybeImm leaf
1979     imm__2 = case imm of Just x -> x
1980
1981 getAmode other
1982   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1983     getRegister other           `thenNat` \ register ->
1984     let
1985         code = registerCode register tmp
1986         reg  = registerName register tmp
1987     in
1988     return (Amode (AddrReg reg) code)
1989
1990 #endif /* alpha_TARGET_ARCH */
1991
1992 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1993
1994 #if x86_64_TARGET_ARCH
1995
1996 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1997                                      CmmLit displacement])
1998     = return $ Amode (ripRel (litToImm displacement)) nilOL
1999
2000 #endif
2001
2002 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2003
2004 -- This is all just ridiculous, since it carefully undoes
2005 -- what mangleIndexTree has just done.
2006 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
2007   | is32BitLit lit
2008   -- ASSERT(rep == II32)???
2009   = do (x_reg, x_code) <- getSomeReg x
2010        let off = ImmInt (-(fromInteger i))
2011        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2012
2013 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
2014   | is32BitLit lit
2015   -- ASSERT(rep == II32)???
2016   = do (x_reg, x_code) <- getSomeReg x
2017        let off = ImmInt (fromInteger i)
2018        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2019
2020 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
2021 -- recognised by the next rule.
2022 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
2023                                   b@(CmmLit _)])
2024   = getAmode (CmmMachOp (MO_Add rep) [b,a])
2025
2026 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
2027                                         [y, CmmLit (CmmInt shift _)]])
2028   | shift == 0 || shift == 1 || shift == 2 || shift == 3
2029   = x86_complex_amode x y shift 0
2030
2031 getAmode (CmmMachOp (MO_Add rep)
2032                 [x, CmmMachOp (MO_Add _)
2033                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
2034                          CmmLit (CmmInt offset _)]])
2035   | shift == 0 || shift == 1 || shift == 2 || shift == 3
2036   && is32BitInteger offset
2037   = x86_complex_amode x y shift offset
2038
2039 getAmode (CmmMachOp (MO_Add rep) [x,y])
2040   = x86_complex_amode x y 0 0
2041
2042 getAmode (CmmLit lit) | is32BitLit lit
2043   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
2044
2045 getAmode expr = do
2046   (reg,code) <- getSomeReg expr
2047   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
2048
2049
2050 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
2051 x86_complex_amode base index shift offset
2052   = do (x_reg, x_code) <- getNonClobberedReg base
2053         -- x must be in a temp, because it has to stay live over y_code
2054         -- we could compre x_reg and y_reg and do something better here...
2055        (y_reg, y_code) <- getSomeReg index
2056        let
2057            code = x_code `appOL` y_code
2058            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
2059        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
2060                code)
2061
2062 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2063
2064 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2065
2066 #if sparc_TARGET_ARCH
2067
2068 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
2069   | fits13Bits (-i)
2070   = do
2071        (reg, code) <- getSomeReg x
2072        let
2073          off  = ImmInt (-(fromInteger i))
2074        return (Amode (AddrRegImm reg off) code)
2075
2076
2077 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
2078   | fits13Bits i
2079   = do
2080        (reg, code) <- getSomeReg x
2081        let
2082          off  = ImmInt (fromInteger i)
2083        return (Amode (AddrRegImm reg off) code)
2084
2085 getAmode (CmmMachOp (MO_Add rep) [x, y])
2086   = do
2087     (regX, codeX) <- getSomeReg x
2088     (regY, codeY) <- getSomeReg y
2089     let
2090         code = codeX `appOL` codeY
2091     return (Amode (AddrRegReg regX regY) code)
2092
2093 getAmode (CmmLit lit)
2094   = do
2095         let imm__2      = litToImm lit
2096         tmp1    <- getNewRegNat II32
2097         tmp2    <- getNewRegNat II32
2098
2099         let code = toOL [ SETHI (HI imm__2) tmp1
2100                         , OR    False tmp1 (RIImm (LO imm__2)) tmp2]
2101
2102         return (Amode (AddrRegReg tmp2 g0) code)
2103
2104 getAmode other
2105   = do
2106        (reg, code) <- getSomeReg other
2107        let
2108             off  = ImmInt 0
2109        return (Amode (AddrRegImm reg off) code)
2110
2111 #endif /* sparc_TARGET_ARCH */
2112
2113 #ifdef powerpc_TARGET_ARCH
2114 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
2115   | Just off <- makeImmediate W32 True (-i)
2116   = do
2117         (reg, code) <- getSomeReg x
2118         return (Amode (AddrRegImm reg off) code)
2119
2120
2121 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
2122   | Just off <- makeImmediate W32 True i
2123   = do
2124         (reg, code) <- getSomeReg x
2125         return (Amode (AddrRegImm reg off) code)
2126
2127    -- optimize addition with 32-bit immediate
2128    -- (needed for PIC)
2129 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
2130   = do
2131         tmp <- getNewRegNat II32
2132         (src, srcCode) <- getSomeReg x
2133         let imm = litToImm lit
2134             code = srcCode `snocOL` ADDIS tmp src (HA imm)
2135         return (Amode (AddrRegImm tmp (LO imm)) code)
2136
2137 getAmode (CmmLit lit)
2138   = do
2139         tmp <- getNewRegNat II32
2140         let imm = litToImm lit
2141             code = unitOL (LIS tmp (HA imm))
2142         return (Amode (AddrRegImm tmp (LO imm)) code)
2143
2144 getAmode (CmmMachOp (MO_Add W32) [x, y])
2145   = do
2146         (regX, codeX) <- getSomeReg x
2147         (regY, codeY) <- getSomeReg y
2148         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
2149
2150 getAmode other
2151   = do
2152         (reg, code) <- getSomeReg other
2153         let
2154             off  = ImmInt 0
2155         return (Amode (AddrRegImm reg off) code)
2156 #endif /* powerpc_TARGET_ARCH */
2157
2158 -- -----------------------------------------------------------------------------
2159 -- getOperand: sometimes any operand will do.
2160
2161 -- getNonClobberedOperand: the value of the operand will remain valid across
2162 -- the computation of an arbitrary expression, unless the expression
2163 -- is computed directly into a register which the operand refers to
2164 -- (see trivialCode where this function is used for an example).
2165
2166 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2167
2168 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2169 #if x86_64_TARGET_ARCH
2170 getNonClobberedOperand (CmmLit lit)
2171   | isSuitableFloatingPointLit lit = do
2172     lbl <- getNewLabelNat
2173     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2174                                            CmmStaticLit lit])
2175     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2176 #endif
2177 getNonClobberedOperand (CmmLit lit)
2178   | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2179     return (OpImm (litToImm lit), nilOL)
2180 getNonClobberedOperand (CmmLoad mem pk)
2181   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2182     Amode src mem_code <- getAmode mem
2183     (src',save_code) <-
2184         if (amodeCouldBeClobbered src)
2185                 then do
2186                    tmp <- getNewRegNat wordSize
2187                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2188                            unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2189                 else
2190                    return (src, nilOL)
2191     return (OpAddr src', save_code `appOL` mem_code)
2192 getNonClobberedOperand e = do
2193     (reg, code) <- getNonClobberedReg e
2194     return (OpReg reg, code)
2195
2196 amodeCouldBeClobbered :: AddrMode -> Bool
2197 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2198
2199 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2200 regClobbered _ = False
2201
2202 -- getOperand: the operand is not required to remain valid across the
2203 -- computation of an arbitrary expression.
2204 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2205 #if x86_64_TARGET_ARCH
2206 getOperand (CmmLit lit)
2207   | isSuitableFloatingPointLit lit = do
2208     lbl <- getNewLabelNat
2209     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2210                                            CmmStaticLit lit])
2211     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2212 #endif
2213 getOperand (CmmLit lit)
2214   | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2215     return (OpImm (litToImm lit), nilOL)
2216 getOperand (CmmLoad mem pk)
2217   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2218     Amode src mem_code <- getAmode mem
2219     return (OpAddr src, mem_code)
2220 getOperand e = do
2221     (reg, code) <- getSomeReg e
2222     return (OpReg reg, code)
2223
2224 isOperand :: CmmExpr -> Bool
2225 isOperand (CmmLoad _ _) = True
2226 isOperand (CmmLit lit)  = is32BitLit lit
2227                           || isSuitableFloatingPointLit lit
2228 isOperand _             = False
2229
2230 -- if we want a floating-point literal as an operand, we can
2231 -- use it directly from memory.  However, if the literal is
2232 -- zero, we're better off generating it into a register using
2233 -- xor.
2234 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2235 isSuitableFloatingPointLit _ = False
2236
2237 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2238 getRegOrMem (CmmLoad mem pk)
2239   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2240     Amode src mem_code <- getAmode mem
2241     return (OpAddr src, mem_code)
2242 getRegOrMem e = do
2243     (reg, code) <- getNonClobberedReg e
2244     return (OpReg reg, code)
2245
2246 #if x86_64_TARGET_ARCH
2247 is32BitLit (CmmInt i W64) = is32BitInteger i
2248    -- assume that labels are in the range 0-2^31-1: this assumes the
2249    -- small memory model (see gcc docs, -mcmodel=small).
2250 #endif
2251 is32BitLit x = True
2252 #endif
2253
2254 is32BitInteger :: Integer -> Bool
2255 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
2256   where i64 = fromIntegral i :: Int64
2257   -- a CmmInt is intended to be truncated to the appropriate
2258   -- number of bits, so here we truncate it to Int64.  This is
2259   -- important because e.g. -1 as a CmmInt might be either
2260   -- -1 or 18446744073709551615.
2261
2262 -- -----------------------------------------------------------------------------
2263 --  The 'CondCode' type:  Condition codes passed up the tree.
2264
2265 data CondCode = CondCode Bool Cond InstrBlock
2266
2267 -- Set up a condition code for a conditional branch.
2268
2269 getCondCode :: CmmExpr -> NatM CondCode
2270
2271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2272
2273 #if alpha_TARGET_ARCH
2274 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2275 #endif /* alpha_TARGET_ARCH */
2276
2277 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2278
2279 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2280 -- yes, they really do seem to want exactly the same!
2281
2282 getCondCode (CmmMachOp mop [x, y])
2283   =
2284     case mop of
2285       MO_F_Eq W32 -> condFltCode EQQ x y
2286       MO_F_Ne W32 -> condFltCode NE  x y
2287       MO_F_Gt W32 -> condFltCode GTT x y
2288       MO_F_Ge W32 -> condFltCode GE  x y
2289       MO_F_Lt W32 -> condFltCode LTT x y
2290       MO_F_Le W32 -> condFltCode LE  x y
2291
2292       MO_F_Eq W64 -> condFltCode EQQ x y
2293       MO_F_Ne W64 -> condFltCode NE  x y
2294       MO_F_Gt W64 -> condFltCode GTT x y
2295       MO_F_Ge W64 -> condFltCode GE  x y
2296       MO_F_Lt W64 -> condFltCode LTT x y
2297       MO_F_Le W64 -> condFltCode LE  x y
2298
2299       MO_Eq rep -> condIntCode EQQ  x y
2300       MO_Ne rep -> condIntCode NE   x y
2301
2302       MO_S_Gt rep -> condIntCode GTT  x y
2303       MO_S_Ge rep -> condIntCode GE   x y
2304       MO_S_Lt rep -> condIntCode LTT  x y
2305       MO_S_Le rep -> condIntCode LE   x y
2306
2307       MO_U_Gt rep -> condIntCode GU   x y
2308       MO_U_Ge rep -> condIntCode GEU  x y
2309       MO_U_Lt rep -> condIntCode LU   x y
2310       MO_U_Le rep -> condIntCode LEU  x y
2311
2312       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2313
2314 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2315
2316 #elif powerpc_TARGET_ARCH
2317
2318 -- almost the same as everywhere else - but we need to
2319 -- extend small integers to 32 bit first
2320
2321 getCondCode (CmmMachOp mop [x, y])
2322   = case mop of
2323       MO_F_Eq W32 -> condFltCode EQQ x y
2324       MO_F_Ne W32 -> condFltCode NE  x y
2325       MO_F_Gt W32 -> condFltCode GTT x y
2326       MO_F_Ge W32 -> condFltCode GE  x y
2327       MO_F_Lt W32 -> condFltCode LTT x y
2328       MO_F_Le W32 -> condFltCode LE  x y
2329
2330       MO_F_Eq W64 -> condFltCode EQQ x y
2331       MO_F_Ne W64 -> condFltCode NE  x y
2332       MO_F_Gt W64 -> condFltCode GTT x y
2333       MO_F_Ge W64 -> condFltCode GE  x y
2334       MO_F_Lt W64 -> condFltCode LTT x y
2335       MO_F_Le W64 -> condFltCode LE  x y
2336
2337       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2338       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2339
2340       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2341       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2342       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2343       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2344
2345       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2346       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2347       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2348       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2349
2350       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2351
2352 getCondCode other =  panic "getCondCode(2)(powerpc)"
2353
2354
2355 #endif
2356
2357
2358 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2359 -- passed back up the tree.
2360
2361 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2362
2363 #if alpha_TARGET_ARCH
2364 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2365 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2366 #endif /* alpha_TARGET_ARCH */
2367
2368 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2369 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2370
2371 -- memory vs immediate
2372 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2373     Amode x_addr x_code <- getAmode x
2374     let
2375         imm  = litToImm lit
2376         code = x_code `snocOL`
2377                   CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2378     --
2379     return (CondCode False cond code)
2380
2381 -- anything vs zero, using a mask
2382 -- TODO: Add some sanity checking!!!!
2383 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2384     | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2385     = do
2386       (x_reg, x_code) <- getSomeReg x
2387       let
2388          code = x_code `snocOL`
2389                 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2390       --
2391       return (CondCode False cond code)
2392
2393 -- anything vs zero
2394 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2395     (x_reg, x_code) <- getSomeReg x
2396     let
2397         code = x_code `snocOL`
2398                   TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2399     --
2400     return (CondCode False cond code)
2401
2402 -- anything vs operand
2403 condIntCode cond x y | isOperand y = do
2404     (x_reg, x_code) <- getNonClobberedReg x
2405     (y_op,  y_code) <- getOperand y
2406     let
2407         code = x_code `appOL` y_code `snocOL`
2408                   CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2409     -- in
2410     return (CondCode False cond code)
2411
2412 -- anything vs anything
2413 condIntCode cond x y = do
2414   (y_reg, y_code) <- getNonClobberedReg y
2415   (x_op, x_code) <- getRegOrMem x
2416   let
2417         code = y_code `appOL`
2418                x_code `snocOL`
2419                   CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2420   -- in
2421   return (CondCode False cond code)
2422 #endif
2423
2424 #if i386_TARGET_ARCH
2425 condFltCode cond x y
2426   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2427   (x_reg, x_code) <- getNonClobberedReg x
2428   (y_reg, y_code) <- getSomeReg y
2429   let
2430         code = x_code `appOL` y_code `snocOL`
2431                 GCMP cond x_reg y_reg
2432   -- The GCMP insn does the test and sets the zero flag if comparable
2433   -- and true.  Hence we always supply EQQ as the condition to test.
2434   return (CondCode True EQQ code)
2435 #endif /* i386_TARGET_ARCH */
2436
2437 #if x86_64_TARGET_ARCH
2438 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2439 -- an operand, but the right must be a reg.  We can probably do better
2440 -- than this general case...
2441 condFltCode cond x y = do
2442   (x_reg, x_code) <- getNonClobberedReg x
2443   (y_op, y_code) <- getOperand y
2444   let
2445         code = x_code `appOL`
2446                y_code `snocOL`
2447                   CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2448         -- NB(1): we need to use the unsigned comparison operators on the
2449         -- result of this comparison.
2450   -- in
2451   return (CondCode True (condToUnsigned cond) code)
2452 #endif
2453
2454 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2455
2456 #if sparc_TARGET_ARCH
2457
2458 condIntCode cond x (CmmLit (CmmInt y rep))
2459   | fits13Bits y
2460   = do
2461        (src1, code) <- getSomeReg x
2462        let
2463            src2 = ImmInt (fromInteger y)
2464            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2465        return (CondCode False cond code')
2466
2467 condIntCode cond x y = do
2468     (src1, code1) <- getSomeReg x
2469     (src2, code2) <- getSomeReg y
2470     let
2471         code__2 = code1 `appOL` code2 `snocOL`
2472                   SUB False True src1 (RIReg src2) g0
2473     return (CondCode False cond code__2)
2474
2475 -----------
2476 condFltCode cond x y = do
2477     (src1, code1) <- getSomeReg x
2478     (src2, code2) <- getSomeReg y
2479     tmp <- getNewRegNat FF64
2480     let
2481         promote x = FxTOy FF32 FF64 x tmp
2482
2483         pk1   = cmmExprType x
2484         pk2   = cmmExprType y
2485
2486         code__2 =
2487                 if pk1 `cmmEqType` pk2 then
2488                     code1 `appOL` code2 `snocOL`
2489                     FCMP True (cmmTypeSize pk1) src1 src2
2490                 else if typeWidth pk1 == W32 then
2491                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2492                     FCMP True FF64 tmp src2
2493                 else
2494                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2495                     FCMP True FF64 src1 tmp
2496     return (CondCode True cond code__2)
2497
2498 #endif /* sparc_TARGET_ARCH */
2499
2500 #if powerpc_TARGET_ARCH
2501 --  ###FIXME: I16 and I8!
2502 condIntCode cond x (CmmLit (CmmInt y rep))
2503   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2504   = do
2505         (src1, code) <- getSomeReg x
2506         let
2507             code' = code `snocOL`
2508                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2509         return (CondCode False cond code')
2510
2511 condIntCode cond x y = do
2512     (src1, code1) <- getSomeReg x
2513     (src2, code2) <- getSomeReg y
2514     let
2515         code' = code1 `appOL` code2 `snocOL`
2516                   (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2517     return (CondCode False cond code')
2518
2519 condFltCode cond x y = do
2520     (src1, code1) <- getSomeReg x
2521     (src2, code2) <- getSomeReg y
2522     let
2523         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2524         code'' = case cond of -- twiddle CR to handle unordered case
2525                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2526                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2527                     _ -> code'
2528                  where
2529                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2530     return (CondCode True cond code'')
2531
2532 #endif /* powerpc_TARGET_ARCH */
2533
2534 -- -----------------------------------------------------------------------------
2535 -- Generating assignments
2536
2537 -- Assignments are really at the heart of the whole code generation
2538 -- business.  Almost all top-level nodes of any real importance are
2539 -- assignments, which correspond to loads, stores, or register
2540 -- transfers.  If we're really lucky, some of the register transfers
2541 -- will go away, because we can use the destination register to
2542 -- complete the code generation for the right hand side.  This only
2543 -- fails when the right hand side is forced into a fixed register
2544 -- (e.g. the result of a call).
2545
2546 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2547 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
2548
2549 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2550 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
2551
2552 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2553
2554 #if alpha_TARGET_ARCH
2555
2556 assignIntCode pk (CmmLoad dst _) src
2557   = getNewRegNat IntRep             `thenNat` \ tmp ->
2558     getAmode dst                    `thenNat` \ amode ->
2559     getRegister src                 `thenNat` \ register ->
2560     let
2561         code1   = amodeCode amode []
2562         dst__2  = amodeAddr amode
2563         code2   = registerCode register tmp []
2564         src__2  = registerName register tmp
2565         sz      = primRepToSize pk
2566         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2567     in
2568     return code__2
2569
2570 assignIntCode pk dst src
2571   = getRegister dst                         `thenNat` \ register1 ->
2572     getRegister src                         `thenNat` \ register2 ->
2573     let
2574         dst__2  = registerName register1 zeroh
2575         code    = registerCode register2 dst__2
2576         src__2  = registerName register2 dst__2
2577         code__2 = if isFixed register2
2578                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2579                   else code
2580     in
2581     return code__2
2582
2583 #endif /* alpha_TARGET_ARCH */
2584
2585 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2586
2587 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2588
2589 -- integer assignment to memory
2590
2591 -- specific case of adding/subtracting an integer to a particular address.
2592 -- ToDo: catch other cases where we can use an operation directly on a memory
2593 -- address.
2594 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2595                                                  CmmLit (CmmInt i _)])
2596    | addr == addr2, pk /= II64 || is32BitInteger i,
2597      Just instr <- check op
2598    = do Amode amode code_addr <- getAmode addr
2599         let code = code_addr `snocOL`
2600                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2601         return code
2602    where
2603         check (MO_Add _) = Just ADD
2604         check (MO_Sub _) = Just SUB
2605         check _ = Nothing
2606         -- ToDo: more?
2607
2608 -- general case
2609 assignMem_IntCode pk addr src = do
2610     Amode addr code_addr <- getAmode addr
2611     (code_src, op_src)   <- get_op_RI src
2612     let
2613         code = code_src `appOL`
2614                code_addr `snocOL`
2615                   MOV pk op_src (OpAddr addr)
2616         -- NOTE: op_src is stable, so it will still be valid
2617         -- after code_addr.  This may involve the introduction
2618         -- of an extra MOV to a temporary register, but we hope
2619         -- the register allocator will get rid of it.
2620     --
2621     return code
2622   where
2623     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2624     get_op_RI (CmmLit lit) | is32BitLit lit
2625       = return (nilOL, OpImm (litToImm lit))
2626     get_op_RI op
2627       = do (reg,code) <- getNonClobberedReg op
2628            return (code, OpReg reg)
2629
2630
2631 -- Assign; dst is a reg, rhs is mem
2632 assignReg_IntCode pk reg (CmmLoad src _) = do
2633   load_code <- intLoadCode (MOV pk) src
2634   return (load_code (getRegisterReg reg))
2635
2636 -- dst is a reg, but src could be anything
2637 assignReg_IntCode pk reg src = do
2638   code <- getAnyReg src
2639   return (code (getRegisterReg reg))
2640
2641 #endif /* i386_TARGET_ARCH */
2642
2643 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2644
2645 #if sparc_TARGET_ARCH
2646
2647 assignMem_IntCode pk addr src = do
2648     (srcReg, code) <- getSomeReg src
2649     Amode dstAddr addr_code <- getAmode addr
2650     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2651
2652 assignReg_IntCode pk reg src = do
2653     r <- getRegister src
2654     return $ case r of
2655         Any _ code         -> code dst
2656         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2657     where
2658       dst = getRegisterReg reg
2659
2660
2661 #endif /* sparc_TARGET_ARCH */
2662
2663 #if powerpc_TARGET_ARCH
2664
2665 assignMem_IntCode pk addr src = do
2666     (srcReg, code) <- getSomeReg src
2667     Amode dstAddr addr_code <- getAmode addr
2668     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2669
2670 -- dst is a reg, but src could be anything
2671 assignReg_IntCode pk reg src
2672     = do
2673         r <- getRegister src
2674         return $ case r of
2675             Any _ code         -> code dst
2676             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2677     where
2678         dst = getRegisterReg reg
2679
2680 #endif /* powerpc_TARGET_ARCH */
2681
2682
2683 -- -----------------------------------------------------------------------------
2684 -- Floating-point assignments
2685
2686 #if alpha_TARGET_ARCH
2687
2688 assignFltCode pk (CmmLoad dst _) src
2689   = getNewRegNat pk                 `thenNat` \ tmp ->
2690     getAmode dst                    `thenNat` \ amode ->
2691     getRegister src                         `thenNat` \ register ->
2692     let
2693         code1   = amodeCode amode []
2694         dst__2  = amodeAddr amode
2695         code2   = registerCode register tmp []
2696         src__2  = registerName register tmp
2697         sz      = primRepToSize pk
2698         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2699     in
2700     return code__2
2701
2702 assignFltCode pk dst src
2703   = getRegister dst                         `thenNat` \ register1 ->
2704     getRegister src                         `thenNat` \ register2 ->
2705     let
2706         dst__2  = registerName register1 zeroh
2707         code    = registerCode register2 dst__2
2708         src__2  = registerName register2 dst__2
2709         code__2 = if isFixed register2
2710                   then code . mkSeqInstr (FMOV src__2 dst__2)
2711                   else code
2712     in
2713     return code__2
2714
2715 #endif /* alpha_TARGET_ARCH */
2716
2717 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2718
2719 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2720
2721 -- Floating point assignment to memory
2722 assignMem_FltCode pk addr src = do
2723   (src_reg, src_code) <- getNonClobberedReg src
2724   Amode addr addr_code <- getAmode addr
2725   let
2726         code = src_code `appOL`
2727                addr_code `snocOL`
2728                 IF_ARCH_i386(GST pk src_reg addr,
2729                              MOV pk (OpReg src_reg) (OpAddr addr))
2730   return code
2731
2732 -- Floating point assignment to a register/temporary
2733 assignReg_FltCode pk reg src = do
2734   src_code <- getAnyReg src
2735   return (src_code (getRegisterReg reg))
2736
2737 #endif /* i386_TARGET_ARCH */
2738
2739 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2740
2741 #if sparc_TARGET_ARCH
2742
2743 -- Floating point assignment to memory
2744 assignMem_FltCode pk addr src = do
2745     Amode dst__2 code1 <- getAmode addr
2746     (src__2, code2) <- getSomeReg src
2747     tmp1 <- getNewRegNat pk
2748     let
2749         pk__2   = cmmExprType src
2750         code__2 = code1 `appOL` code2 `appOL`
2751             if   sizeToWidth pk == typeWidth pk__2
2752             then unitOL (ST pk src__2 dst__2)
2753             else toOL   [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2754                         , ST    pk tmp1 dst__2]
2755     return code__2
2756
2757 -- Floating point assignment to a register/temporary
2758 assignReg_FltCode pk dstCmmReg srcCmmExpr = do
2759     srcRegister <- getRegister srcCmmExpr
2760     let dstReg  = getRegisterReg dstCmmReg
2761
2762     return $ case srcRegister of
2763         Any _ code                  -> code dstReg
2764         Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
2765
2766 #endif /* sparc_TARGET_ARCH */
2767
2768 #if powerpc_TARGET_ARCH
2769
2770 -- Easy, isn't it?
2771 assignMem_FltCode = assignMem_IntCode
2772 assignReg_FltCode = assignReg_IntCode
2773
2774 #endif /* powerpc_TARGET_ARCH */
2775
2776
2777 -- -----------------------------------------------------------------------------
2778 -- Generating an non-local jump
2779
2780 -- (If applicable) Do not fill the delay slots here; you will confuse the
2781 -- register allocator.
2782
2783 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2784
2785 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2786
2787 #if alpha_TARGET_ARCH
2788
2789 genJump (CmmLabel lbl)
2790   | isAsmTemp lbl = returnInstr (BR target)
2791   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2792   where
2793     target = ImmCLbl lbl
2794
2795 genJump tree
2796   = getRegister tree                `thenNat` \ register ->
2797     getNewRegNat PtrRep             `thenNat` \ tmp ->
2798     let
2799         dst    = registerName register pv
2800         code   = registerCode register pv
2801         target = registerName register pv
2802     in
2803     if isFixed register then
2804         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2805     else
2806     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2807
2808 #endif /* alpha_TARGET_ARCH */
2809
2810 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2811
2812 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2813
2814 genJump (CmmLoad mem pk) = do
2815   Amode target code <- getAmode mem
2816   return (code `snocOL` JMP (OpAddr target))
2817
2818 genJump (CmmLit lit) = do
2819   return (unitOL (JMP (OpImm (litToImm lit))))
2820
2821 genJump expr = do
2822   (reg,code) <- getSomeReg expr
2823   return (code `snocOL` JMP (OpReg reg))
2824
2825 #endif /* i386_TARGET_ARCH */
2826
2827 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2828
2829 #if sparc_TARGET_ARCH
2830
2831 genJump (CmmLit (CmmLabel lbl))
2832   = return (toOL [CALL (Left target) 0 True, NOP])
2833   where
2834     target = ImmCLbl lbl
2835
2836 genJump tree
2837   = do
2838         (target, code) <- getSomeReg tree
2839         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2840
2841 #endif /* sparc_TARGET_ARCH */
2842
2843 #if powerpc_TARGET_ARCH
2844 genJump (CmmLit (CmmLabel lbl))
2845   = return (unitOL $ JMP lbl)
2846
2847 genJump tree
2848   = do
2849         (target,code) <- getSomeReg tree
2850         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2851 #endif /* powerpc_TARGET_ARCH */
2852
2853
2854 -- -----------------------------------------------------------------------------
2855 --  Unconditional branches
2856
2857 genBranch :: BlockId -> NatM InstrBlock
2858
2859 genBranch = return . toOL . mkBranchInstr
2860
2861 -- -----------------------------------------------------------------------------
2862 --  Conditional jumps
2863
2864 {-
2865 Conditional jumps are always to local labels, so we can use branch
2866 instructions.  We peek at the arguments to decide what kind of
2867 comparison to do.
2868
2869 ALPHA: For comparisons with 0, we're laughing, because we can just do
2870 the desired conditional branch.
2871
2872 I386: First, we have to ensure that the condition
2873 codes are set according to the supplied comparison operation.
2874
2875 SPARC: First, we have to ensure that the condition codes are set
2876 according to the supplied comparison operation.  We generate slightly
2877 different code for floating point comparisons, because a floating
2878 point operation cannot directly precede a @BF@.  We assume the worst
2879 and fill that slot with a @NOP@.
2880
2881 SPARC: Do not fill the delay slots here; you will confuse the register
2882 allocator.
2883 -}
2884
2885
2886 genCondJump
2887     :: BlockId      -- the branch target
2888     -> CmmExpr      -- the condition on which to branch
2889     -> NatM InstrBlock
2890
2891 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2892
2893 #if alpha_TARGET_ARCH
2894
2895 genCondJump id (StPrim op [x, StInt 0])
2896   = getRegister x                           `thenNat` \ register ->
2897     getNewRegNat (registerRep register)
2898                                     `thenNat` \ tmp ->
2899     let
2900         code   = registerCode register tmp
2901         value  = registerName register tmp
2902         pk     = registerRep register
2903         target = ImmCLbl lbl
2904     in
2905     returnSeq code [BI (cmpOp op) value target]
2906   where
2907     cmpOp CharGtOp = GTT
2908     cmpOp CharGeOp = GE
2909     cmpOp CharEqOp = EQQ
2910     cmpOp CharNeOp = NE
2911     cmpOp CharLtOp = LTT
2912     cmpOp CharLeOp = LE
2913     cmpOp IntGtOp = GTT
2914     cmpOp IntGeOp = GE
2915     cmpOp IntEqOp = EQQ
2916     cmpOp IntNeOp = NE
2917     cmpOp IntLtOp = LTT
2918     cmpOp IntLeOp = LE
2919     cmpOp WordGtOp = NE
2920     cmpOp WordGeOp = ALWAYS
2921     cmpOp WordEqOp = EQQ
2922     cmpOp WordNeOp = NE
2923     cmpOp WordLtOp = NEVER
2924     cmpOp WordLeOp = EQQ
2925     cmpOp AddrGtOp = NE
2926     cmpOp AddrGeOp = ALWAYS
2927     cmpOp AddrEqOp = EQQ
2928     cmpOp AddrNeOp = NE
2929     cmpOp AddrLtOp = NEVER
2930     cmpOp AddrLeOp = EQQ
2931
2932 genCondJump lbl (StPrim op [x, StDouble 0.0])
2933   = getRegister x                           `thenNat` \ register ->
2934     getNewRegNat (registerRep register)
2935                                     `thenNat` \ tmp ->
2936     let
2937         code   = registerCode register tmp
2938         value  = registerName register tmp
2939         pk     = registerRep register
2940         target = ImmCLbl lbl
2941     in
2942     return (code . mkSeqInstr (BF (cmpOp op) value target))
2943   where
2944     cmpOp FloatGtOp = GTT
2945     cmpOp FloatGeOp = GE
2946     cmpOp FloatEqOp = EQQ
2947     cmpOp FloatNeOp = NE
2948     cmpOp FloatLtOp = LTT
2949     cmpOp FloatLeOp = LE
2950     cmpOp DoubleGtOp = GTT
2951     cmpOp DoubleGeOp = GE
2952     cmpOp DoubleEqOp = EQQ
2953     cmpOp DoubleNeOp = NE
2954     cmpOp DoubleLtOp = LTT
2955     cmpOp DoubleLeOp = LE
2956
2957 genCondJump lbl (StPrim op [x, y])
2958   | fltCmpOp op
2959   = trivialFCode pr instr x y       `thenNat` \ register ->
2960     getNewRegNat FF64               `thenNat` \ tmp ->
2961     let
2962         code   = registerCode register tmp
2963         result = registerName register tmp
2964         target = ImmCLbl lbl
2965     in
2966     return (code . mkSeqInstr (BF cond result target))
2967   where
2968     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2969
2970     fltCmpOp op = case op of
2971         FloatGtOp -> True
2972         FloatGeOp -> True
2973         FloatEqOp -> True
2974         FloatNeOp -> True
2975         FloatLtOp -> True
2976         FloatLeOp -> True
2977         DoubleGtOp -> True
2978         DoubleGeOp -> True
2979         DoubleEqOp -> True
2980         DoubleNeOp -> True
2981         DoubleLtOp -> True
2982         DoubleLeOp -> True
2983         _ -> False
2984     (instr, cond) = case op of
2985         FloatGtOp -> (FCMP TF LE, EQQ)
2986         FloatGeOp -> (FCMP TF LTT, EQQ)
2987         FloatEqOp -> (FCMP TF EQQ, NE)
2988         FloatNeOp -> (FCMP TF EQQ, EQQ)
2989         FloatLtOp -> (FCMP TF LTT, NE)
2990         FloatLeOp -> (FCMP TF LE, NE)
2991         DoubleGtOp -> (FCMP TF LE, EQQ)
2992         DoubleGeOp -> (FCMP TF LTT, EQQ)
2993         DoubleEqOp -> (FCMP TF EQQ, NE)
2994         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2995         DoubleLtOp -> (FCMP TF LTT, NE)
2996         DoubleLeOp -> (FCMP TF LE, NE)
2997
2998 genCondJump lbl (StPrim op [x, y])
2999   = trivialCode instr x y           `thenNat` \ register ->
3000     getNewRegNat IntRep             `thenNat` \ tmp ->
3001     let
3002         code   = registerCode register tmp
3003         result = registerName register tmp
3004         target = ImmCLbl lbl
3005     in
3006     return (code . mkSeqInstr (BI cond result target))
3007   where
3008     (instr, cond) = case op of
3009         CharGtOp -> (CMP LE, EQQ)
3010         CharGeOp -> (CMP LTT, EQQ)
3011         CharEqOp -> (CMP EQQ, NE)
3012         CharNeOp -> (CMP EQQ, EQQ)
3013         CharLtOp -> (CMP LTT, NE)
3014         CharLeOp -> (CMP LE, NE)
3015         IntGtOp -> (CMP LE, EQQ)
3016         IntGeOp -> (CMP LTT, EQQ)
3017         IntEqOp -> (CMP EQQ, NE)
3018         IntNeOp -> (CMP EQQ, EQQ)
3019         IntLtOp -> (CMP LTT, NE)
3020         IntLeOp -> (CMP LE, NE)
3021         WordGtOp -> (CMP ULE, EQQ)
3022         WordGeOp -> (CMP ULT, EQQ)
3023         WordEqOp -> (CMP EQQ, NE)
3024         WordNeOp -> (CMP EQQ, EQQ)
3025         WordLtOp -> (CMP ULT, NE)
3026         WordLeOp -> (CMP ULE, NE)
3027         AddrGtOp -> (CMP ULE, EQQ)
3028         AddrGeOp -> (CMP ULT, EQQ)
3029         AddrEqOp -> (CMP EQQ, NE)
3030         AddrNeOp -> (CMP EQQ, EQQ)
3031         AddrLtOp -> (CMP ULT, NE)
3032         AddrLeOp -> (CMP ULE, NE)
3033
3034 #endif /* alpha_TARGET_ARCH */
3035
3036 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3037
3038 #if i386_TARGET_ARCH
3039
3040 genCondJump id bool = do
3041   CondCode _ cond code <- getCondCode bool
3042   return (code `snocOL` JXX cond id)
3043
3044 #endif
3045
3046 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3047
3048 #if x86_64_TARGET_ARCH
3049
3050 genCondJump id bool = do
3051   CondCode is_float cond cond_code <- getCondCode bool
3052   if not is_float
3053     then
3054         return (cond_code `snocOL` JXX cond id)
3055     else do
3056         lbl <- getBlockIdNat
3057
3058         -- see comment with condFltReg
3059         let code = case cond of
3060                         NE  -> or_unordered
3061                         GU  -> plain_test
3062                         GEU -> plain_test
3063                         _   -> and_ordered
3064
3065             plain_test = unitOL (
3066                   JXX cond id
3067                 )
3068             or_unordered = toOL [
3069                   JXX cond id,
3070                   JXX PARITY id
3071                 ]
3072             and_ordered = toOL [
3073                   JXX PARITY lbl,
3074                   JXX cond id,
3075                   JXX ALWAYS lbl,
3076                   NEWBLOCK lbl
3077                 ]
3078         return (cond_code `appOL` code)
3079
3080 #endif
3081
3082 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3083
3084 #if sparc_TARGET_ARCH
3085
3086 genCondJump bid bool = do
3087   CondCode is_float cond code <- getCondCode bool
3088   return (
3089        code `appOL`
3090        toOL (
3091          if   is_float
3092          then [NOP, BF cond False bid, NOP]
3093          else [BI cond False bid, NOP]
3094        )
3095     )
3096
3097 #endif /* sparc_TARGET_ARCH */
3098
3099
3100 #if powerpc_TARGET_ARCH
3101
3102 genCondJump id bool = do
3103   CondCode is_float cond code <- getCondCode bool
3104   return (code `snocOL` BCC cond id)
3105
3106 #endif /* powerpc_TARGET_ARCH */
3107
3108
3109 -- -----------------------------------------------------------------------------
3110 --  Generating C calls
3111
3112 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
3113 -- @get_arg@, which moves the arguments to the correct registers/stack
3114 -- locations.  Apart from that, the code is easy.
3115 --
3116 -- (If applicable) Do not fill the delay slots here; you will confuse the
3117 -- register allocator.
3118
3119 genCCall
3120     :: CmmCallTarget            -- function to call
3121     -> HintedCmmFormals         -- where to put the result
3122     -> HintedCmmActuals         -- arguments (of mixed type)
3123     -> NatM InstrBlock
3124
3125 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3126
3127 #if alpha_TARGET_ARCH
3128
3129 ccallResultRegs =
3130
3131 genCCall fn cconv result_regs args
3132   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3133                           `thenNat` \ ((unused,_), argCode) ->
3134     let
3135         nRegs = length allArgRegs - length unused
3136         code = asmSeqThen (map ($ []) argCode)
3137     in
3138         returnSeq code [
3139             LDA pv (AddrImm (ImmLab (ptext fn))),
3140             JSR ra (AddrReg pv) nRegs,
3141             LDGP gp (AddrReg ra)]
3142   where
3143     ------------------------
3144     {-  Try to get a value into a specific register (or registers) for
3145         a call.  The first 6 arguments go into the appropriate
3146         argument register (separate registers for integer and floating
3147         point arguments, but used in lock-step), and the remaining
3148         arguments are dumped to the stack, beginning at 0(sp).  Our
3149         first argument is a pair of the list of remaining argument
3150         registers to be assigned for this call and the next stack
3151         offset to use for overflowing arguments.  This way,
3152         @get_Arg@ can be applied to all of a call's arguments using
3153         @mapAccumLNat@.
3154     -}
3155     get_arg
3156         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3157         -> StixTree             -- Current argument
3158         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3159
3160     -- We have to use up all of our argument registers first...
3161
3162     get_arg ((iDst,fDst):dsts, offset) arg
3163       = getRegister arg                     `thenNat` \ register ->
3164         let
3165             reg  = if isFloatType pk then fDst else iDst
3166             code = registerCode register reg
3167             src  = registerName register reg
3168             pk   = registerRep register
3169         in
3170         return (
3171             if isFloatType pk then
3172                 ((dsts, offset), if isFixed register then
3173                     code . mkSeqInstr (FMOV src fDst)
3174                     else code)
3175             else
3176                 ((dsts, offset), if isFixed register then
3177                     code . mkSeqInstr (OR src (RIReg src) iDst)
3178                     else code))
3179
3180     -- Once we have run out of argument registers, we move to the
3181     -- stack...
3182
3183     get_arg ([], offset) arg
3184       = getRegister arg                 `thenNat` \ register ->
3185         getNewRegNat (registerRep register)
3186                                         `thenNat` \ tmp ->
3187         let
3188             code = registerCode register tmp
3189             src  = registerName register tmp
3190             pk   = registerRep register
3191             sz   = primRepToSize pk
3192         in
3193         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3194
3195 #endif /* alpha_TARGET_ARCH */
3196
3197 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3198
3199 #if i386_TARGET_ARCH
3200
3201 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3202         -- write barrier compiles to no code on x86/x86-64;
3203         -- we keep it this long in order to prevent earlier optimisations.
3204
3205 -- we only cope with a single result for foreign calls
3206 genCCall (CmmPrim op) [CmmHinted r _] args = do
3207   l1 <- getNewLabelNat
3208   l2 <- getNewLabelNat
3209   case op of
3210         MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3211         MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3212
3213         MO_F32_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3214         MO_F64_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3215
3216         MO_F32_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3217         MO_F64_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3218
3219         MO_F32_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3220         MO_F64_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3221
3222         other_op    -> outOfLineFloatOp op r args
3223  where
3224   actuallyInlineFloatOp instr size [CmmHinted x _]
3225         = do res <- trivialUFCode size (instr size) x
3226              any <- anyReg res
3227              return (any (getRegisterReg (CmmLocal r)))
3228
3229 genCCall target dest_regs args = do
3230     let
3231         sizes               = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3232 #if !darwin_TARGET_OS
3233         tot_arg_size        = sum sizes
3234 #else
3235         raw_arg_size        = sum sizes
3236         tot_arg_size        = roundTo 16 raw_arg_size
3237         arg_pad_size        = tot_arg_size - raw_arg_size
3238     delta0 <- getDeltaNat
3239     setDeltaNat (delta0 - arg_pad_size)
3240 #endif
3241
3242     push_codes <- mapM push_arg (reverse args)
3243     delta <- getDeltaNat
3244
3245     -- in
3246     -- deal with static vs dynamic call targets
3247     (callinsns,cconv) <-
3248       case target of
3249         -- CmmPrim -> ...
3250         CmmCallee (CmmLit (CmmLabel lbl)) conv
3251            -> -- ToDo: stdcall arg sizes
3252               return (unitOL (CALL (Left fn_imm) []), conv)
3253            where fn_imm = ImmCLbl lbl
3254         CmmCallee expr conv
3255            -> do { (dyn_c, dyn_r) <- get_op expr
3256                  ; ASSERT( isWord32 (cmmExprType expr) )
3257                    return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3258
3259     let push_code
3260 #if darwin_TARGET_OS
3261             | arg_pad_size /= 0
3262             = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3263                     DELTA (delta0 - arg_pad_size)]
3264               `appOL` concatOL push_codes
3265             | otherwise
3266 #endif
3267             = concatOL push_codes
3268         call = callinsns `appOL`
3269                toOL (
3270                         -- Deallocate parameters after call for ccall;
3271                         -- but not for stdcall (callee does it)
3272                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3273                    [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3274                   ++
3275                   [DELTA (delta + tot_arg_size)]
3276                )
3277     -- in
3278     setDeltaNat (delta + tot_arg_size)
3279
3280     let
3281         -- assign the results, if necessary
3282         assign_code []     = nilOL
3283         assign_code [CmmHinted dest _hint]
3284           | isFloatType ty = unitOL (GMOV fake0 r_dest)
3285           | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3286                                     MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3287           | otherwise      = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3288           where
3289                 ty = localRegType dest
3290                 w  = typeWidth ty
3291                 r_dest_hi = getHiVRegFromLo r_dest
3292                 r_dest    = getRegisterReg (CmmLocal dest)
3293         assign_code many = panic "genCCall.assign_code many"
3294
3295     return (push_code `appOL`
3296             call `appOL`
3297             assign_code dest_regs)
3298
3299   where
3300     arg_size :: CmmType -> Int  -- Width in bytes
3301     arg_size ty = widthInBytes (typeWidth ty)
3302
3303     roundTo a x | x `mod` a == 0 = x
3304                 | otherwise = x + a - (x `mod` a)
3305
3306
3307     push_arg :: HintedCmmActual {-current argument-}
3308                     -> NatM InstrBlock  -- code
3309
3310     push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3311       | isWord64 arg_ty = do
3312         ChildCode64 code r_lo <- iselExpr64 arg
3313         delta <- getDeltaNat
3314         setDeltaNat (delta - 8)
3315         let
3316             r_hi = getHiVRegFromLo r_lo
3317         -- in
3318         return (       code `appOL`
3319                        toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3320                              PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3321                              DELTA (delta-8)]
3322             )
3323
3324       | otherwise = do
3325         (code, reg) <- get_op arg
3326         delta <- getDeltaNat
3327         let size = arg_size arg_ty      -- Byte size
3328         setDeltaNat (delta-size)
3329         if (isFloatType arg_ty)
3330            then return (code `appOL`
3331                         toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3332                               DELTA (delta-size),
3333                               GST (floatSize (typeWidth arg_ty))
3334                                   reg (AddrBaseIndex (EABaseReg esp)
3335                                                         EAIndexNone
3336                                                         (ImmInt 0))]
3337                        )
3338            else return (code `snocOL`
3339                         PUSH II32 (OpReg reg) `snocOL`
3340                         DELTA (delta-size)
3341                        )
3342       where
3343          arg_ty = cmmExprType arg
3344
3345     ------------
3346     get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3347     get_op op = do
3348         (reg,code) <- getSomeReg op
3349         return (code, reg)
3350
3351 #endif /* i386_TARGET_ARCH */
3352
3353 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3354
3355 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3356   -> NatM InstrBlock
3357 outOfLineFloatOp mop res args
3358   = do
3359       dflags <- getDynFlagsNat
3360       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3361       let target = CmmCallee targetExpr CCallConv
3362
3363       if isFloat64 (localRegType res)
3364         then
3365           stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3366         else do
3367           uq <- getUniqueNat
3368           let
3369             tmp = LocalReg uq f64
3370           -- in
3371           code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3372           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3373           return (code1 `appOL` code2)
3374   where
3375         lbl = mkForeignLabel fn Nothing False
3376
3377         fn = case mop of
3378               MO_F32_Sqrt  -> fsLit "sqrtf"
3379               MO_F32_Sin   -> fsLit "sinf"
3380               MO_F32_Cos   -> fsLit "cosf"
3381               MO_F32_Tan   -> fsLit "tanf"
3382               MO_F32_Exp   -> fsLit "expf"
3383               MO_F32_Log   -> fsLit "logf"
3384
3385               MO_F32_Asin  -> fsLit "asinf"
3386               MO_F32_Acos  -> fsLit "acosf"
3387               MO_F32_Atan  -> fsLit "atanf"
3388
3389               MO_F32_Sinh  -> fsLit "sinhf"
3390               MO_F32_Cosh  -> fsLit "coshf"
3391               MO_F32_Tanh  -> fsLit "tanhf"
3392               MO_F32_Pwr   -> fsLit "powf"
3393
3394               MO_F64_Sqrt  -> fsLit "sqrt"
3395               MO_F64_Sin   -> fsLit "sin"
3396               MO_F64_Cos   -> fsLit "cos"
3397               MO_F64_Tan   -> fsLit "tan"
3398               MO_F64_Exp   -> fsLit "exp"
3399               MO_F64_Log   -> fsLit "log"
3400
3401               MO_F64_Asin  -> fsLit "asin"
3402               MO_F64_Acos  -> fsLit "acos"
3403               MO_F64_Atan  -> fsLit "atan"
3404
3405               MO_F64_Sinh  -> fsLit "sinh"
3406               MO_F64_Cosh  -> fsLit "cosh"
3407               MO_F64_Tanh  -> fsLit "tanh"
3408               MO_F64_Pwr   -> fsLit "pow"
3409
3410 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3411
3412 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3413
3414 #if x86_64_TARGET_ARCH
3415
3416 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3417         -- write barrier compiles to no code on x86/x86-64;
3418         -- we keep it this long in order to prevent earlier optimisations.
3419
3420
3421 genCCall (CmmPrim op) [CmmHinted r _] args =
3422   outOfLineFloatOp op r args
3423
3424 genCCall target dest_regs args = do
3425
3426         -- load up the register arguments
3427     (stack_args, aregs, fregs, load_args_code)
3428          <- load_args args allArgRegs allFPArgRegs nilOL
3429
3430     let
3431         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3432         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3433         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3434                 -- for annotating the call instruction with
3435
3436         sse_regs = length fp_regs_used
3437
3438         tot_arg_size = arg_size * length stack_args
3439
3440         -- On entry to the called function, %rsp should be aligned
3441         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3442         -- the return address is 16-byte aligned).  In STG land
3443         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3444         -- need to make sure we push a multiple of 16-bytes of args,
3445         -- plus the return address, to get the correct alignment.
3446         -- Urg, this is hard.  We need to feed the delta back into
3447         -- the arg pushing code.
3448     (real_size, adjust_rsp) <-
3449         if tot_arg_size `rem` 16 == 0
3450             then return (tot_arg_size, nilOL)
3451             else do -- we need to adjust...
3452                 delta <- getDeltaNat
3453                 setDeltaNat (delta-8)
3454                 return (tot_arg_size+8, toOL [
3455                                 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3456                                 DELTA (delta-8)
3457                         ])
3458
3459         -- push the stack args, right to left
3460     push_code <- push_args (reverse stack_args) nilOL
3461     delta <- getDeltaNat
3462
3463     -- deal with static vs dynamic call targets
3464     (callinsns,cconv) <-
3465       case target of
3466         -- CmmPrim -> ...
3467         CmmCallee (CmmLit (CmmLabel lbl)) conv
3468            -> -- ToDo: stdcall arg sizes
3469               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3470            where fn_imm = ImmCLbl lbl
3471         CmmCallee expr conv
3472            -> do (dyn_r, dyn_c) <- getSomeReg expr
3473                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3474
3475     let
3476         -- The x86_64 ABI requires us to set %al to the number of SSE
3477         -- registers that contain arguments, if the called routine
3478         -- is a varargs function.  We don't know whether it's a
3479         -- varargs function or not, so we have to assume it is.
3480         --
3481         -- It's not safe to omit this assignment, even if the number
3482         -- of SSE regs in use is zero.  If %al is larger than 8
3483         -- on entry to a varargs function, seg faults ensue.
3484         assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3485
3486     let call = callinsns `appOL`
3487                toOL (
3488                         -- Deallocate parameters after call for ccall;
3489                         -- but not for stdcall (callee does it)
3490                   (if cconv == StdCallConv || real_size==0 then [] else
3491                    [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3492                   ++
3493                   [DELTA (delta + real_size)]
3494                )
3495     -- in
3496     setDeltaNat (delta + real_size)
3497
3498     let
3499         -- assign the results, if necessary
3500         assign_code []     = nilOL
3501         assign_code [CmmHinted dest _hint] =
3502           case typeWidth rep of
3503                 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3504                 W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3505                 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3506           where
3507                 rep = localRegType dest
3508                 r_dest = getRegisterReg (CmmLocal dest)
3509         assign_code many = panic "genCCall.assign_code many"
3510
3511     return (load_args_code      `appOL`
3512             adjust_rsp          `appOL`
3513             push_code           `appOL`
3514             assign_eax sse_regs `appOL`
3515             call                `appOL`
3516             assign_code dest_regs)
3517
3518   where
3519     arg_size = 8 -- always, at the mo
3520
3521     load_args :: [CmmHinted CmmExpr]
3522               -> [Reg]                  -- int regs avail for args
3523               -> [Reg]                  -- FP regs avail for args
3524               -> InstrBlock
3525               -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3526     load_args args [] [] code     =  return (args, [], [], code)
3527         -- no more regs to use
3528     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3529         -- no more args to push
3530     load_args ((CmmHinted arg hint) : rest) aregs fregs code
3531         | isFloatType arg_rep =
3532         case fregs of
3533           [] -> push_this_arg
3534           (r:rs) -> do
3535              arg_code <- getAnyReg arg
3536              load_args rest aregs rs (code `appOL` arg_code r)
3537         | otherwise =
3538         case aregs of
3539           [] -> push_this_arg
3540           (r:rs) -> do
3541              arg_code <- getAnyReg arg
3542              load_args rest rs fregs (code `appOL` arg_code r)
3543         where
3544           arg_rep = cmmExprType arg
3545
3546           push_this_arg = do
3547             (args',ars,frs,code') <- load_args rest aregs fregs code
3548             return ((CmmHinted arg hint):args', ars, frs, code')
3549
3550     push_args [] code = return code
3551     push_args ((CmmHinted arg hint):rest) code
3552        | isFloatType arg_rep = do
3553          (arg_reg, arg_code) <- getSomeReg arg
3554          delta <- getDeltaNat
3555          setDeltaNat (delta-arg_size)
3556          let code' = code `appOL` arg_code `appOL` toOL [
3557                         SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3558                         DELTA (delta-arg_size),
3559                         MOV (floatSize width) (OpReg arg_reg) (OpAddr  (spRel 0))]
3560          push_args rest code'
3561
3562        | otherwise = do
3563        -- we only ever generate word-sized function arguments.  Promotion
3564        -- has already happened: our Int8# type is kept sign-extended
3565        -- in an Int#, for example.
3566          ASSERT(width == W64) return ()
3567          (arg_op, arg_code) <- getOperand arg
3568          delta <- getDeltaNat
3569          setDeltaNat (delta-arg_size)
3570          let code' = code `appOL` toOL [PUSH II64 arg_op,
3571                                         DELTA (delta-arg_size)]
3572          push_args rest code'
3573         where
3574           arg_rep = cmmExprType arg
3575           width = typeWidth arg_rep
3576 #endif
3577
3578 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3579
3580 #if sparc_TARGET_ARCH
3581 {-
3582    The SPARC calling convention is an absolute
3583    nightmare.  The first 6x32 bits of arguments are mapped into
3584    %o0 through %o5, and the remaining arguments are dumped to the
3585    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3586
3587    If we have to put args on the stack, move %o6==%sp down by
3588    the number of words to go on the stack, to ensure there's enough space.
3589
3590    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3591    16 words above the stack pointer is a word for the address of
3592    a structure return value.  I use this as a temporary location
3593    for moving values from float to int regs.  Certainly it isn't
3594    safe to put anything in the 16 words starting at %sp, since
3595    this area can get trashed at any time due to window overflows
3596    caused by signal handlers.
3597
3598    A final complication (if the above isn't enough) is that
3599    we can't blithely calculate the arguments one by one into
3600    %o0 .. %o5.  Consider the following nested calls:
3601
3602        fff a (fff b c)
3603
3604    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3605    the inner call will itself use %o0, which trashes the value put there
3606    in preparation for the outer call.  Upshot: we need to calculate the
3607    args into temporary regs, and move those to arg regs or onto the
3608    stack only immediately prior to the call proper.  Sigh.
3609 -}
3610
3611 genCCall target dest_regs argsAndHints = do
3612     let
3613         args = map hintlessCmm argsAndHints
3614     argcode_and_vregs <- mapM arg_to_int_vregs args
3615     let
3616         (argcodes, vregss) = unzip argcode_and_vregs
3617         n_argRegs          = length allArgRegs
3618         n_argRegs_used     = min (length vregs) n_argRegs
3619         vregs              = concat vregss
3620     -- deal with static vs dynamic call targets
3621     callinsns <- (case target of
3622         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3623                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3624         CmmCallee expr conv -> do
3625                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3626                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3627         CmmPrim mop -> do
3628                   (res, reduce) <- outOfLineFloatOp mop
3629                   lblOrMopExpr <- case res of
3630                        Left lbl -> do
3631                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3632                        Right mopExpr -> do
3633                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3634                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3635                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3636
3637       )
3638     let
3639         argcode = concatOL argcodes
3640         (move_sp_down, move_sp_up)
3641            = let diff = length vregs - n_argRegs
3642                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3643              in  if   nn <= 0
3644                  then (nilOL, nilOL)
3645                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3646
3647         transfer_code
3648            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3649
3650         -- assign the results, if necessary
3651         assign_code []  = nilOL
3652
3653         assign_code [CmmHinted dest _hint]
3654          = let  rep     = localRegType dest
3655                 width   = typeWidth rep
3656                 r_dest  = getRegisterReg (CmmLocal dest)
3657
3658                 result
3659                         | isFloatType rep
3660                         , W32   <- width
3661                         = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3662
3663                         | isFloatType rep
3664                         , W64   <- width
3665                         = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3666
3667                         | not $ isFloatType rep
3668                         , W32   <- width
3669                         = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3670
3671                         | not $ isFloatType rep
3672                         , W64           <- width
3673                         , r_dest_hi     <- getHiVRegFromLo r_dest
3674                         = toOL  [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
3675                                 , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
3676
3677            in   result
3678
3679     return (argcode       `appOL`
3680             move_sp_down  `appOL`
3681             transfer_code `appOL`
3682             callinsns     `appOL`
3683             unitOL NOP    `appOL`
3684             move_sp_up    `appOL`
3685             assign_code dest_regs)
3686   where
3687      -- move args from the integer vregs into which they have been
3688      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3689      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3690
3691      move_final [] _ offset          -- all args done
3692         = []
3693
3694      move_final (v:vs) [] offset     -- out of aregs; move to stack
3695         = ST II32 v (spRel offset)
3696           : move_final vs [] (offset+1)
3697
3698      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3699         = OR False g0 (RIReg v) a
3700           : move_final vs az offset
3701
3702      -- generate code to calculate an argument, and move it into one
3703      -- or two integer vregs.
3704      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3705      arg_to_int_vregs arg
3706         | isWord64 (cmmExprType arg)
3707         = do
3708           (ChildCode64 code r_lo) <- iselExpr64 arg
3709           let
3710               r_hi = getHiVRegFromLo r_lo
3711           return (code, [r_hi, r_lo])
3712         | otherwise
3713         = do
3714           (src, code) <- getSomeReg arg
3715           tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3716           let
3717               pk        = cmmExprType arg
3718               Just f0_high = fPair f0
3719           case cmmTypeSize pk of
3720              FF64 -> do
3721                       v1 <- getNewRegNat II32
3722                       v2 <- getNewRegNat II32
3723                       return (
3724                         code                          `snocOL`
3725                         FMOV FF64 src f0                `snocOL`
3726                         ST   FF32  f0 (spRel 16)         `snocOL`
3727                         LD   II32  (spRel 16) v1         `snocOL`
3728                         ST   FF32  f0_high (spRel 16) `snocOL`
3729                         LD   II32  (spRel 16) v2
3730                         ,
3731                         [v1,v2]
3732                        )
3733              FF32 -> do
3734                       v1 <- getNewRegNat II32
3735                       return (
3736                         code                    `snocOL`
3737                         ST   FF32  src (spRel 16)  `snocOL`
3738                         LD   II32  (spRel 16) v1
3739                         ,
3740                         [v1]
3741                        )
3742              other -> do
3743                         v1 <- getNewRegNat II32
3744                         return (
3745                           code `snocOL` OR False g0 (RIReg src) v1
3746                           ,
3747                           [v1]
3748                          )
3749 outOfLineFloatOp mop =
3750     do
3751       dflags <- getDynFlagsNat
3752       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3753                   mkForeignLabel functionName Nothing True
3754       let mopLabelOrExpr = case mopExpr of
3755                         CmmLit (CmmLabel lbl) -> Left lbl
3756                         _ -> Right mopExpr
3757       return (mopLabelOrExpr, reduce)
3758             where
3759                 (reduce, functionName) = case mop of
3760                   MO_F32_Exp    -> (True,  fsLit "exp")
3761                   MO_F32_Log    -> (True,  fsLit "log")
3762                   MO_F32_Sqrt   -> (True,  fsLit "sqrt")
3763
3764                   MO_F32_Sin    -> (True,  fsLit "sin")
3765                   MO_F32_Cos    -> (True,  fsLit "cos")
3766                   MO_F32_Tan    -> (True,  fsLit "tan")
3767
3768                   MO_F32_Asin   -> (True,  fsLit "asin")
3769                   MO_F32_Acos   -> (True,  fsLit "acos")
3770                   MO_F32_Atan   -> (True,  fsLit "atan")
3771
3772                   MO_F32_Sinh   -> (True,  fsLit "sinh")
3773                   MO_F32_Cosh   -> (True,  fsLit "cosh")
3774                   MO_F32_Tanh   -> (True,  fsLit "tanh")
3775
3776                   MO_F64_Exp    -> (False, fsLit "exp")
3777                   MO_F64_Log    -> (False, fsLit "log")
3778                   MO_F64_Sqrt   -> (False, fsLit "sqrt")
3779
3780                   MO_F64_Sin    -> (False, fsLit "sin")
3781                   MO_F64_Cos    -> (False, fsLit "cos")
3782                   MO_F64_Tan    -> (False, fsLit "tan")
3783
3784                   MO_F64_Asin   -> (False, fsLit "asin")
3785                   MO_F64_Acos   -> (False, fsLit "acos")
3786                   MO_F64_Atan   -> (False, fsLit "atan")
3787
3788                   MO_F64_Sinh   -> (False, fsLit "sinh")
3789                   MO_F64_Cosh   -> (False, fsLit "cosh")
3790                   MO_F64_Tanh   -> (False, fsLit "tanh")
3791
3792                   other -> pprPanic "outOfLineFloatOp(sparc) "
3793                                 (pprCallishMachOp mop)
3794
3795 #endif /* sparc_TARGET_ARCH */
3796
3797 #if powerpc_TARGET_ARCH
3798
3799 #if darwin_TARGET_OS || linux_TARGET_OS
3800 {-
3801     The PowerPC calling convention for Darwin/Mac OS X
3802     is described in Apple's document
3803     "Inside Mac OS X - Mach-O Runtime Architecture".
3804
3805     PowerPC Linux uses the System V Release 4 Calling Convention
3806     for PowerPC. It is described in the
3807     "System V Application Binary Interface PowerPC Processor Supplement".
3808
3809     Both conventions are similar:
3810     Parameters may be passed in general-purpose registers starting at r3, in
3811     floating point registers starting at f1, or on the stack.
3812
3813     But there are substantial differences:
3814     * The number of registers used for parameter passing and the exact set of
3815       nonvolatile registers differs (see MachRegs.lhs).
3816     * On Darwin, stack space is always reserved for parameters, even if they are
3817       passed in registers. The called routine may choose to save parameters from
3818       registers to the corresponding space on the stack.
3819     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3820       parameter is passed in an FPR.
3821     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3822       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3823       Darwin just treats an I64 like two separate II32s (high word first).
3824     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3825       4-byte aligned like everything else on Darwin.
3826     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3827       PowerPC Linux does not agree, so neither do we.
3828
3829     According to both conventions, The parameter area should be part of the
3830     caller's stack frame, allocated in the caller's prologue code (large enough
3831     to hold the parameter lists for all called routines). The NCG already
3832     uses the stack for register spilling, leaving 64 bytes free at the top.
3833     If we need a larger parameter area than that, we just allocate a new stack
3834     frame just before ccalling.
3835 -}
3836
3837
3838 genCCall (CmmPrim MO_WriteBarrier) _ _
3839  = return $ unitOL LWSYNC
3840
3841 genCCall target dest_regs argsAndHints
3842   = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3843         -- we rely on argument promotion in the codeGen
3844     do
3845         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3846                                                         (zip args argReps)
3847                                                         allArgRegs allFPArgRegs
3848                                                         initialStackOffset
3849                                                         (toOL []) []
3850
3851         (labelOrExpr, reduceToFF32) <- case target of
3852             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3853             CmmCallee expr conv -> return  (Right expr, False)
3854             CmmPrim mop -> outOfLineFloatOp mop
3855
3856         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3857             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3858
3859         case labelOrExpr of
3860             Left lbl -> do
3861                 return (         codeBefore
3862                         `snocOL` BL lbl usedRegs
3863                         `appOL`  codeAfter)
3864             Right dyn -> do
3865                 (dynReg, dynCode) <- getSomeReg dyn
3866                 return (         dynCode
3867                         `snocOL` MTCTR dynReg
3868                         `appOL`  codeBefore
3869                         `snocOL` BCTRL usedRegs
3870                         `appOL`  codeAfter)
3871     where
3872 #if darwin_TARGET_OS
3873         initialStackOffset = 24
3874             -- size of linkage area + size of arguments, in bytes
3875         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3876                                  map (widthInBytes . typeWidth) argReps
3877 #elif linux_TARGET_OS
3878         initialStackOffset = 8
3879         stackDelta finalStack = roundTo 16 finalStack
3880 #endif
3881         args = map hintlessCmm argsAndHints
3882         argReps = map cmmExprType args
3883
3884         roundTo a x | x `mod` a == 0 = x
3885                     | otherwise = x + a - (x `mod` a)
3886
3887         move_sp_down finalStack
3888                | delta > 64 =
3889                         toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3890                               DELTA (-delta)]
3891                | otherwise = nilOL
3892                where delta = stackDelta finalStack
3893         move_sp_up finalStack
3894                | delta > 64 =
3895                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3896                               DELTA 0]
3897                | otherwise = nilOL
3898                where delta = stackDelta finalStack
3899
3900
3901         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3902         passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3903                accumCode accumUsed | isWord64 arg_ty =
3904             do
3905                 ChildCode64 code vr_lo <- iselExpr64 arg
3906                 let vr_hi = getHiVRegFromLo vr_lo
3907
3908 #if darwin_TARGET_OS
3909                 passArguments args
3910                               (drop 2 gprs)
3911                               fprs
3912                               (stackOffset+8)
3913                               (accumCode `appOL` code
3914                                     `snocOL` storeWord vr_hi gprs stackOffset
3915                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3916                               ((take 2 gprs) ++ accumUsed)
3917             where
3918                 storeWord vr (gpr:_) offset = MR gpr vr
3919                 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3920
3921 #elif linux_TARGET_OS
3922                 let stackOffset' = roundTo 8 stackOffset
3923                     stackCode = accumCode `appOL` code
3924                         `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3925                         `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3926                     regCode hireg loreg =
3927                         accumCode `appOL` code
3928                             `snocOL` MR hireg vr_hi
3929                             `snocOL` MR loreg vr_lo
3930
3931                 case gprs of
3932                     hireg : loreg : regs | even (length gprs) ->
3933                         passArguments args regs fprs stackOffset
3934                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3935                     _skipped : hireg : loreg : regs ->
3936                         passArguments args regs fprs stackOffset
3937                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3938                     _ -> -- only one or no regs left
3939                         passArguments args [] fprs (stackOffset'+8)
3940                                       stackCode accumUsed
3941 #endif
3942
3943         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3944             | reg : _ <- regs = do
3945                 register <- getRegister arg
3946                 let code = case register of
3947                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3948                             Any _ acode -> acode reg
3949                 passArguments args
3950                               (drop nGprs gprs)
3951                               (drop nFprs fprs)
3952 #if darwin_TARGET_OS
3953         -- The Darwin ABI requires that we reserve stack slots for register parameters
3954                               (stackOffset + stackBytes)
3955 #elif linux_TARGET_OS
3956         -- ... the SysV ABI doesn't.
3957                               stackOffset
3958 #endif
3959                               (accumCode `appOL` code)
3960                               (reg : accumUsed)
3961             | otherwise = do
3962                 (vr, code) <- getSomeReg arg
3963                 passArguments args
3964                               (drop nGprs gprs)
3965                               (drop nFprs fprs)
3966                               (stackOffset' + stackBytes)
3967                               (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
3968                               accumUsed
3969             where
3970 #if darwin_TARGET_OS
3971         -- stackOffset is at least 4-byte aligned
3972         -- The Darwin ABI is happy with that.
3973                 stackOffset' = stackOffset
3974 #else
3975         -- ... the SysV ABI requires 8-byte alignment for doubles.
3976                 stackOffset' | isFloatType rep && typeWidth rep == W64 =
3977                                  roundTo 8 stackOffset
3978                              | otherwise  =           stackOffset
3979 #endif
3980                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3981                 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
3982                     II32 -> (1, 0, 4, gprs)
3983 #if darwin_TARGET_OS
3984         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3985         -- we use the FPRs.
3986                     FF32 -> (1, 1, 4, fprs)
3987                     FF64 -> (2, 1, 8, fprs)
3988 #elif linux_TARGET_OS
3989         -- ... the SysV ABI doesn't.
3990                     FF32 -> (0, 1, 4, fprs)
3991                     FF64 -> (0, 1, 8, fprs)
3992 #endif
3993
3994         moveResult reduceToFF32 =
3995             case dest_regs of
3996                 [] -> nilOL
3997                 [CmmHinted dest _hint]
3998                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
3999                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
4000                     | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
4001                                           MR r_dest r4]
4002                     | otherwise -> unitOL (MR r_dest r3)
4003                     where rep = cmmRegType (CmmLocal dest)
4004                           r_dest = getRegisterReg (CmmLocal dest)
4005
4006         outOfLineFloatOp mop =
4007             do
4008                 dflags <- getDynFlagsNat
4009                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
4010                               mkForeignLabel functionName Nothing True
4011                 let mopLabelOrExpr = case mopExpr of
4012                         CmmLit (CmmLabel lbl) -> Left lbl
4013                         _ -> Right mopExpr
4014                 return (mopLabelOrExpr, reduce)
4015             where
4016                 (functionName, reduce) = case mop of
4017                     MO_F32_Exp   -> (fsLit "exp", True)
4018                     MO_F32_Log   -> (fsLit "log", True)
4019                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
4020
4021                     MO_F32_Sin   -> (fsLit "sin", True)
4022                     MO_F32_Cos   -> (fsLit "cos", True)
4023                     MO_F32_Tan   -> (fsLit "tan", True)
4024
4025                     MO_F32_Asin  -> (fsLit "asin", True)
4026                     MO_F32_Acos  -> (fsLit "acos", True)
4027                     MO_F32_Atan  -> (fsLit "atan", True)
4028
4029                     MO_F32_Sinh  -> (fsLit "sinh", True)
4030                     MO_F32_Cosh  -> (fsLit "cosh", True)
4031                     MO_F32_Tanh  -> (fsLit "tanh", True)
4032                     MO_F32_Pwr   -> (fsLit "pow", True)
4033
4034                     MO_F64_Exp   -> (fsLit "exp", False)
4035                     MO_F64_Log   -> (fsLit "log", False)
4036                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
4037
4038                     MO_F64_Sin   -> (fsLit "sin", False)
4039                     MO_F64_Cos   -> (fsLit "cos", False)
4040                     MO_F64_Tan   -> (fsLit "tan", False)
4041
4042                     MO_F64_Asin  -> (fsLit "asin", False)
4043                     MO_F64_Acos  -> (fsLit "acos", False)
4044                     MO_F64_Atan  -> (fsLit "atan", False)
4045
4046                     MO_F64_Sinh  -> (fsLit "sinh", False)
4047                     MO_F64_Cosh  -> (fsLit "cosh", False)
4048                     MO_F64_Tanh  -> (fsLit "tanh", False)
4049                     MO_F64_Pwr   -> (fsLit "pow", False)
4050                     other -> pprPanic "genCCall(ppc): unknown callish op"
4051                                     (pprCallishMachOp other)
4052
4053 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
4054
4055 #endif /* powerpc_TARGET_ARCH */
4056
4057
4058 -- -----------------------------------------------------------------------------
4059 -- Generating a table-branch
4060
4061 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
4062
4063 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4064 genSwitch expr ids
4065   | opt_PIC
4066   = do
4067         (reg,e_code) <- getSomeReg expr
4068         lbl <- getNewLabelNat
4069         dflags <- getDynFlagsNat
4070         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4071         (tableReg,t_code) <- getSomeReg $ dynRef
4072         let
4073             jumpTable = map jumpTableEntryRel ids
4074
4075             jumpTableEntryRel Nothing
4076                 = CmmStaticLit (CmmInt 0 wordWidth)
4077             jumpTableEntryRel (Just (BlockId id))
4078                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4079                 where blockLabel = mkAsmTempLabel id
4080
4081             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
4082                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
4083
4084 #if x86_64_TARGET_ARCH
4085 #if darwin_TARGET_OS
4086     -- on Mac OS X/x86_64, put the jump table in the text section
4087     -- to work around a limitation of the linker.
4088     -- ld64 is unable to handle the relocations for
4089     --     .quad L1 - L0
4090     -- if L0 is not preceded by a non-anonymous label in its section.
4091
4092             code = e_code `appOL` t_code `appOL` toOL [
4093                             ADD (intSize wordWidth) op (OpReg tableReg),
4094                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
4095                             LDATA Text (CmmDataLabel lbl : jumpTable)
4096                     ]
4097 #else
4098     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
4099     -- relocations, hence we only get 32-bit offsets in the jump
4100     -- table. As these offsets are always negative we need to properly
4101     -- sign extend them to 64-bit. This hack should be removed in
4102     -- conjunction with the hack in PprMach.hs/pprDataItem once
4103     -- binutils 2.17 is standard.
4104             code = e_code `appOL` t_code `appOL` toOL [
4105                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4106                             MOVSxL II32
4107                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
4108                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
4109                                    (OpReg reg),
4110                             ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
4111                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4112                    ]
4113 #endif
4114 #else
4115             code = e_code `appOL` t_code `appOL` toOL [
4116                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4117                             ADD (intSize wordWidth) op (OpReg tableReg),
4118                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4119                     ]
4120 #endif
4121         return code
4122   | otherwise
4123   = do
4124         (reg,e_code) <- getSomeReg expr
4125         lbl <- getNewLabelNat
4126         let
4127             jumpTable = map jumpTableEntry ids
4128             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
4129             code = e_code `appOL` toOL [
4130                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4131                     JMP_TBL op [ id | Just id <- ids ]
4132                  ]
4133         -- in
4134         return code
4135 #elif powerpc_TARGET_ARCH
4136 genSwitch expr ids
4137   | opt_PIC
4138   = do
4139         (reg,e_code) <- getSomeReg expr
4140         tmp <- getNewRegNat II32
4141         lbl <- getNewLabelNat
4142         dflags <- getDynFlagsNat
4143         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4144         (tableReg,t_code) <- getSomeReg $ dynRef
4145         let
4146             jumpTable = map jumpTableEntryRel ids
4147
4148             jumpTableEntryRel Nothing
4149                 = CmmStaticLit (CmmInt 0 wordWidth)
4150             jumpTableEntryRel (Just (BlockId id))
4151                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4152                 where blockLabel = mkAsmTempLabel id
4153
4154             code = e_code `appOL` t_code `appOL` toOL [
4155                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4156                             SLW tmp reg (RIImm (ImmInt 2)),
4157                             LD II32 tmp (AddrRegReg tableReg tmp),
4158                             ADD tmp tmp (RIReg tableReg),
4159                             MTCTR tmp,
4160                             BCTR [ id | Just id <- ids ]
4161                     ]
4162         return code
4163   | otherwise
4164   = do
4165         (reg,e_code) <- getSomeReg expr
4166         tmp <- getNewRegNat II32
4167         lbl <- getNewLabelNat
4168         let
4169             jumpTable = map jumpTableEntry ids
4170
4171             code = e_code `appOL` toOL [
4172                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4173                             SLW tmp reg (RIImm (ImmInt 2)),
4174                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
4175                             LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4176                             MTCTR tmp,
4177                             BCTR [ id | Just id <- ids ]
4178                     ]
4179         return code
4180 #elif sparc_TARGET_ARCH
4181 genSwitch expr ids
4182         | opt_PIC
4183         = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4184
4185         | otherwise
4186         = do    (e_reg, e_code) <- getSomeReg expr
4187
4188                 base_reg        <- getNewRegNat II32
4189                 offset_reg      <- getNewRegNat II32
4190                 dst             <- getNewRegNat II32
4191
4192                 label           <- getNewLabelNat
4193                 let jumpTable   = map jumpTableEntry ids
4194
4195                 return $ e_code `appOL`
4196                  toOL
4197                         -- the jump table
4198                         [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
4199
4200                         -- load base of jump table
4201                         , SETHI (HI (ImmCLbl label)) base_reg
4202                         , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
4203
4204                         -- the addrs in the table are 32 bits wide..
4205                         , SLL   e_reg (RIImm $ ImmInt 2) offset_reg
4206
4207                         -- load and jump to the destination
4208                         , LD    II32 (AddrRegReg base_reg offset_reg) dst
4209                         , JMP   (AddrRegImm dst (ImmInt 0))
4210                         , NOP ]
4211
4212 #else
4213 #error "ToDo: genSwitch"
4214 #endif
4215
4216
4217 -- | Convert a BlockId to some CmmStatic data
4218 jumpTableEntry :: Maybe BlockId -> CmmStatic
4219 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
4220 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4221     where blockLabel = mkAsmTempLabel id
4222
4223 -- -----------------------------------------------------------------------------
4224 -- Support bits
4225 -- -----------------------------------------------------------------------------
4226
4227
4228 -- -----------------------------------------------------------------------------
4229 -- 'condIntReg' and 'condFltReg': condition codes into registers
4230
4231 -- Turn those condition codes into integers now (when they appear on
4232 -- the right hand side of an assignment).
4233 --
4234 -- (If applicable) Do not fill the delay slots here; you will confuse the
4235 -- register allocator.
4236
4237 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4238
4239 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4240
4241 #if alpha_TARGET_ARCH
4242 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4243 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4244 #endif /* alpha_TARGET_ARCH */
4245
4246 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4247
4248 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4249
4250 condIntReg cond x y = do
4251   CondCode _ cond cond_code <- condIntCode cond x y
4252   tmp <- getNewRegNat II8
4253   let
4254         code dst = cond_code `appOL` toOL [
4255                     SETCC cond (OpReg tmp),
4256                     MOVZxL II8 (OpReg tmp) (OpReg dst)
4257                   ]
4258   -- in
4259   return (Any II32 code)
4260
4261 #endif
4262
4263 #if i386_TARGET_ARCH
4264
4265 condFltReg cond x y = do
4266   CondCode _ cond cond_code <- condFltCode cond x y
4267   tmp <- getNewRegNat II8
4268   let
4269         code dst = cond_code `appOL` toOL [
4270                     SETCC cond (OpReg tmp),
4271                     MOVZxL II8 (OpReg tmp) (OpReg dst)
4272                   ]
4273   -- in
4274   return (Any II32 code)
4275
4276 #endif
4277
4278 #if x86_64_TARGET_ARCH
4279
4280 condFltReg cond x y = do
4281   CondCode _ cond cond_code <- condFltCode cond x y
4282   tmp1 <- getNewRegNat wordSize
4283   tmp2 <- getNewRegNat wordSize
4284   let
4285         -- We have to worry about unordered operands (eg. comparisons
4286         -- against NaN).  If the operands are unordered, the comparison
4287         -- sets the parity flag, carry flag and zero flag.
4288         -- All comparisons are supposed to return false for unordered
4289         -- operands except for !=, which returns true.
4290         --
4291         -- Optimisation: we don't have to test the parity flag if we
4292         -- know the test has already excluded the unordered case: eg >
4293         -- and >= test for a zero carry flag, which can only occur for
4294         -- ordered operands.
4295         --
4296         -- ToDo: by reversing comparisons we could avoid testing the
4297         -- parity flag in more cases.
4298
4299         code dst =
4300            cond_code `appOL`
4301              (case cond of
4302                 NE  -> or_unordered dst
4303                 GU  -> plain_test   dst
4304                 GEU -> plain_test   dst
4305                 _   -> and_ordered  dst)
4306
4307         plain_test dst = toOL [
4308                     SETCC cond (OpReg tmp1),
4309                     MOVZxL II8 (OpReg tmp1) (OpReg dst)
4310                  ]
4311         or_unordered dst = toOL [
4312                     SETCC cond (OpReg tmp1),
4313                     SETCC PARITY (OpReg tmp2),
4314                     OR II8 (OpReg tmp1) (OpReg tmp2),
4315                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
4316                   ]
4317         and_ordered dst = toOL [
4318                     SETCC cond (OpReg tmp1),
4319                     SETCC NOTPARITY (OpReg tmp2),
4320                     AND II8 (OpReg tmp1) (OpReg tmp2),
4321                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
4322                   ]
4323   -- in
4324   return (Any II32 code)
4325
4326 #endif
4327
4328 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4329
4330 #if sparc_TARGET_ARCH
4331
4332 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4333     (src, code) <- getSomeReg x
4334     tmp <- getNewRegNat II32
4335     let
4336         code__2 dst = code `appOL` toOL [
4337             SUB False True g0 (RIReg src) g0,
4338             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4339     return (Any II32 code__2)
4340
4341 condIntReg EQQ x y = do
4342     (src1, code1) <- getSomeReg x
4343     (src2, code2) <- getSomeReg y
4344     tmp1 <- getNewRegNat II32
4345     tmp2 <- getNewRegNat II32
4346     let
4347         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4348             XOR False src1 (RIReg src2) dst,
4349             SUB False True g0 (RIReg dst) g0,
4350             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4351     return (Any II32 code__2)
4352
4353 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4354     (src, code) <- getSomeReg x
4355     tmp <- getNewRegNat II32
4356     let
4357         code__2 dst = code `appOL` toOL [
4358             SUB False True g0 (RIReg src) g0,
4359             ADD True False g0 (RIImm (ImmInt 0)) dst]
4360     return (Any II32 code__2)
4361
4362 condIntReg NE x y = do
4363     (src1, code1) <- getSomeReg x
4364     (src2, code2) <- getSomeReg y
4365     tmp1 <- getNewRegNat II32
4366     tmp2 <- getNewRegNat II32
4367     let
4368         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4369             XOR False src1 (RIReg src2) dst,
4370             SUB False True g0 (RIReg dst) g0,
4371             ADD True False g0 (RIImm (ImmInt 0)) dst]
4372     return (Any II32 code__2)
4373
4374 condIntReg cond x y = do
4375     bid1@(BlockId lbl1) <- getBlockIdNat
4376     bid2@(BlockId lbl2) <- getBlockIdNat
4377     CondCode _ cond cond_code <- condIntCode cond x y
4378     let
4379         code__2 dst = cond_code `appOL` toOL [
4380             BI cond False bid1, NOP,
4381             OR False g0 (RIImm (ImmInt 0)) dst,
4382             BI ALWAYS False bid2, NOP,
4383             NEWBLOCK bid1,
4384             OR False g0 (RIImm (ImmInt 1)) dst,
4385             NEWBLOCK bid2]
4386     return (Any II32 code__2)
4387
4388 condFltReg cond x y = do
4389     bid1@(BlockId lbl1) <- getBlockIdNat
4390     bid2@(BlockId lbl2) <- getBlockIdNat
4391     CondCode _ cond cond_code <- condFltCode cond x y
4392     let
4393         code__2 dst = cond_code `appOL` toOL [
4394             NOP,
4395             BF cond False bid1, NOP,
4396             OR False g0 (RIImm (ImmInt 0)) dst,
4397             BI ALWAYS False bid2, NOP,
4398             NEWBLOCK bid1,
4399             OR False g0 (RIImm (ImmInt 1)) dst,
4400             NEWBLOCK bid2]
4401     return (Any II32 code__2)
4402
4403 #endif /* sparc_TARGET_ARCH */
4404
4405 #if powerpc_TARGET_ARCH
4406 condReg getCond = do
4407     lbl1 <- getBlockIdNat
4408     lbl2 <- getBlockIdNat
4409     CondCode _ cond cond_code <- getCond
4410     let
4411 {-        code dst = cond_code `appOL` toOL [
4412                 BCC cond lbl1,
4413                 LI dst (ImmInt 0),
4414                 BCC ALWAYS lbl2,
4415                 NEWBLOCK lbl1,
4416                 LI dst (ImmInt 1),
4417                 BCC ALWAYS lbl2,
4418                 NEWBLOCK lbl2
4419             ]-}
4420         code dst = cond_code
4421             `appOL` negate_code
4422             `appOL` toOL [
4423                 MFCR dst,
4424                 RLWINM dst dst (bit + 1) 31 31
4425             ]
4426
4427         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4428                     | otherwise = nilOL
4429
4430         (bit, do_negate) = case cond of
4431             LTT -> (0, False)
4432             LE  -> (1, True)
4433             EQQ -> (2, False)
4434             GE  -> (0, True)
4435             GTT -> (1, False)
4436
4437             NE  -> (2, True)
4438
4439             LU  -> (0, False)
4440             LEU -> (1, True)
4441             GEU -> (0, True)
4442             GU  -> (1, False)
4443
4444     return (Any II32 code)
4445
4446 condIntReg cond x y = condReg (condIntCode cond x y)
4447 condFltReg cond x y = condReg (condFltCode cond x y)
4448 #endif /* powerpc_TARGET_ARCH */
4449
4450
4451 -- -----------------------------------------------------------------------------
4452 -- 'trivial*Code': deal with trivial instructions
4453
4454 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4455 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4456 -- Only look for constants on the right hand side, because that's
4457 -- where the generic optimizer will have put them.
4458
4459 -- Similarly, for unary instructions, we don't have to worry about
4460 -- matching an StInt as the argument, because genericOpt will already
4461 -- have handled the constant-folding.
4462
4463 trivialCode
4464     :: Width    -- Int only
4465     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4466       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4467                      -> Maybe (Operand -> Operand -> Instr)
4468       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4469                      -> Maybe (Operand -> Operand -> Instr)
4470       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4471       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4472       ,)))))
4473     -> CmmExpr -> CmmExpr -- the two arguments
4474     -> NatM Register
4475
4476 #ifndef powerpc_TARGET_ARCH
4477 trivialFCode
4478     :: Width    -- Floating point only
4479     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4480       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4481       ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4482       ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4483       ,))))
4484     -> CmmExpr -> CmmExpr -- the two arguments
4485     -> NatM Register
4486 #endif
4487
4488 trivialUCode
4489     :: Size
4490     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4491       ,IF_ARCH_i386 ((Operand -> Instr)
4492       ,IF_ARCH_x86_64 ((Operand -> Instr)
4493       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4494       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4495       ,)))))
4496     -> CmmExpr  -- the one argument
4497     -> NatM Register
4498
4499 #ifndef powerpc_TARGET_ARCH
4500 trivialUFCode
4501     :: Size
4502     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4503       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4504       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4505       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4506       ,))))
4507     -> CmmExpr -- the one argument
4508     -> NatM Register
4509 #endif
4510
4511 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4512
4513 #if alpha_TARGET_ARCH
4514
4515 trivialCode instr x (StInt y)
4516   | fits8Bits y
4517   = getRegister x               `thenNat` \ register ->
4518     getNewRegNat IntRep         `thenNat` \ tmp ->
4519     let
4520         code = registerCode register tmp
4521         src1 = registerName register tmp
4522         src2 = ImmInt (fromInteger y)
4523         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4524     in
4525     return (Any IntRep code__2)
4526
4527 trivialCode instr x y
4528   = getRegister x               `thenNat` \ register1 ->
4529     getRegister y               `thenNat` \ register2 ->
4530     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4531     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4532     let
4533         code1 = registerCode register1 tmp1 []
4534         src1  = registerName register1 tmp1
4535         code2 = registerCode register2 tmp2 []
4536         src2  = registerName register2 tmp2
4537         code__2 dst = asmSeqThen [code1, code2] .
4538                      mkSeqInstr (instr src1 (RIReg src2) dst)
4539     in
4540     return (Any IntRep code__2)
4541
4542 ------------
4543 trivialUCode instr x
4544   = getRegister x               `thenNat` \ register ->
4545     getNewRegNat IntRep         `thenNat` \ tmp ->
4546     let
4547         code = registerCode register tmp
4548         src  = registerName register tmp
4549         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4550     in
4551     return (Any IntRep code__2)
4552
4553 ------------
4554 trivialFCode _ instr x y
4555   = getRegister x               `thenNat` \ register1 ->
4556     getRegister y               `thenNat` \ register2 ->
4557     getNewRegNat FF64   `thenNat` \ tmp1 ->
4558     getNewRegNat FF64   `thenNat` \ tmp2 ->
4559     let
4560         code1 = registerCode register1 tmp1
4561         src1  = registerName register1 tmp1
4562
4563         code2 = registerCode register2 tmp2
4564         src2  = registerName register2 tmp2
4565
4566         code__2 dst = asmSeqThen [code1 [], code2 []] .
4567                       mkSeqInstr (instr src1 src2 dst)
4568     in
4569     return (Any FF64 code__2)
4570
4571 trivialUFCode _ instr x
4572   = getRegister x               `thenNat` \ register ->
4573     getNewRegNat FF64   `thenNat` \ tmp ->
4574     let
4575         code = registerCode register tmp
4576         src  = registerName register tmp
4577         code__2 dst = code . mkSeqInstr (instr src dst)
4578     in
4579     return (Any FF64 code__2)
4580
4581 #endif /* alpha_TARGET_ARCH */
4582
4583 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4584
4585 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4586
4587 {-
4588 The Rules of the Game are:
4589
4590 * You cannot assume anything about the destination register dst;
4591   it may be anything, including a fixed reg.
4592
4593 * You may compute an operand into a fixed reg, but you may not
4594   subsequently change the contents of that fixed reg.  If you
4595   want to do so, first copy the value either to a temporary
4596   or into dst.  You are free to modify dst even if it happens
4597   to be a fixed reg -- that's not your problem.
4598
4599 * You cannot assume that a fixed reg will stay live over an
4600   arbitrary computation.  The same applies to the dst reg.
4601
4602 * Temporary regs obtained from getNewRegNat are distinct from
4603   each other and from all other regs, and stay live over
4604   arbitrary computations.
4605
4606 --------------------
4607
4608 SDM's version of The Rules:
4609
4610 * If getRegister returns Any, that means it can generate correct
4611   code which places the result in any register, period.  Even if that
4612   register happens to be read during the computation.
4613
4614   Corollary #1: this means that if you are generating code for an
4615   operation with two arbitrary operands, you cannot assign the result
4616   of the first operand into the destination register before computing
4617   the second operand.  The second operand might require the old value
4618   of the destination register.
4619
4620   Corollary #2: A function might be able to generate more efficient
4621   code if it knows the destination register is a new temporary (and
4622   therefore not read by any of the sub-computations).
4623
4624 * If getRegister returns Any, then the code it generates may modify only:
4625         (a) fresh temporaries
4626         (b) the destination register
4627         (c) known registers (eg. %ecx is used by shifts)
4628   In particular, it may *not* modify global registers, unless the global
4629   register happens to be the destination register.
4630 -}
4631
4632 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4633   | is32BitLit lit_a = do
4634   b_code <- getAnyReg b
4635   let
4636        code dst
4637          = b_code dst `snocOL`
4638            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4639   -- in
4640   return (Any (intSize width) code)
4641
4642 trivialCode width instr maybe_revinstr a b
4643   = genTrivialCode (intSize width) instr a b
4644
4645 -- This is re-used for floating pt instructions too.
4646 genTrivialCode rep instr a b = do
4647   (b_op, b_code) <- getNonClobberedOperand b
4648   a_code <- getAnyReg a
4649   tmp <- getNewRegNat rep
4650   let
4651      -- We want the value of b to stay alive across the computation of a.
4652      -- But, we want to calculate a straight into the destination register,
4653      -- because the instruction only has two operands (dst := dst `op` src).
4654      -- The troublesome case is when the result of b is in the same register
4655      -- as the destination reg.  In this case, we have to save b in a
4656      -- new temporary across the computation of a.
4657      code dst
4658         | dst `regClashesWithOp` b_op =
4659                 b_code `appOL`
4660                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4661                 a_code dst `snocOL`
4662                 instr (OpReg tmp) (OpReg dst)
4663         | otherwise =
4664                 b_code `appOL`
4665                 a_code dst `snocOL`
4666                 instr b_op (OpReg dst)
4667   -- in
4668   return (Any rep code)
4669
4670 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4671 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4672 reg `regClashesWithOp` _            = False
4673
4674 -----------
4675
4676 trivialUCode rep instr x = do
4677   x_code <- getAnyReg x
4678   let
4679      code dst =
4680         x_code dst `snocOL`
4681         instr (OpReg dst)
4682   return (Any rep code)
4683
4684 -----------
4685
4686 #if i386_TARGET_ARCH
4687
4688 trivialFCode width instr x y = do
4689   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4690   (y_reg, y_code) <- getSomeReg y
4691   let
4692      size = floatSize width
4693      code dst =
4694         x_code `appOL`
4695         y_code `snocOL`
4696         instr size x_reg y_reg dst
4697   return (Any size code)
4698
4699 #endif
4700
4701 #if x86_64_TARGET_ARCH
4702 trivialFCode pk instr x y
4703   = genTrivialCode size (instr size) x y
4704   where size = floatSize pk
4705 #endif
4706
4707 -------------
4708
4709 trivialUFCode size instr x = do
4710   (x_reg, x_code) <- getSomeReg x
4711   let
4712      code dst =
4713         x_code `snocOL`
4714         instr x_reg dst
4715   -- in
4716   return (Any size code)
4717
4718 #endif /* i386_TARGET_ARCH */
4719
4720 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4721
4722 #if sparc_TARGET_ARCH
4723
4724 trivialCode pk instr x (CmmLit (CmmInt y d))
4725   | fits13Bits y
4726   = do
4727       (src1, code) <- getSomeReg x
4728       tmp <- getNewRegNat II32
4729       let
4730         src2 = ImmInt (fromInteger y)
4731         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4732       return (Any II32 code__2)
4733
4734 trivialCode pk instr x y = do
4735     (src1, code1) <- getSomeReg x
4736     (src2, code2) <- getSomeReg y
4737     tmp1 <- getNewRegNat II32
4738     tmp2 <- getNewRegNat II32
4739     let
4740         code__2 dst = code1 `appOL` code2 `snocOL`
4741                       instr src1 (RIReg src2) dst
4742     return (Any II32 code__2)
4743
4744 ------------
4745 trivialFCode pk instr x y = do
4746     (src1, code1) <- getSomeReg x
4747     (src2, code2) <- getSomeReg y
4748     tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4749     tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4750     tmp <- getNewRegNat FF64
4751     let
4752         promote x = FxTOy FF32 FF64 x tmp
4753
4754         pk1   = cmmExprType x
4755         pk2   = cmmExprType y
4756
4757         code__2 dst =
4758                 if pk1 `cmmEqType` pk2 then
4759                     code1 `appOL` code2 `snocOL`
4760                     instr (floatSize pk) src1 src2 dst
4761                 else if typeWidth pk1 == W32 then
4762                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4763                     instr FF64 tmp src2 dst
4764                 else
4765                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4766                     instr FF64 src1 tmp dst
4767     return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
4768                 code__2)
4769
4770 ------------
4771 trivialUCode size instr x = do
4772     (src, code) <- getSomeReg x
4773     tmp <- getNewRegNat size
4774     let
4775         code__2 dst = code `snocOL` instr (RIReg src) dst
4776     return (Any size code__2)
4777
4778 -------------
4779 trivialUFCode pk instr x = do
4780     (src, code) <- getSomeReg x
4781     tmp <- getNewRegNat pk
4782     let
4783         code__2 dst = code `snocOL` instr src dst
4784     return (Any pk code__2)
4785
4786 #endif /* sparc_TARGET_ARCH */
4787
4788 #if powerpc_TARGET_ARCH
4789
4790 {-
4791 Wolfgang's PowerPC version of The Rules:
4792
4793 A slightly modified version of The Rules to take advantage of the fact
4794 that PowerPC instructions work on all registers and don't implicitly
4795 clobber any fixed registers.
4796
4797 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4798
4799 * If getRegister returns Any, then the code it generates may modify only:
4800         (a) fresh temporaries
4801         (b) the destination register
4802   It may *not* modify global registers, unless the global
4803   register happens to be the destination register.
4804   It may not clobber any other registers. In fact, only ccalls clobber any
4805   fixed registers.
4806   Also, it may not modify the counter register (used by genCCall).
4807
4808   Corollary: If a getRegister for a subexpression returns Fixed, you need
4809   not move it to a fresh temporary before evaluating the next subexpression.
4810   The Fixed register won't be modified.
4811   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4812
4813 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4814   the value of the destination register.
4815 -}
4816
4817 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4818     | Just imm <- makeImmediate rep signed y
4819     = do
4820         (src1, code1) <- getSomeReg x
4821         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4822         return (Any (intSize rep) code)
4823
4824 trivialCode rep signed instr x y = do
4825     (src1, code1) <- getSomeReg x
4826     (src2, code2) <- getSomeReg y
4827     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4828     return (Any (intSize rep) code)
4829
4830 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
4831                  -> CmmExpr -> CmmExpr -> NatM Register
4832 trivialCodeNoImm' size instr x y = do
4833     (src1, code1) <- getSomeReg x
4834     (src2, code2) <- getSomeReg y
4835     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4836     return (Any size code)
4837
4838 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
4839                  -> CmmExpr -> CmmExpr -> NatM Register
4840 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
4841
4842 trivialUCode rep instr x = do
4843     (src, code) <- getSomeReg x
4844     let code' dst = code `snocOL` instr dst src
4845     return (Any rep code')
4846
4847 -- There is no "remainder" instruction on the PPC, so we have to do
4848 -- it the hard way.
4849 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4850
4851 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4852     -> CmmExpr -> CmmExpr -> NatM Register
4853 remainderCode rep div x y = do
4854     (src1, code1) <- getSomeReg x
4855     (src2, code2) <- getSomeReg y
4856     let code dst = code1 `appOL` code2 `appOL` toOL [
4857                 div dst src1 src2,
4858                 MULLW dst dst (RIReg src2),
4859                 SUBF dst dst src1
4860             ]
4861     return (Any (intSize rep) code)
4862
4863 #endif /* powerpc_TARGET_ARCH */
4864
4865
4866 -- -----------------------------------------------------------------------------
4867 --  Coercing to/from integer/floating-point...
4868
4869 -- When going to integer, we truncate (round towards 0).
4870
4871 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4872 -- conversions.  We have to store temporaries in memory to move
4873 -- between the integer and the floating point register sets.
4874
4875 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4876 -- pretend, on sparc at least, that double and float regs are seperate
4877 -- kinds, so the value has to be computed into one kind before being
4878 -- explicitly "converted" to live in the other kind.
4879
4880 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4881 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4882
4883 #if sparc_TARGET_ARCH
4884 coerceDbl2Flt :: CmmExpr -> NatM Register
4885 coerceFlt2Dbl :: CmmExpr -> NatM Register
4886 #endif
4887
4888 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4889
4890 #if alpha_TARGET_ARCH
4891
4892 coerceInt2FP _ x
4893   = getRegister x               `thenNat` \ register ->
4894     getNewRegNat IntRep         `thenNat` \ reg ->
4895     let
4896         code = registerCode register reg
4897         src  = registerName register reg
4898
4899         code__2 dst = code . mkSeqInstrs [
4900             ST Q src (spRel 0),
4901             LD TF dst (spRel 0),
4902             CVTxy Q TF dst dst]
4903     in
4904     return (Any FF64 code__2)
4905
4906 -------------
4907 coerceFP2Int x
4908   = getRegister x               `thenNat` \ register ->
4909     getNewRegNat FF64   `thenNat` \ tmp ->
4910     let
4911         code = registerCode register tmp
4912         src  = registerName register tmp
4913
4914         code__2 dst = code . mkSeqInstrs [
4915             CVTxy TF Q src tmp,
4916             ST TF tmp (spRel 0),
4917             LD Q dst (spRel 0)]
4918     in
4919     return (Any IntRep code__2)
4920
4921 #endif /* alpha_TARGET_ARCH */
4922
4923 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4924
4925 #if i386_TARGET_ARCH
4926
4927 coerceInt2FP from to x = do
4928   (x_reg, x_code) <- getSomeReg x
4929   let
4930         opc  = case to of W32 -> GITOF; W64 -> GITOD
4931         code dst = x_code `snocOL` opc x_reg dst
4932         -- ToDo: works for non-II32 reps?
4933   return (Any (floatSize to) code)
4934
4935 ------------
4936
4937 coerceFP2Int from to x = do
4938   (x_reg, x_code) <- getSomeReg x
4939   let
4940         opc  = case from of W32 -> GFTOI; W64 -> GDTOI
4941         code dst = x_code `snocOL` opc x_reg dst
4942         -- ToDo: works for non-II32 reps?
4943   -- in
4944   return (Any (intSize to) code)
4945
4946 #endif /* i386_TARGET_ARCH */
4947
4948 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4949
4950 #if x86_64_TARGET_ARCH
4951
4952 coerceFP2Int from to x = do
4953   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4954   let
4955         opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
4956         code dst = x_code `snocOL` opc x_op dst
4957   -- in
4958   return (Any (intSize to) code) -- works even if the destination rep is <II32
4959
4960 coerceInt2FP from to x = do
4961   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4962   let
4963         opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
4964         code dst = x_code `snocOL` opc x_op dst
4965   -- in
4966   return (Any (floatSize to) code) -- works even if the destination rep is <II32
4967
4968 coerceFP2FP :: Width -> CmmExpr -> NatM Register
4969 coerceFP2FP to x = do
4970   (x_reg, x_code) <- getSomeReg x
4971   let
4972         opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
4973         code dst = x_code `snocOL` opc x_reg dst
4974   -- in
4975   return (Any (floatSize to) code)
4976 #endif
4977
4978 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4979
4980 #if sparc_TARGET_ARCH
4981
4982 coerceInt2FP width1 width2 x = do
4983     (src, code) <- getSomeReg x
4984     let
4985         code__2 dst = code `appOL` toOL [
4986             ST (intSize width1) src (spRel (-2)),
4987             LD (intSize width1) (spRel (-2)) dst,
4988             FxTOy (intSize width1) (floatSize width2) dst dst]
4989     return (Any (floatSize $ width2) code__2)
4990
4991
4992 -- | Coerce a floating point value to integer
4993 --
4994 --   NOTE: On sparc v9 there are no instructions to move a value from an
4995 --         FP register directly to an int register, so we have to use a load/store.
4996 --
4997 coerceFP2Int width1 width2 x
4998  = do   let fsize1      = floatSize width1
4999             fsize2      = floatSize width2
5000
5001             isize2      = intSize   width2
5002
5003         (fsrc, code)    <- getSomeReg x
5004         fdst            <- getNewRegNat fsize2
5005
5006         let code2 dst
5007                 =       code
5008                 `appOL` toOL
5009                         -- convert float to int format, leaving it in a float reg.
5010                         [ FxTOy fsize1 isize2 fsrc fdst
5011
5012                         -- store the int into mem, then load it back to move
5013                         --      it into an actual int reg.
5014                         , ST    fsize2 fdst (spRel (-2))
5015                         , LD    isize2 (spRel (-2)) dst]
5016
5017         return (Any isize2 code2)
5018
5019 ------------
5020 coerceDbl2Flt x = do
5021     (src, code) <- getSomeReg x
5022     return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
5023
5024 ------------
5025 coerceFlt2Dbl x = do
5026     (src, code) <- getSomeReg x
5027     return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
5028
5029 #endif /* sparc_TARGET_ARCH */
5030
5031 #if powerpc_TARGET_ARCH
5032 coerceInt2FP fromRep toRep x = do
5033     (src, code) <- getSomeReg x
5034     lbl <- getNewLabelNat
5035     itmp <- getNewRegNat II32
5036     ftmp <- getNewRegNat FF64
5037     dflags <- getDynFlagsNat
5038     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
5039     Amode addr addr_code <- getAmode dynRef
5040     let
5041         code' dst = code `appOL` maybe_exts `appOL` toOL [
5042                 LDATA ReadOnlyData
5043                                 [CmmDataLabel lbl,
5044                                  CmmStaticLit (CmmInt 0x43300000 W32),
5045                                  CmmStaticLit (CmmInt 0x80000000 W32)],
5046                 XORIS itmp src (ImmInt 0x8000),
5047                 ST II32 itmp (spRel 3),
5048                 LIS itmp (ImmInt 0x4330),
5049                 ST II32 itmp (spRel 2),
5050                 LD FF64 ftmp (spRel 2)
5051             ] `appOL` addr_code `appOL` toOL [
5052                 LD FF64 dst addr,
5053                 FSUB FF64 dst ftmp dst
5054             ] `appOL` maybe_frsp dst
5055
5056         maybe_exts = case fromRep of
5057                         W8 ->  unitOL $ EXTS II8 src src
5058                         W16 -> unitOL $ EXTS II16 src src
5059                         W32 -> nilOL
5060         maybe_frsp dst = case toRep of
5061                         W32 -> unitOL $ FRSP dst dst
5062                         W64 -> nilOL
5063     return (Any (floatSize toRep) code')
5064
5065 coerceFP2Int fromRep toRep x = do
5066     -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
5067     (src, code) <- getSomeReg x
5068     tmp <- getNewRegNat FF64
5069     let
5070         code' dst = code `appOL` toOL [
5071                 -- convert to int in FP reg
5072             FCTIWZ tmp src,
5073                 -- store value (64bit) from FP to stack
5074             ST FF64 tmp (spRel 2),
5075                 -- read low word of value (high word is undefined)
5076             LD II32 dst (spRel 3)]
5077     return (Any (intSize toRep) code')
5078 #endif /* powerpc_TARGET_ARCH */
5079
5080
5081 -- -----------------------------------------------------------------------------
5082 -- eXTRA_STK_ARGS_HERE
5083
5084 -- We (allegedly) put the first six C-call arguments in registers;
5085 -- where do we start putting the rest of them?
5086
5087 -- Moved from MachInstrs (SDM):
5088
5089 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
5090 eXTRA_STK_ARGS_HERE :: Int
5091 eXTRA_STK_ARGS_HERE
5092   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
5093 #endif