compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo ( mkBranchInstr )
  33
  34 -- Our intermediate code:
  35 import BlockId
  36 import PprCmm           ( pprExpr )
  37 import Cmm
  38 import CLabel
  39 import ClosureInfo      ( C_SRT(..) )
  40
  41 -- The rest:
  42 import StaticFlags      ( opt_PIC )
  43 import ForeignCall      ( CCallConv(..) )
  44 import OrdList
  45 import Pretty
  46 import qualified Outputable as O
  47 import Outputable
  48 import FastString
  49 import FastBool         ( isFastTrue )
  50 import Constants        ( wORD_SIZE )
  51
  52 import Debug.Trace      ( trace )
  53
  54 import Control.Monad    ( mapAndUnzipM )
  55 import Data.Maybe       ( fromJust )
  56 import Data.Bits
  57 import Data.Word
  58 import Data.Int
  59
  60 -- -----------------------------------------------------------------------------
  61 -- Top-level of the instruction selector
  62
  63 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  64 -- They are really trees of insns to facilitate fast appending, where a
  65 -- left-to-right traversal (pre-order?) yields the insns in the correct
  66 -- order.
  67
  68 type InstrBlock = OrdList Instr
  69
  70 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  71 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  72   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  73   picBaseMb <- getPicBaseMaybeNat
  74   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  75       tops = proc : concat statics
  76   case picBaseMb of
  77       Just picBase -> initializePicBase picBase tops
  78       Nothing -> return tops
  79
  80 cmmTopCodeGen (CmmData sec dat) = do
  81   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  82
  83 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  84 basicBlockCodeGen (BasicBlock id stmts) = do
  85   instrs <- stmtsToInstrs stmts
  86   -- code generation may introduce new basic block boundaries, which
  87   -- are indicated by the NEWBLOCK instruction.  We must split up the
  88   -- instruction stream into basic blocks again.  Also, we extract
  89   -- LDATAs here too.
  90   let
  91         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  92
  93         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  94           = ([], BasicBlock id instrs : blocks, statics)
  95         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  96           = (instrs, blocks, CmmData sec dat:statics)
  97         mkBlocks instr (instrs,blocks,statics)
  98           = (instr:instrs, blocks, statics)
  99   -- in
 100   return (BasicBlock id top : other_blocks, statics)
 101
 102 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 103 stmtsToInstrs stmts
 104    = do instrss <- mapM stmtToInstrs stmts
 105         return (concatOL instrss)
 106
 107 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 108 stmtToInstrs stmt = case stmt of
 109     CmmNop         -> return nilOL
 110     CmmComment s   -> return (unitOL (COMMENT s))
 111
 112     CmmAssign reg src
 113       | isFloatType ty -> assignReg_FltCode size reg src
 114 #if WORD_SIZE_IN_BITS==32
 115       | isWord64 ty    -> assignReg_I64Code      reg src
 116 #endif
 117       | otherwise        -> assignReg_IntCode size reg src
 118         where ty = cmmRegType reg
 119               size = cmmTypeSize ty
 120
 121     CmmStore addr src
 122       | isFloatType ty -> assignMem_FltCode size addr src
 123 #if WORD_SIZE_IN_BITS==32
 124       | isWord64 ty      -> assignMem_I64Code      addr src
 125 #endif
 126       | otherwise        -> assignMem_IntCode size addr src
 127         where ty = cmmExprType src
 128               size = cmmTypeSize ty
 129
 130     CmmCall target result_regs args _ _
 131        -> genCCall target result_regs args
 132
 133     CmmBranch id          -> genBranch id
 134     CmmCondBranch arg id  -> genCondJump id arg
 135     CmmSwitch arg ids     -> genSwitch arg ids
 136     CmmJump arg params    -> genJump arg
 137     CmmReturn params      ->
 138       panic "stmtToInstrs: return statement should have been cps'd away"
 139
 140 -- -----------------------------------------------------------------------------
 141 -- General things for putting together code sequences
 142
 143 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 144 -- CmmExprs into CmmRegOff?
 145 mangleIndexTree :: CmmExpr -> CmmExpr
 146 mangleIndexTree (CmmRegOff reg off)
 147   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 148   where width = typeWidth (cmmRegType reg)
 149
 150 -- -----------------------------------------------------------------------------
 151 --  Code gen for 64-bit arithmetic on 32-bit platforms
 152
 153 {-
 154 Simple support for generating 64-bit code (ie, 64 bit values and 64
 155 bit assignments) on 32-bit platforms.  Unlike the main code generator
 156 we merely shoot for generating working code as simply as possible, and
 157 pay little attention to code quality.  Specifically, there is no
 158 attempt to deal cleverly with the fixed-vs-floating register
 159 distinction; all values are generated into (pairs of) floating
 160 registers, even if this would mean some redundant reg-reg moves as a
 161 result.  Only one of the VRegUniques is returned, since it will be
 162 of the VRegUniqueLo form, and the upper-half VReg can be determined
 163 by applying getHiVRegFromLo to it.
 164 -}
 165
 166 data ChildCode64        -- a.k.a "Register64"
 167    = ChildCode64
 168         InstrBlock      -- code
 169         Reg             -- the lower 32-bit temporary which contains the
 170                         -- result; use getHiVRegFromLo to find the other
 171                         -- VRegUnique.  Rules of this simplified insn
 172                         -- selection game are therefore that the returned
 173                         -- Reg may be modified
 174
 175 #if WORD_SIZE_IN_BITS==32
 176 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 177 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 178 #endif
 179
 180 #ifndef x86_64_TARGET_ARCH
 181 iselExpr64        :: CmmExpr -> NatM ChildCode64
 182 #endif
 183
 184 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 185
 186 #if i386_TARGET_ARCH
 187
 188 assignMem_I64Code addrTree valueTree = do
 189   Amode addr addr_code <- getAmode addrTree
 190   ChildCode64 vcode rlo <- iselExpr64 valueTree
 191   let
 192         rhi = getHiVRegFromLo rlo
 193
 194         -- Little-endian store
 195         mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
 196         mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 197   -- in
 198   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 199
 200
 201 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 202    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 203    let
 204          r_dst_lo = mkVReg u_dst II32
 205          r_dst_hi = getHiVRegFromLo r_dst_lo
 206          r_src_hi = getHiVRegFromLo r_src_lo
 207          mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
 208          mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
 209    -- in
 210    return (
 211         vcode `snocOL` mov_lo `snocOL` mov_hi
 212      )
 213
 214 assignReg_I64Code lvalue valueTree
 215    = panic "assignReg_I64Code(i386): invalid lvalue"
 216
 217 ------------
 218
 219 iselExpr64 (CmmLit (CmmInt i _)) = do
 220   (rlo,rhi) <- getNewRegPairNat II32
 221   let
 222         r = fromIntegral (fromIntegral i :: Word32)
 223         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 224         code = toOL [
 225                 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
 226                 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
 227                 ]
 228   -- in
 229   return (ChildCode64 code rlo)
 230
 231 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 232    Amode addr addr_code <- getAmode addrTree
 233    (rlo,rhi) <- getNewRegPairNat II32
 234    let
 235         mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
 236         mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 237    -- in
 238    return (
 239             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 240                         rlo
 241      )
 242
 243 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 244    = return (ChildCode64 nilOL (mkVReg vu II32))
 245
 246 -- we handle addition, but rather badly
 247 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 248    ChildCode64 code1 r1lo <- iselExpr64 e1
 249    (rlo,rhi) <- getNewRegPairNat II32
 250    let
 251         r = fromIntegral (fromIntegral i :: Word32)
 252         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 253         r1hi = getHiVRegFromLo r1lo
 254         code =  code1 `appOL`
 255                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 256                        ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
 257                        MOV II32 (OpReg r1hi) (OpReg rhi),
 258                        ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 259    -- in
 260    return (ChildCode64 code rlo)
 261
 262 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 263    ChildCode64 code1 r1lo <- iselExpr64 e1
 264    ChildCode64 code2 r2lo <- iselExpr64 e2
 265    (rlo,rhi) <- getNewRegPairNat II32
 266    let
 267         r1hi = getHiVRegFromLo r1lo
 268         r2hi = getHiVRegFromLo r2lo
 269         code =  code1 `appOL`
 270                 code2 `appOL`
 271                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 272                        ADD II32 (OpReg r2lo) (OpReg rlo),
 273                        MOV II32 (OpReg r1hi) (OpReg rhi),
 274                        ADC II32 (OpReg r2hi) (OpReg rhi) ]
 275    -- in
 276    return (ChildCode64 code rlo)
 277
 278 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
 279      fn <- getAnyReg expr
 280      r_dst_lo <-  getNewRegNat II32
 281      let r_dst_hi = getHiVRegFromLo r_dst_lo
 282          code = fn r_dst_lo
 283      return (
 284              ChildCode64 (code `snocOL`
 285                           MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 286                           r_dst_lo
 287             )
 288
 289 iselExpr64 expr
 290    = pprPanic "iselExpr64(i386)" (ppr expr)
 291
 292 #endif /* i386_TARGET_ARCH */
 293
 294 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 295
 296 #if sparc_TARGET_ARCH
 297
 298 assignMem_I64Code addrTree valueTree = do
 299      Amode addr addr_code <- getAmode addrTree
 300      ChildCode64 vcode rlo <- iselExpr64 valueTree
 301      (src, code) <- getSomeReg addrTree
 302      let
 303          rhi = getHiVRegFromLo rlo
 304          -- Big-endian store
 305          mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
 306          mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
 307      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 308
 309 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 310      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 311      let
 312          r_dst_lo = mkVReg u_dst pk
 313          r_dst_hi = getHiVRegFromLo r_dst_lo
 314          r_src_hi = getHiVRegFromLo r_src_lo
 315          mov_lo = mkMOV r_src_lo r_dst_lo
 316          mov_hi = mkMOV r_src_hi r_dst_hi
 317          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 318      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 319 assignReg_I64Code lvalue valueTree
 320    = panic "assignReg_I64Code(sparc): invalid lvalue"
 321
 322
 323 -- Don't delete this -- it's very handy for debugging.
 324 --iselExpr64 expr
 325 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 326 --   = panic "iselExpr64(???)"
 327
 328 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 329      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 330      rlo <- getNewRegNat II32
 331      let rhi = getHiVRegFromLo rlo
 332          mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
 333          mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
 334      return (
 335             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 336                          rlo
 337           )
 338
 339 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) isWord64 ty = do
 340      r_dst_lo <-  getNewRegNat b32
 341      let r_dst_hi = getHiVRegFromLo r_dst_lo
 342          r_src_lo = mkVReg uq b32
 343          r_src_hi = getHiVRegFromLo r_src_lo
 344          mov_lo = mkMOV r_src_lo r_dst_lo
 345          mov_hi = mkMOV r_src_hi r_dst_hi
 346          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 347      return (
 348             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 349          )
 350
 351 iselExpr64 expr
 352    = pprPanic "iselExpr64(sparc)" (ppr expr)
 353
 354 #endif /* sparc_TARGET_ARCH */
 355
 356 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 357
 358 #if powerpc_TARGET_ARCH
 359
 360 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 361 getI64Amodes addrTree = do
 362     Amode hi_addr addr_code <- getAmode addrTree
 363     case addrOffset hi_addr 4 of
 364         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 365         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 366                            return (AddrRegImm hi_ptr (ImmInt 0),
 367                                    AddrRegImm hi_ptr (ImmInt 4),
 368                                    code)
 369
 370 assignMem_I64Code addrTree valueTree = do
 371         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 372         ChildCode64 vcode rlo <- iselExpr64 valueTree
 373         let
 374                 rhi = getHiVRegFromLo rlo
 375
 376                 -- Big-endian store
 377                 mov_hi = ST II32 rhi hi_addr
 378                 mov_lo = ST II32 rlo lo_addr
 379         -- in
 380         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 381
 382 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 383    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 384    let
 385          r_dst_lo = mkVReg u_dst II32
 386          r_dst_hi = getHiVRegFromLo r_dst_lo
 387          r_src_hi = getHiVRegFromLo r_src_lo
 388          mov_lo = MR r_dst_lo r_src_lo
 389          mov_hi = MR r_dst_hi r_src_hi
 390    -- in
 391    return (
 392         vcode `snocOL` mov_lo `snocOL` mov_hi
 393      )
 394
 395 assignReg_I64Code lvalue valueTree
 396    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 397
 398
 399 -- Don't delete this -- it's very handy for debugging.
 400 --iselExpr64 expr
 401 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 402 --   = panic "iselExpr64(???)"
 403
 404 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 405     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 406     (rlo, rhi) <- getNewRegPairNat II32
 407     let mov_hi = LD II32 rhi hi_addr
 408         mov_lo = LD II32 rlo lo_addr
 409     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 410                          rlo
 411
 412 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 413    = return (ChildCode64 nilOL (mkVReg vu II32))
 414
 415 iselExpr64 (CmmLit (CmmInt i _)) = do
 416   (rlo,rhi) <- getNewRegPairNat II32
 417   let
 418         half0 = fromIntegral (fromIntegral i :: Word16)
 419         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 420         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 421         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 422
 423         code = toOL [
 424                 LIS rlo (ImmInt half1),
 425                 OR rlo rlo (RIImm $ ImmInt half0),
 426                 LIS rhi (ImmInt half3),
 427                 OR rlo rlo (RIImm $ ImmInt half2)
 428                 ]
 429   -- in
 430   return (ChildCode64 code rlo)
 431
 432 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 433    ChildCode64 code1 r1lo <- iselExpr64 e1
 434    ChildCode64 code2 r2lo <- iselExpr64 e2
 435    (rlo,rhi) <- getNewRegPairNat II32
 436    let
 437         r1hi = getHiVRegFromLo r1lo
 438         r2hi = getHiVRegFromLo r2lo
 439         code =  code1 `appOL`
 440                 code2 `appOL`
 441                 toOL [ ADDC rlo r1lo r2lo,
 442                        ADDE rhi r1hi r2hi ]
 443    -- in
 444    return (ChildCode64 code rlo)
 445
 446 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
 447     (expr_reg,expr_code) <- getSomeReg expr
 448     (rlo, rhi) <- getNewRegPairNat II32
 449     let mov_hi = LI rhi (ImmInt 0)
 450         mov_lo = MR rlo expr_reg
 451     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 452                          rlo
 453 iselExpr64 expr
 454    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 455
 456 #endif /* powerpc_TARGET_ARCH */
 457
 458
 459 -- -----------------------------------------------------------------------------
 460 -- The 'Register' type
 461
 462 -- 'Register's passed up the tree.  If the stix code forces the register
 463 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 464 -- otherwise, it comes out as @Any@, and the parent can decide which
 465 -- register to put it in.
 466
 467 data Register
 468   = Fixed   Size Reg InstrBlock
 469   | Any     Size (Reg -> InstrBlock)
 470
 471 swizzleRegisterRep :: Register -> Size -> Register
 472 -- Change the width; it's a no-op
 473 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 474 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 475
 476
 477 -- -----------------------------------------------------------------------------
 478 -- Utils based on getRegister, below
 479
 480 -- The dual to getAnyReg: compute an expression into a register, but
 481 -- we don't mind which one it is.
 482 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 483 getSomeReg expr = do
 484   r <- getRegister expr
 485   case r of
 486     Any rep code -> do
 487         tmp <- getNewRegNat rep
 488         return (tmp, code tmp)
 489     Fixed _ reg code ->
 490         return (reg, code)
 491
 492 -- -----------------------------------------------------------------------------
 493 -- Grab the Reg for a CmmReg
 494
 495 getRegisterReg :: CmmReg -> Reg
 496
 497 getRegisterReg (CmmLocal (LocalReg u pk))
 498   = mkVReg u (cmmTypeSize pk)
 499
 500 getRegisterReg (CmmGlobal mid)
 501   = case get_GlobalReg_reg_or_addr mid of
 502        Left (RealReg rrno) -> RealReg rrno
 503        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 504           -- By this stage, the only MagicIds remaining should be the
 505           -- ones which map to a real machine register on this
 506           -- platform.  Hence ...
 507
 508
 509 -- -----------------------------------------------------------------------------
 510 -- Generate code to get a subtree into a Register
 511
 512 -- Don't delete this -- it's very handy for debugging.
 513 --getRegister expr
 514 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 515 --   = panic "getRegister(???)"
 516
 517 getRegister :: CmmExpr -> NatM Register
 518
 519 #if !x86_64_TARGET_ARCH
 520     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 521     -- register, it can only be used for rip-relative addressing.
 522 getRegister (CmmReg (CmmGlobal PicBaseReg))
 523   = do
 524       reg <- getPicBaseNat wordSize
 525       return (Fixed wordSize reg nilOL)
 526 #endif
 527
 528 getRegister (CmmReg reg)
 529   = return (Fixed (cmmTypeSize (cmmRegType reg))
 530                   (getRegisterReg reg) nilOL)
 531
 532 getRegister tree@(CmmRegOff _ _)
 533   = getRegister (mangleIndexTree tree)
 534
 535
 536 #if WORD_SIZE_IN_BITS==32
 537     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 538     -- TO_W_(x), TO_W_(x >> 32)
 539
 540 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 541              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 542   ChildCode64 code rlo <- iselExpr64 x
 543   return $ Fixed II32 (getHiVRegFromLo rlo) code
 544
 545 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 546              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 547   ChildCode64 code rlo <- iselExpr64 x
 548   return $ Fixed II32 (getHiVRegFromLo rlo) code
 549
 550 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 551   ChildCode64 code rlo <- iselExpr64 x
 552   return $ Fixed II32 rlo code
 553
 554 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 555   ChildCode64 code rlo <- iselExpr64 x
 556   return $ Fixed II32 rlo code
 557
 558 #endif
 559
 560 -- end of machine-"independent" bit; here we go on the rest...
 561
 562 #if alpha_TARGET_ARCH
 563
 564 getRegister (StDouble d)
 565   = getBlockIdNat                   `thenNat` \ lbl ->
 566     getNewRegNat PtrRep             `thenNat` \ tmp ->
 567     let code dst = mkSeqInstrs [
 568             LDATA RoDataSegment lbl [
 569                     DATA TF [ImmLab (rational d)]
 570                 ],
 571             LDA tmp (AddrImm (ImmCLbl lbl)),
 572             LD TF dst (AddrReg tmp)]
 573     in
 574         return (Any FF64 code)
 575
 576 getRegister (StPrim primop [x]) -- unary PrimOps
 577   = case primop of
 578       IntNegOp -> trivialUCode (NEG Q False) x
 579
 580       NotOp    -> trivialUCode NOT x
 581
 582       FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
 583       DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
 584
 585       OrdOp -> coerceIntCode IntRep x
 586       ChrOp -> chrCode x
 587
 588       Float2IntOp  -> coerceFP2Int    x
 589       Int2FloatOp  -> coerceInt2FP pr x
 590       Double2IntOp -> coerceFP2Int    x
 591       Int2DoubleOp -> coerceInt2FP pr x
 592
 593       Double2FloatOp -> coerceFltCode x
 594       Float2DoubleOp -> coerceFltCode x
 595
 596       other_op -> getRegister (StCall fn CCallConv FF64 [x])
 597         where
 598           fn = case other_op of
 599                  FloatExpOp    -> fsLit "exp"
 600                  FloatLogOp    -> fsLit "log"
 601                  FloatSqrtOp   -> fsLit "sqrt"
 602                  FloatSinOp    -> fsLit "sin"
 603                  FloatCosOp    -> fsLit "cos"
 604                  FloatTanOp    -> fsLit "tan"
 605                  FloatAsinOp   -> fsLit "asin"
 606                  FloatAcosOp   -> fsLit "acos"
 607                  FloatAtanOp   -> fsLit "atan"
 608                  FloatSinhOp   -> fsLit "sinh"
 609                  FloatCoshOp   -> fsLit "cosh"
 610                  FloatTanhOp   -> fsLit "tanh"
 611                  DoubleExpOp   -> fsLit "exp"
 612                  DoubleLogOp   -> fsLit "log"
 613                  DoubleSqrtOp  -> fsLit "sqrt"
 614                  DoubleSinOp   -> fsLit "sin"
 615                  DoubleCosOp   -> fsLit "cos"
 616                  DoubleTanOp   -> fsLit "tan"
 617                  DoubleAsinOp  -> fsLit "asin"
 618                  DoubleAcosOp  -> fsLit "acos"
 619                  DoubleAtanOp  -> fsLit "atan"
 620                  DoubleSinhOp  -> fsLit "sinh"
 621                  DoubleCoshOp  -> fsLit "cosh"
 622                  DoubleTanhOp  -> fsLit "tanh"
 623   where
 624     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 625
 626 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 627   = case primop of
 628       CharGtOp -> trivialCode (CMP LTT) y x
 629       CharGeOp -> trivialCode (CMP LE) y x
 630       CharEqOp -> trivialCode (CMP EQQ) x y
 631       CharNeOp -> int_NE_code x y
 632       CharLtOp -> trivialCode (CMP LTT) x y
 633       CharLeOp -> trivialCode (CMP LE) x y
 634
 635       IntGtOp  -> trivialCode (CMP LTT) y x
 636       IntGeOp  -> trivialCode (CMP LE) y x
 637       IntEqOp  -> trivialCode (CMP EQQ) x y
 638       IntNeOp  -> int_NE_code x y
 639       IntLtOp  -> trivialCode (CMP LTT) x y
 640       IntLeOp  -> trivialCode (CMP LE) x y
 641
 642       WordGtOp -> trivialCode (CMP ULT) y x
 643       WordGeOp -> trivialCode (CMP ULE) x y
 644       WordEqOp -> trivialCode (CMP EQQ)  x y
 645       WordNeOp -> int_NE_code x y
 646       WordLtOp -> trivialCode (CMP ULT) x y
 647       WordLeOp -> trivialCode (CMP ULE) x y
 648
 649       AddrGtOp -> trivialCode (CMP ULT) y x
 650       AddrGeOp -> trivialCode (CMP ULE) y x
 651       AddrEqOp -> trivialCode (CMP EQQ)  x y
 652       AddrNeOp -> int_NE_code x y
 653       AddrLtOp -> trivialCode (CMP ULT) x y
 654       AddrLeOp -> trivialCode (CMP ULE) x y
 655
 656       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 657       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 658       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 659       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 660       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 661       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 662
 663       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 664       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 665       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 666       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 667       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 668       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 669
 670       IntAddOp  -> trivialCode (ADD Q False) x y
 671       IntSubOp  -> trivialCode (SUB Q False) x y
 672       IntMulOp  -> trivialCode (MUL Q False) x y
 673       IntQuotOp -> trivialCode (DIV Q False) x y
 674       IntRemOp  -> trivialCode (REM Q False) x y
 675
 676       WordAddOp  -> trivialCode (ADD Q False) x y
 677       WordSubOp  -> trivialCode (SUB Q False) x y
 678       WordMulOp  -> trivialCode (MUL Q False) x y
 679       WordQuotOp -> trivialCode (DIV Q True) x y
 680       WordRemOp  -> trivialCode (REM Q True) x y
 681
 682       FloatAddOp -> trivialFCode  W32 (FADD TF) x y
 683       FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
 684       FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
 685       FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
 686
 687       DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
 688       DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
 689       DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
 690       DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
 691
 692       AddrAddOp  -> trivialCode (ADD Q False) x y
 693       AddrSubOp  -> trivialCode (SUB Q False) x y
 694       AddrRemOp  -> trivialCode (REM Q True) x y
 695
 696       AndOp  -> trivialCode AND x y
 697       OrOp   -> trivialCode OR  x y
 698       XorOp  -> trivialCode XOR x y
 699       SllOp  -> trivialCode SLL x y
 700       SrlOp  -> trivialCode SRL x y
 701
 702       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 703       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 704       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 705
 706       FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
 707       DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
 708   where
 709     {- ------------------------------------------------------------
 710         Some bizarre special code for getting condition codes into
 711         registers.  Integer non-equality is a test for equality
 712         followed by an XOR with 1.  (Integer comparisons always set
 713         the result register to 0 or 1.)  Floating point comparisons of
 714         any kind leave the result in a floating point register, so we
 715         need to wrangle an integer register out of things.
 716     -}
 717     int_NE_code :: StixTree -> StixTree -> NatM Register
 718
 719     int_NE_code x y
 720       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 721         getNewRegNat IntRep             `thenNat` \ tmp ->
 722         let
 723             code = registerCode register tmp
 724             src  = registerName register tmp
 725             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 726         in
 727         return (Any IntRep code__2)
 728
 729     {- ------------------------------------------------------------
 730         Comments for int_NE_code also apply to cmpF_code
 731     -}
 732     cmpF_code
 733         :: (Reg -> Reg -> Reg -> Instr)
 734         -> Cond
 735         -> StixTree -> StixTree
 736         -> NatM Register
 737
 738     cmpF_code instr cond x y
 739       = trivialFCode pr instr x y       `thenNat` \ register ->
 740         getNewRegNat FF64               `thenNat` \ tmp ->
 741         getBlockIdNat                   `thenNat` \ lbl ->
 742         let
 743             code = registerCode register tmp
 744             result  = registerName register tmp
 745
 746             code__2 dst = code . mkSeqInstrs [
 747                 OR zeroh (RIImm (ImmInt 1)) dst,
 748                 BF cond  result (ImmCLbl lbl),
 749                 OR zeroh (RIReg zeroh) dst,
 750                 NEWBLOCK lbl]
 751         in
 752         return (Any IntRep code__2)
 753       where
 754         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 755       ------------------------------------------------------------
 756
 757 getRegister (CmmLoad pk mem)
 758   = getAmode mem                    `thenNat` \ amode ->
 759     let
 760         code = amodeCode amode
 761         src   = amodeAddr amode
 762         size = primRepToSize pk
 763         code__2 dst = code . mkSeqInstr (LD size dst src)
 764     in
 765     return (Any pk code__2)
 766
 767 getRegister (StInt i)
 768   | fits8Bits i
 769   = let
 770         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 771     in
 772     return (Any IntRep code)
 773   | otherwise
 774   = let
 775         code dst = mkSeqInstr (LDI Q dst src)
 776     in
 777     return (Any IntRep code)
 778   where
 779     src = ImmInt (fromInteger i)
 780
 781 getRegister leaf
 782   | isJust imm
 783   = let
 784         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 785     in
 786     return (Any PtrRep code)
 787   where
 788     imm = maybeImm leaf
 789     imm__2 = case imm of Just x -> x
 790
 791 #endif /* alpha_TARGET_ARCH */
 792
 793 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 794
 795 #if i386_TARGET_ARCH
 796
 797 getRegister (CmmLit (CmmFloat f W32)) = do
 798     lbl <- getNewLabelNat
 799     dflags <- getDynFlagsNat
 800     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 801     Amode addr addr_code <- getAmode dynRef
 802     let code dst =
 803             LDATA ReadOnlyData
 804                         [CmmDataLabel lbl,
 805                          CmmStaticLit (CmmFloat f W32)]
 806             `consOL` (addr_code `snocOL`
 807             GLD FF32 addr dst)
 808     -- in
 809     return (Any FF32 code)
 810
 811
 812 getRegister (CmmLit (CmmFloat d W64))
 813   | d == 0.0
 814   = let code dst = unitOL (GLDZ dst)
 815     in  return (Any FF64 code)
 816
 817   | d == 1.0
 818   = let code dst = unitOL (GLD1 dst)
 819     in  return (Any FF64 code)
 820
 821   | otherwise = do
 822     lbl <- getNewLabelNat
 823     dflags <- getDynFlagsNat
 824     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 825     Amode addr addr_code <- getAmode dynRef
 826     let code dst =
 827             LDATA ReadOnlyData
 828                         [CmmDataLabel lbl,
 829                          CmmStaticLit (CmmFloat d W64)]
 830             `consOL` (addr_code `snocOL`
 831             GLD FF64 addr dst)
 832     -- in
 833     return (Any FF64 code)
 834
 835 #endif /* i386_TARGET_ARCH */
 836
 837 #if x86_64_TARGET_ARCH
 838
 839 getRegister (CmmLit (CmmFloat 0.0 w)) = do
 840    let size = floatSize w
 841        code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
 842         -- I don't know why there are xorpd, xorps, and pxor instructions.
 843         -- They all appear to do the same thing --SDM
 844    return (Any size code)
 845
 846 getRegister (CmmLit (CmmFloat f w)) = do
 847     lbl <- getNewLabelNat
 848     let code dst = toOL [
 849             LDATA ReadOnlyData
 850                         [CmmDataLabel lbl,
 851                          CmmStaticLit (CmmFloat f w)],
 852             MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 853             ]
 854     -- in
 855     return (Any size code)
 856   where size = floatSize w
 857
 858 #endif /* x86_64_TARGET_ARCH */
 859
 860 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 861
 862 -- catch simple cases of zero- or sign-extended load
 863 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
 864   code <- intLoadCode (MOVZxL II8) addr
 865   return (Any II32 code)
 866
 867 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
 868   code <- intLoadCode (MOVSxL II8) addr
 869   return (Any II32 code)
 870
 871 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
 872   code <- intLoadCode (MOVZxL II16) addr
 873   return (Any II32 code)
 874
 875 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
 876   code <- intLoadCode (MOVSxL II16) addr
 877   return (Any II32 code)
 878
 879 #endif
 880
 881 #if x86_64_TARGET_ARCH
 882
 883 -- catch simple cases of zero- or sign-extended load
 884 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
 885   code <- intLoadCode (MOVZxL II8) addr
 886   return (Any II64 code)
 887
 888 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
 889   code <- intLoadCode (MOVSxL II8) addr
 890   return (Any II64 code)
 891
 892 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
 893   code <- intLoadCode (MOVZxL II16) addr
 894   return (Any II64 code)
 895
 896 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
 897   code <- intLoadCode (MOVSxL II16) addr
 898   return (Any II64 code)
 899
 900 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
 901   code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
 902   return (Any II64 code)
 903
 904 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
 905   code <- intLoadCode (MOVSxL II32) addr
 906   return (Any II64 code)
 907
 908 #endif
 909
 910 #if x86_64_TARGET_ARCH
 911 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
 912                                      CmmLit displacement])
 913     = return $ Any II64 (\dst -> unitOL $
 914         LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 915 #endif
 916
 917 #if x86_64_TARGET_ARCH
 918 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
 919   x_code <- getAnyReg x
 920   lbl <- getNewLabelNat
 921   let
 922     code dst = x_code dst `appOL` toOL [
 923         -- This is how gcc does it, so it can't be that bad:
 924         LDATA ReadOnlyData16 [
 925                 CmmAlign 16,
 926                 CmmDataLabel lbl,
 927                 CmmStaticLit (CmmInt 0x80000000 W32),
 928                 CmmStaticLit (CmmInt 0 W32),
 929                 CmmStaticLit (CmmInt 0 W32),
 930                 CmmStaticLit (CmmInt 0 W32)
 931         ],
 932         XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 933                 -- xorps, so we need the 128-bit constant
 934                 -- ToDo: rip-relative
 935         ]
 936   --
 937   return (Any FF32 code)
 938
 939 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
 940   x_code <- getAnyReg x
 941   lbl <- getNewLabelNat
 942   let
 943         -- This is how gcc does it, so it can't be that bad:
 944     code dst = x_code dst `appOL` toOL [
 945         LDATA ReadOnlyData16 [
 946                 CmmAlign 16,
 947                 CmmDataLabel lbl,
 948                 CmmStaticLit (CmmInt 0x8000000000000000 W64),
 949                 CmmStaticLit (CmmInt 0 W64)
 950         ],
 951                 -- gcc puts an unpck here.  Wonder if we need it.
 952         XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 953                 -- xorpd, so we need the 128-bit constant
 954         ]
 955   --
 956   return (Any FF64 code)
 957 #endif
 958
 959 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 960
 961 getRegister (CmmMachOp mop [x]) -- unary MachOps
 962   = case mop of
 963 #if i386_TARGET_ARCH
 964       MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
 965       MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
 966 #endif
 967
 968       MO_S_Neg w -> triv_ucode NEGI (intSize w)
 969       MO_F_Neg w -> triv_ucode NEGI (floatSize w)
 970       MO_Not w   -> triv_ucode NOT  (intSize w)
 971
 972       -- Nop conversions
 973       MO_UU_Conv W32 W8  -> toI8Reg  W32 x
 974       MO_SS_Conv W32 W8  -> toI8Reg  W32 x
 975       MO_UU_Conv W16 W8  -> toI8Reg  W16 x
 976       MO_SS_Conv W16 W8  -> toI8Reg  W16 x
 977       MO_UU_Conv W32 W16 -> toI16Reg W32 x
 978       MO_SS_Conv W32 W16 -> toI16Reg W32 x
 979
 980 #if x86_64_TARGET_ARCH
 981       MO_UU_Conv W64 W32 -> conversionNop II64 x
 982       MO_SS_Conv W64 W32 -> conversionNop II64 x
 983       MO_UU_Conv W64 W16 -> toI16Reg W64 x
 984       MO_SS_Conv W64 W16 -> toI16Reg W64 x
 985       MO_UU_Conv W64 W8  -> toI8Reg  W64 x
 986       MO_SS_Conv W64 W8  -> toI8Reg  W64 x
 987 #endif
 988
 989       MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
 990       MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
 991
 992       -- widenings
 993       MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
 994       MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
 995       MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
 996
 997       MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
 998       MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
 999       MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
1000
1001 #if x86_64_TARGET_ARCH
1002       MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
1003       MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1004       MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1005       MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
1006       MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1007       MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1008         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1009         -- However, we don't want the register allocator to throw it
1010         -- away as an unnecessary reg-to-reg move, so we keep it in
1011         -- the form of a movzl and print it as a movl later.
1012 #endif
1013
1014 #if i386_TARGET_ARCH
1015       MO_FF_Conv W32 W64 -> conversionNop FF64 x
1016       MO_FF_Conv W64 W32 -> conversionNop FF32 x
1017 #else
1018       MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1019       MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1020 #endif
1021
1022       MO_FS_Conv from to -> coerceFP2Int from to x
1023       MO_SF_Conv from to -> coerceInt2FP from to x
1024
1025       other -> pprPanic "getRegister" (pprMachOp mop)
1026    where
1027         triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1028         triv_ucode instr size = trivialUCode size (instr size) x
1029
1030         -- signed or unsigned extension.
1031         integerExtend :: Width -> Width
1032                       -> (Size -> Operand -> Operand -> Instr)
1033                       -> CmmExpr -> NatM Register
1034         integerExtend from to instr expr = do
1035             (reg,e_code) <- if from == W8 then getByteReg expr
1036                                           else getSomeReg expr
1037             let
1038                 code dst =
1039                   e_code `snocOL`
1040                   instr (intSize from) (OpReg reg) (OpReg dst)
1041             return (Any (intSize to) code)
1042
1043         toI8Reg :: Width -> CmmExpr -> NatM Register
1044         toI8Reg new_rep expr
1045             = do codefn <- getAnyReg expr
1046                  return (Any (intSize new_rep) codefn)
1047                 -- HACK: use getAnyReg to get a byte-addressable register.
1048                 -- If the source was a Fixed register, this will add the
1049                 -- mov instruction to put it into the desired destination.
1050                 -- We're assuming that the destination won't be a fixed
1051                 -- non-byte-addressable register; it won't be, because all
1052                 -- fixed registers are word-sized.
1053
1054         toI16Reg = toI8Reg -- for now
1055
1056         conversionNop :: Size -> CmmExpr -> NatM Register
1057         conversionNop new_size expr
1058             = do e_code <- getRegister expr
1059                  return (swizzleRegisterRep e_code new_size)
1060
1061
1062 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1063   = case mop of
1064       MO_F_Eq w -> condFltReg EQQ x y
1065       MO_F_Ne w -> condFltReg NE x y
1066       MO_F_Gt w -> condFltReg GTT x y
1067       MO_F_Ge w -> condFltReg GE x y
1068       MO_F_Lt w -> condFltReg LTT x y
1069       MO_F_Le w -> condFltReg LE x y
1070
1071       MO_Eq rep   -> condIntReg EQQ x y
1072       MO_Ne rep   -> condIntReg NE x y
1073
1074       MO_S_Gt rep -> condIntReg GTT x y
1075       MO_S_Ge rep -> condIntReg GE x y
1076       MO_S_Lt rep -> condIntReg LTT x y
1077       MO_S_Le rep -> condIntReg LE x y
1078
1079       MO_U_Gt rep -> condIntReg GU  x y
1080       MO_U_Ge rep -> condIntReg GEU x y
1081       MO_U_Lt rep -> condIntReg LU  x y
1082       MO_U_Le rep -> condIntReg LEU x y
1083
1084 #if i386_TARGET_ARCH
1085       MO_F_Add w -> trivialFCode w GADD x y
1086       MO_F_Sub w -> trivialFCode w GSUB x y
1087       MO_F_Quot w -> trivialFCode w GDIV x y
1088       MO_F_Mul w -> trivialFCode w GMUL x y
1089 #endif
1090
1091 #if x86_64_TARGET_ARCH
1092       MO_F_Add w -> trivialFCode w ADD x y
1093       MO_F_Sub w -> trivialFCode w SUB x y
1094       MO_F_Quot w -> trivialFCode w FDIV x y
1095       MO_F_Mul w -> trivialFCode w MUL x y
1096 #endif
1097
1098       MO_Add rep -> add_code rep x y
1099       MO_Sub rep -> sub_code rep x y
1100
1101       MO_S_Quot rep -> div_code rep True  True  x y
1102       MO_S_Rem  rep -> div_code rep True  False x y
1103       MO_U_Quot rep -> div_code rep False True  x y
1104       MO_U_Rem  rep -> div_code rep False False x y
1105
1106       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1107
1108       MO_Mul rep -> triv_op rep IMUL
1109       MO_And rep -> triv_op rep AND
1110       MO_Or  rep -> triv_op rep OR
1111       MO_Xor rep -> triv_op rep XOR
1112
1113         {- Shift ops on x86s have constraints on their source, it
1114            either has to be Imm, CL or 1
1115             => trivialCode is not restrictive enough (sigh.)
1116         -}
1117       MO_Shl rep   -> shift_code rep SHL x y {-False-}
1118       MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1119       MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1120
1121       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1122   where
1123     --------------------
1124     triv_op width instr = trivialCode width op (Just op) x y
1125                         where op   = instr (intSize width)
1126
1127     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1128     imulMayOflo rep a b = do
1129          (a_reg, a_code) <- getNonClobberedReg a
1130          b_code <- getAnyReg b
1131          let
1132              shift_amt  = case rep of
1133                            W32 -> 31
1134                            W64 -> 63
1135                            _ -> panic "shift_amt"
1136
1137              size = intSize rep
1138              code = a_code `appOL` b_code eax `appOL`
1139                         toOL [
1140                            IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
1141                            SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1142                                 -- sign extend lower part
1143                            SUB size (OpReg edx) (OpReg eax)
1144                                 -- compare against upper
1145                            -- eax==0 if high part == sign extended low part
1146                         ]
1147          -- in
1148          return (Fixed size eax code)
1149
1150     --------------------
1151     shift_code :: Width
1152                -> (Size -> Operand -> Operand -> Instr)
1153                -> CmmExpr
1154                -> CmmExpr
1155                -> NatM Register
1156
1157     {- Case1: shift length as immediate -}
1158     shift_code width instr x y@(CmmLit lit) = do
1159           x_code <- getAnyReg x
1160           let
1161                size = intSize width
1162                code dst
1163                   = x_code dst `snocOL`
1164                     instr size (OpImm (litToImm lit)) (OpReg dst)
1165           -- in
1166           return (Any size code)
1167
1168     {- Case2: shift length is complex (non-immediate)
1169       * y must go in %ecx.
1170       * we cannot do y first *and* put its result in %ecx, because
1171         %ecx might be clobbered by x.
1172       * if we do y second, then x cannot be
1173         in a clobbered reg.  Also, we cannot clobber x's reg
1174         with the instruction itself.
1175       * so we can either:
1176         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1177         - do y second and put its result into %ecx.  x gets placed in a fresh
1178           tmp.  This is likely to be better, becuase the reg alloc can
1179           eliminate this reg->reg move here (it won't eliminate the other one,
1180           because the move is into the fixed %ecx).
1181     -}
1182     shift_code width instr x y{-amount-} = do
1183         x_code <- getAnyReg x
1184         let size = intSize width
1185         tmp <- getNewRegNat size
1186         y_code <- getAnyReg y
1187         let
1188            code = x_code tmp `appOL`
1189                   y_code ecx `snocOL`
1190                   instr size (OpReg ecx) (OpReg tmp)
1191         -- in
1192         return (Fixed size tmp code)
1193
1194     --------------------
1195     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1196     add_code rep x (CmmLit (CmmInt y _))
1197         | is32BitInteger y = add_int rep x y
1198     add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1199       where size = intSize rep
1200
1201     --------------------
1202     sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1203     sub_code rep x (CmmLit (CmmInt y _))
1204         | is32BitInteger (-y) = add_int rep x (-y)
1205     sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1206
1207     -- our three-operand add instruction:
1208     add_int width x y = do
1209         (x_reg, x_code) <- getSomeReg x
1210         let
1211             size = intSize width
1212             imm = ImmInt (fromInteger y)
1213             code dst
1214                = x_code `snocOL`
1215                  LEA size
1216                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1217                         (OpReg dst)
1218         --
1219         return (Any size code)
1220
1221     ----------------------
1222     div_code width signed quotient x y = do
1223            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1224            x_code <- getAnyReg x
1225            let
1226              size = intSize width
1227              widen | signed    = CLTD size
1228                    | otherwise = XOR size (OpReg edx) (OpReg edx)
1229
1230              instr | signed    = IDIV
1231                    | otherwise = DIV
1232
1233              code = y_code `appOL`
1234                     x_code eax `appOL`
1235                     toOL [widen, instr size y_op]
1236
1237              result | quotient  = eax
1238                     | otherwise = edx
1239
1240            -- in
1241            return (Fixed size result code)
1242
1243
1244 getRegister (CmmLoad mem pk)
1245   | isFloatType pk
1246   = do
1247     Amode src mem_code <- getAmode mem
1248     let
1249         size = cmmTypeSize pk
1250         code dst = mem_code `snocOL`
1251                    IF_ARCH_i386(GLD size src dst,
1252                                 MOV size (OpAddr src) (OpReg dst))
1253     return (Any size code)
1254
1255 #if i386_TARGET_ARCH
1256 getRegister (CmmLoad mem pk)
1257   | not (isWord64 pk)
1258   = do
1259     code <- intLoadCode instr mem
1260     return (Any size code)
1261   where
1262     width = typeWidth pk
1263     size = intSize width
1264     instr = case width of
1265                 W8     -> MOVZxL II8
1266                 _other -> MOV size
1267         -- We always zero-extend 8-bit loads, if we
1268         -- can't think of anything better.  This is because
1269         -- we can't guarantee access to an 8-bit variant of every register
1270         -- (esi and edi don't have 8-bit variants), so to make things
1271         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1272 #endif
1273
1274 #if x86_64_TARGET_ARCH
1275 -- Simpler memory load code on x86_64
1276 getRegister (CmmLoad mem pk)
1277   = do
1278     code <- intLoadCode (MOV size) mem
1279     return (Any size code)
1280   where size = intSize $ typeWidth pk
1281 #endif
1282
1283 getRegister (CmmLit (CmmInt 0 width))
1284   = let
1285         size = intSize width
1286
1287         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1288         adj_size = case size of II64 -> II32; _ -> size
1289         size1 = IF_ARCH_i386( size, adj_size )
1290         code dst
1291            = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1292     in
1293         return (Any size code)
1294
1295 #if x86_64_TARGET_ARCH
1296   -- optimisation for loading small literals on x86_64: take advantage
1297   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1298   -- instruction forms are shorter.
1299 getRegister (CmmLit lit)
1300   | isWord64 (cmmLitType lit), not (isBigLit lit)
1301   = let
1302         imm = litToImm lit
1303         code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1304     in
1305         return (Any II64 code)
1306   where
1307    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1308    isBigLit _ = False
1309         -- note1: not the same as (not.is32BitLit), because that checks for
1310         -- signed literals that fit in 32 bits, but we want unsigned
1311         -- literals here.
1312         -- note2: all labels are small, because we're assuming the
1313         -- small memory model (see gcc docs, -mcmodel=small).
1314 #endif
1315
1316 getRegister (CmmLit lit)
1317   = let
1318         size = cmmTypeSize (cmmLitType lit)
1319         imm = litToImm lit
1320         code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1321     in
1322         return (Any size code)
1323
1324 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1325
1326
1327 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1328    -> NatM (Reg -> InstrBlock)
1329 intLoadCode instr mem = do
1330   Amode src mem_code <- getAmode mem
1331   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1332
1333 -- Compute an expression into *any* register, adding the appropriate
1334 -- move instruction if necessary.
1335 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1336 getAnyReg expr = do
1337   r <- getRegister expr
1338   anyReg r
1339
1340 anyReg :: Register -> NatM (Reg -> InstrBlock)
1341 anyReg (Any _ code)          = return code
1342 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1343
1344 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1345 -- Fixed registers might not be byte-addressable, so we make sure we've
1346 -- got a temporary, inserting an extra reg copy if necessary.
1347 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1348 #if x86_64_TARGET_ARCH
1349 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1350 #else
1351 getByteReg expr = do
1352   r <- getRegister expr
1353   case r of
1354     Any rep code -> do
1355         tmp <- getNewRegNat rep
1356         return (tmp, code tmp)
1357     Fixed rep reg code
1358         | isVirtualReg reg -> return (reg,code)
1359         | otherwise -> do
1360             tmp <- getNewRegNat rep
1361             return (tmp, code `snocOL` reg2reg rep reg tmp)
1362         -- ToDo: could optimise slightly by checking for byte-addressable
1363         -- real registers, but that will happen very rarely if at all.
1364 #endif
1365
1366 -- Another variant: this time we want the result in a register that cannot
1367 -- be modified by code to evaluate an arbitrary expression.
1368 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1369 getNonClobberedReg expr = do
1370   r <- getRegister expr
1371   case r of
1372     Any rep code -> do
1373         tmp <- getNewRegNat rep
1374         return (tmp, code tmp)
1375     Fixed rep reg code
1376         -- only free regs can be clobbered
1377         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1378                 tmp <- getNewRegNat rep
1379                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1380         | otherwise ->
1381                 return (reg, code)
1382
1383 reg2reg :: Size -> Reg -> Reg -> Instr
1384 reg2reg size src dst
1385 #if i386_TARGET_ARCH
1386   | isFloatSize size = GMOV src dst
1387 #endif
1388   | otherwise        = MOV size (OpReg src) (OpReg dst)
1389
1390 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1391
1392 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1393
1394 #if sparc_TARGET_ARCH
1395
1396 getRegister (CmmLit (CmmFloat f W32)) = do
1397     lbl <- getNewLabelNat
1398     let code dst = toOL [
1399             LDATA ReadOnlyData
1400                         [CmmDataLabel lbl,
1401                          CmmStaticLit (CmmFloat f W32)],
1402             SETHI (HI (ImmCLbl lbl)) dst,
1403             LD FF32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1404     return (Any FF32 code)
1405
1406 getRegister (CmmLit (CmmFloat d W64)) = do
1407     lbl <- getNewLabelNat
1408     let code dst = toOL [
1409             LDATA ReadOnlyData
1410                         [CmmDataLabel lbl,
1411                          CmmStaticLit (CmmFloat d W64)],
1412             SETHI (HI (ImmCLbl lbl)) dst,
1413             LD FF64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1414     return (Any FF64 code)
1415
1416 getRegister (CmmMachOp mop [x]) -- unary MachOps
1417   = case mop of
1418       MO_F_Neg W32     -> trivialUFCode FF32 (FNEG FF32) x
1419       MO_F_Neg W64     -> trivialUFCode FF64 (FNEG FF64) x
1420
1421       MO_S_Neg rep     -> trivialUCode (intSize rep) (SUB False False g0) x
1422       MO_Not rep       -> trivialUCode (intSize rep) (XNOR False g0) x
1423
1424       MO_FF_Conv W64 W32-> coerceDbl2Flt x
1425       MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1426
1427       MO_FS_Conv from to -> coerceFP2Int from to x
1428       MO_SF_Conv from to -> coerceInt2FP from to x
1429
1430       -- Conversions which are a nop on sparc
1431       MO_UU_Conv from to
1432         | from == to    -> conversionNop to  x
1433       MO_UU_Conv W32 W8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1434       MO_UU_Conv W32 to -> conversionNop to x
1435       MO_SS_Conv W32 to -> conversionNop to x
1436
1437       -- widenings
1438       MO_UU_Conv W8 W32  -> integerExtend False W8 W32  x
1439       MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
1440       MO_UU_Conv W8 W16  -> integerExtend False W8 W16  x
1441       MO_SS_Conv W16 W32 -> integerExtend True  W16 W32 x
1442
1443       other_op -> panic "Unknown unary mach op"
1444     where
1445         -- XXX SLL/SRL?
1446         integerExtend signed from to expr = do
1447            (reg, e_code) <- getSomeReg expr
1448            let
1449                code dst =
1450                    e_code `snocOL`
1451                    ((if signed then SRA else SRL)
1452                           reg (RIImm (ImmInt 0)) dst)
1453            return (Any (intSize to) code)
1454         conversionNop new_rep expr
1455             = do e_code <- getRegister expr
1456                  return (swizzleRegisterRep e_code new_rep)
1457
1458 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1459   = case mop of
1460       MO_Eq rep -> condIntReg EQQ x y
1461       MO_Ne rep -> condIntReg NE x y
1462
1463       MO_S_Gt rep -> condIntReg GTT x y
1464       MO_S_Ge rep -> condIntReg GE x y
1465       MO_S_Lt rep -> condIntReg LTT x y
1466       MO_S_Le rep -> condIntReg LE x y
1467
1468       MO_U_Gt W32  -> condIntReg GTT x y
1469       MO_U_Ge W32  -> condIntReg GE x y
1470       MO_U_Lt W32  -> condIntReg LTT x y
1471       MO_U_Le W32  -> condIntReg LE x y
1472
1473       MO_U_Gt W16 -> condIntReg GU  x y
1474       MO_U_Ge W16 -> condIntReg GEU x y
1475       MO_U_Lt W16 -> condIntReg LU  x y
1476       MO_U_Le W16 -> condIntReg LEU x y
1477
1478       MO_Add W32 -> trivialCode W32 (ADD False False) x y
1479       MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1480
1481       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1482 {-
1483       -- ToDo: teach about V8+ SPARC div instructions
1484       MO_S_Quot W32 -> idiv FSLIT(".div")   x y
1485       MO_S_Rem W32  -> idiv FSLIT(".rem")   x y
1486       MO_U_Quot W32 -> idiv FSLIT(".udiv")  x y
1487       MO_U_Rem W32  -> idiv FSLIT(".urem")  x y
1488 -}
1489
1490       MO_F_Eq w -> condFltReg EQQ x y
1491       MO_F_Ne w -> condFltReg NE x y
1492
1493       MO_F_Gt w -> condFltReg GTT x y
1494       MO_F_Ge w -> condFltReg GE x y
1495       MO_F_Lt w -> condFltReg LTT x y
1496       MO_F_Le w -> condFltReg LE x y
1497
1498       MO_F_Add  w  -> trivialFCode w FADD x y
1499       MO_F_Sub  w  -> trivialFCode w FSUB x y
1500       MO_F_Mul  w  -> trivialFCode w FMUL x y
1501       MO_F_Quot w  -> trivialFCode w FDIV x y
1502
1503       MO_And rep   -> trivialCode rep (AND False) x y
1504       MO_Or rep    -> trivialCode rep (OR  False) x y
1505       MO_Xor rep   -> trivialCode rep (XOR False) x y
1506
1507       MO_Mul rep -> trivialCode rep (SMUL False) x y
1508
1509       MO_Shl rep   -> trivialCode rep SLL  x y
1510       MO_U_Shr rep   -> trivialCode rep SRL x y
1511       MO_S_Shr rep   -> trivialCode rep SRA x y
1512
1513 {-
1514       MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1515                                          [promote x, promote y])
1516                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1517       MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1518                                         [x, y])
1519 -}
1520       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1521   where
1522     --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1523
1524     --------------------
1525     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1526     imulMayOflo rep a b = do
1527          (a_reg, a_code) <- getSomeReg a
1528          (b_reg, b_code) <- getSomeReg b
1529          res_lo <- getNewRegNat b32
1530          res_hi <- getNewRegNat b32
1531          let
1532             shift_amt  = case rep of
1533                           W32 -> 31
1534                           W64 -> 63
1535                           _ -> panic "shift_amt"
1536             code dst = a_code `appOL` b_code `appOL`
1537                        toOL [
1538                            SMUL False a_reg (RIReg b_reg) res_lo,
1539                            RDY res_hi,
1540                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1541                            SUB False False res_lo (RIReg res_hi) dst
1542                         ]
1543          return (Any II32 code)
1544
1545 getRegister (CmmLoad mem pk) = do
1546     Amode src code <- getAmode mem
1547     let
1548         code__2 dst = code `snocOL` LD pk src dst
1549     return (Any pk code__2)
1550
1551 getRegister (CmmLit (CmmInt i _))
1552   | fits13Bits i
1553   = let
1554         src = ImmInt (fromInteger i)
1555         code dst = unitOL (OR False g0 (RIImm src) dst)
1556     in
1557         return (Any II32 code)
1558
1559 getRegister (CmmLit lit)
1560   = let rep = cmmLitType lit
1561         imm = litToImm lit
1562         code dst = toOL [
1563             SETHI (HI imm) dst,
1564             OR False dst (RIImm (LO imm)) dst]
1565     in return (Any II32 code)
1566
1567 #endif /* sparc_TARGET_ARCH */
1568
1569 #if powerpc_TARGET_ARCH
1570 getRegister (CmmLoad mem pk)
1571   | not (isWord64 pk)
1572   = do
1573         Amode addr addr_code <- getAmode mem
1574         let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1575                        addr_code `snocOL` LD size dst addr
1576         return (Any size code)
1577           where size = cmmTypeSize pk
1578
1579 -- catch simple cases of zero- or sign-extended load
1580 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1581     Amode addr addr_code <- getAmode mem
1582     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1583
1584 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1585
1586 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1587     Amode addr addr_code <- getAmode mem
1588     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
1589
1590 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1591     Amode addr addr_code <- getAmode mem
1592     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1593
1594 getRegister (CmmMachOp mop [x]) -- unary MachOps
1595   = case mop of
1596       MO_Not rep   -> triv_ucode_int rep NOT
1597
1598       MO_F_Neg w   -> triv_ucode_float w FNEG
1599       MO_S_Neg w   -> triv_ucode_int   w NEG
1600
1601       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
1602       MO_FF_Conv W32 W64 -> conversionNop FF64 x
1603
1604       MO_FS_Conv from to -> coerceFP2Int from to x
1605       MO_SF_Conv from to -> coerceInt2FP from to x
1606
1607       MO_SS_Conv from to
1608         | from == to    -> conversionNop (intSize to) x
1609
1610         -- narrowing is a nop: we treat the high bits as undefined
1611       MO_SS_Conv W32 to -> conversionNop (intSize to) x
1612       MO_SS_Conv W16 W8 -> conversionNop II8 x
1613       MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
1614       MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1615
1616       MO_UU_Conv from to
1617         | from == to -> conversionNop (intSize to) x
1618         -- narrowing is a nop: we treat the high bits as undefined
1619       MO_UU_Conv W32 to -> conversionNop (intSize to) x
1620       MO_UU_Conv W16 W8 -> conversionNop II8 x
1621       MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1622       MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1623
1624     where
1625         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
1626         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1627
1628         conversionNop new_size expr
1629             = do e_code <- getRegister expr
1630                  return (swizzleRegisterRep e_code new_size)
1631
1632 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1633   = case mop of
1634       MO_F_Eq w -> condFltReg EQQ x y
1635       MO_F_Ne w -> condFltReg NE  x y
1636       MO_F_Gt w -> condFltReg GTT x y
1637       MO_F_Ge w -> condFltReg GE  x y
1638       MO_F_Lt w -> condFltReg LTT x y
1639       MO_F_Le w -> condFltReg LE  x y
1640
1641       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1642       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1643
1644       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1645       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1646       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1647       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1648
1649       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1650       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1651       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1652       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1653
1654       MO_F_Add w  -> triv_float w FADD
1655       MO_F_Sub w  -> triv_float w FSUB
1656       MO_F_Mul w  -> triv_float w FMUL
1657       MO_F_Quot w -> triv_float w FDIV
1658
1659          -- optimize addition with 32-bit immediate
1660          -- (needed for PIC)
1661       MO_Add W32 ->
1662         case y of
1663           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1664             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1665           CmmLit lit
1666             -> do
1667                 (src, srcCode) <- getSomeReg x
1668                 let imm = litToImm lit
1669                     code dst = srcCode `appOL` toOL [
1670                                     ADDIS dst src (HA imm),
1671                                     ADD dst dst (RIImm (LO imm))
1672                                 ]
1673                 return (Any II32 code)
1674           _ -> trivialCode W32 True ADD x y
1675
1676       MO_Add rep -> trivialCode rep True ADD x y
1677       MO_Sub rep ->
1678         case y of    -- subfi ('substract from' with immediate) doesn't exist
1679           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1680             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1681           _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1682
1683       MO_Mul rep -> trivialCode rep True MULLW x y
1684
1685       MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1686
1687       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1688       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1689
1690       MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1691       MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1692
1693       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1694       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1695
1696       MO_And rep   -> trivialCode rep False AND x y
1697       MO_Or rep    -> trivialCode rep False OR x y
1698       MO_Xor rep   -> trivialCode rep False XOR x y
1699
1700       MO_Shl rep   -> trivialCode rep False SLW x y
1701       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1702       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1703   where
1704     triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1705     triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1706
1707 getRegister (CmmLit (CmmInt i rep))
1708   | Just imm <- makeImmediate rep True i
1709   = let
1710         code dst = unitOL (LI dst imm)
1711     in
1712         return (Any (intSize rep) code)
1713
1714 getRegister (CmmLit (CmmFloat f frep)) = do
1715     lbl <- getNewLabelNat
1716     dflags <- getDynFlagsNat
1717     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1718     Amode addr addr_code <- getAmode dynRef
1719     let size = floatSize frep
1720         code dst =
1721             LDATA ReadOnlyData  [CmmDataLabel lbl,
1722                                  CmmStaticLit (CmmFloat f frep)]
1723             `consOL` (addr_code `snocOL` LD size dst addr)
1724     return (Any size code)
1725
1726 getRegister (CmmLit lit)
1727   = let rep = cmmLitType lit
1728         imm = litToImm lit
1729         code dst = toOL [
1730               LIS dst (HA imm),
1731               ADD dst dst (RIImm (LO imm))
1732           ]
1733     in return (Any (cmmTypeSize rep) code)
1734
1735 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1736
1737     -- extend?Rep: wrap integer expression of type rep
1738     -- in a conversion to II32
1739 extendSExpr W32 x = x
1740 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
1741 extendUExpr W32 x = x
1742 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1743
1744 #endif /* powerpc_TARGET_ARCH */
1745
1746
1747 -- -----------------------------------------------------------------------------
1748 --  The 'Amode' type: Memory addressing modes passed up the tree.
1749
1750 data Amode = Amode AddrMode InstrBlock
1751
1752 {-
1753 Now, given a tree (the argument to an CmmLoad) that references memory,
1754 produce a suitable addressing mode.
1755
1756 A Rule of the Game (tm) for Amodes: use of the addr bit must
1757 immediately follow use of the code part, since the code part puts
1758 values in registers which the addr then refers to.  So you can't put
1759 anything in between, lest it overwrite some of those registers.  If
1760 you need to do some other computation between the code part and use of
1761 the addr bit, first store the effective address from the amode in a
1762 temporary, then do the other computation, and then use the temporary:
1763
1764     code
1765     LEA amode, tmp
1766     ... other computation ...
1767     ... (tmp) ...
1768 -}
1769
1770 getAmode :: CmmExpr -> NatM Amode
1771 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1772
1773 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1774
1775 #if alpha_TARGET_ARCH
1776
1777 getAmode (StPrim IntSubOp [x, StInt i])
1778   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1779     getRegister x               `thenNat` \ register ->
1780     let
1781         code = registerCode register tmp
1782         reg  = registerName register tmp
1783         off  = ImmInt (-(fromInteger i))
1784     in
1785     return (Amode (AddrRegImm reg off) code)
1786
1787 getAmode (StPrim IntAddOp [x, StInt i])
1788   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1789     getRegister x               `thenNat` \ register ->
1790     let
1791         code = registerCode register tmp
1792         reg  = registerName register tmp
1793         off  = ImmInt (fromInteger i)
1794     in
1795     return (Amode (AddrRegImm reg off) code)
1796
1797 getAmode leaf
1798   | isJust imm
1799   = return (Amode (AddrImm imm__2) id)
1800   where
1801     imm = maybeImm leaf
1802     imm__2 = case imm of Just x -> x
1803
1804 getAmode other
1805   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1806     getRegister other           `thenNat` \ register ->
1807     let
1808         code = registerCode register tmp
1809         reg  = registerName register tmp
1810     in
1811     return (Amode (AddrReg reg) code)
1812
1813 #endif /* alpha_TARGET_ARCH */
1814
1815 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1816
1817 #if x86_64_TARGET_ARCH
1818
1819 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1820                                      CmmLit displacement])
1821     = return $ Amode (ripRel (litToImm displacement)) nilOL
1822
1823 #endif
1824
1825 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1826
1827 -- This is all just ridiculous, since it carefully undoes
1828 -- what mangleIndexTree has just done.
1829 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1830   | is32BitLit lit
1831   -- ASSERT(rep == II32)???
1832   = do (x_reg, x_code) <- getSomeReg x
1833        let off = ImmInt (-(fromInteger i))
1834        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1835
1836 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1837   | is32BitLit lit
1838   -- ASSERT(rep == II32)???
1839   = do (x_reg, x_code) <- getSomeReg x
1840        let off = ImmInt (fromInteger i)
1841        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1842
1843 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1844 -- recognised by the next rule.
1845 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1846                                   b@(CmmLit _)])
1847   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1848
1849 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1850                                         [y, CmmLit (CmmInt shift _)]])
1851   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1852   = x86_complex_amode x y shift 0
1853
1854 getAmode (CmmMachOp (MO_Add rep)
1855                 [x, CmmMachOp (MO_Add _)
1856                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1857                          CmmLit (CmmInt offset _)]])
1858   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1859   && is32BitInteger offset
1860   = x86_complex_amode x y shift offset
1861
1862 getAmode (CmmMachOp (MO_Add rep) [x,y])
1863   = x86_complex_amode x y 0 0
1864
1865 getAmode (CmmLit lit) | is32BitLit lit
1866   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1867
1868 getAmode expr = do
1869   (reg,code) <- getSomeReg expr
1870   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1871
1872
1873 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1874 x86_complex_amode base index shift offset
1875   = do (x_reg, x_code) <- getNonClobberedReg base
1876         -- x must be in a temp, because it has to stay live over y_code
1877         -- we could compre x_reg and y_reg and do something better here...
1878        (y_reg, y_code) <- getSomeReg index
1879        let
1880            code = x_code `appOL` y_code
1881            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1882        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1883                code)
1884
1885 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1886
1887 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1888
1889 #if sparc_TARGET_ARCH
1890
1891 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1892   | fits13Bits (-i)
1893   = do
1894        (reg, code) <- getSomeReg x
1895        let
1896          off  = ImmInt (-(fromInteger i))
1897        return (Amode (AddrRegImm reg off) code)
1898
1899
1900 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1901   | fits13Bits i
1902   = do
1903        (reg, code) <- getSomeReg x
1904        let
1905          off  = ImmInt (fromInteger i)
1906        return (Amode (AddrRegImm reg off) code)
1907
1908 getAmode (CmmMachOp (MO_Add rep) [x, y])
1909   = do
1910     (regX, codeX) <- getSomeReg x
1911     (regY, codeY) <- getSomeReg y
1912     let
1913         code = codeX `appOL` codeY
1914     return (Amode (AddrRegReg regX regY) code)
1915
1916 -- XXX Is this same as "leaf" in Stix?
1917 getAmode (CmmLit lit)
1918   = do
1919       tmp <- getNewRegNat b32
1920       let
1921         code = unitOL (SETHI (HI imm__2) tmp)
1922       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1923       where
1924          imm__2 = litToImm lit
1925
1926 getAmode other
1927   = do
1928        (reg, code) <- getSomeReg other
1929        let
1930             off  = ImmInt 0
1931        return (Amode (AddrRegImm reg off) code)
1932
1933 #endif /* sparc_TARGET_ARCH */
1934
1935 #ifdef powerpc_TARGET_ARCH
1936 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
1937   | Just off <- makeImmediate W32 True (-i)
1938   = do
1939         (reg, code) <- getSomeReg x
1940         return (Amode (AddrRegImm reg off) code)
1941
1942
1943 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
1944   | Just off <- makeImmediate W32 True i
1945   = do
1946         (reg, code) <- getSomeReg x
1947         return (Amode (AddrRegImm reg off) code)
1948
1949    -- optimize addition with 32-bit immediate
1950    -- (needed for PIC)
1951 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
1952   = do
1953         tmp <- getNewRegNat II32
1954         (src, srcCode) <- getSomeReg x
1955         let imm = litToImm lit
1956             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1957         return (Amode (AddrRegImm tmp (LO imm)) code)
1958
1959 getAmode (CmmLit lit)
1960   = do
1961         tmp <- getNewRegNat II32
1962         let imm = litToImm lit
1963             code = unitOL (LIS tmp (HA imm))
1964         return (Amode (AddrRegImm tmp (LO imm)) code)
1965
1966 getAmode (CmmMachOp (MO_Add W32) [x, y])
1967   = do
1968         (regX, codeX) <- getSomeReg x
1969         (regY, codeY) <- getSomeReg y
1970         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1971
1972 getAmode other
1973   = do
1974         (reg, code) <- getSomeReg other
1975         let
1976             off  = ImmInt 0
1977         return (Amode (AddrRegImm reg off) code)
1978 #endif /* powerpc_TARGET_ARCH */
1979
1980 -- -----------------------------------------------------------------------------
1981 -- getOperand: sometimes any operand will do.
1982
1983 -- getNonClobberedOperand: the value of the operand will remain valid across
1984 -- the computation of an arbitrary expression, unless the expression
1985 -- is computed directly into a register which the operand refers to
1986 -- (see trivialCode where this function is used for an example).
1987
1988 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1989
1990 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1991 #if x86_64_TARGET_ARCH
1992 getNonClobberedOperand (CmmLit lit)
1993   | isSuitableFloatingPointLit lit = do
1994     lbl <- getNewLabelNat
1995     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1996                                            CmmStaticLit lit])
1997     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1998 #endif
1999 getNonClobberedOperand (CmmLit lit)
2000   | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2001     return (OpImm (litToImm lit), nilOL)
2002 getNonClobberedOperand (CmmLoad mem pk)
2003   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2004     Amode src mem_code <- getAmode mem
2005     (src',save_code) <-
2006         if (amodeCouldBeClobbered src)
2007                 then do
2008                    tmp <- getNewRegNat wordSize
2009                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2010                            unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2011                 else
2012                    return (src, nilOL)
2013     return (OpAddr src', save_code `appOL` mem_code)
2014 getNonClobberedOperand e = do
2015     (reg, code) <- getNonClobberedReg e
2016     return (OpReg reg, code)
2017
2018 amodeCouldBeClobbered :: AddrMode -> Bool
2019 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2020
2021 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2022 regClobbered _ = False
2023
2024 -- getOperand: the operand is not required to remain valid across the
2025 -- computation of an arbitrary expression.
2026 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2027 #if x86_64_TARGET_ARCH
2028 getOperand (CmmLit lit)
2029   | isSuitableFloatingPointLit lit = do
2030     lbl <- getNewLabelNat
2031     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2032                                            CmmStaticLit lit])
2033     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2034 #endif
2035 getOperand (CmmLit lit)
2036   | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2037     return (OpImm (litToImm lit), nilOL)
2038 getOperand (CmmLoad mem pk)
2039   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2040     Amode src mem_code <- getAmode mem
2041     return (OpAddr src, mem_code)
2042 getOperand e = do
2043     (reg, code) <- getSomeReg e
2044     return (OpReg reg, code)
2045
2046 isOperand :: CmmExpr -> Bool
2047 isOperand (CmmLoad _ _) = True
2048 isOperand (CmmLit lit)  = is32BitLit lit
2049                           || isSuitableFloatingPointLit lit
2050 isOperand _             = False
2051
2052 -- if we want a floating-point literal as an operand, we can
2053 -- use it directly from memory.  However, if the literal is
2054 -- zero, we're better off generating it into a register using
2055 -- xor.
2056 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2057 isSuitableFloatingPointLit _ = False
2058
2059 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2060 getRegOrMem (CmmLoad mem pk)
2061   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2062     Amode src mem_code <- getAmode mem
2063     return (OpAddr src, mem_code)
2064 getRegOrMem e = do
2065     (reg, code) <- getNonClobberedReg e
2066     return (OpReg reg, code)
2067
2068 #if x86_64_TARGET_ARCH
2069 is32BitLit (CmmInt i W64) = is32BitInteger i
2070    -- assume that labels are in the range 0-2^31-1: this assumes the
2071    -- small memory model (see gcc docs, -mcmodel=small).
2072 #endif
2073 is32BitLit x = True
2074 #endif
2075
2076 is32BitInteger :: Integer -> Bool
2077 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
2078   where i64 = fromIntegral i :: Int64
2079   -- a CmmInt is intended to be truncated to the appropriate
2080   -- number of bits, so here we truncate it to Int64.  This is
2081   -- important because e.g. -1 as a CmmInt might be either
2082   -- -1 or 18446744073709551615.
2083
2084 -- -----------------------------------------------------------------------------
2085 --  The 'CondCode' type:  Condition codes passed up the tree.
2086
2087 data CondCode = CondCode Bool Cond InstrBlock
2088
2089 -- Set up a condition code for a conditional branch.
2090
2091 getCondCode :: CmmExpr -> NatM CondCode
2092
2093 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2094
2095 #if alpha_TARGET_ARCH
2096 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2097 #endif /* alpha_TARGET_ARCH */
2098
2099 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100
2101 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2102 -- yes, they really do seem to want exactly the same!
2103
2104 getCondCode (CmmMachOp mop [x, y])
2105   =
2106     case mop of
2107       MO_F_Eq W32 -> condFltCode EQQ x y
2108       MO_F_Ne W32 -> condFltCode NE  x y
2109       MO_F_Gt W32 -> condFltCode GTT x y
2110       MO_F_Ge W32 -> condFltCode GE  x y
2111       MO_F_Lt W32 -> condFltCode LTT x y
2112       MO_F_Le W32 -> condFltCode LE  x y
2113
2114       MO_F_Eq W64 -> condFltCode EQQ x y
2115       MO_F_Ne W64 -> condFltCode NE  x y
2116       MO_F_Gt W64 -> condFltCode GTT x y
2117       MO_F_Ge W64 -> condFltCode GE  x y
2118       MO_F_Lt W64 -> condFltCode LTT x y
2119       MO_F_Le W64 -> condFltCode LE  x y
2120
2121       MO_Eq rep -> condIntCode EQQ  x y
2122       MO_Ne rep -> condIntCode NE   x y
2123
2124       MO_S_Gt rep -> condIntCode GTT  x y
2125       MO_S_Ge rep -> condIntCode GE   x y
2126       MO_S_Lt rep -> condIntCode LTT  x y
2127       MO_S_Le rep -> condIntCode LE   x y
2128
2129       MO_U_Gt rep -> condIntCode GU   x y
2130       MO_U_Ge rep -> condIntCode GEU  x y
2131       MO_U_Lt rep -> condIntCode LU   x y
2132       MO_U_Le rep -> condIntCode LEU  x y
2133
2134       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2135
2136 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2137
2138 #elif powerpc_TARGET_ARCH
2139
2140 -- almost the same as everywhere else - but we need to
2141 -- extend small integers to 32 bit first
2142
2143 getCondCode (CmmMachOp mop [x, y])
2144   = case mop of
2145       MO_F_Eq W32 -> condFltCode EQQ x y
2146       MO_F_Ne W32 -> condFltCode NE  x y
2147       MO_F_Gt W32 -> condFltCode GTT x y
2148       MO_F_Ge W32 -> condFltCode GE  x y
2149       MO_F_Lt W32 -> condFltCode LTT x y
2150       MO_F_Le W32 -> condFltCode LE  x y
2151
2152       MO_F_Eq W64 -> condFltCode EQQ x y
2153       MO_F_Ne W64 -> condFltCode NE  x y
2154       MO_F_Gt W64 -> condFltCode GTT x y
2155       MO_F_Ge W64 -> condFltCode GE  x y
2156       MO_F_Lt W64 -> condFltCode LTT x y
2157       MO_F_Le W64 -> condFltCode LE  x y
2158
2159       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2160       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2161
2162       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2163       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2164       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2165       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2166
2167       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2168       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2169       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2170       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2171
2172       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2173
2174 getCondCode other =  panic "getCondCode(2)(powerpc)"
2175
2176
2177 #endif
2178
2179
2180 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2181 -- passed back up the tree.
2182
2183 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2184
2185 #if alpha_TARGET_ARCH
2186 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2187 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2188 #endif /* alpha_TARGET_ARCH */
2189
2190 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2191 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2192
2193 -- memory vs immediate
2194 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2195     Amode x_addr x_code <- getAmode x
2196     let
2197         imm  = litToImm lit
2198         code = x_code `snocOL`
2199                   CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2200     --
2201     return (CondCode False cond code)
2202
2203 -- anything vs zero, using a mask
2204 -- TODO: Add some sanity checking!!!!
2205 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2206     | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2207     = do
2208       (x_reg, x_code) <- getSomeReg x
2209       let
2210          code = x_code `snocOL`
2211                 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2212       --
2213       return (CondCode False cond code)
2214
2215 -- anything vs zero
2216 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2217     (x_reg, x_code) <- getSomeReg x
2218     let
2219         code = x_code `snocOL`
2220                   TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2221     --
2222     return (CondCode False cond code)
2223
2224 -- anything vs operand
2225 condIntCode cond x y | isOperand y = do
2226     (x_reg, x_code) <- getNonClobberedReg x
2227     (y_op,  y_code) <- getOperand y
2228     let
2229         code = x_code `appOL` y_code `snocOL`
2230                   CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2231     -- in
2232     return (CondCode False cond code)
2233
2234 -- anything vs anything
2235 condIntCode cond x y = do
2236   (y_reg, y_code) <- getNonClobberedReg y
2237   (x_op, x_code) <- getRegOrMem x
2238   let
2239         code = y_code `appOL`
2240                x_code `snocOL`
2241                   CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2242   -- in
2243   return (CondCode False cond code)
2244 #endif
2245
2246 #if i386_TARGET_ARCH
2247 condFltCode cond x y
2248   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2249   (x_reg, x_code) <- getNonClobberedReg x
2250   (y_reg, y_code) <- getSomeReg y
2251   let
2252         code = x_code `appOL` y_code `snocOL`
2253                 GCMP cond x_reg y_reg
2254   -- The GCMP insn does the test and sets the zero flag if comparable
2255   -- and true.  Hence we always supply EQQ as the condition to test.
2256   return (CondCode True EQQ code)
2257 #endif /* i386_TARGET_ARCH */
2258
2259 #if x86_64_TARGET_ARCH
2260 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2261 -- an operand, but the right must be a reg.  We can probably do better
2262 -- than this general case...
2263 condFltCode cond x y = do
2264   (x_reg, x_code) <- getNonClobberedReg x
2265   (y_op, y_code) <- getOperand y
2266   let
2267         code = x_code `appOL`
2268                y_code `snocOL`
2269                   CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2270         -- NB(1): we need to use the unsigned comparison operators on the
2271         -- result of this comparison.
2272   -- in
2273   return (CondCode True (condToUnsigned cond) code)
2274 #endif
2275
2276 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2277
2278 #if sparc_TARGET_ARCH
2279
2280 condIntCode cond x (CmmLit (CmmInt y rep))
2281   | fits13Bits y
2282   = do
2283        (src1, code) <- getSomeReg x
2284        let
2285            src2 = ImmInt (fromInteger y)
2286            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2287        return (CondCode False cond code')
2288
2289 condIntCode cond x y = do
2290     (src1, code1) <- getSomeReg x
2291     (src2, code2) <- getSomeReg y
2292     let
2293         code__2 = code1 `appOL` code2 `snocOL`
2294                   SUB False True src1 (RIReg src2) g0
2295     return (CondCode False cond code__2)
2296
2297 -----------
2298 condFltCode cond x y = do
2299     (src1, code1) <- getSomeReg x
2300     (src2, code2) <- getSomeReg y
2301     tmp <- getNewRegNat FF64
2302     let
2303         promote x = FxTOy FF32 FF64 x tmp
2304
2305         pk1   = cmmExprType x
2306         pk2   = cmmExprType y
2307
2308         code__2 =
2309                 if pk1 == pk2 then
2310                     code1 `appOL` code2 `snocOL`
2311                     FCMP True pk1 src1 src2
2312                 else if typeWidth pk1 == W32 then
2313                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2314                     FCMP True FF64 tmp src2
2315                 else
2316                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2317                     FCMP True FF64 src1 tmp
2318     return (CondCode True cond code__2)
2319
2320 #endif /* sparc_TARGET_ARCH */
2321
2322 #if powerpc_TARGET_ARCH
2323 --  ###FIXME: I16 and I8!
2324 condIntCode cond x (CmmLit (CmmInt y rep))
2325   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2326   = do
2327         (src1, code) <- getSomeReg x
2328         let
2329             code' = code `snocOL`
2330                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2331         return (CondCode False cond code')
2332
2333 condIntCode cond x y = do
2334     (src1, code1) <- getSomeReg x
2335     (src2, code2) <- getSomeReg y
2336     let
2337         code' = code1 `appOL` code2 `snocOL`
2338                   (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2339     return (CondCode False cond code')
2340
2341 condFltCode cond x y = do
2342     (src1, code1) <- getSomeReg x
2343     (src2, code2) <- getSomeReg y
2344     let
2345         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2346         code'' = case cond of -- twiddle CR to handle unordered case
2347                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2348                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2349                     _ -> code'
2350                  where
2351                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2352     return (CondCode True cond code'')
2353
2354 #endif /* powerpc_TARGET_ARCH */
2355
2356 -- -----------------------------------------------------------------------------
2357 -- Generating assignments
2358
2359 -- Assignments are really at the heart of the whole code generation
2360 -- business.  Almost all top-level nodes of any real importance are
2361 -- assignments, which correspond to loads, stores, or register
2362 -- transfers.  If we're really lucky, some of the register transfers
2363 -- will go away, because we can use the destination register to
2364 -- complete the code generation for the right hand side.  This only
2365 -- fails when the right hand side is forced into a fixed register
2366 -- (e.g. the result of a call).
2367
2368 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2369 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
2370
2371 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2372 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
2373
2374 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2375
2376 #if alpha_TARGET_ARCH
2377
2378 assignIntCode pk (CmmLoad dst _) src
2379   = getNewRegNat IntRep             `thenNat` \ tmp ->
2380     getAmode dst                    `thenNat` \ amode ->
2381     getRegister src                 `thenNat` \ register ->
2382     let
2383         code1   = amodeCode amode []
2384         dst__2  = amodeAddr amode
2385         code2   = registerCode register tmp []
2386         src__2  = registerName register tmp
2387         sz      = primRepToSize pk
2388         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2389     in
2390     return code__2
2391
2392 assignIntCode pk dst src
2393   = getRegister dst                         `thenNat` \ register1 ->
2394     getRegister src                         `thenNat` \ register2 ->
2395     let
2396         dst__2  = registerName register1 zeroh
2397         code    = registerCode register2 dst__2
2398         src__2  = registerName register2 dst__2
2399         code__2 = if isFixed register2
2400                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2401                   else code
2402     in
2403     return code__2
2404
2405 #endif /* alpha_TARGET_ARCH */
2406
2407 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2408
2409 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2410
2411 -- integer assignment to memory
2412
2413 -- specific case of adding/subtracting an integer to a particular address.
2414 -- ToDo: catch other cases where we can use an operation directly on a memory
2415 -- address.
2416 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2417                                                  CmmLit (CmmInt i _)])
2418    | addr == addr2, pk /= II64 || is32BitInteger i,
2419      Just instr <- check op
2420    = do Amode amode code_addr <- getAmode addr
2421         let code = code_addr `snocOL`
2422                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2423         return code
2424    where
2425         check (MO_Add _) = Just ADD
2426         check (MO_Sub _) = Just SUB
2427         check _ = Nothing
2428         -- ToDo: more?
2429
2430 -- general case
2431 assignMem_IntCode pk addr src = do
2432     Amode addr code_addr <- getAmode addr
2433     (code_src, op_src)   <- get_op_RI src
2434     let
2435         code = code_src `appOL`
2436                code_addr `snocOL`
2437                   MOV pk op_src (OpAddr addr)
2438         -- NOTE: op_src is stable, so it will still be valid
2439         -- after code_addr.  This may involve the introduction
2440         -- of an extra MOV to a temporary register, but we hope
2441         -- the register allocator will get rid of it.
2442     --
2443     return code
2444   where
2445     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2446     get_op_RI (CmmLit lit) | is32BitLit lit
2447       = return (nilOL, OpImm (litToImm lit))
2448     get_op_RI op
2449       = do (reg,code) <- getNonClobberedReg op
2450            return (code, OpReg reg)
2451
2452
2453 -- Assign; dst is a reg, rhs is mem
2454 assignReg_IntCode pk reg (CmmLoad src _) = do
2455   load_code <- intLoadCode (MOV pk) src
2456   return (load_code (getRegisterReg reg))
2457
2458 -- dst is a reg, but src could be anything
2459 assignReg_IntCode pk reg src = do
2460   code <- getAnyReg src
2461   return (code (getRegisterReg reg))
2462
2463 #endif /* i386_TARGET_ARCH */
2464
2465 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2466
2467 #if sparc_TARGET_ARCH
2468
2469 assignMem_IntCode pk addr src = do
2470     (srcReg, code) <- getSomeReg src
2471     Amode dstAddr addr_code <- getAmode addr
2472     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2473
2474 assignReg_IntCode pk reg src = do
2475     r <- getRegister src
2476     return $ case r of
2477         Any _ code         -> code dst
2478         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2479     where
2480       dst = getRegisterReg reg
2481
2482
2483 #endif /* sparc_TARGET_ARCH */
2484
2485 #if powerpc_TARGET_ARCH
2486
2487 assignMem_IntCode pk addr src = do
2488     (srcReg, code) <- getSomeReg src
2489     Amode dstAddr addr_code <- getAmode addr
2490     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2491
2492 -- dst is a reg, but src could be anything
2493 assignReg_IntCode pk reg src
2494     = do
2495         r <- getRegister src
2496         return $ case r of
2497             Any _ code         -> code dst
2498             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2499     where
2500         dst = getRegisterReg reg
2501
2502 #endif /* powerpc_TARGET_ARCH */
2503
2504
2505 -- -----------------------------------------------------------------------------
2506 -- Floating-point assignments
2507
2508 #if alpha_TARGET_ARCH
2509
2510 assignFltCode pk (CmmLoad dst _) src
2511   = getNewRegNat pk                 `thenNat` \ tmp ->
2512     getAmode dst                    `thenNat` \ amode ->
2513     getRegister src                         `thenNat` \ register ->
2514     let
2515         code1   = amodeCode amode []
2516         dst__2  = amodeAddr amode
2517         code2   = registerCode register tmp []
2518         src__2  = registerName register tmp
2519         sz      = primRepToSize pk
2520         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2521     in
2522     return code__2
2523
2524 assignFltCode pk dst src
2525   = getRegister dst                         `thenNat` \ register1 ->
2526     getRegister src                         `thenNat` \ register2 ->
2527     let
2528         dst__2  = registerName register1 zeroh
2529         code    = registerCode register2 dst__2
2530         src__2  = registerName register2 dst__2
2531         code__2 = if isFixed register2
2532                   then code . mkSeqInstr (FMOV src__2 dst__2)
2533                   else code
2534     in
2535     return code__2
2536
2537 #endif /* alpha_TARGET_ARCH */
2538
2539 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2540
2541 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2542
2543 -- Floating point assignment to memory
2544 assignMem_FltCode pk addr src = do
2545   (src_reg, src_code) <- getNonClobberedReg src
2546   Amode addr addr_code <- getAmode addr
2547   let
2548         code = src_code `appOL`
2549                addr_code `snocOL`
2550                 IF_ARCH_i386(GST pk src_reg addr,
2551                              MOV pk (OpReg src_reg) (OpAddr addr))
2552   return code
2553
2554 -- Floating point assignment to a register/temporary
2555 assignReg_FltCode pk reg src = do
2556   src_code <- getAnyReg src
2557   return (src_code (getRegisterReg reg))
2558
2559 #endif /* i386_TARGET_ARCH */
2560
2561 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2562
2563 #if sparc_TARGET_ARCH
2564
2565 -- Floating point assignment to memory
2566 assignMem_FltCode pk addr src = do
2567     Amode dst__2 code1 <- getAmode addr
2568     (src__2, code2) <- getSomeReg src
2569     tmp1 <- getNewRegNat pk
2570     let
2571         pk__2   = cmmExprType src
2572         code__2 = code1 `appOL` code2 `appOL`
2573             if   pk == pk__2
2574             then unitOL (ST pk src__2 dst__2)
2575             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2576     return code__2
2577
2578 -- Floating point assignment to a register/temporary
2579 -- ToDo: Verify correctness
2580 assignReg_FltCode pk reg src = do
2581     r <- getRegister src
2582     v1 <- getNewRegNat pk
2583     return $ case r of
2584         Any _ code         -> code dst
2585         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2586     where
2587       dst = getRegisterReg reg
2588
2589 #endif /* sparc_TARGET_ARCH */
2590
2591 #if powerpc_TARGET_ARCH
2592
2593 -- Easy, isn't it?
2594 assignMem_FltCode = assignMem_IntCode
2595 assignReg_FltCode = assignReg_IntCode
2596
2597 #endif /* powerpc_TARGET_ARCH */
2598
2599
2600 -- -----------------------------------------------------------------------------
2601 -- Generating an non-local jump
2602
2603 -- (If applicable) Do not fill the delay slots here; you will confuse the
2604 -- register allocator.
2605
2606 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2607
2608 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2609
2610 #if alpha_TARGET_ARCH
2611
2612 genJump (CmmLabel lbl)
2613   | isAsmTemp lbl = returnInstr (BR target)
2614   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2615   where
2616     target = ImmCLbl lbl
2617
2618 genJump tree
2619   = getRegister tree                `thenNat` \ register ->
2620     getNewRegNat PtrRep             `thenNat` \ tmp ->
2621     let
2622         dst    = registerName register pv
2623         code   = registerCode register pv
2624         target = registerName register pv
2625     in
2626     if isFixed register then
2627         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2628     else
2629     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2630
2631 #endif /* alpha_TARGET_ARCH */
2632
2633 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2634
2635 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2636
2637 genJump (CmmLoad mem pk) = do
2638   Amode target code <- getAmode mem
2639   return (code `snocOL` JMP (OpAddr target))
2640
2641 genJump (CmmLit lit) = do
2642   return (unitOL (JMP (OpImm (litToImm lit))))
2643
2644 genJump expr = do
2645   (reg,code) <- getSomeReg expr
2646   return (code `snocOL` JMP (OpReg reg))
2647
2648 #endif /* i386_TARGET_ARCH */
2649
2650 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2651
2652 #if sparc_TARGET_ARCH
2653
2654 genJump (CmmLit (CmmLabel lbl))
2655   = return (toOL [CALL (Left target) 0 True, NOP])
2656   where
2657     target = ImmCLbl lbl
2658
2659 genJump tree
2660   = do
2661         (target, code) <- getSomeReg tree
2662         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2663
2664 #endif /* sparc_TARGET_ARCH */
2665
2666 #if powerpc_TARGET_ARCH
2667 genJump (CmmLit (CmmLabel lbl))
2668   = return (unitOL $ JMP lbl)
2669
2670 genJump tree
2671   = do
2672         (target,code) <- getSomeReg tree
2673         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2674 #endif /* powerpc_TARGET_ARCH */
2675
2676
2677 -- -----------------------------------------------------------------------------
2678 --  Unconditional branches
2679
2680 genBranch :: BlockId -> NatM InstrBlock
2681
2682 genBranch = return . toOL . mkBranchInstr
2683
2684 -- -----------------------------------------------------------------------------
2685 --  Conditional jumps
2686
2687 {-
2688 Conditional jumps are always to local labels, so we can use branch
2689 instructions.  We peek at the arguments to decide what kind of
2690 comparison to do.
2691
2692 ALPHA: For comparisons with 0, we're laughing, because we can just do
2693 the desired conditional branch.
2694
2695 I386: First, we have to ensure that the condition
2696 codes are set according to the supplied comparison operation.
2697
2698 SPARC: First, we have to ensure that the condition codes are set
2699 according to the supplied comparison operation.  We generate slightly
2700 different code for floating point comparisons, because a floating
2701 point operation cannot directly precede a @BF@.  We assume the worst
2702 and fill that slot with a @NOP@.
2703
2704 SPARC: Do not fill the delay slots here; you will confuse the register
2705 allocator.
2706 -}
2707
2708
2709 genCondJump
2710     :: BlockId      -- the branch target
2711     -> CmmExpr      -- the condition on which to branch
2712     -> NatM InstrBlock
2713
2714 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2715
2716 #if alpha_TARGET_ARCH
2717
2718 genCondJump id (StPrim op [x, StInt 0])
2719   = getRegister x                           `thenNat` \ register ->
2720     getNewRegNat (registerRep register)
2721                                     `thenNat` \ tmp ->
2722     let
2723         code   = registerCode register tmp
2724         value  = registerName register tmp
2725         pk     = registerRep register
2726         target = ImmCLbl lbl
2727     in
2728     returnSeq code [BI (cmpOp op) value target]
2729   where
2730     cmpOp CharGtOp = GTT
2731     cmpOp CharGeOp = GE
2732     cmpOp CharEqOp = EQQ
2733     cmpOp CharNeOp = NE
2734     cmpOp CharLtOp = LTT
2735     cmpOp CharLeOp = LE
2736     cmpOp IntGtOp = GTT
2737     cmpOp IntGeOp = GE
2738     cmpOp IntEqOp = EQQ
2739     cmpOp IntNeOp = NE
2740     cmpOp IntLtOp = LTT
2741     cmpOp IntLeOp = LE
2742     cmpOp WordGtOp = NE
2743     cmpOp WordGeOp = ALWAYS
2744     cmpOp WordEqOp = EQQ
2745     cmpOp WordNeOp = NE
2746     cmpOp WordLtOp = NEVER
2747     cmpOp WordLeOp = EQQ
2748     cmpOp AddrGtOp = NE
2749     cmpOp AddrGeOp = ALWAYS
2750     cmpOp AddrEqOp = EQQ
2751     cmpOp AddrNeOp = NE
2752     cmpOp AddrLtOp = NEVER
2753     cmpOp AddrLeOp = EQQ
2754
2755 genCondJump lbl (StPrim op [x, StDouble 0.0])
2756   = getRegister x                           `thenNat` \ register ->
2757     getNewRegNat (registerRep register)
2758                                     `thenNat` \ tmp ->
2759     let
2760         code   = registerCode register tmp
2761         value  = registerName register tmp
2762         pk     = registerRep register
2763         target = ImmCLbl lbl
2764     in
2765     return (code . mkSeqInstr (BF (cmpOp op) value target))
2766   where
2767     cmpOp FloatGtOp = GTT
2768     cmpOp FloatGeOp = GE
2769     cmpOp FloatEqOp = EQQ
2770     cmpOp FloatNeOp = NE
2771     cmpOp FloatLtOp = LTT
2772     cmpOp FloatLeOp = LE
2773     cmpOp DoubleGtOp = GTT
2774     cmpOp DoubleGeOp = GE
2775     cmpOp DoubleEqOp = EQQ
2776     cmpOp DoubleNeOp = NE
2777     cmpOp DoubleLtOp = LTT
2778     cmpOp DoubleLeOp = LE
2779
2780 genCondJump lbl (StPrim op [x, y])
2781   | fltCmpOp op
2782   = trivialFCode pr instr x y       `thenNat` \ register ->
2783     getNewRegNat FF64               `thenNat` \ tmp ->
2784     let
2785         code   = registerCode register tmp
2786         result = registerName register tmp
2787         target = ImmCLbl lbl
2788     in
2789     return (code . mkSeqInstr (BF cond result target))
2790   where
2791     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2792
2793     fltCmpOp op = case op of
2794         FloatGtOp -> True
2795         FloatGeOp -> True
2796         FloatEqOp -> True
2797         FloatNeOp -> True
2798         FloatLtOp -> True
2799         FloatLeOp -> True
2800         DoubleGtOp -> True
2801         DoubleGeOp -> True
2802         DoubleEqOp -> True
2803         DoubleNeOp -> True
2804         DoubleLtOp -> True
2805         DoubleLeOp -> True
2806         _ -> False
2807     (instr, cond) = case op of
2808         FloatGtOp -> (FCMP TF LE, EQQ)
2809         FloatGeOp -> (FCMP TF LTT, EQQ)
2810         FloatEqOp -> (FCMP TF EQQ, NE)
2811         FloatNeOp -> (FCMP TF EQQ, EQQ)
2812         FloatLtOp -> (FCMP TF LTT, NE)
2813         FloatLeOp -> (FCMP TF LE, NE)
2814         DoubleGtOp -> (FCMP TF LE, EQQ)
2815         DoubleGeOp -> (FCMP TF LTT, EQQ)
2816         DoubleEqOp -> (FCMP TF EQQ, NE)
2817         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2818         DoubleLtOp -> (FCMP TF LTT, NE)
2819         DoubleLeOp -> (FCMP TF LE, NE)
2820
2821 genCondJump lbl (StPrim op [x, y])
2822   = trivialCode instr x y           `thenNat` \ register ->
2823     getNewRegNat IntRep             `thenNat` \ tmp ->
2824     let
2825         code   = registerCode register tmp
2826         result = registerName register tmp
2827         target = ImmCLbl lbl
2828     in
2829     return (code . mkSeqInstr (BI cond result target))
2830   where
2831     (instr, cond) = case op of
2832         CharGtOp -> (CMP LE, EQQ)
2833         CharGeOp -> (CMP LTT, EQQ)
2834         CharEqOp -> (CMP EQQ, NE)
2835         CharNeOp -> (CMP EQQ, EQQ)
2836         CharLtOp -> (CMP LTT, NE)
2837         CharLeOp -> (CMP LE, NE)
2838         IntGtOp -> (CMP LE, EQQ)
2839         IntGeOp -> (CMP LTT, EQQ)
2840         IntEqOp -> (CMP EQQ, NE)
2841         IntNeOp -> (CMP EQQ, EQQ)
2842         IntLtOp -> (CMP LTT, NE)
2843         IntLeOp -> (CMP LE, NE)
2844         WordGtOp -> (CMP ULE, EQQ)
2845         WordGeOp -> (CMP ULT, EQQ)
2846         WordEqOp -> (CMP EQQ, NE)
2847         WordNeOp -> (CMP EQQ, EQQ)
2848         WordLtOp -> (CMP ULT, NE)
2849         WordLeOp -> (CMP ULE, NE)
2850         AddrGtOp -> (CMP ULE, EQQ)
2851         AddrGeOp -> (CMP ULT, EQQ)
2852         AddrEqOp -> (CMP EQQ, NE)
2853         AddrNeOp -> (CMP EQQ, EQQ)
2854         AddrLtOp -> (CMP ULT, NE)
2855         AddrLeOp -> (CMP ULE, NE)
2856
2857 #endif /* alpha_TARGET_ARCH */
2858
2859 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2860
2861 #if i386_TARGET_ARCH
2862
2863 genCondJump id bool = do
2864   CondCode _ cond code <- getCondCode bool
2865   return (code `snocOL` JXX cond id)
2866
2867 #endif
2868
2869 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2870
2871 #if x86_64_TARGET_ARCH
2872
2873 genCondJump id bool = do
2874   CondCode is_float cond cond_code <- getCondCode bool
2875   if not is_float
2876     then
2877         return (cond_code `snocOL` JXX cond id)
2878     else do
2879         lbl <- getBlockIdNat
2880
2881         -- see comment with condFltReg
2882         let code = case cond of
2883                         NE  -> or_unordered
2884                         GU  -> plain_test
2885                         GEU -> plain_test
2886                         _   -> and_ordered
2887
2888             plain_test = unitOL (
2889                   JXX cond id
2890                 )
2891             or_unordered = toOL [
2892                   JXX cond id,
2893                   JXX PARITY id
2894                 ]
2895             and_ordered = toOL [
2896                   JXX PARITY lbl,
2897                   JXX cond id,
2898                   JXX ALWAYS lbl,
2899                   NEWBLOCK lbl
2900                 ]
2901         return (cond_code `appOL` code)
2902
2903 #endif
2904
2905 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2906
2907 #if sparc_TARGET_ARCH
2908
2909 genCondJump (BlockId id) bool = do
2910   CondCode is_float cond code <- getCondCode bool
2911   return (
2912        code `appOL`
2913        toOL (
2914          if   is_float
2915          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2916          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2917        )
2918     )
2919
2920 #endif /* sparc_TARGET_ARCH */
2921
2922
2923 #if powerpc_TARGET_ARCH
2924
2925 genCondJump id bool = do
2926   CondCode is_float cond code <- getCondCode bool
2927   return (code `snocOL` BCC cond id)
2928
2929 #endif /* powerpc_TARGET_ARCH */
2930
2931
2932 -- -----------------------------------------------------------------------------
2933 --  Generating C calls
2934
2935 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2936 -- @get_arg@, which moves the arguments to the correct registers/stack
2937 -- locations.  Apart from that, the code is easy.
2938 --
2939 -- (If applicable) Do not fill the delay slots here; you will confuse the
2940 -- register allocator.
2941
2942 genCCall
2943     :: CmmCallTarget            -- function to call
2944     -> HintedCmmFormals         -- where to put the result
2945     -> HintedCmmActuals         -- arguments (of mixed type)
2946     -> NatM InstrBlock
2947
2948 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2949
2950 #if alpha_TARGET_ARCH
2951
2952 ccallResultRegs =
2953
2954 genCCall fn cconv result_regs args
2955   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2956                           `thenNat` \ ((unused,_), argCode) ->
2957     let
2958         nRegs = length allArgRegs - length unused
2959         code = asmSeqThen (map ($ []) argCode)
2960     in
2961         returnSeq code [
2962             LDA pv (AddrImm (ImmLab (ptext fn))),
2963             JSR ra (AddrReg pv) nRegs,
2964             LDGP gp (AddrReg ra)]
2965   where
2966     ------------------------
2967     {-  Try to get a value into a specific register (or registers) for
2968         a call.  The first 6 arguments go into the appropriate
2969         argument register (separate registers for integer and floating
2970         point arguments, but used in lock-step), and the remaining
2971         arguments are dumped to the stack, beginning at 0(sp).  Our
2972         first argument is a pair of the list of remaining argument
2973         registers to be assigned for this call and the next stack
2974         offset to use for overflowing arguments.  This way,
2975         @get_Arg@ can be applied to all of a call's arguments using
2976         @mapAccumLNat@.
2977     -}
2978     get_arg
2979         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2980         -> StixTree             -- Current argument
2981         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2982
2983     -- We have to use up all of our argument registers first...
2984
2985     get_arg ((iDst,fDst):dsts, offset) arg
2986       = getRegister arg                     `thenNat` \ register ->
2987         let
2988             reg  = if isFloatType pk then fDst else iDst
2989             code = registerCode register reg
2990             src  = registerName register reg
2991             pk   = registerRep register
2992         in
2993         return (
2994             if isFloatType pk then
2995                 ((dsts, offset), if isFixed register then
2996                     code . mkSeqInstr (FMOV src fDst)
2997                     else code)
2998             else
2999                 ((dsts, offset), if isFixed register then
3000                     code . mkSeqInstr (OR src (RIReg src) iDst)
3001                     else code))
3002
3003     -- Once we have run out of argument registers, we move to the
3004     -- stack...
3005
3006     get_arg ([], offset) arg
3007       = getRegister arg                 `thenNat` \ register ->
3008         getNewRegNat (registerRep register)
3009                                         `thenNat` \ tmp ->
3010         let
3011             code = registerCode register tmp
3012             src  = registerName register tmp
3013             pk   = registerRep register
3014             sz   = primRepToSize pk
3015         in
3016         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3017
3018 #endif /* alpha_TARGET_ARCH */
3019
3020 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3021
3022 #if i386_TARGET_ARCH
3023
3024 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3025         -- write barrier compiles to no code on x86/x86-64;
3026         -- we keep it this long in order to prevent earlier optimisations.
3027
3028 -- we only cope with a single result for foreign calls
3029 genCCall (CmmPrim op) [CmmHinted r _] args = do
3030   l1 <- getNewLabelNat
3031   l2 <- getNewLabelNat
3032   case op of
3033         MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3034         MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3035
3036         MO_F32_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3037         MO_F64_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3038
3039         MO_F32_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3040         MO_F64_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3041
3042         MO_F32_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3043         MO_F64_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3044
3045         other_op    -> outOfLineFloatOp op r args
3046  where
3047   actuallyInlineFloatOp instr size [CmmHinted x _]
3048         = do res <- trivialUFCode size (instr size) x
3049              any <- anyReg res
3050              return (any (getRegisterReg (CmmLocal r)))
3051
3052 genCCall target dest_regs args = do
3053     let
3054         sizes               = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3055 #if !darwin_TARGET_OS
3056         tot_arg_size        = sum sizes
3057 #else
3058         raw_arg_size        = sum sizes
3059         tot_arg_size        = roundTo 16 raw_arg_size
3060         arg_pad_size        = tot_arg_size - raw_arg_size
3061     delta0 <- getDeltaNat
3062     setDeltaNat (delta0 - arg_pad_size)
3063 #endif
3064
3065     push_codes <- mapM push_arg (reverse args)
3066     delta <- getDeltaNat
3067
3068     -- in
3069     -- deal with static vs dynamic call targets
3070     (callinsns,cconv) <-
3071       case target of
3072         -- CmmPrim -> ...
3073         CmmCallee (CmmLit (CmmLabel lbl)) conv
3074            -> -- ToDo: stdcall arg sizes
3075               return (unitOL (CALL (Left fn_imm) []), conv)
3076            where fn_imm = ImmCLbl lbl
3077         CmmCallee expr conv
3078            -> do { (dyn_c, dyn_r) <- get_op expr
3079                  ; ASSERT( isWord32 (cmmExprType expr) )
3080                    return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3081
3082     let push_code
3083 #if darwin_TARGET_OS
3084             | arg_pad_size /= 0
3085             = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3086                     DELTA (delta0 - arg_pad_size)]
3087               `appOL` concatOL push_codes
3088             | otherwise
3089 #endif
3090             = concatOL push_codes
3091         call = callinsns `appOL`
3092                toOL (
3093                         -- Deallocate parameters after call for ccall;
3094                         -- but not for stdcall (callee does it)
3095                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3096                    [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3097                   ++
3098                   [DELTA (delta + tot_arg_size)]
3099                )
3100     -- in
3101     setDeltaNat (delta + tot_arg_size)
3102
3103     let
3104         -- assign the results, if necessary
3105         assign_code []     = nilOL
3106         assign_code [CmmHinted dest _hint]
3107           | isFloatType ty = unitOL (GMOV fake0 r_dest)
3108           | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3109                                     MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3110           | otherwise      = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3111           where
3112                 ty = localRegType dest
3113                 w  = typeWidth ty
3114                 r_dest_hi = getHiVRegFromLo r_dest
3115                 r_dest    = getRegisterReg (CmmLocal dest)
3116         assign_code many = panic "genCCall.assign_code many"
3117
3118     return (push_code `appOL`
3119             call `appOL`
3120             assign_code dest_regs)
3121
3122   where
3123     arg_size :: CmmType -> Int  -- Width in bytes
3124     arg_size ty = widthInBytes (typeWidth ty)
3125
3126     roundTo a x | x `mod` a == 0 = x
3127                 | otherwise = x + a - (x `mod` a)
3128
3129
3130     push_arg :: HintedCmmActual {-current argument-}
3131                     -> NatM InstrBlock  -- code
3132
3133     push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3134       | isWord64 arg_ty = do
3135         ChildCode64 code r_lo <- iselExpr64 arg
3136         delta <- getDeltaNat
3137         setDeltaNat (delta - 8)
3138         let
3139             r_hi = getHiVRegFromLo r_lo
3140         -- in
3141         return (       code `appOL`
3142                        toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3143                              PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3144                              DELTA (delta-8)]
3145             )
3146
3147       | otherwise = do
3148         (code, reg) <- get_op arg
3149         delta <- getDeltaNat
3150         let size = arg_size arg_ty      -- Byte size
3151         setDeltaNat (delta-size)
3152         if (isFloatType arg_ty)
3153            then return (code `appOL`
3154                         toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3155                               DELTA (delta-size),
3156                               GST (floatSize (typeWidth arg_ty))
3157                                   reg (AddrBaseIndex (EABaseReg esp)
3158                                                         EAIndexNone
3159                                                         (ImmInt 0))]
3160                        )
3161            else return (code `snocOL`
3162                         PUSH II32 (OpReg reg) `snocOL`
3163                         DELTA (delta-size)
3164                        )
3165       where
3166          arg_ty = cmmExprType arg
3167
3168     ------------
3169     get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3170     get_op op = do
3171         (reg,code) <- getSomeReg op
3172         return (code, reg)
3173
3174 #endif /* i386_TARGET_ARCH */
3175
3176 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3177
3178 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3179   -> NatM InstrBlock
3180 outOfLineFloatOp mop res args
3181   = do
3182       dflags <- getDynFlagsNat
3183       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3184       let target = CmmCallee targetExpr CCallConv
3185
3186       if isFloat64 (localRegType res)
3187         then
3188           stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3189         else do
3190           uq <- getUniqueNat
3191           let
3192             tmp = LocalReg uq f64
3193           -- in
3194           code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3195           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3196           return (code1 `appOL` code2)
3197   where
3198         lbl = mkForeignLabel fn Nothing False
3199
3200         fn = case mop of
3201               MO_F32_Sqrt  -> fsLit "sqrtf"
3202               MO_F32_Sin   -> fsLit "sinf"
3203               MO_F32_Cos   -> fsLit "cosf"
3204               MO_F32_Tan   -> fsLit "tanf"
3205               MO_F32_Exp   -> fsLit "expf"
3206               MO_F32_Log   -> fsLit "logf"
3207
3208               MO_F32_Asin  -> fsLit "asinf"
3209               MO_F32_Acos  -> fsLit "acosf"
3210               MO_F32_Atan  -> fsLit "atanf"
3211
3212               MO_F32_Sinh  -> fsLit "sinhf"
3213               MO_F32_Cosh  -> fsLit "coshf"
3214               MO_F32_Tanh  -> fsLit "tanhf"
3215               MO_F32_Pwr   -> fsLit "powf"
3216
3217               MO_F64_Sqrt  -> fsLit "sqrt"
3218               MO_F64_Sin   -> fsLit "sin"
3219               MO_F64_Cos   -> fsLit "cos"
3220               MO_F64_Tan   -> fsLit "tan"
3221               MO_F64_Exp   -> fsLit "exp"
3222               MO_F64_Log   -> fsLit "log"
3223
3224               MO_F64_Asin  -> fsLit "asin"
3225               MO_F64_Acos  -> fsLit "acos"
3226               MO_F64_Atan  -> fsLit "atan"
3227
3228               MO_F64_Sinh  -> fsLit "sinh"
3229               MO_F64_Cosh  -> fsLit "cosh"
3230               MO_F64_Tanh  -> fsLit "tanh"
3231               MO_F64_Pwr   -> fsLit "pow"
3232
3233 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3234
3235 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3236
3237 #if x86_64_TARGET_ARCH
3238
3239 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3240         -- write barrier compiles to no code on x86/x86-64;
3241         -- we keep it this long in order to prevent earlier optimisations.
3242
3243
3244 genCCall (CmmPrim op) [CmmHinted r _] args =
3245   outOfLineFloatOp op r args
3246
3247 genCCall target dest_regs args = do
3248
3249         -- load up the register arguments
3250     (stack_args, aregs, fregs, load_args_code)
3251          <- load_args args allArgRegs allFPArgRegs nilOL
3252
3253     let
3254         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3255         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3256         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3257                 -- for annotating the call instruction with
3258
3259         sse_regs = length fp_regs_used
3260
3261         tot_arg_size = arg_size * length stack_args
3262
3263         -- On entry to the called function, %rsp should be aligned
3264         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3265         -- the return address is 16-byte aligned).  In STG land
3266         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3267         -- need to make sure we push a multiple of 16-bytes of args,
3268         -- plus the return address, to get the correct alignment.
3269         -- Urg, this is hard.  We need to feed the delta back into
3270         -- the arg pushing code.
3271     (real_size, adjust_rsp) <-
3272         if tot_arg_size `rem` 16 == 0
3273             then return (tot_arg_size, nilOL)
3274             else do -- we need to adjust...
3275                 delta <- getDeltaNat
3276                 setDeltaNat (delta-8)
3277                 return (tot_arg_size+8, toOL [
3278                                 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3279                                 DELTA (delta-8)
3280                         ])
3281
3282         -- push the stack args, right to left
3283     push_code <- push_args (reverse stack_args) nilOL
3284     delta <- getDeltaNat
3285
3286     -- deal with static vs dynamic call targets
3287     (callinsns,cconv) <-
3288       case target of
3289         -- CmmPrim -> ...
3290         CmmCallee (CmmLit (CmmLabel lbl)) conv
3291            -> -- ToDo: stdcall arg sizes
3292               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3293            where fn_imm = ImmCLbl lbl
3294         CmmCallee expr conv
3295            -> do (dyn_r, dyn_c) <- getSomeReg expr
3296                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3297
3298     let
3299         -- The x86_64 ABI requires us to set %al to the number of SSE
3300         -- registers that contain arguments, if the called routine
3301         -- is a varargs function.  We don't know whether it's a
3302         -- varargs function or not, so we have to assume it is.
3303         --
3304         -- It's not safe to omit this assignment, even if the number
3305         -- of SSE regs in use is zero.  If %al is larger than 8
3306         -- on entry to a varargs function, seg faults ensue.
3307         assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3308
3309     let call = callinsns `appOL`
3310                toOL (
3311                         -- Deallocate parameters after call for ccall;
3312                         -- but not for stdcall (callee does it)
3313                   (if cconv == StdCallConv || real_size==0 then [] else
3314                    [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3315                   ++
3316                   [DELTA (delta + real_size)]
3317                )
3318     -- in
3319     setDeltaNat (delta + real_size)
3320
3321     let
3322         -- assign the results, if necessary
3323         assign_code []     = nilOL
3324         assign_code [CmmHinted dest _hint] =
3325           case typeWidth rep of
3326                 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3327                 W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3328                 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3329           where
3330                 rep = localRegType dest
3331                 r_dest = getRegisterReg (CmmLocal dest)
3332         assign_code many = panic "genCCall.assign_code many"
3333
3334     return (load_args_code      `appOL`
3335             adjust_rsp          `appOL`
3336             push_code           `appOL`
3337             assign_eax sse_regs `appOL`
3338             call                `appOL`
3339             assign_code dest_regs)
3340
3341   where
3342     arg_size = 8 -- always, at the mo
3343
3344     load_args :: [CmmHinted CmmExpr]
3345               -> [Reg]                  -- int regs avail for args
3346               -> [Reg]                  -- FP regs avail for args
3347               -> InstrBlock
3348               -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3349     load_args args [] [] code     =  return (args, [], [], code)
3350         -- no more regs to use
3351     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3352         -- no more args to push
3353     load_args ((CmmHinted arg hint) : rest) aregs fregs code
3354         | isFloatType arg_rep =
3355         case fregs of
3356           [] -> push_this_arg
3357           (r:rs) -> do
3358              arg_code <- getAnyReg arg
3359              load_args rest aregs rs (code `appOL` arg_code r)
3360         | otherwise =
3361         case aregs of
3362           [] -> push_this_arg
3363           (r:rs) -> do
3364              arg_code <- getAnyReg arg
3365              load_args rest rs fregs (code `appOL` arg_code r)
3366         where
3367           arg_rep = cmmExprType arg
3368
3369           push_this_arg = do
3370             (args',ars,frs,code') <- load_args rest aregs fregs code
3371             return ((CmmHinted arg hint):args', ars, frs, code')
3372
3373     push_args [] code = return code
3374     push_args ((CmmHinted arg hint):rest) code
3375        | isFloatType arg_rep = do
3376          (arg_reg, arg_code) <- getSomeReg arg
3377          delta <- getDeltaNat
3378          setDeltaNat (delta-arg_size)
3379          let code' = code `appOL` arg_code `appOL` toOL [
3380                         SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3381                         DELTA (delta-arg_size),
3382                         MOV (floatSize width) (OpReg arg_reg) (OpAddr  (spRel 0))]
3383          push_args rest code'
3384
3385        | otherwise = do
3386        -- we only ever generate word-sized function arguments.  Promotion
3387        -- has already happened: our Int8# type is kept sign-extended
3388        -- in an Int#, for example.
3389          ASSERT(width == W64) return ()
3390          (arg_op, arg_code) <- getOperand arg
3391          delta <- getDeltaNat
3392          setDeltaNat (delta-arg_size)
3393          let code' = code `appOL` toOL [PUSH II64 arg_op,
3394                                         DELTA (delta-arg_size)]
3395          push_args rest code'
3396         where
3397           arg_rep = cmmExprType arg
3398           width = typeWidth arg_rep
3399 #endif
3400
3401 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3402
3403 #if sparc_TARGET_ARCH
3404 {-
3405    The SPARC calling convention is an absolute
3406    nightmare.  The first 6x32 bits of arguments are mapped into
3407    %o0 through %o5, and the remaining arguments are dumped to the
3408    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3409
3410    If we have to put args on the stack, move %o6==%sp down by
3411    the number of words to go on the stack, to ensure there's enough space.
3412
3413    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3414    16 words above the stack pointer is a word for the address of
3415    a structure return value.  I use this as a temporary location
3416    for moving values from float to int regs.  Certainly it isn't
3417    safe to put anything in the 16 words starting at %sp, since
3418    this area can get trashed at any time due to window overflows
3419    caused by signal handlers.
3420
3421    A final complication (if the above isn't enough) is that
3422    we can't blithely calculate the arguments one by one into
3423    %o0 .. %o5.  Consider the following nested calls:
3424
3425        fff a (fff b c)
3426
3427    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3428    the inner call will itself use %o0, which trashes the value put there
3429    in preparation for the outer call.  Upshot: we need to calculate the
3430    args into temporary regs, and move those to arg regs or onto the
3431    stack only immediately prior to the call proper.  Sigh.
3432 -}
3433
3434 genCCall target dest_regs argsAndHints = do
3435     let
3436         args = map hintlessCmm argsAndHints
3437     argcode_and_vregs <- mapM arg_to_int_vregs args
3438     let
3439         (argcodes, vregss) = unzip argcode_and_vregs
3440         n_argRegs          = length allArgRegs
3441         n_argRegs_used     = min (length vregs) n_argRegs
3442         vregs              = concat vregss
3443     -- deal with static vs dynamic call targets
3444     callinsns <- (case target of
3445         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3446                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3447         CmmCallee expr conv -> do
3448                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3449                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3450         CmmPrim mop -> do
3451                   (res, reduce) <- outOfLineFloatOp mop
3452                   lblOrMopExpr <- case res of
3453                        Left lbl -> do
3454                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3455                        Right mopExpr -> do
3456                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3457                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3458                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3459
3460       )
3461     let
3462         argcode = concatOL argcodes
3463         (move_sp_down, move_sp_up)
3464            = let diff = length vregs - n_argRegs
3465                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3466              in  if   nn <= 0
3467                  then (nilOL, nilOL)
3468                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3469         transfer_code
3470            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3471     return (argcode       `appOL`
3472             move_sp_down  `appOL`
3473             transfer_code `appOL`
3474             callinsns     `appOL`
3475             unitOL NOP    `appOL`
3476             move_sp_up)
3477   where
3478      -- move args from the integer vregs into which they have been
3479      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3480      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3481
3482      move_final [] _ offset          -- all args done
3483         = []
3484
3485      move_final (v:vs) [] offset     -- out of aregs; move to stack
3486         = ST II32 v (spRel offset)
3487           : move_final vs [] (offset+1)
3488
3489      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3490         = OR False g0 (RIReg v) a
3491           : move_final vs az offset
3492
3493      -- generate code to calculate an argument, and move it into one
3494      -- or two integer vregs.
3495      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3496      arg_to_int_vregs arg
3497         | isWord64 (cmmExprType arg)
3498         = do
3499           (ChildCode64 code r_lo) <- iselExpr64 arg
3500           let
3501               r_hi = getHiVRegFromLo r_lo
3502           return (code, [r_hi, r_lo])
3503         | otherwise
3504         = do
3505           (src, code) <- getSomeReg arg
3506           tmp <- getNewRegNat (cmmExprType arg)
3507           let
3508               pk   = cmmExprType arg
3509           case pk of
3510              FF64 -> do
3511                       v1 <- getNewRegNat II32
3512                       v2 <- getNewRegNat II32
3513                       return (
3514                         code                          `snocOL`
3515                         FMOV FF64 src f0                `snocOL`
3516                         ST   FF32  f0 (spRel 16)         `snocOL`
3517                         LD   II32  (spRel 16) v1         `snocOL`
3518                         ST   FF32  (fPair f0) (spRel 16) `snocOL`
3519                         LD   II32  (spRel 16) v2
3520                         ,
3521                         [v1,v2]
3522                        )
3523              FF32 -> do
3524                       v1 <- getNewRegNat II32
3525                       return (
3526                         code                    `snocOL`
3527                         ST   FF32  src (spRel 16)  `snocOL`
3528                         LD   II32  (spRel 16) v1
3529                         ,
3530                         [v1]
3531                        )
3532              other -> do
3533                         v1 <- getNewRegNat II32
3534                         return (
3535                           code `snocOL` OR False g0 (RIReg src) v1
3536                           ,
3537                           [v1]
3538                          )
3539 outOfLineFloatOp mop =
3540     do
3541       dflags <- getDynFlagsNat
3542       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3543                   mkForeignLabel functionName Nothing True
3544       let mopLabelOrExpr = case mopExpr of
3545                         CmmLit (CmmLabel lbl) -> Left lbl
3546                         _ -> Right mopExpr
3547       return (mopLabelOrExpr, reduce)
3548             where
3549                 (reduce, functionName) = case mop of
3550                   MO_F32_Exp    -> (True,  fsLit "exp")
3551                   MO_F32_Log    -> (True,  fsLit "log")
3552                   MO_F32_Sqrt   -> (True,  fsLit "sqrt")
3553
3554                   MO_F32_Sin    -> (True,  fsLit "sin")
3555                   MO_F32_Cos    -> (True,  fsLit "cos")
3556                   MO_F32_Tan    -> (True,  fsLit "tan")
3557
3558                   MO_F32_Asin   -> (True,  fsLit "asin")
3559                   MO_F32_Acos   -> (True,  fsLit "acos")
3560                   MO_F32_Atan   -> (True,  fsLit "atan")
3561
3562                   MO_F32_Sinh   -> (True,  fsLit "sinh")
3563                   MO_F32_Cosh   -> (True,  fsLit "cosh")
3564                   MO_F32_Tanh   -> (True,  fsLit "tanh")
3565
3566                   MO_F64_Exp    -> (False, fsLit "exp")
3567                   MO_F64_Log    -> (False, fsLit "log")
3568                   MO_F64_Sqrt   -> (False, fsLit "sqrt")
3569
3570                   MO_F64_Sin    -> (False, fsLit "sin")
3571                   MO_F64_Cos    -> (False, fsLit "cos")
3572                   MO_F64_Tan    -> (False, fsLit "tan")
3573
3574                   MO_F64_Asin   -> (False, fsLit "asin")
3575                   MO_F64_Acos   -> (False, fsLit "acos")
3576                   MO_F64_Atan   -> (False, fsLit "atan")
3577
3578                   MO_F64_Sinh   -> (False, fsLit "sinh")
3579                   MO_F64_Cosh   -> (False, fsLit "cosh")
3580                   MO_F64_Tanh   -> (False, fsLit "tanh")
3581
3582                   other -> pprPanic "outOfLineFloatOp(sparc) "
3583                                 (pprCallishMachOp mop)
3584
3585 #endif /* sparc_TARGET_ARCH */
3586
3587 #if powerpc_TARGET_ARCH
3588
3589 #if darwin_TARGET_OS || linux_TARGET_OS
3590 {-
3591     The PowerPC calling convention for Darwin/Mac OS X
3592     is described in Apple's document
3593     "Inside Mac OS X - Mach-O Runtime Architecture".
3594
3595     PowerPC Linux uses the System V Release 4 Calling Convention
3596     for PowerPC. It is described in the
3597     "System V Application Binary Interface PowerPC Processor Supplement".
3598
3599     Both conventions are similar:
3600     Parameters may be passed in general-purpose registers starting at r3, in
3601     floating point registers starting at f1, or on the stack.
3602
3603     But there are substantial differences:
3604     * The number of registers used for parameter passing and the exact set of
3605       nonvolatile registers differs (see MachRegs.lhs).
3606     * On Darwin, stack space is always reserved for parameters, even if they are
3607       passed in registers. The called routine may choose to save parameters from
3608       registers to the corresponding space on the stack.
3609     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3610       parameter is passed in an FPR.
3611     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3612       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3613       Darwin just treats an I64 like two separate II32s (high word first).
3614     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3615       4-byte aligned like everything else on Darwin.
3616     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3617       PowerPC Linux does not agree, so neither do we.
3618
3619     According to both conventions, The parameter area should be part of the
3620     caller's stack frame, allocated in the caller's prologue code (large enough
3621     to hold the parameter lists for all called routines). The NCG already
3622     uses the stack for register spilling, leaving 64 bytes free at the top.
3623     If we need a larger parameter area than that, we just allocate a new stack
3624     frame just before ccalling.
3625 -}
3626
3627
3628 genCCall (CmmPrim MO_WriteBarrier) _ _
3629  = return $ unitOL LWSYNC
3630
3631 genCCall target dest_regs argsAndHints
3632   = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3633         -- we rely on argument promotion in the codeGen
3634     do
3635         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3636                                                         (zip args argReps)
3637                                                         allArgRegs allFPArgRegs
3638                                                         initialStackOffset
3639                                                         (toOL []) []
3640
3641         (labelOrExpr, reduceToFF32) <- case target of
3642             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3643             CmmCallee expr conv -> return  (Right expr, False)
3644             CmmPrim mop -> outOfLineFloatOp mop
3645
3646         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3647             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3648
3649         case labelOrExpr of
3650             Left lbl -> do
3651                 return (         codeBefore
3652                         `snocOL` BL lbl usedRegs
3653                         `appOL`  codeAfter)
3654             Right dyn -> do
3655                 (dynReg, dynCode) <- getSomeReg dyn
3656                 return (         dynCode
3657                         `snocOL` MTCTR dynReg
3658                         `appOL`  codeBefore
3659                         `snocOL` BCTRL usedRegs
3660                         `appOL`  codeAfter)
3661     where
3662 #if darwin_TARGET_OS
3663         initialStackOffset = 24
3664             -- size of linkage area + size of arguments, in bytes
3665         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3666                                  map (widthInBytes . typeWidth) argReps
3667 #elif linux_TARGET_OS
3668         initialStackOffset = 8
3669         stackDelta finalStack = roundTo 16 finalStack
3670 #endif
3671         args = map hintlessCmm argsAndHints
3672         argReps = map cmmExprType args
3673
3674         roundTo a x | x `mod` a == 0 = x
3675                     | otherwise = x + a - (x `mod` a)
3676
3677         move_sp_down finalStack
3678                | delta > 64 =
3679                         toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3680                               DELTA (-delta)]
3681                | otherwise = nilOL
3682                where delta = stackDelta finalStack
3683         move_sp_up finalStack
3684                | delta > 64 =
3685                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3686                               DELTA 0]
3687                | otherwise = nilOL
3688                where delta = stackDelta finalStack
3689
3690
3691         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3692         passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3693                accumCode accumUsed | isWord64 arg_ty =
3694             do
3695                 ChildCode64 code vr_lo <- iselExpr64 arg
3696                 let vr_hi = getHiVRegFromLo vr_lo
3697
3698 #if darwin_TARGET_OS
3699                 passArguments args
3700                               (drop 2 gprs)
3701                               fprs
3702                               (stackOffset+8)
3703                               (accumCode `appOL` code
3704                                     `snocOL` storeWord vr_hi gprs stackOffset
3705                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3706                               ((take 2 gprs) ++ accumUsed)
3707             where
3708                 storeWord vr (gpr:_) offset = MR gpr vr
3709                 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3710
3711 #elif linux_TARGET_OS
3712                 let stackOffset' = roundTo 8 stackOffset
3713                     stackCode = accumCode `appOL` code
3714                         `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3715                         `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3716                     regCode hireg loreg =
3717                         accumCode `appOL` code
3718                             `snocOL` MR hireg vr_hi
3719                             `snocOL` MR loreg vr_lo
3720
3721                 case gprs of
3722                     hireg : loreg : regs | even (length gprs) ->
3723                         passArguments args regs fprs stackOffset
3724                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3725                     _skipped : hireg : loreg : regs ->
3726                         passArguments args regs fprs stackOffset
3727                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3728                     _ -> -- only one or no regs left
3729                         passArguments args [] fprs (stackOffset'+8)
3730                                       stackCode accumUsed
3731 #endif
3732
3733         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3734             | reg : _ <- regs = do
3735                 register <- getRegister arg
3736                 let code = case register of
3737                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3738                             Any _ acode -> acode reg
3739                 passArguments args
3740                               (drop nGprs gprs)
3741                               (drop nFprs fprs)
3742 #if darwin_TARGET_OS
3743         -- The Darwin ABI requires that we reserve stack slots for register parameters
3744                               (stackOffset + stackBytes)
3745 #elif linux_TARGET_OS
3746         -- ... the SysV ABI doesn't.
3747                               stackOffset
3748 #endif
3749                               (accumCode `appOL` code)
3750                               (reg : accumUsed)
3751             | otherwise = do
3752                 (vr, code) <- getSomeReg arg
3753                 passArguments args
3754                               (drop nGprs gprs)
3755                               (drop nFprs fprs)
3756                               (stackOffset' + stackBytes)
3757                               (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
3758                               accumUsed
3759             where
3760 #if darwin_TARGET_OS
3761         -- stackOffset is at least 4-byte aligned
3762         -- The Darwin ABI is happy with that.
3763                 stackOffset' = stackOffset
3764 #else
3765         -- ... the SysV ABI requires 8-byte alignment for doubles.
3766                 stackOffset' | isFloatType rep && typeWidth rep == W64 =
3767                                  roundTo 8 stackOffset
3768                              | otherwise  =           stackOffset
3769 #endif
3770                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3771                 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
3772                     II32 -> (1, 0, 4, gprs)
3773 #if darwin_TARGET_OS
3774         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3775         -- we use the FPRs.
3776                     FF32 -> (1, 1, 4, fprs)
3777                     FF64 -> (2, 1, 8, fprs)
3778 #elif linux_TARGET_OS
3779         -- ... the SysV ABI doesn't.
3780                     FF32 -> (0, 1, 4, fprs)
3781                     FF64 -> (0, 1, 8, fprs)
3782 #endif
3783
3784         moveResult reduceToFF32 =
3785             case dest_regs of
3786                 [] -> nilOL
3787                 [CmmHinted dest _hint]
3788                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
3789                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
3790                     | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
3791                                           MR r_dest r4]
3792                     | otherwise -> unitOL (MR r_dest r3)
3793                     where rep = cmmRegType (CmmLocal dest)
3794                           r_dest = getRegisterReg (CmmLocal dest)
3795
3796         outOfLineFloatOp mop =
3797             do
3798                 dflags <- getDynFlagsNat
3799                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3800                               mkForeignLabel functionName Nothing True
3801                 let mopLabelOrExpr = case mopExpr of
3802                         CmmLit (CmmLabel lbl) -> Left lbl
3803                         _ -> Right mopExpr
3804                 return (mopLabelOrExpr, reduce)
3805             where
3806                 (functionName, reduce) = case mop of
3807                     MO_F32_Exp   -> (fsLit "exp", True)
3808                     MO_F32_Log   -> (fsLit "log", True)
3809                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
3810
3811                     MO_F32_Sin   -> (fsLit "sin", True)
3812                     MO_F32_Cos   -> (fsLit "cos", True)
3813                     MO_F32_Tan   -> (fsLit "tan", True)
3814
3815                     MO_F32_Asin  -> (fsLit "asin", True)
3816                     MO_F32_Acos  -> (fsLit "acos", True)
3817                     MO_F32_Atan  -> (fsLit "atan", True)
3818
3819                     MO_F32_Sinh  -> (fsLit "sinh", True)
3820                     MO_F32_Cosh  -> (fsLit "cosh", True)
3821                     MO_F32_Tanh  -> (fsLit "tanh", True)
3822                     MO_F32_Pwr   -> (fsLit "pow", True)
3823
3824                     MO_F64_Exp   -> (fsLit "exp", False)
3825                     MO_F64_Log   -> (fsLit "log", False)
3826                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
3827
3828                     MO_F64_Sin   -> (fsLit "sin", False)
3829                     MO_F64_Cos   -> (fsLit "cos", False)
3830                     MO_F64_Tan   -> (fsLit "tan", False)
3831
3832                     MO_F64_Asin  -> (fsLit "asin", False)
3833                     MO_F64_Acos  -> (fsLit "acos", False)
3834                     MO_F64_Atan  -> (fsLit "atan", False)
3835
3836                     MO_F64_Sinh  -> (fsLit "sinh", False)
3837                     MO_F64_Cosh  -> (fsLit "cosh", False)
3838                     MO_F64_Tanh  -> (fsLit "tanh", False)
3839                     MO_F64_Pwr   -> (fsLit "pow", False)
3840                     other -> pprPanic "genCCall(ppc): unknown callish op"
3841                                     (pprCallishMachOp other)
3842
3843 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3844
3845 #endif /* powerpc_TARGET_ARCH */
3846
3847
3848 -- -----------------------------------------------------------------------------
3849 -- Generating a table-branch
3850
3851 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3852
3853 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3854 genSwitch expr ids
3855   | opt_PIC
3856   = do
3857         (reg,e_code) <- getSomeReg expr
3858         lbl <- getNewLabelNat
3859         dflags <- getDynFlagsNat
3860         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3861         (tableReg,t_code) <- getSomeReg $ dynRef
3862         let
3863             jumpTable = map jumpTableEntryRel ids
3864
3865             jumpTableEntryRel Nothing
3866                 = CmmStaticLit (CmmInt 0 wordWidth)
3867             jumpTableEntryRel (Just (BlockId id))
3868                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3869                 where blockLabel = mkAsmTempLabel id
3870
3871             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3872                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3873
3874 #if x86_64_TARGET_ARCH
3875 #if darwin_TARGET_OS
3876     -- on Mac OS X/x86_64, put the jump table in the text section
3877     -- to work around a limitation of the linker.
3878     -- ld64 is unable to handle the relocations for
3879     --     .quad L1 - L0
3880     -- if L0 is not preceded by a non-anonymous label in its section.
3881
3882             code = e_code `appOL` t_code `appOL` toOL [
3883                             ADD (intSize wordWidth) op (OpReg tableReg),
3884                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3885                             LDATA Text (CmmDataLabel lbl : jumpTable)
3886                     ]
3887 #else
3888     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3889     -- relocations, hence we only get 32-bit offsets in the jump
3890     -- table. As these offsets are always negative we need to properly
3891     -- sign extend them to 64-bit. This hack should be removed in
3892     -- conjunction with the hack in PprMach.hs/pprDataItem once
3893     -- binutils 2.17 is standard.
3894             code = e_code `appOL` t_code `appOL` toOL [
3895                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3896                             MOVSxL II32
3897                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3898                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
3899                                    (OpReg reg),
3900                             ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
3901                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3902                    ]
3903 #endif
3904 #else
3905             code = e_code `appOL` t_code `appOL` toOL [
3906                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3907                             ADD (intSize wordWidth) op (OpReg tableReg),
3908                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3909                     ]
3910 #endif
3911         return code
3912   | otherwise
3913   = do
3914         (reg,e_code) <- getSomeReg expr
3915         lbl <- getNewLabelNat
3916         let
3917             jumpTable = map jumpTableEntry ids
3918             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3919             code = e_code `appOL` toOL [
3920                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3921                     JMP_TBL op [ id | Just id <- ids ]
3922                  ]
3923         -- in
3924         return code
3925 #elif powerpc_TARGET_ARCH
3926 genSwitch expr ids
3927   | opt_PIC
3928   = do
3929         (reg,e_code) <- getSomeReg expr
3930         tmp <- getNewRegNat II32
3931         lbl <- getNewLabelNat
3932         dflags <- getDynFlagsNat
3933         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3934         (tableReg,t_code) <- getSomeReg $ dynRef
3935         let
3936             jumpTable = map jumpTableEntryRel ids
3937
3938             jumpTableEntryRel Nothing
3939                 = CmmStaticLit (CmmInt 0 wordWidth)
3940             jumpTableEntryRel (Just (BlockId id))
3941                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3942                 where blockLabel = mkAsmTempLabel id
3943
3944             code = e_code `appOL` t_code `appOL` toOL [
3945                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946                             SLW tmp reg (RIImm (ImmInt 2)),
3947                             LD II32 tmp (AddrRegReg tableReg tmp),
3948                             ADD tmp tmp (RIReg tableReg),
3949                             MTCTR tmp,
3950                             BCTR [ id | Just id <- ids ]
3951                     ]
3952         return code
3953   | otherwise
3954   = do
3955         (reg,e_code) <- getSomeReg expr
3956         tmp <- getNewRegNat II32
3957         lbl <- getNewLabelNat
3958         let
3959             jumpTable = map jumpTableEntry ids
3960
3961             code = e_code `appOL` toOL [
3962                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3963                             SLW tmp reg (RIImm (ImmInt 2)),
3964                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3965                             LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3966                             MTCTR tmp,
3967                             BCTR [ id | Just id <- ids ]
3968                     ]
3969         return code
3970 #else
3971 #error "ToDo: genSwitch"
3972 #endif
3973
3974 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
3975 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3976     where blockLabel = mkAsmTempLabel id
3977
3978 -- -----------------------------------------------------------------------------
3979 -- Support bits
3980 -- -----------------------------------------------------------------------------
3981
3982
3983 -- -----------------------------------------------------------------------------
3984 -- 'condIntReg' and 'condFltReg': condition codes into registers
3985
3986 -- Turn those condition codes into integers now (when they appear on
3987 -- the right hand side of an assignment).
3988 --
3989 -- (If applicable) Do not fill the delay slots here; you will confuse the
3990 -- register allocator.
3991
3992 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3993
3994 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3995
3996 #if alpha_TARGET_ARCH
3997 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3998 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3999 #endif /* alpha_TARGET_ARCH */
4000
4001 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4002
4003 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4004
4005 condIntReg cond x y = do
4006   CondCode _ cond cond_code <- condIntCode cond x y
4007   tmp <- getNewRegNat II8
4008   let
4009         code dst = cond_code `appOL` toOL [
4010                     SETCC cond (OpReg tmp),
4011                     MOVZxL II8 (OpReg tmp) (OpReg dst)
4012                   ]
4013   -- in
4014   return (Any II32 code)
4015
4016 #endif
4017
4018 #if i386_TARGET_ARCH
4019
4020 condFltReg cond x y = do
4021   CondCode _ cond cond_code <- condFltCode cond x y
4022   tmp <- getNewRegNat II8
4023   let
4024         code dst = cond_code `appOL` toOL [
4025                     SETCC cond (OpReg tmp),
4026                     MOVZxL II8 (OpReg tmp) (OpReg dst)
4027                   ]
4028   -- in
4029   return (Any II32 code)
4030
4031 #endif
4032
4033 #if x86_64_TARGET_ARCH
4034
4035 condFltReg cond x y = do
4036   CondCode _ cond cond_code <- condFltCode cond x y
4037   tmp1 <- getNewRegNat wordSize
4038   tmp2 <- getNewRegNat wordSize
4039   let
4040         -- We have to worry about unordered operands (eg. comparisons
4041         -- against NaN).  If the operands are unordered, the comparison
4042         -- sets the parity flag, carry flag and zero flag.
4043         -- All comparisons are supposed to return false for unordered
4044         -- operands except for !=, which returns true.
4045         --
4046         -- Optimisation: we don't have to test the parity flag if we
4047         -- know the test has already excluded the unordered case: eg >
4048         -- and >= test for a zero carry flag, which can only occur for
4049         -- ordered operands.
4050         --
4051         -- ToDo: by reversing comparisons we could avoid testing the
4052         -- parity flag in more cases.
4053
4054         code dst =
4055            cond_code `appOL`
4056              (case cond of
4057                 NE  -> or_unordered dst
4058                 GU  -> plain_test   dst
4059                 GEU -> plain_test   dst
4060                 _   -> and_ordered  dst)
4061
4062         plain_test dst = toOL [
4063                     SETCC cond (OpReg tmp1),
4064                     MOVZxL II8 (OpReg tmp1) (OpReg dst)
4065                  ]
4066         or_unordered dst = toOL [
4067                     SETCC cond (OpReg tmp1),
4068                     SETCC PARITY (OpReg tmp2),
4069                     OR II8 (OpReg tmp1) (OpReg tmp2),
4070                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
4071                   ]
4072         and_ordered dst = toOL [
4073                     SETCC cond (OpReg tmp1),
4074                     SETCC NOTPARITY (OpReg tmp2),
4075                     AND II8 (OpReg tmp1) (OpReg tmp2),
4076                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
4077                   ]
4078   -- in
4079   return (Any II32 code)
4080
4081 #endif
4082
4083 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4084
4085 #if sparc_TARGET_ARCH
4086
4087 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4088     (src, code) <- getSomeReg x
4089     tmp <- getNewRegNat II32
4090     let
4091         code__2 dst = code `appOL` toOL [
4092             SUB False True g0 (RIReg src) g0,
4093             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4094     return (Any II32 code__2)
4095
4096 condIntReg EQQ x y = do
4097     (src1, code1) <- getSomeReg x
4098     (src2, code2) <- getSomeReg y
4099     tmp1 <- getNewRegNat II32
4100     tmp2 <- getNewRegNat II32
4101     let
4102         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4103             XOR False src1 (RIReg src2) dst,
4104             SUB False True g0 (RIReg dst) g0,
4105             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4106     return (Any II32 code__2)
4107
4108 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4109     (src, code) <- getSomeReg x
4110     tmp <- getNewRegNat II32
4111     let
4112         code__2 dst = code `appOL` toOL [
4113             SUB False True g0 (RIReg src) g0,
4114             ADD True False g0 (RIImm (ImmInt 0)) dst]
4115     return (Any II32 code__2)
4116
4117 condIntReg NE x y = do
4118     (src1, code1) <- getSomeReg x
4119     (src2, code2) <- getSomeReg y
4120     tmp1 <- getNewRegNat II32
4121     tmp2 <- getNewRegNat II32
4122     let
4123         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4124             XOR False src1 (RIReg src2) dst,
4125             SUB False True g0 (RIReg dst) g0,
4126             ADD True False g0 (RIImm (ImmInt 0)) dst]
4127     return (Any II32 code__2)
4128
4129 condIntReg cond x y = do
4130     BlockId lbl1 <- getBlockIdNat
4131     BlockId lbl2 <- getBlockIdNat
4132     CondCode _ cond cond_code <- condIntCode cond x y
4133     let
4134         code__2 dst = cond_code `appOL` toOL [
4135             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4136             OR False g0 (RIImm (ImmInt 0)) dst,
4137             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4138             NEWBLOCK (BlockId lbl1),
4139             OR False g0 (RIImm (ImmInt 1)) dst,
4140             NEWBLOCK (BlockId lbl2)]
4141     return (Any II32 code__2)
4142
4143 condFltReg cond x y = do
4144     BlockId lbl1 <- getBlockIdNat
4145     BlockId lbl2 <- getBlockIdNat
4146     CondCode _ cond cond_code <- condFltCode cond x y
4147     let
4148         code__2 dst = cond_code `appOL` toOL [
4149             NOP,
4150             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4151             OR False g0 (RIImm (ImmInt 0)) dst,
4152             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4153             NEWBLOCK (BlockId lbl1),
4154             OR False g0 (RIImm (ImmInt 1)) dst,
4155             NEWBLOCK (BlockId lbl2)]
4156     return (Any II32 code__2)
4157
4158 #endif /* sparc_TARGET_ARCH */
4159
4160 #if powerpc_TARGET_ARCH
4161 condReg getCond = do
4162     lbl1 <- getBlockIdNat
4163     lbl2 <- getBlockIdNat
4164     CondCode _ cond cond_code <- getCond
4165     let
4166 {-        code dst = cond_code `appOL` toOL [
4167                 BCC cond lbl1,
4168                 LI dst (ImmInt 0),
4169                 BCC ALWAYS lbl2,
4170                 NEWBLOCK lbl1,
4171                 LI dst (ImmInt 1),
4172                 BCC ALWAYS lbl2,
4173                 NEWBLOCK lbl2
4174             ]-}
4175         code dst = cond_code
4176             `appOL` negate_code
4177             `appOL` toOL [
4178                 MFCR dst,
4179                 RLWINM dst dst (bit + 1) 31 31
4180             ]
4181
4182         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4183                     | otherwise = nilOL
4184
4185         (bit, do_negate) = case cond of
4186             LTT -> (0, False)
4187             LE  -> (1, True)
4188             EQQ -> (2, False)
4189             GE  -> (0, True)
4190             GTT -> (1, False)
4191
4192             NE  -> (2, True)
4193
4194             LU  -> (0, False)
4195             LEU -> (1, True)
4196             GEU -> (0, True)
4197             GU  -> (1, False)
4198
4199     return (Any II32 code)
4200
4201 condIntReg cond x y = condReg (condIntCode cond x y)
4202 condFltReg cond x y = condReg (condFltCode cond x y)
4203 #endif /* powerpc_TARGET_ARCH */
4204
4205
4206 -- -----------------------------------------------------------------------------
4207 -- 'trivial*Code': deal with trivial instructions
4208
4209 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4210 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4211 -- Only look for constants on the right hand side, because that's
4212 -- where the generic optimizer will have put them.
4213
4214 -- Similarly, for unary instructions, we don't have to worry about
4215 -- matching an StInt as the argument, because genericOpt will already
4216 -- have handled the constant-folding.
4217
4218 trivialCode
4219     :: Width    -- Int only
4220     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4221       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4222                      -> Maybe (Operand -> Operand -> Instr)
4223       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4224                      -> Maybe (Operand -> Operand -> Instr)
4225       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4226       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4227       ,)))))
4228     -> CmmExpr -> CmmExpr -- the two arguments
4229     -> NatM Register
4230
4231 #ifndef powerpc_TARGET_ARCH
4232 trivialFCode
4233     :: Width    -- Floating point only
4234     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4235       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4236       ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4237       ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4238       ,))))
4239     -> CmmExpr -> CmmExpr -- the two arguments
4240     -> NatM Register
4241 #endif
4242
4243 trivialUCode
4244     :: Size
4245     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4246       ,IF_ARCH_i386 ((Operand -> Instr)
4247       ,IF_ARCH_x86_64 ((Operand -> Instr)
4248       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4249       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4250       ,)))))
4251     -> CmmExpr  -- the one argument
4252     -> NatM Register
4253
4254 #ifndef powerpc_TARGET_ARCH
4255 trivialUFCode
4256     :: Size
4257     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4258       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4259       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4260       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4261       ,))))
4262     -> CmmExpr -- the one argument
4263     -> NatM Register
4264 #endif
4265
4266 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4267
4268 #if alpha_TARGET_ARCH
4269
4270 trivialCode instr x (StInt y)
4271   | fits8Bits y
4272   = getRegister x               `thenNat` \ register ->
4273     getNewRegNat IntRep         `thenNat` \ tmp ->
4274     let
4275         code = registerCode register tmp
4276         src1 = registerName register tmp
4277         src2 = ImmInt (fromInteger y)
4278         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4279     in
4280     return (Any IntRep code__2)
4281
4282 trivialCode instr x y
4283   = getRegister x               `thenNat` \ register1 ->
4284     getRegister y               `thenNat` \ register2 ->
4285     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4286     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4287     let
4288         code1 = registerCode register1 tmp1 []
4289         src1  = registerName register1 tmp1
4290         code2 = registerCode register2 tmp2 []
4291         src2  = registerName register2 tmp2
4292         code__2 dst = asmSeqThen [code1, code2] .
4293                      mkSeqInstr (instr src1 (RIReg src2) dst)
4294     in
4295     return (Any IntRep code__2)
4296
4297 ------------
4298 trivialUCode instr x
4299   = getRegister x               `thenNat` \ register ->
4300     getNewRegNat IntRep         `thenNat` \ tmp ->
4301     let
4302         code = registerCode register tmp
4303         src  = registerName register tmp
4304         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4305     in
4306     return (Any IntRep code__2)
4307
4308 ------------
4309 trivialFCode _ instr x y
4310   = getRegister x               `thenNat` \ register1 ->
4311     getRegister y               `thenNat` \ register2 ->
4312     getNewRegNat FF64   `thenNat` \ tmp1 ->
4313     getNewRegNat FF64   `thenNat` \ tmp2 ->
4314     let
4315         code1 = registerCode register1 tmp1
4316         src1  = registerName register1 tmp1
4317
4318         code2 = registerCode register2 tmp2
4319         src2  = registerName register2 tmp2
4320
4321         code__2 dst = asmSeqThen [code1 [], code2 []] .
4322                       mkSeqInstr (instr src1 src2 dst)
4323     in
4324     return (Any FF64 code__2)
4325
4326 trivialUFCode _ instr x
4327   = getRegister x               `thenNat` \ register ->
4328     getNewRegNat FF64   `thenNat` \ tmp ->
4329     let
4330         code = registerCode register tmp
4331         src  = registerName register tmp
4332         code__2 dst = code . mkSeqInstr (instr src dst)
4333     in
4334     return (Any FF64 code__2)
4335
4336 #endif /* alpha_TARGET_ARCH */
4337
4338 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4339
4340 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4341
4342 {-
4343 The Rules of the Game are:
4344
4345 * You cannot assume anything about the destination register dst;
4346   it may be anything, including a fixed reg.
4347
4348 * You may compute an operand into a fixed reg, but you may not
4349   subsequently change the contents of that fixed reg.  If you
4350   want to do so, first copy the value either to a temporary
4351   or into dst.  You are free to modify dst even if it happens
4352   to be a fixed reg -- that's not your problem.
4353
4354 * You cannot assume that a fixed reg will stay live over an
4355   arbitrary computation.  The same applies to the dst reg.
4356
4357 * Temporary regs obtained from getNewRegNat are distinct from
4358   each other and from all other regs, and stay live over
4359   arbitrary computations.
4360
4361 --------------------
4362
4363 SDM's version of The Rules:
4364
4365 * If getRegister returns Any, that means it can generate correct
4366   code which places the result in any register, period.  Even if that
4367   register happens to be read during the computation.
4368
4369   Corollary #1: this means that if you are generating code for an
4370   operation with two arbitrary operands, you cannot assign the result
4371   of the first operand into the destination register before computing
4372   the second operand.  The second operand might require the old value
4373   of the destination register.
4374
4375   Corollary #2: A function might be able to generate more efficient
4376   code if it knows the destination register is a new temporary (and
4377   therefore not read by any of the sub-computations).
4378
4379 * If getRegister returns Any, then the code it generates may modify only:
4380         (a) fresh temporaries
4381         (b) the destination register
4382         (c) known registers (eg. %ecx is used by shifts)
4383   In particular, it may *not* modify global registers, unless the global
4384   register happens to be the destination register.
4385 -}
4386
4387 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4388   | is32BitLit lit_a = do
4389   b_code <- getAnyReg b
4390   let
4391        code dst
4392          = b_code dst `snocOL`
4393            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4394   -- in
4395   return (Any (intSize width) code)
4396
4397 trivialCode width instr maybe_revinstr a b
4398   = genTrivialCode (intSize width) instr a b
4399
4400 -- This is re-used for floating pt instructions too.
4401 genTrivialCode rep instr a b = do
4402   (b_op, b_code) <- getNonClobberedOperand b
4403   a_code <- getAnyReg a
4404   tmp <- getNewRegNat rep
4405   let
4406      -- We want the value of b to stay alive across the computation of a.
4407      -- But, we want to calculate a straight into the destination register,
4408      -- because the instruction only has two operands (dst := dst `op` src).
4409      -- The troublesome case is when the result of b is in the same register
4410      -- as the destination reg.  In this case, we have to save b in a
4411      -- new temporary across the computation of a.
4412      code dst
4413         | dst `regClashesWithOp` b_op =
4414                 b_code `appOL`
4415                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4416                 a_code dst `snocOL`
4417                 instr (OpReg tmp) (OpReg dst)
4418         | otherwise =
4419                 b_code `appOL`
4420                 a_code dst `snocOL`
4421                 instr b_op (OpReg dst)
4422   -- in
4423   return (Any rep code)
4424
4425 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4426 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4427 reg `regClashesWithOp` _            = False
4428
4429 -----------
4430
4431 trivialUCode rep instr x = do
4432   x_code <- getAnyReg x
4433   let
4434      code dst =
4435         x_code dst `snocOL`
4436         instr (OpReg dst)
4437   return (Any rep code)
4438
4439 -----------
4440
4441 #if i386_TARGET_ARCH
4442
4443 trivialFCode width instr x y = do
4444   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4445   (y_reg, y_code) <- getSomeReg y
4446   let
4447      size = floatSize width
4448      code dst =
4449         x_code `appOL`
4450         y_code `snocOL`
4451         instr size x_reg y_reg dst
4452   return (Any size code)
4453
4454 #endif
4455
4456 #if x86_64_TARGET_ARCH
4457 trivialFCode pk instr x y
4458   = genTrivialCode size (instr size) x y
4459   where size = floatSize pk
4460 #endif
4461
4462 -------------
4463
4464 trivialUFCode size instr x = do
4465   (x_reg, x_code) <- getSomeReg x
4466   let
4467      code dst =
4468         x_code `snocOL`
4469         instr x_reg dst
4470   -- in
4471   return (Any size code)
4472
4473 #endif /* i386_TARGET_ARCH */
4474
4475 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4476
4477 #if sparc_TARGET_ARCH
4478
4479 trivialCode pk instr x (CmmLit (CmmInt y d))
4480   | fits13Bits y
4481   = do
4482       (src1, code) <- getSomeReg x
4483       tmp <- getNewRegNat II32
4484       let
4485         src2 = ImmInt (fromInteger y)
4486         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4487       return (Any II32 code__2)
4488
4489 trivialCode pk instr x y = do
4490     (src1, code1) <- getSomeReg x
4491     (src2, code2) <- getSomeReg y
4492     tmp1 <- getNewRegNat II32
4493     tmp2 <- getNewRegNat II32
4494     let
4495         code__2 dst = code1 `appOL` code2 `snocOL`
4496                       instr src1 (RIReg src2) dst
4497     return (Any II32 code__2)
4498
4499 ------------
4500 trivialFCode pk instr x y = do
4501     (src1, code1) <- getSomeReg x
4502     (src2, code2) <- getSomeReg y
4503     tmp1 <- getNewRegNat (cmmExprType x)
4504     tmp2 <- getNewRegNat (cmmExprType y)
4505     tmp <- getNewRegNat FF64
4506     let
4507         promote x = FxTOy FF32 FF64 x tmp
4508
4509         pk1   = cmmExprType x
4510         pk2   = cmmExprType y
4511
4512         code__2 dst =
4513                 if pk1 == pk2 then
4514                     code1 `appOL` code2 `snocOL`
4515                     instr (floatSize pk) src1 src2 dst
4516                 else if typeWidth pk1 == W32 then
4517                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4518                     instr FF64 tmp src2 dst
4519                 else
4520                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4521                     instr FF64 src1 tmp dst
4522     return (Any (if pk1 == pk2 then pk1 else cmmFloat W64) code__2)
4523
4524 ------------
4525 trivialUCode size instr x = do
4526     (src, code) <- getSomeReg x
4527     tmp <- getNewRegNat size
4528     let
4529         code__2 dst = code `snocOL` instr (RIReg src) dst
4530     return (Any size code__2)
4531
4532 -------------
4533 trivialUFCode pk instr x = do
4534     (src, code) <- getSomeReg x
4535     tmp <- getNewRegNat pk
4536     let
4537         code__2 dst = code `snocOL` instr src dst
4538     return (Any pk code__2)
4539
4540 #endif /* sparc_TARGET_ARCH */
4541
4542 #if powerpc_TARGET_ARCH
4543
4544 {-
4545 Wolfgang's PowerPC version of The Rules:
4546
4547 A slightly modified version of The Rules to take advantage of the fact
4548 that PowerPC instructions work on all registers and don't implicitly
4549 clobber any fixed registers.
4550
4551 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4552
4553 * If getRegister returns Any, then the code it generates may modify only:
4554         (a) fresh temporaries
4555         (b) the destination register
4556   It may *not* modify global registers, unless the global
4557   register happens to be the destination register.
4558   It may not clobber any other registers. In fact, only ccalls clobber any
4559   fixed registers.
4560   Also, it may not modify the counter register (used by genCCall).
4561
4562   Corollary: If a getRegister for a subexpression returns Fixed, you need
4563   not move it to a fresh temporary before evaluating the next subexpression.
4564   The Fixed register won't be modified.
4565   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4566
4567 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4568   the value of the destination register.
4569 -}
4570
4571 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4572     | Just imm <- makeImmediate rep signed y
4573     = do
4574         (src1, code1) <- getSomeReg x
4575         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4576         return (Any (intSize rep) code)
4577
4578 trivialCode rep signed instr x y = do
4579     (src1, code1) <- getSomeReg x
4580     (src2, code2) <- getSomeReg y
4581     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4582     return (Any (intSize rep) code)
4583
4584 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
4585                  -> CmmExpr -> CmmExpr -> NatM Register
4586 trivialCodeNoImm' size instr x y = do
4587     (src1, code1) <- getSomeReg x
4588     (src2, code2) <- getSomeReg y
4589     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4590     return (Any size code)
4591
4592 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
4593                  -> CmmExpr -> CmmExpr -> NatM Register
4594 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
4595
4596 trivialUCode rep instr x = do
4597     (src, code) <- getSomeReg x
4598     let code' dst = code `snocOL` instr dst src
4599     return (Any rep code')
4600
4601 -- There is no "remainder" instruction on the PPC, so we have to do
4602 -- it the hard way.
4603 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4604
4605 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4606     -> CmmExpr -> CmmExpr -> NatM Register
4607 remainderCode rep div x y = do
4608     (src1, code1) <- getSomeReg x
4609     (src2, code2) <- getSomeReg y
4610     let code dst = code1 `appOL` code2 `appOL` toOL [
4611                 div dst src1 src2,
4612                 MULLW dst dst (RIReg src2),
4613                 SUBF dst dst src1
4614             ]
4615     return (Any (intSize rep) code)
4616
4617 #endif /* powerpc_TARGET_ARCH */
4618
4619
4620 -- -----------------------------------------------------------------------------
4621 --  Coercing to/from integer/floating-point...
4622
4623 -- When going to integer, we truncate (round towards 0).
4624
4625 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4626 -- conversions.  We have to store temporaries in memory to move
4627 -- between the integer and the floating point register sets.
4628
4629 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4630 -- pretend, on sparc at least, that double and float regs are seperate
4631 -- kinds, so the value has to be computed into one kind before being
4632 -- explicitly "converted" to live in the other kind.
4633
4634 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4635 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4636
4637 #if sparc_TARGET_ARCH
4638 coerceDbl2Flt :: CmmExpr -> NatM Register
4639 coerceFlt2Dbl :: CmmExpr -> NatM Register
4640 #endif
4641
4642 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4643
4644 #if alpha_TARGET_ARCH
4645
4646 coerceInt2FP _ x
4647   = getRegister x               `thenNat` \ register ->
4648     getNewRegNat IntRep         `thenNat` \ reg ->
4649     let
4650         code = registerCode register reg
4651         src  = registerName register reg
4652
4653         code__2 dst = code . mkSeqInstrs [
4654             ST Q src (spRel 0),
4655             LD TF dst (spRel 0),
4656             CVTxy Q TF dst dst]
4657     in
4658     return (Any FF64 code__2)
4659
4660 -------------
4661 coerceFP2Int x
4662   = getRegister x               `thenNat` \ register ->
4663     getNewRegNat FF64   `thenNat` \ tmp ->
4664     let
4665         code = registerCode register tmp
4666         src  = registerName register tmp
4667
4668         code__2 dst = code . mkSeqInstrs [
4669             CVTxy TF Q src tmp,
4670             ST TF tmp (spRel 0),
4671             LD Q dst (spRel 0)]
4672     in
4673     return (Any IntRep code__2)
4674
4675 #endif /* alpha_TARGET_ARCH */
4676
4677 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4678
4679 #if i386_TARGET_ARCH
4680
4681 coerceInt2FP from to x = do
4682   (x_reg, x_code) <- getSomeReg x
4683   let
4684         opc  = case to of W32 -> GITOF; W64 -> GITOD
4685         code dst = x_code `snocOL` opc x_reg dst
4686         -- ToDo: works for non-II32 reps?
4687   return (Any (floatSize to) code)
4688
4689 ------------
4690
4691 coerceFP2Int from to x = do
4692   (x_reg, x_code) <- getSomeReg x
4693   let
4694         opc  = case from of W32 -> GFTOI; W64 -> GDTOI
4695         code dst = x_code `snocOL` opc x_reg dst
4696         -- ToDo: works for non-II32 reps?
4697   -- in
4698   return (Any (intSize to) code)
4699
4700 #endif /* i386_TARGET_ARCH */
4701
4702 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4703
4704 #if x86_64_TARGET_ARCH
4705
4706 coerceFP2Int from to x = do
4707   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4708   let
4709         opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
4710         code dst = x_code `snocOL` opc x_op dst
4711   -- in
4712   return (Any (intSize to) code) -- works even if the destination rep is <II32
4713
4714 coerceInt2FP from to x = do
4715   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4716   let
4717         opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
4718         code dst = x_code `snocOL` opc x_op dst
4719   -- in
4720   return (Any (floatSize to) code) -- works even if the destination rep is <II32
4721
4722 coerceFP2FP :: Width -> CmmExpr -> NatM Register
4723 coerceFP2FP to x = do
4724   (x_reg, x_code) <- getSomeReg x
4725   let
4726         opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
4727         code dst = x_code `snocOL` opc x_reg dst
4728   -- in
4729   return (Any (floatSize to) code)
4730 #endif
4731
4732 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4733
4734 #if sparc_TARGET_ARCH
4735
4736 coerceInt2FP pk1 pk2 x = do
4737     (src, code) <- getSomeReg x
4738     let
4739         code__2 dst = code `appOL` toOL [
4740             ST pk1 src (spRel (-2)),
4741             LD pk1 (spRel (-2)) dst,
4742             FxTOy pk1 pk2 dst dst]
4743     return (Any pk2 code__2)
4744
4745 ------------
4746 coerceFP2Int pk fprep x = do
4747     (src, code) <- getSomeReg x
4748     reg <- getNewRegNat fprep
4749     tmp <- getNewRegNat pk
4750     let
4751         code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
4752             code `appOL` toOL [
4753             FxTOy fprep pk src tmp,
4754             ST pk tmp (spRel (-2)),
4755             LD pk (spRel (-2)) dst]
4756     return (Any pk code__2)
4757
4758 ------------
4759 coerceDbl2Flt x = do
4760     (src, code) <- getSomeReg x
4761     return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
4762
4763 ------------
4764 coerceFlt2Dbl x = do
4765     (src, code) <- getSomeReg x
4766     return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
4767
4768 #endif /* sparc_TARGET_ARCH */
4769
4770 #if powerpc_TARGET_ARCH
4771 coerceInt2FP fromRep toRep x = do
4772     (src, code) <- getSomeReg x
4773     lbl <- getNewLabelNat
4774     itmp <- getNewRegNat II32
4775     ftmp <- getNewRegNat FF64
4776     dflags <- getDynFlagsNat
4777     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4778     Amode addr addr_code <- getAmode dynRef
4779     let
4780         code' dst = code `appOL` maybe_exts `appOL` toOL [
4781                 LDATA ReadOnlyData
4782                                 [CmmDataLabel lbl,
4783                                  CmmStaticLit (CmmInt 0x43300000 W32),
4784                                  CmmStaticLit (CmmInt 0x80000000 W32)],
4785                 XORIS itmp src (ImmInt 0x8000),
4786                 ST II32 itmp (spRel 3),
4787                 LIS itmp (ImmInt 0x4330),
4788                 ST II32 itmp (spRel 2),
4789                 LD FF64 ftmp (spRel 2)
4790             ] `appOL` addr_code `appOL` toOL [
4791                 LD FF64 dst addr,
4792                 FSUB FF64 dst ftmp dst
4793             ] `appOL` maybe_frsp dst
4794
4795         maybe_exts = case fromRep of
4796                         W8 ->  unitOL $ EXTS II8 src src
4797                         W16 -> unitOL $ EXTS II16 src src
4798                         W32 -> nilOL
4799         maybe_frsp dst = case toRep of
4800                         W32 -> unitOL $ FRSP dst dst
4801                         W64 -> nilOL
4802     return (Any (floatSize toRep) code')
4803
4804 coerceFP2Int fromRep toRep x = do
4805     -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
4806     (src, code) <- getSomeReg x
4807     tmp <- getNewRegNat FF64
4808     let
4809         code' dst = code `appOL` toOL [
4810                 -- convert to int in FP reg
4811             FCTIWZ tmp src,
4812                 -- store value (64bit) from FP to stack
4813             ST FF64 tmp (spRel 2),
4814                 -- read low word of value (high word is undefined)
4815             LD II32 dst (spRel 3)]
4816     return (Any (intSize toRep) code')
4817 #endif /* powerpc_TARGET_ARCH */
4818
4819
4820 -- -----------------------------------------------------------------------------
4821 -- eXTRA_STK_ARGS_HERE
4822
4823 -- We (allegedly) put the first six C-call arguments in registers;
4824 -- where do we start putting the rest of them?
4825
4826 -- Moved from MachInstrs (SDM):
4827
4828 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4829 eXTRA_STK_ARGS_HERE :: Int
4830 eXTRA_STK_ARGS_HERE
4831   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4832 #endif