compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo ( mkBranchInstr )
  33
  34 -- Our intermediate code:
  35 import BlockId
  36 import PprCmm           ( pprExpr )
  37 import Cmm
  38 import MachOp
  39 import CLabel
  40 import ClosureInfo      ( C_SRT(..) )
  41
  42 -- The rest:
  43 import StaticFlags      ( opt_PIC )
  44 import ForeignCall      ( CCallConv(..) )
  45 import OrdList
  46 import Pretty
  47 import Outputable
  48 import FastString
  49 import FastBool         ( isFastTrue )
  50 import Constants        ( wORD_SIZE )
  51
  52 import Debug.Trace      ( trace )
  53
  54 import Control.Monad    ( mapAndUnzipM )
  55 import Data.Maybe       ( fromJust )
  56 import Data.Bits
  57 import Data.Word
  58 import Data.Int
  59
  60 -- -----------------------------------------------------------------------------
  61 -- Top-level of the instruction selector
  62
  63 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  64 -- They are really trees of insns to facilitate fast appending, where a
  65 -- left-to-right traversal (pre-order?) yields the insns in the correct
  66 -- order.
  67
  68 type InstrBlock = OrdList Instr
  69
  70 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  71 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  72   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  73   picBaseMb <- getPicBaseMaybeNat
  74   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  75       tops = proc : concat statics
  76   case picBaseMb of
  77       Just picBase -> initializePicBase picBase tops
  78       Nothing -> return tops
  79
  80 cmmTopCodeGen (CmmData sec dat) = do
  81   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  82
  83 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  84 basicBlockCodeGen (BasicBlock id stmts) = do
  85   instrs <- stmtsToInstrs stmts
  86   -- code generation may introduce new basic block boundaries, which
  87   -- are indicated by the NEWBLOCK instruction.  We must split up the
  88   -- instruction stream into basic blocks again.  Also, we extract
  89   -- LDATAs here too.
  90   let
  91         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  92
  93         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  94           = ([], BasicBlock id instrs : blocks, statics)
  95         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  96           = (instrs, blocks, CmmData sec dat:statics)
  97         mkBlocks instr (instrs,blocks,statics)
  98           = (instr:instrs, blocks, statics)
  99   -- in
 100   return (BasicBlock id top : other_blocks, statics)
 101
 102 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 103 stmtsToInstrs stmts
 104    = do instrss <- mapM stmtToInstrs stmts
 105         return (concatOL instrss)
 106
 107 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 108 stmtToInstrs stmt = case stmt of
 109     CmmNop         -> return nilOL
 110     CmmComment s   -> return (unitOL (COMMENT s))
 111
 112     CmmAssign reg src
 113       | isFloatingRep kind -> assignReg_FltCode kind reg src
 114 #if WORD_SIZE_IN_BITS==32
 115       | kind == I64        -> assignReg_I64Code      reg src
 116 #endif
 117       | otherwise          -> assignReg_IntCode kind reg src
 118         where kind = cmmRegRep reg
 119
 120     CmmStore addr src
 121       | isFloatingRep kind -> assignMem_FltCode kind addr src
 122 #if WORD_SIZE_IN_BITS==32
 123       | kind == I64      -> assignMem_I64Code      addr src
 124 #endif
 125       | otherwise        -> assignMem_IntCode kind addr src
 126         where kind = cmmExprRep src
 127
 128     CmmCall target result_regs args _ _
 129        -> genCCall target result_regs args
 130
 131     CmmBranch id          -> genBranch id
 132     CmmCondBranch arg id  -> genCondJump id arg
 133     CmmSwitch arg ids     -> genSwitch arg ids
 134     CmmJump arg params    -> genJump arg
 135     CmmReturn params      ->
 136       panic "stmtToInstrs: return statement should have been cps'd away"
 137
 138 -- -----------------------------------------------------------------------------
 139 -- General things for putting together code sequences
 140
 141 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 142 -- CmmExprs into CmmRegOff?
 143 mangleIndexTree :: CmmExpr -> CmmExpr
 144 mangleIndexTree (CmmRegOff reg off)
 145   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 146   where rep = cmmRegRep reg
 147
 148 -- -----------------------------------------------------------------------------
 149 --  Code gen for 64-bit arithmetic on 32-bit platforms
 150
 151 {-
 152 Simple support for generating 64-bit code (ie, 64 bit values and 64
 153 bit assignments) on 32-bit platforms.  Unlike the main code generator
 154 we merely shoot for generating working code as simply as possible, and
 155 pay little attention to code quality.  Specifically, there is no
 156 attempt to deal cleverly with the fixed-vs-floating register
 157 distinction; all values are generated into (pairs of) floating
 158 registers, even if this would mean some redundant reg-reg moves as a
 159 result.  Only one of the VRegUniques is returned, since it will be
 160 of the VRegUniqueLo form, and the upper-half VReg can be determined
 161 by applying getHiVRegFromLo to it.
 162 -}
 163
 164 data ChildCode64        -- a.k.a "Register64"
 165    = ChildCode64
 166         InstrBlock      -- code
 167         Reg             -- the lower 32-bit temporary which contains the
 168                         -- result; use getHiVRegFromLo to find the other
 169                         -- VRegUnique.  Rules of this simplified insn
 170                         -- selection game are therefore that the returned
 171                         -- Reg may be modified
 172
 173 #if WORD_SIZE_IN_BITS==32
 174 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 175 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 176 #endif
 177
 178 #ifndef x86_64_TARGET_ARCH
 179 iselExpr64        :: CmmExpr -> NatM ChildCode64
 180 #endif
 181
 182 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 183
 184 #if i386_TARGET_ARCH
 185
 186 assignMem_I64Code addrTree valueTree = do
 187   Amode addr addr_code <- getAmode addrTree
 188   ChildCode64 vcode rlo <- iselExpr64 valueTree
 189   let
 190         rhi = getHiVRegFromLo rlo
 191
 192         -- Little-endian store
 193         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 194         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 195   -- in
 196   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 197
 198
 199 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 200    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 201    let
 202          r_dst_lo = mkVReg u_dst I32
 203          r_dst_hi = getHiVRegFromLo r_dst_lo
 204          r_src_hi = getHiVRegFromLo r_src_lo
 205          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 206          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 207    -- in
 208    return (
 209         vcode `snocOL` mov_lo `snocOL` mov_hi
 210      )
 211
 212 assignReg_I64Code lvalue valueTree
 213    = panic "assignReg_I64Code(i386): invalid lvalue"
 214
 215 ------------
 216
 217 iselExpr64 (CmmLit (CmmInt i _)) = do
 218   (rlo,rhi) <- getNewRegPairNat I32
 219   let
 220         r = fromIntegral (fromIntegral i :: Word32)
 221         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 222         code = toOL [
 223                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 224                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 225                 ]
 226   -- in
 227   return (ChildCode64 code rlo)
 228
 229 iselExpr64 (CmmLoad addrTree I64) = do
 230    Amode addr addr_code <- getAmode addrTree
 231    (rlo,rhi) <- getNewRegPairNat I32
 232    let
 233         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 234         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 235    -- in
 236    return (
 237             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 238                         rlo
 239      )
 240
 241 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 242    = return (ChildCode64 nilOL (mkVReg vu I32))
 243
 244 -- we handle addition, but rather badly
 245 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 246    ChildCode64 code1 r1lo <- iselExpr64 e1
 247    (rlo,rhi) <- getNewRegPairNat I32
 248    let
 249         r = fromIntegral (fromIntegral i :: Word32)
 250         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 251         r1hi = getHiVRegFromLo r1lo
 252         code =  code1 `appOL`
 253                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 254                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 255                        MOV I32 (OpReg r1hi) (OpReg rhi),
 256                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 257    -- in
 258    return (ChildCode64 code rlo)
 259
 260 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 261    ChildCode64 code1 r1lo <- iselExpr64 e1
 262    ChildCode64 code2 r2lo <- iselExpr64 e2
 263    (rlo,rhi) <- getNewRegPairNat I32
 264    let
 265         r1hi = getHiVRegFromLo r1lo
 266         r2hi = getHiVRegFromLo r2lo
 267         code =  code1 `appOL`
 268                 code2 `appOL`
 269                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 270                        ADD I32 (OpReg r2lo) (OpReg rlo),
 271                        MOV I32 (OpReg r1hi) (OpReg rhi),
 272                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 273    -- in
 274    return (ChildCode64 code rlo)
 275
 276 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 277      fn <- getAnyReg expr
 278      r_dst_lo <-  getNewRegNat I32
 279      let r_dst_hi = getHiVRegFromLo r_dst_lo
 280          code = fn r_dst_lo
 281      return (
 282              ChildCode64 (code `snocOL`
 283                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 284                           r_dst_lo
 285             )
 286
 287 iselExpr64 expr
 288    = pprPanic "iselExpr64(i386)" (ppr expr)
 289
 290 #endif /* i386_TARGET_ARCH */
 291
 292 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 293
 294 #if sparc_TARGET_ARCH
 295
 296 assignMem_I64Code addrTree valueTree = do
 297      Amode addr addr_code <- getAmode addrTree
 298      ChildCode64 vcode rlo <- iselExpr64 valueTree
 299      (src, code) <- getSomeReg addrTree
 300      let
 301          rhi = getHiVRegFromLo rlo
 302          -- Big-endian store
 303          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 304          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 305      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 306
 307 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 308      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 309      let
 310          r_dst_lo = mkVReg u_dst pk
 311          r_dst_hi = getHiVRegFromLo r_dst_lo
 312          r_src_hi = getHiVRegFromLo r_src_lo
 313          mov_lo = mkMOV r_src_lo r_dst_lo
 314          mov_hi = mkMOV r_src_hi r_dst_hi
 315          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 316      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 317 assignReg_I64Code lvalue valueTree
 318    = panic "assignReg_I64Code(sparc): invalid lvalue"
 319
 320
 321 -- Don't delete this -- it's very handy for debugging.
 322 --iselExpr64 expr
 323 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 324 --   = panic "iselExpr64(???)"
 325
 326 iselExpr64 (CmmLoad addrTree I64) = do
 327      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 328      rlo <- getNewRegNat I32
 329      let rhi = getHiVRegFromLo rlo
 330          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 331          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 332      return (
 333             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 334                          rlo
 335           )
 336
 337 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64 _))) = do
 338      r_dst_lo <-  getNewRegNat I32
 339      let r_dst_hi = getHiVRegFromLo r_dst_lo
 340          r_src_lo = mkVReg uq I32
 341          r_src_hi = getHiVRegFromLo r_src_lo
 342          mov_lo = mkMOV r_src_lo r_dst_lo
 343          mov_hi = mkMOV r_src_hi r_dst_hi
 344          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 345      return (
 346             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 347          )
 348
 349 iselExpr64 expr
 350    = pprPanic "iselExpr64(sparc)" (ppr expr)
 351
 352 #endif /* sparc_TARGET_ARCH */
 353
 354 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 355
 356 #if powerpc_TARGET_ARCH
 357
 358 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 359 getI64Amodes addrTree = do
 360     Amode hi_addr addr_code <- getAmode addrTree
 361     case addrOffset hi_addr 4 of
 362         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 363         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 364                            return (AddrRegImm hi_ptr (ImmInt 0),
 365                                    AddrRegImm hi_ptr (ImmInt 4),
 366                                    code)
 367
 368 assignMem_I64Code addrTree valueTree = do
 369         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 370         ChildCode64 vcode rlo <- iselExpr64 valueTree
 371         let
 372                 rhi = getHiVRegFromLo rlo
 373
 374                 -- Big-endian store
 375                 mov_hi = ST I32 rhi hi_addr
 376                 mov_lo = ST I32 rlo lo_addr
 377         -- in
 378         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 379
 380 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 381    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 382    let
 383          r_dst_lo = mkVReg u_dst I32
 384          r_dst_hi = getHiVRegFromLo r_dst_lo
 385          r_src_hi = getHiVRegFromLo r_src_lo
 386          mov_lo = MR r_dst_lo r_src_lo
 387          mov_hi = MR r_dst_hi r_src_hi
 388    -- in
 389    return (
 390         vcode `snocOL` mov_lo `snocOL` mov_hi
 391      )
 392
 393 assignReg_I64Code lvalue valueTree
 394    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 395
 396
 397 -- Don't delete this -- it's very handy for debugging.
 398 --iselExpr64 expr
 399 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 400 --   = panic "iselExpr64(???)"
 401
 402 iselExpr64 (CmmLoad addrTree I64) = do
 403     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 404     (rlo, rhi) <- getNewRegPairNat I32
 405     let mov_hi = LD I32 rhi hi_addr
 406         mov_lo = LD I32 rlo lo_addr
 407     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 408                          rlo
 409
 410 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 411    = return (ChildCode64 nilOL (mkVReg vu I32))
 412
 413 iselExpr64 (CmmLit (CmmInt i _)) = do
 414   (rlo,rhi) <- getNewRegPairNat I32
 415   let
 416         half0 = fromIntegral (fromIntegral i :: Word16)
 417         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 418         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 419         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 420
 421         code = toOL [
 422                 LIS rlo (ImmInt half1),
 423                 OR rlo rlo (RIImm $ ImmInt half0),
 424                 LIS rhi (ImmInt half3),
 425                 OR rlo rlo (RIImm $ ImmInt half2)
 426                 ]
 427   -- in
 428   return (ChildCode64 code rlo)
 429
 430 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 431    ChildCode64 code1 r1lo <- iselExpr64 e1
 432    ChildCode64 code2 r2lo <- iselExpr64 e2
 433    (rlo,rhi) <- getNewRegPairNat I32
 434    let
 435         r1hi = getHiVRegFromLo r1lo
 436         r2hi = getHiVRegFromLo r2lo
 437         code =  code1 `appOL`
 438                 code2 `appOL`
 439                 toOL [ ADDC rlo r1lo r2lo,
 440                        ADDE rhi r1hi r2hi ]
 441    -- in
 442    return (ChildCode64 code rlo)
 443
 444 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
 445     (expr_reg,expr_code) <- getSomeReg expr
 446     (rlo, rhi) <- getNewRegPairNat I32
 447     let mov_hi = LI rhi (ImmInt 0)
 448         mov_lo = MR rlo expr_reg
 449     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 450                          rlo
 451 iselExpr64 expr
 452    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 453
 454 #endif /* powerpc_TARGET_ARCH */
 455
 456
 457 -- -----------------------------------------------------------------------------
 458 -- The 'Register' type
 459
 460 -- 'Register's passed up the tree.  If the stix code forces the register
 461 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 462 -- otherwise, it comes out as @Any@, and the parent can decide which
 463 -- register to put it in.
 464
 465 data Register
 466   = Fixed   MachRep Reg InstrBlock
 467   | Any     MachRep (Reg -> InstrBlock)
 468
 469 swizzleRegisterRep :: Register -> MachRep -> Register
 470 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 471 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 472
 473
 474 -- -----------------------------------------------------------------------------
 475 -- Utils based on getRegister, below
 476
 477 -- The dual to getAnyReg: compute an expression into a register, but
 478 -- we don't mind which one it is.
 479 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 480 getSomeReg expr = do
 481   r <- getRegister expr
 482   case r of
 483     Any rep code -> do
 484         tmp <- getNewRegNat rep
 485         return (tmp, code tmp)
 486     Fixed _ reg code ->
 487         return (reg, code)
 488
 489 -- -----------------------------------------------------------------------------
 490 -- Grab the Reg for a CmmReg
 491
 492 getRegisterReg :: CmmReg -> Reg
 493
 494 getRegisterReg (CmmLocal (LocalReg u pk _))
 495   = mkVReg u pk
 496
 497 getRegisterReg (CmmGlobal mid)
 498   = case get_GlobalReg_reg_or_addr mid of
 499        Left (RealReg rrno) -> RealReg rrno
 500        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 501           -- By this stage, the only MagicIds remaining should be the
 502           -- ones which map to a real machine register on this
 503           -- platform.  Hence ...
 504
 505
 506 -- -----------------------------------------------------------------------------
 507 -- Generate code to get a subtree into a Register
 508
 509 -- Don't delete this -- it's very handy for debugging.
 510 --getRegister expr
 511 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 512 --   = panic "getRegister(???)"
 513
 514 getRegister :: CmmExpr -> NatM Register
 515
 516 #if !x86_64_TARGET_ARCH
 517     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 518     -- register, it can only be used for rip-relative addressing.
 519 getRegister (CmmReg (CmmGlobal PicBaseReg))
 520   = do
 521       reg <- getPicBaseNat wordRep
 522       return (Fixed wordRep reg nilOL)
 523 #endif
 524
 525 getRegister (CmmReg reg)
 526   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 527
 528 getRegister tree@(CmmRegOff _ _)
 529   = getRegister (mangleIndexTree tree)
 530
 531
 532 #if WORD_SIZE_IN_BITS==32
 533     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 534     -- TO_W_(x), TO_W_(x >> 32)
 535
 536 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 537              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 538   ChildCode64 code rlo <- iselExpr64 x
 539   return $ Fixed I32 (getHiVRegFromLo rlo) code
 540
 541 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 542              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 543   ChildCode64 code rlo <- iselExpr64 x
 544   return $ Fixed I32 (getHiVRegFromLo rlo) code
 545
 546 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 547   ChildCode64 code rlo <- iselExpr64 x
 548   return $ Fixed I32 rlo code
 549
 550 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 551   ChildCode64 code rlo <- iselExpr64 x
 552   return $ Fixed I32 rlo code
 553
 554 #endif
 555
 556 -- end of machine-"independent" bit; here we go on the rest...
 557
 558 #if alpha_TARGET_ARCH
 559
 560 getRegister (StDouble d)
 561   = getBlockIdNat                   `thenNat` \ lbl ->
 562     getNewRegNat PtrRep             `thenNat` \ tmp ->
 563     let code dst = mkSeqInstrs [
 564             LDATA RoDataSegment lbl [
 565                     DATA TF [ImmLab (rational d)]
 566                 ],
 567             LDA tmp (AddrImm (ImmCLbl lbl)),
 568             LD TF dst (AddrReg tmp)]
 569     in
 570         return (Any F64 code)
 571
 572 getRegister (StPrim primop [x]) -- unary PrimOps
 573   = case primop of
 574       IntNegOp -> trivialUCode (NEG Q False) x
 575
 576       NotOp    -> trivialUCode NOT x
 577
 578       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 579       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 580
 581       OrdOp -> coerceIntCode IntRep x
 582       ChrOp -> chrCode x
 583
 584       Float2IntOp  -> coerceFP2Int    x
 585       Int2FloatOp  -> coerceInt2FP pr x
 586       Double2IntOp -> coerceFP2Int    x
 587       Int2DoubleOp -> coerceInt2FP pr x
 588
 589       Double2FloatOp -> coerceFltCode x
 590       Float2DoubleOp -> coerceFltCode x
 591
 592       other_op -> getRegister (StCall fn CCallConv F64 [x])
 593         where
 594           fn = case other_op of
 595                  FloatExpOp    -> fsLit "exp"
 596                  FloatLogOp    -> fsLit "log"
 597                  FloatSqrtOp   -> fsLit "sqrt"
 598                  FloatSinOp    -> fsLit "sin"
 599                  FloatCosOp    -> fsLit "cos"
 600                  FloatTanOp    -> fsLit "tan"
 601                  FloatAsinOp   -> fsLit "asin"
 602                  FloatAcosOp   -> fsLit "acos"
 603                  FloatAtanOp   -> fsLit "atan"
 604                  FloatSinhOp   -> fsLit "sinh"
 605                  FloatCoshOp   -> fsLit "cosh"
 606                  FloatTanhOp   -> fsLit "tanh"
 607                  DoubleExpOp   -> fsLit "exp"
 608                  DoubleLogOp   -> fsLit "log"
 609                  DoubleSqrtOp  -> fsLit "sqrt"
 610                  DoubleSinOp   -> fsLit "sin"
 611                  DoubleCosOp   -> fsLit "cos"
 612                  DoubleTanOp   -> fsLit "tan"
 613                  DoubleAsinOp  -> fsLit "asin"
 614                  DoubleAcosOp  -> fsLit "acos"
 615                  DoubleAtanOp  -> fsLit "atan"
 616                  DoubleSinhOp  -> fsLit "sinh"
 617                  DoubleCoshOp  -> fsLit "cosh"
 618                  DoubleTanhOp  -> fsLit "tanh"
 619   where
 620     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 621
 622 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 623   = case primop of
 624       CharGtOp -> trivialCode (CMP LTT) y x
 625       CharGeOp -> trivialCode (CMP LE) y x
 626       CharEqOp -> trivialCode (CMP EQQ) x y
 627       CharNeOp -> int_NE_code x y
 628       CharLtOp -> trivialCode (CMP LTT) x y
 629       CharLeOp -> trivialCode (CMP LE) x y
 630
 631       IntGtOp  -> trivialCode (CMP LTT) y x
 632       IntGeOp  -> trivialCode (CMP LE) y x
 633       IntEqOp  -> trivialCode (CMP EQQ) x y
 634       IntNeOp  -> int_NE_code x y
 635       IntLtOp  -> trivialCode (CMP LTT) x y
 636       IntLeOp  -> trivialCode (CMP LE) x y
 637
 638       WordGtOp -> trivialCode (CMP ULT) y x
 639       WordGeOp -> trivialCode (CMP ULE) x y
 640       WordEqOp -> trivialCode (CMP EQQ)  x y
 641       WordNeOp -> int_NE_code x y
 642       WordLtOp -> trivialCode (CMP ULT) x y
 643       WordLeOp -> trivialCode (CMP ULE) x y
 644
 645       AddrGtOp -> trivialCode (CMP ULT) y x
 646       AddrGeOp -> trivialCode (CMP ULE) y x
 647       AddrEqOp -> trivialCode (CMP EQQ)  x y
 648       AddrNeOp -> int_NE_code x y
 649       AddrLtOp -> trivialCode (CMP ULT) x y
 650       AddrLeOp -> trivialCode (CMP ULE) x y
 651
 652       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 653       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 654       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 655       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 656       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 657       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 658
 659       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 660       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 661       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 662       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 663       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 664       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 665
 666       IntAddOp  -> trivialCode (ADD Q False) x y
 667       IntSubOp  -> trivialCode (SUB Q False) x y
 668       IntMulOp  -> trivialCode (MUL Q False) x y
 669       IntQuotOp -> trivialCode (DIV Q False) x y
 670       IntRemOp  -> trivialCode (REM Q False) x y
 671
 672       WordAddOp  -> trivialCode (ADD Q False) x y
 673       WordSubOp  -> trivialCode (SUB Q False) x y
 674       WordMulOp  -> trivialCode (MUL Q False) x y
 675       WordQuotOp -> trivialCode (DIV Q True) x y
 676       WordRemOp  -> trivialCode (REM Q True) x y
 677
 678       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 679       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 680       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 681       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 682
 683       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 684       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 685       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 686       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 687
 688       AddrAddOp  -> trivialCode (ADD Q False) x y
 689       AddrSubOp  -> trivialCode (SUB Q False) x y
 690       AddrRemOp  -> trivialCode (REM Q True) x y
 691
 692       AndOp  -> trivialCode AND x y
 693       OrOp   -> trivialCode OR  x y
 694       XorOp  -> trivialCode XOR x y
 695       SllOp  -> trivialCode SLL x y
 696       SrlOp  -> trivialCode SRL x y
 697
 698       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 699       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 700       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 701
 702       FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
 703       DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
 704   where
 705     {- ------------------------------------------------------------
 706         Some bizarre special code for getting condition codes into
 707         registers.  Integer non-equality is a test for equality
 708         followed by an XOR with 1.  (Integer comparisons always set
 709         the result register to 0 or 1.)  Floating point comparisons of
 710         any kind leave the result in a floating point register, so we
 711         need to wrangle an integer register out of things.
 712     -}
 713     int_NE_code :: StixTree -> StixTree -> NatM Register
 714
 715     int_NE_code x y
 716       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 717         getNewRegNat IntRep             `thenNat` \ tmp ->
 718         let
 719             code = registerCode register tmp
 720             src  = registerName register tmp
 721             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 722         in
 723         return (Any IntRep code__2)
 724
 725     {- ------------------------------------------------------------
 726         Comments for int_NE_code also apply to cmpF_code
 727     -}
 728     cmpF_code
 729         :: (Reg -> Reg -> Reg -> Instr)
 730         -> Cond
 731         -> StixTree -> StixTree
 732         -> NatM Register
 733
 734     cmpF_code instr cond x y
 735       = trivialFCode pr instr x y       `thenNat` \ register ->
 736         getNewRegNat F64                `thenNat` \ tmp ->
 737         getBlockIdNat                   `thenNat` \ lbl ->
 738         let
 739             code = registerCode register tmp
 740             result  = registerName register tmp
 741
 742             code__2 dst = code . mkSeqInstrs [
 743                 OR zeroh (RIImm (ImmInt 1)) dst,
 744                 BF cond  result (ImmCLbl lbl),
 745                 OR zeroh (RIReg zeroh) dst,
 746                 NEWBLOCK lbl]
 747         in
 748         return (Any IntRep code__2)
 749       where
 750         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 751       ------------------------------------------------------------
 752
 753 getRegister (CmmLoad pk mem)
 754   = getAmode mem                    `thenNat` \ amode ->
 755     let
 756         code = amodeCode amode
 757         src   = amodeAddr amode
 758         size = primRepToSize pk
 759         code__2 dst = code . mkSeqInstr (LD size dst src)
 760     in
 761     return (Any pk code__2)
 762
 763 getRegister (StInt i)
 764   | fits8Bits i
 765   = let
 766         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 767     in
 768     return (Any IntRep code)
 769   | otherwise
 770   = let
 771         code dst = mkSeqInstr (LDI Q dst src)
 772     in
 773     return (Any IntRep code)
 774   where
 775     src = ImmInt (fromInteger i)
 776
 777 getRegister leaf
 778   | isJust imm
 779   = let
 780         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 781     in
 782     return (Any PtrRep code)
 783   where
 784     imm = maybeImm leaf
 785     imm__2 = case imm of Just x -> x
 786
 787 #endif /* alpha_TARGET_ARCH */
 788
 789 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 790
 791 #if i386_TARGET_ARCH
 792
 793 getRegister (CmmLit (CmmFloat f F32)) = do
 794     lbl <- getNewLabelNat
 795     dflags <- getDynFlagsNat
 796     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 797     Amode addr addr_code <- getAmode dynRef
 798     let code dst =
 799             LDATA ReadOnlyData
 800                         [CmmDataLabel lbl,
 801                          CmmStaticLit (CmmFloat f F32)]
 802             `consOL` (addr_code `snocOL`
 803             GLD F32 addr dst)
 804     -- in
 805     return (Any F32 code)
 806
 807
 808 getRegister (CmmLit (CmmFloat d F64))
 809   | d == 0.0
 810   = let code dst = unitOL (GLDZ dst)
 811     in  return (Any F64 code)
 812
 813   | d == 1.0
 814   = let code dst = unitOL (GLD1 dst)
 815     in  return (Any F64 code)
 816
 817   | otherwise = do
 818     lbl <- getNewLabelNat
 819     dflags <- getDynFlagsNat
 820     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 821     Amode addr addr_code <- getAmode dynRef
 822     let code dst =
 823             LDATA ReadOnlyData
 824                         [CmmDataLabel lbl,
 825                          CmmStaticLit (CmmFloat d F64)]
 826             `consOL` (addr_code `snocOL`
 827             GLD F64 addr dst)
 828     -- in
 829     return (Any F64 code)
 830
 831 #endif /* i386_TARGET_ARCH */
 832
 833 #if x86_64_TARGET_ARCH
 834
 835 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 836    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 837         -- I don't know why there are xorpd, xorps, and pxor instructions.
 838         -- They all appear to do the same thing --SDM
 839    return (Any rep code)
 840
 841 getRegister (CmmLit (CmmFloat f rep)) = do
 842     lbl <- getNewLabelNat
 843     let code dst = toOL [
 844             LDATA ReadOnlyData
 845                         [CmmDataLabel lbl,
 846                          CmmStaticLit (CmmFloat f rep)],
 847             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 848             ]
 849     -- in
 850     return (Any rep code)
 851
 852 #endif /* x86_64_TARGET_ARCH */
 853
 854 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 855
 856 -- catch simple cases of zero- or sign-extended load
 857 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 858   code <- intLoadCode (MOVZxL I8) addr
 859   return (Any I32 code)
 860
 861 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 862   code <- intLoadCode (MOVSxL I8) addr
 863   return (Any I32 code)
 864
 865 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 866   code <- intLoadCode (MOVZxL I16) addr
 867   return (Any I32 code)
 868
 869 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 870   code <- intLoadCode (MOVSxL I16) addr
 871   return (Any I32 code)
 872
 873 #endif
 874
 875 #if x86_64_TARGET_ARCH
 876
 877 -- catch simple cases of zero- or sign-extended load
 878 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 879   code <- intLoadCode (MOVZxL I8) addr
 880   return (Any I64 code)
 881
 882 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 883   code <- intLoadCode (MOVSxL I8) addr
 884   return (Any I64 code)
 885
 886 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 887   code <- intLoadCode (MOVZxL I16) addr
 888   return (Any I64 code)
 889
 890 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 891   code <- intLoadCode (MOVSxL I16) addr
 892   return (Any I64 code)
 893
 894 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 895   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 896   return (Any I64 code)
 897
 898 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 899   code <- intLoadCode (MOVSxL I32) addr
 900   return (Any I64 code)
 901
 902 #endif
 903
 904 #if x86_64_TARGET_ARCH
 905 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 906                                      CmmLit displacement])
 907     = return $ Any I64 (\dst -> unitOL $
 908         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 909 #endif
 910
 911 #if x86_64_TARGET_ARCH
 912 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 913   x_code <- getAnyReg x
 914   lbl <- getNewLabelNat
 915   let
 916     code dst = x_code dst `appOL` toOL [
 917         -- This is how gcc does it, so it can't be that bad:
 918         LDATA ReadOnlyData16 [
 919                 CmmAlign 16,
 920                 CmmDataLabel lbl,
 921                 CmmStaticLit (CmmInt 0x80000000 I32),
 922                 CmmStaticLit (CmmInt 0 I32),
 923                 CmmStaticLit (CmmInt 0 I32),
 924                 CmmStaticLit (CmmInt 0 I32)
 925         ],
 926         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 927                 -- xorps, so we need the 128-bit constant
 928                 -- ToDo: rip-relative
 929         ]
 930   --
 931   return (Any F32 code)
 932
 933 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 934   x_code <- getAnyReg x
 935   lbl <- getNewLabelNat
 936   let
 937         -- This is how gcc does it, so it can't be that bad:
 938     code dst = x_code dst `appOL` toOL [
 939         LDATA ReadOnlyData16 [
 940                 CmmAlign 16,
 941                 CmmDataLabel lbl,
 942                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 943                 CmmStaticLit (CmmInt 0 I64)
 944         ],
 945                 -- gcc puts an unpck here.  Wonder if we need it.
 946         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 947                 -- xorpd, so we need the 128-bit constant
 948         ]
 949   --
 950   return (Any F64 code)
 951 #endif
 952
 953 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 954
 955 getRegister (CmmMachOp mop [x]) -- unary MachOps
 956   = case mop of
 957 #if i386_TARGET_ARCH
 958       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 959       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 960 #endif
 961
 962       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 963       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 964
 965       -- Nop conversions
 966       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 967       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 968       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 969       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 970       MO_U_Conv I32 I16 -> toI16Reg I32 x
 971       MO_S_Conv I32 I16 -> toI16Reg I32 x
 972 #if x86_64_TARGET_ARCH
 973       MO_U_Conv I64 I32 -> conversionNop I64 x
 974       MO_S_Conv I64 I32 -> conversionNop I64 x
 975       MO_U_Conv I64 I16 -> toI16Reg I64 x
 976       MO_S_Conv I64 I16 -> toI16Reg I64 x
 977       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 978       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 979 #endif
 980
 981       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 982       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 983
 984       -- widenings
 985       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 986       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 987       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 988
 989       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 990       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 991       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 992
 993 #if x86_64_TARGET_ARCH
 994       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 995       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 996       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 997       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 998       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 999       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
1000         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1001         -- However, we don't want the register allocator to throw it
1002         -- away as an unnecessary reg-to-reg move, so we keep it in
1003         -- the form of a movzl and print it as a movl later.
1004 #endif
1005
1006 #if i386_TARGET_ARCH
1007       MO_S_Conv F32 F64 -> conversionNop F64 x
1008       MO_S_Conv F64 F32 -> conversionNop F32 x
1009 #else
1010       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1011       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1012 #endif
1013
1014       MO_S_Conv from to
1015         | isFloatingRep from -> coerceFP2Int from to x
1016         | isFloatingRep to   -> coerceInt2FP from to x
1017
1018       other -> pprPanic "getRegister" (pprMachOp mop)
1019    where
1020         -- signed or unsigned extension.
1021         integerExtend from to instr expr = do
1022             (reg,e_code) <- if from == I8 then getByteReg expr
1023                                           else getSomeReg expr
1024             let
1025                 code dst =
1026                   e_code `snocOL`
1027                   instr from (OpReg reg) (OpReg dst)
1028             return (Any to code)
1029
1030         toI8Reg new_rep expr
1031             = do codefn <- getAnyReg expr
1032                  return (Any new_rep codefn)
1033                 -- HACK: use getAnyReg to get a byte-addressable register.
1034                 -- If the source was a Fixed register, this will add the
1035                 -- mov instruction to put it into the desired destination.
1036                 -- We're assuming that the destination won't be a fixed
1037                 -- non-byte-addressable register; it won't be, because all
1038                 -- fixed registers are word-sized.
1039
1040         toI16Reg = toI8Reg -- for now
1041
1042         conversionNop new_rep expr
1043             = do e_code <- getRegister expr
1044                  return (swizzleRegisterRep e_code new_rep)
1045
1046
1047 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1048   = case mop of
1049       MO_Eq F32   -> condFltReg EQQ x y
1050       MO_Ne F32   -> condFltReg NE x y
1051       MO_S_Gt F32 -> condFltReg GTT x y
1052       MO_S_Ge F32 -> condFltReg GE x y
1053       MO_S_Lt F32 -> condFltReg LTT x y
1054       MO_S_Le F32 -> condFltReg LE x y
1055
1056       MO_Eq F64   -> condFltReg EQQ x y
1057       MO_Ne F64   -> condFltReg NE x y
1058       MO_S_Gt F64 -> condFltReg GTT x y
1059       MO_S_Ge F64 -> condFltReg GE x y
1060       MO_S_Lt F64 -> condFltReg LTT x y
1061       MO_S_Le F64 -> condFltReg LE x y
1062
1063       MO_Eq rep   -> condIntReg EQQ x y
1064       MO_Ne rep   -> condIntReg NE x y
1065
1066       MO_S_Gt rep -> condIntReg GTT x y
1067       MO_S_Ge rep -> condIntReg GE x y
1068       MO_S_Lt rep -> condIntReg LTT x y
1069       MO_S_Le rep -> condIntReg LE x y
1070
1071       MO_U_Gt rep -> condIntReg GU  x y
1072       MO_U_Ge rep -> condIntReg GEU x y
1073       MO_U_Lt rep -> condIntReg LU  x y
1074       MO_U_Le rep -> condIntReg LEU x y
1075
1076 #if i386_TARGET_ARCH
1077       MO_Add F32 -> trivialFCode F32 GADD x y
1078       MO_Sub F32 -> trivialFCode F32 GSUB x y
1079
1080       MO_Add F64 -> trivialFCode F64 GADD x y
1081       MO_Sub F64 -> trivialFCode F64 GSUB x y
1082
1083       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1084       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1085 #endif
1086
1087 #if x86_64_TARGET_ARCH
1088       MO_Add F32 -> trivialFCode F32 ADD x y
1089       MO_Sub F32 -> trivialFCode F32 SUB x y
1090
1091       MO_Add F64 -> trivialFCode F64 ADD x y
1092       MO_Sub F64 -> trivialFCode F64 SUB x y
1093
1094       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1095       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1096 #endif
1097
1098       MO_Add rep -> add_code rep x y
1099       MO_Sub rep -> sub_code rep x y
1100
1101       MO_S_Quot rep -> div_code rep True  True  x y
1102       MO_S_Rem  rep -> div_code rep True  False x y
1103       MO_U_Quot rep -> div_code rep False True  x y
1104       MO_U_Rem  rep -> div_code rep False False x y
1105
1106 #if i386_TARGET_ARCH
1107       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1108       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1109 #endif
1110
1111 #if x86_64_TARGET_ARCH
1112       MO_Mul   F32 -> trivialFCode F32 MUL x y
1113       MO_Mul   F64 -> trivialFCode F64 MUL x y
1114 #endif
1115
1116       MO_Mul   rep -> let op = IMUL rep in
1117                       trivialCode rep op (Just op) x y
1118
1119       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1120
1121       MO_And rep -> let op = AND rep in
1122                     trivialCode rep op (Just op) x y
1123       MO_Or  rep -> let op = OR  rep in
1124                     trivialCode rep op (Just op) x y
1125       MO_Xor rep -> let op = XOR rep in
1126                     trivialCode rep op (Just op) x y
1127
1128         {- Shift ops on x86s have constraints on their source, it
1129            either has to be Imm, CL or 1
1130             => trivialCode is not restrictive enough (sigh.)
1131         -}
1132       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1133       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1134       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1135
1136       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1137   where
1138     --------------------
1139     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1140     imulMayOflo rep a b = do
1141          (a_reg, a_code) <- getNonClobberedReg a
1142          b_code <- getAnyReg b
1143          let
1144              shift_amt  = case rep of
1145                            I32 -> 31
1146                            I64 -> 63
1147                            _ -> panic "shift_amt"
1148
1149              code = a_code `appOL` b_code eax `appOL`
1150                         toOL [
1151                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1152                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1153                                 -- sign extend lower part
1154                            SUB rep (OpReg edx) (OpReg eax)
1155                                 -- compare against upper
1156                            -- eax==0 if high part == sign extended low part
1157                         ]
1158          -- in
1159          return (Fixed rep eax code)
1160
1161     --------------------
1162     shift_code :: MachRep
1163                -> (Operand -> Operand -> Instr)
1164                -> CmmExpr
1165                -> CmmExpr
1166                -> NatM Register
1167
1168     {- Case1: shift length as immediate -}
1169     shift_code rep instr x y@(CmmLit lit) = do
1170           x_code <- getAnyReg x
1171           let
1172                code dst
1173                   = x_code dst `snocOL`
1174                     instr (OpImm (litToImm lit)) (OpReg dst)
1175           -- in
1176           return (Any rep code)
1177
1178     {- Case2: shift length is complex (non-immediate)
1179       * y must go in %ecx.
1180       * we cannot do y first *and* put its result in %ecx, because
1181         %ecx might be clobbered by x.
1182       * if we do y second, then x cannot be
1183         in a clobbered reg.  Also, we cannot clobber x's reg
1184         with the instruction itself.
1185       * so we can either:
1186         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1187         - do y second and put its result into %ecx.  x gets placed in a fresh
1188           tmp.  This is likely to be better, becuase the reg alloc can
1189           eliminate this reg->reg move here (it won't eliminate the other one,
1190           because the move is into the fixed %ecx).
1191     -}
1192     shift_code rep instr x y{-amount-} = do
1193         x_code <- getAnyReg x
1194         tmp <- getNewRegNat rep
1195         y_code <- getAnyReg y
1196         let
1197            code = x_code tmp `appOL`
1198                   y_code ecx `snocOL`
1199                   instr (OpReg ecx) (OpReg tmp)
1200         -- in
1201         return (Fixed rep tmp code)
1202
1203     --------------------
1204     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1205     add_code rep x (CmmLit (CmmInt y _))
1206         | not (is64BitInteger y) = add_int rep x y
1207     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1208
1209     --------------------
1210     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1211     sub_code rep x (CmmLit (CmmInt y _))
1212         | not (is64BitInteger (-y)) = add_int rep x (-y)
1213     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1214
1215     -- our three-operand add instruction:
1216     add_int rep x y = do
1217         (x_reg, x_code) <- getSomeReg x
1218         let
1219             imm = ImmInt (fromInteger y)
1220             code dst
1221                = x_code `snocOL`
1222                  LEA rep
1223                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1224                         (OpReg dst)
1225         --
1226         return (Any rep code)
1227
1228     ----------------------
1229     div_code rep signed quotient x y = do
1230            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1231            x_code <- getAnyReg x
1232            let
1233              widen | signed    = CLTD rep
1234                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1235
1236              instr | signed    = IDIV
1237                    | otherwise = DIV
1238
1239              code = y_code `appOL`
1240                     x_code eax `appOL`
1241                     toOL [widen, instr rep y_op]
1242
1243              result | quotient  = eax
1244                     | otherwise = edx
1245
1246            -- in
1247            return (Fixed rep result code)
1248
1249
1250 getRegister (CmmLoad mem pk)
1251   | isFloatingRep pk
1252   = do
1253     Amode src mem_code <- getAmode mem
1254     let
1255         code dst = mem_code `snocOL`
1256                    IF_ARCH_i386(GLD pk src dst,
1257                                 MOV pk (OpAddr src) (OpReg dst))
1258     --
1259     return (Any pk code)
1260
1261 #if i386_TARGET_ARCH
1262 getRegister (CmmLoad mem pk)
1263   | pk /= I64
1264   = do
1265     code <- intLoadCode (instr pk) mem
1266     return (Any pk code)
1267   where
1268         instr I8  = MOVZxL pk
1269         instr I16 = MOV I16
1270         instr I32 = MOV I32
1271         -- we always zero-extend 8-bit loads, if we
1272         -- can't think of anything better.  This is because
1273         -- we can't guarantee access to an 8-bit variant of every register
1274         -- (esi and edi don't have 8-bit variants), so to make things
1275         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1276 #endif
1277
1278 #if x86_64_TARGET_ARCH
1279 -- Simpler memory load code on x86_64
1280 getRegister (CmmLoad mem pk)
1281   = do
1282     code <- intLoadCode (MOV pk) mem
1283     return (Any pk code)
1284 #endif
1285
1286 getRegister (CmmLit (CmmInt 0 rep))
1287   = let
1288         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1289         adj_rep = case rep of I64 -> I32; _ -> rep
1290         rep1 = IF_ARCH_i386( rep, adj_rep )
1291         code dst
1292            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1293     in
1294         return (Any rep code)
1295
1296 #if x86_64_TARGET_ARCH
1297   -- optimisation for loading small literals on x86_64: take advantage
1298   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1299   -- instruction forms are shorter.
1300 getRegister (CmmLit lit)
1301   | I64 <- cmmLitRep lit, not (isBigLit lit)
1302   = let
1303         imm = litToImm lit
1304         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1305     in
1306         return (Any I64 code)
1307   where
1308    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1309    isBigLit _ = False
1310         -- note1: not the same as is64BitLit, because that checks for
1311         -- signed literals that fit in 32 bits, but we want unsigned
1312         -- literals here.
1313         -- note2: all labels are small, because we're assuming the
1314         -- small memory model (see gcc docs, -mcmodel=small).
1315 #endif
1316
1317 getRegister (CmmLit lit)
1318   = let
1319         rep = cmmLitRep lit
1320         imm = litToImm lit
1321         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1322     in
1323         return (Any rep code)
1324
1325 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1326
1327
1328 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1329    -> NatM (Reg -> InstrBlock)
1330 intLoadCode instr mem = do
1331   Amode src mem_code <- getAmode mem
1332   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1333
1334 -- Compute an expression into *any* register, adding the appropriate
1335 -- move instruction if necessary.
1336 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1337 getAnyReg expr = do
1338   r <- getRegister expr
1339   anyReg r
1340
1341 anyReg :: Register -> NatM (Reg -> InstrBlock)
1342 anyReg (Any _ code)          = return code
1343 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1344
1345 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1346 -- Fixed registers might not be byte-addressable, so we make sure we've
1347 -- got a temporary, inserting an extra reg copy if necessary.
1348 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1349 #if x86_64_TARGET_ARCH
1350 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1351 #else
1352 getByteReg expr = do
1353   r <- getRegister expr
1354   case r of
1355     Any rep code -> do
1356         tmp <- getNewRegNat rep
1357         return (tmp, code tmp)
1358     Fixed rep reg code
1359         | isVirtualReg reg -> return (reg,code)
1360         | otherwise -> do
1361             tmp <- getNewRegNat rep
1362             return (tmp, code `snocOL` reg2reg rep reg tmp)
1363         -- ToDo: could optimise slightly by checking for byte-addressable
1364         -- real registers, but that will happen very rarely if at all.
1365 #endif
1366
1367 -- Another variant: this time we want the result in a register that cannot
1368 -- be modified by code to evaluate an arbitrary expression.
1369 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1370 getNonClobberedReg expr = do
1371   r <- getRegister expr
1372   case r of
1373     Any rep code -> do
1374         tmp <- getNewRegNat rep
1375         return (tmp, code tmp)
1376     Fixed rep reg code
1377         -- only free regs can be clobbered
1378         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1379                 tmp <- getNewRegNat rep
1380                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1381         | otherwise ->
1382                 return (reg, code)
1383
1384 reg2reg :: MachRep -> Reg -> Reg -> Instr
1385 reg2reg rep src dst
1386 #if i386_TARGET_ARCH
1387   | isFloatingRep rep = GMOV src dst
1388 #endif
1389   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1390
1391 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1392
1393 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1394
1395 #if sparc_TARGET_ARCH
1396
1397 getRegister (CmmLit (CmmFloat f F32)) = do
1398     lbl <- getNewLabelNat
1399     let code dst = toOL [
1400             LDATA ReadOnlyData
1401                         [CmmDataLabel lbl,
1402                          CmmStaticLit (CmmFloat f F32)],
1403             SETHI (HI (ImmCLbl lbl)) dst,
1404             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1405     return (Any F32 code)
1406
1407 getRegister (CmmLit (CmmFloat d F64)) = do
1408     lbl <- getNewLabelNat
1409     let code dst = toOL [
1410             LDATA ReadOnlyData
1411                         [CmmDataLabel lbl,
1412                          CmmStaticLit (CmmFloat d F64)],
1413             SETHI (HI (ImmCLbl lbl)) dst,
1414             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1415     return (Any F64 code)
1416
1417 getRegister (CmmMachOp mop [x]) -- unary MachOps
1418   = case mop of
1419       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1420       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1421
1422       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1423       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1424
1425       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1426
1427       MO_U_Conv F64 F32-> coerceDbl2Flt x
1428       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1429
1430       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1431       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1432       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1433       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1434
1435       -- Conversions which are a nop on sparc
1436       MO_U_Conv from to
1437         | from == to   -> conversionNop to   x
1438       MO_U_Conv I32 to -> conversionNop to   x
1439       MO_S_Conv I32 to -> conversionNop to   x
1440
1441       -- widenings
1442       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1443       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1444       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1445       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1446
1447       other_op -> panic "Unknown unary mach op"
1448     where
1449         -- XXX SLL/SRL?
1450         integerExtend signed from to expr = do
1451            (reg, e_code) <- getSomeReg expr
1452            let
1453                code dst =
1454                    e_code `snocOL`
1455                    ((if signed then SRA else SRL)
1456                           reg (RIImm (ImmInt 0)) dst)
1457            return (Any to code)
1458         conversionNop new_rep expr
1459             = do e_code <- getRegister expr
1460                  return (swizzleRegisterRep e_code new_rep)
1461
1462 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1463   = case mop of
1464       MO_Eq F32 -> condFltReg EQQ x y
1465       MO_Ne F32 -> condFltReg NE x y
1466
1467       MO_S_Gt F32 -> condFltReg GTT x y
1468       MO_S_Ge F32 -> condFltReg GE x y
1469       MO_S_Lt F32 -> condFltReg LTT x y
1470       MO_S_Le F32 -> condFltReg LE x y
1471
1472       MO_Eq F64 -> condFltReg EQQ x y
1473       MO_Ne F64 -> condFltReg NE x y
1474
1475       MO_S_Gt F64 -> condFltReg GTT x y
1476       MO_S_Ge F64 -> condFltReg GE x y
1477       MO_S_Lt F64 -> condFltReg LTT x y
1478       MO_S_Le F64 -> condFltReg LE x y
1479
1480       MO_Eq rep -> condIntReg EQQ x y
1481       MO_Ne rep -> condIntReg NE x y
1482
1483       MO_S_Gt rep -> condIntReg GTT x y
1484       MO_S_Ge rep -> condIntReg GE x y
1485       MO_S_Lt rep -> condIntReg LTT x y
1486       MO_S_Le rep -> condIntReg LE x y
1487
1488       MO_U_Gt I32  -> condIntReg GTT x y
1489       MO_U_Ge I32  -> condIntReg GE x y
1490       MO_U_Lt I32  -> condIntReg LTT x y
1491       MO_U_Le I32  -> condIntReg LE x y
1492
1493       MO_U_Gt I16 -> condIntReg GU  x y
1494       MO_U_Ge I16 -> condIntReg GEU x y
1495       MO_U_Lt I16 -> condIntReg LU  x y
1496       MO_U_Le I16 -> condIntReg LEU x y
1497
1498       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1499       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1500
1501       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1502 {-
1503       -- ToDo: teach about V8+ SPARC div instructions
1504       MO_S_Quot I32 -> idiv (fsLit ".div")  x y
1505       MO_S_Rem I32  -> idiv (fsLit ".rem")  x y
1506       MO_U_Quot I32 -> idiv (fsLit ".udiv")  x y
1507       MO_U_Rem I32  -> idiv (fsLit ".urem")  x y
1508 -}
1509       MO_Add F32  -> trivialFCode F32 FADD  x y
1510       MO_Sub F32   -> trivialFCode F32  FSUB x y
1511       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1512       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1513
1514       MO_Add F64   -> trivialFCode F64 FADD  x y
1515       MO_Sub F64   -> trivialFCode F64  FSUB x y
1516       MO_Mul F64   -> trivialFCode F64  FMUL x y
1517       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1518
1519       MO_And rep   -> trivialCode rep (AND False) x y
1520       MO_Or rep    -> trivialCode rep (OR  False) x y
1521       MO_Xor rep   -> trivialCode rep (XOR False) x y
1522
1523       MO_Mul rep -> trivialCode rep (SMUL False) x y
1524
1525       MO_Shl rep   -> trivialCode rep SLL  x y
1526       MO_U_Shr rep   -> trivialCode rep SRL x y
1527       MO_S_Shr rep   -> trivialCode rep SRA x y
1528
1529 {-
1530       MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1531                                          [promote x, promote y])
1532                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1533       MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1534                                         [x, y])
1535 -}
1536       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1537   where
1538     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1539
1540     --------------------
1541     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1542     imulMayOflo rep a b = do
1543          (a_reg, a_code) <- getSomeReg a
1544          (b_reg, b_code) <- getSomeReg b
1545          res_lo <- getNewRegNat I32
1546          res_hi <- getNewRegNat I32
1547          let
1548             shift_amt  = case rep of
1549                           I32 -> 31
1550                           I64 -> 63
1551                           _ -> panic "shift_amt"
1552             code dst = a_code `appOL` b_code `appOL`
1553                        toOL [
1554                            SMUL False a_reg (RIReg b_reg) res_lo,
1555                            RDY res_hi,
1556                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1557                            SUB False False res_lo (RIReg res_hi) dst
1558                         ]
1559          return (Any I32 code)
1560
1561 getRegister (CmmLoad mem pk) = do
1562     Amode src code <- getAmode mem
1563     let
1564         code__2 dst = code `snocOL` LD pk src dst
1565     return (Any pk code__2)
1566
1567 getRegister (CmmLit (CmmInt i _))
1568   | fits13Bits i
1569   = let
1570         src = ImmInt (fromInteger i)
1571         code dst = unitOL (OR False g0 (RIImm src) dst)
1572     in
1573         return (Any I32 code)
1574
1575 getRegister (CmmLit lit)
1576   = let rep = cmmLitRep lit
1577         imm = litToImm lit
1578         code dst = toOL [
1579             SETHI (HI imm) dst,
1580             OR False dst (RIImm (LO imm)) dst]
1581     in return (Any I32 code)
1582
1583 #endif /* sparc_TARGET_ARCH */
1584
1585 #if powerpc_TARGET_ARCH
1586 getRegister (CmmLoad mem pk)
1587   | pk /= I64
1588   = do
1589         Amode addr addr_code <- getAmode mem
1590         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1591                        addr_code `snocOL` LD pk dst addr
1592         return (Any pk code)
1593
1594 -- catch simple cases of zero- or sign-extended load
1595 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1596     Amode addr addr_code <- getAmode mem
1597     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1598
1599 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1600
1601 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1602     Amode addr addr_code <- getAmode mem
1603     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1604
1605 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1606     Amode addr addr_code <- getAmode mem
1607     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1608
1609 getRegister (CmmMachOp mop [x]) -- unary MachOps
1610   = case mop of
1611       MO_Not rep   -> trivialUCode rep NOT x
1612
1613       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1614       MO_S_Conv F32 F64 -> conversionNop F64 x
1615
1616       MO_S_Conv from to
1617         | from == to         -> conversionNop to x
1618         | isFloatingRep from -> coerceFP2Int from to x
1619         | isFloatingRep to   -> coerceInt2FP from to x
1620
1621         -- narrowing is a nop: we treat the high bits as undefined
1622       MO_S_Conv I32 to -> conversionNop to x
1623       MO_S_Conv I16 I8 -> conversionNop I8 x
1624       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1625       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1626
1627       MO_U_Conv from to
1628         | from == to -> conversionNop to x
1629         -- narrowing is a nop: we treat the high bits as undefined
1630       MO_U_Conv I32 to -> conversionNop to x
1631       MO_U_Conv I16 I8 -> conversionNop I8 x
1632       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1633       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1634
1635       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1636       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1637       MO_S_Neg rep      -> trivialUCode rep NEG x
1638
1639     where
1640         conversionNop new_rep expr
1641             = do e_code <- getRegister expr
1642                  return (swizzleRegisterRep e_code new_rep)
1643
1644 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1645   = case mop of
1646       MO_Eq F32 -> condFltReg EQQ x y
1647       MO_Ne F32 -> condFltReg NE  x y
1648
1649       MO_S_Gt F32 -> condFltReg GTT x y
1650       MO_S_Ge F32 -> condFltReg GE  x y
1651       MO_S_Lt F32 -> condFltReg LTT x y
1652       MO_S_Le F32 -> condFltReg LE  x y
1653
1654       MO_Eq F64 -> condFltReg EQQ x y
1655       MO_Ne F64 -> condFltReg NE  x y
1656
1657       MO_S_Gt F64 -> condFltReg GTT x y
1658       MO_S_Ge F64 -> condFltReg GE  x y
1659       MO_S_Lt F64 -> condFltReg LTT x y
1660       MO_S_Le F64 -> condFltReg LE  x y
1661
1662       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1663       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1664
1665       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1666       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1667       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1668       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1669
1670       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1671       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1672       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1673       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1674
1675       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1676       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1677       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1678       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1679
1680       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1681       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1682       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1683       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1684
1685          -- optimize addition with 32-bit immediate
1686          -- (needed for PIC)
1687       MO_Add I32 ->
1688         case y of
1689           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1690             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1691           CmmLit lit
1692             -> do
1693                 (src, srcCode) <- getSomeReg x
1694                 let imm = litToImm lit
1695                     code dst = srcCode `appOL` toOL [
1696                                     ADDIS dst src (HA imm),
1697                                     ADD dst dst (RIImm (LO imm))
1698                                 ]
1699                 return (Any I32 code)
1700           _ -> trivialCode I32 True ADD x y
1701
1702       MO_Add rep -> trivialCode rep True ADD x y
1703       MO_Sub rep ->
1704         case y of    -- subfi ('substract from' with immediate) doesn't exist
1705           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1706             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1707           _ -> trivialCodeNoImm rep SUBF y x
1708
1709       MO_Mul rep -> trivialCode rep True MULLW x y
1710
1711       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1712
1713       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1714       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1715
1716       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1717       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1718
1719       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1720       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1721
1722       MO_And rep   -> trivialCode rep False AND x y
1723       MO_Or rep    -> trivialCode rep False OR x y
1724       MO_Xor rep   -> trivialCode rep False XOR x y
1725
1726       MO_Shl rep   -> trivialCode rep False SLW x y
1727       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1728       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1729
1730 getRegister (CmmLit (CmmInt i rep))
1731   | Just imm <- makeImmediate rep True i
1732   = let
1733         code dst = unitOL (LI dst imm)
1734     in
1735         return (Any rep code)
1736
1737 getRegister (CmmLit (CmmFloat f frep)) = do
1738     lbl <- getNewLabelNat
1739     dflags <- getDynFlagsNat
1740     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1741     Amode addr addr_code <- getAmode dynRef
1742     let code dst =
1743             LDATA ReadOnlyData  [CmmDataLabel lbl,
1744                                  CmmStaticLit (CmmFloat f frep)]
1745             `consOL` (addr_code `snocOL` LD frep dst addr)
1746     return (Any frep code)
1747
1748 getRegister (CmmLit lit)
1749   = let rep = cmmLitRep lit
1750         imm = litToImm lit
1751         code dst = toOL [
1752               LIS dst (HA imm),
1753               ADD dst dst (RIImm (LO imm))
1754           ]
1755     in return (Any rep code)
1756
1757 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1758
1759     -- extend?Rep: wrap integer expression of type rep
1760     -- in a conversion to I32
1761 extendSExpr I32 x = x
1762 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1763 extendUExpr I32 x = x
1764 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1765
1766 #endif /* powerpc_TARGET_ARCH */
1767
1768
1769 -- -----------------------------------------------------------------------------
1770 --  The 'Amode' type: Memory addressing modes passed up the tree.
1771
1772 data Amode = Amode AddrMode InstrBlock
1773
1774 {-
1775 Now, given a tree (the argument to an CmmLoad) that references memory,
1776 produce a suitable addressing mode.
1777
1778 A Rule of the Game (tm) for Amodes: use of the addr bit must
1779 immediately follow use of the code part, since the code part puts
1780 values in registers which the addr then refers to.  So you can't put
1781 anything in between, lest it overwrite some of those registers.  If
1782 you need to do some other computation between the code part and use of
1783 the addr bit, first store the effective address from the amode in a
1784 temporary, then do the other computation, and then use the temporary:
1785
1786     code
1787     LEA amode, tmp
1788     ... other computation ...
1789     ... (tmp) ...
1790 -}
1791
1792 getAmode :: CmmExpr -> NatM Amode
1793 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1794
1795 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1796
1797 #if alpha_TARGET_ARCH
1798
1799 getAmode (StPrim IntSubOp [x, StInt i])
1800   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1801     getRegister x               `thenNat` \ register ->
1802     let
1803         code = registerCode register tmp
1804         reg  = registerName register tmp
1805         off  = ImmInt (-(fromInteger i))
1806     in
1807     return (Amode (AddrRegImm reg off) code)
1808
1809 getAmode (StPrim IntAddOp [x, StInt i])
1810   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1811     getRegister x               `thenNat` \ register ->
1812     let
1813         code = registerCode register tmp
1814         reg  = registerName register tmp
1815         off  = ImmInt (fromInteger i)
1816     in
1817     return (Amode (AddrRegImm reg off) code)
1818
1819 getAmode leaf
1820   | isJust imm
1821   = return (Amode (AddrImm imm__2) id)
1822   where
1823     imm = maybeImm leaf
1824     imm__2 = case imm of Just x -> x
1825
1826 getAmode other
1827   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1828     getRegister other           `thenNat` \ register ->
1829     let
1830         code = registerCode register tmp
1831         reg  = registerName register tmp
1832     in
1833     return (Amode (AddrReg reg) code)
1834
1835 #endif /* alpha_TARGET_ARCH */
1836
1837 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1838
1839 #if x86_64_TARGET_ARCH
1840
1841 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1842                                      CmmLit displacement])
1843     = return $ Amode (ripRel (litToImm displacement)) nilOL
1844
1845 #endif
1846
1847 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1848
1849 -- This is all just ridiculous, since it carefully undoes
1850 -- what mangleIndexTree has just done.
1851 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1852   | not (is64BitLit lit)
1853   -- ASSERT(rep == I32)???
1854   = do (x_reg, x_code) <- getSomeReg x
1855        let off = ImmInt (-(fromInteger i))
1856        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1857
1858 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1859   | not (is64BitLit lit)
1860   -- ASSERT(rep == I32)???
1861   = do (x_reg, x_code) <- getSomeReg x
1862        let off = ImmInt (fromInteger i)
1863        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1864
1865 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1866 -- recognised by the next rule.
1867 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1868                                   b@(CmmLit _)])
1869   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1870
1871 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1872                                         [y, CmmLit (CmmInt shift _)]])
1873   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1874   = x86_complex_amode x y shift 0
1875
1876 getAmode (CmmMachOp (MO_Add rep)
1877                 [x, CmmMachOp (MO_Add _)
1878                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1879                          CmmLit (CmmInt offset _)]])
1880   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1881   && not (is64BitInteger offset)
1882   = x86_complex_amode x y shift offset
1883
1884 getAmode (CmmMachOp (MO_Add rep) [x,y])
1885   = x86_complex_amode x y 0 0
1886
1887 getAmode (CmmLit lit) | not (is64BitLit lit)
1888   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1889
1890 getAmode expr = do
1891   (reg,code) <- getSomeReg expr
1892   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1893
1894
1895 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1896 x86_complex_amode base index shift offset
1897   = do (x_reg, x_code) <- getNonClobberedReg base
1898         -- x must be in a temp, because it has to stay live over y_code
1899         -- we could compre x_reg and y_reg and do something better here...
1900        (y_reg, y_code) <- getSomeReg index
1901        let
1902            code = x_code `appOL` y_code
1903            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1904        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1905                code)
1906
1907 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1908
1909 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1910
1911 #if sparc_TARGET_ARCH
1912
1913 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1914   | fits13Bits (-i)
1915   = do
1916        (reg, code) <- getSomeReg x
1917        let
1918          off  = ImmInt (-(fromInteger i))
1919        return (Amode (AddrRegImm reg off) code)
1920
1921
1922 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1923   | fits13Bits i
1924   = do
1925        (reg, code) <- getSomeReg x
1926        let
1927          off  = ImmInt (fromInteger i)
1928        return (Amode (AddrRegImm reg off) code)
1929
1930 getAmode (CmmMachOp (MO_Add rep) [x, y])
1931   = do
1932     (regX, codeX) <- getSomeReg x
1933     (regY, codeY) <- getSomeReg y
1934     let
1935         code = codeX `appOL` codeY
1936     return (Amode (AddrRegReg regX regY) code)
1937
1938 -- XXX Is this same as "leaf" in Stix?
1939 getAmode (CmmLit lit)
1940   = do
1941       tmp <- getNewRegNat I32
1942       let
1943         code = unitOL (SETHI (HI imm__2) tmp)
1944       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1945       where
1946          imm__2 = litToImm lit
1947
1948 getAmode other
1949   = do
1950        (reg, code) <- getSomeReg other
1951        let
1952             off  = ImmInt 0
1953        return (Amode (AddrRegImm reg off) code)
1954
1955 #endif /* sparc_TARGET_ARCH */
1956
1957 #ifdef powerpc_TARGET_ARCH
1958 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1959   | Just off <- makeImmediate I32 True (-i)
1960   = do
1961         (reg, code) <- getSomeReg x
1962         return (Amode (AddrRegImm reg off) code)
1963
1964
1965 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1966   | Just off <- makeImmediate I32 True i
1967   = do
1968         (reg, code) <- getSomeReg x
1969         return (Amode (AddrRegImm reg off) code)
1970
1971    -- optimize addition with 32-bit immediate
1972    -- (needed for PIC)
1973 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1974   = do
1975         tmp <- getNewRegNat I32
1976         (src, srcCode) <- getSomeReg x
1977         let imm = litToImm lit
1978             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1979         return (Amode (AddrRegImm tmp (LO imm)) code)
1980
1981 getAmode (CmmLit lit)
1982   = do
1983         tmp <- getNewRegNat I32
1984         let imm = litToImm lit
1985             code = unitOL (LIS tmp (HA imm))
1986         return (Amode (AddrRegImm tmp (LO imm)) code)
1987
1988 getAmode (CmmMachOp (MO_Add I32) [x, y])
1989   = do
1990         (regX, codeX) <- getSomeReg x
1991         (regY, codeY) <- getSomeReg y
1992         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1993
1994 getAmode other
1995   = do
1996         (reg, code) <- getSomeReg other
1997         let
1998             off  = ImmInt 0
1999         return (Amode (AddrRegImm reg off) code)
2000 #endif /* powerpc_TARGET_ARCH */
2001
2002 -- -----------------------------------------------------------------------------
2003 -- getOperand: sometimes any operand will do.
2004
2005 -- getNonClobberedOperand: the value of the operand will remain valid across
2006 -- the computation of an arbitrary expression, unless the expression
2007 -- is computed directly into a register which the operand refers to
2008 -- (see trivialCode where this function is used for an example).
2009
2010 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2011
2012 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2013 #if x86_64_TARGET_ARCH
2014 getNonClobberedOperand (CmmLit lit)
2015   | isSuitableFloatingPointLit lit = do
2016     lbl <- getNewLabelNat
2017     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2018                                            CmmStaticLit lit])
2019     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2020 #endif
2021 getNonClobberedOperand (CmmLit lit)
2022   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2023     return (OpImm (litToImm lit), nilOL)
2024 getNonClobberedOperand (CmmLoad mem pk)
2025   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2026     Amode src mem_code <- getAmode mem
2027     (src',save_code) <-
2028         if (amodeCouldBeClobbered src)
2029                 then do
2030                    tmp <- getNewRegNat wordRep
2031                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2032                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2033                 else
2034                    return (src, nilOL)
2035     return (OpAddr src', save_code `appOL` mem_code)
2036 getNonClobberedOperand e = do
2037     (reg, code) <- getNonClobberedReg e
2038     return (OpReg reg, code)
2039
2040 amodeCouldBeClobbered :: AddrMode -> Bool
2041 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2042
2043 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2044 regClobbered _ = False
2045
2046 -- getOperand: the operand is not required to remain valid across the
2047 -- computation of an arbitrary expression.
2048 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2049 #if x86_64_TARGET_ARCH
2050 getOperand (CmmLit lit)
2051   | isSuitableFloatingPointLit lit = do
2052     lbl <- getNewLabelNat
2053     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2054                                            CmmStaticLit lit])
2055     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2056 #endif
2057 getOperand (CmmLit lit)
2058   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2059     return (OpImm (litToImm lit), nilOL)
2060 getOperand (CmmLoad mem pk)
2061   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2062     Amode src mem_code <- getAmode mem
2063     return (OpAddr src, mem_code)
2064 getOperand e = do
2065     (reg, code) <- getSomeReg e
2066     return (OpReg reg, code)
2067
2068 isOperand :: CmmExpr -> Bool
2069 isOperand (CmmLoad _ _) = True
2070 isOperand (CmmLit lit)  = not (is64BitLit lit)
2071                           || isSuitableFloatingPointLit lit
2072 isOperand _             = False
2073
2074 -- if we want a floating-point literal as an operand, we can
2075 -- use it directly from memory.  However, if the literal is
2076 -- zero, we're better off generating it into a register using
2077 -- xor.
2078 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2079 isSuitableFloatingPointLit _ = False
2080
2081 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2082 getRegOrMem (CmmLoad mem pk)
2083   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2084     Amode src mem_code <- getAmode mem
2085     return (OpAddr src, mem_code)
2086 getRegOrMem e = do
2087     (reg, code) <- getNonClobberedReg e
2088     return (OpReg reg, code)
2089
2090 #if x86_64_TARGET_ARCH
2091 is64BitLit (CmmInt i I64) = is64BitInteger i
2092    -- assume that labels are in the range 0-2^31-1: this assumes the
2093    -- small memory model (see gcc docs, -mcmodel=small).
2094 #endif
2095 is64BitLit x = False
2096 #endif
2097
2098 is64BitInteger :: Integer -> Bool
2099 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2100   where i64 = fromIntegral i :: Int64
2101   -- a CmmInt is intended to be truncated to the appropriate
2102   -- number of bits, so here we truncate it to Int64.  This is
2103   -- important because e.g. -1 as a CmmInt might be either
2104   -- -1 or 18446744073709551615.
2105
2106 -- -----------------------------------------------------------------------------
2107 --  The 'CondCode' type:  Condition codes passed up the tree.
2108
2109 data CondCode = CondCode Bool Cond InstrBlock
2110
2111 -- Set up a condition code for a conditional branch.
2112
2113 getCondCode :: CmmExpr -> NatM CondCode
2114
2115 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2116
2117 #if alpha_TARGET_ARCH
2118 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2119 #endif /* alpha_TARGET_ARCH */
2120
2121 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2122
2123 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2124 -- yes, they really do seem to want exactly the same!
2125
2126 getCondCode (CmmMachOp mop [x, y])
2127   =
2128     case mop of
2129       MO_Eq F32 -> condFltCode EQQ x y
2130       MO_Ne F32 -> condFltCode NE  x y
2131
2132       MO_S_Gt F32 -> condFltCode GTT x y
2133       MO_S_Ge F32 -> condFltCode GE  x y
2134       MO_S_Lt F32 -> condFltCode LTT x y
2135       MO_S_Le F32 -> condFltCode LE  x y
2136
2137       MO_Eq F64 -> condFltCode EQQ x y
2138       MO_Ne F64 -> condFltCode NE  x y
2139
2140       MO_S_Gt F64 -> condFltCode GTT x y
2141       MO_S_Ge F64 -> condFltCode GE  x y
2142       MO_S_Lt F64 -> condFltCode LTT x y
2143       MO_S_Le F64 -> condFltCode LE  x y
2144
2145       MO_Eq rep -> condIntCode EQQ  x y
2146       MO_Ne rep -> condIntCode NE   x y
2147
2148       MO_S_Gt rep -> condIntCode GTT  x y
2149       MO_S_Ge rep -> condIntCode GE   x y
2150       MO_S_Lt rep -> condIntCode LTT  x y
2151       MO_S_Le rep -> condIntCode LE   x y
2152
2153       MO_U_Gt rep -> condIntCode GU   x y
2154       MO_U_Ge rep -> condIntCode GEU  x y
2155       MO_U_Lt rep -> condIntCode LU   x y
2156       MO_U_Le rep -> condIntCode LEU  x y
2157
2158       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2159
2160 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2161
2162 #elif powerpc_TARGET_ARCH
2163
2164 -- almost the same as everywhere else - but we need to
2165 -- extend small integers to 32 bit first
2166
2167 getCondCode (CmmMachOp mop [x, y])
2168   = case mop of
2169       MO_Eq F32 -> condFltCode EQQ x y
2170       MO_Ne F32 -> condFltCode NE  x y
2171
2172       MO_S_Gt F32 -> condFltCode GTT x y
2173       MO_S_Ge F32 -> condFltCode GE  x y
2174       MO_S_Lt F32 -> condFltCode LTT x y
2175       MO_S_Le F32 -> condFltCode LE  x y
2176
2177       MO_Eq F64 -> condFltCode EQQ x y
2178       MO_Ne F64 -> condFltCode NE  x y
2179
2180       MO_S_Gt F64 -> condFltCode GTT x y
2181       MO_S_Ge F64 -> condFltCode GE  x y
2182       MO_S_Lt F64 -> condFltCode LTT x y
2183       MO_S_Le F64 -> condFltCode LE  x y
2184
2185       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2186       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2187
2188       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2189       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2190       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2191       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2192
2193       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2194       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2195       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2196       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2197
2198       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2199
2200 getCondCode other =  panic "getCondCode(2)(powerpc)"
2201
2202
2203 #endif
2204
2205
2206 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2207 -- passed back up the tree.
2208
2209 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2210
2211 #if alpha_TARGET_ARCH
2212 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2213 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2214 #endif /* alpha_TARGET_ARCH */
2215
2216 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2217 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2218
2219 -- memory vs immediate
2220 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2221     Amode x_addr x_code <- getAmode x
2222     let
2223         imm  = litToImm lit
2224         code = x_code `snocOL`
2225                   CMP pk (OpImm imm) (OpAddr x_addr)
2226     --
2227     return (CondCode False cond code)
2228
2229 -- anything vs zero, using a mask
2230 -- TODO: Add some sanity checking!!!!
2231 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2232     | (CmmLit (CmmInt mask pk2)) <- o2
2233     = do
2234       (x_reg, x_code) <- getSomeReg x
2235       let
2236          code = x_code `snocOL`
2237                 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2238       --
2239       return (CondCode False cond code)
2240
2241 -- anything vs zero
2242 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2243     (x_reg, x_code) <- getSomeReg x
2244     let
2245         code = x_code `snocOL`
2246                   TEST pk (OpReg x_reg) (OpReg x_reg)
2247     --
2248     return (CondCode False cond code)
2249
2250 -- anything vs operand
2251 condIntCode cond x y | isOperand y = do
2252     (x_reg, x_code) <- getNonClobberedReg x
2253     (y_op,  y_code) <- getOperand y
2254     let
2255         code = x_code `appOL` y_code `snocOL`
2256                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2257     -- in
2258     return (CondCode False cond code)
2259
2260 -- anything vs anything
2261 condIntCode cond x y = do
2262   (y_reg, y_code) <- getNonClobberedReg y
2263   (x_op, x_code) <- getRegOrMem x
2264   let
2265         code = y_code `appOL`
2266                x_code `snocOL`
2267                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2268   -- in
2269   return (CondCode False cond code)
2270 #endif
2271
2272 #if i386_TARGET_ARCH
2273 condFltCode cond x y
2274   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2275   (x_reg, x_code) <- getNonClobberedReg x
2276   (y_reg, y_code) <- getSomeReg y
2277   let
2278         code = x_code `appOL` y_code `snocOL`
2279                 GCMP cond x_reg y_reg
2280   -- The GCMP insn does the test and sets the zero flag if comparable
2281   -- and true.  Hence we always supply EQQ as the condition to test.
2282   return (CondCode True EQQ code)
2283 #endif /* i386_TARGET_ARCH */
2284
2285 #if x86_64_TARGET_ARCH
2286 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2287 -- an operand, but the right must be a reg.  We can probably do better
2288 -- than this general case...
2289 condFltCode cond x y = do
2290   (x_reg, x_code) <- getNonClobberedReg x
2291   (y_op, y_code) <- getOperand y
2292   let
2293         code = x_code `appOL`
2294                y_code `snocOL`
2295                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2296         -- NB(1): we need to use the unsigned comparison operators on the
2297         -- result of this comparison.
2298   -- in
2299   return (CondCode True (condToUnsigned cond) code)
2300 #endif
2301
2302 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2303
2304 #if sparc_TARGET_ARCH
2305
2306 condIntCode cond x (CmmLit (CmmInt y rep))
2307   | fits13Bits y
2308   = do
2309        (src1, code) <- getSomeReg x
2310        let
2311            src2 = ImmInt (fromInteger y)
2312            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2313        return (CondCode False cond code')
2314
2315 condIntCode cond x y = do
2316     (src1, code1) <- getSomeReg x
2317     (src2, code2) <- getSomeReg y
2318     let
2319         code__2 = code1 `appOL` code2 `snocOL`
2320                   SUB False True src1 (RIReg src2) g0
2321     return (CondCode False cond code__2)
2322
2323 -----------
2324 condFltCode cond x y = do
2325     (src1, code1) <- getSomeReg x
2326     (src2, code2) <- getSomeReg y
2327     tmp <- getNewRegNat F64
2328     let
2329         promote x = FxTOy F32 F64 x tmp
2330
2331         pk1   = cmmExprRep x
2332         pk2   = cmmExprRep y
2333
2334         code__2 =
2335                 if pk1 == pk2 then
2336                     code1 `appOL` code2 `snocOL`
2337                     FCMP True pk1 src1 src2
2338                 else if pk1 == F32 then
2339                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2340                     FCMP True F64 tmp src2
2341                 else
2342                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2343                     FCMP True F64 src1 tmp
2344     return (CondCode True cond code__2)
2345
2346 #endif /* sparc_TARGET_ARCH */
2347
2348 #if powerpc_TARGET_ARCH
2349 --  ###FIXME: I16 and I8!
2350 condIntCode cond x (CmmLit (CmmInt y rep))
2351   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2352   = do
2353         (src1, code) <- getSomeReg x
2354         let
2355             code' = code `snocOL`
2356                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2357         return (CondCode False cond code')
2358
2359 condIntCode cond x y = do
2360     (src1, code1) <- getSomeReg x
2361     (src2, code2) <- getSomeReg y
2362     let
2363         code' = code1 `appOL` code2 `snocOL`
2364                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2365     return (CondCode False cond code')
2366
2367 condFltCode cond x y = do
2368     (src1, code1) <- getSomeReg x
2369     (src2, code2) <- getSomeReg y
2370     let
2371         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2372         code'' = case cond of -- twiddle CR to handle unordered case
2373                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2374                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2375                     _ -> code'
2376                  where
2377                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2378     return (CondCode True cond code'')
2379
2380 #endif /* powerpc_TARGET_ARCH */
2381
2382 -- -----------------------------------------------------------------------------
2383 -- Generating assignments
2384
2385 -- Assignments are really at the heart of the whole code generation
2386 -- business.  Almost all top-level nodes of any real importance are
2387 -- assignments, which correspond to loads, stores, or register
2388 -- transfers.  If we're really lucky, some of the register transfers
2389 -- will go away, because we can use the destination register to
2390 -- complete the code generation for the right hand side.  This only
2391 -- fails when the right hand side is forced into a fixed register
2392 -- (e.g. the result of a call).
2393
2394 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2395 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2396
2397 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2398 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2399
2400 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2401
2402 #if alpha_TARGET_ARCH
2403
2404 assignIntCode pk (CmmLoad dst _) src
2405   = getNewRegNat IntRep             `thenNat` \ tmp ->
2406     getAmode dst                    `thenNat` \ amode ->
2407     getRegister src                 `thenNat` \ register ->
2408     let
2409         code1   = amodeCode amode []
2410         dst__2  = amodeAddr amode
2411         code2   = registerCode register tmp []
2412         src__2  = registerName register tmp
2413         sz      = primRepToSize pk
2414         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2415     in
2416     return code__2
2417
2418 assignIntCode pk dst src
2419   = getRegister dst                         `thenNat` \ register1 ->
2420     getRegister src                         `thenNat` \ register2 ->
2421     let
2422         dst__2  = registerName register1 zeroh
2423         code    = registerCode register2 dst__2
2424         src__2  = registerName register2 dst__2
2425         code__2 = if isFixed register2
2426                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2427                   else code
2428     in
2429     return code__2
2430
2431 #endif /* alpha_TARGET_ARCH */
2432
2433 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2434
2435 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2436
2437 -- integer assignment to memory
2438
2439 -- specific case of adding/subtracting an integer to a particular address.
2440 -- ToDo: catch other cases where we can use an operation directly on a memory
2441 -- address.
2442 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2443                                                  CmmLit (CmmInt i _)])
2444    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2445      Just instr <- check op
2446    = do Amode amode code_addr <- getAmode addr
2447         let code = code_addr `snocOL`
2448                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2449         return code
2450    where
2451         check (MO_Add _) = Just ADD
2452         check (MO_Sub _) = Just SUB
2453         check _ = Nothing
2454         -- ToDo: more?
2455
2456 -- general case
2457 assignMem_IntCode pk addr src = do
2458     Amode addr code_addr <- getAmode addr
2459     (code_src, op_src)   <- get_op_RI src
2460     let
2461         code = code_src `appOL`
2462                code_addr `snocOL`
2463                   MOV pk op_src (OpAddr addr)
2464         -- NOTE: op_src is stable, so it will still be valid
2465         -- after code_addr.  This may involve the introduction
2466         -- of an extra MOV to a temporary register, but we hope
2467         -- the register allocator will get rid of it.
2468     --
2469     return code
2470   where
2471     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2472     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2473       = return (nilOL, OpImm (litToImm lit))
2474     get_op_RI op
2475       = do (reg,code) <- getNonClobberedReg op
2476            return (code, OpReg reg)
2477
2478
2479 -- Assign; dst is a reg, rhs is mem
2480 assignReg_IntCode pk reg (CmmLoad src _) = do
2481   load_code <- intLoadCode (MOV pk) src
2482   return (load_code (getRegisterReg reg))
2483
2484 -- dst is a reg, but src could be anything
2485 assignReg_IntCode pk reg src = do
2486   code <- getAnyReg src
2487   return (code (getRegisterReg reg))
2488
2489 #endif /* i386_TARGET_ARCH */
2490
2491 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2492
2493 #if sparc_TARGET_ARCH
2494
2495 assignMem_IntCode pk addr src = do
2496     (srcReg, code) <- getSomeReg src
2497     Amode dstAddr addr_code <- getAmode addr
2498     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2499
2500 assignReg_IntCode pk reg src = do
2501     r <- getRegister src
2502     return $ case r of
2503         Any _ code         -> code dst
2504         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2505     where
2506       dst = getRegisterReg reg
2507
2508
2509 #endif /* sparc_TARGET_ARCH */
2510
2511 #if powerpc_TARGET_ARCH
2512
2513 assignMem_IntCode pk addr src = do
2514     (srcReg, code) <- getSomeReg src
2515     Amode dstAddr addr_code <- getAmode addr
2516     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2517
2518 -- dst is a reg, but src could be anything
2519 assignReg_IntCode pk reg src
2520     = do
2521         r <- getRegister src
2522         return $ case r of
2523             Any _ code         -> code dst
2524             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2525     where
2526         dst = getRegisterReg reg
2527
2528 #endif /* powerpc_TARGET_ARCH */
2529
2530
2531 -- -----------------------------------------------------------------------------
2532 -- Floating-point assignments
2533
2534 #if alpha_TARGET_ARCH
2535
2536 assignFltCode pk (CmmLoad dst _) src
2537   = getNewRegNat pk                 `thenNat` \ tmp ->
2538     getAmode dst                    `thenNat` \ amode ->
2539     getRegister src                         `thenNat` \ register ->
2540     let
2541         code1   = amodeCode amode []
2542         dst__2  = amodeAddr amode
2543         code2   = registerCode register tmp []
2544         src__2  = registerName register tmp
2545         sz      = primRepToSize pk
2546         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2547     in
2548     return code__2
2549
2550 assignFltCode pk dst src
2551   = getRegister dst                         `thenNat` \ register1 ->
2552     getRegister src                         `thenNat` \ register2 ->
2553     let
2554         dst__2  = registerName register1 zeroh
2555         code    = registerCode register2 dst__2
2556         src__2  = registerName register2 dst__2
2557         code__2 = if isFixed register2
2558                   then code . mkSeqInstr (FMOV src__2 dst__2)
2559                   else code
2560     in
2561     return code__2
2562
2563 #endif /* alpha_TARGET_ARCH */
2564
2565 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2566
2567 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2568
2569 -- Floating point assignment to memory
2570 assignMem_FltCode pk addr src = do
2571   (src_reg, src_code) <- getNonClobberedReg src
2572   Amode addr addr_code <- getAmode addr
2573   let
2574         code = src_code `appOL`
2575                addr_code `snocOL`
2576                 IF_ARCH_i386(GST pk src_reg addr,
2577                              MOV pk (OpReg src_reg) (OpAddr addr))
2578   return code
2579
2580 -- Floating point assignment to a register/temporary
2581 assignReg_FltCode pk reg src = do
2582   src_code <- getAnyReg src
2583   return (src_code (getRegisterReg reg))
2584
2585 #endif /* i386_TARGET_ARCH */
2586
2587 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2588
2589 #if sparc_TARGET_ARCH
2590
2591 -- Floating point assignment to memory
2592 assignMem_FltCode pk addr src = do
2593     Amode dst__2 code1 <- getAmode addr
2594     (src__2, code2) <- getSomeReg src
2595     tmp1 <- getNewRegNat pk
2596     let
2597         pk__2   = cmmExprRep src
2598         code__2 = code1 `appOL` code2 `appOL`
2599             if   pk == pk__2
2600             then unitOL (ST pk src__2 dst__2)
2601             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2602     return code__2
2603
2604 -- Floating point assignment to a register/temporary
2605 -- ToDo: Verify correctness
2606 assignReg_FltCode pk reg src = do
2607     r <- getRegister src
2608     v1 <- getNewRegNat pk
2609     return $ case r of
2610         Any _ code         -> code dst
2611         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2612     where
2613       dst = getRegisterReg reg
2614
2615 #endif /* sparc_TARGET_ARCH */
2616
2617 #if powerpc_TARGET_ARCH
2618
2619 -- Easy, isn't it?
2620 assignMem_FltCode = assignMem_IntCode
2621 assignReg_FltCode = assignReg_IntCode
2622
2623 #endif /* powerpc_TARGET_ARCH */
2624
2625
2626 -- -----------------------------------------------------------------------------
2627 -- Generating an non-local jump
2628
2629 -- (If applicable) Do not fill the delay slots here; you will confuse the
2630 -- register allocator.
2631
2632 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2633
2634 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2635
2636 #if alpha_TARGET_ARCH
2637
2638 genJump (CmmLabel lbl)
2639   | isAsmTemp lbl = returnInstr (BR target)
2640   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2641   where
2642     target = ImmCLbl lbl
2643
2644 genJump tree
2645   = getRegister tree                `thenNat` \ register ->
2646     getNewRegNat PtrRep             `thenNat` \ tmp ->
2647     let
2648         dst    = registerName register pv
2649         code   = registerCode register pv
2650         target = registerName register pv
2651     in
2652     if isFixed register then
2653         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2654     else
2655     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2656
2657 #endif /* alpha_TARGET_ARCH */
2658
2659 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2660
2661 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2662
2663 genJump (CmmLoad mem pk) = do
2664   Amode target code <- getAmode mem
2665   return (code `snocOL` JMP (OpAddr target))
2666
2667 genJump (CmmLit lit) = do
2668   return (unitOL (JMP (OpImm (litToImm lit))))
2669
2670 genJump expr = do
2671   (reg,code) <- getSomeReg expr
2672   return (code `snocOL` JMP (OpReg reg))
2673
2674 #endif /* i386_TARGET_ARCH */
2675
2676 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2677
2678 #if sparc_TARGET_ARCH
2679
2680 genJump (CmmLit (CmmLabel lbl))
2681   = return (toOL [CALL (Left target) 0 True, NOP])
2682   where
2683     target = ImmCLbl lbl
2684
2685 genJump tree
2686   = do
2687         (target, code) <- getSomeReg tree
2688         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2689
2690 #endif /* sparc_TARGET_ARCH */
2691
2692 #if powerpc_TARGET_ARCH
2693 genJump (CmmLit (CmmLabel lbl))
2694   = return (unitOL $ JMP lbl)
2695
2696 genJump tree
2697   = do
2698         (target,code) <- getSomeReg tree
2699         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2700 #endif /* powerpc_TARGET_ARCH */
2701
2702
2703 -- -----------------------------------------------------------------------------
2704 --  Unconditional branches
2705
2706 genBranch :: BlockId -> NatM InstrBlock
2707
2708 genBranch = return . toOL . mkBranchInstr
2709
2710 -- -----------------------------------------------------------------------------
2711 --  Conditional jumps
2712
2713 {-
2714 Conditional jumps are always to local labels, so we can use branch
2715 instructions.  We peek at the arguments to decide what kind of
2716 comparison to do.
2717
2718 ALPHA: For comparisons with 0, we're laughing, because we can just do
2719 the desired conditional branch.
2720
2721 I386: First, we have to ensure that the condition
2722 codes are set according to the supplied comparison operation.
2723
2724 SPARC: First, we have to ensure that the condition codes are set
2725 according to the supplied comparison operation.  We generate slightly
2726 different code for floating point comparisons, because a floating
2727 point operation cannot directly precede a @BF@.  We assume the worst
2728 and fill that slot with a @NOP@.
2729
2730 SPARC: Do not fill the delay slots here; you will confuse the register
2731 allocator.
2732 -}
2733
2734
2735 genCondJump
2736     :: BlockId      -- the branch target
2737     -> CmmExpr      -- the condition on which to branch
2738     -> NatM InstrBlock
2739
2740 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2741
2742 #if alpha_TARGET_ARCH
2743
2744 genCondJump id (StPrim op [x, StInt 0])
2745   = getRegister x                           `thenNat` \ register ->
2746     getNewRegNat (registerRep register)
2747                                     `thenNat` \ tmp ->
2748     let
2749         code   = registerCode register tmp
2750         value  = registerName register tmp
2751         pk     = registerRep register
2752         target = ImmCLbl lbl
2753     in
2754     returnSeq code [BI (cmpOp op) value target]
2755   where
2756     cmpOp CharGtOp = GTT
2757     cmpOp CharGeOp = GE
2758     cmpOp CharEqOp = EQQ
2759     cmpOp CharNeOp = NE
2760     cmpOp CharLtOp = LTT
2761     cmpOp CharLeOp = LE
2762     cmpOp IntGtOp = GTT
2763     cmpOp IntGeOp = GE
2764     cmpOp IntEqOp = EQQ
2765     cmpOp IntNeOp = NE
2766     cmpOp IntLtOp = LTT
2767     cmpOp IntLeOp = LE
2768     cmpOp WordGtOp = NE
2769     cmpOp WordGeOp = ALWAYS
2770     cmpOp WordEqOp = EQQ
2771     cmpOp WordNeOp = NE
2772     cmpOp WordLtOp = NEVER
2773     cmpOp WordLeOp = EQQ
2774     cmpOp AddrGtOp = NE
2775     cmpOp AddrGeOp = ALWAYS
2776     cmpOp AddrEqOp = EQQ
2777     cmpOp AddrNeOp = NE
2778     cmpOp AddrLtOp = NEVER
2779     cmpOp AddrLeOp = EQQ
2780
2781 genCondJump lbl (StPrim op [x, StDouble 0.0])
2782   = getRegister x                           `thenNat` \ register ->
2783     getNewRegNat (registerRep register)
2784                                     `thenNat` \ tmp ->
2785     let
2786         code   = registerCode register tmp
2787         value  = registerName register tmp
2788         pk     = registerRep register
2789         target = ImmCLbl lbl
2790     in
2791     return (code . mkSeqInstr (BF (cmpOp op) value target))
2792   where
2793     cmpOp FloatGtOp = GTT
2794     cmpOp FloatGeOp = GE
2795     cmpOp FloatEqOp = EQQ
2796     cmpOp FloatNeOp = NE
2797     cmpOp FloatLtOp = LTT
2798     cmpOp FloatLeOp = LE
2799     cmpOp DoubleGtOp = GTT
2800     cmpOp DoubleGeOp = GE
2801     cmpOp DoubleEqOp = EQQ
2802     cmpOp DoubleNeOp = NE
2803     cmpOp DoubleLtOp = LTT
2804     cmpOp DoubleLeOp = LE
2805
2806 genCondJump lbl (StPrim op [x, y])
2807   | fltCmpOp op
2808   = trivialFCode pr instr x y       `thenNat` \ register ->
2809     getNewRegNat F64                `thenNat` \ tmp ->
2810     let
2811         code   = registerCode register tmp
2812         result = registerName register tmp
2813         target = ImmCLbl lbl
2814     in
2815     return (code . mkSeqInstr (BF cond result target))
2816   where
2817     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2818
2819     fltCmpOp op = case op of
2820         FloatGtOp -> True
2821         FloatGeOp -> True
2822         FloatEqOp -> True
2823         FloatNeOp -> True
2824         FloatLtOp -> True
2825         FloatLeOp -> True
2826         DoubleGtOp -> True
2827         DoubleGeOp -> True
2828         DoubleEqOp -> True
2829         DoubleNeOp -> True
2830         DoubleLtOp -> True
2831         DoubleLeOp -> True
2832         _ -> False
2833     (instr, cond) = case op of
2834         FloatGtOp -> (FCMP TF LE, EQQ)
2835         FloatGeOp -> (FCMP TF LTT, EQQ)
2836         FloatEqOp -> (FCMP TF EQQ, NE)
2837         FloatNeOp -> (FCMP TF EQQ, EQQ)
2838         FloatLtOp -> (FCMP TF LTT, NE)
2839         FloatLeOp -> (FCMP TF LE, NE)
2840         DoubleGtOp -> (FCMP TF LE, EQQ)
2841         DoubleGeOp -> (FCMP TF LTT, EQQ)
2842         DoubleEqOp -> (FCMP TF EQQ, NE)
2843         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2844         DoubleLtOp -> (FCMP TF LTT, NE)
2845         DoubleLeOp -> (FCMP TF LE, NE)
2846
2847 genCondJump lbl (StPrim op [x, y])
2848   = trivialCode instr x y           `thenNat` \ register ->
2849     getNewRegNat IntRep             `thenNat` \ tmp ->
2850     let
2851         code   = registerCode register tmp
2852         result = registerName register tmp
2853         target = ImmCLbl lbl
2854     in
2855     return (code . mkSeqInstr (BI cond result target))
2856   where
2857     (instr, cond) = case op of
2858         CharGtOp -> (CMP LE, EQQ)
2859         CharGeOp -> (CMP LTT, EQQ)
2860         CharEqOp -> (CMP EQQ, NE)
2861         CharNeOp -> (CMP EQQ, EQQ)
2862         CharLtOp -> (CMP LTT, NE)
2863         CharLeOp -> (CMP LE, NE)
2864         IntGtOp -> (CMP LE, EQQ)
2865         IntGeOp -> (CMP LTT, EQQ)
2866         IntEqOp -> (CMP EQQ, NE)
2867         IntNeOp -> (CMP EQQ, EQQ)
2868         IntLtOp -> (CMP LTT, NE)
2869         IntLeOp -> (CMP LE, NE)
2870         WordGtOp -> (CMP ULE, EQQ)
2871         WordGeOp -> (CMP ULT, EQQ)
2872         WordEqOp -> (CMP EQQ, NE)
2873         WordNeOp -> (CMP EQQ, EQQ)
2874         WordLtOp -> (CMP ULT, NE)
2875         WordLeOp -> (CMP ULE, NE)
2876         AddrGtOp -> (CMP ULE, EQQ)
2877         AddrGeOp -> (CMP ULT, EQQ)
2878         AddrEqOp -> (CMP EQQ, NE)
2879         AddrNeOp -> (CMP EQQ, EQQ)
2880         AddrLtOp -> (CMP ULT, NE)
2881         AddrLeOp -> (CMP ULE, NE)
2882
2883 #endif /* alpha_TARGET_ARCH */
2884
2885 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2886
2887 #if i386_TARGET_ARCH
2888
2889 genCondJump id bool = do
2890   CondCode _ cond code <- getCondCode bool
2891   return (code `snocOL` JXX cond id)
2892
2893 #endif
2894
2895 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2896
2897 #if x86_64_TARGET_ARCH
2898
2899 genCondJump id bool = do
2900   CondCode is_float cond cond_code <- getCondCode bool
2901   if not is_float
2902     then
2903         return (cond_code `snocOL` JXX cond id)
2904     else do
2905         lbl <- getBlockIdNat
2906
2907         -- see comment with condFltReg
2908         let code = case cond of
2909                         NE  -> or_unordered
2910                         GU  -> plain_test
2911                         GEU -> plain_test
2912                         _   -> and_ordered
2913
2914             plain_test = unitOL (
2915                   JXX cond id
2916                 )
2917             or_unordered = toOL [
2918                   JXX cond id,
2919                   JXX PARITY id
2920                 ]
2921             and_ordered = toOL [
2922                   JXX PARITY lbl,
2923                   JXX cond id,
2924                   JXX ALWAYS lbl,
2925                   NEWBLOCK lbl
2926                 ]
2927         return (cond_code `appOL` code)
2928
2929 #endif
2930
2931 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2932
2933 #if sparc_TARGET_ARCH
2934
2935 genCondJump (BlockId id) bool = do
2936   CondCode is_float cond code <- getCondCode bool
2937   return (
2938        code `appOL`
2939        toOL (
2940          if   is_float
2941          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2942          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2943        )
2944     )
2945
2946 #endif /* sparc_TARGET_ARCH */
2947
2948
2949 #if powerpc_TARGET_ARCH
2950
2951 genCondJump id bool = do
2952   CondCode is_float cond code <- getCondCode bool
2953   return (code `snocOL` BCC cond id)
2954
2955 #endif /* powerpc_TARGET_ARCH */
2956
2957
2958 -- -----------------------------------------------------------------------------
2959 --  Generating C calls
2960
2961 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2962 -- @get_arg@, which moves the arguments to the correct registers/stack
2963 -- locations.  Apart from that, the code is easy.
2964 --
2965 -- (If applicable) Do not fill the delay slots here; you will confuse the
2966 -- register allocator.
2967
2968 genCCall
2969     :: CmmCallTarget            -- function to call
2970     -> CmmFormals               -- where to put the result
2971     -> CmmActuals               -- arguments (of mixed type)
2972     -> NatM InstrBlock
2973
2974 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2975
2976 #if alpha_TARGET_ARCH
2977
2978 ccallResultRegs =
2979
2980 genCCall fn cconv result_regs args
2981   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2982                           `thenNat` \ ((unused,_), argCode) ->
2983     let
2984         nRegs = length allArgRegs - length unused
2985         code = asmSeqThen (map ($ []) argCode)
2986     in
2987         returnSeq code [
2988             LDA pv (AddrImm (ImmLab (ptext fn))),
2989             JSR ra (AddrReg pv) nRegs,
2990             LDGP gp (AddrReg ra)]
2991   where
2992     ------------------------
2993     {-  Try to get a value into a specific register (or registers) for
2994         a call.  The first 6 arguments go into the appropriate
2995         argument register (separate registers for integer and floating
2996         point arguments, but used in lock-step), and the remaining
2997         arguments are dumped to the stack, beginning at 0(sp).  Our
2998         first argument is a pair of the list of remaining argument
2999         registers to be assigned for this call and the next stack
3000         offset to use for overflowing arguments.  This way,
3001         @get_Arg@ can be applied to all of a call's arguments using
3002         @mapAccumLNat@.
3003     -}
3004     get_arg
3005         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3006         -> StixTree             -- Current argument
3007         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3008
3009     -- We have to use up all of our argument registers first...
3010
3011     get_arg ((iDst,fDst):dsts, offset) arg
3012       = getRegister arg                     `thenNat` \ register ->
3013         let
3014             reg  = if isFloatingRep pk then fDst else iDst
3015             code = registerCode register reg
3016             src  = registerName register reg
3017             pk   = registerRep register
3018         in
3019         return (
3020             if isFloatingRep pk then
3021                 ((dsts, offset), if isFixed register then
3022                     code . mkSeqInstr (FMOV src fDst)
3023                     else code)
3024             else
3025                 ((dsts, offset), if isFixed register then
3026                     code . mkSeqInstr (OR src (RIReg src) iDst)
3027                     else code))
3028
3029     -- Once we have run out of argument registers, we move to the
3030     -- stack...
3031
3032     get_arg ([], offset) arg
3033       = getRegister arg                 `thenNat` \ register ->
3034         getNewRegNat (registerRep register)
3035                                         `thenNat` \ tmp ->
3036         let
3037             code = registerCode register tmp
3038             src  = registerName register tmp
3039             pk   = registerRep register
3040             sz   = primRepToSize pk
3041         in
3042         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3043
3044 #endif /* alpha_TARGET_ARCH */
3045
3046 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3047
3048 #if i386_TARGET_ARCH
3049
3050 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3051         -- write barrier compiles to no code on x86/x86-64;
3052         -- we keep it this long in order to prevent earlier optimisations.
3053
3054 -- we only cope with a single result for foreign calls
3055 genCCall (CmmPrim op) [CmmKinded r _] args = do
3056   l1 <- getNewLabelNat
3057   l2 <- getNewLabelNat
3058   case op of
3059         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3060         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3061
3062         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32 l1 l2) args
3063         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args
3064
3065         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32 l1 l2) args
3066         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args
3067
3068         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32 l1 l2) args
3069         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args
3070
3071         other_op    -> outOfLineFloatOp op r args
3072  where
3073   actuallyInlineFloatOp rep instr [CmmKinded x _]
3074         = do res <- trivialUFCode rep instr x
3075              any <- anyReg res
3076              return (any (getRegisterReg (CmmLocal r)))
3077
3078 genCCall target dest_regs args = do
3079     let
3080         sizes               = map (arg_size . cmmExprRep . kindlessCmm) (reverse args)
3081 #if !darwin_TARGET_OS
3082         tot_arg_size        = sum sizes
3083 #else
3084         raw_arg_size        = sum sizes
3085         tot_arg_size        = roundTo 16 raw_arg_size
3086         arg_pad_size        = tot_arg_size - raw_arg_size
3087     delta0 <- getDeltaNat
3088     setDeltaNat (delta0 - arg_pad_size)
3089 #endif
3090
3091     push_codes <- mapM push_arg (reverse args)
3092     delta <- getDeltaNat
3093
3094     -- in
3095     -- deal with static vs dynamic call targets
3096     (callinsns,cconv) <-
3097       case target of
3098         -- CmmPrim -> ...
3099         CmmCallee (CmmLit (CmmLabel lbl)) conv
3100            -> -- ToDo: stdcall arg sizes
3101               return (unitOL (CALL (Left fn_imm) []), conv)
3102            where fn_imm = ImmCLbl lbl
3103         CmmCallee expr conv
3104            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3105                  ASSERT(dyn_rep == I32)
3106                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3107
3108     let push_code
3109 #if darwin_TARGET_OS
3110             | arg_pad_size /= 0
3111             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3112                     DELTA (delta0 - arg_pad_size)]
3113               `appOL` concatOL push_codes
3114             | otherwise
3115 #endif
3116             = concatOL push_codes
3117         call = callinsns `appOL`
3118                toOL (
3119                         -- Deallocate parameters after call for ccall;
3120                         -- but not for stdcall (callee does it)
3121                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3122                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3123                   ++
3124                   [DELTA (delta + tot_arg_size)]
3125                )
3126     -- in
3127     setDeltaNat (delta + tot_arg_size)
3128
3129     let
3130         -- assign the results, if necessary
3131         assign_code []     = nilOL
3132         assign_code [CmmKinded dest _hint] =
3133           case rep of
3134                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3135                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3136                 F32 -> unitOL (GMOV fake0 r_dest)
3137                 F64 -> unitOL (GMOV fake0 r_dest)
3138                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3139           where
3140                 r_dest_hi = getHiVRegFromLo r_dest
3141                 rep = localRegRep dest
3142                 r_dest = getRegisterReg (CmmLocal dest)
3143         assign_code many = panic "genCCall.assign_code many"
3144
3145     return (push_code `appOL`
3146             call `appOL`
3147             assign_code dest_regs)
3148
3149   where
3150     arg_size F64 = 8
3151     arg_size F32 = 4
3152     arg_size I64 = 8
3153     arg_size _   = 4
3154
3155     roundTo a x | x `mod` a == 0 = x
3156                 | otherwise = x + a - (x `mod` a)
3157
3158
3159     push_arg :: (CmmKinded CmmExpr){-current argument-}
3160                     -> NatM InstrBlock  -- code
3161
3162     push_arg (CmmKinded arg _hint) -- we don't need the hints on x86
3163       | arg_rep == I64 = do
3164         ChildCode64 code r_lo <- iselExpr64 arg
3165         delta <- getDeltaNat
3166         setDeltaNat (delta - 8)
3167         let
3168             r_hi = getHiVRegFromLo r_lo
3169         -- in
3170         return (       code `appOL`
3171                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3172                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3173                              DELTA (delta-8)]
3174             )
3175
3176       | otherwise = do
3177         (code, reg, sz) <- get_op arg
3178         delta <- getDeltaNat
3179         let size = arg_size sz
3180         setDeltaNat (delta-size)
3181         if (case sz of F64 -> True; F32 -> True; _ -> False)
3182            then return (code `appOL`
3183                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3184                               DELTA (delta-size),
3185                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3186                                                         EAIndexNone
3187                                                         (ImmInt 0))]
3188                        )
3189            else return (code `snocOL`
3190                         PUSH I32 (OpReg reg) `snocOL`
3191                         DELTA (delta-size)
3192                        )
3193       where
3194          arg_rep = cmmExprRep arg
3195
3196     ------------
3197     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3198     get_op op = do
3199         (reg,code) <- getSomeReg op
3200         return (code, reg, cmmExprRep op)
3201
3202 #endif /* i386_TARGET_ARCH */
3203
3204 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3205
3206 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3207   -> NatM InstrBlock
3208 outOfLineFloatOp mop res args
3209   = do
3210       dflags <- getDynFlagsNat
3211       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3212       let target = CmmCallee targetExpr CCallConv
3213
3214       if localRegRep res == F64
3215         then
3216           stmtToInstrs (CmmCall target [CmmKinded res FloatHint] args CmmUnsafe CmmMayReturn)
3217         else do
3218           uq <- getUniqueNat
3219           let
3220             tmp = LocalReg uq F64 GCKindNonPtr
3221           -- in
3222           code1 <- stmtToInstrs (CmmCall target [CmmKinded tmp FloatHint] args CmmUnsafe CmmMayReturn)
3223           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3224           return (code1 `appOL` code2)
3225   where
3226         lbl = mkForeignLabel fn Nothing False
3227
3228         fn = case mop of
3229               MO_F32_Sqrt  -> fsLit "sqrtf"
3230               MO_F32_Sin   -> fsLit "sinf"
3231               MO_F32_Cos   -> fsLit "cosf"
3232               MO_F32_Tan   -> fsLit "tanf"
3233               MO_F32_Exp   -> fsLit "expf"
3234               MO_F32_Log   -> fsLit "logf"
3235
3236               MO_F32_Asin  -> fsLit "asinf"
3237               MO_F32_Acos  -> fsLit "acosf"
3238               MO_F32_Atan  -> fsLit "atanf"
3239
3240               MO_F32_Sinh  -> fsLit "sinhf"
3241               MO_F32_Cosh  -> fsLit "coshf"
3242               MO_F32_Tanh  -> fsLit "tanhf"
3243               MO_F32_Pwr   -> fsLit "powf"
3244
3245               MO_F64_Sqrt  -> fsLit "sqrt"
3246               MO_F64_Sin   -> fsLit "sin"
3247               MO_F64_Cos   -> fsLit "cos"
3248               MO_F64_Tan   -> fsLit "tan"
3249               MO_F64_Exp   -> fsLit "exp"
3250               MO_F64_Log   -> fsLit "log"
3251
3252               MO_F64_Asin  -> fsLit "asin"
3253               MO_F64_Acos  -> fsLit "acos"
3254               MO_F64_Atan  -> fsLit "atan"
3255
3256               MO_F64_Sinh  -> fsLit "sinh"
3257               MO_F64_Cosh  -> fsLit "cosh"
3258               MO_F64_Tanh  -> fsLit "tanh"
3259               MO_F64_Pwr   -> fsLit "pow"
3260
3261 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3262
3263 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3264
3265 #if x86_64_TARGET_ARCH
3266
3267 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3268         -- write barrier compiles to no code on x86/x86-64;
3269         -- we keep it this long in order to prevent earlier optimisations.
3270
3271
3272 genCCall (CmmPrim op) [CmmKinded r _] args =
3273   outOfLineFloatOp op r args
3274
3275 genCCall target dest_regs args = do
3276
3277         -- load up the register arguments
3278     (stack_args, aregs, fregs, load_args_code)
3279          <- load_args args allArgRegs allFPArgRegs nilOL
3280
3281     let
3282         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3283         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3284         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3285                 -- for annotating the call instruction with
3286
3287         sse_regs = length fp_regs_used
3288
3289         tot_arg_size = arg_size * length stack_args
3290
3291         -- On entry to the called function, %rsp should be aligned
3292         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3293         -- the return address is 16-byte aligned).  In STG land
3294         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3295         -- need to make sure we push a multiple of 16-bytes of args,
3296         -- plus the return address, to get the correct alignment.
3297         -- Urg, this is hard.  We need to feed the delta back into
3298         -- the arg pushing code.
3299     (real_size, adjust_rsp) <-
3300         if tot_arg_size `rem` 16 == 0
3301             then return (tot_arg_size, nilOL)
3302             else do -- we need to adjust...
3303                 delta <- getDeltaNat
3304                 setDeltaNat (delta-8)
3305                 return (tot_arg_size+8, toOL [
3306                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3307                                 DELTA (delta-8)
3308                         ])
3309
3310         -- push the stack args, right to left
3311     push_code <- push_args (reverse stack_args) nilOL
3312     delta <- getDeltaNat
3313
3314     -- deal with static vs dynamic call targets
3315     (callinsns,cconv) <-
3316       case target of
3317         -- CmmPrim -> ...
3318         CmmCallee (CmmLit (CmmLabel lbl)) conv
3319            -> -- ToDo: stdcall arg sizes
3320               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3321            where fn_imm = ImmCLbl lbl
3322         CmmCallee expr conv
3323            -> do (dyn_r, dyn_c) <- getSomeReg expr
3324                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3325
3326     let
3327         -- The x86_64 ABI requires us to set %al to the number of SSE
3328         -- registers that contain arguments, if the called routine
3329         -- is a varargs function.  We don't know whether it's a
3330         -- varargs function or not, so we have to assume it is.
3331         --
3332         -- It's not safe to omit this assignment, even if the number
3333         -- of SSE regs in use is zero.  If %al is larger than 8
3334         -- on entry to a varargs function, seg faults ensue.
3335         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3336
3337     let call = callinsns `appOL`
3338                toOL (
3339                         -- Deallocate parameters after call for ccall;
3340                         -- but not for stdcall (callee does it)
3341                   (if cconv == StdCallConv || real_size==0 then [] else
3342                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3343                   ++
3344                   [DELTA (delta + real_size)]
3345                )
3346     -- in
3347     setDeltaNat (delta + real_size)
3348
3349     let
3350         -- assign the results, if necessary
3351         assign_code []     = nilOL
3352         assign_code [CmmKinded dest _hint] =
3353           case rep of
3354                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3356                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3357           where
3358                 rep = localRegRep dest
3359                 r_dest = getRegisterReg (CmmLocal dest)
3360         assign_code many = panic "genCCall.assign_code many"
3361
3362     return (load_args_code      `appOL`
3363             adjust_rsp          `appOL`
3364             push_code           `appOL`
3365             assign_eax sse_regs `appOL`
3366             call                `appOL`
3367             assign_code dest_regs)
3368
3369   where
3370     arg_size = 8 -- always, at the mo
3371
3372     load_args :: [CmmKinded CmmExpr]
3373               -> [Reg]                  -- int regs avail for args
3374               -> [Reg]                  -- FP regs avail for args
3375               -> InstrBlock
3376               -> NatM ([CmmKinded CmmExpr],[Reg],[Reg],InstrBlock)
3377     load_args args [] [] code     =  return (args, [], [], code)
3378         -- no more regs to use
3379     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3380         -- no more args to push
3381     load_args ((CmmKinded arg hint) : rest) aregs fregs code
3382         | isFloatingRep arg_rep =
3383         case fregs of
3384           [] -> push_this_arg
3385           (r:rs) -> do
3386              arg_code <- getAnyReg arg
3387              load_args rest aregs rs (code `appOL` arg_code r)
3388         | otherwise =
3389         case aregs of
3390           [] -> push_this_arg
3391           (r:rs) -> do
3392              arg_code <- getAnyReg arg
3393              load_args rest rs fregs (code `appOL` arg_code r)
3394         where
3395           arg_rep = cmmExprRep arg
3396
3397           push_this_arg = do
3398             (args',ars,frs,code') <- load_args rest aregs fregs code
3399             return ((CmmKinded arg hint):args', ars, frs, code')
3400
3401     push_args [] code = return code
3402     push_args ((CmmKinded arg hint):rest) code
3403        | isFloatingRep arg_rep = do
3404          (arg_reg, arg_code) <- getSomeReg arg
3405          delta <- getDeltaNat
3406          setDeltaNat (delta-arg_size)
3407          let code' = code `appOL` arg_code `appOL` toOL [
3408                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3409                         DELTA (delta-arg_size),
3410                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3411          push_args rest code'
3412
3413        | otherwise = do
3414        -- we only ever generate word-sized function arguments.  Promotion
3415        -- has already happened: our Int8# type is kept sign-extended
3416        -- in an Int#, for example.
3417          ASSERT(arg_rep == I64) return ()
3418          (arg_op, arg_code) <- getOperand arg
3419          delta <- getDeltaNat
3420          setDeltaNat (delta-arg_size)
3421          let code' = code `appOL` toOL [PUSH I64 arg_op,
3422                                         DELTA (delta-arg_size)]
3423          push_args rest code'
3424         where
3425           arg_rep = cmmExprRep arg
3426 #endif
3427
3428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3429
3430 #if sparc_TARGET_ARCH
3431 {-
3432    The SPARC calling convention is an absolute
3433    nightmare.  The first 6x32 bits of arguments are mapped into
3434    %o0 through %o5, and the remaining arguments are dumped to the
3435    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3436
3437    If we have to put args on the stack, move %o6==%sp down by
3438    the number of words to go on the stack, to ensure there's enough space.
3439
3440    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3441    16 words above the stack pointer is a word for the address of
3442    a structure return value.  I use this as a temporary location
3443    for moving values from float to int regs.  Certainly it isn't
3444    safe to put anything in the 16 words starting at %sp, since
3445    this area can get trashed at any time due to window overflows
3446    caused by signal handlers.
3447
3448    A final complication (if the above isn't enough) is that
3449    we can't blithely calculate the arguments one by one into
3450    %o0 .. %o5.  Consider the following nested calls:
3451
3452        fff a (fff b c)
3453
3454    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3455    the inner call will itself use %o0, which trashes the value put there
3456    in preparation for the outer call.  Upshot: we need to calculate the
3457    args into temporary regs, and move those to arg regs or onto the
3458    stack only immediately prior to the call proper.  Sigh.
3459 -}
3460
3461 genCCall target dest_regs argsAndHints = do
3462     let
3463         args = map kindlessCmm argsAndHints
3464     argcode_and_vregs <- mapM arg_to_int_vregs args
3465     let
3466         (argcodes, vregss) = unzip argcode_and_vregs
3467         n_argRegs          = length allArgRegs
3468         n_argRegs_used     = min (length vregs) n_argRegs
3469         vregs              = concat vregss
3470     -- deal with static vs dynamic call targets
3471     callinsns <- (case target of
3472         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3473                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3474         CmmCallee expr conv -> do
3475                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3476                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3477         CmmPrim mop -> do
3478                   (res, reduce) <- outOfLineFloatOp mop
3479                   lblOrMopExpr <- case res of
3480                        Left lbl -> do
3481                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3482                        Right mopExpr -> do
3483                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3484                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3485                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3486
3487       )
3488     let
3489         argcode = concatOL argcodes
3490         (move_sp_down, move_sp_up)
3491            = let diff = length vregs - n_argRegs
3492                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3493              in  if   nn <= 0
3494                  then (nilOL, nilOL)
3495                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3496         transfer_code
3497            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3498     return (argcode       `appOL`
3499             move_sp_down  `appOL`
3500             transfer_code `appOL`
3501             callinsns     `appOL`
3502             unitOL NOP    `appOL`
3503             move_sp_up)
3504   where
3505      -- move args from the integer vregs into which they have been
3506      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3507      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3508
3509      move_final [] _ offset          -- all args done
3510         = []
3511
3512      move_final (v:vs) [] offset     -- out of aregs; move to stack
3513         = ST I32 v (spRel offset)
3514           : move_final vs [] (offset+1)
3515
3516      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3517         = OR False g0 (RIReg v) a
3518           : move_final vs az offset
3519
3520      -- generate code to calculate an argument, and move it into one
3521      -- or two integer vregs.
3522      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3523      arg_to_int_vregs arg
3524         | (cmmExprRep arg) == I64
3525         = do
3526           (ChildCode64 code r_lo) <- iselExpr64 arg
3527           let
3528               r_hi = getHiVRegFromLo r_lo
3529           return (code, [r_hi, r_lo])
3530         | otherwise
3531         = do
3532           (src, code) <- getSomeReg arg
3533           tmp <- getNewRegNat (cmmExprRep arg)
3534           let
3535               pk   = cmmExprRep arg
3536           case pk of
3537              F64 -> do
3538                       v1 <- getNewRegNat I32
3539                       v2 <- getNewRegNat I32
3540                       return (
3541                         code                          `snocOL`
3542                         FMOV F64 src f0                `snocOL`
3543                         ST   F32  f0 (spRel 16)         `snocOL`
3544                         LD   I32  (spRel 16) v1         `snocOL`
3545                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3546                         LD   I32  (spRel 16) v2
3547                         ,
3548                         [v1,v2]
3549                        )
3550              F32 -> do
3551                       v1 <- getNewRegNat I32
3552                       return (
3553                         code                    `snocOL`
3554                         ST   F32  src (spRel 16)  `snocOL`
3555                         LD   I32  (spRel 16) v1
3556                         ,
3557                         [v1]
3558                        )
3559              other -> do
3560                         v1 <- getNewRegNat I32
3561                         return (
3562                           code `snocOL` OR False g0 (RIReg src) v1
3563                           ,
3564                           [v1]
3565                          )
3566 outOfLineFloatOp mop =
3567     do
3568       dflags <- getDynFlagsNat
3569       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3570                   mkForeignLabel functionName Nothing True
3571       let mopLabelOrExpr = case mopExpr of
3572                         CmmLit (CmmLabel lbl) -> Left lbl
3573                         _ -> Right mopExpr
3574       return (mopLabelOrExpr, reduce)
3575             where
3576                 (reduce, functionName) = case mop of
3577                   MO_F32_Exp    -> (True,  fsLit "exp")
3578                   MO_F32_Log    -> (True,  fsLit "log")
3579                   MO_F32_Sqrt   -> (True,  fsLit "sqrt")
3580
3581                   MO_F32_Sin    -> (True,  fsLit "sin")
3582                   MO_F32_Cos    -> (True,  fsLit "cos")
3583                   MO_F32_Tan    -> (True,  fsLit "tan")
3584
3585                   MO_F32_Asin   -> (True,  fsLit "asin")
3586                   MO_F32_Acos   -> (True,  fsLit "acos")
3587                   MO_F32_Atan   -> (True,  fsLit "atan")
3588
3589                   MO_F32_Sinh   -> (True,  fsLit "sinh")
3590                   MO_F32_Cosh   -> (True,  fsLit "cosh")
3591                   MO_F32_Tanh   -> (True,  fsLit "tanh")
3592
3593                   MO_F64_Exp    -> (False, fsLit "exp")
3594                   MO_F64_Log    -> (False, fsLit "log")
3595                   MO_F64_Sqrt   -> (False, fsLit "sqrt")
3596
3597                   MO_F64_Sin    -> (False, fsLit "sin")
3598                   MO_F64_Cos    -> (False, fsLit "cos")
3599                   MO_F64_Tan    -> (False, fsLit "tan")
3600
3601                   MO_F64_Asin   -> (False, fsLit "asin")
3602                   MO_F64_Acos   -> (False, fsLit "acos")
3603                   MO_F64_Atan   -> (False, fsLit "atan")
3604
3605                   MO_F64_Sinh   -> (False, fsLit "sinh")
3606                   MO_F64_Cosh   -> (False, fsLit "cosh")
3607                   MO_F64_Tanh   -> (False, fsLit "tanh")
3608
3609                   other -> pprPanic "outOfLineFloatOp(sparc) "
3610                                 (pprCallishMachOp mop)
3611
3612 #endif /* sparc_TARGET_ARCH */
3613
3614 #if powerpc_TARGET_ARCH
3615
3616 #if darwin_TARGET_OS || linux_TARGET_OS
3617 {-
3618     The PowerPC calling convention for Darwin/Mac OS X
3619     is described in Apple's document
3620     "Inside Mac OS X - Mach-O Runtime Architecture".
3621
3622     PowerPC Linux uses the System V Release 4 Calling Convention
3623     for PowerPC. It is described in the
3624     "System V Application Binary Interface PowerPC Processor Supplement".
3625
3626     Both conventions are similar:
3627     Parameters may be passed in general-purpose registers starting at r3, in
3628     floating point registers starting at f1, or on the stack.
3629
3630     But there are substantial differences:
3631     * The number of registers used for parameter passing and the exact set of
3632       nonvolatile registers differs (see MachRegs.lhs).
3633     * On Darwin, stack space is always reserved for parameters, even if they are
3634       passed in registers. The called routine may choose to save parameters from
3635       registers to the corresponding space on the stack.
3636     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3637       parameter is passed in an FPR.
3638     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3639       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3640       Darwin just treats an I64 like two separate I32s (high word first).
3641     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3642       4-byte aligned like everything else on Darwin.
3643     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3644       PowerPC Linux does not agree, so neither do we.
3645
3646     According to both conventions, The parameter area should be part of the
3647     caller's stack frame, allocated in the caller's prologue code (large enough
3648     to hold the parameter lists for all called routines). The NCG already
3649     uses the stack for register spilling, leaving 64 bytes free at the top.
3650     If we need a larger parameter area than that, we just allocate a new stack
3651     frame just before ccalling.
3652 -}
3653
3654
3655 genCCall (CmmPrim MO_WriteBarrier) _ _
3656  = return $ unitOL LWSYNC
3657
3658 genCCall target dest_regs argsAndHints
3659   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3660         -- we rely on argument promotion in the codeGen
3661     do
3662         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3663                                                         (zip args argReps)
3664                                                         allArgRegs allFPArgRegs
3665                                                         initialStackOffset
3666                                                         (toOL []) []
3667
3668         (labelOrExpr, reduceToF32) <- case target of
3669             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3670             CmmCallee expr conv -> return  (Right expr, False)
3671             CmmPrim mop -> outOfLineFloatOp mop
3672
3673         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3674             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3675
3676         case labelOrExpr of
3677             Left lbl -> do
3678                 return (         codeBefore
3679                         `snocOL` BL lbl usedRegs
3680                         `appOL`  codeAfter)
3681             Right dyn -> do
3682                 (dynReg, dynCode) <- getSomeReg dyn
3683                 return (         dynCode
3684                         `snocOL` MTCTR dynReg
3685                         `appOL`  codeBefore
3686                         `snocOL` BCTRL usedRegs
3687                         `appOL`  codeAfter)
3688     where
3689 #if darwin_TARGET_OS
3690         initialStackOffset = 24
3691             -- size of linkage area + size of arguments, in bytes
3692         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3693                                        map machRepByteWidth argReps
3694 #elif linux_TARGET_OS
3695         initialStackOffset = 8
3696         stackDelta finalStack = roundTo 16 finalStack
3697 #endif
3698         args = map kindlessCmm argsAndHints
3699         argReps = map cmmExprRep args
3700
3701         roundTo a x | x `mod` a == 0 = x
3702                     | otherwise = x + a - (x `mod` a)
3703
3704         move_sp_down finalStack
3705                | delta > 64 =
3706                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3707                               DELTA (-delta)]
3708                | otherwise = nilOL
3709                where delta = stackDelta finalStack
3710         move_sp_up finalStack
3711                | delta > 64 =
3712                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3713                               DELTA 0]
3714                | otherwise = nilOL
3715                where delta = stackDelta finalStack
3716
3717
3718         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3719         passArguments ((arg,I64):args) gprs fprs stackOffset
3720                accumCode accumUsed =
3721             do
3722                 ChildCode64 code vr_lo <- iselExpr64 arg
3723                 let vr_hi = getHiVRegFromLo vr_lo
3724
3725 #if darwin_TARGET_OS
3726                 passArguments args
3727                               (drop 2 gprs)
3728                               fprs
3729                               (stackOffset+8)
3730                               (accumCode `appOL` code
3731                                     `snocOL` storeWord vr_hi gprs stackOffset
3732                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3733                               ((take 2 gprs) ++ accumUsed)
3734             where
3735                 storeWord vr (gpr:_) offset = MR gpr vr
3736                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3737
3738 #elif linux_TARGET_OS
3739                 let stackOffset' = roundTo 8 stackOffset
3740                     stackCode = accumCode `appOL` code
3741                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3742                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3743                     regCode hireg loreg =
3744                         accumCode `appOL` code
3745                             `snocOL` MR hireg vr_hi
3746                             `snocOL` MR loreg vr_lo
3747
3748                 case gprs of
3749                     hireg : loreg : regs | even (length gprs) ->
3750                         passArguments args regs fprs stackOffset
3751                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3752                     _skipped : hireg : loreg : regs ->
3753                         passArguments args regs fprs stackOffset
3754                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3755                     _ -> -- only one or no regs left
3756                         passArguments args [] fprs (stackOffset'+8)
3757                                       stackCode accumUsed
3758 #endif
3759
3760         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3761             | reg : _ <- regs = do
3762                 register <- getRegister arg
3763                 let code = case register of
3764                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3765                             Any _ acode -> acode reg
3766                 passArguments args
3767                               (drop nGprs gprs)
3768                               (drop nFprs fprs)
3769 #if darwin_TARGET_OS
3770         -- The Darwin ABI requires that we reserve stack slots for register parameters
3771                               (stackOffset + stackBytes)
3772 #elif linux_TARGET_OS
3773         -- ... the SysV ABI doesn't.
3774                               stackOffset
3775 #endif
3776                               (accumCode `appOL` code)
3777                               (reg : accumUsed)
3778             | otherwise = do
3779                 (vr, code) <- getSomeReg arg
3780                 passArguments args
3781                               (drop nGprs gprs)
3782                               (drop nFprs fprs)
3783                               (stackOffset' + stackBytes)
3784                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3785                               accumUsed
3786             where
3787 #if darwin_TARGET_OS
3788         -- stackOffset is at least 4-byte aligned
3789         -- The Darwin ABI is happy with that.
3790                 stackOffset' = stackOffset
3791 #else
3792         -- ... the SysV ABI requires 8-byte alignment for doubles.
3793                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3794                              | otherwise  =           stackOffset
3795 #endif
3796                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3797                 (nGprs, nFprs, stackBytes, regs) = case rep of
3798                     I32 -> (1, 0, 4, gprs)
3799 #if darwin_TARGET_OS
3800         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3801         -- we use the FPRs.
3802                     F32 -> (1, 1, 4, fprs)
3803                     F64 -> (2, 1, 8, fprs)
3804 #elif linux_TARGET_OS
3805         -- ... the SysV ABI doesn't.
3806                     F32 -> (0, 1, 4, fprs)
3807                     F64 -> (0, 1, 8, fprs)
3808 #endif
3809
3810         moveResult reduceToF32 =
3811             case dest_regs of
3812                 [] -> nilOL
3813                 [CmmKinded dest _hint]
3814                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3815                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3816                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3817                                           MR r_dest r4]
3818                     | otherwise -> unitOL (MR r_dest r3)
3819                     where rep = cmmRegRep (CmmLocal dest)
3820                           r_dest = getRegisterReg (CmmLocal dest)
3821
3822         outOfLineFloatOp mop =
3823             do
3824                 dflags <- getDynFlagsNat
3825                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3826                               mkForeignLabel functionName Nothing True
3827                 let mopLabelOrExpr = case mopExpr of
3828                         CmmLit (CmmLabel lbl) -> Left lbl
3829                         _ -> Right mopExpr
3830                 return (mopLabelOrExpr, reduce)
3831             where
3832                 (functionName, reduce) = case mop of
3833                     MO_F32_Exp   -> (fsLit "exp", True)
3834                     MO_F32_Log   -> (fsLit "log", True)
3835                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
3836
3837                     MO_F32_Sin   -> (fsLit "sin", True)
3838                     MO_F32_Cos   -> (fsLit "cos", True)
3839                     MO_F32_Tan   -> (fsLit "tan", True)
3840
3841                     MO_F32_Asin  -> (fsLit "asin", True)
3842                     MO_F32_Acos  -> (fsLit "acos", True)
3843                     MO_F32_Atan  -> (fsLit "atan", True)
3844
3845                     MO_F32_Sinh  -> (fsLit "sinh", True)
3846                     MO_F32_Cosh  -> (fsLit "cosh", True)
3847                     MO_F32_Tanh  -> (fsLit "tanh", True)
3848                     MO_F32_Pwr   -> (fsLit "pow", True)
3849
3850                     MO_F64_Exp   -> (fsLit "exp", False)
3851                     MO_F64_Log   -> (fsLit "log", False)
3852                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
3853
3854                     MO_F64_Sin   -> (fsLit "sin", False)
3855                     MO_F64_Cos   -> (fsLit "cos", False)
3856                     MO_F64_Tan   -> (fsLit "tan", False)
3857
3858                     MO_F64_Asin  -> (fsLit "asin", False)
3859                     MO_F64_Acos  -> (fsLit "acos", False)
3860                     MO_F64_Atan  -> (fsLit "atan", False)
3861
3862                     MO_F64_Sinh  -> (fsLit "sinh", False)
3863                     MO_F64_Cosh  -> (fsLit "cosh", False)
3864                     MO_F64_Tanh  -> (fsLit "tanh", False)
3865                     MO_F64_Pwr   -> (fsLit "pow", False)
3866                     other -> pprPanic "genCCall(ppc): unknown callish op"
3867                                     (pprCallishMachOp other)
3868
3869 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3870
3871 #endif /* powerpc_TARGET_ARCH */
3872
3873
3874 -- -----------------------------------------------------------------------------
3875 -- Generating a table-branch
3876
3877 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3878
3879 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3880 genSwitch expr ids
3881   | opt_PIC
3882   = do
3883         (reg,e_code) <- getSomeReg expr
3884         lbl <- getNewLabelNat
3885         dflags <- getDynFlagsNat
3886         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3887         (tableReg,t_code) <- getSomeReg $ dynRef
3888         let
3889             jumpTable = map jumpTableEntryRel ids
3890
3891             jumpTableEntryRel Nothing
3892                 = CmmStaticLit (CmmInt 0 wordRep)
3893             jumpTableEntryRel (Just (BlockId id))
3894                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3895                 where blockLabel = mkAsmTempLabel id
3896
3897             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3898                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3899
3900 #if x86_64_TARGET_ARCH
3901 #if darwin_TARGET_OS
3902     -- on Mac OS X/x86_64, put the jump table in the text section
3903     -- to work around a limitation of the linker.
3904     -- ld64 is unable to handle the relocations for
3905     --     .quad L1 - L0
3906     -- if L0 is not preceded by a non-anonymous label in its section.
3907
3908             code = e_code `appOL` t_code `appOL` toOL [
3909                             ADD wordRep op (OpReg tableReg),
3910                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3911                             LDATA Text (CmmDataLabel lbl : jumpTable)
3912                     ]
3913 #else
3914     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3915     -- relocations, hence we only get 32-bit offsets in the jump
3916     -- table. As these offsets are always negative we need to properly
3917     -- sign extend them to 64-bit. This hack should be removed in
3918     -- conjunction with the hack in PprMach.hs/pprDataItem once
3919     -- binutils 2.17 is standard.
3920             code = e_code `appOL` t_code `appOL` toOL [
3921                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3922                             MOVSxL I32
3923                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3924                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
3925                                    (OpReg reg),
3926                             ADD wordRep (OpReg reg) (OpReg tableReg),
3927                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3928                    ]
3929 #endif
3930 #else
3931             code = e_code `appOL` t_code `appOL` toOL [
3932                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3933                             ADD wordRep op (OpReg tableReg),
3934                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3935                     ]
3936 #endif
3937         return code
3938   | otherwise
3939   = do
3940         (reg,e_code) <- getSomeReg expr
3941         lbl <- getNewLabelNat
3942         let
3943             jumpTable = map jumpTableEntry ids
3944             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3945             code = e_code `appOL` toOL [
3946                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3947                     JMP_TBL op [ id | Just id <- ids ]
3948                  ]
3949         -- in
3950         return code
3951 #elif powerpc_TARGET_ARCH
3952 genSwitch expr ids
3953   | opt_PIC
3954   = do
3955         (reg,e_code) <- getSomeReg expr
3956         tmp <- getNewRegNat I32
3957         lbl <- getNewLabelNat
3958         dflags <- getDynFlagsNat
3959         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3960         (tableReg,t_code) <- getSomeReg $ dynRef
3961         let
3962             jumpTable = map jumpTableEntryRel ids
3963
3964             jumpTableEntryRel Nothing
3965                 = CmmStaticLit (CmmInt 0 wordRep)
3966             jumpTableEntryRel (Just (BlockId id))
3967                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3968                 where blockLabel = mkAsmTempLabel id
3969
3970             code = e_code `appOL` t_code `appOL` toOL [
3971                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3972                             SLW tmp reg (RIImm (ImmInt 2)),
3973                             LD I32 tmp (AddrRegReg tableReg tmp),
3974                             ADD tmp tmp (RIReg tableReg),
3975                             MTCTR tmp,
3976                             BCTR [ id | Just id <- ids ]
3977                     ]
3978         return code
3979   | otherwise
3980   = do
3981         (reg,e_code) <- getSomeReg expr
3982         tmp <- getNewRegNat I32
3983         lbl <- getNewLabelNat
3984         let
3985             jumpTable = map jumpTableEntry ids
3986
3987             code = e_code `appOL` toOL [
3988                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3989                             SLW tmp reg (RIImm (ImmInt 2)),
3990                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3991                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3992                             MTCTR tmp,
3993                             BCTR [ id | Just id <- ids ]
3994                     ]
3995         return code
3996 #else
3997 #error "ToDo: genSwitch"
3998 #endif
3999
4000 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
4001 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4002     where blockLabel = mkAsmTempLabel id
4003
4004 -- -----------------------------------------------------------------------------
4005 -- Support bits
4006 -- -----------------------------------------------------------------------------
4007
4008
4009 -- -----------------------------------------------------------------------------
4010 -- 'condIntReg' and 'condFltReg': condition codes into registers
4011
4012 -- Turn those condition codes into integers now (when they appear on
4013 -- the right hand side of an assignment).
4014 --
4015 -- (If applicable) Do not fill the delay slots here; you will confuse the
4016 -- register allocator.
4017
4018 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4019
4020 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4021
4022 #if alpha_TARGET_ARCH
4023 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4024 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4025 #endif /* alpha_TARGET_ARCH */
4026
4027 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4028
4029 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4030
4031 condIntReg cond x y = do
4032   CondCode _ cond cond_code <- condIntCode cond x y
4033   tmp <- getNewRegNat I8
4034   let
4035         code dst = cond_code `appOL` toOL [
4036                     SETCC cond (OpReg tmp),
4037                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4038                   ]
4039   -- in
4040   return (Any I32 code)
4041
4042 #endif
4043
4044 #if i386_TARGET_ARCH
4045
4046 condFltReg cond x y = do
4047   CondCode _ cond cond_code <- condFltCode cond x y
4048   tmp <- getNewRegNat I8
4049   let
4050         code dst = cond_code `appOL` toOL [
4051                     SETCC cond (OpReg tmp),
4052                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4053                   ]
4054   -- in
4055   return (Any I32 code)
4056
4057 #endif
4058
4059 #if x86_64_TARGET_ARCH
4060
4061 condFltReg cond x y = do
4062   CondCode _ cond cond_code <- condFltCode cond x y
4063   tmp1 <- getNewRegNat wordRep
4064   tmp2 <- getNewRegNat wordRep
4065   let
4066         -- We have to worry about unordered operands (eg. comparisons
4067         -- against NaN).  If the operands are unordered, the comparison
4068         -- sets the parity flag, carry flag and zero flag.
4069         -- All comparisons are supposed to return false for unordered
4070         -- operands except for !=, which returns true.
4071         --
4072         -- Optimisation: we don't have to test the parity flag if we
4073         -- know the test has already excluded the unordered case: eg >
4074         -- and >= test for a zero carry flag, which can only occur for
4075         -- ordered operands.
4076         --
4077         -- ToDo: by reversing comparisons we could avoid testing the
4078         -- parity flag in more cases.
4079
4080         code dst =
4081            cond_code `appOL`
4082              (case cond of
4083                 NE  -> or_unordered dst
4084                 GU  -> plain_test   dst
4085                 GEU -> plain_test   dst
4086                 _   -> and_ordered  dst)
4087
4088         plain_test dst = toOL [
4089                     SETCC cond (OpReg tmp1),
4090                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4091                  ]
4092         or_unordered dst = toOL [
4093                     SETCC cond (OpReg tmp1),
4094                     SETCC PARITY (OpReg tmp2),
4095                     OR I8 (OpReg tmp1) (OpReg tmp2),
4096                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4097                   ]
4098         and_ordered dst = toOL [
4099                     SETCC cond (OpReg tmp1),
4100                     SETCC NOTPARITY (OpReg tmp2),
4101                     AND I8 (OpReg tmp1) (OpReg tmp2),
4102                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4103                   ]
4104   -- in
4105   return (Any I32 code)
4106
4107 #endif
4108
4109 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4110
4111 #if sparc_TARGET_ARCH
4112
4113 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4114     (src, code) <- getSomeReg x
4115     tmp <- getNewRegNat I32
4116     let
4117         code__2 dst = code `appOL` toOL [
4118             SUB False True g0 (RIReg src) g0,
4119             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4120     return (Any I32 code__2)
4121
4122 condIntReg EQQ x y = do
4123     (src1, code1) <- getSomeReg x
4124     (src2, code2) <- getSomeReg y
4125     tmp1 <- getNewRegNat I32
4126     tmp2 <- getNewRegNat I32
4127     let
4128         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4129             XOR False src1 (RIReg src2) dst,
4130             SUB False True g0 (RIReg dst) g0,
4131             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4132     return (Any I32 code__2)
4133
4134 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4135     (src, code) <- getSomeReg x
4136     tmp <- getNewRegNat I32
4137     let
4138         code__2 dst = code `appOL` toOL [
4139             SUB False True g0 (RIReg src) g0,
4140             ADD True False g0 (RIImm (ImmInt 0)) dst]
4141     return (Any I32 code__2)
4142
4143 condIntReg NE x y = do
4144     (src1, code1) <- getSomeReg x
4145     (src2, code2) <- getSomeReg y
4146     tmp1 <- getNewRegNat I32
4147     tmp2 <- getNewRegNat I32
4148     let
4149         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4150             XOR False src1 (RIReg src2) dst,
4151             SUB False True g0 (RIReg dst) g0,
4152             ADD True False g0 (RIImm (ImmInt 0)) dst]
4153     return (Any I32 code__2)
4154
4155 condIntReg cond x y = do
4156     BlockId lbl1 <- getBlockIdNat
4157     BlockId lbl2 <- getBlockIdNat
4158     CondCode _ cond cond_code <- condIntCode cond x y
4159     let
4160         code__2 dst = cond_code `appOL` toOL [
4161             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4162             OR False g0 (RIImm (ImmInt 0)) dst,
4163             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4164             NEWBLOCK (BlockId lbl1),
4165             OR False g0 (RIImm (ImmInt 1)) dst,
4166             NEWBLOCK (BlockId lbl2)]
4167     return (Any I32 code__2)
4168
4169 condFltReg cond x y = do
4170     BlockId lbl1 <- getBlockIdNat
4171     BlockId lbl2 <- getBlockIdNat
4172     CondCode _ cond cond_code <- condFltCode cond x y
4173     let
4174         code__2 dst = cond_code `appOL` toOL [
4175             NOP,
4176             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4177             OR False g0 (RIImm (ImmInt 0)) dst,
4178             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4179             NEWBLOCK (BlockId lbl1),
4180             OR False g0 (RIImm (ImmInt 1)) dst,
4181             NEWBLOCK (BlockId lbl2)]
4182     return (Any I32 code__2)
4183
4184 #endif /* sparc_TARGET_ARCH */
4185
4186 #if powerpc_TARGET_ARCH
4187 condReg getCond = do
4188     lbl1 <- getBlockIdNat
4189     lbl2 <- getBlockIdNat
4190     CondCode _ cond cond_code <- getCond
4191     let
4192 {-        code dst = cond_code `appOL` toOL [
4193                 BCC cond lbl1,
4194                 LI dst (ImmInt 0),
4195                 BCC ALWAYS lbl2,
4196                 NEWBLOCK lbl1,
4197                 LI dst (ImmInt 1),
4198                 BCC ALWAYS lbl2,
4199                 NEWBLOCK lbl2
4200             ]-}
4201         code dst = cond_code
4202             `appOL` negate_code
4203             `appOL` toOL [
4204                 MFCR dst,
4205                 RLWINM dst dst (bit + 1) 31 31
4206             ]
4207
4208         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4209                     | otherwise = nilOL
4210
4211         (bit, do_negate) = case cond of
4212             LTT -> (0, False)
4213             LE  -> (1, True)
4214             EQQ -> (2, False)
4215             GE  -> (0, True)
4216             GTT -> (1, False)
4217
4218             NE  -> (2, True)
4219
4220             LU  -> (0, False)
4221             LEU -> (1, True)
4222             GEU -> (0, True)
4223             GU  -> (1, False)
4224
4225     return (Any I32 code)
4226
4227 condIntReg cond x y = condReg (condIntCode cond x y)
4228 condFltReg cond x y = condReg (condFltCode cond x y)
4229 #endif /* powerpc_TARGET_ARCH */
4230
4231
4232 -- -----------------------------------------------------------------------------
4233 -- 'trivial*Code': deal with trivial instructions
4234
4235 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4236 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4237 -- Only look for constants on the right hand side, because that's
4238 -- where the generic optimizer will have put them.
4239
4240 -- Similarly, for unary instructions, we don't have to worry about
4241 -- matching an StInt as the argument, because genericOpt will already
4242 -- have handled the constant-folding.
4243
4244 trivialCode
4245     :: MachRep
4246     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4247       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4248                      -> Maybe (Operand -> Operand -> Instr)
4249       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4250                      -> Maybe (Operand -> Operand -> Instr)
4251       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4252       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4253       ,)))))
4254     -> CmmExpr -> CmmExpr -- the two arguments
4255     -> NatM Register
4256
4257 #ifndef powerpc_TARGET_ARCH
4258 trivialFCode
4259     :: MachRep
4260     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4261       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4262       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4263       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4264       ,))))
4265     -> CmmExpr -> CmmExpr -- the two arguments
4266     -> NatM Register
4267 #endif
4268
4269 trivialUCode
4270     :: MachRep
4271     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4272       ,IF_ARCH_i386 ((Operand -> Instr)
4273       ,IF_ARCH_x86_64 ((Operand -> Instr)
4274       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4275       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4276       ,)))))
4277     -> CmmExpr  -- the one argument
4278     -> NatM Register
4279
4280 #ifndef powerpc_TARGET_ARCH
4281 trivialUFCode
4282     :: MachRep
4283     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4284       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4285       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4286       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4287       ,))))
4288     -> CmmExpr -- the one argument
4289     -> NatM Register
4290 #endif
4291
4292 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4293
4294 #if alpha_TARGET_ARCH
4295
4296 trivialCode instr x (StInt y)
4297   | fits8Bits y
4298   = getRegister x               `thenNat` \ register ->
4299     getNewRegNat IntRep         `thenNat` \ tmp ->
4300     let
4301         code = registerCode register tmp
4302         src1 = registerName register tmp
4303         src2 = ImmInt (fromInteger y)
4304         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4305     in
4306     return (Any IntRep code__2)
4307
4308 trivialCode instr x y
4309   = getRegister x               `thenNat` \ register1 ->
4310     getRegister y               `thenNat` \ register2 ->
4311     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4312     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4313     let
4314         code1 = registerCode register1 tmp1 []
4315         src1  = registerName register1 tmp1
4316         code2 = registerCode register2 tmp2 []
4317         src2  = registerName register2 tmp2
4318         code__2 dst = asmSeqThen [code1, code2] .
4319                      mkSeqInstr (instr src1 (RIReg src2) dst)
4320     in
4321     return (Any IntRep code__2)
4322
4323 ------------
4324 trivialUCode instr x
4325   = getRegister x               `thenNat` \ register ->
4326     getNewRegNat IntRep         `thenNat` \ tmp ->
4327     let
4328         code = registerCode register tmp
4329         src  = registerName register tmp
4330         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4331     in
4332     return (Any IntRep code__2)
4333
4334 ------------
4335 trivialFCode _ instr x y
4336   = getRegister x               `thenNat` \ register1 ->
4337     getRegister y               `thenNat` \ register2 ->
4338     getNewRegNat F64    `thenNat` \ tmp1 ->
4339     getNewRegNat F64    `thenNat` \ tmp2 ->
4340     let
4341         code1 = registerCode register1 tmp1
4342         src1  = registerName register1 tmp1
4343
4344         code2 = registerCode register2 tmp2
4345         src2  = registerName register2 tmp2
4346
4347         code__2 dst = asmSeqThen [code1 [], code2 []] .
4348                       mkSeqInstr (instr src1 src2 dst)
4349     in
4350     return (Any F64 code__2)
4351
4352 trivialUFCode _ instr x
4353   = getRegister x               `thenNat` \ register ->
4354     getNewRegNat F64    `thenNat` \ tmp ->
4355     let
4356         code = registerCode register tmp
4357         src  = registerName register tmp
4358         code__2 dst = code . mkSeqInstr (instr src dst)
4359     in
4360     return (Any F64 code__2)
4361
4362 #endif /* alpha_TARGET_ARCH */
4363
4364 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4365
4366 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4367
4368 {-
4369 The Rules of the Game are:
4370
4371 * You cannot assume anything about the destination register dst;
4372   it may be anything, including a fixed reg.
4373
4374 * You may compute an operand into a fixed reg, but you may not
4375   subsequently change the contents of that fixed reg.  If you
4376   want to do so, first copy the value either to a temporary
4377   or into dst.  You are free to modify dst even if it happens
4378   to be a fixed reg -- that's not your problem.
4379
4380 * You cannot assume that a fixed reg will stay live over an
4381   arbitrary computation.  The same applies to the dst reg.
4382
4383 * Temporary regs obtained from getNewRegNat are distinct from
4384   each other and from all other regs, and stay live over
4385   arbitrary computations.
4386
4387 --------------------
4388
4389 SDM's version of The Rules:
4390
4391 * If getRegister returns Any, that means it can generate correct
4392   code which places the result in any register, period.  Even if that
4393   register happens to be read during the computation.
4394
4395   Corollary #1: this means that if you are generating code for an
4396   operation with two arbitrary operands, you cannot assign the result
4397   of the first operand into the destination register before computing
4398   the second operand.  The second operand might require the old value
4399   of the destination register.
4400
4401   Corollary #2: A function might be able to generate more efficient
4402   code if it knows the destination register is a new temporary (and
4403   therefore not read by any of the sub-computations).
4404
4405 * If getRegister returns Any, then the code it generates may modify only:
4406         (a) fresh temporaries
4407         (b) the destination register
4408         (c) known registers (eg. %ecx is used by shifts)
4409   In particular, it may *not* modify global registers, unless the global
4410   register happens to be the destination register.
4411 -}
4412
4413 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4414   | not (is64BitLit lit_a) = do
4415   b_code <- getAnyReg b
4416   let
4417        code dst
4418          = b_code dst `snocOL`
4419            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4420   -- in
4421   return (Any rep code)
4422
4423 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4424
4425 -- This is re-used for floating pt instructions too.
4426 genTrivialCode rep instr a b = do
4427   (b_op, b_code) <- getNonClobberedOperand b
4428   a_code <- getAnyReg a
4429   tmp <- getNewRegNat rep
4430   let
4431      -- We want the value of b to stay alive across the computation of a.
4432      -- But, we want to calculate a straight into the destination register,
4433      -- because the instruction only has two operands (dst := dst `op` src).
4434      -- The troublesome case is when the result of b is in the same register
4435      -- as the destination reg.  In this case, we have to save b in a
4436      -- new temporary across the computation of a.
4437      code dst
4438         | dst `regClashesWithOp` b_op =
4439                 b_code `appOL`
4440                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4441                 a_code dst `snocOL`
4442                 instr (OpReg tmp) (OpReg dst)
4443         | otherwise =
4444                 b_code `appOL`
4445                 a_code dst `snocOL`
4446                 instr b_op (OpReg dst)
4447   -- in
4448   return (Any rep code)
4449
4450 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4451 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4452 reg `regClashesWithOp` _            = False
4453
4454 -----------
4455
4456 trivialUCode rep instr x = do
4457   x_code <- getAnyReg x
4458   let
4459      code dst =
4460         x_code dst `snocOL`
4461         instr (OpReg dst)
4462   -- in
4463   return (Any rep code)
4464
4465 -----------
4466
4467 #if i386_TARGET_ARCH
4468
4469 trivialFCode pk instr x y = do
4470   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4471   (y_reg, y_code) <- getSomeReg y
4472   let
4473      code dst =
4474         x_code `appOL`
4475         y_code `snocOL`
4476         instr pk x_reg y_reg dst
4477   -- in
4478   return (Any pk code)
4479
4480 #endif
4481
4482 #if x86_64_TARGET_ARCH
4483
4484 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4485
4486 #endif
4487
4488 -------------
4489
4490 trivialUFCode rep instr x = do
4491   (x_reg, x_code) <- getSomeReg x
4492   let
4493      code dst =
4494         x_code `snocOL`
4495         instr x_reg dst
4496   -- in
4497   return (Any rep code)
4498
4499 #endif /* i386_TARGET_ARCH */
4500
4501 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4502
4503 #if sparc_TARGET_ARCH
4504
4505 trivialCode pk instr x (CmmLit (CmmInt y d))
4506   | fits13Bits y
4507   = do
4508       (src1, code) <- getSomeReg x
4509       tmp <- getNewRegNat I32
4510       let
4511         src2 = ImmInt (fromInteger y)
4512         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4513       return (Any I32 code__2)
4514
4515 trivialCode pk instr x y = do
4516     (src1, code1) <- getSomeReg x
4517     (src2, code2) <- getSomeReg y
4518     tmp1 <- getNewRegNat I32
4519     tmp2 <- getNewRegNat I32
4520     let
4521         code__2 dst = code1 `appOL` code2 `snocOL`
4522                       instr src1 (RIReg src2) dst
4523     return (Any I32 code__2)
4524
4525 ------------
4526 trivialFCode pk instr x y = do
4527     (src1, code1) <- getSomeReg x
4528     (src2, code2) <- getSomeReg y
4529     tmp1 <- getNewRegNat (cmmExprRep x)
4530     tmp2 <- getNewRegNat (cmmExprRep y)
4531     tmp <- getNewRegNat F64
4532     let
4533         promote x = FxTOy F32 F64 x tmp
4534
4535         pk1   = cmmExprRep x
4536         pk2   = cmmExprRep y
4537
4538         code__2 dst =
4539                 if pk1 == pk2 then
4540                     code1 `appOL` code2 `snocOL`
4541                     instr pk src1 src2 dst
4542                 else if pk1 == F32 then
4543                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4544                     instr F64 tmp src2 dst
4545                 else
4546                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4547                     instr F64 src1 tmp dst
4548     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4549
4550 ------------
4551 trivialUCode pk instr x = do
4552     (src, code) <- getSomeReg x
4553     tmp <- getNewRegNat pk
4554     let
4555         code__2 dst = code `snocOL` instr (RIReg src) dst
4556     return (Any pk code__2)
4557
4558 -------------
4559 trivialUFCode pk instr x = do
4560     (src, code) <- getSomeReg x
4561     tmp <- getNewRegNat pk
4562     let
4563         code__2 dst = code `snocOL` instr src dst
4564     return (Any pk code__2)
4565
4566 #endif /* sparc_TARGET_ARCH */
4567
4568 #if powerpc_TARGET_ARCH
4569
4570 {-
4571 Wolfgang's PowerPC version of The Rules:
4572
4573 A slightly modified version of The Rules to take advantage of the fact
4574 that PowerPC instructions work on all registers and don't implicitly
4575 clobber any fixed registers.
4576
4577 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4578
4579 * If getRegister returns Any, then the code it generates may modify only:
4580         (a) fresh temporaries
4581         (b) the destination register
4582   It may *not* modify global registers, unless the global
4583   register happens to be the destination register.
4584   It may not clobber any other registers. In fact, only ccalls clobber any
4585   fixed registers.
4586   Also, it may not modify the counter register (used by genCCall).
4587
4588   Corollary: If a getRegister for a subexpression returns Fixed, you need
4589   not move it to a fresh temporary before evaluating the next subexpression.
4590   The Fixed register won't be modified.
4591   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4592
4593 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4594   the value of the destination register.
4595 -}
4596
4597 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4598     | Just imm <- makeImmediate rep signed y
4599     = do
4600         (src1, code1) <- getSomeReg x
4601         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4602         return (Any rep code)
4603
4604 trivialCode rep signed instr x y = do
4605     (src1, code1) <- getSomeReg x
4606     (src2, code2) <- getSomeReg y
4607     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4608     return (Any rep code)
4609
4610 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4611     -> CmmExpr -> CmmExpr -> NatM Register
4612 trivialCodeNoImm rep instr x y = do
4613     (src1, code1) <- getSomeReg x
4614     (src2, code2) <- getSomeReg y
4615     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4616     return (Any rep code)
4617
4618 trivialUCode rep instr x = do
4619     (src, code) <- getSomeReg x
4620     let code' dst = code `snocOL` instr dst src
4621     return (Any rep code')
4622
4623 -- There is no "remainder" instruction on the PPC, so we have to do
4624 -- it the hard way.
4625 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4626
4627 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4628     -> CmmExpr -> CmmExpr -> NatM Register
4629 remainderCode rep div x y = do
4630     (src1, code1) <- getSomeReg x
4631     (src2, code2) <- getSomeReg y
4632     let code dst = code1 `appOL` code2 `appOL` toOL [
4633                 div dst src1 src2,
4634                 MULLW dst dst (RIReg src2),
4635                 SUBF dst dst src1
4636             ]
4637     return (Any rep code)
4638
4639 #endif /* powerpc_TARGET_ARCH */
4640
4641
4642 -- -----------------------------------------------------------------------------
4643 --  Coercing to/from integer/floating-point...
4644
4645 -- When going to integer, we truncate (round towards 0).
4646
4647 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4648 -- conversions.  We have to store temporaries in memory to move
4649 -- between the integer and the floating point register sets.
4650
4651 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4652 -- pretend, on sparc at least, that double and float regs are seperate
4653 -- kinds, so the value has to be computed into one kind before being
4654 -- explicitly "converted" to live in the other kind.
4655
4656 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4657 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4658
4659 #if sparc_TARGET_ARCH
4660 coerceDbl2Flt :: CmmExpr -> NatM Register
4661 coerceFlt2Dbl :: CmmExpr -> NatM Register
4662 #endif
4663
4664 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4665
4666 #if alpha_TARGET_ARCH
4667
4668 coerceInt2FP _ x
4669   = getRegister x               `thenNat` \ register ->
4670     getNewRegNat IntRep         `thenNat` \ reg ->
4671     let
4672         code = registerCode register reg
4673         src  = registerName register reg
4674
4675         code__2 dst = code . mkSeqInstrs [
4676             ST Q src (spRel 0),
4677             LD TF dst (spRel 0),
4678             CVTxy Q TF dst dst]
4679     in
4680     return (Any F64 code__2)
4681
4682 -------------
4683 coerceFP2Int x
4684   = getRegister x               `thenNat` \ register ->
4685     getNewRegNat F64    `thenNat` \ tmp ->
4686     let
4687         code = registerCode register tmp
4688         src  = registerName register tmp
4689
4690         code__2 dst = code . mkSeqInstrs [
4691             CVTxy TF Q src tmp,
4692             ST TF tmp (spRel 0),
4693             LD Q dst (spRel 0)]
4694     in
4695     return (Any IntRep code__2)
4696
4697 #endif /* alpha_TARGET_ARCH */
4698
4699 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4700
4701 #if i386_TARGET_ARCH
4702
4703 coerceInt2FP from to x = do
4704   (x_reg, x_code) <- getSomeReg x
4705   let
4706         opc  = case to of F32 -> GITOF; F64 -> GITOD
4707         code dst = x_code `snocOL` opc x_reg dst
4708         -- ToDo: works for non-I32 reps?
4709   -- in
4710   return (Any to code)
4711
4712 ------------
4713
4714 coerceFP2Int from to x = do
4715   (x_reg, x_code) <- getSomeReg x
4716   let
4717         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4718         code dst = x_code `snocOL` opc x_reg dst
4719         -- ToDo: works for non-I32 reps?
4720   -- in
4721   return (Any to code)
4722
4723 #endif /* i386_TARGET_ARCH */
4724
4725 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4726
4727 #if x86_64_TARGET_ARCH
4728
4729 coerceFP2Int from to x = do
4730   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4731   let
4732         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4733         code dst = x_code `snocOL` opc x_op dst
4734   -- in
4735   return (Any to code) -- works even if the destination rep is <I32
4736
4737 coerceInt2FP from to x = do
4738   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4739   let
4740         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4741         code dst = x_code `snocOL` opc x_op dst
4742   -- in
4743   return (Any to code) -- works even if the destination rep is <I32
4744
4745 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4746 coerceFP2FP to x = do
4747   (x_reg, x_code) <- getSomeReg x
4748   let
4749         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4750         code dst = x_code `snocOL` opc x_reg dst
4751   -- in
4752   return (Any to code)
4753
4754 #endif
4755
4756 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4757
4758 #if sparc_TARGET_ARCH
4759
4760 coerceInt2FP pk1 pk2 x = do
4761     (src, code) <- getSomeReg x
4762     let
4763         code__2 dst = code `appOL` toOL [
4764             ST pk1 src (spRel (-2)),
4765             LD pk1 (spRel (-2)) dst,
4766             FxTOy pk1 pk2 dst dst]
4767     return (Any pk2 code__2)
4768
4769 ------------
4770 coerceFP2Int pk fprep x = do
4771     (src, code) <- getSomeReg x
4772     reg <- getNewRegNat fprep
4773     tmp <- getNewRegNat pk
4774     let
4775         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4776             code `appOL` toOL [
4777             FxTOy fprep pk src tmp,
4778             ST pk tmp (spRel (-2)),
4779             LD pk (spRel (-2)) dst]
4780     return (Any pk code__2)
4781
4782 ------------
4783 coerceDbl2Flt x = do
4784     (src, code) <- getSomeReg x
4785     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4786
4787 ------------
4788 coerceFlt2Dbl x = do
4789     (src, code) <- getSomeReg x
4790     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4791
4792 #endif /* sparc_TARGET_ARCH */
4793
4794 #if powerpc_TARGET_ARCH
4795 coerceInt2FP fromRep toRep x = do
4796     (src, code) <- getSomeReg x
4797     lbl <- getNewLabelNat
4798     itmp <- getNewRegNat I32
4799     ftmp <- getNewRegNat F64
4800     dflags <- getDynFlagsNat
4801     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4802     Amode addr addr_code <- getAmode dynRef
4803     let
4804         code' dst = code `appOL` maybe_exts `appOL` toOL [
4805                 LDATA ReadOnlyData
4806                                 [CmmDataLabel lbl,
4807                                  CmmStaticLit (CmmInt 0x43300000 I32),
4808                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4809                 XORIS itmp src (ImmInt 0x8000),
4810                 ST I32 itmp (spRel 3),
4811                 LIS itmp (ImmInt 0x4330),
4812                 ST I32 itmp (spRel 2),
4813                 LD F64 ftmp (spRel 2)
4814             ] `appOL` addr_code `appOL` toOL [
4815                 LD F64 dst addr,
4816                 FSUB F64 dst ftmp dst
4817             ] `appOL` maybe_frsp dst
4818
4819         maybe_exts = case fromRep of
4820                         I8 ->  unitOL $ EXTS I8 src src
4821                         I16 -> unitOL $ EXTS I16 src src
4822                         I32 -> nilOL
4823         maybe_frsp dst = case toRep of
4824                         F32 -> unitOL $ FRSP dst dst
4825                         F64 -> nilOL
4826     return (Any toRep code')
4827
4828 coerceFP2Int fromRep toRep x = do
4829     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4830     (src, code) <- getSomeReg x
4831     tmp <- getNewRegNat F64
4832     let
4833         code' dst = code `appOL` toOL [
4834                 -- convert to int in FP reg
4835             FCTIWZ tmp src,
4836                 -- store value (64bit) from FP to stack
4837             ST F64 tmp (spRel 2),
4838                 -- read low word of value (high word is undefined)
4839             LD I32 dst (spRel 3)]
4840     return (Any toRep code')
4841 #endif /* powerpc_TARGET_ARCH */
4842
4843
4844 -- -----------------------------------------------------------------------------
4845 -- eXTRA_STK_ARGS_HERE
4846
4847 -- We (allegedly) put the first six C-call arguments in registers;
4848 -- where do we start putting the rest of them?
4849
4850 -- Moved from MachInstrs (SDM):
4851
4852 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4853 eXTRA_STK_ARGS_HERE :: Int
4854 eXTRA_STK_ARGS_HERE
4855   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4856 #endif
4857