compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo     ( mkBranchInstr, mkRegRegMoveInstr )
  33 import MachRegs
  34 import PprMach
  35
  36 -- Our intermediate code:
  37 import BlockId
  38 import PprCmm           ( pprExpr )
  39 import Cmm
  40 import CLabel
  41 import ClosureInfo      ( C_SRT(..) )
  42
  43 -- The rest:
  44 import StaticFlags      ( opt_PIC )
  45 import ForeignCall      ( CCallConv(..) )
  46 import OrdList
  47 import Pretty
  48 import qualified Outputable as O
  49 import Outputable
  50 import FastString
  51 import FastBool         ( isFastTrue )
  52 import Constants        ( wORD_SIZE )
  53
  54 import Debug.Trace      ( trace )
  55
  56 import Control.Monad    ( mapAndUnzipM )
  57 import Data.Maybe       ( fromJust )
  58 import Data.Bits
  59 import Data.Word
  60 import Data.Int
  61
  62
  63 -- -----------------------------------------------------------------------------
  64 -- Top-level of the instruction selector
  65
  66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  67 -- They are really trees of insns to facilitate fast appending, where a
  68 -- left-to-right traversal (pre-order?) yields the insns in the correct
  69 -- order.
  70
  71 type InstrBlock = OrdList Instr
  72
  73 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  74 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  75   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  76   picBaseMb <- getPicBaseMaybeNat
  77   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  78       tops = proc : concat statics
  79   case picBaseMb of
  80       Just picBase -> initializePicBase picBase tops
  81       Nothing -> return tops
  82
  83 cmmTopCodeGen (CmmData sec dat) = do
  84   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  85
  86 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  87 basicBlockCodeGen (BasicBlock id stmts) = do
  88   instrs <- stmtsToInstrs stmts
  89   -- code generation may introduce new basic block boundaries, which
  90   -- are indicated by the NEWBLOCK instruction.  We must split up the
  91   -- instruction stream into basic blocks again.  Also, we extract
  92   -- LDATAs here too.
  93   let
  94         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  95
  96         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  97           = ([], BasicBlock id instrs : blocks, statics)
  98         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  99           = (instrs, blocks, CmmData sec dat:statics)
 100         mkBlocks instr (instrs,blocks,statics)
 101           = (instr:instrs, blocks, statics)
 102   -- in
 103   return (BasicBlock id top : other_blocks, statics)
 104
 105 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 106 stmtsToInstrs stmts
 107    = do instrss <- mapM stmtToInstrs stmts
 108         return (concatOL instrss)
 109
 110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 111 stmtToInstrs stmt = case stmt of
 112     CmmNop         -> return nilOL
 113     CmmComment s   -> return (unitOL (COMMENT s))
 114
 115     CmmAssign reg src
 116       | isFloatType ty -> assignReg_FltCode size reg src
 117 #if WORD_SIZE_IN_BITS==32
 118       | isWord64 ty    -> assignReg_I64Code      reg src
 119 #endif
 120       | otherwise        -> assignReg_IntCode size reg src
 121         where ty = cmmRegType reg
 122               size = cmmTypeSize ty
 123
 124     CmmStore addr src
 125       | isFloatType ty -> assignMem_FltCode size addr src
 126 #if WORD_SIZE_IN_BITS==32
 127       | isWord64 ty      -> assignMem_I64Code      addr src
 128 #endif
 129       | otherwise        -> assignMem_IntCode size addr src
 130         where ty = cmmExprType src
 131               size = cmmTypeSize ty
 132
 133     CmmCall target result_regs args _ _
 134        -> genCCall target result_regs args
 135
 136     CmmBranch id          -> genBranch id
 137     CmmCondBranch arg id  -> genCondJump id arg
 138     CmmSwitch arg ids     -> genSwitch arg ids
 139     CmmJump arg params    -> genJump arg
 140     CmmReturn params      ->
 141       panic "stmtToInstrs: return statement should have been cps'd away"
 142
 143 -- -----------------------------------------------------------------------------
 144 -- General things for putting together code sequences
 145
 146 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 147 -- CmmExprs into CmmRegOff?
 148 mangleIndexTree :: CmmExpr -> CmmExpr
 149 mangleIndexTree (CmmRegOff reg off)
 150   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 151   where width = typeWidth (cmmRegType reg)
 152
 153 -- -----------------------------------------------------------------------------
 154 --  Code gen for 64-bit arithmetic on 32-bit platforms
 155
 156 {-
 157 Simple support for generating 64-bit code (ie, 64 bit values and 64
 158 bit assignments) on 32-bit platforms.  Unlike the main code generator
 159 we merely shoot for generating working code as simply as possible, and
 160 pay little attention to code quality.  Specifically, there is no
 161 attempt to deal cleverly with the fixed-vs-floating register
 162 distinction; all values are generated into (pairs of) floating
 163 registers, even if this would mean some redundant reg-reg moves as a
 164 result.  Only one of the VRegUniques is returned, since it will be
 165 of the VRegUniqueLo form, and the upper-half VReg can be determined
 166 by applying getHiVRegFromLo to it.
 167 -}
 168
 169 data ChildCode64        -- a.k.a "Register64"
 170    = ChildCode64
 171         InstrBlock      -- code
 172         Reg             -- the lower 32-bit temporary which contains the
 173                         -- result; use getHiVRegFromLo to find the other
 174                         -- VRegUnique.  Rules of this simplified insn
 175                         -- selection game are therefore that the returned
 176                         -- Reg may be modified
 177
 178 #if WORD_SIZE_IN_BITS==32
 179 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 180 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 181 #endif
 182
 183 #ifndef x86_64_TARGET_ARCH
 184 iselExpr64        :: CmmExpr -> NatM ChildCode64
 185 #endif
 186
 187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 188
 189 #if i386_TARGET_ARCH
 190
 191 assignMem_I64Code addrTree valueTree = do
 192   Amode addr addr_code <- getAmode addrTree
 193   ChildCode64 vcode rlo <- iselExpr64 valueTree
 194   let
 195         rhi = getHiVRegFromLo rlo
 196
 197         -- Little-endian store
 198         mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
 199         mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 200   -- in
 201   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 202
 203
 204 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 205    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 206    let
 207          r_dst_lo = mkVReg u_dst II32
 208          r_dst_hi = getHiVRegFromLo r_dst_lo
 209          r_src_hi = getHiVRegFromLo r_src_lo
 210          mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
 211          mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
 212    -- in
 213    return (
 214         vcode `snocOL` mov_lo `snocOL` mov_hi
 215      )
 216
 217 assignReg_I64Code lvalue valueTree
 218    = panic "assignReg_I64Code(i386): invalid lvalue"
 219
 220 ------------
 221
 222 iselExpr64 (CmmLit (CmmInt i _)) = do
 223   (rlo,rhi) <- getNewRegPairNat II32
 224   let
 225         r = fromIntegral (fromIntegral i :: Word32)
 226         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 227         code = toOL [
 228                 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
 229                 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
 230                 ]
 231   -- in
 232   return (ChildCode64 code rlo)
 233
 234 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 235    Amode addr addr_code <- getAmode addrTree
 236    (rlo,rhi) <- getNewRegPairNat II32
 237    let
 238         mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
 239         mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 240    -- in
 241    return (
 242             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 243                         rlo
 244      )
 245
 246 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 247    = return (ChildCode64 nilOL (mkVReg vu II32))
 248
 249 -- we handle addition, but rather badly
 250 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 251    ChildCode64 code1 r1lo <- iselExpr64 e1
 252    (rlo,rhi) <- getNewRegPairNat II32
 253    let
 254         r = fromIntegral (fromIntegral i :: Word32)
 255         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 256         r1hi = getHiVRegFromLo r1lo
 257         code =  code1 `appOL`
 258                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 259                        ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
 260                        MOV II32 (OpReg r1hi) (OpReg rhi),
 261                        ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 262    -- in
 263    return (ChildCode64 code rlo)
 264
 265 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 266    ChildCode64 code1 r1lo <- iselExpr64 e1
 267    ChildCode64 code2 r2lo <- iselExpr64 e2
 268    (rlo,rhi) <- getNewRegPairNat II32
 269    let
 270         r1hi = getHiVRegFromLo r1lo
 271         r2hi = getHiVRegFromLo r2lo
 272         code =  code1 `appOL`
 273                 code2 `appOL`
 274                 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
 275                        ADD II32 (OpReg r2lo) (OpReg rlo),
 276                        MOV II32 (OpReg r1hi) (OpReg rhi),
 277                        ADC II32 (OpReg r2hi) (OpReg rhi) ]
 278    -- in
 279    return (ChildCode64 code rlo)
 280
 281 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
 282      fn <- getAnyReg expr
 283      r_dst_lo <-  getNewRegNat II32
 284      let r_dst_hi = getHiVRegFromLo r_dst_lo
 285          code = fn r_dst_lo
 286      return (
 287              ChildCode64 (code `snocOL`
 288                           MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 289                           r_dst_lo
 290             )
 291
 292 iselExpr64 expr
 293    = pprPanic "iselExpr64(i386)" (ppr expr)
 294
 295 #endif /* i386_TARGET_ARCH */
 296
 297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 298
 299 #if sparc_TARGET_ARCH
 300
 301 assignMem_I64Code addrTree valueTree = do
 302      Amode addr addr_code <- getAmode addrTree
 303      ChildCode64 vcode rlo <- iselExpr64 valueTree
 304      (src, code) <- getSomeReg addrTree
 305      let
 306          rhi = getHiVRegFromLo rlo
 307          -- Big-endian store
 308          mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
 309          mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
 310      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 311
 312 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 313      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 314      let
 315          r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
 316          r_dst_hi = getHiVRegFromLo r_dst_lo
 317          r_src_hi = getHiVRegFromLo r_src_lo
 318          mov_lo = mkMOV r_src_lo r_dst_lo
 319          mov_hi = mkMOV r_src_hi r_dst_hi
 320          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 321      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 322 assignReg_I64Code lvalue valueTree
 323    = panic "assignReg_I64Code(sparc): invalid lvalue"
 324
 325
 326 -- Load a 64 bit word
 327 iselExpr64 (CmmLoad addrTree ty)
 328  | isWord64 ty
 329  = do   Amode amode addr_code   <- getAmode addrTree
 330         let result
 331
 332                 | AddrRegReg r1 r2      <- amode
 333                 = do    rlo     <- getNewRegNat II32
 334                         tmp     <- getNewRegNat II32
 335                         let rhi = getHiVRegFromLo rlo
 336
 337                         return  $ ChildCode64
 338                                 (        addr_code
 339                                 `appOL`  toOL
 340                                          [ ADD False False r1 (RIReg r2) tmp
 341                                          , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
 342                                          , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
 343                                 rlo
 344
 345                 | AddrRegImm r1 (ImmInt i) <- amode
 346                 = do    rlo     <- getNewRegNat II32
 347                         let rhi = getHiVRegFromLo rlo
 348
 349                         return  $ ChildCode64
 350                                 (        addr_code
 351                                 `appOL`  toOL
 352                                          [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
 353                                          , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
 354                                 rlo
 355
 356         result
 357
 358
 359 -- Add a literal to a 64 bit integer
 360 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
 361  = do   ChildCode64 code1 r1_lo <- iselExpr64 e1
 362         let r1_hi       = getHiVRegFromLo r1_lo
 363
 364         r_dst_lo        <- getNewRegNat II32
 365         let r_dst_hi    =  getHiVRegFromLo r_dst_lo
 366
 367         return  $ ChildCode64
 368                         ( toOL
 369                         [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
 370                         , ADD True  False r1_hi (RIReg g0)         r_dst_hi ])
 371                         r_dst_lo
 372
 373
 374 -- Addition of II64
 375 iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
 376  = do   ChildCode64 code1 r1_lo <- iselExpr64 e1
 377         let r1_hi       = getHiVRegFromLo r1_lo
 378
 379         ChildCode64 code2 r2_lo <- iselExpr64 e2
 380         let r2_hi       = getHiVRegFromLo r2_lo
 381
 382         r_dst_lo        <- getNewRegNat II32
 383         let r_dst_hi    = getHiVRegFromLo r_dst_lo
 384
 385         let code =      code1
 386                 `appOL` code2
 387                 `appOL` toOL
 388                         [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
 389                         , ADD True  False r1_hi (RIReg r2_hi) r_dst_hi ]
 390
 391         return  $ ChildCode64 code r_dst_lo
 392
 393
 394 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
 395      r_dst_lo <-  getNewRegNat II32
 396      let r_dst_hi = getHiVRegFromLo r_dst_lo
 397          r_src_lo = mkVReg uq II32
 398          r_src_hi = getHiVRegFromLo r_src_lo
 399          mov_lo = mkMOV r_src_lo r_dst_lo
 400          mov_hi = mkMOV r_src_hi r_dst_hi
 401          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 402      return (
 403             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 404          )
 405
 406 -- Convert something into II64
 407 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
 408  = do
 409         r_dst_lo        <- getNewRegNat II32
 410         let r_dst_hi    = getHiVRegFromLo r_dst_lo
 411
 412         -- compute expr and load it into r_dst_lo
 413         (a_reg, a_code) <- getSomeReg expr
 414
 415         let code        = a_code
 416                 `appOL` toOL
 417                         [ mkRegRegMoveInstr g0    r_dst_hi      -- clear high 32 bits
 418                         , mkRegRegMoveInstr a_reg r_dst_lo ]
 419
 420         return  $ ChildCode64 code r_dst_lo
 421
 422
 423 iselExpr64 expr
 424    = pprPanic "iselExpr64(sparc)" (ppr expr)
 425
 426 #endif /* sparc_TARGET_ARCH */
 427
 428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 429
 430 #if powerpc_TARGET_ARCH
 431
 432 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 433 getI64Amodes addrTree = do
 434     Amode hi_addr addr_code <- getAmode addrTree
 435     case addrOffset hi_addr 4 of
 436         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 437         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 438                            return (AddrRegImm hi_ptr (ImmInt 0),
 439                                    AddrRegImm hi_ptr (ImmInt 4),
 440                                    code)
 441
 442 assignMem_I64Code addrTree valueTree = do
 443         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 444         ChildCode64 vcode rlo <- iselExpr64 valueTree
 445         let
 446                 rhi = getHiVRegFromLo rlo
 447
 448                 -- Big-endian store
 449                 mov_hi = ST II32 rhi hi_addr
 450                 mov_lo = ST II32 rlo lo_addr
 451         -- in
 452         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 453
 454 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 455    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 456    let
 457          r_dst_lo = mkVReg u_dst II32
 458          r_dst_hi = getHiVRegFromLo r_dst_lo
 459          r_src_hi = getHiVRegFromLo r_src_lo
 460          mov_lo = MR r_dst_lo r_src_lo
 461          mov_hi = MR r_dst_hi r_src_hi
 462    -- in
 463    return (
 464         vcode `snocOL` mov_lo `snocOL` mov_hi
 465      )
 466
 467 assignReg_I64Code lvalue valueTree
 468    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 469
 470
 471 -- Don't delete this -- it's very handy for debugging.
 472 --iselExpr64 expr
 473 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 474 --   = panic "iselExpr64(???)"
 475
 476 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 477     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 478     (rlo, rhi) <- getNewRegPairNat II32
 479     let mov_hi = LD II32 rhi hi_addr
 480         mov_lo = LD II32 rlo lo_addr
 481     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 482                          rlo
 483
 484 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 485    = return (ChildCode64 nilOL (mkVReg vu II32))
 486
 487 iselExpr64 (CmmLit (CmmInt i _)) = do
 488   (rlo,rhi) <- getNewRegPairNat II32
 489   let
 490         half0 = fromIntegral (fromIntegral i :: Word16)
 491         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 492         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 493         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 494
 495         code = toOL [
 496                 LIS rlo (ImmInt half1),
 497                 OR rlo rlo (RIImm $ ImmInt half0),
 498                 LIS rhi (ImmInt half3),
 499                 OR rlo rlo (RIImm $ ImmInt half2)
 500                 ]
 501   -- in
 502   return (ChildCode64 code rlo)
 503
 504 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 505    ChildCode64 code1 r1lo <- iselExpr64 e1
 506    ChildCode64 code2 r2lo <- iselExpr64 e2
 507    (rlo,rhi) <- getNewRegPairNat II32
 508    let
 509         r1hi = getHiVRegFromLo r1lo
 510         r2hi = getHiVRegFromLo r2lo
 511         code =  code1 `appOL`
 512                 code2 `appOL`
 513                 toOL [ ADDC rlo r1lo r2lo,
 514                        ADDE rhi r1hi r2hi ]
 515    -- in
 516    return (ChildCode64 code rlo)
 517
 518 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
 519     (expr_reg,expr_code) <- getSomeReg expr
 520     (rlo, rhi) <- getNewRegPairNat II32
 521     let mov_hi = LI rhi (ImmInt 0)
 522         mov_lo = MR rlo expr_reg
 523     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 524                          rlo
 525 iselExpr64 expr
 526    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 527
 528 #endif /* powerpc_TARGET_ARCH */
 529
 530
 531 -- -----------------------------------------------------------------------------
 532 -- The 'Register' type
 533
 534 -- 'Register's passed up the tree.  If the stix code forces the register
 535 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 536 -- otherwise, it comes out as @Any@, and the parent can decide which
 537 -- register to put it in.
 538
 539 data Register
 540   = Fixed   Size Reg InstrBlock
 541   | Any     Size (Reg -> InstrBlock)
 542
 543 swizzleRegisterRep :: Register -> Size -> Register
 544 -- Change the width; it's a no-op
 545 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 546 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 547
 548
 549 -- -----------------------------------------------------------------------------
 550 -- Utils based on getRegister, below
 551
 552 -- The dual to getAnyReg: compute an expression into a register, but
 553 -- we don't mind which one it is.
 554 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 555 getSomeReg expr = do
 556   r <- getRegister expr
 557   case r of
 558     Any rep code -> do
 559         tmp <- getNewRegNat rep
 560         return (tmp, code tmp)
 561     Fixed _ reg code ->
 562         return (reg, code)
 563
 564 -- -----------------------------------------------------------------------------
 565 -- Grab the Reg for a CmmReg
 566
 567 getRegisterReg :: CmmReg -> Reg
 568
 569 getRegisterReg (CmmLocal (LocalReg u pk))
 570   = mkVReg u (cmmTypeSize pk)
 571
 572 getRegisterReg (CmmGlobal mid)
 573   = case get_GlobalReg_reg_or_addr mid of
 574        Left (RealReg rrno) -> RealReg rrno
 575        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 576           -- By this stage, the only MagicIds remaining should be the
 577           -- ones which map to a real machine register on this
 578           -- platform.  Hence ...
 579
 580
 581 -- -----------------------------------------------------------------------------
 582 -- Generate code to get a subtree into a Register
 583
 584 -- Don't delete this -- it's very handy for debugging.
 585 --getRegister expr
 586 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 587 --   = panic "getRegister(???)"
 588
 589 getRegister :: CmmExpr -> NatM Register
 590
 591 #if !x86_64_TARGET_ARCH
 592     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 593     -- register, it can only be used for rip-relative addressing.
 594 getRegister (CmmReg (CmmGlobal PicBaseReg))
 595   = do
 596       reg <- getPicBaseNat wordSize
 597       return (Fixed wordSize reg nilOL)
 598 #endif
 599
 600 getRegister (CmmReg reg)
 601   = return (Fixed (cmmTypeSize (cmmRegType reg))
 602                   (getRegisterReg reg) nilOL)
 603
 604 getRegister tree@(CmmRegOff _ _)
 605   = getRegister (mangleIndexTree tree)
 606
 607
 608 #if WORD_SIZE_IN_BITS==32
 609     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 610     -- TO_W_(x), TO_W_(x >> 32)
 611
 612 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 613              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 614   ChildCode64 code rlo <- iselExpr64 x
 615   return $ Fixed II32 (getHiVRegFromLo rlo) code
 616
 617 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 618              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 619   ChildCode64 code rlo <- iselExpr64 x
 620   return $ Fixed II32 (getHiVRegFromLo rlo) code
 621
 622 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 623   ChildCode64 code rlo <- iselExpr64 x
 624   return $ Fixed II32 rlo code
 625
 626 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 627   ChildCode64 code rlo <- iselExpr64 x
 628   return $ Fixed II32 rlo code
 629
 630 #endif
 631
 632 -- end of machine-"independent" bit; here we go on the rest...
 633
 634 #if alpha_TARGET_ARCH
 635
 636 getRegister (StDouble d)
 637   = getBlockIdNat                   `thenNat` \ lbl ->
 638     getNewRegNat PtrRep             `thenNat` \ tmp ->
 639     let code dst = mkSeqInstrs [
 640             LDATA RoDataSegment lbl [
 641                     DATA TF [ImmLab (rational d)]
 642                 ],
 643             LDA tmp (AddrImm (ImmCLbl lbl)),
 644             LD TF dst (AddrReg tmp)]
 645     in
 646         return (Any FF64 code)
 647
 648 getRegister (StPrim primop [x]) -- unary PrimOps
 649   = case primop of
 650       IntNegOp -> trivialUCode (NEG Q False) x
 651
 652       NotOp    -> trivialUCode NOT x
 653
 654       FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
 655       DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
 656
 657       OrdOp -> coerceIntCode IntRep x
 658       ChrOp -> chrCode x
 659
 660       Float2IntOp  -> coerceFP2Int    x
 661       Int2FloatOp  -> coerceInt2FP pr x
 662       Double2IntOp -> coerceFP2Int    x
 663       Int2DoubleOp -> coerceInt2FP pr x
 664
 665       Double2FloatOp -> coerceFltCode x
 666       Float2DoubleOp -> coerceFltCode x
 667
 668       other_op -> getRegister (StCall fn CCallConv FF64 [x])
 669         where
 670           fn = case other_op of
 671                  FloatExpOp    -> fsLit "exp"
 672                  FloatLogOp    -> fsLit "log"
 673                  FloatSqrtOp   -> fsLit "sqrt"
 674                  FloatSinOp    -> fsLit "sin"
 675                  FloatCosOp    -> fsLit "cos"
 676                  FloatTanOp    -> fsLit "tan"
 677                  FloatAsinOp   -> fsLit "asin"
 678                  FloatAcosOp   -> fsLit "acos"
 679                  FloatAtanOp   -> fsLit "atan"
 680                  FloatSinhOp   -> fsLit "sinh"
 681                  FloatCoshOp   -> fsLit "cosh"
 682                  FloatTanhOp   -> fsLit "tanh"
 683                  DoubleExpOp   -> fsLit "exp"
 684                  DoubleLogOp   -> fsLit "log"
 685                  DoubleSqrtOp  -> fsLit "sqrt"
 686                  DoubleSinOp   -> fsLit "sin"
 687                  DoubleCosOp   -> fsLit "cos"
 688                  DoubleTanOp   -> fsLit "tan"
 689                  DoubleAsinOp  -> fsLit "asin"
 690                  DoubleAcosOp  -> fsLit "acos"
 691                  DoubleAtanOp  -> fsLit "atan"
 692                  DoubleSinhOp  -> fsLit "sinh"
 693                  DoubleCoshOp  -> fsLit "cosh"
 694                  DoubleTanhOp  -> fsLit "tanh"
 695   where
 696     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 697
 698 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 699   = case primop of
 700       CharGtOp -> trivialCode (CMP LTT) y x
 701       CharGeOp -> trivialCode (CMP LE) y x
 702       CharEqOp -> trivialCode (CMP EQQ) x y
 703       CharNeOp -> int_NE_code x y
 704       CharLtOp -> trivialCode (CMP LTT) x y
 705       CharLeOp -> trivialCode (CMP LE) x y
 706
 707       IntGtOp  -> trivialCode (CMP LTT) y x
 708       IntGeOp  -> trivialCode (CMP LE) y x
 709       IntEqOp  -> trivialCode (CMP EQQ) x y
 710       IntNeOp  -> int_NE_code x y
 711       IntLtOp  -> trivialCode (CMP LTT) x y
 712       IntLeOp  -> trivialCode (CMP LE) x y
 713
 714       WordGtOp -> trivialCode (CMP ULT) y x
 715       WordGeOp -> trivialCode (CMP ULE) x y
 716       WordEqOp -> trivialCode (CMP EQQ)  x y
 717       WordNeOp -> int_NE_code x y
 718       WordLtOp -> trivialCode (CMP ULT) x y
 719       WordLeOp -> trivialCode (CMP ULE) x y
 720
 721       AddrGtOp -> trivialCode (CMP ULT) y x
 722       AddrGeOp -> trivialCode (CMP ULE) y x
 723       AddrEqOp -> trivialCode (CMP EQQ)  x y
 724       AddrNeOp -> int_NE_code x y
 725       AddrLtOp -> trivialCode (CMP ULT) x y
 726       AddrLeOp -> trivialCode (CMP ULE) x y
 727
 728       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 729       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 730       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 731       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 732       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 733       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 734
 735       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 736       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 737       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 738       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 739       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 740       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 741
 742       IntAddOp  -> trivialCode (ADD Q False) x y
 743       IntSubOp  -> trivialCode (SUB Q False) x y
 744       IntMulOp  -> trivialCode (MUL Q False) x y
 745       IntQuotOp -> trivialCode (DIV Q False) x y
 746       IntRemOp  -> trivialCode (REM Q False) x y
 747
 748       WordAddOp  -> trivialCode (ADD Q False) x y
 749       WordSubOp  -> trivialCode (SUB Q False) x y
 750       WordMulOp  -> trivialCode (MUL Q False) x y
 751       WordQuotOp -> trivialCode (DIV Q True) x y
 752       WordRemOp  -> trivialCode (REM Q True) x y
 753
 754       FloatAddOp -> trivialFCode  W32 (FADD TF) x y
 755       FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
 756       FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
 757       FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
 758
 759       DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
 760       DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
 761       DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
 762       DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
 763
 764       AddrAddOp  -> trivialCode (ADD Q False) x y
 765       AddrSubOp  -> trivialCode (SUB Q False) x y
 766       AddrRemOp  -> trivialCode (REM Q True) x y
 767
 768       AndOp  -> trivialCode AND x y
 769       OrOp   -> trivialCode OR  x y
 770       XorOp  -> trivialCode XOR x y
 771       SllOp  -> trivialCode SLL x y
 772       SrlOp  -> trivialCode SRL x y
 773
 774       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 775       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 776       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 777
 778       FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
 779       DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
 780   where
 781     {- ------------------------------------------------------------
 782         Some bizarre special code for getting condition codes into
 783         registers.  Integer non-equality is a test for equality
 784         followed by an XOR with 1.  (Integer comparisons always set
 785         the result register to 0 or 1.)  Floating point comparisons of
 786         any kind leave the result in a floating point register, so we
 787         need to wrangle an integer register out of things.
 788     -}
 789     int_NE_code :: StixTree -> StixTree -> NatM Register
 790
 791     int_NE_code x y
 792       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 793         getNewRegNat IntRep             `thenNat` \ tmp ->
 794         let
 795             code = registerCode register tmp
 796             src  = registerName register tmp
 797             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 798         in
 799         return (Any IntRep code__2)
 800
 801     {- ------------------------------------------------------------
 802         Comments for int_NE_code also apply to cmpF_code
 803     -}
 804     cmpF_code
 805         :: (Reg -> Reg -> Reg -> Instr)
 806         -> Cond
 807         -> StixTree -> StixTree
 808         -> NatM Register
 809
 810     cmpF_code instr cond x y
 811       = trivialFCode pr instr x y       `thenNat` \ register ->
 812         getNewRegNat FF64               `thenNat` \ tmp ->
 813         getBlockIdNat                   `thenNat` \ lbl ->
 814         let
 815             code = registerCode register tmp
 816             result  = registerName register tmp
 817
 818             code__2 dst = code . mkSeqInstrs [
 819                 OR zeroh (RIImm (ImmInt 1)) dst,
 820                 BF cond  result (ImmCLbl lbl),
 821                 OR zeroh (RIReg zeroh) dst,
 822                 NEWBLOCK lbl]
 823         in
 824         return (Any IntRep code__2)
 825       where
 826         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 827       ------------------------------------------------------------
 828
 829 getRegister (CmmLoad pk mem)
 830   = getAmode mem                    `thenNat` \ amode ->
 831     let
 832         code = amodeCode amode
 833         src   = amodeAddr amode
 834         size = primRepToSize pk
 835         code__2 dst = code . mkSeqInstr (LD size dst src)
 836     in
 837     return (Any pk code__2)
 838
 839 getRegister (StInt i)
 840   | fits8Bits i
 841   = let
 842         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 843     in
 844     return (Any IntRep code)
 845   | otherwise
 846   = let
 847         code dst = mkSeqInstr (LDI Q dst src)
 848     in
 849     return (Any IntRep code)
 850   where
 851     src = ImmInt (fromInteger i)
 852
 853 getRegister leaf
 854   | isJust imm
 855   = let
 856         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 857     in
 858     return (Any PtrRep code)
 859   where
 860     imm = maybeImm leaf
 861     imm__2 = case imm of Just x -> x
 862
 863 #endif /* alpha_TARGET_ARCH */
 864
 865 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 866
 867 #if i386_TARGET_ARCH
 868
 869 getRegister (CmmLit (CmmFloat f W32)) = do
 870     lbl <- getNewLabelNat
 871     dflags <- getDynFlagsNat
 872     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 873     Amode addr addr_code <- getAmode dynRef
 874     let code dst =
 875             LDATA ReadOnlyData
 876                         [CmmDataLabel lbl,
 877                          CmmStaticLit (CmmFloat f W32)]
 878             `consOL` (addr_code `snocOL`
 879             GLD FF32 addr dst)
 880     -- in
 881     return (Any FF32 code)
 882
 883
 884 getRegister (CmmLit (CmmFloat d W64))
 885   | d == 0.0
 886   = let code dst = unitOL (GLDZ dst)
 887     in  return (Any FF64 code)
 888
 889   | d == 1.0
 890   = let code dst = unitOL (GLD1 dst)
 891     in  return (Any FF64 code)
 892
 893   | otherwise = do
 894     lbl <- getNewLabelNat
 895     dflags <- getDynFlagsNat
 896     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 897     Amode addr addr_code <- getAmode dynRef
 898     let code dst =
 899             LDATA ReadOnlyData
 900                         [CmmDataLabel lbl,
 901                          CmmStaticLit (CmmFloat d W64)]
 902             `consOL` (addr_code `snocOL`
 903             GLD FF64 addr dst)
 904     -- in
 905     return (Any FF64 code)
 906
 907 #endif /* i386_TARGET_ARCH */
 908
 909 #if x86_64_TARGET_ARCH
 910
 911 getRegister (CmmLit (CmmFloat 0.0 w)) = do
 912    let size = floatSize w
 913        code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
 914         -- I don't know why there are xorpd, xorps, and pxor instructions.
 915         -- They all appear to do the same thing --SDM
 916    return (Any size code)
 917
 918 getRegister (CmmLit (CmmFloat f w)) = do
 919     lbl <- getNewLabelNat
 920     let code dst = toOL [
 921             LDATA ReadOnlyData
 922                         [CmmDataLabel lbl,
 923                          CmmStaticLit (CmmFloat f w)],
 924             MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 925             ]
 926     -- in
 927     return (Any size code)
 928   where size = floatSize w
 929
 930 #endif /* x86_64_TARGET_ARCH */
 931
 932 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 933
 934 -- catch simple cases of zero- or sign-extended load
 935 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
 936   code <- intLoadCode (MOVZxL II8) addr
 937   return (Any II32 code)
 938
 939 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
 940   code <- intLoadCode (MOVSxL II8) addr
 941   return (Any II32 code)
 942
 943 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
 944   code <- intLoadCode (MOVZxL II16) addr
 945   return (Any II32 code)
 946
 947 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
 948   code <- intLoadCode (MOVSxL II16) addr
 949   return (Any II32 code)
 950
 951 #endif
 952
 953 #if x86_64_TARGET_ARCH
 954
 955 -- catch simple cases of zero- or sign-extended load
 956 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
 957   code <- intLoadCode (MOVZxL II8) addr
 958   return (Any II64 code)
 959
 960 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
 961   code <- intLoadCode (MOVSxL II8) addr
 962   return (Any II64 code)
 963
 964 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
 965   code <- intLoadCode (MOVZxL II16) addr
 966   return (Any II64 code)
 967
 968 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
 969   code <- intLoadCode (MOVSxL II16) addr
 970   return (Any II64 code)
 971
 972 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
 973   code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
 974   return (Any II64 code)
 975
 976 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
 977   code <- intLoadCode (MOVSxL II32) addr
 978   return (Any II64 code)
 979
 980 #endif
 981
 982 #if x86_64_TARGET_ARCH
 983 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
 984                                      CmmLit displacement])
 985     = return $ Any II64 (\dst -> unitOL $
 986         LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 987 #endif
 988
 989 #if x86_64_TARGET_ARCH
 990 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
 991   x_code <- getAnyReg x
 992   lbl <- getNewLabelNat
 993   let
 994     code dst = x_code dst `appOL` toOL [
 995         -- This is how gcc does it, so it can't be that bad:
 996         LDATA ReadOnlyData16 [
 997                 CmmAlign 16,
 998                 CmmDataLabel lbl,
 999                 CmmStaticLit (CmmInt 0x80000000 W32),
1000                 CmmStaticLit (CmmInt 0 W32),
1001                 CmmStaticLit (CmmInt 0 W32),
1002                 CmmStaticLit (CmmInt 0 W32)
1003         ],
1004         XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1005                 -- xorps, so we need the 128-bit constant
1006                 -- ToDo: rip-relative
1007         ]
1008   --
1009   return (Any FF32 code)
1010
1011 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
1012   x_code <- getAnyReg x
1013   lbl <- getNewLabelNat
1014   let
1015         -- This is how gcc does it, so it can't be that bad:
1016     code dst = x_code dst `appOL` toOL [
1017         LDATA ReadOnlyData16 [
1018                 CmmAlign 16,
1019                 CmmDataLabel lbl,
1020                 CmmStaticLit (CmmInt 0x8000000000000000 W64),
1021                 CmmStaticLit (CmmInt 0 W64)
1022         ],
1023                 -- gcc puts an unpck here.  Wonder if we need it.
1024         XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1025                 -- xorpd, so we need the 128-bit constant
1026         ]
1027   --
1028   return (Any FF64 code)
1029 #endif
1030
1031 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1032
1033 getRegister (CmmMachOp mop [x]) -- unary MachOps
1034   = case mop of
1035 #if i386_TARGET_ARCH
1036       MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
1037       MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
1038 #endif
1039
1040       MO_S_Neg w -> triv_ucode NEGI (intSize w)
1041       MO_F_Neg w -> triv_ucode NEGI (floatSize w)
1042       MO_Not w   -> triv_ucode NOT  (intSize w)
1043
1044       -- Nop conversions
1045       MO_UU_Conv W32 W8  -> toI8Reg  W32 x
1046       MO_SS_Conv W32 W8  -> toI8Reg  W32 x
1047       MO_UU_Conv W16 W8  -> toI8Reg  W16 x
1048       MO_SS_Conv W16 W8  -> toI8Reg  W16 x
1049       MO_UU_Conv W32 W16 -> toI16Reg W32 x
1050       MO_SS_Conv W32 W16 -> toI16Reg W32 x
1051
1052 #if x86_64_TARGET_ARCH
1053       MO_UU_Conv W64 W32 -> conversionNop II64 x
1054       MO_SS_Conv W64 W32 -> conversionNop II64 x
1055       MO_UU_Conv W64 W16 -> toI16Reg W64 x
1056       MO_SS_Conv W64 W16 -> toI16Reg W64 x
1057       MO_UU_Conv W64 W8  -> toI8Reg  W64 x
1058       MO_SS_Conv W64 W8  -> toI8Reg  W64 x
1059 #endif
1060
1061       MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1062       MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1063
1064       -- widenings
1065       MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
1066       MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
1067       MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
1068
1069       MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
1070       MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
1071       MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
1072
1073 #if x86_64_TARGET_ARCH
1074       MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
1075       MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1076       MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1077       MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
1078       MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1079       MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1080         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1081         -- However, we don't want the register allocator to throw it
1082         -- away as an unnecessary reg-to-reg move, so we keep it in
1083         -- the form of a movzl and print it as a movl later.
1084 #endif
1085
1086 #if i386_TARGET_ARCH
1087       MO_FF_Conv W32 W64 -> conversionNop FF64 x
1088       MO_FF_Conv W64 W32 -> conversionNop FF32 x
1089 #else
1090       MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1091       MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1092 #endif
1093
1094       MO_FS_Conv from to -> coerceFP2Int from to x
1095       MO_SF_Conv from to -> coerceInt2FP from to x
1096
1097       other -> pprPanic "getRegister" (pprMachOp mop)
1098    where
1099         triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1100         triv_ucode instr size = trivialUCode size (instr size) x
1101
1102         -- signed or unsigned extension.
1103         integerExtend :: Width -> Width
1104                       -> (Size -> Operand -> Operand -> Instr)
1105                       -> CmmExpr -> NatM Register
1106         integerExtend from to instr expr = do
1107             (reg,e_code) <- if from == W8 then getByteReg expr
1108                                           else getSomeReg expr
1109             let
1110                 code dst =
1111                   e_code `snocOL`
1112                   instr (intSize from) (OpReg reg) (OpReg dst)
1113             return (Any (intSize to) code)
1114
1115         toI8Reg :: Width -> CmmExpr -> NatM Register
1116         toI8Reg new_rep expr
1117             = do codefn <- getAnyReg expr
1118                  return (Any (intSize new_rep) codefn)
1119                 -- HACK: use getAnyReg to get a byte-addressable register.
1120                 -- If the source was a Fixed register, this will add the
1121                 -- mov instruction to put it into the desired destination.
1122                 -- We're assuming that the destination won't be a fixed
1123                 -- non-byte-addressable register; it won't be, because all
1124                 -- fixed registers are word-sized.
1125
1126         toI16Reg = toI8Reg -- for now
1127
1128         conversionNop :: Size -> CmmExpr -> NatM Register
1129         conversionNop new_size expr
1130             = do e_code <- getRegister expr
1131                  return (swizzleRegisterRep e_code new_size)
1132
1133
1134 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1135   = case mop of
1136       MO_F_Eq w -> condFltReg EQQ x y
1137       MO_F_Ne w -> condFltReg NE x y
1138       MO_F_Gt w -> condFltReg GTT x y
1139       MO_F_Ge w -> condFltReg GE x y
1140       MO_F_Lt w -> condFltReg LTT x y
1141       MO_F_Le w -> condFltReg LE x y
1142
1143       MO_Eq rep   -> condIntReg EQQ x y
1144       MO_Ne rep   -> condIntReg NE x y
1145
1146       MO_S_Gt rep -> condIntReg GTT x y
1147       MO_S_Ge rep -> condIntReg GE x y
1148       MO_S_Lt rep -> condIntReg LTT x y
1149       MO_S_Le rep -> condIntReg LE x y
1150
1151       MO_U_Gt rep -> condIntReg GU  x y
1152       MO_U_Ge rep -> condIntReg GEU x y
1153       MO_U_Lt rep -> condIntReg LU  x y
1154       MO_U_Le rep -> condIntReg LEU x y
1155
1156 #if i386_TARGET_ARCH
1157       MO_F_Add w -> trivialFCode w GADD x y
1158       MO_F_Sub w -> trivialFCode w GSUB x y
1159       MO_F_Quot w -> trivialFCode w GDIV x y
1160       MO_F_Mul w -> trivialFCode w GMUL x y
1161 #endif
1162
1163 #if x86_64_TARGET_ARCH
1164       MO_F_Add w -> trivialFCode w ADD x y
1165       MO_F_Sub w -> trivialFCode w SUB x y
1166       MO_F_Quot w -> trivialFCode w FDIV x y
1167       MO_F_Mul w -> trivialFCode w MUL x y
1168 #endif
1169
1170       MO_Add rep -> add_code rep x y
1171       MO_Sub rep -> sub_code rep x y
1172
1173       MO_S_Quot rep -> div_code rep True  True  x y
1174       MO_S_Rem  rep -> div_code rep True  False x y
1175       MO_U_Quot rep -> div_code rep False True  x y
1176       MO_U_Rem  rep -> div_code rep False False x y
1177
1178       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1179
1180       MO_Mul rep -> triv_op rep IMUL
1181       MO_And rep -> triv_op rep AND
1182       MO_Or  rep -> triv_op rep OR
1183       MO_Xor rep -> triv_op rep XOR
1184
1185         {- Shift ops on x86s have constraints on their source, it
1186            either has to be Imm, CL or 1
1187             => trivialCode is not restrictive enough (sigh.)
1188         -}
1189       MO_Shl rep   -> shift_code rep SHL x y {-False-}
1190       MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1191       MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1192
1193       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1194   where
1195     --------------------
1196     triv_op width instr = trivialCode width op (Just op) x y
1197                         where op   = instr (intSize width)
1198
1199     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1200     imulMayOflo rep a b = do
1201          (a_reg, a_code) <- getNonClobberedReg a
1202          b_code <- getAnyReg b
1203          let
1204              shift_amt  = case rep of
1205                            W32 -> 31
1206                            W64 -> 63
1207                            _ -> panic "shift_amt"
1208
1209              size = intSize rep
1210              code = a_code `appOL` b_code eax `appOL`
1211                         toOL [
1212                            IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
1213                            SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1214                                 -- sign extend lower part
1215                            SUB size (OpReg edx) (OpReg eax)
1216                                 -- compare against upper
1217                            -- eax==0 if high part == sign extended low part
1218                         ]
1219          -- in
1220          return (Fixed size eax code)
1221
1222     --------------------
1223     shift_code :: Width
1224                -> (Size -> Operand -> Operand -> Instr)
1225                -> CmmExpr
1226                -> CmmExpr
1227                -> NatM Register
1228
1229     {- Case1: shift length as immediate -}
1230     shift_code width instr x y@(CmmLit lit) = do
1231           x_code <- getAnyReg x
1232           let
1233                size = intSize width
1234                code dst
1235                   = x_code dst `snocOL`
1236                     instr size (OpImm (litToImm lit)) (OpReg dst)
1237           -- in
1238           return (Any size code)
1239
1240     {- Case2: shift length is complex (non-immediate)
1241       * y must go in %ecx.
1242       * we cannot do y first *and* put its result in %ecx, because
1243         %ecx might be clobbered by x.
1244       * if we do y second, then x cannot be
1245         in a clobbered reg.  Also, we cannot clobber x's reg
1246         with the instruction itself.
1247       * so we can either:
1248         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1249         - do y second and put its result into %ecx.  x gets placed in a fresh
1250           tmp.  This is likely to be better, becuase the reg alloc can
1251           eliminate this reg->reg move here (it won't eliminate the other one,
1252           because the move is into the fixed %ecx).
1253     -}
1254     shift_code width instr x y{-amount-} = do
1255         x_code <- getAnyReg x
1256         let size = intSize width
1257         tmp <- getNewRegNat size
1258         y_code <- getAnyReg y
1259         let
1260            code = x_code tmp `appOL`
1261                   y_code ecx `snocOL`
1262                   instr size (OpReg ecx) (OpReg tmp)
1263         -- in
1264         return (Fixed size tmp code)
1265
1266     --------------------
1267     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1268     add_code rep x (CmmLit (CmmInt y _))
1269         | is32BitInteger y = add_int rep x y
1270     add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1271       where size = intSize rep
1272
1273     --------------------
1274     sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1275     sub_code rep x (CmmLit (CmmInt y _))
1276         | is32BitInteger (-y) = add_int rep x (-y)
1277     sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1278
1279     -- our three-operand add instruction:
1280     add_int width x y = do
1281         (x_reg, x_code) <- getSomeReg x
1282         let
1283             size = intSize width
1284             imm = ImmInt (fromInteger y)
1285             code dst
1286                = x_code `snocOL`
1287                  LEA size
1288                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1289                         (OpReg dst)
1290         --
1291         return (Any size code)
1292
1293     ----------------------
1294     div_code width signed quotient x y = do
1295            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1296            x_code <- getAnyReg x
1297            let
1298              size = intSize width
1299              widen | signed    = CLTD size
1300                    | otherwise = XOR size (OpReg edx) (OpReg edx)
1301
1302              instr | signed    = IDIV
1303                    | otherwise = DIV
1304
1305              code = y_code `appOL`
1306                     x_code eax `appOL`
1307                     toOL [widen, instr size y_op]
1308
1309              result | quotient  = eax
1310                     | otherwise = edx
1311
1312            -- in
1313            return (Fixed size result code)
1314
1315
1316 getRegister (CmmLoad mem pk)
1317   | isFloatType pk
1318   = do
1319     Amode src mem_code <- getAmode mem
1320     let
1321         size = cmmTypeSize pk
1322         code dst = mem_code `snocOL`
1323                    IF_ARCH_i386(GLD size src dst,
1324                                 MOV size (OpAddr src) (OpReg dst))
1325     return (Any size code)
1326
1327 #if i386_TARGET_ARCH
1328 getRegister (CmmLoad mem pk)
1329   | not (isWord64 pk)
1330   = do
1331     code <- intLoadCode instr mem
1332     return (Any size code)
1333   where
1334     width = typeWidth pk
1335     size = intSize width
1336     instr = case width of
1337                 W8     -> MOVZxL II8
1338                 _other -> MOV size
1339         -- We always zero-extend 8-bit loads, if we
1340         -- can't think of anything better.  This is because
1341         -- we can't guarantee access to an 8-bit variant of every register
1342         -- (esi and edi don't have 8-bit variants), so to make things
1343         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1344 #endif
1345
1346 #if x86_64_TARGET_ARCH
1347 -- Simpler memory load code on x86_64
1348 getRegister (CmmLoad mem pk)
1349   = do
1350     code <- intLoadCode (MOV size) mem
1351     return (Any size code)
1352   where size = intSize $ typeWidth pk
1353 #endif
1354
1355 getRegister (CmmLit (CmmInt 0 width))
1356   = let
1357         size = intSize width
1358
1359         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1360         adj_size = case size of II64 -> II32; _ -> size
1361         size1 = IF_ARCH_i386( size, adj_size )
1362         code dst
1363            = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1364     in
1365         return (Any size code)
1366
1367 #if x86_64_TARGET_ARCH
1368   -- optimisation for loading small literals on x86_64: take advantage
1369   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1370   -- instruction forms are shorter.
1371 getRegister (CmmLit lit)
1372   | isWord64 (cmmLitType lit), not (isBigLit lit)
1373   = let
1374         imm = litToImm lit
1375         code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1376     in
1377         return (Any II64 code)
1378   where
1379    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1380    isBigLit _ = False
1381         -- note1: not the same as (not.is32BitLit), because that checks for
1382         -- signed literals that fit in 32 bits, but we want unsigned
1383         -- literals here.
1384         -- note2: all labels are small, because we're assuming the
1385         -- small memory model (see gcc docs, -mcmodel=small).
1386 #endif
1387
1388 getRegister (CmmLit lit)
1389   = let
1390         size = cmmTypeSize (cmmLitType lit)
1391         imm = litToImm lit
1392         code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1393     in
1394         return (Any size code)
1395
1396 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1397
1398
1399 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1400    -> NatM (Reg -> InstrBlock)
1401 intLoadCode instr mem = do
1402   Amode src mem_code <- getAmode mem
1403   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1404
1405 -- Compute an expression into *any* register, adding the appropriate
1406 -- move instruction if necessary.
1407 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1408 getAnyReg expr = do
1409   r <- getRegister expr
1410   anyReg r
1411
1412 anyReg :: Register -> NatM (Reg -> InstrBlock)
1413 anyReg (Any _ code)          = return code
1414 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1415
1416 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1417 -- Fixed registers might not be byte-addressable, so we make sure we've
1418 -- got a temporary, inserting an extra reg copy if necessary.
1419 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1420 #if x86_64_TARGET_ARCH
1421 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1422 #else
1423 getByteReg expr = do
1424   r <- getRegister expr
1425   case r of
1426     Any rep code -> do
1427         tmp <- getNewRegNat rep
1428         return (tmp, code tmp)
1429     Fixed rep reg code
1430         | isVirtualReg reg -> return (reg,code)
1431         | otherwise -> do
1432             tmp <- getNewRegNat rep
1433             return (tmp, code `snocOL` reg2reg rep reg tmp)
1434         -- ToDo: could optimise slightly by checking for byte-addressable
1435         -- real registers, but that will happen very rarely if at all.
1436 #endif
1437
1438 -- Another variant: this time we want the result in a register that cannot
1439 -- be modified by code to evaluate an arbitrary expression.
1440 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1441 getNonClobberedReg expr = do
1442   r <- getRegister expr
1443   case r of
1444     Any rep code -> do
1445         tmp <- getNewRegNat rep
1446         return (tmp, code tmp)
1447     Fixed rep reg code
1448         -- only free regs can be clobbered
1449         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1450                 tmp <- getNewRegNat rep
1451                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1452         | otherwise ->
1453                 return (reg, code)
1454
1455 reg2reg :: Size -> Reg -> Reg -> Instr
1456 reg2reg size src dst
1457 #if i386_TARGET_ARCH
1458   | isFloatSize size = GMOV src dst
1459 #endif
1460   | otherwise        = MOV size (OpReg src) (OpReg dst)
1461
1462 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1463
1464 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1465
1466 #if sparc_TARGET_ARCH
1467
1468 -- getRegister :: CmmExpr -> NatM Register
1469
1470 -- Load a literal float into a float register.
1471 --      The actual literal is stored in a new data area, and we load it
1472 --      at runtime.
1473 getRegister (CmmLit (CmmFloat f W32)) = do
1474
1475     -- a label for the new data area
1476     lbl <- getNewLabelNat
1477     tmp <- getNewRegNat II32
1478
1479     let code dst = toOL [
1480             -- the data area
1481             LDATA ReadOnlyData
1482                         [CmmDataLabel lbl,
1483                          CmmStaticLit (CmmFloat f W32)],
1484
1485             -- load the literal
1486             SETHI (HI (ImmCLbl lbl)) tmp,
1487             LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1488
1489     return (Any FF32 code)
1490
1491 getRegister (CmmLit (CmmFloat d W64)) = do
1492     lbl <- getNewLabelNat
1493     tmp <- getNewRegNat II32
1494     let code dst = toOL [
1495             LDATA ReadOnlyData
1496                         [CmmDataLabel lbl,
1497                          CmmStaticLit (CmmFloat d W64)],
1498             SETHI (HI (ImmCLbl lbl)) tmp,
1499             LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1500     return (Any FF64 code)
1501
1502 getRegister (CmmMachOp mop [x]) -- unary MachOps
1503   = case mop of
1504       MO_F_Neg W32     -> trivialUFCode FF32 (FNEG FF32) x
1505       MO_F_Neg W64     -> trivialUFCode FF64 (FNEG FF64) x
1506
1507       MO_S_Neg rep     -> trivialUCode (intSize rep) (SUB False False g0) x
1508       MO_Not rep       -> trivialUCode (intSize rep) (XNOR False g0) x
1509
1510       MO_FF_Conv W64 W32-> coerceDbl2Flt x
1511       MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1512
1513       MO_FS_Conv from to -> coerceFP2Int from to x
1514       MO_SF_Conv from to -> coerceInt2FP from to x
1515
1516       -- Conversions which are a nop on sparc
1517       MO_UU_Conv from to
1518         | from == to    -> conversionNop (intSize to)  x
1519       MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1520       MO_UU_Conv W32 to -> conversionNop (intSize to) x
1521       MO_SS_Conv W32 to -> conversionNop (intSize to) x
1522
1523       MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
1524       MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
1525       MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
1526
1527       -- sign extension
1528       MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
1529       MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
1530       MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
1531
1532       other_op -> panic ("Unknown unary mach op: " ++ show mop)
1533     where
1534
1535         -- | sign extend and widen
1536         integerExtend
1537                 :: Width                -- ^ width of source expression
1538                 -> Width                -- ^ width of result
1539                 -> CmmExpr              -- ^ source expression
1540                 -> NatM Register
1541
1542         integerExtend from to expr
1543          = do   -- load the expr into some register
1544                 (reg, e_code)   <- getSomeReg expr
1545                 tmp             <- getNewRegNat II32
1546                 let bitCount
1547                         = case (from, to) of
1548                                 (W8,  W32)      -> 24
1549                                 (W16, W32)      -> 16
1550                                 (W8,  W16)      -> 24
1551                 let code dst
1552                         = e_code
1553
1554                         -- local shift word left to load the sign bit
1555                         `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
1556
1557                         -- arithmetic shift right to sign extend
1558                         `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
1559
1560                 return (Any (intSize to) code)
1561
1562
1563         conversionNop new_rep expr
1564             = do e_code <- getRegister expr
1565                  return (swizzleRegisterRep e_code new_rep)
1566
1567 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1568   = case mop of
1569       MO_Eq rep -> condIntReg EQQ x y
1570       MO_Ne rep -> condIntReg NE x y
1571
1572       MO_S_Gt rep -> condIntReg GTT x y
1573       MO_S_Ge rep -> condIntReg GE x y
1574       MO_S_Lt rep -> condIntReg LTT x y
1575       MO_S_Le rep -> condIntReg LE x y
1576
1577       MO_U_Gt W32  -> condIntReg GTT x y
1578       MO_U_Ge W32  -> condIntReg GE x y
1579       MO_U_Lt W32  -> condIntReg LTT x y
1580       MO_U_Le W32  -> condIntReg LE x y
1581
1582       MO_U_Gt W16 -> condIntReg GU  x y
1583       MO_U_Ge W16 -> condIntReg GEU x y
1584       MO_U_Lt W16 -> condIntReg LU  x y
1585       MO_U_Le W16 -> condIntReg LEU x y
1586
1587       MO_Add W32 -> trivialCode W32 (ADD False False) x y
1588       MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1589
1590       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1591
1592       MO_S_Quot W32     -> idiv True  False x y
1593       MO_U_Quot W32     -> idiv False False x y
1594
1595       MO_S_Rem  W32     -> irem True  x y
1596       MO_U_Rem  W32     -> irem False x y
1597
1598       MO_F_Eq w -> condFltReg EQQ x y
1599       MO_F_Ne w -> condFltReg NE x y
1600
1601       MO_F_Gt w -> condFltReg GTT x y
1602       MO_F_Ge w -> condFltReg GE x y
1603       MO_F_Lt w -> condFltReg LTT x y
1604       MO_F_Le w -> condFltReg LE x y
1605
1606       MO_F_Add  w  -> trivialFCode w FADD x y
1607       MO_F_Sub  w  -> trivialFCode w FSUB x y
1608       MO_F_Mul  w  -> trivialFCode w FMUL x y
1609       MO_F_Quot w  -> trivialFCode w FDIV x y
1610
1611       MO_And rep   -> trivialCode rep (AND False) x y
1612       MO_Or rep    -> trivialCode rep (OR  False) x y
1613       MO_Xor rep   -> trivialCode rep (XOR False) x y
1614
1615       MO_Mul rep -> trivialCode rep (SMUL False) x y
1616
1617       MO_Shl rep   -> trivialCode rep SLL  x y
1618       MO_U_Shr rep   -> trivialCode rep SRL x y
1619       MO_S_Shr rep   -> trivialCode rep SRA x y
1620
1621 {-
1622       MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1623                                          [promote x, promote y])
1624                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1625       MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1626                                         [x, y])
1627 -}
1628       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1629   where
1630     -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1631
1632
1633     -- | Generate an integer division instruction.
1634     idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
1635
1636     -- For unsigned division with a 32 bit numerator,
1637     --          we can just clear the Y register.
1638     idiv False cc x y = do
1639         (a_reg, a_code)         <- getSomeReg x
1640         (b_reg, b_code)         <- getSomeReg y
1641
1642         let code dst
1643                 =       a_code
1644                 `appOL` b_code
1645                 `appOL` toOL
1646                         [ WRY  g0 g0
1647                         , UDIV cc a_reg (RIReg b_reg) dst]
1648
1649         return (Any II32 code)
1650
1651
1652     -- For _signed_ division with a 32 bit numerator,
1653     --          we have to sign extend the numerator into the Y register.
1654     idiv True cc x y = do
1655         (a_reg, a_code)         <- getSomeReg x
1656         (b_reg, b_code)         <- getSomeReg y
1657
1658         tmp                     <- getNewRegNat II32
1659
1660         let code dst
1661                 =       a_code
1662                 `appOL` b_code
1663                 `appOL` toOL
1664                         [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
1665                         , SRA  tmp   (RIImm (ImmInt 16)) tmp
1666
1667                         , WRY  tmp g0
1668                         , SDIV cc a_reg (RIReg b_reg) dst]
1669
1670         return (Any II32 code)
1671
1672
1673     -- | Do an integer remainder.
1674     --
1675     --   NOTE:  The SPARC v8 architecture manual says that integer division
1676     --          instructions _may_ generate a remainder, depending on the implementation.
1677     --          If so it is _recommended_ that the remainder is placed in the Y register.
1678     --
1679     --          The UltraSparc 2007 manual says Y is _undefined_ after division.
1680     --
1681     --          The SPARC T2 doesn't store the remainder, not sure about the others.
1682     --          It's probably best not to worry about it, and just generate our own
1683     --          remainders.
1684     --
1685     irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
1686
1687     -- For unsigned operands:
1688     --          Division is between a 64 bit numerator and a 32 bit denominator,
1689     --          so we still have to clear the Y register.
1690     irem False x y = do
1691         (a_reg, a_code) <- getSomeReg x
1692         (b_reg, b_code) <- getSomeReg y
1693
1694         tmp_reg         <- getNewRegNat II32
1695
1696         let code dst
1697                 =       a_code
1698                 `appOL` b_code
1699                 `appOL` toOL
1700                         [ WRY   g0 g0
1701                         , UDIV  False         a_reg (RIReg b_reg) tmp_reg
1702                         , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
1703                         , SUB   False False   a_reg (RIReg tmp_reg) dst]
1704
1705         return  (Any II32 code)
1706
1707
1708     -- For signed operands:
1709     --          Make sure to sign extend into the Y register, or the remainder
1710     --          will have the wrong sign when the numerator is negative.
1711     --
1712     --  TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
1713     --          not the full 32. Not sure why this is, something to do with overflow?
1714     --          If anyone cares enough about the speed of signed remainder they
1715     --          can work it out themselves (then tell me). -- BL 2009/01/20
1716
1717     irem True x y = do
1718         (a_reg, a_code) <- getSomeReg x
1719         (b_reg, b_code) <- getSomeReg y
1720
1721         tmp1_reg        <- getNewRegNat II32
1722         tmp2_reg        <- getNewRegNat II32
1723
1724         let code dst
1725                 =       a_code
1726                 `appOL` b_code
1727                 `appOL` toOL
1728                         [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1729                         , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1730                         , WRY   tmp1_reg g0
1731
1732                         , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg
1733                         , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
1734                         , SUB   False False    a_reg (RIReg tmp2_reg) dst]
1735
1736         return (Any II32 code)
1737
1738
1739     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1740     imulMayOflo rep a b = do
1741          (a_reg, a_code) <- getSomeReg a
1742          (b_reg, b_code) <- getSomeReg b
1743          res_lo <- getNewRegNat II32
1744          res_hi <- getNewRegNat II32
1745          let
1746             shift_amt  = case rep of
1747                           W32 -> 31
1748                           W64 -> 63
1749                           _ -> panic "shift_amt"
1750             code dst = a_code `appOL` b_code `appOL`
1751                        toOL [
1752                            SMUL False a_reg (RIReg b_reg) res_lo,
1753                            RDY res_hi,
1754                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1755                            SUB False False res_lo (RIReg res_hi) dst
1756                         ]
1757          return (Any II32 code)
1758
1759 getRegister (CmmLoad mem pk) = do
1760     Amode src code <- getAmode mem
1761     let
1762         code__2 dst     = code `snocOL` LD (cmmTypeSize pk) src dst
1763     return (Any (cmmTypeSize pk) code__2)
1764
1765 getRegister (CmmLit (CmmInt i _))
1766   | fits13Bits i
1767   = let
1768         src = ImmInt (fromInteger i)
1769         code dst = unitOL (OR False g0 (RIImm src) dst)
1770     in
1771         return (Any II32 code)
1772
1773 getRegister (CmmLit lit)
1774   = let rep = cmmLitType lit
1775         imm = litToImm lit
1776         code dst = toOL [
1777             SETHI (HI imm) dst,
1778             OR False dst (RIImm (LO imm)) dst]
1779     in return (Any II32 code)
1780
1781 #endif /* sparc_TARGET_ARCH */
1782
1783 #if powerpc_TARGET_ARCH
1784 getRegister (CmmLoad mem pk)
1785   | not (isWord64 pk)
1786   = do
1787         Amode addr addr_code <- getAmode mem
1788         let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1789                        addr_code `snocOL` LD size dst addr
1790         return (Any size code)
1791           where size = cmmTypeSize pk
1792
1793 -- catch simple cases of zero- or sign-extended load
1794 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1795     Amode addr addr_code <- getAmode mem
1796     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1797
1798 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1799
1800 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1801     Amode addr addr_code <- getAmode mem
1802     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
1803
1804 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1805     Amode addr addr_code <- getAmode mem
1806     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1807
1808 getRegister (CmmMachOp mop [x]) -- unary MachOps
1809   = case mop of
1810       MO_Not rep   -> triv_ucode_int rep NOT
1811
1812       MO_F_Neg w   -> triv_ucode_float w FNEG
1813       MO_S_Neg w   -> triv_ucode_int   w NEG
1814
1815       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
1816       MO_FF_Conv W32 W64 -> conversionNop FF64 x
1817
1818       MO_FS_Conv from to -> coerceFP2Int from to x
1819       MO_SF_Conv from to -> coerceInt2FP from to x
1820
1821       MO_SS_Conv from to
1822         | from == to    -> conversionNop (intSize to) x
1823
1824         -- narrowing is a nop: we treat the high bits as undefined
1825       MO_SS_Conv W32 to -> conversionNop (intSize to) x
1826       MO_SS_Conv W16 W8 -> conversionNop II8 x
1827       MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
1828       MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1829
1830       MO_UU_Conv from to
1831         | from == to -> conversionNop (intSize to) x
1832         -- narrowing is a nop: we treat the high bits as undefined
1833       MO_UU_Conv W32 to -> conversionNop (intSize to) x
1834       MO_UU_Conv W16 W8 -> conversionNop II8 x
1835       MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1836       MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1837
1838     where
1839         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
1840         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1841
1842         conversionNop new_size expr
1843             = do e_code <- getRegister expr
1844                  return (swizzleRegisterRep e_code new_size)
1845
1846 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1847   = case mop of
1848       MO_F_Eq w -> condFltReg EQQ x y
1849       MO_F_Ne w -> condFltReg NE  x y
1850       MO_F_Gt w -> condFltReg GTT x y
1851       MO_F_Ge w -> condFltReg GE  x y
1852       MO_F_Lt w -> condFltReg LTT x y
1853       MO_F_Le w -> condFltReg LE  x y
1854
1855       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1856       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1857
1858       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1859       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1860       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1861       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1862
1863       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1864       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1865       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1866       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1867
1868       MO_F_Add w  -> triv_float w FADD
1869       MO_F_Sub w  -> triv_float w FSUB
1870       MO_F_Mul w  -> triv_float w FMUL
1871       MO_F_Quot w -> triv_float w FDIV
1872
1873          -- optimize addition with 32-bit immediate
1874          -- (needed for PIC)
1875       MO_Add W32 ->
1876         case y of
1877           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1878             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1879           CmmLit lit
1880             -> do
1881                 (src, srcCode) <- getSomeReg x
1882                 let imm = litToImm lit
1883                     code dst = srcCode `appOL` toOL [
1884                                     ADDIS dst src (HA imm),
1885                                     ADD dst dst (RIImm (LO imm))
1886                                 ]
1887                 return (Any II32 code)
1888           _ -> trivialCode W32 True ADD x y
1889
1890       MO_Add rep -> trivialCode rep True ADD x y
1891       MO_Sub rep ->
1892         case y of    -- subfi ('substract from' with immediate) doesn't exist
1893           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1894             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1895           _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1896
1897       MO_Mul rep -> trivialCode rep True MULLW x y
1898
1899       MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1900
1901       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1902       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1903
1904       MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1905       MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1906
1907       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1908       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1909
1910       MO_And rep   -> trivialCode rep False AND x y
1911       MO_Or rep    -> trivialCode rep False OR x y
1912       MO_Xor rep   -> trivialCode rep False XOR x y
1913
1914       MO_Shl rep   -> trivialCode rep False SLW x y
1915       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1916       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1917   where
1918     triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1919     triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1920
1921 getRegister (CmmLit (CmmInt i rep))
1922   | Just imm <- makeImmediate rep True i
1923   = let
1924         code dst = unitOL (LI dst imm)
1925     in
1926         return (Any (intSize rep) code)
1927
1928 getRegister (CmmLit (CmmFloat f frep)) = do
1929     lbl <- getNewLabelNat
1930     dflags <- getDynFlagsNat
1931     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1932     Amode addr addr_code <- getAmode dynRef
1933     let size = floatSize frep
1934         code dst =
1935             LDATA ReadOnlyData  [CmmDataLabel lbl,
1936                                  CmmStaticLit (CmmFloat f frep)]
1937             `consOL` (addr_code `snocOL` LD size dst addr)
1938     return (Any size code)
1939
1940 getRegister (CmmLit lit)
1941   = let rep = cmmLitType lit
1942         imm = litToImm lit
1943         code dst = toOL [
1944               LIS dst (HA imm),
1945               ADD dst dst (RIImm (LO imm))
1946           ]
1947     in return (Any (cmmTypeSize rep) code)
1948
1949 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1950
1951     -- extend?Rep: wrap integer expression of type rep
1952     -- in a conversion to II32
1953 extendSExpr W32 x = x
1954 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
1955 extendUExpr W32 x = x
1956 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1957
1958 #endif /* powerpc_TARGET_ARCH */
1959
1960
1961 -- -----------------------------------------------------------------------------
1962 --  The 'Amode' type: Memory addressing modes passed up the tree.
1963
1964 data Amode = Amode AddrMode InstrBlock
1965
1966 {-
1967 Now, given a tree (the argument to an CmmLoad) that references memory,
1968 produce a suitable addressing mode.
1969
1970 A Rule of the Game (tm) for Amodes: use of the addr bit must
1971 immediately follow use of the code part, since the code part puts
1972 values in registers which the addr then refers to.  So you can't put
1973 anything in between, lest it overwrite some of those registers.  If
1974 you need to do some other computation between the code part and use of
1975 the addr bit, first store the effective address from the amode in a
1976 temporary, then do the other computation, and then use the temporary:
1977
1978     code
1979     LEA amode, tmp
1980     ... other computation ...
1981     ... (tmp) ...
1982 -}
1983
1984 getAmode :: CmmExpr -> NatM Amode
1985 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1986
1987 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1988
1989 #if alpha_TARGET_ARCH
1990
1991 getAmode (StPrim IntSubOp [x, StInt i])
1992   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1993     getRegister x               `thenNat` \ register ->
1994     let
1995         code = registerCode register tmp
1996         reg  = registerName register tmp
1997         off  = ImmInt (-(fromInteger i))
1998     in
1999     return (Amode (AddrRegImm reg off) code)
2000
2001 getAmode (StPrim IntAddOp [x, StInt i])
2002   = getNewRegNat PtrRep         `thenNat` \ tmp ->
2003     getRegister x               `thenNat` \ register ->
2004     let
2005         code = registerCode register tmp
2006         reg  = registerName register tmp
2007         off  = ImmInt (fromInteger i)
2008     in
2009     return (Amode (AddrRegImm reg off) code)
2010
2011 getAmode leaf
2012   | isJust imm
2013   = return (Amode (AddrImm imm__2) id)
2014   where
2015     imm = maybeImm leaf
2016     imm__2 = case imm of Just x -> x
2017
2018 getAmode other
2019   = getNewRegNat PtrRep         `thenNat` \ tmp ->
2020     getRegister other           `thenNat` \ register ->
2021     let
2022         code = registerCode register tmp
2023         reg  = registerName register tmp
2024     in
2025     return (Amode (AddrReg reg) code)
2026
2027 #endif /* alpha_TARGET_ARCH */
2028
2029 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2030
2031 #if x86_64_TARGET_ARCH
2032
2033 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
2034                                      CmmLit displacement])
2035     = return $ Amode (ripRel (litToImm displacement)) nilOL
2036
2037 #endif
2038
2039 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2040
2041 -- This is all just ridiculous, since it carefully undoes
2042 -- what mangleIndexTree has just done.
2043 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
2044   | is32BitLit lit
2045   -- ASSERT(rep == II32)???
2046   = do (x_reg, x_code) <- getSomeReg x
2047        let off = ImmInt (-(fromInteger i))
2048        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2049
2050 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
2051   | is32BitLit lit
2052   -- ASSERT(rep == II32)???
2053   = do (x_reg, x_code) <- getSomeReg x
2054        let off = ImmInt (fromInteger i)
2055        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2056
2057 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
2058 -- recognised by the next rule.
2059 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
2060                                   b@(CmmLit _)])
2061   = getAmode (CmmMachOp (MO_Add rep) [b,a])
2062
2063 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
2064                                         [y, CmmLit (CmmInt shift _)]])
2065   | shift == 0 || shift == 1 || shift == 2 || shift == 3
2066   = x86_complex_amode x y shift 0
2067
2068 getAmode (CmmMachOp (MO_Add rep)
2069                 [x, CmmMachOp (MO_Add _)
2070                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
2071                          CmmLit (CmmInt offset _)]])
2072   | shift == 0 || shift == 1 || shift == 2 || shift == 3
2073   && is32BitInteger offset
2074   = x86_complex_amode x y shift offset
2075
2076 getAmode (CmmMachOp (MO_Add rep) [x,y])
2077   = x86_complex_amode x y 0 0
2078
2079 getAmode (CmmLit lit) | is32BitLit lit
2080   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
2081
2082 getAmode expr = do
2083   (reg,code) <- getSomeReg expr
2084   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
2085
2086
2087 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
2088 x86_complex_amode base index shift offset
2089   = do (x_reg, x_code) <- getNonClobberedReg base
2090         -- x must be in a temp, because it has to stay live over y_code
2091         -- we could compre x_reg and y_reg and do something better here...
2092        (y_reg, y_code) <- getSomeReg index
2093        let
2094            code = x_code `appOL` y_code
2095            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
2096        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
2097                code)
2098
2099 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2100
2101 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2102
2103 #if sparc_TARGET_ARCH
2104
2105 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
2106   | fits13Bits (-i)
2107   = do
2108        (reg, code) <- getSomeReg x
2109        let
2110          off  = ImmInt (-(fromInteger i))
2111        return (Amode (AddrRegImm reg off) code)
2112
2113
2114 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
2115   | fits13Bits i
2116   = do
2117        (reg, code) <- getSomeReg x
2118        let
2119          off  = ImmInt (fromInteger i)
2120        return (Amode (AddrRegImm reg off) code)
2121
2122 getAmode (CmmMachOp (MO_Add rep) [x, y])
2123   = do
2124     (regX, codeX) <- getSomeReg x
2125     (regY, codeY) <- getSomeReg y
2126     let
2127         code = codeX `appOL` codeY
2128     return (Amode (AddrRegReg regX regY) code)
2129
2130 getAmode (CmmLit lit)
2131   = do
2132         let imm__2      = litToImm lit
2133         tmp1    <- getNewRegNat II32
2134         tmp2    <- getNewRegNat II32
2135
2136         let code = toOL [ SETHI (HI imm__2) tmp1
2137                         , OR    False tmp1 (RIImm (LO imm__2)) tmp2]
2138
2139         return (Amode (AddrRegReg tmp2 g0) code)
2140
2141 getAmode other
2142   = do
2143        (reg, code) <- getSomeReg other
2144        let
2145             off  = ImmInt 0
2146        return (Amode (AddrRegImm reg off) code)
2147
2148 #endif /* sparc_TARGET_ARCH */
2149
2150 #ifdef powerpc_TARGET_ARCH
2151 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
2152   | Just off <- makeImmediate W32 True (-i)
2153   = do
2154         (reg, code) <- getSomeReg x
2155         return (Amode (AddrRegImm reg off) code)
2156
2157
2158 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
2159   | Just off <- makeImmediate W32 True i
2160   = do
2161         (reg, code) <- getSomeReg x
2162         return (Amode (AddrRegImm reg off) code)
2163
2164    -- optimize addition with 32-bit immediate
2165    -- (needed for PIC)
2166 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
2167   = do
2168         tmp <- getNewRegNat II32
2169         (src, srcCode) <- getSomeReg x
2170         let imm = litToImm lit
2171             code = srcCode `snocOL` ADDIS tmp src (HA imm)
2172         return (Amode (AddrRegImm tmp (LO imm)) code)
2173
2174 getAmode (CmmLit lit)
2175   = do
2176         tmp <- getNewRegNat II32
2177         let imm = litToImm lit
2178             code = unitOL (LIS tmp (HA imm))
2179         return (Amode (AddrRegImm tmp (LO imm)) code)
2180
2181 getAmode (CmmMachOp (MO_Add W32) [x, y])
2182   = do
2183         (regX, codeX) <- getSomeReg x
2184         (regY, codeY) <- getSomeReg y
2185         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
2186
2187 getAmode other
2188   = do
2189         (reg, code) <- getSomeReg other
2190         let
2191             off  = ImmInt 0
2192         return (Amode (AddrRegImm reg off) code)
2193 #endif /* powerpc_TARGET_ARCH */
2194
2195 -- -----------------------------------------------------------------------------
2196 -- getOperand: sometimes any operand will do.
2197
2198 -- getNonClobberedOperand: the value of the operand will remain valid across
2199 -- the computation of an arbitrary expression, unless the expression
2200 -- is computed directly into a register which the operand refers to
2201 -- (see trivialCode where this function is used for an example).
2202
2203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2204
2205 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2206 #if x86_64_TARGET_ARCH
2207 getNonClobberedOperand (CmmLit lit)
2208   | isSuitableFloatingPointLit lit = do
2209     lbl <- getNewLabelNat
2210     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2211                                            CmmStaticLit lit])
2212     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2213 #endif
2214 getNonClobberedOperand (CmmLit lit)
2215   | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2216     return (OpImm (litToImm lit), nilOL)
2217 getNonClobberedOperand (CmmLoad mem pk)
2218   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2219     Amode src mem_code <- getAmode mem
2220     (src',save_code) <-
2221         if (amodeCouldBeClobbered src)
2222                 then do
2223                    tmp <- getNewRegNat wordSize
2224                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2225                            unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2226                 else
2227                    return (src, nilOL)
2228     return (OpAddr src', save_code `appOL` mem_code)
2229 getNonClobberedOperand e = do
2230     (reg, code) <- getNonClobberedReg e
2231     return (OpReg reg, code)
2232
2233 amodeCouldBeClobbered :: AddrMode -> Bool
2234 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2235
2236 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2237 regClobbered _ = False
2238
2239 -- getOperand: the operand is not required to remain valid across the
2240 -- computation of an arbitrary expression.
2241 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2242 #if x86_64_TARGET_ARCH
2243 getOperand (CmmLit lit)
2244   | isSuitableFloatingPointLit lit = do
2245     lbl <- getNewLabelNat
2246     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2247                                            CmmStaticLit lit])
2248     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2249 #endif
2250 getOperand (CmmLit lit)
2251   | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2252     return (OpImm (litToImm lit), nilOL)
2253 getOperand (CmmLoad mem pk)
2254   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2255     Amode src mem_code <- getAmode mem
2256     return (OpAddr src, mem_code)
2257 getOperand e = do
2258     (reg, code) <- getSomeReg e
2259     return (OpReg reg, code)
2260
2261 isOperand :: CmmExpr -> Bool
2262 isOperand (CmmLoad _ _) = True
2263 isOperand (CmmLit lit)  = is32BitLit lit
2264                           || isSuitableFloatingPointLit lit
2265 isOperand _             = False
2266
2267 -- if we want a floating-point literal as an operand, we can
2268 -- use it directly from memory.  However, if the literal is
2269 -- zero, we're better off generating it into a register using
2270 -- xor.
2271 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2272 isSuitableFloatingPointLit _ = False
2273
2274 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2275 getRegOrMem (CmmLoad mem pk)
2276   | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2277     Amode src mem_code <- getAmode mem
2278     return (OpAddr src, mem_code)
2279 getRegOrMem e = do
2280     (reg, code) <- getNonClobberedReg e
2281     return (OpReg reg, code)
2282
2283 #if x86_64_TARGET_ARCH
2284 is32BitLit (CmmInt i W64) = is32BitInteger i
2285    -- assume that labels are in the range 0-2^31-1: this assumes the
2286    -- small memory model (see gcc docs, -mcmodel=small).
2287 #endif
2288 is32BitLit x = True
2289 #endif
2290
2291 is32BitInteger :: Integer -> Bool
2292 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
2293   where i64 = fromIntegral i :: Int64
2294   -- a CmmInt is intended to be truncated to the appropriate
2295   -- number of bits, so here we truncate it to Int64.  This is
2296   -- important because e.g. -1 as a CmmInt might be either
2297   -- -1 or 18446744073709551615.
2298
2299 -- -----------------------------------------------------------------------------
2300 --  The 'CondCode' type:  Condition codes passed up the tree.
2301
2302 data CondCode = CondCode Bool Cond InstrBlock
2303
2304 -- Set up a condition code for a conditional branch.
2305
2306 getCondCode :: CmmExpr -> NatM CondCode
2307
2308 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2309
2310 #if alpha_TARGET_ARCH
2311 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2312 #endif /* alpha_TARGET_ARCH */
2313
2314 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2315
2316 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2317 -- yes, they really do seem to want exactly the same!
2318
2319 getCondCode (CmmMachOp mop [x, y])
2320   =
2321     case mop of
2322       MO_F_Eq W32 -> condFltCode EQQ x y
2323       MO_F_Ne W32 -> condFltCode NE  x y
2324       MO_F_Gt W32 -> condFltCode GTT x y
2325       MO_F_Ge W32 -> condFltCode GE  x y
2326       MO_F_Lt W32 -> condFltCode LTT x y
2327       MO_F_Le W32 -> condFltCode LE  x y
2328
2329       MO_F_Eq W64 -> condFltCode EQQ x y
2330       MO_F_Ne W64 -> condFltCode NE  x y
2331       MO_F_Gt W64 -> condFltCode GTT x y
2332       MO_F_Ge W64 -> condFltCode GE  x y
2333       MO_F_Lt W64 -> condFltCode LTT x y
2334       MO_F_Le W64 -> condFltCode LE  x y
2335
2336       MO_Eq rep -> condIntCode EQQ  x y
2337       MO_Ne rep -> condIntCode NE   x y
2338
2339       MO_S_Gt rep -> condIntCode GTT  x y
2340       MO_S_Ge rep -> condIntCode GE   x y
2341       MO_S_Lt rep -> condIntCode LTT  x y
2342       MO_S_Le rep -> condIntCode LE   x y
2343
2344       MO_U_Gt rep -> condIntCode GU   x y
2345       MO_U_Ge rep -> condIntCode GEU  x y
2346       MO_U_Lt rep -> condIntCode LU   x y
2347       MO_U_Le rep -> condIntCode LEU  x y
2348
2349       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2350
2351 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2352
2353 #elif powerpc_TARGET_ARCH
2354
2355 -- almost the same as everywhere else - but we need to
2356 -- extend small integers to 32 bit first
2357
2358 getCondCode (CmmMachOp mop [x, y])
2359   = case mop of
2360       MO_F_Eq W32 -> condFltCode EQQ x y
2361       MO_F_Ne W32 -> condFltCode NE  x y
2362       MO_F_Gt W32 -> condFltCode GTT x y
2363       MO_F_Ge W32 -> condFltCode GE  x y
2364       MO_F_Lt W32 -> condFltCode LTT x y
2365       MO_F_Le W32 -> condFltCode LE  x y
2366
2367       MO_F_Eq W64 -> condFltCode EQQ x y
2368       MO_F_Ne W64 -> condFltCode NE  x y
2369       MO_F_Gt W64 -> condFltCode GTT x y
2370       MO_F_Ge W64 -> condFltCode GE  x y
2371       MO_F_Lt W64 -> condFltCode LTT x y
2372       MO_F_Le W64 -> condFltCode LE  x y
2373
2374       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2375       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2376
2377       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2378       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2379       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2380       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2381
2382       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2383       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2384       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2385       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2386
2387       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2388
2389 getCondCode other =  panic "getCondCode(2)(powerpc)"
2390
2391
2392 #endif
2393
2394
2395 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2396 -- passed back up the tree.
2397
2398 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2399
2400 #if alpha_TARGET_ARCH
2401 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2402 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2403 #endif /* alpha_TARGET_ARCH */
2404
2405 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2406 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2407
2408 -- memory vs immediate
2409 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2410     Amode x_addr x_code <- getAmode x
2411     let
2412         imm  = litToImm lit
2413         code = x_code `snocOL`
2414                   CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2415     --
2416     return (CondCode False cond code)
2417
2418 -- anything vs zero, using a mask
2419 -- TODO: Add some sanity checking!!!!
2420 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2421     | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2422     = do
2423       (x_reg, x_code) <- getSomeReg x
2424       let
2425          code = x_code `snocOL`
2426                 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2427       --
2428       return (CondCode False cond code)
2429
2430 -- anything vs zero
2431 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2432     (x_reg, x_code) <- getSomeReg x
2433     let
2434         code = x_code `snocOL`
2435                   TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2436     --
2437     return (CondCode False cond code)
2438
2439 -- anything vs operand
2440 condIntCode cond x y | isOperand y = do
2441     (x_reg, x_code) <- getNonClobberedReg x
2442     (y_op,  y_code) <- getOperand y
2443     let
2444         code = x_code `appOL` y_code `snocOL`
2445                   CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2446     -- in
2447     return (CondCode False cond code)
2448
2449 -- anything vs anything
2450 condIntCode cond x y = do
2451   (y_reg, y_code) <- getNonClobberedReg y
2452   (x_op, x_code) <- getRegOrMem x
2453   let
2454         code = y_code `appOL`
2455                x_code `snocOL`
2456                   CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2457   -- in
2458   return (CondCode False cond code)
2459 #endif
2460
2461 #if i386_TARGET_ARCH
2462 condFltCode cond x y
2463   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2464   (x_reg, x_code) <- getNonClobberedReg x
2465   (y_reg, y_code) <- getSomeReg y
2466   let
2467         code = x_code `appOL` y_code `snocOL`
2468                 GCMP cond x_reg y_reg
2469   -- The GCMP insn does the test and sets the zero flag if comparable
2470   -- and true.  Hence we always supply EQQ as the condition to test.
2471   return (CondCode True EQQ code)
2472 #endif /* i386_TARGET_ARCH */
2473
2474 #if x86_64_TARGET_ARCH
2475 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2476 -- an operand, but the right must be a reg.  We can probably do better
2477 -- than this general case...
2478 condFltCode cond x y = do
2479   (x_reg, x_code) <- getNonClobberedReg x
2480   (y_op, y_code) <- getOperand y
2481   let
2482         code = x_code `appOL`
2483                y_code `snocOL`
2484                   CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2485         -- NB(1): we need to use the unsigned comparison operators on the
2486         -- result of this comparison.
2487   -- in
2488   return (CondCode True (condToUnsigned cond) code)
2489 #endif
2490
2491 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2492
2493 #if sparc_TARGET_ARCH
2494
2495 condIntCode cond x (CmmLit (CmmInt y rep))
2496   | fits13Bits y
2497   = do
2498        (src1, code) <- getSomeReg x
2499        let
2500            src2 = ImmInt (fromInteger y)
2501            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2502        return (CondCode False cond code')
2503
2504 condIntCode cond x y = do
2505     (src1, code1) <- getSomeReg x
2506     (src2, code2) <- getSomeReg y
2507     let
2508         code__2 = code1 `appOL` code2 `snocOL`
2509                   SUB False True src1 (RIReg src2) g0
2510     return (CondCode False cond code__2)
2511
2512 -----------
2513 condFltCode cond x y = do
2514     (src1, code1) <- getSomeReg x
2515     (src2, code2) <- getSomeReg y
2516     tmp <- getNewRegNat FF64
2517     let
2518         promote x = FxTOy FF32 FF64 x tmp
2519
2520         pk1   = cmmExprType x
2521         pk2   = cmmExprType y
2522
2523         code__2 =
2524                 if pk1 `cmmEqType` pk2 then
2525                     code1 `appOL` code2 `snocOL`
2526                     FCMP True (cmmTypeSize pk1) src1 src2
2527                 else if typeWidth pk1 == W32 then
2528                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2529                     FCMP True FF64 tmp src2
2530                 else
2531                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2532                     FCMP True FF64 src1 tmp
2533     return (CondCode True cond code__2)
2534
2535 #endif /* sparc_TARGET_ARCH */
2536
2537 #if powerpc_TARGET_ARCH
2538 --  ###FIXME: I16 and I8!
2539 condIntCode cond x (CmmLit (CmmInt y rep))
2540   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2541   = do
2542         (src1, code) <- getSomeReg x
2543         let
2544             code' = code `snocOL`
2545                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2546         return (CondCode False cond code')
2547
2548 condIntCode cond x y = do
2549     (src1, code1) <- getSomeReg x
2550     (src2, code2) <- getSomeReg y
2551     let
2552         code' = code1 `appOL` code2 `snocOL`
2553                   (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2554     return (CondCode False cond code')
2555
2556 condFltCode cond x y = do
2557     (src1, code1) <- getSomeReg x
2558     (src2, code2) <- getSomeReg y
2559     let
2560         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2561         code'' = case cond of -- twiddle CR to handle unordered case
2562                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2563                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2564                     _ -> code'
2565                  where
2566                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2567     return (CondCode True cond code'')
2568
2569 #endif /* powerpc_TARGET_ARCH */
2570
2571 -- -----------------------------------------------------------------------------
2572 -- Generating assignments
2573
2574 -- Assignments are really at the heart of the whole code generation
2575 -- business.  Almost all top-level nodes of any real importance are
2576 -- assignments, which correspond to loads, stores, or register
2577 -- transfers.  If we're really lucky, some of the register transfers
2578 -- will go away, because we can use the destination register to
2579 -- complete the code generation for the right hand side.  This only
2580 -- fails when the right hand side is forced into a fixed register
2581 -- (e.g. the result of a call).
2582
2583 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2584 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
2585
2586 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2587 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
2588
2589 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2590
2591 #if alpha_TARGET_ARCH
2592
2593 assignIntCode pk (CmmLoad dst _) src
2594   = getNewRegNat IntRep             `thenNat` \ tmp ->
2595     getAmode dst                    `thenNat` \ amode ->
2596     getRegister src                 `thenNat` \ register ->
2597     let
2598         code1   = amodeCode amode []
2599         dst__2  = amodeAddr amode
2600         code2   = registerCode register tmp []
2601         src__2  = registerName register tmp
2602         sz      = primRepToSize pk
2603         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2604     in
2605     return code__2
2606
2607 assignIntCode pk dst src
2608   = getRegister dst                         `thenNat` \ register1 ->
2609     getRegister src                         `thenNat` \ register2 ->
2610     let
2611         dst__2  = registerName register1 zeroh
2612         code    = registerCode register2 dst__2
2613         src__2  = registerName register2 dst__2
2614         code__2 = if isFixed register2
2615                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2616                   else code
2617     in
2618     return code__2
2619
2620 #endif /* alpha_TARGET_ARCH */
2621
2622 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2623
2624 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2625
2626 -- integer assignment to memory
2627
2628 -- specific case of adding/subtracting an integer to a particular address.
2629 -- ToDo: catch other cases where we can use an operation directly on a memory
2630 -- address.
2631 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2632                                                  CmmLit (CmmInt i _)])
2633    | addr == addr2, pk /= II64 || is32BitInteger i,
2634      Just instr <- check op
2635    = do Amode amode code_addr <- getAmode addr
2636         let code = code_addr `snocOL`
2637                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2638         return code
2639    where
2640         check (MO_Add _) = Just ADD
2641         check (MO_Sub _) = Just SUB
2642         check _ = Nothing
2643         -- ToDo: more?
2644
2645 -- general case
2646 assignMem_IntCode pk addr src = do
2647     Amode addr code_addr <- getAmode addr
2648     (code_src, op_src)   <- get_op_RI src
2649     let
2650         code = code_src `appOL`
2651                code_addr `snocOL`
2652                   MOV pk op_src (OpAddr addr)
2653         -- NOTE: op_src is stable, so it will still be valid
2654         -- after code_addr.  This may involve the introduction
2655         -- of an extra MOV to a temporary register, but we hope
2656         -- the register allocator will get rid of it.
2657     --
2658     return code
2659   where
2660     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2661     get_op_RI (CmmLit lit) | is32BitLit lit
2662       = return (nilOL, OpImm (litToImm lit))
2663     get_op_RI op
2664       = do (reg,code) <- getNonClobberedReg op
2665            return (code, OpReg reg)
2666
2667
2668 -- Assign; dst is a reg, rhs is mem
2669 assignReg_IntCode pk reg (CmmLoad src _) = do
2670   load_code <- intLoadCode (MOV pk) src
2671   return (load_code (getRegisterReg reg))
2672
2673 -- dst is a reg, but src could be anything
2674 assignReg_IntCode pk reg src = do
2675   code <- getAnyReg src
2676   return (code (getRegisterReg reg))
2677
2678 #endif /* i386_TARGET_ARCH */
2679
2680 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2681
2682 #if sparc_TARGET_ARCH
2683
2684 assignMem_IntCode pk addr src = do
2685     (srcReg, code) <- getSomeReg src
2686     Amode dstAddr addr_code <- getAmode addr
2687     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2688
2689 assignReg_IntCode pk reg src = do
2690     r <- getRegister src
2691     return $ case r of
2692         Any _ code         -> code dst
2693         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2694     where
2695       dst = getRegisterReg reg
2696
2697
2698 #endif /* sparc_TARGET_ARCH */
2699
2700 #if powerpc_TARGET_ARCH
2701
2702 assignMem_IntCode pk addr src = do
2703     (srcReg, code) <- getSomeReg src
2704     Amode dstAddr addr_code <- getAmode addr
2705     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2706
2707 -- dst is a reg, but src could be anything
2708 assignReg_IntCode pk reg src
2709     = do
2710         r <- getRegister src
2711         return $ case r of
2712             Any _ code         -> code dst
2713             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2714     where
2715         dst = getRegisterReg reg
2716
2717 #endif /* powerpc_TARGET_ARCH */
2718
2719
2720 -- -----------------------------------------------------------------------------
2721 -- Floating-point assignments
2722
2723 #if alpha_TARGET_ARCH
2724
2725 assignFltCode pk (CmmLoad dst _) src
2726   = getNewRegNat pk                 `thenNat` \ tmp ->
2727     getAmode dst                    `thenNat` \ amode ->
2728     getRegister src                         `thenNat` \ register ->
2729     let
2730         code1   = amodeCode amode []
2731         dst__2  = amodeAddr amode
2732         code2   = registerCode register tmp []
2733         src__2  = registerName register tmp
2734         sz      = primRepToSize pk
2735         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2736     in
2737     return code__2
2738
2739 assignFltCode pk dst src
2740   = getRegister dst                         `thenNat` \ register1 ->
2741     getRegister src                         `thenNat` \ register2 ->
2742     let
2743         dst__2  = registerName register1 zeroh
2744         code    = registerCode register2 dst__2
2745         src__2  = registerName register2 dst__2
2746         code__2 = if isFixed register2
2747                   then code . mkSeqInstr (FMOV src__2 dst__2)
2748                   else code
2749     in
2750     return code__2
2751
2752 #endif /* alpha_TARGET_ARCH */
2753
2754 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2755
2756 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2757
2758 -- Floating point assignment to memory
2759 assignMem_FltCode pk addr src = do
2760   (src_reg, src_code) <- getNonClobberedReg src
2761   Amode addr addr_code <- getAmode addr
2762   let
2763         code = src_code `appOL`
2764                addr_code `snocOL`
2765                 IF_ARCH_i386(GST pk src_reg addr,
2766                              MOV pk (OpReg src_reg) (OpAddr addr))
2767   return code
2768
2769 -- Floating point assignment to a register/temporary
2770 assignReg_FltCode pk reg src = do
2771   src_code <- getAnyReg src
2772   return (src_code (getRegisterReg reg))
2773
2774 #endif /* i386_TARGET_ARCH */
2775
2776 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2777
2778 #if sparc_TARGET_ARCH
2779
2780 -- Floating point assignment to memory
2781 assignMem_FltCode pk addr src = do
2782     Amode dst__2 code1 <- getAmode addr
2783     (src__2, code2) <- getSomeReg src
2784     tmp1 <- getNewRegNat pk
2785     let
2786         pk__2   = cmmExprType src
2787         code__2 = code1 `appOL` code2 `appOL`
2788             if   sizeToWidth pk == typeWidth pk__2
2789             then unitOL (ST pk src__2 dst__2)
2790             else toOL   [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2791                         , ST    pk tmp1 dst__2]
2792     return code__2
2793
2794 -- Floating point assignment to a register/temporary
2795 assignReg_FltCode pk dstCmmReg srcCmmExpr = do
2796     srcRegister <- getRegister srcCmmExpr
2797     let dstReg  = getRegisterReg dstCmmReg
2798
2799     return $ case srcRegister of
2800         Any _ code                  -> code dstReg
2801         Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
2802
2803 #endif /* sparc_TARGET_ARCH */
2804
2805 #if powerpc_TARGET_ARCH
2806
2807 -- Easy, isn't it?
2808 assignMem_FltCode = assignMem_IntCode
2809 assignReg_FltCode = assignReg_IntCode
2810
2811 #endif /* powerpc_TARGET_ARCH */
2812
2813
2814 -- -----------------------------------------------------------------------------
2815 -- Generating an non-local jump
2816
2817 -- (If applicable) Do not fill the delay slots here; you will confuse the
2818 -- register allocator.
2819
2820 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2821
2822 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2823
2824 #if alpha_TARGET_ARCH
2825
2826 genJump (CmmLabel lbl)
2827   | isAsmTemp lbl = returnInstr (BR target)
2828   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2829   where
2830     target = ImmCLbl lbl
2831
2832 genJump tree
2833   = getRegister tree                `thenNat` \ register ->
2834     getNewRegNat PtrRep             `thenNat` \ tmp ->
2835     let
2836         dst    = registerName register pv
2837         code   = registerCode register pv
2838         target = registerName register pv
2839     in
2840     if isFixed register then
2841         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2842     else
2843     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2844
2845 #endif /* alpha_TARGET_ARCH */
2846
2847 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2848
2849 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2850
2851 genJump (CmmLoad mem pk) = do
2852   Amode target code <- getAmode mem
2853   return (code `snocOL` JMP (OpAddr target))
2854
2855 genJump (CmmLit lit) = do
2856   return (unitOL (JMP (OpImm (litToImm lit))))
2857
2858 genJump expr = do
2859   (reg,code) <- getSomeReg expr
2860   return (code `snocOL` JMP (OpReg reg))
2861
2862 #endif /* i386_TARGET_ARCH */
2863
2864 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2865
2866 #if sparc_TARGET_ARCH
2867
2868 genJump (CmmLit (CmmLabel lbl))
2869   = return (toOL [CALL (Left target) 0 True, NOP])
2870   where
2871     target = ImmCLbl lbl
2872
2873 genJump tree
2874   = do
2875         (target, code) <- getSomeReg tree
2876         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2877
2878 #endif /* sparc_TARGET_ARCH */
2879
2880 #if powerpc_TARGET_ARCH
2881 genJump (CmmLit (CmmLabel lbl))
2882   = return (unitOL $ JMP lbl)
2883
2884 genJump tree
2885   = do
2886         (target,code) <- getSomeReg tree
2887         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2888 #endif /* powerpc_TARGET_ARCH */
2889
2890
2891 -- -----------------------------------------------------------------------------
2892 --  Unconditional branches
2893
2894 genBranch :: BlockId -> NatM InstrBlock
2895
2896 genBranch = return . toOL . mkBranchInstr
2897
2898 -- -----------------------------------------------------------------------------
2899 --  Conditional jumps
2900
2901 {-
2902 Conditional jumps are always to local labels, so we can use branch
2903 instructions.  We peek at the arguments to decide what kind of
2904 comparison to do.
2905
2906 ALPHA: For comparisons with 0, we're laughing, because we can just do
2907 the desired conditional branch.
2908
2909 I386: First, we have to ensure that the condition
2910 codes are set according to the supplied comparison operation.
2911
2912 SPARC: First, we have to ensure that the condition codes are set
2913 according to the supplied comparison operation.  We generate slightly
2914 different code for floating point comparisons, because a floating
2915 point operation cannot directly precede a @BF@.  We assume the worst
2916 and fill that slot with a @NOP@.
2917
2918 SPARC: Do not fill the delay slots here; you will confuse the register
2919 allocator.
2920 -}
2921
2922
2923 genCondJump
2924     :: BlockId      -- the branch target
2925     -> CmmExpr      -- the condition on which to branch
2926     -> NatM InstrBlock
2927
2928 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2929
2930 #if alpha_TARGET_ARCH
2931
2932 genCondJump id (StPrim op [x, StInt 0])
2933   = getRegister x                           `thenNat` \ register ->
2934     getNewRegNat (registerRep register)
2935                                     `thenNat` \ tmp ->
2936     let
2937         code   = registerCode register tmp
2938         value  = registerName register tmp
2939         pk     = registerRep register
2940         target = ImmCLbl lbl
2941     in
2942     returnSeq code [BI (cmpOp op) value target]
2943   where
2944     cmpOp CharGtOp = GTT
2945     cmpOp CharGeOp = GE
2946     cmpOp CharEqOp = EQQ
2947     cmpOp CharNeOp = NE
2948     cmpOp CharLtOp = LTT
2949     cmpOp CharLeOp = LE
2950     cmpOp IntGtOp = GTT
2951     cmpOp IntGeOp = GE
2952     cmpOp IntEqOp = EQQ
2953     cmpOp IntNeOp = NE
2954     cmpOp IntLtOp = LTT
2955     cmpOp IntLeOp = LE
2956     cmpOp WordGtOp = NE
2957     cmpOp WordGeOp = ALWAYS
2958     cmpOp WordEqOp = EQQ
2959     cmpOp WordNeOp = NE
2960     cmpOp WordLtOp = NEVER
2961     cmpOp WordLeOp = EQQ
2962     cmpOp AddrGtOp = NE
2963     cmpOp AddrGeOp = ALWAYS
2964     cmpOp AddrEqOp = EQQ
2965     cmpOp AddrNeOp = NE
2966     cmpOp AddrLtOp = NEVER
2967     cmpOp AddrLeOp = EQQ
2968
2969 genCondJump lbl (StPrim op [x, StDouble 0.0])
2970   = getRegister x                           `thenNat` \ register ->
2971     getNewRegNat (registerRep register)
2972                                     `thenNat` \ tmp ->
2973     let
2974         code   = registerCode register tmp
2975         value  = registerName register tmp
2976         pk     = registerRep register
2977         target = ImmCLbl lbl
2978     in
2979     return (code . mkSeqInstr (BF (cmpOp op) value target))
2980   where
2981     cmpOp FloatGtOp = GTT
2982     cmpOp FloatGeOp = GE
2983     cmpOp FloatEqOp = EQQ
2984     cmpOp FloatNeOp = NE
2985     cmpOp FloatLtOp = LTT
2986     cmpOp FloatLeOp = LE
2987     cmpOp DoubleGtOp = GTT
2988     cmpOp DoubleGeOp = GE
2989     cmpOp DoubleEqOp = EQQ
2990     cmpOp DoubleNeOp = NE
2991     cmpOp DoubleLtOp = LTT
2992     cmpOp DoubleLeOp = LE
2993
2994 genCondJump lbl (StPrim op [x, y])
2995   | fltCmpOp op
2996   = trivialFCode pr instr x y       `thenNat` \ register ->
2997     getNewRegNat FF64               `thenNat` \ tmp ->
2998     let
2999         code   = registerCode register tmp
3000         result = registerName register tmp
3001         target = ImmCLbl lbl
3002     in
3003     return (code . mkSeqInstr (BF cond result target))
3004   where
3005     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
3006
3007     fltCmpOp op = case op of
3008         FloatGtOp -> True
3009         FloatGeOp -> True
3010         FloatEqOp -> True
3011         FloatNeOp -> True
3012         FloatLtOp -> True
3013         FloatLeOp -> True
3014         DoubleGtOp -> True
3015         DoubleGeOp -> True
3016         DoubleEqOp -> True
3017         DoubleNeOp -> True
3018         DoubleLtOp -> True
3019         DoubleLeOp -> True
3020         _ -> False
3021     (instr, cond) = case op of
3022         FloatGtOp -> (FCMP TF LE, EQQ)
3023         FloatGeOp -> (FCMP TF LTT, EQQ)
3024         FloatEqOp -> (FCMP TF EQQ, NE)
3025         FloatNeOp -> (FCMP TF EQQ, EQQ)
3026         FloatLtOp -> (FCMP TF LTT, NE)
3027         FloatLeOp -> (FCMP TF LE, NE)
3028         DoubleGtOp -> (FCMP TF LE, EQQ)
3029         DoubleGeOp -> (FCMP TF LTT, EQQ)
3030         DoubleEqOp -> (FCMP TF EQQ, NE)
3031         DoubleNeOp -> (FCMP TF EQQ, EQQ)
3032         DoubleLtOp -> (FCMP TF LTT, NE)
3033         DoubleLeOp -> (FCMP TF LE, NE)
3034
3035 genCondJump lbl (StPrim op [x, y])
3036   = trivialCode instr x y           `thenNat` \ register ->
3037     getNewRegNat IntRep             `thenNat` \ tmp ->
3038     let
3039         code   = registerCode register tmp
3040         result = registerName register tmp
3041         target = ImmCLbl lbl
3042     in
3043     return (code . mkSeqInstr (BI cond result target))
3044   where
3045     (instr, cond) = case op of
3046         CharGtOp -> (CMP LE, EQQ)
3047         CharGeOp -> (CMP LTT, EQQ)
3048         CharEqOp -> (CMP EQQ, NE)
3049         CharNeOp -> (CMP EQQ, EQQ)
3050         CharLtOp -> (CMP LTT, NE)
3051         CharLeOp -> (CMP LE, NE)
3052         IntGtOp -> (CMP LE, EQQ)
3053         IntGeOp -> (CMP LTT, EQQ)
3054         IntEqOp -> (CMP EQQ, NE)
3055         IntNeOp -> (CMP EQQ, EQQ)
3056         IntLtOp -> (CMP LTT, NE)
3057         IntLeOp -> (CMP LE, NE)
3058         WordGtOp -> (CMP ULE, EQQ)
3059         WordGeOp -> (CMP ULT, EQQ)
3060         WordEqOp -> (CMP EQQ, NE)
3061         WordNeOp -> (CMP EQQ, EQQ)
3062         WordLtOp -> (CMP ULT, NE)
3063         WordLeOp -> (CMP ULE, NE)
3064         AddrGtOp -> (CMP ULE, EQQ)
3065         AddrGeOp -> (CMP ULT, EQQ)
3066         AddrEqOp -> (CMP EQQ, NE)
3067         AddrNeOp -> (CMP EQQ, EQQ)
3068         AddrLtOp -> (CMP ULT, NE)
3069         AddrLeOp -> (CMP ULE, NE)
3070
3071 #endif /* alpha_TARGET_ARCH */
3072
3073 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3074
3075 #if i386_TARGET_ARCH
3076
3077 genCondJump id bool = do
3078   CondCode _ cond code <- getCondCode bool
3079   return (code `snocOL` JXX cond id)
3080
3081 #endif
3082
3083 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3084
3085 #if x86_64_TARGET_ARCH
3086
3087 genCondJump id bool = do
3088   CondCode is_float cond cond_code <- getCondCode bool
3089   if not is_float
3090     then
3091         return (cond_code `snocOL` JXX cond id)
3092     else do
3093         lbl <- getBlockIdNat
3094
3095         -- see comment with condFltReg
3096         let code = case cond of
3097                         NE  -> or_unordered
3098                         GU  -> plain_test
3099                         GEU -> plain_test
3100                         _   -> and_ordered
3101
3102             plain_test = unitOL (
3103                   JXX cond id
3104                 )
3105             or_unordered = toOL [
3106                   JXX cond id,
3107                   JXX PARITY id
3108                 ]
3109             and_ordered = toOL [
3110                   JXX PARITY lbl,
3111                   JXX cond id,
3112                   JXX ALWAYS lbl,
3113                   NEWBLOCK lbl
3114                 ]
3115         return (cond_code `appOL` code)
3116
3117 #endif
3118
3119 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3120
3121 #if sparc_TARGET_ARCH
3122
3123 genCondJump bid bool = do
3124   CondCode is_float cond code <- getCondCode bool
3125   return (
3126        code `appOL`
3127        toOL (
3128          if   is_float
3129          then [NOP, BF cond False bid, NOP]
3130          else [BI cond False bid, NOP]
3131        )
3132     )
3133
3134 #endif /* sparc_TARGET_ARCH */
3135
3136
3137 #if powerpc_TARGET_ARCH
3138
3139 genCondJump id bool = do
3140   CondCode is_float cond code <- getCondCode bool
3141   return (code `snocOL` BCC cond id)
3142
3143 #endif /* powerpc_TARGET_ARCH */
3144
3145
3146 -- -----------------------------------------------------------------------------
3147 --  Generating C calls
3148
3149 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
3150 -- @get_arg@, which moves the arguments to the correct registers/stack
3151 -- locations.  Apart from that, the code is easy.
3152 --
3153 -- (If applicable) Do not fill the delay slots here; you will confuse the
3154 -- register allocator.
3155
3156 genCCall
3157     :: CmmCallTarget            -- function to call
3158     -> HintedCmmFormals         -- where to put the result
3159     -> HintedCmmActuals         -- arguments (of mixed type)
3160     -> NatM InstrBlock
3161
3162 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3163
3164 #if alpha_TARGET_ARCH
3165
3166 ccallResultRegs =
3167
3168 genCCall fn cconv result_regs args
3169   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3170                           `thenNat` \ ((unused,_), argCode) ->
3171     let
3172         nRegs = length allArgRegs - length unused
3173         code = asmSeqThen (map ($ []) argCode)
3174     in
3175         returnSeq code [
3176             LDA pv (AddrImm (ImmLab (ptext fn))),
3177             JSR ra (AddrReg pv) nRegs,
3178             LDGP gp (AddrReg ra)]
3179   where
3180     ------------------------
3181     {-  Try to get a value into a specific register (or registers) for
3182         a call.  The first 6 arguments go into the appropriate
3183         argument register (separate registers for integer and floating
3184         point arguments, but used in lock-step), and the remaining
3185         arguments are dumped to the stack, beginning at 0(sp).  Our
3186         first argument is a pair of the list of remaining argument
3187         registers to be assigned for this call and the next stack
3188         offset to use for overflowing arguments.  This way,
3189         @get_Arg@ can be applied to all of a call's arguments using
3190         @mapAccumLNat@.
3191     -}
3192     get_arg
3193         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3194         -> StixTree             -- Current argument
3195         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3196
3197     -- We have to use up all of our argument registers first...
3198
3199     get_arg ((iDst,fDst):dsts, offset) arg
3200       = getRegister arg                     `thenNat` \ register ->
3201         let
3202             reg  = if isFloatType pk then fDst else iDst
3203             code = registerCode register reg
3204             src  = registerName register reg
3205             pk   = registerRep register
3206         in
3207         return (
3208             if isFloatType pk then
3209                 ((dsts, offset), if isFixed register then
3210                     code . mkSeqInstr (FMOV src fDst)
3211                     else code)
3212             else
3213                 ((dsts, offset), if isFixed register then
3214                     code . mkSeqInstr (OR src (RIReg src) iDst)
3215                     else code))
3216
3217     -- Once we have run out of argument registers, we move to the
3218     -- stack...
3219
3220     get_arg ([], offset) arg
3221       = getRegister arg                 `thenNat` \ register ->
3222         getNewRegNat (registerRep register)
3223                                         `thenNat` \ tmp ->
3224         let
3225             code = registerCode register tmp
3226             src  = registerName register tmp
3227             pk   = registerRep register
3228             sz   = primRepToSize pk
3229         in
3230         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3231
3232 #endif /* alpha_TARGET_ARCH */
3233
3234 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3235
3236 #if i386_TARGET_ARCH
3237
3238 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3239         -- write barrier compiles to no code on x86/x86-64;
3240         -- we keep it this long in order to prevent earlier optimisations.
3241
3242 -- we only cope with a single result for foreign calls
3243 genCCall (CmmPrim op) [CmmHinted r _] args = do
3244   l1 <- getNewLabelNat
3245   l2 <- getNewLabelNat
3246   case op of
3247         MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3248         MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3249
3250         MO_F32_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3251         MO_F64_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3252
3253         MO_F32_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3254         MO_F64_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3255
3256         MO_F32_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3257         MO_F64_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3258
3259         other_op    -> outOfLineFloatOp op r args
3260  where
3261   actuallyInlineFloatOp instr size [CmmHinted x _]
3262         = do res <- trivialUFCode size (instr size) x
3263              any <- anyReg res
3264              return (any (getRegisterReg (CmmLocal r)))
3265
3266 genCCall target dest_regs args = do
3267     let
3268         sizes               = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3269 #if !darwin_TARGET_OS
3270         tot_arg_size        = sum sizes
3271 #else
3272         raw_arg_size        = sum sizes
3273         tot_arg_size        = roundTo 16 raw_arg_size
3274         arg_pad_size        = tot_arg_size - raw_arg_size
3275     delta0 <- getDeltaNat
3276     setDeltaNat (delta0 - arg_pad_size)
3277 #endif
3278
3279     push_codes <- mapM push_arg (reverse args)
3280     delta <- getDeltaNat
3281
3282     -- in
3283     -- deal with static vs dynamic call targets
3284     (callinsns,cconv) <-
3285       case target of
3286         -- CmmPrim -> ...
3287         CmmCallee (CmmLit (CmmLabel lbl)) conv
3288            -> -- ToDo: stdcall arg sizes
3289               return (unitOL (CALL (Left fn_imm) []), conv)
3290            where fn_imm = ImmCLbl lbl
3291         CmmCallee expr conv
3292            -> do { (dyn_c, dyn_r) <- get_op expr
3293                  ; ASSERT( isWord32 (cmmExprType expr) )
3294                    return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3295
3296     let push_code
3297 #if darwin_TARGET_OS
3298             | arg_pad_size /= 0
3299             = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3300                     DELTA (delta0 - arg_pad_size)]
3301               `appOL` concatOL push_codes
3302             | otherwise
3303 #endif
3304             = concatOL push_codes
3305         call = callinsns `appOL`
3306                toOL (
3307                         -- Deallocate parameters after call for ccall;
3308                         -- but not for stdcall (callee does it)
3309                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3310                    [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3311                   ++
3312                   [DELTA (delta + tot_arg_size)]
3313                )
3314     -- in
3315     setDeltaNat (delta + tot_arg_size)
3316
3317     let
3318         -- assign the results, if necessary
3319         assign_code []     = nilOL
3320         assign_code [CmmHinted dest _hint]
3321           | isFloatType ty = unitOL (GMOV fake0 r_dest)
3322           | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3323                                     MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3324           | otherwise      = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3325           where
3326                 ty = localRegType dest
3327                 w  = typeWidth ty
3328                 r_dest_hi = getHiVRegFromLo r_dest
3329                 r_dest    = getRegisterReg (CmmLocal dest)
3330         assign_code many = panic "genCCall.assign_code many"
3331
3332     return (push_code `appOL`
3333             call `appOL`
3334             assign_code dest_regs)
3335
3336   where
3337     arg_size :: CmmType -> Int  -- Width in bytes
3338     arg_size ty = widthInBytes (typeWidth ty)
3339
3340     roundTo a x | x `mod` a == 0 = x
3341                 | otherwise = x + a - (x `mod` a)
3342
3343
3344     push_arg :: HintedCmmActual {-current argument-}
3345                     -> NatM InstrBlock  -- code
3346
3347     push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3348       | isWord64 arg_ty = do
3349         ChildCode64 code r_lo <- iselExpr64 arg
3350         delta <- getDeltaNat
3351         setDeltaNat (delta - 8)
3352         let
3353             r_hi = getHiVRegFromLo r_lo
3354         -- in
3355         return (       code `appOL`
3356                        toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3357                              PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3358                              DELTA (delta-8)]
3359             )
3360
3361       | otherwise = do
3362         (code, reg) <- get_op arg
3363         delta <- getDeltaNat
3364         let size = arg_size arg_ty      -- Byte size
3365         setDeltaNat (delta-size)
3366         if (isFloatType arg_ty)
3367            then return (code `appOL`
3368                         toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3369                               DELTA (delta-size),
3370                               GST (floatSize (typeWidth arg_ty))
3371                                   reg (AddrBaseIndex (EABaseReg esp)
3372                                                         EAIndexNone
3373                                                         (ImmInt 0))]
3374                        )
3375            else return (code `snocOL`
3376                         PUSH II32 (OpReg reg) `snocOL`
3377                         DELTA (delta-size)
3378                        )
3379       where
3380          arg_ty = cmmExprType arg
3381
3382     ------------
3383     get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3384     get_op op = do
3385         (reg,code) <- getSomeReg op
3386         return (code, reg)
3387
3388 #endif /* i386_TARGET_ARCH */
3389
3390 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3391
3392 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3393   -> NatM InstrBlock
3394 outOfLineFloatOp mop res args
3395   = do
3396       dflags <- getDynFlagsNat
3397       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3398       let target = CmmCallee targetExpr CCallConv
3399
3400       if isFloat64 (localRegType res)
3401         then
3402           stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3403         else do
3404           uq <- getUniqueNat
3405           let
3406             tmp = LocalReg uq f64
3407           -- in
3408           code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3409           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3410           return (code1 `appOL` code2)
3411   where
3412         lbl = mkForeignLabel fn Nothing False
3413
3414         fn = case mop of
3415               MO_F32_Sqrt  -> fsLit "sqrtf"
3416               MO_F32_Sin   -> fsLit "sinf"
3417               MO_F32_Cos   -> fsLit "cosf"
3418               MO_F32_Tan   -> fsLit "tanf"
3419               MO_F32_Exp   -> fsLit "expf"
3420               MO_F32_Log   -> fsLit "logf"
3421
3422               MO_F32_Asin  -> fsLit "asinf"
3423               MO_F32_Acos  -> fsLit "acosf"
3424               MO_F32_Atan  -> fsLit "atanf"
3425
3426               MO_F32_Sinh  -> fsLit "sinhf"
3427               MO_F32_Cosh  -> fsLit "coshf"
3428               MO_F32_Tanh  -> fsLit "tanhf"
3429               MO_F32_Pwr   -> fsLit "powf"
3430
3431               MO_F64_Sqrt  -> fsLit "sqrt"
3432               MO_F64_Sin   -> fsLit "sin"
3433               MO_F64_Cos   -> fsLit "cos"
3434               MO_F64_Tan   -> fsLit "tan"
3435               MO_F64_Exp   -> fsLit "exp"
3436               MO_F64_Log   -> fsLit "log"
3437
3438               MO_F64_Asin  -> fsLit "asin"
3439               MO_F64_Acos  -> fsLit "acos"
3440               MO_F64_Atan  -> fsLit "atan"
3441
3442               MO_F64_Sinh  -> fsLit "sinh"
3443               MO_F64_Cosh  -> fsLit "cosh"
3444               MO_F64_Tanh  -> fsLit "tanh"
3445               MO_F64_Pwr   -> fsLit "pow"
3446
3447 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3448
3449 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3450
3451 #if x86_64_TARGET_ARCH
3452
3453 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3454         -- write barrier compiles to no code on x86/x86-64;
3455         -- we keep it this long in order to prevent earlier optimisations.
3456
3457
3458 genCCall (CmmPrim op) [CmmHinted r _] args =
3459   outOfLineFloatOp op r args
3460
3461 genCCall target dest_regs args = do
3462
3463         -- load up the register arguments
3464     (stack_args, aregs, fregs, load_args_code)
3465          <- load_args args allArgRegs allFPArgRegs nilOL
3466
3467     let
3468         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3469         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3470         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3471                 -- for annotating the call instruction with
3472
3473         sse_regs = length fp_regs_used
3474
3475         tot_arg_size = arg_size * length stack_args
3476
3477         -- On entry to the called function, %rsp should be aligned
3478         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3479         -- the return address is 16-byte aligned).  In STG land
3480         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3481         -- need to make sure we push a multiple of 16-bytes of args,
3482         -- plus the return address, to get the correct alignment.
3483         -- Urg, this is hard.  We need to feed the delta back into
3484         -- the arg pushing code.
3485     (real_size, adjust_rsp) <-
3486         if tot_arg_size `rem` 16 == 0
3487             then return (tot_arg_size, nilOL)
3488             else do -- we need to adjust...
3489                 delta <- getDeltaNat
3490                 setDeltaNat (delta-8)
3491                 return (tot_arg_size+8, toOL [
3492                                 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3493                                 DELTA (delta-8)
3494                         ])
3495
3496         -- push the stack args, right to left
3497     push_code <- push_args (reverse stack_args) nilOL
3498     delta <- getDeltaNat
3499
3500     -- deal with static vs dynamic call targets
3501     (callinsns,cconv) <-
3502       case target of
3503         -- CmmPrim -> ...
3504         CmmCallee (CmmLit (CmmLabel lbl)) conv
3505            -> -- ToDo: stdcall arg sizes
3506               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3507            where fn_imm = ImmCLbl lbl
3508         CmmCallee expr conv
3509            -> do (dyn_r, dyn_c) <- getSomeReg expr
3510                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3511
3512     let
3513         -- The x86_64 ABI requires us to set %al to the number of SSE
3514         -- registers that contain arguments, if the called routine
3515         -- is a varargs function.  We don't know whether it's a
3516         -- varargs function or not, so we have to assume it is.
3517         --
3518         -- It's not safe to omit this assignment, even if the number
3519         -- of SSE regs in use is zero.  If %al is larger than 8
3520         -- on entry to a varargs function, seg faults ensue.
3521         assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3522
3523     let call = callinsns `appOL`
3524                toOL (
3525                         -- Deallocate parameters after call for ccall;
3526                         -- but not for stdcall (callee does it)
3527                   (if cconv == StdCallConv || real_size==0 then [] else
3528                    [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3529                   ++
3530                   [DELTA (delta + real_size)]
3531                )
3532     -- in
3533     setDeltaNat (delta + real_size)
3534
3535     let
3536         -- assign the results, if necessary
3537         assign_code []     = nilOL
3538         assign_code [CmmHinted dest _hint] =
3539           case typeWidth rep of
3540                 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3541                 W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3542                 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3543           where
3544                 rep = localRegType dest
3545                 r_dest = getRegisterReg (CmmLocal dest)
3546         assign_code many = panic "genCCall.assign_code many"
3547
3548     return (load_args_code      `appOL`
3549             adjust_rsp          `appOL`
3550             push_code           `appOL`
3551             assign_eax sse_regs `appOL`
3552             call                `appOL`
3553             assign_code dest_regs)
3554
3555   where
3556     arg_size = 8 -- always, at the mo
3557
3558     load_args :: [CmmHinted CmmExpr]
3559               -> [Reg]                  -- int regs avail for args
3560               -> [Reg]                  -- FP regs avail for args
3561               -> InstrBlock
3562               -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3563     load_args args [] [] code     =  return (args, [], [], code)
3564         -- no more regs to use
3565     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3566         -- no more args to push
3567     load_args ((CmmHinted arg hint) : rest) aregs fregs code
3568         | isFloatType arg_rep =
3569         case fregs of
3570           [] -> push_this_arg
3571           (r:rs) -> do
3572              arg_code <- getAnyReg arg
3573              load_args rest aregs rs (code `appOL` arg_code r)
3574         | otherwise =
3575         case aregs of
3576           [] -> push_this_arg
3577           (r:rs) -> do
3578              arg_code <- getAnyReg arg
3579              load_args rest rs fregs (code `appOL` arg_code r)
3580         where
3581           arg_rep = cmmExprType arg
3582
3583           push_this_arg = do
3584             (args',ars,frs,code') <- load_args rest aregs fregs code
3585             return ((CmmHinted arg hint):args', ars, frs, code')
3586
3587     push_args [] code = return code
3588     push_args ((CmmHinted arg hint):rest) code
3589        | isFloatType arg_rep = do
3590          (arg_reg, arg_code) <- getSomeReg arg
3591          delta <- getDeltaNat
3592          setDeltaNat (delta-arg_size)
3593          let code' = code `appOL` arg_code `appOL` toOL [
3594                         SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3595                         DELTA (delta-arg_size),
3596                         MOV (floatSize width) (OpReg arg_reg) (OpAddr  (spRel 0))]
3597          push_args rest code'
3598
3599        | otherwise = do
3600        -- we only ever generate word-sized function arguments.  Promotion
3601        -- has already happened: our Int8# type is kept sign-extended
3602        -- in an Int#, for example.
3603          ASSERT(width == W64) return ()
3604          (arg_op, arg_code) <- getOperand arg
3605          delta <- getDeltaNat
3606          setDeltaNat (delta-arg_size)
3607          let code' = code `appOL` arg_code `appOL` toOL [
3608                         PUSH II64 arg_op,
3609                         DELTA (delta-arg_size)]
3610          push_args rest code'
3611         where
3612           arg_rep = cmmExprType arg
3613           width = typeWidth arg_rep
3614 #endif
3615
3616 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3617
3618 #if sparc_TARGET_ARCH
3619 {-
3620    The SPARC calling convention is an absolute
3621    nightmare.  The first 6x32 bits of arguments are mapped into
3622    %o0 through %o5, and the remaining arguments are dumped to the
3623    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3624
3625    If we have to put args on the stack, move %o6==%sp down by
3626    the number of words to go on the stack, to ensure there's enough space.
3627
3628    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3629    16 words above the stack pointer is a word for the address of
3630    a structure return value.  I use this as a temporary location
3631    for moving values from float to int regs.  Certainly it isn't
3632    safe to put anything in the 16 words starting at %sp, since
3633    this area can get trashed at any time due to window overflows
3634    caused by signal handlers.
3635
3636    A final complication (if the above isn't enough) is that
3637    we can't blithely calculate the arguments one by one into
3638    %o0 .. %o5.  Consider the following nested calls:
3639
3640        fff a (fff b c)
3641
3642    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3643    the inner call will itself use %o0, which trashes the value put there
3644    in preparation for the outer call.  Upshot: we need to calculate the
3645    args into temporary regs, and move those to arg regs or onto the
3646    stack only immediately prior to the call proper.  Sigh.
3647
3648 genCCall
3649     :: CmmCallTarget            -- function to call
3650     -> HintedCmmFormals         -- where to put the result
3651     -> HintedCmmActuals         -- arguments (of mixed type)
3652     -> NatM InstrBlock
3653
3654 -}
3655
3656
3657 -- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
3658 -- are guaranteed to take place before writes afterwards (unlike on PowerPC).
3659 -- Ref: Section 8.4 of the SPARC V9 Architecture manual.
3660 --
3661 -- In the SPARC case we don't need a barrier.
3662 --
3663 genCCall (CmmPrim (MO_WriteBarrier)) _ _
3664  = do   return nilOL
3665
3666 genCCall target dest_regs argsAndHints
3667  = do
3668         -- strip hints from the arg regs
3669         let args :: [CmmExpr]
3670             args  = map hintlessCmm argsAndHints
3671
3672
3673         -- work out the arguments, and assign them to integer regs
3674         argcode_and_vregs       <- mapM arg_to_int_vregs args
3675         let (argcodes, vregss)  = unzip argcode_and_vregs
3676         let vregs               = concat vregss
3677
3678         let n_argRegs           = length allArgRegs
3679         let n_argRegs_used      = min (length vregs) n_argRegs
3680
3681
3682         -- deal with static vs dynamic call targets
3683         callinsns <- case target of
3684                 CmmCallee (CmmLit (CmmLabel lbl)) conv ->
3685                         return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3686
3687                 CmmCallee expr conv
3688                  -> do  (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3689                         return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3690
3691                 CmmPrim mop
3692                  -> do  res     <- outOfLineFloatOp mop
3693                         lblOrMopExpr <- case res of
3694                                 Left lbl -> do
3695                                         return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3696
3697                                 Right mopExpr -> do
3698                                         (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3699                                         return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3700
3701                         return lblOrMopExpr
3702
3703         let argcode = concatOL argcodes
3704
3705         let (move_sp_down, move_sp_up)
3706                    = let diff = length vregs - n_argRegs
3707                          nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3708                      in  if   nn <= 0
3709                          then (nilOL, nilOL)
3710                          else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3711
3712         let transfer_code
3713                 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3714
3715         return
3716          $      argcode                 `appOL`
3717                 move_sp_down            `appOL`
3718                 transfer_code           `appOL`
3719                 callinsns               `appOL`
3720                 unitOL NOP              `appOL`
3721                 move_sp_up              `appOL`
3722                 assign_code dest_regs
3723
3724
3725 -- | Generate code to calculate an argument, and move it into one
3726 --      or two integer vregs.
3727 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3728 arg_to_int_vregs arg
3729
3730         -- If the expr produces a 64 bit int, then we can just use iselExpr64
3731         | isWord64 (cmmExprType arg)
3732         = do    (ChildCode64 code r_lo) <- iselExpr64 arg
3733                 let r_hi                = getHiVRegFromLo r_lo
3734                 return (code, [r_hi, r_lo])
3735
3736         | otherwise
3737         = do    (src, code)     <- getSomeReg arg
3738                 tmp             <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3739                 let pk          = cmmExprType arg
3740
3741                 case cmmTypeSize pk of
3742
3743                  -- Load a 64 bit float return value into two integer regs.
3744                  FF64 -> do
3745                         v1 <- getNewRegNat II32
3746                         v2 <- getNewRegNat II32
3747
3748                         let Just f0_high = fPair f0
3749
3750                         let code2 =
3751                                 code                            `snocOL`
3752                                 FMOV FF64 src f0                `snocOL`
3753                                 ST   FF32  f0 (spRel 16)        `snocOL`
3754                                 LD   II32  (spRel 16) v1        `snocOL`
3755                                 ST   FF32  f0_high (spRel 16)   `snocOL`
3756                                 LD   II32  (spRel 16) v2
3757
3758                         return  (code2, [v1,v2])
3759
3760                  -- Load a 32 bit float return value into an integer reg
3761                  FF32 -> do
3762                         v1 <- getNewRegNat II32
3763
3764                         let code2 =
3765                                 code                            `snocOL`
3766                                 ST   FF32  src (spRel 16)       `snocOL`
3767                                 LD   II32  (spRel 16) v1
3768
3769                         return (code2, [v1])
3770
3771                  -- Move an integer return value into its destination reg.
3772                  other -> do
3773                         v1 <- getNewRegNat II32
3774
3775                         let code2 =
3776                                 code                            `snocOL`
3777                                 OR False g0 (RIReg src) v1
3778
3779                         return (code2, [v1])
3780
3781
3782 -- | Move args from the integer vregs into which they have been
3783 --      marshalled, into %o0 .. %o5, and the rest onto the stack.
3784 --
3785 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3786
3787 -- all args done
3788 move_final [] _ offset
3789         = []
3790
3791 -- out of aregs; move to stack
3792 move_final (v:vs) [] offset
3793         = ST II32 v (spRel offset)
3794         : move_final vs [] (offset+1)
3795
3796 -- move into an arg (%o[0..5]) reg
3797 move_final (v:vs) (a:az) offset
3798         = OR False g0 (RIReg v) a
3799         : move_final vs az offset
3800
3801
3802 -- | Assign results returned from the call into their
3803 --      desination regs.
3804 --
3805 assign_code :: [CmmHinted LocalReg] -> OrdList Instr
3806 assign_code []  = nilOL
3807
3808 assign_code [CmmHinted dest _hint]
3809  = let  rep     = localRegType dest
3810         width   = typeWidth rep
3811         r_dest  = getRegisterReg (CmmLocal dest)
3812
3813         result
3814                 | isFloatType rep
3815                 , W32   <- width
3816                 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3817
3818                 | isFloatType rep
3819                 , W64   <- width
3820                 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3821
3822                 | not $ isFloatType rep
3823                 , W32   <- width
3824                 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3825
3826                 | not $ isFloatType rep
3827                 , W64           <- width
3828                 , r_dest_hi     <- getHiVRegFromLo r_dest
3829                 = toOL  [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
3830                         , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
3831    in   result
3832
3833
3834 -- | Generate a call to implement an out-of-line floating point operation
3835 outOfLineFloatOp
3836         :: CallishMachOp
3837         -> NatM (Either CLabel CmmExpr)
3838
3839 outOfLineFloatOp mop
3840  = do   let functionName
3841                 = outOfLineFloatOp_table mop
3842
3843         dflags  <- getDynFlagsNat
3844         mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
3845                 $  mkForeignLabel functionName Nothing True
3846
3847         let mopLabelOrExpr
3848                 = case mopExpr of
3849                         CmmLit (CmmLabel lbl)   -> Left lbl
3850                         _                       -> Right mopExpr
3851
3852         return mopLabelOrExpr
3853
3854
3855 -- | Decide what C function to use to implement a CallishMachOp
3856 --
3857 outOfLineFloatOp_table
3858         :: CallishMachOp
3859         -> FastString
3860
3861 outOfLineFloatOp_table mop
3862  = case mop of
3863         MO_F32_Exp    -> fsLit "expf"
3864         MO_F32_Log    -> fsLit "logf"
3865         MO_F32_Sqrt   -> fsLit "sqrtf"
3866         MO_F32_Pwr    -> fsLit "powf"
3867
3868         MO_F32_Sin    -> fsLit "sinf"
3869         MO_F32_Cos    -> fsLit "cosf"
3870         MO_F32_Tan    -> fsLit "tanf"
3871
3872         MO_F32_Asin   -> fsLit "asinf"
3873         MO_F32_Acos   -> fsLit "acosf"
3874         MO_F32_Atan   -> fsLit "atanf"
3875
3876         MO_F32_Sinh   -> fsLit "sinhf"
3877         MO_F32_Cosh   -> fsLit "coshf"
3878         MO_F32_Tanh   -> fsLit "tanhf"
3879
3880         MO_F64_Exp    -> fsLit "exp"
3881         MO_F64_Log    -> fsLit "log"
3882         MO_F64_Sqrt   -> fsLit "sqrt"
3883         MO_F64_Pwr    -> fsLit "pow"
3884
3885         MO_F64_Sin    -> fsLit "sin"
3886         MO_F64_Cos    -> fsLit "cos"
3887         MO_F64_Tan    -> fsLit "tan"
3888
3889         MO_F64_Asin   -> fsLit "asin"
3890         MO_F64_Acos   -> fsLit "acos"
3891         MO_F64_Atan   -> fsLit "atan"
3892
3893         MO_F64_Sinh   -> fsLit "sinh"
3894         MO_F64_Cosh   -> fsLit "cosh"
3895         MO_F64_Tanh   -> fsLit "tanh"
3896
3897         other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
3898                         (pprCallishMachOp mop)
3899
3900
3901 #endif /* sparc_TARGET_ARCH */
3902
3903 #if powerpc_TARGET_ARCH
3904
3905 #if darwin_TARGET_OS || linux_TARGET_OS
3906 {-
3907     The PowerPC calling convention for Darwin/Mac OS X
3908     is described in Apple's document
3909     "Inside Mac OS X - Mach-O Runtime Architecture".
3910
3911     PowerPC Linux uses the System V Release 4 Calling Convention
3912     for PowerPC. It is described in the
3913     "System V Application Binary Interface PowerPC Processor Supplement".
3914
3915     Both conventions are similar:
3916     Parameters may be passed in general-purpose registers starting at r3, in
3917     floating point registers starting at f1, or on the stack.
3918
3919     But there are substantial differences:
3920     * The number of registers used for parameter passing and the exact set of
3921       nonvolatile registers differs (see MachRegs.lhs).
3922     * On Darwin, stack space is always reserved for parameters, even if they are
3923       passed in registers. The called routine may choose to save parameters from
3924       registers to the corresponding space on the stack.
3925     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3926       parameter is passed in an FPR.
3927     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3928       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3929       Darwin just treats an I64 like two separate II32s (high word first).
3930     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3931       4-byte aligned like everything else on Darwin.
3932     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3933       PowerPC Linux does not agree, so neither do we.
3934
3935     According to both conventions, The parameter area should be part of the
3936     caller's stack frame, allocated in the caller's prologue code (large enough
3937     to hold the parameter lists for all called routines). The NCG already
3938     uses the stack for register spilling, leaving 64 bytes free at the top.
3939     If we need a larger parameter area than that, we just allocate a new stack
3940     frame just before ccalling.
3941 -}
3942
3943
3944 genCCall (CmmPrim MO_WriteBarrier) _ _
3945  = return $ unitOL LWSYNC
3946
3947 genCCall target dest_regs argsAndHints
3948   = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3949         -- we rely on argument promotion in the codeGen
3950     do
3951         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3952                                                         (zip args argReps)
3953                                                         allArgRegs allFPArgRegs
3954                                                         initialStackOffset
3955                                                         (toOL []) []
3956
3957         (labelOrExpr, reduceToFF32) <- case target of
3958             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3959             CmmCallee expr conv -> return  (Right expr, False)
3960             CmmPrim mop -> outOfLineFloatOp mop
3961
3962         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3963             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3964
3965         case labelOrExpr of
3966             Left lbl -> do
3967                 return (         codeBefore
3968                         `snocOL` BL lbl usedRegs
3969                         `appOL`  codeAfter)
3970             Right dyn -> do
3971                 (dynReg, dynCode) <- getSomeReg dyn
3972                 return (         dynCode
3973                         `snocOL` MTCTR dynReg
3974                         `appOL`  codeBefore
3975                         `snocOL` BCTRL usedRegs
3976                         `appOL`  codeAfter)
3977     where
3978 #if darwin_TARGET_OS
3979         initialStackOffset = 24
3980             -- size of linkage area + size of arguments, in bytes
3981         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3982                                  map (widthInBytes . typeWidth) argReps
3983 #elif linux_TARGET_OS
3984         initialStackOffset = 8
3985         stackDelta finalStack = roundTo 16 finalStack
3986 #endif
3987         args = map hintlessCmm argsAndHints
3988         argReps = map cmmExprType args
3989
3990         roundTo a x | x `mod` a == 0 = x
3991                     | otherwise = x + a - (x `mod` a)
3992
3993         move_sp_down finalStack
3994                | delta > 64 =
3995                         toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3996                               DELTA (-delta)]
3997                | otherwise = nilOL
3998                where delta = stackDelta finalStack
3999         move_sp_up finalStack
4000                | delta > 64 =
4001                         toOL [ADD sp sp (RIImm (ImmInt delta)),
4002                               DELTA 0]
4003                | otherwise = nilOL
4004                where delta = stackDelta finalStack
4005
4006
4007         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
4008         passArguments ((arg,arg_ty):args) gprs fprs stackOffset
4009                accumCode accumUsed | isWord64 arg_ty =
4010             do
4011                 ChildCode64 code vr_lo <- iselExpr64 arg
4012                 let vr_hi = getHiVRegFromLo vr_lo
4013
4014 #if darwin_TARGET_OS
4015                 passArguments args
4016                               (drop 2 gprs)
4017                               fprs
4018                               (stackOffset+8)
4019                               (accumCode `appOL` code
4020                                     `snocOL` storeWord vr_hi gprs stackOffset
4021                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
4022                               ((take 2 gprs) ++ accumUsed)
4023             where
4024                 storeWord vr (gpr:_) offset = MR gpr vr
4025                 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
4026
4027 #elif linux_TARGET_OS
4028                 let stackOffset' = roundTo 8 stackOffset
4029                     stackCode = accumCode `appOL` code
4030                         `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
4031                         `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
4032                     regCode hireg loreg =
4033                         accumCode `appOL` code
4034                             `snocOL` MR hireg vr_hi
4035                             `snocOL` MR loreg vr_lo
4036
4037                 case gprs of
4038                     hireg : loreg : regs | even (length gprs) ->
4039                         passArguments args regs fprs stackOffset
4040                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
4041                     _skipped : hireg : loreg : regs ->
4042                         passArguments args regs fprs stackOffset
4043                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
4044                     _ -> -- only one or no regs left
4045                         passArguments args [] fprs (stackOffset'+8)
4046                                       stackCode accumUsed
4047 #endif
4048
4049         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
4050             | reg : _ <- regs = do
4051                 register <- getRegister arg
4052                 let code = case register of
4053                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
4054                             Any _ acode -> acode reg
4055                 passArguments args
4056                               (drop nGprs gprs)
4057                               (drop nFprs fprs)
4058 #if darwin_TARGET_OS
4059         -- The Darwin ABI requires that we reserve stack slots for register parameters
4060                               (stackOffset + stackBytes)
4061 #elif linux_TARGET_OS
4062         -- ... the SysV ABI doesn't.
4063                               stackOffset
4064 #endif
4065                               (accumCode `appOL` code)
4066                               (reg : accumUsed)
4067             | otherwise = do
4068                 (vr, code) <- getSomeReg arg
4069                 passArguments args
4070                               (drop nGprs gprs)
4071                               (drop nFprs fprs)
4072                               (stackOffset' + stackBytes)
4073                               (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
4074                               accumUsed
4075             where
4076 #if darwin_TARGET_OS
4077         -- stackOffset is at least 4-byte aligned
4078         -- The Darwin ABI is happy with that.
4079                 stackOffset' = stackOffset
4080 #else
4081         -- ... the SysV ABI requires 8-byte alignment for doubles.
4082                 stackOffset' | isFloatType rep && typeWidth rep == W64 =
4083                                  roundTo 8 stackOffset
4084                              | otherwise  =           stackOffset
4085 #endif
4086                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
4087                 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
4088                     II32 -> (1, 0, 4, gprs)
4089 #if darwin_TARGET_OS
4090         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
4091         -- we use the FPRs.
4092                     FF32 -> (1, 1, 4, fprs)
4093                     FF64 -> (2, 1, 8, fprs)
4094 #elif linux_TARGET_OS
4095         -- ... the SysV ABI doesn't.
4096                     FF32 -> (0, 1, 4, fprs)
4097                     FF64 -> (0, 1, 8, fprs)
4098 #endif
4099
4100         moveResult reduceToFF32 =
4101             case dest_regs of
4102                 [] -> nilOL
4103                 [CmmHinted dest _hint]
4104                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
4105                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
4106                     | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
4107                                           MR r_dest r4]
4108                     | otherwise -> unitOL (MR r_dest r3)
4109                     where rep = cmmRegType (CmmLocal dest)
4110                           r_dest = getRegisterReg (CmmLocal dest)
4111
4112         outOfLineFloatOp mop =
4113             do
4114                 dflags <- getDynFlagsNat
4115                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
4116                               mkForeignLabel functionName Nothing True
4117                 let mopLabelOrExpr = case mopExpr of
4118                         CmmLit (CmmLabel lbl) -> Left lbl
4119                         _ -> Right mopExpr
4120                 return (mopLabelOrExpr, reduce)
4121             where
4122                 (functionName, reduce) = case mop of
4123                     MO_F32_Exp   -> (fsLit "exp", True)
4124                     MO_F32_Log   -> (fsLit "log", True)
4125                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
4126
4127                     MO_F32_Sin   -> (fsLit "sin", True)
4128                     MO_F32_Cos   -> (fsLit "cos", True)
4129                     MO_F32_Tan   -> (fsLit "tan", True)
4130
4131                     MO_F32_Asin  -> (fsLit "asin", True)
4132                     MO_F32_Acos  -> (fsLit "acos", True)
4133                     MO_F32_Atan  -> (fsLit "atan", True)
4134
4135                     MO_F32_Sinh  -> (fsLit "sinh", True)
4136                     MO_F32_Cosh  -> (fsLit "cosh", True)
4137                     MO_F32_Tanh  -> (fsLit "tanh", True)
4138                     MO_F32_Pwr   -> (fsLit "pow", True)
4139
4140                     MO_F64_Exp   -> (fsLit "exp", False)
4141                     MO_F64_Log   -> (fsLit "log", False)
4142                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
4143
4144                     MO_F64_Sin   -> (fsLit "sin", False)
4145                     MO_F64_Cos   -> (fsLit "cos", False)
4146                     MO_F64_Tan   -> (fsLit "tan", False)
4147
4148                     MO_F64_Asin  -> (fsLit "asin", False)
4149                     MO_F64_Acos  -> (fsLit "acos", False)
4150                     MO_F64_Atan  -> (fsLit "atan", False)
4151
4152                     MO_F64_Sinh  -> (fsLit "sinh", False)
4153                     MO_F64_Cosh  -> (fsLit "cosh", False)
4154                     MO_F64_Tanh  -> (fsLit "tanh", False)
4155                     MO_F64_Pwr   -> (fsLit "pow", False)
4156                     other -> pprPanic "genCCall(ppc): unknown callish op"
4157                                     (pprCallishMachOp other)
4158
4159 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
4160
4161 #endif /* powerpc_TARGET_ARCH */
4162
4163
4164 -- -----------------------------------------------------------------------------
4165 -- Generating a table-branch
4166
4167 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
4168
4169 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4170 genSwitch expr ids
4171   | opt_PIC
4172   = do
4173         (reg,e_code) <- getSomeReg expr
4174         lbl <- getNewLabelNat
4175         dflags <- getDynFlagsNat
4176         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4177         (tableReg,t_code) <- getSomeReg $ dynRef
4178         let
4179             jumpTable = map jumpTableEntryRel ids
4180
4181             jumpTableEntryRel Nothing
4182                 = CmmStaticLit (CmmInt 0 wordWidth)
4183             jumpTableEntryRel (Just (BlockId id))
4184                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4185                 where blockLabel = mkAsmTempLabel id
4186
4187             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
4188                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
4189
4190 #if x86_64_TARGET_ARCH
4191 #if darwin_TARGET_OS
4192     -- on Mac OS X/x86_64, put the jump table in the text section
4193     -- to work around a limitation of the linker.
4194     -- ld64 is unable to handle the relocations for
4195     --     .quad L1 - L0
4196     -- if L0 is not preceded by a non-anonymous label in its section.
4197
4198             code = e_code `appOL` t_code `appOL` toOL [
4199                             ADD (intSize wordWidth) op (OpReg tableReg),
4200                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
4201                             LDATA Text (CmmDataLabel lbl : jumpTable)
4202                     ]
4203 #else
4204     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
4205     -- relocations, hence we only get 32-bit offsets in the jump
4206     -- table. As these offsets are always negative we need to properly
4207     -- sign extend them to 64-bit. This hack should be removed in
4208     -- conjunction with the hack in PprMach.hs/pprDataItem once
4209     -- binutils 2.17 is standard.
4210             code = e_code `appOL` t_code `appOL` toOL [
4211                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4212                             MOVSxL II32
4213                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
4214                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
4215                                    (OpReg reg),
4216                             ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
4217                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4218                    ]
4219 #endif
4220 #else
4221             code = e_code `appOL` t_code `appOL` toOL [
4222                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4223                             ADD (intSize wordWidth) op (OpReg tableReg),
4224                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4225                     ]
4226 #endif
4227         return code
4228   | otherwise
4229   = do
4230         (reg,e_code) <- getSomeReg expr
4231         lbl <- getNewLabelNat
4232         let
4233             jumpTable = map jumpTableEntry ids
4234             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
4235             code = e_code `appOL` toOL [
4236                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4237                     JMP_TBL op [ id | Just id <- ids ]
4238                  ]
4239         -- in
4240         return code
4241 #elif powerpc_TARGET_ARCH
4242 genSwitch expr ids
4243   | opt_PIC
4244   = do
4245         (reg,e_code) <- getSomeReg expr
4246         tmp <- getNewRegNat II32
4247         lbl <- getNewLabelNat
4248         dflags <- getDynFlagsNat
4249         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4250         (tableReg,t_code) <- getSomeReg $ dynRef
4251         let
4252             jumpTable = map jumpTableEntryRel ids
4253
4254             jumpTableEntryRel Nothing
4255                 = CmmStaticLit (CmmInt 0 wordWidth)
4256             jumpTableEntryRel (Just (BlockId id))
4257                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4258                 where blockLabel = mkAsmTempLabel id
4259
4260             code = e_code `appOL` t_code `appOL` toOL [
4261                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4262                             SLW tmp reg (RIImm (ImmInt 2)),
4263                             LD II32 tmp (AddrRegReg tableReg tmp),
4264                             ADD tmp tmp (RIReg tableReg),
4265                             MTCTR tmp,
4266                             BCTR [ id | Just id <- ids ]
4267                     ]
4268         return code
4269   | otherwise
4270   = do
4271         (reg,e_code) <- getSomeReg expr
4272         tmp <- getNewRegNat II32
4273         lbl <- getNewLabelNat
4274         let
4275             jumpTable = map jumpTableEntry ids
4276
4277             code = e_code `appOL` toOL [
4278                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4279                             SLW tmp reg (RIImm (ImmInt 2)),
4280                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
4281                             LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4282                             MTCTR tmp,
4283                             BCTR [ id | Just id <- ids ]
4284                     ]
4285         return code
4286 #elif sparc_TARGET_ARCH
4287 genSwitch expr ids
4288         | opt_PIC
4289         = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4290
4291         | otherwise
4292         = do    (e_reg, e_code) <- getSomeReg expr
4293
4294                 base_reg        <- getNewRegNat II32
4295                 offset_reg      <- getNewRegNat II32
4296                 dst             <- getNewRegNat II32
4297
4298                 label           <- getNewLabelNat
4299                 let jumpTable   = map jumpTableEntry ids
4300
4301                 return $ e_code `appOL`
4302                  toOL
4303                         -- the jump table
4304                         [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
4305
4306                         -- load base of jump table
4307                         , SETHI (HI (ImmCLbl label)) base_reg
4308                         , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
4309
4310                         -- the addrs in the table are 32 bits wide..
4311                         , SLL   e_reg (RIImm $ ImmInt 2) offset_reg
4312
4313                         -- load and jump to the destination
4314                         , LD      II32 (AddrRegReg base_reg offset_reg) dst
4315                         , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
4316                         , NOP ]
4317
4318 #else
4319 #error "ToDo: genSwitch"
4320 #endif
4321
4322
4323 -- | Convert a BlockId to some CmmStatic data
4324 jumpTableEntry :: Maybe BlockId -> CmmStatic
4325 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
4326 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4327     where blockLabel = mkAsmTempLabel id
4328
4329 -- -----------------------------------------------------------------------------
4330 -- Support bits
4331 -- -----------------------------------------------------------------------------
4332
4333
4334 -- -----------------------------------------------------------------------------
4335 -- 'condIntReg' and 'condFltReg': condition codes into registers
4336
4337 -- Turn those condition codes into integers now (when they appear on
4338 -- the right hand side of an assignment).
4339 --
4340 -- (If applicable) Do not fill the delay slots here; you will confuse the
4341 -- register allocator.
4342
4343 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4344
4345 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4346
4347 #if alpha_TARGET_ARCH
4348 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4349 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4350 #endif /* alpha_TARGET_ARCH */
4351
4352 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4353
4354 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4355
4356 condIntReg cond x y = do
4357   CondCode _ cond cond_code <- condIntCode cond x y
4358   tmp <- getNewRegNat II8
4359   let
4360         code dst = cond_code `appOL` toOL [
4361                     SETCC cond (OpReg tmp),
4362                     MOVZxL II8 (OpReg tmp) (OpReg dst)
4363                   ]
4364   -- in
4365   return (Any II32 code)
4366
4367 #endif
4368
4369 #if i386_TARGET_ARCH
4370
4371 condFltReg cond x y = do
4372   CondCode _ cond cond_code <- condFltCode cond x y
4373   tmp <- getNewRegNat II8
4374   let
4375         code dst = cond_code `appOL` toOL [
4376                     SETCC cond (OpReg tmp),
4377                     MOVZxL II8 (OpReg tmp) (OpReg dst)
4378                   ]
4379   -- in
4380   return (Any II32 code)
4381
4382 #endif
4383
4384 #if x86_64_TARGET_ARCH
4385
4386 condFltReg cond x y = do
4387   CondCode _ cond cond_code <- condFltCode cond x y
4388   tmp1 <- getNewRegNat wordSize
4389   tmp2 <- getNewRegNat wordSize
4390   let
4391         -- We have to worry about unordered operands (eg. comparisons
4392         -- against NaN).  If the operands are unordered, the comparison
4393         -- sets the parity flag, carry flag and zero flag.
4394         -- All comparisons are supposed to return false for unordered
4395         -- operands except for !=, which returns true.
4396         --
4397         -- Optimisation: we don't have to test the parity flag if we
4398         -- know the test has already excluded the unordered case: eg >
4399         -- and >= test for a zero carry flag, which can only occur for
4400         -- ordered operands.
4401         --
4402         -- ToDo: by reversing comparisons we could avoid testing the
4403         -- parity flag in more cases.
4404
4405         code dst =
4406            cond_code `appOL`
4407              (case cond of
4408                 NE  -> or_unordered dst
4409                 GU  -> plain_test   dst
4410                 GEU -> plain_test   dst
4411                 _   -> and_ordered  dst)
4412
4413         plain_test dst = toOL [
4414                     SETCC cond (OpReg tmp1),
4415                     MOVZxL II8 (OpReg tmp1) (OpReg dst)
4416                  ]
4417         or_unordered dst = toOL [
4418                     SETCC cond (OpReg tmp1),
4419                     SETCC PARITY (OpReg tmp2),
4420                     OR II8 (OpReg tmp1) (OpReg tmp2),
4421                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
4422                   ]
4423         and_ordered dst = toOL [
4424                     SETCC cond (OpReg tmp1),
4425                     SETCC NOTPARITY (OpReg tmp2),
4426                     AND II8 (OpReg tmp1) (OpReg tmp2),
4427                     MOVZxL II8 (OpReg tmp2) (OpReg dst)
4428                   ]
4429   -- in
4430   return (Any II32 code)
4431
4432 #endif
4433
4434 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4435
4436 #if sparc_TARGET_ARCH
4437
4438 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4439     (src, code) <- getSomeReg x
4440     tmp <- getNewRegNat II32
4441     let
4442         code__2 dst = code `appOL` toOL [
4443             SUB False True g0 (RIReg src) g0,
4444             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4445     return (Any II32 code__2)
4446
4447 condIntReg EQQ x y = do
4448     (src1, code1) <- getSomeReg x
4449     (src2, code2) <- getSomeReg y
4450     tmp1 <- getNewRegNat II32
4451     tmp2 <- getNewRegNat II32
4452     let
4453         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4454             XOR False src1 (RIReg src2) dst,
4455             SUB False True g0 (RIReg dst) g0,
4456             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4457     return (Any II32 code__2)
4458
4459 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4460     (src, code) <- getSomeReg x
4461     tmp <- getNewRegNat II32
4462     let
4463         code__2 dst = code `appOL` toOL [
4464             SUB False True g0 (RIReg src) g0,
4465             ADD True False g0 (RIImm (ImmInt 0)) dst]
4466     return (Any II32 code__2)
4467
4468 condIntReg NE x y = do
4469     (src1, code1) <- getSomeReg x
4470     (src2, code2) <- getSomeReg y
4471     tmp1 <- getNewRegNat II32
4472     tmp2 <- getNewRegNat II32
4473     let
4474         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4475             XOR False src1 (RIReg src2) dst,
4476             SUB False True g0 (RIReg dst) g0,
4477             ADD True False g0 (RIImm (ImmInt 0)) dst]
4478     return (Any II32 code__2)
4479
4480 condIntReg cond x y = do
4481     bid1@(BlockId lbl1) <- getBlockIdNat
4482     bid2@(BlockId lbl2) <- getBlockIdNat
4483     CondCode _ cond cond_code <- condIntCode cond x y
4484     let
4485         code__2 dst = cond_code `appOL` toOL [
4486             BI cond False bid1, NOP,
4487             OR False g0 (RIImm (ImmInt 0)) dst,
4488             BI ALWAYS False bid2, NOP,
4489             NEWBLOCK bid1,
4490             OR False g0 (RIImm (ImmInt 1)) dst,
4491             NEWBLOCK bid2]
4492     return (Any II32 code__2)
4493
4494 condFltReg cond x y = do
4495     bid1@(BlockId lbl1) <- getBlockIdNat
4496     bid2@(BlockId lbl2) <- getBlockIdNat
4497     CondCode _ cond cond_code <- condFltCode cond x y
4498     let
4499         code__2 dst = cond_code `appOL` toOL [
4500             NOP,
4501             BF cond False bid1, NOP,
4502             OR False g0 (RIImm (ImmInt 0)) dst,
4503             BI ALWAYS False bid2, NOP,
4504             NEWBLOCK bid1,
4505             OR False g0 (RIImm (ImmInt 1)) dst,
4506             NEWBLOCK bid2]
4507     return (Any II32 code__2)
4508
4509 #endif /* sparc_TARGET_ARCH */
4510
4511 #if powerpc_TARGET_ARCH
4512 condReg getCond = do
4513     lbl1 <- getBlockIdNat
4514     lbl2 <- getBlockIdNat
4515     CondCode _ cond cond_code <- getCond
4516     let
4517 {-        code dst = cond_code `appOL` toOL [
4518                 BCC cond lbl1,
4519                 LI dst (ImmInt 0),
4520                 BCC ALWAYS lbl2,
4521                 NEWBLOCK lbl1,
4522                 LI dst (ImmInt 1),
4523                 BCC ALWAYS lbl2,
4524                 NEWBLOCK lbl2
4525             ]-}
4526         code dst = cond_code
4527             `appOL` negate_code
4528             `appOL` toOL [
4529                 MFCR dst,
4530                 RLWINM dst dst (bit + 1) 31 31
4531             ]
4532
4533         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4534                     | otherwise = nilOL
4535
4536         (bit, do_negate) = case cond of
4537             LTT -> (0, False)
4538             LE  -> (1, True)
4539             EQQ -> (2, False)
4540             GE  -> (0, True)
4541             GTT -> (1, False)
4542
4543             NE  -> (2, True)
4544
4545             LU  -> (0, False)
4546             LEU -> (1, True)
4547             GEU -> (0, True)
4548             GU  -> (1, False)
4549
4550     return (Any II32 code)
4551
4552 condIntReg cond x y = condReg (condIntCode cond x y)
4553 condFltReg cond x y = condReg (condFltCode cond x y)
4554 #endif /* powerpc_TARGET_ARCH */
4555
4556
4557 -- -----------------------------------------------------------------------------
4558 -- 'trivial*Code': deal with trivial instructions
4559
4560 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4561 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4562 -- Only look for constants on the right hand side, because that's
4563 -- where the generic optimizer will have put them.
4564
4565 -- Similarly, for unary instructions, we don't have to worry about
4566 -- matching an StInt as the argument, because genericOpt will already
4567 -- have handled the constant-folding.
4568
4569 trivialCode
4570     :: Width    -- Int only
4571     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4572       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4573                      -> Maybe (Operand -> Operand -> Instr)
4574       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4575                      -> Maybe (Operand -> Operand -> Instr)
4576       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4577       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4578       ,)))))
4579     -> CmmExpr -> CmmExpr -- the two arguments
4580     -> NatM Register
4581
4582 #ifndef powerpc_TARGET_ARCH
4583 trivialFCode
4584     :: Width    -- Floating point only
4585     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4586       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4587       ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4588       ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4589       ,))))
4590     -> CmmExpr -> CmmExpr -- the two arguments
4591     -> NatM Register
4592 #endif
4593
4594 trivialUCode
4595     :: Size
4596     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4597       ,IF_ARCH_i386 ((Operand -> Instr)
4598       ,IF_ARCH_x86_64 ((Operand -> Instr)
4599       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4600       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4601       ,)))))
4602     -> CmmExpr  -- the one argument
4603     -> NatM Register
4604
4605 #ifndef powerpc_TARGET_ARCH
4606 trivialUFCode
4607     :: Size
4608     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4609       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4610       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4611       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4612       ,))))
4613     -> CmmExpr -- the one argument
4614     -> NatM Register
4615 #endif
4616
4617 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4618
4619 #if alpha_TARGET_ARCH
4620
4621 trivialCode instr x (StInt y)
4622   | fits8Bits y
4623   = getRegister x               `thenNat` \ register ->
4624     getNewRegNat IntRep         `thenNat` \ tmp ->
4625     let
4626         code = registerCode register tmp
4627         src1 = registerName register tmp
4628         src2 = ImmInt (fromInteger y)
4629         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4630     in
4631     return (Any IntRep code__2)
4632
4633 trivialCode instr x y
4634   = getRegister x               `thenNat` \ register1 ->
4635     getRegister y               `thenNat` \ register2 ->
4636     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4637     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4638     let
4639         code1 = registerCode register1 tmp1 []
4640         src1  = registerName register1 tmp1
4641         code2 = registerCode register2 tmp2 []
4642         src2  = registerName register2 tmp2
4643         code__2 dst = asmSeqThen [code1, code2] .
4644                      mkSeqInstr (instr src1 (RIReg src2) dst)
4645     in
4646     return (Any IntRep code__2)
4647
4648 ------------
4649 trivialUCode instr x
4650   = getRegister x               `thenNat` \ register ->
4651     getNewRegNat IntRep         `thenNat` \ tmp ->
4652     let
4653         code = registerCode register tmp
4654         src  = registerName register tmp
4655         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4656     in
4657     return (Any IntRep code__2)
4658
4659 ------------
4660 trivialFCode _ instr x y
4661   = getRegister x               `thenNat` \ register1 ->
4662     getRegister y               `thenNat` \ register2 ->
4663     getNewRegNat FF64   `thenNat` \ tmp1 ->
4664     getNewRegNat FF64   `thenNat` \ tmp2 ->
4665     let
4666         code1 = registerCode register1 tmp1
4667         src1  = registerName register1 tmp1
4668
4669         code2 = registerCode register2 tmp2
4670         src2  = registerName register2 tmp2
4671
4672         code__2 dst = asmSeqThen [code1 [], code2 []] .
4673                       mkSeqInstr (instr src1 src2 dst)
4674     in
4675     return (Any FF64 code__2)
4676
4677 trivialUFCode _ instr x
4678   = getRegister x               `thenNat` \ register ->
4679     getNewRegNat FF64   `thenNat` \ tmp ->
4680     let
4681         code = registerCode register tmp
4682         src  = registerName register tmp
4683         code__2 dst = code . mkSeqInstr (instr src dst)
4684     in
4685     return (Any FF64 code__2)
4686
4687 #endif /* alpha_TARGET_ARCH */
4688
4689 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4690
4691 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4692
4693 {-
4694 The Rules of the Game are:
4695
4696 * You cannot assume anything about the destination register dst;
4697   it may be anything, including a fixed reg.
4698
4699 * You may compute an operand into a fixed reg, but you may not
4700   subsequently change the contents of that fixed reg.  If you
4701   want to do so, first copy the value either to a temporary
4702   or into dst.  You are free to modify dst even if it happens
4703   to be a fixed reg -- that's not your problem.
4704
4705 * You cannot assume that a fixed reg will stay live over an
4706   arbitrary computation.  The same applies to the dst reg.
4707
4708 * Temporary regs obtained from getNewRegNat are distinct from
4709   each other and from all other regs, and stay live over
4710   arbitrary computations.
4711
4712 --------------------
4713
4714 SDM's version of The Rules:
4715
4716 * If getRegister returns Any, that means it can generate correct
4717   code which places the result in any register, period.  Even if that
4718   register happens to be read during the computation.
4719
4720   Corollary #1: this means that if you are generating code for an
4721   operation with two arbitrary operands, you cannot assign the result
4722   of the first operand into the destination register before computing
4723   the second operand.  The second operand might require the old value
4724   of the destination register.
4725
4726   Corollary #2: A function might be able to generate more efficient
4727   code if it knows the destination register is a new temporary (and
4728   therefore not read by any of the sub-computations).
4729
4730 * If getRegister returns Any, then the code it generates may modify only:
4731         (a) fresh temporaries
4732         (b) the destination register
4733         (c) known registers (eg. %ecx is used by shifts)
4734   In particular, it may *not* modify global registers, unless the global
4735   register happens to be the destination register.
4736 -}
4737
4738 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4739   | is32BitLit lit_a = do
4740   b_code <- getAnyReg b
4741   let
4742        code dst
4743          = b_code dst `snocOL`
4744            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4745   -- in
4746   return (Any (intSize width) code)
4747
4748 trivialCode width instr maybe_revinstr a b
4749   = genTrivialCode (intSize width) instr a b
4750
4751 -- This is re-used for floating pt instructions too.
4752 genTrivialCode rep instr a b = do
4753   (b_op, b_code) <- getNonClobberedOperand b
4754   a_code <- getAnyReg a
4755   tmp <- getNewRegNat rep
4756   let
4757      -- We want the value of b to stay alive across the computation of a.
4758      -- But, we want to calculate a straight into the destination register,
4759      -- because the instruction only has two operands (dst := dst `op` src).
4760      -- The troublesome case is when the result of b is in the same register
4761      -- as the destination reg.  In this case, we have to save b in a
4762      -- new temporary across the computation of a.
4763      code dst
4764         | dst `regClashesWithOp` b_op =
4765                 b_code `appOL`
4766                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4767                 a_code dst `snocOL`
4768                 instr (OpReg tmp) (OpReg dst)
4769         | otherwise =
4770                 b_code `appOL`
4771                 a_code dst `snocOL`
4772                 instr b_op (OpReg dst)
4773   -- in
4774   return (Any rep code)
4775
4776 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4777 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4778 reg `regClashesWithOp` _            = False
4779
4780 -----------
4781
4782 trivialUCode rep instr x = do
4783   x_code <- getAnyReg x
4784   let
4785      code dst =
4786         x_code dst `snocOL`
4787         instr (OpReg dst)
4788   return (Any rep code)
4789
4790 -----------
4791
4792 #if i386_TARGET_ARCH
4793
4794 trivialFCode width instr x y = do
4795   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4796   (y_reg, y_code) <- getSomeReg y
4797   let
4798      size = floatSize width
4799      code dst =
4800         x_code `appOL`
4801         y_code `snocOL`
4802         instr size x_reg y_reg dst
4803   return (Any size code)
4804
4805 #endif
4806
4807 #if x86_64_TARGET_ARCH
4808 trivialFCode pk instr x y
4809   = genTrivialCode size (instr size) x y
4810   where size = floatSize pk
4811 #endif
4812
4813 -------------
4814
4815 trivialUFCode size instr x = do
4816   (x_reg, x_code) <- getSomeReg x
4817   let
4818      code dst =
4819         x_code `snocOL`
4820         instr x_reg dst
4821   -- in
4822   return (Any size code)
4823
4824 #endif /* i386_TARGET_ARCH */
4825
4826 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4827
4828 #if sparc_TARGET_ARCH
4829
4830 trivialCode pk instr x (CmmLit (CmmInt y d))
4831   | fits13Bits y
4832   = do
4833       (src1, code) <- getSomeReg x
4834       tmp <- getNewRegNat II32
4835       let
4836         src2 = ImmInt (fromInteger y)
4837         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4838       return (Any II32 code__2)
4839
4840 trivialCode pk instr x y = do
4841     (src1, code1) <- getSomeReg x
4842     (src2, code2) <- getSomeReg y
4843     tmp1 <- getNewRegNat II32
4844     tmp2 <- getNewRegNat II32
4845     let
4846         code__2 dst = code1 `appOL` code2 `snocOL`
4847                       instr src1 (RIReg src2) dst
4848     return (Any II32 code__2)
4849
4850 ------------
4851 trivialFCode pk instr x y = do
4852     (src1, code1) <- getSomeReg x
4853     (src2, code2) <- getSomeReg y
4854     tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4855     tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4856     tmp <- getNewRegNat FF64
4857     let
4858         promote x = FxTOy FF32 FF64 x tmp
4859
4860         pk1   = cmmExprType x
4861         pk2   = cmmExprType y
4862
4863         code__2 dst =
4864                 if pk1 `cmmEqType` pk2 then
4865                     code1 `appOL` code2 `snocOL`
4866                     instr (floatSize pk) src1 src2 dst
4867                 else if typeWidth pk1 == W32 then
4868                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4869                     instr FF64 tmp src2 dst
4870                 else
4871                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4872                     instr FF64 src1 tmp dst
4873     return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
4874                 code__2)
4875
4876 ------------
4877 trivialUCode size instr x = do
4878     (src, code) <- getSomeReg x
4879     tmp <- getNewRegNat size
4880     let
4881         code__2 dst = code `snocOL` instr (RIReg src) dst
4882     return (Any size code__2)
4883
4884 -------------
4885 trivialUFCode pk instr x = do
4886     (src, code) <- getSomeReg x
4887     tmp <- getNewRegNat pk
4888     let
4889         code__2 dst = code `snocOL` instr src dst
4890     return (Any pk code__2)
4891
4892 #endif /* sparc_TARGET_ARCH */
4893
4894 #if powerpc_TARGET_ARCH
4895
4896 {-
4897 Wolfgang's PowerPC version of The Rules:
4898
4899 A slightly modified version of The Rules to take advantage of the fact
4900 that PowerPC instructions work on all registers and don't implicitly
4901 clobber any fixed registers.
4902
4903 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4904
4905 * If getRegister returns Any, then the code it generates may modify only:
4906         (a) fresh temporaries
4907         (b) the destination register
4908   It may *not* modify global registers, unless the global
4909   register happens to be the destination register.
4910   It may not clobber any other registers. In fact, only ccalls clobber any
4911   fixed registers.
4912   Also, it may not modify the counter register (used by genCCall).
4913
4914   Corollary: If a getRegister for a subexpression returns Fixed, you need
4915   not move it to a fresh temporary before evaluating the next subexpression.
4916   The Fixed register won't be modified.
4917   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4918
4919 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4920   the value of the destination register.
4921 -}
4922
4923 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4924     | Just imm <- makeImmediate rep signed y
4925     = do
4926         (src1, code1) <- getSomeReg x
4927         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4928         return (Any (intSize rep) code)
4929
4930 trivialCode rep signed instr x y = do
4931     (src1, code1) <- getSomeReg x
4932     (src2, code2) <- getSomeReg y
4933     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4934     return (Any (intSize rep) code)
4935
4936 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
4937                  -> CmmExpr -> CmmExpr -> NatM Register
4938 trivialCodeNoImm' size instr x y = do
4939     (src1, code1) <- getSomeReg x
4940     (src2, code2) <- getSomeReg y
4941     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4942     return (Any size code)
4943
4944 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
4945                  -> CmmExpr -> CmmExpr -> NatM Register
4946 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
4947
4948 trivialUCode rep instr x = do
4949     (src, code) <- getSomeReg x
4950     let code' dst = code `snocOL` instr dst src
4951     return (Any rep code')
4952
4953 -- There is no "remainder" instruction on the PPC, so we have to do
4954 -- it the hard way.
4955 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4956
4957 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4958     -> CmmExpr -> CmmExpr -> NatM Register
4959 remainderCode rep div x y = do
4960     (src1, code1) <- getSomeReg x
4961     (src2, code2) <- getSomeReg y
4962     let code dst = code1 `appOL` code2 `appOL` toOL [
4963                 div dst src1 src2,
4964                 MULLW dst dst (RIReg src2),
4965                 SUBF dst dst src1
4966             ]
4967     return (Any (intSize rep) code)
4968
4969 #endif /* powerpc_TARGET_ARCH */
4970
4971
4972 -- -----------------------------------------------------------------------------
4973 --  Coercing to/from integer/floating-point...
4974
4975 -- When going to integer, we truncate (round towards 0).
4976
4977 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4978 -- conversions.  We have to store temporaries in memory to move
4979 -- between the integer and the floating point register sets.
4980
4981 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4982 -- pretend, on sparc at least, that double and float regs are seperate
4983 -- kinds, so the value has to be computed into one kind before being
4984 -- explicitly "converted" to live in the other kind.
4985
4986 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4987 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4988
4989 #if sparc_TARGET_ARCH
4990 coerceDbl2Flt :: CmmExpr -> NatM Register
4991 coerceFlt2Dbl :: CmmExpr -> NatM Register
4992 #endif
4993
4994 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4995
4996 #if alpha_TARGET_ARCH
4997
4998 coerceInt2FP _ x
4999   = getRegister x               `thenNat` \ register ->
5000     getNewRegNat IntRep         `thenNat` \ reg ->
5001     let
5002         code = registerCode register reg
5003         src  = registerName register reg
5004
5005         code__2 dst = code . mkSeqInstrs [
5006             ST Q src (spRel 0),
5007             LD TF dst (spRel 0),
5008             CVTxy Q TF dst dst]
5009     in
5010     return (Any FF64 code__2)
5011
5012 -------------
5013 coerceFP2Int x
5014   = getRegister x               `thenNat` \ register ->
5015     getNewRegNat FF64   `thenNat` \ tmp ->
5016     let
5017         code = registerCode register tmp
5018         src  = registerName register tmp
5019
5020         code__2 dst = code . mkSeqInstrs [
5021             CVTxy TF Q src tmp,
5022             ST TF tmp (spRel 0),
5023             LD Q dst (spRel 0)]
5024     in
5025     return (Any IntRep code__2)
5026
5027 #endif /* alpha_TARGET_ARCH */
5028
5029 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5030
5031 #if i386_TARGET_ARCH
5032
5033 coerceInt2FP from to x = do
5034   (x_reg, x_code) <- getSomeReg x
5035   let
5036         opc  = case to of W32 -> GITOF; W64 -> GITOD
5037         code dst = x_code `snocOL` opc x_reg dst
5038         -- ToDo: works for non-II32 reps?
5039   return (Any (floatSize to) code)
5040
5041 ------------
5042
5043 coerceFP2Int from to x = do
5044   (x_reg, x_code) <- getSomeReg x
5045   let
5046         opc  = case from of W32 -> GFTOI; W64 -> GDTOI
5047         code dst = x_code `snocOL` opc x_reg dst
5048         -- ToDo: works for non-II32 reps?
5049   -- in
5050   return (Any (intSize to) code)
5051
5052 #endif /* i386_TARGET_ARCH */
5053
5054 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5055
5056 #if x86_64_TARGET_ARCH
5057
5058 coerceFP2Int from to x = do
5059   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
5060   let
5061         opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
5062         code dst = x_code `snocOL` opc x_op dst
5063   -- in
5064   return (Any (intSize to) code) -- works even if the destination rep is <II32
5065
5066 coerceInt2FP from to x = do
5067   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
5068   let
5069         opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
5070         code dst = x_code `snocOL` opc x_op dst
5071   -- in
5072   return (Any (floatSize to) code) -- works even if the destination rep is <II32
5073
5074 coerceFP2FP :: Width -> CmmExpr -> NatM Register
5075 coerceFP2FP to x = do
5076   (x_reg, x_code) <- getSomeReg x
5077   let
5078         opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
5079         code dst = x_code `snocOL` opc x_reg dst
5080   -- in
5081   return (Any (floatSize to) code)
5082 #endif
5083
5084 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5085
5086 #if sparc_TARGET_ARCH
5087
5088 coerceInt2FP width1 width2 x = do
5089     (src, code) <- getSomeReg x
5090     let
5091         code__2 dst = code `appOL` toOL [
5092             ST (intSize width1) src (spRel (-2)),
5093             LD (intSize width1) (spRel (-2)) dst,
5094             FxTOy (intSize width1) (floatSize width2) dst dst]
5095     return (Any (floatSize $ width2) code__2)
5096
5097
5098 -- | Coerce a floating point value to integer
5099 --
5100 --   NOTE: On sparc v9 there are no instructions to move a value from an
5101 --         FP register directly to an int register, so we have to use a load/store.
5102 --
5103 coerceFP2Int width1 width2 x
5104  = do   let fsize1      = floatSize width1
5105             fsize2      = floatSize width2
5106
5107             isize2      = intSize   width2
5108
5109         (fsrc, code)    <- getSomeReg x
5110         fdst            <- getNewRegNat fsize2
5111
5112         let code2 dst
5113                 =       code
5114                 `appOL` toOL
5115                         -- convert float to int format, leaving it in a float reg.
5116                         [ FxTOy fsize1 isize2 fsrc fdst
5117
5118                         -- store the int into mem, then load it back to move
5119                         --      it into an actual int reg.
5120                         , ST    fsize2 fdst (spRel (-2))
5121                         , LD    isize2 (spRel (-2)) dst]
5122
5123         return (Any isize2 code2)
5124
5125 ------------
5126 coerceDbl2Flt x = do
5127     (src, code) <- getSomeReg x
5128     return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
5129
5130 ------------
5131 coerceFlt2Dbl x = do
5132     (src, code) <- getSomeReg x
5133     return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
5134
5135 #endif /* sparc_TARGET_ARCH */
5136
5137 #if powerpc_TARGET_ARCH
5138 coerceInt2FP fromRep toRep x = do
5139     (src, code) <- getSomeReg x
5140     lbl <- getNewLabelNat
5141     itmp <- getNewRegNat II32
5142     ftmp <- getNewRegNat FF64
5143     dflags <- getDynFlagsNat
5144     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
5145     Amode addr addr_code <- getAmode dynRef
5146     let
5147         code' dst = code `appOL` maybe_exts `appOL` toOL [
5148                 LDATA ReadOnlyData
5149                                 [CmmDataLabel lbl,
5150                                  CmmStaticLit (CmmInt 0x43300000 W32),
5151                                  CmmStaticLit (CmmInt 0x80000000 W32)],
5152                 XORIS itmp src (ImmInt 0x8000),
5153                 ST II32 itmp (spRel 3),
5154                 LIS itmp (ImmInt 0x4330),
5155                 ST II32 itmp (spRel 2),
5156                 LD FF64 ftmp (spRel 2)
5157             ] `appOL` addr_code `appOL` toOL [
5158                 LD FF64 dst addr,
5159                 FSUB FF64 dst ftmp dst
5160             ] `appOL` maybe_frsp dst
5161
5162         maybe_exts = case fromRep of
5163                         W8 ->  unitOL $ EXTS II8 src src
5164                         W16 -> unitOL $ EXTS II16 src src
5165                         W32 -> nilOL
5166         maybe_frsp dst = case toRep of
5167                         W32 -> unitOL $ FRSP dst dst
5168                         W64 -> nilOL
5169     return (Any (floatSize toRep) code')
5170
5171 coerceFP2Int fromRep toRep x = do
5172     -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
5173     (src, code) <- getSomeReg x
5174     tmp <- getNewRegNat FF64
5175     let
5176         code' dst = code `appOL` toOL [
5177                 -- convert to int in FP reg
5178             FCTIWZ tmp src,
5179                 -- store value (64bit) from FP to stack
5180             ST FF64 tmp (spRel 2),
5181                 -- read low word of value (high word is undefined)
5182             LD II32 dst (spRel 3)]
5183     return (Any (intSize toRep) code')
5184 #endif /* powerpc_TARGET_ARCH */
5185
5186
5187 -- -----------------------------------------------------------------------------
5188 -- eXTRA_STK_ARGS_HERE
5189
5190 -- We (allegedly) put the first six C-call arguments in registers;
5191 -- where do we start putting the rest of them?
5192
5193 -- Moved from MachInstrs (SDM):
5194
5195 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
5196 eXTRA_STK_ARGS_HERE :: Int
5197 eXTRA_STK_ARGS_HERE
5198   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
5199 #endif