compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo ( mkBranchInstr )
  33
  34 -- Our intermediate code:
  35 import PprCmm           ( pprExpr )
  36 import Cmm
  37 import MachOp
  38 import CLabel
  39 import ClosureInfo      ( C_SRT(..) )
  40
  41 -- The rest:
  42 import StaticFlags      ( opt_PIC )
  43 import ForeignCall      ( CCallConv(..) )
  44 import OrdList
  45 import Pretty
  46 import Outputable
  47 import FastString
  48 import FastTypes        ( isFastTrue )
  49 import Constants        ( wORD_SIZE )
  50
  51 #ifdef DEBUG
  52 import Outputable       ( assertPanic )
  53 import Debug.Trace      ( trace )
  54 #endif
  55 import Debug.Trace      ( trace )
  56
  57 import Control.Monad    ( mapAndUnzipM )
  58 import Data.Maybe       ( fromJust )
  59 import Data.Bits
  60 import Data.Word
  61 import Data.Int
  62
  63 -- -----------------------------------------------------------------------------
  64 -- Top-level of the instruction selector
  65
  66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  67 -- They are really trees of insns to facilitate fast appending, where a
  68 -- left-to-right traversal (pre-order?) yields the insns in the correct
  69 -- order.
  70
  71 type InstrBlock = OrdList Instr
  72
  73 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  74 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  75   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  76   picBaseMb <- getPicBaseMaybeNat
  77   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  78       tops = proc : concat statics
  79   case picBaseMb of
  80       Just picBase -> initializePicBase picBase tops
  81       Nothing -> return tops
  82
  83 cmmTopCodeGen (CmmData sec dat) = do
  84   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  85
  86 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  87 basicBlockCodeGen (BasicBlock id stmts) = do
  88   instrs <- stmtsToInstrs stmts
  89   -- code generation may introduce new basic block boundaries, which
  90   -- are indicated by the NEWBLOCK instruction.  We must split up the
  91   -- instruction stream into basic blocks again.  Also, we extract
  92   -- LDATAs here too.
  93   let
  94         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  95
  96         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  97           = ([], BasicBlock id instrs : blocks, statics)
  98         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  99           = (instrs, blocks, CmmData sec dat:statics)
 100         mkBlocks instr (instrs,blocks,statics)
 101           = (instr:instrs, blocks, statics)
 102   -- in
 103   return (BasicBlock id top : other_blocks, statics)
 104
 105 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 106 stmtsToInstrs stmts
 107    = do instrss <- mapM stmtToInstrs stmts
 108         return (concatOL instrss)
 109
 110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 111 stmtToInstrs stmt = case stmt of
 112     CmmNop         -> return nilOL
 113     CmmComment s   -> return (unitOL (COMMENT s))
 114
 115     CmmAssign reg src
 116       | isFloatingRep kind -> assignReg_FltCode kind reg src
 117 #if WORD_SIZE_IN_BITS==32
 118       | kind == I64        -> assignReg_I64Code      reg src
 119 #endif
 120       | otherwise          -> assignReg_IntCode kind reg src
 121         where kind = cmmRegRep reg
 122
 123     CmmStore addr src
 124       | isFloatingRep kind -> assignMem_FltCode kind addr src
 125 #if WORD_SIZE_IN_BITS==32
 126       | kind == I64      -> assignMem_I64Code      addr src
 127 #endif
 128       | otherwise        -> assignMem_IntCode kind addr src
 129         where kind = cmmExprRep src
 130
 131     CmmCall target result_regs args _ _
 132        -> genCCall target result_regs args
 133
 134     CmmBranch id          -> genBranch id
 135     CmmCondBranch arg id  -> genCondJump id arg
 136     CmmSwitch arg ids     -> genSwitch arg ids
 137     CmmJump arg params    -> genJump arg
 138
 139 -- -----------------------------------------------------------------------------
 140 -- General things for putting together code sequences
 141
 142 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 143 -- CmmExprs into CmmRegOff?
 144 mangleIndexTree :: CmmExpr -> CmmExpr
 145 mangleIndexTree (CmmRegOff reg off)
 146   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 147   where rep = cmmRegRep reg
 148
 149 -- -----------------------------------------------------------------------------
 150 --  Code gen for 64-bit arithmetic on 32-bit platforms
 151
 152 {-
 153 Simple support for generating 64-bit code (ie, 64 bit values and 64
 154 bit assignments) on 32-bit platforms.  Unlike the main code generator
 155 we merely shoot for generating working code as simply as possible, and
 156 pay little attention to code quality.  Specifically, there is no
 157 attempt to deal cleverly with the fixed-vs-floating register
 158 distinction; all values are generated into (pairs of) floating
 159 registers, even if this would mean some redundant reg-reg moves as a
 160 result.  Only one of the VRegUniques is returned, since it will be
 161 of the VRegUniqueLo form, and the upper-half VReg can be determined
 162 by applying getHiVRegFromLo to it.
 163 -}
 164
 165 data ChildCode64        -- a.k.a "Register64"
 166    = ChildCode64
 167         InstrBlock      -- code
 168         Reg             -- the lower 32-bit temporary which contains the
 169                         -- result; use getHiVRegFromLo to find the other
 170                         -- VRegUnique.  Rules of this simplified insn
 171                         -- selection game are therefore that the returned
 172                         -- Reg may be modified
 173
 174 #if WORD_SIZE_IN_BITS==32
 175 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 176 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 177 #endif
 178
 179 #ifndef x86_64_TARGET_ARCH
 180 iselExpr64        :: CmmExpr -> NatM ChildCode64
 181 #endif
 182
 183 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 184
 185 #if i386_TARGET_ARCH
 186
 187 assignMem_I64Code addrTree valueTree = do
 188   Amode addr addr_code <- getAmode addrTree
 189   ChildCode64 vcode rlo <- iselExpr64 valueTree
 190   let
 191         rhi = getHiVRegFromLo rlo
 192
 193         -- Little-endian store
 194         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 195         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 196   -- in
 197   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 198
 199
 200 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 201    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 202    let
 203          r_dst_lo = mkVReg u_dst I32
 204          r_dst_hi = getHiVRegFromLo r_dst_lo
 205          r_src_hi = getHiVRegFromLo r_src_lo
 206          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 207          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 208    -- in
 209    return (
 210         vcode `snocOL` mov_lo `snocOL` mov_hi
 211      )
 212
 213 assignReg_I64Code lvalue valueTree
 214    = panic "assignReg_I64Code(i386): invalid lvalue"
 215
 216 ------------
 217
 218 iselExpr64 (CmmLit (CmmInt i _)) = do
 219   (rlo,rhi) <- getNewRegPairNat I32
 220   let
 221         r = fromIntegral (fromIntegral i :: Word32)
 222         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 223         code = toOL [
 224                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 225                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 226                 ]
 227   -- in
 228   return (ChildCode64 code rlo)
 229
 230 iselExpr64 (CmmLoad addrTree I64) = do
 231    Amode addr addr_code <- getAmode addrTree
 232    (rlo,rhi) <- getNewRegPairNat I32
 233    let
 234         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 235         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 236    -- in
 237    return (
 238             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 239                         rlo
 240      )
 241
 242 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 243    = return (ChildCode64 nilOL (mkVReg vu I32))
 244
 245 -- we handle addition, but rather badly
 246 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 247    ChildCode64 code1 r1lo <- iselExpr64 e1
 248    (rlo,rhi) <- getNewRegPairNat I32
 249    let
 250         r = fromIntegral (fromIntegral i :: Word32)
 251         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 252         r1hi = getHiVRegFromLo r1lo
 253         code =  code1 `appOL`
 254                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 255                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 256                        MOV I32 (OpReg r1hi) (OpReg rhi),
 257                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 258    -- in
 259    return (ChildCode64 code rlo)
 260
 261 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 262    ChildCode64 code1 r1lo <- iselExpr64 e1
 263    ChildCode64 code2 r2lo <- iselExpr64 e2
 264    (rlo,rhi) <- getNewRegPairNat I32
 265    let
 266         r1hi = getHiVRegFromLo r1lo
 267         r2hi = getHiVRegFromLo r2lo
 268         code =  code1 `appOL`
 269                 code2 `appOL`
 270                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 271                        ADD I32 (OpReg r2lo) (OpReg rlo),
 272                        MOV I32 (OpReg r1hi) (OpReg rhi),
 273                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 274    -- in
 275    return (ChildCode64 code rlo)
 276
 277 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 278      fn <- getAnyReg expr
 279      r_dst_lo <-  getNewRegNat I32
 280      let r_dst_hi = getHiVRegFromLo r_dst_lo
 281          code = fn r_dst_lo
 282      return (
 283              ChildCode64 (code `snocOL`
 284                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 285                           r_dst_lo
 286             )
 287
 288 iselExpr64 expr
 289    = pprPanic "iselExpr64(i386)" (ppr expr)
 290
 291 #endif /* i386_TARGET_ARCH */
 292
 293 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 294
 295 #if sparc_TARGET_ARCH
 296
 297 assignMem_I64Code addrTree valueTree = do
 298      Amode addr addr_code <- getAmode addrTree
 299      ChildCode64 vcode rlo <- iselExpr64 valueTree
 300      (src, code) <- getSomeReg addrTree
 301      let
 302          rhi = getHiVRegFromLo rlo
 303          -- Big-endian store
 304          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 305          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 306      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 307
 308 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 309      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 310      let
 311          r_dst_lo = mkVReg u_dst pk
 312          r_dst_hi = getHiVRegFromLo r_dst_lo
 313          r_src_hi = getHiVRegFromLo r_src_lo
 314          mov_lo = mkMOV r_src_lo r_dst_lo
 315          mov_hi = mkMOV r_src_hi r_dst_hi
 316          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 317      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 318 assignReg_I64Code lvalue valueTree
 319    = panic "assignReg_I64Code(sparc): invalid lvalue"
 320
 321
 322 -- Don't delete this -- it's very handy for debugging.
 323 --iselExpr64 expr
 324 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 325 --   = panic "iselExpr64(???)"
 326
 327 iselExpr64 (CmmLoad addrTree I64) = do
 328      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 329      rlo <- getNewRegNat I32
 330      let rhi = getHiVRegFromLo rlo
 331          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 332          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 333      return (
 334             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 335                          rlo
 336           )
 337
 338 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 339      r_dst_lo <-  getNewRegNat I32
 340      let r_dst_hi = getHiVRegFromLo r_dst_lo
 341          r_src_lo = mkVReg uq I32
 342          r_src_hi = getHiVRegFromLo r_src_lo
 343          mov_lo = mkMOV r_src_lo r_dst_lo
 344          mov_hi = mkMOV r_src_hi r_dst_hi
 345          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 346      return (
 347             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 348          )
 349
 350 iselExpr64 expr
 351    = pprPanic "iselExpr64(sparc)" (ppr expr)
 352
 353 #endif /* sparc_TARGET_ARCH */
 354
 355 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 356
 357 #if powerpc_TARGET_ARCH
 358
 359 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 360 getI64Amodes addrTree = do
 361     Amode hi_addr addr_code <- getAmode addrTree
 362     case addrOffset hi_addr 4 of
 363         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 364         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 365                            return (AddrRegImm hi_ptr (ImmInt 0),
 366                                    AddrRegImm hi_ptr (ImmInt 4),
 367                                    code)
 368
 369 assignMem_I64Code addrTree valueTree = do
 370         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 371         ChildCode64 vcode rlo <- iselExpr64 valueTree
 372         let
 373                 rhi = getHiVRegFromLo rlo
 374
 375                 -- Big-endian store
 376                 mov_hi = ST I32 rhi hi_addr
 377                 mov_lo = ST I32 rlo lo_addr
 378         -- in
 379         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 380
 381 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 382    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 383    let
 384          r_dst_lo = mkVReg u_dst I32
 385          r_dst_hi = getHiVRegFromLo r_dst_lo
 386          r_src_hi = getHiVRegFromLo r_src_lo
 387          mov_lo = MR r_dst_lo r_src_lo
 388          mov_hi = MR r_dst_hi r_src_hi
 389    -- in
 390    return (
 391         vcode `snocOL` mov_lo `snocOL` mov_hi
 392      )
 393
 394 assignReg_I64Code lvalue valueTree
 395    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 396
 397
 398 -- Don't delete this -- it's very handy for debugging.
 399 --iselExpr64 expr
 400 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 401 --   = panic "iselExpr64(???)"
 402
 403 iselExpr64 (CmmLoad addrTree I64) = do
 404     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 405     (rlo, rhi) <- getNewRegPairNat I32
 406     let mov_hi = LD I32 rhi hi_addr
 407         mov_lo = LD I32 rlo lo_addr
 408     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 409                          rlo
 410
 411 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 412    = return (ChildCode64 nilOL (mkVReg vu I32))
 413
 414 iselExpr64 (CmmLit (CmmInt i _)) = do
 415   (rlo,rhi) <- getNewRegPairNat I32
 416   let
 417         half0 = fromIntegral (fromIntegral i :: Word16)
 418         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 419         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 420         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 421
 422         code = toOL [
 423                 LIS rlo (ImmInt half1),
 424                 OR rlo rlo (RIImm $ ImmInt half0),
 425                 LIS rhi (ImmInt half3),
 426                 OR rlo rlo (RIImm $ ImmInt half2)
 427                 ]
 428   -- in
 429   return (ChildCode64 code rlo)
 430
 431 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 432    ChildCode64 code1 r1lo <- iselExpr64 e1
 433    ChildCode64 code2 r2lo <- iselExpr64 e2
 434    (rlo,rhi) <- getNewRegPairNat I32
 435    let
 436         r1hi = getHiVRegFromLo r1lo
 437         r2hi = getHiVRegFromLo r2lo
 438         code =  code1 `appOL`
 439                 code2 `appOL`
 440                 toOL [ ADDC rlo r1lo r2lo,
 441                        ADDE rhi r1hi r2hi ]
 442    -- in
 443    return (ChildCode64 code rlo)
 444
 445 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
 446     (expr_reg,expr_code) <- getSomeReg expr
 447     (rlo, rhi) <- getNewRegPairNat I32
 448     let mov_hi = LI rhi (ImmInt 0)
 449         mov_lo = MR rlo expr_reg
 450     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 451                          rlo
 452 iselExpr64 expr
 453    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 454
 455 #endif /* powerpc_TARGET_ARCH */
 456
 457
 458 -- -----------------------------------------------------------------------------
 459 -- The 'Register' type
 460
 461 -- 'Register's passed up the tree.  If the stix code forces the register
 462 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 463 -- otherwise, it comes out as @Any@, and the parent can decide which
 464 -- register to put it in.
 465
 466 data Register
 467   = Fixed   MachRep Reg InstrBlock
 468   | Any     MachRep (Reg -> InstrBlock)
 469
 470 swizzleRegisterRep :: Register -> MachRep -> Register
 471 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 472 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 473
 474
 475 -- -----------------------------------------------------------------------------
 476 -- Utils based on getRegister, below
 477
 478 -- The dual to getAnyReg: compute an expression into a register, but
 479 -- we don't mind which one it is.
 480 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 481 getSomeReg expr = do
 482   r <- getRegister expr
 483   case r of
 484     Any rep code -> do
 485         tmp <- getNewRegNat rep
 486         return (tmp, code tmp)
 487     Fixed _ reg code ->
 488         return (reg, code)
 489
 490 -- -----------------------------------------------------------------------------
 491 -- Grab the Reg for a CmmReg
 492
 493 getRegisterReg :: CmmReg -> Reg
 494
 495 getRegisterReg (CmmLocal (LocalReg u pk _))
 496   = mkVReg u pk
 497
 498 getRegisterReg (CmmGlobal mid)
 499   = case get_GlobalReg_reg_or_addr mid of
 500        Left (RealReg rrno) -> RealReg rrno
 501        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 502           -- By this stage, the only MagicIds remaining should be the
 503           -- ones which map to a real machine register on this
 504           -- platform.  Hence ...
 505
 506
 507 -- -----------------------------------------------------------------------------
 508 -- Generate code to get a subtree into a Register
 509
 510 -- Don't delete this -- it's very handy for debugging.
 511 --getRegister expr
 512 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 513 --   = panic "getRegister(???)"
 514
 515 getRegister :: CmmExpr -> NatM Register
 516
 517 #if !x86_64_TARGET_ARCH
 518     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 519     -- register, it can only be used for rip-relative addressing.
 520 getRegister (CmmReg (CmmGlobal PicBaseReg))
 521   = do
 522       reg <- getPicBaseNat wordRep
 523       return (Fixed wordRep reg nilOL)
 524 #endif
 525
 526 getRegister (CmmReg reg)
 527   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 528
 529 getRegister tree@(CmmRegOff _ _)
 530   = getRegister (mangleIndexTree tree)
 531
 532
 533 #if WORD_SIZE_IN_BITS==32
 534     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 535     -- TO_W_(x), TO_W_(x >> 32)
 536
 537 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 538              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 539   ChildCode64 code rlo <- iselExpr64 x
 540   return $ Fixed I32 (getHiVRegFromLo rlo) code
 541
 542 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 543              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 544   ChildCode64 code rlo <- iselExpr64 x
 545   return $ Fixed I32 (getHiVRegFromLo rlo) code
 546
 547 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 548   ChildCode64 code rlo <- iselExpr64 x
 549   return $ Fixed I32 rlo code
 550
 551 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 552   ChildCode64 code rlo <- iselExpr64 x
 553   return $ Fixed I32 rlo code
 554
 555 #endif
 556
 557 -- end of machine-"independent" bit; here we go on the rest...
 558
 559 #if alpha_TARGET_ARCH
 560
 561 getRegister (StDouble d)
 562   = getBlockIdNat                   `thenNat` \ lbl ->
 563     getNewRegNat PtrRep             `thenNat` \ tmp ->
 564     let code dst = mkSeqInstrs [
 565             LDATA RoDataSegment lbl [
 566                     DATA TF [ImmLab (rational d)]
 567                 ],
 568             LDA tmp (AddrImm (ImmCLbl lbl)),
 569             LD TF dst (AddrReg tmp)]
 570     in
 571         return (Any F64 code)
 572
 573 getRegister (StPrim primop [x]) -- unary PrimOps
 574   = case primop of
 575       IntNegOp -> trivialUCode (NEG Q False) x
 576
 577       NotOp    -> trivialUCode NOT x
 578
 579       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 580       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 581
 582       OrdOp -> coerceIntCode IntRep x
 583       ChrOp -> chrCode x
 584
 585       Float2IntOp  -> coerceFP2Int    x
 586       Int2FloatOp  -> coerceInt2FP pr x
 587       Double2IntOp -> coerceFP2Int    x
 588       Int2DoubleOp -> coerceInt2FP pr x
 589
 590       Double2FloatOp -> coerceFltCode x
 591       Float2DoubleOp -> coerceFltCode x
 592
 593       other_op -> getRegister (StCall fn CCallConv F64 [x])
 594         where
 595           fn = case other_op of
 596                  FloatExpOp    -> FSLIT("exp")
 597                  FloatLogOp    -> FSLIT("log")
 598                  FloatSqrtOp   -> FSLIT("sqrt")
 599                  FloatSinOp    -> FSLIT("sin")
 600                  FloatCosOp    -> FSLIT("cos")
 601                  FloatTanOp    -> FSLIT("tan")
 602                  FloatAsinOp   -> FSLIT("asin")
 603                  FloatAcosOp   -> FSLIT("acos")
 604                  FloatAtanOp   -> FSLIT("atan")
 605                  FloatSinhOp   -> FSLIT("sinh")
 606                  FloatCoshOp   -> FSLIT("cosh")
 607                  FloatTanhOp   -> FSLIT("tanh")
 608                  DoubleExpOp   -> FSLIT("exp")
 609                  DoubleLogOp   -> FSLIT("log")
 610                  DoubleSqrtOp  -> FSLIT("sqrt")
 611                  DoubleSinOp   -> FSLIT("sin")
 612                  DoubleCosOp   -> FSLIT("cos")
 613                  DoubleTanOp   -> FSLIT("tan")
 614                  DoubleAsinOp  -> FSLIT("asin")
 615                  DoubleAcosOp  -> FSLIT("acos")
 616                  DoubleAtanOp  -> FSLIT("atan")
 617                  DoubleSinhOp  -> FSLIT("sinh")
 618                  DoubleCoshOp  -> FSLIT("cosh")
 619                  DoubleTanhOp  -> FSLIT("tanh")
 620   where
 621     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 622
 623 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 624   = case primop of
 625       CharGtOp -> trivialCode (CMP LTT) y x
 626       CharGeOp -> trivialCode (CMP LE) y x
 627       CharEqOp -> trivialCode (CMP EQQ) x y
 628       CharNeOp -> int_NE_code x y
 629       CharLtOp -> trivialCode (CMP LTT) x y
 630       CharLeOp -> trivialCode (CMP LE) x y
 631
 632       IntGtOp  -> trivialCode (CMP LTT) y x
 633       IntGeOp  -> trivialCode (CMP LE) y x
 634       IntEqOp  -> trivialCode (CMP EQQ) x y
 635       IntNeOp  -> int_NE_code x y
 636       IntLtOp  -> trivialCode (CMP LTT) x y
 637       IntLeOp  -> trivialCode (CMP LE) x y
 638
 639       WordGtOp -> trivialCode (CMP ULT) y x
 640       WordGeOp -> trivialCode (CMP ULE) x y
 641       WordEqOp -> trivialCode (CMP EQQ)  x y
 642       WordNeOp -> int_NE_code x y
 643       WordLtOp -> trivialCode (CMP ULT) x y
 644       WordLeOp -> trivialCode (CMP ULE) x y
 645
 646       AddrGtOp -> trivialCode (CMP ULT) y x
 647       AddrGeOp -> trivialCode (CMP ULE) y x
 648       AddrEqOp -> trivialCode (CMP EQQ)  x y
 649       AddrNeOp -> int_NE_code x y
 650       AddrLtOp -> trivialCode (CMP ULT) x y
 651       AddrLeOp -> trivialCode (CMP ULE) x y
 652
 653       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 654       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 655       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 656       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 657       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 658       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 659
 660       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 661       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 662       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 663       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 664       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 665       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 666
 667       IntAddOp  -> trivialCode (ADD Q False) x y
 668       IntSubOp  -> trivialCode (SUB Q False) x y
 669       IntMulOp  -> trivialCode (MUL Q False) x y
 670       IntQuotOp -> trivialCode (DIV Q False) x y
 671       IntRemOp  -> trivialCode (REM Q False) x y
 672
 673       WordAddOp  -> trivialCode (ADD Q False) x y
 674       WordSubOp  -> trivialCode (SUB Q False) x y
 675       WordMulOp  -> trivialCode (MUL Q False) x y
 676       WordQuotOp -> trivialCode (DIV Q True) x y
 677       WordRemOp  -> trivialCode (REM Q True) x y
 678
 679       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 680       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 681       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 682       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 683
 684       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 685       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 686       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 687       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 688
 689       AddrAddOp  -> trivialCode (ADD Q False) x y
 690       AddrSubOp  -> trivialCode (SUB Q False) x y
 691       AddrRemOp  -> trivialCode (REM Q True) x y
 692
 693       AndOp  -> trivialCode AND x y
 694       OrOp   -> trivialCode OR  x y
 695       XorOp  -> trivialCode XOR x y
 696       SllOp  -> trivialCode SLL x y
 697       SrlOp  -> trivialCode SRL x y
 698
 699       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 700       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 701       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 702
 703       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 704       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 705   where
 706     {- ------------------------------------------------------------
 707         Some bizarre special code for getting condition codes into
 708         registers.  Integer non-equality is a test for equality
 709         followed by an XOR with 1.  (Integer comparisons always set
 710         the result register to 0 or 1.)  Floating point comparisons of
 711         any kind leave the result in a floating point register, so we
 712         need to wrangle an integer register out of things.
 713     -}
 714     int_NE_code :: StixTree -> StixTree -> NatM Register
 715
 716     int_NE_code x y
 717       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 718         getNewRegNat IntRep             `thenNat` \ tmp ->
 719         let
 720             code = registerCode register tmp
 721             src  = registerName register tmp
 722             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 723         in
 724         return (Any IntRep code__2)
 725
 726     {- ------------------------------------------------------------
 727         Comments for int_NE_code also apply to cmpF_code
 728     -}
 729     cmpF_code
 730         :: (Reg -> Reg -> Reg -> Instr)
 731         -> Cond
 732         -> StixTree -> StixTree
 733         -> NatM Register
 734
 735     cmpF_code instr cond x y
 736       = trivialFCode pr instr x y       `thenNat` \ register ->
 737         getNewRegNat F64                `thenNat` \ tmp ->
 738         getBlockIdNat                   `thenNat` \ lbl ->
 739         let
 740             code = registerCode register tmp
 741             result  = registerName register tmp
 742
 743             code__2 dst = code . mkSeqInstrs [
 744                 OR zeroh (RIImm (ImmInt 1)) dst,
 745                 BF cond  result (ImmCLbl lbl),
 746                 OR zeroh (RIReg zeroh) dst,
 747                 NEWBLOCK lbl]
 748         in
 749         return (Any IntRep code__2)
 750       where
 751         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 752       ------------------------------------------------------------
 753
 754 getRegister (CmmLoad pk mem)
 755   = getAmode mem                    `thenNat` \ amode ->
 756     let
 757         code = amodeCode amode
 758         src   = amodeAddr amode
 759         size = primRepToSize pk
 760         code__2 dst = code . mkSeqInstr (LD size dst src)
 761     in
 762     return (Any pk code__2)
 763
 764 getRegister (StInt i)
 765   | fits8Bits i
 766   = let
 767         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 768     in
 769     return (Any IntRep code)
 770   | otherwise
 771   = let
 772         code dst = mkSeqInstr (LDI Q dst src)
 773     in
 774     return (Any IntRep code)
 775   where
 776     src = ImmInt (fromInteger i)
 777
 778 getRegister leaf
 779   | isJust imm
 780   = let
 781         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 782     in
 783     return (Any PtrRep code)
 784   where
 785     imm = maybeImm leaf
 786     imm__2 = case imm of Just x -> x
 787
 788 #endif /* alpha_TARGET_ARCH */
 789
 790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 791
 792 #if i386_TARGET_ARCH
 793
 794 getRegister (CmmLit (CmmFloat f F32)) = do
 795     lbl <- getNewLabelNat
 796     dflags <- getDynFlagsNat
 797     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 798     Amode addr addr_code <- getAmode dynRef
 799     let code dst =
 800             LDATA ReadOnlyData
 801                         [CmmDataLabel lbl,
 802                          CmmStaticLit (CmmFloat f F32)]
 803             `consOL` (addr_code `snocOL`
 804             GLD F32 addr dst)
 805     -- in
 806     return (Any F32 code)
 807
 808
 809 getRegister (CmmLit (CmmFloat d F64))
 810   | d == 0.0
 811   = let code dst = unitOL (GLDZ dst)
 812     in  return (Any F64 code)
 813
 814   | d == 1.0
 815   = let code dst = unitOL (GLD1 dst)
 816     in  return (Any F64 code)
 817
 818   | otherwise = do
 819     lbl <- getNewLabelNat
 820     dflags <- getDynFlagsNat
 821     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 822     Amode addr addr_code <- getAmode dynRef
 823     let code dst =
 824             LDATA ReadOnlyData
 825                         [CmmDataLabel lbl,
 826                          CmmStaticLit (CmmFloat d F64)]
 827             `consOL` (addr_code `snocOL`
 828             GLD F64 addr dst)
 829     -- in
 830     return (Any F64 code)
 831
 832 #endif /* i386_TARGET_ARCH */
 833
 834 #if x86_64_TARGET_ARCH
 835
 836 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 837    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 838         -- I don't know why there are xorpd, xorps, and pxor instructions.
 839         -- They all appear to do the same thing --SDM
 840    return (Any rep code)
 841
 842 getRegister (CmmLit (CmmFloat f rep)) = do
 843     lbl <- getNewLabelNat
 844     let code dst = toOL [
 845             LDATA ReadOnlyData
 846                         [CmmDataLabel lbl,
 847                          CmmStaticLit (CmmFloat f rep)],
 848             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 849             ]
 850     -- in
 851     return (Any rep code)
 852
 853 #endif /* x86_64_TARGET_ARCH */
 854
 855 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 856
 857 -- catch simple cases of zero- or sign-extended load
 858 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 859   code <- intLoadCode (MOVZxL I8) addr
 860   return (Any I32 code)
 861
 862 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 863   code <- intLoadCode (MOVSxL I8) addr
 864   return (Any I32 code)
 865
 866 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 867   code <- intLoadCode (MOVZxL I16) addr
 868   return (Any I32 code)
 869
 870 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 871   code <- intLoadCode (MOVSxL I16) addr
 872   return (Any I32 code)
 873
 874 #endif
 875
 876 #if x86_64_TARGET_ARCH
 877
 878 -- catch simple cases of zero- or sign-extended load
 879 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 880   code <- intLoadCode (MOVZxL I8) addr
 881   return (Any I64 code)
 882
 883 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 884   code <- intLoadCode (MOVSxL I8) addr
 885   return (Any I64 code)
 886
 887 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 888   code <- intLoadCode (MOVZxL I16) addr
 889   return (Any I64 code)
 890
 891 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 892   code <- intLoadCode (MOVSxL I16) addr
 893   return (Any I64 code)
 894
 895 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 896   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 897   return (Any I64 code)
 898
 899 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 900   code <- intLoadCode (MOVSxL I32) addr
 901   return (Any I64 code)
 902
 903 #endif
 904
 905 #if x86_64_TARGET_ARCH
 906 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 907                                      CmmLit displacement])
 908     = return $ Any I64 (\dst -> unitOL $
 909         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 910 #endif
 911
 912 #if x86_64_TARGET_ARCH
 913 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 914   x_code <- getAnyReg x
 915   lbl <- getNewLabelNat
 916   let
 917     code dst = x_code dst `appOL` toOL [
 918         -- This is how gcc does it, so it can't be that bad:
 919         LDATA ReadOnlyData16 [
 920                 CmmAlign 16,
 921                 CmmDataLabel lbl,
 922                 CmmStaticLit (CmmInt 0x80000000 I32),
 923                 CmmStaticLit (CmmInt 0 I32),
 924                 CmmStaticLit (CmmInt 0 I32),
 925                 CmmStaticLit (CmmInt 0 I32)
 926         ],
 927         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 928                 -- xorps, so we need the 128-bit constant
 929                 -- ToDo: rip-relative
 930         ]
 931   --
 932   return (Any F32 code)
 933
 934 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 935   x_code <- getAnyReg x
 936   lbl <- getNewLabelNat
 937   let
 938         -- This is how gcc does it, so it can't be that bad:
 939     code dst = x_code dst `appOL` toOL [
 940         LDATA ReadOnlyData16 [
 941                 CmmAlign 16,
 942                 CmmDataLabel lbl,
 943                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 944                 CmmStaticLit (CmmInt 0 I64)
 945         ],
 946                 -- gcc puts an unpck here.  Wonder if we need it.
 947         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 948                 -- xorpd, so we need the 128-bit constant
 949         ]
 950   --
 951   return (Any F64 code)
 952 #endif
 953
 954 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 955
 956 getRegister (CmmMachOp mop [x]) -- unary MachOps
 957   = case mop of
 958 #if i386_TARGET_ARCH
 959       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 960       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 961 #endif
 962
 963       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 964       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 965
 966       -- Nop conversions
 967       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 968       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 969       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 970       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 971       MO_U_Conv I32 I16 -> toI16Reg I32 x
 972       MO_S_Conv I32 I16 -> toI16Reg I32 x
 973 #if x86_64_TARGET_ARCH
 974       MO_U_Conv I64 I32 -> conversionNop I64 x
 975       MO_S_Conv I64 I32 -> conversionNop I64 x
 976       MO_U_Conv I64 I16 -> toI16Reg I64 x
 977       MO_S_Conv I64 I16 -> toI16Reg I64 x
 978       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 979       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 980 #endif
 981
 982       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 983       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 984
 985       -- widenings
 986       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 987       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 988       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 989
 990       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 991       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 992       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 993
 994 #if x86_64_TARGET_ARCH
 995       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 996       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 997       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 998       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 999       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
1000       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
1001         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1002         -- However, we don't want the register allocator to throw it
1003         -- away as an unnecessary reg-to-reg move, so we keep it in
1004         -- the form of a movzl and print it as a movl later.
1005 #endif
1006
1007 #if i386_TARGET_ARCH
1008       MO_S_Conv F32 F64 -> conversionNop F64 x
1009       MO_S_Conv F64 F32 -> conversionNop F32 x
1010 #else
1011       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1012       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1013 #endif
1014
1015       MO_S_Conv from to
1016         | isFloatingRep from -> coerceFP2Int from to x
1017         | isFloatingRep to   -> coerceInt2FP from to x
1018
1019       other -> pprPanic "getRegister" (pprMachOp mop)
1020    where
1021         -- signed or unsigned extension.
1022         integerExtend from to instr expr = do
1023             (reg,e_code) <- if from == I8 then getByteReg expr
1024                                           else getSomeReg expr
1025             let
1026                 code dst =
1027                   e_code `snocOL`
1028                   instr from (OpReg reg) (OpReg dst)
1029             return (Any to code)
1030
1031         toI8Reg new_rep expr
1032             = do codefn <- getAnyReg expr
1033                  return (Any new_rep codefn)
1034                 -- HACK: use getAnyReg to get a byte-addressable register.
1035                 -- If the source was a Fixed register, this will add the
1036                 -- mov instruction to put it into the desired destination.
1037                 -- We're assuming that the destination won't be a fixed
1038                 -- non-byte-addressable register; it won't be, because all
1039                 -- fixed registers are word-sized.
1040
1041         toI16Reg = toI8Reg -- for now
1042
1043         conversionNop new_rep expr
1044             = do e_code <- getRegister expr
1045                  return (swizzleRegisterRep e_code new_rep)
1046
1047
1048 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1049   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1050     case mop of
1051       MO_Eq F32   -> condFltReg EQQ x y
1052       MO_Ne F32   -> condFltReg NE x y
1053       MO_S_Gt F32 -> condFltReg GTT x y
1054       MO_S_Ge F32 -> condFltReg GE x y
1055       MO_S_Lt F32 -> condFltReg LTT x y
1056       MO_S_Le F32 -> condFltReg LE x y
1057
1058       MO_Eq F64   -> condFltReg EQQ x y
1059       MO_Ne F64   -> condFltReg NE x y
1060       MO_S_Gt F64 -> condFltReg GTT x y
1061       MO_S_Ge F64 -> condFltReg GE x y
1062       MO_S_Lt F64 -> condFltReg LTT x y
1063       MO_S_Le F64 -> condFltReg LE x y
1064
1065       MO_Eq rep   -> condIntReg EQQ x y
1066       MO_Ne rep   -> condIntReg NE x y
1067
1068       MO_S_Gt rep -> condIntReg GTT x y
1069       MO_S_Ge rep -> condIntReg GE x y
1070       MO_S_Lt rep -> condIntReg LTT x y
1071       MO_S_Le rep -> condIntReg LE x y
1072
1073       MO_U_Gt rep -> condIntReg GU  x y
1074       MO_U_Ge rep -> condIntReg GEU x y
1075       MO_U_Lt rep -> condIntReg LU  x y
1076       MO_U_Le rep -> condIntReg LEU x y
1077
1078 #if i386_TARGET_ARCH
1079       MO_Add F32 -> trivialFCode F32 GADD x y
1080       MO_Sub F32 -> trivialFCode F32 GSUB x y
1081
1082       MO_Add F64 -> trivialFCode F64 GADD x y
1083       MO_Sub F64 -> trivialFCode F64 GSUB x y
1084
1085       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1086       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1087 #endif
1088
1089 #if x86_64_TARGET_ARCH
1090       MO_Add F32 -> trivialFCode F32 ADD x y
1091       MO_Sub F32 -> trivialFCode F32 SUB x y
1092
1093       MO_Add F64 -> trivialFCode F64 ADD x y
1094       MO_Sub F64 -> trivialFCode F64 SUB x y
1095
1096       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1097       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1098 #endif
1099
1100       MO_Add rep -> add_code rep x y
1101       MO_Sub rep -> sub_code rep x y
1102
1103       MO_S_Quot rep -> div_code rep True  True  x y
1104       MO_S_Rem  rep -> div_code rep True  False x y
1105       MO_U_Quot rep -> div_code rep False True  x y
1106       MO_U_Rem  rep -> div_code rep False False x y
1107
1108 #if i386_TARGET_ARCH
1109       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1110       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1111 #endif
1112
1113 #if x86_64_TARGET_ARCH
1114       MO_Mul   F32 -> trivialFCode F32 MUL x y
1115       MO_Mul   F64 -> trivialFCode F64 MUL x y
1116 #endif
1117
1118       MO_Mul   rep -> let op = IMUL rep in
1119                       trivialCode rep op (Just op) x y
1120
1121       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1122
1123       MO_And rep -> let op = AND rep in
1124                     trivialCode rep op (Just op) x y
1125       MO_Or  rep -> let op = OR  rep in
1126                     trivialCode rep op (Just op) x y
1127       MO_Xor rep -> let op = XOR rep in
1128                     trivialCode rep op (Just op) x y
1129
1130         {- Shift ops on x86s have constraints on their source, it
1131            either has to be Imm, CL or 1
1132             => trivialCode is not restrictive enough (sigh.)
1133         -}
1134       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1135       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1136       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1137
1138       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1139   where
1140     --------------------
1141     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1142     imulMayOflo rep a b = do
1143          (a_reg, a_code) <- getNonClobberedReg a
1144          b_code <- getAnyReg b
1145          let
1146              shift_amt  = case rep of
1147                            I32 -> 31
1148                            I64 -> 63
1149                            _ -> panic "shift_amt"
1150
1151              code = a_code `appOL` b_code eax `appOL`
1152                         toOL [
1153                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1154                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1155                                 -- sign extend lower part
1156                            SUB rep (OpReg edx) (OpReg eax)
1157                                 -- compare against upper
1158                            -- eax==0 if high part == sign extended low part
1159                         ]
1160          -- in
1161          return (Fixed rep eax code)
1162
1163     --------------------
1164     shift_code :: MachRep
1165                -> (Operand -> Operand -> Instr)
1166                -> CmmExpr
1167                -> CmmExpr
1168                -> NatM Register
1169
1170     {- Case1: shift length as immediate -}
1171     shift_code rep instr x y@(CmmLit lit) = do
1172           x_code <- getAnyReg x
1173           let
1174                code dst
1175                   = x_code dst `snocOL`
1176                     instr (OpImm (litToImm lit)) (OpReg dst)
1177           -- in
1178           return (Any rep code)
1179
1180     {- Case2: shift length is complex (non-immediate)
1181       * y must go in %ecx.
1182       * we cannot do y first *and* put its result in %ecx, because
1183         %ecx might be clobbered by x.
1184       * if we do y second, then x cannot be
1185         in a clobbered reg.  Also, we cannot clobber x's reg
1186         with the instruction itself.
1187       * so we can either:
1188         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1189         - do y second and put its result into %ecx.  x gets placed in a fresh
1190           tmp.  This is likely to be better, becuase the reg alloc can
1191           eliminate this reg->reg move here (it won't eliminate the other one,
1192           because the move is into the fixed %ecx).
1193     -}
1194     shift_code rep instr x y{-amount-} = do
1195         x_code <- getAnyReg x
1196         tmp <- getNewRegNat rep
1197         y_code <- getAnyReg y
1198         let
1199            code = x_code tmp `appOL`
1200                   y_code ecx `snocOL`
1201                   instr (OpReg ecx) (OpReg tmp)
1202         -- in
1203         return (Fixed rep tmp code)
1204
1205     --------------------
1206     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1207     add_code rep x (CmmLit (CmmInt y _))
1208         | not (is64BitInteger y) = add_int rep x y
1209     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1210
1211     --------------------
1212     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1213     sub_code rep x (CmmLit (CmmInt y _))
1214         | not (is64BitInteger (-y)) = add_int rep x (-y)
1215     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1216
1217     -- our three-operand add instruction:
1218     add_int rep x y = do
1219         (x_reg, x_code) <- getSomeReg x
1220         let
1221             imm = ImmInt (fromInteger y)
1222             code dst
1223                = x_code `snocOL`
1224                  LEA rep
1225                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1226                         (OpReg dst)
1227         --
1228         return (Any rep code)
1229
1230     ----------------------
1231     div_code rep signed quotient x y = do
1232            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1233            x_code <- getAnyReg x
1234            let
1235              widen | signed    = CLTD rep
1236                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1237
1238              instr | signed    = IDIV
1239                    | otherwise = DIV
1240
1241              code = y_code `appOL`
1242                     x_code eax `appOL`
1243                     toOL [widen, instr rep y_op]
1244
1245              result | quotient  = eax
1246                     | otherwise = edx
1247
1248            -- in
1249            return (Fixed rep result code)
1250
1251
1252 getRegister (CmmLoad mem pk)
1253   | isFloatingRep pk
1254   = do
1255     Amode src mem_code <- getAmode mem
1256     let
1257         code dst = mem_code `snocOL`
1258                    IF_ARCH_i386(GLD pk src dst,
1259                                 MOV pk (OpAddr src) (OpReg dst))
1260     --
1261     return (Any pk code)
1262
1263 #if i386_TARGET_ARCH
1264 getRegister (CmmLoad mem pk)
1265   | pk /= I64
1266   = do
1267     code <- intLoadCode (instr pk) mem
1268     return (Any pk code)
1269   where
1270         instr I8  = MOVZxL pk
1271         instr I16 = MOV I16
1272         instr I32 = MOV I32
1273         -- we always zero-extend 8-bit loads, if we
1274         -- can't think of anything better.  This is because
1275         -- we can't guarantee access to an 8-bit variant of every register
1276         -- (esi and edi don't have 8-bit variants), so to make things
1277         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1278 #endif
1279
1280 #if x86_64_TARGET_ARCH
1281 -- Simpler memory load code on x86_64
1282 getRegister (CmmLoad mem pk)
1283   = do
1284     code <- intLoadCode (MOV pk) mem
1285     return (Any pk code)
1286 #endif
1287
1288 getRegister (CmmLit (CmmInt 0 rep))
1289   = let
1290         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1291         adj_rep = case rep of I64 -> I32; _ -> rep
1292         rep1 = IF_ARCH_i386( rep, adj_rep )
1293         code dst
1294            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1295     in
1296         return (Any rep code)
1297
1298 #if x86_64_TARGET_ARCH
1299   -- optimisation for loading small literals on x86_64: take advantage
1300   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1301   -- instruction forms are shorter.
1302 getRegister (CmmLit lit)
1303   | I64 <- cmmLitRep lit, not (isBigLit lit)
1304   = let
1305         imm = litToImm lit
1306         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1307     in
1308         return (Any I64 code)
1309   where
1310    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1311    isBigLit _ = False
1312         -- note1: not the same as is64BitLit, because that checks for
1313         -- signed literals that fit in 32 bits, but we want unsigned
1314         -- literals here.
1315         -- note2: all labels are small, because we're assuming the
1316         -- small memory model (see gcc docs, -mcmodel=small).
1317 #endif
1318
1319 getRegister (CmmLit lit)
1320   = let
1321         rep = cmmLitRep lit
1322         imm = litToImm lit
1323         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1324     in
1325         return (Any rep code)
1326
1327 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1328
1329
1330 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1331    -> NatM (Reg -> InstrBlock)
1332 intLoadCode instr mem = do
1333   Amode src mem_code <- getAmode mem
1334   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1335
1336 -- Compute an expression into *any* register, adding the appropriate
1337 -- move instruction if necessary.
1338 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1339 getAnyReg expr = do
1340   r <- getRegister expr
1341   anyReg r
1342
1343 anyReg :: Register -> NatM (Reg -> InstrBlock)
1344 anyReg (Any _ code)          = return code
1345 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1346
1347 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1348 -- Fixed registers might not be byte-addressable, so we make sure we've
1349 -- got a temporary, inserting an extra reg copy if necessary.
1350 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1351 #if x86_64_TARGET_ARCH
1352 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1353 #else
1354 getByteReg expr = do
1355   r <- getRegister expr
1356   case r of
1357     Any rep code -> do
1358         tmp <- getNewRegNat rep
1359         return (tmp, code tmp)
1360     Fixed rep reg code
1361         | isVirtualReg reg -> return (reg,code)
1362         | otherwise -> do
1363             tmp <- getNewRegNat rep
1364             return (tmp, code `snocOL` reg2reg rep reg tmp)
1365         -- ToDo: could optimise slightly by checking for byte-addressable
1366         -- real registers, but that will happen very rarely if at all.
1367 #endif
1368
1369 -- Another variant: this time we want the result in a register that cannot
1370 -- be modified by code to evaluate an arbitrary expression.
1371 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1372 getNonClobberedReg expr = do
1373   r <- getRegister expr
1374   case r of
1375     Any rep code -> do
1376         tmp <- getNewRegNat rep
1377         return (tmp, code tmp)
1378     Fixed rep reg code
1379         -- only free regs can be clobbered
1380         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1381                 tmp <- getNewRegNat rep
1382                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1383         | otherwise ->
1384                 return (reg, code)
1385
1386 reg2reg :: MachRep -> Reg -> Reg -> Instr
1387 reg2reg rep src dst
1388 #if i386_TARGET_ARCH
1389   | isFloatingRep rep = GMOV src dst
1390 #endif
1391   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1392
1393 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1394
1395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1396
1397 #if sparc_TARGET_ARCH
1398
1399 getRegister (CmmLit (CmmFloat f F32)) = do
1400     lbl <- getNewLabelNat
1401     let code dst = toOL [
1402             LDATA ReadOnlyData
1403                         [CmmDataLabel lbl,
1404                          CmmStaticLit (CmmFloat f F32)],
1405             SETHI (HI (ImmCLbl lbl)) dst,
1406             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1407     return (Any F32 code)
1408
1409 getRegister (CmmLit (CmmFloat d F64)) = do
1410     lbl <- getNewLabelNat
1411     let code dst = toOL [
1412             LDATA ReadOnlyData
1413                         [CmmDataLabel lbl,
1414                          CmmStaticLit (CmmFloat d F64)],
1415             SETHI (HI (ImmCLbl lbl)) dst,
1416             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1417     return (Any F64 code)
1418
1419 getRegister (CmmMachOp mop [x]) -- unary MachOps
1420   = case mop of
1421       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1422       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1423
1424       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1425       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1426
1427       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1428
1429       MO_U_Conv F64 F32-> coerceDbl2Flt x
1430       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1431
1432       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1433       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1434       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1435       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1436
1437       -- Conversions which are a nop on sparc
1438       MO_U_Conv from to
1439         | from == to   -> conversionNop to   x
1440       MO_U_Conv I32 to -> conversionNop to   x
1441       MO_S_Conv I32 to -> conversionNop to   x
1442
1443       -- widenings
1444       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1445       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1446       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1447       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1448
1449       other_op -> panic "Unknown unary mach op"
1450     where
1451         -- XXX SLL/SRL?
1452         integerExtend signed from to expr = do
1453            (reg, e_code) <- getSomeReg expr
1454            let
1455                code dst =
1456                    e_code `snocOL`
1457                    ((if signed then SRA else SRL)
1458                           reg (RIImm (ImmInt 0)) dst)
1459            return (Any to code)
1460         conversionNop new_rep expr
1461             = do e_code <- getRegister expr
1462                  return (swizzleRegisterRep e_code new_rep)
1463
1464 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1465   = case mop of
1466       MO_Eq F32 -> condFltReg EQQ x y
1467       MO_Ne F32 -> condFltReg NE x y
1468
1469       MO_S_Gt F32 -> condFltReg GTT x y
1470       MO_S_Ge F32 -> condFltReg GE x y
1471       MO_S_Lt F32 -> condFltReg LTT x y
1472       MO_S_Le F32 -> condFltReg LE x y
1473
1474       MO_Eq F64 -> condFltReg EQQ x y
1475       MO_Ne F64 -> condFltReg NE x y
1476
1477       MO_S_Gt F64 -> condFltReg GTT x y
1478       MO_S_Ge F64 -> condFltReg GE x y
1479       MO_S_Lt F64 -> condFltReg LTT x y
1480       MO_S_Le F64 -> condFltReg LE x y
1481
1482       MO_Eq rep -> condIntReg EQQ x y
1483       MO_Ne rep -> condIntReg NE x y
1484
1485       MO_S_Gt rep -> condIntReg GTT x y
1486       MO_S_Ge rep -> condIntReg GE x y
1487       MO_S_Lt rep -> condIntReg LTT x y
1488       MO_S_Le rep -> condIntReg LE x y
1489
1490       MO_U_Gt I32  -> condIntReg GTT x y
1491       MO_U_Ge I32  -> condIntReg GE x y
1492       MO_U_Lt I32  -> condIntReg LTT x y
1493       MO_U_Le I32  -> condIntReg LE x y
1494
1495       MO_U_Gt I16 -> condIntReg GU  x y
1496       MO_U_Ge I16 -> condIntReg GEU x y
1497       MO_U_Lt I16 -> condIntReg LU  x y
1498       MO_U_Le I16 -> condIntReg LEU x y
1499
1500       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1501       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1502
1503       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1504 {-
1505       -- ToDo: teach about V8+ SPARC div instructions
1506       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1507       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1508       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1509       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1510 -}
1511       MO_Add F32  -> trivialFCode F32 FADD  x y
1512       MO_Sub F32   -> trivialFCode F32  FSUB x y
1513       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1514       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1515
1516       MO_Add F64   -> trivialFCode F64 FADD  x y
1517       MO_Sub F64   -> trivialFCode F64  FSUB x y
1518       MO_Mul F64   -> trivialFCode F64  FMUL x y
1519       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1520
1521       MO_And rep   -> trivialCode rep (AND False) x y
1522       MO_Or rep    -> trivialCode rep (OR  False) x y
1523       MO_Xor rep   -> trivialCode rep (XOR False) x y
1524
1525       MO_Mul rep -> trivialCode rep (SMUL False) x y
1526
1527       MO_Shl rep   -> trivialCode rep SLL  x y
1528       MO_U_Shr rep   -> trivialCode rep SRL x y
1529       MO_S_Shr rep   -> trivialCode rep SRA x y
1530
1531 {-
1532       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1533                                          [promote x, promote y])
1534                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1535       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1536                                         [x, y])
1537 -}
1538       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1539   where
1540     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1541
1542     --------------------
1543     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1544     imulMayOflo rep a b = do
1545          (a_reg, a_code) <- getSomeReg a
1546          (b_reg, b_code) <- getSomeReg b
1547          res_lo <- getNewRegNat I32
1548          res_hi <- getNewRegNat I32
1549          let
1550             shift_amt  = case rep of
1551                           I32 -> 31
1552                           I64 -> 63
1553                           _ -> panic "shift_amt"
1554             code dst = a_code `appOL` b_code `appOL`
1555                        toOL [
1556                            SMUL False a_reg (RIReg b_reg) res_lo,
1557                            RDY res_hi,
1558                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1559                            SUB False False res_lo (RIReg res_hi) dst
1560                         ]
1561          return (Any I32 code)
1562
1563 getRegister (CmmLoad mem pk) = do
1564     Amode src code <- getAmode mem
1565     let
1566         code__2 dst = code `snocOL` LD pk src dst
1567     return (Any pk code__2)
1568
1569 getRegister (CmmLit (CmmInt i _))
1570   | fits13Bits i
1571   = let
1572         src = ImmInt (fromInteger i)
1573         code dst = unitOL (OR False g0 (RIImm src) dst)
1574     in
1575         return (Any I32 code)
1576
1577 getRegister (CmmLit lit)
1578   = let rep = cmmLitRep lit
1579         imm = litToImm lit
1580         code dst = toOL [
1581             SETHI (HI imm) dst,
1582             OR False dst (RIImm (LO imm)) dst]
1583     in return (Any I32 code)
1584
1585 #endif /* sparc_TARGET_ARCH */
1586
1587 #if powerpc_TARGET_ARCH
1588 getRegister (CmmLoad mem pk)
1589   | pk /= I64
1590   = do
1591         Amode addr addr_code <- getAmode mem
1592         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1593                        addr_code `snocOL` LD pk dst addr
1594         return (Any pk code)
1595
1596 -- catch simple cases of zero- or sign-extended load
1597 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1598     Amode addr addr_code <- getAmode mem
1599     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1600
1601 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1602
1603 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1604     Amode addr addr_code <- getAmode mem
1605     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1606
1607 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1608     Amode addr addr_code <- getAmode mem
1609     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1610
1611 getRegister (CmmMachOp mop [x]) -- unary MachOps
1612   = case mop of
1613       MO_Not rep   -> trivialUCode rep NOT x
1614
1615       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1616       MO_S_Conv F32 F64 -> conversionNop F64 x
1617
1618       MO_S_Conv from to
1619         | from == to         -> conversionNop to x
1620         | isFloatingRep from -> coerceFP2Int from to x
1621         | isFloatingRep to   -> coerceInt2FP from to x
1622
1623         -- narrowing is a nop: we treat the high bits as undefined
1624       MO_S_Conv I32 to -> conversionNop to x
1625       MO_S_Conv I16 I8 -> conversionNop I8 x
1626       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1627       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1628
1629       MO_U_Conv from to
1630         | from == to -> conversionNop to x
1631         -- narrowing is a nop: we treat the high bits as undefined
1632       MO_U_Conv I32 to -> conversionNop to x
1633       MO_U_Conv I16 I8 -> conversionNop I8 x
1634       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1635       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1636
1637       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1638       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1639       MO_S_Neg rep      -> trivialUCode rep NEG x
1640
1641     where
1642         conversionNop new_rep expr
1643             = do e_code <- getRegister expr
1644                  return (swizzleRegisterRep e_code new_rep)
1645
1646 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1647   = case mop of
1648       MO_Eq F32 -> condFltReg EQQ x y
1649       MO_Ne F32 -> condFltReg NE  x y
1650
1651       MO_S_Gt F32 -> condFltReg GTT x y
1652       MO_S_Ge F32 -> condFltReg GE  x y
1653       MO_S_Lt F32 -> condFltReg LTT x y
1654       MO_S_Le F32 -> condFltReg LE  x y
1655
1656       MO_Eq F64 -> condFltReg EQQ x y
1657       MO_Ne F64 -> condFltReg NE  x y
1658
1659       MO_S_Gt F64 -> condFltReg GTT x y
1660       MO_S_Ge F64 -> condFltReg GE  x y
1661       MO_S_Lt F64 -> condFltReg LTT x y
1662       MO_S_Le F64 -> condFltReg LE  x y
1663
1664       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1665       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1666
1667       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1668       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1669       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1670       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1671
1672       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1673       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1674       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1675       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1676
1677       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1678       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1679       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1680       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1681
1682       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1683       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1684       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1685       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1686
1687          -- optimize addition with 32-bit immediate
1688          -- (needed for PIC)
1689       MO_Add I32 ->
1690         case y of
1691           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1692             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1693           CmmLit lit
1694             -> do
1695                 (src, srcCode) <- getSomeReg x
1696                 let imm = litToImm lit
1697                     code dst = srcCode `appOL` toOL [
1698                                     ADDIS dst src (HA imm),
1699                                     ADD dst dst (RIImm (LO imm))
1700                                 ]
1701                 return (Any I32 code)
1702           _ -> trivialCode I32 True ADD x y
1703
1704       MO_Add rep -> trivialCode rep True ADD x y
1705       MO_Sub rep ->
1706         case y of    -- subfi ('substract from' with immediate) doesn't exist
1707           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1708             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1709           _ -> trivialCodeNoImm rep SUBF y x
1710
1711       MO_Mul rep -> trivialCode rep True MULLW x y
1712
1713       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1714
1715       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1716       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1717
1718       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1719       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1720
1721       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1722       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1723
1724       MO_And rep   -> trivialCode rep False AND x y
1725       MO_Or rep    -> trivialCode rep False OR x y
1726       MO_Xor rep   -> trivialCode rep False XOR x y
1727
1728       MO_Shl rep   -> trivialCode rep False SLW x y
1729       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1730       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1731
1732 getRegister (CmmLit (CmmInt i rep))
1733   | Just imm <- makeImmediate rep True i
1734   = let
1735         code dst = unitOL (LI dst imm)
1736     in
1737         return (Any rep code)
1738
1739 getRegister (CmmLit (CmmFloat f frep)) = do
1740     lbl <- getNewLabelNat
1741     dflags <- getDynFlagsNat
1742     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1743     Amode addr addr_code <- getAmode dynRef
1744     let code dst =
1745             LDATA ReadOnlyData  [CmmDataLabel lbl,
1746                                  CmmStaticLit (CmmFloat f frep)]
1747             `consOL` (addr_code `snocOL` LD frep dst addr)
1748     return (Any frep code)
1749
1750 getRegister (CmmLit lit)
1751   = let rep = cmmLitRep lit
1752         imm = litToImm lit
1753         code dst = toOL [
1754               LIS dst (HA imm),
1755               ADD dst dst (RIImm (LO imm))
1756           ]
1757     in return (Any rep code)
1758
1759 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1760
1761     -- extend?Rep: wrap integer expression of type rep
1762     -- in a conversion to I32
1763 extendSExpr I32 x = x
1764 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1765 extendUExpr I32 x = x
1766 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1767
1768 #endif /* powerpc_TARGET_ARCH */
1769
1770
1771 -- -----------------------------------------------------------------------------
1772 --  The 'Amode' type: Memory addressing modes passed up the tree.
1773
1774 data Amode = Amode AddrMode InstrBlock
1775
1776 {-
1777 Now, given a tree (the argument to an CmmLoad) that references memory,
1778 produce a suitable addressing mode.
1779
1780 A Rule of the Game (tm) for Amodes: use of the addr bit must
1781 immediately follow use of the code part, since the code part puts
1782 values in registers which the addr then refers to.  So you can't put
1783 anything in between, lest it overwrite some of those registers.  If
1784 you need to do some other computation between the code part and use of
1785 the addr bit, first store the effective address from the amode in a
1786 temporary, then do the other computation, and then use the temporary:
1787
1788     code
1789     LEA amode, tmp
1790     ... other computation ...
1791     ... (tmp) ...
1792 -}
1793
1794 getAmode :: CmmExpr -> NatM Amode
1795 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1796
1797 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1798
1799 #if alpha_TARGET_ARCH
1800
1801 getAmode (StPrim IntSubOp [x, StInt i])
1802   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1803     getRegister x               `thenNat` \ register ->
1804     let
1805         code = registerCode register tmp
1806         reg  = registerName register tmp
1807         off  = ImmInt (-(fromInteger i))
1808     in
1809     return (Amode (AddrRegImm reg off) code)
1810
1811 getAmode (StPrim IntAddOp [x, StInt i])
1812   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1813     getRegister x               `thenNat` \ register ->
1814     let
1815         code = registerCode register tmp
1816         reg  = registerName register tmp
1817         off  = ImmInt (fromInteger i)
1818     in
1819     return (Amode (AddrRegImm reg off) code)
1820
1821 getAmode leaf
1822   | isJust imm
1823   = return (Amode (AddrImm imm__2) id)
1824   where
1825     imm = maybeImm leaf
1826     imm__2 = case imm of Just x -> x
1827
1828 getAmode other
1829   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1830     getRegister other           `thenNat` \ register ->
1831     let
1832         code = registerCode register tmp
1833         reg  = registerName register tmp
1834     in
1835     return (Amode (AddrReg reg) code)
1836
1837 #endif /* alpha_TARGET_ARCH */
1838
1839 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1840
1841 #if x86_64_TARGET_ARCH
1842
1843 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1844                                      CmmLit displacement])
1845     = return $ Amode (ripRel (litToImm displacement)) nilOL
1846
1847 #endif
1848
1849 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1850
1851 -- This is all just ridiculous, since it carefully undoes
1852 -- what mangleIndexTree has just done.
1853 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1854   | not (is64BitLit lit)
1855   -- ASSERT(rep == I32)???
1856   = do (x_reg, x_code) <- getSomeReg x
1857        let off = ImmInt (-(fromInteger i))
1858        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1859
1860 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1861   | not (is64BitLit lit)
1862   -- ASSERT(rep == I32)???
1863   = do (x_reg, x_code) <- getSomeReg x
1864        let off = ImmInt (fromInteger i)
1865        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1866
1867 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1868 -- recognised by the next rule.
1869 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1870                                   b@(CmmLit _)])
1871   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1872
1873 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1874                                         [y, CmmLit (CmmInt shift _)]])
1875   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1876   = x86_complex_amode x y shift 0
1877
1878 getAmode (CmmMachOp (MO_Add rep)
1879                 [x, CmmMachOp (MO_Add _)
1880                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1881                          CmmLit (CmmInt offset _)]])
1882   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1883   && not (is64BitInteger offset)
1884   = x86_complex_amode x y shift offset
1885
1886 getAmode (CmmMachOp (MO_Add rep) [x,y])
1887   = x86_complex_amode x y 0 0
1888
1889 getAmode (CmmLit lit) | not (is64BitLit lit)
1890   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1891
1892 getAmode expr = do
1893   (reg,code) <- getSomeReg expr
1894   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1895
1896
1897 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1898 x86_complex_amode base index shift offset
1899   = do (x_reg, x_code) <- getNonClobberedReg base
1900         -- x must be in a temp, because it has to stay live over y_code
1901         -- we could compre x_reg and y_reg and do something better here...
1902        (y_reg, y_code) <- getSomeReg index
1903        let
1904            code = x_code `appOL` y_code
1905            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1906        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1907                code)
1908
1909 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1910
1911 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1912
1913 #if sparc_TARGET_ARCH
1914
1915 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1916   | fits13Bits (-i)
1917   = do
1918        (reg, code) <- getSomeReg x
1919        let
1920          off  = ImmInt (-(fromInteger i))
1921        return (Amode (AddrRegImm reg off) code)
1922
1923
1924 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1925   | fits13Bits i
1926   = do
1927        (reg, code) <- getSomeReg x
1928        let
1929          off  = ImmInt (fromInteger i)
1930        return (Amode (AddrRegImm reg off) code)
1931
1932 getAmode (CmmMachOp (MO_Add rep) [x, y])
1933   = do
1934     (regX, codeX) <- getSomeReg x
1935     (regY, codeY) <- getSomeReg y
1936     let
1937         code = codeX `appOL` codeY
1938     return (Amode (AddrRegReg regX regY) code)
1939
1940 -- XXX Is this same as "leaf" in Stix?
1941 getAmode (CmmLit lit)
1942   = do
1943       tmp <- getNewRegNat I32
1944       let
1945         code = unitOL (SETHI (HI imm__2) tmp)
1946       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1947       where
1948          imm__2 = litToImm lit
1949
1950 getAmode other
1951   = do
1952        (reg, code) <- getSomeReg other
1953        let
1954             off  = ImmInt 0
1955        return (Amode (AddrRegImm reg off) code)
1956
1957 #endif /* sparc_TARGET_ARCH */
1958
1959 #ifdef powerpc_TARGET_ARCH
1960 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1961   | Just off <- makeImmediate I32 True (-i)
1962   = do
1963         (reg, code) <- getSomeReg x
1964         return (Amode (AddrRegImm reg off) code)
1965
1966
1967 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1968   | Just off <- makeImmediate I32 True i
1969   = do
1970         (reg, code) <- getSomeReg x
1971         return (Amode (AddrRegImm reg off) code)
1972
1973    -- optimize addition with 32-bit immediate
1974    -- (needed for PIC)
1975 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1976   = do
1977         tmp <- getNewRegNat I32
1978         (src, srcCode) <- getSomeReg x
1979         let imm = litToImm lit
1980             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1981         return (Amode (AddrRegImm tmp (LO imm)) code)
1982
1983 getAmode (CmmLit lit)
1984   = do
1985         tmp <- getNewRegNat I32
1986         let imm = litToImm lit
1987             code = unitOL (LIS tmp (HA imm))
1988         return (Amode (AddrRegImm tmp (LO imm)) code)
1989
1990 getAmode (CmmMachOp (MO_Add I32) [x, y])
1991   = do
1992         (regX, codeX) <- getSomeReg x
1993         (regY, codeY) <- getSomeReg y
1994         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1995
1996 getAmode other
1997   = do
1998         (reg, code) <- getSomeReg other
1999         let
2000             off  = ImmInt 0
2001         return (Amode (AddrRegImm reg off) code)
2002 #endif /* powerpc_TARGET_ARCH */
2003
2004 -- -----------------------------------------------------------------------------
2005 -- getOperand: sometimes any operand will do.
2006
2007 -- getNonClobberedOperand: the value of the operand will remain valid across
2008 -- the computation of an arbitrary expression, unless the expression
2009 -- is computed directly into a register which the operand refers to
2010 -- (see trivialCode where this function is used for an example).
2011
2012 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2013
2014 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2015 #if x86_64_TARGET_ARCH
2016 getNonClobberedOperand (CmmLit lit)
2017   | isSuitableFloatingPointLit lit = do
2018     lbl <- getNewLabelNat
2019     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2020                                            CmmStaticLit lit])
2021     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2022 #endif
2023 getNonClobberedOperand (CmmLit lit)
2024   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2025     return (OpImm (litToImm lit), nilOL)
2026 getNonClobberedOperand (CmmLoad mem pk)
2027   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2028     Amode src mem_code <- getAmode mem
2029     (src',save_code) <-
2030         if (amodeCouldBeClobbered src)
2031                 then do
2032                    tmp <- getNewRegNat wordRep
2033                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2034                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2035                 else
2036                    return (src, nilOL)
2037     return (OpAddr src', save_code `appOL` mem_code)
2038 getNonClobberedOperand e = do
2039     (reg, code) <- getNonClobberedReg e
2040     return (OpReg reg, code)
2041
2042 amodeCouldBeClobbered :: AddrMode -> Bool
2043 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2044
2045 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2046 regClobbered _ = False
2047
2048 -- getOperand: the operand is not required to remain valid across the
2049 -- computation of an arbitrary expression.
2050 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2051 #if x86_64_TARGET_ARCH
2052 getOperand (CmmLit lit)
2053   | isSuitableFloatingPointLit lit = do
2054     lbl <- getNewLabelNat
2055     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2056                                            CmmStaticLit lit])
2057     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2058 #endif
2059 getOperand (CmmLit lit)
2060   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2061     return (OpImm (litToImm lit), nilOL)
2062 getOperand (CmmLoad mem pk)
2063   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2064     Amode src mem_code <- getAmode mem
2065     return (OpAddr src, mem_code)
2066 getOperand e = do
2067     (reg, code) <- getSomeReg e
2068     return (OpReg reg, code)
2069
2070 isOperand :: CmmExpr -> Bool
2071 isOperand (CmmLoad _ _) = True
2072 isOperand (CmmLit lit)  = not (is64BitLit lit)
2073                           || isSuitableFloatingPointLit lit
2074 isOperand _             = False
2075
2076 -- if we want a floating-point literal as an operand, we can
2077 -- use it directly from memory.  However, if the literal is
2078 -- zero, we're better off generating it into a register using
2079 -- xor.
2080 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2081 isSuitableFloatingPointLit _ = False
2082
2083 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2084 getRegOrMem (CmmLoad mem pk)
2085   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2086     Amode src mem_code <- getAmode mem
2087     return (OpAddr src, mem_code)
2088 getRegOrMem e = do
2089     (reg, code) <- getNonClobberedReg e
2090     return (OpReg reg, code)
2091
2092 #if x86_64_TARGET_ARCH
2093 is64BitLit (CmmInt i I64) = is64BitInteger i
2094    -- assume that labels are in the range 0-2^31-1: this assumes the
2095    -- small memory model (see gcc docs, -mcmodel=small).
2096 #endif
2097 is64BitLit x = False
2098 #endif
2099
2100 is64BitInteger :: Integer -> Bool
2101 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2102   where i64 = fromIntegral i :: Int64
2103   -- a CmmInt is intended to be truncated to the appropriate
2104   -- number of bits, so here we truncate it to Int64.  This is
2105   -- important because e.g. -1 as a CmmInt might be either
2106   -- -1 or 18446744073709551615.
2107
2108 -- -----------------------------------------------------------------------------
2109 --  The 'CondCode' type:  Condition codes passed up the tree.
2110
2111 data CondCode = CondCode Bool Cond InstrBlock
2112
2113 -- Set up a condition code for a conditional branch.
2114
2115 getCondCode :: CmmExpr -> NatM CondCode
2116
2117 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2118
2119 #if alpha_TARGET_ARCH
2120 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2121 #endif /* alpha_TARGET_ARCH */
2122
2123 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2124
2125 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2126 -- yes, they really do seem to want exactly the same!
2127
2128 getCondCode (CmmMachOp mop [x, y])
2129   =
2130     case mop of
2131       MO_Eq F32 -> condFltCode EQQ x y
2132       MO_Ne F32 -> condFltCode NE  x y
2133
2134       MO_S_Gt F32 -> condFltCode GTT x y
2135       MO_S_Ge F32 -> condFltCode GE  x y
2136       MO_S_Lt F32 -> condFltCode LTT x y
2137       MO_S_Le F32 -> condFltCode LE  x y
2138
2139       MO_Eq F64 -> condFltCode EQQ x y
2140       MO_Ne F64 -> condFltCode NE  x y
2141
2142       MO_S_Gt F64 -> condFltCode GTT x y
2143       MO_S_Ge F64 -> condFltCode GE  x y
2144       MO_S_Lt F64 -> condFltCode LTT x y
2145       MO_S_Le F64 -> condFltCode LE  x y
2146
2147       MO_Eq rep -> condIntCode EQQ  x y
2148       MO_Ne rep -> condIntCode NE   x y
2149
2150       MO_S_Gt rep -> condIntCode GTT  x y
2151       MO_S_Ge rep -> condIntCode GE   x y
2152       MO_S_Lt rep -> condIntCode LTT  x y
2153       MO_S_Le rep -> condIntCode LE   x y
2154
2155       MO_U_Gt rep -> condIntCode GU   x y
2156       MO_U_Ge rep -> condIntCode GEU  x y
2157       MO_U_Lt rep -> condIntCode LU   x y
2158       MO_U_Le rep -> condIntCode LEU  x y
2159
2160       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2161
2162 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2163
2164 #elif powerpc_TARGET_ARCH
2165
2166 -- almost the same as everywhere else - but we need to
2167 -- extend small integers to 32 bit first
2168
2169 getCondCode (CmmMachOp mop [x, y])
2170   = case mop of
2171       MO_Eq F32 -> condFltCode EQQ x y
2172       MO_Ne F32 -> condFltCode NE  x y
2173
2174       MO_S_Gt F32 -> condFltCode GTT x y
2175       MO_S_Ge F32 -> condFltCode GE  x y
2176       MO_S_Lt F32 -> condFltCode LTT x y
2177       MO_S_Le F32 -> condFltCode LE  x y
2178
2179       MO_Eq F64 -> condFltCode EQQ x y
2180       MO_Ne F64 -> condFltCode NE  x y
2181
2182       MO_S_Gt F64 -> condFltCode GTT x y
2183       MO_S_Ge F64 -> condFltCode GE  x y
2184       MO_S_Lt F64 -> condFltCode LTT x y
2185       MO_S_Le F64 -> condFltCode LE  x y
2186
2187       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2188       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2189
2190       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2191       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2192       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2193       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2194
2195       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2196       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2197       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2198       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2199
2200       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2201
2202 getCondCode other =  panic "getCondCode(2)(powerpc)"
2203
2204
2205 #endif
2206
2207
2208 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2209 -- passed back up the tree.
2210
2211 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2212
2213 #if alpha_TARGET_ARCH
2214 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2215 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2216 #endif /* alpha_TARGET_ARCH */
2217
2218 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2219 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2220
2221 -- memory vs immediate
2222 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2223     Amode x_addr x_code <- getAmode x
2224     let
2225         imm  = litToImm lit
2226         code = x_code `snocOL`
2227                   CMP pk (OpImm imm) (OpAddr x_addr)
2228     --
2229     return (CondCode False cond code)
2230
2231 -- anything vs zero, using a mask
2232 -- TODO: Add some sanity checking!!!!
2233 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2234     | (CmmLit (CmmInt mask pk2)) <- o2
2235     = do
2236       (x_reg, x_code) <- getSomeReg x
2237       let
2238          code = x_code `snocOL`
2239                 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2240       --
2241       return (CondCode False cond code)
2242
2243 -- anything vs zero
2244 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2245     (x_reg, x_code) <- getSomeReg x
2246     let
2247         code = x_code `snocOL`
2248                   TEST pk (OpReg x_reg) (OpReg x_reg)
2249     --
2250     return (CondCode False cond code)
2251
2252 -- anything vs operand
2253 condIntCode cond x y | isOperand y = do
2254     (x_reg, x_code) <- getNonClobberedReg x
2255     (y_op,  y_code) <- getOperand y
2256     let
2257         code = x_code `appOL` y_code `snocOL`
2258                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2259     -- in
2260     return (CondCode False cond code)
2261
2262 -- anything vs anything
2263 condIntCode cond x y = do
2264   (y_reg, y_code) <- getNonClobberedReg y
2265   (x_op, x_code) <- getRegOrMem x
2266   let
2267         code = y_code `appOL`
2268                x_code `snocOL`
2269                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2270   -- in
2271   return (CondCode False cond code)
2272 #endif
2273
2274 #if i386_TARGET_ARCH
2275 condFltCode cond x y
2276   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2277   (x_reg, x_code) <- getNonClobberedReg x
2278   (y_reg, y_code) <- getSomeReg y
2279   let
2280         code = x_code `appOL` y_code `snocOL`
2281                 GCMP cond x_reg y_reg
2282   -- The GCMP insn does the test and sets the zero flag if comparable
2283   -- and true.  Hence we always supply EQQ as the condition to test.
2284   return (CondCode True EQQ code)
2285 #endif /* i386_TARGET_ARCH */
2286
2287 #if x86_64_TARGET_ARCH
2288 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2289 -- an operand, but the right must be a reg.  We can probably do better
2290 -- than this general case...
2291 condFltCode cond x y = do
2292   (x_reg, x_code) <- getNonClobberedReg x
2293   (y_op, y_code) <- getOperand y
2294   let
2295         code = x_code `appOL`
2296                y_code `snocOL`
2297                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2298         -- NB(1): we need to use the unsigned comparison operators on the
2299         -- result of this comparison.
2300   -- in
2301   return (CondCode True (condToUnsigned cond) code)
2302 #endif
2303
2304 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2305
2306 #if sparc_TARGET_ARCH
2307
2308 condIntCode cond x (CmmLit (CmmInt y rep))
2309   | fits13Bits y
2310   = do
2311        (src1, code) <- getSomeReg x
2312        let
2313            src2 = ImmInt (fromInteger y)
2314            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2315        return (CondCode False cond code')
2316
2317 condIntCode cond x y = do
2318     (src1, code1) <- getSomeReg x
2319     (src2, code2) <- getSomeReg y
2320     let
2321         code__2 = code1 `appOL` code2 `snocOL`
2322                   SUB False True src1 (RIReg src2) g0
2323     return (CondCode False cond code__2)
2324
2325 -----------
2326 condFltCode cond x y = do
2327     (src1, code1) <- getSomeReg x
2328     (src2, code2) <- getSomeReg y
2329     tmp <- getNewRegNat F64
2330     let
2331         promote x = FxTOy F32 F64 x tmp
2332
2333         pk1   = cmmExprRep x
2334         pk2   = cmmExprRep y
2335
2336         code__2 =
2337                 if pk1 == pk2 then
2338                     code1 `appOL` code2 `snocOL`
2339                     FCMP True pk1 src1 src2
2340                 else if pk1 == F32 then
2341                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2342                     FCMP True F64 tmp src2
2343                 else
2344                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2345                     FCMP True F64 src1 tmp
2346     return (CondCode True cond code__2)
2347
2348 #endif /* sparc_TARGET_ARCH */
2349
2350 #if powerpc_TARGET_ARCH
2351 --  ###FIXME: I16 and I8!
2352 condIntCode cond x (CmmLit (CmmInt y rep))
2353   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2354   = do
2355         (src1, code) <- getSomeReg x
2356         let
2357             code' = code `snocOL`
2358                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2359         return (CondCode False cond code')
2360
2361 condIntCode cond x y = do
2362     (src1, code1) <- getSomeReg x
2363     (src2, code2) <- getSomeReg y
2364     let
2365         code' = code1 `appOL` code2 `snocOL`
2366                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2367     return (CondCode False cond code')
2368
2369 condFltCode cond x y = do
2370     (src1, code1) <- getSomeReg x
2371     (src2, code2) <- getSomeReg y
2372     let
2373         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2374         code'' = case cond of -- twiddle CR to handle unordered case
2375                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2376                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2377                     _ -> code'
2378                  where
2379                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2380     return (CondCode True cond code'')
2381
2382 #endif /* powerpc_TARGET_ARCH */
2383
2384 -- -----------------------------------------------------------------------------
2385 -- Generating assignments
2386
2387 -- Assignments are really at the heart of the whole code generation
2388 -- business.  Almost all top-level nodes of any real importance are
2389 -- assignments, which correspond to loads, stores, or register
2390 -- transfers.  If we're really lucky, some of the register transfers
2391 -- will go away, because we can use the destination register to
2392 -- complete the code generation for the right hand side.  This only
2393 -- fails when the right hand side is forced into a fixed register
2394 -- (e.g. the result of a call).
2395
2396 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2397 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2398
2399 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2400 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2401
2402 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2403
2404 #if alpha_TARGET_ARCH
2405
2406 assignIntCode pk (CmmLoad dst _) src
2407   = getNewRegNat IntRep             `thenNat` \ tmp ->
2408     getAmode dst                    `thenNat` \ amode ->
2409     getRegister src                 `thenNat` \ register ->
2410     let
2411         code1   = amodeCode amode []
2412         dst__2  = amodeAddr amode
2413         code2   = registerCode register tmp []
2414         src__2  = registerName register tmp
2415         sz      = primRepToSize pk
2416         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2417     in
2418     return code__2
2419
2420 assignIntCode pk dst src
2421   = getRegister dst                         `thenNat` \ register1 ->
2422     getRegister src                         `thenNat` \ register2 ->
2423     let
2424         dst__2  = registerName register1 zeroh
2425         code    = registerCode register2 dst__2
2426         src__2  = registerName register2 dst__2
2427         code__2 = if isFixed register2
2428                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2429                   else code
2430     in
2431     return code__2
2432
2433 #endif /* alpha_TARGET_ARCH */
2434
2435 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2436
2437 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2438
2439 -- integer assignment to memory
2440
2441 -- specific case of adding/subtracting an integer to a particular address.
2442 -- ToDo: catch other cases where we can use an operation directly on a memory
2443 -- address.
2444 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2445                                                  CmmLit (CmmInt i _)])
2446    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2447      Just instr <- check op
2448    = do Amode amode code_addr <- getAmode addr
2449         let code = code_addr `snocOL`
2450                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2451         return code
2452    where
2453         check (MO_Add _) = Just ADD
2454         check (MO_Sub _) = Just SUB
2455         check _ = Nothing
2456         -- ToDo: more?
2457
2458 -- general case
2459 assignMem_IntCode pk addr src = do
2460     Amode addr code_addr <- getAmode addr
2461     (code_src, op_src)   <- get_op_RI src
2462     let
2463         code = code_src `appOL`
2464                code_addr `snocOL`
2465                   MOV pk op_src (OpAddr addr)
2466         -- NOTE: op_src is stable, so it will still be valid
2467         -- after code_addr.  This may involve the introduction
2468         -- of an extra MOV to a temporary register, but we hope
2469         -- the register allocator will get rid of it.
2470     --
2471     return code
2472   where
2473     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2474     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2475       = return (nilOL, OpImm (litToImm lit))
2476     get_op_RI op
2477       = do (reg,code) <- getNonClobberedReg op
2478            return (code, OpReg reg)
2479
2480
2481 -- Assign; dst is a reg, rhs is mem
2482 assignReg_IntCode pk reg (CmmLoad src _) = do
2483   load_code <- intLoadCode (MOV pk) src
2484   return (load_code (getRegisterReg reg))
2485
2486 -- dst is a reg, but src could be anything
2487 assignReg_IntCode pk reg src = do
2488   code <- getAnyReg src
2489   return (code (getRegisterReg reg))
2490
2491 #endif /* i386_TARGET_ARCH */
2492
2493 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2494
2495 #if sparc_TARGET_ARCH
2496
2497 assignMem_IntCode pk addr src = do
2498     (srcReg, code) <- getSomeReg src
2499     Amode dstAddr addr_code <- getAmode addr
2500     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2501
2502 assignReg_IntCode pk reg src = do
2503     r <- getRegister src
2504     return $ case r of
2505         Any _ code         -> code dst
2506         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2507     where
2508       dst = getRegisterReg reg
2509
2510
2511 #endif /* sparc_TARGET_ARCH */
2512
2513 #if powerpc_TARGET_ARCH
2514
2515 assignMem_IntCode pk addr src = do
2516     (srcReg, code) <- getSomeReg src
2517     Amode dstAddr addr_code <- getAmode addr
2518     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2519
2520 -- dst is a reg, but src could be anything
2521 assignReg_IntCode pk reg src
2522     = do
2523         r <- getRegister src
2524         return $ case r of
2525             Any _ code         -> code dst
2526             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2527     where
2528         dst = getRegisterReg reg
2529
2530 #endif /* powerpc_TARGET_ARCH */
2531
2532
2533 -- -----------------------------------------------------------------------------
2534 -- Floating-point assignments
2535
2536 #if alpha_TARGET_ARCH
2537
2538 assignFltCode pk (CmmLoad dst _) src
2539   = getNewRegNat pk                 `thenNat` \ tmp ->
2540     getAmode dst                    `thenNat` \ amode ->
2541     getRegister src                         `thenNat` \ register ->
2542     let
2543         code1   = amodeCode amode []
2544         dst__2  = amodeAddr amode
2545         code2   = registerCode register tmp []
2546         src__2  = registerName register tmp
2547         sz      = primRepToSize pk
2548         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2549     in
2550     return code__2
2551
2552 assignFltCode pk dst src
2553   = getRegister dst                         `thenNat` \ register1 ->
2554     getRegister src                         `thenNat` \ register2 ->
2555     let
2556         dst__2  = registerName register1 zeroh
2557         code    = registerCode register2 dst__2
2558         src__2  = registerName register2 dst__2
2559         code__2 = if isFixed register2
2560                   then code . mkSeqInstr (FMOV src__2 dst__2)
2561                   else code
2562     in
2563     return code__2
2564
2565 #endif /* alpha_TARGET_ARCH */
2566
2567 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2568
2569 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2570
2571 -- Floating point assignment to memory
2572 assignMem_FltCode pk addr src = do
2573   (src_reg, src_code) <- getNonClobberedReg src
2574   Amode addr addr_code <- getAmode addr
2575   let
2576         code = src_code `appOL`
2577                addr_code `snocOL`
2578                 IF_ARCH_i386(GST pk src_reg addr,
2579                              MOV pk (OpReg src_reg) (OpAddr addr))
2580   return code
2581
2582 -- Floating point assignment to a register/temporary
2583 assignReg_FltCode pk reg src = do
2584   src_code <- getAnyReg src
2585   return (src_code (getRegisterReg reg))
2586
2587 #endif /* i386_TARGET_ARCH */
2588
2589 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2590
2591 #if sparc_TARGET_ARCH
2592
2593 -- Floating point assignment to memory
2594 assignMem_FltCode pk addr src = do
2595     Amode dst__2 code1 <- getAmode addr
2596     (src__2, code2) <- getSomeReg src
2597     tmp1 <- getNewRegNat pk
2598     let
2599         pk__2   = cmmExprRep src
2600         code__2 = code1 `appOL` code2 `appOL`
2601             if   pk == pk__2
2602             then unitOL (ST pk src__2 dst__2)
2603             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2604     return code__2
2605
2606 -- Floating point assignment to a register/temporary
2607 -- ToDo: Verify correctness
2608 assignReg_FltCode pk reg src = do
2609     r <- getRegister src
2610     v1 <- getNewRegNat pk
2611     return $ case r of
2612         Any _ code         -> code dst
2613         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2614     where
2615       dst = getRegisterReg reg
2616
2617 #endif /* sparc_TARGET_ARCH */
2618
2619 #if powerpc_TARGET_ARCH
2620
2621 -- Easy, isn't it?
2622 assignMem_FltCode = assignMem_IntCode
2623 assignReg_FltCode = assignReg_IntCode
2624
2625 #endif /* powerpc_TARGET_ARCH */
2626
2627
2628 -- -----------------------------------------------------------------------------
2629 -- Generating an non-local jump
2630
2631 -- (If applicable) Do not fill the delay slots here; you will confuse the
2632 -- register allocator.
2633
2634 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2635
2636 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2637
2638 #if alpha_TARGET_ARCH
2639
2640 genJump (CmmLabel lbl)
2641   | isAsmTemp lbl = returnInstr (BR target)
2642   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2643   where
2644     target = ImmCLbl lbl
2645
2646 genJump tree
2647   = getRegister tree                `thenNat` \ register ->
2648     getNewRegNat PtrRep             `thenNat` \ tmp ->
2649     let
2650         dst    = registerName register pv
2651         code   = registerCode register pv
2652         target = registerName register pv
2653     in
2654     if isFixed register then
2655         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2656     else
2657     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2658
2659 #endif /* alpha_TARGET_ARCH */
2660
2661 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2662
2663 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2664
2665 genJump (CmmLoad mem pk) = do
2666   Amode target code <- getAmode mem
2667   return (code `snocOL` JMP (OpAddr target))
2668
2669 genJump (CmmLit lit) = do
2670   return (unitOL (JMP (OpImm (litToImm lit))))
2671
2672 genJump expr = do
2673   (reg,code) <- getSomeReg expr
2674   return (code `snocOL` JMP (OpReg reg))
2675
2676 #endif /* i386_TARGET_ARCH */
2677
2678 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2679
2680 #if sparc_TARGET_ARCH
2681
2682 genJump (CmmLit (CmmLabel lbl))
2683   = return (toOL [CALL (Left target) 0 True, NOP])
2684   where
2685     target = ImmCLbl lbl
2686
2687 genJump tree
2688   = do
2689         (target, code) <- getSomeReg tree
2690         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2691
2692 #endif /* sparc_TARGET_ARCH */
2693
2694 #if powerpc_TARGET_ARCH
2695 genJump (CmmLit (CmmLabel lbl))
2696   = return (unitOL $ JMP lbl)
2697
2698 genJump tree
2699   = do
2700         (target,code) <- getSomeReg tree
2701         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2702 #endif /* powerpc_TARGET_ARCH */
2703
2704
2705 -- -----------------------------------------------------------------------------
2706 --  Unconditional branches
2707
2708 genBranch :: BlockId -> NatM InstrBlock
2709
2710 genBranch = return . toOL . mkBranchInstr
2711
2712 -- -----------------------------------------------------------------------------
2713 --  Conditional jumps
2714
2715 {-
2716 Conditional jumps are always to local labels, so we can use branch
2717 instructions.  We peek at the arguments to decide what kind of
2718 comparison to do.
2719
2720 ALPHA: For comparisons with 0, we're laughing, because we can just do
2721 the desired conditional branch.
2722
2723 I386: First, we have to ensure that the condition
2724 codes are set according to the supplied comparison operation.
2725
2726 SPARC: First, we have to ensure that the condition codes are set
2727 according to the supplied comparison operation.  We generate slightly
2728 different code for floating point comparisons, because a floating
2729 point operation cannot directly precede a @BF@.  We assume the worst
2730 and fill that slot with a @NOP@.
2731
2732 SPARC: Do not fill the delay slots here; you will confuse the register
2733 allocator.
2734 -}
2735
2736
2737 genCondJump
2738     :: BlockId      -- the branch target
2739     -> CmmExpr      -- the condition on which to branch
2740     -> NatM InstrBlock
2741
2742 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2743
2744 #if alpha_TARGET_ARCH
2745
2746 genCondJump id (StPrim op [x, StInt 0])
2747   = getRegister x                           `thenNat` \ register ->
2748     getNewRegNat (registerRep register)
2749                                     `thenNat` \ tmp ->
2750     let
2751         code   = registerCode register tmp
2752         value  = registerName register tmp
2753         pk     = registerRep register
2754         target = ImmCLbl lbl
2755     in
2756     returnSeq code [BI (cmpOp op) value target]
2757   where
2758     cmpOp CharGtOp = GTT
2759     cmpOp CharGeOp = GE
2760     cmpOp CharEqOp = EQQ
2761     cmpOp CharNeOp = NE
2762     cmpOp CharLtOp = LTT
2763     cmpOp CharLeOp = LE
2764     cmpOp IntGtOp = GTT
2765     cmpOp IntGeOp = GE
2766     cmpOp IntEqOp = EQQ
2767     cmpOp IntNeOp = NE
2768     cmpOp IntLtOp = LTT
2769     cmpOp IntLeOp = LE
2770     cmpOp WordGtOp = NE
2771     cmpOp WordGeOp = ALWAYS
2772     cmpOp WordEqOp = EQQ
2773     cmpOp WordNeOp = NE
2774     cmpOp WordLtOp = NEVER
2775     cmpOp WordLeOp = EQQ
2776     cmpOp AddrGtOp = NE
2777     cmpOp AddrGeOp = ALWAYS
2778     cmpOp AddrEqOp = EQQ
2779     cmpOp AddrNeOp = NE
2780     cmpOp AddrLtOp = NEVER
2781     cmpOp AddrLeOp = EQQ
2782
2783 genCondJump lbl (StPrim op [x, StDouble 0.0])
2784   = getRegister x                           `thenNat` \ register ->
2785     getNewRegNat (registerRep register)
2786                                     `thenNat` \ tmp ->
2787     let
2788         code   = registerCode register tmp
2789         value  = registerName register tmp
2790         pk     = registerRep register
2791         target = ImmCLbl lbl
2792     in
2793     return (code . mkSeqInstr (BF (cmpOp op) value target))
2794   where
2795     cmpOp FloatGtOp = GTT
2796     cmpOp FloatGeOp = GE
2797     cmpOp FloatEqOp = EQQ
2798     cmpOp FloatNeOp = NE
2799     cmpOp FloatLtOp = LTT
2800     cmpOp FloatLeOp = LE
2801     cmpOp DoubleGtOp = GTT
2802     cmpOp DoubleGeOp = GE
2803     cmpOp DoubleEqOp = EQQ
2804     cmpOp DoubleNeOp = NE
2805     cmpOp DoubleLtOp = LTT
2806     cmpOp DoubleLeOp = LE
2807
2808 genCondJump lbl (StPrim op [x, y])
2809   | fltCmpOp op
2810   = trivialFCode pr instr x y       `thenNat` \ register ->
2811     getNewRegNat F64                `thenNat` \ tmp ->
2812     let
2813         code   = registerCode register tmp
2814         result = registerName register tmp
2815         target = ImmCLbl lbl
2816     in
2817     return (code . mkSeqInstr (BF cond result target))
2818   where
2819     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2820
2821     fltCmpOp op = case op of
2822         FloatGtOp -> True
2823         FloatGeOp -> True
2824         FloatEqOp -> True
2825         FloatNeOp -> True
2826         FloatLtOp -> True
2827         FloatLeOp -> True
2828         DoubleGtOp -> True
2829         DoubleGeOp -> True
2830         DoubleEqOp -> True
2831         DoubleNeOp -> True
2832         DoubleLtOp -> True
2833         DoubleLeOp -> True
2834         _ -> False
2835     (instr, cond) = case op of
2836         FloatGtOp -> (FCMP TF LE, EQQ)
2837         FloatGeOp -> (FCMP TF LTT, EQQ)
2838         FloatEqOp -> (FCMP TF EQQ, NE)
2839         FloatNeOp -> (FCMP TF EQQ, EQQ)
2840         FloatLtOp -> (FCMP TF LTT, NE)
2841         FloatLeOp -> (FCMP TF LE, NE)
2842         DoubleGtOp -> (FCMP TF LE, EQQ)
2843         DoubleGeOp -> (FCMP TF LTT, EQQ)
2844         DoubleEqOp -> (FCMP TF EQQ, NE)
2845         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2846         DoubleLtOp -> (FCMP TF LTT, NE)
2847         DoubleLeOp -> (FCMP TF LE, NE)
2848
2849 genCondJump lbl (StPrim op [x, y])
2850   = trivialCode instr x y           `thenNat` \ register ->
2851     getNewRegNat IntRep             `thenNat` \ tmp ->
2852     let
2853         code   = registerCode register tmp
2854         result = registerName register tmp
2855         target = ImmCLbl lbl
2856     in
2857     return (code . mkSeqInstr (BI cond result target))
2858   where
2859     (instr, cond) = case op of
2860         CharGtOp -> (CMP LE, EQQ)
2861         CharGeOp -> (CMP LTT, EQQ)
2862         CharEqOp -> (CMP EQQ, NE)
2863         CharNeOp -> (CMP EQQ, EQQ)
2864         CharLtOp -> (CMP LTT, NE)
2865         CharLeOp -> (CMP LE, NE)
2866         IntGtOp -> (CMP LE, EQQ)
2867         IntGeOp -> (CMP LTT, EQQ)
2868         IntEqOp -> (CMP EQQ, NE)
2869         IntNeOp -> (CMP EQQ, EQQ)
2870         IntLtOp -> (CMP LTT, NE)
2871         IntLeOp -> (CMP LE, NE)
2872         WordGtOp -> (CMP ULE, EQQ)
2873         WordGeOp -> (CMP ULT, EQQ)
2874         WordEqOp -> (CMP EQQ, NE)
2875         WordNeOp -> (CMP EQQ, EQQ)
2876         WordLtOp -> (CMP ULT, NE)
2877         WordLeOp -> (CMP ULE, NE)
2878         AddrGtOp -> (CMP ULE, EQQ)
2879         AddrGeOp -> (CMP ULT, EQQ)
2880         AddrEqOp -> (CMP EQQ, NE)
2881         AddrNeOp -> (CMP EQQ, EQQ)
2882         AddrLtOp -> (CMP ULT, NE)
2883         AddrLeOp -> (CMP ULE, NE)
2884
2885 #endif /* alpha_TARGET_ARCH */
2886
2887 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2888
2889 #if i386_TARGET_ARCH
2890
2891 genCondJump id bool = do
2892   CondCode _ cond code <- getCondCode bool
2893   return (code `snocOL` JXX cond id)
2894
2895 #endif
2896
2897 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2898
2899 #if x86_64_TARGET_ARCH
2900
2901 genCondJump id bool = do
2902   CondCode is_float cond cond_code <- getCondCode bool
2903   if not is_float
2904     then
2905         return (cond_code `snocOL` JXX cond id)
2906     else do
2907         lbl <- getBlockIdNat
2908
2909         -- see comment with condFltReg
2910         let code = case cond of
2911                         NE  -> or_unordered
2912                         GU  -> plain_test
2913                         GEU -> plain_test
2914                         _   -> and_ordered
2915
2916             plain_test = unitOL (
2917                   JXX cond id
2918                 )
2919             or_unordered = toOL [
2920                   JXX cond id,
2921                   JXX PARITY id
2922                 ]
2923             and_ordered = toOL [
2924                   JXX PARITY lbl,
2925                   JXX cond id,
2926                   JXX ALWAYS lbl,
2927                   NEWBLOCK lbl
2928                 ]
2929         return (cond_code `appOL` code)
2930
2931 #endif
2932
2933 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2934
2935 #if sparc_TARGET_ARCH
2936
2937 genCondJump (BlockId id) bool = do
2938   CondCode is_float cond code <- getCondCode bool
2939   return (
2940        code `appOL`
2941        toOL (
2942          if   is_float
2943          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2944          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2945        )
2946     )
2947
2948 #endif /* sparc_TARGET_ARCH */
2949
2950
2951 #if powerpc_TARGET_ARCH
2952
2953 genCondJump id bool = do
2954   CondCode is_float cond code <- getCondCode bool
2955   return (code `snocOL` BCC cond id)
2956
2957 #endif /* powerpc_TARGET_ARCH */
2958
2959
2960 -- -----------------------------------------------------------------------------
2961 --  Generating C calls
2962
2963 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2964 -- @get_arg@, which moves the arguments to the correct registers/stack
2965 -- locations.  Apart from that, the code is easy.
2966 --
2967 -- (If applicable) Do not fill the delay slots here; you will confuse the
2968 -- register allocator.
2969
2970 genCCall
2971     :: CmmCallTarget            -- function to call
2972     -> CmmFormals               -- where to put the result
2973     -> CmmActuals               -- arguments (of mixed type)
2974     -> NatM InstrBlock
2975
2976 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2977
2978 #if alpha_TARGET_ARCH
2979
2980 ccallResultRegs =
2981
2982 genCCall fn cconv result_regs args
2983   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2984                           `thenNat` \ ((unused,_), argCode) ->
2985     let
2986         nRegs = length allArgRegs - length unused
2987         code = asmSeqThen (map ($ []) argCode)
2988     in
2989         returnSeq code [
2990             LDA pv (AddrImm (ImmLab (ptext fn))),
2991             JSR ra (AddrReg pv) nRegs,
2992             LDGP gp (AddrReg ra)]
2993   where
2994     ------------------------
2995     {-  Try to get a value into a specific register (or registers) for
2996         a call.  The first 6 arguments go into the appropriate
2997         argument register (separate registers for integer and floating
2998         point arguments, but used in lock-step), and the remaining
2999         arguments are dumped to the stack, beginning at 0(sp).  Our
3000         first argument is a pair of the list of remaining argument
3001         registers to be assigned for this call and the next stack
3002         offset to use for overflowing arguments.  This way,
3003         @get_Arg@ can be applied to all of a call's arguments using
3004         @mapAccumLNat@.
3005     -}
3006     get_arg
3007         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3008         -> StixTree             -- Current argument
3009         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3010
3011     -- We have to use up all of our argument registers first...
3012
3013     get_arg ((iDst,fDst):dsts, offset) arg
3014       = getRegister arg                     `thenNat` \ register ->
3015         let
3016             reg  = if isFloatingRep pk then fDst else iDst
3017             code = registerCode register reg
3018             src  = registerName register reg
3019             pk   = registerRep register
3020         in
3021         return (
3022             if isFloatingRep pk then
3023                 ((dsts, offset), if isFixed register then
3024                     code . mkSeqInstr (FMOV src fDst)
3025                     else code)
3026             else
3027                 ((dsts, offset), if isFixed register then
3028                     code . mkSeqInstr (OR src (RIReg src) iDst)
3029                     else code))
3030
3031     -- Once we have run out of argument registers, we move to the
3032     -- stack...
3033
3034     get_arg ([], offset) arg
3035       = getRegister arg                 `thenNat` \ register ->
3036         getNewRegNat (registerRep register)
3037                                         `thenNat` \ tmp ->
3038         let
3039             code = registerCode register tmp
3040             src  = registerName register tmp
3041             pk   = registerRep register
3042             sz   = primRepToSize pk
3043         in
3044         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3045
3046 #endif /* alpha_TARGET_ARCH */
3047
3048 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3049
3050 #if i386_TARGET_ARCH
3051
3052 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3053         -- write barrier compiles to no code on x86/x86-64;
3054         -- we keep it this long in order to prevent earlier optimisations.
3055
3056 -- we only cope with a single result for foreign calls
3057 genCCall (CmmPrim op) [CmmHinted r _] args = do
3058   case op of
3059         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3060         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3061
3062         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3063         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3064
3065         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3066         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3067
3068         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3069         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3070
3071         other_op    -> outOfLineFloatOp op r args
3072  where
3073   actuallyInlineFloatOp rep instr [CmmHinted x _]
3074         = do res <- trivialUFCode rep instr x
3075              any <- anyReg res
3076              return (any (getRegisterReg (CmmLocal r)))
3077
3078 genCCall target dest_regs args = do
3079     let
3080         sizes               = map (arg_size . cmmExprRep . hintlessCmm) (reverse args)
3081 #if !darwin_TARGET_OS
3082         tot_arg_size        = sum sizes
3083 #else
3084         raw_arg_size        = sum sizes
3085         tot_arg_size        = roundTo 16 raw_arg_size
3086         arg_pad_size        = tot_arg_size - raw_arg_size
3087     delta0 <- getDeltaNat
3088     setDeltaNat (delta0 - arg_pad_size)
3089 #endif
3090
3091     push_codes <- mapM push_arg (reverse args)
3092     delta <- getDeltaNat
3093
3094     -- in
3095     -- deal with static vs dynamic call targets
3096     (callinsns,cconv) <-
3097       case target of
3098         -- CmmPrim -> ...
3099         CmmCallee (CmmLit (CmmLabel lbl)) conv
3100            -> -- ToDo: stdcall arg sizes
3101               return (unitOL (CALL (Left fn_imm) []), conv)
3102            where fn_imm = ImmCLbl lbl
3103         CmmCallee expr conv
3104            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3105                  ASSERT(dyn_rep == I32)
3106                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3107
3108     let push_code
3109 #if darwin_TARGET_OS
3110             | arg_pad_size /= 0
3111             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3112                     DELTA (delta0 - arg_pad_size)]
3113               `appOL` concatOL push_codes
3114             | otherwise
3115 #endif
3116             = concatOL push_codes
3117         call = callinsns `appOL`
3118                toOL (
3119                         -- Deallocate parameters after call for ccall;
3120                         -- but not for stdcall (callee does it)
3121                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3122                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3123                   ++
3124                   [DELTA (delta + tot_arg_size)]
3125                )
3126     -- in
3127     setDeltaNat (delta + tot_arg_size)
3128
3129     let
3130         -- assign the results, if necessary
3131         assign_code []     = nilOL
3132         assign_code [CmmHinted dest _hint] =
3133           case rep of
3134                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3135                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3136                 F32 -> unitOL (GMOV fake0 r_dest)
3137                 F64 -> unitOL (GMOV fake0 r_dest)
3138                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3139           where
3140                 r_dest_hi = getHiVRegFromLo r_dest
3141                 rep = localRegRep dest
3142                 r_dest = getRegisterReg (CmmLocal dest)
3143         assign_code many = panic "genCCall.assign_code many"
3144
3145     return (push_code `appOL`
3146             call `appOL`
3147             assign_code dest_regs)
3148
3149   where
3150     arg_size F64 = 8
3151     arg_size F32 = 4
3152     arg_size I64 = 8
3153     arg_size _   = 4
3154
3155     roundTo a x | x `mod` a == 0 = x
3156                 | otherwise = x + a - (x `mod` a)
3157
3158
3159     push_arg :: (CmmHinted CmmExpr){-current argument-}
3160                     -> NatM InstrBlock  -- code
3161
3162     push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3163       | arg_rep == I64 = do
3164         ChildCode64 code r_lo <- iselExpr64 arg
3165         delta <- getDeltaNat
3166         setDeltaNat (delta - 8)
3167         let
3168             r_hi = getHiVRegFromLo r_lo
3169         -- in
3170         return (       code `appOL`
3171                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3172                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3173                              DELTA (delta-8)]
3174             )
3175
3176       | otherwise = do
3177         (code, reg, sz) <- get_op arg
3178         delta <- getDeltaNat
3179         let size = arg_size sz
3180         setDeltaNat (delta-size)
3181         if (case sz of F64 -> True; F32 -> True; _ -> False)
3182            then return (code `appOL`
3183                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3184                               DELTA (delta-size),
3185                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3186                                                         EAIndexNone
3187                                                         (ImmInt 0))]
3188                        )
3189            else return (code `snocOL`
3190                         PUSH I32 (OpReg reg) `snocOL`
3191                         DELTA (delta-size)
3192                        )
3193       where
3194          arg_rep = cmmExprRep arg
3195
3196     ------------
3197     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3198     get_op op = do
3199         (reg,code) <- getSomeReg op
3200         return (code, reg, cmmExprRep op)
3201
3202 #endif /* i386_TARGET_ARCH */
3203
3204 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3205
3206 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3207   -> NatM InstrBlock
3208 outOfLineFloatOp mop res args
3209   = do
3210       dflags <- getDynFlagsNat
3211       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3212       let target = CmmCallee targetExpr CCallConv
3213
3214       if localRegRep res == F64
3215         then
3216           stmtToInstrs (CmmCall target [CmmHinted res FloatHint] args CmmUnsafe CmmMayReturn)
3217         else do
3218           uq <- getUniqueNat
3219           let
3220             tmp = LocalReg uq F64 GCKindNonPtr
3221           -- in
3222           code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp FloatHint] args CmmUnsafe CmmMayReturn)
3223           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3224           return (code1 `appOL` code2)
3225   where
3226         lbl = mkForeignLabel fn Nothing False
3227
3228         fn = case mop of
3229               MO_F32_Sqrt  -> FSLIT("sqrtf")
3230               MO_F32_Sin   -> FSLIT("sinf")
3231               MO_F32_Cos   -> FSLIT("cosf")
3232               MO_F32_Tan   -> FSLIT("tanf")
3233               MO_F32_Exp   -> FSLIT("expf")
3234               MO_F32_Log   -> FSLIT("logf")
3235
3236               MO_F32_Asin  -> FSLIT("asinf")
3237               MO_F32_Acos  -> FSLIT("acosf")
3238               MO_F32_Atan  -> FSLIT("atanf")
3239
3240               MO_F32_Sinh  -> FSLIT("sinhf")
3241               MO_F32_Cosh  -> FSLIT("coshf")
3242               MO_F32_Tanh  -> FSLIT("tanhf")
3243               MO_F32_Pwr   -> FSLIT("powf")
3244
3245               MO_F64_Sqrt  -> FSLIT("sqrt")
3246               MO_F64_Sin   -> FSLIT("sin")
3247               MO_F64_Cos   -> FSLIT("cos")
3248               MO_F64_Tan   -> FSLIT("tan")
3249               MO_F64_Exp   -> FSLIT("exp")
3250               MO_F64_Log   -> FSLIT("log")
3251
3252               MO_F64_Asin  -> FSLIT("asin")
3253               MO_F64_Acos  -> FSLIT("acos")
3254               MO_F64_Atan  -> FSLIT("atan")
3255
3256               MO_F64_Sinh  -> FSLIT("sinh")
3257               MO_F64_Cosh  -> FSLIT("cosh")
3258               MO_F64_Tanh  -> FSLIT("tanh")
3259               MO_F64_Pwr   -> FSLIT("pow")
3260
3261 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3262
3263 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3264
3265 #if x86_64_TARGET_ARCH
3266
3267 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3268         -- write barrier compiles to no code on x86/x86-64;
3269         -- we keep it this long in order to prevent earlier optimisations.
3270
3271
3272 genCCall (CmmPrim op) [CmmHinted r _] args =
3273   outOfLineFloatOp op r args
3274
3275 genCCall target dest_regs args = do
3276
3277         -- load up the register arguments
3278     (stack_args, aregs, fregs, load_args_code)
3279          <- load_args args allArgRegs allFPArgRegs nilOL
3280
3281     let
3282         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3283         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3284         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3285                 -- for annotating the call instruction with
3286
3287         sse_regs = length fp_regs_used
3288
3289         tot_arg_size = arg_size * length stack_args
3290
3291         -- On entry to the called function, %rsp should be aligned
3292         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3293         -- the return address is 16-byte aligned).  In STG land
3294         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3295         -- need to make sure we push a multiple of 16-bytes of args,
3296         -- plus the return address, to get the correct alignment.
3297         -- Urg, this is hard.  We need to feed the delta back into
3298         -- the arg pushing code.
3299     (real_size, adjust_rsp) <-
3300         if tot_arg_size `rem` 16 == 0
3301             then return (tot_arg_size, nilOL)
3302             else do -- we need to adjust...
3303                 delta <- getDeltaNat
3304                 setDeltaNat (delta-8)
3305                 return (tot_arg_size+8, toOL [
3306                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3307                                 DELTA (delta-8)
3308                         ])
3309
3310         -- push the stack args, right to left
3311     push_code <- push_args (reverse stack_args) nilOL
3312     delta <- getDeltaNat
3313
3314     -- deal with static vs dynamic call targets
3315     (callinsns,cconv) <-
3316       case target of
3317         -- CmmPrim -> ...
3318         CmmCallee (CmmLit (CmmLabel lbl)) conv
3319            -> -- ToDo: stdcall arg sizes
3320               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3321            where fn_imm = ImmCLbl lbl
3322         CmmCallee expr conv
3323            -> do (dyn_r, dyn_c) <- getSomeReg expr
3324                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3325
3326     let
3327         -- The x86_64 ABI requires us to set %al to the number of SSE
3328         -- registers that contain arguments, if the called routine
3329         -- is a varargs function.  We don't know whether it's a
3330         -- varargs function or not, so we have to assume it is.
3331         --
3332         -- It's not safe to omit this assignment, even if the number
3333         -- of SSE regs in use is zero.  If %al is larger than 8
3334         -- on entry to a varargs function, seg faults ensue.
3335         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3336
3337     let call = callinsns `appOL`
3338                toOL (
3339                         -- Deallocate parameters after call for ccall;
3340                         -- but not for stdcall (callee does it)
3341                   (if cconv == StdCallConv || real_size==0 then [] else
3342                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3343                   ++
3344                   [DELTA (delta + real_size)]
3345                )
3346     -- in
3347     setDeltaNat (delta + real_size)
3348
3349     let
3350         -- assign the results, if necessary
3351         assign_code []     = nilOL
3352         assign_code [CmmHinted dest _hint] =
3353           case rep of
3354                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3356                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3357           where
3358                 rep = localRegRep dest
3359                 r_dest = getRegisterReg (CmmLocal dest)
3360         assign_code many = panic "genCCall.assign_code many"
3361
3362     return (load_args_code      `appOL`
3363             adjust_rsp          `appOL`
3364             push_code           `appOL`
3365             assign_eax sse_regs `appOL`
3366             call                `appOL`
3367             assign_code dest_regs)
3368
3369   where
3370     arg_size = 8 -- always, at the mo
3371
3372     load_args :: [CmmHinted CmmExpr]
3373               -> [Reg]                  -- int regs avail for args
3374               -> [Reg]                  -- FP regs avail for args
3375               -> InstrBlock
3376               -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3377     load_args args [] [] code     =  return (args, [], [], code)
3378         -- no more regs to use
3379     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3380         -- no more args to push
3381     load_args ((CmmHinted arg hint) : rest) aregs fregs code
3382         | isFloatingRep arg_rep =
3383         case fregs of
3384           [] -> push_this_arg
3385           (r:rs) -> do
3386              arg_code <- getAnyReg arg
3387              load_args rest aregs rs (code `appOL` arg_code r)
3388         | otherwise =
3389         case aregs of
3390           [] -> push_this_arg
3391           (r:rs) -> do
3392              arg_code <- getAnyReg arg
3393              load_args rest rs fregs (code `appOL` arg_code r)
3394         where
3395           arg_rep = cmmExprRep arg
3396
3397           push_this_arg = do
3398             (args',ars,frs,code') <- load_args rest aregs fregs code
3399             return ((CmmHinted arg hint):args', ars, frs, code')
3400
3401     push_args [] code = return code
3402     push_args ((CmmHinted arg hint):rest) code
3403        | isFloatingRep arg_rep = do
3404          (arg_reg, arg_code) <- getSomeReg arg
3405          delta <- getDeltaNat
3406          setDeltaNat (delta-arg_size)
3407          let code' = code `appOL` arg_code `appOL` toOL [
3408                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3409                         DELTA (delta-arg_size),
3410                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3411          push_args rest code'
3412
3413        | otherwise = do
3414        -- we only ever generate word-sized function arguments.  Promotion
3415        -- has already happened: our Int8# type is kept sign-extended
3416        -- in an Int#, for example.
3417          ASSERT(arg_rep == I64) return ()
3418          (arg_op, arg_code) <- getOperand arg
3419          delta <- getDeltaNat
3420          setDeltaNat (delta-arg_size)
3421          let code' = code `appOL` toOL [PUSH I64 arg_op,
3422                                         DELTA (delta-arg_size)]
3423          push_args rest code'
3424         where
3425           arg_rep = cmmExprRep arg
3426 #endif
3427
3428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3429
3430 #if sparc_TARGET_ARCH
3431 {-
3432    The SPARC calling convention is an absolute
3433    nightmare.  The first 6x32 bits of arguments are mapped into
3434    %o0 through %o5, and the remaining arguments are dumped to the
3435    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3436
3437    If we have to put args on the stack, move %o6==%sp down by
3438    the number of words to go on the stack, to ensure there's enough space.
3439
3440    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3441    16 words above the stack pointer is a word for the address of
3442    a structure return value.  I use this as a temporary location
3443    for moving values from float to int regs.  Certainly it isn't
3444    safe to put anything in the 16 words starting at %sp, since
3445    this area can get trashed at any time due to window overflows
3446    caused by signal handlers.
3447
3448    A final complication (if the above isn't enough) is that
3449    we can't blithely calculate the arguments one by one into
3450    %o0 .. %o5.  Consider the following nested calls:
3451
3452        fff a (fff b c)
3453
3454    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3455    the inner call will itself use %o0, which trashes the value put there
3456    in preparation for the outer call.  Upshot: we need to calculate the
3457    args into temporary regs, and move those to arg regs or onto the
3458    stack only immediately prior to the call proper.  Sigh.
3459 -}
3460
3461 genCCall target dest_regs argsAndHints = do
3462     let
3463         args = map hintlessCmm argsAndHints
3464     argcode_and_vregs <- mapM arg_to_int_vregs args
3465     let
3466         (argcodes, vregss) = unzip argcode_and_vregs
3467         n_argRegs          = length allArgRegs
3468         n_argRegs_used     = min (length vregs) n_argRegs
3469         vregs              = concat vregss
3470     -- deal with static vs dynamic call targets
3471     callinsns <- (case target of
3472         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3473                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3474         CmmCallee expr conv -> do
3475                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3476                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3477         CmmPrim mop -> do
3478                   (res, reduce) <- outOfLineFloatOp mop
3479                   lblOrMopExpr <- case res of
3480                        Left lbl -> do
3481                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3482                        Right mopExpr -> do
3483                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3484                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3485                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3486
3487       )
3488     let
3489         argcode = concatOL argcodes
3490         (move_sp_down, move_sp_up)
3491            = let diff = length vregs - n_argRegs
3492                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3493              in  if   nn <= 0
3494                  then (nilOL, nilOL)
3495                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3496         transfer_code
3497            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3498     return (argcode       `appOL`
3499             move_sp_down  `appOL`
3500             transfer_code `appOL`
3501             callinsns     `appOL`
3502             unitOL NOP    `appOL`
3503             move_sp_up)
3504   where
3505      -- move args from the integer vregs into which they have been
3506      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3507      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3508
3509      move_final [] _ offset          -- all args done
3510         = []
3511
3512      move_final (v:vs) [] offset     -- out of aregs; move to stack
3513         = ST I32 v (spRel offset)
3514           : move_final vs [] (offset+1)
3515
3516      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3517         = OR False g0 (RIReg v) a
3518           : move_final vs az offset
3519
3520      -- generate code to calculate an argument, and move it into one
3521      -- or two integer vregs.
3522      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3523      arg_to_int_vregs arg
3524         | (cmmExprRep arg) == I64
3525         = do
3526           (ChildCode64 code r_lo) <- iselExpr64 arg
3527           let
3528               r_hi = getHiVRegFromLo r_lo
3529           return (code, [r_hi, r_lo])
3530         | otherwise
3531         = do
3532           (src, code) <- getSomeReg arg
3533           tmp <- getNewRegNat (cmmExprRep arg)
3534           let
3535               pk   = cmmExprRep arg
3536           case pk of
3537              F64 -> do
3538                       v1 <- getNewRegNat I32
3539                       v2 <- getNewRegNat I32
3540                       return (
3541                         code                          `snocOL`
3542                         FMOV F64 src f0                `snocOL`
3543                         ST   F32  f0 (spRel 16)         `snocOL`
3544                         LD   I32  (spRel 16) v1         `snocOL`
3545                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3546                         LD   I32  (spRel 16) v2
3547                         ,
3548                         [v1,v2]
3549                        )
3550              F32 -> do
3551                       v1 <- getNewRegNat I32
3552                       return (
3553                         code                    `snocOL`
3554                         ST   F32  src (spRel 16)  `snocOL`
3555                         LD   I32  (spRel 16) v1
3556                         ,
3557                         [v1]
3558                        )
3559              other -> do
3560                         v1 <- getNewRegNat I32
3561                         return (
3562                           code `snocOL` OR False g0 (RIReg src) v1
3563                           ,
3564                           [v1]
3565                          )
3566 outOfLineFloatOp mop =
3567     do
3568       dflags <- getDynFlagsNat
3569       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3570                   mkForeignLabel functionName Nothing True
3571       let mopLabelOrExpr = case mopExpr of
3572                         CmmLit (CmmLabel lbl) -> Left lbl
3573                         _ -> Right mopExpr
3574       return (mopLabelOrExpr, reduce)
3575             where
3576                 (reduce, functionName) = case mop of
3577                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3578                   MO_F32_Log    -> (True,  FSLIT("log"))
3579                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3580
3581                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3582                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3583                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3584
3585                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3586                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3587                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3588
3589                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3590                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3591                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3592
3593                   MO_F64_Exp    -> (False, FSLIT("exp"))
3594                   MO_F64_Log    -> (False, FSLIT("log"))
3595                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3596
3597                   MO_F64_Sin    -> (False, FSLIT("sin"))
3598                   MO_F64_Cos    -> (False, FSLIT("cos"))
3599                   MO_F64_Tan    -> (False, FSLIT("tan"))
3600
3601                   MO_F64_Asin   -> (False, FSLIT("asin"))
3602                   MO_F64_Acos   -> (False, FSLIT("acos"))
3603                   MO_F64_Atan   -> (False, FSLIT("atan"))
3604
3605                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3606                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3607                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3608
3609                   other -> pprPanic "outOfLineFloatOp(sparc) "
3610                                 (pprCallishMachOp mop)
3611
3612 #endif /* sparc_TARGET_ARCH */
3613
3614 #if powerpc_TARGET_ARCH
3615
3616 #if darwin_TARGET_OS || linux_TARGET_OS
3617 {-
3618     The PowerPC calling convention for Darwin/Mac OS X
3619     is described in Apple's document
3620     "Inside Mac OS X - Mach-O Runtime Architecture".
3621
3622     PowerPC Linux uses the System V Release 4 Calling Convention
3623     for PowerPC. It is described in the
3624     "System V Application Binary Interface PowerPC Processor Supplement".
3625
3626     Both conventions are similar:
3627     Parameters may be passed in general-purpose registers starting at r3, in
3628     floating point registers starting at f1, or on the stack.
3629
3630     But there are substantial differences:
3631     * The number of registers used for parameter passing and the exact set of
3632       nonvolatile registers differs (see MachRegs.lhs).
3633     * On Darwin, stack space is always reserved for parameters, even if they are
3634       passed in registers. The called routine may choose to save parameters from
3635       registers to the corresponding space on the stack.
3636     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3637       parameter is passed in an FPR.
3638     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3639       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3640       Darwin just treats an I64 like two separate I32s (high word first).
3641     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3642       4-byte aligned like everything else on Darwin.
3643     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3644       PowerPC Linux does not agree, so neither do we.
3645
3646     According to both conventions, The parameter area should be part of the
3647     caller's stack frame, allocated in the caller's prologue code (large enough
3648     to hold the parameter lists for all called routines). The NCG already
3649     uses the stack for register spilling, leaving 64 bytes free at the top.
3650     If we need a larger parameter area than that, we just allocate a new stack
3651     frame just before ccalling.
3652 -}
3653
3654
3655 genCCall (CmmPrim MO_WriteBarrier) _ _
3656  = return $ unitOL LWSYNC
3657
3658 genCCall target dest_regs argsAndHints
3659   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3660         -- we rely on argument promotion in the codeGen
3661     do
3662         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3663                                                         (zip args argReps)
3664                                                         allArgRegs allFPArgRegs
3665                                                         initialStackOffset
3666                                                         (toOL []) []
3667
3668         (labelOrExpr, reduceToF32) <- case target of
3669             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3670             CmmCallee expr conv -> return  (Right expr, False)
3671             CmmPrim mop -> outOfLineFloatOp mop
3672
3673         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3674             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3675
3676         case labelOrExpr of
3677             Left lbl -> do
3678                 return (         codeBefore
3679                         `snocOL` BL lbl usedRegs
3680                         `appOL`  codeAfter)
3681             Right dyn -> do
3682                 (dynReg, dynCode) <- getSomeReg dyn
3683                 return (         dynCode
3684                         `snocOL` MTCTR dynReg
3685                         `appOL`  codeBefore
3686                         `snocOL` BCTRL usedRegs
3687                         `appOL`  codeAfter)
3688     where
3689 #if darwin_TARGET_OS
3690         initialStackOffset = 24
3691             -- size of linkage area + size of arguments, in bytes
3692         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3693                                        map machRepByteWidth argReps
3694 #elif linux_TARGET_OS
3695         initialStackOffset = 8
3696         stackDelta finalStack = roundTo 16 finalStack
3697 #endif
3698         args = map hintlessCmm argsAndHints
3699         argReps = map cmmExprRep args
3700
3701         roundTo a x | x `mod` a == 0 = x
3702                     | otherwise = x + a - (x `mod` a)
3703
3704         move_sp_down finalStack
3705                | delta > 64 =
3706                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3707                               DELTA (-delta)]
3708                | otherwise = nilOL
3709                where delta = stackDelta finalStack
3710         move_sp_up finalStack
3711                | delta > 64 =
3712                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3713                               DELTA 0]
3714                | otherwise = nilOL
3715                where delta = stackDelta finalStack
3716
3717
3718         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3719         passArguments ((arg,I64):args) gprs fprs stackOffset
3720                accumCode accumUsed =
3721             do
3722                 ChildCode64 code vr_lo <- iselExpr64 arg
3723                 let vr_hi = getHiVRegFromLo vr_lo
3724
3725 #if darwin_TARGET_OS
3726                 passArguments args
3727                               (drop 2 gprs)
3728                               fprs
3729                               (stackOffset+8)
3730                               (accumCode `appOL` code
3731                                     `snocOL` storeWord vr_hi gprs stackOffset
3732                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3733                               ((take 2 gprs) ++ accumUsed)
3734             where
3735                 storeWord vr (gpr:_) offset = MR gpr vr
3736                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3737
3738 #elif linux_TARGET_OS
3739                 let stackOffset' = roundTo 8 stackOffset
3740                     stackCode = accumCode `appOL` code
3741                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3742                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3743                     regCode hireg loreg =
3744                         accumCode `appOL` code
3745                             `snocOL` MR hireg vr_hi
3746                             `snocOL` MR loreg vr_lo
3747
3748                 case gprs of
3749                     hireg : loreg : regs | even (length gprs) ->
3750                         passArguments args regs fprs stackOffset
3751                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3752                     _skipped : hireg : loreg : regs ->
3753                         passArguments args regs fprs stackOffset
3754                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3755                     _ -> -- only one or no regs left
3756                         passArguments args [] fprs (stackOffset'+8)
3757                                       stackCode accumUsed
3758 #endif
3759
3760         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3761             | reg : _ <- regs = do
3762                 register <- getRegister arg
3763                 let code = case register of
3764                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3765                             Any _ acode -> acode reg
3766                 passArguments args
3767                               (drop nGprs gprs)
3768                               (drop nFprs fprs)
3769 #if darwin_TARGET_OS
3770         -- The Darwin ABI requires that we reserve stack slots for register parameters
3771                               (stackOffset + stackBytes)
3772 #elif linux_TARGET_OS
3773         -- ... the SysV ABI doesn't.
3774                               stackOffset
3775 #endif
3776                               (accumCode `appOL` code)
3777                               (reg : accumUsed)
3778             | otherwise = do
3779                 (vr, code) <- getSomeReg arg
3780                 passArguments args
3781                               (drop nGprs gprs)
3782                               (drop nFprs fprs)
3783                               (stackOffset' + stackBytes)
3784                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3785                               accumUsed
3786             where
3787 #if darwin_TARGET_OS
3788         -- stackOffset is at least 4-byte aligned
3789         -- The Darwin ABI is happy with that.
3790                 stackOffset' = stackOffset
3791 #else
3792         -- ... the SysV ABI requires 8-byte alignment for doubles.
3793                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3794                              | otherwise  =           stackOffset
3795 #endif
3796                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3797                 (nGprs, nFprs, stackBytes, regs) = case rep of
3798                     I32 -> (1, 0, 4, gprs)
3799 #if darwin_TARGET_OS
3800         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3801         -- we use the FPRs.
3802                     F32 -> (1, 1, 4, fprs)
3803                     F64 -> (2, 1, 8, fprs)
3804 #elif linux_TARGET_OS
3805         -- ... the SysV ABI doesn't.
3806                     F32 -> (0, 1, 4, fprs)
3807                     F64 -> (0, 1, 8, fprs)
3808 #endif
3809
3810         moveResult reduceToF32 =
3811             case dest_regs of
3812                 [] -> nilOL
3813                 [CmmHinted dest _hint]
3814                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3815                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3816                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3817                                           MR r_dest r4]
3818                     | otherwise -> unitOL (MR r_dest r3)
3819                     where rep = cmmRegRep (CmmLocal dest)
3820                           r_dest = getRegisterReg (CmmLocal dest)
3821
3822         outOfLineFloatOp mop =
3823             do
3824                 dflags <- getDynFlagsNat
3825                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3826                               mkForeignLabel functionName Nothing True
3827                 let mopLabelOrExpr = case mopExpr of
3828                         CmmLit (CmmLabel lbl) -> Left lbl
3829                         _ -> Right mopExpr
3830                 return (mopLabelOrExpr, reduce)
3831             where
3832                 (functionName, reduce) = case mop of
3833                     MO_F32_Exp   -> (FSLIT("exp"), True)
3834                     MO_F32_Log   -> (FSLIT("log"), True)
3835                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3836
3837                     MO_F32_Sin   -> (FSLIT("sin"), True)
3838                     MO_F32_Cos   -> (FSLIT("cos"), True)
3839                     MO_F32_Tan   -> (FSLIT("tan"), True)
3840
3841                     MO_F32_Asin  -> (FSLIT("asin"), True)
3842                     MO_F32_Acos  -> (FSLIT("acos"), True)
3843                     MO_F32_Atan  -> (FSLIT("atan"), True)
3844
3845                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3846                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3847                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3848                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3849
3850                     MO_F64_Exp   -> (FSLIT("exp"), False)
3851                     MO_F64_Log   -> (FSLIT("log"), False)
3852                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3853
3854                     MO_F64_Sin   -> (FSLIT("sin"), False)
3855                     MO_F64_Cos   -> (FSLIT("cos"), False)
3856                     MO_F64_Tan   -> (FSLIT("tan"), False)
3857
3858                     MO_F64_Asin  -> (FSLIT("asin"), False)
3859                     MO_F64_Acos  -> (FSLIT("acos"), False)
3860                     MO_F64_Atan  -> (FSLIT("atan"), False)
3861
3862                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3863                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3864                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3865                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3866                     other -> pprPanic "genCCall(ppc): unknown callish op"
3867                                     (pprCallishMachOp other)
3868
3869 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3870
3871 #endif /* powerpc_TARGET_ARCH */
3872
3873
3874 -- -----------------------------------------------------------------------------
3875 -- Generating a table-branch
3876
3877 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3878
3879 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3880 genSwitch expr ids
3881   | opt_PIC
3882   = do
3883         (reg,e_code) <- getSomeReg expr
3884         lbl <- getNewLabelNat
3885         dflags <- getDynFlagsNat
3886         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3887         (tableReg,t_code) <- getSomeReg $ dynRef
3888         let
3889             jumpTable = map jumpTableEntryRel ids
3890
3891             jumpTableEntryRel Nothing
3892                 = CmmStaticLit (CmmInt 0 wordRep)
3893             jumpTableEntryRel (Just (BlockId id))
3894                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3895                 where blockLabel = mkAsmTempLabel id
3896
3897             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3898                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3899
3900 #if x86_64_TARGET_ARCH
3901 #if darwin_TARGET_OS
3902     -- on Mac OS X/x86_64, put the jump table in the text section
3903     -- to work around a limitation of the linker.
3904     -- ld64 is unable to handle the relocations for
3905     --     .quad L1 - L0
3906     -- if L0 is not preceded by a non-anonymous label in its section.
3907
3908             code = e_code `appOL` t_code `appOL` toOL [
3909                             ADD wordRep op (OpReg tableReg),
3910                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3911                             LDATA Text (CmmDataLabel lbl : jumpTable)
3912                     ]
3913 #else
3914     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3915     -- relocations, hence we only get 32-bit offsets in the jump
3916     -- table. As these offsets are always negative we need to properly
3917     -- sign extend them to 64-bit. This hack should be removed in
3918     -- conjunction with the hack in PprMach.hs/pprDataItem once
3919     -- binutils 2.17 is standard.
3920             code = e_code `appOL` t_code `appOL` toOL [
3921                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3922                             MOVSxL I32
3923                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3924                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
3925                                    (OpReg reg),
3926                             ADD wordRep (OpReg reg) (OpReg tableReg),
3927                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3928                    ]
3929 #endif
3930 #else
3931             code = e_code `appOL` t_code `appOL` toOL [
3932                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3933                             ADD wordRep op (OpReg tableReg),
3934                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3935                     ]
3936 #endif
3937         return code
3938   | otherwise
3939   = do
3940         (reg,e_code) <- getSomeReg expr
3941         lbl <- getNewLabelNat
3942         let
3943             jumpTable = map jumpTableEntry ids
3944             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3945             code = e_code `appOL` toOL [
3946                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3947                     JMP_TBL op [ id | Just id <- ids ]
3948                  ]
3949         -- in
3950         return code
3951 #elif powerpc_TARGET_ARCH
3952 genSwitch expr ids
3953   | opt_PIC
3954   = do
3955         (reg,e_code) <- getSomeReg expr
3956         tmp <- getNewRegNat I32
3957         lbl <- getNewLabelNat
3958         dflags <- getDynFlagsNat
3959         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3960         (tableReg,t_code) <- getSomeReg $ dynRef
3961         let
3962             jumpTable = map jumpTableEntryRel ids
3963
3964             jumpTableEntryRel Nothing
3965                 = CmmStaticLit (CmmInt 0 wordRep)
3966             jumpTableEntryRel (Just (BlockId id))
3967                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3968                 where blockLabel = mkAsmTempLabel id
3969
3970             code = e_code `appOL` t_code `appOL` toOL [
3971                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3972                             SLW tmp reg (RIImm (ImmInt 2)),
3973                             LD I32 tmp (AddrRegReg tableReg tmp),
3974                             ADD tmp tmp (RIReg tableReg),
3975                             MTCTR tmp,
3976                             BCTR [ id | Just id <- ids ]
3977                     ]
3978         return code
3979   | otherwise
3980   = do
3981         (reg,e_code) <- getSomeReg expr
3982         tmp <- getNewRegNat I32
3983         lbl <- getNewLabelNat
3984         let
3985             jumpTable = map jumpTableEntry ids
3986
3987             code = e_code `appOL` toOL [
3988                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3989                             SLW tmp reg (RIImm (ImmInt 2)),
3990                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3991                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3992                             MTCTR tmp,
3993                             BCTR [ id | Just id <- ids ]
3994                     ]
3995         return code
3996 #else
3997 genSwitch expr ids = panic "ToDo: genSwitch"
3998 #endif
3999
4000 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
4001 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4002     where blockLabel = mkAsmTempLabel id
4003
4004 -- -----------------------------------------------------------------------------
4005 -- Support bits
4006 -- -----------------------------------------------------------------------------
4007
4008
4009 -- -----------------------------------------------------------------------------
4010 -- 'condIntReg' and 'condFltReg': condition codes into registers
4011
4012 -- Turn those condition codes into integers now (when they appear on
4013 -- the right hand side of an assignment).
4014 --
4015 -- (If applicable) Do not fill the delay slots here; you will confuse the
4016 -- register allocator.
4017
4018 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4019
4020 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4021
4022 #if alpha_TARGET_ARCH
4023 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4024 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4025 #endif /* alpha_TARGET_ARCH */
4026
4027 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4028
4029 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4030
4031 condIntReg cond x y = do
4032   CondCode _ cond cond_code <- condIntCode cond x y
4033   tmp <- getNewRegNat I8
4034   let
4035         code dst = cond_code `appOL` toOL [
4036                     SETCC cond (OpReg tmp),
4037                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4038                   ]
4039   -- in
4040   return (Any I32 code)
4041
4042 #endif
4043
4044 #if i386_TARGET_ARCH
4045
4046 condFltReg cond x y = do
4047   CondCode _ cond cond_code <- condFltCode cond x y
4048   tmp <- getNewRegNat I8
4049   let
4050         code dst = cond_code `appOL` toOL [
4051                     SETCC cond (OpReg tmp),
4052                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4053                   ]
4054   -- in
4055   return (Any I32 code)
4056
4057 #endif
4058
4059 #if x86_64_TARGET_ARCH
4060
4061 condFltReg cond x y = do
4062   CondCode _ cond cond_code <- condFltCode cond x y
4063   tmp1 <- getNewRegNat wordRep
4064   tmp2 <- getNewRegNat wordRep
4065   let
4066         -- We have to worry about unordered operands (eg. comparisons
4067         -- against NaN).  If the operands are unordered, the comparison
4068         -- sets the parity flag, carry flag and zero flag.
4069         -- All comparisons are supposed to return false for unordered
4070         -- operands except for !=, which returns true.
4071         --
4072         -- Optimisation: we don't have to test the parity flag if we
4073         -- know the test has already excluded the unordered case: eg >
4074         -- and >= test for a zero carry flag, which can only occur for
4075         -- ordered operands.
4076         --
4077         -- ToDo: by reversing comparisons we could avoid testing the
4078         -- parity flag in more cases.
4079
4080         code dst =
4081            cond_code `appOL`
4082              (case cond of
4083                 NE  -> or_unordered dst
4084                 GU  -> plain_test   dst
4085                 GEU -> plain_test   dst
4086                 _   -> and_ordered  dst)
4087
4088         plain_test dst = toOL [
4089                     SETCC cond (OpReg tmp1),
4090                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4091                  ]
4092         or_unordered dst = toOL [
4093                     SETCC cond (OpReg tmp1),
4094                     SETCC PARITY (OpReg tmp2),
4095                     OR I8 (OpReg tmp1) (OpReg tmp2),
4096                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4097                   ]
4098         and_ordered dst = toOL [
4099                     SETCC cond (OpReg tmp1),
4100                     SETCC NOTPARITY (OpReg tmp2),
4101                     AND I8 (OpReg tmp1) (OpReg tmp2),
4102                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4103                   ]
4104   -- in
4105   return (Any I32 code)
4106
4107 #endif
4108
4109 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4110
4111 #if sparc_TARGET_ARCH
4112
4113 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4114     (src, code) <- getSomeReg x
4115     tmp <- getNewRegNat I32
4116     let
4117         code__2 dst = code `appOL` toOL [
4118             SUB False True g0 (RIReg src) g0,
4119             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4120     return (Any I32 code__2)
4121
4122 condIntReg EQQ x y = do
4123     (src1, code1) <- getSomeReg x
4124     (src2, code2) <- getSomeReg y
4125     tmp1 <- getNewRegNat I32
4126     tmp2 <- getNewRegNat I32
4127     let
4128         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4129             XOR False src1 (RIReg src2) dst,
4130             SUB False True g0 (RIReg dst) g0,
4131             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4132     return (Any I32 code__2)
4133
4134 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4135     (src, code) <- getSomeReg x
4136     tmp <- getNewRegNat I32
4137     let
4138         code__2 dst = code `appOL` toOL [
4139             SUB False True g0 (RIReg src) g0,
4140             ADD True False g0 (RIImm (ImmInt 0)) dst]
4141     return (Any I32 code__2)
4142
4143 condIntReg NE x y = do
4144     (src1, code1) <- getSomeReg x
4145     (src2, code2) <- getSomeReg y
4146     tmp1 <- getNewRegNat I32
4147     tmp2 <- getNewRegNat I32
4148     let
4149         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4150             XOR False src1 (RIReg src2) dst,
4151             SUB False True g0 (RIReg dst) g0,
4152             ADD True False g0 (RIImm (ImmInt 0)) dst]
4153     return (Any I32 code__2)
4154
4155 condIntReg cond x y = do
4156     BlockId lbl1 <- getBlockIdNat
4157     BlockId lbl2 <- getBlockIdNat
4158     CondCode _ cond cond_code <- condIntCode cond x y
4159     let
4160         code__2 dst = cond_code `appOL` toOL [
4161             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4162             OR False g0 (RIImm (ImmInt 0)) dst,
4163             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4164             NEWBLOCK (BlockId lbl1),
4165             OR False g0 (RIImm (ImmInt 1)) dst,
4166             NEWBLOCK (BlockId lbl2)]
4167     return (Any I32 code__2)
4168
4169 condFltReg cond x y = do
4170     BlockId lbl1 <- getBlockIdNat
4171     BlockId lbl2 <- getBlockIdNat
4172     CondCode _ cond cond_code <- condFltCode cond x y
4173     let
4174         code__2 dst = cond_code `appOL` toOL [
4175             NOP,
4176             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4177             OR False g0 (RIImm (ImmInt 0)) dst,
4178             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4179             NEWBLOCK (BlockId lbl1),
4180             OR False g0 (RIImm (ImmInt 1)) dst,
4181             NEWBLOCK (BlockId lbl2)]
4182     return (Any I32 code__2)
4183
4184 #endif /* sparc_TARGET_ARCH */
4185
4186 #if powerpc_TARGET_ARCH
4187 condReg getCond = do
4188     lbl1 <- getBlockIdNat
4189     lbl2 <- getBlockIdNat
4190     CondCode _ cond cond_code <- getCond
4191     let
4192 {-        code dst = cond_code `appOL` toOL [
4193                 BCC cond lbl1,
4194                 LI dst (ImmInt 0),
4195                 BCC ALWAYS lbl2,
4196                 NEWBLOCK lbl1,
4197                 LI dst (ImmInt 1),
4198                 BCC ALWAYS lbl2,
4199                 NEWBLOCK lbl2
4200             ]-}
4201         code dst = cond_code
4202             `appOL` negate_code
4203             `appOL` toOL [
4204                 MFCR dst,
4205                 RLWINM dst dst (bit + 1) 31 31
4206             ]
4207
4208         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4209                     | otherwise = nilOL
4210
4211         (bit, do_negate) = case cond of
4212             LTT -> (0, False)
4213             LE  -> (1, True)
4214             EQQ -> (2, False)
4215             GE  -> (0, True)
4216             GTT -> (1, False)
4217
4218             NE  -> (2, True)
4219
4220             LU  -> (0, False)
4221             LEU -> (1, True)
4222             GEU -> (0, True)
4223             GU  -> (1, False)
4224
4225     return (Any I32 code)
4226
4227 condIntReg cond x y = condReg (condIntCode cond x y)
4228 condFltReg cond x y = condReg (condFltCode cond x y)
4229 #endif /* powerpc_TARGET_ARCH */
4230
4231
4232 -- -----------------------------------------------------------------------------
4233 -- 'trivial*Code': deal with trivial instructions
4234
4235 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4236 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4237 -- Only look for constants on the right hand side, because that's
4238 -- where the generic optimizer will have put them.
4239
4240 -- Similarly, for unary instructions, we don't have to worry about
4241 -- matching an StInt as the argument, because genericOpt will already
4242 -- have handled the constant-folding.
4243
4244 trivialCode
4245     :: MachRep
4246     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4247       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4248                      -> Maybe (Operand -> Operand -> Instr)
4249       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4250                      -> Maybe (Operand -> Operand -> Instr)
4251       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4252       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4253       ,)))))
4254     -> CmmExpr -> CmmExpr -- the two arguments
4255     -> NatM Register
4256
4257 #ifndef powerpc_TARGET_ARCH
4258 trivialFCode
4259     :: MachRep
4260     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4261       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4262       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4263       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4264       ,))))
4265     -> CmmExpr -> CmmExpr -- the two arguments
4266     -> NatM Register
4267 #endif
4268
4269 trivialUCode
4270     :: MachRep
4271     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4272       ,IF_ARCH_i386 ((Operand -> Instr)
4273       ,IF_ARCH_x86_64 ((Operand -> Instr)
4274       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4275       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4276       ,)))))
4277     -> CmmExpr  -- the one argument
4278     -> NatM Register
4279
4280 #ifndef powerpc_TARGET_ARCH
4281 trivialUFCode
4282     :: MachRep
4283     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4284       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4285       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4286       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4287       ,))))
4288     -> CmmExpr -- the one argument
4289     -> NatM Register
4290 #endif
4291
4292 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4293
4294 #if alpha_TARGET_ARCH
4295
4296 trivialCode instr x (StInt y)
4297   | fits8Bits y
4298   = getRegister x               `thenNat` \ register ->
4299     getNewRegNat IntRep         `thenNat` \ tmp ->
4300     let
4301         code = registerCode register tmp
4302         src1 = registerName register tmp
4303         src2 = ImmInt (fromInteger y)
4304         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4305     in
4306     return (Any IntRep code__2)
4307
4308 trivialCode instr x y
4309   = getRegister x               `thenNat` \ register1 ->
4310     getRegister y               `thenNat` \ register2 ->
4311     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4312     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4313     let
4314         code1 = registerCode register1 tmp1 []
4315         src1  = registerName register1 tmp1
4316         code2 = registerCode register2 tmp2 []
4317         src2  = registerName register2 tmp2
4318         code__2 dst = asmSeqThen [code1, code2] .
4319                      mkSeqInstr (instr src1 (RIReg src2) dst)
4320     in
4321     return (Any IntRep code__2)
4322
4323 ------------
4324 trivialUCode instr x
4325   = getRegister x               `thenNat` \ register ->
4326     getNewRegNat IntRep         `thenNat` \ tmp ->
4327     let
4328         code = registerCode register tmp
4329         src  = registerName register tmp
4330         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4331     in
4332     return (Any IntRep code__2)
4333
4334 ------------
4335 trivialFCode _ instr x y
4336   = getRegister x               `thenNat` \ register1 ->
4337     getRegister y               `thenNat` \ register2 ->
4338     getNewRegNat F64    `thenNat` \ tmp1 ->
4339     getNewRegNat F64    `thenNat` \ tmp2 ->
4340     let
4341         code1 = registerCode register1 tmp1
4342         src1  = registerName register1 tmp1
4343
4344         code2 = registerCode register2 tmp2
4345         src2  = registerName register2 tmp2
4346
4347         code__2 dst = asmSeqThen [code1 [], code2 []] .
4348                       mkSeqInstr (instr src1 src2 dst)
4349     in
4350     return (Any F64 code__2)
4351
4352 trivialUFCode _ instr x
4353   = getRegister x               `thenNat` \ register ->
4354     getNewRegNat F64    `thenNat` \ tmp ->
4355     let
4356         code = registerCode register tmp
4357         src  = registerName register tmp
4358         code__2 dst = code . mkSeqInstr (instr src dst)
4359     in
4360     return (Any F64 code__2)
4361
4362 #endif /* alpha_TARGET_ARCH */
4363
4364 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4365
4366 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4367
4368 {-
4369 The Rules of the Game are:
4370
4371 * You cannot assume anything about the destination register dst;
4372   it may be anything, including a fixed reg.
4373
4374 * You may compute an operand into a fixed reg, but you may not
4375   subsequently change the contents of that fixed reg.  If you
4376   want to do so, first copy the value either to a temporary
4377   or into dst.  You are free to modify dst even if it happens
4378   to be a fixed reg -- that's not your problem.
4379
4380 * You cannot assume that a fixed reg will stay live over an
4381   arbitrary computation.  The same applies to the dst reg.
4382
4383 * Temporary regs obtained from getNewRegNat are distinct from
4384   each other and from all other regs, and stay live over
4385   arbitrary computations.
4386
4387 --------------------
4388
4389 SDM's version of The Rules:
4390
4391 * If getRegister returns Any, that means it can generate correct
4392   code which places the result in any register, period.  Even if that
4393   register happens to be read during the computation.
4394
4395   Corollary #1: this means that if you are generating code for an
4396   operation with two arbitrary operands, you cannot assign the result
4397   of the first operand into the destination register before computing
4398   the second operand.  The second operand might require the old value
4399   of the destination register.
4400
4401   Corollary #2: A function might be able to generate more efficient
4402   code if it knows the destination register is a new temporary (and
4403   therefore not read by any of the sub-computations).
4404
4405 * If getRegister returns Any, then the code it generates may modify only:
4406         (a) fresh temporaries
4407         (b) the destination register
4408         (c) known registers (eg. %ecx is used by shifts)
4409   In particular, it may *not* modify global registers, unless the global
4410   register happens to be the destination register.
4411 -}
4412
4413 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4414   | not (is64BitLit lit_a) = do
4415   b_code <- getAnyReg b
4416   let
4417        code dst
4418          = b_code dst `snocOL`
4419            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4420   -- in
4421   return (Any rep code)
4422
4423 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4424
4425 -- This is re-used for floating pt instructions too.
4426 genTrivialCode rep instr a b = do
4427   (b_op, b_code) <- getNonClobberedOperand b
4428   a_code <- getAnyReg a
4429   tmp <- getNewRegNat rep
4430   let
4431      -- We want the value of b to stay alive across the computation of a.
4432      -- But, we want to calculate a straight into the destination register,
4433      -- because the instruction only has two operands (dst := dst `op` src).
4434      -- The troublesome case is when the result of b is in the same register
4435      -- as the destination reg.  In this case, we have to save b in a
4436      -- new temporary across the computation of a.
4437      code dst
4438         | dst `regClashesWithOp` b_op =
4439                 b_code `appOL`
4440                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4441                 a_code dst `snocOL`
4442                 instr (OpReg tmp) (OpReg dst)
4443         | otherwise =
4444                 b_code `appOL`
4445                 a_code dst `snocOL`
4446                 instr b_op (OpReg dst)
4447   -- in
4448   return (Any rep code)
4449
4450 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4451 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4452 reg `regClashesWithOp` _            = False
4453
4454 -----------
4455
4456 trivialUCode rep instr x = do
4457   x_code <- getAnyReg x
4458   let
4459      code dst =
4460         x_code dst `snocOL`
4461         instr (OpReg dst)
4462   -- in
4463   return (Any rep code)
4464
4465 -----------
4466
4467 #if i386_TARGET_ARCH
4468
4469 trivialFCode pk instr x y = do
4470   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4471   (y_reg, y_code) <- getSomeReg y
4472   let
4473      code dst =
4474         x_code `appOL`
4475         y_code `snocOL`
4476         instr pk x_reg y_reg dst
4477   -- in
4478   return (Any pk code)
4479
4480 #endif
4481
4482 #if x86_64_TARGET_ARCH
4483
4484 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4485
4486 #endif
4487
4488 -------------
4489
4490 trivialUFCode rep instr x = do
4491   (x_reg, x_code) <- getSomeReg x
4492   let
4493      code dst =
4494         x_code `snocOL`
4495         instr x_reg dst
4496   -- in
4497   return (Any rep code)
4498
4499 #endif /* i386_TARGET_ARCH */
4500
4501 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4502
4503 #if sparc_TARGET_ARCH
4504
4505 trivialCode pk instr x (CmmLit (CmmInt y d))
4506   | fits13Bits y
4507   = do
4508       (src1, code) <- getSomeReg x
4509       tmp <- getNewRegNat I32
4510       let
4511         src2 = ImmInt (fromInteger y)
4512         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4513       return (Any I32 code__2)
4514
4515 trivialCode pk instr x y = do
4516     (src1, code1) <- getSomeReg x
4517     (src2, code2) <- getSomeReg y
4518     tmp1 <- getNewRegNat I32
4519     tmp2 <- getNewRegNat I32
4520     let
4521         code__2 dst = code1 `appOL` code2 `snocOL`
4522                       instr src1 (RIReg src2) dst
4523     return (Any I32 code__2)
4524
4525 ------------
4526 trivialFCode pk instr x y = do
4527     (src1, code1) <- getSomeReg x
4528     (src2, code2) <- getSomeReg y
4529     tmp1 <- getNewRegNat (cmmExprRep x)
4530     tmp2 <- getNewRegNat (cmmExprRep y)
4531     tmp <- getNewRegNat F64
4532     let
4533         promote x = FxTOy F32 F64 x tmp
4534
4535         pk1   = cmmExprRep x
4536         pk2   = cmmExprRep y
4537
4538         code__2 dst =
4539                 if pk1 == pk2 then
4540                     code1 `appOL` code2 `snocOL`
4541                     instr pk src1 src2 dst
4542                 else if pk1 == F32 then
4543                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4544                     instr F64 tmp src2 dst
4545                 else
4546                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4547                     instr F64 src1 tmp dst
4548     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4549
4550 ------------
4551 trivialUCode pk instr x = do
4552     (src, code) <- getSomeReg x
4553     tmp <- getNewRegNat pk
4554     let
4555         code__2 dst = code `snocOL` instr (RIReg src) dst
4556     return (Any pk code__2)
4557
4558 -------------
4559 trivialUFCode pk instr x = do
4560     (src, code) <- getSomeReg x
4561     tmp <- getNewRegNat pk
4562     let
4563         code__2 dst = code `snocOL` instr src dst
4564     return (Any pk code__2)
4565
4566 #endif /* sparc_TARGET_ARCH */
4567
4568 #if powerpc_TARGET_ARCH
4569
4570 {-
4571 Wolfgang's PowerPC version of The Rules:
4572
4573 A slightly modified version of The Rules to take advantage of the fact
4574 that PowerPC instructions work on all registers and don't implicitly
4575 clobber any fixed registers.
4576
4577 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4578
4579 * If getRegister returns Any, then the code it generates may modify only:
4580         (a) fresh temporaries
4581         (b) the destination register
4582   It may *not* modify global registers, unless the global
4583   register happens to be the destination register.
4584   It may not clobber any other registers. In fact, only ccalls clobber any
4585   fixed registers.
4586   Also, it may not modify the counter register (used by genCCall).
4587
4588   Corollary: If a getRegister for a subexpression returns Fixed, you need
4589   not move it to a fresh temporary before evaluating the next subexpression.
4590   The Fixed register won't be modified.
4591   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4592
4593 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4594   the value of the destination register.
4595 -}
4596
4597 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4598     | Just imm <- makeImmediate rep signed y
4599     = do
4600         (src1, code1) <- getSomeReg x
4601         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4602         return (Any rep code)
4603
4604 trivialCode rep signed instr x y = do
4605     (src1, code1) <- getSomeReg x
4606     (src2, code2) <- getSomeReg y
4607     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4608     return (Any rep code)
4609
4610 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4611     -> CmmExpr -> CmmExpr -> NatM Register
4612 trivialCodeNoImm rep instr x y = do
4613     (src1, code1) <- getSomeReg x
4614     (src2, code2) <- getSomeReg y
4615     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4616     return (Any rep code)
4617
4618 trivialUCode rep instr x = do
4619     (src, code) <- getSomeReg x
4620     let code' dst = code `snocOL` instr dst src
4621     return (Any rep code')
4622
4623 -- There is no "remainder" instruction on the PPC, so we have to do
4624 -- it the hard way.
4625 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4626
4627 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4628     -> CmmExpr -> CmmExpr -> NatM Register
4629 remainderCode rep div x y = do
4630     (src1, code1) <- getSomeReg x
4631     (src2, code2) <- getSomeReg y
4632     let code dst = code1 `appOL` code2 `appOL` toOL [
4633                 div dst src1 src2,
4634                 MULLW dst dst (RIReg src2),
4635                 SUBF dst dst src1
4636             ]
4637     return (Any rep code)
4638
4639 #endif /* powerpc_TARGET_ARCH */
4640
4641
4642 -- -----------------------------------------------------------------------------
4643 --  Coercing to/from integer/floating-point...
4644
4645 -- When going to integer, we truncate (round towards 0).
4646
4647 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4648 -- conversions.  We have to store temporaries in memory to move
4649 -- between the integer and the floating point register sets.
4650
4651 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4652 -- pretend, on sparc at least, that double and float regs are seperate
4653 -- kinds, so the value has to be computed into one kind before being
4654 -- explicitly "converted" to live in the other kind.
4655
4656 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4657 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4658
4659 #if sparc_TARGET_ARCH
4660 coerceDbl2Flt :: CmmExpr -> NatM Register
4661 coerceFlt2Dbl :: CmmExpr -> NatM Register
4662 #endif
4663
4664 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4665
4666 #if alpha_TARGET_ARCH
4667
4668 coerceInt2FP _ x
4669   = getRegister x               `thenNat` \ register ->
4670     getNewRegNat IntRep         `thenNat` \ reg ->
4671     let
4672         code = registerCode register reg
4673         src  = registerName register reg
4674
4675         code__2 dst = code . mkSeqInstrs [
4676             ST Q src (spRel 0),
4677             LD TF dst (spRel 0),
4678             CVTxy Q TF dst dst]
4679     in
4680     return (Any F64 code__2)
4681
4682 -------------
4683 coerceFP2Int x
4684   = getRegister x               `thenNat` \ register ->
4685     getNewRegNat F64    `thenNat` \ tmp ->
4686     let
4687         code = registerCode register tmp
4688         src  = registerName register tmp
4689
4690         code__2 dst = code . mkSeqInstrs [
4691             CVTxy TF Q src tmp,
4692             ST TF tmp (spRel 0),
4693             LD Q dst (spRel 0)]
4694     in
4695     return (Any IntRep code__2)
4696
4697 #endif /* alpha_TARGET_ARCH */
4698
4699 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4700
4701 #if i386_TARGET_ARCH
4702
4703 coerceInt2FP from to x = do
4704   (x_reg, x_code) <- getSomeReg x
4705   let
4706         opc  = case to of F32 -> GITOF; F64 -> GITOD
4707         code dst = x_code `snocOL` opc x_reg dst
4708         -- ToDo: works for non-I32 reps?
4709   -- in
4710   return (Any to code)
4711
4712 ------------
4713
4714 coerceFP2Int from to x = do
4715   (x_reg, x_code) <- getSomeReg x
4716   let
4717         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4718         code dst = x_code `snocOL` opc x_reg dst
4719         -- ToDo: works for non-I32 reps?
4720   -- in
4721   return (Any to code)
4722
4723 #endif /* i386_TARGET_ARCH */
4724
4725 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4726
4727 #if x86_64_TARGET_ARCH
4728
4729 coerceFP2Int from to x = do
4730   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4731   let
4732         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4733         code dst = x_code `snocOL` opc x_op dst
4734   -- in
4735   return (Any to code) -- works even if the destination rep is <I32
4736
4737 coerceInt2FP from to x = do
4738   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4739   let
4740         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4741         code dst = x_code `snocOL` opc x_op dst
4742   -- in
4743   return (Any to code) -- works even if the destination rep is <I32
4744
4745 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4746 coerceFP2FP to x = do
4747   (x_reg, x_code) <- getSomeReg x
4748   let
4749         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4750         code dst = x_code `snocOL` opc x_reg dst
4751   -- in
4752   return (Any to code)
4753
4754 #endif
4755
4756 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4757
4758 #if sparc_TARGET_ARCH
4759
4760 coerceInt2FP pk1 pk2 x = do
4761     (src, code) <- getSomeReg x
4762     let
4763         code__2 dst = code `appOL` toOL [
4764             ST pk1 src (spRel (-2)),
4765             LD pk1 (spRel (-2)) dst,
4766             FxTOy pk1 pk2 dst dst]
4767     return (Any pk2 code__2)
4768
4769 ------------
4770 coerceFP2Int pk fprep x = do
4771     (src, code) <- getSomeReg x
4772     reg <- getNewRegNat fprep
4773     tmp <- getNewRegNat pk
4774     let
4775         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4776             code `appOL` toOL [
4777             FxTOy fprep pk src tmp,
4778             ST pk tmp (spRel (-2)),
4779             LD pk (spRel (-2)) dst]
4780     return (Any pk code__2)
4781
4782 ------------
4783 coerceDbl2Flt x = do
4784     (src, code) <- getSomeReg x
4785     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4786
4787 ------------
4788 coerceFlt2Dbl x = do
4789     (src, code) <- getSomeReg x
4790     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4791
4792 #endif /* sparc_TARGET_ARCH */
4793
4794 #if powerpc_TARGET_ARCH
4795 coerceInt2FP fromRep toRep x = do
4796     (src, code) <- getSomeReg x
4797     lbl <- getNewLabelNat
4798     itmp <- getNewRegNat I32
4799     ftmp <- getNewRegNat F64
4800     dflags <- getDynFlagsNat
4801     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4802     Amode addr addr_code <- getAmode dynRef
4803     let
4804         code' dst = code `appOL` maybe_exts `appOL` toOL [
4805                 LDATA ReadOnlyData
4806                                 [CmmDataLabel lbl,
4807                                  CmmStaticLit (CmmInt 0x43300000 I32),
4808                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4809                 XORIS itmp src (ImmInt 0x8000),
4810                 ST I32 itmp (spRel 3),
4811                 LIS itmp (ImmInt 0x4330),
4812                 ST I32 itmp (spRel 2),
4813                 LD F64 ftmp (spRel 2)
4814             ] `appOL` addr_code `appOL` toOL [
4815                 LD F64 dst addr,
4816                 FSUB F64 dst ftmp dst
4817             ] `appOL` maybe_frsp dst
4818
4819         maybe_exts = case fromRep of
4820                         I8 ->  unitOL $ EXTS I8 src src
4821                         I16 -> unitOL $ EXTS I16 src src
4822                         I32 -> nilOL
4823         maybe_frsp dst = case toRep of
4824                         F32 -> unitOL $ FRSP dst dst
4825                         F64 -> nilOL
4826     return (Any toRep code')
4827
4828 coerceFP2Int fromRep toRep x = do
4829     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4830     (src, code) <- getSomeReg x
4831     tmp <- getNewRegNat F64
4832     let
4833         code' dst = code `appOL` toOL [
4834                 -- convert to int in FP reg
4835             FCTIWZ tmp src,
4836                 -- store value (64bit) from FP to stack
4837             ST F64 tmp (spRel 2),
4838                 -- read low word of value (high word is undefined)
4839             LD I32 dst (spRel 3)]
4840     return (Any toRep code')
4841 #endif /* powerpc_TARGET_ARCH */
4842
4843
4844 -- -----------------------------------------------------------------------------
4845 -- eXTRA_STK_ARGS_HERE
4846
4847 -- We (allegedly) put the first six C-call arguments in registers;
4848 -- where do we start putting the rest of them?
4849
4850 -- Moved from MachInstrs (SDM):
4851
4852 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4853 eXTRA_STK_ARGS_HERE :: Int
4854 eXTRA_STK_ARGS_HERE
4855   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4856 #endif
4857