compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning suppression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo ( mkBranchInstr )
  33
  34 -- Our intermediate code:
  35 import PprCmm           ( pprExpr )
  36 import Cmm
  37 import MachOp
  38 import CLabel
  39 import ClosureInfo      ( C_SRT(..) )
  40
  41 -- The rest:
  42 import StaticFlags      ( opt_PIC )
  43 import ForeignCall      ( CCallConv(..) )
  44 import OrdList
  45 import Pretty
  46 import Outputable
  47 import FastString
  48 import FastBool         ( isFastTrue )
  49 import Constants        ( wORD_SIZE )
  50
  51 #ifdef DEBUG
  52 import Outputable       ( assertPanic )
  53 import Debug.Trace      ( trace )
  54 #endif
  55 import Debug.Trace      ( trace )
  56
  57 import Control.Monad    ( mapAndUnzipM )
  58 import Data.Maybe       ( fromJust )
  59 import Data.Bits
  60 import Data.Word
  61 import Data.Int
  62
  63 -- -----------------------------------------------------------------------------
  64 -- Top-level of the instruction selector
  65
  66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  67 -- They are really trees of insns to facilitate fast appending, where a
  68 -- left-to-right traversal (pre-order?) yields the insns in the correct
  69 -- order.
  70
  71 type InstrBlock = OrdList Instr
  72
  73 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
     -- Procedures: run instruction selection over every basic block, glue the
     -- resulting native blocks back into a single CmmProc, and append the
     -- static data the blocks produced.  When getPicBaseMaybeNat reports a
     -- PIC base register, the whole list is handed to initializePicBase.
  74 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  75   (blockss, staticss) <- mapAndUnzipM basicBlockCodeGen blocks
  76   mbPicBase <- getPicBaseMaybeNat
  77   let natProc = CmmProc info lab params (ListGraph (concat blockss))
  78       allTops = natProc : concat staticss
  79   maybe (return allTops)
  80         (\picBase -> initializePicBase picBase allTops)
  81         mbPicBase
  82
     -- Data sections need no translation: CmmStatic is used as-is.
  83 cmmTopCodeGen (CmmData sec dat)
  84   = return [CmmData sec dat]
  85
  86 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
     -- Select instructions for one Cmm basic block.  The result pairs the
     -- native basic blocks it expands to (the first one keeps the original
     -- block id) with the data sections extracted from LDATA pseudo-instrs.
  87 basicBlockCodeGen (BasicBlock id stmts) = do
  88   instrs <- stmtsToInstrs stmts
  89   -- code generation may introduce new basic block boundaries, which
  90   -- are indicated by the NEWBLOCK instruction.  We must split up the
  91   -- instruction stream into basic blocks again.  Also, we extract
  92   -- LDATAs here too.
  93   let
  94         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  95
         -- mkBlocks is the folding step.  Its accumulator is
         -- (instructions of the block being built, finished blocks, statics).
         -- Note that the 'id', 'instrs' and 'statics' bound in the patterns
         -- below shadow the outer bindings of the same names.
         --
         -- NEWBLOCK: everything accumulated so far becomes the body of the
         -- block with that id, and accumulation restarts from empty.
  96         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  97           = ([], BasicBlock id instrs : blocks, statics)
         -- LDATA: not an instruction; moved into the statics list as CmmData.
  98         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  99           = (instrs, blocks, CmmData sec dat:statics)
         -- Anything else is an ordinary instruction of the current block.
 100         mkBlocks instr (instrs,blocks,statics)
 101           = (instr:instrs, blocks, statics)
 102   -- in
     -- 'top' holds the instructions left in the accumulator when the fold
     -- finishes; they are filed under the original block id.
 103   return (BasicBlock id top : other_blocks, statics)
 104
 105 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
     -- Translate each statement in order and concatenate the resulting
     -- instruction blocks into one.
 106 stmtsToInstrs stmts
 107    = mapM stmtToInstrs stmts >>= \perStmt ->
 108      return (concatOL perStmt)
 109
 110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
     -- Instruction selection for a single Cmm statement: dispatch on the
     -- statement form and, for assignments and stores, on the MachRep of the
     -- value involved.
 111 stmtToInstrs stmt = case stmt of
 112     CmmNop         -> return nilOL
 113     CmmComment s   -> return (unitOL (COMMENT s))
 114
         -- Register assignment: the rep is taken from the destination register.
         -- On 32-bit targets I64 gets its own two-register code path.
 115     CmmAssign reg src
 116       | isFloatingRep kind -> assignReg_FltCode kind reg src
 117 #if WORD_SIZE_IN_BITS==32
 118       | kind == I64        -> assignReg_I64Code      reg src
 119 #endif
 120       | otherwise          -> assignReg_IntCode kind reg src
 121         where kind = cmmRegRep reg
 122
         -- Memory store: the rep is taken from the expression being stored.
 123     CmmStore addr src
 124       | isFloatingRep kind -> assignMem_FltCode kind addr src
 125 #if WORD_SIZE_IN_BITS==32
 126       | kind == I64      -> assignMem_I64Code      addr src
 127 #endif
 128       | otherwise        -> assignMem_IntCode kind addr src
 129         where kind = cmmExprRep src
 130
         -- The last two fields of CmmCall are ignored here.
 131     CmmCall target result_regs args _ _
 132        -> genCCall target result_regs args
 133
 134     CmmBranch id          -> genBranch id
 135     CmmCondBranch arg id  -> genCondJump id arg
 136     CmmSwitch arg ids     -> genSwitch arg ids
         -- The jump's parameter list is not used by the code generator.
 137     CmmJump arg params    -> genJump arg
 138
 139 -- -----------------------------------------------------------------------------
 140 -- General things for putting together code sequences
 141
 142 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 143 -- CmmExprs into CmmRegOff?
     --
     -- Rewrites @CmmRegOff reg off@ into the explicit addition
     -- @reg + off@, using the register's own MachRep for both the MO_Add and
     -- the literal.  Only CmmRegOff is a valid argument; anything else is a
     -- caller bug and is reported with a named panic rather than an anonymous
     -- pattern-match failure.
 144 mangleIndexTree :: CmmExpr -> CmmExpr
 145 mangleIndexTree (CmmRegOff reg off)
 146   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 147   where rep = cmmRegRep reg
     mangleIndexTree _
       = panic "MachCodeGen.mangleIndexTree: no match"
 148
 149 -- -----------------------------------------------------------------------------
 150 --  Code gen for 64-bit arithmetic on 32-bit platforms
 151
 152 {-
 153 Simple support for generating 64-bit code (ie, 64 bit values and 64
 154 bit assignments) on 32-bit platforms.  Unlike the main code generator
 155 we merely shoot for generating working code as simply as possible, and
 156 pay little attention to code quality.  Specifically, there is no
 157 attempt to deal cleverly with the fixed-vs-floating register
 158 distinction; all values are generated into (pairs of) floating
 159 registers, even if this would mean some redundant reg-reg moves as a
 160 result.  Only one of the VRegUniques is returned, since it will be
 161 of the VRegUniqueLo form, and the upper-half VReg can be determined
 162 by applying getHiVRegFromLo to it.
 163 -}
 164
     -- Result of selecting instructions for a 64-bit expression on a 32-bit
     -- target: the code that computes the value plus the register holding
     -- its low half.
 165 data ChildCode64        -- a.k.a "Register64"
 166    = ChildCode64
 167         InstrBlock      -- code
 168         Reg             -- the lower 32-bit temporary which contains the
 169                         -- result; use getHiVRegFromLo to find the other
 170                         -- VRegUnique.  Rules of this simplified insn
 171                         -- selection game are therefore that the returned
 172                         -- Reg may be modified
 173
 174 #if WORD_SIZE_IN_BITS==32
 175 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 176 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 177 #endif
 178
 179 #ifndef x86_64_TARGET_ARCH
 180 iselExpr64        :: CmmExpr -> NatM ChildCode64
 181 #endif
 182
 183 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 184
 185 #if i386_TARGET_ARCH
 186
 187 assignMem_I64Code addrTree valueTree = do
     -- i386: store a 64-bit value as two 32-bit MOVs.  Emitted order is:
     -- value code, address code, low-word store, high-word store.
 188   Amode addr addr_code <- getAmode addrTree
 189   ChildCode64 vcode rlo <- iselExpr64 valueTree
 190   let
 191         rhi = getHiVRegFromLo rlo
 192
 193         -- Little-endian store
         -- (low word at addr, high word at addr+4).  fromJust will fail if
         -- addrOffset cannot express addr+4 for this addressing mode.
 194         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 195         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 196   -- in
 197   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 198
 199
     -- i386: assign a 64-bit value to a local register.  The destination is
     -- the I32 vreg made from the local's unique plus its hi partner; the
     -- value is copied word by word from the pair iselExpr64 produced.
 200 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 201    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 202    let
 203          r_dst_lo = mkVReg u_dst I32
 204          r_dst_hi = getHiVRegFromLo r_dst_lo
 205          r_src_hi = getHiVRegFromLo r_src_lo
 206          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 207          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 208    -- in
 209    return (
 210         vcode `snocOL` mov_lo `snocOL` mov_hi
 211      )
 212
     -- Any destination other than a local register (e.g. a global) is
     -- rejected.
 213 assignReg_I64Code lvalue valueTree
 214    = panic "assignReg_I64Code(i386): invalid lvalue"
 215
 216 ------------
 217
 218 iselExpr64 (CmmLit (CmmInt i _)) = do
 219   (rlo,rhi) <- getNewRegPairNat I32
 220   let
 221         r = fromIntegral (fromIntegral i :: Word32)
 222         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 223         code = toOL [
 224                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 225                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 226                 ]
 227   -- in
 228   return (ChildCode64 code rlo)
 229
 230 iselExpr64 (CmmLoad addrTree I64) = do
 231    Amode addr addr_code <- getAmode addrTree
 232    (rlo,rhi) <- getNewRegPairNat I32
 233    let
 234         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 235         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 236    -- in
 237    return (
 238             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 239                         rlo
 240      )
 241
 242 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 243    = return (ChildCode64 nilOL (mkVReg vu I32))
 244
 245 -- we handle addition, but rather badly
 246 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 247    ChildCode64 code1 r1lo <- iselExpr64 e1
 248    (rlo,rhi) <- getNewRegPairNat I32
 249    let
 250         r = fromIntegral (fromIntegral i :: Word32)
 251         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 252         r1hi = getHiVRegFromLo r1lo
 253         code =  code1 `appOL`
 254                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 255                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 256                        MOV I32 (OpReg r1hi) (OpReg rhi),
 257                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 258    -- in
 259    return (ChildCode64 code rlo)
 260
 261 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 262    ChildCode64 code1 r1lo <- iselExpr64 e1
 263    ChildCode64 code2 r2lo <- iselExpr64 e2
 264    (rlo,rhi) <- getNewRegPairNat I32
 265    let
 266         r1hi = getHiVRegFromLo r1lo
 267         r2hi = getHiVRegFromLo r2lo
 268         code =  code1 `appOL`
 269                 code2 `appOL`
 270                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 271                        ADD I32 (OpReg r2lo) (OpReg rlo),
 272                        MOV I32 (OpReg r1hi) (OpReg rhi),
 273                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 274    -- in
 275    return (ChildCode64 code rlo)
 276
 277 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 278      fn <- getAnyReg expr
 279      r_dst_lo <-  getNewRegNat I32
 280      let r_dst_hi = getHiVRegFromLo r_dst_lo
 281          code = fn r_dst_lo
 282      return (
 283              ChildCode64 (code `snocOL`
 284                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 285                           r_dst_lo
 286             )
 287
 288 iselExpr64 expr
 289    = pprPanic "iselExpr64(i386)" (ppr expr)
 290
 291 #endif /* i386_TARGET_ARCH */
 292
 293 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 294
 295 #if sparc_TARGET_ARCH
 296
 297 assignMem_I64Code addrTree valueTree = do
     -- sparc: store a 64-bit value as two 32-bit STs relative to the register
     -- holding the address.
     -- NOTE(review): the Amode bound on the next line is never used (neither
     -- addr nor addr_code appears below); the address is recomputed with
     -- getSomeReg.  Confirm whether the getAmode call can be dropped.
 298      Amode addr addr_code <- getAmode addrTree
 299      ChildCode64 vcode rlo <- iselExpr64 valueTree
 300      (src, code) <- getSomeReg addrTree
 301      let
 302          rhi = getHiVRegFromLo rlo
 303          -- Big-endian store
              -- (high word at offset 0, low word at offset 4).
 304          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 305          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 306      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 307
 308 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 309      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 310      let
 311          r_dst_lo = mkVReg u_dst pk
 312          r_dst_hi = getHiVRegFromLo r_dst_lo
 313          r_src_hi = getHiVRegFromLo r_src_lo
 314          mov_lo = mkMOV r_src_lo r_dst_lo
 315          mov_hi = mkMOV r_src_hi r_dst_hi
 316          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 317      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 318 assignReg_I64Code lvalue valueTree
 319    = panic "assignReg_I64Code(sparc): invalid lvalue"
 320
 321
 322 -- Don't delete this -- it's very handy for debugging.
 323 --iselExpr64 expr
 324 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 325 --   = panic "iselExpr64(???)"
 326
 327 iselExpr64 (CmmLoad addrTree I64) = do
 328      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 329      rlo <- getNewRegNat I32
 330      let rhi = getHiVRegFromLo rlo
 331          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 332          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 333      return (
 334             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 335                          rlo
 336           )
 337
 338 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 339      r_dst_lo <-  getNewRegNat I32
 340      let r_dst_hi = getHiVRegFromLo r_dst_lo
 341          r_src_lo = mkVReg uq I32
 342          r_src_hi = getHiVRegFromLo r_src_lo
 343          mov_lo = mkMOV r_src_lo r_dst_lo
 344          mov_hi = mkMOV r_src_hi r_dst_hi
 345          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 346      return (
 347             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 348          )
 349
 350 iselExpr64 expr
 351    = pprPanic "iselExpr64(sparc)" (ppr expr)
 352
 353 #endif /* sparc_TARGET_ARCH */
 354
 355 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 356
 357 #if powerpc_TARGET_ARCH
 358
     -- Compute addressing modes for the two 32-bit halves of a 64-bit memory
     -- operand.  Returns (address of the word at offset 0, address of the
     -- word at offset 4, code that must run first).
 359 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 360 getI64Amodes addrTree = do
 361     Amode hi_addr addr_code <- getAmode addrTree
 362     case addrOffset hi_addr 4 of
 363         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
             -- The amode cannot be offset by 4: fall back to computing the
             -- address into a register and using reg+0 / reg+4.  The
             -- addr_code from getAmode is dropped in this case.
 364         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 365                            return (AddrRegImm hi_ptr (ImmInt 0),
 366                                    AddrRegImm hi_ptr (ImmInt 4),
 367                                    code)
 368
 369 assignMem_I64Code addrTree valueTree = do
     -- powerpc: store a 64-bit value as two 32-bit STs using the address
     -- pair from getI64Amodes.
 370         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 371         ChildCode64 vcode rlo <- iselExpr64 valueTree
 372         let
 373                 rhi = getHiVRegFromLo rlo
 374
 375                 -- Big-endian store
                 -- (high word at the lower address).
 376                 mov_hi = ST I32 rhi hi_addr
 377                 mov_lo = ST I32 rlo lo_addr
 378         -- in
         -- The low-word store is emitted before the high-word store.
 379         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 380
     -- powerpc: assign a 64-bit value to a local register by moving each
     -- 32-bit half with MR (presumably "MR dst src" -- confirm against the
     -- Instr definition).
 381 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 382    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 383    let
 384          r_dst_lo = mkVReg u_dst I32
 385          r_dst_hi = getHiVRegFromLo r_dst_lo
 386          r_src_hi = getHiVRegFromLo r_src_lo
 387          mov_lo = MR r_dst_lo r_src_lo
 388          mov_hi = MR r_dst_hi r_src_hi
 389    -- in
 390    return (
 391         vcode `snocOL` mov_lo `snocOL` mov_hi
 392      )
 393
     -- Any destination other than a local register is rejected.
 394 assignReg_I64Code lvalue valueTree
 395    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 396
 397
 398 -- Don't delete this -- it's very handy for debugging.
 399 --iselExpr64 expr
 400 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 401 --   = panic "iselExpr64(???)"
 402
 403 iselExpr64 (CmmLoad addrTree I64) = do
 404     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 405     (rlo, rhi) <- getNewRegPairNat I32
 406     let mov_hi = LD I32 rhi hi_addr
 407         mov_lo = LD I32 rlo lo_addr
 408     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 409                          rlo
 410
 411 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 412    = return (ChildCode64 nilOL (mkVReg vu I32))
 413
 414 iselExpr64 (CmmLit (CmmInt i _)) = do
 415   (rlo,rhi) <- getNewRegPairNat I32
 416   let
 417         half0 = fromIntegral (fromIntegral i :: Word16)
 418         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 419         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 420         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 421
 422         code = toOL [
 423                 LIS rlo (ImmInt half1),
 424                 OR rlo rlo (RIImm $ ImmInt half0),
 425                 LIS rhi (ImmInt half3),
 426                 OR rlo rlo (RIImm $ ImmInt half2)
 427                 ]
 428   -- in
 429   return (ChildCode64 code rlo)
 430
 431 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 432    ChildCode64 code1 r1lo <- iselExpr64 e1
 433    ChildCode64 code2 r2lo <- iselExpr64 e2
 434    (rlo,rhi) <- getNewRegPairNat I32
 435    let
 436         r1hi = getHiVRegFromLo r1lo
 437         r2hi = getHiVRegFromLo r2lo
 438         code =  code1 `appOL`
 439                 code2 `appOL`
 440                 toOL [ ADDC rlo r1lo r2lo,
 441                        ADDE rhi r1hi r2hi ]
 442    -- in
 443    return (ChildCode64 code rlo)
 444
 445 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
 446     (expr_reg,expr_code) <- getSomeReg expr
 447     (rlo, rhi) <- getNewRegPairNat I32
 448     let mov_hi = LI rhi (ImmInt 0)
 449         mov_lo = MR rlo expr_reg
 450     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 451                          rlo
 452 iselExpr64 expr
 453    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 454
 455 #endif /* powerpc_TARGET_ARCH */
 456
 457
 458 -- -----------------------------------------------------------------------------
 459 -- The 'Register' type
 460
 461 -- 'Register's passed up the tree.  If the stix code forces the register
 462 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 463 -- otherwise, it comes out as @Any@, and the parent can decide which
 464 -- register to put it in.
 465
 466 data Register
     -- Fixed: the value is in the given Reg once the InstrBlock has run.
 467   = Fixed   MachRep Reg InstrBlock
     -- Any: the caller supplies the destination Reg and gets back the code
     -- that computes the value into it.
 468   | Any     MachRep (Reg -> InstrBlock)
 469
 470 swizzleRegisterRep :: Register -> MachRep -> Register
     -- Replace the MachRep recorded in a Register; the register and the
     -- code (or code generator function) are passed through unchanged.
 471 swizzleRegisterRep register newRep = case register of
 472     Fixed _ reg code -> Fixed newRep reg code
         Any _ codefn     -> Any newRep codefn
 473
 474
 475 -- -----------------------------------------------------------------------------
 476 -- Utils based on getRegister, below
 477
 478 -- The dual to getAnyReg: compute an expression into a register, but
 479 -- we don't mind which one it is.
 480 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 481 getSomeReg expr = getRegister expr >>= pick
 482   where
 483     -- Fixed: the result already lives in a particular register.
 484     pick (Fixed _ reg code) = return (reg, code)
 485     -- Any: the choice is ours, so allocate a fresh register of that rep.
 486     pick (Any rep code) = do
 487         tmp <- getNewRegNat rep
 488         return (tmp, code tmp)
 489
 490 -- -----------------------------------------------------------------------------
 491 -- Grab the Reg for a CmmReg
 492
 493 getRegisterReg :: CmmReg -> Reg
     -- Map a Cmm register to a native Reg.  Pure: no code is generated.
 494
     -- Locals become the virtual register built from their unique and rep.
 495 getRegisterReg (CmmLocal (LocalReg u pk _))
 496   = mkVReg u pk
 497
     -- Globals must resolve to a real machine register; one that resolves
     -- to anything else (the non-RealReg / Right cases) is a panic.
 498 getRegisterReg (CmmGlobal mid)
 499   = case get_GlobalReg_reg_or_addr mid of
 500        Left (RealReg rrno) -> RealReg rrno
 501        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 502           -- By this stage, the only MagicIds remaining should be the
 503           -- ones which map to a real machine register on this
 504           -- platform.  Hence ...
 505
 506
 507 -- -----------------------------------------------------------------------------
 508 -- Generate code to get a subtree into a Register
 509
 510 -- Don't delete this -- it's very handy for debugging.
 511 --getRegister expr
 512 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 513 --   = panic "getRegister(???)"
 514
 515 getRegister :: CmmExpr -> NatM Register
     -- Select instructions that compute an expression into a Register.
     -- The equations below are the architecture-independent ones; the
     -- per-architecture equations follow under their own #if sections.
 516
 517 #if !x86_64_TARGET_ARCH
 518     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 519     -- register, it can only be used for rip-relative addressing.
 520 getRegister (CmmReg (CmmGlobal PicBaseReg))
 521   = do
 522       reg <- getPicBaseNat wordRep
 523       return (Fixed wordRep reg nilOL)
 524 #endif
 525
     -- Any other register is already where it needs to be: no code.
 526 getRegister (CmmReg reg)
 527   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 528
     -- Register-plus-offset is first rewritten into an explicit MO_Add.
 529 getRegister tree@(CmmRegOff _ _)
 530   = getRegister (mangleIndexTree tree)
 531
 532
 533 #if WORD_SIZE_IN_BITS==32
 534     -- for 32-bit architectures, support some 64 -> 32 bit conversions:
 535     -- TO_W_(x), TO_W_(x >> 32)
 536
     -- High word, unsigned narrowing: "x >> 32" narrowed to I32 costs no
     -- extra instruction; the hi register of x's pair is returned directly.
 537 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 538              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 539   ChildCode64 code rlo <- iselExpr64 x
 540   return $ Fixed I32 (getHiVRegFromLo rlo) code
 541
     -- High word, signed narrowing: handled identically.
 542 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 543              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 544   ChildCode64 code rlo <- iselExpr64 x
 545   return $ Fixed I32 (getHiVRegFromLo rlo) code
 546
     -- Low word, unsigned narrowing: the lo register of the pair is the
     -- answer.
 547 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 548   ChildCode64 code rlo <- iselExpr64 x
 549   return $ Fixed I32 rlo code
 550
     -- Low word, signed narrowing: handled identically.
 551 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 552   ChildCode64 code rlo <- iselExpr64 x
 553   return $ Fixed I32 rlo code
 554
 555 #endif
 556
 557 -- end of machine-"independent" bit; here we go on the rest...
 558
 559 #if alpha_TARGET_ARCH
 560
     -- NOTE(review): the alpha equations are written against StixTree
     -- constructors (StDouble, StPrim, StCall, ...) and the thenNat
     -- combinator rather than the Cmm types used elsewhere in this module;
     -- they look like code that predates the Cmm port -- confirm before
     -- relying on them.

     -- Double literal: emit it as read-only data under a fresh label, load
     -- the label's address into a temporary, then load the value from there.
 561 getRegister (StDouble d)
 562   = getBlockIdNat                   `thenNat` \ lbl ->
 563     getNewRegNat PtrRep             `thenNat` \ tmp ->
 564     let code dst = mkSeqInstrs [
 565             LDATA RoDataSegment lbl [
 566                     DATA TF [ImmLab (rational d)]
 567                 ],
 568             LDA tmp (AddrImm (ImmCLbl lbl)),
 569             LD TF dst (AddrReg tmp)]
 570     in
 571         return (Any F64 code)
 572
 573 getRegister (StPrim primop [x]) -- unary PrimOps
 574   = case primop of
 575       IntNegOp -> trivialUCode (NEG Q False) x
 576
 577       NotOp    -> trivialUCode NOT x
 578
 579       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 580       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 581
 582       OrdOp -> coerceIntCode IntRep x
 583       ChrOp -> chrCode x
 584
 585       Float2IntOp  -> coerceFP2Int    x
 586       Int2FloatOp  -> coerceInt2FP pr x
 587       Double2IntOp -> coerceFP2Int    x
 588       Int2DoubleOp -> coerceInt2FP pr x
 589
 590       Double2FloatOp -> coerceFltCode x
 591       Float2DoubleOp -> coerceFltCode x
 592
           -- Every remaining unary primop becomes a C call to the named
           -- function with an F64 result.  The inner case lists only the
           -- primops below; any other primop has no matching alternative.
 593       other_op -> getRegister (StCall fn CCallConv F64 [x])
 594         where
 595           fn = case other_op of
 596                  FloatExpOp    -> FSLIT("exp")
 597                  FloatLogOp    -> FSLIT("log")
 598                  FloatSqrtOp   -> FSLIT("sqrt")
 599                  FloatSinOp    -> FSLIT("sin")
 600                  FloatCosOp    -> FSLIT("cos")
 601                  FloatTanOp    -> FSLIT("tan")
 602                  FloatAsinOp   -> FSLIT("asin")
 603                  FloatAcosOp   -> FSLIT("acos")
 604                  FloatAtanOp   -> FSLIT("atan")
 605                  FloatSinhOp   -> FSLIT("sinh")
 606                  FloatCoshOp   -> FSLIT("cosh")
 607                  FloatTanhOp   -> FSLIT("tanh")
 608                  DoubleExpOp   -> FSLIT("exp")
 609                  DoubleLogOp   -> FSLIT("log")
 610                  DoubleSqrtOp  -> FSLIT("sqrt")
 611                  DoubleSinOp   -> FSLIT("sin")
 612                  DoubleCosOp   -> FSLIT("cos")
 613                  DoubleTanOp   -> FSLIT("tan")
 614                  DoubleAsinOp  -> FSLIT("asin")
 615                  DoubleAcosOp  -> FSLIT("acos")
 616                  DoubleAtanOp  -> FSLIT("atan")
 617                  DoubleSinhOp  -> FSLIT("sinh")
 618                  DoubleCoshOp  -> FSLIT("cosh")
 619                  DoubleTanhOp  -> FSLIT("tanh")
 620   where
         -- Placeholder argument: forcing it panics.
 621     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 622
 623 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 624   = case primop of
 625       CharGtOp -> trivialCode (CMP LTT) y x
 626       CharGeOp -> trivialCode (CMP LE) y x
 627       CharEqOp -> trivialCode (CMP EQQ) x y
 628       CharNeOp -> int_NE_code x y
 629       CharLtOp -> trivialCode (CMP LTT) x y
 630       CharLeOp -> trivialCode (CMP LE) x y
 631
 632       IntGtOp  -> trivialCode (CMP LTT) y x
 633       IntGeOp  -> trivialCode (CMP LE) y x
 634       IntEqOp  -> trivialCode (CMP EQQ) x y
 635       IntNeOp  -> int_NE_code x y
 636       IntLtOp  -> trivialCode (CMP LTT) x y
 637       IntLeOp  -> trivialCode (CMP LE) x y
 638
 639       WordGtOp -> trivialCode (CMP ULT) y x
 640       WordGeOp -> trivialCode (CMP ULE) x y
 641       WordEqOp -> trivialCode (CMP EQQ)  x y
 642       WordNeOp -> int_NE_code x y
 643       WordLtOp -> trivialCode (CMP ULT) x y
 644       WordLeOp -> trivialCode (CMP ULE) x y
 645
 646       AddrGtOp -> trivialCode (CMP ULT) y x
 647       AddrGeOp -> trivialCode (CMP ULE) y x
 648       AddrEqOp -> trivialCode (CMP EQQ)  x y
 649       AddrNeOp -> int_NE_code x y
 650       AddrLtOp -> trivialCode (CMP ULT) x y
 651       AddrLeOp -> trivialCode (CMP ULE) x y
 652
 653       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 654       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 655       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 656       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 657       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 658       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 659
 660       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 661       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 662       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 663       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 664       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 665       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 666
 667       IntAddOp  -> trivialCode (ADD Q False) x y
 668       IntSubOp  -> trivialCode (SUB Q False) x y
 669       IntMulOp  -> trivialCode (MUL Q False) x y
 670       IntQuotOp -> trivialCode (DIV Q False) x y
 671       IntRemOp  -> trivialCode (REM Q False) x y
 672
 673       WordAddOp  -> trivialCode (ADD Q False) x y
 674       WordSubOp  -> trivialCode (SUB Q False) x y
 675       WordMulOp  -> trivialCode (MUL Q False) x y
 676       WordQuotOp -> trivialCode (DIV Q True) x y
 677       WordRemOp  -> trivialCode (REM Q True) x y
 678
 679       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 680       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 681       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 682       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 683
 684       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 685       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 686       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 687       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 688
 689       AddrAddOp  -> trivialCode (ADD Q False) x y
 690       AddrSubOp  -> trivialCode (SUB Q False) x y
 691       AddrRemOp  -> trivialCode (REM Q True) x y
 692
 693       AndOp  -> trivialCode AND x y
 694       OrOp   -> trivialCode OR  x y
 695       XorOp  -> trivialCode XOR x y
 696       SllOp  -> trivialCode SLL x y
 697       SrlOp  -> trivialCode SRL x y
 698
 699       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 700       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 701       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 702
 703       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 704       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 705   where
 706     {- ------------------------------------------------------------
 707         Some bizarre special code for getting condition codes into
 708         registers.  Integer non-equality is a test for equality
 709         followed by an XOR with 1.  (Integer comparisons always set
 710         the result register to 0 or 1.)  Floating point comparisons of
 711         any kind leave the result in a floating point register, so we
 712         need to wrangle an integer register out of things.
 713     -}
 714     int_NE_code :: StixTree -> StixTree -> NatM Register
 715
 716     int_NE_code x y
 717       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 718         getNewRegNat IntRep             `thenNat` \ tmp ->
 719         let
 720             code = registerCode register tmp
 721             src  = registerName register tmp
 722             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 723         in
 724         return (Any IntRep code__2)
 725
 726     {- ------------------------------------------------------------
 727         Comments for int_NE_code also apply to cmpF_code
 728     -}
 729     cmpF_code
 730         :: (Reg -> Reg -> Reg -> Instr)
 731         -> Cond
 732         -> StixTree -> StixTree
 733         -> NatM Register
 734
 735     cmpF_code instr cond x y
 736       = trivialFCode pr instr x y       `thenNat` \ register ->
 737         getNewRegNat F64                `thenNat` \ tmp ->
 738         getBlockIdNat                   `thenNat` \ lbl ->
 739         let
 740             code = registerCode register tmp
 741             result  = registerName register tmp
 742
 743             code__2 dst = code . mkSeqInstrs [
 744                 OR zeroh (RIImm (ImmInt 1)) dst,
 745                 BF cond  result (ImmCLbl lbl),
 746                 OR zeroh (RIReg zeroh) dst,
 747                 NEWBLOCK lbl]
 748         in
 749         return (Any IntRep code__2)
 750       where
 751         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 752       ------------------------------------------------------------
 753
     -- Alpha: memory load.  Computes the addressing mode, then loads into
     -- whichever destination the caller picks.
     -- NOTE(review): CmmLoad is matched here as (rep, address), the reverse
     -- of the (address, rep) order used by the other architectures in this
     -- module -- confirm this section still compiles.
 754 getRegister (CmmLoad pk mem)
 755   = getAmode mem                    `thenNat` \ amode ->
 756     let
 757         code = amodeCode amode
 758         src   = amodeAddr amode
 759         size = primRepToSize pk
 760         code__2 dst = code . mkSeqInstr (LD size dst src)
 761     in
 762     return (Any pk code__2)
 763
     -- Alpha: integer literal.  Literals accepted by fits8Bits are OR-ed
     -- from zeroh as an immediate; all others use LDI.
 764 getRegister (StInt i)
 765   | fits8Bits i
 766   = let
 767         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 768     in
 769     return (Any IntRep code)
 770   | otherwise
 771   = let
 772         code dst = mkSeqInstr (LDI Q dst src)
 773     in
 774     return (Any IntRep code)
 775   where
 776     src = ImmInt (fromInteger i)
 777
     -- Alpha: any other leaf that maybeImm can turn into an immediate is
     -- loaded with LDA.  There is no fallback guard, so a leaf for which
     -- maybeImm returns Nothing matches no equation here.
 778 getRegister leaf
 779   | isJust imm
 780   = let
 781         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 782     in
 783     return (Any PtrRep code)
 784   where
 785     imm = maybeImm leaf
         -- Only evaluated under the isJust guard above.
 786     imm__2 = case imm of Just x -> x
 787
 788 #endif /* alpha_TARGET_ARCH */
 789
 790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 791
 792 #if i386_TARGET_ARCH
 793
     -- i386: F32 literal.  The constant is emitted as read-only data under a
     -- fresh label and loaded with GLD through a (possibly dynamic)
     -- reference to that label.
 794 getRegister (CmmLit (CmmFloat f F32)) = do
 795     lbl <- getNewLabelNat
 796     dflags <- getDynFlagsNat
 797     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 798     Amode addr addr_code <- getAmode dynRef
     -- The LDATA pseudo-instruction is placed first, then the address code,
     -- then the load itself.
 799     let code dst =
 800             LDATA ReadOnlyData
 801                         [CmmDataLabel lbl,
 802                          CmmStaticLit (CmmFloat f F32)]
 803             `consOL` (addr_code `snocOL`
 804             GLD F32 addr dst)
 805     -- in
 806     return (Any F32 code)
 807
 808
     -- i386: F64 literal.  0.0 and 1.0 are produced by the single
     -- instructions GLDZ and GLD1; every other value goes through memory
     -- exactly like the F32 case.
 809 getRegister (CmmLit (CmmFloat d F64))
 810   | d == 0.0
 811   = let code dst = unitOL (GLDZ dst)
 812     in  return (Any F64 code)
 813
 814   | d == 1.0
 815   = let code dst = unitOL (GLD1 dst)
 816     in  return (Any F64 code)
 817
 818   | otherwise = do
 819     lbl <- getNewLabelNat
 820     dflags <- getDynFlagsNat
 821     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 822     Amode addr addr_code <- getAmode dynRef
 823     let code dst =
 824             LDATA ReadOnlyData
 825                         [CmmDataLabel lbl,
 826                          CmmStaticLit (CmmFloat d F64)]
 827             `consOL` (addr_code `snocOL`
 828             GLD F64 addr dst)
 829     -- in
 830     return (Any F64 code)
 831
 832 #endif /* i386_TARGET_ARCH */
 833
 834 #if x86_64_TARGET_ARCH
 835
 836 getRegister (CmmLit (CmmFloat 0.0 rep))
 837   = return (Any rep zeroInto)
 838   where
 839     -- Zero by XOR-ing dst with itself.  xorpd, xorps and pxor all appear to do the same thing --SDM
 840     zeroInto dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
 841
 842 getRegister (CmmLit (CmmFloat f rep)) = do  -- x86_64: any other float literal
 843     lbl <- getNewLabelNat                    -- fresh label naming the constant
 844     let code dst = toOL [
 845             LDATA ReadOnlyData
 846                         [CmmDataLabel lbl,
 847                          CmmStaticLit (CmmFloat f rep)],
 848             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 849             ]
 850     -- the block is: data declaration, then a rip-relative MOV from the label
 851     return (Any rep code)
 852
 853 #endif /* x86_64_TARGET_ARCH */
 854
 855 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 856
 857 -- catch simple cases of zero- or sign-extended load: a widening conversion applied directly to a load becomes one extending move from memory
 858 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do   -- unsigned I8 -> I32
 859   code <- intLoadCode (MOVZxL I8) addr
 860   return (Any I32 code)
 861
 862 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do   -- signed I8 -> I32
 863   code <- intLoadCode (MOVSxL I8) addr
 864   return (Any I32 code)
 865
 866 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do  -- unsigned I16 -> I32
 867   code <- intLoadCode (MOVZxL I16) addr
 868   return (Any I32 code)
 869
 870 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do  -- signed I16 -> I32
 871   code <- intLoadCode (MOVSxL I16) addr
 872   return (Any I32 code)
 873
 874 #endif
 875
 876 #if x86_64_TARGET_ARCH
 877
 878 -- catch simple cases of zero- or sign-extended load (x86_64 only: the same folding, with an I64 result)
 879 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do   -- unsigned I8 -> I64
 880   code <- intLoadCode (MOVZxL I8) addr
 881   return (Any I64 code)
 882
 883 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do   -- signed I8 -> I64
 884   code <- intLoadCode (MOVSxL I8) addr
 885   return (Any I64 code)
 886
 887 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do  -- unsigned I16 -> I64
 888   code <- intLoadCode (MOVZxL I16) addr
 889   return (Any I64 code)
 890
 891 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do  -- signed I16 -> I64
 892   code <- intLoadCode (MOVSxL I16) addr
 893   return (Any I64 code)
 894
 895 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do  -- unsigned I32 -> I64
 896   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 897   return (Any I64 code)
 898
 899 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do  -- signed I32 -> I64
 900   code <- intLoadCode (MOVSxL I32) addr
 901   return (Any I64 code)
 902
 903 #endif
 904
 905 #if x86_64_TARGET_ARCH
 906 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),  -- PicBaseReg + literal ...
 907                                      CmmLit displacement])
 908     = return $ Any I64 (\dst -> unitOL $                               -- ... becomes a single LEA
 909         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) -- of the rip-relative address
 910 #endif
 911
 912 #if x86_64_TARGET_ARCH
 913 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do  -- x86_64: negate an F32 by XOR with a sign-bit mask
 914   x_code <- getAnyReg x                          -- code to compute x into whichever register we pick
 915   lbl <- getNewLabelNat                          -- fresh label naming the mask constant
 916   let
 917     code dst = x_code dst `appOL` toOL [         -- x goes into dst first, then dst is XOR-ed in place
 918         -- This is how gcc does it, so it can't be that bad:
 919         LDATA ReadOnlyData16 [
 920                 CmmAlign 16,
 921                 CmmDataLabel lbl,
 922                 CmmStaticLit (CmmInt 0x80000000 I32),
 923                 CmmStaticLit (CmmInt 0 I32),
 924                 CmmStaticLit (CmmInt 0 I32),
 925                 CmmStaticLit (CmmInt 0 I32)
 926         ],
 927         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 928                 -- xorps, so we need the 128-bit constant
 929                 -- ToDo: rip-relative
 930         ]
 931   --
 932   return (Any F32 code)
 933
 934 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do  -- x86_64: negate an F64 by XOR with a sign-bit mask
 935   x_code <- getAnyReg x                          -- code to compute x into whichever register we pick
 936   lbl <- getNewLabelNat                          -- fresh label naming the mask constant
 937   let
 938         -- This is how gcc does it, so it can't be that bad:
 939     code dst = x_code dst `appOL` toOL [         -- x goes into dst first, then dst is XOR-ed in place
 940         LDATA ReadOnlyData16 [
 941                 CmmAlign 16,
 942                 CmmDataLabel lbl,
 943                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 944                 CmmStaticLit (CmmInt 0 I64)
 945         ],
 946                 -- gcc puts an unpck here.  Wonder if we need it.
 947         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 948                 -- xorpd, so we need the 128-bit constant
 949         ]
 950   --
 951   return (Any F64 code)
 952 #endif
 953
 954 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 955
 956 getRegister (CmmMachOp mop [x]) -- unary MachOps: alternatives are tried top to bottom, so specific cases must stay ahead of general ones
 957   = case mop of
 958 #if i386_TARGET_ARCH
 959       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 960       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 961 #endif
 962
 963       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 964       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 965
 966       -- Nop conversions (narrowings: no extension instruction is emitted, see toI8Reg and conversionNop below)
 967       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 968       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 969       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 970       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 971       MO_U_Conv I32 I16 -> toI16Reg I32 x
 972       MO_S_Conv I32 I16 -> toI16Reg I32 x
 973 #if x86_64_TARGET_ARCH
 974       MO_U_Conv I64 I32 -> conversionNop I64 x
 975       MO_S_Conv I64 I32 -> conversionNop I64 x
 976       MO_U_Conv I64 I16 -> toI16Reg I64 x
 977       MO_S_Conv I64 I16 -> toI16Reg I64 x
 978       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 979       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 980 #endif
 981
 982       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x  -- same-width conversion; a failed guard falls through
 983       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 984
 985       -- widenings (zero-extending for MO_U_Conv, sign-extending for MO_S_Conv)
 986       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 987       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 988       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 989
 990       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 991       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 992       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 993
 994 #if x86_64_TARGET_ARCH
 995       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 996       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 997       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 998       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 999       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
1000       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
1001         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1002         -- However, we don't want the register allocator to throw it
1003         -- away as an unnecessary reg-to-reg move, so we keep it in
1004         -- the form of a movzl and print it as a movl later.
1005 #endif
1006
1007 #if i386_TARGET_ARCH
1008       MO_S_Conv F32 F64 -> conversionNop F64 x
1009       MO_S_Conv F64 F32 -> conversionNop F32 x
1010 #else
1011       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1012       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1013 #endif
1014
1015       MO_S_Conv from to                                   -- remaining signed conversions with a floating rep on one side
1016         | isFloatingRep from -> coerceFP2Int from to x
1017         | isFloatingRep to   -> coerceInt2FP from to x
1018
1019       other -> pprPanic "getRegister" (pprMachOp mop)     -- also reached when no guard above holds
1020    where
1021         -- signed or unsigned extension.
1022         integerExtend from to instr expr = do
1023             (reg,e_code) <- if from == I8 then getByteReg expr   -- an 8-bit source needs a byte-addressable register
1024                                           else getSomeReg expr
1025             let
1026                 code dst =
1027                   e_code `snocOL`
1028                   instr from (OpReg reg) (OpReg dst)
1029             return (Any to code)
1030
1031         toI8Reg new_rep expr
1032             = do codefn <- getAnyReg expr
1033                  return (Any new_rep codefn)
1034                 -- HACK: use getAnyReg to get a byte-addressable register.
1035                 -- If the source was a Fixed register, this will add the
1036                 -- mov instruction to put it into the desired destination.
1037                 -- We're assuming that the destination won't be a fixed
1038                 -- non-byte-addressable register; it won't be, because all
1039                 -- fixed registers are word-sized.
1040
1041         toI16Reg = toI8Reg -- for now
1042
1043         conversionNop new_rep expr                          -- no instruction: only the rep tag on the Register changes
1044             = do e_code <- getRegister expr
1045                  return (swizzleRegisterRep e_code new_rep)
1046
1047
1048 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps: alternatives are tried top to bottom, floating cases come before the general rep cases
1049   = case mop of
1050       MO_Eq F32   -> condFltReg EQQ x y
1051       MO_Ne F32   -> condFltReg NE x y
1052       MO_S_Gt F32 -> condFltReg GTT x y
1053       MO_S_Ge F32 -> condFltReg GE x y
1054       MO_S_Lt F32 -> condFltReg LTT x y
1055       MO_S_Le F32 -> condFltReg LE x y
1056
1057       MO_Eq F64   -> condFltReg EQQ x y
1058       MO_Ne F64   -> condFltReg NE x y
1059       MO_S_Gt F64 -> condFltReg GTT x y
1060       MO_S_Ge F64 -> condFltReg GE x y
1061       MO_S_Lt F64 -> condFltReg LTT x y
1062       MO_S_Le F64 -> condFltReg LE x y
1063
1064       MO_Eq rep   -> condIntReg EQQ x y
1065       MO_Ne rep   -> condIntReg NE x y
1066
1067       MO_S_Gt rep -> condIntReg GTT x y
1068       MO_S_Ge rep -> condIntReg GE x y
1069       MO_S_Lt rep -> condIntReg LTT x y
1070       MO_S_Le rep -> condIntReg LE x y
1071
1072       MO_U_Gt rep -> condIntReg GU  x y
1073       MO_U_Ge rep -> condIntReg GEU x y
1074       MO_U_Lt rep -> condIntReg LU  x y
1075       MO_U_Le rep -> condIntReg LEU x y
1076
1077 #if i386_TARGET_ARCH
1078       MO_Add F32 -> trivialFCode F32 GADD x y
1079       MO_Sub F32 -> trivialFCode F32 GSUB x y
1080
1081       MO_Add F64 -> trivialFCode F64 GADD x y
1082       MO_Sub F64 -> trivialFCode F64 GSUB x y
1083
1084       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1085       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1086 #endif
1087
1088 #if x86_64_TARGET_ARCH
1089       MO_Add F32 -> trivialFCode F32 ADD x y
1090       MO_Sub F32 -> trivialFCode F32 SUB x y
1091
1092       MO_Add F64 -> trivialFCode F64 ADD x y
1093       MO_Sub F64 -> trivialFCode F64 SUB x y
1094
1095       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1096       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1097 #endif
1098
1099       MO_Add rep -> add_code rep x y
1100       MO_Sub rep -> sub_code rep x y
1101
1102       MO_S_Quot rep -> div_code rep True  True  x y   -- div_code arguments: rep, signed, quotient
1103       MO_S_Rem  rep -> div_code rep True  False x y
1104       MO_U_Quot rep -> div_code rep False True  x y
1105       MO_U_Rem  rep -> div_code rep False False x y
1106
1107 #if i386_TARGET_ARCH
1108       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1109       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1110 #endif
1111
1112 #if x86_64_TARGET_ARCH
1113       MO_Mul   F32 -> trivialFCode F32 MUL x y
1114       MO_Mul   F64 -> trivialFCode F64 MUL x y
1115 #endif
1116
1117       MO_Mul   rep -> let op = IMUL rep in
1118                       trivialCode rep op (Just op) x y
1119
1120       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1121
1122       MO_And rep -> let op = AND rep in
1123                     trivialCode rep op (Just op) x y
1124       MO_Or  rep -> let op = OR  rep in
1125                     trivialCode rep op (Just op) x y
1126       MO_Xor rep -> let op = XOR rep in
1127                     trivialCode rep op (Just op) x y
1128
1129         {- Shift ops on x86s have constraints on their source, it
1130            either has to be Imm, CL or 1
1131             => trivialCode is not restrictive enough (sigh.)
1132         -}
1133       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1134       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1135       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1136
1137       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1138   where
1139     --------------------
1140     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1141     imulMayOflo rep a b = do
1142          (a_reg, a_code) <- getNonClobberedReg a   -- a must survive the evaluation of b
1143          b_code <- getAnyReg b
1144          let
1145              shift_amt  = case rep of               -- rep width minus one, used for the arithmetic shift below
1146                            I32 -> 31
1147                            I64 -> 63
1148                            _ -> panic "shift_amt"
1149
1150              code = a_code `appOL` b_code eax `appOL`
1151                         toOL [
1152                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1153                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1154                                 -- sign extend lower part
1155                            SUB rep (OpReg edx) (OpReg eax)
1156                                 -- compare against upper
1157                            -- eax==0 if high part == sign extended low part
1158                         ]
1159          -- in
1160          return (Fixed rep eax code)
1161
1162     --------------------
1163     shift_code :: MachRep
1164                -> (Operand -> Operand -> Instr)
1165                -> CmmExpr
1166                -> CmmExpr
1167                -> NatM Register
1168
1169     {- Case1: shift length as immediate -}
1170     shift_code rep instr x y@(CmmLit lit) = do
1171           x_code <- getAnyReg x
1172           let
1173                code dst
1174                   = x_code dst `snocOL`
1175                     instr (OpImm (litToImm lit)) (OpReg dst)
1176           -- in
1177           return (Any rep code)
1178
1179     {- Case2: shift length is complex (non-immediate)
1180       * y must go in %ecx.
1181       * we cannot do y first *and* put its result in %ecx, because
1182         %ecx might be clobbered by x.
1183       * if we do y second, then x cannot be
1184         in a clobbered reg.  Also, we cannot clobber x's reg
1185         with the instruction itself.
1186       * so we can either:
1187         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1188         - do y second and put its result into %ecx.  x gets placed in a fresh
1189           tmp.  This is likely to be better, because the reg alloc can
1190           eliminate this reg->reg move here (it won't eliminate the other one,
1191           because the move is into the fixed %ecx).
1192     -}
1193     shift_code rep instr x y{-amount-} = do
1194         x_code <- getAnyReg x
1195         tmp <- getNewRegNat rep                    -- fresh register that holds x and receives the result
1196         y_code <- getAnyReg y
1197         let
1198            code = x_code tmp `appOL`
1199                   y_code ecx `snocOL`
1200                   instr (OpReg ecx) (OpReg tmp)
1201         -- in
1202         return (Fixed rep tmp code)
1203
1204     --------------------
1205     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1206     add_code rep x (CmmLit (CmmInt y _))
1207         | not (is64BitInteger y) = add_int rep x y                       -- literal operand: use the LEA form
1208     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1209
1210     --------------------
1211     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1212     sub_code rep x (CmmLit (CmmInt y _))
1213         | not (is64BitInteger (-y)) = add_int rep x (-y)                 -- x - literal is done as x + (-literal)
1214     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1215
1216     -- our three-operand add instruction:
1217     add_int rep x y = do
1218         (x_reg, x_code) <- getSomeReg x
1219         let
1220             imm = ImmInt (fromInteger y)
1221             code dst
1222                = x_code `snocOL`
1223                  LEA rep
1224                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1225                         (OpReg dst)
1226         --
1227         return (Any rep code)
1228
1229     ----------------------
1230     div_code rep signed quotient x y = do
1231            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1232            x_code <- getAnyReg x
1233            let
1234              widen | signed    = CLTD rep                           -- signed: CLTD before the divide
1235                    | otherwise = XOR rep (OpReg edx) (OpReg edx)    -- unsigned: clear edx before the divide
1236
1237              instr | signed    = IDIV
1238                    | otherwise = DIV
1239
1240              code = y_code `appOL`
1241                     x_code eax `appOL`
1242                     toOL [widen, instr rep y_op]
1243
1244              result | quotient  = eax                               -- the quotient is taken from eax
1245                     | otherwise = edx                               -- the remainder is taken from edx
1246
1247            -- in
1248            return (Fixed rep result code)
1249
1250
1251 getRegister (CmmLoad mem pk)        -- x86: load of a floating-point rep
1252   | isFloatingRep pk                -- other reps fall through to the equations that follow
1253   = do
1254     Amode src mem_code <- getAmode mem
1255     let
1256         code dst = mem_code `snocOL`
1257                    IF_ARCH_i386(GLD pk src dst,
1258                                 MOV pk (OpAddr src) (OpReg dst))
1259     -- GLD is selected on i386, the MOV otherwise
1260     return (Any pk code)
1261
1262 #if i386_TARGET_ARCH
1263 getRegister (CmmLoad mem pk)        -- i386: integer load
1264   | pk /= I64                       -- I64 loads are not handled by this equation
1265   = do
1266     code <- intLoadCode (instr pk) mem
1267     return (Any pk code)
1268   where
1269         instr I8  = MOVZxL pk       -- only called with pk == I8 here, so this is MOVZxL I8
1270         instr I16 = MOV I16
1271         instr I32 = MOV I32
1272         -- we always zero-extend 8-bit loads, if we
1273         -- can't think of anything better.  This is because
1274         -- we can't guarantee access to an 8-bit variant of every register
1275         -- (esi and edi don't have 8-bit variants), so to make things
1276         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1277 #endif
1278
1279 #if x86_64_TARGET_ARCH
1280 -- Simpler memory load code on x86_64: one MOV at the width of the rep, with no per-width special cases
1281 getRegister (CmmLoad mem pk)
1282   = do
1283     code <- intLoadCode (MOV pk) mem
1284     return (Any pk code)
1285 #endif
1286
1287 getRegister (CmmLit (CmmInt 0 rep))
1288   = return (Any rep zeroInto)
1289   where
1290     -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1291     narrowed = case rep of I64 -> I32; _ -> rep
1292     xorRep   = IF_ARCH_i386( rep, narrowed )
1293     -- integer zero: XOR the destination with itself
1294     zeroInto dst = unitOL (XOR xorRep (OpReg dst) (OpReg dst))
1296
1297 #if x86_64_TARGET_ARCH
1298   -- optimisation for loading small literals on x86_64: take advantage
1299   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1300   -- instruction forms are shorter.
1301 getRegister (CmmLit lit)
1302   | I64 <- cmmLitRep lit, not (isBigLit lit)    -- only 64-bit literals that are not big
1303   = let
1304         imm = litToImm lit
1305         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))   -- 32-bit MOV, result still tagged I64
1306     in
1307         return (Any I64 code)
1308   where
1309    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff          -- outside the unsigned 32-bit range
1310    isBigLit _ = False
1311         -- note1: not the same as is64BitLit, because that checks for
1312         -- signed literals that fit in 32 bits, but we want unsigned
1313         -- literals here.
1314         -- note2: all labels are small, because we're assuming the
1315         -- small memory model (see gcc docs, -mcmodel=small).
1316 #endif
1317
1318 getRegister (CmmLit lit)            -- any remaining literal: one MOV of the immediate
1319   = return (Any litRep loadImm)
1320   where
1321     litRep      = cmmLitRep lit
1322     loadImm dst = unitOL (MOV litRep (OpImm (litToImm lit)) (OpReg dst))
1325
1326 getRegister unhandled = pprPanic "getRegister(x86)" (ppr unhandled)  -- no x86 equation matched: report the expression
1327
1328
1329 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr -> NatM (Reg -> InstrBlock)
1330 intLoadCode mkMove addrExpr = do          -- mkMove: the load instruction, still awaiting its two operands
1331   Amode ea eaCode <- getAmode addrExpr    -- addressing mode for the expression, plus its set-up code
1332   let loadInto dst = eaCode `snocOL` mkMove (OpAddr ea) (OpReg dst)
1333   return loadInto
1334
1335 -- Compute an expression into a register chosen by the caller: the result is
1336 -- a function from that register to code, with a move appended where needed.
1337 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1338 getAnyReg expr = getRegister expr >>= anyReg
1341
1342 anyReg :: Register -> NatM (Reg -> InstrBlock)  -- turn either kind of Register into code awaiting a destination
1343 anyReg (Any _ mkCode)          = return mkCode  -- already parameterised by its destination
1344 anyReg (Fixed rep src srcCode) = return (\dst -> srcCode `snocOL` reg2reg rep src dst)  -- copy out of the fixed register
1345
1346 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1347 -- Fixed registers might not be byte-addressable, so we make sure we've
1348 -- got a temporary, inserting an extra reg copy if necessary.
1349 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1350 #if x86_64_TARGET_ARCH
1351 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1352 #else
1353 getByteReg expr = do
1354   r <- getRegister expr
1355   case r of
1356     Any rep code -> do                        -- destination is free: compute straight into a fresh register
1357         tmp <- getNewRegNat rep
1358         return (tmp, code tmp)
1359     Fixed rep reg code
1360         | isVirtualReg reg -> return (reg,code) -- a virtual register is used as it is
1361         | otherwise -> do                     -- any other fixed register: copy into a fresh register
1362             tmp <- getNewRegNat rep
1363             return (tmp, code `snocOL` reg2reg rep reg tmp)
1364         -- ToDo: could optimise slightly by checking for byte-addressable
1365         -- real registers, but that will happen very rarely if at all.
1366 #endif
1367
1368 -- Another variant: this time we want the result in a register that cannot
1369 -- be modified by code to evaluate an arbitrary expression.
1370 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1371 getNonClobberedReg expr = do
1372   r <- getRegister expr
1373   case r of
1374     Any rep code -> do                        -- destination is free: compute into a fresh register
1375         tmp <- getNewRegNat rep
1376         return (tmp, code tmp)
1377     Fixed rep reg code
1378         -- only free regs can be clobbered
1379         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do   -- a free real register: copy the value out
1380                 tmp <- getNewRegNat rep
1381                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1382         | otherwise ->                        -- any other fixed register is returned unchanged
1383                 return (reg, code)
1384
1385 reg2reg :: MachRep -> Reg -> Reg -> Instr   -- a register-to-register move for the given rep
1386 reg2reg rep src dst
1387 #if i386_TARGET_ARCH
1388   | isFloatingRep rep = GMOV src dst        -- i386 only: floating reps are moved with GMOV
1389 #endif
1390   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1391
1392 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1393
1394 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1395
1396 #if sparc_TARGET_ARCH
1397
1398 getRegister (CmmLit (CmmFloat f F32)) = do      -- SPARC: F32 literal, emitted as data and loaded
1399     lbl <- getNewLabelNat                        -- fresh label naming the constant
1400     let code dst = toOL [
1401             LDATA ReadOnlyData
1402                         [CmmDataLabel lbl,
1403                          CmmStaticLit (CmmFloat f F32)],
1404             SETHI (HI (ImmCLbl lbl)) dst,        -- HI part of the label address into dst
1405             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]  -- load via dst plus the LO part, overwriting dst
1406     return (Any F32 code)
1407
1408 getRegister (CmmLit (CmmFloat d F64)) = do      -- SPARC: F64 literal, same scheme as the F32 literal
1409     lbl <- getNewLabelNat                        -- fresh label naming the constant
1410     let code dst = toOL [
1411             LDATA ReadOnlyData
1412                         [CmmDataLabel lbl,
1413                          CmmStaticLit (CmmFloat d F64)],
1414             SETHI (HI (ImmCLbl lbl)) dst,        -- HI part of the label address into dst
1415             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]  -- load via dst plus the LO part, overwriting dst
1416     return (Any F64 code)
1417
1418 getRegister (CmmMachOp mop [x]) -- unary MachOps (SPARC): alternatives are tried top to bottom
1419   = case mop of
1420       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1421       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1422
1423       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x  -- a SUB whose first operand is g0
1424       MO_Not rep       -> trivialUCode rep (XNOR False g0) x       -- an XNOR whose first operand is g0
1425
1426       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))  -- mask with 255
1427
1428       MO_U_Conv F64 F32-> coerceDbl2Flt x
1429       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1430
1431       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1432       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1433       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1434       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1435
1436       -- Conversions which are a nop on sparc
1437       MO_U_Conv from to
1438         | from == to   -> conversionNop to   x
1439       MO_U_Conv I32 to -> conversionNop to   x
1440       MO_S_Conv I32 to -> conversionNop to   x
1441
1442       -- widenings
1443       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1444       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1445       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1446       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1447
1448       other_op -> panic "Unknown unary mach op"
1449     where
1450         -- XXX SLL/SRL?
1451         -- NOTE(review): the shift amount below is 0 and from is unused, so this does not look like a real extension; confirm
1452         integerExtend signed from to expr = do
1453            (reg, e_code) <- getSomeReg expr
1454            let
1455                code dst =
1456                    e_code `snocOL`
1457                    ((if signed then SRA else SRL)
1458                           reg (RIImm (ImmInt 0)) dst)
1459            return (Any to code)
1460         conversionNop new_rep expr              -- no instruction: only the rep tag on the Register changes
1461             = do e_code <- getRegister expr
1462                  return (swizzleRegisterRep e_code new_rep)
1462
1463 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps (SPARC): alternatives are tried top to bottom, floating cases first
1464   = case mop of
1465       MO_Eq F32 -> condFltReg EQQ x y
1466       MO_Ne F32 -> condFltReg NE x y
1467
1468       MO_S_Gt F32 -> condFltReg GTT x y
1469       MO_S_Ge F32 -> condFltReg GE x y
1470       MO_S_Lt F32 -> condFltReg LTT x y
1471       MO_S_Le F32 -> condFltReg LE x y
1472
1473       MO_Eq F64 -> condFltReg EQQ x y
1474       MO_Ne F64 -> condFltReg NE x y
1475
1476       MO_S_Gt F64 -> condFltReg GTT x y
1477       MO_S_Ge F64 -> condFltReg GE x y
1478       MO_S_Lt F64 -> condFltReg LTT x y
1479       MO_S_Le F64 -> condFltReg LE x y
1480
1481       MO_Eq rep -> condIntReg EQQ x y
1482       MO_Ne rep -> condIntReg NE x y
1483
1484       MO_S_Gt rep -> condIntReg GTT x y
1485       MO_S_Ge rep -> condIntReg GE x y
1486       MO_S_Lt rep -> condIntReg LTT x y
1487       MO_S_Le rep -> condIntReg LE x y
1488
1489       MO_U_Gt I32  -> condIntReg GU  x y   -- unsigned comparisons need the unsigned conditions:
1490       MO_U_Ge I32  -> condIntReg GEU x y   -- the signed ones (GTT, GE, LTT, LE) give the wrong
1491       MO_U_Lt I32  -> condIntReg LU  x y   -- answer as soon as an operand has its top bit set
1492       MO_U_Le I32  -> condIntReg LEU x y
1493
1494       MO_U_Gt I16 -> condIntReg GU  x y
1495       MO_U_Ge I16 -> condIntReg GEU x y
1496       MO_U_Lt I16 -> condIntReg LU  x y
1497       MO_U_Le I16 -> condIntReg LEU x y
1498
1499       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1500       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1501
1502       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1503 {-
1504       -- ToDo: teach about V8+ SPARC div instructions
1505       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1506       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1507       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1508       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1509 -}
1510       MO_Add F32  -> trivialFCode F32 FADD  x y
1511       MO_Sub F32   -> trivialFCode F32  FSUB x y
1512       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1513       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1514
1515       MO_Add F64   -> trivialFCode F64 FADD  x y
1516       MO_Sub F64   -> trivialFCode F64  FSUB x y
1517       MO_Mul F64   -> trivialFCode F64  FMUL x y
1518       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1519
1520       MO_And rep   -> trivialCode rep (AND False) x y
1521       MO_Or rep    -> trivialCode rep (OR  False) x y
1522       MO_Xor rep   -> trivialCode rep (XOR False) x y
1523
1524       MO_Mul rep -> trivialCode rep (SMUL False) x y
1525
1526       MO_Shl rep   -> trivialCode rep SLL  x y
1527       MO_U_Shr rep   -> trivialCode rep SRL x y
1528       MO_S_Shr rep   -> trivialCode rep SRA x y
1529
1530 {-
1531       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1532                                          [promote x, promote y])
1533                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1534       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1535                                         [x, y])
1536 -}
1537       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1538   where
1539     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1540
1541     --------------------
1542     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1543     imulMayOflo rep a b = do
1544          (a_reg, a_code) <- getSomeReg a
1545          (b_reg, b_code) <- getSomeReg b
1546          res_lo <- getNewRegNat I32           -- receives the SMUL result
1547          res_hi <- getNewRegNat I32           -- receives what RDY reads back after the multiply
1548          let
1549             shift_amt  = case rep of          -- rep width minus one, used for the arithmetic shift below
1550                           I32 -> 31
1551                           I64 -> 63
1552                           _ -> panic "shift_amt"
1553             code dst = a_code `appOL` b_code `appOL`
1554                        toOL [
1555                            SMUL False a_reg (RIReg b_reg) res_lo,
1556                            RDY res_hi,
1557                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,   -- replicate the sign bit of res_lo
1558                            SUB False False res_lo (RIReg res_hi) dst       -- difference from res_hi goes to dst
1559                         ]
1560          return (Any I32 code)
1561
1562 getRegister (CmmLoad mem pk) = do           -- SPARC: memory load into a caller-chosen register
1563     Amode src code <- getAmode mem           -- addressing mode for mem, plus its set-up code
1564     let
1565         code__2 dst = code `snocOL` LD pk src dst   -- address code followed by one LD at rep pk
1566     return (Any pk code__2)
1567
1568 getRegister (CmmLit (CmmInt i _))           -- SPARC: integer literal accepted by fits13Bits
1569   | fits13Bits i                            -- otherwise fall through to the general literal equation
1570   = let
1571         src = ImmInt (fromInteger i)
1572         code dst = unitOL (OR False g0 (RIImm src) dst)   -- one OR of g0 with the immediate
1573     in
1574         return (Any I32 code)
1575
1576 getRegister (CmmLit lit)                    -- SPARC: any other literal, built in two instructions
1577   = let rep = cmmLitRep lit                 -- NOTE(review): rep is not used below; the result is always tagged I32
1578         imm = litToImm lit
1579         code dst = toOL [
1580             SETHI (HI imm) dst,             -- HI part of the immediate into dst
1581             OR False dst (RIImm (LO imm)) dst]   -- then OR in the LO part
1582     in return (Any I32 code)
1583
1584 #endif /* sparc_TARGET_ARCH */
1585
1586 #if powerpc_TARGET_ARCH
1587 getRegister (CmmLoad mem pk)                -- PowerPC: memory load
1588   | pk /= I64                               -- I64 loads are not handled by this equation
1589   = do
1590         Amode addr addr_code <- getAmode mem
1591         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1592                        addr_code `snocOL` LD pk dst addr
1593         return (Any pk code)                -- the assertion ties the class of dst to whether pk is floating
1594
1595 -- catch simple cases of zero- or sign-extended load: a widening conversion applied directly to a load becomes one load instruction
1596 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do    -- unsigned I8 -> I32: plain LD I8
1597     Amode addr addr_code <- getAmode mem
1598     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1599
1600 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1601
1602 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do   -- unsigned I16 -> I32: plain LD I16
1603     Amode addr addr_code <- getAmode mem
1604     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1605
1606 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do   -- signed I16 -> I32: LA instead of LD
1607     Amode addr addr_code <- getAmode mem
1608     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1609
1610 getRegister (CmmMachOp mop [x]) -- unary MachOps (PowerPC): alternatives are tried top to bottom
1611   = case mop of
1612       MO_Not rep   -> trivialUCode rep NOT x
1613
1614       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1615       MO_S_Conv F32 F64 -> conversionNop F64 x
1616
1617       MO_S_Conv from to                         -- if no guard holds, the alternatives below are tried
1618         | from == to         -> conversionNop to x
1619         | isFloatingRep from -> coerceFP2Int from to x
1620         | isFloatingRep to   -> coerceInt2FP from to x
1621
1622         -- narrowing is a nop: we treat the high bits as undefined
1623       MO_S_Conv I32 to -> conversionNop to x
1624       MO_S_Conv I16 I8 -> conversionNop I8 x
1625       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x      -- signed widening from I8
1626       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x    -- signed widening from I16
1627
1628       MO_U_Conv from to
1629         | from == to -> conversionNop to x
1630         -- narrowing is a nop: we treat the high bits as undefined
1631       MO_U_Conv I32 to -> conversionNop to x
1632       MO_U_Conv I16 I8 -> conversionNop I8 x
1633       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))     -- unsigned widening: mask with 255
1634       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))  -- unsigned widening: mask with 65535
1635
1636       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1637       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1638       MO_S_Neg rep      -> trivialUCode rep NEG x
1639       other             -> pprPanic "getRegister(ppc) - unary CmmMachOp" (pprMachOp mop)  -- name the unhandled operator
1640     where
1641         conversionNop new_rep expr              -- no instruction: only the rep tag on the Register changes
1642             = do e_code <- getRegister expr
1643                  return (swizzleRegisterRep e_code new_rep)
1644
-- Dyadic MachOps: comparisons, arithmetic, logic and shifts.
--
-- Floating-point alternatives come first because the generic `rep`
-- alternatives below them would otherwise match F32/F64 too.  Integer
-- comparisons, division, remainder and right shifts wrap sub-word
-- operands in a conversion to I32 (extendSExpr / extendUExpr).
getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
  = case mop of
      MO_Eq F32 -> fltCmp EQQ
      MO_Ne F32 -> fltCmp NE

      MO_S_Gt F32 -> fltCmp GTT
      MO_S_Ge F32 -> fltCmp GE
      MO_S_Lt F32 -> fltCmp LTT
      MO_S_Le F32 -> fltCmp LE

      MO_Eq F64 -> fltCmp EQQ
      MO_Ne F64 -> fltCmp NE

      MO_S_Gt F64 -> fltCmp GTT
      MO_S_Ge F64 -> fltCmp GE
      MO_S_Lt F64 -> fltCmp LTT
      MO_S_Le F64 -> fltCmp LE

      MO_Eq rep -> unsCmp rep EQQ
      MO_Ne rep -> unsCmp rep NE

      MO_S_Gt rep -> sgnCmp rep GTT
      MO_S_Ge rep -> sgnCmp rep GE
      MO_S_Lt rep -> sgnCmp rep LTT
      MO_S_Le rep -> sgnCmp rep LE

      MO_U_Gt rep -> unsCmp rep GU
      MO_U_Ge rep -> unsCmp rep GEU
      MO_U_Lt rep -> unsCmp rep LU
      MO_U_Le rep -> unsCmp rep LEU

      MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
      MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
      MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
      MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y

      MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
      MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
      MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
      MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y

         -- optimize addition with 32-bit immediate
         -- (needed for PIC)
      MO_Add I32 ->
        case y of
          -- The literal itself (not its negation) is what gets added, so
          -- it is the literal that must fit the signed immediate field.
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True imm
            -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
          -- Any other literal: add the high part with ADDIS, then the
          -- low part as an immediate.
          CmmLit lit
            -> do
                (src, srcCode) <- getSomeReg x
                let imm = litToImm lit
                    code dst = srcCode `appOL` toOL [
                                    ADDIS dst src (HA imm),
                                    ADD dst dst (RIImm (LO imm))
                                ]
                return (Any I32 code)
          _ -> trivialCode I32 True ADD x y

      MO_Add rep -> trivialCode rep True ADD x y
      MO_Sub rep ->
        case y of    -- subfi ('subtract from' with immediate) doesn't exist
          -- x - imm is emitted as x + (-imm), so here it is the negated
          -- literal that must fit the immediate field.
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
          -- SUBF is given its operands swapped (y, then x).
          _ -> trivialCodeNoImm rep SUBF y x

      MO_Mul rep -> trivialCode rep True MULLW x y

      MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y

      MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
      MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"

      MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
      MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)

      MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
      MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)

      MO_And rep   -> trivialCode rep False AND x y
      MO_Or rep    -> trivialCode rep False OR x y
      MO_Xor rep   -> trivialCode rep False XOR x y

      MO_Shl rep   -> trivialCode rep False SLW x y
      MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
      MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
  where
    -- floating-point comparison yielding a value in a register
    fltCmp cond = condFltReg cond x y
    -- integer comparisons, with both operands sign-/zero-extended to I32
    sgnCmp rep cond = condIntReg cond (extendSExpr rep x) (extendSExpr rep y)
    unsCmp rep cond = condIntReg cond (extendUExpr rep x) (extendUExpr rep y)
1730
-- Integer literal that makeImmediate accepts as a signed immediate:
-- a single LI into the destination.
getRegister (CmmLit (CmmInt i rep))
  | Just imm <- makeImmediate rep True i
  = return (Any rep (\ dst -> unitOL (LI dst imm)))
1737
-- Floating-point literal: emit the constant as read-only data under a
-- fresh label, then load it through a (possibly dynamic) reference to
-- that label.
getRegister (CmmLit (CmmFloat f frep)) = do
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode ea ea_code <- getAmode dynRef
    let constant = LDATA ReadOnlyData [ CmmDataLabel lbl
                                      , CmmStaticLit (CmmFloat f frep) ]
        load dst = constant `consOL` (ea_code `snocOL` LD frep dst ea)
    return (Any frep load)
1748
-- Any other literal: build it in two steps, LIS with the HA part and
-- then an ADD of the LO part.
getRegister (CmmLit lit)
  = return (Any (cmmLitRep lit) materialise)
  where
    imm = litToImm lit
    materialise dst = toOL [ LIS dst (HA imm)
                           , ADD dst dst (RIImm (LO imm)) ]

-- Nothing above matched: report the offending expression.
getRegister unhandled = pprPanic "getRegister(ppc)" (pprExpr unhandled)
1759
1760     -- extend?Rep: wrap integer expression of type rep
1761     -- in a conversion to I32
-- Wrap an integer expression of the given rep in a signed conversion
-- to I32; an I32 expression is returned unchanged.
extendSExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]

-- As extendSExpr, but using an unsigned conversion.
extendUExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1766
1767 #endif /* powerpc_TARGET_ARCH */
1768
1769
1770 -- -----------------------------------------------------------------------------
1771 --  The 'Amode' type: Memory addressing modes passed up the tree.
1772
-- An addressing mode paired with the instructions that must be run
-- first to set up the registers the addressing mode refers to.
data Amode = Amode AddrMode InstrBlock
1774
1775 {-
Now, given a tree (the argument to a CmmLoad) that references memory,
1777 produce a suitable addressing mode.
1778
1779 A Rule of the Game (tm) for Amodes: use of the addr bit must
1780 immediately follow use of the code part, since the code part puts
1781 values in registers which the addr then refers to.  So you can't put
1782 anything in between, lest it overwrite some of those registers.  If
1783 you need to do some other computation between the code part and use of
1784 the addr bit, first store the effective address from the amode in a
1785 temporary, then do the other computation, and then use the temporary:
1786
1787     code
1788     LEA amode, tmp
1789     ... other computation ...
1790     ... (tmp) ...
1791 -}
1792
-- Turn a memory-address expression into an addressing mode plus the
-- code that sets it up.  A register-plus-offset node is first rewritten
-- by mangleIndexTree and then handled by the clauses for the target.
getAmode :: CmmExpr -> NatM Amode
getAmode regOff@(CmmRegOff _ _) = getAmode $ mangleIndexTree regOff
1795
1796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1797
1798 #if alpha_TARGET_ARCH
1799
-- Alpha: register minus constant, as base register + negated displacement.
getAmode (StPrim IntSubOp [x, StInt i])
  = getNewRegNat PtrRep         `thenNat` \ scratch ->
    getRegister x               `thenNat` \ reg_info ->
    return (Amode (AddrRegImm (registerName reg_info scratch)
                              (ImmInt (negate (fromInteger i))))
                  (registerCode reg_info scratch))

-- Alpha: register plus constant, as base register + displacement.
getAmode (StPrim IntAddOp [x, StInt i])
  = getNewRegNat PtrRep         `thenNat` \ scratch ->
    getRegister x               `thenNat` \ reg_info ->
    return (Amode (AddrRegImm (registerName reg_info scratch)
                              (ImmInt (fromInteger i)))
                  (registerCode reg_info scratch))

-- Alpha: a leaf that maybeImm recognises needs no set-up code.
getAmode leaf
  | Just imm <- maybeImm leaf
  = return (Amode (AddrImm imm) id)

-- Alpha: anything else is evaluated into a register and used as is.
getAmode other
  = getNewRegNat PtrRep         `thenNat` \ scratch ->
    getRegister other           `thenNat` \ reg_info ->
    return (Amode (AddrReg (registerName reg_info scratch))
                  (registerCode reg_info scratch))
1835
1836 #endif /* alpha_TARGET_ARCH */
1837
1838 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1839
1840 #if x86_64_TARGET_ARCH
1841
-- x86_64: PIC base register plus a literal becomes a ripRel address
-- with no set-up code.
getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
    = return (Amode (ripRel (litToImm displacement)) nilOL)
1845
1846 #endif
1847
1848 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1849
-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.

-- reg - literal: base register with a negated displacement.
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
  | not (is64BitLit lit)
  -- ASSERT(rep == I32)???
  = do (base_reg, base_code) <- getSomeReg x
       return (Amode (AddrBaseIndex (EABaseReg base_reg) EAIndexNone
                                    (ImmInt (negate (fromInteger i))))
                     base_code)

-- reg + literal: base register with a displacement.
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
  | not (is64BitLit lit)
  -- ASSERT(rep == I32)???
  = do (base_reg, base_code) <- getSomeReg x
       return (Amode (AddrBaseIndex (EABaseReg base_reg) EAIndexNone
                                    (ImmInt (fromInteger i)))
                     base_code)
1865
-- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode (CmmMachOp (MO_Add rep) [shifted@(CmmMachOp (MO_Shl _) _),
                                  literal@(CmmLit _)])
  = getAmode (CmmMachOp (MO_Add rep) [literal, shifted])

-- base + (index << shift): a scaled-index address with no displacement.
getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift `elem` [0, 1, 2, 3]
  = x86_complex_amode x y shift 0
1876
-- base + ((index << shift) + offset): a scaled-index address with a
-- displacement.  Both conditions must hold for every shift amount: the
-- shift has to be one of the four scales, and the offset has to pass
-- the is64BitInteger range check, because x86_complex_amode narrows it
-- with fromIntegral.  (Written as separate guard conditions; with a
-- bare `a || b || c || d && e` the offset check would only apply to
-- the last shift value.)
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
  , not (is64BitInteger offset)
  = x86_complex_amode x y shift offset
1884
-- Any other sum: base + index with scale 1 and no displacement.
getAmode (CmmMachOp (MO_Add rep) [x,y])
  = x86_complex_amode x y 0 0

-- A literal that is64BitLit does not reject is used as an absolute address.
getAmode (CmmLit lit) | not (is64BitLit lit)
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

-- Fallback: evaluate the expression into a register and address through it.
getAmode expr =
  getSomeReg expr >>= \ (reg, code) ->
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1894
1895
-- Build a base + index*scale + displacement addressing mode.  The shift
-- amount (0..3) is turned into the scale factor (1, 2, 4, 8); no other
-- shift value is handled.
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (base_reg, base_code) <- getNonClobberedReg base
        -- the base must be in a temp, because it has to stay live over
        -- the index code; we could compare the two registers and do
        -- something better here...
       (index_reg, index_code) <- getSomeReg index
       let
           scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
           disp  = ImmInt (fromIntegral offset)
       return (Amode (AddrBaseIndex (EABaseReg base_reg)
                                    (EAIndex index_reg scale)
                                    disp)
                     (base_code `appOL` index_code))
1907
1908 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1909
1910 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1911
1912 #if sparc_TARGET_ARCH
1913
-- reg - constant, when the negated constant passes fits13Bits.
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
  | fits13Bits (-i)
  = do
       (base, base_code) <- getSomeReg x
       return (Amode (AddrRegImm base (ImmInt (negate (fromInteger i))))
                     base_code)


-- reg + constant, when the constant passes fits13Bits.
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
  | fits13Bits i
  = do
       (base, base_code) <- getSomeReg x
       return (Amode (AddrRegImm base (ImmInt (fromInteger i)))
                     base_code)

-- Any other sum: register + register.
getAmode (CmmMachOp (MO_Add rep) [x, y])
  = do
    (regX, codeX) <- getSomeReg x
    (regY, codeY) <- getSomeReg y
    return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))

-- XXX Is this same as "leaf" in Stix?
-- A literal: SETHI the HI part into a fresh register and use the LO
-- part as the displacement.
getAmode (CmmLit lit)
  = do
      tmp <- getNewRegNat I32
      let imm = litToImm lit
      return (Amode (AddrRegImm tmp (LO imm))
                    (unitOL (SETHI (HI imm) tmp)))

-- Fallback: evaluate into a register, displacement zero.
getAmode other
  = do
       (reg, code) <- getSomeReg other
       return (Amode (AddrRegImm reg (ImmInt 0)) code)
1955
1956 #endif /* sparc_TARGET_ARCH */
1957
1958 #ifdef powerpc_TARGET_ARCH
-- reg - constant, when the negated constant is accepted by makeImmediate.
getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
  | Just disp <- makeImmediate I32 True (-i)
  = getSomeReg x >>= \ (base, base_code) ->
    return (Amode (AddrRegImm base disp) base_code)


-- reg + constant, when the constant is accepted by makeImmediate.
getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
  | Just disp <- makeImmediate I32 True i
  = getSomeReg x >>= \ (base, base_code) ->
    return (Amode (AddrRegImm base disp) base_code)

   -- optimize addition with 32-bit immediate
   -- (needed for PIC): ADDIS the HA part onto the base, and leave the
   -- LO part as the displacement.
getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
  = do
        tmp <- getNewRegNat I32
        (base, base_code) <- getSomeReg x
        let imm = litToImm lit
        return (Amode (AddrRegImm tmp (LO imm))
                      (base_code `snocOL` ADDIS tmp base (HA imm)))

-- A bare literal: LIS the HA part, LO part as the displacement.
getAmode (CmmLit lit)
  = do
        tmp <- getNewRegNat I32
        let imm = litToImm lit
        return (Amode (AddrRegImm tmp (LO imm))
                      (unitOL (LIS tmp (HA imm))))

-- Any other sum: register + register.
getAmode (CmmMachOp (MO_Add I32) [x, y])
  = do
        (regX, codeX) <- getSomeReg x
        (regY, codeY) <- getSomeReg y
        let both = codeX `appOL` codeY
        return (Amode (AddrRegReg regX regY) both)

-- Fallback: evaluate into a register, displacement zero.
getAmode other
  = getSomeReg other >>= \ (reg, code) ->
    return (Amode (AddrRegImm reg (ImmInt 0)) code)
2001 #endif /* powerpc_TARGET_ARCH */
2002
2003 -- -----------------------------------------------------------------------------
2004 -- getOperand: sometimes any operand will do.
2005
2006 -- getNonClobberedOperand: the value of the operand will remain valid across
2007 -- the computation of an arbitrary expression, unless the expression
2008 -- is computed directly into a register which the operand refers to
2009 -- (see trivialCode where this function is used for an example).
2010
2011 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2012
2013 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2014 #if x86_64_TARGET_ARCH
-- A floating-point literal accepted by isSuitableFloatingPointLit is
-- emitted as read-only data and used as a ripRel memory operand.
getNonClobberedOperand (CmmLit lit)
  | isSuitableFloatingPointLit lit = do
    lbl <- getNewLabelNat
    let constant = LDATA ReadOnlyData [CmmDataLabel lbl, CmmStaticLit lit]
    return (OpAddr (ripRel (ImmCLbl lbl)), unitOL constant)
2021 #endif
-- A non-floating literal that is64BitLit does not reject is used as an
-- immediate operand; no code is needed.
getNonClobberedOperand (CmmLit lit)
  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
    return (OpImm (litToImm lit), nilOL)

-- A load is used as a memory operand.  If the addressing mode mentions
-- a register that amodeCouldBeClobbered flags, the effective address is
-- first saved into a fresh word-sized temporary and the operand
-- addresses through that temporary instead.
getNonClobberedOperand (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
    Amode src mem_code <- getAmode mem
    (src',save_code) <-
        if (amodeCouldBeClobbered src)
                then do
                   tmp <- getNewRegNat wordRep
                   -- The LEA is done at word size, matching the rep of
                   -- tmp, so the full address is kept on every target.
                   return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                           unitOL (LEA wordRep (OpAddr src) (OpReg tmp)))
                else
                   return (src, nilOL)
    -- The address set-up code must run before the LEA that reads the
    -- registers it defines (the Amode rule: "code; LEA amode, tmp").
    return (OpAddr src', mem_code `appOL` save_code)

-- Anything else goes into a register obtained from getNonClobberedReg.
getNonClobberedOperand e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)
2040
-- True if any register mentioned by the addressing mode is one that
-- regClobbered flags.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered = any regClobbered . addrModeRegs

-- A real register is flagged when freeReg says it is free; any other
-- kind of register never is.
regClobbered reg = case reg of
    RealReg rr -> isFastTrue (freeReg rr)
    _          -> False
2046
2047 -- getOperand: the operand is not required to remain valid across the
2048 -- computation of an arbitrary expression.
2049 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2050 #if x86_64_TARGET_ARCH
-- A floating-point literal accepted by isSuitableFloatingPointLit is
-- emitted as read-only data and used as a ripRel memory operand.
getOperand (CmmLit lit)
  | isSuitableFloatingPointLit lit = do
    lbl <- getNewLabelNat
    let constant = LDATA ReadOnlyData [CmmDataLabel lbl, CmmStaticLit lit]
    return (OpAddr (ripRel (ImmCLbl lbl)), unitOL constant)
2057 #endif
-- A non-floating literal that is64BitLit does not reject: immediate operand.
getOperand (CmmLit lit)
  | not (is64BitLit lit || isFloatingRep (cmmLitRep lit))
  = return (OpImm (litToImm lit), nilOL)

-- A load: memory operand, using the addressing mode directly.
getOperand (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True)
  = getAmode mem >>= \ (Amode src mem_code) ->
    return (OpAddr src, mem_code)

-- Anything else: evaluate into some register.
getOperand e =
    getSomeReg e >>= \ (reg, code) ->
    return (OpReg reg, code)
2068
-- Can the expression be used directly as an instruction operand?
-- Loads always can; literals can unless is64BitLit rejects them, or if
-- they are suitable floating-point literals; nothing else can.
isOperand :: CmmExpr -> Bool
isOperand expr = case expr of
    CmmLoad _ _ -> True
    CmmLit lit  -> not (is64BitLit lit) || isSuitableFloatingPointLit lit
    _           -> False
2074
-- If we want a floating-point literal as an operand, we can use it
-- directly from memory.  A zero literal is excluded: we're better off
-- generating it into a register using xor.
isSuitableFloatingPointLit lit = case lit of
    CmmFloat f _ -> not (f == 0.0)
    _            -> False
2081
-- Produce either a memory operand (for a load) or a register operand
-- obtained from getNonClobberedReg (for anything else).
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True)
  = getAmode mem >>= \ (Amode src mem_code) ->
    return (OpAddr src, mem_code)
getRegOrMem e =
    getNonClobberedReg e >>= \ (reg, code) ->
    return (OpReg reg, code)
2090
-- Does the literal need more than a 32-bit immediate?  Only an I64
-- integer literal on x86_64 can; everything else answers False.
#if x86_64_TARGET_ARCH
is64BitLit (CmmInt value I64) = is64BitInteger value
   -- Labels are assumed to be in the range 0-2^31-1: this assumes the
   -- small memory model (see gcc docs, -mcmodel=small).
#endif
is64BitLit _ = False
2097 #endif
2098
-- True when the value, truncated to 64 bits, lies outside the signed
-- 32-bit range.
is64BitInteger :: Integer -> Bool
is64BitInteger i = not (lowest <= truncated && truncated <= highest)
  where
    -- A CmmInt is intended to be truncated to the appropriate number of
    -- bits, so we truncate to Int64 before testing.  This matters
    -- because e.g. -1 as a CmmInt might be either -1 or
    -- 18446744073709551615.
    truncated = fromIntegral i :: Int64
    lowest    = -0x80000000 :: Int64
    highest   = 0x7fffffff :: Int64
2106
2107 -- -----------------------------------------------------------------------------
2108 --  The 'CondCode' type:  Condition codes passed up the tree.
2109
-- A condition together with the code that establishes it.  In this
-- file the Bool is True for codes built by condFltCode and False for
-- codes built by condIntCode.
data CondCode = CondCode Bool Cond InstrBlock
2111
2112 -- Set up a condition code for a conditional branch.
2113
2114 getCondCode :: CmmExpr -> NatM CondCode
2115
2116 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2117
2118 #if alpha_TARGET_ARCH
-- Not provided on Alpha: any use panics.
getCondCode = panic "MachCode.getCondCode: not on Alphas"
2120 #endif /* alpha_TARGET_ARCH */
2121
2122 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2123
2124 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2125 -- yes, they really do seem to want exactly the same!
2126
-- Comparison MachOps: floating-point reps go to condFltCode, every
-- other rep to condIntCode.  The F32/F64 alternatives must stay ahead
-- of the generic `rep` ones, which would otherwise match them.
getCondCode (CmmMachOp mop [x, y])
  = case mop of
      MO_Eq F32 -> fltCond EQQ
      MO_Ne F32 -> fltCond NE

      MO_S_Gt F32 -> fltCond GTT
      MO_S_Ge F32 -> fltCond GE
      MO_S_Lt F32 -> fltCond LTT
      MO_S_Le F32 -> fltCond LE

      MO_Eq F64 -> fltCond EQQ
      MO_Ne F64 -> fltCond NE

      MO_S_Gt F64 -> fltCond GTT
      MO_S_Ge F64 -> fltCond GE
      MO_S_Lt F64 -> fltCond LTT
      MO_S_Le F64 -> fltCond LE

      MO_Eq rep -> intCond EQQ
      MO_Ne rep -> intCond NE

      MO_S_Gt rep -> intCond GTT
      MO_S_Ge rep -> intCond GE
      MO_S_Lt rep -> intCond LTT
      MO_S_Le rep -> intCond LE

      MO_U_Gt rep -> intCond GU
      MO_U_Ge rep -> intCond GEU
      MO_U_Lt rep -> intCond LU
      MO_U_Le rep -> intCond LEU

      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
  where
    fltCond cond = condFltCode cond x y
    intCond cond = condIntCode cond x y

-- Anything that is not a two-operand MachOp is rejected.
getCondCode unexpected =  pprPanic "getCondCode(2)(x86,sparc)" (ppr unexpected)
2162
2163 #elif powerpc_TARGET_ARCH
2164
-- Almost the same as everywhere else - but small integers have to be
-- extended to 32 bit first: sign-extended for signed comparisons,
-- zero-extended for equality and unsigned comparisons.

getCondCode (CmmMachOp mop [x, y])
  = case mop of
      MO_Eq F32 -> fltCond EQQ
      MO_Ne F32 -> fltCond NE

      MO_S_Gt F32 -> fltCond GTT
      MO_S_Ge F32 -> fltCond GE
      MO_S_Lt F32 -> fltCond LTT
      MO_S_Le F32 -> fltCond LE

      MO_Eq F64 -> fltCond EQQ
      MO_Ne F64 -> fltCond NE

      MO_S_Gt F64 -> fltCond GTT
      MO_S_Ge F64 -> fltCond GE
      MO_S_Lt F64 -> fltCond LTT
      MO_S_Le F64 -> fltCond LE

      MO_Eq rep -> unsCond rep EQQ
      MO_Ne rep -> unsCond rep NE

      MO_S_Gt rep -> sgnCond rep GTT
      MO_S_Ge rep -> sgnCond rep GE
      MO_S_Lt rep -> sgnCond rep LTT
      MO_S_Le rep -> sgnCond rep LE

      MO_U_Gt rep -> unsCond rep GU
      MO_U_Ge rep -> unsCond rep GEU
      MO_U_Lt rep -> unsCond rep LU
      MO_U_Le rep -> unsCond rep LEU

      other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
  where
    fltCond cond     = condFltCode cond x y
    sgnCond rep cond = condIntCode cond (extendSExpr rep x) (extendSExpr rep y)
    unsCond rep cond = condIntCode cond (extendUExpr rep x) (extendUExpr rep y)

-- Anything that is not a two-operand MachOp is rejected.
getCondCode unexpected =  panic "getCondCode(2)(powerpc)"
2202
2203
2204 #endif
2205
2206
2207 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2208 -- passed back up the tree.
2209
2210 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2211
2212 #if alpha_TARGET_ARCH
-- Neither is provided on Alpha: any use panics.
condIntCode = panic "MachCode.condIntCode: not on Alphas"
condFltCode = panic "MachCode.condFltCode: not on Alphas"
2215 #endif /* alpha_TARGET_ARCH */
2216
2217 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2218 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2219
-- memory vs immediate: compare the literal against the memory operand
-- directly, without loading it into a register first.
condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
    Amode mem_addr mem_code <- getAmode x
    let compare_insn = CMP pk (OpImm (litToImm lit)) (OpAddr mem_addr)
    return (CondCode False cond (mem_code `snocOL` compare_insn))
2229
-- anything vs zero, using a mask: (x AND mask) compared with 0 becomes
-- a single TEST of x against the mask as an immediate.
--
-- The mask is only used this way when is64BitLit does not reject it,
-- the same check the other immediate-using clauses of condIntCode and
-- getAmode apply.  A rejected mask falls through to the later, more
-- general clauses.
condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask pk2)) <- o2, not (is64BitLit lit)
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)
2241
-- anything vs zero: TEST the register against itself.
condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
    (reg, reg_code) <- getSomeReg x
    let self_test = TEST pk (OpReg reg) (OpReg reg)
    return (CondCode False cond (reg_code `snocOL` self_test))

-- anything vs operand: the left side goes into a register from
-- getNonClobberedReg, the right side is used as an operand directly.
condIntCode cond x y | isOperand y = do
    (lhs_reg, lhs_code) <- getNonClobberedReg x
    (rhs_op,  rhs_code) <- getOperand y
    let compare_insn = CMP (cmmExprRep x) rhs_op (OpReg lhs_reg)
    return (CondCode False cond
                (lhs_code `appOL` rhs_code `snocOL` compare_insn))

-- anything vs anything: the right side is generated first, into a
-- register from getNonClobberedReg; the left side may be a register or
-- a memory operand.
condIntCode cond x y = do
  (rhs_reg, rhs_code) <- getNonClobberedReg y
  (lhs_op, lhs_code) <- getRegOrMem x
  let compare_insn = CMP (cmmExprRep x) (OpReg rhs_reg) lhs_op
  return (CondCode False cond
              (rhs_code `appOL` lhs_code `snocOL` compare_insn))
2271 #endif
2272
2273 #if i386_TARGET_ARCH
-- i386 floating-point comparison.  The requested condition is handed
-- to GCMP itself; the CondCode that comes back always tests EQQ.
condFltCode cond x y
  = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
  (lhs_reg, lhs_code) <- getNonClobberedReg x
  (rhs_reg, rhs_code) <- getSomeReg y
  -- The GCMP insn does the test and sets the zero flag if comparable
  -- and true.  Hence we always supply EQQ as the condition to test.
  return (CondCode True EQQ
            (lhs_code `appOL` rhs_code `snocOL` GCMP cond lhs_reg rhs_reg))
2284 #endif /* i386_TARGET_ARCH */
2285
2286 #if x86_64_TARGET_ARCH
-- In the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
-- an operand, but the right must be a reg.  We can probably do better
-- than this general case...
condFltCode cond x y = do
  (lhs_reg, lhs_code) <- getNonClobberedReg x
  (rhs_op, rhs_code) <- getOperand y
  let compare_insn = CMP (cmmExprRep x) rhs_op (OpReg lhs_reg)
  -- NB(1): we need to use the unsigned comparison operators on the
  -- result of this comparison, hence condToUnsigned.
  return (CondCode True (condToUnsigned cond)
              (lhs_code `appOL` rhs_code `snocOL` compare_insn))
2301 #endif
2302
2303 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2304
2305 #if sparc_TARGET_ARCH
2306
-- Compare against a constant that passes fits13Bits: subtract the
-- immediate, with the result directed at g0.
condIntCode cond x (CmmLit (CmmInt y rep))
  | fits13Bits y
  = do
       (lhs, lhs_code) <- getSomeReg x
       let compare_insn = SUB False True lhs (RIImm (ImmInt (fromInteger y))) g0
       return (CondCode False cond (lhs_code `snocOL` compare_insn))

-- General case: both sides in registers, same subtraction into g0.
condIntCode cond x y = do
    (lhs, lhs_code) <- getSomeReg x
    (rhs, rhs_code) <- getSomeReg y
    let compare_insn = SUB False True lhs (RIReg rhs) g0
    return (CondCode False cond
                (lhs_code `appOL` rhs_code `snocOL` compare_insn))
2323
2324 -----------
-- Floating-point comparison.  If the operands have different reps, the
-- F32 one is first converted to F64 into a temporary and the
-- comparison is done at F64.
condFltCode cond x y = do
    (lhs, lhs_code) <- getSomeReg x
    (rhs, rhs_code) <- getSomeReg y
    wide <- getNewRegNat F64
    let
        widen reg = FxTOy F32 F64 reg wide

        lhs_rep = cmmExprRep x
        rhs_rep = cmmExprRep y

        compare_code
          | lhs_rep == rhs_rep
          = lhs_code `appOL` rhs_code `snocOL`
            FCMP True lhs_rep lhs rhs
          | lhs_rep == F32
          = lhs_code `snocOL` widen lhs `appOL` rhs_code `snocOL`
            FCMP True F64 wide rhs
          | otherwise
          = lhs_code `appOL` rhs_code `snocOL` widen rhs `snocOL`
            FCMP True F64 lhs wide
    return (CondCode True cond compare_code)
2346
2347 #endif /* sparc_TARGET_ARCH */
2348
2349 #if powerpc_TARGET_ARCH
--  ###FIXME: I16 and I8!
-- Compare against a constant accepted by makeImmediate (signed or
-- unsigned according to the condition); CMPL is used for unsigned
-- conditions and CMP otherwise.
condIntCode cond x (CmmLit (CmmInt y rep))
  | Just imm <- makeImmediate rep (not $ condUnsigned cond) y
  = do
        (lhs, lhs_code) <- getSomeReg x
        let cmp = if condUnsigned cond then CMPL else CMP
        return (CondCode False cond
                    (lhs_code `snocOL` cmp I32 lhs (RIImm imm)))

-- General case: both sides in registers.
condIntCode cond x y = do
    (lhs, lhs_code) <- getSomeReg x
    (rhs, rhs_code) <- getSomeReg y
    let cmp = if condUnsigned cond then CMPL else CMP
    return (CondCode False cond
                (lhs_code `appOL` rhs_code `snocOL` cmp I32 lhs (RIReg rhs)))
2367
-- Floating-point comparison: FCMP on two registers, followed for GE
-- and LE by a CRNOR that twiddles CR to handle the unordered case.
condFltCode cond x y = do
    (lhs, lhs_code) <- getSomeReg x
    (rhs, rhs_code) <- getSomeReg y
    let
        ltbit = 0
        eqbit = 2
        gtbit = 1
        compare_code = lhs_code `appOL` rhs_code `snocOL` FCMP lhs rhs
        fixup = case cond of
                    GE -> unitOL (CRNOR ltbit eqbit gtbit)
                    LE -> unitOL (CRNOR gtbit eqbit ltbit)
                    _  -> nilOL
    return (CondCode True cond (compare_code `appOL` fixup))
2380
2381 #endif /* powerpc_TARGET_ARCH */
2382
2383 -- -----------------------------------------------------------------------------
2384 -- Generating assignments
2385
2386 -- Assignments are really at the heart of the whole code generation
2387 -- business.  Almost all top-level nodes of any real importance are
2388 -- assignments, which correspond to loads, stores, or register
2389 -- transfers.  If we're really lucky, some of the register transfers
2390 -- will go away, because we can use the destination register to
2391 -- complete the code generation for the right hand side.  This only
2392 -- fails when the right hand side is forced into a fixed register
2393 -- (e.g. the result of a call).
2394
2395 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2396 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2397
2398 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2399 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2400
2401 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2402
2403 #if alpha_TARGET_ARCH
2404
-- Alpha: store to memory.  The address code runs first, then the
-- source code, then one ST of the size given by pk.
assignIntCode pk (CmmLoad dst _) src
  = getNewRegNat IntRep             `thenNat` \ scratch ->
    getAmode dst                    `thenNat` \ amode ->
    getRegister src                 `thenNat` \ src_info ->
    let
        addr_code = amodeCode amode []
        src_code  = registerCode src_info scratch []
        store     = ST (primRepToSize pk)
                       (registerName src_info scratch)
                       (amodeAddr amode)
    in
    return (asmSeqThen [addr_code, src_code] . mkSeqInstr store)

-- Alpha: register-to-register.  The source is generated aiming at the
-- destination register; a fixed source register needs an extra copy.
assignIntCode pk dst src
  = getRegister dst                         `thenNat` \ dst_info ->
    getRegister src                         `thenNat` \ src_info ->
    let
        target   = registerName dst_info zeroh
        src_code = registerCode src_info target
        src_reg  = registerName src_info target
        copy     = mkSeqInstr (OR src_reg (RIReg src_reg) target)
    in
    return (if isFixed src_info then src_code . copy else src_code)
2431
2432 #endif /* alpha_TARGET_ARCH */
2433
2434 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2435
2436 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2437
2438 -- integer assignment to memory
2439
-- Specific case of adding/subtracting an integer to a particular
-- address: when the value stored is (load of the same address) +/- a
-- constant, do the operation directly on the memory operand.
-- ToDo: catch other cases where we can use an operation directly on a
-- memory address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                                 CmmLit (CmmInt i _)])
   | addr == addr2, pk /= I64 || not (is64BitInteger i),
     Just mem_insn <- inPlaceInsn op
   = do Amode dst_amode dst_code <- getAmode addr
        return (dst_code `snocOL`
                mem_insn pk (OpImm (ImmInt (fromIntegral i))) (OpAddr dst_amode))
   where
        -- MachOps with an in-place instruction form.
        -- ToDo: more?
        inPlaceInsn mach_op = case mach_op of
            MO_Add _ -> Just ADD
            MO_Sub _ -> Just SUB
            _        -> Nothing
2455         -- ToDo: more?
2456
2457 -- general case
2458 assignMem_IntCode pk addr src = do
2459     Amode addr code_addr <- getAmode addr
2460     (code_src, op_src)   <- get_op_RI src
2461     let
2462         code = code_src `appOL`
2463                code_addr `snocOL`
2464                   MOV pk op_src (OpAddr addr)
2465         -- NOTE: op_src is stable, so it will still be valid
2466         -- after code_addr.  This may involve the introduction
2467         -- of an extra MOV to a temporary register, but we hope
2468         -- the register allocator will get rid of it.
2469     --
2470     return code
2471   where
2472     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2473     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2474       = return (nilOL, OpImm (litToImm lit))
2475     get_op_RI op
2476       = do (reg,code) <- getNonClobberedReg op
2477            return (code, OpReg reg)
2478
2479
2480 -- Register destination, memory source: load straight into the register.
2481 assignReg_IntCode pk reg (CmmLoad src _) =
2482   intLoadCode (MOV pk) src >>= \ mkLoad ->
2483   return (mkLoad (getRegisterReg reg))
2484
2485 -- Register destination, arbitrary source expression.
2486 assignReg_IntCode pk reg src =
2487   getAnyReg src >>= \ mkCode ->
2488   return (mkCode (getRegisterReg reg))
2489
2490 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2491
2492 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2493
2494 #if sparc_TARGET_ARCH
2495
     -- Integer store to memory (SPARC): evaluate the source into some register,
     -- compute the destination address mode, then append the store.
2496 assignMem_IntCode pk addr src = do
2497     (valueReg, valueCode)   <- getSomeReg src
2498     Amode target targetCode <- getAmode addr
         let store = ST pk valueReg target
2499     return ((valueCode `appOL` targetCode) `snocOL` store)
2500
     -- Integer assignment to a register (SPARC).
     --
     -- 'getRegister' yields either code that can target any register ('Any'),
     -- which is instantiated directly at the destination, or a value that
     -- lives in a fixed register ('Fixed'), which must be copied explicitly.
2501 assignReg_IntCode pk reg src = do
2502     r <- getRegister src
2503     return $ case r of
2504         Any _ code         -> code dst
             -- Register-to-register move expressed as "dst := g0 OR freg".
             -- The operands were previously swapped, which copied the
             -- destination into the source register instead.
2505         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2506     where
2507       dst = getRegisterReg reg
2508
2509
2510 #endif /* sparc_TARGET_ARCH */
2511
2512 #if powerpc_TARGET_ARCH
2513
     -- Integer store to memory (PowerPC): source register code first, then the
     -- address-mode code, then the store instruction itself.
2514 assignMem_IntCode pk addr src =
2515     getSomeReg src >>= \ (srcReg, srcCode) ->
2516     getAmode addr  >>= \ (Amode dstAddr addrCode) ->
2517     return (srcCode `appOL` addrCode `snocOL` ST pk srcReg dstAddr)
2518
2519 -- Integer assignment to a register; the source is an arbitrary expression.
2520 assignReg_IntCode pk reg src =
2521     getRegister src >>= \ srcRegister ->
2522     return (finish srcRegister)
2523   where
2524     target = getRegisterReg reg
2525     -- An 'Any' result is generated straight into the target; a 'Fixed'
2526     -- result is followed by an explicit register move (MR).
2527     finish (Any _ mkCode)               = mkCode target
         finish (Fixed _ fixedReg fixedCode) = fixedCode `snocOL` MR target fixedReg
2528
2529 #endif /* powerpc_TARGET_ARCH */
2530
2531
2532 -- -----------------------------------------------------------------------------
2533 -- Floating-point assignments
2534
2535 #if alpha_TARGET_ARCH
2536
     -- Alpha floating-point assignment; mirrors the integer version but
     -- allocates the temporary at representation pk and uses FMOV for the
     -- register copy.
     -- NOTE(review): named 'assignFltCode' rather than assignMem_FltCode /
     -- assignReg_FltCode; the Alpha section looks unmaintained -- confirm.
     --
     -- First equation: destination written as a load expression (a store).
2537 assignFltCode pk (CmmLoad dst _) src
2538   = getNewRegNat pk                 `thenNat` \ tmp ->
2539     getAmode dst                    `thenNat` \ amode ->
2540     getRegister src                         `thenNat` \ register ->
2541     let
2542         code1   = amodeCode amode []
2543         dst__2  = amodeAddr amode
2544         code2   = registerCode register tmp []
2545         src__2  = registerName register tmp
2546         sz      = primRepToSize pk
2547         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2548     in
2549     return code__2
2550
     -- Second equation: any other destination.  The source is generated into
     -- the destination's register; a fixed source register gets an explicit
     -- FMOV appended.
2551 assignFltCode pk dst src
2552   = getRegister dst                         `thenNat` \ register1 ->
2553     getRegister src                         `thenNat` \ register2 ->
2554     let
2555         dst__2  = registerName register1 zeroh
2556         code    = registerCode register2 dst__2
2557         src__2  = registerName register2 dst__2
2558         code__2 = if isFixed register2
2559                   then code . mkSeqInstr (FMOV src__2 dst__2)
2560                   else code
2561     in
2562     return code__2
2563
2564 #endif /* alpha_TARGET_ARCH */
2565
2566 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2567
2568 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2569
2570 -- Floating point assignment to memory.
     -- The source is evaluated into a register via 'getNonClobberedReg' before
     -- the address-mode code runs; the emitted sequence is source code, address
     -- code, then the store.
2571 assignMem_FltCode pk addr src = do
2572   (src_reg, src_code) <- getNonClobberedReg src
2573   Amode addr addr_code <- getAmode addr
2574   let
2575         code = src_code `appOL`
2576                addr_code `snocOL`
                     -- IF_ARCH_i386 is a CPP macro (presumably from NCG.h) that
                     -- picks the first alternative on i386 and the second
                     -- otherwise: GST on i386, a plain MOV elsewhere.
2577                 IF_ARCH_i386(GST pk src_reg addr,
2578                              MOV pk (OpReg src_reg) (OpAddr addr))
2579   return code
2580
2581 -- Float assignment whose destination is a register or temporary: have
     -- 'getAnyReg' produce code parameterised on the result register and
     -- instantiate it at the destination.
2582 assignReg_FltCode pk reg src =
2583   getAnyReg src >>= \ codeInto ->
2584   return (codeInto dstReg)
       where
         dstReg = getRegisterReg reg
2585
2586 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2587
2588 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2589
2590 #if sparc_TARGET_ARCH
2591
2592 -- Floating point store to memory.  When the source expression's rep differs
     -- from the store rep, the value is first converted (FxTOy) into a fresh
     -- register and that register is stored instead.
2593 assignMem_FltCode pk addr src = do
2594     Amode dstAddr addrCode <- getAmode addr
2595     (srcReg, srcCode)      <- getSomeReg src
2596     convReg                <- getNewRegNat pk
2597     let srcRep = cmmExprRep src
2598         storeCode
2599           | pk == srcRep = unitOL (ST pk srcReg dstAddr)
2600           | otherwise    = toOL [ FxTOy srcRep pk srcReg convReg
2601                                 , ST pk convReg dstAddr ]
2602     return (addrCode `appOL` srcCode `appOL` storeCode)
2604
2605 -- Floating point assignment to a register/temporary (SPARC).
     --
     -- 'getRegister' yields either code that can target any register ('Any'),
     -- which is instantiated at the destination, or a value held in a fixed
     -- register ('Fixed'), which is copied to the destination with FMOV.
     --
     -- The 'Fixed' case used to FMOV into a freshly allocated virtual register
     -- that nothing else read, so the destination was never written; it now
     -- targets the destination and the unused temporary is gone.
2607 assignReg_FltCode pk reg src = do
2608     r <- getRegister src
2610     return $ case r of
2611         Any _ code         -> code dst
2612         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg dst
2613     where
2614       dst = getRegisterReg reg
2615
2616 #endif /* sparc_TARGET_ARCH */
2617
2618 #if powerpc_TARGET_ARCH
2619
2620 -- Easy, isn't it?  On PowerPC the floating-point assignment entry points
     -- are plain aliases for the integer ones.
2621 assignMem_FltCode = assignMem_IntCode
2622 assignReg_FltCode = assignReg_IntCode
2623
2624 #endif /* powerpc_TARGET_ARCH */
2625
2626
2627 -- -----------------------------------------------------------------------------
2628 -- Generating a non-local jump
2629
2630 -- (If applicable) Do not fill the delay slots here; you will confuse the
2631 -- register allocator.
2632
2633 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2634
2635 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2636
2637 #if alpha_TARGET_ARCH
2638
     -- Alpha: jump to a label.  Assembler-temporary labels get a direct BR;
     -- any other label is loaded into pv and jumped to through the register.
     -- NOTE(review): the pattern is a bare 'CmmLabel', whereas the other
     -- architectures match 'CmmLit (CmmLabel ..)'; this section looks
     -- unmaintained -- confirm.
2639 genJump (CmmLabel lbl)
2640   | isAsmTemp lbl = returnInstr (BR target)
2641   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2642   where
2643     target = ImmCLbl lbl
2644
     -- Alpha: jump to a computed address.  The target expression is generated
     -- with pv as its register; a fixed result register is copied into pv
     -- first.  'tmp' and 'target' are bound but not used below.
2645 genJump tree
2646   = getRegister tree                `thenNat` \ register ->
2647     getNewRegNat PtrRep             `thenNat` \ tmp ->
2648     let
2649         dst    = registerName register pv
2650         code   = registerCode register pv
2651         target = registerName register pv
2652     in
2653     if isFixed register then
2654         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2655     else
2656     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2657
2658 #endif /* alpha_TARGET_ARCH */
2659
2660 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2661
2662 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2663
     -- Jump through a memory location: use its address mode as the operand.
2664 genJump (CmmLoad mem _) = do
2665   Amode amode addrCode <- getAmode mem
       let jump = JMP (OpAddr amode)
2666   return (addrCode `snocOL` jump)
2667
     -- Jump to a literal target: a single JMP with an immediate operand.
2668 genJump (CmmLit lit) =
2669   return (unitOL (JMP (OpImm (litToImm lit))))
2670
     -- Anything else: evaluate the target into a register and jump through it.
2671 genJump expr = do
2672   (targetReg, exprCode) <- getSomeReg expr
2673   return (exprCode `snocOL` JMP (OpReg targetReg))
2674
2675 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2676
2677 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2678
2679 #if sparc_TARGET_ARCH
2680
     -- Jump to a known label: emitted as a CALL to the label followed by a NOP.
2681 genJump (CmmLit (CmmLabel lbl)) =
2682     return (toOL [CALL (Left (ImmCLbl lbl)) 0 True, NOP])
2683
     -- Jump to a computed address: evaluate it into a register, then JMP
     -- through that register (plus g0), again followed by a NOP.
2684 genJump tree = do
2685     (addrReg, addrCode) <- getSomeReg tree
2686     let jump = JMP (AddrRegReg addrReg g0)
2687     return (addrCode `snocOL` jump `snocOL` NOP)
2690
2691 #endif /* sparc_TARGET_ARCH */
2692
2693 #if powerpc_TARGET_ARCH
2694 genJump (CmmLit (CmmLabel lbl)) =   -- known label: a single direct JMP
2695     return (unitOL (JMP lbl))
2696
2697 -- Computed target: evaluate the address into a register, then emit MTCTR
2698 -- on that register followed by BCTR.
2699 genJump tree = do
2700     (addrReg, addrCode) <- getSomeReg tree
         return (addrCode `snocOL` MTCTR addrReg `snocOL` BCTR [])
2701 #endif /* powerpc_TARGET_ARCH */
2702
2703
2704 -- -----------------------------------------------------------------------------
2705 --  Unconditional branches
2706
2707 genBranch :: BlockId -> NatM InstrBlock
2708
     -- Wrap the instruction list that 'mkBranchInstr' builds for the target
     -- block into an instruction block.
2709 genBranch target = return (toOL (mkBranchInstr target))
2710
2711 -- -----------------------------------------------------------------------------
2712 --  Conditional jumps
2713
2714 {-
2715 Conditional jumps are always to local labels, so we can use branch
2716 instructions.  We peek at the arguments to decide what kind of
2717 comparison to do.
2718
2719 ALPHA: For comparisons with 0, we're laughing, because we can just do
2720 the desired conditional branch.
2721
2722 I386: First, we have to ensure that the condition
2723 codes are set according to the supplied comparison operation.
2724
2725 SPARC: First, we have to ensure that the condition codes are set
2726 according to the supplied comparison operation.  We generate slightly
2727 different code for floating point comparisons, because a floating
2728 point operation cannot directly precede a @BF@.  We assume the worst
2729 and fill that slot with a @NOP@.
2730
2731 SPARC: Do not fill the delay slots here; you will confuse the register
2732 allocator.
2733 -}
2734
2735
2736 genCondJump
2737     :: BlockId      -- the branch target
2738     -> CmmExpr      -- the condition on which to branch
2739     -> NatM InstrBlock
2740
2741 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2742
2743 #if alpha_TARGET_ARCH
2744
     -- Alpha: conditional branch on a comparison of x against integer zero.
     -- The operand is evaluated into a register and a single BI (integer
     -- branch) is emitted on that register; 'cmpOp' maps the primitive
     -- comparison onto the branch condition.
     --
     -- The first parameter was previously named id while the body referred to
     -- lbl, leaving lbl unbound; it is now named lbl like the sibling clauses.
     -- NOTE(review): this section still uses the old Stix constructors
     -- (StPrim, StInt) and looks unmaintained -- confirm before enabling.
2745 genCondJump lbl (StPrim op [x, StInt 0])
2746   = getRegister x                           `thenNat` \ register ->
2747     getNewRegNat (registerRep register)
2748                                     `thenNat` \ tmp ->
2749     let
2750         code   = registerCode register tmp
2751         value  = registerName register tmp
2752         pk     = registerRep register
2753         target = ImmCLbl lbl
2754     in
2755     returnSeq code [BI (cmpOp op) value target]
2756   where
         -- Signed comparisons (Char/Int) map one-to-one.
2757     cmpOp CharGtOp = GTT
2758     cmpOp CharGeOp = GE
2759     cmpOp CharEqOp = EQQ
2760     cmpOp CharNeOp = NE
2761     cmpOp CharLtOp = LTT
2762     cmpOp CharLeOp = LE
2763     cmpOp IntGtOp = GTT
2764     cmpOp IntGeOp = GE
2765     cmpOp IntEqOp = EQQ
2766     cmpOp IntNeOp = NE
2767     cmpOp IntLtOp = LTT
2768     cmpOp IntLeOp = LE
         -- Unsigned comparisons (Word/Addr) against zero collapse:
         -- "> 0" is NE, ">= 0" is ALWAYS, "< 0" is NEVER, "<= 0" is EQQ.
2769     cmpOp WordGtOp = NE
2770     cmpOp WordGeOp = ALWAYS
2771     cmpOp WordEqOp = EQQ
2772     cmpOp WordNeOp = NE
2773     cmpOp WordLtOp = NEVER
2774     cmpOp WordLeOp = EQQ
2775     cmpOp AddrGtOp = NE
2776     cmpOp AddrGeOp = ALWAYS
2777     cmpOp AddrEqOp = EQQ
2778     cmpOp AddrNeOp = NE
2779     cmpOp AddrLtOp = NEVER
2780     cmpOp AddrLeOp = EQQ
2781
     -- Alpha: conditional branch on a comparison of x against the double
     -- literal 0.0.  The operand is evaluated into a register and a single BF
     -- (floating-point branch) is emitted on it; 'cmpOp' maps the Float/Double
     -- comparison onto the branch condition.  'pk' is bound but unused.
2782 genCondJump lbl (StPrim op [x, StDouble 0.0])
2783   = getRegister x                           `thenNat` \ register ->
2784     getNewRegNat (registerRep register)
2785                                     `thenNat` \ tmp ->
2786     let
2787         code   = registerCode register tmp
2788         value  = registerName register tmp
2789         pk     = registerRep register
2790         target = ImmCLbl lbl
2791     in
2792     return (code . mkSeqInstr (BF (cmpOp op) value target))
2793   where
2794     cmpOp FloatGtOp = GTT
2795     cmpOp FloatGeOp = GE
2796     cmpOp FloatEqOp = EQQ
2797     cmpOp FloatNeOp = NE
2798     cmpOp FloatLtOp = LTT
2799     cmpOp FloatLeOp = LE
2800     cmpOp DoubleGtOp = GTT
2801     cmpOp DoubleGeOp = GE
2802     cmpOp DoubleEqOp = EQQ
2803     cmpOp DoubleNeOp = NE
2804     cmpOp DoubleLtOp = LTT
2805     cmpOp DoubleLeOp = LE
2806
     -- Alpha: general floating-point comparison of x and y (taken only when
     -- 'fltCmpOp' recognises the operator).  The comparison itself is emitted
     -- through 'trivialFCode' with the FCMP variant chosen below, and a BF on
     -- the comparison result performs the branch.
2807 genCondJump lbl (StPrim op [x, y])
2808   | fltCmpOp op
2809   = trivialFCode pr instr x y       `thenNat` \ register ->
2810     getNewRegNat F64                `thenNat` \ tmp ->
2811     let
2812         code   = registerCode register tmp
2813         result = registerName register tmp
2814         target = ImmCLbl lbl
2815     in
2816     return (code . mkSeqInstr (BF cond result target))
2817   where
         -- deliberately a panic: passed to trivialFCode, which (per the
         -- message) is not expected to inspect it on Alpha
2818     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2819
         -- True exactly for the Float*/Double* comparison operators
2820     fltCmpOp op = case op of
2821         FloatGtOp -> True
2822         FloatGeOp -> True
2823         FloatEqOp -> True
2824         FloatNeOp -> True
2825         FloatLtOp -> True
2826         FloatLeOp -> True
2827         DoubleGtOp -> True
2828         DoubleGeOp -> True
2829         DoubleEqOp -> True
2830         DoubleNeOp -> True
2831         DoubleLtOp -> True
2832         DoubleLeOp -> True
2833         _ -> False
         -- Pairs each operator with the compare instruction to emit and the
         -- condition for the BF on its result.  Only EQQ/LTT/LE compares are
         -- used; Gt, Ge and Ne are obtained from the LE, LTT and EQQ compares
         -- by branching on EQQ rather than NE (presumably i.e. when the
         -- compare result is zero -- confirm against the Alpha ISA).
2834     (instr, cond) = case op of
2835         FloatGtOp -> (FCMP TF LE, EQQ)
2836         FloatGeOp -> (FCMP TF LTT, EQQ)
2837         FloatEqOp -> (FCMP TF EQQ, NE)
2838         FloatNeOp -> (FCMP TF EQQ, EQQ)
2839         FloatLtOp -> (FCMP TF LTT, NE)
2840         FloatLeOp -> (FCMP TF LE, NE)
2841         DoubleGtOp -> (FCMP TF LE, EQQ)
2842         DoubleGeOp -> (FCMP TF LTT, EQQ)
2843         DoubleEqOp -> (FCMP TF EQQ, NE)
2844         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2845         DoubleLtOp -> (FCMP TF LTT, NE)
2846         DoubleLeOp -> (FCMP TF LE, NE)
2847
     -- Alpha: general integer comparison of x and y (reached when the
     -- floating-point guard of the previous equation fails).  The comparison
     -- is emitted through 'trivialCode' with the CMP variant chosen below, and
     -- a BI on the comparison result performs the branch.
2848 genCondJump lbl (StPrim op [x, y])
2849   = trivialCode instr x y           `thenNat` \ register ->
2850     getNewRegNat IntRep             `thenNat` \ tmp ->
2851     let
2852         code   = registerCode register tmp
2853         result = registerName register tmp
2854         target = ImmCLbl lbl
2855     in
2856     return (code . mkSeqInstr (BI cond result target))
2857   where
         -- Pairs each operator with the compare instruction and the branch
         -- condition on its result.  Signed operators (Char/Int) use the
         -- LE/LTT/EQQ compares, unsigned ones (Word/Addr) use ULE/ULT/EQQ;
         -- Gt, Ge and Ne reuse the LE, LT and EQ compares with the branch
         -- condition EQQ instead of NE.
2858     (instr, cond) = case op of
2859         CharGtOp -> (CMP LE, EQQ)
2860         CharGeOp -> (CMP LTT, EQQ)
2861         CharEqOp -> (CMP EQQ, NE)
2862         CharNeOp -> (CMP EQQ, EQQ)
2863         CharLtOp -> (CMP LTT, NE)
2864         CharLeOp -> (CMP LE, NE)
2865         IntGtOp -> (CMP LE, EQQ)
2866         IntGeOp -> (CMP LTT, EQQ)
2867         IntEqOp -> (CMP EQQ, NE)
2868         IntNeOp -> (CMP EQQ, EQQ)
2869         IntLtOp -> (CMP LTT, NE)
2870         IntLeOp -> (CMP LE, NE)
2871         WordGtOp -> (CMP ULE, EQQ)
2872         WordGeOp -> (CMP ULT, EQQ)
2873         WordEqOp -> (CMP EQQ, NE)
2874         WordNeOp -> (CMP EQQ, EQQ)
2875         WordLtOp -> (CMP ULT, NE)
2876         WordLeOp -> (CMP ULE, NE)
2877         AddrGtOp -> (CMP ULE, EQQ)
2878         AddrGeOp -> (CMP ULT, EQQ)
2879         AddrEqOp -> (CMP EQQ, NE)
2880         AddrNeOp -> (CMP EQQ, EQQ)
2881         AddrLtOp -> (CMP ULT, NE)
2882         AddrLeOp -> (CMP ULE, NE)
2883
2884 #endif /* alpha_TARGET_ARCH */
2885
2886 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2887
2888 #if i386_TARGET_ARCH
2889
     -- i386: emit the code that sets the condition codes for the expression,
     -- then a conditional jump (JXX) to the target block.
2890 genCondJump target condExpr = do
2891   CondCode _ cc setFlags <- getCondCode condExpr
2892   return (setFlags `snocOL` JXX cc target)
2893
2894 #endif
2895
2896 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2897
2898 #if x86_64_TARGET_ARCH
2899
     -- x86_64: conditional jump to block id.
     --
     -- Non-float conditions are a plain JXX after the condition code.  Float
     -- conditions additionally consult the PARITY condition, and the shape
     -- depends on the condition being tested:
     --   * NE       : jump if cond holds, or if PARITY holds (or_unordered)
     --   * GU, GEU  : plain JXX
     --   * otherwise: jump past the test to a fresh block when PARITY holds,
     --                otherwise do the JXX; both paths rejoin at the fresh
     --                block (and_ordered)
     -- NOTE(review): the local names suggest PARITY signals an unordered
     -- comparison -- confirm against the comment with condFltReg.
2900 genCondJump id bool = do
2901   CondCode is_float cond cond_code <- getCondCode bool
2902   if not is_float
2903     then
2904         return (cond_code `snocOL` JXX cond id)
2905     else do
             -- fresh block id used as the fall-through/join point below
2906         lbl <- getBlockIdNat
2907
2908         -- see comment with condFltReg
2909         let code = case cond of
2910                         NE  -> or_unordered
2911                         GU  -> plain_test
2912                         GEU -> plain_test
2913                         _   -> and_ordered
2914
2915             plain_test = unitOL (
2916                   JXX cond id
2917                 )
2918             or_unordered = toOL [
2919                   JXX cond id,
2920                   JXX PARITY id
2921                 ]
2922             and_ordered = toOL [
2923                   JXX PARITY lbl,
2924                   JXX cond id,
2925                   JXX ALWAYS lbl,
2926                   NEWBLOCK lbl
2927                 ]
2928         return (cond_code `appOL` code)
2929
2930 #endif
2931
2932 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2933
2934 #if sparc_TARGET_ARCH
2935
     -- SPARC: set the condition codes, then branch to the block's assembler
     -- temp label.  Float comparisons get a NOP in front of the BF; every
     -- branch is followed by a NOP.
2936 genCondJump (BlockId uniq) bool = do
2937   CondCode isFloat cond condCode <- getCondCode bool
2938   let target = ImmCLbl (mkAsmTempLabel uniq)
2939       branch
2940         | isFloat   = [NOP, BF cond False target, NOP]
2941         | otherwise = [BI cond False target, NOP]
2942   return (condCode `appOL` toOL branch)
2946
2947 #endif /* sparc_TARGET_ARCH */
2948
2949
2950 #if powerpc_TARGET_ARCH
2951
     -- PowerPC: condition-setting code followed by a conditional branch (BCC)
     -- to the target block.
2952 genCondJump id bool =
2953   getCondCode bool >>= \ (CondCode _ cond condCode) ->
2954   return (condCode `snocOL` BCC cond id)
2955
2956 #endif /* powerpc_TARGET_ARCH */
2957
2958
2959 -- -----------------------------------------------------------------------------
2960 --  Generating C calls
2961
2962 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2963 -- @get_arg@, which moves the arguments to the correct registers/stack
2964 -- locations.  Apart from that, the code is easy.
2965 --
2966 -- (If applicable) Do not fill the delay slots here; you will confuse the
2967 -- register allocator.
2968
2969 genCCall
2970     :: CmmCallTarget            -- function to call
2971     -> CmmFormals               -- where to put the result
2972     -> CmmActuals               -- arguments (of mixed type)
2973     -> NatM InstrBlock
2974
2975 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2976
2977 #if alpha_TARGET_ARCH
2978
     -- NOTE(review): the binding below has no right-hand side, and this
     -- genCCall takes four arguments (fn cconv result_regs args) although the
     -- signature above has three; it also uses the old StixTree type.  The
     -- Alpha section looks unmaintained -- confirm before enabling.
2979 ccallResultRegs =
2980
     -- Alpha C call: place every argument with 'get_arg' (threading the list
     -- of still-free argument registers and the next stack offset), then load
     -- the function address into pv, JSR through it, and LDGP afterwards.
2981 genCCall fn cconv result_regs args
2982   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2983                           `thenNat` \ ((unused,_), argCode) ->
2984     let
             -- number of argument registers actually consumed
2985         nRegs = length allArgRegs - length unused
2986         code = asmSeqThen (map ($ []) argCode)
2987     in
2988         returnSeq code [
2989             LDA pv (AddrImm (ImmLab (ptext fn))),
2990             JSR ra (AddrReg pv) nRegs,
2991             LDGP gp (AddrReg ra)]
2992   where
2993     ------------------------
2994     {-  Try to get a value into a specific register (or registers) for
2995         a call.  The first 6 arguments go into the appropriate
2996         argument register (separate registers for integer and floating
2997         point arguments, but used in lock-step), and the remaining
2998         arguments are dumped to the stack, beginning at 0(sp).  Our
2999         first argument is a pair of the list of remaining argument
3000         registers to be assigned for this call and the next stack
3001         offset to use for overflowing arguments.  This way,
3002         @get_Arg@ can be applied to all of a call's arguments using
3003         @mapAccumLNat@.
3004     -}
3005     get_arg
3006         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3007         -> StixTree             -- Current argument
3008         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3009
3010     -- We have to use up all of our argument registers first...
3011
         -- Each accumulator entry is an (integer, float) register pair; the
         -- argument's rep decides which of the two is used, and the pair is
         -- consumed either way.  A fixed source register is copied explicitly.
3012     get_arg ((iDst,fDst):dsts, offset) arg
3013       = getRegister arg                     `thenNat` \ register ->
3014         let
3015             reg  = if isFloatingRep pk then fDst else iDst
3016             code = registerCode register reg
3017             src  = registerName register reg
3018             pk   = registerRep register
3019         in
3020         return (
3021             if isFloatingRep pk then
3022                 ((dsts, offset), if isFixed register then
3023                     code . mkSeqInstr (FMOV src fDst)
3024                     else code)
3025             else
3026                 ((dsts, offset), if isFixed register then
3027                     code . mkSeqInstr (OR src (RIReg src) iDst)
3028                     else code))
3029
3030     -- Once we have run out of argument registers, we move to the
3031     -- stack...
3032
         -- The argument is evaluated into a temporary and stored at
         -- spRel offset; the offset advances by one per stacked argument.
3033     get_arg ([], offset) arg
3034       = getRegister arg                 `thenNat` \ register ->
3035         getNewRegNat (registerRep register)
3036                                         `thenNat` \ tmp ->
3037         let
3038             code = registerCode register tmp
3039             src  = registerName register tmp
3040             pk   = registerRep register
3041             sz   = primRepToSize pk
3042         in
3043         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3044
3045 #endif /* alpha_TARGET_ARCH */
3046
3047 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3048
3049 #if i386_TARGET_ARCH
3050
3051 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3052         -- write barrier compiles to no code on x86/x86-64;
3053         -- we keep it this long in order to prevent earlier optimisations.
3054
3055 -- we only cope with a single result for foreign calls
     -- Primitive (callish) float ops on i386: sqrt, sin, cos and tan are
     -- emitted inline with the matching G* instruction; every other op is
     -- handed to 'outOfLineFloatOp'.
3056 genCCall (CmmPrim op) [CmmHinted r _] args = do
3057   case op of
3058         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3059         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3060
3061         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3062         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3063
3064         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3065         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3066
3067         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3068         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3069
3070         other_op    -> outOfLineFloatOp op r args
3071  where
       -- Only defined for exactly one argument.  Builds the unary float
       -- operation with 'trivialUFCode' and instantiates the resulting code at
       -- the register of the result local r.
3072   actuallyInlineFloatOp rep instr [CmmHinted x _]
3073         = do res <- trivialUFCode rep instr x
3074              any <- anyReg res
3075              return (any (getRegisterReg (CmmLocal r)))
3076
     -- General C call on i386.
     --
     -- Arguments are pushed in reverse order (last argument first), the call
     -- is emitted (direct for a label target, through a register otherwise),
     -- the argument area is popped again unless the convention is StdCallConv
     -- or there is nothing to pop, and at most one result is copied out of the
     -- return registers.  The NatM stack delta is updated alongside and
     -- recorded in the instruction stream with DELTA pseudo-instructions.
3077 genCCall target dest_regs args = do
3078     let
3079         sizes               = map (arg_size . cmmExprRep . hintlessCmm) (reverse args)
3080 #if !darwin_TARGET_OS
3081         tot_arg_size        = sum sizes
3082 #else
             -- On Darwin the argument area is rounded up to a multiple of 16
             -- bytes; the padding is accounted for in the delta before the
             -- arguments are pushed.
3083         raw_arg_size        = sum sizes
3084         tot_arg_size        = roundTo 16 raw_arg_size
3085         arg_pad_size        = tot_arg_size - raw_arg_size
3086     delta0 <- getDeltaNat
3087     setDeltaNat (delta0 - arg_pad_size)
3088 #endif
3089
3090     push_codes <- mapM push_arg (reverse args)
3091     delta <- getDeltaNat
3092
3093     -- in
3094     -- deal with static vs dynamic call targets
3095     (callinsns,cconv) <-
3096       case target of
3097         -- CmmPrim -> ...
3098         CmmCallee (CmmLit (CmmLabel lbl)) conv
3099            -> -- ToDo: stdcall arg sizes
3100               return (unitOL (CALL (Left fn_imm) []), conv)
3101            where fn_imm = ImmCLbl lbl
3102         CmmCallee expr conv
3103            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3104                  ASSERT(dyn_rep == I32)
3105                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3106
3107     let push_code
3108 #if darwin_TARGET_OS
                 -- emit the alignment padding (if any) ahead of the pushes
3109             | arg_pad_size /= 0
3110             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3111                     DELTA (delta0 - arg_pad_size)]
3112               `appOL` concatOL push_codes
3113             | otherwise
3114 #endif
3115             = concatOL push_codes
3116         call = callinsns `appOL`
3117                toOL (
3118                         -- Deallocate parameters after call for ccall;
3119                         -- but not for stdcall (callee does it)
3120                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3121                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3122                   ++
3123                   [DELTA (delta + tot_arg_size)]
3124                )
3125     -- in
3126     setDeltaNat (delta + tot_arg_size)
3127
3128     let
3129         -- assign the results, if necessary
             -- I64 results come back in eax (low half) and edx (high half),
             -- F32/F64 results are GMOVed from fake0, everything else is
             -- MOVed from eax.  More than one result is a panic.
3130         assign_code []     = nilOL
3131         assign_code [CmmHinted dest _hint] =
3132           case rep of
3133                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3134                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3135                 F32 -> unitOL (GMOV fake0 r_dest)
3136                 F64 -> unitOL (GMOV fake0 r_dest)
3137                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3138           where
3139                 r_dest_hi = getHiVRegFromLo r_dest
3140                 rep = localRegRep dest
3141                 r_dest = getRegisterReg (CmmLocal dest)
3142         assign_code many = panic "genCCall.assign_code many"
3143
3144     return (push_code `appOL`
3145             call `appOL`
3146             assign_code dest_regs)
3147
3148   where
         -- stack bytes occupied by an argument of the given rep
3149     arg_size F64 = 8
3150     arg_size F32 = 4
3151     arg_size I64 = 8
3152     arg_size _   = 4
3153
         -- round x up to the next multiple of a
3154     roundTo a x | x `mod` a == 0 = x
3155                 | otherwise = x + a - (x `mod` a)
3156
3157
3158     push_arg :: (CmmHinted CmmExpr){-current argument-}
3159                     -> NatM InstrBlock  -- code
3160
         -- 64-bit integers are pushed as two 32-bit halves, high half first.
         -- NOTE(review): the final DELTA (delta-8) repeats the one just before
         -- it; it looks redundant -- confirm before removing.
3161     push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3162       | arg_rep == I64 = do
3163         ChildCode64 code r_lo <- iselExpr64 arg
3164         delta <- getDeltaNat
3165         setDeltaNat (delta - 8)
3166         let
3167             r_hi = getHiVRegFromLo r_lo
3168         -- in
3169         return (       code `appOL`
3170                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3171                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3172                              DELTA (delta-8)]
3173             )
3174
           -- Everything else: floats get stack space reserved with SUB and are
           -- stored there with GST; other values are pushed with PUSH.
3175       | otherwise = do
3176         (code, reg, sz) <- get_op arg
3177         delta <- getDeltaNat
3178         let size = arg_size sz
3179         setDeltaNat (delta-size)
3180         if (case sz of F64 -> True; F32 -> True; _ -> False)
3181            then return (code `appOL`
3182                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3183                               DELTA (delta-size),
3184                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3185                                                         EAIndexNone
3186                                                         (ImmInt 0))]
3187                        )
3188            else return (code `snocOL`
3189                         PUSH I32 (OpReg reg) `snocOL`
3190                         DELTA (delta-size)
3191                        )
3192       where
3193          arg_rep = cmmExprRep arg
3194
3195     ------------
         -- evaluate an expression into some register, also returning its rep
3196     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3197     get_op op = do
3198         (reg,code) <- getSomeReg op
3199         return (code, reg, cmmExprRep op)
3200
3201 #endif /* i386_TARGET_ARCH */
3202
3203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3204
     -- Implement a callish floating-point primitive by calling the matching
     -- C math routine.
     --
     --   mop  : the primitive; mapped to a C function name by the table below
     --   res  : the local register receiving the result
     --   args : the call's arguments, passed through unchanged
     --
     -- The call is generated as an ordinary unsafe CmmCall through
     -- 'stmtToInstrs'.  When res is an F64 register the call writes it
     -- directly; otherwise the call writes a fresh F64 temporary, which is
     -- then assigned to res with a separate CmmAssign.
     --
     -- A primitive with no entry in the table now fails with a named panic
     -- instead of an anonymous pattern-match failure.
3205 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3206   -> NatM InstrBlock
3207 outOfLineFloatOp mop res args
3208   = do
3209       dflags <- getDynFlagsNat
3210       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3211       let target = CmmCallee targetExpr CCallConv
3212
3213       if localRegRep res == F64
3214         then
3215           stmtToInstrs (CmmCall target [CmmHinted res FloatHint] args CmmUnsafe CmmMayReturn)
3216         else do
3217           uq <- getUniqueNat
3218           let
3219             tmp = LocalReg uq F64 GCKindNonPtr
3220           -- in
3221           code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp FloatHint] args CmmUnsafe CmmMayReturn)
3222           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3223           return (code1 `appOL` code2)
3224   where
3225         lbl = mkForeignLabel fn Nothing False
3226
             -- C function implementing each primitive
3227         fn = case mop of
3228               MO_F32_Sqrt  -> FSLIT("sqrtf")
3229               MO_F32_Sin   -> FSLIT("sinf")
3230               MO_F32_Cos   -> FSLIT("cosf")
3231               MO_F32_Tan   -> FSLIT("tanf")
3232               MO_F32_Exp   -> FSLIT("expf")
3233               MO_F32_Log   -> FSLIT("logf")
3234
3235               MO_F32_Asin  -> FSLIT("asinf")
3236               MO_F32_Acos  -> FSLIT("acosf")
3237               MO_F32_Atan  -> FSLIT("atanf")
3238
3239               MO_F32_Sinh  -> FSLIT("sinhf")
3240               MO_F32_Cosh  -> FSLIT("coshf")
3241               MO_F32_Tanh  -> FSLIT("tanhf")
3242               MO_F32_Pwr   -> FSLIT("powf")
3243
3244               MO_F64_Sqrt  -> FSLIT("sqrt")
3245               MO_F64_Sin   -> FSLIT("sin")
3246               MO_F64_Cos   -> FSLIT("cos")
3247               MO_F64_Tan   -> FSLIT("tan")
3248               MO_F64_Exp   -> FSLIT("exp")
3249               MO_F64_Log   -> FSLIT("log")
3250
3251               MO_F64_Asin  -> FSLIT("asin")
3252               MO_F64_Acos  -> FSLIT("acos")
3253               MO_F64_Atan  -> FSLIT("atan")
3254
3255               MO_F64_Sinh  -> FSLIT("sinh")
3256               MO_F64_Cosh  -> FSLIT("cosh")
3257               MO_F64_Tanh  -> FSLIT("tanh")
3258               MO_F64_Pwr   -> FSLIT("pow")

                   -- any primitive without an entry above
                   _            -> panic "outOfLineFloatOp: unsupported CallishMachOp"
3259
3260 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3261
3262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3263
3264 #if x86_64_TARGET_ARCH
3265
3266 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3267         -- write barrier compiles to no code on x86/x86-64;
3268         -- we keep it this long in order to prevent earlier optimisations.
3269
3270
3271 genCCall (CmmPrim op) [CmmHinted r _] args =
3272   outOfLineFloatOp op r args
3273
3274 genCCall target dest_regs args = do
3275         -- General case: load register args, push the rest, call, copy the result out.
3276         -- load up the register arguments
3277     (stack_args, aregs, fregs, load_args_code)
3278          <- load_args args allArgRegs allFPArgRegs nilOL
3279         -- aregs/fregs are the registers load_args left unused
3280     let
3281         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3282         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3283         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3284                 -- registers attached to the CALL instruction below:
3285                 -- eax plus every int/FP argument register actually used
3286         sse_regs = length fp_regs_used
3287                 -- count of FP arg registers in use; moved into eax before the call
3288         tot_arg_size = arg_size * length stack_args
3289                 -- bytes of stack taken by the pushed arguments
3290         -- On entry to the called function, %rsp should be aligned
3291         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3292         -- the return address is 16-byte aligned).  In STG land
3293         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3294         -- need to make sure we push a multiple of 16-bytes of args,
3295         -- plus the return address, to get the correct alignment.
3296         -- Urg, this is hard.  We need to feed the delta back into
3297         -- the arg pushing code.
3298     (real_size, adjust_rsp) <-
3299         if tot_arg_size `rem` 16 == 0
3300             then return (tot_arg_size, nilOL)
3301             else do -- we need to adjust...
3302                 delta <- getDeltaNat
3303                 setDeltaNat (delta-8)
3304                 return (tot_arg_size+8, toOL [   -- the 8 padding bytes are popped together with the args
3305                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3306                                 DELTA (delta-8)
3307                         ])
3308
3309         -- push the stack args, right to left
3310     push_code <- push_args (reverse stack_args) nilOL
3311     delta <- getDeltaNat   -- stack delta after the padding and all pushes
3312
3313     -- deal with static vs dynamic call targets
3314     (callinsns,cconv) <-
3315       case target of
3316         -- CmmPrim -> ...
3317         CmmCallee (CmmLit (CmmLabel lbl)) conv
3318            -> -- ToDo: stdcall arg sizes
3319               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3320            where fn_imm = ImmCLbl lbl
3321         CmmCallee expr conv
3322            -> do (dyn_r, dyn_c) <- getSomeReg expr
3323                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3324
3325     let
3326         -- The x86_64 ABI requires us to set %al to the number of SSE
3327         -- registers that contain arguments, if the called routine
3328         -- is a varargs function.  We don't know whether it's a
3329         -- varargs function or not, so we have to assume it is.
3330         --
3331         -- It's not safe to omit this assignment, even if the number
3332         -- of SSE regs in use is zero.  If %al is larger than 8
3333         -- on entry to a varargs function, seg faults ensue.
3334         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3335
3336     let call = callinsns `appOL`
3337                toOL (
3338                         -- Deallocate parameters after call for ccall;
3339                         -- but not for stdcall (callee does it)
3340                   (if cconv == StdCallConv || real_size==0 then [] else
3341                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])   -- NOTE(review): esp here, rsp elsewhere in this function; presumably the same register, confirm
3342                   ++
3343                   [DELTA (delta + real_size)]
3344                )
3345     -- in: record the stack delta as it stands after the call sequence
3346     setDeltaNat (delta + real_size)
3347
3348     let
3349         -- assign the results, if necessary
3350         assign_code []     = nilOL
3351         assign_code [CmmHinted dest _hint] =
3352           case rep of
3353                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))   -- float results are taken from xmm0
3354                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))    -- every other rep is taken from rax
3356           where
3357                 rep = localRegRep dest
3358                 r_dest = getRegisterReg (CmmLocal dest)
3359         assign_code many = panic "genCCall.assign_code many"   -- more than one result is not supported
3360
3361     return (load_args_code      `appOL`
3362             adjust_rsp          `appOL`
3363             push_code           `appOL`
3364             assign_eax sse_regs `appOL`
3365             call                `appOL`
3366             assign_code dest_regs)
3367
3368   where
3369     arg_size = 8 -- bytes per stack argument: always 8, at the mo
3370
3371     load_args :: [CmmHinted CmmExpr]       -- args not yet placed
3372               -> [Reg]                  -- int regs avail for args
3373               -> [Reg]                  -- FP regs avail for args
3374               -> InstrBlock             -- code accumulated so far
3375               -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)   -- (args left for the stack, unused int regs, unused FP regs, code)
3376     load_args args [] [] code     =  return (args, [], [], code)
3377         -- no more regs to use
3378     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3379         -- no more args to push
3380     load_args ((CmmHinted arg hint) : rest) aregs fregs code
3381         | isFloatingRep arg_rep =       -- floating arg: takes the next FP register, if any
3382         case fregs of
3383           [] -> push_this_arg
3384           (r:rs) -> do
3385              arg_code <- getAnyReg arg
3386              load_args rest aregs rs (code `appOL` arg_code r)
3387         | otherwise =                   -- any other arg: takes the next integer register, if any
3388         case aregs of
3389           [] -> push_this_arg
3390           (r:rs) -> do
3391              arg_code <- getAnyReg arg
3392              load_args rest rs fregs (code `appOL` arg_code r)
3393         where
3394           arg_rep = cmmExprRep arg
3395           -- no register of the right class is left: keep this arg for the stack and carry on with the rest
3396           push_this_arg = do
3397             (args',ars,frs,code') <- load_args rest aregs fregs code
3398             return ((CmmHinted arg hint):args', ars, frs, code')
3399
3400     push_args [] code = return code      -- nothing left to push: hand back the accumulated code
3401     push_args ((CmmHinted arg hint):rest) code   -- emit code storing one argument on the stack, then recurse
3402        | isFloatingRep arg_rep = do       -- float arg: make room by hand, then store from the register
3403          (arg_reg, arg_code) <- getSomeReg arg
3404          delta <- getDeltaNat
3405          setDeltaNat (delta-arg_size)
3406          let code' = code `appOL` arg_code `appOL` toOL [
3407                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3408                         DELTA (delta-arg_size),
3409                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3410          push_args rest code'
3411        -- integer arg: arg_code computes arg_op, so it must be emitted before the PUSH
3412        | otherwise = do
3413        -- we only ever generate word-sized function arguments.  Promotion
3414        -- has already happened: our Int8# type is kept sign-extended
3415        -- in an Int#, for example.
3416          ASSERT(arg_rep == I64) return ()
3417          (arg_op, arg_code) <- getOperand arg
3418          delta <- getDeltaNat
3419          setDeltaNat (delta-arg_size)
3420          let code' = code `appOL` arg_code `appOL` toOL [PUSH I64 arg_op,
3421                                                          DELTA (delta-arg_size)]
3422          push_args rest code'
3423         where
3424           arg_rep = cmmExprRep arg
3425 #endif
3426
3427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3428
3429 #if sparc_TARGET_ARCH
3430 {-
3431    The SPARC calling convention is an absolute
3432    nightmare.  The first 6x32 bits of arguments are mapped into
3433    %o0 through %o5, and the remaining arguments are dumped to the
3434    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3435
3436    If we have to put args on the stack, move %o6==%sp down by
3437    the number of words to go on the stack, to ensure there's enough space.
3438
3439    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3440    16 words above the stack pointer is a word for the address of
3441    a structure return value.  I use this as a temporary location
3442    for moving values from float to int regs.  Certainly it isn't
3443    safe to put anything in the 16 words starting at %sp, since
3444    this area can get trashed at any time due to window overflows
3445    caused by signal handlers.
3446
3447    A final complication (if the above isn't enough) is that
3448    we can't blithely calculate the arguments one by one into
3449    %o0 .. %o5.  Consider the following nested calls:
3450
3451        fff a (fff b c)
3452
3453    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3454    the inner call will itself use %o0, which trashes the value put there
3455    in preparation for the outer call.  Upshot: we need to calculate the
3456    args into temporary regs, and move those to arg regs or onto the
3457    stack only immediately prior to the call proper.  Sigh.
3458 -}
3459
3460 genCCall target dest_regs argsAndHints = do   -- NOTE(review): dest_regs is not referenced in this clause
3461     let
3462         args = map hintlessCmm argsAndHints
3463     argcode_and_vregs <- mapM arg_to_int_vregs args   -- every arg is first computed into one or two integer vregs
3464     let
3465         (argcodes, vregss) = unzip argcode_and_vregs
3466         n_argRegs          = length allArgRegs
3467         n_argRegs_used     = min (length vregs) n_argRegs   -- passed along with each CALL below
3468         vregs              = concat vregss
3469     -- deal with static vs dynamic call targets
3470     callinsns <- (case target of
3471         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3472                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3473         CmmCallee expr conv -> do
3474                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3475                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3476         CmmPrim mop -> do   -- out-of-line primop: call the label or expression outOfLineFloatOp returns
3477                   (res, reduce) <- outOfLineFloatOp mop
3478                   lblOrMopExpr <- case res of
3479                        Left lbl -> do
3480                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3481                        Right mopExpr -> do
3482                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3483                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3484                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr   -- ops flagged reduce are rejected here
3485
3486       )
3487     let
3488         argcode = concatOL argcodes
3489         (move_sp_down, move_sp_up)   -- stack adjustment, needed only when some vregs do not fit in arg regs
3490            = let diff = length vregs - n_argRegs
3491                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3492              in  if   nn <= 0
3493                  then (nilOL, nilOL)
3494                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3495         transfer_code   -- final moves of the vregs into arg regs / stack slots
3496            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3497     return (argcode       `appOL`
3498             move_sp_down  `appOL`
3499             transfer_code `appOL`
3500             callinsns     `appOL`
3501             unitOL NOP    `appOL`
3502             move_sp_up)
3503   where
3504      -- Shuffle the marshalled integer vregs into their final homes:
3505      -- argument registers (%o0 .. %o5) first, stack slots for the overflow.
3506      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3507
3508      move_final [] _ _ = []                   -- nothing left to place
3509
3510      move_final (src:srcs) [] slot            -- arg regs exhausted: store to the stack
3511         = let store = ST I32 src (spRel slot)
3512           in  store : move_final srcs [] (slot+1)
3513
3514      move_final (src:srcs) (dst:dsts) slot    -- copy into the next free arg reg
3515         = let copy = OR False g0 (RIReg src) dst
3516           in  copy : move_final srcs dsts slot
3517
3518
3519      -- generate code to calculate an argument, and move it into one
3520      -- or two integer vregs.
3521      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3522      arg_to_int_vregs arg
3523         | (cmmExprRep arg) == I64   -- 64-bit integer: two vregs, high half first
3524         = do
3525           (ChildCode64 code r_lo) <- iselExpr64 arg
3526           let
3527               r_hi = getHiVRegFromLo r_lo
3528           return (code, [r_hi, r_lo])
3529         | otherwise
3530         = do
3531           (src, code) <- getSomeReg arg
3532           tmp <- getNewRegNat (cmmExprRep arg)   -- NOTE(review): tmp is not referenced below
3533           let
3534               pk   = cmmExprRep arg
3535           case pk of
3536              F64 -> do   -- double: copy to f0, then bounce each F32 half through the word at spRel 16
3537                       v1 <- getNewRegNat I32
3538                       v2 <- getNewRegNat I32
3539                       return (
3540                         code                          `snocOL`
3541                         FMOV F64 src f0                `snocOL`
3542                         ST   F32  f0 (spRel 16)         `snocOL`
3543                         LD   I32  (spRel 16) v1         `snocOL`
3544                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3545                         LD   I32  (spRel 16) v2
3546                         ,
3547                         [v1,v2]
3548                        )
3549              F32 -> do   -- float: store it at spRel 16 and reload it as an I32
3550                       v1 <- getNewRegNat I32
3551                       return (
3552                         code                    `snocOL`
3553                         ST   F32  src (spRel 16)  `snocOL`
3554                         LD   I32  (spRel 16) v1
3555                         ,
3556                         [v1]
3557                        )
3558              other -> do   -- any other rep: register-to-register copy (OR with g0)
3559                         v1 <- getNewRegNat I32
3560                         return (
3561                           code `snocOL` OR False g0 (RIReg src) v1
3562                           ,
3563                           [v1]
3564                          )
3565 outOfLineFloatOp mop =   -- resolve a callish MachOp to the foreign function that implements it
3566     do
3567       dflags <- getDynFlagsNat
3568       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3569                   mkForeignLabel functionName Nothing True
3570       let mopLabelOrExpr = case mopExpr of   -- a plain label can be called directly, anything else via an expression
3571                         CmmLit (CmmLabel lbl) -> Left lbl
3572                         _ -> Right mopExpr
3573       return (mopLabelOrExpr, reduce)   -- reduce is True exactly for the F32 ops
3574             where
3575                 (reduce, functionName) = case mop of   -- F32 and F64 variants map to the same function name
3576                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3577                   MO_F32_Log    -> (True,  FSLIT("log"))
3578                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3579
3580                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3581                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3582                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3583
3584                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3585                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3586                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3587
3588                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3589                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3590                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3591                   MO_F32_Pwr    -> (True,  FSLIT("pow"))
3592                   MO_F64_Exp    -> (False, FSLIT("exp"))
3593                   MO_F64_Log    -> (False, FSLIT("log"))
3594                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3595
3596                   MO_F64_Sin    -> (False, FSLIT("sin"))
3597                   MO_F64_Cos    -> (False, FSLIT("cos"))
3598                   MO_F64_Tan    -> (False, FSLIT("tan"))
3599
3600                   MO_F64_Asin   -> (False, FSLIT("asin"))
3601                   MO_F64_Acos   -> (False, FSLIT("acos"))
3602                   MO_F64_Atan   -> (False, FSLIT("atan"))
3603
3604                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3605                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3606                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3607                   MO_F64_Pwr    -> (False, FSLIT("pow"))
3608                   other -> pprPanic "outOfLineFloatOp(sparc) "
3609                                 (pprCallishMachOp mop)
3610
3611 #endif /* sparc_TARGET_ARCH */
3612
3613 #if powerpc_TARGET_ARCH
3614
3615 #if darwin_TARGET_OS || linux_TARGET_OS
3616 {-
3617     The PowerPC calling convention for Darwin/Mac OS X
3618     is described in Apple's document
3619     "Inside Mac OS X - Mach-O Runtime Architecture".
3620
3621     PowerPC Linux uses the System V Release 4 Calling Convention
3622     for PowerPC. It is described in the
3623     "System V Application Binary Interface PowerPC Processor Supplement".
3624
3625     Both conventions are similar:
3626     Parameters may be passed in general-purpose registers starting at r3, in
3627     floating point registers starting at f1, or on the stack.
3628
3629     But there are substantial differences:
3630     * The number of registers used for parameter passing and the exact set of
3631       nonvolatile registers differs (see MachRegs.lhs).
3632     * On Darwin, stack space is always reserved for parameters, even if they are
3633       passed in registers. The called routine may choose to save parameters from
3634       registers to the corresponding space on the stack.
3635     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3636       parameter is passed in an FPR.
3637     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3638       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3639       Darwin just treats an I64 like two separate I32s (high word first).
3640     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3641       4-byte aligned like everything else on Darwin.
3642     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3643       PowerPC Linux does not agree, so neither do we.
3644
3645     According to both conventions, the parameter area should be part of the
3646     caller's stack frame, allocated in the caller's prologue code (large enough
3647     to hold the parameter lists for all called routines). The NCG already
3648     uses the stack for register spilling, leaving 64 bytes free at the top.
3649     If we need a larger parameter area than that, we just allocate a new stack
3650     frame just before ccalling.
3651 -}
3652
3653
3654 genCCall (CmmPrim MO_WriteBarrier) _ _   -- a write barrier is a single LWSYNC
3655  = return (unitOL LWSYNC)
3656
3657 genCCall target dest_regs argsAndHints   -- general C call on PowerPC (Darwin and Linux)
3658   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3659         -- we rely on argument promotion in the codeGen
3660     do
3661         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3662                                                         (zip args argReps)
3663                                                         allArgRegs allFPArgRegs
3664                                                         initialStackOffset
3665                                                         (toOL []) []
3666         -- pick the call target; only out-of-line primops can ask for an F32 reduction
3667         (labelOrExpr, reduceToF32) <- case target of
3668             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3669             CmmCallee expr conv -> return  (Right expr, False)
3670             CmmPrim mop -> outOfLineFloatOp mop
3671         -- the stack adjustment precedes the argument code; the result is moved after the frame is restored
3672         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3673             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3674         -- static targets use BL; dynamic ones load the address with MTCTR and call with BCTRL
3675         case labelOrExpr of
3676             Left lbl -> do
3677                 return (         codeBefore
3678                         `snocOL` BL lbl usedRegs
3679                         `appOL`  codeAfter)
3680             Right dyn -> do
3681                 (dynReg, dynCode) <- getSomeReg dyn
3682                 return (         dynCode
3683                         `snocOL` MTCTR dynReg
3684                         `appOL`  codeBefore
3685                         `snocOL` BCTRL usedRegs
3686                         `appOL`  codeAfter)
3687     where
3688 #if darwin_TARGET_OS
3689         initialStackOffset = 24   -- starting stack offset handed to passArguments
3690             -- size of linkage area + size of arguments, in bytes
3691         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $   -- ignores the final offset: 24 + at least 32 bytes of args, rounded up to 16
3692                                        map machRepByteWidth argReps
3693 #elif linux_TARGET_OS
3694         initialStackOffset = 8    -- starting stack offset handed to passArguments
3695         stackDelta finalStack = roundTo 16 finalStack   -- the final offset, rounded up to 16
3696 #endif
3697         args = map hintlessCmm argsAndHints   -- the argument expressions (via hintlessCmm)
3698         argReps = map cmmExprRep args         -- and the machine rep of each
3699
3700         roundTo a x = case x `mod` a of   -- round x up to the next multiple of a
3701                         0 -> x ; leftover -> x + a - leftover
3702
3703         move_sp_down finalStack =          -- open a new frame, but only if more than 64 bytes are needed
3704                let frame = stackDelta finalStack
3705                in  if frame <= 64 then nilOL else
3706                       toOL [ STU I32 sp (AddrRegImm sp (ImmInt (negate frame)))
3707                            , DELTA (negate frame) ]
3708
3709         move_sp_up finalStack =            -- pop the frame again, under the same 64-byte condition
3710                let frame = stackDelta finalStack
3711                in  if frame <= 64 then nilOL else
3712                       toOL [ ADD sp sp (RIImm (ImmInt frame))
3713                            , DELTA 0 ]
3714
3715
3716
3717         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)   -- all args placed
3718         passArguments ((arg,I64):args) gprs fprs stackOffset   -- 64-bit integer argument
3719                accumCode accumUsed =
3720             do
3721                 ChildCode64 code vr_lo <- iselExpr64 arg
3722                 let vr_hi = getHiVRegFromLo vr_lo
3723                 -- the two 32-bit halves are placed according to the target OS below
3724 #if darwin_TARGET_OS
3725                 passArguments args
3726                               (drop 2 gprs)
3727                               fprs
3728                               (stackOffset+8)
3729                               (accumCode `appOL` code
3730                                     `snocOL` storeWord vr_hi gprs stackOffset
3731                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3732                               ((take 2 gprs) ++ accumUsed)
3733             where
3734                 storeWord vr (gpr:_) offset = MR gpr vr   -- a GPR is free: move the half into it
3735                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))   -- none free: store at sp+offset
3736
3737 #elif linux_TARGET_OS
3738                 let stackOffset' = roundTo 8 stackOffset
3739                     stackCode = accumCode `appOL` code
3740                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3741                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3742                     regCode hireg loreg =
3743                         accumCode `appOL` code
3744                             `snocOL` MR hireg vr_hi
3745                             `snocOL` MR loreg vr_lo
3746
3747                 case gprs of
3748                     hireg : loreg : regs | even (length gprs) ->   -- even number of GPRs left: use the next two
3749                         passArguments args regs fprs stackOffset
3750                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3751                     _skipped : hireg : loreg : regs ->             -- otherwise skip one GPR first
3752                         passArguments args regs fprs stackOffset
3753                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3754                     _ -> -- only one or no regs left
3755                         passArguments args [] fprs (stackOffset'+8)
3756                                       stackCode accumUsed
3757 #endif
3758         -- remaining reps (I32, F32, F64): a register of the right class if one is left, else a stack slot
3759         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3760             | reg : _ <- regs = do   -- a register of the right class is free
3761                 register <- getRegister arg
3762                 let code = case register of
3763                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3764                             Any _ acode -> acode reg
3765                 passArguments args
3766                               (drop nGprs gprs)
3767                               (drop nFprs fprs)
3768 #if darwin_TARGET_OS
3769         -- The Darwin ABI requires that we reserve stack slots for register parameters
3770                               (stackOffset + stackBytes)
3771 #elif linux_TARGET_OS
3772         -- ... the SysV ABI doesn't.
3773                               stackOffset
3774 #endif
3775                               (accumCode `appOL` code)
3776                               (reg : accumUsed)
3777             | otherwise = do         -- none left: compute into a register and store to the stack slot
3778                 (vr, code) <- getSomeReg arg
3779                 passArguments args
3780                               (drop nGprs gprs)
3781                               (drop nFprs fprs)
3782                               (stackOffset' + stackBytes)
3783                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3784                               accumUsed
3785             where
3786 #if darwin_TARGET_OS
3787         -- stackOffset is at least 4-byte aligned
3788         -- The Darwin ABI is happy with that.
3789                 stackOffset' = stackOffset
3790 #else
3791         -- ... the SysV ABI requires 8-byte alignment for doubles.
3792                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3793                              | otherwise  =           stackOffset
3794 #endif
3795                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3796                 (nGprs, nFprs, stackBytes, regs) = case rep of   -- (GPRs consumed, FPRs consumed, stack bytes, register pool to draw from)
3797                     I32 -> (1, 0, 4, gprs)
3798 #if darwin_TARGET_OS
3799         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3800         -- we use the FPRs.
3801                     F32 -> (1, 1, 4, fprs)
3802                     F64 -> (2, 1, 8, fprs)
3803 #elif linux_TARGET_OS
3804         -- ... the SysV ABI doesn't.
3805                     F32 -> (0, 1, 4, fprs)
3806                     F64 -> (0, 1, 8, fprs)
3807 #endif
3808
3809         moveResult reduceToF32 =   -- code that copies the call result into the destination register
3810             case dest_regs of      -- NOTE(review): no alternative for more than one destination
3811                 [] -> nilOL
3812                 [CmmHinted dest _hint]
3813                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)   -- F32 result of an out-of-line op: FRSP from f1
3814                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)      -- other float results: plain move from f1
3815                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,    -- 64-bit result: high word from r3,
3816                                           MR r_dest r4]                      -- low word from r4
3817                     | otherwise -> unitOL (MR r_dest r3)                     -- everything else comes from r3
3818                     where rep = cmmRegRep (CmmLocal dest)
3819                           r_dest = getRegisterReg (CmmLocal dest)
3820
3821         outOfLineFloatOp mop =   -- resolve a callish MachOp to the foreign function that implements it
3822             do
3823                 dflags <- getDynFlagsNat
3824                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3825                               mkForeignLabel functionName Nothing True
3826                 let mopLabelOrExpr = case mopExpr of   -- a plain label is returned as Left, anything else as Right
3827                         CmmLit (CmmLabel lbl) -> Left lbl
3828                         _ -> Right mopExpr
3829                 return (mopLabelOrExpr, reduce)   -- reduce is True exactly for the F32 ops
3830             where
3831                 (functionName, reduce) = case mop of   -- F32 and F64 variants map to the same function name
3832                     MO_F32_Exp   -> (FSLIT("exp"), True)
3833                     MO_F32_Log   -> (FSLIT("log"), True)
3834                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3835
3836                     MO_F32_Sin   -> (FSLIT("sin"), True)
3837                     MO_F32_Cos   -> (FSLIT("cos"), True)
3838                     MO_F32_Tan   -> (FSLIT("tan"), True)
3839
3840                     MO_F32_Asin  -> (FSLIT("asin"), True)
3841                     MO_F32_Acos  -> (FSLIT("acos"), True)
3842                     MO_F32_Atan  -> (FSLIT("atan"), True)
3843
3844                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3845                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3846                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3847                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3848
3849                     MO_F64_Exp   -> (FSLIT("exp"), False)
3850                     MO_F64_Log   -> (FSLIT("log"), False)
3851                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3852
3853                     MO_F64_Sin   -> (FSLIT("sin"), False)
3854                     MO_F64_Cos   -> (FSLIT("cos"), False)
3855                     MO_F64_Tan   -> (FSLIT("tan"), False)
3856
3857                     MO_F64_Asin  -> (FSLIT("asin"), False)
3858                     MO_F64_Acos  -> (FSLIT("acos"), False)
3859                     MO_F64_Atan  -> (FSLIT("atan"), False)
3860
3861                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3862                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3863                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3864                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3865                     other -> pprPanic "genCCall(ppc): unknown callish op"   -- any op not listed above
3866                                     (pprCallishMachOp other)
3867
3868 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3869
3870 #endif /* powerpc_TARGET_ARCH */
3871
3872
3873 -- -----------------------------------------------------------------------------
3874 -- Generating a table-branch
3875
3876 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock   -- scrutinee -> jump targets by index (Nothing = no target) -> code
3877
3878 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3879 genSwitch expr ids
3880   | opt_PIC   -- PIC: table entries are label differences relative to the table label
3881   = do
3882         (reg,e_code) <- getSomeReg expr
3883         lbl <- getNewLabelNat
3884         dflags <- getDynFlagsNat
3885         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3886         (tableReg,t_code) <- getSomeReg $ dynRef
3887         let
3888             jumpTable = map jumpTableEntryRel ids
3889             -- PIC table entry: 0 for no target, else (block label - lbl)
3890             jumpTableEntryRel Nothing
3891                 = CmmStaticLit (CmmInt 0 wordRep)
3892             jumpTableEntryRel (Just (BlockId id))
3893                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3894                 where blockLabel = mkAsmTempLabel id
3895             -- the table entry selected by reg: base tableReg, index reg scaled by wORD_SIZE
3896             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3897                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3898
3899 #if x86_64_TARGET_ARCH
3900 #if darwin_TARGET_OS
3901     -- on Mac OS X/x86_64, put the jump table in the text section
3902     -- to work around a limitation of the linker.
3903     -- ld64 is unable to handle the relocations for
3904     --     .quad L1 - L0
3905     -- if L0 is not preceded by a non-anonymous label in its section.
3906
3907             code = e_code `appOL` t_code `appOL` toOL [
3908                             ADD wordRep op (OpReg tableReg),
3909                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3910                             LDATA Text (CmmDataLabel lbl : jumpTable)
3911                     ]
3912 #else
3913     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3914     -- relocations, hence we only get 32-bit offsets in the jump
3915     -- table. As these offsets are always negative we need to properly
3916     -- sign extend them to 64-bit. This hack should be removed in
3917     -- conjunction with the hack in PprMach.hs/pprDataItem once
3918     -- binutils 2.17 is standard.
3919             code = e_code `appOL` t_code `appOL` toOL [
3920                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3921                             MOVSxL I32
3922                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3923                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
3924                                    (OpReg reg),   -- note: the loaded offset overwrites reg
3925                             ADD wordRep (OpReg reg) (OpReg tableReg),
3926                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3927                    ]
3928 #endif
3929 #else
3930             code = e_code `appOL` t_code `appOL` toOL [
3931                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3932                             ADD wordRep op (OpReg tableReg),
3933                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3934                     ]
3935 #endif
3936         return code
3937   | otherwise   -- non-PIC: the table holds the block labels themselves and is jumped through directly
3938   = do
3939         (reg,e_code) <- getSomeReg expr
3940         lbl <- getNewLabelNat
3941         let
3942             jumpTable = map jumpTableEntry ids
3943             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3944             code = e_code `appOL` toOL [
3945                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946                     JMP_TBL op [ id | Just id <- ids ]
3947                  ]
3948         -- in
3949         return code
3950 #elif powerpc_TARGET_ARCH
3951 genSwitch expr ids
3952   | opt_PIC   -- PIC: table entries are label differences relative to the table label
3953   = do
3954         (reg,e_code) <- getSomeReg expr
3955         tmp <- getNewRegNat I32
3956         lbl <- getNewLabelNat
3957         dflags <- getDynFlagsNat
3958         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3959         (tableReg,t_code) <- getSomeReg $ dynRef
3960         let
3961             jumpTable = map jumpTableEntryRel ids
3962             -- PIC table entry: 0 for no target, else (block label - lbl)
3963             jumpTableEntryRel Nothing
3964                 = CmmStaticLit (CmmInt 0 wordRep)
3965             jumpTableEntryRel (Just (BlockId id))
3966                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3967                 where blockLabel = mkAsmTempLabel id
3968
3969             code = e_code `appOL` t_code `appOL` toOL [
3970                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3971                             SLW tmp reg (RIImm (ImmInt 2)),          -- scale the index: shift left by 2
3972                             LD I32 tmp (AddrRegReg tableReg tmp),    -- load the selected table entry
3973                             ADD tmp tmp (RIReg tableReg),            -- add the table address to the loaded offset
3974                             MTCTR tmp,
3975                             BCTR [ id | Just id <- ids ]
3976                     ]
3977         return code
3978   | otherwise   -- non-PIC: the table holds the block labels themselves
3979   = do
3980         (reg,e_code) <- getSomeReg expr
3981         tmp <- getNewRegNat I32
3982         lbl <- getNewLabelNat
3983         let
3984             jumpTable = map jumpTableEntry ids
3985
3986             code = e_code `appOL` toOL [
3987                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3988                             SLW tmp reg (RIImm (ImmInt 2)),                   -- scale the index: shift left by 2
3989                             ADDIS tmp tmp (HA (ImmCLbl lbl)),                 -- add the HA part of the table label
3990                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),   -- load the entry, using the LO part as displacement
3991                             MTCTR tmp,
3992                             BCTR [ id | Just id <- ids ]
3993                     ]
3994         return code
3995 #else
3996 genSwitch _ _ = panic "ToDo: genSwitch"   -- remaining targets: not implemented, panics
3997 #endif
3998
3999 jumpTableEntry entry = CmmStaticLit $ case entry of   -- one static jump-table slot
4000     Nothing          -> CmmInt 0 wordRep               -- no target: a zero word
4001     Just (BlockId u) -> CmmLabel (mkAsmTempLabel u)    -- target: the block's asm temp label
4002
4003 -- -----------------------------------------------------------------------------
4004 -- Support bits
4005 -- -----------------------------------------------------------------------------
4006
4007
4008 -- -----------------------------------------------------------------------------
4009 -- 'condIntReg' and 'condFltReg': condition codes into registers
4010
4011 -- Turn those condition codes into integers now (when they appear on
4012 -- the right hand side of an assignment).
4013 --
4014 -- (If applicable) Do not fill the delay slots here; you will confuse the
4015 -- register allocator.
4016
4017 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
     -- Arguments: the condition, then the two expressions that the per-target
     -- definitions below hand on to condIntCode / condFltCode.
4018
4019 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4020
4021 #if alpha_TARGET_ARCH
4022 condIntReg = panic "MachCode.condIntReg (not on Alpha)"  -- not provided on Alpha: panics when used
4023 condFltReg = panic "MachCode.condFltReg (not on Alpha)"  -- not provided on Alpha: panics when used
4024 #endif /* alpha_TARGET_ARCH */
4025
4026 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4027
4028 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4029
4030 condIntReg cond x y = do
4031   -- Run the integer comparison, capture the condition in a byte
4032   -- register with SETCC, then widen that byte into the destination.
4033   CondCode _ cc flagsCode <- condIntCode cond x y
4034   byteReg <- getNewRegNat I8
4035   let materialise dst = flagsCode `snocOL`
4036                             SETCC cc (OpReg byteReg) `snocOL`
4037                             MOVZxL I8 (OpReg byteReg) (OpReg dst)
4038   return (Any I32 materialise)
4040
4041 #endif
4042
4043 #if i386_TARGET_ARCH
4044
4045 condFltReg cond x y = do
4046   -- Same shape as condIntReg, but the flags come from condFltCode:
4047   -- SETCC into a byte register, then widen into the destination.
4048   CondCode _ cc flagsCode <- condFltCode cond x y
4049   byteReg <- getNewRegNat I8
4050   let materialise dst = flagsCode `snocOL`
4051                             SETCC cc (OpReg byteReg) `snocOL`
4052                             MOVZxL I8 (OpReg byteReg) (OpReg dst)
4053   return (Any I32 materialise)
4055
4056 #endif
4057
4058 #if x86_64_TARGET_ARCH
4059
4060 condFltReg cond x y = do
4061   CondCode _ cond cond_code <- condFltCode cond x y
4062   tmp1 <- getNewRegNat wordRep
4063   tmp2 <- getNewRegNat wordRep
4064   let
4065         -- Unordered operands (e.g. a comparison against NaN) make the
4066         -- comparison set the parity, carry and zero flags together.
4067         -- Every comparison is supposed to yield false for unordered
4068         -- operands, except != which yields true.
4069         --
4070         -- GU and GEU test for a zero carry flag, which can only occur
4071         -- for ordered operands, so they need no parity check.
4072         --
4073         -- ToDo: by reversing comparisons we could avoid testing the
4074         -- parity flag in more cases.
4075
4076         code dst = cond_code `appOL` finish dst
4077
4078         finish = case cond of
4079                 NE  -> withParity OR  PARITY
4080                 GU  -> plain_test
4081                 GEU -> plain_test
4082                 _   -> withParity AND NOTPARITY
4083
4084         plain_test dst = toOL [
4085                     SETCC cond (OpReg tmp1),
4086                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4087                  ]
4088
4089         -- condition byte `combine` parity byte, then widened into dst
4090         withParity combine flag dst = toOL [
4091                     SETCC cond (OpReg tmp1),
4092                     SETCC flag (OpReg tmp2),
4093                     combine I8 (OpReg tmp1) (OpReg tmp2),
4094                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4095                   ]
4096   return (Any I32 code)
4105
4106 #endif
4107
4108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4109
4110 #if sparc_TARGET_ARCH
4111
4112 condIntReg EQQ x (CmmLit (CmmInt 0 _)) = do
4113     -- x == 0: only x has to be brought into a register.
4114     (src, code) <- getSomeReg x
4115     let
4116         code__2 dst = code `appOL` toOL [
4117             SUB False True g0 (RIReg src) g0,
4118             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4119     return (Any I32 code__2)
4120
4121 condIntReg EQQ x y = do
4122     -- x == y: XOR the operands into dst, then proceed as for "== 0".
4123     (src1, code1) <- getSomeReg x
4124     (src2, code2) <- getSomeReg y
4125     let
4126         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4127             XOR False src1 (RIReg src2) dst,
4128             SUB False True g0 (RIReg dst) g0,
4129             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4130     return (Any I32 code__2)
4131
4132 condIntReg NE x (CmmLit (CmmInt 0 _)) = do
4133     -- x /= 0: only x has to be brought into a register.
4134     (src, code) <- getSomeReg x
4135     let
4136         code__2 dst = code `appOL` toOL [
4137             SUB False True g0 (RIReg src) g0,
4138             ADD True False g0 (RIImm (ImmInt 0)) dst]
4139     return (Any I32 code__2)
4140
4141 condIntReg NE x y = do
4142     -- x /= y: XOR the operands into dst, then proceed as for "/= 0".
4143     (src1, code1) <- getSomeReg x
4144     (src2, code2) <- getSomeReg y
4145     let
4146         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4147             XOR False src1 (RIReg src2) dst,
4148             SUB False True g0 (RIReg dst) g0,
4149             ADD True False g0 (RIImm (ImmInt 0)) dst]
4150     return (Any I32 code__2)
4151
4152 condIntReg cond x y = do
4153     -- General case: branch to lbl1 when the condition holds, where dst is
4154     -- set to 1; otherwise set dst to 0 and jump to the join point lbl2.
4155     -- Each branch is followed by a NOP (delay slots are not filled here).
4156     BlockId lbl1 <- getBlockIdNat
4157     BlockId lbl2 <- getBlockIdNat
4158     CondCode _ cond cond_code <- condIntCode cond x y
4159     let
4160         code__2 dst = cond_code `appOL` toOL [
4161             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4162             OR False g0 (RIImm (ImmInt 0)) dst,
4163             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4164             NEWBLOCK (BlockId lbl1),
4165             OR False g0 (RIImm (ImmInt 1)) dst,
4166             NEWBLOCK (BlockId lbl2)]
4167     return (Any I32 code__2)
4167
4168 condFltReg cond x y = do
4169     -- Branch on the FP condition: dst is 1 on the taken path, 0 otherwise.
4170     BlockId trueLbl <- getBlockIdNat
4171     BlockId joinLbl <- getBlockIdNat
4172     CondCode _ cc cmpCode <- condFltCode cond x y
4173     let jumpTo l = ImmCLbl (mkAsmTempLabel l)
4174         build dst = cmpCode `appOL` toOL [
4175             NOP,
4176             BF cc False (jumpTo trueLbl), NOP,
4177             OR False g0 (RIImm (ImmInt 0)) dst,
4178             BI ALWAYS False (jumpTo joinLbl), NOP,
4179             NEWBLOCK (BlockId trueLbl),
4180             OR False g0 (RIImm (ImmInt 1)) dst,
4181             NEWBLOCK (BlockId joinLbl)]
     return (Any I32 build)
4182
4183 #endif /* sparc_TARGET_ARCH */
4184
4185 #if powerpc_TARGET_ARCH
4186 condReg getCond = do
     -- Turn the condition produced by getCond into a 0/1 value in a register
     -- without branching: optionally invert the relevant condition-register
     -- bit (CRNOR), copy the condition register into dst (MFCR), and rotate
     -- and mask with RLWINM (rotate by bit + 1, mask 31..31) to pick out
     -- that one bit.
     -- lbl1 and lbl2 are referenced only by the commented-out branching
     -- variant kept below.
4187     lbl1 <- getBlockIdNat
4188     lbl2 <- getBlockIdNat
4189     CondCode _ cond cond_code <- getCond
4190     let
4191 {-        code dst = cond_code `appOL` toOL [
4192                 BCC cond lbl1,
4193                 LI dst (ImmInt 0),
4194                 BCC ALWAYS lbl2,
4195                 NEWBLOCK lbl1,
4196                 LI dst (ImmInt 1),
4197                 BCC ALWAYS lbl2,
4198                 NEWBLOCK lbl2
4199             ]-}
4200         code dst = cond_code
4201             `appOL` negate_code
4202             `appOL` toOL [
4203                 MFCR dst,
4204                 RLWINM dst dst (bit + 1) 31 31
4205             ]
4206
4207         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4208                     | otherwise = nilOL
4209
         -- (bit to test, whether it must be inverted first)
4210         (bit, do_negate) = case cond of
4211             LTT -> (0, False)
4212             LE  -> (1, True)
4213             EQQ -> (2, False)
4214             GE  -> (0, True)
4215             GTT -> (1, False)
4216
4217             NE  -> (2, True)
4218
4219             LU  -> (0, False)
4220             LEU -> (1, True)
4221             GEU -> (0, True)
4222             GU  -> (1, False)
             -- Any other condition (e.g. ALWAYS) has no bit to test here;
             -- fail with a recognisable message, not a bare pattern-match failure.
             _   -> panic "MachCodeGen.condReg: unsupported condition"
4223
4224     return (Any I32 code)
4225
4226 condIntReg cond x y = condReg $ condIntCode cond x y   -- condReg over condIntCode
4227 condFltReg cond x y = condReg $ condFltCode cond x y   -- condReg over condFltCode
4228 #endif /* powerpc_TARGET_ARCH */
4229
4230
4231 -- -----------------------------------------------------------------------------
4232 -- 'trivial*Code': deal with trivial instructions
4233
4234 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4235 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4236 -- Only look for constants on the right hand side, because that's
4237 -- where the generic optimizer will have put them.
4238
4239 -- Similarly, for unary instructions, we don't have to worry about
4240 -- matching an StInt as the argument, because genericOpt will already
4241 -- have handled the constant-folding.
4242
4243 trivialCode
         -- Dyadic operation.  After the MachRep comes the target-specific
         -- instruction constructor (or constructors), whose type is picked
         -- by the nested IF_ARCH_* macros: x86 takes an instruction plus an
         -- optional second one, PowerPC takes an extra Bool first.
4244     :: MachRep
4245     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4246       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4247                      -> Maybe (Operand -> Operand -> Instr)
4248       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4249                      -> Maybe (Operand -> Operand -> Instr)
4250       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4251       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4252       ,)))))
4253     -> CmmExpr -> CmmExpr -- the two arguments
4254     -> NatM Register
4255
4256 #ifndef powerpc_TARGET_ARCH
4257 trivialFCode
         -- Dyadic floating-point operation; declared for every target
         -- except PowerPC (see the enclosing #ifndef).  The instruction
         -- constructor's type is picked by the IF_ARCH_* macros.
4258     :: MachRep
4259     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4260       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4261       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4262       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4263       ,))))
4264     -> CmmExpr -> CmmExpr -- the two arguments
4265     -> NatM Register
4266 #endif
4267
4268 trivialUCode
         -- Unary operation; the instruction constructor's type is picked
         -- per target by the IF_ARCH_* macros.
4269     :: MachRep
4270     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4271       ,IF_ARCH_i386 ((Operand -> Instr)
4272       ,IF_ARCH_x86_64 ((Operand -> Instr)
4273       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4274       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4275       ,)))))
4276     -> CmmExpr  -- the one argument
4277     -> NatM Register
4278
4279 #ifndef powerpc_TARGET_ARCH
4280 trivialUFCode
         -- Unary floating-point operation; declared for every target
         -- except PowerPC (see the enclosing #ifndef).  All listed targets
         -- use a Reg -> Reg -> Instr constructor.
4281     :: MachRep
4282     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4283       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4284       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4285       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4286       ,))))
4287     -> CmmExpr -- the one argument
4288     -> NatM Register
4289 #endif
4290
4291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4292
4293 #if alpha_TARGET_ARCH
4294
4295 trivialCode instr x (StInt y)
     -- Alpha: literal right operand that passes fits8Bits is used as an
     -- immediate.
     -- NOTE(review): both clauses take three arguments, while the signature
     -- above has a leading MachRep, and they match on StInt where the
     -- signature says CmmExpr -- this Alpha code looks stale; confirm before
     -- relying on it.
4296   | fits8Bits y
4297   = getRegister x               `thenNat` \ register ->
4298     getNewRegNat IntRep         `thenNat` \ tmp ->
4299     let
4300         code = registerCode register tmp
4301         src1 = registerName register tmp
4302         src2 = ImmInt (fromInteger y)
4303         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4304     in
4305     return (Any IntRep code__2)
4306
     -- General case: both operands go through getRegister and the
     -- instruction takes the second one as a register.
4307 trivialCode instr x y
4308   = getRegister x               `thenNat` \ register1 ->
4309     getRegister y               `thenNat` \ register2 ->
4310     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4311     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4312     let
4313         code1 = registerCode register1 tmp1 []
4314         src1  = registerName register1 tmp1
4315         code2 = registerCode register2 tmp2 []
4316         src2  = registerName register2 tmp2
4317         code__2 dst = asmSeqThen [code1, code2] .
4318                      mkSeqInstr (instr src1 (RIReg src2) dst)
4319     in
4320     return (Any IntRep code__2)
4321
4322 ------------
4323 trivialUCode instr x
     -- Alpha: evaluate x through getRegister, then apply instr to it with
     -- dst as the result register.
     -- NOTE(review): takes two arguments although the signature above has a
     -- leading MachRep -- looks stale; confirm.
4324   = getRegister x               `thenNat` \ register ->
4325     getNewRegNat IntRep         `thenNat` \ tmp ->
4326     let
4327         code = registerCode register tmp
4328         src  = registerName register tmp
4329         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4330     in
4331     return (Any IntRep code__2)
4332
4333 ------------
4334 trivialFCode _ instr x y
     -- Alpha: evaluate both operands (with F64 temporaries), sequence their
     -- code, then emit instr on the two source registers and dst.  The
     -- MachRep argument is ignored and the result is always Any F64.
4335   = getRegister x               `thenNat` \ register1 ->
4336     getRegister y               `thenNat` \ register2 ->
4337     getNewRegNat F64    `thenNat` \ tmp1 ->
4338     getNewRegNat F64    `thenNat` \ tmp2 ->
4339     let
4340         code1 = registerCode register1 tmp1
4341         src1  = registerName register1 tmp1
4342
4343         code2 = registerCode register2 tmp2
4344         src2  = registerName register2 tmp2
4345
4346         code__2 dst = asmSeqThen [code1 [], code2 []] .
4347                       mkSeqInstr (instr src1 src2 dst)
4348     in
4349     return (Any F64 code__2)
4350
4351 trivialUFCode _ instr x
     -- Alpha: evaluate x (with an F64 temporary), then emit instr from the
     -- source register to dst.  The MachRep argument is ignored and the
     -- result is always Any F64.
4352   = getRegister x               `thenNat` \ register ->
4353     getNewRegNat F64    `thenNat` \ tmp ->
4354     let
4355         code = registerCode register tmp
4356         src  = registerName register tmp
4357         code__2 dst = code . mkSeqInstr (instr src dst)
4358     in
4359     return (Any F64 code__2)
4360
4361 #endif /* alpha_TARGET_ARCH */
4362
4363 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4364
4365 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4366
4367 {-
4368 The Rules of the Game are:
4369
4370 * You cannot assume anything about the destination register dst;
4371   it may be anything, including a fixed reg.
4372
4373 * You may compute an operand into a fixed reg, but you may not
4374   subsequently change the contents of that fixed reg.  If you
4375   want to do so, first copy the value either to a temporary
4376   or into dst.  You are free to modify dst even if it happens
4377   to be a fixed reg -- that's not your problem.
4378
4379 * You cannot assume that a fixed reg will stay live over an
4380   arbitrary computation.  The same applies to the dst reg.
4381
4382 * Temporary regs obtained from getNewRegNat are distinct from
4383   each other and from all other regs, and stay live over
4384   arbitrary computations.
4385
4386 --------------------
4387
4388 SDM's version of The Rules:
4389
4390 * If getRegister returns Any, that means it can generate correct
4391   code which places the result in any register, period.  Even if that
4392   register happens to be read during the computation.
4393
4394   Corollary #1: this means that if you are generating code for an
4395   operation with two arbitrary operands, you cannot assign the result
4396   of the first operand into the destination register before computing
4397   the second operand.  The second operand might require the old value
4398   of the destination register.
4399
4400   Corollary #2: A function might be able to generate more efficient
4401   code if it knows the destination register is a new temporary (and
4402   therefore not read by any of the sub-computations).
4403
4404 * If getRegister returns Any, then the code it generates may modify only:
4405         (a) fresh temporaries
4406         (b) the destination register
4407         (c) known registers (eg. %ecx is used by shifts)
4408   In particular, it may *not* modify global registers, unless the global
4409   register happens to be the destination register.
4410 -}
4411
4412 trivialCode rep _ (Just revinstr) (CmmLit lit_a) b
4413   | not (is64BitLit lit_a) = do
4414   -- Literal on the left and a reversed instruction is available:
4415   -- compute b into dst, then apply revinstr with the literal as immediate.
4416   b_code <- getAnyReg b
4417   return $ Any rep $ \dst ->
4418       b_code dst `snocOL` revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4419
4420 -- Every other shape goes through the general two-operand path.
4421 trivialCode rep instr _ a b = genTrivialCode rep instr a b
4423
4424 -- This is re-used for floating pt instructions too.
4425 genTrivialCode rep instr a b = do
4426   (b_op, b_code) <- getNonClobberedOperand b
4427   a_code <- getAnyReg a
4428   tmp <- getNewRegNat rep
4429   let
4430      -- b must stay alive across the computation of a, yet a is computed
4431      -- straight into dst because the instruction has only two operands
4432      -- (dst := dst `op` src).  If b's operand mentions dst, b is first
4433      -- copied into a fresh temporary and that copy is used instead.
4434      code dst = b_code `appOL` save `appOL` a_code dst `snocOL`
4435                 instr src (OpReg dst)
4436        where
4437          (save, src)
4438            | dst `regClashesWithOp` b_op = (unitOL (MOV rep b_op (OpReg tmp)), OpReg tmp)
4439            | otherwise                   = (nilOL, b_op)
4440   return (Any rep code)
4448
4449 regClashesWithOp reg (OpReg other) = other == reg                 -- the operand is that register
4450 regClashesWithOp reg (OpAddr am)   = reg `elem` addrModeRegs am   -- the address mode mentions it
4451 regClashesWithOp _   _             = False                        -- any other operand: no clash
4452
4453 -----------
4454
4455 trivialUCode rep instr x = do
4456   -- Compute x into dst, then apply the one-operand instruction to dst.
4457   x_code <- getAnyReg x
4458   return $ Any rep $ \dst ->
4459       x_code dst `snocOL` instr (OpReg dst)
4463
4464 -----------
4465
4466 #if i386_TARGET_ARCH
4467
4468 trivialFCode pk instr x y = do
4469   -- x is fetched with getNonClobberedReg, y with getSomeReg
4470   -- (these work for float regs too); the instruction names dst itself.
4471   (x_reg, x_code) <- getNonClobberedReg x
4472   (y_reg, y_code) <- getSomeReg y
4473   return $ Any pk $ \dst ->
4474       x_code `appOL` y_code `snocOL` instr pk x_reg y_reg dst
4478
4479 #endif
4480
4481 #if x86_64_TARGET_ARCH
4482
4483 trivialFCode pk instr = genTrivialCode pk (instr pk)   -- x86_64: reuse the integer two-operand path
4484
4485 #endif
4486
4487 -------------
4488
4489 trivialUFCode rep instr x = do
4490   -- Evaluate x into some register, then emit instr from it to dst.
4491   (x_reg, x_code) <- getSomeReg x
4492   return $ Any rep $ \dst ->
4493       x_code `snocOL` instr x_reg dst
4497
4498 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
4499
4500 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4501
4502 #if sparc_TARGET_ARCH
4503
4504 trivialCode pk instr x (CmmLit (CmmInt y _))
4505   | fits13Bits y
4506   = do
4507       -- Literal right operand that passes fits13Bits: use it as an immediate.
4508       (src1, code) <- getSomeReg x
4509       let
4510         src2 = ImmInt (fromInteger y)
4511         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4512       return (Any I32 code__2)
4513
4514 trivialCode pk instr x y = do
4515     -- General case: both operands in registers.
4516     (src1, code1) <- getSomeReg x
4517     (src2, code2) <- getSomeReg y
4518     let
4519         code__2 dst = code1 `appOL` code2 `snocOL`
4520                       instr src1 (RIReg src2) dst
4521     return (Any I32 code__2)
4523
4524 ------------
4525 trivialFCode pk instr x y = do
4526     -- Dyadic FP operation.  When the operand reps differ, the F32 one is
4527     -- converted to F64 into a fresh temporary and the operation is done
4528     -- (and its result typed) at F64.
4529     (src1, code1) <- getSomeReg x
4530     (src2, code2) <- getSomeReg y
4531     tmp <- getNewRegNat F64
4532     let
4533         promote r = FxTOy F32 F64 r tmp
4534
4535         pk1   = cmmExprRep x
4536         pk2   = cmmExprRep y
4537
4538         code__2 dst =
4539                 if pk1 == pk2 then
4540                     code1 `appOL` code2 `snocOL`
4541                     instr pk src1 src2 dst
4542                 else if pk1 == F32 then
4543                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4544                     instr F64 tmp src2 dst
4545                 else
4546                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4547                     instr F64 src1 tmp dst
     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4548
4549 ------------
4550 trivialUCode pk instr x = do
4551     -- Evaluate x into a register and apply the unary instruction to it.
4552     (src, code) <- getSomeReg x
4553     let
4554         code__2 dst = code `snocOL` instr (RIReg src) dst
4555     return (Any pk code__2)
4556
4557 -------------
4558 trivialUFCode pk instr x = do
4559     -- Evaluate x into a register and apply the unary FP instruction to it.
4560     (src, code) <- getSomeReg x
4561     let
4562         code__2 dst = code `snocOL` instr src dst
4563     return (Any pk code__2)
4564
4565 #endif /* sparc_TARGET_ARCH */
4566
4567 #if powerpc_TARGET_ARCH
4568
4569 {-
4570 Wolfgang's PowerPC version of The Rules:
4571
4572 A slightly modified version of The Rules to take advantage of the fact
4573 that PowerPC instructions work on all registers and don't implicitly
4574 clobber any fixed registers.
4575
4576 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4577
4578 * If getRegister returns Any, then the code it generates may modify only:
4579         (a) fresh temporaries
4580         (b) the destination register
4581   It may *not* modify global registers, unless the global
4582   register happens to be the destination register.
4583   It may not clobber any other registers. In fact, only ccalls clobber any
4584   fixed registers.
4585   Also, it may not modify the counter register (used by genCCall).
4586
4587   Corollary: If a getRegister for a subexpression returns Fixed, you need
4588   not move it to a fresh temporary before evaluating the next subexpression.
4589   The Fixed register won't be modified.
4590   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4591
4592 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4593   the value of the destination register.
4594 -}
4595
4596 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4597     | Just imm <- makeImmediate rep signed y
4598     = do
4599         -- Literal right operand that makeImmediate accepts: immediate form.
4600         (src1, code1) <- getSomeReg x
4601         return (Any rep (\dst -> code1 `snocOL` instr dst src1 (RIImm imm)))
4602
4603 trivialCode rep _ instr x y = do
4604     (src1, code1) <- getSomeReg x
4605     (src2, code2) <- getSomeReg y
4606     return $ Any rep $ \dst ->
4607         code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4608
4609 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4610     -> CmmExpr -> CmmExpr -> NatM Register
4611 trivialCodeNoImm rep instr x y = do
4612     -- Register-register-register form: no immediate variant is tried.
4613     (src1, code1) <- getSomeReg x
4614     (src2, code2) <- getSomeReg y
4615     return (Any rep (\dst -> code1 `appOL` code2 `snocOL` instr dst src1 src2))
4616
4617 trivialUCode rep instr x = do
4618     (src, code) <- getSomeReg x   -- operand first, then one instruction dst <- src
4619     return $ Any rep $ \dst -> code `snocOL` instr dst src
4621
4622 -- There is no "remainder" instruction on the PPC, so we have to do
4623 -- it the hard way.
4624 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4625
4626 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4627     -> CmmExpr -> CmmExpr -> NatM Register
4628 remainderCode rep div x y = do
     -- remainder = x - (x `div` y) * y, with `div` the supplied division
     -- instruction.
     --
     -- The quotient and product live in a fresh temporary, not in dst:
     -- dst may be any register, including the one holding x or y (e.g.
     -- when compiling r = r % y), and both sources are read again after
     -- the division.
4629     (src1, code1) <- getSomeReg x
4630     (src2, code2) <- getSomeReg y
4631     quot <- getNewRegNat rep
4632     let code dst = code1 `appOL` code2 `appOL` toOL [
4633                 div quot src1 src2,
4634                 MULLW quot quot (RIReg src2),
4635                 SUBF dst quot src1 ]
4636     return (Any rep code)
4637
4638 #endif /* powerpc_TARGET_ARCH */
4639
4640
4641 -- -----------------------------------------------------------------------------
4642 --  Coercing to/from integer/floating-point...
4643
4644 -- When going to integer, we truncate (round towards 0).
4645
4646 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4647 -- conversions.  We have to store temporaries in memory to move
4648 -- between the integer and the floating point register sets.
4649
4650 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4651 -- pretend, on sparc at least, that double and float regs are separate
4652 -- kinds, so the value has to be computed into one kind before being
4653 -- explicitly "converted" to live in the other kind.
4654
4655 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4656 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
     -- The i386, x86_64 and PowerPC definitions bind the two MachReps as
     -- (source rep, destination rep).
     -- NOTE(review): the SPARC coerceFP2Int returns its *first* MachRep as
     -- the result rep, i.e. it appears to expect (destination, source) --
     -- confirm against the callers.
4657
4658 #if sparc_TARGET_ARCH
4659 coerceDbl2Flt :: CmmExpr -> NatM Register   -- SPARC only: result is Any F32
4660 coerceFlt2Dbl :: CmmExpr -> NatM Register   -- SPARC only: result is Any F64
4661 #endif
4662
4663 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4664
4665 #if alpha_TARGET_ARCH
4666
4667 coerceInt2FP _ x
     -- Alpha: store the integer at spRel 0, load it back into the FP
     -- destination and convert it in place with CVTxy.
     -- NOTE(review): takes two arguments although the signature has three
     -- (two MachReps and the expression) -- this Alpha code looks stale.
4668   = getRegister x               `thenNat` \ register ->
4669     getNewRegNat IntRep         `thenNat` \ reg ->
4670     let
4671         code = registerCode register reg
4672         src  = registerName register reg
4673
4674         code__2 dst = code . mkSeqInstrs [
4675             ST Q src (spRel 0),
4676             LD TF dst (spRel 0),
4677             CVTxy Q TF dst dst]
4678     in
4679     return (Any F64 code__2)
4680
4681 -------------
4682 coerceFP2Int x
     -- Alpha: convert with CVTxy into an FP temporary, store it at spRel 0
     -- and load the result back into the integer destination.
     -- NOTE(review): takes one argument although the signature has three
     -- (two MachReps and the expression) -- this Alpha code looks stale.
4683   = getRegister x               `thenNat` \ register ->
4684     getNewRegNat F64    `thenNat` \ tmp ->
4685     let
4686         code = registerCode register tmp
4687         src  = registerName register tmp
4688
4689         code__2 dst = code . mkSeqInstrs [
4690             CVTxy TF Q src tmp,
4691             ST TF tmp (spRel 0),
4692             LD Q dst (spRel 0)]
4693     in
4694     return (Any IntRep code__2)
4695
4696 #endif /* alpha_TARGET_ARCH */
4697
4698 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4699
4700 #if i386_TARGET_ARCH
4701
4702 coerceInt2FP from to x = do
4703   (x_reg, x_code) <- getSomeReg x
4704   let
4705         -- conversion instruction chosen by the destination float rep
4706         convert = case to of
4707                     F32 -> GITOF
4708                     F64 -> GITOD
4709   return (Any to (\dst -> x_code `snocOL` convert x_reg dst)) -- ToDo: works for non-I32 reps?
4710
4711 ------------
4712
4713 coerceFP2Int from to x = do
4714   (x_reg, x_code) <- getSomeReg x
4715   let
4716         -- conversion instruction chosen by the source float rep
4717         convert = case from of
4718                     F32 -> GFTOI
4719                     F64 -> GDTOI
4720   return (Any to (\dst -> x_code `snocOL` convert x_reg dst)) -- ToDo: works for non-I32 reps?
4721
4722 #endif /* i386_TARGET_ARCH */
4723
4724 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4725
4726 #if x86_64_TARGET_ARCH
4727
4728 coerceFP2Int from to x = do
4729   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4730   let
4731         -- conversion instruction chosen by the source float rep
4732         convert = case from of { F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ }
4733   -- works even if the destination rep is <I32
4734   return $ Any to $ \dst -> x_code `snocOL` convert x_op dst
4735
4736 coerceInt2FP from to x = do
4737   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4738   let
4739         -- conversion instruction chosen by the destination float rep
4740         convert = case to of { F32 -> CVTSI2SS; F64 -> CVTSI2SD }
4741   -- works even if the destination rep is <I32
4742   return $ Any to $ \dst -> x_code `snocOL` convert x_op dst
4743
4744 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4745 coerceFP2FP to x = do
4746   -- Float-to-float conversion; the instruction is chosen by the target rep.
4747   (x_reg, x_code) <- getSomeReg x
4748   let
4749         convert = case to of { F32 -> CVTSD2SS; F64 -> CVTSS2SD }
4750   return $ Any to $ \dst -> x_code `snocOL` convert x_reg dst
4752
4753 #endif
4754
4755 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4756
4757 #if sparc_TARGET_ARCH
4758
4759 coerceInt2FP pk1 pk2 x = do
4760     -- Store the integer at spRel (-2), load it into dst, convert in place.
4761     (src, code) <- getSomeReg x
4762     let viaStack dst = code `snocOL`
4763             ST pk1 src (spRel (-2)) `snocOL`
4764             LD pk1 (spRel (-2)) dst `snocOL`
4765             FxTOy pk1 pk2 dst dst
4766     return (Any pk2 viaStack)
4767
4768 ------------
4769 coerceFP2Int pk fprep x = do
4770     -- Convert into a temporary with FxTOy, store it at spRel (-2), then
4771     -- load the result into dst.  Here pk is the result rep and fprep the
4772     -- rep of the floating-point source.
4773     (src, code) <- getSomeReg x
4774     tmp <- getNewRegNat pk
4775     let
4776         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4777             code `appOL` toOL [
4778             FxTOy fprep pk src tmp,
4779             ST pk tmp (spRel (-2)),
     -- (the load below completes the list)
             LD pk (spRel (-2)) dst]
     return (Any pk code__2)
4780
4781 ------------
4782 coerceDbl2Flt x = do
4783     (src, code) <- getSomeReg x   -- then a single F64 -> F32 FxTOy into dst
4784     return $ Any F32 $ \dst -> code `snocOL` FxTOy F64 F32 src dst
4785
4786 ------------
4787 coerceFlt2Dbl x = do
4788     (src, code) <- getSomeReg x   -- then a single F32 -> F64 FxTOy into dst
4789     return $ Any F64 $ \dst -> code `snocOL` FxTOy F32 F64 src dst
4790
4791 #endif /* sparc_TARGET_ARCH */
4792
4793 #if powerpc_TARGET_ARCH
4794 coerceInt2FP fromRep toRep x = do
     -- Integer -> floating point without a direct conversion instruction.
     -- A 64-bit pattern is assembled on the stack from the word 0x43300000
     -- (LIS 0x4330) and the integer XORed with 0x8000 in its upper half
     -- (XORIS), loaded as an F64, and the read-only constant
     -- 0x43300000 / 0x80000000 emitted at lbl is subtracted from it.
     -- NOTE(review): this presumably relies on the first word being the
     -- high word of the double (spRel 2 then spRel 3) -- confirm.
4795     (src, code) <- getSomeReg x
4796     lbl <- getNewLabelNat
4797     itmp <- getNewRegNat I32
4798     ftmp <- getNewRegNat F64
4799     dflags <- getDynFlagsNat
4800     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4801     Amode addr addr_code <- getAmode dynRef
4802     let
4803         code' dst = code `appOL` maybe_exts `appOL` toOL [
4804                 LDATA ReadOnlyData
4805                                 [CmmDataLabel lbl,
4806                                  CmmStaticLit (CmmInt 0x43300000 I32),
4807                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4808                 XORIS itmp src (ImmInt 0x8000),
4809                 ST I32 itmp (spRel 3),
4810                 LIS itmp (ImmInt 0x4330),
4811                 ST I32 itmp (spRel 2),
4812                 LD F64 ftmp (spRel 2)
4813             ] `appOL` addr_code `appOL` toOL [
4814                 LD F64 dst addr,
4815                 FSUB F64 dst ftmp dst
4816             ] `appOL` maybe_frsp dst
4817
         -- I8 and I16 sources get an EXTS first; it is applied to src in place.
         -- NOTE(review): only I8, I16 and I32 are matched; any other fromRep
         -- is a pattern-match failure.
4818         maybe_exts = case fromRep of
4819                         I8 ->  unitOL $ EXTS I8 src src
4820                         I16 -> unitOL $ EXTS I16 src src
4821                         I32 -> nilOL
         -- An F32 result gets a trailing FRSP on dst; F64 needs nothing more.
4822         maybe_frsp dst = case toRep of
4823                         F32 -> unitOL $ FRSP dst dst
4824                         F64 -> nilOL
4825     return (Any toRep code')
4826
4827 coerceFP2Int fromRep toRep x = do
4828     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4829     (src, code) <- getSomeReg x
4830     tmp <- getNewRegNat F64
4831     let
4832         viaStack dst = code `snocOL`
4833                 -- convert to int in FP reg
4834             FCTIWZ tmp src `snocOL`
4835                 -- store value (64bit) from FP to stack
4836             ST F64 tmp (spRel 2) `snocOL`
4837                 -- read low word of value (high word is undefined)
4838             LD I32 dst (spRel 3)
4839     return (Any toRep viaStack)
4840 #endif /* powerpc_TARGET_ARCH */
4841
4842
4843 -- -----------------------------------------------------------------------------
4844 -- eXTRA_STK_ARGS_HERE
4845
4846 -- We (allegedly) put the first six C-call arguments in registers;
4847 -- where do we start putting the rest of them?
4848
4849 -- Moved from MachInstrs (SDM):
4850
4851 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4852 eXTRA_STK_ARGS_HERE :: Int
     -- 0 on Alpha, 23 on SPARC.  The `???` in the innermost macro argument
     -- is a placeholder for any other target; the enclosing #if compiles
     -- this definition for Alpha and SPARC only.
     -- NOTE(review): the unit of the value is not visible from here.
4853 eXTRA_STK_ARGS_HERE
4854   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4855 #endif
4856