ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25
  26 -- Our intermediate code:
  27 import PprCmm           ( pprExpr )
  28 import Cmm
  29 import MachOp
  30 import CLabel
  31
  32 -- The rest:
  33 import StaticFlags      ( opt_PIC )
  34 import ForeignCall      ( CCallConv(..) )
  35 import OrdList
  36 import Pretty
  37 import Outputable
  38 import qualified Outputable
  39 import FastString
  40 import FastTypes        ( isFastTrue )
  41 import Constants        ( wORD_SIZE )
  42
  43 #ifdef DEBUG
  44 import Outputable       ( assertPanic )
  45 import TRACE            ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Maybe            ( fromJust )
  50 import DATA_BITS
  51 import DATA_WORD
  52
  53 -- -----------------------------------------------------------------------------
  54 -- Top-level of the instruction selector
  55
  56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  57 -- They are really trees of insns to facilitate fast appending, where a
  58 -- left-to-right traversal (pre-order?) yields the insns in the correct
  59 -- order.
  60
  61 type InstrBlock = OrdList Instr
  62
  63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  64 cmmTopCodeGen (CmmProc info lab params blocks) = do
  65   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  66   picBaseMb <- getPicBaseMaybeNat
  67   let proc = CmmProc info lab params (concat nat_blocks)
  68       tops = proc : concat statics
  69   case picBaseMb of
  70       Just picBase -> initializePicBase picBase tops
  71       Nothing -> return tops
  72
  73 cmmTopCodeGen (CmmData sec dat) = do
  74   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  75
  76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  77 basicBlockCodeGen (BasicBlock id stmts) = do
  78   instrs <- stmtsToInstrs stmts
  79   -- code generation may introduce new basic block boundaries, which
  80   -- are indicated by the NEWBLOCK instruction.  We must split up the
  81   -- instruction stream into basic blocks again.  Also, we extract
  82   -- LDATAs here too.
  83   let
  84         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  85
  86         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  87           = ([], BasicBlock id instrs : blocks, statics)
  88         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  89           = (instrs, blocks, CmmData sec dat:statics)
  90         mkBlocks instr (instrs,blocks,statics)
  91           = (instr:instrs, blocks, statics)
  92   -- in
  93   return (BasicBlock id top : other_blocks, statics)
  94
  95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  96 stmtsToInstrs stmts
  97    = do instrss <- mapM stmtToInstrs stmts
  98         return (concatOL instrss)
  99
 100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 101 stmtToInstrs stmt = case stmt of
 102     CmmNop         -> return nilOL
 103     CmmComment s   -> return (unitOL (COMMENT s))
 104
 105     CmmAssign reg src
 106       | isFloatingRep kind -> assignReg_FltCode kind reg src
 107 #if WORD_SIZE_IN_BITS==32
 108       | kind == I64        -> assignReg_I64Code      reg src
 109 #endif
 110       | otherwise          -> assignReg_IntCode kind reg src
 111         where kind = cmmRegRep reg
 112
 113     CmmStore addr src
 114       | isFloatingRep kind -> assignMem_FltCode kind addr src
 115 #if WORD_SIZE_IN_BITS==32
 116       | kind == I64      -> assignMem_I64Code      addr src
 117 #endif
 118       | otherwise        -> assignMem_IntCode kind addr src
 119         where kind = cmmExprRep src
 120
 121     CmmCall target result_regs args vols
 122        -> genCCall target result_regs args vols
 123
 124     CmmBranch id          -> genBranch id
 125     CmmCondBranch arg id  -> genCondJump id arg
 126     CmmSwitch arg ids     -> genSwitch arg ids
 127     CmmJump arg params    -> genJump arg
 128
 129 -- -----------------------------------------------------------------------------
 130 -- General things for putting together code sequences
 131
 132 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 133 -- CmmExprs into CmmRegOff?
 134 mangleIndexTree :: CmmExpr -> CmmExpr
 135 mangleIndexTree (CmmRegOff reg off)
 136   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 137   where rep = cmmRegRep reg
 138
 139 -- -----------------------------------------------------------------------------
 140 --  Code gen for 64-bit arithmetic on 32-bit platforms
 141
 142 {-
 143 Simple support for generating 64-bit code (ie, 64 bit values and 64
 144 bit assignments) on 32-bit platforms.  Unlike the main code generator
 145 we merely shoot for generating working code as simply as possible, and
 146 pay little attention to code quality.  Specifically, there is no
 147 attempt to deal cleverly with the fixed-vs-floating register
 148 distinction; all values are generated into (pairs of) floating
 149 registers, even if this would mean some redundant reg-reg moves as a
 150 result.  Only one of the VRegUniques is returned, since it will be
 151 of the VRegUniqueLo form, and the upper-half VReg can be determined
 152 by applying getHiVRegFromLo to it.
 153 -}
 154
 155 data ChildCode64        -- a.k.a "Register64"
 156    = ChildCode64
 157         InstrBlock      -- code
 158         Reg             -- the lower 32-bit temporary which contains the
 159                         -- result; use getHiVRegFromLo to find the other
 160                         -- VRegUnique.  Rules of this simplified insn
 161                         -- selection game are therefore that the returned
 162                         -- Reg may be modified
 163
 164 #if WORD_SIZE_IN_BITS==32
 165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 166 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 167 #endif
 168
 169 #ifndef x86_64_TARGET_ARCH
 170 iselExpr64        :: CmmExpr -> NatM ChildCode64
 171 #endif
 172
 173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 174
 175 #if i386_TARGET_ARCH
 176
 177 assignMem_I64Code addrTree valueTree = do
 178   Amode addr addr_code <- getAmode addrTree
 179   ChildCode64 vcode rlo <- iselExpr64 valueTree
 180   let
 181         rhi = getHiVRegFromLo rlo
 182
 183         -- Little-endian store
 184         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 185         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 186   -- in
 187   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 188
 189
 190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 191    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 192    let
 193          r_dst_lo = mkVReg u_dst I32
 194          r_dst_hi = getHiVRegFromLo r_dst_lo
 195          r_src_hi = getHiVRegFromLo r_src_lo
 196          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 197          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 198    -- in
 199    return (
 200         vcode `snocOL` mov_lo `snocOL` mov_hi
 201      )
 202
 203 assignReg_I64Code lvalue valueTree
 204    = panic "assignReg_I64Code(i386): invalid lvalue"
 205
 206 ------------
 207
 208 iselExpr64 (CmmLit (CmmInt i _)) = do
 209   (rlo,rhi) <- getNewRegPairNat I32
 210   let
 211         r = fromIntegral (fromIntegral i :: Word32)
 212         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 213         code = toOL [
 214                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 215                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 216                 ]
 217   -- in
 218   return (ChildCode64 code rlo)
 219
 220 iselExpr64 (CmmLoad addrTree I64) = do
 221    Amode addr addr_code <- getAmode addrTree
 222    (rlo,rhi) <- getNewRegPairNat I32
 223    let
 224         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 225         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 226    -- in
 227    return (
 228             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 229                         rlo
 230      )
 231
 232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 233    = return (ChildCode64 nilOL (mkVReg vu I32))
 234
 235 -- we handle addition, but rather badly
 236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 237    ChildCode64 code1 r1lo <- iselExpr64 e1
 238    (rlo,rhi) <- getNewRegPairNat I32
 239    let
 240         r = fromIntegral (fromIntegral i :: Word32)
 241         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 242         r1hi = getHiVRegFromLo r1lo
 243         code =  code1 `appOL`
 244                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 245                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 246                        MOV I32 (OpReg r1hi) (OpReg rhi),
 247                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 248    -- in
 249    return (ChildCode64 code rlo)
 250
 251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 252    ChildCode64 code1 r1lo <- iselExpr64 e1
 253    ChildCode64 code2 r2lo <- iselExpr64 e2
 254    (rlo,rhi) <- getNewRegPairNat I32
 255    let
 256         r1hi = getHiVRegFromLo r1lo
 257         r2hi = getHiVRegFromLo r2lo
 258         code =  code1 `appOL`
 259                 code2 `appOL`
 260                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 261                        ADD I32 (OpReg r2lo) (OpReg rlo),
 262                        MOV I32 (OpReg r1hi) (OpReg rhi),
 263                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 264    -- in
 265    return (ChildCode64 code rlo)
 266
 267 iselExpr64 expr
 268    = pprPanic "iselExpr64(i386)" (ppr expr)
 269
 270 #endif /* i386_TARGET_ARCH */
 271
 272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 273
 274 #if sparc_TARGET_ARCH
 275
 276 assignMem_I64Code addrTree valueTree
 277    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vrlo) ->
 278      getRegister addrTree               `thenNat` \ register_addr ->
 279      getNewRegNat IntRep                `thenNat` \ t_addr ->
 280      let rlo = VirtualRegI vrlo
 281          rhi = getHiVRegFromLo rlo
 282          code_addr = registerCode register_addr t_addr
 283          reg_addr  = registerName register_addr t_addr
 284          -- Big-endian store
 285          mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
 286          mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
 287      in
 288          return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
 289
 290
 291 assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
 292    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
 293      let
 294          r_dst_lo = mkVReg u_dst IntRep
 295          r_src_lo = VirtualRegI vr_src_lo
 296          r_dst_hi = getHiVRegFromLo r_dst_lo
 297          r_src_hi = getHiVRegFromLo r_src_lo
 298          mov_lo = mkMOV r_src_lo r_dst_lo
 299          mov_hi = mkMOV r_src_hi r_dst_hi
 300          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 301      in
 302          return (
 303             vcode `snocOL` mov_hi `snocOL` mov_lo
 304          )
 305 assignReg_I64Code lvalue valueTree
 306    = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
 307               (pprStixReg lvalue)
 308
 309
 310 -- Don't delete this -- it's very handy for debugging.
 311 --iselExpr64 expr
 312 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 313 --   = panic "iselExpr64(???)"
 314
 315 iselExpr64 (CmmLoad I64 addrTree)
 316    = getRegister addrTree               `thenNat` \ register_addr ->
 317      getNewRegNat IntRep                `thenNat` \ t_addr ->
 318      getNewRegNat IntRep                `thenNat` \ rlo ->
 319      let rhi = getHiVRegFromLo rlo
 320          code_addr = registerCode register_addr t_addr
 321          reg_addr  = registerName register_addr t_addr
 322          mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
 323          mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
 324      in
 325          return (
 326             ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
 327                         (getVRegUnique rlo)
 328          )
 329
 330 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
 331    = getNewRegNat IntRep                `thenNat` \ r_dst_lo ->
 332      let r_dst_hi = getHiVRegFromLo r_dst_lo
 333          r_src_lo = mkVReg vu IntRep
 334          r_src_hi = getHiVRegFromLo r_src_lo
 335          mov_lo = mkMOV r_src_lo r_dst_lo
 336          mov_hi = mkMOV r_src_hi r_dst_hi
 337          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 338      in
 339          return (
 340             ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
 341          )
 342
 343 iselExpr64 (StCall fn cconv I64 args)
 344   = genCCall fn cconv kind args                 `thenNat` \ call ->
 345     getNewRegNat IntRep                         `thenNat` \ r_dst_lo ->
 346     let r_dst_hi = getHiVRegFromLo r_dst_lo
 347         mov_lo = mkMOV o0 r_dst_lo
 348         mov_hi = mkMOV o1 r_dst_hi
 349         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 350     in
 351     return (
 352        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
 353                    (getVRegUnique r_dst_lo)
 354     )
 355
 356 iselExpr64 expr
 357    = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
 358
 359 #endif /* sparc_TARGET_ARCH */
 360
 361 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 362
 363 #if powerpc_TARGET_ARCH
 364
 365 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 366 getI64Amodes addrTree = do
 367     Amode hi_addr addr_code <- getAmode addrTree
 368     case addrOffset hi_addr 4 of
 369         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 370         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 371                            return (AddrRegImm hi_ptr (ImmInt 0),
 372                                    AddrRegImm hi_ptr (ImmInt 4),
 373                                    code)
 374
 375 assignMem_I64Code addrTree valueTree = do
 376         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 377         ChildCode64 vcode rlo <- iselExpr64 valueTree
 378         let
 379                 rhi = getHiVRegFromLo rlo
 380
 381                 -- Big-endian store
 382                 mov_hi = ST I32 rhi hi_addr
 383                 mov_lo = ST I32 rlo lo_addr
 384         -- in
 385         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 386
 387 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 388    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 389    let
 390          r_dst_lo = mkVReg u_dst I32
 391          r_dst_hi = getHiVRegFromLo r_dst_lo
 392          r_src_hi = getHiVRegFromLo r_src_lo
 393          mov_lo = MR r_dst_lo r_src_lo
 394          mov_hi = MR r_dst_hi r_src_hi
 395    -- in
 396    return (
 397         vcode `snocOL` mov_lo `snocOL` mov_hi
 398      )
 399
 400 assignReg_I64Code lvalue valueTree
 401    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 402
 403
 404 -- Don't delete this -- it's very handy for debugging.
 405 --iselExpr64 expr
 406 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 407 --   = panic "iselExpr64(???)"
 408
 409 iselExpr64 (CmmLoad addrTree I64) = do
 410     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 411     (rlo, rhi) <- getNewRegPairNat I32
 412     let mov_hi = LD I32 rhi hi_addr
 413         mov_lo = LD I32 rlo lo_addr
 414     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 415                          rlo
 416
 417 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 418    = return (ChildCode64 nilOL (mkVReg vu I32))
 419
 420 iselExpr64 (CmmLit (CmmInt i _)) = do
 421   (rlo,rhi) <- getNewRegPairNat I32
 422   let
 423         half0 = fromIntegral (fromIntegral i :: Word16)
 424         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 425         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 426         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 427
 428         code = toOL [
 429                 LIS rlo (ImmInt half1),
 430                 OR rlo rlo (RIImm $ ImmInt half0),
 431                 LIS rhi (ImmInt half3),
 432                 OR rlo rlo (RIImm $ ImmInt half2)
 433                 ]
 434   -- in
 435   return (ChildCode64 code rlo)
 436
 437 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 438    ChildCode64 code1 r1lo <- iselExpr64 e1
 439    ChildCode64 code2 r2lo <- iselExpr64 e2
 440    (rlo,rhi) <- getNewRegPairNat I32
 441    let
 442         r1hi = getHiVRegFromLo r1lo
 443         r2hi = getHiVRegFromLo r2lo
 444         code =  code1 `appOL`
 445                 code2 `appOL`
 446                 toOL [ ADDC rlo r1lo r2lo,
 447                        ADDE rhi r1hi r2hi ]
 448    -- in
 449    return (ChildCode64 code rlo)
 450
 451 iselExpr64 expr
 452    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 453
 454 #endif /* powerpc_TARGET_ARCH */
 455
 456
 457 -- -----------------------------------------------------------------------------
 458 -- The 'Register' type
 459
 460 -- 'Register's passed up the tree.  If the stix code forces the register
 461 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 462 -- otherwise, it comes out as @Any@, and the parent can decide which
 463 -- register to put it in.
 464
 465 data Register
 466   = Fixed   MachRep Reg InstrBlock
 467   | Any     MachRep (Reg -> InstrBlock)
 468
 469 swizzleRegisterRep :: Register -> MachRep -> Register
 470 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 471 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 472
 473
 474 -- -----------------------------------------------------------------------------
 475 -- Utils based on getRegister, below
 476
 477 -- The dual to getAnyReg: compute an expression into a register, but
 478 -- we don't mind which one it is.
 479 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 480 getSomeReg expr = do
 481   r <- getRegister expr
 482   case r of
 483     Any rep code -> do
 484         tmp <- getNewRegNat rep
 485         return (tmp, code tmp)
 486     Fixed _ reg code ->
 487         return (reg, code)
 488
 489 -- -----------------------------------------------------------------------------
 490 -- Grab the Reg for a CmmReg
 491
 492 getRegisterReg :: CmmReg -> Reg
 493
 494 getRegisterReg (CmmLocal (LocalReg u pk))
 495   = mkVReg u pk
 496
 497 getRegisterReg (CmmGlobal mid)
 498   = case get_GlobalReg_reg_or_addr mid of
 499        Left (RealReg rrno) -> RealReg rrno
 500        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 501           -- By this stage, the only MagicIds remaining should be the
 502           -- ones which map to a real machine register on this
 503           -- platform.  Hence ...
 504
 505
 506 -- -----------------------------------------------------------------------------
 507 -- Generate code to get a subtree into a Register
 508
 509 -- Don't delete this -- it's very handy for debugging.
 510 --getRegister expr
 511 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 512 --   = panic "getRegister(???)"
 513
 514 getRegister :: CmmExpr -> NatM Register
 515
 516 getRegister (CmmReg reg)
 517   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 518
 519 getRegister tree@(CmmRegOff _ _)
 520   = getRegister (mangleIndexTree tree)
 521
 522 getRegister CmmPicBaseReg
 523   = do
 524       reg <- getPicBaseNat wordRep
 525       return (Fixed wordRep reg nilOL)
 526
 527 -- end of machine-"independent" bit; here we go on the rest...
 528
 529 #if alpha_TARGET_ARCH
 530
 531 getRegister (StDouble d)
 532   = getBlockIdNat                   `thenNat` \ lbl ->
 533     getNewRegNat PtrRep             `thenNat` \ tmp ->
 534     let code dst = mkSeqInstrs [
 535             LDATA RoDataSegment lbl [
 536                     DATA TF [ImmLab (rational d)]
 537                 ],
 538             LDA tmp (AddrImm (ImmCLbl lbl)),
 539             LD TF dst (AddrReg tmp)]
 540     in
 541         return (Any F64 code)
 542
 543 getRegister (StPrim primop [x]) -- unary PrimOps
 544   = case primop of
 545       IntNegOp -> trivialUCode (NEG Q False) x
 546
 547       NotOp    -> trivialUCode NOT x
 548
 549       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 550       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 551
 552       OrdOp -> coerceIntCode IntRep x
 553       ChrOp -> chrCode x
 554
 555       Float2IntOp  -> coerceFP2Int    x
 556       Int2FloatOp  -> coerceInt2FP pr x
 557       Double2IntOp -> coerceFP2Int    x
 558       Int2DoubleOp -> coerceInt2FP pr x
 559
 560       Double2FloatOp -> coerceFltCode x
 561       Float2DoubleOp -> coerceFltCode x
 562
 563       other_op -> getRegister (StCall fn CCallConv F64 [x])
 564         where
 565           fn = case other_op of
 566                  FloatExpOp    -> FSLIT("exp")
 567                  FloatLogOp    -> FSLIT("log")
 568                  FloatSqrtOp   -> FSLIT("sqrt")
 569                  FloatSinOp    -> FSLIT("sin")
 570                  FloatCosOp    -> FSLIT("cos")
 571                  FloatTanOp    -> FSLIT("tan")
 572                  FloatAsinOp   -> FSLIT("asin")
 573                  FloatAcosOp   -> FSLIT("acos")
 574                  FloatAtanOp   -> FSLIT("atan")
 575                  FloatSinhOp   -> FSLIT("sinh")
 576                  FloatCoshOp   -> FSLIT("cosh")
 577                  FloatTanhOp   -> FSLIT("tanh")
 578                  DoubleExpOp   -> FSLIT("exp")
 579                  DoubleLogOp   -> FSLIT("log")
 580                  DoubleSqrtOp  -> FSLIT("sqrt")
 581                  DoubleSinOp   -> FSLIT("sin")
 582                  DoubleCosOp   -> FSLIT("cos")
 583                  DoubleTanOp   -> FSLIT("tan")
 584                  DoubleAsinOp  -> FSLIT("asin")
 585                  DoubleAcosOp  -> FSLIT("acos")
 586                  DoubleAtanOp  -> FSLIT("atan")
 587                  DoubleSinhOp  -> FSLIT("sinh")
 588                  DoubleCoshOp  -> FSLIT("cosh")
 589                  DoubleTanhOp  -> FSLIT("tanh")
 590   where
 591     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 592
 593 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 594   = case primop of
 595       CharGtOp -> trivialCode (CMP LTT) y x
 596       CharGeOp -> trivialCode (CMP LE) y x
 597       CharEqOp -> trivialCode (CMP EQQ) x y
 598       CharNeOp -> int_NE_code x y
 599       CharLtOp -> trivialCode (CMP LTT) x y
 600       CharLeOp -> trivialCode (CMP LE) x y
 601
 602       IntGtOp  -> trivialCode (CMP LTT) y x
 603       IntGeOp  -> trivialCode (CMP LE) y x
 604       IntEqOp  -> trivialCode (CMP EQQ) x y
 605       IntNeOp  -> int_NE_code x y
 606       IntLtOp  -> trivialCode (CMP LTT) x y
 607       IntLeOp  -> trivialCode (CMP LE) x y
 608
 609       WordGtOp -> trivialCode (CMP ULT) y x
 610       WordGeOp -> trivialCode (CMP ULE) x y
 611       WordEqOp -> trivialCode (CMP EQQ)  x y
 612       WordNeOp -> int_NE_code x y
 613       WordLtOp -> trivialCode (CMP ULT) x y
 614       WordLeOp -> trivialCode (CMP ULE) x y
 615
 616       AddrGtOp -> trivialCode (CMP ULT) y x
 617       AddrGeOp -> trivialCode (CMP ULE) y x
 618       AddrEqOp -> trivialCode (CMP EQQ)  x y
 619       AddrNeOp -> int_NE_code x y
 620       AddrLtOp -> trivialCode (CMP ULT) x y
 621       AddrLeOp -> trivialCode (CMP ULE) x y
 622
 623       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 624       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 625       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 626       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 627       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 628       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 629
 630       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 631       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 632       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 633       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 634       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 635       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 636
 637       IntAddOp  -> trivialCode (ADD Q False) x y
 638       IntSubOp  -> trivialCode (SUB Q False) x y
 639       IntMulOp  -> trivialCode (MUL Q False) x y
 640       IntQuotOp -> trivialCode (DIV Q False) x y
 641       IntRemOp  -> trivialCode (REM Q False) x y
 642
 643       WordAddOp  -> trivialCode (ADD Q False) x y
 644       WordSubOp  -> trivialCode (SUB Q False) x y
 645       WordMulOp  -> trivialCode (MUL Q False) x y
 646       WordQuotOp -> trivialCode (DIV Q True) x y
 647       WordRemOp  -> trivialCode (REM Q True) x y
 648
 649       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 650       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 651       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 652       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 653
 654       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 655       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 656       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 657       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 658
 659       AddrAddOp  -> trivialCode (ADD Q False) x y
 660       AddrSubOp  -> trivialCode (SUB Q False) x y
 661       AddrRemOp  -> trivialCode (REM Q True) x y
 662
 663       AndOp  -> trivialCode AND x y
 664       OrOp   -> trivialCode OR  x y
 665       XorOp  -> trivialCode XOR x y
 666       SllOp  -> trivialCode SLL x y
 667       SrlOp  -> trivialCode SRL x y
 668
 669       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 670       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 671       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 672
 673       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 674       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 675   where
 676     {- ------------------------------------------------------------
 677         Some bizarre special code for getting condition codes into
 678         registers.  Integer non-equality is a test for equality
 679         followed by an XOR with 1.  (Integer comparisons always set
 680         the result register to 0 or 1.)  Floating point comparisons of
 681         any kind leave the result in a floating point register, so we
 682         need to wrangle an integer register out of things.
 683     -}
 684     int_NE_code :: StixTree -> StixTree -> NatM Register
 685
 686     int_NE_code x y
 687       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 688         getNewRegNat IntRep             `thenNat` \ tmp ->
 689         let
 690             code = registerCode register tmp
 691             src  = registerName register tmp
 692             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 693         in
 694         return (Any IntRep code__2)
 695
 696     {- ------------------------------------------------------------
 697         Comments for int_NE_code also apply to cmpF_code
 698     -}
 699     cmpF_code
 700         :: (Reg -> Reg -> Reg -> Instr)
 701         -> Cond
 702         -> StixTree -> StixTree
 703         -> NatM Register
 704
 705     cmpF_code instr cond x y
 706       = trivialFCode pr instr x y       `thenNat` \ register ->
 707         getNewRegNat F64                `thenNat` \ tmp ->
 708         getBlockIdNat                   `thenNat` \ lbl ->
 709         let
 710             code = registerCode register tmp
 711             result  = registerName register tmp
 712
 713             code__2 dst = code . mkSeqInstrs [
 714                 OR zeroh (RIImm (ImmInt 1)) dst,
 715                 BF cond  result (ImmCLbl lbl),
 716                 OR zeroh (RIReg zeroh) dst,
 717                 NEWBLOCK lbl]
 718         in
 719         return (Any IntRep code__2)
 720       where
 721         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 722       ------------------------------------------------------------
 723
 724 getRegister (CmmLoad pk mem)
 725   = getAmode mem                    `thenNat` \ amode ->
 726     let
 727         code = amodeCode amode
 728         src   = amodeAddr amode
 729         size = primRepToSize pk
 730         code__2 dst = code . mkSeqInstr (LD size dst src)
 731     in
 732     return (Any pk code__2)
 733
 734 getRegister (StInt i)
 735   | fits8Bits i
 736   = let
 737         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 738     in
 739     return (Any IntRep code)
 740   | otherwise
 741   = let
 742         code dst = mkSeqInstr (LDI Q dst src)
 743     in
 744     return (Any IntRep code)
 745   where
 746     src = ImmInt (fromInteger i)
 747
 748 getRegister leaf
 749   | isJust imm
 750   = let
 751         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 752     in
 753     return (Any PtrRep code)
 754   where
 755     imm = maybeImm leaf
 756     imm__2 = case imm of Just x -> x
 757
 758 #endif /* alpha_TARGET_ARCH */
 759
 760 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 761
 762 #if i386_TARGET_ARCH
 763
 764 getRegister (CmmLit (CmmFloat f F32)) = do
 765     lbl <- getNewLabelNat
 766     let code dst = toOL [
 767             LDATA ReadOnlyData
 768                         [CmmDataLabel lbl,
 769                          CmmStaticLit (CmmFloat f F32)],
 770             GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
 771             ]
 772     -- in
 773     return (Any F32 code)
 774
 775
 776 getRegister (CmmLit (CmmFloat d F64))
 777   | d == 0.0
 778   = let code dst = unitOL (GLDZ dst)
 779     in  return (Any F64 code)
 780
 781   | d == 1.0
 782   = let code dst = unitOL (GLD1 dst)
 783     in  return (Any F64 code)
 784
 785   | otherwise = do
 786     lbl <- getNewLabelNat
 787     let code dst = toOL [
 788             LDATA ReadOnlyData
 789                         [CmmDataLabel lbl,
 790                          CmmStaticLit (CmmFloat d F64)],
 791             GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
 792             ]
 793     -- in
 794     return (Any F64 code)
 795
 796 #endif /* i386_TARGET_ARCH */
 797
 798 #if x86_64_TARGET_ARCH
 799
 800 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 801    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 802         -- I don't know why there are xorpd, xorps, and pxor instructions.
 803         -- They all appear to do the same thing --SDM
 804    return (Any rep code)
 805
 806 getRegister (CmmLit (CmmFloat f rep)) = do
 807     lbl <- getNewLabelNat
 808     let code dst = toOL [
 809             LDATA ReadOnlyData
 810                         [CmmDataLabel lbl,
 811                          CmmStaticLit (CmmFloat f rep)],
 812             MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
 813         -- ToDo: should use %rip-relative
 814             ]
 815     -- in
 816     return (Any rep code)
 817
 818 #endif /* x86_64_TARGET_ARCH */
 819
 820 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 821
 822 -- catch simple cases of zero- or sign-extended load
 823 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 824   code <- intLoadCode (MOVZxL I8) addr
 825   return (Any I32 code)
 826
 827 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 828   code <- intLoadCode (MOVSxL I8) addr
 829   return (Any I32 code)
 830
 831 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 832   code <- intLoadCode (MOVZxL I16) addr
 833   return (Any I32 code)
 834
 835 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 836   code <- intLoadCode (MOVSxL I16) addr
 837   return (Any I32 code)
 838
 839 #endif
 840
 841 #if x86_64_TARGET_ARCH
 842
 843 -- catch simple cases of zero- or sign-extended load
 844 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 845   code <- intLoadCode (MOVZxL I8) addr
 846   return (Any I64 code)
 847
 848 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 849   code <- intLoadCode (MOVSxL I8) addr
 850   return (Any I64 code)
 851
 852 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 853   code <- intLoadCode (MOVZxL I16) addr
 854   return (Any I64 code)
 855
 856 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 857   code <- intLoadCode (MOVSxL I16) addr
 858   return (Any I64 code)
 859
 860 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 861   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 862   return (Any I64 code)
 863
 864 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 865   code <- intLoadCode (MOVSxL I32) addr
 866   return (Any I64 code)
 867
 868 #endif
 869
 870 #if x86_64_TARGET_ARCH
 871 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 872   x_code <- getAnyReg x
 873   lbl <- getNewLabelNat
 874   let
 875     code dst = x_code dst `appOL` toOL [
 876         -- This is how gcc does it, so it can't be that bad:
 877         LDATA ReadOnlyData16 [
 878                 CmmAlign 16,
 879                 CmmDataLabel lbl,
 880                 CmmStaticLit (CmmInt 0x80000000 I32),
 881                 CmmStaticLit (CmmInt 0 I32),
 882                 CmmStaticLit (CmmInt 0 I32),
 883                 CmmStaticLit (CmmInt 0 I32)
 884         ],
 885         XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
 886                 -- xorps, so we need the 128-bit constant
 887                 -- ToDo: rip-relative
 888         ]
 889   --
 890   return (Any F32 code)
 891
 892 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 893   x_code <- getAnyReg x
 894   lbl <- getNewLabelNat
 895   let
 896         -- This is how gcc does it, so it can't be that bad:
 897     code dst = x_code dst `appOL` toOL [
 898         LDATA ReadOnlyData16 [
 899                 CmmAlign 16,
 900                 CmmDataLabel lbl,
 901                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 902                 CmmStaticLit (CmmInt 0 I64)
 903         ],
 904                 -- gcc puts an unpck here.  Wonder if we need it.
 905         XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
 906                 -- xorpd, so we need the 128-bit constant
 907                 -- ToDo: rip-relative
 908         ]
 909   --
 910   return (Any F64 code)
 911 #endif
 912
 913 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 914
 915 getRegister (CmmMachOp mop [x]) -- unary MachOps
 916   = case mop of
 917 #if i386_TARGET_ARCH
 918       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 919       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 920 #endif
 921
 922       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 923       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 924
 925       -- Nop conversions
 926       -- TODO: these are only nops if the arg is not a fixed register that
 927       -- can't be byte-addressed.
 928       MO_U_Conv I32 I8  -> conversionNop I32 x
 929       MO_S_Conv I32 I8  -> conversionNop I32 x
 930       MO_U_Conv I16 I8  -> conversionNop I16 x
 931       MO_S_Conv I16 I8  -> conversionNop I16 x
 932       MO_U_Conv I32 I16 -> conversionNop I32 x
 933       MO_S_Conv I32 I16 -> conversionNop I32 x
 934 #if x86_64_TARGET_ARCH
 935       MO_U_Conv I64 I32 -> conversionNop I64 x
 936       MO_S_Conv I64 I32 -> conversionNop I64 x
 937       MO_U_Conv I64 I16 -> conversionNop I64 x
 938       MO_S_Conv I64 I16 -> conversionNop I64 x
 939       MO_U_Conv I64 I8  -> conversionNop I64 x
 940       MO_S_Conv I64 I8  -> conversionNop I64 x
 941 #endif
 942
 943       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 944       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 945
 946       -- widenings
 947       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 948       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 949       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 950
 951       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 952       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 953       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 954
 955 #if x86_64_TARGET_ARCH
 956       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 957       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 958       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 959       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 960       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 961       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 962         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 963         -- However, we don't want the register allocator to throw it
 964         -- away as an unnecessary reg-to-reg move, so we keep it in
 965         -- the form of a movzl and print it as a movl later.
 966 #endif
 967
 968 #if i386_TARGET_ARCH
 969       MO_S_Conv F32 F64 -> conversionNop F64 x
 970       MO_S_Conv F64 F32 -> conversionNop F32 x
 971 #else
 972       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 973       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 974 #endif
 975
 976       MO_S_Conv from to
 977         | isFloatingRep from -> coerceFP2Int from to x
 978         | isFloatingRep to   -> coerceInt2FP from to x
 979
 980       other -> pprPanic "getRegister" (pprMachOp mop)
 981    where
 982         -- signed or unsigned extension.
 983         integerExtend from to instr expr = do
 984             (reg,e_code) <- if from == I8 then getByteReg expr
 985                                           else getSomeReg expr
 986             let
 987                 code dst =
 988                   e_code `snocOL`
 989                   instr from (OpReg reg) (OpReg dst)
 990             return (Any to code)
 991
 992         conversionNop new_rep expr
 993             = do e_code <- getRegister expr
 994                  return (swizzleRegisterRep e_code new_rep)
 995
 996
 997 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 998   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 999     case mop of
1000       MO_Eq F32   -> condFltReg EQQ x y
1001       MO_Ne F32   -> condFltReg NE x y
1002       MO_S_Gt F32 -> condFltReg GTT x y
1003       MO_S_Ge F32 -> condFltReg GE x y
1004       MO_S_Lt F32 -> condFltReg LTT x y
1005       MO_S_Le F32 -> condFltReg LE x y
1006
1007       MO_Eq F64   -> condFltReg EQQ x y
1008       MO_Ne F64   -> condFltReg NE x y
1009       MO_S_Gt F64 -> condFltReg GTT x y
1010       MO_S_Ge F64 -> condFltReg GE x y
1011       MO_S_Lt F64 -> condFltReg LTT x y
1012       MO_S_Le F64 -> condFltReg LE x y
1013
1014       MO_Eq rep   -> condIntReg EQQ x y
1015       MO_Ne rep   -> condIntReg NE x y
1016
1017       MO_S_Gt rep -> condIntReg GTT x y
1018       MO_S_Ge rep -> condIntReg GE x y
1019       MO_S_Lt rep -> condIntReg LTT x y
1020       MO_S_Le rep -> condIntReg LE x y
1021
1022       MO_U_Gt rep -> condIntReg GU  x y
1023       MO_U_Ge rep -> condIntReg GEU x y
1024       MO_U_Lt rep -> condIntReg LU  x y
1025       MO_U_Le rep -> condIntReg LEU x y
1026
1027 #if i386_TARGET_ARCH
1028       MO_Add F32 -> trivialFCode F32 GADD x y
1029       MO_Sub F32 -> trivialFCode F32 GSUB x y
1030
1031       MO_Add F64 -> trivialFCode F64 GADD x y
1032       MO_Sub F64 -> trivialFCode F64 GSUB x y
1033
1034       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1035       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1036 #endif
1037
1038 #if x86_64_TARGET_ARCH
1039       MO_Add F32 -> trivialFCode F32 ADD x y
1040       MO_Sub F32 -> trivialFCode F32 SUB x y
1041
1042       MO_Add F64 -> trivialFCode F64 ADD x y
1043       MO_Sub F64 -> trivialFCode F64 SUB x y
1044
1045       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1046       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1047 #endif
1048
1049       MO_Add rep -> add_code rep x y
1050       MO_Sub rep -> sub_code rep x y
1051
1052       MO_S_Quot rep -> div_code rep True  True  x y
1053       MO_S_Rem  rep -> div_code rep True  False x y
1054       MO_U_Quot rep -> div_code rep False True  x y
1055       MO_U_Rem  rep -> div_code rep False False x y
1056
1057 #if i386_TARGET_ARCH
1058       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1059       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1060 #endif
1061
1062 #if x86_64_TARGET_ARCH
1063       MO_Mul   F32 -> trivialFCode F32 MUL x y
1064       MO_Mul   F64 -> trivialFCode F64 MUL x y
1065 #endif
1066
1067       MO_Mul   rep -> let op = IMUL rep in
1068                       trivialCode rep op (Just op) x y
1069
1070       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1071
1072       MO_And rep -> let op = AND rep in
1073                     trivialCode rep op (Just op) x y
1074       MO_Or  rep -> let op = OR  rep in
1075                     trivialCode rep op (Just op) x y
1076       MO_Xor rep -> let op = XOR rep in
1077                     trivialCode rep op (Just op) x y
1078
1079         {- Shift ops on x86s have constraints on their source, it
1080            either has to be Imm, CL or 1
1081             => trivialCode is not restrictive enough (sigh.)
1082         -}
1083       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1084       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1085       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1086
1087       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1088   where
1089     --------------------
1090     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1091     imulMayOflo rep a b = do
1092          (a_reg, a_code) <- getNonClobberedReg a
1093          b_code <- getAnyReg b
1094          let
1095              shift_amt  = case rep of
1096                            I32 -> 31
1097                            I64 -> 63
1098                            _ -> panic "shift_amt"
1099
1100              code = a_code `appOL` b_code eax `appOL`
1101                         toOL [
1102                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1103                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1104                                 -- sign extend lower part
1105                            SUB rep (OpReg edx) (OpReg eax)
1106                                 -- compare against upper
1107                            -- eax==0 if high part == sign extended low part
1108                         ]
1109          -- in
1110          return (Fixed rep eax code)
1111
1112     --------------------
1113     shift_code :: MachRep
1114                -> (Operand -> Operand -> Instr)
1115                -> CmmExpr
1116                -> CmmExpr
1117                -> NatM Register
1118
1119     {- Case1: shift length as immediate -}
1120     shift_code rep instr x y@(CmmLit lit) = do
1121           x_code <- getAnyReg x
1122           let
1123                code dst
1124                   = x_code dst `snocOL`
1125                     instr (OpImm (litToImm lit)) (OpReg dst)
1126           -- in
1127           return (Any rep code)
1128
1129     {- Case2: shift length is complex (non-immediate) -}
1130     shift_code rep instr x y{-amount-} = do
1131         (x_reg, x_code) <- getNonClobberedReg x
1132         y_code <- getAnyReg y
1133         let
1134            code = x_code `appOL`
1135                   y_code ecx `snocOL`
1136                   instr (OpReg ecx) (OpReg x_reg)
1137         -- in
1138         return (Fixed rep x_reg code)
1139
1140     --------------------
1141     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1142     add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1143     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1144
1145     --------------------
1146     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1147     sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1148     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1149
1150     -- our three-operand add instruction:
1151     add_int rep x y = do
1152         (x_reg, x_code) <- getSomeReg x
1153         let
1154             imm = ImmInt (fromInteger y)
1155             code dst
1156                = x_code `snocOL`
1157                  LEA rep
1158                         (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
1159                         (OpReg dst)
1160         --
1161         return (Any rep code)
1162
1163     ----------------------
1164     div_code rep signed quotient x y = do
1165            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1166            x_code <- getAnyReg x
1167            let
1168              widen | signed    = CLTD rep
1169                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1170
1171              instr | signed    = IDIV
1172                    | otherwise = DIV
1173
1174              code = y_code `appOL`
1175                     x_code eax `appOL`
1176                     toOL [widen, instr rep y_op]
1177
1178              result | quotient  = eax
1179                     | otherwise = edx
1180
1181            -- in
1182            return (Fixed rep result code)
1183
1184
1185 getRegister (CmmLoad mem pk)
1186   | isFloatingRep pk
1187   = do
1188     Amode src mem_code <- getAmode mem
1189     let
1190         code dst = mem_code `snocOL`
1191                    IF_ARCH_i386(GLD pk src dst,
1192                                 MOV pk (OpAddr src) (OpReg dst))
1193     --
1194     return (Any pk code)
1195
1196 #if i386_TARGET_ARCH
1197 getRegister (CmmLoad mem pk)
1198   | pk /= I64
1199   = do
1200     code <- intLoadCode (instr pk) mem
1201     return (Any pk code)
1202   where
1203         instr I8  = MOVZxL pk
1204         instr I16 = MOV I16
1205         instr I32 = MOV I32
1206         -- we always zero-extend 8-bit loads, if we
1207         -- can't think of anything better.  This is because
1208         -- we can't guarantee access to an 8-bit variant of every register
1209         -- (esi and edi don't have 8-bit variants), so to make things
1210         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1211 #endif
1212
1213 #if x86_64_TARGET_ARCH
1214 -- Simpler memory load code on x86_64
1215 getRegister (CmmLoad mem pk)
1216   = do
1217     code <- intLoadCode (MOV pk) mem
1218     return (Any pk code)
1219 #endif
1220
1221 getRegister (CmmLit (CmmInt 0 rep))
1222   = let
1223         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1224         adj_rep = case rep of I64 -> I32; _ -> rep
1225         rep1 = IF_ARCH_i386( rep, adj_rep )
1226         code dst
1227            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1228     in
1229         return (Any rep code)
1230
1231 #if x86_64_TARGET_ARCH
1232   -- optimisation for loading small literals on x86_64: take advantage
1233   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1234   -- instruction forms are shorter.
1235 getRegister (CmmLit lit)
1236   | I64 <- cmmLitRep lit, not (isBigLit lit)
1237   = let
1238         imm = litToImm lit
1239         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1240     in
1241         return (Any I64 code)
1242   where
1243    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1244    isBigLit _ = False
1245         -- note1: not the same as is64BitLit, because that checks for
1246         -- signed literals that fit in 32 bits, but we want unsigned
1247         -- literals here.
1248         -- note2: all labels are small, because we're assuming the
1249         -- small memory model (see gcc docs, -mcmodel=small).
1250 #endif
1251
1252 getRegister (CmmLit lit)
1253   = let
1254         rep = cmmLitRep lit
1255         imm = litToImm lit
1256         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1257     in
1258         return (Any rep code)
1259
1260 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1261
1262
1263 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1264    -> NatM (Reg -> InstrBlock)
1265 intLoadCode instr mem = do
1266   Amode src mem_code <- getAmode mem
1267   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1268
1269 -- Compute an expression into *any* register, adding the appropriate
1270 -- move instruction if necessary.
1271 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1272 getAnyReg expr = do
1273   r <- getRegister expr
1274   anyReg r
1275
1276 anyReg :: Register -> NatM (Reg -> InstrBlock)
1277 anyReg (Any _ code)          = return code
1278 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1279
1280 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1281 -- Fixed registers might not be byte-addressable, so we make sure we've
1282 -- got a temporary, inserting an extra reg copy if necessary.
1283 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1284 #if x86_64_TARGET_ARCH
1285 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1286 #else
1287 getByteReg expr = do
1288   r <- getRegister expr
1289   case r of
1290     Any rep code -> do
1291         tmp <- getNewRegNat rep
1292         return (tmp, code tmp)
1293     Fixed rep reg code
1294         | isVirtualReg reg -> return (reg,code)
1295         | otherwise -> do
1296             tmp <- getNewRegNat rep
1297             return (tmp, code `snocOL` reg2reg rep reg tmp)
1298         -- ToDo: could optimise slightly by checking for byte-addressable
1299         -- real registers, but that will happen very rarely if at all.
1300 #endif
1301
1302 -- Another variant: this time we want the result in a register that cannot
1303 -- be modified by code to evaluate an arbitrary expression.
1304 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1305 getNonClobberedReg expr = do
1306   r <- getRegister expr
1307   case r of
1308     Any rep code -> do
1309         tmp <- getNewRegNat rep
1310         return (tmp, code tmp)
1311     Fixed rep reg code
1312         -- only free regs can be clobbered
1313         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1314                 tmp <- getNewRegNat rep
1315                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1316         | otherwise ->
1317                 return (reg, code)
1318
1319 reg2reg :: MachRep -> Reg -> Reg -> Instr
1320 reg2reg rep src dst
1321 #if i386_TARGET_ARCH
1322   | isFloatingRep rep = GMOV src dst
1323 #endif
1324   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1325
1326 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1327
1328 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1329
1330 #if sparc_TARGET_ARCH
1331
1332 getRegister (StFloat d)
1333   = getBlockIdNat                   `thenNat` \ lbl ->
1334     getNewRegNat PtrRep             `thenNat` \ tmp ->
1335     let code dst = toOL [
1336             SEGMENT DataSegment,
1337             NEWBLOCK lbl,
1338             DATA F [ImmFloat d],
1339             SEGMENT TextSegment,
1340             SETHI (HI (ImmCLbl lbl)) tmp,
1341             LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1342     in
1343         return (Any F32 code)
1344
1345 getRegister (StDouble d)
1346   = getBlockIdNat                   `thenNat` \ lbl ->
1347     getNewRegNat PtrRep             `thenNat` \ tmp ->
1348     let code dst = toOL [
1349             SEGMENT DataSegment,
1350             NEWBLOCK lbl,
1351             DATA DF [ImmDouble d],
1352             SEGMENT TextSegment,
1353             SETHI (HI (ImmCLbl lbl)) tmp,
1354             LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1355     in
1356         return (Any F64 code)
1357
1358
1359 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1360   = case mop of
1361       MO_NatS_Neg      -> trivialUCode (SUB False False g0) x
1362       MO_Nat_Not       -> trivialUCode (XNOR False g0) x
1363       MO_32U_to_8U     -> trivialCode (AND False) x (StInt 255)
1364
1365       MO_F32_Neg       -> trivialUFCode F32 (FNEG F) x
1366       MO_F64_Neg       -> trivialUFCode F64 (FNEG DF) x
1367
1368       MO_F64_to_Flt    -> coerceDbl2Flt x
1369       MO_F32_to_Dbl    -> coerceFlt2Dbl x
1370
1371       MO_F32_to_NatS   -> coerceFP2Int F32 x
1372       MO_NatS_to_Flt   -> coerceInt2FP F32 x
1373       MO_F64_to_NatS   -> coerceFP2Int F64 x
1374       MO_NatS_to_Dbl   -> coerceInt2FP F64 x
1375
1376       -- Conversions which are a nop on sparc
1377       MO_32U_to_NatS   -> conversionNop IntRep   x
1378       MO_32S_to_NatS  -> conversionNop IntRep   x
1379       MO_NatS_to_32U   -> conversionNop WordRep  x
1380       MO_32U_to_NatU   -> conversionNop WordRep  x
1381
1382       MO_NatU_to_NatS -> conversionNop IntRep    x
1383       MO_NatS_to_NatU -> conversionNop WordRep   x
1384       MO_NatP_to_NatU -> conversionNop WordRep   x
1385       MO_NatU_to_NatP -> conversionNop PtrRep    x
1386       MO_NatS_to_NatP -> conversionNop PtrRep    x
1387       MO_NatP_to_NatS -> conversionNop IntRep    x
1388
1389       -- sign-extending widenings
1390       MO_8U_to_32U    -> integerExtend False 24 x
1391       MO_8U_to_NatU   -> integerExtend False 24 x
1392       MO_8S_to_NatS   -> integerExtend True  24 x
1393       MO_16U_to_NatU  -> integerExtend False 16 x
1394       MO_16S_to_NatS  -> integerExtend True  16 x
1395
1396       other_op ->
1397         let fixed_x = if   is_float_op  -- promote to double
1398                       then CmmMachOp MO_F32_to_Dbl [x]
1399                       else x
1400         in
1401         getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
1402     where
1403         integerExtend signed nBits x
1404            = getRegister (
1405                 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1406                          [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1407              )
1408         conversionNop new_rep expr
1409             = getRegister expr          `thenNat` \ e_code ->
1410               return (swizzleRegisterRep e_code new_rep)
1411
1412         (is_float_op, fn)
1413           = case mop of
1414               MO_F32_Exp    -> (True,  FSLIT("exp"))
1415               MO_F32_Log    -> (True,  FSLIT("log"))
1416               MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
1417
1418               MO_F32_Sin    -> (True,  FSLIT("sin"))
1419               MO_F32_Cos    -> (True,  FSLIT("cos"))
1420               MO_F32_Tan    -> (True,  FSLIT("tan"))
1421
1422               MO_F32_Asin   -> (True,  FSLIT("asin"))
1423               MO_F32_Acos   -> (True,  FSLIT("acos"))
1424               MO_F32_Atan   -> (True,  FSLIT("atan"))
1425
1426               MO_F32_Sinh   -> (True,  FSLIT("sinh"))
1427               MO_F32_Cosh   -> (True,  FSLIT("cosh"))
1428               MO_F32_Tanh   -> (True,  FSLIT("tanh"))
1429
1430               MO_F64_Exp    -> (False, FSLIT("exp"))
1431               MO_F64_Log    -> (False, FSLIT("log"))
1432               MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
1433
1434               MO_F64_Sin    -> (False, FSLIT("sin"))
1435               MO_F64_Cos    -> (False, FSLIT("cos"))
1436               MO_F64_Tan    -> (False, FSLIT("tan"))
1437
1438               MO_F64_Asin   -> (False, FSLIT("asin"))
1439               MO_F64_Acos   -> (False, FSLIT("acos"))
1440               MO_F64_Atan   -> (False, FSLIT("atan"))
1441
1442               MO_F64_Sinh   -> (False, FSLIT("sinh"))
1443               MO_F64_Cosh   -> (False, FSLIT("cosh"))
1444               MO_F64_Tanh   -> (False, FSLIT("tanh"))
1445
1446               other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
1447                                 (pprMachOp mop)
1448
1449
1450 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1451   = case mop of
1452       MO_32U_Gt  -> condIntReg GTT x y
1453       MO_32U_Ge  -> condIntReg GE x y
1454       MO_32U_Eq  -> condIntReg EQQ x y
1455       MO_32U_Ne  -> condIntReg NE x y
1456       MO_32U_Lt  -> condIntReg LTT x y
1457       MO_32U_Le  -> condIntReg LE x y
1458
1459       MO_Nat_Eq   -> condIntReg EQQ x y
1460       MO_Nat_Ne   -> condIntReg NE x y
1461
1462       MO_NatS_Gt  -> condIntReg GTT x y
1463       MO_NatS_Ge  -> condIntReg GE x y
1464       MO_NatS_Lt  -> condIntReg LTT x y
1465       MO_NatS_Le  -> condIntReg LE x y
1466
1467       MO_NatU_Gt  -> condIntReg GU  x y
1468       MO_NatU_Ge  -> condIntReg GEU x y
1469       MO_NatU_Lt  -> condIntReg LU  x y
1470       MO_NatU_Le  -> condIntReg LEU x y
1471
1472       MO_F32_Gt -> condFltReg GTT x y
1473       MO_F32_Ge -> condFltReg GE x y
1474       MO_F32_Eq -> condFltReg EQQ x y
1475       MO_F32_Ne -> condFltReg NE x y
1476       MO_F32_Lt -> condFltReg LTT x y
1477       MO_F32_Le -> condFltReg LE x y
1478
1479       MO_F64_Gt -> condFltReg GTT x y
1480       MO_F64_Ge -> condFltReg GE x y
1481       MO_F64_Eq -> condFltReg EQQ x y
1482       MO_F64_Ne -> condFltReg NE x y
1483       MO_F64_Lt -> condFltReg LTT x y
1484       MO_F64_Le -> condFltReg LE x y
1485
1486       MO_Nat_Add -> trivialCode (ADD False False) x y
1487       MO_Nat_Sub -> trivialCode (SUB False False) x y
1488
1489       MO_NatS_Mul  -> trivialCode (SMUL False) x y
1490       MO_NatU_Mul  -> trivialCode (UMUL False) x y
1491       MO_NatS_MulMayOflo -> imulMayOflo x y
1492
1493       -- ToDo: teach about V8+ SPARC div instructions
1494       MO_NatS_Quot -> idiv FSLIT(".div")  x y
1495       MO_NatS_Rem  -> idiv FSLIT(".rem")  x y
1496       MO_NatU_Quot -> idiv FSLIT(".udiv")  x y
1497       MO_NatU_Rem  -> idiv FSLIT(".urem")  x y
1498
1499       MO_F32_Add   -> trivialFCode F32  FADD x y
1500       MO_F32_Sub   -> trivialFCode F32  FSUB x y
1501       MO_F32_Mul   -> trivialFCode F32  FMUL x y
1502       MO_F32_Div   -> trivialFCode F32  FDIV x y
1503
1504       MO_F64_Add   -> trivialFCode F64 FADD x y
1505       MO_F64_Sub   -> trivialFCode F64 FSUB x y
1506       MO_F64_Mul   -> trivialFCode F64 FMUL x y
1507       MO_F64_Div   -> trivialFCode F64 FDIV x y
1508
1509       MO_Nat_And   -> trivialCode (AND False) x y
1510       MO_Nat_Or    -> trivialCode (OR  False) x y
1511       MO_Nat_Xor   -> trivialCode (XOR False) x y
1512
1513       MO_Nat_Shl   -> trivialCode SLL x y
1514       MO_Nat_Shr   -> trivialCode SRL x y
1515       MO_Nat_Sar   -> trivialCode SRA x y
1516
1517       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1518                                          [promote x, promote y])
1519                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1520       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1521                                         [x, y])
1522
1523       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1524   where
1525     idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1526
1527     --------------------
1528     imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1529     imulMayOflo a1 a2
1530        = getNewRegNat IntRep            `thenNat` \ t1 ->
1531          getNewRegNat IntRep            `thenNat` \ t2 ->
1532          getNewRegNat IntRep            `thenNat` \ res_lo ->
1533          getNewRegNat IntRep            `thenNat` \ res_hi ->
1534          getRegister a1                 `thenNat` \ reg1 ->
1535          getRegister a2                 `thenNat` \ reg2 ->
1536          let code1 = registerCode reg1 t1
1537              code2 = registerCode reg2 t2
1538              src1  = registerName reg1 t1
1539              src2  = registerName reg2 t2
1540              code dst = code1 `appOL` code2 `appOL`
1541                         toOL [
1542                            SMUL False src1 (RIReg src2) res_lo,
1543                            RDY res_hi,
1544                            SRA res_lo (RIImm (ImmInt 31)) res_lo,
1545                            SUB False False res_lo (RIReg res_hi) dst
1546                         ]
1547          in
1548             return (Any IntRep code)
1549
1550 getRegister (CmmLoad pk mem) = do
1551     Amode src code <- getAmode mem
1552     let
1553         size = primRepToSize pk
1554         code__2 dst = code `snocOL` LD size src dst
1555     --
1556     return (Any pk code__2)
1557
1558 getRegister (StInt i)
1559   | fits13Bits i
1560   = let
1561         src = ImmInt (fromInteger i)
1562         code dst = unitOL (OR False g0 (RIImm src) dst)
1563     in
1564         return (Any IntRep code)
1565
1566 getRegister leaf
1567   | isJust imm
1568   = let
1569         code dst = toOL [
1570             SETHI (HI imm__2) dst,
1571             OR False dst (RIImm (LO imm__2)) dst]
1572     in
1573         return (Any PtrRep code)
1574   | otherwise
1575   = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1576   where
1577     imm = maybeImm leaf
1578     imm__2 = case imm of Just x -> x
1579
1580 #endif /* sparc_TARGET_ARCH */
1581
1582 #if powerpc_TARGET_ARCH
1583 getRegister (CmmLoad mem pk)
1584   | pk /= I64
1585   = do
1586         Amode addr addr_code <- getAmode mem
1587         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1588                        addr_code `snocOL` LD pk dst addr
1589         return (Any pk code)
1590
1591 -- catch simple cases of zero- or sign-extended load
1592 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1593     Amode addr addr_code <- getAmode mem
1594     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1595
1596 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1597
1598 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1599     Amode addr addr_code <- getAmode mem
1600     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1601
1602 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1603     Amode addr addr_code <- getAmode mem
1604     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1605
1606 getRegister (CmmMachOp mop [x]) -- unary MachOps
1607   = case mop of
1608       MO_Not rep   -> trivialUCode rep NOT x
1609
1610       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1611       MO_S_Conv F32 F64 -> conversionNop F64 x
1612
1613       MO_S_Conv from to
1614         | from == to         -> conversionNop to x
1615         | isFloatingRep from -> coerceFP2Int from to x
1616         | isFloatingRep to   -> coerceInt2FP from to x
1617
1618         -- narrowing is a nop: we treat the high bits as undefined
1619       MO_S_Conv I32 to -> conversionNop to x
1620       MO_S_Conv I16 I8 -> conversionNop I8 x
1621       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1622       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1623
1624       MO_U_Conv from to
1625         | from == to -> conversionNop to x
1626         -- narrowing is a nop: we treat the high bits as undefined
1627       MO_U_Conv I32 to -> conversionNop to x
1628       MO_U_Conv I16 I8 -> conversionNop I8 x
1629       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1630       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1631
1632       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1633       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1634       MO_S_Neg rep      -> trivialUCode rep NEG x
1635
1636     where
1637         conversionNop new_rep expr
1638             = do e_code <- getRegister expr
1639                  return (swizzleRegisterRep e_code new_rep)
1640
1641 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1642   = case mop of
1643       MO_Eq F32 -> condFltReg EQQ x y
1644       MO_Ne F32 -> condFltReg NE  x y
1645
1646       MO_S_Gt F32 -> condFltReg GTT x y
1647       MO_S_Ge F32 -> condFltReg GE  x y
1648       MO_S_Lt F32 -> condFltReg LTT x y
1649       MO_S_Le F32 -> condFltReg LE  x y
1650
1651       MO_Eq F64 -> condFltReg EQQ x y
1652       MO_Ne F64 -> condFltReg NE  x y
1653
1654       MO_S_Gt F64 -> condFltReg GTT x y
1655       MO_S_Ge F64 -> condFltReg GE  x y
1656       MO_S_Lt F64 -> condFltReg LTT x y
1657       MO_S_Le F64 -> condFltReg LE  x y
1658
1659       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1660       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1661
1662       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1663       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1664       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1665       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1666
1667       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1668       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1669       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1670       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1671
1672       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1673       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1674       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1675       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1676
1677       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1678       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1679       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1680       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1681
1682          -- optimize addition with 32-bit immediate
1683          -- (needed for PIC)
1684       MO_Add I32 ->
1685         case y of
1686           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1687             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1688           CmmLit lit
1689             -> do
1690                 (src, srcCode) <- getSomeReg x
1691                 let imm = litToImm lit
1692                     code dst = srcCode `appOL` toOL [
1693                                     ADDIS dst src (HA imm),
1694                                     ADD dst dst (RIImm (LO imm))
1695                                 ]
1696                 return (Any I32 code)
1697           _ -> trivialCode I32 True ADD x y
1698
1699       MO_Add rep -> trivialCode rep True ADD x y
1700       MO_Sub rep ->
1701         case y of    -- subfi ('substract from' with immediate) doesn't exist
1702           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1703             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1704           _ -> trivialCodeNoImm rep SUBF y x
1705
1706       MO_Mul rep -> trivialCode rep True MULLW x y
1707
1708       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1709
1710       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1711       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1712
1713       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1714       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1715
1716       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1717       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1718
1719       MO_And rep   -> trivialCode rep False AND x y
1720       MO_Or rep    -> trivialCode rep False OR x y
1721       MO_Xor rep   -> trivialCode rep False XOR x y
1722
1723       MO_Shl rep   -> trivialCode rep False SLW x y
1724       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1725       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1726
1727 getRegister (CmmLit (CmmInt i rep))
1728   | Just imm <- makeImmediate rep True i
1729   = let
1730         code dst = unitOL (LI dst imm)
1731     in
1732         return (Any rep code)
1733
1734 getRegister (CmmLit (CmmFloat f frep)) = do
1735     lbl <- getNewLabelNat
1736     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1737     Amode addr addr_code <- getAmode dynRef
1738     let code dst =
1739             LDATA ReadOnlyData  [CmmDataLabel lbl,
1740                                  CmmStaticLit (CmmFloat f frep)]
1741             `consOL` (addr_code `snocOL` LD frep dst addr)
1742     return (Any frep code)
1743
1744 getRegister (CmmLit lit)
1745   = let rep = cmmLitRep lit
1746         imm = litToImm lit
1747         code dst = toOL [
1748               LIS dst (HI imm),
1749               OR dst dst (RIImm (LO imm))
1750           ]
1751     in return (Any rep code)
1752
1753 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1754
1755     -- extend?Rep: wrap integer expression of type rep
1756     -- in a conversion to I32
1757 extendSExpr I32 x = x
1758 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1759 extendUExpr I32 x = x
1760 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1761
1762 #endif /* powerpc_TARGET_ARCH */
1763
1764
1765 -- -----------------------------------------------------------------------------
1766 --  The 'Amode' type: Memory addressing modes passed up the tree.
1767
1768 data Amode = Amode AddrMode InstrBlock
1769
1770 {-
1771 Now, given a tree (the argument to an CmmLoad) that references memory,
1772 produce a suitable addressing mode.
1773
1774 A Rule of the Game (tm) for Amodes: use of the addr bit must
1775 immediately follow use of the code part, since the code part puts
1776 values in registers which the addr then refers to.  So you can't put
1777 anything in between, lest it overwrite some of those registers.  If
1778 you need to do some other computation between the code part and use of
1779 the addr bit, first store the effective address from the amode in a
1780 temporary, then do the other computation, and then use the temporary:
1781
1782     code
1783     LEA amode, tmp
1784     ... other computation ...
1785     ... (tmp) ...
1786 -}
1787
1788 getAmode :: CmmExpr -> NatM Amode
1789 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1790
1791 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1792
1793 #if alpha_TARGET_ARCH
1794
1795 getAmode (StPrim IntSubOp [x, StInt i])
1796   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1797     getRegister x               `thenNat` \ register ->
1798     let
1799         code = registerCode register tmp
1800         reg  = registerName register tmp
1801         off  = ImmInt (-(fromInteger i))
1802     in
1803     return (Amode (AddrRegImm reg off) code)
1804
1805 getAmode (StPrim IntAddOp [x, StInt i])
1806   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1807     getRegister x               `thenNat` \ register ->
1808     let
1809         code = registerCode register tmp
1810         reg  = registerName register tmp
1811         off  = ImmInt (fromInteger i)
1812     in
1813     return (Amode (AddrRegImm reg off) code)
1814
1815 getAmode leaf
1816   | isJust imm
1817   = return (Amode (AddrImm imm__2) id)
1818   where
1819     imm = maybeImm leaf
1820     imm__2 = case imm of Just x -> x
1821
1822 getAmode other
1823   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1824     getRegister other           `thenNat` \ register ->
1825     let
1826         code = registerCode register tmp
1827         reg  = registerName register tmp
1828     in
1829     return (Amode (AddrReg reg) code)
1830
1831 #endif /* alpha_TARGET_ARCH */
1832
1833 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1834
1835 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1836
1837 -- This is all just ridiculous, since it carefully undoes
1838 -- what mangleIndexTree has just done.
1839 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1840   | not (is64BitLit lit)
1841   -- ASSERT(rep == I32)???
1842   = do (x_reg, x_code) <- getSomeReg x
1843        let off = ImmInt (-(fromInteger i))
1844        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1845
1846 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1847   | not (is64BitLit lit)
1848   -- ASSERT(rep == I32)???
1849   = do (x_reg, x_code) <- getSomeReg x
1850        let off = ImmInt (fromInteger i)
1851        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1852
1853 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1854 -- recognised by the next rule.
1855 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1856                                   b@(CmmLit _)])
1857   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1858
1859 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1860                                         [y, CmmLit (CmmInt shift _)]])
1861   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1862   = do (x_reg, x_code) <- getNonClobberedReg x
1863         -- x must be in a temp, because it has to stay live over y_code
1864         -- we could compre x_reg and y_reg and do something better here...
1865        (y_reg, y_code) <- getSomeReg y
1866        let
1867            code = x_code `appOL` y_code
1868            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1869        return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
1870                code)
1871
1872 getAmode (CmmLit lit) | not (is64BitLit lit)
1873   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1874
1875 getAmode expr = do
1876   (reg,code) <- getSomeReg expr
1877   return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
1878
1879 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1880
1881 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1882
1883 #if sparc_TARGET_ARCH
1884
1885 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1886   | fits13Bits (-i)
1887   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1888     getRegister x               `thenNat` \ register ->
1889     let
1890         code = registerCode register tmp
1891         reg  = registerName register tmp
1892         off  = ImmInt (-(fromInteger i))
1893     in
1894     return (Amode (AddrRegImm reg off) code)
1895
1896
1897 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1898   | fits13Bits i
1899   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1900     getRegister x               `thenNat` \ register ->
1901     let
1902         code = registerCode register tmp
1903         reg  = registerName register tmp
1904         off  = ImmInt (fromInteger i)
1905     in
1906     return (Amode (AddrRegImm reg off) code)
1907
1908 getAmode (CmmMachOp MO_Nat_Add [x, y])
1909   = getNewRegNat PtrRep         `thenNat` \ tmp1 ->
1910     getNewRegNat IntRep         `thenNat` \ tmp2 ->
1911     getRegister x               `thenNat` \ register1 ->
1912     getRegister y               `thenNat` \ register2 ->
1913     let
1914         code1 = registerCode register1 tmp1
1915         reg1  = registerName register1 tmp1
1916         code2 = registerCode register2 tmp2
1917         reg2  = registerName register2 tmp2
1918         code__2 = code1 `appOL` code2
1919     in
1920     return (Amode (AddrRegReg reg1 reg2) code__2)
1921
1922 getAmode leaf
1923   | isJust imm
1924   = getNewRegNat PtrRep             `thenNat` \ tmp ->
1925     let
1926         code = unitOL (SETHI (HI imm__2) tmp)
1927     in
1928     return (Amode (AddrRegImm tmp (LO imm__2)) code)
1929   where
1930     imm    = maybeImm leaf
1931     imm__2 = case imm of Just x -> x
1932
1933 getAmode other
1934   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1935     getRegister other           `thenNat` \ register ->
1936     let
1937         code = registerCode register tmp
1938         reg  = registerName register tmp
1939         off  = ImmInt 0
1940     in
1941     return (Amode (AddrRegImm reg off) code)
1942
1943 #endif /* sparc_TARGET_ARCH */
1944
1945 #ifdef powerpc_TARGET_ARCH
1946 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1947   | Just off <- makeImmediate I32 True (-i)
1948   = do
1949         (reg, code) <- getSomeReg x
1950         return (Amode (AddrRegImm reg off) code)
1951
1952
1953 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1954   | Just off <- makeImmediate I32 True i
1955   = do
1956         (reg, code) <- getSomeReg x
1957         return (Amode (AddrRegImm reg off) code)
1958
1959    -- optimize addition with 32-bit immediate
1960    -- (needed for PIC)
1961 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1962   = do
1963         tmp <- getNewRegNat I32
1964         (src, srcCode) <- getSomeReg x
1965         let imm = litToImm lit
1966             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1967         return (Amode (AddrRegImm tmp (LO imm)) code)
1968
1969 getAmode (CmmLit lit)
1970   = do
1971         tmp <- getNewRegNat I32
1972         let imm = litToImm lit
1973             code = unitOL (LIS tmp (HA imm))
1974         return (Amode (AddrRegImm tmp (LO imm)) code)
1975
1976 getAmode (CmmMachOp (MO_Add I32) [x, y])
1977   = do
1978         (regX, codeX) <- getSomeReg x
1979         (regY, codeY) <- getSomeReg y
1980         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1981
1982 getAmode other
1983   = do
1984         (reg, code) <- getSomeReg other
1985         let
1986             off  = ImmInt 0
1987         return (Amode (AddrRegImm reg off) code)
1988 #endif /* powerpc_TARGET_ARCH */
1989
1990 -- -----------------------------------------------------------------------------
1991 -- getOperand: sometimes any operand will do.
1992
1993 -- getNonClobberedOperand: the value of the operand will remain valid across
1994 -- the computation of an arbitrary expression, unless the expression
1995 -- is computed directly into a register which the operand refers to
1996 -- (see trivialCode where this function is used for an example).
1997
1998 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1999
2000 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2001 getNonClobberedOperand (CmmLit lit)
2002   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2003     return (OpImm (litToImm lit), nilOL)
2004 getNonClobberedOperand (CmmLoad mem pk)
2005   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2006     Amode src mem_code <- getAmode mem
2007     (src',save_code) <-
2008         if (amodeCouldBeClobbered src)
2009                 then do
2010                    tmp <- getNewRegNat wordRep
2011                    return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
2012                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2013                 else
2014                    return (src, nilOL)
2015     return (OpAddr src', save_code `appOL` mem_code)
2016 getNonClobberedOperand e = do
2017     (reg, code) <- getNonClobberedReg e
2018     return (OpReg reg, code)
2019
2020 amodeCouldBeClobbered :: AddrMode -> Bool
2021 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2022
2023 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2024 regClobbered _ = False
2025
2026 -- getOperand: the operand is not required to remain valid across the
2027 -- computation of an arbitrary expression.
2028 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2029 getOperand (CmmLit lit)
2030   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2031     return (OpImm (litToImm lit), nilOL)
2032 getOperand (CmmLoad mem pk)
2033   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2034     Amode src mem_code <- getAmode mem
2035     return (OpAddr src, mem_code)
2036 getOperand e = do
2037     (reg, code) <- getNonClobberedReg e
2038     return (OpReg reg, code)
2039
2040 isOperand :: CmmExpr -> Bool
2041 isOperand (CmmLoad _ _) = True
2042 isOperand (CmmLit lit)  = not (is64BitLit lit) &&
2043                           not (isFloatingRep (cmmLitRep lit))
2044 isOperand _             = False
2045
2046 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2047 getRegOrMem (CmmLoad mem pk)
2048   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2049     Amode src mem_code <- getAmode mem
2050     return (OpAddr src, mem_code)
2051 getRegOrMem e = do
2052     (reg, code) <- getNonClobberedReg e
2053     return (OpReg reg, code)
2054
2055 #if x86_64_TARGET_ARCH
2056 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2057    -- assume that labels are in the range 0-2^31-1: this assumes the
2058    -- small memory model (see gcc docs, -mcmodel=small).
2059 #endif
2060 is64BitLit x = False
2061 #endif
2062
2063 -- -----------------------------------------------------------------------------
2064 --  The 'CondCode' type:  Condition codes passed up the tree.
2065
2066 data CondCode = CondCode Bool Cond InstrBlock
2067
2068 -- Set up a condition code for a conditional branch.
2069
2070 getCondCode :: CmmExpr -> NatM CondCode
2071
2072 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2073
2074 #if alpha_TARGET_ARCH
2075 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2076 #endif /* alpha_TARGET_ARCH */
2077
2078 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2079
2080 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2081 -- yes, they really do seem to want exactly the same!
2082
2083 getCondCode (CmmMachOp mop [x, y])
2084   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2085     case mop of
2086       MO_Eq F32 -> condFltCode EQQ x y
2087       MO_Ne F32 -> condFltCode NE  x y
2088
2089       MO_S_Gt F32 -> condFltCode GTT x y
2090       MO_S_Ge F32 -> condFltCode GE  x y
2091       MO_S_Lt F32 -> condFltCode LTT x y
2092       MO_S_Le F32 -> condFltCode LE  x y
2093
2094       MO_Eq F64 -> condFltCode EQQ x y
2095       MO_Ne F64 -> condFltCode NE  x y
2096
2097       MO_S_Gt F64 -> condFltCode GTT x y
2098       MO_S_Ge F64 -> condFltCode GE  x y
2099       MO_S_Lt F64 -> condFltCode LTT x y
2100       MO_S_Le F64 -> condFltCode LE  x y
2101
2102       MO_Eq rep -> condIntCode EQQ  x y
2103       MO_Ne rep -> condIntCode NE   x y
2104
2105       MO_S_Gt rep -> condIntCode GTT  x y
2106       MO_S_Ge rep -> condIntCode GE   x y
2107       MO_S_Lt rep -> condIntCode LTT  x y
2108       MO_S_Le rep -> condIntCode LE   x y
2109
2110       MO_U_Gt rep -> condIntCode GU   x y
2111       MO_U_Ge rep -> condIntCode GEU  x y
2112       MO_U_Lt rep -> condIntCode LU   x y
2113       MO_U_Le rep -> condIntCode LEU  x y
2114
2115       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2116
2117 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2118
2119 #elif powerpc_TARGET_ARCH
2120
2121 -- almost the same as everywhere else - but we need to
2122 -- extend small integers to 32 bit first
2123
2124 getCondCode (CmmMachOp mop [x, y])
2125   = case mop of
2126       MO_Eq F32 -> condFltCode EQQ x y
2127       MO_Ne F32 -> condFltCode NE  x y
2128
2129       MO_S_Gt F32 -> condFltCode GTT x y
2130       MO_S_Ge F32 -> condFltCode GE  x y
2131       MO_S_Lt F32 -> condFltCode LTT x y
2132       MO_S_Le F32 -> condFltCode LE  x y
2133
2134       MO_Eq F64 -> condFltCode EQQ x y
2135       MO_Ne F64 -> condFltCode NE  x y
2136
2137       MO_S_Gt F64 -> condFltCode GTT x y
2138       MO_S_Ge F64 -> condFltCode GE  x y
2139       MO_S_Lt F64 -> condFltCode LTT x y
2140       MO_S_Le F64 -> condFltCode LE  x y
2141
2142       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2143       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2144
2145       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2146       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2147       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2148       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2149
2150       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2151       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2152       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2153       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2154
2155       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2156
2157 getCondCode other =  panic "getCondCode(2)(powerpc)"
2158
2159
2160 #endif
2161
2162
2163 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2164 -- passed back up the tree.
2165
2166 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2167
2168 #if alpha_TARGET_ARCH
2169 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2170 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2171 #endif /* alpha_TARGET_ARCH */
2172
2173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2174 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2175
2176 -- memory vs immediate
2177 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2178     Amode x_addr x_code <- getAmode x
2179     let
2180         imm  = litToImm lit
2181         code = x_code `snocOL`
2182                   CMP pk (OpImm imm) (OpAddr x_addr)
2183     --
2184     return (CondCode False cond code)
2185
2186 -- anything vs zero
2187 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2188     (x_reg, x_code) <- getSomeReg x
2189     let
2190         code = x_code `snocOL`
2191                   TEST pk (OpReg x_reg) (OpReg x_reg)
2192     --
2193     return (CondCode False cond code)
2194
2195 -- anything vs operand
2196 condIntCode cond x y | isOperand y = do
2197     (x_reg, x_code) <- getNonClobberedReg x
2198     (y_op,  y_code) <- getOperand y
2199     let
2200         code = x_code `appOL` y_code `snocOL`
2201                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2202     -- in
2203     return (CondCode False cond code)
2204
2205 -- anything vs anything
2206 condIntCode cond x y = do
2207   (y_reg, y_code) <- getNonClobberedReg y
2208   (x_op, x_code) <- getRegOrMem x
2209   let
2210         code = y_code `appOL`
2211                x_code `snocOL`
2212                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2213   -- in
2214   return (CondCode False cond code)
2215 #endif
2216
2217 #if i386_TARGET_ARCH
2218 condFltCode cond x y
2219   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2220   (x_reg, x_code) <- getNonClobberedReg x
2221   (y_reg, y_code) <- getSomeReg y
2222   let
2223         code = x_code `appOL` y_code `snocOL`
2224                 GCMP cond x_reg y_reg
2225   -- The GCMP insn does the test and sets the zero flag if comparable
2226   -- and true.  Hence we always supply EQQ as the condition to test.
2227   return (CondCode True EQQ code)
2228 #endif /* i386_TARGET_ARCH */
2229
2230 #if x86_64_TARGET_ARCH
2231 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2232 -- an operand, but the right must be a reg.  We can probably do better
2233 -- than this general case...
2234 condFltCode cond x y = do
2235   (x_reg, x_code) <- getNonClobberedReg x
2236   (y_op, y_code) <- getOperand y
2237   let
2238         code = x_code `appOL`
2239                y_code `snocOL`
2240                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2241   -- in
2242   return (CondCode False (condToUnsigned cond) code)
2243         -- we need to use the unsigned comparison operators on the
2244         -- result of this comparison.
2245 #endif
2246
2247 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2248
2249 #if sparc_TARGET_ARCH
2250
2251 condIntCode cond x (StInt y)
2252   | fits13Bits y
2253   = getRegister x               `thenNat` \ register ->
2254     getNewRegNat IntRep         `thenNat` \ tmp ->
2255     let
2256         code = registerCode register tmp
2257         src1 = registerName register tmp
2258         src2 = ImmInt (fromInteger y)
2259         code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2260     in
2261     return (CondCode False cond code__2)
2262
2263 condIntCode cond x y
2264   = getRegister x               `thenNat` \ register1 ->
2265     getRegister y               `thenNat` \ register2 ->
2266     getNewRegNat IntRep         `thenNat` \ tmp1 ->
2267     getNewRegNat IntRep         `thenNat` \ tmp2 ->
2268     let
2269         code1 = registerCode register1 tmp1
2270         src1  = registerName register1 tmp1
2271         code2 = registerCode register2 tmp2
2272         src2  = registerName register2 tmp2
2273         code__2 = code1 `appOL` code2 `snocOL`
2274                   SUB False True src1 (RIReg src2) g0
2275     in
2276     return (CondCode False cond code__2)
2277
2278 -----------
2279 condFltCode cond x y
2280   = getRegister x               `thenNat` \ register1 ->
2281     getRegister y               `thenNat` \ register2 ->
2282     getNewRegNat (registerRep register1)
2283                                 `thenNat` \ tmp1 ->
2284     getNewRegNat (registerRep register2)
2285                                 `thenNat` \ tmp2 ->
2286     getNewRegNat F64    `thenNat` \ tmp ->
2287     let
2288         promote x = FxTOy F DF x tmp
2289
2290         pk1   = registerRep register1
2291         code1 = registerCode register1 tmp1
2292         src1  = registerName register1 tmp1
2293
2294         pk2   = registerRep register2
2295         code2 = registerCode register2 tmp2
2296         src2  = registerName register2 tmp2
2297
2298         code__2 =
2299                 if pk1 == pk2 then
2300                     code1 `appOL` code2 `snocOL`
2301                     FCMP True (primRepToSize pk1) src1 src2
2302                 else if pk1 == F32 then
2303                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2304                     FCMP True DF tmp src2
2305                 else
2306                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2307                     FCMP True DF src1 tmp
2308     in
2309     return (CondCode True cond code__2)
2310
2311 #endif /* sparc_TARGET_ARCH */
2312
2313 #if powerpc_TARGET_ARCH
2314 --  ###FIXME: I16 and I8!
2315 condIntCode cond x (CmmLit (CmmInt y rep))
2316   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2317   = do
2318         (src1, code) <- getSomeReg x
2319         let
2320             code' = code `snocOL`
2321                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2322         return (CondCode False cond code')
2323
2324 condIntCode cond x y = do
2325     (src1, code1) <- getSomeReg x
2326     (src2, code2) <- getSomeReg y
2327     let
2328         code' = code1 `appOL` code2 `snocOL`
2329                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2330     return (CondCode False cond code')
2331
2332 condFltCode cond x y = do
2333     (src1, code1) <- getSomeReg x
2334     (src2, code2) <- getSomeReg y
2335     let
2336         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2337         code'' = case cond of -- twiddle CR to handle unordered case
2338                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2339                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2340                     _ -> code'
2341                  where
2342                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2343     return (CondCode True cond code'')
2344
2345 #endif /* powerpc_TARGET_ARCH */
2346
2347 -- -----------------------------------------------------------------------------
2348 -- Generating assignments
2349
2350 -- Assignments are really at the heart of the whole code generation
2351 -- business.  Almost all top-level nodes of any real importance are
2352 -- assignments, which correspond to loads, stores, or register
2353 -- transfers.  If we're really lucky, some of the register transfers
2354 -- will go away, because we can use the destination register to
2355 -- complete the code generation for the right hand side.  This only
2356 -- fails when the right hand side is forced into a fixed register
2357 -- (e.g. the result of a call).
2358
2359 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2360 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2361
2362 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2363 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2364
2365 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2366
2367 #if alpha_TARGET_ARCH
2368
2369 assignIntCode pk (CmmLoad dst _) src
2370   = getNewRegNat IntRep             `thenNat` \ tmp ->
2371     getAmode dst                    `thenNat` \ amode ->
2372     getRegister src                 `thenNat` \ register ->
2373     let
2374         code1   = amodeCode amode []
2375         dst__2  = amodeAddr amode
2376         code2   = registerCode register tmp []
2377         src__2  = registerName register tmp
2378         sz      = primRepToSize pk
2379         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2380     in
2381     return code__2
2382
2383 assignIntCode pk dst src
2384   = getRegister dst                         `thenNat` \ register1 ->
2385     getRegister src                         `thenNat` \ register2 ->
2386     let
2387         dst__2  = registerName register1 zeroh
2388         code    = registerCode register2 dst__2
2389         src__2  = registerName register2 dst__2
2390         code__2 = if isFixed register2
2391                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2392                   else code
2393     in
2394     return code__2
2395
2396 #endif /* alpha_TARGET_ARCH */
2397
2398 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2399
2400 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2401
2402 -- integer assignment to memory
2403 assignMem_IntCode pk addr src = do
2404     Amode addr code_addr <- getAmode addr
2405     (code_src, op_src)   <- get_op_RI src
2406     let
2407         code = code_src `appOL`
2408                code_addr `snocOL`
2409                   MOV pk op_src (OpAddr addr)
2410         -- NOTE: op_src is stable, so it will still be valid
2411         -- after code_addr.  This may involve the introduction
2412         -- of an extra MOV to a temporary register, but we hope
2413         -- the register allocator will get rid of it.
2414     --
2415     return code
2416   where
2417     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2418     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2419       = return (nilOL, OpImm (litToImm lit))
2420     get_op_RI op
2421       = do (reg,code) <- getNonClobberedReg op
2422            return (code, OpReg reg)
2423
2424
2425 -- Assign; dst is a reg, rhs is mem
2426 assignReg_IntCode pk reg (CmmLoad src _) = do
2427   load_code <- intLoadCode (MOV pk) src
2428   return (load_code (getRegisterReg reg))
2429
2430 -- dst is a reg, but src could be anything
2431 assignReg_IntCode pk reg src = do
2432   code <- getAnyReg src
2433   return (code (getRegisterReg reg))
2434
2435 #endif /* i386_TARGET_ARCH */
2436
2437 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2438
2439 #if sparc_TARGET_ARCH
2440
2441 assignMem_IntCode pk addr src
2442   = getNewRegNat IntRep                     `thenNat` \ tmp ->
2443     getAmode addr                           `thenNat` \ amode ->
2444     getRegister src                         `thenNat` \ register ->
2445     let
2446         code1   = amodeCode amode
2447         dst__2  = amodeAddr amode
2448         code2   = registerCode register tmp
2449         src__2  = registerName register tmp
2450         sz      = primRepToSize pk
2451         code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2452     in
2453     return code__2
2454
2455 assignReg_IntCode pk reg src
2456   = getRegister src                         `thenNat` \ register2 ->
2457     getRegisterReg reg                      `thenNat` \ register1 ->
2458     getNewRegNat IntRep                     `thenNat` \ tmp ->
2459     let
2460         dst__2  = registerName register1 tmp
2461         code    = registerCode register2 dst__2
2462         src__2  = registerName register2 dst__2
2463         code__2 = if isFixed register2
2464                   then code `snocOL` OR False g0 (RIReg src__2) dst__2
2465                   else code
2466     in
2467     return code__2
2468
2469 #endif /* sparc_TARGET_ARCH */
2470
2471 #if powerpc_TARGET_ARCH
2472
2473 assignMem_IntCode pk addr src = do
2474     (srcReg, code) <- getSomeReg src
2475     Amode dstAddr addr_code <- getAmode addr
2476     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2477
2478 -- dst is a reg, but src could be anything
2479 assignReg_IntCode pk reg src
2480     = do
2481         r <- getRegister src
2482         return $ case r of
2483             Any _ code         -> code dst
2484             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2485     where
2486         dst = getRegisterReg reg
2487
2488 #endif /* powerpc_TARGET_ARCH */
2489
2490
2491 -- -----------------------------------------------------------------------------
2492 -- Floating-point assignments
2493
2494 #if alpha_TARGET_ARCH
2495
2496 assignFltCode pk (CmmLoad dst _) src
2497   = getNewRegNat pk                 `thenNat` \ tmp ->
2498     getAmode dst                    `thenNat` \ amode ->
2499     getRegister src                         `thenNat` \ register ->
2500     let
2501         code1   = amodeCode amode []
2502         dst__2  = amodeAddr amode
2503         code2   = registerCode register tmp []
2504         src__2  = registerName register tmp
2505         sz      = primRepToSize pk
2506         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2507     in
2508     return code__2
2509
2510 assignFltCode pk dst src
2511   = getRegister dst                         `thenNat` \ register1 ->
2512     getRegister src                         `thenNat` \ register2 ->
2513     let
2514         dst__2  = registerName register1 zeroh
2515         code    = registerCode register2 dst__2
2516         src__2  = registerName register2 dst__2
2517         code__2 = if isFixed register2
2518                   then code . mkSeqInstr (FMOV src__2 dst__2)
2519                   else code
2520     in
2521     return code__2
2522
2523 #endif /* alpha_TARGET_ARCH */
2524
2525 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2526
2527 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2528
2529 -- Floating point assignment to memory
2530 assignMem_FltCode pk addr src = do
2531   (src_reg, src_code) <- getNonClobberedReg src
2532   Amode addr addr_code <- getAmode addr
2533   let
2534         code = src_code `appOL`
2535                addr_code `snocOL`
2536                 IF_ARCH_i386(GST pk src_reg addr,
2537                              MOV pk (OpReg src_reg) (OpAddr addr))
2538   return code
2539
2540 -- Floating point assignment to a register/temporary
2541 assignReg_FltCode pk reg src = do
2542   src_code <- getAnyReg src
2543   return (src_code (getRegisterReg reg))
2544
2545 #endif /* i386_TARGET_ARCH */
2546
2547 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2548
2549 #if sparc_TARGET_ARCH
2550
2551 -- Floating point assignment to memory
2552 assignMem_FltCode pk addr src
2553   = getNewRegNat pk                 `thenNat` \ tmp1 ->
2554     getAmode addr                   `thenNat` \ amode ->
2555     getRegister src                 `thenNat` \ register ->
2556     let
2557         sz      = primRepToSize pk
2558         dst__2  = amodeAddr amode
2559
2560         code1   = amodeCode amode
2561         code2   = registerCode register tmp1
2562
2563         src__2  = registerName register tmp1
2564         pk__2   = registerRep register
2565         sz__2   = primRepToSize pk__2
2566
2567         code__2 = code1 `appOL` code2 `appOL`
2568             if   pk == pk__2
2569             then unitOL (ST sz src__2 dst__2)
2570             else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2571     in
2572     return code__2
2573
2574 -- Floating point assignment to a register/temporary
2575 -- Why is this so bizarrely ugly?
2576 assignReg_FltCode pk reg src
2577   = getRegisterReg reg                      `thenNat` \ register1 ->
2578     getRegister src                         `thenNat` \ register2 ->
2579     let
2580         pk__2   = registerRep register2
2581         sz__2   = primRepToSize pk__2
2582     in
2583     getNewRegNat pk__2                      `thenNat` \ tmp ->
2584     let
2585         sz      = primRepToSize pk
2586         dst__2  = registerName register1 g0    -- must be Fixed
2587         reg__2  = if pk /= pk__2 then tmp else dst__2
2588         code    = registerCode register2 reg__2
2589         src__2  = registerName register2 reg__2
2590         code__2 =
2591                 if pk /= pk__2 then
2592                      code `snocOL` FxTOy sz__2 sz src__2 dst__2
2593                 else if isFixed register2 then
2594                      code `snocOL` FMOV sz src__2 dst__2
2595                 else
2596                      code
2597     in
2598     return code__2
2599
2600 #endif /* sparc_TARGET_ARCH */
2601
2602 #if powerpc_TARGET_ARCH
2603
2604 -- Easy, isn't it?
2605 assignMem_FltCode = assignMem_IntCode
2606 assignReg_FltCode = assignReg_IntCode
2607
2608 #endif /* powerpc_TARGET_ARCH */
2609
2610
2611 -- -----------------------------------------------------------------------------
2612 -- Generating an non-local jump
2613
2614 -- (If applicable) Do not fill the delay slots here; you will confuse the
2615 -- register allocator.
2616
2617 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2618
2619 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2620
2621 #if alpha_TARGET_ARCH
2622
2623 genJump (CmmLabel lbl)
2624   | isAsmTemp lbl = returnInstr (BR target)
2625   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2626   where
2627     target = ImmCLbl lbl
2628
2629 genJump tree
2630   = getRegister tree                `thenNat` \ register ->
2631     getNewRegNat PtrRep             `thenNat` \ tmp ->
2632     let
2633         dst    = registerName register pv
2634         code   = registerCode register pv
2635         target = registerName register pv
2636     in
2637     if isFixed register then
2638         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2639     else
2640     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2641
2642 #endif /* alpha_TARGET_ARCH */
2643
2644 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2645
2646 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2647
2648 genJump (CmmLoad mem pk) = do
2649   Amode target code <- getAmode mem
2650   return (code `snocOL` JMP (OpAddr target))
2651
2652 genJump (CmmLit lit) = do
2653   return (unitOL (JMP (OpImm (litToImm lit))))
2654
2655 genJump expr = do
2656   (reg,code) <- getSomeReg expr
2657   return (code `snocOL` JMP (OpReg reg))
2658
2659 #endif /* i386_TARGET_ARCH */
2660
2661 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2662
2663 #if sparc_TARGET_ARCH
2664
2665 genJump (CmmLabel lbl)
2666   = return (toOL [CALL (Left target) 0 True, NOP])
2667   where
2668     target = ImmCLbl lbl
2669
2670 genJump tree
2671   = getRegister tree                        `thenNat` \ register ->
2672     getNewRegNat PtrRep             `thenNat` \ tmp ->
2673     let
2674         code   = registerCode register tmp
2675         target = registerName register tmp
2676     in
2677     return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2678
2679 #endif /* sparc_TARGET_ARCH */
2680
2681 #if powerpc_TARGET_ARCH
2682 genJump (CmmLit (CmmLabel lbl))
2683   = return (unitOL $ JMP lbl)
2684
2685 genJump tree
2686   = do
2687         (target,code) <- getSomeReg tree
2688         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2689 #endif /* powerpc_TARGET_ARCH */
2690
2691
2692 -- -----------------------------------------------------------------------------
2693 --  Unconditional branches
2694
2695 genBranch :: BlockId -> NatM InstrBlock
2696
2697 #if alpha_TARGET_ARCH
2698 genBranch id = return (unitOL (BR id))
2699 #endif
2700
2701 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2702 genBranch id = return (unitOL (JXX ALWAYS id))
2703 #endif
2704
2705 #if sparc_TARGET_ARCH
2706 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2707 #endif
2708
2709 #if powerpc_TARGET_ARCH
2710 genBranch id = return (unitOL (BCC ALWAYS id))
2711 #endif
2712
2713
2714 -- -----------------------------------------------------------------------------
2715 --  Conditional jumps
2716
2717 {-
2718 Conditional jumps are always to local labels, so we can use branch
2719 instructions.  We peek at the arguments to decide what kind of
2720 comparison to do.
2721
2722 ALPHA: For comparisons with 0, we're laughing, because we can just do
2723 the desired conditional branch.
2724
2725 I386: First, we have to ensure that the condition
2726 codes are set according to the supplied comparison operation.
2727
2728 SPARC: First, we have to ensure that the condition codes are set
2729 according to the supplied comparison operation.  We generate slightly
2730 different code for floating point comparisons, because a floating
2731 point operation cannot directly precede a @BF@.  We assume the worst
2732 and fill that slot with a @NOP@.
2733
2734 SPARC: Do not fill the delay slots here; you will confuse the register
2735 allocator.
2736 -}
2737
2738
2739 genCondJump
2740     :: BlockId      -- the branch target
2741     -> CmmExpr      -- the condition on which to branch
2742     -> NatM InstrBlock
2743
2744 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2745
2746 #if alpha_TARGET_ARCH
2747
2748 genCondJump id (StPrim op [x, StInt 0])
2749   = getRegister x                           `thenNat` \ register ->
2750     getNewRegNat (registerRep register)
2751                                     `thenNat` \ tmp ->
2752     let
2753         code   = registerCode register tmp
2754         value  = registerName register tmp
2755         pk     = registerRep register
2756         target = ImmCLbl lbl
2757     in
2758     returnSeq code [BI (cmpOp op) value target]
2759   where
2760     cmpOp CharGtOp = GTT
2761     cmpOp CharGeOp = GE
2762     cmpOp CharEqOp = EQQ
2763     cmpOp CharNeOp = NE
2764     cmpOp CharLtOp = LTT
2765     cmpOp CharLeOp = LE
2766     cmpOp IntGtOp = GTT
2767     cmpOp IntGeOp = GE
2768     cmpOp IntEqOp = EQQ
2769     cmpOp IntNeOp = NE
2770     cmpOp IntLtOp = LTT
2771     cmpOp IntLeOp = LE
2772     cmpOp WordGtOp = NE
2773     cmpOp WordGeOp = ALWAYS
2774     cmpOp WordEqOp = EQQ
2775     cmpOp WordNeOp = NE
2776     cmpOp WordLtOp = NEVER
2777     cmpOp WordLeOp = EQQ
2778     cmpOp AddrGtOp = NE
2779     cmpOp AddrGeOp = ALWAYS
2780     cmpOp AddrEqOp = EQQ
2781     cmpOp AddrNeOp = NE
2782     cmpOp AddrLtOp = NEVER
2783     cmpOp AddrLeOp = EQQ
2784
2785 genCondJump lbl (StPrim op [x, StDouble 0.0])
2786   = getRegister x                           `thenNat` \ register ->
2787     getNewRegNat (registerRep register)
2788                                     `thenNat` \ tmp ->
2789     let
2790         code   = registerCode register tmp
2791         value  = registerName register tmp
2792         pk     = registerRep register
2793         target = ImmCLbl lbl
2794     in
2795     return (code . mkSeqInstr (BF (cmpOp op) value target))
2796   where
2797     cmpOp FloatGtOp = GTT
2798     cmpOp FloatGeOp = GE
2799     cmpOp FloatEqOp = EQQ
2800     cmpOp FloatNeOp = NE
2801     cmpOp FloatLtOp = LTT
2802     cmpOp FloatLeOp = LE
2803     cmpOp DoubleGtOp = GTT
2804     cmpOp DoubleGeOp = GE
2805     cmpOp DoubleEqOp = EQQ
2806     cmpOp DoubleNeOp = NE
2807     cmpOp DoubleLtOp = LTT
2808     cmpOp DoubleLeOp = LE
2809
2810 genCondJump lbl (StPrim op [x, y])
2811   | fltCmpOp op
2812   = trivialFCode pr instr x y       `thenNat` \ register ->
2813     getNewRegNat F64                `thenNat` \ tmp ->
2814     let
2815         code   = registerCode register tmp
2816         result = registerName register tmp
2817         target = ImmCLbl lbl
2818     in
2819     return (code . mkSeqInstr (BF cond result target))
2820   where
2821     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2822
2823     fltCmpOp op = case op of
2824         FloatGtOp -> True
2825         FloatGeOp -> True
2826         FloatEqOp -> True
2827         FloatNeOp -> True
2828         FloatLtOp -> True
2829         FloatLeOp -> True
2830         DoubleGtOp -> True
2831         DoubleGeOp -> True
2832         DoubleEqOp -> True
2833         DoubleNeOp -> True
2834         DoubleLtOp -> True
2835         DoubleLeOp -> True
2836         _ -> False
2837     (instr, cond) = case op of
2838         FloatGtOp -> (FCMP TF LE, EQQ)
2839         FloatGeOp -> (FCMP TF LTT, EQQ)
2840         FloatEqOp -> (FCMP TF EQQ, NE)
2841         FloatNeOp -> (FCMP TF EQQ, EQQ)
2842         FloatLtOp -> (FCMP TF LTT, NE)
2843         FloatLeOp -> (FCMP TF LE, NE)
2844         DoubleGtOp -> (FCMP TF LE, EQQ)
2845         DoubleGeOp -> (FCMP TF LTT, EQQ)
2846         DoubleEqOp -> (FCMP TF EQQ, NE)
2847         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2848         DoubleLtOp -> (FCMP TF LTT, NE)
2849         DoubleLeOp -> (FCMP TF LE, NE)
2850
2851 genCondJump lbl (StPrim op [x, y])
2852   = trivialCode instr x y           `thenNat` \ register ->
2853     getNewRegNat IntRep             `thenNat` \ tmp ->
2854     let
2855         code   = registerCode register tmp
2856         result = registerName register tmp
2857         target = ImmCLbl lbl
2858     in
2859     return (code . mkSeqInstr (BI cond result target))
2860   where
2861     (instr, cond) = case op of
2862         CharGtOp -> (CMP LE, EQQ)
2863         CharGeOp -> (CMP LTT, EQQ)
2864         CharEqOp -> (CMP EQQ, NE)
2865         CharNeOp -> (CMP EQQ, EQQ)
2866         CharLtOp -> (CMP LTT, NE)
2867         CharLeOp -> (CMP LE, NE)
2868         IntGtOp -> (CMP LE, EQQ)
2869         IntGeOp -> (CMP LTT, EQQ)
2870         IntEqOp -> (CMP EQQ, NE)
2871         IntNeOp -> (CMP EQQ, EQQ)
2872         IntLtOp -> (CMP LTT, NE)
2873         IntLeOp -> (CMP LE, NE)
2874         WordGtOp -> (CMP ULE, EQQ)
2875         WordGeOp -> (CMP ULT, EQQ)
2876         WordEqOp -> (CMP EQQ, NE)
2877         WordNeOp -> (CMP EQQ, EQQ)
2878         WordLtOp -> (CMP ULT, NE)
2879         WordLeOp -> (CMP ULE, NE)
2880         AddrGtOp -> (CMP ULE, EQQ)
2881         AddrGeOp -> (CMP ULT, EQQ)
2882         AddrEqOp -> (CMP EQQ, NE)
2883         AddrNeOp -> (CMP EQQ, EQQ)
2884         AddrLtOp -> (CMP ULT, NE)
2885         AddrLeOp -> (CMP ULE, NE)
2886
2887 #endif /* alpha_TARGET_ARCH */
2888
2889 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2890
2891 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2892
2893 genCondJump id bool = do
2894   CondCode _ cond code <- getCondCode bool
2895   return (code `snocOL` JXX cond id)
2896
2897 #endif /* i386_TARGET_ARCH */
2898
2899
2900 #if sparc_TARGET_ARCH
2901
2902 genCondJump id bool = do
2903   CondCode is_float cond code <- getCondCode bool
2904   return (
2905        code `appOL`
2906        toOL (
2907          if   is_float
2908          then [NOP, BF cond False id, NOP]
2909          else [BI cond False id, NOP]
2910        )
2911     )
2912
2913 #endif /* sparc_TARGET_ARCH */
2914
2915
2916 #if powerpc_TARGET_ARCH
2917
2918 genCondJump id bool = do
2919   CondCode is_float cond code <- getCondCode bool
2920   return (code `snocOL` BCC cond id)
2921
2922 #endif /* powerpc_TARGET_ARCH */
2923
2924
2925 -- -----------------------------------------------------------------------------
2926 --  Generating C calls
2927
2928 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2929 -- @get_arg@, which moves the arguments to the correct registers/stack
2930 -- locations.  Apart from that, the code is easy.
2931 --
2932 -- (If applicable) Do not fill the delay slots here; you will confuse the
2933 -- register allocator.
2934
2935 genCCall
2936     :: CmmCallTarget            -- function to call
2937     -> [(CmmReg,MachHint)]      -- where to put the result
2938     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2939     -> Maybe [GlobalReg]        -- volatile regs to save
2940     -> NatM InstrBlock
2941
2942 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2943
2944 #if alpha_TARGET_ARCH
2945
2946 ccallResultRegs =
2947
2948 genCCall fn cconv result_regs args
2949   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2950                           `thenNat` \ ((unused,_), argCode) ->
2951     let
2952         nRegs = length allArgRegs - length unused
2953         code = asmSeqThen (map ($ []) argCode)
2954     in
2955         returnSeq code [
2956             LDA pv (AddrImm (ImmLab (ptext fn))),
2957             JSR ra (AddrReg pv) nRegs,
2958             LDGP gp (AddrReg ra)]
2959   where
2960     ------------------------
2961     {-  Try to get a value into a specific register (or registers) for
2962         a call.  The first 6 arguments go into the appropriate
2963         argument register (separate registers for integer and floating
2964         point arguments, but used in lock-step), and the remaining
2965         arguments are dumped to the stack, beginning at 0(sp).  Our
2966         first argument is a pair of the list of remaining argument
2967         registers to be assigned for this call and the next stack
2968         offset to use for overflowing arguments.  This way,
2969         @get_Arg@ can be applied to all of a call's arguments using
2970         @mapAccumLNat@.
2971     -}
2972     get_arg
2973         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2974         -> StixTree             -- Current argument
2975         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2976
2977     -- We have to use up all of our argument registers first...
2978
2979     get_arg ((iDst,fDst):dsts, offset) arg
2980       = getRegister arg                     `thenNat` \ register ->
2981         let
2982             reg  = if isFloatingRep pk then fDst else iDst
2983             code = registerCode register reg
2984             src  = registerName register reg
2985             pk   = registerRep register
2986         in
2987         return (
2988             if isFloatingRep pk then
2989                 ((dsts, offset), if isFixed register then
2990                     code . mkSeqInstr (FMOV src fDst)
2991                     else code)
2992             else
2993                 ((dsts, offset), if isFixed register then
2994                     code . mkSeqInstr (OR src (RIReg src) iDst)
2995                     else code))
2996
2997     -- Once we have run out of argument registers, we move to the
2998     -- stack...
2999
3000     get_arg ([], offset) arg
3001       = getRegister arg                 `thenNat` \ register ->
3002         getNewRegNat (registerRep register)
3003                                         `thenNat` \ tmp ->
3004         let
3005             code = registerCode register tmp
3006             src  = registerName register tmp
3007             pk   = registerRep register
3008             sz   = primRepToSize pk
3009         in
3010         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3011
3012 #endif /* alpha_TARGET_ARCH */
3013
3014 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3015
3016 #if i386_TARGET_ARCH
3017
3018 -- we only cope with a single result for foreign calls
3019 genCCall (CmmPrim op) [(r,_)] args vols = do
3020   case op of
3021         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3022         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3023
3024         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3025         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3026
3027         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3028         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3029
3030         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3031         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3032
3033         other_op    -> outOfLineFloatOp op r args vols
3034  where
3035   actuallyInlineFloatOp rep instr [(x,_)]
3036         = do res <- trivialUFCode rep instr x
3037              any <- anyReg res
3038              return (any (getRegisterReg r))
3039
3040 genCCall target dest_regs args vols = do
3041     sizes_n_codes <- mapM push_arg (reverse args)
3042     delta <- getDeltaNat
3043     let
3044         (sizes, push_codes) = unzip sizes_n_codes
3045         tot_arg_size        = sum sizes
3046     -- in
3047     -- deal with static vs dynamic call targets
3048     (callinsns,cconv) <-
3049       case target of
3050         -- CmmPrim -> ...
3051         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3052            -> -- ToDo: stdcall arg sizes
3053               return (unitOL (CALL (Left fn_imm)), conv)
3054            where fn_imm = ImmCLbl lbl
3055         CmmForeignCall expr conv
3056            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3057                  ASSERT(dyn_rep == I32)
3058                   return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3059
3060     let push_code = concatOL push_codes
3061         call = callinsns `appOL`
3062                toOL (
3063                         -- Deallocate parameters after call for ccall;
3064                         -- but not for stdcall (callee does it)
3065                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3066                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3067                   ++
3068                   [DELTA (delta + tot_arg_size)]
3069                )
3070     -- in
3071     setDeltaNat (delta + tot_arg_size)
3072
3073     let
3074         -- assign the results, if necessary
3075         assign_code []     = nilOL
3076         assign_code [(dest,_hint)] =
3077           case rep of
3078                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3079                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3080                 F32 -> unitOL (GMOV fake0 r_dest)
3081                 F64 -> unitOL (GMOV fake0 r_dest)
3082                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3083           where
3084                 r_dest_hi = getHiVRegFromLo r_dest
3085                 rep = cmmRegRep dest
3086                 r_dest = getRegisterReg dest
3087         assign_code many = panic "genCCall.assign_code many"
3088
3089     return (push_code `appOL`
3090             call `appOL`
3091             assign_code dest_regs)
3092
3093   where
3094     arg_size F64 = 8
3095     arg_size F32 = 4
3096     arg_size _   = 4
3097
3098     push_arg :: (CmmExpr,MachHint){-current argument-}
3099                     -> NatM (Int, InstrBlock)  -- argsz, code
3100
3101     push_arg (arg,_hint) -- we don't need the hints on x86
3102       | arg_rep == I64 = do
3103         ChildCode64 code r_lo <- iselExpr64 arg
3104         delta <- getDeltaNat
3105         setDeltaNat (delta - 8)
3106         let
3107             r_hi = getHiVRegFromLo r_lo
3108         -- in
3109         return (8,     code `appOL`
3110                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3111                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3112                              DELTA (delta-8)]
3113             )
3114
3115       | otherwise = do
3116         (code, reg, sz) <- get_op arg
3117         delta <- getDeltaNat
3118         let size = arg_size sz
3119         setDeltaNat (delta-size)
3120         if (case sz of F64 -> True; F32 -> True; _ -> False)
3121            then return (size,
3122                         code `appOL`
3123                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3124                               DELTA (delta-size),
3125                               GST sz reg (AddrBaseIndex (Just esp)
3126                                                         Nothing
3127                                                         (ImmInt 0))]
3128                        )
3129            else return (size,
3130                         code `snocOL`
3131                         PUSH I32 (OpReg reg) `snocOL`
3132                         DELTA (delta-size)
3133                        )
3134       where
3135          arg_rep = cmmExprRep arg
3136
3137     ------------
3138     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3139     get_op op = do
3140         (reg,code) <- getSomeReg op
3141         return (code, reg, cmmExprRep op)
3142
3143 #endif /* i386_TARGET_ARCH */
3144
3145 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3146
3147 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3148   -> Maybe [GlobalReg] -> NatM InstrBlock
3149 outOfLineFloatOp mop res args vols
3150   | cmmRegRep res == F64
3151   = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3152
3153   | otherwise
3154   = do uq <- getUniqueNat
3155        let
3156          tmp = CmmLocal (LocalReg uq F64)
3157        -- in
3158        code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3159        code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3160        return (code1 `appOL` code2)
3161   where
3162         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3163         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
3164
3165         target = CmmForeignCall (CmmLit lbl) CCallConv
3166         lbl = CmmLabel (mkForeignLabel fn Nothing False)
3167
3168         fn = case mop of
3169               MO_F32_Sqrt  -> FSLIT("sqrt")
3170               MO_F32_Sin   -> FSLIT("sin")
3171               MO_F32_Cos   -> FSLIT("cos")
3172               MO_F32_Tan   -> FSLIT("tan")
3173               MO_F32_Exp   -> FSLIT("exp")
3174               MO_F32_Log   -> FSLIT("log")
3175
3176               MO_F32_Asin  -> FSLIT("asin")
3177               MO_F32_Acos  -> FSLIT("acos")
3178               MO_F32_Atan  -> FSLIT("atan")
3179
3180               MO_F32_Sinh  -> FSLIT("sinh")
3181               MO_F32_Cosh  -> FSLIT("cosh")
3182               MO_F32_Tanh  -> FSLIT("tanh")
3183               MO_F32_Pwr   -> FSLIT("pow")
3184
3185               MO_F64_Sqrt  -> FSLIT("sqrt")
3186               MO_F64_Sin   -> FSLIT("sin")
3187               MO_F64_Cos   -> FSLIT("cos")
3188               MO_F64_Tan   -> FSLIT("tan")
3189               MO_F64_Exp   -> FSLIT("exp")
3190               MO_F64_Log   -> FSLIT("log")
3191
3192               MO_F64_Asin  -> FSLIT("asin")
3193               MO_F64_Acos  -> FSLIT("acos")
3194               MO_F64_Atan  -> FSLIT("atan")
3195
3196               MO_F64_Sinh  -> FSLIT("sinh")
3197               MO_F64_Cosh  -> FSLIT("cosh")
3198               MO_F64_Tanh  -> FSLIT("tanh")
3199               MO_F64_Pwr   -> FSLIT("pow")
3200
3201               other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3202
3203 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3204
3205 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3206
3207 #if x86_64_TARGET_ARCH
3208
3209 genCCall (CmmPrim op) [(r,_)] args vols =
3210   outOfLineFloatOp op r args vols
3211
3212 genCCall target dest_regs args vols = do
3213
3214         -- load up the register arguments
3215     (stack_args, sse_regs, load_args_code)
3216          <- load_args args allArgRegs allFPArgRegs 0 nilOL
3217
3218     let
3219         tot_arg_size = arg_size * length stack_args
3220
3221         -- On entry to the called function, %rsp should be aligned
3222         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3223         -- the return address is 16-byte aligned).  In STG land
3224         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3225         -- need to make sure we push a multiple of 16-bytes of args,
3226         -- plus the return address, to get the correct alignment.
3227         -- Urg, this is hard.  We need to feed the delta back into
3228         -- the arg pushing code.
3229     (real_size, adjust_rsp) <-
3230         if tot_arg_size `rem` 16 == 0
3231             then return (tot_arg_size, nilOL)
3232             else do -- we need to adjust...
3233                 delta <- getDeltaNat
3234                 setDeltaNat (delta-8)
3235                 return (tot_arg_size+8, toOL [
3236                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3237                                 DELTA (delta-8)
3238                         ])
3239
3240         -- push the stack args, right to left
3241     push_code <- push_args (reverse stack_args) nilOL
3242     delta <- getDeltaNat
3243
3244     -- deal with static vs dynamic call targets
3245     (callinsns,cconv) <-
3246       case target of
3247         -- CmmPrim -> ...
3248         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3249            -> -- ToDo: stdcall arg sizes
3250               return (unitOL (CALL (Left fn_imm)), conv)
3251            where fn_imm = ImmCLbl lbl
3252         CmmForeignCall expr conv
3253            -> do (dyn_r, dyn_c) <- getSomeReg expr
3254                  return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3255
3256     let
3257         -- The x86_64 ABI requires us to set %al to the number of SSE
3258         -- registers that contain arguments, if the called routine
3259         -- is a varargs function.  We don't know whether it's a
3260         -- varargs function or not, so we have to assume it is.
3261         --
3262         -- It's not safe to omit this assignment, even if the number
3263         -- of SSE regs in use is zero.  If %al is larger than 8
3264         -- on entry to a varargs function, seg faults ensue.
3265         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3266
3267     let call = callinsns `appOL`
3268                toOL (
3269                         -- Deallocate parameters after call for ccall;
3270                         -- but not for stdcall (callee does it)
3271                   (if cconv == StdCallConv || real_size==0 then [] else
3272                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3273                   ++
3274                   [DELTA (delta + real_size)]
3275                )
3276     -- in
3277     setDeltaNat (delta + real_size)
3278
3279     let
3280         -- assign the results, if necessary
3281         assign_code []     = nilOL
3282         assign_code [(dest,_hint)] =
3283           case rep of
3284                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3285                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3286                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3287           where
3288                 rep = cmmRegRep dest
3289                 r_dest = getRegisterReg dest
3290         assign_code many = panic "genCCall.assign_code many"
3291
3292     return (load_args_code      `appOL`
3293             adjust_rsp          `appOL`
3294             push_code           `appOL`
3295             assign_eax sse_regs `appOL`
3296             call                `appOL`
3297             assign_code dest_regs)
3298
3299   where
3300     arg_size = 8 -- always, at the mo
3301
3302     load_args :: [(CmmExpr,MachHint)]
3303               -> [Reg]                  -- int regs avail for args
3304               -> [Reg]                  -- FP regs avail for args
3305               -> Int -> InstrBlock
3306               -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
3307     load_args args [] [] sse_regs code = return (args, sse_regs, code)
3308         -- no more regs to use
3309     load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
3310         -- no more args to push
3311     load_args ((arg,hint) : rest) aregs fregs sse_regs code
3312         | isFloatingRep arg_rep =
3313         case fregs of
3314           [] -> push_this_arg
3315           (r:rs) -> do
3316              arg_code <- getAnyReg arg
3317              load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
3318         | otherwise =
3319         case aregs of
3320           [] -> push_this_arg
3321           (r:rs) -> do
3322              arg_code <- getAnyReg arg
3323              load_args rest rs fregs sse_regs (code `appOL` arg_code r)
3324         where
3325           arg_rep = cmmExprRep arg
3326
3327           push_this_arg = do
3328             (args',sse',code') <- load_args rest aregs fregs sse_regs code
3329             return ((arg,hint):args', sse', code')
3330
3331     push_args [] code = return code
3332     push_args ((arg,hint):rest) code
3333        | isFloatingRep arg_rep = do
3334          (arg_reg, arg_code) <- getSomeReg arg
3335          delta <- getDeltaNat
3336          setDeltaNat (delta-arg_size)
3337          let code' = code `appOL` toOL [
3338                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3339                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3340                         DELTA (delta-arg_size)]
3341          push_args rest code'
3342
3343        | otherwise = do
3344        -- we only ever generate word-sized function arguments.  Promotion
3345        -- has already happened: our Int8# type is kept sign-extended
3346        -- in an Int#, for example.
3347          ASSERT(arg_rep == I64) return ()
3348          (arg_op, arg_code) <- getOperand arg
3349          delta <- getDeltaNat
3350          setDeltaNat (delta-arg_size)
3351          let code' = code `appOL` toOL [PUSH I64 arg_op,
3352                                         DELTA (delta-arg_size)]
3353          push_args rest code'
3354         where
3355           arg_rep = cmmExprRep arg
3356 #endif
3357
3358 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3359
3360 #if sparc_TARGET_ARCH
3361 {-
3362    The SPARC calling convention is an absolute
3363    nightmare.  The first 6x32 bits of arguments are mapped into
3364    %o0 through %o5, and the remaining arguments are dumped to the
3365    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3366
3367    If we have to put args on the stack, move %o6==%sp down by
3368    the number of words to go on the stack, to ensure there's enough space.
3369
3370    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3371    16 words above the stack pointer is a word for the address of
3372    a structure return value.  I use this as a temporary location
3373    for moving values from float to int regs.  Certainly it isn't
3374    safe to put anything in the 16 words starting at %sp, since
3375    this area can get trashed at any time due to window overflows
3376    caused by signal handlers.
3377
3378    A final complication (if the above isn't enough) is that
3379    we can't blithely calculate the arguments one by one into
3380    %o0 .. %o5.  Consider the following nested calls:
3381
3382        fff a (fff b c)
3383
3384    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3385    the inner call will itself use %o0, which trashes the value put there
3386    in preparation for the outer call.  Upshot: we need to calculate the
3387    args into temporary regs, and move those to arg regs or onto the
3388    stack only immediately prior to the call proper.  Sigh.
3389 -}
3390
3391 genCCall fn cconv kind args
3392   = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3393     let
3394         (argcodes, vregss) = unzip argcode_and_vregs
3395         n_argRegs          = length allArgRegs
3396         n_argRegs_used     = min (length vregs) n_argRegs
3397         vregs              = concat vregss
3398     in
3399     -- deal with static vs dynamic call targets
3400     (case fn of
3401         Left t_static
3402            -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3403         Right dyn
3404            -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3405               return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3406     )
3407                                 `thenNat` \ callinsns ->
3408     let
3409         argcode = concatOL argcodes
3410         (move_sp_down, move_sp_up)
3411            = let diff = length vregs - n_argRegs
3412                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3413              in  if   nn <= 0
3414                  then (nilOL, nilOL)
3415                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3416         transfer_code
3417            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3418     in
3419         return (argcode       `appOL`
3420                    move_sp_down  `appOL`
3421                    transfer_code `appOL`
3422                    callinsns     `appOL`
3423                    unitOL NOP    `appOL`
3424                    move_sp_up)
3425   where
3426      -- function names that begin with '.' are assumed to be special
3427      -- internally generated names like '.mul,' which don't get an
3428      -- underscore prefix
3429      -- ToDo:needed (WDP 96/03) ???
3430      fn_static = unLeft fn
3431      fn__2 = case (headFS fn_static) of
3432                 '.' -> ImmLit (ftext fn_static)
3433                 _   -> ImmCLbl (mkForeignLabel fn_static False)
3434
3435      -- move args from the integer vregs into which they have been
3436      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3437      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3438
3439      move_final [] _ offset          -- all args done
3440         = []
3441
3442      move_final (v:vs) [] offset     -- out of aregs; move to stack
3443         = ST W v (spRel offset)
3444           : move_final vs [] (offset+1)
3445
3446      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3447         = OR False g0 (RIReg v) a
3448           : move_final vs az offset
3449
3450      -- generate code to calculate an argument, and move it into one
3451      -- or two integer vregs.
3452      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3453      arg_to_int_vregs arg
3454         | is64BitRep (repOfCmmExpr arg)
3455         = iselExpr64 arg                `thenNat` \ (ChildCode64 code vr_lo) ->
3456           let r_lo = VirtualRegI vr_lo
3457               r_hi = getHiVRegFromLo r_lo
3458           in  return (code, [r_hi, r_lo])
3459         | otherwise
3460         = getRegister arg                     `thenNat` \ register ->
3461           getNewRegNat (registerRep register) `thenNat` \ tmp ->
3462           let code = registerCode register tmp
3463               src  = registerName register tmp
3464               pk   = registerRep register
3465           in
3466           -- the value is in src.  Get it into 1 or 2 int vregs.
3467           case pk of
3468              F64 ->
3469                 getNewRegNat WordRep  `thenNat` \ v1 ->
3470                 getNewRegNat WordRep  `thenNat` \ v2 ->
3471                 return (
3472                    code                          `snocOL`
3473                    FMOV DF src f0                `snocOL`
3474                    ST   F  f0 (spRel 16)         `snocOL`
3475                    LD   W  (spRel 16) v1         `snocOL`
3476                    ST   F  (fPair f0) (spRel 16) `snocOL`
3477                    LD   W  (spRel 16) v2
3478                    ,
3479                    [v1,v2]
3480                 )
3481              F32 ->
3482                 getNewRegNat WordRep  `thenNat` \ v1 ->
3483                 return (
3484                    code                    `snocOL`
3485                    ST   F  src (spRel 16)  `snocOL`
3486                    LD   W  (spRel 16) v1
3487                    ,
3488                    [v1]
3489                 )
3490              other ->
3491                 getNewRegNat WordRep  `thenNat` \ v1 ->
3492                 return (
3493                    code `snocOL` OR False g0 (RIReg src) v1
3494                    ,
3495                    [v1]
3496                 )
3497 #endif /* sparc_TARGET_ARCH */
3498
3499 #if powerpc_TARGET_ARCH
3500
3501 #if darwin_TARGET_OS || linux_TARGET_OS
3502 {-
3503     The PowerPC calling convention for Darwin/Mac OS X
3504     is described in Apple's document
3505     "Inside Mac OS X - Mach-O Runtime Architecture".
3506
3507     PowerPC Linux uses the System V Release 4 Calling Convention
3508     for PowerPC. It is described in the
3509     "System V Application Binary Interface PowerPC Processor Supplement".
3510
3511     Both conventions are similar:
3512     Parameters may be passed in general-purpose registers starting at r3, in
3513     floating point registers starting at f1, or on the stack.
3514
3515     But there are substantial differences:
3516     * The number of registers used for parameter passing and the exact set of
3517       nonvolatile registers differs (see MachRegs.lhs).
3518     * On Darwin, stack space is always reserved for parameters, even if they are
3519       passed in registers. The called routine may choose to save parameters from
3520       registers to the corresponding space on the stack.
3521     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3522       parameter is passed in an FPR.
3523     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3524       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3525       Darwin just treats an I64 like two separate I32s (high word first).
3526     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3527       4-byte aligned like everything else on Darwin.
3528     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3529       PowerPC Linux does not agree, so neither do we.
3530
3531     According to both conventions, The parameter area should be part of the
3532     caller's stack frame, allocated in the caller's prologue code (large enough
3533     to hold the parameter lists for all called routines). The NCG already
3534     uses the stack for register spilling, leaving 64 bytes free at the top.
3535     If we need a larger parameter area than that, we just allocate a new stack
3536     frame just before ccalling.
3537 -}
3538
3539 genCCall target dest_regs argsAndHints vols
3540   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3541         -- we rely on argument promotion in the codeGen
3542     do
3543         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3544                                                         (zip args argReps)
3545                                                         allArgRegs allFPArgRegs
3546                                                         initialStackOffset
3547                                                         (toOL []) []
3548
3549         (labelOrExpr, reduceToF32) <- case target of
3550             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3551             CmmForeignCall expr conv -> return  (Right expr, False)
3552             CmmPrim mop -> outOfLineFloatOp mop
3553
3554         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3555             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3556
3557         case labelOrExpr of
3558             Left lbl -> do
3559                 return (         codeBefore
3560                         `snocOL` BL lbl usedRegs
3561                         `appOL`  codeAfter)
3562             Right dyn -> do
3563                 (dynReg, dynCode) <- getSomeReg dyn
3564                 return (         dynCode
3565                         `snocOL` MTCTR dynReg
3566                         `appOL`  codeBefore
3567                         `snocOL` BCTRL usedRegs
3568                         `appOL`  codeAfter)
3569     where
3570 #if darwin_TARGET_OS
3571         initialStackOffset = 24
3572             -- size of linkage area + size of arguments, in bytes
3573         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3574                                        map machRepByteWidth argReps
3575 #elif linux_TARGET_OS
3576         initialStackOffset = 8
3577         stackDelta finalStack = roundTo 16 finalStack
3578 #endif
3579         args = map fst argsAndHints
3580         argReps = map cmmExprRep args
3581
3582         roundTo a x | x `mod` a == 0 = x
3583                     | otherwise = x + a - (x `mod` a)
3584
3585         move_sp_down finalStack
3586                | delta > 64 =
3587                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3588                               DELTA (-delta)]
3589                | otherwise = nilOL
3590                where delta = stackDelta finalStack
3591         move_sp_up finalStack
3592                | delta > 64 =
3593                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3594                               DELTA 0]
3595                | otherwise = nilOL
3596                where delta = stackDelta finalStack
3597
3598
3599         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3600         passArguments ((arg,I64):args) gprs fprs stackOffset
3601                accumCode accumUsed =
3602             do
3603                 ChildCode64 code vr_lo <- iselExpr64 arg
3604                 let vr_hi = getHiVRegFromLo vr_lo
3605
3606 #if darwin_TARGET_OS
3607                 passArguments args
3608                               (drop 2 gprs)
3609                               fprs
3610                               (stackOffset+8)
3611                               (accumCode `appOL` code
3612                                     `snocOL` storeWord vr_hi gprs stackOffset
3613                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3614                               ((take 2 gprs) ++ accumUsed)
3615             where
3616                 storeWord vr (gpr:_) offset = MR gpr vr
3617                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3618
3619 #elif linux_TARGET_OS
3620                 let stackOffset' = roundTo 8 stackOffset
3621                     stackCode = accumCode `appOL` code
3622                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3623                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3624                     regCode hireg loreg =
3625                         accumCode `appOL` code
3626                             `snocOL` MR hireg vr_hi
3627                             `snocOL` MR loreg vr_lo
3628
3629                 case gprs of
3630                     hireg : loreg : regs | even (length gprs) ->
3631                         passArguments args regs fprs stackOffset
3632                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3633                     _skipped : hireg : loreg : regs ->
3634                         passArguments args regs fprs stackOffset
3635                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3636                     _ -> -- only one or no regs left
3637                         passArguments args [] fprs (stackOffset'+8)
3638                                       stackCode accumUsed
3639 #endif
3640
3641         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3642             | reg : _ <- regs = do
3643                 register <- getRegister arg
3644                 let code = case register of
3645                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3646                             Any _ acode -> acode reg
3647                 passArguments args
3648                               (drop nGprs gprs)
3649                               (drop nFprs fprs)
3650 #if darwin_TARGET_OS
3651         -- The Darwin ABI requires that we reserve stack slots for register parameters
3652                               (stackOffset + stackBytes)
3653 #elif linux_TARGET_OS
3654         -- ... the SysV ABI doesn't.
3655                               stackOffset
3656 #endif
3657                               (accumCode `appOL` code)
3658                               (reg : accumUsed)
3659             | otherwise = do
3660                 (vr, code) <- getSomeReg arg
3661                 passArguments args
3662                               (drop nGprs gprs)
3663                               (drop nFprs fprs)
3664                               (stackOffset' + stackBytes)
3665                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3666                               accumUsed
3667             where
3668 #if darwin_TARGET_OS
3669         -- stackOffset is at least 4-byte aligned
3670         -- The Darwin ABI is happy with that.
3671                 stackOffset' = stackOffset
3672 #else
3673         -- ... the SysV ABI requires 8-byte alignment for doubles.
3674                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3675                              | otherwise  =           stackOffset
3676 #endif
3677                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3678                 (nGprs, nFprs, stackBytes, regs) = case rep of
3679                     I32 -> (1, 0, 4, gprs)
3680 #if darwin_TARGET_OS
3681         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3682         -- we use the FPRs.
3683                     F32 -> (1, 1, 4, fprs)
3684                     F64 -> (2, 1, 8, fprs)
3685 #elif linux_TARGET_OS
3686         -- ... the SysV ABI doesn't.
3687                     F32 -> (0, 1, 4, fprs)
3688                     F64 -> (0, 1, 8, fprs)
3689 #endif
3690
3691         moveResult reduceToF32 =
3692             case dest_regs of
3693                 [] -> nilOL
3694                 [(dest, _hint)]
3695                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3696                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3697                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3698                                           MR r_dest r4]
3699                     | otherwise -> unitOL (MR r_dest r3)
3700                     where rep = cmmRegRep dest
3701                           r_dest = getRegisterReg dest
3702
3703         outOfLineFloatOp mop =
3704             do
3705                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3706                               mkForeignLabel functionName Nothing True
3707                 let mopLabelOrExpr = case mopExpr of
3708                         CmmLit (CmmLabel lbl) -> Left lbl
3709                         _ -> Right mopExpr
3710                 return (mopLabelOrExpr, reduce)
3711             where
3712                 (functionName, reduce) = case mop of
3713                     MO_F32_Exp   -> (FSLIT("exp"), True)
3714                     MO_F32_Log   -> (FSLIT("log"), True)
3715                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3716
3717                     MO_F32_Sin   -> (FSLIT("sin"), True)
3718                     MO_F32_Cos   -> (FSLIT("cos"), True)
3719                     MO_F32_Tan   -> (FSLIT("tan"), True)
3720
3721                     MO_F32_Asin  -> (FSLIT("asin"), True)
3722                     MO_F32_Acos  -> (FSLIT("acos"), True)
3723                     MO_F32_Atan  -> (FSLIT("atan"), True)
3724
3725                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3726                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3727                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3728                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3729
3730                     MO_F64_Exp   -> (FSLIT("exp"), False)
3731                     MO_F64_Log   -> (FSLIT("log"), False)
3732                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3733
3734                     MO_F64_Sin   -> (FSLIT("sin"), False)
3735                     MO_F64_Cos   -> (FSLIT("cos"), False)
3736                     MO_F64_Tan   -> (FSLIT("tan"), False)
3737
3738                     MO_F64_Asin  -> (FSLIT("asin"), False)
3739                     MO_F64_Acos  -> (FSLIT("acos"), False)
3740                     MO_F64_Atan  -> (FSLIT("atan"), False)
3741
3742                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3743                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3744                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3745                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3746                     other -> pprPanic "genCCall(ppc): unknown callish op"
3747                                     (pprCallishMachOp other)
3748
3749 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3750
3751 #endif /* powerpc_TARGET_ARCH */
3752
3753
3754 -- -----------------------------------------------------------------------------
3755 -- Generating a table-branch
3756
3757 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3758
3759 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3760 genSwitch expr ids = do
3761   (reg,e_code) <- getSomeReg expr
3762   lbl <- getNewLabelNat
3763   let
3764         jumpTable = map jumpTableEntry ids
3765         op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
3766         code = e_code `appOL` toOL [
3767                 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3768                 JMP_TBL op [ id | Just id <- ids ]
3769              ]
3770   -- in
3771   return code
3772 #elif powerpc_TARGET_ARCH
3773 genSwitch expr ids
3774   | opt_PIC
3775   = do
3776         (reg,e_code) <- getSomeReg expr
3777         tmp <- getNewRegNat I32
3778         lbl <- getNewLabelNat
3779         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3780         (tableReg,t_code) <- getSomeReg $ dynRef
3781         let
3782             jumpTable = map jumpTableEntryRel ids
3783
3784             jumpTableEntryRel Nothing
3785                 = CmmStaticLit (CmmInt 0 wordRep)
3786             jumpTableEntryRel (Just (BlockId id))
3787                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3788                 where blockLabel = mkAsmTempLabel id
3789
3790             code = e_code `appOL` t_code `appOL` toOL [
3791                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3792                             SLW tmp reg (RIImm (ImmInt 2)),
3793                             LD I32 tmp (AddrRegReg tableReg tmp),
3794                             ADD tmp tmp (RIReg tableReg),
3795                             MTCTR tmp,
3796                             BCTR [ id | Just id <- ids ]
3797                     ]
3798         return code
3799   | otherwise
3800   = do
3801         (reg,e_code) <- getSomeReg expr
3802         tmp <- getNewRegNat I32
3803         lbl <- getNewLabelNat
3804         let
3805             jumpTable = map jumpTableEntry ids
3806
3807             code = e_code `appOL` toOL [
3808                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3809                             SLW tmp reg (RIImm (ImmInt 2)),
3810                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3811                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3812                             MTCTR tmp,
3813                             BCTR [ id | Just id <- ids ]
3814                     ]
3815         return code
3816 #else
3817 genSwitch expr ids = panic "ToDo: genSwitch"
3818 #endif
3819
3820 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3821 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3822     where blockLabel = mkAsmTempLabel id
3823
3824 -- -----------------------------------------------------------------------------
3825 -- Support bits
3826 -- -----------------------------------------------------------------------------
3827
3828
3829 -- -----------------------------------------------------------------------------
3830 -- 'condIntReg' and 'condFltReg': condition codes into registers
3831
3832 -- Turn those condition codes into integers now (when they appear on
3833 -- the right hand side of an assignment).
3834 --
3835 -- (If applicable) Do not fill the delay slots here; you will confuse the
3836 -- register allocator.
3837
3838 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3839
3840 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3841
3842 #if alpha_TARGET_ARCH
3843 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3844 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3845 #endif /* alpha_TARGET_ARCH */
3846
3847 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3848
3849 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3850
3851 condIntReg cond x y = do
3852   CondCode _ cond cond_code <- condIntCode cond x y
3853   tmp <- getNewRegNat I8
3854   let
3855         code dst = cond_code `appOL` toOL [
3856                     SETCC cond (OpReg tmp),
3857                     MOV I32 (OpReg tmp) (OpReg dst),
3858                     AND I32 (OpImm (ImmInt 1)) (OpReg dst)
3859                   ]
3860                    -- NB. (1) Tha AND is needed here because the x86 only
3861                    -- sets the low byte in the SETCC instruction.
3862                    -- NB. (2) The extra temporary register is a hack to
3863                    -- work around the fact that the setcc instructions only
3864                    -- accept byte registers.  dst might not be a byte-able reg,
3865                    -- but currently all free registers are byte-able, so we're
3866                    -- guaranteed that a new temporary is byte-able.
3867   -- in
3868   return (Any I32 code)
3869
3870
3871 condFltReg cond x y = do
3872   lbl1 <- getBlockIdNat
3873   lbl2 <- getBlockIdNat
3874   CondCode _ cond cond_code <- condFltCode cond x y
3875   let
3876         code dst = cond_code `appOL` toOL [
3877             JXX cond lbl1,
3878             MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
3879             JXX ALWAYS lbl2,
3880             NEWBLOCK lbl1,
3881             MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
3882             JXX ALWAYS lbl2,
3883             NEWBLOCK lbl2]
3884                 -- SIGH, have to split up this block somehow...
3885   -- in
3886   return (Any I32 code)
3887
3888 #endif /* i386_TARGET_ARCH */
3889
3890 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3891
3892 #if sparc_TARGET_ARCH
3893
3894 condIntReg EQQ x (StInt 0)
3895   = getRegister x               `thenNat` \ register ->
3896     getNewRegNat IntRep         `thenNat` \ tmp ->
3897     let
3898         code = registerCode register tmp
3899         src  = registerName register tmp
3900         code__2 dst = code `appOL` toOL [
3901             SUB False True g0 (RIReg src) g0,
3902             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3903     in
3904     return (Any IntRep code__2)
3905
3906 condIntReg EQQ x y
3907   = getRegister x               `thenNat` \ register1 ->
3908     getRegister y               `thenNat` \ register2 ->
3909     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3910     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3911     let
3912         code1 = registerCode register1 tmp1
3913         src1  = registerName register1 tmp1
3914         code2 = registerCode register2 tmp2
3915         src2  = registerName register2 tmp2
3916         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3917             XOR False src1 (RIReg src2) dst,
3918             SUB False True g0 (RIReg dst) g0,
3919             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3920     in
3921     return (Any IntRep code__2)
3922
3923 condIntReg NE x (StInt 0)
3924   = getRegister x               `thenNat` \ register ->
3925     getNewRegNat IntRep         `thenNat` \ tmp ->
3926     let
3927         code = registerCode register tmp
3928         src  = registerName register tmp
3929         code__2 dst = code `appOL` toOL [
3930             SUB False True g0 (RIReg src) g0,
3931             ADD True False g0 (RIImm (ImmInt 0)) dst]
3932     in
3933     return (Any IntRep code__2)
3934
3935 condIntReg NE x y
3936   = getRegister x               `thenNat` \ register1 ->
3937     getRegister y               `thenNat` \ register2 ->
3938     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3939     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3940     let
3941         code1 = registerCode register1 tmp1
3942         src1  = registerName register1 tmp1
3943         code2 = registerCode register2 tmp2
3944         src2  = registerName register2 tmp2
3945         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3946             XOR False src1 (RIReg src2) dst,
3947             SUB False True g0 (RIReg dst) g0,
3948             ADD True False g0 (RIImm (ImmInt 0)) dst]
3949     in
3950     return (Any IntRep code__2)
3951
3952 condIntReg cond x y
3953   = getBlockIdNat               `thenNat` \ lbl1 ->
3954     getBlockIdNat               `thenNat` \ lbl2 ->
3955     condIntCode cond x y        `thenNat` \ condition ->
3956     let
3957         code = condCode condition
3958         cond = condName condition
3959         code__2 dst = code `appOL` toOL [
3960             BI cond False (ImmCLbl lbl1), NOP,
3961             OR False g0 (RIImm (ImmInt 0)) dst,
3962             BI ALWAYS False (ImmCLbl lbl2), NOP,
3963             NEWBLOCK lbl1,
3964             OR False g0 (RIImm (ImmInt 1)) dst,
3965             NEWBLOCK lbl2]
3966     in
3967     return (Any IntRep code__2)
3968
3969 condFltReg cond x y
3970   = getBlockIdNat               `thenNat` \ lbl1 ->
3971     getBlockIdNat               `thenNat` \ lbl2 ->
3972     condFltCode cond x y        `thenNat` \ condition ->
3973     let
3974         code = condCode condition
3975         cond = condName condition
3976         code__2 dst = code `appOL` toOL [
3977             NOP,
3978             BF cond False (ImmCLbl lbl1), NOP,
3979             OR False g0 (RIImm (ImmInt 0)) dst,
3980             BI ALWAYS False (ImmCLbl lbl2), NOP,
3981             NEWBLOCK lbl1,
3982             OR False g0 (RIImm (ImmInt 1)) dst,
3983             NEWBLOCK lbl2]
3984     in
3985     return (Any IntRep code__2)
3986
3987 #endif /* sparc_TARGET_ARCH */
3988
3989 #if powerpc_TARGET_ARCH
3990 condReg getCond = do
3991     lbl1 <- getBlockIdNat
3992     lbl2 <- getBlockIdNat
3993     CondCode _ cond cond_code <- getCond
3994     let
3995 {-        code dst = cond_code `appOL` toOL [
3996                 BCC cond lbl1,
3997                 LI dst (ImmInt 0),
3998                 BCC ALWAYS lbl2,
3999                 NEWBLOCK lbl1,
4000                 LI dst (ImmInt 1),
4001                 BCC ALWAYS lbl2,
4002                 NEWBLOCK lbl2
4003             ]-}
4004         code dst = cond_code
4005             `appOL` negate_code
4006             `appOL` toOL [
4007                 MFCR dst,
4008                 RLWINM dst dst (bit + 1) 31 31
4009             ]
4010
4011         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4012                     | otherwise = nilOL
4013
4014         (bit, do_negate) = case cond of
4015             LTT -> (0, False)
4016             LE  -> (1, True)
4017             EQQ -> (2, False)
4018             GE  -> (0, True)
4019             GTT -> (1, False)
4020
4021             NE  -> (2, True)
4022
4023             LU  -> (0, False)
4024             LEU -> (1, True)
4025             GEU -> (0, True)
4026             GU  -> (1, False)
4027
4028     return (Any I32 code)
4029
4030 condIntReg cond x y = condReg (condIntCode cond x y)
4031 condFltReg cond x y = condReg (condFltCode cond x y)
4032 #endif /* powerpc_TARGET_ARCH */
4033
4034
4035 -- -----------------------------------------------------------------------------
4036 -- 'trivial*Code': deal with trivial instructions
4037
4038 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4039 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4040 -- Only look for constants on the right hand side, because that's
4041 -- where the generic optimizer will have put them.
4042
4043 -- Similarly, for unary instructions, we don't have to worry about
4044 -- matching an StInt as the argument, because genericOpt will already
4045 -- have handled the constant-folding.
4046
4047 trivialCode
4048     :: MachRep
4049     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4050       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4051                      -> Maybe (Operand -> Operand -> Instr)
4052       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4053                      -> Maybe (Operand -> Operand -> Instr)
4054       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4055       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4056       ,)))))
4057     -> CmmExpr -> CmmExpr -- the two arguments
4058     -> NatM Register
4059
4060 #ifndef powerpc_TARGET_ARCH
4061 trivialFCode
4062     :: MachRep
4063     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4064       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4065       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4066       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4067       ,))))
4068     -> CmmExpr -> CmmExpr -- the two arguments
4069     -> NatM Register
4070 #endif
4071
4072 trivialUCode
4073     :: MachRep
4074     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4075       ,IF_ARCH_i386 ((Operand -> Instr)
4076       ,IF_ARCH_x86_64 ((Operand -> Instr)
4077       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4078       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4079       ,)))))
4080     -> CmmExpr  -- the one argument
4081     -> NatM Register
4082
4083 #ifndef powerpc_TARGET_ARCH
4084 trivialUFCode
4085     :: MachRep
4086     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4087       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4088       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4089       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4090       ,))))
4091     -> CmmExpr -- the one argument
4092     -> NatM Register
4093 #endif
4094
4095 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4096
4097 #if alpha_TARGET_ARCH
4098
4099 trivialCode instr x (StInt y)
4100   | fits8Bits y
4101   = getRegister x               `thenNat` \ register ->
4102     getNewRegNat IntRep         `thenNat` \ tmp ->
4103     let
4104         code = registerCode register tmp
4105         src1 = registerName register tmp
4106         src2 = ImmInt (fromInteger y)
4107         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4108     in
4109     return (Any IntRep code__2)
4110
4111 trivialCode instr x y
4112   = getRegister x               `thenNat` \ register1 ->
4113     getRegister y               `thenNat` \ register2 ->
4114     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4115     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4116     let
4117         code1 = registerCode register1 tmp1 []
4118         src1  = registerName register1 tmp1
4119         code2 = registerCode register2 tmp2 []
4120         src2  = registerName register2 tmp2
4121         code__2 dst = asmSeqThen [code1, code2] .
4122                      mkSeqInstr (instr src1 (RIReg src2) dst)
4123     in
4124     return (Any IntRep code__2)
4125
4126 ------------
4127 trivialUCode instr x
4128   = getRegister x               `thenNat` \ register ->
4129     getNewRegNat IntRep         `thenNat` \ tmp ->
4130     let
4131         code = registerCode register tmp
4132         src  = registerName register tmp
4133         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4134     in
4135     return (Any IntRep code__2)
4136
4137 ------------
4138 trivialFCode _ instr x y
4139   = getRegister x               `thenNat` \ register1 ->
4140     getRegister y               `thenNat` \ register2 ->
4141     getNewRegNat F64    `thenNat` \ tmp1 ->
4142     getNewRegNat F64    `thenNat` \ tmp2 ->
4143     let
4144         code1 = registerCode register1 tmp1
4145         src1  = registerName register1 tmp1
4146
4147         code2 = registerCode register2 tmp2
4148         src2  = registerName register2 tmp2
4149
4150         code__2 dst = asmSeqThen [code1 [], code2 []] .
4151                       mkSeqInstr (instr src1 src2 dst)
4152     in
4153     return (Any F64 code__2)
4154
4155 trivialUFCode _ instr x
4156   = getRegister x               `thenNat` \ register ->
4157     getNewRegNat F64    `thenNat` \ tmp ->
4158     let
4159         code = registerCode register tmp
4160         src  = registerName register tmp
4161         code__2 dst = code . mkSeqInstr (instr src dst)
4162     in
4163     return (Any F64 code__2)
4164
4165 #endif /* alpha_TARGET_ARCH */
4166
4167 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4168
4169 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4170
4171 {-
4172 The Rules of the Game are:
4173
4174 * You cannot assume anything about the destination register dst;
4175   it may be anything, including a fixed reg.
4176
4177 * You may compute an operand into a fixed reg, but you may not
4178   subsequently change the contents of that fixed reg.  If you
4179   want to do so, first copy the value either to a temporary
4180   or into dst.  You are free to modify dst even if it happens
4181   to be a fixed reg -- that's not your problem.
4182
4183 * You cannot assume that a fixed reg will stay live over an
4184   arbitrary computation.  The same applies to the dst reg.
4185
4186 * Temporary regs obtained from getNewRegNat are distinct from
4187   each other and from all other regs, and stay live over
4188   arbitrary computations.
4189
4190 --------------------
4191
4192 SDM's version of The Rules:
4193
4194 * If getRegister returns Any, that means it can generate correct
4195   code which places the result in any register, period.  Even if that
4196   register happens to be read during the computation.
4197
4198   Corollary #1: this means that if you are generating code for an
4199   operation with two arbitrary operands, you cannot assign the result
4200   of the first operand into the destination register before computing
4201   the second operand.  The second operand might require the old value
4202   of the destination register.
4203
4204   Corollary #2: A function might be able to generate more efficient
4205   code if it knows the destination register is a new temporary (and
4206   therefore not read by any of the sub-computations).
4207
4208 * If getRegister returns Any, then the code it generates may modify only:
4209         (a) fresh temporaries
4210         (b) the destination register
4211         (c) known registers (eg. %ecx is used by shifts)
4212   In particular, it may *not* modify global registers, unless the global
4213   register happens to be the destination register.
4214 -}
4215
4216 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4217   | not (is64BitLit lit_a) = do
4218   b_code <- getAnyReg b
4219   let
4220        code dst
4221          = b_code dst `snocOL`
4222            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4223   -- in
4224   return (Any rep code)
4225
4226 trivialCode rep instr maybe_revinstr a b = do
4227   (b_op, b_code) <- getNonClobberedOperand b
4228   a_code <- getAnyReg a
4229   tmp <- getNewRegNat rep
4230   let
4231      -- We want the value of b to stay alive across the computation of a.
4232      -- But, we want to calculate a straight into the destination register,
4233      -- because the instruction only has two operands (dst := dst `op` src).
4234      -- The troublesome case is when the result of b is in the same register
4235      -- as the destination reg.  In this case, we have to save b in a
4236      -- new temporary across the computation of a.
4237      code dst
4238         | dst `clashesWith` b_op =
4239                 b_code `appOL`
4240                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4241                 a_code dst `snocOL`
4242                 instr (OpReg tmp) (OpReg dst)
4243         | otherwise =
4244                 b_code `appOL`
4245                 a_code dst `snocOL`
4246                 instr b_op (OpReg dst)
4247   -- in
4248   return (Any rep code)
4249  where
4250   reg `clashesWith` OpReg reg2   = reg == reg2
4251   reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
4252   reg `clashesWith` _            = False
4253
4254 -----------
4255
4256 trivialUCode rep instr x = do
4257   x_code <- getAnyReg x
4258   let
4259      code dst =
4260         x_code dst `snocOL`
4261         instr (OpReg dst)
4262   -- in
4263   return (Any rep code)
4264
4265 -----------
4266
4267 #if i386_TARGET_ARCH
4268
4269 trivialFCode pk instr x y = do
4270   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4271   (y_reg, y_code) <- getSomeReg y
4272   let
4273      code dst =
4274         x_code `appOL`
4275         y_code `snocOL`
4276         instr pk x_reg y_reg dst
4277   -- in
4278   return (Any pk code)
4279
4280 #endif
4281
4282 #if x86_64_TARGET_ARCH
4283
4284 -- We use the 2-operand SSE2 floating pt instructions.  ToDo: improve on
4285 -- this by using some of the special cases in trivialCode above.
4286 trivialFCode pk instr x y = do
4287   (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
4288   x_code <- getAnyReg x
4289   let
4290      code dst =
4291         y_code `appOL`
4292         x_code dst `snocOL`
4293         instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
4294                  (IF_ARCH_x86_64(OpReg,) dst)
4295   -- in
4296   return (Any pk code)
4297
4298 #endif
4299
4300 -------------
4301
4302 trivialUFCode rep instr x = do
4303   (x_reg, x_code) <- getSomeReg x
4304   let
4305      code dst =
4306         x_code `snocOL`
4307         instr x_reg dst
4308   -- in
4309   return (Any rep code)
4310
4311 #endif /* i386_TARGET_ARCH */
4312
4313 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4314
4315 #if sparc_TARGET_ARCH
4316
4317 trivialCode instr x (StInt y)
4318   | fits13Bits y
4319   = getRegister x               `thenNat` \ register ->
4320     getNewRegNat IntRep         `thenNat` \ tmp ->
4321     let
4322         code = registerCode register tmp
4323         src1 = registerName register tmp
4324         src2 = ImmInt (fromInteger y)
4325         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4326     in
4327     return (Any IntRep code__2)
4328
4329 trivialCode instr x y
4330   = getRegister x               `thenNat` \ register1 ->
4331     getRegister y               `thenNat` \ register2 ->
4332     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4333     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4334     let
4335         code1 = registerCode register1 tmp1
4336         src1  = registerName register1 tmp1
4337         code2 = registerCode register2 tmp2
4338         src2  = registerName register2 tmp2
4339         code__2 dst = code1 `appOL` code2 `snocOL`
4340                       instr src1 (RIReg src2) dst
4341     in
4342     return (Any IntRep code__2)
4343
4344 ------------
4345 trivialFCode pk instr x y
4346   = getRegister x               `thenNat` \ register1 ->
4347     getRegister y               `thenNat` \ register2 ->
4348     getNewRegNat (registerRep register1)
4349                                 `thenNat` \ tmp1 ->
4350     getNewRegNat (registerRep register2)
4351                                 `thenNat` \ tmp2 ->
4352     getNewRegNat F64    `thenNat` \ tmp ->
4353     let
4354         promote x = FxTOy F DF x tmp
4355
4356         pk1   = registerRep register1
4357         code1 = registerCode register1 tmp1
4358         src1  = registerName register1 tmp1
4359
4360         pk2   = registerRep register2
4361         code2 = registerCode register2 tmp2
4362         src2  = registerName register2 tmp2
4363
4364         code__2 dst =
4365                 if pk1 == pk2 then
4366                     code1 `appOL` code2 `snocOL`
4367                     instr (primRepToSize pk) src1 src2 dst
4368                 else if pk1 == F32 then
4369                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4370                     instr DF tmp src2 dst
4371                 else
4372                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4373                     instr DF src1 tmp dst
4374     in
4375     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4376
4377 ------------
4378 trivialUCode instr x
4379   = getRegister x               `thenNat` \ register ->
4380     getNewRegNat IntRep         `thenNat` \ tmp ->
4381     let
4382         code = registerCode register tmp
4383         src  = registerName register tmp
4384         code__2 dst = code `snocOL` instr (RIReg src) dst
4385     in
4386     return (Any IntRep code__2)
4387
4388 -------------
4389 trivialUFCode pk instr x
4390   = getRegister x               `thenNat` \ register ->
4391     getNewRegNat pk             `thenNat` \ tmp ->
4392     let
4393         code = registerCode register tmp
4394         src  = registerName register tmp
4395         code__2 dst = code `snocOL` instr src dst
4396     in
4397     return (Any pk code__2)
4398
4399 #endif /* sparc_TARGET_ARCH */
4400
4401 #if powerpc_TARGET_ARCH
4402
4403 {-
4404 Wolfgang's PowerPC version of The Rules:
4405
4406 A slightly modified version of The Rules to take advantage of the fact
4407 that PowerPC instructions work on all registers and don't implicitly
4408 clobber any fixed registers.
4409
4410 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4411
4412 * If getRegister returns Any, then the code it generates may modify only:
4413         (a) fresh temporaries
4414         (b) the destination register
4415   It may *not* modify global registers, unless the global
4416   register happens to be the destination register.
4417   It may not clobber any other registers. In fact, only ccalls clobber any
4418   fixed registers.
4419   Also, it may not modify the counter register (used by genCCall).
4420
4421   Corollary: If a getRegister for a subexpression returns Fixed, you need
4422   not move it to a fresh temporary before evaluating the next subexpression.
4423   The Fixed register won't be modified.
4424   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4425
4426 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4427   the value of the destination register.
4428 -}
4429
4430 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4431     | Just imm <- makeImmediate rep signed y
4432     = do
4433         (src1, code1) <- getSomeReg x
4434         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4435         return (Any rep code)
4436
4437 trivialCode rep signed instr x y = do
4438     (src1, code1) <- getSomeReg x
4439     (src2, code2) <- getSomeReg y
4440     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4441     return (Any rep code)
4442
4443 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4444     -> CmmExpr -> CmmExpr -> NatM Register
4445 trivialCodeNoImm rep instr x y = do
4446     (src1, code1) <- getSomeReg x
4447     (src2, code2) <- getSomeReg y
4448     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4449     return (Any rep code)
4450
4451 trivialUCode rep instr x = do
4452     (src, code) <- getSomeReg x
4453     let code' dst = code `snocOL` instr dst src
4454     return (Any rep code')
4455
4456 -- There is no "remainder" instruction on the PPC, so we have to do
4457 -- it the hard way.
4458 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4459
4460 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4461     -> CmmExpr -> CmmExpr -> NatM Register
4462 remainderCode rep div x y = do
4463     (src1, code1) <- getSomeReg x
4464     (src2, code2) <- getSomeReg y
4465     let code dst = code1 `appOL` code2 `appOL` toOL [
4466                 div dst src1 src2,
4467                 MULLW dst dst (RIReg src2),
4468                 SUBF dst dst src1
4469             ]
4470     return (Any rep code)
4471
4472 #endif /* powerpc_TARGET_ARCH */
4473
4474
4475 -- -----------------------------------------------------------------------------
4476 --  Coercing to/from integer/floating-point...
4477
4478 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4479 -- conversions.  We have to store temporaries in memory to move
4480 -- between the integer and the floating point register sets.
4481
4482 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4483 -- pretend, on sparc at least, that double and float regs are seperate
4484 -- kinds, so the value has to be computed into one kind before being
4485 -- explicitly "converted" to live in the other kind.
4486
4487 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4488 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4489
4490 #if sparc_TARGET_ARCH
4491 coerceDbl2Flt :: CmmExpr -> NatM Register
4492 coerceFlt2Dbl :: CmmExpr -> NatM Register
4493 #endif
4494
4495 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4496
4497 #if alpha_TARGET_ARCH
4498
4499 coerceInt2FP _ x
4500   = getRegister x               `thenNat` \ register ->
4501     getNewRegNat IntRep         `thenNat` \ reg ->
4502     let
4503         code = registerCode register reg
4504         src  = registerName register reg
4505
4506         code__2 dst = code . mkSeqInstrs [
4507             ST Q src (spRel 0),
4508             LD TF dst (spRel 0),
4509             CVTxy Q TF dst dst]
4510     in
4511     return (Any F64 code__2)
4512
4513 -------------
4514 coerceFP2Int x
4515   = getRegister x               `thenNat` \ register ->
4516     getNewRegNat F64    `thenNat` \ tmp ->
4517     let
4518         code = registerCode register tmp
4519         src  = registerName register tmp
4520
4521         code__2 dst = code . mkSeqInstrs [
4522             CVTxy TF Q src tmp,
4523             ST TF tmp (spRel 0),
4524             LD Q dst (spRel 0)]
4525     in
4526     return (Any IntRep code__2)
4527
4528 #endif /* alpha_TARGET_ARCH */
4529
4530 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4531
4532 #if i386_TARGET_ARCH
4533
4534 coerceInt2FP from to x = do
4535   (x_reg, x_code) <- getSomeReg x
4536   let
4537         opc  = case to of F32 -> GITOF; F64 -> GITOD
4538         code dst = x_code `snocOL` opc x_reg dst
4539         -- ToDo: works for non-I32 reps?
4540   -- in
4541   return (Any to code)
4542
4543 ------------
4544
4545 coerceFP2Int from to x = do
4546   (x_reg, x_code) <- getSomeReg x
4547   let
4548         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4549         code dst = x_code `snocOL` opc x_reg dst
4550         -- ToDo: works for non-I32 reps?
4551   -- in
4552   return (Any to code)
4553
4554 #endif /* i386_TARGET_ARCH */
4555
4556 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4557
4558 #if x86_64_TARGET_ARCH
4559
4560 coerceFP2Int from to x = do
4561   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4562   let
4563         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4564         code dst = x_code `snocOL` opc x_op dst
4565   -- in
4566   return (Any to code) -- works even if the destination rep is <I32
4567
4568 coerceInt2FP from to x = do
4569   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4570   let
4571         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4572         code dst = x_code `snocOL` opc x_op dst
4573   -- in
4574   return (Any to code) -- works even if the destination rep is <I32
4575
4576 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4577 coerceFP2FP to x = do
4578   (x_reg, x_code) <- getSomeReg x
4579   let
4580         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4581         code dst = x_code `snocOL` opc x_reg dst
4582   -- in
4583   return (Any to code)
4584
4585 #endif
4586
4587 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4588
4589 #if sparc_TARGET_ARCH
4590
4591 coerceInt2FP pk x
4592   = getRegister x               `thenNat` \ register ->
4593     getNewRegNat IntRep         `thenNat` \ reg ->
4594     let
4595         code = registerCode register reg
4596         src  = registerName register reg
4597
4598         code__2 dst = code `appOL` toOL [
4599             ST W src (spRel (-2)),
4600             LD W (spRel (-2)) dst,
4601             FxTOy W (primRepToSize pk) dst dst]
4602     in
4603     return (Any pk code__2)
4604
4605 ------------
4606 coerceFP2Int fprep x
4607   = ASSERT(fprep == F64 || fprep == F32)
4608     getRegister x               `thenNat` \ register ->
4609     getNewRegNat fprep          `thenNat` \ reg ->
4610     getNewRegNat F32    `thenNat` \ tmp ->
4611     let
4612         code = registerCode register reg
4613         src  = registerName register reg
4614         code__2 dst = code `appOL` toOL [
4615             FxTOy (primRepToSize fprep) W src tmp,
4616             ST W tmp (spRel (-2)),
4617             LD W (spRel (-2)) dst]
4618     in
4619     return (Any IntRep code__2)
4620
4621 ------------
4622 coerceDbl2Flt x
4623   = getRegister x               `thenNat` \ register ->
4624     getNewRegNat F64    `thenNat` \ tmp ->
4625     let code = registerCode register tmp
4626         src  = registerName register tmp
4627     in
4628         return (Any F32
4629                        (\dst -> code `snocOL` FxTOy DF F src dst))
4630
4631 ------------
4632 coerceFlt2Dbl x
4633   = getRegister x               `thenNat` \ register ->
4634     getNewRegNat F32    `thenNat` \ tmp ->
4635     let code = registerCode register tmp
4636         src  = registerName register tmp
4637     in
4638         return (Any F64
4639                        (\dst -> code `snocOL` FxTOy F DF src dst))
4640
4641 #endif /* sparc_TARGET_ARCH */
4642
4643 #if powerpc_TARGET_ARCH
4644 coerceInt2FP fromRep toRep x = do
4645     (src, code) <- getSomeReg x
4646     lbl <- getNewLabelNat
4647     itmp <- getNewRegNat I32
4648     ftmp <- getNewRegNat F64
4649     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4650     Amode addr addr_code <- getAmode dynRef
4651     let
4652         code' dst = code `appOL` maybe_exts `appOL` toOL [
4653                 LDATA ReadOnlyData
4654                                 [CmmDataLabel lbl,
4655                                  CmmStaticLit (CmmInt 0x43300000 I32),
4656                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4657                 XORIS itmp src (ImmInt 0x8000),
4658                 ST I32 itmp (spRel 3),
4659                 LIS itmp (ImmInt 0x4330),
4660                 ST I32 itmp (spRel 2),
4661                 LD F64 ftmp (spRel 2)
4662             ] `appOL` addr_code `appOL` toOL [
4663                 LD F64 dst addr,
4664                 FSUB F64 dst ftmp dst
4665             ] `appOL` maybe_frsp dst
4666
4667         maybe_exts = case fromRep of
4668                         I8 ->  unitOL $ EXTS I8 src src
4669                         I16 -> unitOL $ EXTS I16 src src
4670                         I32 -> nilOL
4671         maybe_frsp dst = case toRep of
4672                         F32 -> unitOL $ FRSP dst dst
4673                         F64 -> nilOL
4674     return (Any toRep code')
4675
4676 coerceFP2Int fromRep toRep x = do
4677     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4678     (src, code) <- getSomeReg x
4679     tmp <- getNewRegNat F64
4680     let
4681         code' dst = code `appOL` toOL [
4682                 -- convert to int in FP reg
4683             FCTIWZ tmp src,
4684                 -- store value (64bit) from FP to stack
4685             ST F64 tmp (spRel 2),
4686                 -- read low word of value (high word is undefined)
4687             LD I32 dst (spRel 3)]
4688     return (Any toRep code')
4689 #endif /* powerpc_TARGET_ARCH */
4690
4691
4692 -- -----------------------------------------------------------------------------
4693 -- eXTRA_STK_ARGS_HERE
4694
4695 -- We (allegedly) put the first six C-call arguments in registers;
4696 -- where do we start putting the rest of them?
4697
4698 -- Moved from MachInstrs (SDM):
4699
4700 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4701 eXTRA_STK_ARGS_HERE :: Int
4702 eXTRA_STK_ARGS_HERE
4703   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4704 #endif
4705