ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25
  26 -- Our intermediate code:
  27 import PprCmm           ( pprExpr )
  28 import Cmm
  29 import MachOp
  30 import CLabel
  31
  32 -- The rest:
  33 import StaticFlags      ( opt_PIC )
  34 import ForeignCall      ( CCallConv(..) )
  35 import OrdList
  36 import Pretty
  37 import Outputable
  38 import qualified Outputable
  39 import FastString
  40 import FastTypes        ( isFastTrue )
  41 import Constants        ( wORD_SIZE )
  42
  43 #ifdef DEBUG
  44 import Outputable       ( assertPanic )
  45 import TRACE            ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Maybe            ( fromJust )
  50 import DATA_BITS
  51 import DATA_WORD
  52
  53 -- -----------------------------------------------------------------------------
  54 -- Top-level of the instruction selector
  55
  56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  57 -- They are really trees of insns to facilitate fast appending, where a
  58 -- left-to-right traversal (pre-order?) yields the insns in the correct
  59 -- order.
  60
  61 type InstrBlock = OrdList Instr
  62
  63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  64 cmmTopCodeGen (CmmProc info lab params blocks) = do
  65   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  66   picBaseMb <- getPicBaseMaybeNat
  67   let proc = CmmProc info lab params (concat nat_blocks)
  68       tops = proc : concat statics
  69   case picBaseMb of
  70       Just picBase -> initializePicBase picBase tops
  71       Nothing -> return tops
  72
  73 cmmTopCodeGen (CmmData sec dat) = do
  74   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  75
  76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  77 basicBlockCodeGen (BasicBlock id stmts) = do
  78   instrs <- stmtsToInstrs stmts
  79   -- code generation may introduce new basic block boundaries, which
  80   -- are indicated by the NEWBLOCK instruction.  We must split up the
  81   -- instruction stream into basic blocks again.  Also, we extract
  82   -- LDATAs here too.
  83   let
  84         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  85
  86         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  87           = ([], BasicBlock id instrs : blocks, statics)
  88         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  89           = (instrs, blocks, CmmData sec dat:statics)
  90         mkBlocks instr (instrs,blocks,statics)
  91           = (instr:instrs, blocks, statics)
  92   -- in
  93   return (BasicBlock id top : other_blocks, statics)
  94
  95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  96 stmtsToInstrs stmts
  97    = do instrss <- mapM stmtToInstrs stmts
  98         return (concatOL instrss)
  99
 100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 101 stmtToInstrs stmt = case stmt of
 102     CmmNop         -> return nilOL
 103     CmmComment s   -> return (unitOL (COMMENT s))
 104
 105     CmmAssign reg src
 106       | isFloatingRep kind -> assignReg_FltCode kind reg src
 107 #if WORD_SIZE_IN_BITS==32
 108       | kind == I64        -> assignReg_I64Code      reg src
 109 #endif
 110       | otherwise          -> assignReg_IntCode kind reg src
 111         where kind = cmmRegRep reg
 112
 113     CmmStore addr src
 114       | isFloatingRep kind -> assignMem_FltCode kind addr src
 115 #if WORD_SIZE_IN_BITS==32
 116       | kind == I64      -> assignMem_I64Code      addr src
 117 #endif
 118       | otherwise        -> assignMem_IntCode kind addr src
 119         where kind = cmmExprRep src
 120
 121     CmmCall target result_regs args vols
 122        -> genCCall target result_regs args vols
 123
 124     CmmBranch id          -> genBranch id
 125     CmmCondBranch arg id  -> genCondJump id arg
 126     CmmSwitch arg ids     -> genSwitch arg ids
 127     CmmJump arg params    -> genJump arg
 128
 129 -- -----------------------------------------------------------------------------
 130 -- General things for putting together code sequences
 131
 132 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 133 -- CmmExprs into CmmRegOff?
 134 mangleIndexTree :: CmmExpr -> CmmExpr
 135 mangleIndexTree (CmmRegOff reg off)
 136   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 137   where rep = cmmRegRep reg
 138
 139 -- -----------------------------------------------------------------------------
 140 --  Code gen for 64-bit arithmetic on 32-bit platforms
 141
 142 {-
 143 Simple support for generating 64-bit code (ie, 64 bit values and 64
 144 bit assignments) on 32-bit platforms.  Unlike the main code generator
 145 we merely shoot for generating working code as simply as possible, and
 146 pay little attention to code quality.  Specifically, there is no
 147 attempt to deal cleverly with the fixed-vs-floating register
 148 distinction; all values are generated into (pairs of) floating
 149 registers, even if this would mean some redundant reg-reg moves as a
 150 result.  Only one of the VRegUniques is returned, since it will be
 151 of the VRegUniqueLo form, and the upper-half VReg can be determined
 152 by applying getHiVRegFromLo to it.
 153 -}
 154
 155 data ChildCode64        -- a.k.a "Register64"
 156    = ChildCode64
 157         InstrBlock      -- code
 158         Reg             -- the lower 32-bit temporary which contains the
 159                         -- result; use getHiVRegFromLo to find the other
 160                         -- VRegUnique.  Rules of this simplified insn
 161                         -- selection game are therefore that the returned
 162                         -- Reg may be modified
 163
 164 #if WORD_SIZE_IN_BITS==32
 165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 166 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 167 #endif
 168
 169 #ifndef x86_64_TARGET_ARCH
 170 iselExpr64        :: CmmExpr -> NatM ChildCode64
 171 #endif
 172
 173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 174
 175 #if i386_TARGET_ARCH
 176
 177 assignMem_I64Code addrTree valueTree = do
 178   Amode addr addr_code <- getAmode addrTree
 179   ChildCode64 vcode rlo <- iselExpr64 valueTree
 180   let
 181         rhi = getHiVRegFromLo rlo
 182
 183         -- Little-endian store
 184         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 185         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 186   -- in
 187   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 188
 189
 190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 191    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 192    let
 193          r_dst_lo = mkVReg u_dst I32
 194          r_dst_hi = getHiVRegFromLo r_dst_lo
 195          r_src_hi = getHiVRegFromLo r_src_lo
 196          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 197          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 198    -- in
 199    return (
 200         vcode `snocOL` mov_lo `snocOL` mov_hi
 201      )
 202
 203 assignReg_I64Code lvalue valueTree
 204    = panic "assignReg_I64Code(i386): invalid lvalue"
 205
 206 ------------
 207
 208 iselExpr64 (CmmLit (CmmInt i _)) = do
 209   (rlo,rhi) <- getNewRegPairNat I32
 210   let
 211         r = fromIntegral (fromIntegral i :: Word32)
 212         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 213         code = toOL [
 214                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 215                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 216                 ]
 217   -- in
 218   return (ChildCode64 code rlo)
 219
 220 iselExpr64 (CmmLoad addrTree I64) = do
 221    Amode addr addr_code <- getAmode addrTree
 222    (rlo,rhi) <- getNewRegPairNat I32
 223    let
 224         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 225         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 226    -- in
 227    return (
 228             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 229                         rlo
 230      )
 231
 232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 233    = return (ChildCode64 nilOL (mkVReg vu I32))
 234
 235 -- we handle addition, but rather badly
 236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 237    ChildCode64 code1 r1lo <- iselExpr64 e1
 238    (rlo,rhi) <- getNewRegPairNat I32
 239    let
 240         r = fromIntegral (fromIntegral i :: Word32)
 241         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 242         r1hi = getHiVRegFromLo r1lo
 243         code =  code1 `appOL`
 244                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 245                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 246                        MOV I32 (OpReg r1hi) (OpReg rhi),
 247                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 248    -- in
 249    return (ChildCode64 code rlo)
 250
 251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 252    ChildCode64 code1 r1lo <- iselExpr64 e1
 253    ChildCode64 code2 r2lo <- iselExpr64 e2
 254    (rlo,rhi) <- getNewRegPairNat I32
 255    let
 256         r1hi = getHiVRegFromLo r1lo
 257         r2hi = getHiVRegFromLo r2lo
 258         code =  code1 `appOL`
 259                 code2 `appOL`
 260                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 261                        ADD I32 (OpReg r2lo) (OpReg rlo),
 262                        MOV I32 (OpReg r1hi) (OpReg rhi),
 263                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 264    -- in
 265    return (ChildCode64 code rlo)
 266
 267 iselExpr64 expr
 268    = pprPanic "iselExpr64(i386)" (ppr expr)
 269
 270 #endif /* i386_TARGET_ARCH */
 271
 272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 273
 274 #if sparc_TARGET_ARCH
 275
 276 assignMem_I64Code addrTree valueTree
 277    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vrlo) ->
 278      getRegister addrTree               `thenNat` \ register_addr ->
 279      getNewRegNat IntRep                `thenNat` \ t_addr ->
 280      let rlo = VirtualRegI vrlo
 281          rhi = getHiVRegFromLo rlo
 282          code_addr = registerCode register_addr t_addr
 283          reg_addr  = registerName register_addr t_addr
 284          -- Big-endian store
 285          mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
 286          mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
 287      in
 288          return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
 289
 290
 291 assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
 292    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
 293      let
 294          r_dst_lo = mkVReg u_dst IntRep
 295          r_src_lo = VirtualRegI vr_src_lo
 296          r_dst_hi = getHiVRegFromLo r_dst_lo
 297          r_src_hi = getHiVRegFromLo r_src_lo
 298          mov_lo = mkMOV r_src_lo r_dst_lo
 299          mov_hi = mkMOV r_src_hi r_dst_hi
 300          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 301      in
 302          return (
 303             vcode `snocOL` mov_hi `snocOL` mov_lo
 304          )
 305 assignReg_I64Code lvalue valueTree
 306    = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
 307               (pprStixReg lvalue)
 308
 309
 310 -- Don't delete this -- it's very handy for debugging.
 311 --iselExpr64 expr
 312 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 313 --   = panic "iselExpr64(???)"
 314
 315 iselExpr64 (CmmLoad I64 addrTree)
 316    = getRegister addrTree               `thenNat` \ register_addr ->
 317      getNewRegNat IntRep                `thenNat` \ t_addr ->
 318      getNewRegNat IntRep                `thenNat` \ rlo ->
 319      let rhi = getHiVRegFromLo rlo
 320          code_addr = registerCode register_addr t_addr
 321          reg_addr  = registerName register_addr t_addr
 322          mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
 323          mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
 324      in
 325          return (
 326             ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
 327                         (getVRegUnique rlo)
 328          )
 329
 330 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
 331    = getNewRegNat IntRep                `thenNat` \ r_dst_lo ->
 332      let r_dst_hi = getHiVRegFromLo r_dst_lo
 333          r_src_lo = mkVReg vu IntRep
 334          r_src_hi = getHiVRegFromLo r_src_lo
 335          mov_lo = mkMOV r_src_lo r_dst_lo
 336          mov_hi = mkMOV r_src_hi r_dst_hi
 337          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 338      in
 339          return (
 340             ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
 341          )
 342
 343 iselExpr64 (StCall fn cconv I64 args)
 344   = genCCall fn cconv kind args                 `thenNat` \ call ->
 345     getNewRegNat IntRep                         `thenNat` \ r_dst_lo ->
 346     let r_dst_hi = getHiVRegFromLo r_dst_lo
 347         mov_lo = mkMOV o0 r_dst_lo
 348         mov_hi = mkMOV o1 r_dst_hi
 349         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 350     in
 351     return (
 352        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
 353                    (getVRegUnique r_dst_lo)
 354     )
 355
 356 iselExpr64 expr
 357    = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
 358
 359 #endif /* sparc_TARGET_ARCH */
 360
 361 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 362
 363 #if powerpc_TARGET_ARCH
 364
 365 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 366 getI64Amodes addrTree = do
 367     Amode hi_addr addr_code <- getAmode addrTree
 368     case addrOffset hi_addr 4 of
 369         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 370         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 371                            return (AddrRegImm hi_ptr (ImmInt 0),
 372                                    AddrRegImm hi_ptr (ImmInt 4),
 373                                    code)
 374
 375 assignMem_I64Code addrTree valueTree = do
 376         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 377         ChildCode64 vcode rlo <- iselExpr64 valueTree
 378         let
 379                 rhi = getHiVRegFromLo rlo
 380
 381                 -- Big-endian store
 382                 mov_hi = ST I32 rhi hi_addr
 383                 mov_lo = ST I32 rlo lo_addr
 384         -- in
 385         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 386
 387 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 388    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 389    let
 390          r_dst_lo = mkVReg u_dst I32
 391          r_dst_hi = getHiVRegFromLo r_dst_lo
 392          r_src_hi = getHiVRegFromLo r_src_lo
 393          mov_lo = MR r_dst_lo r_src_lo
 394          mov_hi = MR r_dst_hi r_src_hi
 395    -- in
 396    return (
 397         vcode `snocOL` mov_lo `snocOL` mov_hi
 398      )
 399
 400 assignReg_I64Code lvalue valueTree
 401    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 402
 403
 404 -- Don't delete this -- it's very handy for debugging.
 405 --iselExpr64 expr
 406 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 407 --   = panic "iselExpr64(???)"
 408
 409 iselExpr64 (CmmLoad addrTree I64) = do
 410     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 411     (rlo, rhi) <- getNewRegPairNat I32
 412     let mov_hi = LD I32 rhi hi_addr
 413         mov_lo = LD I32 rlo lo_addr
 414     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 415                          rlo
 416
 417 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 418    = return (ChildCode64 nilOL (mkVReg vu I32))
 419
 420 iselExpr64 (CmmLit (CmmInt i _)) = do
 421   (rlo,rhi) <- getNewRegPairNat I32
 422   let
 423         half0 = fromIntegral (fromIntegral i :: Word16)
 424         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 425         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 426         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 427
 428         code = toOL [
 429                 LIS rlo (ImmInt half1),
 430                 OR rlo rlo (RIImm $ ImmInt half0),
 431                 LIS rhi (ImmInt half3),
 432                 OR rlo rlo (RIImm $ ImmInt half2)
 433                 ]
 434   -- in
 435   return (ChildCode64 code rlo)
 436
 437 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 438    ChildCode64 code1 r1lo <- iselExpr64 e1
 439    ChildCode64 code2 r2lo <- iselExpr64 e2
 440    (rlo,rhi) <- getNewRegPairNat I32
 441    let
 442         r1hi = getHiVRegFromLo r1lo
 443         r2hi = getHiVRegFromLo r2lo
 444         code =  code1 `appOL`
 445                 code2 `appOL`
 446                 toOL [ ADDC rlo r1lo r2lo,
 447                        ADDE rhi r1hi r2hi ]
 448    -- in
 449    return (ChildCode64 code rlo)
 450
 451 iselExpr64 expr
 452    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 453
 454 #endif /* powerpc_TARGET_ARCH */
 455
 456
 457 -- -----------------------------------------------------------------------------
 458 -- The 'Register' type
 459
 460 -- 'Register's passed up the tree.  If the stix code forces the register
 461 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 462 -- otherwise, it comes out as @Any@, and the parent can decide which
 463 -- register to put it in.
 464
 465 data Register
 466   = Fixed   MachRep Reg InstrBlock
 467   | Any     MachRep (Reg -> InstrBlock)
 468
 469 swizzleRegisterRep :: Register -> MachRep -> Register
 470 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 471 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 472
 473
 474 -- -----------------------------------------------------------------------------
 475 -- Utils based on getRegister, below
 476
 477 -- The dual to getAnyReg: compute an expression into a register, but
 478 -- we don't mind which one it is.
 479 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 480 getSomeReg expr = do
 481   r <- getRegister expr
 482   case r of
 483     Any rep code -> do
 484         tmp <- getNewRegNat rep
 485         return (tmp, code tmp)
 486     Fixed _ reg code ->
 487         return (reg, code)
 488
 489 -- -----------------------------------------------------------------------------
 490 -- Grab the Reg for a CmmReg
 491
 492 getRegisterReg :: CmmReg -> Reg
 493
 494 getRegisterReg (CmmLocal (LocalReg u pk))
 495   = mkVReg u pk
 496
 497 getRegisterReg (CmmGlobal mid)
 498   = case get_GlobalReg_reg_or_addr mid of
 499        Left (RealReg rrno) -> RealReg rrno
 500        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 501           -- By this stage, the only MagicIds remaining should be the
 502           -- ones which map to a real machine register on this
 503           -- platform.  Hence ...
 504
 505
 506 -- -----------------------------------------------------------------------------
 507 -- Generate code to get a subtree into a Register
 508
 509 -- Don't delete this -- it's very handy for debugging.
 510 --getRegister expr
 511 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 512 --   = panic "getRegister(???)"
 513
 514 getRegister :: CmmExpr -> NatM Register
 515
 516 getRegister (CmmReg (CmmGlobal PicBaseReg))
 517   = do
 518       reg <- getPicBaseNat wordRep
 519       return (Fixed wordRep reg nilOL)
 520
 521 getRegister (CmmReg reg)
 522   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 523
 524 getRegister tree@(CmmRegOff _ _)
 525   = getRegister (mangleIndexTree tree)
 526
 527 -- end of machine-"independent" bit; here we go on the rest...
 528
 529 #if alpha_TARGET_ARCH
 530
 531 getRegister (StDouble d)
 532   = getBlockIdNat                   `thenNat` \ lbl ->
 533     getNewRegNat PtrRep             `thenNat` \ tmp ->
 534     let code dst = mkSeqInstrs [
 535             LDATA RoDataSegment lbl [
 536                     DATA TF [ImmLab (rational d)]
 537                 ],
 538             LDA tmp (AddrImm (ImmCLbl lbl)),
 539             LD TF dst (AddrReg tmp)]
 540     in
 541         return (Any F64 code)
 542
 543 getRegister (StPrim primop [x]) -- unary PrimOps
 544   = case primop of
 545       IntNegOp -> trivialUCode (NEG Q False) x
 546
 547       NotOp    -> trivialUCode NOT x
 548
 549       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 550       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 551
 552       OrdOp -> coerceIntCode IntRep x
 553       ChrOp -> chrCode x
 554
 555       Float2IntOp  -> coerceFP2Int    x
 556       Int2FloatOp  -> coerceInt2FP pr x
 557       Double2IntOp -> coerceFP2Int    x
 558       Int2DoubleOp -> coerceInt2FP pr x
 559
 560       Double2FloatOp -> coerceFltCode x
 561       Float2DoubleOp -> coerceFltCode x
 562
 563       other_op -> getRegister (StCall fn CCallConv F64 [x])
 564         where
 565           fn = case other_op of
 566                  FloatExpOp    -> FSLIT("exp")
 567                  FloatLogOp    -> FSLIT("log")
 568                  FloatSqrtOp   -> FSLIT("sqrt")
 569                  FloatSinOp    -> FSLIT("sin")
 570                  FloatCosOp    -> FSLIT("cos")
 571                  FloatTanOp    -> FSLIT("tan")
 572                  FloatAsinOp   -> FSLIT("asin")
 573                  FloatAcosOp   -> FSLIT("acos")
 574                  FloatAtanOp   -> FSLIT("atan")
 575                  FloatSinhOp   -> FSLIT("sinh")
 576                  FloatCoshOp   -> FSLIT("cosh")
 577                  FloatTanhOp   -> FSLIT("tanh")
 578                  DoubleExpOp   -> FSLIT("exp")
 579                  DoubleLogOp   -> FSLIT("log")
 580                  DoubleSqrtOp  -> FSLIT("sqrt")
 581                  DoubleSinOp   -> FSLIT("sin")
 582                  DoubleCosOp   -> FSLIT("cos")
 583                  DoubleTanOp   -> FSLIT("tan")
 584                  DoubleAsinOp  -> FSLIT("asin")
 585                  DoubleAcosOp  -> FSLIT("acos")
 586                  DoubleAtanOp  -> FSLIT("atan")
 587                  DoubleSinhOp  -> FSLIT("sinh")
 588                  DoubleCoshOp  -> FSLIT("cosh")
 589                  DoubleTanhOp  -> FSLIT("tanh")
 590   where
 591     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 592
 593 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 594   = case primop of
 595       CharGtOp -> trivialCode (CMP LTT) y x
 596       CharGeOp -> trivialCode (CMP LE) y x
 597       CharEqOp -> trivialCode (CMP EQQ) x y
 598       CharNeOp -> int_NE_code x y
 599       CharLtOp -> trivialCode (CMP LTT) x y
 600       CharLeOp -> trivialCode (CMP LE) x y
 601
 602       IntGtOp  -> trivialCode (CMP LTT) y x
 603       IntGeOp  -> trivialCode (CMP LE) y x
 604       IntEqOp  -> trivialCode (CMP EQQ) x y
 605       IntNeOp  -> int_NE_code x y
 606       IntLtOp  -> trivialCode (CMP LTT) x y
 607       IntLeOp  -> trivialCode (CMP LE) x y
 608
 609       WordGtOp -> trivialCode (CMP ULT) y x
 610       WordGeOp -> trivialCode (CMP ULE) x y
 611       WordEqOp -> trivialCode (CMP EQQ)  x y
 612       WordNeOp -> int_NE_code x y
 613       WordLtOp -> trivialCode (CMP ULT) x y
 614       WordLeOp -> trivialCode (CMP ULE) x y
 615
 616       AddrGtOp -> trivialCode (CMP ULT) y x
 617       AddrGeOp -> trivialCode (CMP ULE) y x
 618       AddrEqOp -> trivialCode (CMP EQQ)  x y
 619       AddrNeOp -> int_NE_code x y
 620       AddrLtOp -> trivialCode (CMP ULT) x y
 621       AddrLeOp -> trivialCode (CMP ULE) x y
 622
 623       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 624       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 625       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 626       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 627       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 628       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 629
 630       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 631       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 632       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 633       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 634       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 635       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 636
 637       IntAddOp  -> trivialCode (ADD Q False) x y
 638       IntSubOp  -> trivialCode (SUB Q False) x y
 639       IntMulOp  -> trivialCode (MUL Q False) x y
 640       IntQuotOp -> trivialCode (DIV Q False) x y
 641       IntRemOp  -> trivialCode (REM Q False) x y
 642
 643       WordAddOp  -> trivialCode (ADD Q False) x y
 644       WordSubOp  -> trivialCode (SUB Q False) x y
 645       WordMulOp  -> trivialCode (MUL Q False) x y
 646       WordQuotOp -> trivialCode (DIV Q True) x y
 647       WordRemOp  -> trivialCode (REM Q True) x y
 648
 649       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 650       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 651       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 652       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 653
 654       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 655       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 656       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 657       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 658
 659       AddrAddOp  -> trivialCode (ADD Q False) x y
 660       AddrSubOp  -> trivialCode (SUB Q False) x y
 661       AddrRemOp  -> trivialCode (REM Q True) x y
 662
 663       AndOp  -> trivialCode AND x y
 664       OrOp   -> trivialCode OR  x y
 665       XorOp  -> trivialCode XOR x y
 666       SllOp  -> trivialCode SLL x y
 667       SrlOp  -> trivialCode SRL x y
 668
 669       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 670       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 671       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 672
 673       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 674       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 675   where
 676     {- ------------------------------------------------------------
 677         Some bizarre special code for getting condition codes into
 678         registers.  Integer non-equality is a test for equality
 679         followed by an XOR with 1.  (Integer comparisons always set
 680         the result register to 0 or 1.)  Floating point comparisons of
 681         any kind leave the result in a floating point register, so we
 682         need to wrangle an integer register out of things.
 683     -}
 684     int_NE_code :: StixTree -> StixTree -> NatM Register
 685
 686     int_NE_code x y
 687       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 688         getNewRegNat IntRep             `thenNat` \ tmp ->
 689         let
 690             code = registerCode register tmp
 691             src  = registerName register tmp
 692             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 693         in
 694         return (Any IntRep code__2)
 695
 696     {- ------------------------------------------------------------
 697         Comments for int_NE_code also apply to cmpF_code
 698     -}
 699     cmpF_code
 700         :: (Reg -> Reg -> Reg -> Instr)
 701         -> Cond
 702         -> StixTree -> StixTree
 703         -> NatM Register
 704
 705     cmpF_code instr cond x y
 706       = trivialFCode pr instr x y       `thenNat` \ register ->
 707         getNewRegNat F64                `thenNat` \ tmp ->
 708         getBlockIdNat                   `thenNat` \ lbl ->
 709         let
 710             code = registerCode register tmp
 711             result  = registerName register tmp
 712
 713             code__2 dst = code . mkSeqInstrs [
 714                 OR zeroh (RIImm (ImmInt 1)) dst,
 715                 BF cond  result (ImmCLbl lbl),
 716                 OR zeroh (RIReg zeroh) dst,
 717                 NEWBLOCK lbl]
 718         in
 719         return (Any IntRep code__2)
 720       where
 721         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 722       ------------------------------------------------------------
 723
 724 getRegister (CmmLoad pk mem)
 725   = getAmode mem                    `thenNat` \ amode ->
 726     let
 727         code = amodeCode amode
 728         src   = amodeAddr amode
 729         size = primRepToSize pk
 730         code__2 dst = code . mkSeqInstr (LD size dst src)
 731     in
 732     return (Any pk code__2)
 733
 734 getRegister (StInt i)
 735   | fits8Bits i
 736   = let
 737         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 738     in
 739     return (Any IntRep code)
 740   | otherwise
 741   = let
 742         code dst = mkSeqInstr (LDI Q dst src)
 743     in
 744     return (Any IntRep code)
 745   where
 746     src = ImmInt (fromInteger i)
 747
 748 getRegister leaf
 749   | isJust imm
 750   = let
 751         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 752     in
 753     return (Any PtrRep code)
 754   where
 755     imm = maybeImm leaf
 756     imm__2 = case imm of Just x -> x
 757
 758 #endif /* alpha_TARGET_ARCH */
 759
 760 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 761
 762 #if i386_TARGET_ARCH
 763
 764 getRegister (CmmLit (CmmFloat f F32)) = do
 765     lbl <- getNewLabelNat
 766     let code dst = toOL [
 767             LDATA ReadOnlyData
 768                         [CmmDataLabel lbl,
 769                          CmmStaticLit (CmmFloat f F32)],
 770             GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
 771             ]
 772     -- in
 773     return (Any F32 code)
 774
 775
 776 getRegister (CmmLit (CmmFloat d F64))
 777   | d == 0.0
 778   = let code dst = unitOL (GLDZ dst)
 779     in  return (Any F64 code)
 780
 781   | d == 1.0
 782   = let code dst = unitOL (GLD1 dst)
 783     in  return (Any F64 code)
 784
 785   | otherwise = do
 786     lbl <- getNewLabelNat
 787     let code dst = toOL [
 788             LDATA ReadOnlyData
 789                         [CmmDataLabel lbl,
 790                          CmmStaticLit (CmmFloat d F64)],
 791             GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
 792             ]
 793     -- in
 794     return (Any F64 code)
 795
 796 #endif /* i386_TARGET_ARCH */
 797
 798 #if x86_64_TARGET_ARCH
 799
 800 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 801    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 802         -- I don't know why there are xorpd, xorps, and pxor instructions.
 803         -- They all appear to do the same thing --SDM
 804    return (Any rep code)
 805
 806 getRegister (CmmLit (CmmFloat f rep)) = do
 807     lbl <- getNewLabelNat
 808     let code dst = toOL [
 809             LDATA ReadOnlyData
 810                         [CmmDataLabel lbl,
 811                          CmmStaticLit (CmmFloat f rep)],
 812             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 813             ]
 814     -- in
 815     return (Any rep code)
 816
 817 #endif /* x86_64_TARGET_ARCH */
 818
 819 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 820
 821 -- catch simple cases of zero- or sign-extended load
 822 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 823   code <- intLoadCode (MOVZxL I8) addr
 824   return (Any I32 code)
 825
 826 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 827   code <- intLoadCode (MOVSxL I8) addr
 828   return (Any I32 code)
 829
 830 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 831   code <- intLoadCode (MOVZxL I16) addr
 832   return (Any I32 code)
 833
 834 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 835   code <- intLoadCode (MOVSxL I16) addr
 836   return (Any I32 code)
 837
 838 #endif
 839
 840 #if x86_64_TARGET_ARCH
 841
 842 -- catch simple cases of zero- or sign-extended load
 843 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 844   code <- intLoadCode (MOVZxL I8) addr
 845   return (Any I64 code)
 846
 847 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 848   code <- intLoadCode (MOVSxL I8) addr
 849   return (Any I64 code)
 850
 851 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 852   code <- intLoadCode (MOVZxL I16) addr
 853   return (Any I64 code)
 854
 855 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 856   code <- intLoadCode (MOVSxL I16) addr
 857   return (Any I64 code)
 858
 859 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 860   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 861   return (Any I64 code)
 862
 863 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 864   code <- intLoadCode (MOVSxL I32) addr
 865   return (Any I64 code)
 866
 867 #endif
 868
 869 #if x86_64_TARGET_ARCH
 870 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 871   x_code <- getAnyReg x
 872   lbl <- getNewLabelNat
 873   let
 874     code dst = x_code dst `appOL` toOL [
 875         -- This is how gcc does it, so it can't be that bad:
 876         LDATA ReadOnlyData16 [
 877                 CmmAlign 16,
 878                 CmmDataLabel lbl,
 879                 CmmStaticLit (CmmInt 0x80000000 I32),
 880                 CmmStaticLit (CmmInt 0 I32),
 881                 CmmStaticLit (CmmInt 0 I32),
 882                 CmmStaticLit (CmmInt 0 I32)
 883         ],
 884         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 885                 -- xorps, so we need the 128-bit constant
 886                 -- ToDo: rip-relative
 887         ]
 888   --
 889   return (Any F32 code)
 890
 891 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 892   x_code <- getAnyReg x
 893   lbl <- getNewLabelNat
 894   let
 895         -- This is how gcc does it, so it can't be that bad:
 896     code dst = x_code dst `appOL` toOL [
 897         LDATA ReadOnlyData16 [
 898                 CmmAlign 16,
 899                 CmmDataLabel lbl,
 900                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 901                 CmmStaticLit (CmmInt 0 I64)
 902         ],
 903                 -- gcc puts an unpck here.  Wonder if we need it.
 904         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 905                 -- xorpd, so we need the 128-bit constant
 906         ]
 907   --
 908   return (Any F64 code)
 909 #endif
 910
 911 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 912
 913 getRegister (CmmMachOp mop [x]) -- unary MachOps
 914   = case mop of
 915 #if i386_TARGET_ARCH
 916       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 917       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 918 #endif
 919
 920       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 921       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 922
 923       -- Nop conversions
 924       -- TODO: these are only nops if the arg is not a fixed register that
 925       -- can't be byte-addressed.
 926       MO_U_Conv I32 I8  -> conversionNop I32 x
 927       MO_S_Conv I32 I8  -> conversionNop I32 x
 928       MO_U_Conv I16 I8  -> conversionNop I16 x
 929       MO_S_Conv I16 I8  -> conversionNop I16 x
 930       MO_U_Conv I32 I16 -> conversionNop I32 x
 931       MO_S_Conv I32 I16 -> conversionNop I32 x
 932 #if x86_64_TARGET_ARCH
 933       MO_U_Conv I64 I32 -> conversionNop I64 x
 934       MO_S_Conv I64 I32 -> conversionNop I64 x
 935       MO_U_Conv I64 I16 -> conversionNop I64 x
 936       MO_S_Conv I64 I16 -> conversionNop I64 x
 937       MO_U_Conv I64 I8  -> conversionNop I64 x
 938       MO_S_Conv I64 I8  -> conversionNop I64 x
 939 #endif
 940
 941       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 942       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 943
 944       -- widenings
 945       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 946       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 947       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 948
 949       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 950       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 951       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 952
 953 #if x86_64_TARGET_ARCH
 954       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 955       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 956       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 957       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 958       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 959       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 960         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 961         -- However, we don't want the register allocator to throw it
 962         -- away as an unnecessary reg-to-reg move, so we keep it in
 963         -- the form of a movzl and print it as a movl later.
 964 #endif
 965
 966 #if i386_TARGET_ARCH
 967       MO_S_Conv F32 F64 -> conversionNop F64 x
 968       MO_S_Conv F64 F32 -> conversionNop F32 x
 969 #else
 970       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 971       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 972 #endif
 973
 974       MO_S_Conv from to
 975         | isFloatingRep from -> coerceFP2Int from to x
 976         | isFloatingRep to   -> coerceInt2FP from to x
 977
 978       other -> pprPanic "getRegister" (pprMachOp mop)
 979    where
 980         -- signed or unsigned extension.
 981         integerExtend from to instr expr = do
 982             (reg,e_code) <- if from == I8 then getByteReg expr
 983                                           else getSomeReg expr
 984             let
 985                 code dst =
 986                   e_code `snocOL`
 987                   instr from (OpReg reg) (OpReg dst)
 988             return (Any to code)
 989
 990         conversionNop new_rep expr
 991             = do e_code <- getRegister expr
 992                  return (swizzleRegisterRep e_code new_rep)
 993
 994
 995 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 996   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 997     case mop of
 998       MO_Eq F32   -> condFltReg EQQ x y
 999       MO_Ne F32   -> condFltReg NE x y
1000       MO_S_Gt F32 -> condFltReg GTT x y
1001       MO_S_Ge F32 -> condFltReg GE x y
1002       MO_S_Lt F32 -> condFltReg LTT x y
1003       MO_S_Le F32 -> condFltReg LE x y
1004
1005       MO_Eq F64   -> condFltReg EQQ x y
1006       MO_Ne F64   -> condFltReg NE x y
1007       MO_S_Gt F64 -> condFltReg GTT x y
1008       MO_S_Ge F64 -> condFltReg GE x y
1009       MO_S_Lt F64 -> condFltReg LTT x y
1010       MO_S_Le F64 -> condFltReg LE x y
1011
1012       MO_Eq rep   -> condIntReg EQQ x y
1013       MO_Ne rep   -> condIntReg NE x y
1014
1015       MO_S_Gt rep -> condIntReg GTT x y
1016       MO_S_Ge rep -> condIntReg GE x y
1017       MO_S_Lt rep -> condIntReg LTT x y
1018       MO_S_Le rep -> condIntReg LE x y
1019
1020       MO_U_Gt rep -> condIntReg GU  x y
1021       MO_U_Ge rep -> condIntReg GEU x y
1022       MO_U_Lt rep -> condIntReg LU  x y
1023       MO_U_Le rep -> condIntReg LEU x y
1024
1025 #if i386_TARGET_ARCH
1026       MO_Add F32 -> trivialFCode F32 GADD x y
1027       MO_Sub F32 -> trivialFCode F32 GSUB x y
1028
1029       MO_Add F64 -> trivialFCode F64 GADD x y
1030       MO_Sub F64 -> trivialFCode F64 GSUB x y
1031
1032       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1033       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1034 #endif
1035
1036 #if x86_64_TARGET_ARCH
1037       MO_Add F32 -> trivialFCode F32 ADD x y
1038       MO_Sub F32 -> trivialFCode F32 SUB x y
1039
1040       MO_Add F64 -> trivialFCode F64 ADD x y
1041       MO_Sub F64 -> trivialFCode F64 SUB x y
1042
1043       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1044       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1045 #endif
1046
1047       MO_Add rep -> add_code rep x y
1048       MO_Sub rep -> sub_code rep x y
1049
1050       MO_S_Quot rep -> div_code rep True  True  x y
1051       MO_S_Rem  rep -> div_code rep True  False x y
1052       MO_U_Quot rep -> div_code rep False True  x y
1053       MO_U_Rem  rep -> div_code rep False False x y
1054
1055 #if i386_TARGET_ARCH
1056       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1057       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1058 #endif
1059
1060 #if x86_64_TARGET_ARCH
1061       MO_Mul   F32 -> trivialFCode F32 MUL x y
1062       MO_Mul   F64 -> trivialFCode F64 MUL x y
1063 #endif
1064
1065       MO_Mul   rep -> let op = IMUL rep in
1066                       trivialCode rep op (Just op) x y
1067
1068       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1069
1070       MO_And rep -> let op = AND rep in
1071                     trivialCode rep op (Just op) x y
1072       MO_Or  rep -> let op = OR  rep in
1073                     trivialCode rep op (Just op) x y
1074       MO_Xor rep -> let op = XOR rep in
1075                     trivialCode rep op (Just op) x y
1076
1077         {- Shift ops on x86s have constraints on their source, it
1078            either has to be Imm, CL or 1
1079             => trivialCode is not restrictive enough (sigh.)
1080         -}
1081       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1082       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1083       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1084
1085       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1086   where
1087     --------------------
1088     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1089     imulMayOflo rep a b = do
1090          (a_reg, a_code) <- getNonClobberedReg a
1091          b_code <- getAnyReg b
1092          let
1093              shift_amt  = case rep of
1094                            I32 -> 31
1095                            I64 -> 63
1096                            _ -> panic "shift_amt"
1097
1098              code = a_code `appOL` b_code eax `appOL`
1099                         toOL [
1100                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1101                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1102                                 -- sign extend lower part
1103                            SUB rep (OpReg edx) (OpReg eax)
1104                                 -- compare against upper
1105                            -- eax==0 if high part == sign extended low part
1106                         ]
1107          -- in
1108          return (Fixed rep eax code)
1109
1110     --------------------
1111     shift_code :: MachRep
1112                -> (Operand -> Operand -> Instr)
1113                -> CmmExpr
1114                -> CmmExpr
1115                -> NatM Register
1116
1117     {- Case1: shift length as immediate -}
1118     shift_code rep instr x y@(CmmLit lit) = do
1119           x_code <- getAnyReg x
1120           let
1121                code dst
1122                   = x_code dst `snocOL`
1123                     instr (OpImm (litToImm lit)) (OpReg dst)
1124           -- in
1125           return (Any rep code)
1126
1127     {- Case2: shift length is complex (non-immediate) -}
1128     shift_code rep instr x y{-amount-} = do
1129         (x_reg, x_code) <- getNonClobberedReg x
1130         y_code <- getAnyReg y
1131         let
1132            code = x_code `appOL`
1133                   y_code ecx `snocOL`
1134                   instr (OpReg ecx) (OpReg x_reg)
1135         -- in
1136         return (Fixed rep x_reg code)
1137
1138     --------------------
1139     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1140     add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1141     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1142
1143     --------------------
1144     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1145     sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1146     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1147
1148     -- our three-operand add instruction:
1149     add_int rep x y = do
1150         (x_reg, x_code) <- getSomeReg x
1151         let
1152             imm = ImmInt (fromInteger y)
1153             code dst
1154                = x_code `snocOL`
1155                  LEA rep
1156                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1157                         (OpReg dst)
1158         --
1159         return (Any rep code)
1160
1161     ----------------------
1162     div_code rep signed quotient x y = do
1163            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1164            x_code <- getAnyReg x
1165            let
1166              widen | signed    = CLTD rep
1167                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1168
1169              instr | signed    = IDIV
1170                    | otherwise = DIV
1171
1172              code = y_code `appOL`
1173                     x_code eax `appOL`
1174                     toOL [widen, instr rep y_op]
1175
1176              result | quotient  = eax
1177                     | otherwise = edx
1178
1179            -- in
1180            return (Fixed rep result code)
1181
1182
1183 getRegister (CmmLoad mem pk)
1184   | isFloatingRep pk
1185   = do
1186     Amode src mem_code <- getAmode mem
1187     let
1188         code dst = mem_code `snocOL`
1189                    IF_ARCH_i386(GLD pk src dst,
1190                                 MOV pk (OpAddr src) (OpReg dst))
1191     --
1192     return (Any pk code)
1193
1194 #if i386_TARGET_ARCH
1195 getRegister (CmmLoad mem pk)
1196   | pk /= I64
1197   = do
1198     code <- intLoadCode (instr pk) mem
1199     return (Any pk code)
1200   where
1201         instr I8  = MOVZxL pk
1202         instr I16 = MOV I16
1203         instr I32 = MOV I32
1204         -- we always zero-extend 8-bit loads, if we
1205         -- can't think of anything better.  This is because
1206         -- we can't guarantee access to an 8-bit variant of every register
1207         -- (esi and edi don't have 8-bit variants), so to make things
1208         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1209 #endif
1210
1211 #if x86_64_TARGET_ARCH
1212 -- Simpler memory load code on x86_64
1213 getRegister (CmmLoad mem pk)
1214   = do
1215     code <- intLoadCode (MOV pk) mem
1216     return (Any pk code)
1217 #endif
1218
1219 getRegister (CmmLit (CmmInt 0 rep))
1220   = let
1221         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1222         adj_rep = case rep of I64 -> I32; _ -> rep
1223         rep1 = IF_ARCH_i386( rep, adj_rep )
1224         code dst
1225            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1226     in
1227         return (Any rep code)
1228
1229 #if x86_64_TARGET_ARCH
1230   -- optimisation for loading small literals on x86_64: take advantage
1231   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1232   -- instruction forms are shorter.
1233 getRegister (CmmLit lit)
1234   | I64 <- cmmLitRep lit, not (isBigLit lit)
1235   = let
1236         imm = litToImm lit
1237         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1238     in
1239         return (Any I64 code)
1240   where
1241    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1242    isBigLit _ = False
1243         -- note1: not the same as is64BitLit, because that checks for
1244         -- signed literals that fit in 32 bits, but we want unsigned
1245         -- literals here.
1246         -- note2: all labels are small, because we're assuming the
1247         -- small memory model (see gcc docs, -mcmodel=small).
1248 #endif
1249
1250 getRegister (CmmLit lit)
1251   = let
1252         rep = cmmLitRep lit
1253         imm = litToImm lit
1254         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1255     in
1256         return (Any rep code)
1257
1258 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1259
1260
1261 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1262    -> NatM (Reg -> InstrBlock)
1263 intLoadCode instr mem = do
1264   Amode src mem_code <- getAmode mem
1265   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1266
1267 -- Compute an expression into *any* register, adding the appropriate
1268 -- move instruction if necessary.
1269 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1270 getAnyReg expr = do
1271   r <- getRegister expr
1272   anyReg r
1273
1274 anyReg :: Register -> NatM (Reg -> InstrBlock)
1275 anyReg (Any _ code)          = return code
1276 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1277
1278 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1279 -- Fixed registers might not be byte-addressable, so we make sure we've
1280 -- got a temporary, inserting an extra reg copy if necessary.
1281 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1282 #if x86_64_TARGET_ARCH
1283 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1284 #else
1285 getByteReg expr = do
1286   r <- getRegister expr
1287   case r of
1288     Any rep code -> do
1289         tmp <- getNewRegNat rep
1290         return (tmp, code tmp)
1291     Fixed rep reg code
1292         | isVirtualReg reg -> return (reg,code)
1293         | otherwise -> do
1294             tmp <- getNewRegNat rep
1295             return (tmp, code `snocOL` reg2reg rep reg tmp)
1296         -- ToDo: could optimise slightly by checking for byte-addressable
1297         -- real registers, but that will happen very rarely if at all.
1298 #endif
1299
1300 -- Another variant: this time we want the result in a register that cannot
1301 -- be modified by code to evaluate an arbitrary expression.
1302 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1303 getNonClobberedReg expr = do
1304   r <- getRegister expr
1305   case r of
1306     Any rep code -> do
1307         tmp <- getNewRegNat rep
1308         return (tmp, code tmp)
1309     Fixed rep reg code
1310         -- only free regs can be clobbered
1311         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1312                 tmp <- getNewRegNat rep
1313                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1314         | otherwise ->
1315                 return (reg, code)
1316
1317 reg2reg :: MachRep -> Reg -> Reg -> Instr
1318 reg2reg rep src dst
1319 #if i386_TARGET_ARCH
1320   | isFloatingRep rep = GMOV src dst
1321 #endif
1322   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1323
1324 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1325
1326 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1327
1328 #if sparc_TARGET_ARCH
1329
1330 getRegister (StFloat d)
1331   = getBlockIdNat                   `thenNat` \ lbl ->
1332     getNewRegNat PtrRep             `thenNat` \ tmp ->
1333     let code dst = toOL [
1334             SEGMENT DataSegment,
1335             NEWBLOCK lbl,
1336             DATA F [ImmFloat d],
1337             SEGMENT TextSegment,
1338             SETHI (HI (ImmCLbl lbl)) tmp,
1339             LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1340     in
1341         return (Any F32 code)
1342
1343 getRegister (StDouble d)
1344   = getBlockIdNat                   `thenNat` \ lbl ->
1345     getNewRegNat PtrRep             `thenNat` \ tmp ->
1346     let code dst = toOL [
1347             SEGMENT DataSegment,
1348             NEWBLOCK lbl,
1349             DATA DF [ImmDouble d],
1350             SEGMENT TextSegment,
1351             SETHI (HI (ImmCLbl lbl)) tmp,
1352             LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1353     in
1354         return (Any F64 code)
1355
1356
1357 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1358   = case mop of
1359       MO_NatS_Neg      -> trivialUCode (SUB False False g0) x
1360       MO_Nat_Not       -> trivialUCode (XNOR False g0) x
1361       MO_32U_to_8U     -> trivialCode (AND False) x (StInt 255)
1362
1363       MO_F32_Neg       -> trivialUFCode F32 (FNEG F) x
1364       MO_F64_Neg       -> trivialUFCode F64 (FNEG DF) x
1365
1366       MO_F64_to_Flt    -> coerceDbl2Flt x
1367       MO_F32_to_Dbl    -> coerceFlt2Dbl x
1368
1369       MO_F32_to_NatS   -> coerceFP2Int F32 x
1370       MO_NatS_to_Flt   -> coerceInt2FP F32 x
1371       MO_F64_to_NatS   -> coerceFP2Int F64 x
1372       MO_NatS_to_Dbl   -> coerceInt2FP F64 x
1373
1374       -- Conversions which are a nop on sparc
1375       MO_32U_to_NatS   -> conversionNop IntRep   x
1376       MO_32S_to_NatS  -> conversionNop IntRep   x
1377       MO_NatS_to_32U   -> conversionNop WordRep  x
1378       MO_32U_to_NatU   -> conversionNop WordRep  x
1379
1380       MO_NatU_to_NatS -> conversionNop IntRep    x
1381       MO_NatS_to_NatU -> conversionNop WordRep   x
1382       MO_NatP_to_NatU -> conversionNop WordRep   x
1383       MO_NatU_to_NatP -> conversionNop PtrRep    x
1384       MO_NatS_to_NatP -> conversionNop PtrRep    x
1385       MO_NatP_to_NatS -> conversionNop IntRep    x
1386
1387       -- sign-extending widenings
1388       MO_8U_to_32U    -> integerExtend False 24 x
1389       MO_8U_to_NatU   -> integerExtend False 24 x
1390       MO_8S_to_NatS   -> integerExtend True  24 x
1391       MO_16U_to_NatU  -> integerExtend False 16 x
1392       MO_16S_to_NatS  -> integerExtend True  16 x
1393
1394       other_op ->
1395         let fixed_x = if   is_float_op  -- promote to double
1396                       then CmmMachOp MO_F32_to_Dbl [x]
1397                       else x
1398         in
1399         getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
1400     where
1401         integerExtend signed nBits x
1402            = getRegister (
1403                 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1404                          [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1405              )
1406         conversionNop new_rep expr
1407             = getRegister expr          `thenNat` \ e_code ->
1408               return (swizzleRegisterRep e_code new_rep)
1409
1410         (is_float_op, fn)
1411           = case mop of
1412               MO_F32_Exp    -> (True,  FSLIT("exp"))
1413               MO_F32_Log    -> (True,  FSLIT("log"))
1414               MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
1415
1416               MO_F32_Sin    -> (True,  FSLIT("sin"))
1417               MO_F32_Cos    -> (True,  FSLIT("cos"))
1418               MO_F32_Tan    -> (True,  FSLIT("tan"))
1419
1420               MO_F32_Asin   -> (True,  FSLIT("asin"))
1421               MO_F32_Acos   -> (True,  FSLIT("acos"))
1422               MO_F32_Atan   -> (True,  FSLIT("atan"))
1423
1424               MO_F32_Sinh   -> (True,  FSLIT("sinh"))
1425               MO_F32_Cosh   -> (True,  FSLIT("cosh"))
1426               MO_F32_Tanh   -> (True,  FSLIT("tanh"))
1427
1428               MO_F64_Exp    -> (False, FSLIT("exp"))
1429               MO_F64_Log    -> (False, FSLIT("log"))
1430               MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
1431
1432               MO_F64_Sin    -> (False, FSLIT("sin"))
1433               MO_F64_Cos    -> (False, FSLIT("cos"))
1434               MO_F64_Tan    -> (False, FSLIT("tan"))
1435
1436               MO_F64_Asin   -> (False, FSLIT("asin"))
1437               MO_F64_Acos   -> (False, FSLIT("acos"))
1438               MO_F64_Atan   -> (False, FSLIT("atan"))
1439
1440               MO_F64_Sinh   -> (False, FSLIT("sinh"))
1441               MO_F64_Cosh   -> (False, FSLIT("cosh"))
1442               MO_F64_Tanh   -> (False, FSLIT("tanh"))
1443
1444               other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
1445                                 (pprMachOp mop)
1446
1447
1448 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1449   = case mop of
1450       MO_32U_Gt  -> condIntReg GTT x y
1451       MO_32U_Ge  -> condIntReg GE x y
1452       MO_32U_Eq  -> condIntReg EQQ x y
1453       MO_32U_Ne  -> condIntReg NE x y
1454       MO_32U_Lt  -> condIntReg LTT x y
1455       MO_32U_Le  -> condIntReg LE x y
1456
1457       MO_Nat_Eq   -> condIntReg EQQ x y
1458       MO_Nat_Ne   -> condIntReg NE x y
1459
1460       MO_NatS_Gt  -> condIntReg GTT x y
1461       MO_NatS_Ge  -> condIntReg GE x y
1462       MO_NatS_Lt  -> condIntReg LTT x y
1463       MO_NatS_Le  -> condIntReg LE x y
1464
1465       MO_NatU_Gt  -> condIntReg GU  x y
1466       MO_NatU_Ge  -> condIntReg GEU x y
1467       MO_NatU_Lt  -> condIntReg LU  x y
1468       MO_NatU_Le  -> condIntReg LEU x y
1469
1470       MO_F32_Gt -> condFltReg GTT x y
1471       MO_F32_Ge -> condFltReg GE x y
1472       MO_F32_Eq -> condFltReg EQQ x y
1473       MO_F32_Ne -> condFltReg NE x y
1474       MO_F32_Lt -> condFltReg LTT x y
1475       MO_F32_Le -> condFltReg LE x y
1476
1477       MO_F64_Gt -> condFltReg GTT x y
1478       MO_F64_Ge -> condFltReg GE x y
1479       MO_F64_Eq -> condFltReg EQQ x y
1480       MO_F64_Ne -> condFltReg NE x y
1481       MO_F64_Lt -> condFltReg LTT x y
1482       MO_F64_Le -> condFltReg LE x y
1483
1484       MO_Nat_Add -> trivialCode (ADD False False) x y
1485       MO_Nat_Sub -> trivialCode (SUB False False) x y
1486
1487       MO_NatS_Mul  -> trivialCode (SMUL False) x y
1488       MO_NatU_Mul  -> trivialCode (UMUL False) x y
1489       MO_NatS_MulMayOflo -> imulMayOflo x y
1490
1491       -- ToDo: teach about V8+ SPARC div instructions
1492       MO_NatS_Quot -> idiv FSLIT(".div")  x y
1493       MO_NatS_Rem  -> idiv FSLIT(".rem")  x y
1494       MO_NatU_Quot -> idiv FSLIT(".udiv")  x y
1495       MO_NatU_Rem  -> idiv FSLIT(".urem")  x y
1496
1497       MO_F32_Add   -> trivialFCode F32  FADD x y
1498       MO_F32_Sub   -> trivialFCode F32  FSUB x y
1499       MO_F32_Mul   -> trivialFCode F32  FMUL x y
1500       MO_F32_Div   -> trivialFCode F32  FDIV x y
1501
1502       MO_F64_Add   -> trivialFCode F64 FADD x y
1503       MO_F64_Sub   -> trivialFCode F64 FSUB x y
1504       MO_F64_Mul   -> trivialFCode F64 FMUL x y
1505       MO_F64_Div   -> trivialFCode F64 FDIV x y
1506
1507       MO_Nat_And   -> trivialCode (AND False) x y
1508       MO_Nat_Or    -> trivialCode (OR  False) x y
1509       MO_Nat_Xor   -> trivialCode (XOR False) x y
1510
1511       MO_Nat_Shl   -> trivialCode SLL x y
1512       MO_Nat_Shr   -> trivialCode SRL x y
1513       MO_Nat_Sar   -> trivialCode SRA x y
1514
1515       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1516                                          [promote x, promote y])
1517                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1518       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1519                                         [x, y])
1520
1521       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1522   where
1523     idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1524
1525     --------------------
1526     imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1527     imulMayOflo a1 a2
1528        = getNewRegNat IntRep            `thenNat` \ t1 ->
1529          getNewRegNat IntRep            `thenNat` \ t2 ->
1530          getNewRegNat IntRep            `thenNat` \ res_lo ->
1531          getNewRegNat IntRep            `thenNat` \ res_hi ->
1532          getRegister a1                 `thenNat` \ reg1 ->
1533          getRegister a2                 `thenNat` \ reg2 ->
1534          let code1 = registerCode reg1 t1
1535              code2 = registerCode reg2 t2
1536              src1  = registerName reg1 t1
1537              src2  = registerName reg2 t2
1538              code dst = code1 `appOL` code2 `appOL`
1539                         toOL [
1540                            SMUL False src1 (RIReg src2) res_lo,
1541                            RDY res_hi,
1542                            SRA res_lo (RIImm (ImmInt 31)) res_lo,
1543                            SUB False False res_lo (RIReg res_hi) dst
1544                         ]
1545          in
1546             return (Any IntRep code)
1547
1548 getRegister (CmmLoad pk mem) = do
1549     Amode src code <- getAmode mem
1550     let
1551         size = primRepToSize pk
1552         code__2 dst = code `snocOL` LD size src dst
1553     --
1554     return (Any pk code__2)
1555
1556 getRegister (StInt i)
1557   | fits13Bits i
1558   = let
1559         src = ImmInt (fromInteger i)
1560         code dst = unitOL (OR False g0 (RIImm src) dst)
1561     in
1562         return (Any IntRep code)
1563
1564 getRegister leaf
1565   | isJust imm
1566   = let
1567         code dst = toOL [
1568             SETHI (HI imm__2) dst,
1569             OR False dst (RIImm (LO imm__2)) dst]
1570     in
1571         return (Any PtrRep code)
1572   | otherwise
1573   = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1574   where
1575     imm = maybeImm leaf
1576     imm__2 = case imm of Just x -> x
1577
1578 #endif /* sparc_TARGET_ARCH */
1579
1580 #if powerpc_TARGET_ARCH
1581 getRegister (CmmLoad mem pk)
1582   | pk /= I64
1583   = do
1584         Amode addr addr_code <- getAmode mem
1585         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1586                        addr_code `snocOL` LD pk dst addr
1587         return (Any pk code)
1588
1589 -- catch simple cases of zero- or sign-extended load
1590 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1591     Amode addr addr_code <- getAmode mem
1592     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1593
1594 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1595
1596 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1597     Amode addr addr_code <- getAmode mem
1598     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1599
1600 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1601     Amode addr addr_code <- getAmode mem
1602     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1603
1604 getRegister (CmmMachOp mop [x]) -- unary MachOps
1605   = case mop of
1606       MO_Not rep   -> trivialUCode rep NOT x
1607
1608       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1609       MO_S_Conv F32 F64 -> conversionNop F64 x
1610
1611       MO_S_Conv from to
1612         | from == to         -> conversionNop to x
1613         | isFloatingRep from -> coerceFP2Int from to x
1614         | isFloatingRep to   -> coerceInt2FP from to x
1615
1616         -- narrowing is a nop: we treat the high bits as undefined
1617       MO_S_Conv I32 to -> conversionNop to x
1618       MO_S_Conv I16 I8 -> conversionNop I8 x
1619       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1620       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1621
1622       MO_U_Conv from to
1623         | from == to -> conversionNop to x
1624         -- narrowing is a nop: we treat the high bits as undefined
1625       MO_U_Conv I32 to -> conversionNop to x
1626       MO_U_Conv I16 I8 -> conversionNop I8 x
1627       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1628       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1629
1630       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1631       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1632       MO_S_Neg rep      -> trivialUCode rep NEG x
1633
1634     where
1635         conversionNop new_rep expr
1636             = do e_code <- getRegister expr
1637                  return (swizzleRegisterRep e_code new_rep)
1638
1639 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1640   = case mop of
1641       MO_Eq F32 -> condFltReg EQQ x y
1642       MO_Ne F32 -> condFltReg NE  x y
1643
1644       MO_S_Gt F32 -> condFltReg GTT x y
1645       MO_S_Ge F32 -> condFltReg GE  x y
1646       MO_S_Lt F32 -> condFltReg LTT x y
1647       MO_S_Le F32 -> condFltReg LE  x y
1648
1649       MO_Eq F64 -> condFltReg EQQ x y
1650       MO_Ne F64 -> condFltReg NE  x y
1651
1652       MO_S_Gt F64 -> condFltReg GTT x y
1653       MO_S_Ge F64 -> condFltReg GE  x y
1654       MO_S_Lt F64 -> condFltReg LTT x y
1655       MO_S_Le F64 -> condFltReg LE  x y
1656
1657       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1658       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1659
1660       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1661       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1662       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1663       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1664
1665       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1666       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1667       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1668       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1669
1670       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1671       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1672       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1673       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1674
1675       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1676       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1677       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1678       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1679
1680          -- optimize addition with 32-bit immediate
1681          -- (needed for PIC)
1682       MO_Add I32 ->
1683         case y of
1684           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1685             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1686           CmmLit lit
1687             -> do
1688                 (src, srcCode) <- getSomeReg x
1689                 let imm = litToImm lit
1690                     code dst = srcCode `appOL` toOL [
1691                                     ADDIS dst src (HA imm),
1692                                     ADD dst dst (RIImm (LO imm))
1693                                 ]
1694                 return (Any I32 code)
1695           _ -> trivialCode I32 True ADD x y
1696
1697       MO_Add rep -> trivialCode rep True ADD x y
1698       MO_Sub rep ->
1699         case y of    -- subfi ('substract from' with immediate) doesn't exist
1700           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1701             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1702           _ -> trivialCodeNoImm rep SUBF y x
1703
1704       MO_Mul rep -> trivialCode rep True MULLW x y
1705
1706       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1707
1708       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1709       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1710
1711       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1712       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1713
1714       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1715       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1716
1717       MO_And rep   -> trivialCode rep False AND x y
1718       MO_Or rep    -> trivialCode rep False OR x y
1719       MO_Xor rep   -> trivialCode rep False XOR x y
1720
1721       MO_Shl rep   -> trivialCode rep False SLW x y
1722       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1723       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1724
1725 getRegister (CmmLit (CmmInt i rep))
1726   | Just imm <- makeImmediate rep True i
1727   = let
1728         code dst = unitOL (LI dst imm)
1729     in
1730         return (Any rep code)
1731
1732 getRegister (CmmLit (CmmFloat f frep)) = do
1733     lbl <- getNewLabelNat
1734     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1735     Amode addr addr_code <- getAmode dynRef
1736     let code dst =
1737             LDATA ReadOnlyData  [CmmDataLabel lbl,
1738                                  CmmStaticLit (CmmFloat f frep)]
1739             `consOL` (addr_code `snocOL` LD frep dst addr)
1740     return (Any frep code)
1741
1742 getRegister (CmmLit lit)
1743   = let rep = cmmLitRep lit
1744         imm = litToImm lit
1745         code dst = toOL [
1746               LIS dst (HI imm),
1747               OR dst dst (RIImm (LO imm))
1748           ]
1749     in return (Any rep code)
1750
1751 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1752
1753     -- extend?Rep: wrap integer expression of type rep
1754     -- in a conversion to I32
1755 extendSExpr I32 x = x
1756 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1757 extendUExpr I32 x = x
1758 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1759
1760 #endif /* powerpc_TARGET_ARCH */
1761
1762
1763 -- -----------------------------------------------------------------------------
1764 --  The 'Amode' type: Memory addressing modes passed up the tree.
1765
1766 data Amode = Amode AddrMode InstrBlock
1767
1768 {-
1769 Now, given a tree (the argument to an CmmLoad) that references memory,
1770 produce a suitable addressing mode.
1771
1772 A Rule of the Game (tm) for Amodes: use of the addr bit must
1773 immediately follow use of the code part, since the code part puts
1774 values in registers which the addr then refers to.  So you can't put
1775 anything in between, lest it overwrite some of those registers.  If
1776 you need to do some other computation between the code part and use of
1777 the addr bit, first store the effective address from the amode in a
1778 temporary, then do the other computation, and then use the temporary:
1779
1780     code
1781     LEA amode, tmp
1782     ... other computation ...
1783     ... (tmp) ...
1784 -}
1785
1786 getAmode :: CmmExpr -> NatM Amode
1787 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1788
1789 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1790
1791 #if alpha_TARGET_ARCH
1792
1793 getAmode (StPrim IntSubOp [x, StInt i])
1794   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1795     getRegister x               `thenNat` \ register ->
1796     let
1797         code = registerCode register tmp
1798         reg  = registerName register tmp
1799         off  = ImmInt (-(fromInteger i))
1800     in
1801     return (Amode (AddrRegImm reg off) code)
1802
1803 getAmode (StPrim IntAddOp [x, StInt i])
1804   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1805     getRegister x               `thenNat` \ register ->
1806     let
1807         code = registerCode register tmp
1808         reg  = registerName register tmp
1809         off  = ImmInt (fromInteger i)
1810     in
1811     return (Amode (AddrRegImm reg off) code)
1812
1813 getAmode leaf
1814   | isJust imm
1815   = return (Amode (AddrImm imm__2) id)
1816   where
1817     imm = maybeImm leaf
1818     imm__2 = case imm of Just x -> x
1819
1820 getAmode other
1821   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1822     getRegister other           `thenNat` \ register ->
1823     let
1824         code = registerCode register tmp
1825         reg  = registerName register tmp
1826     in
1827     return (Amode (AddrReg reg) code)
1828
1829 #endif /* alpha_TARGET_ARCH */
1830
1831 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1832
1833 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1834
1835 -- This is all just ridiculous, since it carefully undoes
1836 -- what mangleIndexTree has just done.
1837 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1838   | not (is64BitLit lit)
1839   -- ASSERT(rep == I32)???
1840   = do (x_reg, x_code) <- getSomeReg x
1841        let off = ImmInt (-(fromInteger i))
1842        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1843
1844 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1845   | not (is64BitLit lit)
1846   -- ASSERT(rep == I32)???
1847   = do (x_reg, x_code) <- getSomeReg x
1848        let off = ImmInt (fromInteger i)
1849        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1850
1851 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1852 -- recognised by the next rule.
1853 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1854                                   b@(CmmLit _)])
1855   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1856
1857 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1858                                         [y, CmmLit (CmmInt shift _)]])
1859   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1860   = do (x_reg, x_code) <- getNonClobberedReg x
1861         -- x must be in a temp, because it has to stay live over y_code
1862         -- we could compre x_reg and y_reg and do something better here...
1863        (y_reg, y_code) <- getSomeReg y
1864        let
1865            code = x_code `appOL` y_code
1866            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1867        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1868                code)
1869
1870 getAmode (CmmLit lit) | not (is64BitLit lit)
1871   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1872
1873 getAmode expr = do
1874   (reg,code) <- getSomeReg expr
1875   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1876
1877 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1878
1879 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1880
1881 #if sparc_TARGET_ARCH
1882
1883 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1884   | fits13Bits (-i)
1885   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1886     getRegister x               `thenNat` \ register ->
1887     let
1888         code = registerCode register tmp
1889         reg  = registerName register tmp
1890         off  = ImmInt (-(fromInteger i))
1891     in
1892     return (Amode (AddrRegImm reg off) code)
1893
1894
1895 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1896   | fits13Bits i
1897   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1898     getRegister x               `thenNat` \ register ->
1899     let
1900         code = registerCode register tmp
1901         reg  = registerName register tmp
1902         off  = ImmInt (fromInteger i)
1903     in
1904     return (Amode (AddrRegImm reg off) code)
1905
1906 getAmode (CmmMachOp MO_Nat_Add [x, y])
1907   = getNewRegNat PtrRep         `thenNat` \ tmp1 ->
1908     getNewRegNat IntRep         `thenNat` \ tmp2 ->
1909     getRegister x               `thenNat` \ register1 ->
1910     getRegister y               `thenNat` \ register2 ->
1911     let
1912         code1 = registerCode register1 tmp1
1913         reg1  = registerName register1 tmp1
1914         code2 = registerCode register2 tmp2
1915         reg2  = registerName register2 tmp2
1916         code__2 = code1 `appOL` code2
1917     in
1918     return (Amode (AddrRegReg reg1 reg2) code__2)
1919
1920 getAmode leaf
1921   | isJust imm
1922   = getNewRegNat PtrRep             `thenNat` \ tmp ->
1923     let
1924         code = unitOL (SETHI (HI imm__2) tmp)
1925     in
1926     return (Amode (AddrRegImm tmp (LO imm__2)) code)
1927   where
1928     imm    = maybeImm leaf
1929     imm__2 = case imm of Just x -> x
1930
1931 getAmode other
1932   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1933     getRegister other           `thenNat` \ register ->
1934     let
1935         code = registerCode register tmp
1936         reg  = registerName register tmp
1937         off  = ImmInt 0
1938     in
1939     return (Amode (AddrRegImm reg off) code)
1940
1941 #endif /* sparc_TARGET_ARCH */
1942
1943 #ifdef powerpc_TARGET_ARCH
1944 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1945   | Just off <- makeImmediate I32 True (-i)
1946   = do
1947         (reg, code) <- getSomeReg x
1948         return (Amode (AddrRegImm reg off) code)
1949
1950
1951 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1952   | Just off <- makeImmediate I32 True i
1953   = do
1954         (reg, code) <- getSomeReg x
1955         return (Amode (AddrRegImm reg off) code)
1956
1957    -- optimize addition with 32-bit immediate
1958    -- (needed for PIC)
1959 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1960   = do
1961         tmp <- getNewRegNat I32
1962         (src, srcCode) <- getSomeReg x
1963         let imm = litToImm lit
1964             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1965         return (Amode (AddrRegImm tmp (LO imm)) code)
1966
1967 getAmode (CmmLit lit)
1968   = do
1969         tmp <- getNewRegNat I32
1970         let imm = litToImm lit
1971             code = unitOL (LIS tmp (HA imm))
1972         return (Amode (AddrRegImm tmp (LO imm)) code)
1973
1974 getAmode (CmmMachOp (MO_Add I32) [x, y])
1975   = do
1976         (regX, codeX) <- getSomeReg x
1977         (regY, codeY) <- getSomeReg y
1978         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1979
1980 getAmode other
1981   = do
1982         (reg, code) <- getSomeReg other
1983         let
1984             off  = ImmInt 0
1985         return (Amode (AddrRegImm reg off) code)
1986 #endif /* powerpc_TARGET_ARCH */
1987
1988 -- -----------------------------------------------------------------------------
1989 -- getOperand: sometimes any operand will do.
1990
1991 -- getNonClobberedOperand: the value of the operand will remain valid across
1992 -- the computation of an arbitrary expression, unless the expression
1993 -- is computed directly into a register which the operand refers to
1994 -- (see trivialCode where this function is used for an example).
1995
1996 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1997
1998 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1999 #if x86_64_TARGET_ARCH
2000 getNonClobberedOperand (CmmLit lit)
2001   | isSuitableFloatingPointLit lit = do
2002     lbl <- getNewLabelNat
2003     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2004                                            CmmStaticLit lit])
2005     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2006 #endif
2007 getNonClobberedOperand (CmmLit lit)
2008   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2009     return (OpImm (litToImm lit), nilOL)
2010 getNonClobberedOperand (CmmLoad mem pk)
2011   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2012     Amode src mem_code <- getAmode mem
2013     (src',save_code) <-
2014         if (amodeCouldBeClobbered src)
2015                 then do
2016                    tmp <- getNewRegNat wordRep
2017                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2018                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2019                 else
2020                    return (src, nilOL)
2021     return (OpAddr src', save_code `appOL` mem_code)
2022 getNonClobberedOperand e = do
2023     (reg, code) <- getNonClobberedReg e
2024     return (OpReg reg, code)
2025
2026 amodeCouldBeClobbered :: AddrMode -> Bool
2027 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2028
2029 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2030 regClobbered _ = False
2031
2032 -- getOperand: the operand is not required to remain valid across the
2033 -- computation of an arbitrary expression.
2034 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2035 #if x86_64_TARGET_ARCH
2036 getOperand (CmmLit lit)
2037   | isSuitableFloatingPointLit lit = do
2038     lbl <- getNewLabelNat
2039     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2040                                            CmmStaticLit lit])
2041     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2042 #endif
2043 getOperand (CmmLit lit)
2044   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2045     return (OpImm (litToImm lit), nilOL)
2046 getOperand (CmmLoad mem pk)
2047   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2048     Amode src mem_code <- getAmode mem
2049     return (OpAddr src, mem_code)
2050 getOperand e = do
2051     (reg, code) <- getSomeReg e
2052     return (OpReg reg, code)
2053
2054 isOperand :: CmmExpr -> Bool
2055 isOperand (CmmLoad _ _) = True
2056 isOperand (CmmLit lit)  = not (is64BitLit lit)
2057                           || isSuitableFloatingPointLit lit
2058 isOperand _             = False
2059
2060 -- if we want a floating-point literal as an operand, we can
2061 -- use it directly from memory.  However, if the literal is
2062 -- zero, we're better off generating it into a register using
2063 -- xor.
2064 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2065 isSuitableFloatingPointLit _ = False
2066
2067 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2068 getRegOrMem (CmmLoad mem pk)
2069   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2070     Amode src mem_code <- getAmode mem
2071     return (OpAddr src, mem_code)
2072 getRegOrMem e = do
2073     (reg, code) <- getNonClobberedReg e
2074     return (OpReg reg, code)
2075
2076 #if x86_64_TARGET_ARCH
2077 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2078    -- assume that labels are in the range 0-2^31-1: this assumes the
2079    -- small memory model (see gcc docs, -mcmodel=small).
2080 #endif
2081 is64BitLit x = False
2082 #endif
2083
2084 -- -----------------------------------------------------------------------------
2085 --  The 'CondCode' type:  Condition codes passed up the tree.
2086
2087 data CondCode = CondCode Bool Cond InstrBlock
2088
2089 -- Set up a condition code for a conditional branch.
2090
2091 getCondCode :: CmmExpr -> NatM CondCode
2092
2093 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2094
2095 #if alpha_TARGET_ARCH
2096 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2097 #endif /* alpha_TARGET_ARCH */
2098
2099 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100
2101 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2102 -- yes, they really do seem to want exactly the same!
2103
2104 getCondCode (CmmMachOp mop [x, y])
2105   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2106     case mop of
2107       MO_Eq F32 -> condFltCode EQQ x y
2108       MO_Ne F32 -> condFltCode NE  x y
2109
2110       MO_S_Gt F32 -> condFltCode GTT x y
2111       MO_S_Ge F32 -> condFltCode GE  x y
2112       MO_S_Lt F32 -> condFltCode LTT x y
2113       MO_S_Le F32 -> condFltCode LE  x y
2114
2115       MO_Eq F64 -> condFltCode EQQ x y
2116       MO_Ne F64 -> condFltCode NE  x y
2117
2118       MO_S_Gt F64 -> condFltCode GTT x y
2119       MO_S_Ge F64 -> condFltCode GE  x y
2120       MO_S_Lt F64 -> condFltCode LTT x y
2121       MO_S_Le F64 -> condFltCode LE  x y
2122
2123       MO_Eq rep -> condIntCode EQQ  x y
2124       MO_Ne rep -> condIntCode NE   x y
2125
2126       MO_S_Gt rep -> condIntCode GTT  x y
2127       MO_S_Ge rep -> condIntCode GE   x y
2128       MO_S_Lt rep -> condIntCode LTT  x y
2129       MO_S_Le rep -> condIntCode LE   x y
2130
2131       MO_U_Gt rep -> condIntCode GU   x y
2132       MO_U_Ge rep -> condIntCode GEU  x y
2133       MO_U_Lt rep -> condIntCode LU   x y
2134       MO_U_Le rep -> condIntCode LEU  x y
2135
2136       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2137
2138 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2139
2140 #elif powerpc_TARGET_ARCH
2141
2142 -- almost the same as everywhere else - but we need to
2143 -- extend small integers to 32 bit first
2144
2145 getCondCode (CmmMachOp mop [x, y])
2146   = case mop of
2147       MO_Eq F32 -> condFltCode EQQ x y
2148       MO_Ne F32 -> condFltCode NE  x y
2149
2150       MO_S_Gt F32 -> condFltCode GTT x y
2151       MO_S_Ge F32 -> condFltCode GE  x y
2152       MO_S_Lt F32 -> condFltCode LTT x y
2153       MO_S_Le F32 -> condFltCode LE  x y
2154
2155       MO_Eq F64 -> condFltCode EQQ x y
2156       MO_Ne F64 -> condFltCode NE  x y
2157
2158       MO_S_Gt F64 -> condFltCode GTT x y
2159       MO_S_Ge F64 -> condFltCode GE  x y
2160       MO_S_Lt F64 -> condFltCode LTT x y
2161       MO_S_Le F64 -> condFltCode LE  x y
2162
2163       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2164       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2165
2166       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2167       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2168       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2169       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2170
2171       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2172       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2173       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2174       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2175
2176       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2177
2178 getCondCode other =  panic "getCondCode(2)(powerpc)"
2179
2180
2181 #endif
2182
2183
2184 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2185 -- passed back up the tree.
2186
2187 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2188
2189 #if alpha_TARGET_ARCH
2190 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2191 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2192 #endif /* alpha_TARGET_ARCH */
2193
2194 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2195 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2196
2197 -- memory vs immediate
2198 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2199     Amode x_addr x_code <- getAmode x
2200     let
2201         imm  = litToImm lit
2202         code = x_code `snocOL`
2203                   CMP pk (OpImm imm) (OpAddr x_addr)
2204     --
2205     return (CondCode False cond code)
2206
2207 -- anything vs zero
2208 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2209     (x_reg, x_code) <- getSomeReg x
2210     let
2211         code = x_code `snocOL`
2212                   TEST pk (OpReg x_reg) (OpReg x_reg)
2213     --
2214     return (CondCode False cond code)
2215
2216 -- anything vs operand
2217 condIntCode cond x y | isOperand y = do
2218     (x_reg, x_code) <- getNonClobberedReg x
2219     (y_op,  y_code) <- getOperand y
2220     let
2221         code = x_code `appOL` y_code `snocOL`
2222                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2223     -- in
2224     return (CondCode False cond code)
2225
2226 -- anything vs anything
2227 condIntCode cond x y = do
2228   (y_reg, y_code) <- getNonClobberedReg y
2229   (x_op, x_code) <- getRegOrMem x
2230   let
2231         code = y_code `appOL`
2232                x_code `snocOL`
2233                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2234   -- in
2235   return (CondCode False cond code)
2236 #endif
2237
2238 #if i386_TARGET_ARCH
2239 condFltCode cond x y
2240   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2241   (x_reg, x_code) <- getNonClobberedReg x
2242   (y_reg, y_code) <- getSomeReg y
2243   let
2244         code = x_code `appOL` y_code `snocOL`
2245                 GCMP cond x_reg y_reg
2246   -- The GCMP insn does the test and sets the zero flag if comparable
2247   -- and true.  Hence we always supply EQQ as the condition to test.
2248   return (CondCode True EQQ code)
2249 #endif /* i386_TARGET_ARCH */
2250
2251 #if x86_64_TARGET_ARCH
2252 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2253 -- an operand, but the right must be a reg.  We can probably do better
2254 -- than this general case...
2255 condFltCode cond x y = do
2256   (x_reg, x_code) <- getNonClobberedReg x
2257   (y_op, y_code) <- getOperand y
2258   let
2259         code = x_code `appOL`
2260                y_code `snocOL`
2261                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2262         -- NB(1): we need to use the unsigned comparison operators on the
2263         -- result of this comparison.
2264   -- in
2265   return (CondCode True (condToUnsigned cond) code)
2266 #endif
2267
2268 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2269
2270 #if sparc_TARGET_ARCH
2271
2272 condIntCode cond x (StInt y)
2273   | fits13Bits y
2274   = getRegister x               `thenNat` \ register ->
2275     getNewRegNat IntRep         `thenNat` \ tmp ->
2276     let
2277         code = registerCode register tmp
2278         src1 = registerName register tmp
2279         src2 = ImmInt (fromInteger y)
2280         code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2281     in
2282     return (CondCode False cond code__2)
2283
2284 condIntCode cond x y
2285   = getRegister x               `thenNat` \ register1 ->
2286     getRegister y               `thenNat` \ register2 ->
2287     getNewRegNat IntRep         `thenNat` \ tmp1 ->
2288     getNewRegNat IntRep         `thenNat` \ tmp2 ->
2289     let
2290         code1 = registerCode register1 tmp1
2291         src1  = registerName register1 tmp1
2292         code2 = registerCode register2 tmp2
2293         src2  = registerName register2 tmp2
2294         code__2 = code1 `appOL` code2 `snocOL`
2295                   SUB False True src1 (RIReg src2) g0
2296     in
2297     return (CondCode False cond code__2)
2298
2299 -----------
2300 condFltCode cond x y
2301   = getRegister x               `thenNat` \ register1 ->
2302     getRegister y               `thenNat` \ register2 ->
2303     getNewRegNat (registerRep register1)
2304                                 `thenNat` \ tmp1 ->
2305     getNewRegNat (registerRep register2)
2306                                 `thenNat` \ tmp2 ->
2307     getNewRegNat F64    `thenNat` \ tmp ->
2308     let
2309         promote x = FxTOy F DF x tmp
2310
2311         pk1   = registerRep register1
2312         code1 = registerCode register1 tmp1
2313         src1  = registerName register1 tmp1
2314
2315         pk2   = registerRep register2
2316         code2 = registerCode register2 tmp2
2317         src2  = registerName register2 tmp2
2318
2319         code__2 =
2320                 if pk1 == pk2 then
2321                     code1 `appOL` code2 `snocOL`
2322                     FCMP True (primRepToSize pk1) src1 src2
2323                 else if pk1 == F32 then
2324                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2325                     FCMP True DF tmp src2
2326                 else
2327                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2328                     FCMP True DF src1 tmp
2329     in
2330     return (CondCode True cond code__2)
2331
2332 #endif /* sparc_TARGET_ARCH */
2333
2334 #if powerpc_TARGET_ARCH
2335 --  ###FIXME: I16 and I8!
2336 condIntCode cond x (CmmLit (CmmInt y rep))
2337   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2338   = do
2339         (src1, code) <- getSomeReg x
2340         let
2341             code' = code `snocOL`
2342                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2343         return (CondCode False cond code')
2344
2345 condIntCode cond x y = do
2346     (src1, code1) <- getSomeReg x
2347     (src2, code2) <- getSomeReg y
2348     let
2349         code' = code1 `appOL` code2 `snocOL`
2350                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2351     return (CondCode False cond code')
2352
2353 condFltCode cond x y = do
2354     (src1, code1) <- getSomeReg x
2355     (src2, code2) <- getSomeReg y
2356     let
2357         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2358         code'' = case cond of -- twiddle CR to handle unordered case
2359                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2360                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2361                     _ -> code'
2362                  where
2363                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2364     return (CondCode True cond code'')
2365
2366 #endif /* powerpc_TARGET_ARCH */
2367
2368 -- -----------------------------------------------------------------------------
2369 -- Generating assignments
2370
2371 -- Assignments are really at the heart of the whole code generation
2372 -- business.  Almost all top-level nodes of any real importance are
2373 -- assignments, which correspond to loads, stores, or register
2374 -- transfers.  If we're really lucky, some of the register transfers
2375 -- will go away, because we can use the destination register to
2376 -- complete the code generation for the right hand side.  This only
2377 -- fails when the right hand side is forced into a fixed register
2378 -- (e.g. the result of a call).
2379
2380 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2381 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2382
2383 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2384 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2385
2386 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2387
2388 #if alpha_TARGET_ARCH
2389
2390 assignIntCode pk (CmmLoad dst _) src
2391   = getNewRegNat IntRep             `thenNat` \ tmp ->
2392     getAmode dst                    `thenNat` \ amode ->
2393     getRegister src                 `thenNat` \ register ->
2394     let
2395         code1   = amodeCode amode []
2396         dst__2  = amodeAddr amode
2397         code2   = registerCode register tmp []
2398         src__2  = registerName register tmp
2399         sz      = primRepToSize pk
2400         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2401     in
2402     return code__2
2403
2404 assignIntCode pk dst src
2405   = getRegister dst                         `thenNat` \ register1 ->
2406     getRegister src                         `thenNat` \ register2 ->
2407     let
2408         dst__2  = registerName register1 zeroh
2409         code    = registerCode register2 dst__2
2410         src__2  = registerName register2 dst__2
2411         code__2 = if isFixed register2
2412                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2413                   else code
2414     in
2415     return code__2
2416
2417 #endif /* alpha_TARGET_ARCH */
2418
2419 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2420
2421 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2422
2423 -- integer assignment to memory
2424 assignMem_IntCode pk addr src = do
2425     Amode addr code_addr <- getAmode addr
2426     (code_src, op_src)   <- get_op_RI src
2427     let
2428         code = code_src `appOL`
2429                code_addr `snocOL`
2430                   MOV pk op_src (OpAddr addr)
2431         -- NOTE: op_src is stable, so it will still be valid
2432         -- after code_addr.  This may involve the introduction
2433         -- of an extra MOV to a temporary register, but we hope
2434         -- the register allocator will get rid of it.
2435     --
2436     return code
2437   where
2438     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2439     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2440       = return (nilOL, OpImm (litToImm lit))
2441     get_op_RI op
2442       = do (reg,code) <- getNonClobberedReg op
2443            return (code, OpReg reg)
2444
2445
2446 -- Assign; dst is a reg, rhs is mem
2447 assignReg_IntCode pk reg (CmmLoad src _) = do
2448   load_code <- intLoadCode (MOV pk) src
2449   return (load_code (getRegisterReg reg))
2450
2451 -- dst is a reg, but src could be anything
2452 assignReg_IntCode pk reg src = do
2453   code <- getAnyReg src
2454   return (code (getRegisterReg reg))
2455
2456 #endif /* i386_TARGET_ARCH */
2457
2458 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2459
2460 #if sparc_TARGET_ARCH
2461
2462 assignMem_IntCode pk addr src
2463   = getNewRegNat IntRep                     `thenNat` \ tmp ->
2464     getAmode addr                           `thenNat` \ amode ->
2465     getRegister src                         `thenNat` \ register ->
2466     let
2467         code1   = amodeCode amode
2468         dst__2  = amodeAddr amode
2469         code2   = registerCode register tmp
2470         src__2  = registerName register tmp
2471         sz      = primRepToSize pk
2472         code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2473     in
2474     return code__2
2475
2476 assignReg_IntCode pk reg src
2477   = getRegister src                         `thenNat` \ register2 ->
2478     getRegisterReg reg                      `thenNat` \ register1 ->
2479     getNewRegNat IntRep                     `thenNat` \ tmp ->
2480     let
2481         dst__2  = registerName register1 tmp
2482         code    = registerCode register2 dst__2
2483         src__2  = registerName register2 dst__2
2484         code__2 = if isFixed register2
2485                   then code `snocOL` OR False g0 (RIReg src__2) dst__2
2486                   else code
2487     in
2488     return code__2
2489
2490 #endif /* sparc_TARGET_ARCH */
2491
2492 #if powerpc_TARGET_ARCH
2493
2494 assignMem_IntCode pk addr src = do
2495     (srcReg, code) <- getSomeReg src
2496     Amode dstAddr addr_code <- getAmode addr
2497     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2498
2499 -- dst is a reg, but src could be anything
2500 assignReg_IntCode pk reg src
2501     = do
2502         r <- getRegister src
2503         return $ case r of
2504             Any _ code         -> code dst
2505             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2506     where
2507         dst = getRegisterReg reg
2508
2509 #endif /* powerpc_TARGET_ARCH */
2510
2511
2512 -- -----------------------------------------------------------------------------
2513 -- Floating-point assignments
2514
2515 #if alpha_TARGET_ARCH
2516
2517 assignFltCode pk (CmmLoad dst _) src
2518   = getNewRegNat pk                 `thenNat` \ tmp ->
2519     getAmode dst                    `thenNat` \ amode ->
2520     getRegister src                         `thenNat` \ register ->
2521     let
2522         code1   = amodeCode amode []
2523         dst__2  = amodeAddr amode
2524         code2   = registerCode register tmp []
2525         src__2  = registerName register tmp
2526         sz      = primRepToSize pk
2527         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2528     in
2529     return code__2
2530
2531 assignFltCode pk dst src
2532   = getRegister dst                         `thenNat` \ register1 ->
2533     getRegister src                         `thenNat` \ register2 ->
2534     let
2535         dst__2  = registerName register1 zeroh
2536         code    = registerCode register2 dst__2
2537         src__2  = registerName register2 dst__2
2538         code__2 = if isFixed register2
2539                   then code . mkSeqInstr (FMOV src__2 dst__2)
2540                   else code
2541     in
2542     return code__2
2543
2544 #endif /* alpha_TARGET_ARCH */
2545
2546 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2547
2548 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2549
2550 -- Floating point assignment to memory
2551 assignMem_FltCode pk addr src = do
2552   (src_reg, src_code) <- getNonClobberedReg src
2553   Amode addr addr_code <- getAmode addr
2554   let
2555         code = src_code `appOL`
2556                addr_code `snocOL`
2557                 IF_ARCH_i386(GST pk src_reg addr,
2558                              MOV pk (OpReg src_reg) (OpAddr addr))
2559   return code
2560
2561 -- Floating point assignment to a register/temporary
2562 assignReg_FltCode pk reg src = do
2563   src_code <- getAnyReg src
2564   return (src_code (getRegisterReg reg))
2565
2566 #endif /* i386_TARGET_ARCH */
2567
2568 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2569
2570 #if sparc_TARGET_ARCH
2571
2572 -- Floating point assignment to memory
2573 assignMem_FltCode pk addr src
2574   = getNewRegNat pk                 `thenNat` \ tmp1 ->
2575     getAmode addr                   `thenNat` \ amode ->
2576     getRegister src                 `thenNat` \ register ->
2577     let
2578         sz      = primRepToSize pk
2579         dst__2  = amodeAddr amode
2580
2581         code1   = amodeCode amode
2582         code2   = registerCode register tmp1
2583
2584         src__2  = registerName register tmp1
2585         pk__2   = registerRep register
2586         sz__2   = primRepToSize pk__2
2587
2588         code__2 = code1 `appOL` code2 `appOL`
2589             if   pk == pk__2
2590             then unitOL (ST sz src__2 dst__2)
2591             else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2592     in
2593     return code__2
2594
2595 -- Floating point assignment to a register/temporary
2596 -- Why is this so bizarrely ugly?
2597 assignReg_FltCode pk reg src
2598   = getRegisterReg reg                      `thenNat` \ register1 ->
2599     getRegister src                         `thenNat` \ register2 ->
2600     let
2601         pk__2   = registerRep register2
2602         sz__2   = primRepToSize pk__2
2603     in
2604     getNewRegNat pk__2                      `thenNat` \ tmp ->
2605     let
2606         sz      = primRepToSize pk
2607         dst__2  = registerName register1 g0    -- must be Fixed
2608         reg__2  = if pk /= pk__2 then tmp else dst__2
2609         code    = registerCode register2 reg__2
2610         src__2  = registerName register2 reg__2
2611         code__2 =
2612                 if pk /= pk__2 then
2613                      code `snocOL` FxTOy sz__2 sz src__2 dst__2
2614                 else if isFixed register2 then
2615                      code `snocOL` FMOV sz src__2 dst__2
2616                 else
2617                      code
2618     in
2619     return code__2
2620
2621 #endif /* sparc_TARGET_ARCH */
2622
2623 #if powerpc_TARGET_ARCH
2624
2625 -- Easy, isn't it?
2626 assignMem_FltCode = assignMem_IntCode
2627 assignReg_FltCode = assignReg_IntCode
2628
2629 #endif /* powerpc_TARGET_ARCH */
2630
2631
2632 -- -----------------------------------------------------------------------------
2633 -- Generating an non-local jump
2634
2635 -- (If applicable) Do not fill the delay slots here; you will confuse the
2636 -- register allocator.
2637
2638 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2639
2640 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2641
2642 #if alpha_TARGET_ARCH
2643
2644 genJump (CmmLabel lbl)
2645   | isAsmTemp lbl = returnInstr (BR target)
2646   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2647   where
2648     target = ImmCLbl lbl
2649
2650 genJump tree
2651   = getRegister tree                `thenNat` \ register ->
2652     getNewRegNat PtrRep             `thenNat` \ tmp ->
2653     let
2654         dst    = registerName register pv
2655         code   = registerCode register pv
2656         target = registerName register pv
2657     in
2658     if isFixed register then
2659         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2660     else
2661     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2662
2663 #endif /* alpha_TARGET_ARCH */
2664
2665 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2666
2667 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2668
2669 genJump (CmmLoad mem pk) = do
2670   Amode target code <- getAmode mem
2671   return (code `snocOL` JMP (OpAddr target))
2672
2673 genJump (CmmLit lit) = do
2674   return (unitOL (JMP (OpImm (litToImm lit))))
2675
2676 genJump expr = do
2677   (reg,code) <- getSomeReg expr
2678   return (code `snocOL` JMP (OpReg reg))
2679
2680 #endif /* i386_TARGET_ARCH */
2681
2682 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2683
2684 #if sparc_TARGET_ARCH
2685
2686 genJump (CmmLabel lbl)
2687   = return (toOL [CALL (Left target) 0 True, NOP])
2688   where
2689     target = ImmCLbl lbl
2690
2691 genJump tree
2692   = getRegister tree                        `thenNat` \ register ->
2693     getNewRegNat PtrRep             `thenNat` \ tmp ->
2694     let
2695         code   = registerCode register tmp
2696         target = registerName register tmp
2697     in
2698     return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2699
2700 #endif /* sparc_TARGET_ARCH */
2701
2702 #if powerpc_TARGET_ARCH
2703 genJump (CmmLit (CmmLabel lbl))
2704   = return (unitOL $ JMP lbl)
2705
2706 genJump tree
2707   = do
2708         (target,code) <- getSomeReg tree
2709         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2710 #endif /* powerpc_TARGET_ARCH */
2711
2712
2713 -- -----------------------------------------------------------------------------
2714 --  Unconditional branches
2715
2716 genBranch :: BlockId -> NatM InstrBlock
2717
2718 #if alpha_TARGET_ARCH
2719 genBranch id = return (unitOL (BR id))
2720 #endif
2721
2722 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2723 genBranch id = return (unitOL (JXX ALWAYS id))
2724 #endif
2725
2726 #if sparc_TARGET_ARCH
2727 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2728 #endif
2729
2730 #if powerpc_TARGET_ARCH
2731 genBranch id = return (unitOL (BCC ALWAYS id))
2732 #endif
2733
2734
2735 -- -----------------------------------------------------------------------------
2736 --  Conditional jumps
2737
2738 {-
2739 Conditional jumps are always to local labels, so we can use branch
2740 instructions.  We peek at the arguments to decide what kind of
2741 comparison to do.
2742
2743 ALPHA: For comparisons with 0, we're laughing, because we can just do
2744 the desired conditional branch.
2745
2746 I386: First, we have to ensure that the condition
2747 codes are set according to the supplied comparison operation.
2748
2749 SPARC: First, we have to ensure that the condition codes are set
2750 according to the supplied comparison operation.  We generate slightly
2751 different code for floating point comparisons, because a floating
2752 point operation cannot directly precede a @BF@.  We assume the worst
2753 and fill that slot with a @NOP@.
2754
2755 SPARC: Do not fill the delay slots here; you will confuse the register
2756 allocator.
2757 -}
2758
2759
2760 genCondJump
2761     :: BlockId      -- the branch target
2762     -> CmmExpr      -- the condition on which to branch
2763     -> NatM InstrBlock
2764
2765 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2766
2767 #if alpha_TARGET_ARCH
2768
2769 genCondJump id (StPrim op [x, StInt 0])
2770   = getRegister x                           `thenNat` \ register ->
2771     getNewRegNat (registerRep register)
2772                                     `thenNat` \ tmp ->
2773     let
2774         code   = registerCode register tmp
2775         value  = registerName register tmp
2776         pk     = registerRep register
2777         target = ImmCLbl lbl
2778     in
2779     returnSeq code [BI (cmpOp op) value target]
2780   where
2781     cmpOp CharGtOp = GTT
2782     cmpOp CharGeOp = GE
2783     cmpOp CharEqOp = EQQ
2784     cmpOp CharNeOp = NE
2785     cmpOp CharLtOp = LTT
2786     cmpOp CharLeOp = LE
2787     cmpOp IntGtOp = GTT
2788     cmpOp IntGeOp = GE
2789     cmpOp IntEqOp = EQQ
2790     cmpOp IntNeOp = NE
2791     cmpOp IntLtOp = LTT
2792     cmpOp IntLeOp = LE
2793     cmpOp WordGtOp = NE
2794     cmpOp WordGeOp = ALWAYS
2795     cmpOp WordEqOp = EQQ
2796     cmpOp WordNeOp = NE
2797     cmpOp WordLtOp = NEVER
2798     cmpOp WordLeOp = EQQ
2799     cmpOp AddrGtOp = NE
2800     cmpOp AddrGeOp = ALWAYS
2801     cmpOp AddrEqOp = EQQ
2802     cmpOp AddrNeOp = NE
2803     cmpOp AddrLtOp = NEVER
2804     cmpOp AddrLeOp = EQQ
2805
2806 genCondJump lbl (StPrim op [x, StDouble 0.0])
2807   = getRegister x                           `thenNat` \ register ->
2808     getNewRegNat (registerRep register)
2809                                     `thenNat` \ tmp ->
2810     let
2811         code   = registerCode register tmp
2812         value  = registerName register tmp
2813         pk     = registerRep register
2814         target = ImmCLbl lbl
2815     in
2816     return (code . mkSeqInstr (BF (cmpOp op) value target))
2817   where
2818     cmpOp FloatGtOp = GTT
2819     cmpOp FloatGeOp = GE
2820     cmpOp FloatEqOp = EQQ
2821     cmpOp FloatNeOp = NE
2822     cmpOp FloatLtOp = LTT
2823     cmpOp FloatLeOp = LE
2824     cmpOp DoubleGtOp = GTT
2825     cmpOp DoubleGeOp = GE
2826     cmpOp DoubleEqOp = EQQ
2827     cmpOp DoubleNeOp = NE
2828     cmpOp DoubleLtOp = LTT
2829     cmpOp DoubleLeOp = LE
2830
2831 genCondJump lbl (StPrim op [x, y])
2832   | fltCmpOp op
2833   = trivialFCode pr instr x y       `thenNat` \ register ->
2834     getNewRegNat F64                `thenNat` \ tmp ->
2835     let
2836         code   = registerCode register tmp
2837         result = registerName register tmp
2838         target = ImmCLbl lbl
2839     in
2840     return (code . mkSeqInstr (BF cond result target))
2841   where
2842     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2843
2844     fltCmpOp op = case op of
2845         FloatGtOp -> True
2846         FloatGeOp -> True
2847         FloatEqOp -> True
2848         FloatNeOp -> True
2849         FloatLtOp -> True
2850         FloatLeOp -> True
2851         DoubleGtOp -> True
2852         DoubleGeOp -> True
2853         DoubleEqOp -> True
2854         DoubleNeOp -> True
2855         DoubleLtOp -> True
2856         DoubleLeOp -> True
2857         _ -> False
2858     (instr, cond) = case op of
2859         FloatGtOp -> (FCMP TF LE, EQQ)
2860         FloatGeOp -> (FCMP TF LTT, EQQ)
2861         FloatEqOp -> (FCMP TF EQQ, NE)
2862         FloatNeOp -> (FCMP TF EQQ, EQQ)
2863         FloatLtOp -> (FCMP TF LTT, NE)
2864         FloatLeOp -> (FCMP TF LE, NE)
2865         DoubleGtOp -> (FCMP TF LE, EQQ)
2866         DoubleGeOp -> (FCMP TF LTT, EQQ)
2867         DoubleEqOp -> (FCMP TF EQQ, NE)
2868         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2869         DoubleLtOp -> (FCMP TF LTT, NE)
2870         DoubleLeOp -> (FCMP TF LE, NE)
2871
2872 genCondJump lbl (StPrim op [x, y])
2873   = trivialCode instr x y           `thenNat` \ register ->
2874     getNewRegNat IntRep             `thenNat` \ tmp ->
2875     let
2876         code   = registerCode register tmp
2877         result = registerName register tmp
2878         target = ImmCLbl lbl
2879     in
2880     return (code . mkSeqInstr (BI cond result target))
2881   where
2882     (instr, cond) = case op of
2883         CharGtOp -> (CMP LE, EQQ)
2884         CharGeOp -> (CMP LTT, EQQ)
2885         CharEqOp -> (CMP EQQ, NE)
2886         CharNeOp -> (CMP EQQ, EQQ)
2887         CharLtOp -> (CMP LTT, NE)
2888         CharLeOp -> (CMP LE, NE)
2889         IntGtOp -> (CMP LE, EQQ)
2890         IntGeOp -> (CMP LTT, EQQ)
2891         IntEqOp -> (CMP EQQ, NE)
2892         IntNeOp -> (CMP EQQ, EQQ)
2893         IntLtOp -> (CMP LTT, NE)
2894         IntLeOp -> (CMP LE, NE)
2895         WordGtOp -> (CMP ULE, EQQ)
2896         WordGeOp -> (CMP ULT, EQQ)
2897         WordEqOp -> (CMP EQQ, NE)
2898         WordNeOp -> (CMP EQQ, EQQ)
2899         WordLtOp -> (CMP ULT, NE)
2900         WordLeOp -> (CMP ULE, NE)
2901         AddrGtOp -> (CMP ULE, EQQ)
2902         AddrGeOp -> (CMP ULT, EQQ)
2903         AddrEqOp -> (CMP EQQ, NE)
2904         AddrNeOp -> (CMP EQQ, EQQ)
2905         AddrLtOp -> (CMP ULT, NE)
2906         AddrLeOp -> (CMP ULE, NE)
2907
2908 #endif /* alpha_TARGET_ARCH */
2909
2910 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2911
2912 #if i386_TARGET_ARCH
2913
2914 genCondJump id bool = do
2915   CondCode _ cond code <- getCondCode bool
2916   return (code `snocOL` JXX cond id)
2917
2918 #endif
2919
2920 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2921
2922 #if x86_64_TARGET_ARCH
2923
2924 genCondJump id bool = do
2925   CondCode is_float cond cond_code <- getCondCode bool
2926   if not is_float
2927     then
2928         return (cond_code `snocOL` JXX cond id)
2929     else do
2930         lbl <- getBlockIdNat
2931
2932         -- see comment with condFltReg
2933         let code = case cond of
2934                         NE  -> or_unordered
2935                         GU  -> plain_test
2936                         GEU -> plain_test
2937                         _   -> and_ordered
2938
2939             plain_test = unitOL (
2940                   JXX cond id
2941                 )
2942             or_unordered = toOL [
2943                   JXX cond id,
2944                   JXX PARITY id
2945                 ]
2946             and_ordered = toOL [
2947                   JXX PARITY lbl,
2948                   JXX cond id,
2949                   JXX ALWAYS lbl,
2950                   NEWBLOCK lbl
2951                 ]
2952         return (cond_code `appOL` code)
2953
2954 #endif
2955
2956 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2957
2958 #if sparc_TARGET_ARCH
2959
2960 genCondJump id bool = do
2961   CondCode is_float cond code <- getCondCode bool
2962   return (
2963        code `appOL`
2964        toOL (
2965          if   is_float
2966          then [NOP, BF cond False id, NOP]
2967          else [BI cond False id, NOP]
2968        )
2969     )
2970
2971 #endif /* sparc_TARGET_ARCH */
2972
2973
2974 #if powerpc_TARGET_ARCH
2975
2976 genCondJump id bool = do
2977   CondCode is_float cond code <- getCondCode bool
2978   return (code `snocOL` BCC cond id)
2979
2980 #endif /* powerpc_TARGET_ARCH */
2981
2982
2983 -- -----------------------------------------------------------------------------
2984 --  Generating C calls
2985
2986 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2987 -- @get_arg@, which moves the arguments to the correct registers/stack
2988 -- locations.  Apart from that, the code is easy.
2989 --
2990 -- (If applicable) Do not fill the delay slots here; you will confuse the
2991 -- register allocator.
2992
2993 genCCall
2994     :: CmmCallTarget            -- function to call
2995     -> [(CmmReg,MachHint)]      -- where to put the result
2996     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2997     -> Maybe [GlobalReg]        -- volatile regs to save
2998     -> NatM InstrBlock
2999
3000 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3001
3002 #if alpha_TARGET_ARCH
3003
3004 ccallResultRegs =
3005
3006 genCCall fn cconv result_regs args
3007   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3008                           `thenNat` \ ((unused,_), argCode) ->
3009     let
3010         nRegs = length allArgRegs - length unused
3011         code = asmSeqThen (map ($ []) argCode)
3012     in
3013         returnSeq code [
3014             LDA pv (AddrImm (ImmLab (ptext fn))),
3015             JSR ra (AddrReg pv) nRegs,
3016             LDGP gp (AddrReg ra)]
3017   where
3018     ------------------------
3019     {-  Try to get a value into a specific register (or registers) for
3020         a call.  The first 6 arguments go into the appropriate
3021         argument register (separate registers for integer and floating
3022         point arguments, but used in lock-step), and the remaining
3023         arguments are dumped to the stack, beginning at 0(sp).  Our
3024         first argument is a pair of the list of remaining argument
3025         registers to be assigned for this call and the next stack
3026         offset to use for overflowing arguments.  This way,
3027         @get_Arg@ can be applied to all of a call's arguments using
3028         @mapAccumLNat@.
3029     -}
3030     get_arg
3031         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3032         -> StixTree             -- Current argument
3033         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3034
3035     -- We have to use up all of our argument registers first...
3036
3037     get_arg ((iDst,fDst):dsts, offset) arg
3038       = getRegister arg                     `thenNat` \ register ->
3039         let
3040             reg  = if isFloatingRep pk then fDst else iDst
3041             code = registerCode register reg
3042             src  = registerName register reg
3043             pk   = registerRep register
3044         in
3045         return (
3046             if isFloatingRep pk then
3047                 ((dsts, offset), if isFixed register then
3048                     code . mkSeqInstr (FMOV src fDst)
3049                     else code)
3050             else
3051                 ((dsts, offset), if isFixed register then
3052                     code . mkSeqInstr (OR src (RIReg src) iDst)
3053                     else code))
3054
3055     -- Once we have run out of argument registers, we move to the
3056     -- stack...
3057
3058     get_arg ([], offset) arg
3059       = getRegister arg                 `thenNat` \ register ->
3060         getNewRegNat (registerRep register)
3061                                         `thenNat` \ tmp ->
3062         let
3063             code = registerCode register tmp
3064             src  = registerName register tmp
3065             pk   = registerRep register
3066             sz   = primRepToSize pk
3067         in
3068         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3069
3070 #endif /* alpha_TARGET_ARCH */
3071
3072 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3073
3074 #if i386_TARGET_ARCH
3075
3076 -- we only cope with a single result for foreign calls
3077 genCCall (CmmPrim op) [(r,_)] args vols = do
3078   case op of
3079         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3080         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3081
3082         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3083         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3084
3085         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3086         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3087
3088         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3089         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3090
3091         other_op    -> outOfLineFloatOp op r args vols
3092  where
3093   actuallyInlineFloatOp rep instr [(x,_)]
3094         = do res <- trivialUFCode rep instr x
3095              any <- anyReg res
3096              return (any (getRegisterReg r))
3097
3098 genCCall target dest_regs args vols = do
3099     sizes_n_codes <- mapM push_arg (reverse args)
3100     delta <- getDeltaNat
3101     let
3102         (sizes, push_codes) = unzip sizes_n_codes
3103         tot_arg_size        = sum sizes
3104     -- in
3105     -- deal with static vs dynamic call targets
3106     (callinsns,cconv) <-
3107       case target of
3108         -- CmmPrim -> ...
3109         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3110            -> -- ToDo: stdcall arg sizes
3111               return (unitOL (CALL (Left fn_imm) []), conv)
3112            where fn_imm = ImmCLbl lbl
3113         CmmForeignCall expr conv
3114            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3115                  ASSERT(dyn_rep == I32)
3116                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3117
3118     let push_code = concatOL push_codes
3119         call = callinsns `appOL`
3120                toOL (
3121                         -- Deallocate parameters after call for ccall;
3122                         -- but not for stdcall (callee does it)
3123                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3124                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3125                   ++
3126                   [DELTA (delta + tot_arg_size)]
3127                )
3128     -- in
3129     setDeltaNat (delta + tot_arg_size)
3130
3131     let
3132         -- assign the results, if necessary
3133         assign_code []     = nilOL
3134         assign_code [(dest,_hint)] =
3135           case rep of
3136                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3137                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3138                 F32 -> unitOL (GMOV fake0 r_dest)
3139                 F64 -> unitOL (GMOV fake0 r_dest)
3140                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3141           where
3142                 r_dest_hi = getHiVRegFromLo r_dest
3143                 rep = cmmRegRep dest
3144                 r_dest = getRegisterReg dest
3145         assign_code many = panic "genCCall.assign_code many"
3146
3147     return (push_code `appOL`
3148             call `appOL`
3149             assign_code dest_regs)
3150
3151   where
3152     arg_size F64 = 8
3153     arg_size F32 = 4
3154     arg_size _   = 4
3155
3156     push_arg :: (CmmExpr,MachHint){-current argument-}
3157                     -> NatM (Int, InstrBlock)  -- argsz, code
3158
3159     push_arg (arg,_hint) -- we don't need the hints on x86
3160       | arg_rep == I64 = do
3161         ChildCode64 code r_lo <- iselExpr64 arg
3162         delta <- getDeltaNat
3163         setDeltaNat (delta - 8)
3164         let
3165             r_hi = getHiVRegFromLo r_lo
3166         -- in
3167         return (8,     code `appOL`
3168                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3169                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3170                              DELTA (delta-8)]
3171             )
3172
3173       | otherwise = do
3174         (code, reg, sz) <- get_op arg
3175         delta <- getDeltaNat
3176         let size = arg_size sz
3177         setDeltaNat (delta-size)
3178         if (case sz of F64 -> True; F32 -> True; _ -> False)
3179            then return (size,
3180                         code `appOL`
3181                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3182                               DELTA (delta-size),
3183                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3184                                                         EAIndexNone
3185                                                         (ImmInt 0))]
3186                        )
3187            else return (size,
3188                         code `snocOL`
3189                         PUSH I32 (OpReg reg) `snocOL`
3190                         DELTA (delta-size)
3191                        )
3192       where
3193          arg_rep = cmmExprRep arg
3194
3195     ------------
3196     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3197     get_op op = do
3198         (reg,code) <- getSomeReg op
3199         return (code, reg, cmmExprRep op)
3200
3201 #endif /* i386_TARGET_ARCH */
3202
3203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3204
3205 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3206   -> Maybe [GlobalReg] -> NatM InstrBlock
3207 outOfLineFloatOp mop res args vols
3208   | cmmRegRep res == F64
3209   = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3210
3211   | otherwise
3212   = do uq <- getUniqueNat
3213        let
3214          tmp = CmmLocal (LocalReg uq F64)
3215        -- in
3216        code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3217        code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3218        return (code1 `appOL` code2)
3219   where
3220         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3221         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
3222
3223         target = CmmForeignCall (CmmLit lbl) CCallConv
3224         lbl = CmmLabel (mkForeignLabel fn Nothing False)
3225
3226         fn = case mop of
3227               MO_F32_Sqrt  -> FSLIT("sqrt")
3228               MO_F32_Sin   -> FSLIT("sin")
3229               MO_F32_Cos   -> FSLIT("cos")
3230               MO_F32_Tan   -> FSLIT("tan")
3231               MO_F32_Exp   -> FSLIT("exp")
3232               MO_F32_Log   -> FSLIT("log")
3233
3234               MO_F32_Asin  -> FSLIT("asin")
3235               MO_F32_Acos  -> FSLIT("acos")
3236               MO_F32_Atan  -> FSLIT("atan")
3237
3238               MO_F32_Sinh  -> FSLIT("sinh")
3239               MO_F32_Cosh  -> FSLIT("cosh")
3240               MO_F32_Tanh  -> FSLIT("tanh")
3241               MO_F32_Pwr   -> FSLIT("pow")
3242
3243               MO_F64_Sqrt  -> FSLIT("sqrt")
3244               MO_F64_Sin   -> FSLIT("sin")
3245               MO_F64_Cos   -> FSLIT("cos")
3246               MO_F64_Tan   -> FSLIT("tan")
3247               MO_F64_Exp   -> FSLIT("exp")
3248               MO_F64_Log   -> FSLIT("log")
3249
3250               MO_F64_Asin  -> FSLIT("asin")
3251               MO_F64_Acos  -> FSLIT("acos")
3252               MO_F64_Atan  -> FSLIT("atan")
3253
3254               MO_F64_Sinh  -> FSLIT("sinh")
3255               MO_F64_Cosh  -> FSLIT("cosh")
3256               MO_F64_Tanh  -> FSLIT("tanh")
3257               MO_F64_Pwr   -> FSLIT("pow")
3258
3259               other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3260
3261 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3262
3263 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3264
3265 #if x86_64_TARGET_ARCH
3266
3267 genCCall (CmmPrim op) [(r,_)] args vols =
3268   outOfLineFloatOp op r args vols
3269
3270 genCCall target dest_regs args vols = do
3271
3272         -- load up the register arguments
3273     (stack_args, aregs, fregs, load_args_code)
3274          <- load_args args allArgRegs allFPArgRegs nilOL
3275
3276     let
3277         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3278         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3279         arg_regs = int_regs_used ++ fp_regs_used
3280                 -- for annotating the call instruction with
3281
3282         sse_regs = length fp_regs_used
3283
3284         tot_arg_size = arg_size * length stack_args
3285
3286         -- On entry to the called function, %rsp should be aligned
3287         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3288         -- the return address is 16-byte aligned).  In STG land
3289         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3290         -- need to make sure we push a multiple of 16-bytes of args,
3291         -- plus the return address, to get the correct alignment.
3292         -- Urg, this is hard.  We need to feed the delta back into
3293         -- the arg pushing code.
3294     (real_size, adjust_rsp) <-
3295         if tot_arg_size `rem` 16 == 0
3296             then return (tot_arg_size, nilOL)
3297             else do -- we need to adjust...
3298                 delta <- getDeltaNat
3299                 setDeltaNat (delta-8)
3300                 return (tot_arg_size+8, toOL [
3301                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3302                                 DELTA (delta-8)
3303                         ])
3304
3305         -- push the stack args, right to left
3306     push_code <- push_args (reverse stack_args) nilOL
3307     delta <- getDeltaNat
3308
3309     -- deal with static vs dynamic call targets
3310     (callinsns,cconv) <-
3311       case target of
3312         -- CmmPrim -> ...
3313         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3314            -> -- ToDo: stdcall arg sizes
3315               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3316            where fn_imm = ImmCLbl lbl
3317         CmmForeignCall expr conv
3318            -> do (dyn_r, dyn_c) <- getSomeReg expr
3319                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3320
3321     let
3322         -- The x86_64 ABI requires us to set %al to the number of SSE
3323         -- registers that contain arguments, if the called routine
3324         -- is a varargs function.  We don't know whether it's a
3325         -- varargs function or not, so we have to assume it is.
3326         --
3327         -- It's not safe to omit this assignment, even if the number
3328         -- of SSE regs in use is zero.  If %al is larger than 8
3329         -- on entry to a varargs function, seg faults ensue.
3330         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3331
3332     let call = callinsns `appOL`
3333                toOL (
3334                         -- Deallocate parameters after call for ccall;
3335                         -- but not for stdcall (callee does it)
3336                   (if cconv == StdCallConv || real_size==0 then [] else
3337                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3338                   ++
3339                   [DELTA (delta + real_size)]
3340                )
3341     -- in
3342     setDeltaNat (delta + real_size)
3343
3344     let
3345         -- assign the results, if necessary
3346         assign_code []     = nilOL
3347         assign_code [(dest,_hint)] =
3348           case rep of
3349                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3350                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3351                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3352           where
3353                 rep = cmmRegRep dest
3354                 r_dest = getRegisterReg dest
3355         assign_code many = panic "genCCall.assign_code many"
3356
3357     return (load_args_code      `appOL`
3358             adjust_rsp          `appOL`
3359             push_code           `appOL`
3360             assign_eax sse_regs `appOL`
3361             call                `appOL`
3362             assign_code dest_regs)
3363
3364   where
3365     arg_size = 8 -- always, at the mo
3366
3367     load_args :: [(CmmExpr,MachHint)]
3368               -> [Reg]                  -- int regs avail for args
3369               -> [Reg]                  -- FP regs avail for args
3370               -> InstrBlock
3371               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3372     load_args args [] [] code     =  return (args, [], [], code)
3373         -- no more regs to use
3374     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3375         -- no more args to push
3376     load_args ((arg,hint) : rest) aregs fregs code
3377         | isFloatingRep arg_rep =
3378         case fregs of
3379           [] -> push_this_arg
3380           (r:rs) -> do
3381              arg_code <- getAnyReg arg
3382              load_args rest aregs rs (code `appOL` arg_code r)
3383         | otherwise =
3384         case aregs of
3385           [] -> push_this_arg
3386           (r:rs) -> do
3387              arg_code <- getAnyReg arg
3388              load_args rest rs fregs (code `appOL` arg_code r)
3389         where
3390           arg_rep = cmmExprRep arg
3391
3392           push_this_arg = do
3393             (args',ars,frs,code') <- load_args rest aregs fregs code
3394             return ((arg,hint):args', ars, frs, code')
3395
3396     push_args [] code = return code
3397     push_args ((arg,hint):rest) code
3398        | isFloatingRep arg_rep = do
3399          (arg_reg, arg_code) <- getSomeReg arg
3400          delta <- getDeltaNat
3401          setDeltaNat (delta-arg_size)
3402          let code' = code `appOL` toOL [
3403                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3404                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3405                         DELTA (delta-arg_size)]
3406          push_args rest code'
3407
3408        | otherwise = do
3409        -- we only ever generate word-sized function arguments.  Promotion
3410        -- has already happened: our Int8# type is kept sign-extended
3411        -- in an Int#, for example.
3412          ASSERT(arg_rep == I64) return ()
3413          (arg_op, arg_code) <- getOperand arg
3414          delta <- getDeltaNat
3415          setDeltaNat (delta-arg_size)
3416          let code' = code `appOL` toOL [PUSH I64 arg_op,
3417                                         DELTA (delta-arg_size)]
3418          push_args rest code'
3419         where
3420           arg_rep = cmmExprRep arg
3421 #endif
3422
3423 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3424
3425 #if sparc_TARGET_ARCH
3426 {-
3427    The SPARC calling convention is an absolute
3428    nightmare.  The first 6x32 bits of arguments are mapped into
3429    %o0 through %o5, and the remaining arguments are dumped to the
3430    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3431
3432    If we have to put args on the stack, move %o6==%sp down by
3433    the number of words to go on the stack, to ensure there's enough space.
3434
3435    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3436    16 words above the stack pointer is a word for the address of
3437    a structure return value.  I use this as a temporary location
3438    for moving values from float to int regs.  Certainly it isn't
3439    safe to put anything in the 16 words starting at %sp, since
3440    this area can get trashed at any time due to window overflows
3441    caused by signal handlers.
3442
3443    A final complication (if the above isn't enough) is that
3444    we can't blithely calculate the arguments one by one into
3445    %o0 .. %o5.  Consider the following nested calls:
3446
3447        fff a (fff b c)
3448
3449    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3450    the inner call will itself use %o0, which trashes the value put there
3451    in preparation for the outer call.  Upshot: we need to calculate the
3452    args into temporary regs, and move those to arg regs or onto the
3453    stack only immediately prior to the call proper.  Sigh.
3454 -}
3455
3456 genCCall fn cconv kind args
3457   = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3458     let
3459         (argcodes, vregss) = unzip argcode_and_vregs
3460         n_argRegs          = length allArgRegs
3461         n_argRegs_used     = min (length vregs) n_argRegs
3462         vregs              = concat vregss
3463     in
3464     -- deal with static vs dynamic call targets
3465     (case fn of
3466         Left t_static
3467            -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3468         Right dyn
3469            -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3470               return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3471     )
3472                                 `thenNat` \ callinsns ->
3473     let
3474         argcode = concatOL argcodes
3475         (move_sp_down, move_sp_up)
3476            = let diff = length vregs - n_argRegs
3477                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3478              in  if   nn <= 0
3479                  then (nilOL, nilOL)
3480                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3481         transfer_code
3482            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3483     in
3484         return (argcode       `appOL`
3485                    move_sp_down  `appOL`
3486                    transfer_code `appOL`
3487                    callinsns     `appOL`
3488                    unitOL NOP    `appOL`
3489                    move_sp_up)
3490   where
3491      -- function names that begin with '.' are assumed to be special
3492      -- internally generated names like '.mul,' which don't get an
3493      -- underscore prefix
3494      -- ToDo:needed (WDP 96/03) ???
3495      fn_static = unLeft fn
3496      fn__2 = case (headFS fn_static) of
3497                 '.' -> ImmLit (ftext fn_static)
3498                 _   -> ImmCLbl (mkForeignLabel fn_static False)
3499
3500      -- move args from the integer vregs into which they have been
3501      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3502      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3503
3504      move_final [] _ offset          -- all args done
3505         = []
3506
3507      move_final (v:vs) [] offset     -- out of aregs; move to stack
3508         = ST W v (spRel offset)
3509           : move_final vs [] (offset+1)
3510
3511      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3512         = OR False g0 (RIReg v) a
3513           : move_final vs az offset
3514
3515      -- generate code to calculate an argument, and move it into one
3516      -- or two integer vregs.
3517      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3518      arg_to_int_vregs arg
3519         | is64BitRep (repOfCmmExpr arg)
3520         = iselExpr64 arg                `thenNat` \ (ChildCode64 code vr_lo) ->
3521           let r_lo = VirtualRegI vr_lo
3522               r_hi = getHiVRegFromLo r_lo
3523           in  return (code, [r_hi, r_lo])
3524         | otherwise
3525         = getRegister arg                     `thenNat` \ register ->
3526           getNewRegNat (registerRep register) `thenNat` \ tmp ->
3527           let code = registerCode register tmp
3528               src  = registerName register tmp
3529               pk   = registerRep register
3530           in
3531           -- the value is in src.  Get it into 1 or 2 int vregs.
3532           case pk of
3533              F64 ->
3534                 getNewRegNat WordRep  `thenNat` \ v1 ->
3535                 getNewRegNat WordRep  `thenNat` \ v2 ->
3536                 return (
3537                    code                          `snocOL`
3538                    FMOV DF src f0                `snocOL`
3539                    ST   F  f0 (spRel 16)         `snocOL`
3540                    LD   W  (spRel 16) v1         `snocOL`
3541                    ST   F  (fPair f0) (spRel 16) `snocOL`
3542                    LD   W  (spRel 16) v2
3543                    ,
3544                    [v1,v2]
3545                 )
3546              F32 ->
3547                 getNewRegNat WordRep  `thenNat` \ v1 ->
3548                 return (
3549                    code                    `snocOL`
3550                    ST   F  src (spRel 16)  `snocOL`
3551                    LD   W  (spRel 16) v1
3552                    ,
3553                    [v1]
3554                 )
3555              other ->
3556                 getNewRegNat WordRep  `thenNat` \ v1 ->
3557                 return (
3558                    code `snocOL` OR False g0 (RIReg src) v1
3559                    ,
3560                    [v1]
3561                 )
3562 #endif /* sparc_TARGET_ARCH */
3563
3564 #if powerpc_TARGET_ARCH
3565
3566 #if darwin_TARGET_OS || linux_TARGET_OS
3567 {-
3568     The PowerPC calling convention for Darwin/Mac OS X
3569     is described in Apple's document
3570     "Inside Mac OS X - Mach-O Runtime Architecture".
3571
3572     PowerPC Linux uses the System V Release 4 Calling Convention
3573     for PowerPC. It is described in the
3574     "System V Application Binary Interface PowerPC Processor Supplement".
3575
3576     Both conventions are similar:
3577     Parameters may be passed in general-purpose registers starting at r3, in
3578     floating point registers starting at f1, or on the stack.
3579
3580     But there are substantial differences:
3581     * The number of registers used for parameter passing and the exact set of
3582       nonvolatile registers differs (see MachRegs.lhs).
3583     * On Darwin, stack space is always reserved for parameters, even if they are
3584       passed in registers. The called routine may choose to save parameters from
3585       registers to the corresponding space on the stack.
3586     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3587       parameter is passed in an FPR.
3588     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3589       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3590       Darwin just treats an I64 like two separate I32s (high word first).
3591     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3592       4-byte aligned like everything else on Darwin.
3593     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3594       PowerPC Linux does not agree, so neither do we.
3595
3596     According to both conventions, The parameter area should be part of the
3597     caller's stack frame, allocated in the caller's prologue code (large enough
3598     to hold the parameter lists for all called routines). The NCG already
3599     uses the stack for register spilling, leaving 64 bytes free at the top.
3600     If we need a larger parameter area than that, we just allocate a new stack
3601     frame just before ccalling.
3602 -}
3603
3604 genCCall target dest_regs argsAndHints vols
3605   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3606         -- we rely on argument promotion in the codeGen
3607     do
3608         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3609                                                         (zip args argReps)
3610                                                         allArgRegs allFPArgRegs
3611                                                         initialStackOffset
3612                                                         (toOL []) []
3613
3614         (labelOrExpr, reduceToF32) <- case target of
3615             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3616             CmmForeignCall expr conv -> return  (Right expr, False)
3617             CmmPrim mop -> outOfLineFloatOp mop
3618
3619         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3620             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3621
3622         case labelOrExpr of
3623             Left lbl -> do
3624                 return (         codeBefore
3625                         `snocOL` BL lbl usedRegs
3626                         `appOL`  codeAfter)
3627             Right dyn -> do
3628                 (dynReg, dynCode) <- getSomeReg dyn
3629                 return (         dynCode
3630                         `snocOL` MTCTR dynReg
3631                         `appOL`  codeBefore
3632                         `snocOL` BCTRL usedRegs
3633                         `appOL`  codeAfter)
3634     where
3635 #if darwin_TARGET_OS
3636         initialStackOffset = 24
3637             -- size of linkage area + size of arguments, in bytes
3638         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3639                                        map machRepByteWidth argReps
3640 #elif linux_TARGET_OS
3641         initialStackOffset = 8
3642         stackDelta finalStack = roundTo 16 finalStack
3643 #endif
3644         args = map fst argsAndHints
3645         argReps = map cmmExprRep args
3646
3647         roundTo a x | x `mod` a == 0 = x
3648                     | otherwise = x + a - (x `mod` a)
3649
3650         move_sp_down finalStack
3651                | delta > 64 =
3652                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3653                               DELTA (-delta)]
3654                | otherwise = nilOL
3655                where delta = stackDelta finalStack
3656         move_sp_up finalStack
3657                | delta > 64 =
3658                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3659                               DELTA 0]
3660                | otherwise = nilOL
3661                where delta = stackDelta finalStack
3662
3663
3664         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3665         passArguments ((arg,I64):args) gprs fprs stackOffset
3666                accumCode accumUsed =
3667             do
3668                 ChildCode64 code vr_lo <- iselExpr64 arg
3669                 let vr_hi = getHiVRegFromLo vr_lo
3670
3671 #if darwin_TARGET_OS
3672                 passArguments args
3673                               (drop 2 gprs)
3674                               fprs
3675                               (stackOffset+8)
3676                               (accumCode `appOL` code
3677                                     `snocOL` storeWord vr_hi gprs stackOffset
3678                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3679                               ((take 2 gprs) ++ accumUsed)
3680             where
3681                 storeWord vr (gpr:_) offset = MR gpr vr
3682                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3683
3684 #elif linux_TARGET_OS
3685                 let stackOffset' = roundTo 8 stackOffset
3686                     stackCode = accumCode `appOL` code
3687                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3688                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3689                     regCode hireg loreg =
3690                         accumCode `appOL` code
3691                             `snocOL` MR hireg vr_hi
3692                             `snocOL` MR loreg vr_lo
3693
3694                 case gprs of
3695                     hireg : loreg : regs | even (length gprs) ->
3696                         passArguments args regs fprs stackOffset
3697                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3698                     _skipped : hireg : loreg : regs ->
3699                         passArguments args regs fprs stackOffset
3700                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3701                     _ -> -- only one or no regs left
3702                         passArguments args [] fprs (stackOffset'+8)
3703                                       stackCode accumUsed
3704 #endif
3705
3706         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3707             | reg : _ <- regs = do
3708                 register <- getRegister arg
3709                 let code = case register of
3710                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3711                             Any _ acode -> acode reg
3712                 passArguments args
3713                               (drop nGprs gprs)
3714                               (drop nFprs fprs)
3715 #if darwin_TARGET_OS
3716         -- The Darwin ABI requires that we reserve stack slots for register parameters
3717                               (stackOffset + stackBytes)
3718 #elif linux_TARGET_OS
3719         -- ... the SysV ABI doesn't.
3720                               stackOffset
3721 #endif
3722                               (accumCode `appOL` code)
3723                               (reg : accumUsed)
3724             | otherwise = do
3725                 (vr, code) <- getSomeReg arg
3726                 passArguments args
3727                               (drop nGprs gprs)
3728                               (drop nFprs fprs)
3729                               (stackOffset' + stackBytes)
3730                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3731                               accumUsed
3732             where
3733 #if darwin_TARGET_OS
3734         -- stackOffset is at least 4-byte aligned
3735         -- The Darwin ABI is happy with that.
3736                 stackOffset' = stackOffset
3737 #else
3738         -- ... the SysV ABI requires 8-byte alignment for doubles.
3739                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3740                              | otherwise  =           stackOffset
3741 #endif
3742                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3743                 (nGprs, nFprs, stackBytes, regs) = case rep of
3744                     I32 -> (1, 0, 4, gprs)
3745 #if darwin_TARGET_OS
3746         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3747         -- we use the FPRs.
3748                     F32 -> (1, 1, 4, fprs)
3749                     F64 -> (2, 1, 8, fprs)
3750 #elif linux_TARGET_OS
3751         -- ... the SysV ABI doesn't.
3752                     F32 -> (0, 1, 4, fprs)
3753                     F64 -> (0, 1, 8, fprs)
3754 #endif
3755
3756         moveResult reduceToF32 =
3757             case dest_regs of
3758                 [] -> nilOL
3759                 [(dest, _hint)]
3760                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3761                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3762                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3763                                           MR r_dest r4]
3764                     | otherwise -> unitOL (MR r_dest r3)
3765                     where rep = cmmRegRep dest
3766                           r_dest = getRegisterReg dest
3767
3768         outOfLineFloatOp mop =
3769             do
3770                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3771                               mkForeignLabel functionName Nothing True
3772                 let mopLabelOrExpr = case mopExpr of
3773                         CmmLit (CmmLabel lbl) -> Left lbl
3774                         _ -> Right mopExpr
3775                 return (mopLabelOrExpr, reduce)
3776             where
3777                 (functionName, reduce) = case mop of
3778                     MO_F32_Exp   -> (FSLIT("exp"), True)
3779                     MO_F32_Log   -> (FSLIT("log"), True)
3780                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3781
3782                     MO_F32_Sin   -> (FSLIT("sin"), True)
3783                     MO_F32_Cos   -> (FSLIT("cos"), True)
3784                     MO_F32_Tan   -> (FSLIT("tan"), True)
3785
3786                     MO_F32_Asin  -> (FSLIT("asin"), True)
3787                     MO_F32_Acos  -> (FSLIT("acos"), True)
3788                     MO_F32_Atan  -> (FSLIT("atan"), True)
3789
3790                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3791                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3792                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3793                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3794
3795                     MO_F64_Exp   -> (FSLIT("exp"), False)
3796                     MO_F64_Log   -> (FSLIT("log"), False)
3797                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3798
3799                     MO_F64_Sin   -> (FSLIT("sin"), False)
3800                     MO_F64_Cos   -> (FSLIT("cos"), False)
3801                     MO_F64_Tan   -> (FSLIT("tan"), False)
3802
3803                     MO_F64_Asin  -> (FSLIT("asin"), False)
3804                     MO_F64_Acos  -> (FSLIT("acos"), False)
3805                     MO_F64_Atan  -> (FSLIT("atan"), False)
3806
3807                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3808                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3809                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3810                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3811                     other -> pprPanic "genCCall(ppc): unknown callish op"
3812                                     (pprCallishMachOp other)
3813
3814 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3815
3816 #endif /* powerpc_TARGET_ARCH */
3817
3818
3819 -- -----------------------------------------------------------------------------
3820 -- Generating a table-branch
3821
3822 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3823
3824 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3825 genSwitch expr ids = do
3826   (reg,e_code) <- getSomeReg expr
3827   lbl <- getNewLabelNat
3828   let
3829         jumpTable = map jumpTableEntry ids
3830         op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3831         code = e_code `appOL` toOL [
3832                 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3833                 JMP_TBL op [ id | Just id <- ids ]
3834              ]
3835   -- in
3836   return code
3837 #elif powerpc_TARGET_ARCH
3838 genSwitch expr ids
3839   | opt_PIC
3840   = do
3841         (reg,e_code) <- getSomeReg expr
3842         tmp <- getNewRegNat I32
3843         lbl <- getNewLabelNat
3844         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3845         (tableReg,t_code) <- getSomeReg $ dynRef
3846         let
3847             jumpTable = map jumpTableEntryRel ids
3848
3849             jumpTableEntryRel Nothing
3850                 = CmmStaticLit (CmmInt 0 wordRep)
3851             jumpTableEntryRel (Just (BlockId id))
3852                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3853                 where blockLabel = mkAsmTempLabel id
3854
3855             code = e_code `appOL` t_code `appOL` toOL [
3856                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3857                             SLW tmp reg (RIImm (ImmInt 2)),
3858                             LD I32 tmp (AddrRegReg tableReg tmp),
3859                             ADD tmp tmp (RIReg tableReg),
3860                             MTCTR tmp,
3861                             BCTR [ id | Just id <- ids ]
3862                     ]
3863         return code
3864   | otherwise
3865   = do
3866         (reg,e_code) <- getSomeReg expr
3867         tmp <- getNewRegNat I32
3868         lbl <- getNewLabelNat
3869         let
3870             jumpTable = map jumpTableEntry ids
3871
3872             code = e_code `appOL` toOL [
3873                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3874                             SLW tmp reg (RIImm (ImmInt 2)),
3875                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3876                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3877                             MTCTR tmp,
3878                             BCTR [ id | Just id <- ids ]
3879                     ]
3880         return code
3881 #else
3882 genSwitch expr ids = panic "ToDo: genSwitch"
3883 #endif
3884
3885 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3886 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3887     where blockLabel = mkAsmTempLabel id
3888
3889 -- -----------------------------------------------------------------------------
3890 -- Support bits
3891 -- -----------------------------------------------------------------------------
3892
3893
3894 -- -----------------------------------------------------------------------------
3895 -- 'condIntReg' and 'condFltReg': condition codes into registers
3896
3897 -- Turn those condition codes into integers now (when they appear on
3898 -- the right hand side of an assignment).
3899 --
3900 -- (If applicable) Do not fill the delay slots here; you will confuse the
3901 -- register allocator.
3902
3903 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3904
3905 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3906
3907 #if alpha_TARGET_ARCH
3908 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3909 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3910 #endif /* alpha_TARGET_ARCH */
3911
3912 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3913
3914 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3915
3916 condIntReg cond x y = do
3917   CondCode _ cond cond_code <- condIntCode cond x y
3918   tmp <- getNewRegNat I8
3919   let
3920         code dst = cond_code `appOL` toOL [
3921                     SETCC cond (OpReg tmp),
3922                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3923                   ]
3924   -- in
3925   return (Any I32 code)
3926
3927 #endif
3928
3929 #if i386_TARGET_ARCH
3930
3931 condFltReg cond x y = do
3932   CondCode _ cond cond_code <- condFltCode cond x y
3933   tmp <- getNewRegNat I8
3934   let
3935         code dst = cond_code `appOL` toOL [
3936                     SETCC cond (OpReg tmp),
3937                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3938                   ]
3939   -- in
3940   return (Any I32 code)
3941
3942 #endif
3943
3944 #if x86_64_TARGET_ARCH
3945
3946 condFltReg cond x y = do
3947   CondCode _ cond cond_code <- condFltCode cond x y
3948   tmp1 <- getNewRegNat wordRep
3949   tmp2 <- getNewRegNat wordRep
3950   let
3951         -- We have to worry about unordered operands (eg. comparisons
3952         -- against NaN).  If the operands are unordered, the comparison
3953         -- sets the parity flag, carry flag and zero flag.
3954         -- All comparisons are supposed to return false for unordered
3955         -- operands except for !=, which returns true.
3956         --
3957         -- Optimisation: we don't have to test the parity flag if we
3958         -- know the test has already excluded the unordered case: eg >
3959         -- and >= test for a zero carry flag, which can only occur for
3960         -- ordered operands.
3961         --
3962         -- ToDo: by reversing comparisons we could avoid testing the
3963         -- parity flag in more cases.
3964
3965         code dst =
3966            cond_code `appOL`
3967              (case cond of
3968                 NE  -> or_unordered dst
3969                 GU  -> plain_test   dst
3970                 GEU -> plain_test   dst
3971                 _   -> and_ordered  dst)
3972
3973         plain_test dst = toOL [
3974                     SETCC cond (OpReg tmp1),
3975                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3976                  ]
3977         or_unordered dst = toOL [
3978                     SETCC cond (OpReg tmp1),
3979                     SETCC PARITY (OpReg tmp2),
3980                     OR I8 (OpReg tmp1) (OpReg tmp2),
3981                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3982                   ]
3983         and_ordered dst = toOL [
3984                     SETCC cond (OpReg tmp1),
3985                     SETCC NOTPARITY (OpReg tmp2),
3986                     AND I8 (OpReg tmp1) (OpReg tmp2),
3987                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3988                   ]
3989   -- in
3990   return (Any I32 code)
3991
3992 #endif
3993
3994 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3995
3996 #if sparc_TARGET_ARCH
3997
3998 condIntReg EQQ x (StInt 0)
3999   = getRegister x               `thenNat` \ register ->
4000     getNewRegNat IntRep         `thenNat` \ tmp ->
4001     let
4002         code = registerCode register tmp
4003         src  = registerName register tmp
4004         code__2 dst = code `appOL` toOL [
4005             SUB False True g0 (RIReg src) g0,
4006             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4007     in
4008     return (Any IntRep code__2)
4009
4010 condIntReg EQQ x y
4011   = getRegister x               `thenNat` \ register1 ->
4012     getRegister y               `thenNat` \ register2 ->
4013     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4014     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4015     let
4016         code1 = registerCode register1 tmp1
4017         src1  = registerName register1 tmp1
4018         code2 = registerCode register2 tmp2
4019         src2  = registerName register2 tmp2
4020         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4021             XOR False src1 (RIReg src2) dst,
4022             SUB False True g0 (RIReg dst) g0,
4023             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4024     in
4025     return (Any IntRep code__2)
4026
4027 condIntReg NE x (StInt 0)
4028   = getRegister x               `thenNat` \ register ->
4029     getNewRegNat IntRep         `thenNat` \ tmp ->
4030     let
4031         code = registerCode register tmp
4032         src  = registerName register tmp
4033         code__2 dst = code `appOL` toOL [
4034             SUB False True g0 (RIReg src) g0,
4035             ADD True False g0 (RIImm (ImmInt 0)) dst]
4036     in
4037     return (Any IntRep code__2)
4038
4039 condIntReg NE x y
4040   = getRegister x               `thenNat` \ register1 ->
4041     getRegister y               `thenNat` \ register2 ->
4042     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4043     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4044     let
4045         code1 = registerCode register1 tmp1
4046         src1  = registerName register1 tmp1
4047         code2 = registerCode register2 tmp2
4048         src2  = registerName register2 tmp2
4049         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4050             XOR False src1 (RIReg src2) dst,
4051             SUB False True g0 (RIReg dst) g0,
4052             ADD True False g0 (RIImm (ImmInt 0)) dst]
4053     in
4054     return (Any IntRep code__2)
4055
4056 condIntReg cond x y
4057   = getBlockIdNat               `thenNat` \ lbl1 ->
4058     getBlockIdNat               `thenNat` \ lbl2 ->
4059     condIntCode cond x y        `thenNat` \ condition ->
4060     let
4061         code = condCode condition
4062         cond = condName condition
4063         code__2 dst = code `appOL` toOL [
4064             BI cond False (ImmCLbl lbl1), NOP,
4065             OR False g0 (RIImm (ImmInt 0)) dst,
4066             BI ALWAYS False (ImmCLbl lbl2), NOP,
4067             NEWBLOCK lbl1,
4068             OR False g0 (RIImm (ImmInt 1)) dst,
4069             NEWBLOCK lbl2]
4070     in
4071     return (Any IntRep code__2)
4072
4073 condFltReg cond x y
4074   = getBlockIdNat               `thenNat` \ lbl1 ->
4075     getBlockIdNat               `thenNat` \ lbl2 ->
4076     condFltCode cond x y        `thenNat` \ condition ->
4077     let
4078         code = condCode condition
4079         cond = condName condition
4080         code__2 dst = code `appOL` toOL [
4081             NOP,
4082             BF cond False (ImmCLbl lbl1), NOP,
4083             OR False g0 (RIImm (ImmInt 0)) dst,
4084             BI ALWAYS False (ImmCLbl lbl2), NOP,
4085             NEWBLOCK lbl1,
4086             OR False g0 (RIImm (ImmInt 1)) dst,
4087             NEWBLOCK lbl2]
4088     in
4089     return (Any IntRep code__2)
4090
4091 #endif /* sparc_TARGET_ARCH */
4092
4093 #if powerpc_TARGET_ARCH
4094 condReg getCond = do
4095     lbl1 <- getBlockIdNat
4096     lbl2 <- getBlockIdNat
4097     CondCode _ cond cond_code <- getCond
4098     let
4099 {-        code dst = cond_code `appOL` toOL [
4100                 BCC cond lbl1,
4101                 LI dst (ImmInt 0),
4102                 BCC ALWAYS lbl2,
4103                 NEWBLOCK lbl1,
4104                 LI dst (ImmInt 1),
4105                 BCC ALWAYS lbl2,
4106                 NEWBLOCK lbl2
4107             ]-}
4108         code dst = cond_code
4109             `appOL` negate_code
4110             `appOL` toOL [
4111                 MFCR dst,
4112                 RLWINM dst dst (bit + 1) 31 31
4113             ]
4114
4115         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4116                     | otherwise = nilOL
4117
4118         (bit, do_negate) = case cond of
4119             LTT -> (0, False)
4120             LE  -> (1, True)
4121             EQQ -> (2, False)
4122             GE  -> (0, True)
4123             GTT -> (1, False)
4124
4125             NE  -> (2, True)
4126
4127             LU  -> (0, False)
4128             LEU -> (1, True)
4129             GEU -> (0, True)
4130             GU  -> (1, False)
4131
4132     return (Any I32 code)
4133
4134 condIntReg cond x y = condReg (condIntCode cond x y)
4135 condFltReg cond x y = condReg (condFltCode cond x y)
4136 #endif /* powerpc_TARGET_ARCH */
4137
4138
4139 -- -----------------------------------------------------------------------------
4140 -- 'trivial*Code': deal with trivial instructions
4141
4142 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4143 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4144 -- Only look for constants on the right hand side, because that's
4145 -- where the generic optimizer will have put them.
4146
4147 -- Similarly, for unary instructions, we don't have to worry about
4148 -- matching an StInt as the argument, because genericOpt will already
4149 -- have handled the constant-folding.
4150
4151 trivialCode
4152     :: MachRep
4153     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4154       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4155                      -> Maybe (Operand -> Operand -> Instr)
4156       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4157                      -> Maybe (Operand -> Operand -> Instr)
4158       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4159       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4160       ,)))))
4161     -> CmmExpr -> CmmExpr -- the two arguments
4162     -> NatM Register
4163
4164 #ifndef powerpc_TARGET_ARCH
4165 trivialFCode
4166     :: MachRep
4167     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4168       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4169       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4170       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4171       ,))))
4172     -> CmmExpr -> CmmExpr -- the two arguments
4173     -> NatM Register
4174 #endif
4175
4176 trivialUCode
4177     :: MachRep
4178     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4179       ,IF_ARCH_i386 ((Operand -> Instr)
4180       ,IF_ARCH_x86_64 ((Operand -> Instr)
4181       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4182       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4183       ,)))))
4184     -> CmmExpr  -- the one argument
4185     -> NatM Register
4186
4187 #ifndef powerpc_TARGET_ARCH
4188 trivialUFCode
4189     :: MachRep
4190     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4191       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4192       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4193       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4194       ,))))
4195     -> CmmExpr -- the one argument
4196     -> NatM Register
4197 #endif
4198
4199 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4200
4201 #if alpha_TARGET_ARCH
4202
4203 trivialCode instr x (StInt y)
4204   | fits8Bits y
4205   = getRegister x               `thenNat` \ register ->
4206     getNewRegNat IntRep         `thenNat` \ tmp ->
4207     let
4208         code = registerCode register tmp
4209         src1 = registerName register tmp
4210         src2 = ImmInt (fromInteger y)
4211         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4212     in
4213     return (Any IntRep code__2)
4214
4215 trivialCode instr x y
4216   = getRegister x               `thenNat` \ register1 ->
4217     getRegister y               `thenNat` \ register2 ->
4218     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4219     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4220     let
4221         code1 = registerCode register1 tmp1 []
4222         src1  = registerName register1 tmp1
4223         code2 = registerCode register2 tmp2 []
4224         src2  = registerName register2 tmp2
4225         code__2 dst = asmSeqThen [code1, code2] .
4226                      mkSeqInstr (instr src1 (RIReg src2) dst)
4227     in
4228     return (Any IntRep code__2)
4229
4230 ------------
4231 trivialUCode instr x
4232   = getRegister x               `thenNat` \ register ->
4233     getNewRegNat IntRep         `thenNat` \ tmp ->
4234     let
4235         code = registerCode register tmp
4236         src  = registerName register tmp
4237         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4238     in
4239     return (Any IntRep code__2)
4240
4241 ------------
4242 trivialFCode _ instr x y
4243   = getRegister x               `thenNat` \ register1 ->
4244     getRegister y               `thenNat` \ register2 ->
4245     getNewRegNat F64    `thenNat` \ tmp1 ->
4246     getNewRegNat F64    `thenNat` \ tmp2 ->
4247     let
4248         code1 = registerCode register1 tmp1
4249         src1  = registerName register1 tmp1
4250
4251         code2 = registerCode register2 tmp2
4252         src2  = registerName register2 tmp2
4253
4254         code__2 dst = asmSeqThen [code1 [], code2 []] .
4255                       mkSeqInstr (instr src1 src2 dst)
4256     in
4257     return (Any F64 code__2)
4258
4259 trivialUFCode _ instr x
4260   = getRegister x               `thenNat` \ register ->
4261     getNewRegNat F64    `thenNat` \ tmp ->
4262     let
4263         code = registerCode register tmp
4264         src  = registerName register tmp
4265         code__2 dst = code . mkSeqInstr (instr src dst)
4266     in
4267     return (Any F64 code__2)
4268
4269 #endif /* alpha_TARGET_ARCH */
4270
4271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4272
4273 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4274
4275 {-
4276 The Rules of the Game are:
4277
4278 * You cannot assume anything about the destination register dst;
4279   it may be anything, including a fixed reg.
4280
4281 * You may compute an operand into a fixed reg, but you may not
4282   subsequently change the contents of that fixed reg.  If you
4283   want to do so, first copy the value either to a temporary
4284   or into dst.  You are free to modify dst even if it happens
4285   to be a fixed reg -- that's not your problem.
4286
4287 * You cannot assume that a fixed reg will stay live over an
4288   arbitrary computation.  The same applies to the dst reg.
4289
4290 * Temporary regs obtained from getNewRegNat are distinct from
4291   each other and from all other regs, and stay live over
4292   arbitrary computations.
4293
4294 --------------------
4295
4296 SDM's version of The Rules:
4297
4298 * If getRegister returns Any, that means it can generate correct
4299   code which places the result in any register, period.  Even if that
4300   register happens to be read during the computation.
4301
4302   Corollary #1: this means that if you are generating code for an
4303   operation with two arbitrary operands, you cannot assign the result
4304   of the first operand into the destination register before computing
4305   the second operand.  The second operand might require the old value
4306   of the destination register.
4307
4308   Corollary #2: A function might be able to generate more efficient
4309   code if it knows the destination register is a new temporary (and
4310   therefore not read by any of the sub-computations).
4311
4312 * If getRegister returns Any, then the code it generates may modify only:
4313         (a) fresh temporaries
4314         (b) the destination register
4315         (c) known registers (eg. %ecx is used by shifts)
4316   In particular, it may *not* modify global registers, unless the global
4317   register happens to be the destination register.
4318 -}
4319
4320 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4321   | not (is64BitLit lit_a) = do
4322   b_code <- getAnyReg b
4323   let
4324        code dst
4325          = b_code dst `snocOL`
4326            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4327   -- in
4328   return (Any rep code)
4329
4330 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4331
4332 -- This is re-used for floating pt instructions too.
4333 genTrivialCode rep instr a b = do
4334   (b_op, b_code) <- getNonClobberedOperand b
4335   a_code <- getAnyReg a
4336   tmp <- getNewRegNat rep
4337   let
4338      -- We want the value of b to stay alive across the computation of a.
4339      -- But, we want to calculate a straight into the destination register,
4340      -- because the instruction only has two operands (dst := dst `op` src).
4341      -- The troublesome case is when the result of b is in the same register
4342      -- as the destination reg.  In this case, we have to save b in a
4343      -- new temporary across the computation of a.
4344      code dst
4345         | dst `regClashesWithOp` b_op =
4346                 b_code `appOL`
4347                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4348                 a_code dst `snocOL`
4349                 instr (OpReg tmp) (OpReg dst)
4350         | otherwise =
4351                 b_code `appOL`
4352                 a_code dst `snocOL`
4353                 instr b_op (OpReg dst)
4354   -- in
4355   return (Any rep code)
4356
4357 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4358 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4359 reg `regClashesWithOp` _            = False
4360
4361 -----------
4362
4363 trivialUCode rep instr x = do
4364   x_code <- getAnyReg x
4365   let
4366      code dst =
4367         x_code dst `snocOL`
4368         instr (OpReg dst)
4369   -- in
4370   return (Any rep code)
4371
4372 -----------
4373
4374 #if i386_TARGET_ARCH
4375
4376 trivialFCode pk instr x y = do
4377   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4378   (y_reg, y_code) <- getSomeReg y
4379   let
4380      code dst =
4381         x_code `appOL`
4382         y_code `snocOL`
4383         instr pk x_reg y_reg dst
4384   -- in
4385   return (Any pk code)
4386
4387 #endif
4388
4389 #if x86_64_TARGET_ARCH
4390
4391 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4392
4393 #endif
4394
4395 -------------
4396
4397 trivialUFCode rep instr x = do
4398   (x_reg, x_code) <- getSomeReg x
4399   let
4400      code dst =
4401         x_code `snocOL`
4402         instr x_reg dst
4403   -- in
4404   return (Any rep code)
4405
4406 #endif /* i386_TARGET_ARCH */
4407
4408 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4409
4410 #if sparc_TARGET_ARCH
4411
4412 trivialCode instr x (StInt y)
4413   | fits13Bits y
4414   = getRegister x               `thenNat` \ register ->
4415     getNewRegNat IntRep         `thenNat` \ tmp ->
4416     let
4417         code = registerCode register tmp
4418         src1 = registerName register tmp
4419         src2 = ImmInt (fromInteger y)
4420         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4421     in
4422     return (Any IntRep code__2)
4423
4424 trivialCode instr x y
4425   = getRegister x               `thenNat` \ register1 ->
4426     getRegister y               `thenNat` \ register2 ->
4427     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4428     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4429     let
4430         code1 = registerCode register1 tmp1
4431         src1  = registerName register1 tmp1
4432         code2 = registerCode register2 tmp2
4433         src2  = registerName register2 tmp2
4434         code__2 dst = code1 `appOL` code2 `snocOL`
4435                       instr src1 (RIReg src2) dst
4436     in
4437     return (Any IntRep code__2)
4438
4439 ------------
4440 trivialFCode pk instr x y
4441   = getRegister x               `thenNat` \ register1 ->
4442     getRegister y               `thenNat` \ register2 ->
4443     getNewRegNat (registerRep register1)
4444                                 `thenNat` \ tmp1 ->
4445     getNewRegNat (registerRep register2)
4446                                 `thenNat` \ tmp2 ->
4447     getNewRegNat F64    `thenNat` \ tmp ->
4448     let
4449         promote x = FxTOy F DF x tmp
4450
4451         pk1   = registerRep register1
4452         code1 = registerCode register1 tmp1
4453         src1  = registerName register1 tmp1
4454
4455         pk2   = registerRep register2
4456         code2 = registerCode register2 tmp2
4457         src2  = registerName register2 tmp2
4458
4459         code__2 dst =
4460                 if pk1 == pk2 then
4461                     code1 `appOL` code2 `snocOL`
4462                     instr (primRepToSize pk) src1 src2 dst
4463                 else if pk1 == F32 then
4464                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4465                     instr DF tmp src2 dst
4466                 else
4467                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4468                     instr DF src1 tmp dst
4469     in
4470     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4471
4472 ------------
4473 trivialUCode instr x
4474   = getRegister x               `thenNat` \ register ->
4475     getNewRegNat IntRep         `thenNat` \ tmp ->
4476     let
4477         code = registerCode register tmp
4478         src  = registerName register tmp
4479         code__2 dst = code `snocOL` instr (RIReg src) dst
4480     in
4481     return (Any IntRep code__2)
4482
4483 -------------
4484 trivialUFCode pk instr x
4485   = getRegister x               `thenNat` \ register ->
4486     getNewRegNat pk             `thenNat` \ tmp ->
4487     let
4488         code = registerCode register tmp
4489         src  = registerName register tmp
4490         code__2 dst = code `snocOL` instr src dst
4491     in
4492     return (Any pk code__2)
4493
4494 #endif /* sparc_TARGET_ARCH */
4495
4496 #if powerpc_TARGET_ARCH
4497
4498 {-
4499 Wolfgang's PowerPC version of The Rules:
4500
4501 A slightly modified version of The Rules to take advantage of the fact
4502 that PowerPC instructions work on all registers and don't implicitly
4503 clobber any fixed registers.
4504
4505 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4506
4507 * If getRegister returns Any, then the code it generates may modify only:
4508         (a) fresh temporaries
4509         (b) the destination register
4510   It may *not* modify global registers, unless the global
4511   register happens to be the destination register.
4512   It may not clobber any other registers. In fact, only ccalls clobber any
4513   fixed registers.
4514   Also, it may not modify the counter register (used by genCCall).
4515
4516   Corollary: If a getRegister for a subexpression returns Fixed, you need
4517   not move it to a fresh temporary before evaluating the next subexpression.
4518   The Fixed register won't be modified.
4519   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4520
4521 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4522   the value of the destination register.
4523 -}
4524
4525 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4526     | Just imm <- makeImmediate rep signed y
4527     = do
4528         (src1, code1) <- getSomeReg x
4529         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4530         return (Any rep code)
4531
4532 trivialCode rep signed instr x y = do
4533     (src1, code1) <- getSomeReg x
4534     (src2, code2) <- getSomeReg y
4535     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4536     return (Any rep code)
4537
4538 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4539     -> CmmExpr -> CmmExpr -> NatM Register
4540 trivialCodeNoImm rep instr x y = do
4541     (src1, code1) <- getSomeReg x
4542     (src2, code2) <- getSomeReg y
4543     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4544     return (Any rep code)
4545
4546 trivialUCode rep instr x = do
4547     (src, code) <- getSomeReg x
4548     let code' dst = code `snocOL` instr dst src
4549     return (Any rep code')
4550
4551 -- There is no "remainder" instruction on the PPC, so we have to do
4552 -- it the hard way.
4553 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4554
4555 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4556     -> CmmExpr -> CmmExpr -> NatM Register
4557 remainderCode rep div x y = do
4558     (src1, code1) <- getSomeReg x
4559     (src2, code2) <- getSomeReg y
4560     let code dst = code1 `appOL` code2 `appOL` toOL [
4561                 div dst src1 src2,
4562                 MULLW dst dst (RIReg src2),
4563                 SUBF dst dst src1
4564             ]
4565     return (Any rep code)
4566
4567 #endif /* powerpc_TARGET_ARCH */
4568
4569
4570 -- -----------------------------------------------------------------------------
4571 --  Coercing to/from integer/floating-point...
4572
4573 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4574 -- conversions.  We have to store temporaries in memory to move
4575 -- between the integer and the floating point register sets.
4576
4577 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4578 -- pretend, on sparc at least, that double and float regs are seperate
4579 -- kinds, so the value has to be computed into one kind before being
4580 -- explicitly "converted" to live in the other kind.
4581
4582 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4583 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4584
4585 #if sparc_TARGET_ARCH
4586 coerceDbl2Flt :: CmmExpr -> NatM Register
4587 coerceFlt2Dbl :: CmmExpr -> NatM Register
4588 #endif
4589
4590 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4591
4592 #if alpha_TARGET_ARCH
4593
4594 coerceInt2FP _ x
4595   = getRegister x               `thenNat` \ register ->
4596     getNewRegNat IntRep         `thenNat` \ reg ->
4597     let
4598         code = registerCode register reg
4599         src  = registerName register reg
4600
4601         code__2 dst = code . mkSeqInstrs [
4602             ST Q src (spRel 0),
4603             LD TF dst (spRel 0),
4604             CVTxy Q TF dst dst]
4605     in
4606     return (Any F64 code__2)
4607
4608 -------------
4609 coerceFP2Int x
4610   = getRegister x               `thenNat` \ register ->
4611     getNewRegNat F64    `thenNat` \ tmp ->
4612     let
4613         code = registerCode register tmp
4614         src  = registerName register tmp
4615
4616         code__2 dst = code . mkSeqInstrs [
4617             CVTxy TF Q src tmp,
4618             ST TF tmp (spRel 0),
4619             LD Q dst (spRel 0)]
4620     in
4621     return (Any IntRep code__2)
4622
4623 #endif /* alpha_TARGET_ARCH */
4624
4625 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4626
4627 #if i386_TARGET_ARCH
4628
4629 coerceInt2FP from to x = do
4630   (x_reg, x_code) <- getSomeReg x
4631   let
4632         opc  = case to of F32 -> GITOF; F64 -> GITOD
4633         code dst = x_code `snocOL` opc x_reg dst
4634         -- ToDo: works for non-I32 reps?
4635   -- in
4636   return (Any to code)
4637
4638 ------------
4639
4640 coerceFP2Int from to x = do
4641   (x_reg, x_code) <- getSomeReg x
4642   let
4643         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4644         code dst = x_code `snocOL` opc x_reg dst
4645         -- ToDo: works for non-I32 reps?
4646   -- in
4647   return (Any to code)
4648
4649 #endif /* i386_TARGET_ARCH */
4650
4651 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4652
4653 #if x86_64_TARGET_ARCH
4654
4655 coerceFP2Int from to x = do
4656   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4657   let
4658         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4659         code dst = x_code `snocOL` opc x_op dst
4660   -- in
4661   return (Any to code) -- works even if the destination rep is <I32
4662
4663 coerceInt2FP from to x = do
4664   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4665   let
4666         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4667         code dst = x_code `snocOL` opc x_op dst
4668   -- in
4669   return (Any to code) -- works even if the destination rep is <I32
4670
4671 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4672 coerceFP2FP to x = do
4673   (x_reg, x_code) <- getSomeReg x
4674   let
4675         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4676         code dst = x_code `snocOL` opc x_reg dst
4677   -- in
4678   return (Any to code)
4679
4680 #endif
4681
4682 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4683
4684 #if sparc_TARGET_ARCH
4685
4686 coerceInt2FP pk x
4687   = getRegister x               `thenNat` \ register ->
4688     getNewRegNat IntRep         `thenNat` \ reg ->
4689     let
4690         code = registerCode register reg
4691         src  = registerName register reg
4692
4693         code__2 dst = code `appOL` toOL [
4694             ST W src (spRel (-2)),
4695             LD W (spRel (-2)) dst,
4696             FxTOy W (primRepToSize pk) dst dst]
4697     in
4698     return (Any pk code__2)
4699
4700 ------------
4701 coerceFP2Int fprep x
4702   = ASSERT(fprep == F64 || fprep == F32)
4703     getRegister x               `thenNat` \ register ->
4704     getNewRegNat fprep          `thenNat` \ reg ->
4705     getNewRegNat F32    `thenNat` \ tmp ->
4706     let
4707         code = registerCode register reg
4708         src  = registerName register reg
4709         code__2 dst = code `appOL` toOL [
4710             FxTOy (primRepToSize fprep) W src tmp,
4711             ST W tmp (spRel (-2)),
4712             LD W (spRel (-2)) dst]
4713     in
4714     return (Any IntRep code__2)
4715
4716 ------------
4717 coerceDbl2Flt x
4718   = getRegister x               `thenNat` \ register ->
4719     getNewRegNat F64    `thenNat` \ tmp ->
4720     let code = registerCode register tmp
4721         src  = registerName register tmp
4722     in
4723         return (Any F32
4724                        (\dst -> code `snocOL` FxTOy DF F src dst))
4725
4726 ------------
4727 coerceFlt2Dbl x
4728   = getRegister x               `thenNat` \ register ->
4729     getNewRegNat F32    `thenNat` \ tmp ->
4730     let code = registerCode register tmp
4731         src  = registerName register tmp
4732     in
4733         return (Any F64
4734                        (\dst -> code `snocOL` FxTOy F DF src dst))
4735
4736 #endif /* sparc_TARGET_ARCH */
4737
4738 #if powerpc_TARGET_ARCH
4739 coerceInt2FP fromRep toRep x = do
4740     (src, code) <- getSomeReg x
4741     lbl <- getNewLabelNat
4742     itmp <- getNewRegNat I32
4743     ftmp <- getNewRegNat F64
4744     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4745     Amode addr addr_code <- getAmode dynRef
4746     let
4747         code' dst = code `appOL` maybe_exts `appOL` toOL [
4748                 LDATA ReadOnlyData
4749                                 [CmmDataLabel lbl,
4750                                  CmmStaticLit (CmmInt 0x43300000 I32),
4751                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4752                 XORIS itmp src (ImmInt 0x8000),
4753                 ST I32 itmp (spRel 3),
4754                 LIS itmp (ImmInt 0x4330),
4755                 ST I32 itmp (spRel 2),
4756                 LD F64 ftmp (spRel 2)
4757             ] `appOL` addr_code `appOL` toOL [
4758                 LD F64 dst addr,
4759                 FSUB F64 dst ftmp dst
4760             ] `appOL` maybe_frsp dst
4761
4762         maybe_exts = case fromRep of
4763                         I8 ->  unitOL $ EXTS I8 src src
4764                         I16 -> unitOL $ EXTS I16 src src
4765                         I32 -> nilOL
4766         maybe_frsp dst = case toRep of
4767                         F32 -> unitOL $ FRSP dst dst
4768                         F64 -> nilOL
4769     return (Any toRep code')
4770
4771 coerceFP2Int fromRep toRep x = do
4772     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4773     (src, code) <- getSomeReg x
4774     tmp <- getNewRegNat F64
4775     let
4776         code' dst = code `appOL` toOL [
4777                 -- convert to int in FP reg
4778             FCTIWZ tmp src,
4779                 -- store value (64bit) from FP to stack
4780             ST F64 tmp (spRel 2),
4781                 -- read low word of value (high word is undefined)
4782             LD I32 dst (spRel 3)]
4783     return (Any toRep code')
4784 #endif /* powerpc_TARGET_ARCH */
4785
4786
4787 -- -----------------------------------------------------------------------------
4788 -- eXTRA_STK_ARGS_HERE
4789
4790 -- We (allegedly) put the first six C-call arguments in registers;
4791 -- where do we start putting the rest of them?
4792
4793 -- Moved from MachInstrs (SDM):
4794
4795 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4796 eXTRA_STK_ARGS_HERE :: Int
4797 eXTRA_STK_ARGS_HERE
4798   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4799 #endif
4800