ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18
  19 -- NCG stuff:
  20 import MachInstrs
  21 import MachRegs
  22 import NCGMonad
  23 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  24
  25 -- Our intermediate code:
  26 import PprCmm           ( pprExpr )
  27 import Cmm
  28 import MachOp
  29 import CLabel
  30
  31 -- The rest:
  32 import CmdLineOpts      ( opt_PIC )
  33 import ForeignCall      ( CCallConv(..) )
  34 import OrdList
  35 import Pretty
  36 import Outputable
  37 import qualified Outputable
  38 import FastString
  39 import FastTypes        ( isFastTrue )
  40
  41 #ifdef DEBUG
  42 import Outputable       ( assertPanic )
  43 import TRACE            ( trace )
  44 #endif
  45
  46 import Control.Monad    ( mapAndUnzipM )
  47 import Maybe            ( fromJust )
  48 import DATA_BITS
  49 import DATA_WORD
  50
  51 -- -----------------------------------------------------------------------------
  52 -- Top-level of the instruction selector
  53
  54 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  55 -- They are really trees of insns to facilitate fast appending, where a
  56 -- left-to-right traversal (pre-order?) yields the insns in the correct
  57 -- order.
  58
  59 type InstrBlock = OrdList Instr
  60
  61 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  62 cmmTopCodeGen (CmmProc info lab params blocks) = do
  63   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  64   picBaseMb <- getPicBaseMaybeNat
  65   let proc = CmmProc info lab params (concat nat_blocks)
  66       tops = proc : concat statics
  67   case picBaseMb of
  68       Just picBase -> initializePicBase picBase tops
  69       Nothing -> return tops
  70
  71 cmmTopCodeGen (CmmData sec dat) = do
  72   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  73
  74 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  75 basicBlockCodeGen (BasicBlock id stmts) = do
  76   instrs <- stmtsToInstrs stmts
  77   -- code generation may introduce new basic block boundaries, which
  78   -- are indicated by the NEWBLOCK instruction.  We must split up the
  79   -- instruction stream into basic blocks again.  Also, we extract
  80   -- LDATAs here too.
  81   let
  82         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  83
  84         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  85           = ([], BasicBlock id instrs : blocks, statics)
  86         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  87           = (instrs, blocks, CmmData sec dat:statics)
  88         mkBlocks instr (instrs,blocks,statics)
  89           = (instr:instrs, blocks, statics)
  90   -- in
  91   return (BasicBlock id top : other_blocks, statics)
  92
  93 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  94 stmtsToInstrs stmts
  95    = do instrss <- mapM stmtToInstrs stmts
  96         return (concatOL instrss)
  97
  98 stmtToInstrs :: CmmStmt -> NatM InstrBlock
  99 stmtToInstrs stmt = case stmt of
 100     CmmNop         -> return nilOL
 101     CmmComment s   -> return (unitOL (COMMENT s))
 102
 103     CmmAssign reg src
 104       | isFloatingRep kind -> assignReg_FltCode kind reg src
 105       | wordRep == I32 && kind == I64
 106                            -> assignReg_I64Code      reg src
 107       | otherwise          -> assignReg_IntCode kind reg src
 108         where kind = cmmRegRep reg
 109
 110     CmmStore addr src
 111       | isFloatingRep kind -> assignMem_FltCode kind addr src
 112       | wordRep == I32 && kind == I64
 113                          -> assignMem_I64Code      addr src
 114       | otherwise        -> assignMem_IntCode kind addr src
 115         where kind = cmmExprRep src
 116
 117     CmmCall target result_regs args vols
 118        -> genCCall target result_regs args vols
 119
 120     CmmBranch id          -> genBranch id
 121     CmmCondBranch arg id  -> genCondJump id arg
 122     CmmSwitch arg ids     -> genSwitch arg ids
 123     CmmJump arg params    -> genJump arg
 124
 125 -- -----------------------------------------------------------------------------
 126 -- General things for putting together code sequences
 127
 128 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 129 -- CmmExprs into CmmRegOff?
 130 mangleIndexTree :: CmmExpr -> CmmExpr
 131 mangleIndexTree (CmmRegOff reg off)
 132   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 133   where rep = cmmRegRep reg
 134
 135 -- -----------------------------------------------------------------------------
 136 --  Code gen for 64-bit arithmetic on 32-bit platforms
 137
 138 {-
 139 Simple support for generating 64-bit code (ie, 64 bit values and 64
 140 bit assignments) on 32-bit platforms.  Unlike the main code generator
 141 we merely shoot for generating working code as simply as possible, and
 142 pay little attention to code quality.  Specifically, there is no
 143 attempt to deal cleverly with the fixed-vs-floating register
 144 distinction; all values are generated into (pairs of) floating
 145 registers, even if this would mean some redundant reg-reg moves as a
 146 result.  Only one of the VRegUniques is returned, since it will be
 147 of the VRegUniqueLo form, and the upper-half VReg can be determined
 148 by applying getHiVRegFromLo to it.
 149 -}
 150
 151 data ChildCode64        -- a.k.a "Register64"
 152    = ChildCode64
 153         InstrBlock      -- code
 154         Reg             -- the lower 32-bit temporary which contains the
 155                         -- result; use getHiVRegFromLo to find the other
 156                         -- VRegUnique.  Rules of this simplified insn
 157                         -- selection game are therefore that the returned
 158                         -- Reg may be modified
 159
 160 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 161 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 162 iselExpr64        :: CmmExpr -> NatM ChildCode64
 163
 164 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 165
 166 #if i386_TARGET_ARCH
 167
 168 assignMem_I64Code addrTree valueTree = do
 169   Amode addr addr_code <- getAmode addrTree
 170   ChildCode64 vcode rlo <- iselExpr64 valueTree
 171   let
 172         rhi = getHiVRegFromLo rlo
 173
 174         -- Little-endian store
 175         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 176         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 177   -- in
 178   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 179
 180
 181 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 182    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 183    let
 184          r_dst_lo = mkVReg u_dst I32
 185          r_dst_hi = getHiVRegFromLo r_dst_lo
 186          r_src_hi = getHiVRegFromLo r_src_lo
 187          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 188          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 189    -- in
 190    return (
 191         vcode `snocOL` mov_lo `snocOL` mov_hi
 192      )
 193
 194 assignReg_I64Code lvalue valueTree
 195    = panic "assignReg_I64Code(i386): invalid lvalue"
 196
 197 ------------
 198
 199 iselExpr64 (CmmLit (CmmInt i _)) = do
 200   (rlo,rhi) <- getNewRegPairNat I32
 201   let
 202         r = fromIntegral (fromIntegral i :: Word32)
 203         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 204         code = toOL [
 205                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 206                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 207                 ]
 208   -- in
 209   return (ChildCode64 code rlo)
 210
 211 iselExpr64 (CmmLoad addrTree I64) = do
 212    Amode addr addr_code <- getAmode addrTree
 213    (rlo,rhi) <- getNewRegPairNat I32
 214    let
 215         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 216         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 217    -- in
 218    return (
 219             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 220                         rlo
 221      )
 222
 223 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 224    = return (ChildCode64 nilOL (mkVReg vu I32))
 225
 226 -- we handle addition, but rather badly
 227 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 228    ChildCode64 code1 r1lo <- iselExpr64 e1
 229    (rlo,rhi) <- getNewRegPairNat I32
 230    let
 231         r = fromIntegral (fromIntegral i :: Word32)
 232         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 233         r1hi = getHiVRegFromLo r1lo
 234         code =  code1 `appOL`
 235                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 236                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 237                        MOV I32 (OpReg r1hi) (OpReg rhi),
 238                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 239    -- in
 240    return (ChildCode64 code rlo)
 241
 242 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 243    ChildCode64 code1 r1lo <- iselExpr64 e1
 244    ChildCode64 code2 r2lo <- iselExpr64 e2
 245    (rlo,rhi) <- getNewRegPairNat I32
 246    let
 247         r1hi = getHiVRegFromLo r1lo
 248         r2hi = getHiVRegFromLo r2lo
 249         code =  code1 `appOL`
 250                 code2 `appOL`
 251                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 252                        ADD I32 (OpReg r2lo) (OpReg rlo),
 253                        MOV I32 (OpReg r1hi) (OpReg rhi),
 254                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 255    -- in
 256    return (ChildCode64 code rlo)
 257
 258 iselExpr64 expr
 259    = pprPanic "iselExpr64(i386)" (ppr expr)
 260
 261 #endif /* i386_TARGET_ARCH */
 262
 263 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 264
 265 #if sparc_TARGET_ARCH
 266
 267 assignMem_I64Code addrTree valueTree
 268    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vrlo) ->
 269      getRegister addrTree               `thenNat` \ register_addr ->
 270      getNewRegNat IntRep                `thenNat` \ t_addr ->
 271      let rlo = VirtualRegI vrlo
 272          rhi = getHiVRegFromLo rlo
 273          code_addr = registerCode register_addr t_addr
 274          reg_addr  = registerName register_addr t_addr
 275          -- Big-endian store
 276          mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
 277          mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
 278      in
 279          return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
 280
 281
 282 assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
 283    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
 284      let
 285          r_dst_lo = mkVReg u_dst IntRep
 286          r_src_lo = VirtualRegI vr_src_lo
 287          r_dst_hi = getHiVRegFromLo r_dst_lo
 288          r_src_hi = getHiVRegFromLo r_src_lo
 289          mov_lo = mkMOV r_src_lo r_dst_lo
 290          mov_hi = mkMOV r_src_hi r_dst_hi
 291          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 292      in
 293          return (
 294             vcode `snocOL` mov_hi `snocOL` mov_lo
 295          )
 296 assignReg_I64Code lvalue valueTree
 297    = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
 298               (pprStixReg lvalue)
 299
 300
 301 -- Don't delete this -- it's very handy for debugging.
 302 --iselExpr64 expr
 303 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 304 --   = panic "iselExpr64(???)"
 305
 306 iselExpr64 (CmmLoad I64 addrTree)
 307    = getRegister addrTree               `thenNat` \ register_addr ->
 308      getNewRegNat IntRep                `thenNat` \ t_addr ->
 309      getNewRegNat IntRep                `thenNat` \ rlo ->
 310      let rhi = getHiVRegFromLo rlo
 311          code_addr = registerCode register_addr t_addr
 312          reg_addr  = registerName register_addr t_addr
 313          mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
 314          mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
 315      in
 316          return (
 317             ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
 318                         (getVRegUnique rlo)
 319          )
 320
 321 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
 322    = getNewRegNat IntRep                `thenNat` \ r_dst_lo ->
 323      let r_dst_hi = getHiVRegFromLo r_dst_lo
 324          r_src_lo = mkVReg vu IntRep
 325          r_src_hi = getHiVRegFromLo r_src_lo
 326          mov_lo = mkMOV r_src_lo r_dst_lo
 327          mov_hi = mkMOV r_src_hi r_dst_hi
 328          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 329      in
 330          return (
 331             ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
 332          )
 333
 334 iselExpr64 (StCall fn cconv I64 args)
 335   = genCCall fn cconv kind args                 `thenNat` \ call ->
 336     getNewRegNat IntRep                         `thenNat` \ r_dst_lo ->
 337     let r_dst_hi = getHiVRegFromLo r_dst_lo
 338         mov_lo = mkMOV o0 r_dst_lo
 339         mov_hi = mkMOV o1 r_dst_hi
 340         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 341     in
 342     return (
 343        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
 344                    (getVRegUnique r_dst_lo)
 345     )
 346
 347 iselExpr64 expr
 348    = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
 349
 350 #endif /* sparc_TARGET_ARCH */
 351
 352 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 353
 354 #if powerpc_TARGET_ARCH
 355
 356 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 357 getI64Amodes addrTree = do
 358     Amode hi_addr addr_code <- getAmode addrTree
 359     case addrOffset hi_addr 4 of
 360         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 361         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 362                            return (AddrRegImm hi_ptr (ImmInt 0),
 363                                    AddrRegImm hi_ptr (ImmInt 4),
 364                                    code)
 365
 366 assignMem_I64Code addrTree valueTree = do
 367         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 368         ChildCode64 vcode rlo <- iselExpr64 valueTree
 369         let
 370                 rhi = getHiVRegFromLo rlo
 371
 372                 -- Big-endian store
 373                 mov_hi = ST I32 rhi hi_addr
 374                 mov_lo = ST I32 rlo lo_addr
 375         -- in
 376         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 377
 378 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 379    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 380    let
 381          r_dst_lo = mkVReg u_dst I32
 382          r_dst_hi = getHiVRegFromLo r_dst_lo
 383          r_src_hi = getHiVRegFromLo r_src_lo
 384          mov_lo = MR r_dst_lo r_src_lo
 385          mov_hi = MR r_dst_hi r_src_hi
 386    -- in
 387    return (
 388         vcode `snocOL` mov_lo `snocOL` mov_hi
 389      )
 390
 391 assignReg_I64Code lvalue valueTree
 392    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 393
 394
 395 -- Don't delete this -- it's very handy for debugging.
 396 --iselExpr64 expr
 397 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 398 --   = panic "iselExpr64(???)"
 399
 400 iselExpr64 (CmmLoad addrTree I64) = do
 401     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 402     (rlo, rhi) <- getNewRegPairNat I32
 403     let mov_hi = LD I32 rhi hi_addr
 404         mov_lo = LD I32 rlo lo_addr
 405     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 406                          rlo
 407
 408 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 409    = return (ChildCode64 nilOL (mkVReg vu I32))
 410
 411 iselExpr64 (CmmLit (CmmInt i _)) = do
 412   (rlo,rhi) <- getNewRegPairNat I32
 413   let
 414         half0 = fromIntegral (fromIntegral i :: Word16)
 415         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 416         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 417         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 418
 419         code = toOL [
 420                 LIS rlo (ImmInt half1),
 421                 OR rlo rlo (RIImm $ ImmInt half0),
 422                 LIS rhi (ImmInt half3),
 423                 OR rlo rlo (RIImm $ ImmInt half2)
 424                 ]
 425   -- in
 426   return (ChildCode64 code rlo)
 427
 428 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 429    ChildCode64 code1 r1lo <- iselExpr64 e1
 430    ChildCode64 code2 r2lo <- iselExpr64 e2
 431    (rlo,rhi) <- getNewRegPairNat I32
 432    let
 433         r1hi = getHiVRegFromLo r1lo
 434         r2hi = getHiVRegFromLo r2lo
 435         code =  code1 `appOL`
 436                 code2 `appOL`
 437                 toOL [ ADDC rlo r1lo r2lo,
 438                        ADDE rhi r1hi r2hi ]
 439    -- in
 440    return (ChildCode64 code rlo)
 441
 442 iselExpr64 expr
 443    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 444
 445 #endif /* powerpc_TARGET_ARCH */
 446
 447
 448 -- -----------------------------------------------------------------------------
 449 -- The 'Register' type
 450
 451 -- 'Register's passed up the tree.  If the stix code forces the register
 452 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 453 -- otherwise, it comes out as @Any@, and the parent can decide which
 454 -- register to put it in.
 455
 456 data Register
 457   = Fixed   MachRep Reg InstrBlock
 458   | Any     MachRep (Reg -> InstrBlock)
 459
 460 swizzleRegisterRep :: Register -> MachRep -> Register
 461 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 462 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 463
 464
 465 -- -----------------------------------------------------------------------------
 466 -- Grab the Reg for a CmmReg
 467
 468 getRegisterReg :: CmmReg -> Reg
 469
 470 getRegisterReg (CmmLocal (LocalReg u pk))
 471   = mkVReg u pk
 472
 473 getRegisterReg (CmmGlobal mid)
 474   = case get_GlobalReg_reg_or_addr mid of
 475        Left (RealReg rrno) -> RealReg rrno
 476        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 477           -- By this stage, the only MagicIds remaining should be the
 478           -- ones which map to a real machine register on this
 479           -- platform.  Hence ...
 480
 481
 482 -- -----------------------------------------------------------------------------
 483 -- Generate code to get a subtree into a Register
 484
 485 -- Don't delete this -- it's very handy for debugging.
 486 --getRegister expr
 487 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 488 --   = panic "getRegister(???)"
 489
 490 getRegister :: CmmExpr -> NatM Register
 491
 492 getRegister (CmmReg reg)
 493   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 494
 495 getRegister tree@(CmmRegOff _ _)
 496   = getRegister (mangleIndexTree tree)
 497
 498 getRegister CmmPicBaseReg
 499   = do
 500       reg <- getPicBaseNat wordRep
 501       return (Fixed wordRep reg nilOL)
 502
 503 -- end of machine-"independent" bit; here we go on the rest...
 504
 505 #if alpha_TARGET_ARCH
 506
 507 getRegister (StDouble d)
 508   = getBlockIdNat                   `thenNat` \ lbl ->
 509     getNewRegNat PtrRep             `thenNat` \ tmp ->
 510     let code dst = mkSeqInstrs [
 511             LDATA RoDataSegment lbl [
 512                     DATA TF [ImmLab (rational d)]
 513                 ],
 514             LDA tmp (AddrImm (ImmCLbl lbl)),
 515             LD TF dst (AddrReg tmp)]
 516     in
 517         return (Any F64 code)
 518
 519 getRegister (StPrim primop [x]) -- unary PrimOps
 520   = case primop of
 521       IntNegOp -> trivialUCode (NEG Q False) x
 522
 523       NotOp    -> trivialUCode NOT x
 524
 525       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 526       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 527
 528       OrdOp -> coerceIntCode IntRep x
 529       ChrOp -> chrCode x
 530
 531       Float2IntOp  -> coerceFP2Int    x
 532       Int2FloatOp  -> coerceInt2FP pr x
 533       Double2IntOp -> coerceFP2Int    x
 534       Int2DoubleOp -> coerceInt2FP pr x
 535
 536       Double2FloatOp -> coerceFltCode x
 537       Float2DoubleOp -> coerceFltCode x
 538
 539       other_op -> getRegister (StCall fn CCallConv F64 [x])
 540         where
 541           fn = case other_op of
 542                  FloatExpOp    -> FSLIT("exp")
 543                  FloatLogOp    -> FSLIT("log")
 544                  FloatSqrtOp   -> FSLIT("sqrt")
 545                  FloatSinOp    -> FSLIT("sin")
 546                  FloatCosOp    -> FSLIT("cos")
 547                  FloatTanOp    -> FSLIT("tan")
 548                  FloatAsinOp   -> FSLIT("asin")
 549                  FloatAcosOp   -> FSLIT("acos")
 550                  FloatAtanOp   -> FSLIT("atan")
 551                  FloatSinhOp   -> FSLIT("sinh")
 552                  FloatCoshOp   -> FSLIT("cosh")
 553                  FloatTanhOp   -> FSLIT("tanh")
 554                  DoubleExpOp   -> FSLIT("exp")
 555                  DoubleLogOp   -> FSLIT("log")
 556                  DoubleSqrtOp  -> FSLIT("sqrt")
 557                  DoubleSinOp   -> FSLIT("sin")
 558                  DoubleCosOp   -> FSLIT("cos")
 559                  DoubleTanOp   -> FSLIT("tan")
 560                  DoubleAsinOp  -> FSLIT("asin")
 561                  DoubleAcosOp  -> FSLIT("acos")
 562                  DoubleAtanOp  -> FSLIT("atan")
 563                  DoubleSinhOp  -> FSLIT("sinh")
 564                  DoubleCoshOp  -> FSLIT("cosh")
 565                  DoubleTanhOp  -> FSLIT("tanh")
 566   where
 567     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 568
 569 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 570   = case primop of
 571       CharGtOp -> trivialCode (CMP LTT) y x
 572       CharGeOp -> trivialCode (CMP LE) y x
 573       CharEqOp -> trivialCode (CMP EQQ) x y
 574       CharNeOp -> int_NE_code x y
 575       CharLtOp -> trivialCode (CMP LTT) x y
 576       CharLeOp -> trivialCode (CMP LE) x y
 577
 578       IntGtOp  -> trivialCode (CMP LTT) y x
 579       IntGeOp  -> trivialCode (CMP LE) y x
 580       IntEqOp  -> trivialCode (CMP EQQ) x y
 581       IntNeOp  -> int_NE_code x y
 582       IntLtOp  -> trivialCode (CMP LTT) x y
 583       IntLeOp  -> trivialCode (CMP LE) x y
 584
 585       WordGtOp -> trivialCode (CMP ULT) y x
 586       WordGeOp -> trivialCode (CMP ULE) x y
 587       WordEqOp -> trivialCode (CMP EQQ)  x y
 588       WordNeOp -> int_NE_code x y
 589       WordLtOp -> trivialCode (CMP ULT) x y
 590       WordLeOp -> trivialCode (CMP ULE) x y
 591
 592       AddrGtOp -> trivialCode (CMP ULT) y x
 593       AddrGeOp -> trivialCode (CMP ULE) y x
 594       AddrEqOp -> trivialCode (CMP EQQ)  x y
 595       AddrNeOp -> int_NE_code x y
 596       AddrLtOp -> trivialCode (CMP ULT) x y
 597       AddrLeOp -> trivialCode (CMP ULE) x y
 598
 599       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 600       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 601       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 602       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 603       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 604       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 605
 606       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 607       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 608       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 609       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 610       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 611       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 612
 613       IntAddOp  -> trivialCode (ADD Q False) x y
 614       IntSubOp  -> trivialCode (SUB Q False) x y
 615       IntMulOp  -> trivialCode (MUL Q False) x y
 616       IntQuotOp -> trivialCode (DIV Q False) x y
 617       IntRemOp  -> trivialCode (REM Q False) x y
 618
 619       WordAddOp  -> trivialCode (ADD Q False) x y
 620       WordSubOp  -> trivialCode (SUB Q False) x y
 621       WordMulOp  -> trivialCode (MUL Q False) x y
 622       WordQuotOp -> trivialCode (DIV Q True) x y
 623       WordRemOp  -> trivialCode (REM Q True) x y
 624
 625       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 626       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 627       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 628       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 629
 630       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 631       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 632       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 633       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 634
 635       AddrAddOp  -> trivialCode (ADD Q False) x y
 636       AddrSubOp  -> trivialCode (SUB Q False) x y
 637       AddrRemOp  -> trivialCode (REM Q True) x y
 638
 639       AndOp  -> trivialCode AND x y
 640       OrOp   -> trivialCode OR  x y
 641       XorOp  -> trivialCode XOR x y
 642       SllOp  -> trivialCode SLL x y
 643       SrlOp  -> trivialCode SRL x y
 644
 645       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 646       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 647       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 648
 649       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 650       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 651   where
 652     {- ------------------------------------------------------------
 653         Some bizarre special code for getting condition codes into
 654         registers.  Integer non-equality is a test for equality
 655         followed by an XOR with 1.  (Integer comparisons always set
 656         the result register to 0 or 1.)  Floating point comparisons of
 657         any kind leave the result in a floating point register, so we
 658         need to wrangle an integer register out of things.
 659     -}
 660     int_NE_code :: StixTree -> StixTree -> NatM Register
 661
 662     int_NE_code x y
 663       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 664         getNewRegNat IntRep             `thenNat` \ tmp ->
 665         let
 666             code = registerCode register tmp
 667             src  = registerName register tmp
 668             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 669         in
 670         return (Any IntRep code__2)
 671
 672     {- ------------------------------------------------------------
 673         Comments for int_NE_code also apply to cmpF_code
 674     -}
 675     cmpF_code
 676         :: (Reg -> Reg -> Reg -> Instr)
 677         -> Cond
 678         -> StixTree -> StixTree
 679         -> NatM Register
 680
 681     cmpF_code instr cond x y
 682       = trivialFCode pr instr x y       `thenNat` \ register ->
 683         getNewRegNat F64                `thenNat` \ tmp ->
 684         getBlockIdNat                   `thenNat` \ lbl ->
 685         let
 686             code = registerCode register tmp
 687             result  = registerName register tmp
 688
 689             code__2 dst = code . mkSeqInstrs [
 690                 OR zeroh (RIImm (ImmInt 1)) dst,
 691                 BF cond  result (ImmCLbl lbl),
 692                 OR zeroh (RIReg zeroh) dst,
 693                 NEWBLOCK lbl]
 694         in
 695         return (Any IntRep code__2)
 696       where
 697         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 698       ------------------------------------------------------------
 699
 700 getRegister (CmmLoad pk mem)
 701   = getAmode mem                    `thenNat` \ amode ->
 702     let
 703         code = amodeCode amode
 704         src   = amodeAddr amode
 705         size = primRepToSize pk
 706         code__2 dst = code . mkSeqInstr (LD size dst src)
 707     in
 708     return (Any pk code__2)
 709
 710 getRegister (StInt i)
 711   | fits8Bits i
 712   = let
 713         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 714     in
 715     return (Any IntRep code)
 716   | otherwise
 717   = let
 718         code dst = mkSeqInstr (LDI Q dst src)
 719     in
 720     return (Any IntRep code)
 721   where
 722     src = ImmInt (fromInteger i)
 723
 724 getRegister leaf
 725   | isJust imm
 726   = let
 727         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 728     in
 729     return (Any PtrRep code)
 730   where
 731     imm = maybeImm leaf
 732     imm__2 = case imm of Just x -> x
 733
 734 #endif /* alpha_TARGET_ARCH */
 735
 736 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 737
 738 #if i386_TARGET_ARCH
 739
 740 getRegister (CmmLit (CmmFloat f F32)) = do
 741     lbl <- getNewLabelNat
 742     let code dst = toOL [
 743             LDATA ReadOnlyData
 744                         [CmmDataLabel lbl,
 745                          CmmStaticLit (CmmFloat f F32)],
 746             GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
 747             ]
 748     -- in
 749     return (Any F32 code)
 750
 751
 752 getRegister (CmmLit (CmmFloat d F64))
 753   | d == 0.0
 754   = let code dst = unitOL (GLDZ dst)
 755     in  return (Any F64 code)
 756
 757   | d == 1.0
 758   = let code dst = unitOL (GLD1 dst)
 759     in  return (Any F64 code)
 760
 761   | otherwise = do
 762     lbl <- getNewLabelNat
 763     let code dst = toOL [
 764             LDATA ReadOnlyData
 765                         [CmmDataLabel lbl,
 766                          CmmStaticLit (CmmFloat d F64)],
 767             GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
 768             ]
 769     -- in
 770     return (Any F64 code)
 771
 772
 773 -- catch simple cases of zero- or sign-extended load
 774 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 775   code <- intLoadCode (MOVZxL I8) addr
 776   return (Any I32 code)
 777
 778 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 779   code <- intLoadCode (MOVSxL I8) addr
 780   return (Any I32 code)
 781
 782 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 783   code <- intLoadCode (MOVZxL I16) addr
 784   return (Any I32 code)
 785
 786 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 787   code <- intLoadCode (MOVSxL I16) addr
 788   return (Any I32 code)
 789
 790
 791 getRegister (CmmMachOp mop [x]) -- unary MachOps
 792   = case mop of
 793       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 794       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 795
 796       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 797       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 798
 799       -- Nop conversions
 800       -- TODO: these are only nops if the arg is not a fixed register that
 801       -- can't be byte-addressed.
 802       MO_U_Conv I32 I8  -> conversionNop I32 x
 803       MO_S_Conv I32 I8  -> conversionNop I32 x
 804       MO_U_Conv I16 I8  -> conversionNop I16 x
 805       MO_S_Conv I16 I8  -> conversionNop I16 x
 806       MO_U_Conv I32 I16 -> conversionNop I32 x
 807       MO_S_Conv I32 I16 -> conversionNop I32 x
 808       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 809       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 810
 811       -- widenings
 812       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 813       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 814       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 815
 816       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 817       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 818       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 819
 820       MO_S_Conv F32 F64 -> conversionNop F64 x
 821       MO_S_Conv F64 F32 -> conversionNop F32 x
 822       MO_S_Conv from to
 823         | isFloatingRep from -> coerceFP2Int from to x
 824         | isFloatingRep to   -> coerceInt2FP from to x
 825
 826    where
 827         -- signed or unsigned extension.
 828         integerExtend from to instr expr = do
 829             (reg,e_code) <- if from == I8 then getByteReg expr
 830                                           else getSomeReg expr
 831             let
 832                 code dst =
 833                   e_code `snocOL`
 834                   instr from (OpReg reg) (OpReg dst)
 835             return (Any to code)
 836
 837         conversionNop new_rep expr
 838             = do e_code <- getRegister expr
 839                  return (swizzleRegisterRep e_code new_rep)
 840
 841
 842 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 843   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 844     case mop of
 845       MO_Eq F32   -> condFltReg EQQ x y
 846       MO_Ne F32   -> condFltReg NE x y
 847       MO_S_Gt F32 -> condFltReg GTT x y
 848       MO_S_Ge F32 -> condFltReg GE x y
 849       MO_S_Lt F32 -> condFltReg LTT x y
 850       MO_S_Le F32 -> condFltReg LE x y
 851
 852       MO_Eq F64   -> condFltReg EQQ x y
 853       MO_Ne F64   -> condFltReg NE x y
 854       MO_S_Gt F64 -> condFltReg GTT x y
 855       MO_S_Ge F64 -> condFltReg GE x y
 856       MO_S_Lt F64 -> condFltReg LTT x y
 857       MO_S_Le F64 -> condFltReg LE x y
 858
 859       MO_Eq rep   -> condIntReg EQQ x y
 860       MO_Ne rep   -> condIntReg NE x y
 861
 862       MO_S_Gt rep -> condIntReg GTT x y
 863       MO_S_Ge rep -> condIntReg GE x y
 864       MO_S_Lt rep -> condIntReg LTT x y
 865       MO_S_Le rep -> condIntReg LE x y
 866
 867       MO_U_Gt rep -> condIntReg GU  x y
 868       MO_U_Ge rep -> condIntReg GEU x y
 869       MO_U_Lt rep -> condIntReg LU  x y
 870       MO_U_Le rep -> condIntReg LEU x y
 871
 872       MO_Add F32 -> trivialFCode F32  GADD x y
 873       MO_Sub F32 -> trivialFCode F32  GSUB x y
 874
 875       MO_Add F64 -> trivialFCode F64 GADD x y
 876       MO_Sub F64 -> trivialFCode F64 GSUB x y
 877
 878       MO_S_Quot F32 -> trivialFCode  F32  GDIV x y
 879       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
 880
 881       MO_Add rep -> add_code rep x y
 882       MO_Sub rep -> sub_code rep x y
 883
 884       MO_S_Quot rep -> div_code rep True  True  x y
 885       MO_S_Rem  rep -> div_code rep True  False x y
 886       MO_U_Quot rep -> div_code rep False True  x y
 887       MO_U_Rem  rep -> div_code rep False False x y
 888
 889       MO_Mul   F32 -> trivialFCode F32 GMUL x y
 890       MO_Mul   F64 -> trivialFCode F64 GMUL x y
 891       MO_Mul   rep -> let op = IMUL rep in
 892                       trivialCode rep op (Just op) x y
 893
 894       MO_S_MulMayOflo rep -> imulMayOflo rep x y
 895
 896       MO_And rep -> let op = AND rep in
 897                     trivialCode rep op (Just op) x y
 898       MO_Or  rep -> let op = OR  rep in
 899                     trivialCode rep op (Just op) x y
 900       MO_Xor rep -> let op = XOR rep in
 901                     trivialCode rep op (Just op) x y
 902
 903         {- Shift ops on x86s have constraints on their source, it
 904            either has to be Imm, CL or 1
 905             => trivialCode is not restrictive enough (sigh.)
 906         -}
 907       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
 908       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
 909       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
 910
 911       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
 912   where
 913     --------------------
 914     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
 915     imulMayOflo I32 a b = do
 916          res_lo <- getNewRegNat I32
 917          res_hi <- getNewRegNat I32
 918          (a_reg, a_code) <- getNonClobberedReg a
 919          (b_reg, b_code) <- getSomeReg   b
 920          let
 921              code dst = a_code `appOL` b_code `appOL`
 922                         toOL [
 923                            MOV I32 (OpReg a_reg) (OpReg res_hi),
 924                            MOV I32 (OpReg b_reg) (OpReg res_lo),
 925                            IMUL64 res_hi res_lo,                -- result in res_hi:res_lo
 926                            SAR I32 (OpImm (ImmInt 31)) (OpReg res_lo),  -- sign extend lower part
 927                            SUB I32 (OpReg res_hi) (OpReg res_lo),       -- compare against upper
 928                            MOV I32 (OpReg res_lo) (OpReg dst)
 929                            -- dst==0 if high part == sign extended low part
 930                         ]
 931          -- in
 932          return (Any I32 code)
 933
 934     --------------------
 935     shift_code :: MachRep
 936                -> (Operand -> Operand -> Instr)
 937                -> CmmExpr
 938                -> CmmExpr
 939                -> NatM Register
 940
 941     {- Case1: shift length as immediate -}
 942     shift_code rep instr x y@(CmmLit lit) = do
 943           x_code <- getAnyReg x
 944           let
 945                code dst
 946                   = x_code dst `snocOL`
 947                     instr (OpImm (litToImm lit)) (OpReg dst)
 948           -- in
 949           return (Any rep code)
 950
 951     {- Case2: shift length is complex (non-immediate) -}
 952     shift_code rep instr x y{-amount-} = do
 953         (x_reg, x_code) <- getNonClobberedReg x
 954         y_code <- getAnyReg y
 955         let
 956            code = x_code `appOL`
 957                   y_code ecx `snocOL`
 958                   instr (OpReg ecx) (OpReg x_reg)
 959         -- in
 960         return (Fixed rep x_reg code)
 961
 962     --------------------
 963     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
 964     add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
 965     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
 966
 967     --------------------
 968     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
 969     sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
 970     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
 971
 972     -- our three-operand add instruction:
 973     add_int rep x y = do
 974         (x_reg, x_code) <- getSomeReg x
 975         let
 976             imm = ImmInt (fromInteger y)
 977             code dst
 978                = x_code `snocOL`
 979                  LEA rep
 980                         (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
 981                         (OpReg dst)
 982         --
 983         return (Any rep code)
 984
 985     ----------------------
 986     div_code rep signed quotient x y = do
 987            (y_op, y_code) <- getOperand y -- cannot be clobbered
 988            x_code <- getAnyReg x
 989            let
 990              widen | signed    = CLTD
 991                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
 992
 993              instr | signed    = IDIV
 994                    | otherwise = DIV
 995
 996              code = y_code `appOL`
 997                     x_code eax `appOL`
 998                     toOL [widen, instr rep y_op]
 999
1000              result | quotient  = eax
1001                     | otherwise = edx
1002
1003            -- in
1004            return (Fixed rep result code)
1005
1006
1007
1008 getRegister (CmmLoad mem pk)
1009   | isFloatingRep pk
1010   = do
1011     Amode src mem_code <- getAmode mem
1012     let
1013         code dst = mem_code `snocOL`
1014                    GLD pk src dst
1015     --
1016     return (Any pk code)
1017
1018 getRegister (CmmLoad mem pk)
1019   | pk /= I64
1020   = do
1021     code <- intLoadCode (instr pk) mem
1022     return (Any pk code)
1023   where
1024         instr I8  = MOVZxL pk
1025         instr I16 = MOV I16
1026         instr I32 = MOV I32
1027         -- we always zero-extend 8-bit loads, if we
1028         -- can't think of anything better.  This is because
1029         -- we can't guarantee access to an 8-bit variant of every register
1030         -- (esi and edi don't have 8-bit variants), so to make things
1031         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1032
1033 getRegister (CmmLit (CmmInt 0 rep))
1034   = let
1035         code dst
1036            = unitOL (XOR rep (OpReg dst) (OpReg dst))
1037     in
1038         return (Any rep code)
1039
1040 getRegister (CmmLit lit)
1041   = let
1042         rep = cmmLitRep lit
1043         imm = litToImm lit
1044         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1045     in
1046         return (Any rep code)
1047
1048 getRegister other = panic "getRegister(x86)"
1049
1050
1051 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1052    -> NatM (Reg -> InstrBlock)
1053 intLoadCode instr mem = do
1054   Amode src mem_code <- getAmode mem
1055   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1056
1057 -- Compute an expression into *any* register, adding the appropriate
1058 -- move instruction if necessary.
1059 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1060 getAnyReg expr = do
1061   r <- getRegister expr
1062   anyReg r
1063
1064 anyReg :: Register -> NatM (Reg -> InstrBlock)
1065 anyReg (Any _ code)          = return code
1066 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1067
1068 -- The dual to getAnyReg: compute an expression into a register, but
1069 -- we don't mind which one it is.
1070 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
1071 getSomeReg expr = do
1072   r <- getRegister expr
1073   case r of
1074     Any rep code -> do
1075         tmp <- getNewRegNat rep
1076         return (tmp, code tmp)
1077     Fixed _ reg code ->
1078         return (reg, code)
1079
1080 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1081 -- Fixed registers might not be byte-addressable, so we make sure we've
1082 -- got a temporary, inserting an extra reg copy if necessary.
1083 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1084 getByteReg expr = do
1085   r <- getRegister expr
1086   case r of
1087     Any rep code -> do
1088         tmp <- getNewRegNat rep
1089         return (tmp, code tmp)
1090     Fixed rep reg code
1091         | isVirtualReg reg -> return (reg,code)
1092         | otherwise -> do
1093             tmp <- getNewRegNat rep
1094             return (tmp, code `snocOL` reg2reg rep reg tmp)
1095         -- ToDo: could optimise slightly by checking for byte-addressable
1096         -- real registers, but that will happen very rarely if at all.
1097
1098 -- Another variant: this time we want the result in a register that cannot
1099 -- be modified by code to evaluate an arbitrary expression.
1100 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1101 getNonClobberedReg expr = do
1102   r <- getRegister expr
1103   case r of
1104     Any rep code -> do
1105         tmp <- getNewRegNat rep
1106         return (tmp, code tmp)
1107     Fixed rep reg code
1108         -- only free regs can be clobbered
1109         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1110                 tmp <- getNewRegNat rep
1111                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1112         | otherwise ->
1113                 return (reg, code)
1114
1115 reg2reg :: MachRep -> Reg -> Reg -> Instr
1116 reg2reg rep src dst
1117   | isFloatingRep rep = GMOV src dst
1118   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1119
1120 #endif /* i386_TARGET_ARCH */
1121
1122 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1123
1124 #if sparc_TARGET_ARCH
1125
1126 getRegister (StFloat d)
1127   = getBlockIdNat                   `thenNat` \ lbl ->
1128     getNewRegNat PtrRep             `thenNat` \ tmp ->
1129     let code dst = toOL [
1130             SEGMENT DataSegment,
1131             NEWBLOCK lbl,
1132             DATA F [ImmFloat d],
1133             SEGMENT TextSegment,
1134             SETHI (HI (ImmCLbl lbl)) tmp,
1135             LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1136     in
1137         return (Any F32 code)
1138
1139 getRegister (StDouble d)
1140   = getBlockIdNat                   `thenNat` \ lbl ->
1141     getNewRegNat PtrRep             `thenNat` \ tmp ->
1142     let code dst = toOL [
1143             SEGMENT DataSegment,
1144             NEWBLOCK lbl,
1145             DATA DF [ImmDouble d],
1146             SEGMENT TextSegment,
1147             SETHI (HI (ImmCLbl lbl)) tmp,
1148             LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1149     in
1150         return (Any F64 code)
1151
1152
1153 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1154   = case mop of
1155       MO_NatS_Neg      -> trivialUCode (SUB False False g0) x
1156       MO_Nat_Not       -> trivialUCode (XNOR False g0) x
1157       MO_32U_to_8U     -> trivialCode (AND False) x (StInt 255)
1158
1159       MO_F32_Neg       -> trivialUFCode F32 (FNEG F) x
1160       MO_F64_Neg       -> trivialUFCode F64 (FNEG DF) x
1161
1162       MO_F64_to_Flt    -> coerceDbl2Flt x
1163       MO_F32_to_Dbl    -> coerceFlt2Dbl x
1164
1165       MO_F32_to_NatS   -> coerceFP2Int F32 x
1166       MO_NatS_to_Flt   -> coerceInt2FP F32 x
1167       MO_F64_to_NatS   -> coerceFP2Int F64 x
1168       MO_NatS_to_Dbl   -> coerceInt2FP F64 x
1169
1170       -- Conversions which are a nop on sparc
1171       MO_32U_to_NatS   -> conversionNop IntRep   x
1172       MO_32S_to_NatS  -> conversionNop IntRep   x
1173       MO_NatS_to_32U   -> conversionNop WordRep  x
1174       MO_32U_to_NatU   -> conversionNop WordRep  x
1175
1176       MO_NatU_to_NatS -> conversionNop IntRep    x
1177       MO_NatS_to_NatU -> conversionNop WordRep   x
1178       MO_NatP_to_NatU -> conversionNop WordRep   x
1179       MO_NatU_to_NatP -> conversionNop PtrRep    x
1180       MO_NatS_to_NatP -> conversionNop PtrRep    x
1181       MO_NatP_to_NatS -> conversionNop IntRep    x
1182
1183       -- sign-extending widenings
1184       MO_8U_to_32U    -> integerExtend False 24 x
1185       MO_8U_to_NatU   -> integerExtend False 24 x
1186       MO_8S_to_NatS   -> integerExtend True  24 x
1187       MO_16U_to_NatU  -> integerExtend False 16 x
1188       MO_16S_to_NatS  -> integerExtend True  16 x
1189
1190       other_op ->
1191         let fixed_x = if   is_float_op  -- promote to double
1192                       then CmmMachOp MO_F32_to_Dbl [x]
1193                       else x
1194         in
1195         getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
1196     where
1197         integerExtend signed nBits x
1198            = getRegister (
1199                 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1200                          [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1201              )
1202         conversionNop new_rep expr
1203             = getRegister expr          `thenNat` \ e_code ->
1204               return (swizzleRegisterRep e_code new_rep)
1205
1206         (is_float_op, fn)
1207           = case mop of
1208               MO_F32_Exp    -> (True,  FSLIT("exp"))
1209               MO_F32_Log    -> (True,  FSLIT("log"))
1210               MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
1211
1212               MO_F32_Sin    -> (True,  FSLIT("sin"))
1213               MO_F32_Cos    -> (True,  FSLIT("cos"))
1214               MO_F32_Tan    -> (True,  FSLIT("tan"))
1215
1216               MO_F32_Asin   -> (True,  FSLIT("asin"))
1217               MO_F32_Acos   -> (True,  FSLIT("acos"))
1218               MO_F32_Atan   -> (True,  FSLIT("atan"))
1219
1220               MO_F32_Sinh   -> (True,  FSLIT("sinh"))
1221               MO_F32_Cosh   -> (True,  FSLIT("cosh"))
1222               MO_F32_Tanh   -> (True,  FSLIT("tanh"))
1223
1224               MO_F64_Exp    -> (False, FSLIT("exp"))
1225               MO_F64_Log    -> (False, FSLIT("log"))
1226               MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
1227
1228               MO_F64_Sin    -> (False, FSLIT("sin"))
1229               MO_F64_Cos    -> (False, FSLIT("cos"))
1230               MO_F64_Tan    -> (False, FSLIT("tan"))
1231
1232               MO_F64_Asin   -> (False, FSLIT("asin"))
1233               MO_F64_Acos   -> (False, FSLIT("acos"))
1234               MO_F64_Atan   -> (False, FSLIT("atan"))
1235
1236               MO_F64_Sinh   -> (False, FSLIT("sinh"))
1237               MO_F64_Cosh   -> (False, FSLIT("cosh"))
1238               MO_F64_Tanh   -> (False, FSLIT("tanh"))
1239
1240               other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
1241                                 (pprMachOp mop)
1242
1243
1244 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1245   = case mop of
1246       MO_32U_Gt  -> condIntReg GTT x y
1247       MO_32U_Ge  -> condIntReg GE x y
1248       MO_32U_Eq  -> condIntReg EQQ x y
1249       MO_32U_Ne  -> condIntReg NE x y
1250       MO_32U_Lt  -> condIntReg LTT x y
1251       MO_32U_Le  -> condIntReg LE x y
1252
1253       MO_Nat_Eq   -> condIntReg EQQ x y
1254       MO_Nat_Ne   -> condIntReg NE x y
1255
1256       MO_NatS_Gt  -> condIntReg GTT x y
1257       MO_NatS_Ge  -> condIntReg GE x y
1258       MO_NatS_Lt  -> condIntReg LTT x y
1259       MO_NatS_Le  -> condIntReg LE x y
1260
1261       MO_NatU_Gt  -> condIntReg GU  x y
1262       MO_NatU_Ge  -> condIntReg GEU x y
1263       MO_NatU_Lt  -> condIntReg LU  x y
1264       MO_NatU_Le  -> condIntReg LEU x y
1265
1266       MO_F32_Gt -> condFltReg GTT x y
1267       MO_F32_Ge -> condFltReg GE x y
1268       MO_F32_Eq -> condFltReg EQQ x y
1269       MO_F32_Ne -> condFltReg NE x y
1270       MO_F32_Lt -> condFltReg LTT x y
1271       MO_F32_Le -> condFltReg LE x y
1272
1273       MO_F64_Gt -> condFltReg GTT x y
1274       MO_F64_Ge -> condFltReg GE x y
1275       MO_F64_Eq -> condFltReg EQQ x y
1276       MO_F64_Ne -> condFltReg NE x y
1277       MO_F64_Lt -> condFltReg LTT x y
1278       MO_F64_Le -> condFltReg LE x y
1279
1280       MO_Nat_Add -> trivialCode (ADD False False) x y
1281       MO_Nat_Sub -> trivialCode (SUB False False) x y
1282
1283       MO_NatS_Mul  -> trivialCode (SMUL False) x y
1284       MO_NatU_Mul  -> trivialCode (UMUL False) x y
1285       MO_NatS_MulMayOflo -> imulMayOflo x y
1286
1287       -- ToDo: teach about V8+ SPARC div instructions
1288       MO_NatS_Quot -> idiv FSLIT(".div")  x y
1289       MO_NatS_Rem  -> idiv FSLIT(".rem")  x y
1290       MO_NatU_Quot -> idiv FSLIT(".udiv")  x y
1291       MO_NatU_Rem  -> idiv FSLIT(".urem")  x y
1292
1293       MO_F32_Add   -> trivialFCode F32  FADD x y
1294       MO_F32_Sub   -> trivialFCode F32  FSUB x y
1295       MO_F32_Mul   -> trivialFCode F32  FMUL x y
1296       MO_F32_Div   -> trivialFCode F32  FDIV x y
1297
1298       MO_F64_Add   -> trivialFCode F64 FADD x y
1299       MO_F64_Sub   -> trivialFCode F64 FSUB x y
1300       MO_F64_Mul   -> trivialFCode F64 FMUL x y
1301       MO_F64_Div   -> trivialFCode F64 FDIV x y
1302
1303       MO_Nat_And   -> trivialCode (AND False) x y
1304       MO_Nat_Or    -> trivialCode (OR  False) x y
1305       MO_Nat_Xor   -> trivialCode (XOR False) x y
1306
1307       MO_Nat_Shl   -> trivialCode SLL x y
1308       MO_Nat_Shr   -> trivialCode SRL x y
1309       MO_Nat_Sar   -> trivialCode SRA x y
1310
1311       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1312                                          [promote x, promote y])
1313                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1314       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1315                                         [x, y])
1316
1317       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1318   where
1319     idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1320
1321     --------------------
1322     imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1323     imulMayOflo a1 a2
1324        = getNewRegNat IntRep            `thenNat` \ t1 ->
1325          getNewRegNat IntRep            `thenNat` \ t2 ->
1326          getNewRegNat IntRep            `thenNat` \ res_lo ->
1327          getNewRegNat IntRep            `thenNat` \ res_hi ->
1328          getRegister a1                 `thenNat` \ reg1 ->
1329          getRegister a2                 `thenNat` \ reg2 ->
1330          let code1 = registerCode reg1 t1
1331              code2 = registerCode reg2 t2
1332              src1  = registerName reg1 t1
1333              src2  = registerName reg2 t2
1334              code dst = code1 `appOL` code2 `appOL`
1335                         toOL [
1336                            SMUL False src1 (RIReg src2) res_lo,
1337                            RDY res_hi,
1338                            SRA res_lo (RIImm (ImmInt 31)) res_lo,
1339                            SUB False False res_lo (RIReg res_hi) dst
1340                         ]
1341          in
1342             return (Any IntRep code)
1343
1344 getRegister (CmmLoad pk mem) = do
1345     Amode src code <- getAmode mem
1346     let
1347         size = primRepToSize pk
1348         code__2 dst = code `snocOL` LD size src dst
1349     --
1350     return (Any pk code__2)
1351
1352 getRegister (StInt i)
1353   | fits13Bits i
1354   = let
1355         src = ImmInt (fromInteger i)
1356         code dst = unitOL (OR False g0 (RIImm src) dst)
1357     in
1358         return (Any IntRep code)
1359
1360 getRegister leaf
1361   | isJust imm
1362   = let
1363         code dst = toOL [
1364             SETHI (HI imm__2) dst,
1365             OR False dst (RIImm (LO imm__2)) dst]
1366     in
1367         return (Any PtrRep code)
1368   | otherwise
1369   = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1370   where
1371     imm = maybeImm leaf
1372     imm__2 = case imm of Just x -> x
1373
1374 #endif /* sparc_TARGET_ARCH */
1375
1376 #if powerpc_TARGET_ARCH
1377 getRegister (CmmLoad mem pk)
1378   | pk /= I64
1379   = do
1380         Amode addr addr_code <- getAmode mem
1381         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1382                        addr_code `snocOL` LD pk dst addr
1383         return (Any pk code)
1384
1385 -- catch simple cases of zero- or sign-extended load
1386 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1387     Amode addr addr_code <- getAmode mem
1388     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1389
1390 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1391
1392 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1393     Amode addr addr_code <- getAmode mem
1394     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1395
1396 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1397     Amode addr addr_code <- getAmode mem
1398     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1399
1400 getRegister (CmmMachOp mop [x]) -- unary MachOps
1401   = case mop of
1402       MO_Not rep   -> trivialUCode rep NOT x
1403
1404       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1405       MO_S_Conv F32 F64 -> conversionNop F64 x
1406
1407       MO_S_Conv from to
1408         | from == to         -> conversionNop to x
1409         | isFloatingRep from -> coerceFP2Int from to x
1410         | isFloatingRep to   -> coerceInt2FP from to x
1411
1412         -- narrowing is a nop: we treat the high bits as undefined
1413       MO_S_Conv I32 to -> conversionNop to x
1414       MO_S_Conv I16 I8 -> conversionNop I8 x
1415       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1416       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1417
1418       MO_U_Conv from to
1419         | from == to -> conversionNop to x
1420         -- narrowing is a nop: we treat the high bits as undefined
1421       MO_U_Conv I32 to -> conversionNop to x
1422       MO_U_Conv I16 I8 -> conversionNop I8 x
1423       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1424       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1425
1426       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1427       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1428       MO_S_Neg rep      -> trivialUCode rep NEG x
1429
1430     where
1431         conversionNop new_rep expr
1432             = do e_code <- getRegister expr
1433                  return (swizzleRegisterRep e_code new_rep)
1434
1435 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1436   = case mop of
1437       MO_Eq F32 -> condFltReg EQQ x y
1438       MO_Ne F32 -> condFltReg NE  x y
1439
1440       MO_S_Gt F32 -> condFltReg GTT x y
1441       MO_S_Ge F32 -> condFltReg GE  x y
1442       MO_S_Lt F32 -> condFltReg LTT x y
1443       MO_S_Le F32 -> condFltReg LE  x y
1444
1445       MO_Eq F64 -> condFltReg EQQ x y
1446       MO_Ne F64 -> condFltReg NE  x y
1447
1448       MO_S_Gt F64 -> condFltReg GTT x y
1449       MO_S_Ge F64 -> condFltReg GE  x y
1450       MO_S_Lt F64 -> condFltReg LTT x y
1451       MO_S_Le F64 -> condFltReg LE  x y
1452
1453       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1454       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1455
1456       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1457       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1458       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1459       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1460
1461       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1462       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1463       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1464       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1465
1466       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1467       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1468       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1469       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1470
1471       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1472       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1473       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1474       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1475
1476          -- optimize addition with 32-bit immediate
1477          -- (needed for PIC)
1478       MO_Add I32 ->
1479         case y of
1480           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1481             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1482           CmmLit lit
1483             -> do
1484                 (src, srcCode) <- getSomeReg x
1485                 let imm = litToImm lit
1486                     code dst = srcCode `appOL` toOL [
1487                                     ADDIS dst src (HA imm),
1488                                     ADD dst dst (RIImm (LO imm))
1489                                 ]
1490                 return (Any I32 code)
1491           _ -> trivialCode I32 True ADD x y
1492
1493       MO_Add rep -> trivialCode rep True ADD x y
1494       MO_Sub rep ->
1495         case y of    -- subfi ('substract from' with immediate) doesn't exist
1496           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1497             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1498           _ -> trivialCodeNoImm rep SUBF y x
1499
1500       MO_Mul rep -> trivialCode rep True MULLW x y
1501
1502       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1503
1504       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1505       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1506
1507       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1508       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1509
1510       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1511       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1512
1513       MO_And rep   -> trivialCode rep False AND x y
1514       MO_Or rep    -> trivialCode rep False OR x y
1515       MO_Xor rep   -> trivialCode rep False XOR x y
1516
1517       MO_Shl rep   -> trivialCode rep False SLW x y
1518       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1519       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1520
1521 getRegister (CmmLit (CmmInt i rep))
1522   | Just imm <- makeImmediate rep True i
1523   = let
1524         code dst = unitOL (LI dst imm)
1525     in
1526         return (Any rep code)
1527
1528 getRegister (CmmLit (CmmFloat f frep)) = do
1529     lbl <- getNewLabelNat
1530     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1531     Amode addr addr_code <- getAmode dynRef
1532     let code dst =
1533             LDATA ReadOnlyData  [CmmDataLabel lbl,
1534                                  CmmStaticLit (CmmFloat f frep)]
1535             `consOL` (addr_code `snocOL` LD frep dst addr)
1536     return (Any frep code)
1537
1538 getRegister (CmmLit lit)
1539   = let rep = cmmLitRep lit
1540         imm = litToImm lit
1541         code dst = toOL [
1542               LIS dst (HI imm),
1543               OR dst dst (RIImm (LO imm))
1544           ]
1545     in return (Any rep code)
1546
1547 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1548
1549     -- extend?Rep: wrap integer expression of type rep
1550     -- in a conversion to I32
1551 extendSExpr I32 x = x
1552 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1553 extendUExpr I32 x = x
1554 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1555
1556 -- ###FIXME: exact code duplication from x86 case
1557 -- The dual to getAnyReg: compute an expression into a register, but
1558 -- we don't mind which one it is.
1559 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
1560 getSomeReg expr = do
1561   r <- getRegister expr
1562   case r of
1563     Any rep code -> do
1564         tmp <- getNewRegNat rep
1565         return (tmp, code tmp)
1566     Fixed _ reg code ->
1567         return (reg, code)
1568
1569 #endif /* powerpc_TARGET_ARCH */
1570
1571
1572 -- -----------------------------------------------------------------------------
1573 --  The 'Amode' type: Memory addressing modes passed up the tree.
1574
1575 data Amode = Amode AddrMode InstrBlock
1576
1577 {-
1578 Now, given a tree (the argument to an CmmLoad) that references memory,
1579 produce a suitable addressing mode.
1580
1581 A Rule of the Game (tm) for Amodes: use of the addr bit must
1582 immediately follow use of the code part, since the code part puts
1583 values in registers which the addr then refers to.  So you can't put
1584 anything in between, lest it overwrite some of those registers.  If
1585 you need to do some other computation between the code part and use of
1586 the addr bit, first store the effective address from the amode in a
1587 temporary, then do the other computation, and then use the temporary:
1588
1589     code
1590     LEA amode, tmp
1591     ... other computation ...
1592     ... (tmp) ...
1593 -}
1594
1595 getAmode :: CmmExpr -> NatM Amode
1596 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1597
1598 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1599
1600 #if alpha_TARGET_ARCH
1601
1602 getAmode (StPrim IntSubOp [x, StInt i])
1603   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1604     getRegister x               `thenNat` \ register ->
1605     let
1606         code = registerCode register tmp
1607         reg  = registerName register tmp
1608         off  = ImmInt (-(fromInteger i))
1609     in
1610     return (Amode (AddrRegImm reg off) code)
1611
1612 getAmode (StPrim IntAddOp [x, StInt i])
1613   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1614     getRegister x               `thenNat` \ register ->
1615     let
1616         code = registerCode register tmp
1617         reg  = registerName register tmp
1618         off  = ImmInt (fromInteger i)
1619     in
1620     return (Amode (AddrRegImm reg off) code)
1621
1622 getAmode leaf
1623   | isJust imm
1624   = return (Amode (AddrImm imm__2) id)
1625   where
1626     imm = maybeImm leaf
1627     imm__2 = case imm of Just x -> x
1628
1629 getAmode other
1630   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1631     getRegister other           `thenNat` \ register ->
1632     let
1633         code = registerCode register tmp
1634         reg  = registerName register tmp
1635     in
1636     return (Amode (AddrReg reg) code)
1637
1638 #endif /* alpha_TARGET_ARCH */
1639
1640 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1641
1642 #if i386_TARGET_ARCH
1643
1644 -- This is all just ridiculous, since it carefully undoes
1645 -- what mangleIndexTree has just done.
1646 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1647   -- ASSERT(rep == I32)???
1648   = do (x_reg, x_code) <- getSomeReg x
1649        let off = ImmInt (-(fromInteger i))
1650        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1651
1652 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1653   -- ASSERT(rep == I32)???
1654   = do (x_reg, x_code) <- getSomeReg x
1655        let off = ImmInt (fromInteger i)
1656        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1657
1658 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1659 -- recognised by the next rule.
1660 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1661                                   b@(CmmLit _)])
1662   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1663
1664 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1665                                         [y, CmmLit (CmmInt shift _)]])
1666   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1667   = do (x_reg, x_code) <- getNonClobberedReg x
1668         -- x must be in a temp, because it has to stay live over y_code
1669         -- we could compre x_reg and y_reg and do something better here...
1670        (y_reg, y_code) <- getSomeReg y
1671        let
1672            code = x_code `appOL` y_code
1673            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1674        return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
1675                code)
1676
1677 getAmode (CmmLit lit)
1678   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1679
1680 getAmode expr = do
1681   (reg,code) <- getSomeReg expr
1682   return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
1683
1684 #endif /* i386_TARGET_ARCH */
1685
1686 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1687
1688 #if sparc_TARGET_ARCH
1689
1690 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1691   | fits13Bits (-i)
1692   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1693     getRegister x               `thenNat` \ register ->
1694     let
1695         code = registerCode register tmp
1696         reg  = registerName register tmp
1697         off  = ImmInt (-(fromInteger i))
1698     in
1699     return (Amode (AddrRegImm reg off) code)
1700
1701
1702 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1703   | fits13Bits i
1704   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1705     getRegister x               `thenNat` \ register ->
1706     let
1707         code = registerCode register tmp
1708         reg  = registerName register tmp
1709         off  = ImmInt (fromInteger i)
1710     in
1711     return (Amode (AddrRegImm reg off) code)
1712
1713 getAmode (CmmMachOp MO_Nat_Add [x, y])
1714   = getNewRegNat PtrRep         `thenNat` \ tmp1 ->
1715     getNewRegNat IntRep         `thenNat` \ tmp2 ->
1716     getRegister x               `thenNat` \ register1 ->
1717     getRegister y               `thenNat` \ register2 ->
1718     let
1719         code1 = registerCode register1 tmp1
1720         reg1  = registerName register1 tmp1
1721         code2 = registerCode register2 tmp2
1722         reg2  = registerName register2 tmp2
1723         code__2 = code1 `appOL` code2
1724     in
1725     return (Amode (AddrRegReg reg1 reg2) code__2)
1726
1727 getAmode leaf
1728   | isJust imm
1729   = getNewRegNat PtrRep             `thenNat` \ tmp ->
1730     let
1731         code = unitOL (SETHI (HI imm__2) tmp)
1732     in
1733     return (Amode (AddrRegImm tmp (LO imm__2)) code)
1734   where
1735     imm    = maybeImm leaf
1736     imm__2 = case imm of Just x -> x
1737
1738 getAmode other
1739   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1740     getRegister other           `thenNat` \ register ->
1741     let
1742         code = registerCode register tmp
1743         reg  = registerName register tmp
1744         off  = ImmInt 0
1745     in
1746     return (Amode (AddrRegImm reg off) code)
1747
1748 #endif /* sparc_TARGET_ARCH */
1749
1750 #ifdef powerpc_TARGET_ARCH
1751 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1752   | Just off <- makeImmediate I32 True (-i)
1753   = do
1754         (reg, code) <- getSomeReg x
1755         return (Amode (AddrRegImm reg off) code)
1756
1757
1758 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1759   | Just off <- makeImmediate I32 True i
1760   = do
1761         (reg, code) <- getSomeReg x
1762         return (Amode (AddrRegImm reg off) code)
1763
1764    -- optimize addition with 32-bit immediate
1765    -- (needed for PIC)
1766 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1767   = do
1768         tmp <- getNewRegNat I32
1769         (src, srcCode) <- getSomeReg x
1770         let imm = litToImm lit
1771             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1772         return (Amode (AddrRegImm tmp (LO imm)) code)
1773
1774 getAmode (CmmLit lit)
1775   = do
1776         tmp <- getNewRegNat I32
1777         let imm = litToImm lit
1778             code = unitOL (LIS tmp (HA imm))
1779         return (Amode (AddrRegImm tmp (LO imm)) code)
1780
1781 getAmode (CmmMachOp (MO_Add I32) [x, y])
1782   = do
1783         (regX, codeX) <- getSomeReg x
1784         (regY, codeY) <- getSomeReg y
1785         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1786
1787 getAmode other
1788   = do
1789         (reg, code) <- getSomeReg other
1790         let
1791             off  = ImmInt 0
1792         return (Amode (AddrRegImm reg off) code)
1793 #endif /* powerpc_TARGET_ARCH */
1794
1795 -- -----------------------------------------------------------------------------
1796 -- getOperand: sometimes any operand will do.
1797
1798 -- getOperand gets a *safe* operand; that is, the value of the operand
1799 -- will remain valid across the computation of an arbitrary expression,
1800 -- unless the expression is computed directly into a register which
1801 -- the operand refers to (see trivialCode where this function is used
1802 -- for an example).
1803
1804 #ifdef i386_TARGET_ARCH
1805
1806 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1807 getOperand (CmmLoad mem pk)
1808   | not (isFloatingRep pk) && pk /= I64 = do
1809     Amode src mem_code <- getAmode mem
1810     (src',save_code) <-
1811         if (amodeCouldBeClobbered src)
1812                 then do
1813                    tmp <- getNewRegNat wordRep
1814                    return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
1815                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1816                 else
1817                    return (src, nilOL)
1818     return (OpAddr src', save_code `appOL` mem_code)
1819
1820 getOperand e = do
1821     (reg, code) <- getNonClobberedReg e
1822     return (OpReg reg, code)
1823
1824 amodeCouldBeClobbered :: AddrMode -> Bool
1825 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1826
1827 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1828 regClobbered _ = False
1829
1830 #endif
1831
1832 -- -----------------------------------------------------------------------------
1833 --  The 'CondCode' type:  Condition codes passed up the tree.
1834
1835 data CondCode = CondCode Bool Cond InstrBlock
1836
1837 -- Set up a condition code for a conditional branch.
1838
1839 getCondCode :: CmmExpr -> NatM CondCode
1840
1841 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1842
1843 #if alpha_TARGET_ARCH
1844 getCondCode = panic "MachCode.getCondCode: not on Alphas"
1845 #endif /* alpha_TARGET_ARCH */
1846
1847 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1848
1849 #if i386_TARGET_ARCH || sparc_TARGET_ARCH
1850 -- yes, they really do seem to want exactly the same!
1851
1852 getCondCode (CmmMachOp mop [x, y])
1853   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
1854     case mop of
1855       MO_Eq F32 -> condFltCode EQQ x y
1856       MO_Ne F32 -> condFltCode NE  x y
1857
1858       MO_S_Gt F32 -> condFltCode GTT x y
1859       MO_S_Ge F32 -> condFltCode GE  x y
1860       MO_S_Lt F32 -> condFltCode LTT x y
1861       MO_S_Le F32 -> condFltCode LE  x y
1862
1863       MO_Eq F64 -> condFltCode EQQ x y
1864       MO_Ne F64 -> condFltCode NE  x y
1865
1866       MO_S_Gt F64 -> condFltCode GTT x y
1867       MO_S_Ge F64 -> condFltCode GE  x y
1868       MO_S_Lt F64 -> condFltCode LTT x y
1869       MO_S_Le F64 -> condFltCode LE  x y
1870
1871       MO_Eq rep -> condIntCode EQQ  x y
1872       MO_Ne rep -> condIntCode NE   x y
1873
1874       MO_S_Gt rep -> condIntCode GTT  x y
1875       MO_S_Ge rep -> condIntCode GE   x y
1876       MO_S_Lt rep -> condIntCode LTT  x y
1877       MO_S_Le rep -> condIntCode LE   x y
1878
1879       MO_U_Gt rep -> condIntCode GU   x y
1880       MO_U_Ge rep -> condIntCode GEU  x y
1881       MO_U_Lt rep -> condIntCode LU   x y
1882       MO_U_Le rep -> condIntCode LEU  x y
1883
1884       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
1885
1886 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
1887
1888 #elif powerpc_TARGET_ARCH
1889
1890 -- almost the same as everywhere else - but we need to
1891 -- extend small integers to 32 bit first
1892
1893 getCondCode (CmmMachOp mop [x, y])
1894   = case mop of
1895       MO_Eq F32 -> condFltCode EQQ x y
1896       MO_Ne F32 -> condFltCode NE  x y
1897
1898       MO_S_Gt F32 -> condFltCode GTT x y
1899       MO_S_Ge F32 -> condFltCode GE  x y
1900       MO_S_Lt F32 -> condFltCode LTT x y
1901       MO_S_Le F32 -> condFltCode LE  x y
1902
1903       MO_Eq F64 -> condFltCode EQQ x y
1904       MO_Ne F64 -> condFltCode NE  x y
1905
1906       MO_S_Gt F64 -> condFltCode GTT x y
1907       MO_S_Ge F64 -> condFltCode GE  x y
1908       MO_S_Lt F64 -> condFltCode LTT x y
1909       MO_S_Le F64 -> condFltCode LE  x y
1910
1911       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
1912       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
1913
1914       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
1915       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
1916       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
1917       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
1918
1919       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
1920       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
1921       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
1922       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
1923
1924       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
1925
1926 getCondCode other =  panic "getCondCode(2)(powerpc)"
1927
1928
1929 #endif
1930
1931
1932 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1933 -- passed back up the tree.
1934
1935 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1936
1937 #if alpha_TARGET_ARCH
1938 condIntCode = panic "MachCode.condIntCode: not on Alphas"
1939 condFltCode = panic "MachCode.condFltCode: not on Alphas"
1940 #endif /* alpha_TARGET_ARCH */
1941
1942 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1943 #if i386_TARGET_ARCH
1944
1945 -- memory vs immediate
1946 condIntCode cond (CmmLoad x pk) (CmmLit lit) = do
1947     Amode x_addr x_code <- getAmode x
1948     let
1949         imm  = litToImm lit
1950         code = x_code `snocOL`
1951                   CMP pk (OpImm imm) (OpAddr x_addr)
1952     --
1953     return (CondCode False cond code)
1954
1955 -- anything vs zero
1956 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
1957     (x_reg, x_code) <- getSomeReg x
1958     let
1959         code = x_code `snocOL`
1960                   TEST pk (OpReg x_reg) (OpReg x_reg)
1961     --
1962     return (CondCode False cond code)
1963
1964 -- anything vs immediate
1965 condIntCode cond x (CmmLit lit) = do
1966     (x_reg, x_code) <- getSomeReg x
1967     let
1968         imm  = litToImm lit
1969         code = x_code `snocOL`
1970                   CMP (cmmLitRep lit) (OpImm imm) (OpReg x_reg)
1971     -- in
1972     return (CondCode False cond code)
1973
1974 -- memory vs anything
1975 condIntCode cond (CmmLoad x pk) y = do
1976     (y_reg, y_code) <- getNonClobberedReg y
1977     Amode x_addr x_code <- getAmode x
1978     let
1979         code = y_code `appOL`
1980                x_code `snocOL`
1981                   CMP pk (OpReg y_reg) (OpAddr x_addr)
1982     -- in
1983     return (CondCode False cond code)
1984
1985 -- anything vs memory
1986 condIntCode cond y (CmmLoad x pk) = do
1987     (y_reg, y_code) <- getNonClobberedReg y
1988     Amode x_addr x_code <- getAmode x
1989     let
1990         code = y_code `appOL`
1991                x_code `snocOL`
1992                   CMP pk (OpAddr x_addr) (OpReg y_reg)
1993     -- in
1994     return (CondCode False cond code)
1995
1996 -- anything vs anything
1997 condIntCode cond x y = do
1998   (x_op, x_code) <- getOperand x
1999   (y_reg, y_code) <- getSomeReg y
2000   let
2001         code = x_code `appOL`
2002                y_code `snocOL`
2003                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2004   -- in
2005   return (CondCode False cond code)
2006
2007 -----------
2008 condFltCode cond x y
2009   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2010   (x_reg, x_code) <- getNonClobberedReg x
2011   (y_reg, y_code) <- getSomeReg y
2012   let
2013         code = x_code `appOL` y_code `snocOL`
2014                 GCMP cond x_reg y_reg
2015   -- The GCMP insn does the test and sets the zero flag if comparable
2016   -- and true.  Hence we always supply EQQ as the condition to test.
2017   return (CondCode True EQQ code)
2018
2019 #endif /* i386_TARGET_ARCH */
2020
2021 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2022
2023 #if sparc_TARGET_ARCH
2024
2025 condIntCode cond x (StInt y)
2026   | fits13Bits y
2027   = getRegister x               `thenNat` \ register ->
2028     getNewRegNat IntRep         `thenNat` \ tmp ->
2029     let
2030         code = registerCode register tmp
2031         src1 = registerName register tmp
2032         src2 = ImmInt (fromInteger y)
2033         code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2034     in
2035     return (CondCode False cond code__2)
2036
2037 condIntCode cond x y
2038   = getRegister x               `thenNat` \ register1 ->
2039     getRegister y               `thenNat` \ register2 ->
2040     getNewRegNat IntRep         `thenNat` \ tmp1 ->
2041     getNewRegNat IntRep         `thenNat` \ tmp2 ->
2042     let
2043         code1 = registerCode register1 tmp1
2044         src1  = registerName register1 tmp1
2045         code2 = registerCode register2 tmp2
2046         src2  = registerName register2 tmp2
2047         code__2 = code1 `appOL` code2 `snocOL`
2048                   SUB False True src1 (RIReg src2) g0
2049     in
2050     return (CondCode False cond code__2)
2051
2052 -----------
2053 condFltCode cond x y
2054   = getRegister x               `thenNat` \ register1 ->
2055     getRegister y               `thenNat` \ register2 ->
2056     getNewRegNat (registerRep register1)
2057                                 `thenNat` \ tmp1 ->
2058     getNewRegNat (registerRep register2)
2059                                 `thenNat` \ tmp2 ->
2060     getNewRegNat F64    `thenNat` \ tmp ->
2061     let
2062         promote x = FxTOy F DF x tmp
2063
2064         pk1   = registerRep register1
2065         code1 = registerCode register1 tmp1
2066         src1  = registerName register1 tmp1
2067
2068         pk2   = registerRep register2
2069         code2 = registerCode register2 tmp2
2070         src2  = registerName register2 tmp2
2071
2072         code__2 =
2073                 if pk1 == pk2 then
2074                     code1 `appOL` code2 `snocOL`
2075                     FCMP True (primRepToSize pk1) src1 src2
2076                 else if pk1 == F32 then
2077                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2078                     FCMP True DF tmp src2
2079                 else
2080                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2081                     FCMP True DF src1 tmp
2082     in
2083     return (CondCode True cond code__2)
2084
2085 #endif /* sparc_TARGET_ARCH */
2086
2087 #if powerpc_TARGET_ARCH
2088 -- ###FIXME: I16 and I8!
2089 condIntCode cond x (CmmLit (CmmInt y rep))
2090   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2091   = do
2092         (src1, code) <- getSomeReg x
2093         let
2094             code' = code `snocOL`
2095                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2096         return (CondCode False cond code')
2097
2098 condIntCode cond x y = do
2099     (src1, code1) <- getSomeReg x
2100     (src2, code2) <- getSomeReg y
2101     let
2102         code' = code1 `appOL` code2 `snocOL`
2103                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2104     return (CondCode False cond code')
2105
2106 condFltCode cond x y = do
2107     (src1, code1) <- getSomeReg x
2108     (src2, code2) <- getSomeReg y
2109     let
2110         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2111         code'' = case cond of -- twiddle CR to handle unordered case
2112                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2113                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2114                     _ -> code'
2115                  where
2116                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2117     return (CondCode True cond code'')
2118
2119 #endif /* powerpc_TARGET_ARCH */
2120
2121 -- -----------------------------------------------------------------------------
2122 -- Generating assignments
2123
2124 -- Assignments are really at the heart of the whole code generation
2125 -- business.  Almost all top-level nodes of any real importance are
2126 -- assignments, which correspond to loads, stores, or register
2127 -- transfers.  If we're really lucky, some of the register transfers
2128 -- will go away, because we can use the destination register to
2129 -- complete the code generation for the right hand side.  This only
2130 -- fails when the right hand side is forced into a fixed register
2131 -- (e.g. the result of a call).
2132
2133 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2134 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2135
2136 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2137 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2138
2139 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2140
2141 #if alpha_TARGET_ARCH
2142
2143 assignIntCode pk (CmmLoad dst _) src
2144   = getNewRegNat IntRep             `thenNat` \ tmp ->
2145     getAmode dst                    `thenNat` \ amode ->
2146     getRegister src                 `thenNat` \ register ->
2147     let
2148         code1   = amodeCode amode []
2149         dst__2  = amodeAddr amode
2150         code2   = registerCode register tmp []
2151         src__2  = registerName register tmp
2152         sz      = primRepToSize pk
2153         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2154     in
2155     return code__2
2156
2157 assignIntCode pk dst src
2158   = getRegister dst                         `thenNat` \ register1 ->
2159     getRegister src                         `thenNat` \ register2 ->
2160     let
2161         dst__2  = registerName register1 zeroh
2162         code    = registerCode register2 dst__2
2163         src__2  = registerName register2 dst__2
2164         code__2 = if isFixed register2
2165                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2166                   else code
2167     in
2168     return code__2
2169
2170 #endif /* alpha_TARGET_ARCH */
2171
2172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2173
2174 #if i386_TARGET_ARCH
2175
2176 -- integer assignment to memory
2177 assignMem_IntCode pk addr src = do
2178     Amode addr code_addr <- getAmode addr
2179     (code_src, op_src)   <- get_op_RI src
2180     let
2181         code = code_src `appOL`
2182                code_addr `snocOL`
2183                   MOV pk op_src (OpAddr addr)
2184         -- NOTE: op_src is stable, so it will still be valid
2185         -- after code_addr.  This may involve the introduction
2186         -- of an extra MOV to a temporary register, but we hope
2187         -- the register allocator will get rid of it.
2188     --
2189     return code
2190   where
2191     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2192     get_op_RI (CmmLit lit)
2193       = return (nilOL, OpImm (litToImm lit))
2194     get_op_RI op
2195       = do (reg,code) <- getNonClobberedReg op
2196            return (code, OpReg reg)
2197
2198
2199 -- Assign; dst is a reg, rhs is mem
2200 assignReg_IntCode pk reg (CmmLoad src _) = do
2201   load_code <- intLoadCode (MOV pk) src
2202   return (load_code (getRegisterReg reg))
2203
2204 -- dst is a reg, but src could be anything
2205 assignReg_IntCode pk reg src = do
2206   code <- getAnyReg src
2207   return (code (getRegisterReg reg))
2208
2209 #endif /* i386_TARGET_ARCH */
2210
2211 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2212
2213 #if sparc_TARGET_ARCH
2214
2215 assignMem_IntCode pk addr src
2216   = getNewRegNat IntRep                     `thenNat` \ tmp ->
2217     getAmode addr                           `thenNat` \ amode ->
2218     getRegister src                         `thenNat` \ register ->
2219     let
2220         code1   = amodeCode amode
2221         dst__2  = amodeAddr amode
2222         code2   = registerCode register tmp
2223         src__2  = registerName register tmp
2224         sz      = primRepToSize pk
2225         code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2226     in
2227     return code__2
2228
2229 assignReg_IntCode pk reg src
2230   = getRegister src                         `thenNat` \ register2 ->
2231     getRegisterReg reg                      `thenNat` \ register1 ->
2232     getNewRegNat IntRep                     `thenNat` \ tmp ->
2233     let
2234         dst__2  = registerName register1 tmp
2235         code    = registerCode register2 dst__2
2236         src__2  = registerName register2 dst__2
2237         code__2 = if isFixed register2
2238                   then code `snocOL` OR False g0 (RIReg src__2) dst__2
2239                   else code
2240     in
2241     return code__2
2242
2243 #endif /* sparc_TARGET_ARCH */
2244
2245 #if powerpc_TARGET_ARCH
2246
2247 assignMem_IntCode pk addr src = do
2248     (srcReg, code) <- getSomeReg src
2249     Amode dstAddr addr_code <- getAmode addr
2250     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2251
2252 -- dst is a reg, but src could be anything
2253 assignReg_IntCode pk reg src
2254     = do
2255         r <- getRegister src
2256         return $ case r of
2257             Any _ code         -> code dst
2258             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2259     where
2260         dst = getRegisterReg reg
2261
2262 #endif /* powerpc_TARGET_ARCH */
2263
2264
2265 -- -----------------------------------------------------------------------------
2266 -- Floating-point assignments
2267
2268 #if alpha_TARGET_ARCH
2269
2270 assignFltCode pk (CmmLoad dst _) src
2271   = getNewRegNat pk                 `thenNat` \ tmp ->
2272     getAmode dst                    `thenNat` \ amode ->
2273     getRegister src                         `thenNat` \ register ->
2274     let
2275         code1   = amodeCode amode []
2276         dst__2  = amodeAddr amode
2277         code2   = registerCode register tmp []
2278         src__2  = registerName register tmp
2279         sz      = primRepToSize pk
2280         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2281     in
2282     return code__2
2283
2284 assignFltCode pk dst src
2285   = getRegister dst                         `thenNat` \ register1 ->
2286     getRegister src                         `thenNat` \ register2 ->
2287     let
2288         dst__2  = registerName register1 zeroh
2289         code    = registerCode register2 dst__2
2290         src__2  = registerName register2 dst__2
2291         code__2 = if isFixed register2
2292                   then code . mkSeqInstr (FMOV src__2 dst__2)
2293                   else code
2294     in
2295     return code__2
2296
2297 #endif /* alpha_TARGET_ARCH */
2298
2299 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2300
2301 #if i386_TARGET_ARCH
2302
2303 -- Floating point assignment to memory
2304 assignMem_FltCode pk addr src = do
2305   (src_reg, src_code) <- getNonClobberedReg src
2306   Amode addr addr_code <- getAmode addr
2307   let
2308         code = src_code `appOL`
2309                addr_code `snocOL`
2310                 GST pk src_reg addr
2311   return code
2312
2313 -- Floating point assignment to a register/temporary
2314 assignReg_FltCode pk reg src = do
2315   src_code <- getAnyReg src
2316   return (src_code (getRegisterReg reg))
2317
2318 #endif /* i386_TARGET_ARCH */
2319
2320 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2321
2322 #if sparc_TARGET_ARCH
2323
2324 -- Floating point assignment to memory
2325 assignMem_FltCode pk addr src
2326   = getNewRegNat pk                 `thenNat` \ tmp1 ->
2327     getAmode addr                   `thenNat` \ amode ->
2328     getRegister src                 `thenNat` \ register ->
2329     let
2330         sz      = primRepToSize pk
2331         dst__2  = amodeAddr amode
2332
2333         code1   = amodeCode amode
2334         code2   = registerCode register tmp1
2335
2336         src__2  = registerName register tmp1
2337         pk__2   = registerRep register
2338         sz__2   = primRepToSize pk__2
2339
2340         code__2 = code1 `appOL` code2 `appOL`
2341             if   pk == pk__2
2342             then unitOL (ST sz src__2 dst__2)
2343             else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2344     in
2345     return code__2
2346
2347 -- Floating point assignment to a register/temporary
2348 -- Why is this so bizarrely ugly?
2349 assignReg_FltCode pk reg src
2350   = getRegisterReg reg                      `thenNat` \ register1 ->
2351     getRegister src                         `thenNat` \ register2 ->
2352     let
2353         pk__2   = registerRep register2
2354         sz__2   = primRepToSize pk__2
2355     in
2356     getNewRegNat pk__2                      `thenNat` \ tmp ->
2357     let
2358         sz      = primRepToSize pk
2359         dst__2  = registerName register1 g0    -- must be Fixed
2360         reg__2  = if pk /= pk__2 then tmp else dst__2
2361         code    = registerCode register2 reg__2
2362         src__2  = registerName register2 reg__2
2363         code__2 =
2364                 if pk /= pk__2 then
2365                      code `snocOL` FxTOy sz__2 sz src__2 dst__2
2366                 else if isFixed register2 then
2367                      code `snocOL` FMOV sz src__2 dst__2
2368                 else
2369                      code
2370     in
2371     return code__2
2372
2373 #endif /* sparc_TARGET_ARCH */
2374
2375 #if powerpc_TARGET_ARCH
2376
2377 -- Easy, isn't it?
2378 assignMem_FltCode = assignMem_IntCode
2379 assignReg_FltCode = assignReg_IntCode
2380
2381 #endif /* powerpc_TARGET_ARCH */
2382
2383
2384 -- -----------------------------------------------------------------------------
2385 -- Generating an non-local jump
2386
2387 -- (If applicable) Do not fill the delay slots here; you will confuse the
2388 -- register allocator.
2389
2390 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2391
2392 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2393
2394 #if alpha_TARGET_ARCH
2395
2396 genJump (CmmLabel lbl)
2397   | isAsmTemp lbl = returnInstr (BR target)
2398   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2399   where
2400     target = ImmCLbl lbl
2401
2402 genJump tree
2403   = getRegister tree                `thenNat` \ register ->
2404     getNewRegNat PtrRep             `thenNat` \ tmp ->
2405     let
2406         dst    = registerName register pv
2407         code   = registerCode register pv
2408         target = registerName register pv
2409     in
2410     if isFixed register then
2411         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2412     else
2413     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2414
2415 #endif /* alpha_TARGET_ARCH */
2416
2417 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2418
2419 #if i386_TARGET_ARCH
2420
2421 genJump (CmmLoad mem pk) = do
2422   Amode target code <- getAmode mem
2423   return (code `snocOL` JMP (OpAddr target))
2424
2425 genJump (CmmLit lit) = do
2426   return (unitOL (JMP (OpImm (litToImm lit))))
2427
2428 genJump expr = do
2429   (reg,code) <- getSomeReg expr
2430   return (code `snocOL` JMP (OpReg reg))
2431
2432 #endif /* i386_TARGET_ARCH */
2433
2434 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2435
2436 #if sparc_TARGET_ARCH
2437
2438 genJump (CmmLabel lbl)
2439   = return (toOL [CALL (Left target) 0 True, NOP])
2440   where
2441     target = ImmCLbl lbl
2442
2443 genJump tree
2444   = getRegister tree                        `thenNat` \ register ->
2445     getNewRegNat PtrRep             `thenNat` \ tmp ->
2446     let
2447         code   = registerCode register tmp
2448         target = registerName register tmp
2449     in
2450     return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2451
2452 #endif /* sparc_TARGET_ARCH */
2453
2454 #if powerpc_TARGET_ARCH
2455 genJump (CmmLit (CmmLabel lbl))
2456   = return (unitOL $ JMP lbl)
2457
2458 genJump tree
2459   = do
2460         (target,code) <- getSomeReg tree
2461         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2462 #endif /* powerpc_TARGET_ARCH */
2463
2464
2465 -- -----------------------------------------------------------------------------
2466 --  Unconditional branches
2467
2468 genBranch :: BlockId -> NatM InstrBlock
2469
2470 #if alpha_TARGET_ARCH
2471 genBranch id = return (unitOL (BR id))
2472 #endif
2473
2474 #if i386_TARGET_ARCH
2475 genBranch id = return (unitOL (JXX ALWAYS id))
2476 #endif
2477
2478 #if sparc_TARGET_ARCH
2479 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2480 #endif
2481
2482 #if powerpc_TARGET_ARCH
2483 genBranch id = return (unitOL (BCC ALWAYS id))
2484 #endif
2485
2486
2487 -- -----------------------------------------------------------------------------
2488 --  Conditional jumps
2489
2490 {-
2491 Conditional jumps are always to local labels, so we can use branch
2492 instructions.  We peek at the arguments to decide what kind of
2493 comparison to do.
2494
2495 ALPHA: For comparisons with 0, we're laughing, because we can just do
2496 the desired conditional branch.
2497
2498 I386: First, we have to ensure that the condition
2499 codes are set according to the supplied comparison operation.
2500
2501 SPARC: First, we have to ensure that the condition codes are set
2502 according to the supplied comparison operation.  We generate slightly
2503 different code for floating point comparisons, because a floating
2504 point operation cannot directly precede a @BF@.  We assume the worst
2505 and fill that slot with a @NOP@.
2506
2507 SPARC: Do not fill the delay slots here; you will confuse the register
2508 allocator.
2509 -}
2510
2511
2512 genCondJump
2513     :: BlockId      -- the branch target
2514     -> CmmExpr      -- the condition on which to branch
2515     -> NatM InstrBlock
2516
2517 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2518
2519 #if alpha_TARGET_ARCH
2520
2521 genCondJump id (StPrim op [x, StInt 0])
2522   = getRegister x                           `thenNat` \ register ->
2523     getNewRegNat (registerRep register)
2524                                     `thenNat` \ tmp ->
2525     let
2526         code   = registerCode register tmp
2527         value  = registerName register tmp
2528         pk     = registerRep register
2529         target = ImmCLbl lbl
2530     in
2531     returnSeq code [BI (cmpOp op) value target]
2532   where
2533     cmpOp CharGtOp = GTT
2534     cmpOp CharGeOp = GE
2535     cmpOp CharEqOp = EQQ
2536     cmpOp CharNeOp = NE
2537     cmpOp CharLtOp = LTT
2538     cmpOp CharLeOp = LE
2539     cmpOp IntGtOp = GTT
2540     cmpOp IntGeOp = GE
2541     cmpOp IntEqOp = EQQ
2542     cmpOp IntNeOp = NE
2543     cmpOp IntLtOp = LTT
2544     cmpOp IntLeOp = LE
2545     cmpOp WordGtOp = NE
2546     cmpOp WordGeOp = ALWAYS
2547     cmpOp WordEqOp = EQQ
2548     cmpOp WordNeOp = NE
2549     cmpOp WordLtOp = NEVER
2550     cmpOp WordLeOp = EQQ
2551     cmpOp AddrGtOp = NE
2552     cmpOp AddrGeOp = ALWAYS
2553     cmpOp AddrEqOp = EQQ
2554     cmpOp AddrNeOp = NE
2555     cmpOp AddrLtOp = NEVER
2556     cmpOp AddrLeOp = EQQ
2557
2558 genCondJump lbl (StPrim op [x, StDouble 0.0])
2559   = getRegister x                           `thenNat` \ register ->
2560     getNewRegNat (registerRep register)
2561                                     `thenNat` \ tmp ->
2562     let
2563         code   = registerCode register tmp
2564         value  = registerName register tmp
2565         pk     = registerRep register
2566         target = ImmCLbl lbl
2567     in
2568     return (code . mkSeqInstr (BF (cmpOp op) value target))
2569   where
2570     cmpOp FloatGtOp = GTT
2571     cmpOp FloatGeOp = GE
2572     cmpOp FloatEqOp = EQQ
2573     cmpOp FloatNeOp = NE
2574     cmpOp FloatLtOp = LTT
2575     cmpOp FloatLeOp = LE
2576     cmpOp DoubleGtOp = GTT
2577     cmpOp DoubleGeOp = GE
2578     cmpOp DoubleEqOp = EQQ
2579     cmpOp DoubleNeOp = NE
2580     cmpOp DoubleLtOp = LTT
2581     cmpOp DoubleLeOp = LE
2582
2583 genCondJump lbl (StPrim op [x, y])
2584   | fltCmpOp op
2585   = trivialFCode pr instr x y       `thenNat` \ register ->
2586     getNewRegNat F64                `thenNat` \ tmp ->
2587     let
2588         code   = registerCode register tmp
2589         result = registerName register tmp
2590         target = ImmCLbl lbl
2591     in
2592     return (code . mkSeqInstr (BF cond result target))
2593   where
2594     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2595
2596     fltCmpOp op = case op of
2597         FloatGtOp -> True
2598         FloatGeOp -> True
2599         FloatEqOp -> True
2600         FloatNeOp -> True
2601         FloatLtOp -> True
2602         FloatLeOp -> True
2603         DoubleGtOp -> True
2604         DoubleGeOp -> True
2605         DoubleEqOp -> True
2606         DoubleNeOp -> True
2607         DoubleLtOp -> True
2608         DoubleLeOp -> True
2609         _ -> False
2610     (instr, cond) = case op of
2611         FloatGtOp -> (FCMP TF LE, EQQ)
2612         FloatGeOp -> (FCMP TF LTT, EQQ)
2613         FloatEqOp -> (FCMP TF EQQ, NE)
2614         FloatNeOp -> (FCMP TF EQQ, EQQ)
2615         FloatLtOp -> (FCMP TF LTT, NE)
2616         FloatLeOp -> (FCMP TF LE, NE)
2617         DoubleGtOp -> (FCMP TF LE, EQQ)
2618         DoubleGeOp -> (FCMP TF LTT, EQQ)
2619         DoubleEqOp -> (FCMP TF EQQ, NE)
2620         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2621         DoubleLtOp -> (FCMP TF LTT, NE)
2622         DoubleLeOp -> (FCMP TF LE, NE)
2623
2624 genCondJump lbl (StPrim op [x, y])
2625   = trivialCode instr x y           `thenNat` \ register ->
2626     getNewRegNat IntRep             `thenNat` \ tmp ->
2627     let
2628         code   = registerCode register tmp
2629         result = registerName register tmp
2630         target = ImmCLbl lbl
2631     in
2632     return (code . mkSeqInstr (BI cond result target))
2633   where
2634     (instr, cond) = case op of
2635         CharGtOp -> (CMP LE, EQQ)
2636         CharGeOp -> (CMP LTT, EQQ)
2637         CharEqOp -> (CMP EQQ, NE)
2638         CharNeOp -> (CMP EQQ, EQQ)
2639         CharLtOp -> (CMP LTT, NE)
2640         CharLeOp -> (CMP LE, NE)
2641         IntGtOp -> (CMP LE, EQQ)
2642         IntGeOp -> (CMP LTT, EQQ)
2643         IntEqOp -> (CMP EQQ, NE)
2644         IntNeOp -> (CMP EQQ, EQQ)
2645         IntLtOp -> (CMP LTT, NE)
2646         IntLeOp -> (CMP LE, NE)
2647         WordGtOp -> (CMP ULE, EQQ)
2648         WordGeOp -> (CMP ULT, EQQ)
2649         WordEqOp -> (CMP EQQ, NE)
2650         WordNeOp -> (CMP EQQ, EQQ)
2651         WordLtOp -> (CMP ULT, NE)
2652         WordLeOp -> (CMP ULE, NE)
2653         AddrGtOp -> (CMP ULE, EQQ)
2654         AddrGeOp -> (CMP ULT, EQQ)
2655         AddrEqOp -> (CMP EQQ, NE)
2656         AddrNeOp -> (CMP EQQ, EQQ)
2657         AddrLtOp -> (CMP ULT, NE)
2658         AddrLeOp -> (CMP ULE, NE)
2659
2660 #endif /* alpha_TARGET_ARCH */
2661
2662 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2663
2664 #if i386_TARGET_ARCH
2665
2666 genCondJump id bool = do
2667   CondCode _ cond code <- getCondCode bool
2668   return (code `snocOL` JXX cond id)
2669
2670 #endif /* i386_TARGET_ARCH */
2671
2672
2673 #if sparc_TARGET_ARCH
2674
2675 genCondJump id bool = do
2676   CondCode is_float cond code <- getCondCode bool
2677   return (
2678        code `appOL`
2679        toOL (
2680          if   is_float
2681          then [NOP, BF cond False id, NOP]
2682          else [BI cond False id, NOP]
2683        )
2684     )
2685
2686 #endif /* sparc_TARGET_ARCH */
2687
2688
2689 #if powerpc_TARGET_ARCH
2690
2691 genCondJump id bool = do
2692   CondCode is_float cond code <- getCondCode bool
2693   return (code `snocOL` BCC cond id)
2694
2695 #endif /* powerpc_TARGET_ARCH */
2696
2697
2698 -- -----------------------------------------------------------------------------
2699 --  Generating C calls
2700
2701 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2702 -- @get_arg@, which moves the arguments to the correct registers/stack
2703 -- locations.  Apart from that, the code is easy.
2704 --
2705 -- (If applicable) Do not fill the delay slots here; you will confuse the
2706 -- register allocator.
2707
2708 genCCall
2709     :: CmmCallTarget            -- function to call
2710     -> [(CmmReg,MachHint)]      -- where to put the result
2711     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2712     -> Maybe [GlobalReg]        -- volatile regs to save
2713     -> NatM InstrBlock
2714
2715 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2716
2717 #if alpha_TARGET_ARCH
2718
2719 ccallResultRegs =
2720
2721 genCCall fn cconv result_regs args
2722   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2723                           `thenNat` \ ((unused,_), argCode) ->
2724     let
2725         nRegs = length allArgRegs - length unused
2726         code = asmSeqThen (map ($ []) argCode)
2727     in
2728         returnSeq code [
2729             LDA pv (AddrImm (ImmLab (ptext fn))),
2730             JSR ra (AddrReg pv) nRegs,
2731             LDGP gp (AddrReg ra)]
2732   where
2733     ------------------------
2734     {-  Try to get a value into a specific register (or registers) for
2735         a call.  The first 6 arguments go into the appropriate
2736         argument register (separate registers for integer and floating
2737         point arguments, but used in lock-step), and the remaining
2738         arguments are dumped to the stack, beginning at 0(sp).  Our
2739         first argument is a pair of the list of remaining argument
2740         registers to be assigned for this call and the next stack
2741         offset to use for overflowing arguments.  This way,
2742         @get_Arg@ can be applied to all of a call's arguments using
2743         @mapAccumLNat@.
2744     -}
2745     get_arg
2746         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2747         -> StixTree             -- Current argument
2748         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2749
2750     -- We have to use up all of our argument registers first...
2751
2752     get_arg ((iDst,fDst):dsts, offset) arg
2753       = getRegister arg                     `thenNat` \ register ->
2754         let
2755             reg  = if isFloatingRep pk then fDst else iDst
2756             code = registerCode register reg
2757             src  = registerName register reg
2758             pk   = registerRep register
2759         in
2760         return (
2761             if isFloatingRep pk then
2762                 ((dsts, offset), if isFixed register then
2763                     code . mkSeqInstr (FMOV src fDst)
2764                     else code)
2765             else
2766                 ((dsts, offset), if isFixed register then
2767                     code . mkSeqInstr (OR src (RIReg src) iDst)
2768                     else code))
2769
2770     -- Once we have run out of argument registers, we move to the
2771     -- stack...
2772
2773     get_arg ([], offset) arg
2774       = getRegister arg                 `thenNat` \ register ->
2775         getNewRegNat (registerRep register)
2776                                         `thenNat` \ tmp ->
2777         let
2778             code = registerCode register tmp
2779             src  = registerName register tmp
2780             pk   = registerRep register
2781             sz   = primRepToSize pk
2782         in
2783         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2784
2785 #endif /* alpha_TARGET_ARCH */
2786
2787 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2788
2789 #if i386_TARGET_ARCH
2790
2791 -- we only cope with a single result for foreign calls
2792 genCCall (CmmPrim op) [(r,_)] args vols = do
2793   case op of
2794         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2795         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2796
2797         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2798         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2799
2800         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2801         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2802
2803         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2804         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2805
2806         other_op    -> outOfLineFloatOp op r args vols
2807  where
2808   actuallyInlineFloatOp rep instr [(x,_)]
2809         = do res <- trivialUFCode rep instr x
2810              any <- anyReg res
2811              return (any (getRegisterReg r))
2812
2813 genCCall target dest_regs args vols = do
2814     sizes_n_codes <- mapM push_arg (reverse args)
2815     delta <- getDeltaNat
2816     let
2817         (sizes, push_codes) = unzip sizes_n_codes
2818         tot_arg_size        = sum sizes
2819     -- in
2820     -- deal with static vs dynamic call targets
2821     (callinsns,cconv) <-
2822       case target of
2823         -- CmmPrim -> ...
2824         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2825            -> -- ToDo: stdcall arg sizes
2826               return (unitOL (CALL (Left fn_imm)), conv)
2827            where fn_imm = ImmCLbl lbl
2828         CmmForeignCall expr conv
2829            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2830                  ASSERT(dyn_rep == I32)
2831                   return (dyn_c `snocOL` CALL (Right dyn_r), conv)
2832
2833     let push_code = concatOL push_codes
2834         call = callinsns `appOL`
2835                toOL (
2836                         -- Deallocate parameters after call for ccall;
2837                         -- but not for stdcall (callee does it)
2838                   (if cconv == StdCallConv then [] else
2839                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2840                   ++
2841                   [DELTA (delta + tot_arg_size)]
2842                )
2843     -- in
2844     setDeltaNat (delta + tot_arg_size)
2845
2846     let
2847         -- assign the results, if necessary
2848         assign_code []     = nilOL
2849         assign_code [(dest,_hint)] =
2850           case rep of
2851                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2852                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2853                 F32 -> unitOL (GMOV fake0 r_dest)
2854                 F64 -> unitOL (GMOV fake0 r_dest)
2855                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2856           where
2857                 r_dest_hi = getHiVRegFromLo r_dest
2858                 rep = cmmRegRep dest
2859                 r_dest = getRegisterReg dest
2860         assign_code many = panic "genCCall.assign_code many"
2861
2862     return (push_code `appOL`
2863             call `appOL`
2864             assign_code dest_regs)
2865
2866   where
2867     arg_size F64 = 8
2868     arg_size F32 = 4
2869     arg_size _   = 4
2870
2871     push_arg :: (CmmExpr,MachHint){-current argument-}
2872                     -> NatM (Int, InstrBlock)  -- argsz, code
2873
2874     push_arg (arg,_hint) -- we don't need the hints on x86
2875       | arg_rep == I64 = do
2876         ChildCode64 code r_lo <- iselExpr64 arg
2877         delta <- getDeltaNat
2878         setDeltaNat (delta - 8)
2879         let
2880             r_hi = getHiVRegFromLo r_lo
2881         -- in
2882         return (8,     code `appOL`
2883                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
2884                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
2885                              DELTA (delta-8)]
2886             )
2887
2888       | otherwise = do
2889         (code, reg, sz) <- get_op arg
2890         delta <- getDeltaNat
2891         let size = arg_size sz
2892         setDeltaNat (delta-size)
2893         if (case sz of F64 -> True; F32 -> True; _ -> False)
2894            then return (size,
2895                         code `appOL`
2896                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
2897                               DELTA (delta-size),
2898                               GST sz reg (AddrBaseIndex (Just esp)
2899                                                         Nothing
2900                                                         (ImmInt 0))]
2901                        )
2902            else return (size,
2903                         code `snocOL`
2904                         PUSH I32 (OpReg reg) `snocOL`
2905                         DELTA (delta-size)
2906                        )
2907       where
2908          arg_rep = cmmExprRep arg
2909
2910     ------------
2911     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
2912     get_op op = do
2913         (reg,code) <- getSomeReg op
2914         return (code, reg, cmmExprRep op)
2915
2916
2917 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
2918   -> Maybe [GlobalReg] -> NatM InstrBlock
2919 outOfLineFloatOp mop res args vols
2920   | cmmRegRep res == F64
2921   = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
2922
2923   | otherwise
2924   = do uq <- getUniqueNat
2925        let
2926          tmp = CmmLocal (LocalReg uq F64)
2927        -- in
2928        code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
2929        code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
2930        return (code1 `appOL` code2)
2931   where
2932         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
2933         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
2934
2935         target = CmmForeignCall (CmmLit lbl) CCallConv
2936         lbl = CmmLabel (mkForeignLabel fn Nothing False)
2937
2938         fn = case mop of
2939               MO_F32_Exp   -> FSLIT("exp")
2940               MO_F32_Log   -> FSLIT("log")
2941
2942               MO_F32_Asin  -> FSLIT("asin")
2943               MO_F32_Acos  -> FSLIT("acos")
2944               MO_F32_Atan  -> FSLIT("atan")
2945
2946               MO_F32_Sinh  -> FSLIT("sinh")
2947               MO_F32_Cosh  -> FSLIT("cosh")
2948               MO_F32_Tanh  -> FSLIT("tanh")
2949               MO_F32_Pwr   -> FSLIT("pow")
2950
2951               MO_F64_Exp   -> FSLIT("exp")
2952               MO_F64_Log   -> FSLIT("log")
2953
2954               MO_F64_Asin  -> FSLIT("asin")
2955               MO_F64_Acos  -> FSLIT("acos")
2956               MO_F64_Atan  -> FSLIT("atan")
2957
2958               MO_F64_Sinh  -> FSLIT("sinh")
2959               MO_F64_Cosh  -> FSLIT("cosh")
2960               MO_F64_Tanh  -> FSLIT("tanh")
2961               MO_F64_Pwr   -> FSLIT("pow")
2962
2963               other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
2964
2965 #endif /* i386_TARGET_ARCH */
2966
2967 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2968
2969 #if sparc_TARGET_ARCH
2970 {-
2971    The SPARC calling convention is an absolute
2972    nightmare.  The first 6x32 bits of arguments are mapped into
2973    %o0 through %o5, and the remaining arguments are dumped to the
2974    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
2975
2976    If we have to put args on the stack, move %o6==%sp down by
2977    the number of words to go on the stack, to ensure there's enough space.
2978
2979    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
2980    16 words above the stack pointer is a word for the address of
2981    a structure return value.  I use this as a temporary location
2982    for moving values from float to int regs.  Certainly it isn't
2983    safe to put anything in the 16 words starting at %sp, since
2984    this area can get trashed at any time due to window overflows
2985    caused by signal handlers.
2986
2987    A final complication (if the above isn't enough) is that
2988    we can't blithely calculate the arguments one by one into
2989    %o0 .. %o5.  Consider the following nested calls:
2990
2991        fff a (fff b c)
2992
2993    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
2994    the inner call will itself use %o0, which trashes the value put there
2995    in preparation for the outer call.  Upshot: we need to calculate the
2996    args into temporary regs, and move those to arg regs or onto the
2997    stack only immediately prior to the call proper.  Sigh.
2998 -}
2999
3000 genCCall fn cconv kind args
3001   = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3002     let
3003         (argcodes, vregss) = unzip argcode_and_vregs
3004         n_argRegs          = length allArgRegs
3005         n_argRegs_used     = min (length vregs) n_argRegs
3006         vregs              = concat vregss
3007     in
3008     -- deal with static vs dynamic call targets
3009     (case fn of
3010         Left t_static
3011            -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3012         Right dyn
3013            -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3014               return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3015     )
3016                                 `thenNat` \ callinsns ->
3017     let
3018         argcode = concatOL argcodes
3019         (move_sp_down, move_sp_up)
3020            = let diff = length vregs - n_argRegs
3021                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3022              in  if   nn <= 0
3023                  then (nilOL, nilOL)
3024                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3025         transfer_code
3026            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3027     in
3028         return (argcode       `appOL`
3029                    move_sp_down  `appOL`
3030                    transfer_code `appOL`
3031                    callinsns     `appOL`
3032                    unitOL NOP    `appOL`
3033                    move_sp_up)
3034   where
3035      -- function names that begin with '.' are assumed to be special
3036      -- internally generated names like '.mul,' which don't get an
3037      -- underscore prefix
3038      -- ToDo:needed (WDP 96/03) ???
3039      fn_static = unLeft fn
3040      fn__2 = case (headFS fn_static) of
3041                 '.' -> ImmLit (ftext fn_static)
3042                 _   -> ImmCLbl (mkForeignLabel fn_static False)
3043
3044      -- move args from the integer vregs into which they have been
3045      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3046      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3047
3048      move_final [] _ offset          -- all args done
3049         = []
3050
3051      move_final (v:vs) [] offset     -- out of aregs; move to stack
3052         = ST W v (spRel offset)
3053           : move_final vs [] (offset+1)
3054
3055      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3056         = OR False g0 (RIReg v) a
3057           : move_final vs az offset
3058
3059      -- generate code to calculate an argument, and move it into one
3060      -- or two integer vregs.
3061      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3062      arg_to_int_vregs arg
3063         | is64BitRep (repOfCmmExpr arg)
3064         = iselExpr64 arg                `thenNat` \ (ChildCode64 code vr_lo) ->
3065           let r_lo = VirtualRegI vr_lo
3066               r_hi = getHiVRegFromLo r_lo
3067           in  return (code, [r_hi, r_lo])
3068         | otherwise
3069         = getRegister arg                     `thenNat` \ register ->
3070           getNewRegNat (registerRep register) `thenNat` \ tmp ->
3071           let code = registerCode register tmp
3072               src  = registerName register tmp
3073               pk   = registerRep register
3074           in
3075           -- the value is in src.  Get it into 1 or 2 int vregs.
3076           case pk of
3077              F64 ->
3078                 getNewRegNat WordRep  `thenNat` \ v1 ->
3079                 getNewRegNat WordRep  `thenNat` \ v2 ->
3080                 return (
3081                    code                          `snocOL`
3082                    FMOV DF src f0                `snocOL`
3083                    ST   F  f0 (spRel 16)         `snocOL`
3084                    LD   W  (spRel 16) v1         `snocOL`
3085                    ST   F  (fPair f0) (spRel 16) `snocOL`
3086                    LD   W  (spRel 16) v2
3087                    ,
3088                    [v1,v2]
3089                 )
3090              F32 ->
3091                 getNewRegNat WordRep  `thenNat` \ v1 ->
3092                 return (
3093                    code                    `snocOL`
3094                    ST   F  src (spRel 16)  `snocOL`
3095                    LD   W  (spRel 16) v1
3096                    ,
3097                    [v1]
3098                 )
3099              other ->
3100                 getNewRegNat WordRep  `thenNat` \ v1 ->
3101                 return (
3102                    code `snocOL` OR False g0 (RIReg src) v1
3103                    ,
3104                    [v1]
3105                 )
3106 #endif /* sparc_TARGET_ARCH */
3107
3108 #if powerpc_TARGET_ARCH
3109
3110 #if darwin_TARGET_OS || linux_TARGET_OS
3111 {-
3112     The PowerPC calling convention for Darwin/Mac OS X
3113     is described in Apple's document
3114     "Inside Mac OS X - Mach-O Runtime Architecture".
3115
3116     PowerPC Linux uses the System V Release 4 Calling Convention
3117     for PowerPC. It is described in the
3118     "System V Application Binary Interface PowerPC Processor Supplement".
3119
3120     Both conventions are similar:
3121     Parameters may be passed in general-purpose registers starting at r3, in
3122     floating point registers starting at f1, or on the stack.
3123
3124     But there are substantial differences:
3125     * The number of registers used for parameter passing and the exact set of
3126       nonvolatile registers differs (see MachRegs.lhs).
3127     * On Darwin, stack space is always reserved for parameters, even if they are
3128       passed in registers. The called routine may choose to save parameters from
3129       registers to the corresponding space on the stack.
3130     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3131       parameter is passed in an FPR.
3132     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3133       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3134       Darwin just treats an I64 like two separate I32s (high word first).
3135     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3136       4-byte aligned like everything else on Darwin.
3137     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3138       PowerPC Linux does not agree, so neither do we.
3139
3140     According to both conventions, The parameter area should be part of the
3141     caller's stack frame, allocated in the caller's prologue code (large enough
3142     to hold the parameter lists for all called routines). The NCG already
3143     uses the stack for register spilling, leaving 64 bytes free at the top.
3144     If we need a larger parameter area than that, we just allocate a new stack
3145     frame just before ccalling.
3146 -}
3147
3148 genCCall target dest_regs argsAndHints vols
3149   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3150         -- we rely on argument promotion in the codeGen
3151     do
3152         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3153                                                         (zip args argReps)
3154                                                         allArgRegs allFPArgRegs
3155                                                         initialStackOffset
3156                                                         (toOL []) []
3157
3158         (labelOrExpr, reduceToF32) <- case target of
3159             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3160             CmmForeignCall expr conv -> return  (Right expr, False)
3161             CmmPrim mop -> outOfLineFloatOp mop
3162
3163         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3164             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3165
3166         case labelOrExpr of
3167             Left lbl -> do
3168                 return (         codeBefore
3169                         `snocOL` BL lbl usedRegs
3170                         `appOL`  codeAfter)
3171             Right dyn -> do
3172                 (dynReg, dynCode) <- getSomeReg dyn
3173                 return (         dynCode
3174                         `snocOL` MTCTR dynReg
3175                         `appOL`  codeBefore
3176                         `snocOL` BCTRL usedRegs
3177                         `appOL`  codeAfter)
3178     where
3179 #if darwin_TARGET_OS
3180         initialStackOffset = 24
3181             -- size of linkage area + size of arguments, in bytes
3182         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3183                                        map machRepByteWidth argReps
3184 #elif linux_TARGET_OS
3185         initialStackOffset = 8
3186         stackDelta finalStack = roundTo 16 finalStack
3187 #endif
3188         args = map fst argsAndHints
3189         argReps = map cmmExprRep args
3190
3191         roundTo a x | x `mod` a == 0 = x
3192                     | otherwise = x + a - (x `mod` a)
3193
3194         move_sp_down finalStack
3195                | delta > 64 =
3196                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3197                               DELTA (-delta)]
3198                | otherwise = nilOL
3199                where delta = stackDelta finalStack
3200         move_sp_up finalStack
3201                | delta > 64 =
3202                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3203                               DELTA 0]
3204                | otherwise = nilOL
3205                where delta = stackDelta finalStack
3206
3207
3208         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3209         passArguments ((arg,I64):args) gprs fprs stackOffset
3210                accumCode accumUsed =
3211             do
3212                 ChildCode64 code vr_lo <- iselExpr64 arg
3213                 let vr_hi = getHiVRegFromLo vr_lo
3214
3215 #if darwin_TARGET_OS
3216                 passArguments args
3217                               (drop 2 gprs)
3218                               fprs
3219                               (stackOffset+8)
3220                               (accumCode `appOL` code
3221                                     `snocOL` storeWord vr_hi gprs stackOffset
3222                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3223                               ((take 2 gprs) ++ accumUsed)
3224             where
3225                 storeWord vr (gpr:_) offset = MR gpr vr
3226                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3227
3228 #elif linux_TARGET_OS
3229                 let stackOffset' = roundTo 8 stackOffset
3230                     stackCode = accumCode `appOL` code
3231                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3232                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3233                     regCode hireg loreg =
3234                         accumCode `appOL` code
3235                             `snocOL` MR hireg vr_hi
3236                             `snocOL` MR loreg vr_lo
3237
3238                 case gprs of
3239                     hireg : loreg : regs | even (length gprs) ->
3240                         passArguments args regs fprs stackOffset
3241                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3242                     _skipped : hireg : loreg : regs ->
3243                         passArguments args regs fprs stackOffset
3244                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3245                     _ -> -- only one or no regs left
3246                         passArguments args [] fprs (stackOffset'+8)
3247                                       stackCode accumUsed
3248 #endif
3249
3250         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3251             | reg : _ <- regs = do
3252                 register <- getRegister arg
3253                 let code = case register of
3254                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3255                             Any _ acode -> acode reg
3256                 passArguments args
3257                               (drop nGprs gprs)
3258                               (drop nFprs fprs)
3259 #if darwin_TARGET_OS
3260         -- The Darwin ABI requires that we reserve stack slots for register parameters
3261                               (stackOffset + stackBytes)
3262 #elif linux_TARGET_OS
3263         -- ... the SysV ABI doesn't.
3264                               stackOffset
3265 #endif
3266                               (accumCode `appOL` code)
3267                               (reg : accumUsed)
3268             | otherwise = do
3269                 (vr, code) <- getSomeReg arg
3270                 passArguments args
3271                               (drop nGprs gprs)
3272                               (drop nFprs fprs)
3273                               (stackOffset' + stackBytes)
3274                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3275                               accumUsed
3276             where
3277 #if darwin_TARGET_OS
3278         -- stackOffset is at least 4-byte aligned
3279         -- The Darwin ABI is happy with that.
3280                 stackOffset' = stackOffset
3281 #else
3282         -- ... the SysV ABI requires 8-byte alignment for doubles.
3283                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3284                              | otherwise  =           stackOffset
3285 #endif
3286                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3287                 (nGprs, nFprs, stackBytes, regs) = case rep of
3288                     I32 -> (1, 0, 4, gprs)
3289 #if darwin_TARGET_OS
3290         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3291         -- we use the FPRs.
3292                     F32 -> (1, 1, 4, fprs)
3293                     F64 -> (2, 1, 8, fprs)
3294 #elif linux_TARGET_OS
3295         -- ... the SysV ABI doesn't.
3296                     F32 -> (0, 1, 4, fprs)
3297                     F64 -> (0, 1, 8, fprs)
3298 #endif
3299
3300         moveResult reduceToF32 =
3301             case dest_regs of
3302                 [] -> nilOL
3303                 [(dest, _hint)]
3304                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3305                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3306                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3307                                           MR r_dest r4]
3308                     | otherwise -> unitOL (MR r_dest r3)
3309                     where rep = cmmRegRep dest
3310                           r_dest = getRegisterReg dest
3311
3312         outOfLineFloatOp mop =
3313             do
3314                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3315                               mkForeignLabel functionName Nothing True
3316                 let mopLabelOrExpr = case mopExpr of
3317                         CmmLit (CmmLabel lbl) -> Left lbl
3318                         _ -> Right mopExpr
3319                 return (mopLabelOrExpr, reduce)
3320             where
3321                 (functionName, reduce) = case mop of
3322                     MO_F32_Exp   -> (FSLIT("exp"), True)
3323                     MO_F32_Log   -> (FSLIT("log"), True)
3324                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3325
3326                     MO_F32_Sin   -> (FSLIT("sin"), True)
3327                     MO_F32_Cos   -> (FSLIT("cos"), True)
3328                     MO_F32_Tan   -> (FSLIT("tan"), True)
3329
3330                     MO_F32_Asin  -> (FSLIT("asin"), True)
3331                     MO_F32_Acos  -> (FSLIT("acos"), True)
3332                     MO_F32_Atan  -> (FSLIT("atan"), True)
3333
3334                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3335                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3336                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3337                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3338
3339                     MO_F64_Exp   -> (FSLIT("exp"), False)
3340                     MO_F64_Log   -> (FSLIT("log"), False)
3341                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3342
3343                     MO_F64_Sin   -> (FSLIT("sin"), False)
3344                     MO_F64_Cos   -> (FSLIT("cos"), False)
3345                     MO_F64_Tan   -> (FSLIT("tan"), False)
3346
3347                     MO_F64_Asin  -> (FSLIT("asin"), False)
3348                     MO_F64_Acos  -> (FSLIT("acos"), False)
3349                     MO_F64_Atan  -> (FSLIT("atan"), False)
3350
3351                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3352                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3353                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3354                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3355                     other -> pprPanic "genCCall(ppc): unknown callish op"
3356                                     (pprCallishMachOp other)
3357
3358 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3359
3360 #endif /* powerpc_TARGET_ARCH */
3361
3362
3363 -- -----------------------------------------------------------------------------
3364 -- Generating a table-branch
3365
3366 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3367
3368 #if i386_TARGET_ARCH
3369 genSwitch expr ids = do
3370   (reg,e_code) <- getSomeReg expr
3371   lbl <- getNewLabelNat
3372   let
3373         jumpTable = map jumpTableEntry ids
3374         op = OpAddr (AddrBaseIndex Nothing (Just (reg,4)) (ImmCLbl lbl))
3375         code = e_code `appOL` toOL [
3376                 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3377                 JMP_TBL op [ id | Just id <- ids ]
3378              ]
3379   -- in
3380   return code
3381 #elif powerpc_TARGET_ARCH
3382 genSwitch expr ids
3383   | opt_PIC
3384   = do
3385         (reg,e_code) <- getSomeReg expr
3386         tmp <- getNewRegNat I32
3387         lbl <- getNewLabelNat
3388         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3389         (tableReg,t_code) <- getSomeReg $ dynRef
3390         let
3391             jumpTable = map jumpTableEntry ids
3392
3393             code = e_code `appOL` t_code `appOL` toOL [
3394                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3395                             SLW tmp reg (RIImm (ImmInt 2)),
3396                             LD I32 tmp (AddrRegReg tableReg tmp),
3397                             MTCTR tmp,
3398                             BCTR [ id | Just id <- ids ]
3399                     ]
3400         return code
3401   | otherwise
3402   = do
3403         (reg,e_code) <- getSomeReg expr
3404         tmp <- getNewRegNat I32
3405         lbl <- getNewLabelNat
3406         let
3407             jumpTable = map jumpTableEntry ids
3408
3409             code = e_code `appOL` toOL [
3410                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3411                             SLW tmp reg (RIImm (ImmInt 2)),
3412                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3413                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3414                             MTCTR tmp,
3415                             BCTR [ id | Just id <- ids ]
3416                     ]
3417         return code
3418 #else
3419 genSwitch expr ids = panic "ToDo: genSwitch"
3420 #endif
3421
3422 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3423 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3424     where blockLabel = mkAsmTempLabel id
3425
3426 -- -----------------------------------------------------------------------------
3427 -- Support bits
3428 -- -----------------------------------------------------------------------------
3429
3430
3431 -- -----------------------------------------------------------------------------
3432 -- 'condIntReg' and 'condFltReg': condition codes into registers
3433
3434 -- Turn those condition codes into integers now (when they appear on
3435 -- the right hand side of an assignment).
3436 --
3437 -- (If applicable) Do not fill the delay slots here; you will confuse the
3438 -- register allocator.
3439
3440 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3441
3442 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3443
3444 #if alpha_TARGET_ARCH
3445 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3446 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3447 #endif /* alpha_TARGET_ARCH */
3448
3449 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3450
3451 #if i386_TARGET_ARCH
3452
3453 condIntReg cond x y = do
3454   CondCode _ cond cond_code <- condIntCode cond x y
3455   tmp <- getNewRegNat I8
3456   let
3457         code dst = cond_code `appOL` toOL [
3458                     SETCC cond (OpReg tmp),
3459                     MOV I32 (OpReg tmp) (OpReg dst),
3460                     AND I32 (OpImm (ImmInt 1)) (OpReg dst)
3461                   ]
3462                    -- NB. (1) Tha AND is needed here because the x86 only
3463                    -- sets the low byte in the SETCC instruction.
3464                    -- NB. (2) The extra temporary register is a hack to
3465                    -- work around the fact that the setcc instructions only
3466                    -- accept byte registers.  dst might not be a byte-able reg,
3467                    -- but currently all free registers are byte-able, so we're
3468                    -- guaranteed that a new temporary is byte-able.
3469   -- in
3470   return (Any I32 code)
3471
3472
3473 condFltReg cond x y = do
3474   lbl1 <- getBlockIdNat
3475   lbl2 <- getBlockIdNat
3476   CondCode _ cond cond_code <- condFltCode cond x y
3477   let
3478         code dst = cond_code `appOL` toOL [
3479             JXX cond lbl1,
3480             MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
3481             JXX ALWAYS lbl2,
3482             NEWBLOCK lbl1,
3483             MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
3484             JXX ALWAYS lbl2,
3485             NEWBLOCK lbl2]
3486                 -- SIGH, have to split up this block somehow...
3487   -- in
3488   return (Any I32 code)
3489
3490 #endif /* i386_TARGET_ARCH */
3491
3492 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3493
3494 #if sparc_TARGET_ARCH
3495
3496 condIntReg EQQ x (StInt 0)
3497   = getRegister x               `thenNat` \ register ->
3498     getNewRegNat IntRep         `thenNat` \ tmp ->
3499     let
3500         code = registerCode register tmp
3501         src  = registerName register tmp
3502         code__2 dst = code `appOL` toOL [
3503             SUB False True g0 (RIReg src) g0,
3504             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3505     in
3506     return (Any IntRep code__2)
3507
3508 condIntReg EQQ x y
3509   = getRegister x               `thenNat` \ register1 ->
3510     getRegister y               `thenNat` \ register2 ->
3511     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3512     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3513     let
3514         code1 = registerCode register1 tmp1
3515         src1  = registerName register1 tmp1
3516         code2 = registerCode register2 tmp2
3517         src2  = registerName register2 tmp2
3518         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3519             XOR False src1 (RIReg src2) dst,
3520             SUB False True g0 (RIReg dst) g0,
3521             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3522     in
3523     return (Any IntRep code__2)
3524
3525 condIntReg NE x (StInt 0)
3526   = getRegister x               `thenNat` \ register ->
3527     getNewRegNat IntRep         `thenNat` \ tmp ->
3528     let
3529         code = registerCode register tmp
3530         src  = registerName register tmp
3531         code__2 dst = code `appOL` toOL [
3532             SUB False True g0 (RIReg src) g0,
3533             ADD True False g0 (RIImm (ImmInt 0)) dst]
3534     in
3535     return (Any IntRep code__2)
3536
3537 condIntReg NE x y
3538   = getRegister x               `thenNat` \ register1 ->
3539     getRegister y               `thenNat` \ register2 ->
3540     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3541     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3542     let
3543         code1 = registerCode register1 tmp1
3544         src1  = registerName register1 tmp1
3545         code2 = registerCode register2 tmp2
3546         src2  = registerName register2 tmp2
3547         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3548             XOR False src1 (RIReg src2) dst,
3549             SUB False True g0 (RIReg dst) g0,
3550             ADD True False g0 (RIImm (ImmInt 0)) dst]
3551     in
3552     return (Any IntRep code__2)
3553
3554 condIntReg cond x y
3555   = getBlockIdNat               `thenNat` \ lbl1 ->
3556     getBlockIdNat               `thenNat` \ lbl2 ->
3557     condIntCode cond x y        `thenNat` \ condition ->
3558     let
3559         code = condCode condition
3560         cond = condName condition
3561         code__2 dst = code `appOL` toOL [
3562             BI cond False (ImmCLbl lbl1), NOP,
3563             OR False g0 (RIImm (ImmInt 0)) dst,
3564             BI ALWAYS False (ImmCLbl lbl2), NOP,
3565             NEWBLOCK lbl1,
3566             OR False g0 (RIImm (ImmInt 1)) dst,
3567             NEWBLOCK lbl2]
3568     in
3569     return (Any IntRep code__2)
3570
3571 condFltReg cond x y
3572   = getBlockIdNat               `thenNat` \ lbl1 ->
3573     getBlockIdNat               `thenNat` \ lbl2 ->
3574     condFltCode cond x y        `thenNat` \ condition ->
3575     let
3576         code = condCode condition
3577         cond = condName condition
3578         code__2 dst = code `appOL` toOL [
3579             NOP,
3580             BF cond False (ImmCLbl lbl1), NOP,
3581             OR False g0 (RIImm (ImmInt 0)) dst,
3582             BI ALWAYS False (ImmCLbl lbl2), NOP,
3583             NEWBLOCK lbl1,
3584             OR False g0 (RIImm (ImmInt 1)) dst,
3585             NEWBLOCK lbl2]
3586     in
3587     return (Any IntRep code__2)
3588
3589 #endif /* sparc_TARGET_ARCH */
3590
3591 #if powerpc_TARGET_ARCH
3592 condReg getCond = do
3593     lbl1 <- getBlockIdNat
3594     lbl2 <- getBlockIdNat
3595     CondCode _ cond cond_code <- getCond
3596     let
3597 {-        code dst = cond_code `appOL` toOL [
3598                 BCC cond lbl1,
3599                 LI dst (ImmInt 0),
3600                 BCC ALWAYS lbl2,
3601                 NEWBLOCK lbl1,
3602                 LI dst (ImmInt 1),
3603                 BCC ALWAYS lbl2,
3604                 NEWBLOCK lbl2
3605             ]-}
3606         code dst = cond_code
3607             `appOL` negate_code
3608             `appOL` toOL [
3609                 MFCR dst,
3610                 RLWINM dst dst (bit + 1) 31 31
3611             ]
3612
3613         negate_code | do_negate = unitOL (CRNOR bit bit bit)
3614                     | otherwise = nilOL
3615
3616         (bit, do_negate) = case cond of
3617             LTT -> (0, False)
3618             LE  -> (1, True)
3619             EQQ -> (2, False)
3620             GE  -> (0, True)
3621             GTT -> (1, False)
3622
3623             NE  -> (2, True)
3624
3625             LU  -> (0, False)
3626             LEU -> (1, True)
3627             GEU -> (0, True)
3628             GU  -> (1, False)
3629
3630     return (Any I32 code)
3631
3632 condIntReg cond x y = condReg (condIntCode cond x y)
3633 condFltReg cond x y = condReg (condFltCode cond x y)
3634 #endif /* powerpc_TARGET_ARCH */
3635
3636
3637 -- -----------------------------------------------------------------------------
3638 -- 'trivial*Code': deal with trivial instructions
3639
3640 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
3641 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
3642 -- Only look for constants on the right hand side, because that's
3643 -- where the generic optimizer will have put them.
3644
3645 -- Similarly, for unary instructions, we don't have to worry about
3646 -- matching an StInt as the argument, because genericOpt will already
3647 -- have handled the constant-folding.
3648
3649 trivialCode
3650     :: MachRep
3651     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
3652       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
3653                      -> Maybe (Operand -> Operand -> Instr)
3654       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
3655       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
3656       ,))))
3657     -> CmmExpr -> CmmExpr -- the two arguments
3658     -> NatM Register
3659
3660 #ifndef powerpc_TARGET_ARCH
3661 trivialFCode
3662     :: MachRep
3663     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
3664       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
3665       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
3666       ,)))
3667     -> CmmExpr -> CmmExpr -- the two arguments
3668     -> NatM Register
3669 #endif
3670
3671 trivialUCode
3672     :: MachRep
3673     -> IF_ARCH_alpha((RI -> Reg -> Instr)
3674       ,IF_ARCH_i386 ((Operand -> Instr)
3675       ,IF_ARCH_sparc((RI -> Reg -> Instr)
3676       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
3677       ,))))
3678     -> CmmExpr  -- the one argument
3679     -> NatM Register
3680
3681 #ifndef powerpc_TARGET_ARCH
3682 trivialUFCode
3683     :: MachRep
3684     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
3685       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
3686       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
3687       ,)))
3688     -> CmmExpr -- the one argument
3689     -> NatM Register
3690 #endif
3691
3692 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3693
3694 #if alpha_TARGET_ARCH
3695
3696 trivialCode instr x (StInt y)
3697   | fits8Bits y
3698   = getRegister x               `thenNat` \ register ->
3699     getNewRegNat IntRep         `thenNat` \ tmp ->
3700     let
3701         code = registerCode register tmp
3702         src1 = registerName register tmp
3703         src2 = ImmInt (fromInteger y)
3704         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
3705     in
3706     return (Any IntRep code__2)
3707
3708 trivialCode instr x y
3709   = getRegister x               `thenNat` \ register1 ->
3710     getRegister y               `thenNat` \ register2 ->
3711     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3712     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3713     let
3714         code1 = registerCode register1 tmp1 []
3715         src1  = registerName register1 tmp1
3716         code2 = registerCode register2 tmp2 []
3717         src2  = registerName register2 tmp2
3718         code__2 dst = asmSeqThen [code1, code2] .
3719                      mkSeqInstr (instr src1 (RIReg src2) dst)
3720     in
3721     return (Any IntRep code__2)
3722
3723 ------------
3724 trivialUCode instr x
3725   = getRegister x               `thenNat` \ register ->
3726     getNewRegNat IntRep         `thenNat` \ tmp ->
3727     let
3728         code = registerCode register tmp
3729         src  = registerName register tmp
3730         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
3731     in
3732     return (Any IntRep code__2)
3733
3734 ------------
3735 trivialFCode _ instr x y
3736   = getRegister x               `thenNat` \ register1 ->
3737     getRegister y               `thenNat` \ register2 ->
3738     getNewRegNat F64    `thenNat` \ tmp1 ->
3739     getNewRegNat F64    `thenNat` \ tmp2 ->
3740     let
3741         code1 = registerCode register1 tmp1
3742         src1  = registerName register1 tmp1
3743
3744         code2 = registerCode register2 tmp2
3745         src2  = registerName register2 tmp2
3746
3747         code__2 dst = asmSeqThen [code1 [], code2 []] .
3748                       mkSeqInstr (instr src1 src2 dst)
3749     in
3750     return (Any F64 code__2)
3751
3752 trivialUFCode _ instr x
3753   = getRegister x               `thenNat` \ register ->
3754     getNewRegNat F64    `thenNat` \ tmp ->
3755     let
3756         code = registerCode register tmp
3757         src  = registerName register tmp
3758         code__2 dst = code . mkSeqInstr (instr src dst)
3759     in
3760     return (Any F64 code__2)
3761
3762 #endif /* alpha_TARGET_ARCH */
3763
3764 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3765
3766 #if i386_TARGET_ARCH
3767
3768 {-
3769 The Rules of the Game are:
3770
3771 * You cannot assume anything about the destination register dst;
3772   it may be anything, including a fixed reg.
3773
3774 * You may compute an operand into a fixed reg, but you may not
3775   subsequently change the contents of that fixed reg.  If you
3776   want to do so, first copy the value either to a temporary
3777   or into dst.  You are free to modify dst even if it happens
3778   to be a fixed reg -- that's not your problem.
3779
3780 * You cannot assume that a fixed reg will stay live over an
3781   arbitrary computation.  The same applies to the dst reg.
3782
3783 * Temporary regs obtained from getNewRegNat are distinct from
3784   each other and from all other regs, and stay live over
3785   arbitrary computations.
3786
3787 --------------------
3788
3789 SDM's version of The Rules:
3790
3791 * If getRegister returns Any, that means it can generate correct
3792   code which places the result in any register, period.  Even if that
3793   register happens to be read during the computation.
3794
3795   Corollary #1: this means that if you are generating code for an
3796   operation with two arbitrary operands, you cannot assign the result
3797   of the first operand into the destination register before computing
3798   the second operand.  The second operand might require the old value
3799   of the destination register.
3800
3801   Corollary #2: A function might be able to generate more efficient
3802   code if it knows the destination register is a new temporary (and
3803   therefore not read by any of the sub-computations).
3804
3805 * If getRegister returns Any, then the code it generates may modify only:
3806         (a) fresh temporaries
3807         (b) the destination register
3808         (c) known registers (eg. %ecx is used by shifts)
3809   In particular, it may *not* modify global registers, unless the global
3810   register happens to be the destination register.
3811 -}
3812
3813 trivialCode rep instr maybe_revinstr a (CmmLit lit_b) = do
3814   a_code <- getAnyReg a
3815   let
3816        code dst
3817           = a_code dst `snocOL`
3818             instr (OpImm (litToImm lit_b)) (OpReg dst)
3819   -- in
3820   return (Any rep code)
3821
3822 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
3823   b_code <- getAnyReg b
3824   let
3825        code dst
3826          = b_code dst `snocOL`
3827            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
3828   -- in
3829   return (Any rep code)
3830
3831 trivialCode rep instr maybe_revinstr a b = do
3832   (b_op, b_code) <- getOperand b
3833   a_code <- getAnyReg a
3834   tmp <- getNewRegNat rep
3835   let
3836      -- We want the value of b to stay alive across the computation of a.
3837      -- But, we want to calculate a straight into the destination register,
3838      -- because the instruction only has two operands (dst := dst `op` src).
3839      -- The troublesome case is when the result of b is in the same register
3840      -- as the destination reg.  In this case, we have to save b in a
3841      -- new temporary across the computation of a.
3842      code dst
3843         | dst `clashesWith` b_op =
3844                 b_code `appOL`
3845                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
3846                 a_code dst `snocOL`
3847                 instr (OpReg tmp) (OpReg dst)
3848         | otherwise =
3849                 b_code `appOL`
3850                 a_code dst `snocOL`
3851                 instr b_op (OpReg dst)
3852   -- in
3853   return (Any rep code)
3854  where
3855   reg `clashesWith` OpReg reg2   = reg == reg2
3856   reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
3857
3858 -----------
3859
3860 trivialUCode rep instr x = do
3861   x_code <- getAnyReg x
3862   let
3863      code dst =
3864         x_code dst `snocOL`
3865         instr (OpReg dst)
3866   -- in
3867   return (Any rep code)
3868
3869 -----------
3870
3871 trivialFCode pk instr x y = do
3872   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
3873   (y_reg, y_code) <- getSomeReg y
3874   let
3875      code dst =
3876         x_code `appOL`
3877         y_code `snocOL`
3878         instr pk x_reg y_reg dst
3879   -- in
3880   return (Any pk code)
3881
3882 -------------
3883
3884 trivialUFCode rep instr x = do
3885   (x_reg, x_code) <- getSomeReg x
3886   let
3887      code dst =
3888         x_code `snocOL`
3889         instr x_reg dst
3890   -- in
3891   return (Any rep code)
3892
3893 #endif /* i386_TARGET_ARCH */
3894
3895 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3896
3897 #if sparc_TARGET_ARCH
3898
3899 trivialCode instr x (StInt y)
3900   | fits13Bits y
3901   = getRegister x               `thenNat` \ register ->
3902     getNewRegNat IntRep         `thenNat` \ tmp ->
3903     let
3904         code = registerCode register tmp
3905         src1 = registerName register tmp
3906         src2 = ImmInt (fromInteger y)
3907         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
3908     in
3909     return (Any IntRep code__2)
3910
3911 trivialCode instr x y
3912   = getRegister x               `thenNat` \ register1 ->
3913     getRegister y               `thenNat` \ register2 ->
3914     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3915     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3916     let
3917         code1 = registerCode register1 tmp1
3918         src1  = registerName register1 tmp1
3919         code2 = registerCode register2 tmp2
3920         src2  = registerName register2 tmp2
3921         code__2 dst = code1 `appOL` code2 `snocOL`
3922                       instr src1 (RIReg src2) dst
3923     in
3924     return (Any IntRep code__2)
3925
3926 ------------
3927 trivialFCode pk instr x y
3928   = getRegister x               `thenNat` \ register1 ->
3929     getRegister y               `thenNat` \ register2 ->
3930     getNewRegNat (registerRep register1)
3931                                 `thenNat` \ tmp1 ->
3932     getNewRegNat (registerRep register2)
3933                                 `thenNat` \ tmp2 ->
3934     getNewRegNat F64    `thenNat` \ tmp ->
3935     let
3936         promote x = FxTOy F DF x tmp
3937
3938         pk1   = registerRep register1
3939         code1 = registerCode register1 tmp1
3940         src1  = registerName register1 tmp1
3941
3942         pk2   = registerRep register2
3943         code2 = registerCode register2 tmp2
3944         src2  = registerName register2 tmp2
3945
3946         code__2 dst =
3947                 if pk1 == pk2 then
3948                     code1 `appOL` code2 `snocOL`
3949                     instr (primRepToSize pk) src1 src2 dst
3950                 else if pk1 == F32 then
3951                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
3952                     instr DF tmp src2 dst
3953                 else
3954                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
3955                     instr DF src1 tmp dst
3956     in
3957     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
3958
3959 ------------
3960 trivialUCode instr x
3961   = getRegister x               `thenNat` \ register ->
3962     getNewRegNat IntRep         `thenNat` \ tmp ->
3963     let
3964         code = registerCode register tmp
3965         src  = registerName register tmp
3966         code__2 dst = code `snocOL` instr (RIReg src) dst
3967     in
3968     return (Any IntRep code__2)
3969
3970 -------------
3971 trivialUFCode pk instr x
3972   = getRegister x               `thenNat` \ register ->
3973     getNewRegNat pk             `thenNat` \ tmp ->
3974     let
3975         code = registerCode register tmp
3976         src  = registerName register tmp
3977         code__2 dst = code `snocOL` instr src dst
3978     in
3979     return (Any pk code__2)
3980
3981 #endif /* sparc_TARGET_ARCH */
3982
3983 #if powerpc_TARGET_ARCH
3984
3985 {-
3986 Wolfgang's PowerPC version of The Rules:
3987
3988 A slightly modified version of The Rules to take advantage of the fact
3989 that PowerPC instructions work on all registers and don't implicitly
3990 clobber any fixed registers.
3991
3992 * The only expression for which getRegister returns Fixed is (CmmReg reg).
3993
3994 * If getRegister returns Any, then the code it generates may modify only:
3995         (a) fresh temporaries
3996         (b) the destination register
3997   It may *not* modify global registers, unless the global
3998   register happens to be the destination register.
3999   It may not clobber any other registers. In fact, only ccalls clobber any
4000   fixed registers.
4001   Also, it may not modify the counter register (used by genCCall).
4002
4003   Corollary: If a getRegister for a subexpression returns Fixed, you need
4004   not move it to a fresh temporary before evaluating the next subexpression.
4005   The Fixed register won't be modified.
4006   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4007
4008 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4009   the value of the destination register.
4010 -}
4011
4012 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4013     | Just imm <- makeImmediate rep signed y
4014     = do
4015         (src1, code1) <- getSomeReg x
4016         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4017         return (Any rep code)
4018
4019 trivialCode rep signed instr x y = do
4020     (src1, code1) <- getSomeReg x
4021     (src2, code2) <- getSomeReg y
4022     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4023     return (Any rep code)
4024
4025 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4026     -> CmmExpr -> CmmExpr -> NatM Register
4027 trivialCodeNoImm rep instr x y = do
4028     (src1, code1) <- getSomeReg x
4029     (src2, code2) <- getSomeReg y
4030     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4031     return (Any rep code)
4032
4033 trivialUCode rep instr x = do
4034     (src, code) <- getSomeReg x
4035     let code' dst = code `snocOL` instr dst src
4036     return (Any rep code')
4037
4038 -- There is no "remainder" instruction on the PPC, so we have to do
4039 -- it the hard way.
4040 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4041
4042 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4043     -> CmmExpr -> CmmExpr -> NatM Register
4044 remainderCode rep div x y = do
4045     (src1, code1) <- getSomeReg x
4046     (src2, code2) <- getSomeReg y
4047     let code dst = code1 `appOL` code2 `appOL` toOL [
4048                 div dst src1 src2,
4049                 MULLW dst dst (RIReg src2),
4050                 SUBF dst dst src1
4051             ]
4052     return (Any rep code)
4053
4054 #endif /* powerpc_TARGET_ARCH */
4055
4056
4057 -- -----------------------------------------------------------------------------
4058 --  Coercing to/from integer/floating-point...
4059
4060 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4061 -- conversions.  We have to store temporaries in memory to move
4062 -- between the integer and the floating point register sets.
4063
4064 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4065 -- pretend, on sparc at least, that double and float regs are seperate
4066 -- kinds, so the value has to be computed into one kind before being
4067 -- explicitly "converted" to live in the other kind.
4068
4069 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4070 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4071
4072 #ifdef sparc_TARGET_ARCH
4073 coerceDbl2Flt :: CmmExpr -> NatM Register
4074 coerceFlt2Dbl :: CmmExpr -> NatM Register
4075 #endif
4076
4077 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4078
4079 #if alpha_TARGET_ARCH
4080
4081 coerceInt2FP _ x
4082   = getRegister x               `thenNat` \ register ->
4083     getNewRegNat IntRep         `thenNat` \ reg ->
4084     let
4085         code = registerCode register reg
4086         src  = registerName register reg
4087
4088         code__2 dst = code . mkSeqInstrs [
4089             ST Q src (spRel 0),
4090             LD TF dst (spRel 0),
4091             CVTxy Q TF dst dst]
4092     in
4093     return (Any F64 code__2)
4094
4095 -------------
4096 coerceFP2Int x
4097   = getRegister x               `thenNat` \ register ->
4098     getNewRegNat F64    `thenNat` \ tmp ->
4099     let
4100         code = registerCode register tmp
4101         src  = registerName register tmp
4102
4103         code__2 dst = code . mkSeqInstrs [
4104             CVTxy TF Q src tmp,
4105             ST TF tmp (spRel 0),
4106             LD Q dst (spRel 0)]
4107     in
4108     return (Any IntRep code__2)
4109
4110 #endif /* alpha_TARGET_ARCH */
4111
4112 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4113
4114 #if i386_TARGET_ARCH
4115
4116 coerceInt2FP from to x = do
4117   (x_reg, x_code) <- getSomeReg x
4118   let
4119         opc  = case to of F32 -> GITOF; F64 -> GITOD
4120         code dst = x_code `snocOL` opc x_reg dst
4121         -- ToDo: works for non-I32 reps?
4122   -- in
4123   return (Any to code)
4124
4125 ------------
4126
4127 coerceFP2Int from to x = do
4128   (x_reg, x_code) <- getSomeReg x
4129   let
4130         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4131         code dst = x_code `snocOL` opc x_reg dst
4132         -- ToDo: works for non-I32 reps?
4133   -- in
4134   return (Any to code)
4135
4136 #endif /* i386_TARGET_ARCH */
4137
4138 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4139
4140 #if sparc_TARGET_ARCH
4141
4142 coerceInt2FP pk x
4143   = getRegister x               `thenNat` \ register ->
4144     getNewRegNat IntRep         `thenNat` \ reg ->
4145     let
4146         code = registerCode register reg
4147         src  = registerName register reg
4148
4149         code__2 dst = code `appOL` toOL [
4150             ST W src (spRel (-2)),
4151             LD W (spRel (-2)) dst,
4152             FxTOy W (primRepToSize pk) dst dst]
4153     in
4154     return (Any pk code__2)
4155
4156 ------------
4157 coerceFP2Int fprep x
4158   = ASSERT(fprep == F64 || fprep == F32)
4159     getRegister x               `thenNat` \ register ->
4160     getNewRegNat fprep          `thenNat` \ reg ->
4161     getNewRegNat F32    `thenNat` \ tmp ->
4162     let
4163         code = registerCode register reg
4164         src  = registerName register reg
4165         code__2 dst = code `appOL` toOL [
4166             FxTOy (primRepToSize fprep) W src tmp,
4167             ST W tmp (spRel (-2)),
4168             LD W (spRel (-2)) dst]
4169     in
4170     return (Any IntRep code__2)
4171
4172 ------------
4173 coerceDbl2Flt x
4174   = getRegister x               `thenNat` \ register ->
4175     getNewRegNat F64    `thenNat` \ tmp ->
4176     let code = registerCode register tmp
4177         src  = registerName register tmp
4178     in
4179         return (Any F32
4180                        (\dst -> code `snocOL` FxTOy DF F src dst))
4181
4182 ------------
4183 coerceFlt2Dbl x
4184   = getRegister x               `thenNat` \ register ->
4185     getNewRegNat F32    `thenNat` \ tmp ->
4186     let code = registerCode register tmp
4187         src  = registerName register tmp
4188     in
4189         return (Any F64
4190                        (\dst -> code `snocOL` FxTOy F DF src dst))
4191
4192 #endif /* sparc_TARGET_ARCH */
4193
4194 #if powerpc_TARGET_ARCH
4195 coerceInt2FP fromRep toRep x = do
4196     (src, code) <- getSomeReg x
4197     lbl <- getNewLabelNat
4198     itmp <- getNewRegNat I32
4199     ftmp <- getNewRegNat F64
4200     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4201     Amode addr addr_code <- getAmode dynRef
4202     let
4203         code' dst = code `appOL` maybe_exts `appOL` toOL [
4204                 LDATA ReadOnlyData
4205                                 [CmmDataLabel lbl,
4206                                  CmmStaticLit (CmmInt 0x43300000 I32),
4207                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4208                 XORIS itmp src (ImmInt 0x8000),
4209                 ST I32 itmp (spRel 3),
4210                 LIS itmp (ImmInt 0x4330),
4211                 ST I32 itmp (spRel 2),
4212                 LD F64 ftmp (spRel 2)
4213             ] `appOL` addr_code `appOL` toOL [
4214                 LD F64 dst addr,
4215                 FSUB F64 dst ftmp dst
4216             ] `appOL` maybe_frsp dst
4217
4218         maybe_exts = case fromRep of
4219                         I8 ->  unitOL $ EXTS I8 src src
4220                         I16 -> unitOL $ EXTS I16 src src
4221                         I32 -> nilOL
4222         maybe_frsp dst = case toRep of
4223                         F32 -> unitOL $ FRSP dst dst
4224                         F64 -> nilOL
4225     return (Any toRep code')
4226
4227 coerceFP2Int fromRep toRep x = do
4228     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4229     (src, code) <- getSomeReg x
4230     tmp <- getNewRegNat F64
4231     let
4232         code' dst = code `appOL` toOL [
4233                 -- convert to int in FP reg
4234             FCTIWZ tmp src,
4235                 -- store value (64bit) from FP to stack
4236             ST F64 tmp (spRel 2),
4237                 -- read low word of value (high word is undefined)
4238             LD I32 dst (spRel 3)]
4239     return (Any toRep code')
4240 #endif /* powerpc_TARGET_ARCH */
4241
4242
4243 -- -----------------------------------------------------------------------------
4244 -- eXTRA_STK_ARGS_HERE
4245
4246 -- We (allegedly) put the first six C-call arguments in registers;
4247 -- where do we start putting the rest of them?
4248
4249 -- Moved from MachInstrs (SDM):
4250
4251 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4252 eXTRA_STK_ARGS_HERE :: Int
4253 eXTRA_STK_ARGS_HERE
4254   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4255 #endif
4256