ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18
  19 -- NCG stuff:
  20 import MachInstrs
  21 import MachRegs
  22 import NCGMonad
  23 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  24
  25 -- Our intermediate code:
  26 import PprCmm           ( pprExpr )
  27 import Cmm
  28 import MachOp
  29 import CLabel
  30
  31 -- The rest:
  32 import StaticFlags      ( opt_PIC )
  33 import ForeignCall      ( CCallConv(..) )
  34 import OrdList
  35 import Pretty
  36 import Outputable
  37 import qualified Outputable
  38 import FastString
  39 import FastTypes        ( isFastTrue )
  40 import Constants        ( wORD_SIZE )
  41
  42 #ifdef DEBUG
  43 import Outputable       ( assertPanic )
  44 import TRACE            ( trace )
  45 #endif
  46
  47 import Control.Monad    ( mapAndUnzipM )
  48 import Maybe            ( fromJust )
  49 import DATA_BITS
  50 import DATA_WORD
  51
  52 -- -----------------------------------------------------------------------------
  53 -- Top-level of the instruction selector
  54
  55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  56 -- They are really trees of insns to facilitate fast appending, where a
  57 -- left-to-right traversal (pre-order?) yields the insns in the correct
  58 -- order.
  59
  60 type InstrBlock = OrdList Instr
  61
  62 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  63 cmmTopCodeGen (CmmProc info lab params blocks) = do
  64   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  65   picBaseMb <- getPicBaseMaybeNat
  66   let proc = CmmProc info lab params (concat nat_blocks)
  67       tops = proc : concat statics
  68   case picBaseMb of
  69       Just picBase -> initializePicBase picBase tops
  70       Nothing -> return tops
  71
  72 cmmTopCodeGen (CmmData sec dat) = do
  73   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  74
  75 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  76 basicBlockCodeGen (BasicBlock id stmts) = do
  77   instrs <- stmtsToInstrs stmts
  78   -- code generation may introduce new basic block boundaries, which
  79   -- are indicated by the NEWBLOCK instruction.  We must split up the
  80   -- instruction stream into basic blocks again.  Also, we extract
  81   -- LDATAs here too.
  82   let
  83         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  84
  85         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  86           = ([], BasicBlock id instrs : blocks, statics)
  87         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  88           = (instrs, blocks, CmmData sec dat:statics)
  89         mkBlocks instr (instrs,blocks,statics)
  90           = (instr:instrs, blocks, statics)
  91   -- in
  92   return (BasicBlock id top : other_blocks, statics)
  93
  94 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  95 stmtsToInstrs stmts
  96    = do instrss <- mapM stmtToInstrs stmts
  97         return (concatOL instrss)
  98
  99 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 100 stmtToInstrs stmt = case stmt of
 101     CmmNop         -> return nilOL
 102     CmmComment s   -> return (unitOL (COMMENT s))
 103
 104     CmmAssign reg src
 105       | isFloatingRep kind -> assignReg_FltCode kind reg src
 106 #if WORD_SIZE_IN_BITS==32
 107       | kind == I64        -> assignReg_I64Code      reg src
 108 #endif
 109       | otherwise          -> assignReg_IntCode kind reg src
 110         where kind = cmmRegRep reg
 111
 112     CmmStore addr src
 113       | isFloatingRep kind -> assignMem_FltCode kind addr src
 114 #if WORD_SIZE_IN_BITS==32
 115       | kind == I64      -> assignMem_I64Code      addr src
 116 #endif
 117       | otherwise        -> assignMem_IntCode kind addr src
 118         where kind = cmmExprRep src
 119
 120     CmmCall target result_regs args vols
 121        -> genCCall target result_regs args vols
 122
 123     CmmBranch id          -> genBranch id
 124     CmmCondBranch arg id  -> genCondJump id arg
 125     CmmSwitch arg ids     -> genSwitch arg ids
 126     CmmJump arg params    -> genJump arg
 127
 128 -- -----------------------------------------------------------------------------
 129 -- General things for putting together code sequences
 130
 131 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 132 -- CmmExprs into CmmRegOff?
 133 mangleIndexTree :: CmmExpr -> CmmExpr
 134 mangleIndexTree (CmmRegOff reg off)
 135   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 136   where rep = cmmRegRep reg
 137
 138 -- -----------------------------------------------------------------------------
 139 --  Code gen for 64-bit arithmetic on 32-bit platforms
 140
 141 {-
 142 Simple support for generating 64-bit code (ie, 64 bit values and 64
 143 bit assignments) on 32-bit platforms.  Unlike the main code generator
 144 we merely shoot for generating working code as simply as possible, and
 145 pay little attention to code quality.  Specifically, there is no
 146 attempt to deal cleverly with the fixed-vs-floating register
 147 distinction; all values are generated into (pairs of) floating
 148 registers, even if this would mean some redundant reg-reg moves as a
 149 result.  Only one of the VRegUniques is returned, since it will be
 150 of the VRegUniqueLo form, and the upper-half VReg can be determined
 151 by applying getHiVRegFromLo to it.
 152 -}
 153
 154 data ChildCode64        -- a.k.a "Register64"
 155    = ChildCode64
 156         InstrBlock      -- code
 157         Reg             -- the lower 32-bit temporary which contains the
 158                         -- result; use getHiVRegFromLo to find the other
 159                         -- VRegUnique.  Rules of this simplified insn
 160                         -- selection game are therefore that the returned
 161                         -- Reg may be modified
 162
 163 #if WORD_SIZE_IN_BITS==32
 164 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 165 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 166 #endif
 167
 168 #ifndef x86_64_TARGET_ARCH
 169 iselExpr64        :: CmmExpr -> NatM ChildCode64
 170 #endif
 171
 172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 173
 174 #if i386_TARGET_ARCH
 175
 176 assignMem_I64Code addrTree valueTree = do
 177   Amode addr addr_code <- getAmode addrTree
 178   ChildCode64 vcode rlo <- iselExpr64 valueTree
 179   let
 180         rhi = getHiVRegFromLo rlo
 181
 182         -- Little-endian store
 183         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 184         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 185   -- in
 186   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 187
 188
 189 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 190    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 191    let
 192          r_dst_lo = mkVReg u_dst I32
 193          r_dst_hi = getHiVRegFromLo r_dst_lo
 194          r_src_hi = getHiVRegFromLo r_src_lo
 195          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 196          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 197    -- in
 198    return (
 199         vcode `snocOL` mov_lo `snocOL` mov_hi
 200      )
 201
 202 assignReg_I64Code lvalue valueTree
 203    = panic "assignReg_I64Code(i386): invalid lvalue"
 204
 205 ------------
 206
 207 iselExpr64 (CmmLit (CmmInt i _)) = do
 208   (rlo,rhi) <- getNewRegPairNat I32
 209   let
 210         r = fromIntegral (fromIntegral i :: Word32)
 211         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 212         code = toOL [
 213                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 214                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 215                 ]
 216   -- in
 217   return (ChildCode64 code rlo)
 218
 219 iselExpr64 (CmmLoad addrTree I64) = do
 220    Amode addr addr_code <- getAmode addrTree
 221    (rlo,rhi) <- getNewRegPairNat I32
 222    let
 223         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 224         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 225    -- in
 226    return (
 227             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 228                         rlo
 229      )
 230
 231 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 232    = return (ChildCode64 nilOL (mkVReg vu I32))
 233
 234 -- we handle addition, but rather badly
 235 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 236    ChildCode64 code1 r1lo <- iselExpr64 e1
 237    (rlo,rhi) <- getNewRegPairNat I32
 238    let
 239         r = fromIntegral (fromIntegral i :: Word32)
 240         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 241         r1hi = getHiVRegFromLo r1lo
 242         code =  code1 `appOL`
 243                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 244                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 245                        MOV I32 (OpReg r1hi) (OpReg rhi),
 246                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 247    -- in
 248    return (ChildCode64 code rlo)
 249
 250 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 251    ChildCode64 code1 r1lo <- iselExpr64 e1
 252    ChildCode64 code2 r2lo <- iselExpr64 e2
 253    (rlo,rhi) <- getNewRegPairNat I32
 254    let
 255         r1hi = getHiVRegFromLo r1lo
 256         r2hi = getHiVRegFromLo r2lo
 257         code =  code1 `appOL`
 258                 code2 `appOL`
 259                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 260                        ADD I32 (OpReg r2lo) (OpReg rlo),
 261                        MOV I32 (OpReg r1hi) (OpReg rhi),
 262                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 263    -- in
 264    return (ChildCode64 code rlo)
 265
 266 iselExpr64 expr
 267    = pprPanic "iselExpr64(i386)" (ppr expr)
 268
 269 #endif /* i386_TARGET_ARCH */
 270
 271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 272
 273 #if sparc_TARGET_ARCH
 274
 275 assignMem_I64Code addrTree valueTree
 276    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vrlo) ->
 277      getRegister addrTree               `thenNat` \ register_addr ->
 278      getNewRegNat IntRep                `thenNat` \ t_addr ->
 279      let rlo = VirtualRegI vrlo
 280          rhi = getHiVRegFromLo rlo
 281          code_addr = registerCode register_addr t_addr
 282          reg_addr  = registerName register_addr t_addr
 283          -- Big-endian store
 284          mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
 285          mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
 286      in
 287          return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
 288
 289
 290 assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
 291    = iselExpr64 valueTree               `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
 292      let
 293          r_dst_lo = mkVReg u_dst IntRep
 294          r_src_lo = VirtualRegI vr_src_lo
 295          r_dst_hi = getHiVRegFromLo r_dst_lo
 296          r_src_hi = getHiVRegFromLo r_src_lo
 297          mov_lo = mkMOV r_src_lo r_dst_lo
 298          mov_hi = mkMOV r_src_hi r_dst_hi
 299          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 300      in
 301          return (
 302             vcode `snocOL` mov_hi `snocOL` mov_lo
 303          )
 304 assignReg_I64Code lvalue valueTree
 305    = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
 306               (pprStixReg lvalue)
 307
 308
 309 -- Don't delete this -- it's very handy for debugging.
 310 --iselExpr64 expr
 311 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 312 --   = panic "iselExpr64(???)"
 313
 314 iselExpr64 (CmmLoad I64 addrTree)
 315    = getRegister addrTree               `thenNat` \ register_addr ->
 316      getNewRegNat IntRep                `thenNat` \ t_addr ->
 317      getNewRegNat IntRep                `thenNat` \ rlo ->
 318      let rhi = getHiVRegFromLo rlo
 319          code_addr = registerCode register_addr t_addr
 320          reg_addr  = registerName register_addr t_addr
 321          mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
 322          mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
 323      in
 324          return (
 325             ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
 326                         (getVRegUnique rlo)
 327          )
 328
 329 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
 330    = getNewRegNat IntRep                `thenNat` \ r_dst_lo ->
 331      let r_dst_hi = getHiVRegFromLo r_dst_lo
 332          r_src_lo = mkVReg vu IntRep
 333          r_src_hi = getHiVRegFromLo r_src_lo
 334          mov_lo = mkMOV r_src_lo r_dst_lo
 335          mov_hi = mkMOV r_src_hi r_dst_hi
 336          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 337      in
 338          return (
 339             ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
 340          )
 341
 342 iselExpr64 (StCall fn cconv I64 args)
 343   = genCCall fn cconv kind args                 `thenNat` \ call ->
 344     getNewRegNat IntRep                         `thenNat` \ r_dst_lo ->
 345     let r_dst_hi = getHiVRegFromLo r_dst_lo
 346         mov_lo = mkMOV o0 r_dst_lo
 347         mov_hi = mkMOV o1 r_dst_hi
 348         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 349     in
 350     return (
 351        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
 352                    (getVRegUnique r_dst_lo)
 353     )
 354
 355 iselExpr64 expr
 356    = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
 357
 358 #endif /* sparc_TARGET_ARCH */
 359
 360 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 361
 362 #if powerpc_TARGET_ARCH
 363
 364 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 365 getI64Amodes addrTree = do
 366     Amode hi_addr addr_code <- getAmode addrTree
 367     case addrOffset hi_addr 4 of
 368         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 369         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 370                            return (AddrRegImm hi_ptr (ImmInt 0),
 371                                    AddrRegImm hi_ptr (ImmInt 4),
 372                                    code)
 373
 374 assignMem_I64Code addrTree valueTree = do
 375         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 376         ChildCode64 vcode rlo <- iselExpr64 valueTree
 377         let
 378                 rhi = getHiVRegFromLo rlo
 379
 380                 -- Big-endian store
 381                 mov_hi = ST I32 rhi hi_addr
 382                 mov_lo = ST I32 rlo lo_addr
 383         -- in
 384         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 385
 386 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 387    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 388    let
 389          r_dst_lo = mkVReg u_dst I32
 390          r_dst_hi = getHiVRegFromLo r_dst_lo
 391          r_src_hi = getHiVRegFromLo r_src_lo
 392          mov_lo = MR r_dst_lo r_src_lo
 393          mov_hi = MR r_dst_hi r_src_hi
 394    -- in
 395    return (
 396         vcode `snocOL` mov_lo `snocOL` mov_hi
 397      )
 398
 399 assignReg_I64Code lvalue valueTree
 400    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 401
 402
 403 -- Don't delete this -- it's very handy for debugging.
 404 --iselExpr64 expr
 405 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 406 --   = panic "iselExpr64(???)"
 407
 408 iselExpr64 (CmmLoad addrTree I64) = do
 409     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 410     (rlo, rhi) <- getNewRegPairNat I32
 411     let mov_hi = LD I32 rhi hi_addr
 412         mov_lo = LD I32 rlo lo_addr
 413     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 414                          rlo
 415
 416 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 417    = return (ChildCode64 nilOL (mkVReg vu I32))
 418
 419 iselExpr64 (CmmLit (CmmInt i _)) = do
 420   (rlo,rhi) <- getNewRegPairNat I32
 421   let
 422         half0 = fromIntegral (fromIntegral i :: Word16)
 423         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 424         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 425         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 426
 427         code = toOL [
 428                 LIS rlo (ImmInt half1),
 429                 OR rlo rlo (RIImm $ ImmInt half0),
 430                 LIS rhi (ImmInt half3),
 431                 OR rlo rlo (RIImm $ ImmInt half2)
 432                 ]
 433   -- in
 434   return (ChildCode64 code rlo)
 435
 436 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 437    ChildCode64 code1 r1lo <- iselExpr64 e1
 438    ChildCode64 code2 r2lo <- iselExpr64 e2
 439    (rlo,rhi) <- getNewRegPairNat I32
 440    let
 441         r1hi = getHiVRegFromLo r1lo
 442         r2hi = getHiVRegFromLo r2lo
 443         code =  code1 `appOL`
 444                 code2 `appOL`
 445                 toOL [ ADDC rlo r1lo r2lo,
 446                        ADDE rhi r1hi r2hi ]
 447    -- in
 448    return (ChildCode64 code rlo)
 449
 450 iselExpr64 expr
 451    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 452
 453 #endif /* powerpc_TARGET_ARCH */
 454
 455
 456 -- -----------------------------------------------------------------------------
 457 -- The 'Register' type
 458
 459 -- 'Register's passed up the tree.  If the stix code forces the register
 460 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 461 -- otherwise, it comes out as @Any@, and the parent can decide which
 462 -- register to put it in.
 463
 464 data Register
 465   = Fixed   MachRep Reg InstrBlock
 466   | Any     MachRep (Reg -> InstrBlock)
 467
 468 swizzleRegisterRep :: Register -> MachRep -> Register
 469 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 470 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 471
 472
 473 -- -----------------------------------------------------------------------------
 474 -- Utils based on getRegister, below
 475
 476 -- The dual to getAnyReg: compute an expression into a register, but
 477 -- we don't mind which one it is.
 478 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 479 getSomeReg expr = do
 480   r <- getRegister expr
 481   case r of
 482     Any rep code -> do
 483         tmp <- getNewRegNat rep
 484         return (tmp, code tmp)
 485     Fixed _ reg code ->
 486         return (reg, code)
 487
 488 -- -----------------------------------------------------------------------------
 489 -- Grab the Reg for a CmmReg
 490
 491 getRegisterReg :: CmmReg -> Reg
 492
 493 getRegisterReg (CmmLocal (LocalReg u pk))
 494   = mkVReg u pk
 495
 496 getRegisterReg (CmmGlobal mid)
 497   = case get_GlobalReg_reg_or_addr mid of
 498        Left (RealReg rrno) -> RealReg rrno
 499        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 500           -- By this stage, the only MagicIds remaining should be the
 501           -- ones which map to a real machine register on this
 502           -- platform.  Hence ...
 503
 504
 505 -- -----------------------------------------------------------------------------
 506 -- Generate code to get a subtree into a Register
 507
 508 -- Don't delete this -- it's very handy for debugging.
 509 --getRegister expr
 510 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 511 --   = panic "getRegister(???)"
 512
 513 getRegister :: CmmExpr -> NatM Register
 514
 515 getRegister (CmmReg reg)
 516   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 517
 518 getRegister tree@(CmmRegOff _ _)
 519   = getRegister (mangleIndexTree tree)
 520
 521 getRegister CmmPicBaseReg
 522   = do
 523       reg <- getPicBaseNat wordRep
 524       return (Fixed wordRep reg nilOL)
 525
 526 -- end of machine-"independent" bit; here we go on the rest...
 527
 528 #if alpha_TARGET_ARCH
 529
 530 getRegister (StDouble d)
 531   = getBlockIdNat                   `thenNat` \ lbl ->
 532     getNewRegNat PtrRep             `thenNat` \ tmp ->
 533     let code dst = mkSeqInstrs [
 534             LDATA RoDataSegment lbl [
 535                     DATA TF [ImmLab (rational d)]
 536                 ],
 537             LDA tmp (AddrImm (ImmCLbl lbl)),
 538             LD TF dst (AddrReg tmp)]
 539     in
 540         return (Any F64 code)
 541
 542 getRegister (StPrim primop [x]) -- unary PrimOps
 543   = case primop of
 544       IntNegOp -> trivialUCode (NEG Q False) x
 545
 546       NotOp    -> trivialUCode NOT x
 547
 548       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 549       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 550
 551       OrdOp -> coerceIntCode IntRep x
 552       ChrOp -> chrCode x
 553
 554       Float2IntOp  -> coerceFP2Int    x
 555       Int2FloatOp  -> coerceInt2FP pr x
 556       Double2IntOp -> coerceFP2Int    x
 557       Int2DoubleOp -> coerceInt2FP pr x
 558
 559       Double2FloatOp -> coerceFltCode x
 560       Float2DoubleOp -> coerceFltCode x
 561
 562       other_op -> getRegister (StCall fn CCallConv F64 [x])
 563         where
 564           fn = case other_op of
 565                  FloatExpOp    -> FSLIT("exp")
 566                  FloatLogOp    -> FSLIT("log")
 567                  FloatSqrtOp   -> FSLIT("sqrt")
 568                  FloatSinOp    -> FSLIT("sin")
 569                  FloatCosOp    -> FSLIT("cos")
 570                  FloatTanOp    -> FSLIT("tan")
 571                  FloatAsinOp   -> FSLIT("asin")
 572                  FloatAcosOp   -> FSLIT("acos")
 573                  FloatAtanOp   -> FSLIT("atan")
 574                  FloatSinhOp   -> FSLIT("sinh")
 575                  FloatCoshOp   -> FSLIT("cosh")
 576                  FloatTanhOp   -> FSLIT("tanh")
 577                  DoubleExpOp   -> FSLIT("exp")
 578                  DoubleLogOp   -> FSLIT("log")
 579                  DoubleSqrtOp  -> FSLIT("sqrt")
 580                  DoubleSinOp   -> FSLIT("sin")
 581                  DoubleCosOp   -> FSLIT("cos")
 582                  DoubleTanOp   -> FSLIT("tan")
 583                  DoubleAsinOp  -> FSLIT("asin")
 584                  DoubleAcosOp  -> FSLIT("acos")
 585                  DoubleAtanOp  -> FSLIT("atan")
 586                  DoubleSinhOp  -> FSLIT("sinh")
 587                  DoubleCoshOp  -> FSLIT("cosh")
 588                  DoubleTanhOp  -> FSLIT("tanh")
 589   where
 590     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 591
 592 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 593   = case primop of
 594       CharGtOp -> trivialCode (CMP LTT) y x
 595       CharGeOp -> trivialCode (CMP LE) y x
 596       CharEqOp -> trivialCode (CMP EQQ) x y
 597       CharNeOp -> int_NE_code x y
 598       CharLtOp -> trivialCode (CMP LTT) x y
 599       CharLeOp -> trivialCode (CMP LE) x y
 600
 601       IntGtOp  -> trivialCode (CMP LTT) y x
 602       IntGeOp  -> trivialCode (CMP LE) y x
 603       IntEqOp  -> trivialCode (CMP EQQ) x y
 604       IntNeOp  -> int_NE_code x y
 605       IntLtOp  -> trivialCode (CMP LTT) x y
 606       IntLeOp  -> trivialCode (CMP LE) x y
 607
 608       WordGtOp -> trivialCode (CMP ULT) y x
 609       WordGeOp -> trivialCode (CMP ULE) x y
 610       WordEqOp -> trivialCode (CMP EQQ)  x y
 611       WordNeOp -> int_NE_code x y
 612       WordLtOp -> trivialCode (CMP ULT) x y
 613       WordLeOp -> trivialCode (CMP ULE) x y
 614
 615       AddrGtOp -> trivialCode (CMP ULT) y x
 616       AddrGeOp -> trivialCode (CMP ULE) y x
 617       AddrEqOp -> trivialCode (CMP EQQ)  x y
 618       AddrNeOp -> int_NE_code x y
 619       AddrLtOp -> trivialCode (CMP ULT) x y
 620       AddrLeOp -> trivialCode (CMP ULE) x y
 621
 622       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 623       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 624       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 625       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 626       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 627       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 628
 629       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 630       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 631       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 632       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 633       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 634       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 635
 636       IntAddOp  -> trivialCode (ADD Q False) x y
 637       IntSubOp  -> trivialCode (SUB Q False) x y
 638       IntMulOp  -> trivialCode (MUL Q False) x y
 639       IntQuotOp -> trivialCode (DIV Q False) x y
 640       IntRemOp  -> trivialCode (REM Q False) x y
 641
 642       WordAddOp  -> trivialCode (ADD Q False) x y
 643       WordSubOp  -> trivialCode (SUB Q False) x y
 644       WordMulOp  -> trivialCode (MUL Q False) x y
 645       WordQuotOp -> trivialCode (DIV Q True) x y
 646       WordRemOp  -> trivialCode (REM Q True) x y
 647
 648       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 649       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 650       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 651       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 652
 653       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 654       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 655       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 656       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 657
 658       AddrAddOp  -> trivialCode (ADD Q False) x y
 659       AddrSubOp  -> trivialCode (SUB Q False) x y
 660       AddrRemOp  -> trivialCode (REM Q True) x y
 661
 662       AndOp  -> trivialCode AND x y
 663       OrOp   -> trivialCode OR  x y
 664       XorOp  -> trivialCode XOR x y
 665       SllOp  -> trivialCode SLL x y
 666       SrlOp  -> trivialCode SRL x y
 667
 668       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 669       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 670       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 671
 672       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 673       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 674   where
 675     {- ------------------------------------------------------------
 676         Some bizarre special code for getting condition codes into
 677         registers.  Integer non-equality is a test for equality
 678         followed by an XOR with 1.  (Integer comparisons always set
 679         the result register to 0 or 1.)  Floating point comparisons of
 680         any kind leave the result in a floating point register, so we
 681         need to wrangle an integer register out of things.
 682     -}
 683     int_NE_code :: StixTree -> StixTree -> NatM Register
 684
 685     int_NE_code x y
 686       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 687         getNewRegNat IntRep             `thenNat` \ tmp ->
 688         let
 689             code = registerCode register tmp
 690             src  = registerName register tmp
 691             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 692         in
 693         return (Any IntRep code__2)
 694
 695     {- ------------------------------------------------------------
 696         Comments for int_NE_code also apply to cmpF_code
 697     -}
 698     cmpF_code
 699         :: (Reg -> Reg -> Reg -> Instr)
 700         -> Cond
 701         -> StixTree -> StixTree
 702         -> NatM Register
 703
 704     cmpF_code instr cond x y
 705       = trivialFCode pr instr x y       `thenNat` \ register ->
 706         getNewRegNat F64                `thenNat` \ tmp ->
 707         getBlockIdNat                   `thenNat` \ lbl ->
 708         let
 709             code = registerCode register tmp
 710             result  = registerName register tmp
 711
 712             code__2 dst = code . mkSeqInstrs [
 713                 OR zeroh (RIImm (ImmInt 1)) dst,
 714                 BF cond  result (ImmCLbl lbl),
 715                 OR zeroh (RIReg zeroh) dst,
 716                 NEWBLOCK lbl]
 717         in
 718         return (Any IntRep code__2)
 719       where
 720         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 721       ------------------------------------------------------------
 722
 723 getRegister (CmmLoad pk mem)
 724   = getAmode mem                    `thenNat` \ amode ->
 725     let
 726         code = amodeCode amode
 727         src   = amodeAddr amode
 728         size = primRepToSize pk
 729         code__2 dst = code . mkSeqInstr (LD size dst src)
 730     in
 731     return (Any pk code__2)
 732
 733 getRegister (StInt i)
 734   | fits8Bits i
 735   = let
 736         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 737     in
 738     return (Any IntRep code)
 739   | otherwise
 740   = let
 741         code dst = mkSeqInstr (LDI Q dst src)
 742     in
 743     return (Any IntRep code)
 744   where
 745     src = ImmInt (fromInteger i)
 746
 747 getRegister leaf
 748   | isJust imm
 749   = let
 750         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 751     in
 752     return (Any PtrRep code)
 753   where
 754     imm = maybeImm leaf
 755     imm__2 = case imm of Just x -> x
 756
 757 #endif /* alpha_TARGET_ARCH */
 758
 759 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 760
 761 #if i386_TARGET_ARCH
 762
 763 getRegister (CmmLit (CmmFloat f F32)) = do
 764     lbl <- getNewLabelNat
 765     let code dst = toOL [
 766             LDATA ReadOnlyData
 767                         [CmmDataLabel lbl,
 768                          CmmStaticLit (CmmFloat f F32)],
 769             GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
 770             ]
 771     -- in
 772     return (Any F32 code)
 773
 774
 775 getRegister (CmmLit (CmmFloat d F64))
 776   | d == 0.0
 777   = let code dst = unitOL (GLDZ dst)
 778     in  return (Any F64 code)
 779
 780   | d == 1.0
 781   = let code dst = unitOL (GLD1 dst)
 782     in  return (Any F64 code)
 783
 784   | otherwise = do
 785     lbl <- getNewLabelNat
 786     let code dst = toOL [
 787             LDATA ReadOnlyData
 788                         [CmmDataLabel lbl,
 789                          CmmStaticLit (CmmFloat d F64)],
 790             GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
 791             ]
 792     -- in
 793     return (Any F64 code)
 794
 795 #endif /* i386_TARGET_ARCH */
 796
 797 #if x86_64_TARGET_ARCH
 798
 799 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 800    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 801         -- I don't know why there are xorpd, xorps, and pxor instructions.
 802         -- They all appear to do the same thing --SDM
 803    return (Any rep code)
 804
 805 getRegister (CmmLit (CmmFloat f rep)) = do
 806     lbl <- getNewLabelNat
 807     let code dst = toOL [
 808             LDATA ReadOnlyData
 809                         [CmmDataLabel lbl,
 810                          CmmStaticLit (CmmFloat f rep)],
 811             MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
 812         -- ToDo: should use %rip-relative
 813             ]
 814     -- in
 815     return (Any rep code)
 816
 817 #endif /* x86_64_TARGET_ARCH */
 818
 819 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 820
 821 -- catch simple cases of zero- or sign-extended load
 822 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 823   code <- intLoadCode (MOVZxL I8) addr
 824   return (Any I32 code)
 825
 826 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 827   code <- intLoadCode (MOVSxL I8) addr
 828   return (Any I32 code)
 829
 830 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 831   code <- intLoadCode (MOVZxL I16) addr
 832   return (Any I32 code)
 833
 834 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 835   code <- intLoadCode (MOVSxL I16) addr
 836   return (Any I32 code)
 837
 838 #endif
 839
 840 #if x86_64_TARGET_ARCH
 841
 842 -- catch simple cases of zero- or sign-extended load
 843 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 844   code <- intLoadCode (MOVZxL I8) addr
 845   return (Any I64 code)
 846
 847 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 848   code <- intLoadCode (MOVSxL I8) addr
 849   return (Any I64 code)
 850
 851 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 852   code <- intLoadCode (MOVZxL I16) addr
 853   return (Any I64 code)
 854
 855 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 856   code <- intLoadCode (MOVSxL I16) addr
 857   return (Any I64 code)
 858
 859 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 860   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 861   return (Any I64 code)
 862
 863 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 864   code <- intLoadCode (MOVSxL I32) addr
 865   return (Any I64 code)
 866
 867 #endif
 868
 869 #if x86_64_TARGET_ARCH
 870 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 871   lbl <- getNewLabelNat
 872   let
 873     code dst = toOL [
 874         -- This is how gcc does it, so it can't be that bad:
 875         LDATA ReadOnlyData16 [
 876                 CmmAlign 16,
 877                 CmmDataLabel lbl,
 878                 CmmStaticLit (CmmInt 0x80000000 I32),
 879                 CmmStaticLit (CmmInt 0 I32),
 880                 CmmStaticLit (CmmInt 0 I32),
 881                 CmmStaticLit (CmmInt 0 I32)
 882         ],
 883         XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
 884                 -- xorps, so we need the 128-bit constant
 885                 -- ToDo: rip-relative
 886         ]
 887   --
 888   return (Any F32 code)
 889
 890 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 891   lbl <- getNewLabelNat
 892   let
 893         -- This is how gcc does it, so it can't be that bad:
 894     code dst = toOL [
 895         LDATA ReadOnlyData16 [
 896                 CmmAlign 16,
 897                 CmmDataLabel lbl,
 898                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 899                 CmmStaticLit (CmmInt 0 I64)
 900         ],
 901                 -- gcc puts an unpck here.  Wonder if we need it.
 902         XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
 903                 -- xorpd, so we need the 128-bit constant
 904                 -- ToDo: rip-relative
 905         ]
 906   --
 907   return (Any F64 code)
 908 #endif
 909
 910 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 911
 912 getRegister (CmmMachOp mop [x]) -- unary MachOps
 913   = case mop of
 914 #if i386_TARGET_ARCH
 915       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 916       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 917 #endif
 918
 919       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 920       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 921
 922       -- Nop conversions
 923       -- TODO: these are only nops if the arg is not a fixed register that
 924       -- can't be byte-addressed.
 925       MO_U_Conv I32 I8  -> conversionNop I32 x
 926       MO_S_Conv I32 I8  -> conversionNop I32 x
 927       MO_U_Conv I16 I8  -> conversionNop I16 x
 928       MO_S_Conv I16 I8  -> conversionNop I16 x
 929       MO_U_Conv I32 I16 -> conversionNop I32 x
 930       MO_S_Conv I32 I16 -> conversionNop I32 x
 931 #if x86_64_TARGET_ARCH
 932       MO_U_Conv I64 I32 -> conversionNop I64 x
 933       MO_S_Conv I64 I32 -> conversionNop I64 x
 934       MO_U_Conv I64 I16 -> conversionNop I64 x
 935       MO_S_Conv I64 I16 -> conversionNop I64 x
 936       MO_U_Conv I64 I8  -> conversionNop I64 x
 937       MO_S_Conv I64 I8  -> conversionNop I64 x
 938 #endif
 939
 940       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 941       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 942
 943       -- widenings
 944       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 945       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 946       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 947
 948       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 949       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 950       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 951
 952 #if x86_64_TARGET_ARCH
 953       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 954       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 955       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 956       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 957       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 958       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 959         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 960         -- However, we don't want the register allocator to throw it
 961         -- away as an unnecessary reg-to-reg move, so we keep it in
 962         -- the form of a movzl and print it as a movl later.
 963 #endif
 964
 965 #if i386_TARGET_ARCH
 966       MO_S_Conv F32 F64 -> conversionNop F64 x
 967       MO_S_Conv F64 F32 -> conversionNop F32 x
 968 #else
 969       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 970       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 971 #endif
 972
 973       MO_S_Conv from to
 974         | isFloatingRep from -> coerceFP2Int from to x
 975         | isFloatingRep to   -> coerceInt2FP from to x
 976
 977       other -> pprPanic "getRegister" (pprMachOp mop)
 978    where
 979         -- signed or unsigned extension.
 980         integerExtend from to instr expr = do
 981             (reg,e_code) <- if from == I8 then getByteReg expr
 982                                           else getSomeReg expr
 983             let
 984                 code dst =
 985                   e_code `snocOL`
 986                   instr from (OpReg reg) (OpReg dst)
 987             return (Any to code)
 988
 989         conversionNop new_rep expr
 990             = do e_code <- getRegister expr
 991                  return (swizzleRegisterRep e_code new_rep)
 992
 993
 994 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 995   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 996     case mop of
 997       MO_Eq F32   -> condFltReg EQQ x y
 998       MO_Ne F32   -> condFltReg NE x y
 999       MO_S_Gt F32 -> condFltReg GTT x y
1000       MO_S_Ge F32 -> condFltReg GE x y
1001       MO_S_Lt F32 -> condFltReg LTT x y
1002       MO_S_Le F32 -> condFltReg LE x y
1003
1004       MO_Eq F64   -> condFltReg EQQ x y
1005       MO_Ne F64   -> condFltReg NE x y
1006       MO_S_Gt F64 -> condFltReg GTT x y
1007       MO_S_Ge F64 -> condFltReg GE x y
1008       MO_S_Lt F64 -> condFltReg LTT x y
1009       MO_S_Le F64 -> condFltReg LE x y
1010
1011       MO_Eq rep   -> condIntReg EQQ x y
1012       MO_Ne rep   -> condIntReg NE x y
1013
1014       MO_S_Gt rep -> condIntReg GTT x y
1015       MO_S_Ge rep -> condIntReg GE x y
1016       MO_S_Lt rep -> condIntReg LTT x y
1017       MO_S_Le rep -> condIntReg LE x y
1018
1019       MO_U_Gt rep -> condIntReg GU  x y
1020       MO_U_Ge rep -> condIntReg GEU x y
1021       MO_U_Lt rep -> condIntReg LU  x y
1022       MO_U_Le rep -> condIntReg LEU x y
1023
1024 #if i386_TARGET_ARCH
1025       MO_Add F32 -> trivialFCode F32 GADD x y
1026       MO_Sub F32 -> trivialFCode F32 GSUB x y
1027
1028       MO_Add F64 -> trivialFCode F64 GADD x y
1029       MO_Sub F64 -> trivialFCode F64 GSUB x y
1030
1031       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1032       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1033 #endif
1034
1035 #if x86_64_TARGET_ARCH
1036       MO_Add F32 -> trivialFCode F32 ADD x y
1037       MO_Sub F32 -> trivialFCode F32 SUB x y
1038
1039       MO_Add F64 -> trivialFCode F64 ADD x y
1040       MO_Sub F64 -> trivialFCode F64 SUB x y
1041
1042       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1043       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1044 #endif
1045
1046       MO_Add rep -> add_code rep x y
1047       MO_Sub rep -> sub_code rep x y
1048
1049       MO_S_Quot rep -> div_code rep True  True  x y
1050       MO_S_Rem  rep -> div_code rep True  False x y
1051       MO_U_Quot rep -> div_code rep False True  x y
1052       MO_U_Rem  rep -> div_code rep False False x y
1053
1054 #if i386_TARGET_ARCH
1055       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1056       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1057 #endif
1058
1059 #if x86_64_TARGET_ARCH
1060       MO_Mul   F32 -> trivialFCode F32 MUL x y
1061       MO_Mul   F64 -> trivialFCode F64 MUL x y
1062 #endif
1063
1064       MO_Mul   rep -> let op = IMUL rep in
1065                       trivialCode rep op (Just op) x y
1066
1067       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1068
1069       MO_And rep -> let op = AND rep in
1070                     trivialCode rep op (Just op) x y
1071       MO_Or  rep -> let op = OR  rep in
1072                     trivialCode rep op (Just op) x y
1073       MO_Xor rep -> let op = XOR rep in
1074                     trivialCode rep op (Just op) x y
1075
1076         {- Shift ops on x86s have constraints on their source, it
1077            either has to be Imm, CL or 1
1078             => trivialCode is not restrictive enough (sigh.)
1079         -}
1080       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1081       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1082       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1083
1084       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1085   where
1086     --------------------
1087     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1088     imulMayOflo rep a b = do
1089          (a_reg, a_code) <- getNonClobberedReg a
1090          b_code <- getAnyReg b
1091          let
1092              shift_amt  = case rep of
1093                            I32 -> 31
1094                            I64 -> 63
1095                            _ -> panic "shift_amt"
1096
1097              code = a_code `appOL` b_code eax `appOL`
1098                         toOL [
1099                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1100                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1101                                 -- sign extend lower part
1102                            SUB rep (OpReg edx) (OpReg eax)
1103                                 -- compare against upper
1104                            -- eax==0 if high part == sign extended low part
1105                         ]
1106          -- in
1107          return (Fixed rep eax code)
1108
1109     --------------------
1110     shift_code :: MachRep
1111                -> (Operand -> Operand -> Instr)
1112                -> CmmExpr
1113                -> CmmExpr
1114                -> NatM Register
1115
1116     {- Case1: shift length as immediate -}
1117     shift_code rep instr x y@(CmmLit lit) = do
1118           x_code <- getAnyReg x
1119           let
1120                code dst
1121                   = x_code dst `snocOL`
1122                     instr (OpImm (litToImm lit)) (OpReg dst)
1123           -- in
1124           return (Any rep code)
1125
1126     {- Case2: shift length is complex (non-immediate) -}
1127     shift_code rep instr x y{-amount-} = do
1128         (x_reg, x_code) <- getNonClobberedReg x
1129         y_code <- getAnyReg y
1130         let
1131            code = x_code `appOL`
1132                   y_code ecx `snocOL`
1133                   instr (OpReg ecx) (OpReg x_reg)
1134         -- in
1135         return (Fixed rep x_reg code)
1136
1137     --------------------
1138     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1139     add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1140     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1141
1142     --------------------
1143     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1144     sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1145     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1146
1147     -- our three-operand add instruction:
1148     add_int rep x y = do
1149         (x_reg, x_code) <- getSomeReg x
1150         let
1151             imm = ImmInt (fromInteger y)
1152             code dst
1153                = x_code `snocOL`
1154                  LEA rep
1155                         (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
1156                         (OpReg dst)
1157         --
1158         return (Any rep code)
1159
1160     ----------------------
1161     div_code rep signed quotient x y = do
1162            (y_op, y_code) <- getOperand y -- cannot be clobbered
1163            x_code <- getAnyReg x
1164            let
1165              widen | signed    = CLTD rep
1166                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1167
1168              instr | signed    = IDIV
1169                    | otherwise = DIV
1170
1171              code = y_code `appOL`
1172                     x_code eax `appOL`
1173                     toOL [widen, instr rep y_op]
1174
1175              result | quotient  = eax
1176                     | otherwise = edx
1177
1178            -- in
1179            return (Fixed rep result code)
1180
1181
1182 getRegister (CmmLoad mem pk)
1183   | isFloatingRep pk
1184   = do
1185     Amode src mem_code <- getAmode mem
1186     let
1187         code dst = mem_code `snocOL`
1188                    IF_ARCH_i386(GLD pk src dst,
1189                                 MOV pk (OpAddr src) (OpReg dst))
1190     --
1191     return (Any pk code)
1192
1193 #if i386_TARGET_ARCH
1194 getRegister (CmmLoad mem pk)
1195   | pk /= I64
1196   = do
1197     code <- intLoadCode (instr pk) mem
1198     return (Any pk code)
1199   where
1200         instr I8  = MOVZxL pk
1201         instr I16 = MOV I16
1202         instr I32 = MOV I32
1203         -- we always zero-extend 8-bit loads, if we
1204         -- can't think of anything better.  This is because
1205         -- we can't guarantee access to an 8-bit variant of every register
1206         -- (esi and edi don't have 8-bit variants), so to make things
1207         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1208 #endif
1209
1210 #if x86_64_TARGET_ARCH
1211 -- Simpler memory load code on x86_64
1212 getRegister (CmmLoad mem pk)
1213   = do
1214     code <- intLoadCode (MOV pk) mem
1215     return (Any pk code)
1216 #endif
1217
1218 getRegister (CmmLit (CmmInt 0 rep))
1219   = let
1220         code dst
1221            = unitOL (XOR rep (OpReg dst) (OpReg dst))
1222     in
1223         return (Any rep code)
1224
1225 getRegister (CmmLit lit)
1226   = let
1227         rep = cmmLitRep lit
1228         imm = litToImm lit
1229         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1230     in
1231         return (Any rep code)
1232
1233 getRegister other = panic "getRegister(x86)"
1234
1235
1236 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1237    -> NatM (Reg -> InstrBlock)
1238 intLoadCode instr mem = do
1239   Amode src mem_code <- getAmode mem
1240   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1241
1242 -- Compute an expression into *any* register, adding the appropriate
1243 -- move instruction if necessary.
1244 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1245 getAnyReg expr = do
1246   r <- getRegister expr
1247   anyReg r
1248
1249 anyReg :: Register -> NatM (Reg -> InstrBlock)
1250 anyReg (Any _ code)          = return code
1251 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1252
1253 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1254 -- Fixed registers might not be byte-addressable, so we make sure we've
1255 -- got a temporary, inserting an extra reg copy if necessary.
1256 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1257 #if x86_64_TARGET_ARCH
1258 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1259 #else
1260 getByteReg expr = do
1261   r <- getRegister expr
1262   case r of
1263     Any rep code -> do
1264         tmp <- getNewRegNat rep
1265         return (tmp, code tmp)
1266     Fixed rep reg code
1267         | isVirtualReg reg -> return (reg,code)
1268         | otherwise -> do
1269             tmp <- getNewRegNat rep
1270             return (tmp, code `snocOL` reg2reg rep reg tmp)
1271         -- ToDo: could optimise slightly by checking for byte-addressable
1272         -- real registers, but that will happen very rarely if at all.
1273 #endif
1274
1275 -- Another variant: this time we want the result in a register that cannot
1276 -- be modified by code to evaluate an arbitrary expression.
1277 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1278 getNonClobberedReg expr = do
1279   r <- getRegister expr
1280   case r of
1281     Any rep code -> do
1282         tmp <- getNewRegNat rep
1283         return (tmp, code tmp)
1284     Fixed rep reg code
1285         -- only free regs can be clobbered
1286         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1287                 tmp <- getNewRegNat rep
1288                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1289         | otherwise ->
1290                 return (reg, code)
1291
1292 reg2reg :: MachRep -> Reg -> Reg -> Instr
1293 reg2reg rep src dst
1294 #if i386_TARGET_ARCH
1295   | isFloatingRep rep = GMOV src dst
1296 #endif
1297   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1298
1299 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1300
1301 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1302
1303 #if sparc_TARGET_ARCH
1304
1305 getRegister (StFloat d)
1306   = getBlockIdNat                   `thenNat` \ lbl ->
1307     getNewRegNat PtrRep             `thenNat` \ tmp ->
1308     let code dst = toOL [
1309             SEGMENT DataSegment,
1310             NEWBLOCK lbl,
1311             DATA F [ImmFloat d],
1312             SEGMENT TextSegment,
1313             SETHI (HI (ImmCLbl lbl)) tmp,
1314             LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1315     in
1316         return (Any F32 code)
1317
1318 getRegister (StDouble d)
1319   = getBlockIdNat                   `thenNat` \ lbl ->
1320     getNewRegNat PtrRep             `thenNat` \ tmp ->
1321     let code dst = toOL [
1322             SEGMENT DataSegment,
1323             NEWBLOCK lbl,
1324             DATA DF [ImmDouble d],
1325             SEGMENT TextSegment,
1326             SETHI (HI (ImmCLbl lbl)) tmp,
1327             LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1328     in
1329         return (Any F64 code)
1330
1331
1332 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1333   = case mop of
1334       MO_NatS_Neg      -> trivialUCode (SUB False False g0) x
1335       MO_Nat_Not       -> trivialUCode (XNOR False g0) x
1336       MO_32U_to_8U     -> trivialCode (AND False) x (StInt 255)
1337
1338       MO_F32_Neg       -> trivialUFCode F32 (FNEG F) x
1339       MO_F64_Neg       -> trivialUFCode F64 (FNEG DF) x
1340
1341       MO_F64_to_Flt    -> coerceDbl2Flt x
1342       MO_F32_to_Dbl    -> coerceFlt2Dbl x
1343
1344       MO_F32_to_NatS   -> coerceFP2Int F32 x
1345       MO_NatS_to_Flt   -> coerceInt2FP F32 x
1346       MO_F64_to_NatS   -> coerceFP2Int F64 x
1347       MO_NatS_to_Dbl   -> coerceInt2FP F64 x
1348
1349       -- Conversions which are a nop on sparc
1350       MO_32U_to_NatS   -> conversionNop IntRep   x
1351       MO_32S_to_NatS  -> conversionNop IntRep   x
1352       MO_NatS_to_32U   -> conversionNop WordRep  x
1353       MO_32U_to_NatU   -> conversionNop WordRep  x
1354
1355       MO_NatU_to_NatS -> conversionNop IntRep    x
1356       MO_NatS_to_NatU -> conversionNop WordRep   x
1357       MO_NatP_to_NatU -> conversionNop WordRep   x
1358       MO_NatU_to_NatP -> conversionNop PtrRep    x
1359       MO_NatS_to_NatP -> conversionNop PtrRep    x
1360       MO_NatP_to_NatS -> conversionNop IntRep    x
1361
1362       -- sign-extending widenings
1363       MO_8U_to_32U    -> integerExtend False 24 x
1364       MO_8U_to_NatU   -> integerExtend False 24 x
1365       MO_8S_to_NatS   -> integerExtend True  24 x
1366       MO_16U_to_NatU  -> integerExtend False 16 x
1367       MO_16S_to_NatS  -> integerExtend True  16 x
1368
1369       other_op ->
1370         let fixed_x = if   is_float_op  -- promote to double
1371                       then CmmMachOp MO_F32_to_Dbl [x]
1372                       else x
1373         in
1374         getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
1375     where
1376         integerExtend signed nBits x
1377            = getRegister (
1378                 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1379                          [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1380              )
1381         conversionNop new_rep expr
1382             = getRegister expr          `thenNat` \ e_code ->
1383               return (swizzleRegisterRep e_code new_rep)
1384
1385         (is_float_op, fn)
1386           = case mop of
1387               MO_F32_Exp    -> (True,  FSLIT("exp"))
1388               MO_F32_Log    -> (True,  FSLIT("log"))
1389               MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
1390
1391               MO_F32_Sin    -> (True,  FSLIT("sin"))
1392               MO_F32_Cos    -> (True,  FSLIT("cos"))
1393               MO_F32_Tan    -> (True,  FSLIT("tan"))
1394
1395               MO_F32_Asin   -> (True,  FSLIT("asin"))
1396               MO_F32_Acos   -> (True,  FSLIT("acos"))
1397               MO_F32_Atan   -> (True,  FSLIT("atan"))
1398
1399               MO_F32_Sinh   -> (True,  FSLIT("sinh"))
1400               MO_F32_Cosh   -> (True,  FSLIT("cosh"))
1401               MO_F32_Tanh   -> (True,  FSLIT("tanh"))
1402
1403               MO_F64_Exp    -> (False, FSLIT("exp"))
1404               MO_F64_Log    -> (False, FSLIT("log"))
1405               MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
1406
1407               MO_F64_Sin    -> (False, FSLIT("sin"))
1408               MO_F64_Cos    -> (False, FSLIT("cos"))
1409               MO_F64_Tan    -> (False, FSLIT("tan"))
1410
1411               MO_F64_Asin   -> (False, FSLIT("asin"))
1412               MO_F64_Acos   -> (False, FSLIT("acos"))
1413               MO_F64_Atan   -> (False, FSLIT("atan"))
1414
1415               MO_F64_Sinh   -> (False, FSLIT("sinh"))
1416               MO_F64_Cosh   -> (False, FSLIT("cosh"))
1417               MO_F64_Tanh   -> (False, FSLIT("tanh"))
1418
1419               other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
1420                                 (pprMachOp mop)
1421
1422
1423 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1424   = case mop of
1425       MO_32U_Gt  -> condIntReg GTT x y
1426       MO_32U_Ge  -> condIntReg GE x y
1427       MO_32U_Eq  -> condIntReg EQQ x y
1428       MO_32U_Ne  -> condIntReg NE x y
1429       MO_32U_Lt  -> condIntReg LTT x y
1430       MO_32U_Le  -> condIntReg LE x y
1431
1432       MO_Nat_Eq   -> condIntReg EQQ x y
1433       MO_Nat_Ne   -> condIntReg NE x y
1434
1435       MO_NatS_Gt  -> condIntReg GTT x y
1436       MO_NatS_Ge  -> condIntReg GE x y
1437       MO_NatS_Lt  -> condIntReg LTT x y
1438       MO_NatS_Le  -> condIntReg LE x y
1439
1440       MO_NatU_Gt  -> condIntReg GU  x y
1441       MO_NatU_Ge  -> condIntReg GEU x y
1442       MO_NatU_Lt  -> condIntReg LU  x y
1443       MO_NatU_Le  -> condIntReg LEU x y
1444
1445       MO_F32_Gt -> condFltReg GTT x y
1446       MO_F32_Ge -> condFltReg GE x y
1447       MO_F32_Eq -> condFltReg EQQ x y
1448       MO_F32_Ne -> condFltReg NE x y
1449       MO_F32_Lt -> condFltReg LTT x y
1450       MO_F32_Le -> condFltReg LE x y
1451
1452       MO_F64_Gt -> condFltReg GTT x y
1453       MO_F64_Ge -> condFltReg GE x y
1454       MO_F64_Eq -> condFltReg EQQ x y
1455       MO_F64_Ne -> condFltReg NE x y
1456       MO_F64_Lt -> condFltReg LTT x y
1457       MO_F64_Le -> condFltReg LE x y
1458
1459       MO_Nat_Add -> trivialCode (ADD False False) x y
1460       MO_Nat_Sub -> trivialCode (SUB False False) x y
1461
1462       MO_NatS_Mul  -> trivialCode (SMUL False) x y
1463       MO_NatU_Mul  -> trivialCode (UMUL False) x y
1464       MO_NatS_MulMayOflo -> imulMayOflo x y
1465
1466       -- ToDo: teach about V8+ SPARC div instructions
1467       MO_NatS_Quot -> idiv FSLIT(".div")  x y
1468       MO_NatS_Rem  -> idiv FSLIT(".rem")  x y
1469       MO_NatU_Quot -> idiv FSLIT(".udiv")  x y
1470       MO_NatU_Rem  -> idiv FSLIT(".urem")  x y
1471
1472       MO_F32_Add   -> trivialFCode F32  FADD x y
1473       MO_F32_Sub   -> trivialFCode F32  FSUB x y
1474       MO_F32_Mul   -> trivialFCode F32  FMUL x y
1475       MO_F32_Div   -> trivialFCode F32  FDIV x y
1476
1477       MO_F64_Add   -> trivialFCode F64 FADD x y
1478       MO_F64_Sub   -> trivialFCode F64 FSUB x y
1479       MO_F64_Mul   -> trivialFCode F64 FMUL x y
1480       MO_F64_Div   -> trivialFCode F64 FDIV x y
1481
1482       MO_Nat_And   -> trivialCode (AND False) x y
1483       MO_Nat_Or    -> trivialCode (OR  False) x y
1484       MO_Nat_Xor   -> trivialCode (XOR False) x y
1485
1486       MO_Nat_Shl   -> trivialCode SLL x y
1487       MO_Nat_Shr   -> trivialCode SRL x y
1488       MO_Nat_Sar   -> trivialCode SRA x y
1489
1490       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1491                                          [promote x, promote y])
1492                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1493       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1494                                         [x, y])
1495
1496       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1497   where
1498     idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1499
1500     --------------------
1501     imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1502     imulMayOflo a1 a2
1503        = getNewRegNat IntRep            `thenNat` \ t1 ->
1504          getNewRegNat IntRep            `thenNat` \ t2 ->
1505          getNewRegNat IntRep            `thenNat` \ res_lo ->
1506          getNewRegNat IntRep            `thenNat` \ res_hi ->
1507          getRegister a1                 `thenNat` \ reg1 ->
1508          getRegister a2                 `thenNat` \ reg2 ->
1509          let code1 = registerCode reg1 t1
1510              code2 = registerCode reg2 t2
1511              src1  = registerName reg1 t1
1512              src2  = registerName reg2 t2
1513              code dst = code1 `appOL` code2 `appOL`
1514                         toOL [
1515                            SMUL False src1 (RIReg src2) res_lo,
1516                            RDY res_hi,
1517                            SRA res_lo (RIImm (ImmInt 31)) res_lo,
1518                            SUB False False res_lo (RIReg res_hi) dst
1519                         ]
1520          in
1521             return (Any IntRep code)
1522
1523 getRegister (CmmLoad pk mem) = do
1524     Amode src code <- getAmode mem
1525     let
1526         size = primRepToSize pk
1527         code__2 dst = code `snocOL` LD size src dst
1528     --
1529     return (Any pk code__2)
1530
1531 getRegister (StInt i)
1532   | fits13Bits i
1533   = let
1534         src = ImmInt (fromInteger i)
1535         code dst = unitOL (OR False g0 (RIImm src) dst)
1536     in
1537         return (Any IntRep code)
1538
1539 getRegister leaf
1540   | isJust imm
1541   = let
1542         code dst = toOL [
1543             SETHI (HI imm__2) dst,
1544             OR False dst (RIImm (LO imm__2)) dst]
1545     in
1546         return (Any PtrRep code)
1547   | otherwise
1548   = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1549   where
1550     imm = maybeImm leaf
1551     imm__2 = case imm of Just x -> x
1552
1553 #endif /* sparc_TARGET_ARCH */
1554
1555 #if powerpc_TARGET_ARCH
1556 getRegister (CmmLoad mem pk)
1557   | pk /= I64
1558   = do
1559         Amode addr addr_code <- getAmode mem
1560         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1561                        addr_code `snocOL` LD pk dst addr
1562         return (Any pk code)
1563
1564 -- catch simple cases of zero- or sign-extended load
1565 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1566     Amode addr addr_code <- getAmode mem
1567     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1568
1569 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1570
1571 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1572     Amode addr addr_code <- getAmode mem
1573     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1574
1575 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1576     Amode addr addr_code <- getAmode mem
1577     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1578
1579 getRegister (CmmMachOp mop [x]) -- unary MachOps
1580   = case mop of
1581       MO_Not rep   -> trivialUCode rep NOT x
1582
1583       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1584       MO_S_Conv F32 F64 -> conversionNop F64 x
1585
1586       MO_S_Conv from to
1587         | from == to         -> conversionNop to x
1588         | isFloatingRep from -> coerceFP2Int from to x
1589         | isFloatingRep to   -> coerceInt2FP from to x
1590
1591         -- narrowing is a nop: we treat the high bits as undefined
1592       MO_S_Conv I32 to -> conversionNop to x
1593       MO_S_Conv I16 I8 -> conversionNop I8 x
1594       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1595       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1596
1597       MO_U_Conv from to
1598         | from == to -> conversionNop to x
1599         -- narrowing is a nop: we treat the high bits as undefined
1600       MO_U_Conv I32 to -> conversionNop to x
1601       MO_U_Conv I16 I8 -> conversionNop I8 x
1602       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1603       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1604
1605       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1606       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1607       MO_S_Neg rep      -> trivialUCode rep NEG x
1608
1609     where
1610         conversionNop new_rep expr
1611             = do e_code <- getRegister expr
1612                  return (swizzleRegisterRep e_code new_rep)
1613
1614 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1615   = case mop of
1616       MO_Eq F32 -> condFltReg EQQ x y
1617       MO_Ne F32 -> condFltReg NE  x y
1618
1619       MO_S_Gt F32 -> condFltReg GTT x y
1620       MO_S_Ge F32 -> condFltReg GE  x y
1621       MO_S_Lt F32 -> condFltReg LTT x y
1622       MO_S_Le F32 -> condFltReg LE  x y
1623
1624       MO_Eq F64 -> condFltReg EQQ x y
1625       MO_Ne F64 -> condFltReg NE  x y
1626
1627       MO_S_Gt F64 -> condFltReg GTT x y
1628       MO_S_Ge F64 -> condFltReg GE  x y
1629       MO_S_Lt F64 -> condFltReg LTT x y
1630       MO_S_Le F64 -> condFltReg LE  x y
1631
1632       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1633       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1634
1635       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1636       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1637       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1638       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1639
1640       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1641       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1642       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1643       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1644
1645       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1646       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1647       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1648       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1649
1650       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1651       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1652       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1653       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1654
1655          -- optimize addition with 32-bit immediate
1656          -- (needed for PIC)
1657       MO_Add I32 ->
1658         case y of
1659           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1660             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1661           CmmLit lit
1662             -> do
1663                 (src, srcCode) <- getSomeReg x
1664                 let imm = litToImm lit
1665                     code dst = srcCode `appOL` toOL [
1666                                     ADDIS dst src (HA imm),
1667                                     ADD dst dst (RIImm (LO imm))
1668                                 ]
1669                 return (Any I32 code)
1670           _ -> trivialCode I32 True ADD x y
1671
1672       MO_Add rep -> trivialCode rep True ADD x y
1673       MO_Sub rep ->
1674         case y of    -- subfi ('substract from' with immediate) doesn't exist
1675           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1676             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1677           _ -> trivialCodeNoImm rep SUBF y x
1678
1679       MO_Mul rep -> trivialCode rep True MULLW x y
1680
1681       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1682
1683       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1684       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1685
1686       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1687       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1688
1689       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1690       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1691
1692       MO_And rep   -> trivialCode rep False AND x y
1693       MO_Or rep    -> trivialCode rep False OR x y
1694       MO_Xor rep   -> trivialCode rep False XOR x y
1695
1696       MO_Shl rep   -> trivialCode rep False SLW x y
1697       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1698       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1699
1700 getRegister (CmmLit (CmmInt i rep))
1701   | Just imm <- makeImmediate rep True i
1702   = let
1703         code dst = unitOL (LI dst imm)
1704     in
1705         return (Any rep code)
1706
1707 getRegister (CmmLit (CmmFloat f frep)) = do
1708     lbl <- getNewLabelNat
1709     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1710     Amode addr addr_code <- getAmode dynRef
1711     let code dst =
1712             LDATA ReadOnlyData  [CmmDataLabel lbl,
1713                                  CmmStaticLit (CmmFloat f frep)]
1714             `consOL` (addr_code `snocOL` LD frep dst addr)
1715     return (Any frep code)
1716
1717 getRegister (CmmLit lit)
1718   = let rep = cmmLitRep lit
1719         imm = litToImm lit
1720         code dst = toOL [
1721               LIS dst (HI imm),
1722               OR dst dst (RIImm (LO imm))
1723           ]
1724     in return (Any rep code)
1725
1726 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1727
1728     -- extend?Rep: wrap integer expression of type rep
1729     -- in a conversion to I32
1730 extendSExpr I32 x = x
1731 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1732 extendUExpr I32 x = x
1733 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1734
1735 #endif /* powerpc_TARGET_ARCH */
1736
1737
1738 -- -----------------------------------------------------------------------------
1739 --  The 'Amode' type: Memory addressing modes passed up the tree.
1740
1741 data Amode = Amode AddrMode InstrBlock
1742
1743 {-
1744 Now, given a tree (the argument to an CmmLoad) that references memory,
1745 produce a suitable addressing mode.
1746
1747 A Rule of the Game (tm) for Amodes: use of the addr bit must
1748 immediately follow use of the code part, since the code part puts
1749 values in registers which the addr then refers to.  So you can't put
1750 anything in between, lest it overwrite some of those registers.  If
1751 you need to do some other computation between the code part and use of
1752 the addr bit, first store the effective address from the amode in a
1753 temporary, then do the other computation, and then use the temporary:
1754
1755     code
1756     LEA amode, tmp
1757     ... other computation ...
1758     ... (tmp) ...
1759 -}
1760
1761 getAmode :: CmmExpr -> NatM Amode
1762 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1763
1764 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1765
1766 #if alpha_TARGET_ARCH
1767
1768 getAmode (StPrim IntSubOp [x, StInt i])
1769   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1770     getRegister x               `thenNat` \ register ->
1771     let
1772         code = registerCode register tmp
1773         reg  = registerName register tmp
1774         off  = ImmInt (-(fromInteger i))
1775     in
1776     return (Amode (AddrRegImm reg off) code)
1777
1778 getAmode (StPrim IntAddOp [x, StInt i])
1779   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1780     getRegister x               `thenNat` \ register ->
1781     let
1782         code = registerCode register tmp
1783         reg  = registerName register tmp
1784         off  = ImmInt (fromInteger i)
1785     in
1786     return (Amode (AddrRegImm reg off) code)
1787
1788 getAmode leaf
1789   | isJust imm
1790   = return (Amode (AddrImm imm__2) id)
1791   where
1792     imm = maybeImm leaf
1793     imm__2 = case imm of Just x -> x
1794
1795 getAmode other
1796   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1797     getRegister other           `thenNat` \ register ->
1798     let
1799         code = registerCode register tmp
1800         reg  = registerName register tmp
1801     in
1802     return (Amode (AddrReg reg) code)
1803
1804 #endif /* alpha_TARGET_ARCH */
1805
1806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1807
1808 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1809
1810 -- This is all just ridiculous, since it carefully undoes
1811 -- what mangleIndexTree has just done.
1812 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1813   | not (is64BitLit lit)
1814   -- ASSERT(rep == I32)???
1815   = do (x_reg, x_code) <- getSomeReg x
1816        let off = ImmInt (-(fromInteger i))
1817        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1818
1819 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1820   | not (is64BitLit lit)
1821   -- ASSERT(rep == I32)???
1822   = do (x_reg, x_code) <- getSomeReg x
1823        let off = ImmInt (fromInteger i)
1824        return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1825
1826 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1827 -- recognised by the next rule.
1828 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1829                                   b@(CmmLit _)])
1830   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1831
1832 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1833                                         [y, CmmLit (CmmInt shift _)]])
1834   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1835   = do (x_reg, x_code) <- getNonClobberedReg x
1836         -- x must be in a temp, because it has to stay live over y_code
1837         -- we could compre x_reg and y_reg and do something better here...
1838        (y_reg, y_code) <- getSomeReg y
1839        let
1840            code = x_code `appOL` y_code
1841            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1842        return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
1843                code)
1844
1845 getAmode (CmmLit lit) | not (is64BitLit lit)
1846   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1847
1848 getAmode expr = do
1849   (reg,code) <- getSomeReg expr
1850   return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
1851
1852 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1853
1854 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1855
1856 #if sparc_TARGET_ARCH
1857
1858 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1859   | fits13Bits (-i)
1860   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1861     getRegister x               `thenNat` \ register ->
1862     let
1863         code = registerCode register tmp
1864         reg  = registerName register tmp
1865         off  = ImmInt (-(fromInteger i))
1866     in
1867     return (Amode (AddrRegImm reg off) code)
1868
1869
1870 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1871   | fits13Bits i
1872   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1873     getRegister x               `thenNat` \ register ->
1874     let
1875         code = registerCode register tmp
1876         reg  = registerName register tmp
1877         off  = ImmInt (fromInteger i)
1878     in
1879     return (Amode (AddrRegImm reg off) code)
1880
1881 getAmode (CmmMachOp MO_Nat_Add [x, y])
1882   = getNewRegNat PtrRep         `thenNat` \ tmp1 ->
1883     getNewRegNat IntRep         `thenNat` \ tmp2 ->
1884     getRegister x               `thenNat` \ register1 ->
1885     getRegister y               `thenNat` \ register2 ->
1886     let
1887         code1 = registerCode register1 tmp1
1888         reg1  = registerName register1 tmp1
1889         code2 = registerCode register2 tmp2
1890         reg2  = registerName register2 tmp2
1891         code__2 = code1 `appOL` code2
1892     in
1893     return (Amode (AddrRegReg reg1 reg2) code__2)
1894
1895 getAmode leaf
1896   | isJust imm
1897   = getNewRegNat PtrRep             `thenNat` \ tmp ->
1898     let
1899         code = unitOL (SETHI (HI imm__2) tmp)
1900     in
1901     return (Amode (AddrRegImm tmp (LO imm__2)) code)
1902   where
1903     imm    = maybeImm leaf
1904     imm__2 = case imm of Just x -> x
1905
1906 getAmode other
1907   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1908     getRegister other           `thenNat` \ register ->
1909     let
1910         code = registerCode register tmp
1911         reg  = registerName register tmp
1912         off  = ImmInt 0
1913     in
1914     return (Amode (AddrRegImm reg off) code)
1915
1916 #endif /* sparc_TARGET_ARCH */
1917
1918 #ifdef powerpc_TARGET_ARCH
1919 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1920   | Just off <- makeImmediate I32 True (-i)
1921   = do
1922         (reg, code) <- getSomeReg x
1923         return (Amode (AddrRegImm reg off) code)
1924
1925
1926 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1927   | Just off <- makeImmediate I32 True i
1928   = do
1929         (reg, code) <- getSomeReg x
1930         return (Amode (AddrRegImm reg off) code)
1931
1932    -- optimize addition with 32-bit immediate
1933    -- (needed for PIC)
1934 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1935   = do
1936         tmp <- getNewRegNat I32
1937         (src, srcCode) <- getSomeReg x
1938         let imm = litToImm lit
1939             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1940         return (Amode (AddrRegImm tmp (LO imm)) code)
1941
1942 getAmode (CmmLit lit)
1943   = do
1944         tmp <- getNewRegNat I32
1945         let imm = litToImm lit
1946             code = unitOL (LIS tmp (HA imm))
1947         return (Amode (AddrRegImm tmp (LO imm)) code)
1948
1949 getAmode (CmmMachOp (MO_Add I32) [x, y])
1950   = do
1951         (regX, codeX) <- getSomeReg x
1952         (regY, codeY) <- getSomeReg y
1953         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1954
1955 getAmode other
1956   = do
1957         (reg, code) <- getSomeReg other
1958         let
1959             off  = ImmInt 0
1960         return (Amode (AddrRegImm reg off) code)
1961 #endif /* powerpc_TARGET_ARCH */
1962
1963 -- -----------------------------------------------------------------------------
1964 -- getOperand: sometimes any operand will do.
1965
1966 -- getNonClobberedOperand: the value of the operand will remain valid across
1967 -- the computation of an arbitrary expression, unless the expression
1968 -- is computed directly into a register which the operand refers to
1969 -- (see trivialCode where this function is used for an example).
1970
1971 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1972
1973 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1974 getNonClobberedOperand (CmmLit lit)
1975   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1976     return (OpImm (litToImm lit), nilOL)
1977 getNonClobberedOperand (CmmLoad mem pk)
1978   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1979     Amode src mem_code <- getAmode mem
1980     (src',save_code) <-
1981         if (amodeCouldBeClobbered src)
1982                 then do
1983                    tmp <- getNewRegNat wordRep
1984                    return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
1985                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1986                 else
1987                    return (src, nilOL)
1988     return (OpAddr src', save_code `appOL` mem_code)
1989 getNonClobberedOperand e = do
1990     (reg, code) <- getNonClobberedReg e
1991     return (OpReg reg, code)
1992
1993 amodeCouldBeClobbered :: AddrMode -> Bool
1994 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1995
1996 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1997 regClobbered _ = False
1998
1999 -- getOperand: the operand is not required to remain valid across the
2000 -- computation of an arbitrary expression.
2001 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2002 getOperand (CmmLit lit)
2003   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2004     return (OpImm (litToImm lit), nilOL)
2005 getOperand (CmmLoad mem pk)
2006   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2007     Amode src mem_code <- getAmode mem
2008     return (OpAddr src, mem_code)
2009 getOperand e = do
2010     (reg, code) <- getNonClobberedReg e
2011     return (OpReg reg, code)
2012
2013 isOperand :: CmmExpr -> Bool
2014 isOperand (CmmLoad _ _) = True
2015 isOperand (CmmLit lit)  = not (is64BitLit lit) &&
2016                           not (isFloatingRep (cmmLitRep lit))
2017 isOperand _             = False
2018
2019 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2020 getRegOrMem (CmmLoad mem pk)
2021   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2022     Amode src mem_code <- getAmode mem
2023     return (OpAddr src, mem_code)
2024 getRegOrMem e = do
2025     (reg, code) <- getNonClobberedReg e
2026     return (OpReg reg, code)
2027
2028 #if x86_64_TARGET_ARCH
2029 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2030 #endif
2031 is64BitLit x = False
2032 #endif
2033
2034 -- -----------------------------------------------------------------------------
2035 --  The 'CondCode' type:  Condition codes passed up the tree.
2036
2037 data CondCode = CondCode Bool Cond InstrBlock
2038
2039 -- Set up a condition code for a conditional branch.
2040
2041 getCondCode :: CmmExpr -> NatM CondCode
2042
2043 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2044
2045 #if alpha_TARGET_ARCH
2046 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2047 #endif /* alpha_TARGET_ARCH */
2048
2049 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2050
2051 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2052 -- yes, they really do seem to want exactly the same!
2053
2054 getCondCode (CmmMachOp mop [x, y])
2055   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2056     case mop of
2057       MO_Eq F32 -> condFltCode EQQ x y
2058       MO_Ne F32 -> condFltCode NE  x y
2059
2060       MO_S_Gt F32 -> condFltCode GTT x y
2061       MO_S_Ge F32 -> condFltCode GE  x y
2062       MO_S_Lt F32 -> condFltCode LTT x y
2063       MO_S_Le F32 -> condFltCode LE  x y
2064
2065       MO_Eq F64 -> condFltCode EQQ x y
2066       MO_Ne F64 -> condFltCode NE  x y
2067
2068       MO_S_Gt F64 -> condFltCode GTT x y
2069       MO_S_Ge F64 -> condFltCode GE  x y
2070       MO_S_Lt F64 -> condFltCode LTT x y
2071       MO_S_Le F64 -> condFltCode LE  x y
2072
2073       MO_Eq rep -> condIntCode EQQ  x y
2074       MO_Ne rep -> condIntCode NE   x y
2075
2076       MO_S_Gt rep -> condIntCode GTT  x y
2077       MO_S_Ge rep -> condIntCode GE   x y
2078       MO_S_Lt rep -> condIntCode LTT  x y
2079       MO_S_Le rep -> condIntCode LE   x y
2080
2081       MO_U_Gt rep -> condIntCode GU   x y
2082       MO_U_Ge rep -> condIntCode GEU  x y
2083       MO_U_Lt rep -> condIntCode LU   x y
2084       MO_U_Le rep -> condIntCode LEU  x y
2085
2086       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2087
2088 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2089
2090 #elif powerpc_TARGET_ARCH
2091
2092 -- almost the same as everywhere else - but we need to
2093 -- extend small integers to 32 bit first
2094
2095 getCondCode (CmmMachOp mop [x, y])
2096   = case mop of
2097       MO_Eq F32 -> condFltCode EQQ x y
2098       MO_Ne F32 -> condFltCode NE  x y
2099
2100       MO_S_Gt F32 -> condFltCode GTT x y
2101       MO_S_Ge F32 -> condFltCode GE  x y
2102       MO_S_Lt F32 -> condFltCode LTT x y
2103       MO_S_Le F32 -> condFltCode LE  x y
2104
2105       MO_Eq F64 -> condFltCode EQQ x y
2106       MO_Ne F64 -> condFltCode NE  x y
2107
2108       MO_S_Gt F64 -> condFltCode GTT x y
2109       MO_S_Ge F64 -> condFltCode GE  x y
2110       MO_S_Lt F64 -> condFltCode LTT x y
2111       MO_S_Le F64 -> condFltCode LE  x y
2112
2113       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2114       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2115
2116       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2117       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2118       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2119       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2120
2121       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2122       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2123       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2124       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2125
2126       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2127
2128 getCondCode other =  panic "getCondCode(2)(powerpc)"
2129
2130
2131 #endif
2132
2133
2134 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2135 -- passed back up the tree.
2136
2137 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2138
2139 #if alpha_TARGET_ARCH
2140 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2141 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2142 #endif /* alpha_TARGET_ARCH */
2143
2144 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2145 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2146
2147 -- memory vs immediate
2148 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2149     Amode x_addr x_code <- getAmode x
2150     let
2151         imm  = litToImm lit
2152         code = x_code `snocOL`
2153                   CMP pk (OpImm imm) (OpAddr x_addr)
2154     --
2155     return (CondCode False cond code)
2156
2157 -- anything vs zero
2158 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2159     (x_reg, x_code) <- getSomeReg x
2160     let
2161         code = x_code `snocOL`
2162                   TEST pk (OpReg x_reg) (OpReg x_reg)
2163     --
2164     return (CondCode False cond code)
2165
2166 -- anything vs operand
2167 condIntCode cond x y | isOperand y = do
2168     (x_reg, x_code) <- getNonClobberedReg x
2169     (y_op,  y_code) <- getOperand y
2170     let
2171         code = x_code `appOL` y_code `snocOL`
2172                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2173     -- in
2174     return (CondCode False cond code)
2175
2176 -- anything vs anything
2177 condIntCode cond x y = do
2178   (y_reg, y_code) <- getNonClobberedReg y
2179   (x_op, x_code) <- getRegOrMem x
2180   let
2181         code = y_code `appOL`
2182                x_code `snocOL`
2183                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2184   -- in
2185   return (CondCode False cond code)
2186 #endif
2187
2188 #if i386_TARGET_ARCH
2189 condFltCode cond x y
2190   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2191   (x_reg, x_code) <- getNonClobberedReg x
2192   (y_reg, y_code) <- getSomeReg y
2193   let
2194         code = x_code `appOL` y_code `snocOL`
2195                 GCMP cond x_reg y_reg
2196   -- The GCMP insn does the test and sets the zero flag if comparable
2197   -- and true.  Hence we always supply EQQ as the condition to test.
2198   return (CondCode True EQQ code)
2199 #endif /* i386_TARGET_ARCH */
2200
2201 #if x86_64_TARGET_ARCH
2202 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2203 -- an operand, but the right must be a reg.  We can probably do better
2204 -- than this general case...
2205 condFltCode cond x y = do
2206   (x_reg, x_code) <- getNonClobberedReg x
2207   (y_op, y_code) <- getOperand y
2208   let
2209         code = x_code `appOL`
2210                y_code `snocOL`
2211                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2212   -- in
2213   return (CondCode False (condToUnsigned cond) code)
2214         -- we need to use the unsigned comparison operators on the
2215         -- result of this comparison.
2216 #endif
2217
2218 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2219
2220 #if sparc_TARGET_ARCH
2221
2222 condIntCode cond x (StInt y)
2223   | fits13Bits y
2224   = getRegister x               `thenNat` \ register ->
2225     getNewRegNat IntRep         `thenNat` \ tmp ->
2226     let
2227         code = registerCode register tmp
2228         src1 = registerName register tmp
2229         src2 = ImmInt (fromInteger y)
2230         code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2231     in
2232     return (CondCode False cond code__2)
2233
2234 condIntCode cond x y
2235   = getRegister x               `thenNat` \ register1 ->
2236     getRegister y               `thenNat` \ register2 ->
2237     getNewRegNat IntRep         `thenNat` \ tmp1 ->
2238     getNewRegNat IntRep         `thenNat` \ tmp2 ->
2239     let
2240         code1 = registerCode register1 tmp1
2241         src1  = registerName register1 tmp1
2242         code2 = registerCode register2 tmp2
2243         src2  = registerName register2 tmp2
2244         code__2 = code1 `appOL` code2 `snocOL`
2245                   SUB False True src1 (RIReg src2) g0
2246     in
2247     return (CondCode False cond code__2)
2248
2249 -----------
2250 condFltCode cond x y
2251   = getRegister x               `thenNat` \ register1 ->
2252     getRegister y               `thenNat` \ register2 ->
2253     getNewRegNat (registerRep register1)
2254                                 `thenNat` \ tmp1 ->
2255     getNewRegNat (registerRep register2)
2256                                 `thenNat` \ tmp2 ->
2257     getNewRegNat F64    `thenNat` \ tmp ->
2258     let
2259         promote x = FxTOy F DF x tmp
2260
2261         pk1   = registerRep register1
2262         code1 = registerCode register1 tmp1
2263         src1  = registerName register1 tmp1
2264
2265         pk2   = registerRep register2
2266         code2 = registerCode register2 tmp2
2267         src2  = registerName register2 tmp2
2268
2269         code__2 =
2270                 if pk1 == pk2 then
2271                     code1 `appOL` code2 `snocOL`
2272                     FCMP True (primRepToSize pk1) src1 src2
2273                 else if pk1 == F32 then
2274                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2275                     FCMP True DF tmp src2
2276                 else
2277                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2278                     FCMP True DF src1 tmp
2279     in
2280     return (CondCode True cond code__2)
2281
2282 #endif /* sparc_TARGET_ARCH */
2283
2284 #if powerpc_TARGET_ARCH
2285 --  ###FIXME: I16 and I8!
2286 condIntCode cond x (CmmLit (CmmInt y rep))
2287   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2288   = do
2289         (src1, code) <- getSomeReg x
2290         let
2291             code' = code `snocOL`
2292                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2293         return (CondCode False cond code')
2294
2295 condIntCode cond x y = do
2296     (src1, code1) <- getSomeReg x
2297     (src2, code2) <- getSomeReg y
2298     let
2299         code' = code1 `appOL` code2 `snocOL`
2300                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2301     return (CondCode False cond code')
2302
2303 condFltCode cond x y = do
2304     (src1, code1) <- getSomeReg x
2305     (src2, code2) <- getSomeReg y
2306     let
2307         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2308         code'' = case cond of -- twiddle CR to handle unordered case
2309                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2310                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2311                     _ -> code'
2312                  where
2313                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2314     return (CondCode True cond code'')
2315
2316 #endif /* powerpc_TARGET_ARCH */
2317
2318 -- -----------------------------------------------------------------------------
2319 -- Generating assignments
2320
2321 -- Assignments are really at the heart of the whole code generation
2322 -- business.  Almost all top-level nodes of any real importance are
2323 -- assignments, which correspond to loads, stores, or register
2324 -- transfers.  If we're really lucky, some of the register transfers
2325 -- will go away, because we can use the destination register to
2326 -- complete the code generation for the right hand side.  This only
2327 -- fails when the right hand side is forced into a fixed register
2328 -- (e.g. the result of a call).
2329
2330 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2331 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2332
2333 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2334 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2335
2336 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2337
2338 #if alpha_TARGET_ARCH
2339
2340 assignIntCode pk (CmmLoad dst _) src
2341   = getNewRegNat IntRep             `thenNat` \ tmp ->
2342     getAmode dst                    `thenNat` \ amode ->
2343     getRegister src                 `thenNat` \ register ->
2344     let
2345         code1   = amodeCode amode []
2346         dst__2  = amodeAddr amode
2347         code2   = registerCode register tmp []
2348         src__2  = registerName register tmp
2349         sz      = primRepToSize pk
2350         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2351     in
2352     return code__2
2353
2354 assignIntCode pk dst src
2355   = getRegister dst                         `thenNat` \ register1 ->
2356     getRegister src                         `thenNat` \ register2 ->
2357     let
2358         dst__2  = registerName register1 zeroh
2359         code    = registerCode register2 dst__2
2360         src__2  = registerName register2 dst__2
2361         code__2 = if isFixed register2
2362                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2363                   else code
2364     in
2365     return code__2
2366
2367 #endif /* alpha_TARGET_ARCH */
2368
2369 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2370
2371 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2372
2373 -- integer assignment to memory
2374 assignMem_IntCode pk addr src = do
2375     Amode addr code_addr <- getAmode addr
2376     (code_src, op_src)   <- get_op_RI src
2377     let
2378         code = code_src `appOL`
2379                code_addr `snocOL`
2380                   MOV pk op_src (OpAddr addr)
2381         -- NOTE: op_src is stable, so it will still be valid
2382         -- after code_addr.  This may involve the introduction
2383         -- of an extra MOV to a temporary register, but we hope
2384         -- the register allocator will get rid of it.
2385     --
2386     return code
2387   where
2388     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2389     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2390       = return (nilOL, OpImm (litToImm lit))
2391     get_op_RI op
2392       = do (reg,code) <- getNonClobberedReg op
2393            return (code, OpReg reg)
2394
2395
2396 -- Assign; dst is a reg, rhs is mem
2397 assignReg_IntCode pk reg (CmmLoad src _) = do
2398   load_code <- intLoadCode (MOV pk) src
2399   return (load_code (getRegisterReg reg))
2400
2401 -- dst is a reg, but src could be anything
2402 assignReg_IntCode pk reg src = do
2403   code <- getAnyReg src
2404   return (code (getRegisterReg reg))
2405
2406 #endif /* i386_TARGET_ARCH */
2407
2408 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2409
2410 #if sparc_TARGET_ARCH
2411
2412 assignMem_IntCode pk addr src
2413   = getNewRegNat IntRep                     `thenNat` \ tmp ->
2414     getAmode addr                           `thenNat` \ amode ->
2415     getRegister src                         `thenNat` \ register ->
2416     let
2417         code1   = amodeCode amode
2418         dst__2  = amodeAddr amode
2419         code2   = registerCode register tmp
2420         src__2  = registerName register tmp
2421         sz      = primRepToSize pk
2422         code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2423     in
2424     return code__2
2425
2426 assignReg_IntCode pk reg src
2427   = getRegister src                         `thenNat` \ register2 ->
2428     getRegisterReg reg                      `thenNat` \ register1 ->
2429     getNewRegNat IntRep                     `thenNat` \ tmp ->
2430     let
2431         dst__2  = registerName register1 tmp
2432         code    = registerCode register2 dst__2
2433         src__2  = registerName register2 dst__2
2434         code__2 = if isFixed register2
2435                   then code `snocOL` OR False g0 (RIReg src__2) dst__2
2436                   else code
2437     in
2438     return code__2
2439
2440 #endif /* sparc_TARGET_ARCH */
2441
2442 #if powerpc_TARGET_ARCH
2443
2444 assignMem_IntCode pk addr src = do
2445     (srcReg, code) <- getSomeReg src
2446     Amode dstAddr addr_code <- getAmode addr
2447     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2448
2449 -- dst is a reg, but src could be anything
2450 assignReg_IntCode pk reg src
2451     = do
2452         r <- getRegister src
2453         return $ case r of
2454             Any _ code         -> code dst
2455             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2456     where
2457         dst = getRegisterReg reg
2458
2459 #endif /* powerpc_TARGET_ARCH */
2460
2461
2462 -- -----------------------------------------------------------------------------
2463 -- Floating-point assignments
2464
2465 #if alpha_TARGET_ARCH
2466
2467 assignFltCode pk (CmmLoad dst _) src
2468   = getNewRegNat pk                 `thenNat` \ tmp ->
2469     getAmode dst                    `thenNat` \ amode ->
2470     getRegister src                         `thenNat` \ register ->
2471     let
2472         code1   = amodeCode amode []
2473         dst__2  = amodeAddr amode
2474         code2   = registerCode register tmp []
2475         src__2  = registerName register tmp
2476         sz      = primRepToSize pk
2477         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2478     in
2479     return code__2
2480
2481 assignFltCode pk dst src
2482   = getRegister dst                         `thenNat` \ register1 ->
2483     getRegister src                         `thenNat` \ register2 ->
2484     let
2485         dst__2  = registerName register1 zeroh
2486         code    = registerCode register2 dst__2
2487         src__2  = registerName register2 dst__2
2488         code__2 = if isFixed register2
2489                   then code . mkSeqInstr (FMOV src__2 dst__2)
2490                   else code
2491     in
2492     return code__2
2493
2494 #endif /* alpha_TARGET_ARCH */
2495
2496 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2497
2498 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2499
2500 -- Floating point assignment to memory
2501 assignMem_FltCode pk addr src = do
2502   (src_reg, src_code) <- getNonClobberedReg src
2503   Amode addr addr_code <- getAmode addr
2504   let
2505         code = src_code `appOL`
2506                addr_code `snocOL`
2507                 IF_ARCH_i386(GST pk src_reg addr,
2508                              MOV pk (OpReg src_reg) (OpAddr addr))
2509   return code
2510
2511 -- Floating point assignment to a register/temporary
2512 assignReg_FltCode pk reg src = do
2513   src_code <- getAnyReg src
2514   return (src_code (getRegisterReg reg))
2515
2516 #endif /* i386_TARGET_ARCH */
2517
2518 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2519
2520 #if sparc_TARGET_ARCH
2521
2522 -- Floating point assignment to memory
2523 assignMem_FltCode pk addr src
2524   = getNewRegNat pk                 `thenNat` \ tmp1 ->
2525     getAmode addr                   `thenNat` \ amode ->
2526     getRegister src                 `thenNat` \ register ->
2527     let
2528         sz      = primRepToSize pk
2529         dst__2  = amodeAddr amode
2530
2531         code1   = amodeCode amode
2532         code2   = registerCode register tmp1
2533
2534         src__2  = registerName register tmp1
2535         pk__2   = registerRep register
2536         sz__2   = primRepToSize pk__2
2537
2538         code__2 = code1 `appOL` code2 `appOL`
2539             if   pk == pk__2
2540             then unitOL (ST sz src__2 dst__2)
2541             else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2542     in
2543     return code__2
2544
2545 -- Floating point assignment to a register/temporary
2546 -- Why is this so bizarrely ugly?
2547 assignReg_FltCode pk reg src
2548   = getRegisterReg reg                      `thenNat` \ register1 ->
2549     getRegister src                         `thenNat` \ register2 ->
2550     let
2551         pk__2   = registerRep register2
2552         sz__2   = primRepToSize pk__2
2553     in
2554     getNewRegNat pk__2                      `thenNat` \ tmp ->
2555     let
2556         sz      = primRepToSize pk
2557         dst__2  = registerName register1 g0    -- must be Fixed
2558         reg__2  = if pk /= pk__2 then tmp else dst__2
2559         code    = registerCode register2 reg__2
2560         src__2  = registerName register2 reg__2
2561         code__2 =
2562                 if pk /= pk__2 then
2563                      code `snocOL` FxTOy sz__2 sz src__2 dst__2
2564                 else if isFixed register2 then
2565                      code `snocOL` FMOV sz src__2 dst__2
2566                 else
2567                      code
2568     in
2569     return code__2
2570
2571 #endif /* sparc_TARGET_ARCH */
2572
2573 #if powerpc_TARGET_ARCH
2574
2575 -- Easy, isn't it?
2576 assignMem_FltCode = assignMem_IntCode
2577 assignReg_FltCode = assignReg_IntCode
2578
2579 #endif /* powerpc_TARGET_ARCH */
2580
2581
2582 -- -----------------------------------------------------------------------------
2583 -- Generating an non-local jump
2584
2585 -- (If applicable) Do not fill the delay slots here; you will confuse the
2586 -- register allocator.
2587
2588 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2589
2590 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2591
2592 #if alpha_TARGET_ARCH
2593
2594 genJump (CmmLabel lbl)
2595   | isAsmTemp lbl = returnInstr (BR target)
2596   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2597   where
2598     target = ImmCLbl lbl
2599
2600 genJump tree
2601   = getRegister tree                `thenNat` \ register ->
2602     getNewRegNat PtrRep             `thenNat` \ tmp ->
2603     let
2604         dst    = registerName register pv
2605         code   = registerCode register pv
2606         target = registerName register pv
2607     in
2608     if isFixed register then
2609         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2610     else
2611     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2612
2613 #endif /* alpha_TARGET_ARCH */
2614
2615 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2616
2617 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2618
2619 genJump (CmmLoad mem pk) = do
2620   Amode target code <- getAmode mem
2621   return (code `snocOL` JMP (OpAddr target))
2622
2623 genJump (CmmLit lit) = do
2624   return (unitOL (JMP (OpImm (litToImm lit))))
2625
2626 genJump expr = do
2627   (reg,code) <- getSomeReg expr
2628   return (code `snocOL` JMP (OpReg reg))
2629
2630 #endif /* i386_TARGET_ARCH */
2631
2632 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2633
2634 #if sparc_TARGET_ARCH
2635
2636 genJump (CmmLabel lbl)
2637   = return (toOL [CALL (Left target) 0 True, NOP])
2638   where
2639     target = ImmCLbl lbl
2640
2641 genJump tree
2642   = getRegister tree                        `thenNat` \ register ->
2643     getNewRegNat PtrRep             `thenNat` \ tmp ->
2644     let
2645         code   = registerCode register tmp
2646         target = registerName register tmp
2647     in
2648     return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2649
2650 #endif /* sparc_TARGET_ARCH */
2651
2652 #if powerpc_TARGET_ARCH
2653 genJump (CmmLit (CmmLabel lbl))
2654   = return (unitOL $ JMP lbl)
2655
2656 genJump tree
2657   = do
2658         (target,code) <- getSomeReg tree
2659         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2660 #endif /* powerpc_TARGET_ARCH */
2661
2662
2663 -- -----------------------------------------------------------------------------
2664 --  Unconditional branches
2665
2666 genBranch :: BlockId -> NatM InstrBlock
2667
2668 #if alpha_TARGET_ARCH
2669 genBranch id = return (unitOL (BR id))
2670 #endif
2671
2672 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2673 genBranch id = return (unitOL (JXX ALWAYS id))
2674 #endif
2675
2676 #if sparc_TARGET_ARCH
2677 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2678 #endif
2679
2680 #if powerpc_TARGET_ARCH
2681 genBranch id = return (unitOL (BCC ALWAYS id))
2682 #endif
2683
2684
2685 -- -----------------------------------------------------------------------------
2686 --  Conditional jumps
2687
2688 {-
2689 Conditional jumps are always to local labels, so we can use branch
2690 instructions.  We peek at the arguments to decide what kind of
2691 comparison to do.
2692
2693 ALPHA: For comparisons with 0, we're laughing, because we can just do
2694 the desired conditional branch.
2695
2696 I386: First, we have to ensure that the condition
2697 codes are set according to the supplied comparison operation.
2698
2699 SPARC: First, we have to ensure that the condition codes are set
2700 according to the supplied comparison operation.  We generate slightly
2701 different code for floating point comparisons, because a floating
2702 point operation cannot directly precede a @BF@.  We assume the worst
2703 and fill that slot with a @NOP@.
2704
2705 SPARC: Do not fill the delay slots here; you will confuse the register
2706 allocator.
2707 -}
2708
2709
2710 genCondJump
2711     :: BlockId      -- the branch target
2712     -> CmmExpr      -- the condition on which to branch
2713     -> NatM InstrBlock
2714
2715 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2716
2717 #if alpha_TARGET_ARCH
2718
2719 genCondJump id (StPrim op [x, StInt 0])
2720   = getRegister x                           `thenNat` \ register ->
2721     getNewRegNat (registerRep register)
2722                                     `thenNat` \ tmp ->
2723     let
2724         code   = registerCode register tmp
2725         value  = registerName register tmp
2726         pk     = registerRep register
2727         target = ImmCLbl lbl
2728     in
2729     returnSeq code [BI (cmpOp op) value target]
2730   where
2731     cmpOp CharGtOp = GTT
2732     cmpOp CharGeOp = GE
2733     cmpOp CharEqOp = EQQ
2734     cmpOp CharNeOp = NE
2735     cmpOp CharLtOp = LTT
2736     cmpOp CharLeOp = LE
2737     cmpOp IntGtOp = GTT
2738     cmpOp IntGeOp = GE
2739     cmpOp IntEqOp = EQQ
2740     cmpOp IntNeOp = NE
2741     cmpOp IntLtOp = LTT
2742     cmpOp IntLeOp = LE
2743     cmpOp WordGtOp = NE
2744     cmpOp WordGeOp = ALWAYS
2745     cmpOp WordEqOp = EQQ
2746     cmpOp WordNeOp = NE
2747     cmpOp WordLtOp = NEVER
2748     cmpOp WordLeOp = EQQ
2749     cmpOp AddrGtOp = NE
2750     cmpOp AddrGeOp = ALWAYS
2751     cmpOp AddrEqOp = EQQ
2752     cmpOp AddrNeOp = NE
2753     cmpOp AddrLtOp = NEVER
2754     cmpOp AddrLeOp = EQQ
2755
2756 genCondJump lbl (StPrim op [x, StDouble 0.0])
2757   = getRegister x                           `thenNat` \ register ->
2758     getNewRegNat (registerRep register)
2759                                     `thenNat` \ tmp ->
2760     let
2761         code   = registerCode register tmp
2762         value  = registerName register tmp
2763         pk     = registerRep register
2764         target = ImmCLbl lbl
2765     in
2766     return (code . mkSeqInstr (BF (cmpOp op) value target))
2767   where
2768     cmpOp FloatGtOp = GTT
2769     cmpOp FloatGeOp = GE
2770     cmpOp FloatEqOp = EQQ
2771     cmpOp FloatNeOp = NE
2772     cmpOp FloatLtOp = LTT
2773     cmpOp FloatLeOp = LE
2774     cmpOp DoubleGtOp = GTT
2775     cmpOp DoubleGeOp = GE
2776     cmpOp DoubleEqOp = EQQ
2777     cmpOp DoubleNeOp = NE
2778     cmpOp DoubleLtOp = LTT
2779     cmpOp DoubleLeOp = LE
2780
2781 genCondJump lbl (StPrim op [x, y])
2782   | fltCmpOp op
2783   = trivialFCode pr instr x y       `thenNat` \ register ->
2784     getNewRegNat F64                `thenNat` \ tmp ->
2785     let
2786         code   = registerCode register tmp
2787         result = registerName register tmp
2788         target = ImmCLbl lbl
2789     in
2790     return (code . mkSeqInstr (BF cond result target))
2791   where
2792     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2793
2794     fltCmpOp op = case op of
2795         FloatGtOp -> True
2796         FloatGeOp -> True
2797         FloatEqOp -> True
2798         FloatNeOp -> True
2799         FloatLtOp -> True
2800         FloatLeOp -> True
2801         DoubleGtOp -> True
2802         DoubleGeOp -> True
2803         DoubleEqOp -> True
2804         DoubleNeOp -> True
2805         DoubleLtOp -> True
2806         DoubleLeOp -> True
2807         _ -> False
2808     (instr, cond) = case op of
2809         FloatGtOp -> (FCMP TF LE, EQQ)
2810         FloatGeOp -> (FCMP TF LTT, EQQ)
2811         FloatEqOp -> (FCMP TF EQQ, NE)
2812         FloatNeOp -> (FCMP TF EQQ, EQQ)
2813         FloatLtOp -> (FCMP TF LTT, NE)
2814         FloatLeOp -> (FCMP TF LE, NE)
2815         DoubleGtOp -> (FCMP TF LE, EQQ)
2816         DoubleGeOp -> (FCMP TF LTT, EQQ)
2817         DoubleEqOp -> (FCMP TF EQQ, NE)
2818         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2819         DoubleLtOp -> (FCMP TF LTT, NE)
2820         DoubleLeOp -> (FCMP TF LE, NE)
2821
2822 genCondJump lbl (StPrim op [x, y])
2823   = trivialCode instr x y           `thenNat` \ register ->
2824     getNewRegNat IntRep             `thenNat` \ tmp ->
2825     let
2826         code   = registerCode register tmp
2827         result = registerName register tmp
2828         target = ImmCLbl lbl
2829     in
2830     return (code . mkSeqInstr (BI cond result target))
2831   where
2832     (instr, cond) = case op of
2833         CharGtOp -> (CMP LE, EQQ)
2834         CharGeOp -> (CMP LTT, EQQ)
2835         CharEqOp -> (CMP EQQ, NE)
2836         CharNeOp -> (CMP EQQ, EQQ)
2837         CharLtOp -> (CMP LTT, NE)
2838         CharLeOp -> (CMP LE, NE)
2839         IntGtOp -> (CMP LE, EQQ)
2840         IntGeOp -> (CMP LTT, EQQ)
2841         IntEqOp -> (CMP EQQ, NE)
2842         IntNeOp -> (CMP EQQ, EQQ)
2843         IntLtOp -> (CMP LTT, NE)
2844         IntLeOp -> (CMP LE, NE)
2845         WordGtOp -> (CMP ULE, EQQ)
2846         WordGeOp -> (CMP ULT, EQQ)
2847         WordEqOp -> (CMP EQQ, NE)
2848         WordNeOp -> (CMP EQQ, EQQ)
2849         WordLtOp -> (CMP ULT, NE)
2850         WordLeOp -> (CMP ULE, NE)
2851         AddrGtOp -> (CMP ULE, EQQ)
2852         AddrGeOp -> (CMP ULT, EQQ)
2853         AddrEqOp -> (CMP EQQ, NE)
2854         AddrNeOp -> (CMP EQQ, EQQ)
2855         AddrLtOp -> (CMP ULT, NE)
2856         AddrLeOp -> (CMP ULE, NE)
2857
2858 #endif /* alpha_TARGET_ARCH */
2859
2860 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2861
2862 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2863
2864 genCondJump id bool = do
2865   CondCode _ cond code <- getCondCode bool
2866   return (code `snocOL` JXX cond id)
2867
2868 #endif /* i386_TARGET_ARCH */
2869
2870
2871 #if sparc_TARGET_ARCH
2872
2873 genCondJump id bool = do
2874   CondCode is_float cond code <- getCondCode bool
2875   return (
2876        code `appOL`
2877        toOL (
2878          if   is_float
2879          then [NOP, BF cond False id, NOP]
2880          else [BI cond False id, NOP]
2881        )
2882     )
2883
2884 #endif /* sparc_TARGET_ARCH */
2885
2886
2887 #if powerpc_TARGET_ARCH
2888
2889 genCondJump id bool = do
2890   CondCode is_float cond code <- getCondCode bool
2891   return (code `snocOL` BCC cond id)
2892
2893 #endif /* powerpc_TARGET_ARCH */
2894
2895
2896 -- -----------------------------------------------------------------------------
2897 --  Generating C calls
2898
2899 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2900 -- @get_arg@, which moves the arguments to the correct registers/stack
2901 -- locations.  Apart from that, the code is easy.
2902 --
2903 -- (If applicable) Do not fill the delay slots here; you will confuse the
2904 -- register allocator.
2905
2906 genCCall
2907     :: CmmCallTarget            -- function to call
2908     -> [(CmmReg,MachHint)]      -- where to put the result
2909     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2910     -> Maybe [GlobalReg]        -- volatile regs to save
2911     -> NatM InstrBlock
2912
2913 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2914
2915 #if alpha_TARGET_ARCH
2916
2917 ccallResultRegs =
2918
2919 genCCall fn cconv result_regs args
2920   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2921                           `thenNat` \ ((unused,_), argCode) ->
2922     let
2923         nRegs = length allArgRegs - length unused
2924         code = asmSeqThen (map ($ []) argCode)
2925     in
2926         returnSeq code [
2927             LDA pv (AddrImm (ImmLab (ptext fn))),
2928             JSR ra (AddrReg pv) nRegs,
2929             LDGP gp (AddrReg ra)]
2930   where
2931     ------------------------
2932     {-  Try to get a value into a specific register (or registers) for
2933         a call.  The first 6 arguments go into the appropriate
2934         argument register (separate registers for integer and floating
2935         point arguments, but used in lock-step), and the remaining
2936         arguments are dumped to the stack, beginning at 0(sp).  Our
2937         first argument is a pair of the list of remaining argument
2938         registers to be assigned for this call and the next stack
2939         offset to use for overflowing arguments.  This way,
2940         @get_Arg@ can be applied to all of a call's arguments using
2941         @mapAccumLNat@.
2942     -}
2943     get_arg
2944         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2945         -> StixTree             -- Current argument
2946         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2947
2948     -- We have to use up all of our argument registers first...
2949
2950     get_arg ((iDst,fDst):dsts, offset) arg
2951       = getRegister arg                     `thenNat` \ register ->
2952         let
2953             reg  = if isFloatingRep pk then fDst else iDst
2954             code = registerCode register reg
2955             src  = registerName register reg
2956             pk   = registerRep register
2957         in
2958         return (
2959             if isFloatingRep pk then
2960                 ((dsts, offset), if isFixed register then
2961                     code . mkSeqInstr (FMOV src fDst)
2962                     else code)
2963             else
2964                 ((dsts, offset), if isFixed register then
2965                     code . mkSeqInstr (OR src (RIReg src) iDst)
2966                     else code))
2967
2968     -- Once we have run out of argument registers, we move to the
2969     -- stack...
2970
2971     get_arg ([], offset) arg
2972       = getRegister arg                 `thenNat` \ register ->
2973         getNewRegNat (registerRep register)
2974                                         `thenNat` \ tmp ->
2975         let
2976             code = registerCode register tmp
2977             src  = registerName register tmp
2978             pk   = registerRep register
2979             sz   = primRepToSize pk
2980         in
2981         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2982
2983 #endif /* alpha_TARGET_ARCH */
2984
2985 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2986
2987 #if i386_TARGET_ARCH
2988
2989 -- we only cope with a single result for foreign calls
2990 genCCall (CmmPrim op) [(r,_)] args vols = do
2991   case op of
2992         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2993         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2994
2995         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2996         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2997
2998         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2999         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3000
3001         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3002         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3003
3004         other_op    -> outOfLineFloatOp op r args vols
3005  where
3006   actuallyInlineFloatOp rep instr [(x,_)]
3007         = do res <- trivialUFCode rep instr x
3008              any <- anyReg res
3009              return (any (getRegisterReg r))
3010
3011 genCCall target dest_regs args vols = do
3012     sizes_n_codes <- mapM push_arg (reverse args)
3013     delta <- getDeltaNat
3014     let
3015         (sizes, push_codes) = unzip sizes_n_codes
3016         tot_arg_size        = sum sizes
3017     -- in
3018     -- deal with static vs dynamic call targets
3019     (callinsns,cconv) <-
3020       case target of
3021         -- CmmPrim -> ...
3022         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3023            -> -- ToDo: stdcall arg sizes
3024               return (unitOL (CALL (Left fn_imm)), conv)
3025            where fn_imm = ImmCLbl lbl
3026         CmmForeignCall expr conv
3027            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3028                  ASSERT(dyn_rep == I32)
3029                   return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3030
3031     let push_code = concatOL push_codes
3032         call = callinsns `appOL`
3033                toOL (
3034                         -- Deallocate parameters after call for ccall;
3035                         -- but not for stdcall (callee does it)
3036                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3037                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3038                   ++
3039                   [DELTA (delta + tot_arg_size)]
3040                )
3041     -- in
3042     setDeltaNat (delta + tot_arg_size)
3043
3044     let
3045         -- assign the results, if necessary
3046         assign_code []     = nilOL
3047         assign_code [(dest,_hint)] =
3048           case rep of
3049                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3050                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3051                 F32 -> unitOL (GMOV fake0 r_dest)
3052                 F64 -> unitOL (GMOV fake0 r_dest)
3053                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3054           where
3055                 r_dest_hi = getHiVRegFromLo r_dest
3056                 rep = cmmRegRep dest
3057                 r_dest = getRegisterReg dest
3058         assign_code many = panic "genCCall.assign_code many"
3059
3060     return (push_code `appOL`
3061             call `appOL`
3062             assign_code dest_regs)
3063
3064   where
3065     arg_size F64 = 8
3066     arg_size F32 = 4
3067     arg_size _   = 4
3068
3069     push_arg :: (CmmExpr,MachHint){-current argument-}
3070                     -> NatM (Int, InstrBlock)  -- argsz, code
3071
3072     push_arg (arg,_hint) -- we don't need the hints on x86
3073       | arg_rep == I64 = do
3074         ChildCode64 code r_lo <- iselExpr64 arg
3075         delta <- getDeltaNat
3076         setDeltaNat (delta - 8)
3077         let
3078             r_hi = getHiVRegFromLo r_lo
3079         -- in
3080         return (8,     code `appOL`
3081                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3082                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3083                              DELTA (delta-8)]
3084             )
3085
3086       | otherwise = do
3087         (code, reg, sz) <- get_op arg
3088         delta <- getDeltaNat
3089         let size = arg_size sz
3090         setDeltaNat (delta-size)
3091         if (case sz of F64 -> True; F32 -> True; _ -> False)
3092            then return (size,
3093                         code `appOL`
3094                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3095                               DELTA (delta-size),
3096                               GST sz reg (AddrBaseIndex (Just esp)
3097                                                         Nothing
3098                                                         (ImmInt 0))]
3099                        )
3100            else return (size,
3101                         code `snocOL`
3102                         PUSH I32 (OpReg reg) `snocOL`
3103                         DELTA (delta-size)
3104                        )
3105       where
3106          arg_rep = cmmExprRep arg
3107
3108     ------------
3109     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3110     get_op op = do
3111         (reg,code) <- getSomeReg op
3112         return (code, reg, cmmExprRep op)
3113
3114
3115 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3116   -> Maybe [GlobalReg] -> NatM InstrBlock
3117 outOfLineFloatOp mop res args vols
3118   | cmmRegRep res == F64
3119   = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3120
3121   | otherwise
3122   = do uq <- getUniqueNat
3123        let
3124          tmp = CmmLocal (LocalReg uq F64)
3125        -- in
3126        code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3127        code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3128        return (code1 `appOL` code2)
3129   where
3130         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3131         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
3132
3133         target = CmmForeignCall (CmmLit lbl) CCallConv
3134         lbl = CmmLabel (mkForeignLabel fn Nothing False)
3135
3136         fn = case mop of
3137               MO_F32_Exp   -> FSLIT("exp")
3138               MO_F32_Log   -> FSLIT("log")
3139
3140               MO_F32_Asin  -> FSLIT("asin")
3141               MO_F32_Acos  -> FSLIT("acos")
3142               MO_F32_Atan  -> FSLIT("atan")
3143
3144               MO_F32_Sinh  -> FSLIT("sinh")
3145               MO_F32_Cosh  -> FSLIT("cosh")
3146               MO_F32_Tanh  -> FSLIT("tanh")
3147               MO_F32_Pwr   -> FSLIT("pow")
3148
3149               MO_F64_Exp   -> FSLIT("exp")
3150               MO_F64_Log   -> FSLIT("log")
3151
3152               MO_F64_Asin  -> FSLIT("asin")
3153               MO_F64_Acos  -> FSLIT("acos")
3154               MO_F64_Atan  -> FSLIT("atan")
3155
3156               MO_F64_Sinh  -> FSLIT("sinh")
3157               MO_F64_Cosh  -> FSLIT("cosh")
3158               MO_F64_Tanh  -> FSLIT("tanh")
3159               MO_F64_Pwr   -> FSLIT("pow")
3160
3161               other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3162
3163 #endif /* i386_TARGET_ARCH */
3164
3165 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3166
3167 #if x86_64_TARGET_ARCH
3168
3169 genCCall (CmmPrim op) [(r,_)] args vols =
3170   panic "genCCall(CmmPrim)(x86_64)"
3171
3172 genCCall target dest_regs args vols = do
3173
3174         -- load up the register arguments
3175     (stack_args, sse_regs, load_args_code)
3176          <- load_args args allArgRegs allFPArgRegs 0 nilOL
3177
3178     let
3179         tot_arg_size = arg_size * length stack_args
3180
3181         -- On entry to the called function, %rsp should be aligned
3182         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3183         -- the return address is 16-byte aligned).  In STG land
3184         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3185         -- need to make sure we push a multiple of 16-bytes of args,
3186         -- plus the return address, to get the correct alignment.
3187         -- Urg, this is hard.  We need to feed the delta back into
3188         -- the arg pushing code.
3189     (real_size, adjust_rsp) <-
3190         if tot_arg_size `rem` 16 == 0
3191             then return (tot_arg_size, nilOL)
3192             else do -- we need to adjust...
3193                 delta <- getDeltaNat
3194                 setDeltaNat (delta-8)
3195                 return (tot_arg_size+8, toOL [
3196                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3197                                 DELTA (delta-8)
3198                         ])
3199
3200         -- push the stack args, right to left
3201     push_code <- push_args (reverse stack_args) nilOL
3202     delta <- getDeltaNat
3203
3204     -- deal with static vs dynamic call targets
3205     (callinsns,cconv) <-
3206       case target of
3207         -- CmmPrim -> ...
3208         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3209            -> -- ToDo: stdcall arg sizes
3210               return (unitOL (CALL (Left fn_imm)), conv)
3211            where fn_imm = ImmCLbl lbl
3212         CmmForeignCall expr conv
3213            -> do (dyn_r, dyn_c) <- getSomeReg expr
3214                  return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3215
3216     let
3217         -- The x86_64 ABI requires us to set %al to the number of SSE
3218         -- registers that contain arguments, if the called routine
3219         -- is a varargs function.  We don't know whether it's a
3220         -- varargs function or not, so we have to assume it is.
3221         --
3222         -- It's not safe to omit this assignment, even if the number
3223         -- of SSE regs in use is zero.  If %al is larger than 8
3224         -- on entry to a varargs function, seg faults ensue.
3225         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3226
3227     let call = callinsns `appOL`
3228                toOL (
3229                         -- Deallocate parameters after call for ccall;
3230                         -- but not for stdcall (callee does it)
3231                   (if cconv == StdCallConv || real_size==0 then [] else
3232                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3233                   ++
3234                   [DELTA (delta + real_size)]
3235                )
3236     -- in
3237     setDeltaNat (delta + real_size)
3238
3239     let
3240         -- assign the results, if necessary
3241         assign_code []     = nilOL
3242         assign_code [(dest,_hint)] =
3243           case rep of
3244                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3245                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3246                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3247           where
3248                 rep = cmmRegRep dest
3249                 r_dest = getRegisterReg dest
3250         assign_code many = panic "genCCall.assign_code many"
3251
3252     return (load_args_code      `appOL`
3253             adjust_rsp          `appOL`
3254             push_code           `appOL`
3255             assign_eax sse_regs `appOL`
3256             call                `appOL`
3257             assign_code dest_regs)
3258
3259   where
3260     arg_size = 8 -- always, at the mo
3261
3262     load_args :: [(CmmExpr,MachHint)]
3263               -> [Reg]                  -- int regs avail for args
3264               -> [Reg]                  -- FP regs avail for args
3265               -> Int -> InstrBlock
3266               -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
3267     load_args args [] [] sse_regs code = return (args, sse_regs, code)
3268         -- no more regs to use
3269     load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
3270         -- no more args to push
3271     load_args ((arg,hint) : rest) aregs fregs sse_regs code
3272         | isFloatingRep arg_rep =
3273         case fregs of
3274           [] -> push_this_arg
3275           (r:rs) -> do
3276              arg_code <- getAnyReg arg
3277              load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
3278         | otherwise =
3279         case aregs of
3280           [] -> push_this_arg
3281           (r:rs) -> do
3282              arg_code <- getAnyReg arg
3283              load_args rest rs fregs sse_regs (code `appOL` arg_code r)
3284         where
3285           arg_rep = cmmExprRep arg
3286
3287           push_this_arg = do
3288             (args',sse',code') <- load_args rest aregs fregs sse_regs code
3289             return ((arg,hint):args', sse', code')
3290
3291     push_args [] code = return code
3292     push_args ((arg,hint):rest) code
3293        | isFloatingRep arg_rep = do
3294          (arg_reg, arg_code) <- getSomeReg arg
3295          delta <- getDeltaNat
3296          setDeltaNat (delta-arg_size)
3297          let code' = code `appOL` toOL [
3298                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3299                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3300                         DELTA (delta-arg_size)]
3301          push_args rest code'
3302
3303        | otherwise = do
3304        -- we only ever generate word-sized function arguments.  Promotion
3305        -- has already happened: our Int8# type is kept sign-extended
3306        -- in an Int#, for example.
3307          ASSERT(arg_rep == I64) return ()
3308          (arg_op, arg_code) <- getOperand arg
3309          delta <- getDeltaNat
3310          setDeltaNat (delta-arg_size)
3311          let code' = code `appOL` toOL [PUSH I64 arg_op,
3312                                         DELTA (delta-arg_size)]
3313          push_args rest code'
3314         where
3315           arg_rep = cmmExprRep arg
3316 #endif
3317
3318 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3319
3320 #if sparc_TARGET_ARCH
3321 {-
3322    The SPARC calling convention is an absolute
3323    nightmare.  The first 6x32 bits of arguments are mapped into
3324    %o0 through %o5, and the remaining arguments are dumped to the
3325    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3326
3327    If we have to put args on the stack, move %o6==%sp down by
3328    the number of words to go on the stack, to ensure there's enough space.
3329
3330    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3331    16 words above the stack pointer is a word for the address of
3332    a structure return value.  I use this as a temporary location
3333    for moving values from float to int regs.  Certainly it isn't
3334    safe to put anything in the 16 words starting at %sp, since
3335    this area can get trashed at any time due to window overflows
3336    caused by signal handlers.
3337
3338    A final complication (if the above isn't enough) is that
3339    we can't blithely calculate the arguments one by one into
3340    %o0 .. %o5.  Consider the following nested calls:
3341
3342        fff a (fff b c)
3343
3344    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3345    the inner call will itself use %o0, which trashes the value put there
3346    in preparation for the outer call.  Upshot: we need to calculate the
3347    args into temporary regs, and move those to arg regs or onto the
3348    stack only immediately prior to the call proper.  Sigh.
3349 -}
3350
3351 genCCall fn cconv kind args
3352   = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3353     let
3354         (argcodes, vregss) = unzip argcode_and_vregs
3355         n_argRegs          = length allArgRegs
3356         n_argRegs_used     = min (length vregs) n_argRegs
3357         vregs              = concat vregss
3358     in
3359     -- deal with static vs dynamic call targets
3360     (case fn of
3361         Left t_static
3362            -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3363         Right dyn
3364            -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3365               return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3366     )
3367                                 `thenNat` \ callinsns ->
3368     let
3369         argcode = concatOL argcodes
3370         (move_sp_down, move_sp_up)
3371            = let diff = length vregs - n_argRegs
3372                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3373              in  if   nn <= 0
3374                  then (nilOL, nilOL)
3375                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3376         transfer_code
3377            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3378     in
3379         return (argcode       `appOL`
3380                    move_sp_down  `appOL`
3381                    transfer_code `appOL`
3382                    callinsns     `appOL`
3383                    unitOL NOP    `appOL`
3384                    move_sp_up)
3385   where
3386      -- function names that begin with '.' are assumed to be special
3387      -- internally generated names like '.mul,' which don't get an
3388      -- underscore prefix
3389      -- ToDo:needed (WDP 96/03) ???
3390      fn_static = unLeft fn
3391      fn__2 = case (headFS fn_static) of
3392                 '.' -> ImmLit (ftext fn_static)
3393                 _   -> ImmCLbl (mkForeignLabel fn_static False)
3394
3395      -- move args from the integer vregs into which they have been
3396      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3397      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3398
3399      move_final [] _ offset          -- all args done
3400         = []
3401
3402      move_final (v:vs) [] offset     -- out of aregs; move to stack
3403         = ST W v (spRel offset)
3404           : move_final vs [] (offset+1)
3405
3406      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3407         = OR False g0 (RIReg v) a
3408           : move_final vs az offset
3409
3410      -- generate code to calculate an argument, and move it into one
3411      -- or two integer vregs.
3412      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3413      arg_to_int_vregs arg
3414         | is64BitRep (repOfCmmExpr arg)
3415         = iselExpr64 arg                `thenNat` \ (ChildCode64 code vr_lo) ->
3416           let r_lo = VirtualRegI vr_lo
3417               r_hi = getHiVRegFromLo r_lo
3418           in  return (code, [r_hi, r_lo])
3419         | otherwise
3420         = getRegister arg                     `thenNat` \ register ->
3421           getNewRegNat (registerRep register) `thenNat` \ tmp ->
3422           let code = registerCode register tmp
3423               src  = registerName register tmp
3424               pk   = registerRep register
3425           in
3426           -- the value is in src.  Get it into 1 or 2 int vregs.
3427           case pk of
3428              F64 ->
3429                 getNewRegNat WordRep  `thenNat` \ v1 ->
3430                 getNewRegNat WordRep  `thenNat` \ v2 ->
3431                 return (
3432                    code                          `snocOL`
3433                    FMOV DF src f0                `snocOL`
3434                    ST   F  f0 (spRel 16)         `snocOL`
3435                    LD   W  (spRel 16) v1         `snocOL`
3436                    ST   F  (fPair f0) (spRel 16) `snocOL`
3437                    LD   W  (spRel 16) v2
3438                    ,
3439                    [v1,v2]
3440                 )
3441              F32 ->
3442                 getNewRegNat WordRep  `thenNat` \ v1 ->
3443                 return (
3444                    code                    `snocOL`
3445                    ST   F  src (spRel 16)  `snocOL`
3446                    LD   W  (spRel 16) v1
3447                    ,
3448                    [v1]
3449                 )
3450              other ->
3451                 getNewRegNat WordRep  `thenNat` \ v1 ->
3452                 return (
3453                    code `snocOL` OR False g0 (RIReg src) v1
3454                    ,
3455                    [v1]
3456                 )
3457 #endif /* sparc_TARGET_ARCH */
3458
3459 #if powerpc_TARGET_ARCH
3460
3461 #if darwin_TARGET_OS || linux_TARGET_OS
3462 {-
3463     The PowerPC calling convention for Darwin/Mac OS X
3464     is described in Apple's document
3465     "Inside Mac OS X - Mach-O Runtime Architecture".
3466
3467     PowerPC Linux uses the System V Release 4 Calling Convention
3468     for PowerPC. It is described in the
3469     "System V Application Binary Interface PowerPC Processor Supplement".
3470
3471     Both conventions are similar:
3472     Parameters may be passed in general-purpose registers starting at r3, in
3473     floating point registers starting at f1, or on the stack.
3474
3475     But there are substantial differences:
3476     * The number of registers used for parameter passing and the exact set of
3477       nonvolatile registers differs (see MachRegs.lhs).
3478     * On Darwin, stack space is always reserved for parameters, even if they are
3479       passed in registers. The called routine may choose to save parameters from
3480       registers to the corresponding space on the stack.
3481     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3482       parameter is passed in an FPR.
3483     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3484       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3485       Darwin just treats an I64 like two separate I32s (high word first).
3486     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3487       4-byte aligned like everything else on Darwin.
3488     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3489       PowerPC Linux does not agree, so neither do we.
3490
3491     According to both conventions, The parameter area should be part of the
3492     caller's stack frame, allocated in the caller's prologue code (large enough
3493     to hold the parameter lists for all called routines). The NCG already
3494     uses the stack for register spilling, leaving 64 bytes free at the top.
3495     If we need a larger parameter area than that, we just allocate a new stack
3496     frame just before ccalling.
3497 -}
3498
3499 genCCall target dest_regs argsAndHints vols
3500   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3501         -- we rely on argument promotion in the codeGen
3502     do
3503         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3504                                                         (zip args argReps)
3505                                                         allArgRegs allFPArgRegs
3506                                                         initialStackOffset
3507                                                         (toOL []) []
3508
3509         (labelOrExpr, reduceToF32) <- case target of
3510             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3511             CmmForeignCall expr conv -> return  (Right expr, False)
3512             CmmPrim mop -> outOfLineFloatOp mop
3513
3514         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3515             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3516
3517         case labelOrExpr of
3518             Left lbl -> do
3519                 return (         codeBefore
3520                         `snocOL` BL lbl usedRegs
3521                         `appOL`  codeAfter)
3522             Right dyn -> do
3523                 (dynReg, dynCode) <- getSomeReg dyn
3524                 return (         dynCode
3525                         `snocOL` MTCTR dynReg
3526                         `appOL`  codeBefore
3527                         `snocOL` BCTRL usedRegs
3528                         `appOL`  codeAfter)
3529     where
3530 #if darwin_TARGET_OS
3531         initialStackOffset = 24
3532             -- size of linkage area + size of arguments, in bytes
3533         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3534                                        map machRepByteWidth argReps
3535 #elif linux_TARGET_OS
3536         initialStackOffset = 8
3537         stackDelta finalStack = roundTo 16 finalStack
3538 #endif
3539         args = map fst argsAndHints
3540         argReps = map cmmExprRep args
3541
3542         roundTo a x | x `mod` a == 0 = x
3543                     | otherwise = x + a - (x `mod` a)
3544
3545         move_sp_down finalStack
3546                | delta > 64 =
3547                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3548                               DELTA (-delta)]
3549                | otherwise = nilOL
3550                where delta = stackDelta finalStack
3551         move_sp_up finalStack
3552                | delta > 64 =
3553                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3554                               DELTA 0]
3555                | otherwise = nilOL
3556                where delta = stackDelta finalStack
3557
3558
3559         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3560         passArguments ((arg,I64):args) gprs fprs stackOffset
3561                accumCode accumUsed =
3562             do
3563                 ChildCode64 code vr_lo <- iselExpr64 arg
3564                 let vr_hi = getHiVRegFromLo vr_lo
3565
3566 #if darwin_TARGET_OS
3567                 passArguments args
3568                               (drop 2 gprs)
3569                               fprs
3570                               (stackOffset+8)
3571                               (accumCode `appOL` code
3572                                     `snocOL` storeWord vr_hi gprs stackOffset
3573                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3574                               ((take 2 gprs) ++ accumUsed)
3575             where
3576                 storeWord vr (gpr:_) offset = MR gpr vr
3577                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3578
3579 #elif linux_TARGET_OS
3580                 let stackOffset' = roundTo 8 stackOffset
3581                     stackCode = accumCode `appOL` code
3582                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3583                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3584                     regCode hireg loreg =
3585                         accumCode `appOL` code
3586                             `snocOL` MR hireg vr_hi
3587                             `snocOL` MR loreg vr_lo
3588
3589                 case gprs of
3590                     hireg : loreg : regs | even (length gprs) ->
3591                         passArguments args regs fprs stackOffset
3592                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3593                     _skipped : hireg : loreg : regs ->
3594                         passArguments args regs fprs stackOffset
3595                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3596                     _ -> -- only one or no regs left
3597                         passArguments args [] fprs (stackOffset'+8)
3598                                       stackCode accumUsed
3599 #endif
3600
3601         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3602             | reg : _ <- regs = do
3603                 register <- getRegister arg
3604                 let code = case register of
3605                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3606                             Any _ acode -> acode reg
3607                 passArguments args
3608                               (drop nGprs gprs)
3609                               (drop nFprs fprs)
3610 #if darwin_TARGET_OS
3611         -- The Darwin ABI requires that we reserve stack slots for register parameters
3612                               (stackOffset + stackBytes)
3613 #elif linux_TARGET_OS
3614         -- ... the SysV ABI doesn't.
3615                               stackOffset
3616 #endif
3617                               (accumCode `appOL` code)
3618                               (reg : accumUsed)
3619             | otherwise = do
3620                 (vr, code) <- getSomeReg arg
3621                 passArguments args
3622                               (drop nGprs gprs)
3623                               (drop nFprs fprs)
3624                               (stackOffset' + stackBytes)
3625                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3626                               accumUsed
3627             where
3628 #if darwin_TARGET_OS
3629         -- stackOffset is at least 4-byte aligned
3630         -- The Darwin ABI is happy with that.
3631                 stackOffset' = stackOffset
3632 #else
3633         -- ... the SysV ABI requires 8-byte alignment for doubles.
3634                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3635                              | otherwise  =           stackOffset
3636 #endif
3637                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3638                 (nGprs, nFprs, stackBytes, regs) = case rep of
3639                     I32 -> (1, 0, 4, gprs)
3640 #if darwin_TARGET_OS
3641         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3642         -- we use the FPRs.
3643                     F32 -> (1, 1, 4, fprs)
3644                     F64 -> (2, 1, 8, fprs)
3645 #elif linux_TARGET_OS
3646         -- ... the SysV ABI doesn't.
3647                     F32 -> (0, 1, 4, fprs)
3648                     F64 -> (0, 1, 8, fprs)
3649 #endif
3650
3651         moveResult reduceToF32 =
3652             case dest_regs of
3653                 [] -> nilOL
3654                 [(dest, _hint)]
3655                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3656                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3657                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3658                                           MR r_dest r4]
3659                     | otherwise -> unitOL (MR r_dest r3)
3660                     where rep = cmmRegRep dest
3661                           r_dest = getRegisterReg dest
3662
3663         outOfLineFloatOp mop =
3664             do
3665                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3666                               mkForeignLabel functionName Nothing True
3667                 let mopLabelOrExpr = case mopExpr of
3668                         CmmLit (CmmLabel lbl) -> Left lbl
3669                         _ -> Right mopExpr
3670                 return (mopLabelOrExpr, reduce)
3671             where
3672                 (functionName, reduce) = case mop of
3673                     MO_F32_Exp   -> (FSLIT("exp"), True)
3674                     MO_F32_Log   -> (FSLIT("log"), True)
3675                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3676
3677                     MO_F32_Sin   -> (FSLIT("sin"), True)
3678                     MO_F32_Cos   -> (FSLIT("cos"), True)
3679                     MO_F32_Tan   -> (FSLIT("tan"), True)
3680
3681                     MO_F32_Asin  -> (FSLIT("asin"), True)
3682                     MO_F32_Acos  -> (FSLIT("acos"), True)
3683                     MO_F32_Atan  -> (FSLIT("atan"), True)
3684
3685                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3686                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3687                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3688                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3689
3690                     MO_F64_Exp   -> (FSLIT("exp"), False)
3691                     MO_F64_Log   -> (FSLIT("log"), False)
3692                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3693
3694                     MO_F64_Sin   -> (FSLIT("sin"), False)
3695                     MO_F64_Cos   -> (FSLIT("cos"), False)
3696                     MO_F64_Tan   -> (FSLIT("tan"), False)
3697
3698                     MO_F64_Asin  -> (FSLIT("asin"), False)
3699                     MO_F64_Acos  -> (FSLIT("acos"), False)
3700                     MO_F64_Atan  -> (FSLIT("atan"), False)
3701
3702                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3703                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3704                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3705                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3706                     other -> pprPanic "genCCall(ppc): unknown callish op"
3707                                     (pprCallishMachOp other)
3708
3709 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3710
3711 #endif /* powerpc_TARGET_ARCH */
3712
3713
3714 -- -----------------------------------------------------------------------------
3715 -- Generating a table-branch
3716
3717 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3718
3719 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3720 genSwitch expr ids = do
3721   (reg,e_code) <- getSomeReg expr
3722   lbl <- getNewLabelNat
3723   let
3724         jumpTable = map jumpTableEntry ids
3725         op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
3726         code = e_code `appOL` toOL [
3727                 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3728                 JMP_TBL op [ id | Just id <- ids ]
3729              ]
3730   -- in
3731   return code
3732 #elif powerpc_TARGET_ARCH
3733 genSwitch expr ids
3734   | opt_PIC
3735   = do
3736         (reg,e_code) <- getSomeReg expr
3737         tmp <- getNewRegNat I32
3738         lbl <- getNewLabelNat
3739         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3740         (tableReg,t_code) <- getSomeReg $ dynRef
3741         let
3742             jumpTable = map jumpTableEntryRel ids
3743
3744             jumpTableEntryRel Nothing
3745                 = CmmStaticLit (CmmInt 0 wordRep)
3746             jumpTableEntryRel (Just (BlockId id))
3747                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3748                 where blockLabel = mkAsmTempLabel id
3749
3750             code = e_code `appOL` t_code `appOL` toOL [
3751                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3752                             SLW tmp reg (RIImm (ImmInt 2)),
3753                             LD I32 tmp (AddrRegReg tableReg tmp),
3754                             ADD tmp tmp (RIReg tableReg),
3755                             MTCTR tmp,
3756                             BCTR [ id | Just id <- ids ]
3757                     ]
3758         return code
3759   | otherwise
3760   = do
3761         (reg,e_code) <- getSomeReg expr
3762         tmp <- getNewRegNat I32
3763         lbl <- getNewLabelNat
3764         let
3765             jumpTable = map jumpTableEntry ids
3766
3767             code = e_code `appOL` toOL [
3768                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3769                             SLW tmp reg (RIImm (ImmInt 2)),
3770                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3771                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3772                             MTCTR tmp,
3773                             BCTR [ id | Just id <- ids ]
3774                     ]
3775         return code
3776 #else
3777 genSwitch expr ids = panic "ToDo: genSwitch"
3778 #endif
3779
3780 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3781 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3782     where blockLabel = mkAsmTempLabel id
3783
3784 -- -----------------------------------------------------------------------------
3785 -- Support bits
3786 -- -----------------------------------------------------------------------------
3787
3788
3789 -- -----------------------------------------------------------------------------
3790 -- 'condIntReg' and 'condFltReg': condition codes into registers
3791
3792 -- Turn those condition codes into integers now (when they appear on
3793 -- the right hand side of an assignment).
3794 --
3795 -- (If applicable) Do not fill the delay slots here; you will confuse the
3796 -- register allocator.
3797
3798 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3799
3800 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3801
3802 #if alpha_TARGET_ARCH
3803 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3804 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3805 #endif /* alpha_TARGET_ARCH */
3806
3807 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3808
3809 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3810
3811 condIntReg cond x y = do
3812   CondCode _ cond cond_code <- condIntCode cond x y
3813   tmp <- getNewRegNat I8
3814   let
3815         code dst = cond_code `appOL` toOL [
3816                     SETCC cond (OpReg tmp),
3817                     MOV I32 (OpReg tmp) (OpReg dst),
3818                     AND I32 (OpImm (ImmInt 1)) (OpReg dst)
3819                   ]
3820                    -- NB. (1) Tha AND is needed here because the x86 only
3821                    -- sets the low byte in the SETCC instruction.
3822                    -- NB. (2) The extra temporary register is a hack to
3823                    -- work around the fact that the setcc instructions only
3824                    -- accept byte registers.  dst might not be a byte-able reg,
3825                    -- but currently all free registers are byte-able, so we're
3826                    -- guaranteed that a new temporary is byte-able.
3827   -- in
3828   return (Any I32 code)
3829
3830
3831 condFltReg cond x y = do
3832   lbl1 <- getBlockIdNat
3833   lbl2 <- getBlockIdNat
3834   CondCode _ cond cond_code <- condFltCode cond x y
3835   let
3836         code dst = cond_code `appOL` toOL [
3837             JXX cond lbl1,
3838             MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
3839             JXX ALWAYS lbl2,
3840             NEWBLOCK lbl1,
3841             MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
3842             JXX ALWAYS lbl2,
3843             NEWBLOCK lbl2]
3844                 -- SIGH, have to split up this block somehow...
3845   -- in
3846   return (Any I32 code)
3847
3848 #endif /* i386_TARGET_ARCH */
3849
3850 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3851
3852 #if sparc_TARGET_ARCH
3853
3854 condIntReg EQQ x (StInt 0)
3855   = getRegister x               `thenNat` \ register ->
3856     getNewRegNat IntRep         `thenNat` \ tmp ->
3857     let
3858         code = registerCode register tmp
3859         src  = registerName register tmp
3860         code__2 dst = code `appOL` toOL [
3861             SUB False True g0 (RIReg src) g0,
3862             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3863     in
3864     return (Any IntRep code__2)
3865
3866 condIntReg EQQ x y
3867   = getRegister x               `thenNat` \ register1 ->
3868     getRegister y               `thenNat` \ register2 ->
3869     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3870     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3871     let
3872         code1 = registerCode register1 tmp1
3873         src1  = registerName register1 tmp1
3874         code2 = registerCode register2 tmp2
3875         src2  = registerName register2 tmp2
3876         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3877             XOR False src1 (RIReg src2) dst,
3878             SUB False True g0 (RIReg dst) g0,
3879             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3880     in
3881     return (Any IntRep code__2)
3882
3883 condIntReg NE x (StInt 0)
3884   = getRegister x               `thenNat` \ register ->
3885     getNewRegNat IntRep         `thenNat` \ tmp ->
3886     let
3887         code = registerCode register tmp
3888         src  = registerName register tmp
3889         code__2 dst = code `appOL` toOL [
3890             SUB False True g0 (RIReg src) g0,
3891             ADD True False g0 (RIImm (ImmInt 0)) dst]
3892     in
3893     return (Any IntRep code__2)
3894
3895 condIntReg NE x y
3896   = getRegister x               `thenNat` \ register1 ->
3897     getRegister y               `thenNat` \ register2 ->
3898     getNewRegNat IntRep         `thenNat` \ tmp1 ->
3899     getNewRegNat IntRep         `thenNat` \ tmp2 ->
3900     let
3901         code1 = registerCode register1 tmp1
3902         src1  = registerName register1 tmp1
3903         code2 = registerCode register2 tmp2
3904         src2  = registerName register2 tmp2
3905         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3906             XOR False src1 (RIReg src2) dst,
3907             SUB False True g0 (RIReg dst) g0,
3908             ADD True False g0 (RIImm (ImmInt 0)) dst]
3909     in
3910     return (Any IntRep code__2)
3911
3912 condIntReg cond x y
3913   = getBlockIdNat               `thenNat` \ lbl1 ->
3914     getBlockIdNat               `thenNat` \ lbl2 ->
3915     condIntCode cond x y        `thenNat` \ condition ->
3916     let
3917         code = condCode condition
3918         cond = condName condition
3919         code__2 dst = code `appOL` toOL [
3920             BI cond False (ImmCLbl lbl1), NOP,
3921             OR False g0 (RIImm (ImmInt 0)) dst,
3922             BI ALWAYS False (ImmCLbl lbl2), NOP,
3923             NEWBLOCK lbl1,
3924             OR False g0 (RIImm (ImmInt 1)) dst,
3925             NEWBLOCK lbl2]
3926     in
3927     return (Any IntRep code__2)
3928
3929 condFltReg cond x y
3930   = getBlockIdNat               `thenNat` \ lbl1 ->
3931     getBlockIdNat               `thenNat` \ lbl2 ->
3932     condFltCode cond x y        `thenNat` \ condition ->
3933     let
3934         code = condCode condition
3935         cond = condName condition
3936         code__2 dst = code `appOL` toOL [
3937             NOP,
3938             BF cond False (ImmCLbl lbl1), NOP,
3939             OR False g0 (RIImm (ImmInt 0)) dst,
3940             BI ALWAYS False (ImmCLbl lbl2), NOP,
3941             NEWBLOCK lbl1,
3942             OR False g0 (RIImm (ImmInt 1)) dst,
3943             NEWBLOCK lbl2]
3944     in
3945     return (Any IntRep code__2)
3946
3947 #endif /* sparc_TARGET_ARCH */
3948
3949 #if powerpc_TARGET_ARCH
3950 condReg getCond = do
3951     lbl1 <- getBlockIdNat
3952     lbl2 <- getBlockIdNat
3953     CondCode _ cond cond_code <- getCond
3954     let
3955 {-        code dst = cond_code `appOL` toOL [
3956                 BCC cond lbl1,
3957                 LI dst (ImmInt 0),
3958                 BCC ALWAYS lbl2,
3959                 NEWBLOCK lbl1,
3960                 LI dst (ImmInt 1),
3961                 BCC ALWAYS lbl2,
3962                 NEWBLOCK lbl2
3963             ]-}
3964         code dst = cond_code
3965             `appOL` negate_code
3966             `appOL` toOL [
3967                 MFCR dst,
3968                 RLWINM dst dst (bit + 1) 31 31
3969             ]
3970
3971         negate_code | do_negate = unitOL (CRNOR bit bit bit)
3972                     | otherwise = nilOL
3973
3974         (bit, do_negate) = case cond of
3975             LTT -> (0, False)
3976             LE  -> (1, True)
3977             EQQ -> (2, False)
3978             GE  -> (0, True)
3979             GTT -> (1, False)
3980
3981             NE  -> (2, True)
3982
3983             LU  -> (0, False)
3984             LEU -> (1, True)
3985             GEU -> (0, True)
3986             GU  -> (1, False)
3987
3988     return (Any I32 code)
3989
3990 condIntReg cond x y = condReg (condIntCode cond x y)
3991 condFltReg cond x y = condReg (condFltCode cond x y)
3992 #endif /* powerpc_TARGET_ARCH */
3993
3994
3995 -- -----------------------------------------------------------------------------
3996 -- 'trivial*Code': deal with trivial instructions
3997
3998 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
3999 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4000 -- Only look for constants on the right hand side, because that's
4001 -- where the generic optimizer will have put them.
4002
4003 -- Similarly, for unary instructions, we don't have to worry about
4004 -- matching an StInt as the argument, because genericOpt will already
4005 -- have handled the constant-folding.
4006
4007 trivialCode
4008     :: MachRep
4009     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4010       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4011                      -> Maybe (Operand -> Operand -> Instr)
4012       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4013                      -> Maybe (Operand -> Operand -> Instr)
4014       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4015       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4016       ,)))))
4017     -> CmmExpr -> CmmExpr -- the two arguments
4018     -> NatM Register
4019
4020 #ifndef powerpc_TARGET_ARCH
4021 trivialFCode
4022     :: MachRep
4023     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4024       ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4025       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4026       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4027       ,))))
4028     -> CmmExpr -> CmmExpr -- the two arguments
4029     -> NatM Register
4030 #endif
4031
4032 trivialUCode
4033     :: MachRep
4034     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4035       ,IF_ARCH_i386 ((Operand -> Instr)
4036       ,IF_ARCH_x86_64 ((Operand -> Instr)
4037       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4038       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4039       ,)))))
4040     -> CmmExpr  -- the one argument
4041     -> NatM Register
4042
4043 #ifndef powerpc_TARGET_ARCH
4044 trivialUFCode
4045     :: MachRep
4046     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4047       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4048       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4049       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4050       ,))))
4051     -> CmmExpr -- the one argument
4052     -> NatM Register
4053 #endif
4054
4055 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4056
4057 #if alpha_TARGET_ARCH
4058
4059 trivialCode instr x (StInt y)
4060   | fits8Bits y
4061   = getRegister x               `thenNat` \ register ->
4062     getNewRegNat IntRep         `thenNat` \ tmp ->
4063     let
4064         code = registerCode register tmp
4065         src1 = registerName register tmp
4066         src2 = ImmInt (fromInteger y)
4067         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4068     in
4069     return (Any IntRep code__2)
4070
4071 trivialCode instr x y
4072   = getRegister x               `thenNat` \ register1 ->
4073     getRegister y               `thenNat` \ register2 ->
4074     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4075     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4076     let
4077         code1 = registerCode register1 tmp1 []
4078         src1  = registerName register1 tmp1
4079         code2 = registerCode register2 tmp2 []
4080         src2  = registerName register2 tmp2
4081         code__2 dst = asmSeqThen [code1, code2] .
4082                      mkSeqInstr (instr src1 (RIReg src2) dst)
4083     in
4084     return (Any IntRep code__2)
4085
4086 ------------
4087 trivialUCode instr x
4088   = getRegister x               `thenNat` \ register ->
4089     getNewRegNat IntRep         `thenNat` \ tmp ->
4090     let
4091         code = registerCode register tmp
4092         src  = registerName register tmp
4093         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4094     in
4095     return (Any IntRep code__2)
4096
4097 ------------
4098 trivialFCode _ instr x y
4099   = getRegister x               `thenNat` \ register1 ->
4100     getRegister y               `thenNat` \ register2 ->
4101     getNewRegNat F64    `thenNat` \ tmp1 ->
4102     getNewRegNat F64    `thenNat` \ tmp2 ->
4103     let
4104         code1 = registerCode register1 tmp1
4105         src1  = registerName register1 tmp1
4106
4107         code2 = registerCode register2 tmp2
4108         src2  = registerName register2 tmp2
4109
4110         code__2 dst = asmSeqThen [code1 [], code2 []] .
4111                       mkSeqInstr (instr src1 src2 dst)
4112     in
4113     return (Any F64 code__2)
4114
4115 trivialUFCode _ instr x
4116   = getRegister x               `thenNat` \ register ->
4117     getNewRegNat F64    `thenNat` \ tmp ->
4118     let
4119         code = registerCode register tmp
4120         src  = registerName register tmp
4121         code__2 dst = code . mkSeqInstr (instr src dst)
4122     in
4123     return (Any F64 code__2)
4124
4125 #endif /* alpha_TARGET_ARCH */
4126
4127 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4128
4129 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4130
4131 {-
4132 The Rules of the Game are:
4133
4134 * You cannot assume anything about the destination register dst;
4135   it may be anything, including a fixed reg.
4136
4137 * You may compute an operand into a fixed reg, but you may not
4138   subsequently change the contents of that fixed reg.  If you
4139   want to do so, first copy the value either to a temporary
4140   or into dst.  You are free to modify dst even if it happens
4141   to be a fixed reg -- that's not your problem.
4142
4143 * You cannot assume that a fixed reg will stay live over an
4144   arbitrary computation.  The same applies to the dst reg.
4145
4146 * Temporary regs obtained from getNewRegNat are distinct from
4147   each other and from all other regs, and stay live over
4148   arbitrary computations.
4149
4150 --------------------
4151
4152 SDM's version of The Rules:
4153
4154 * If getRegister returns Any, that means it can generate correct
4155   code which places the result in any register, period.  Even if that
4156   register happens to be read during the computation.
4157
4158   Corollary #1: this means that if you are generating code for an
4159   operation with two arbitrary operands, you cannot assign the result
4160   of the first operand into the destination register before computing
4161   the second operand.  The second operand might require the old value
4162   of the destination register.
4163
4164   Corollary #2: A function might be able to generate more efficient
4165   code if it knows the destination register is a new temporary (and
4166   therefore not read by any of the sub-computations).
4167
4168 * If getRegister returns Any, then the code it generates may modify only:
4169         (a) fresh temporaries
4170         (b) the destination register
4171         (c) known registers (eg. %ecx is used by shifts)
4172   In particular, it may *not* modify global registers, unless the global
4173   register happens to be the destination register.
4174 -}
4175
4176 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4177   | not (is64BitLit lit_a) = do
4178   b_code <- getAnyReg b
4179   let
4180        code dst
4181          = b_code dst `snocOL`
4182            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4183   -- in
4184   return (Any rep code)
4185
4186 trivialCode rep instr maybe_revinstr a b = do
4187   (b_op, b_code) <- getNonClobberedOperand b
4188   a_code <- getAnyReg a
4189   tmp <- getNewRegNat rep
4190   let
4191      -- We want the value of b to stay alive across the computation of a.
4192      -- But, we want to calculate a straight into the destination register,
4193      -- because the instruction only has two operands (dst := dst `op` src).
4194      -- The troublesome case is when the result of b is in the same register
4195      -- as the destination reg.  In this case, we have to save b in a
4196      -- new temporary across the computation of a.
4197      code dst
4198         | dst `clashesWith` b_op =
4199                 b_code `appOL`
4200                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4201                 a_code dst `snocOL`
4202                 instr (OpReg tmp) (OpReg dst)
4203         | otherwise =
4204                 b_code `appOL`
4205                 a_code dst `snocOL`
4206                 instr b_op (OpReg dst)
4207   -- in
4208   return (Any rep code)
4209  where
4210   reg `clashesWith` OpReg reg2   = reg == reg2
4211   reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
4212   reg `clashesWith` _            = False
4213
4214 -----------
4215
4216 trivialUCode rep instr x = do
4217   x_code <- getAnyReg x
4218   let
4219      code dst =
4220         x_code dst `snocOL`
4221         instr (OpReg dst)
4222   -- in
4223   return (Any rep code)
4224
4225 -----------
4226
4227 #if i386_TARGET_ARCH
4228
4229 trivialFCode pk instr x y = do
4230   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4231   (y_reg, y_code) <- getSomeReg y
4232   let
4233      code dst =
4234         x_code `appOL`
4235         y_code `snocOL`
4236         instr pk x_reg y_reg dst
4237   -- in
4238   return (Any pk code)
4239
4240 #endif
4241
4242 #if x86_64_TARGET_ARCH
4243
4244 -- We use the 2-operand SSE2 floating pt instructions.  ToDo: improve on
4245 -- this by using some of the special cases in trivialCode above.
4246 trivialFCode pk instr x y = do
4247   (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
4248   x_code <- getAnyReg x
4249   let
4250      code dst =
4251         y_code `appOL`
4252         x_code dst `snocOL`
4253         instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
4254                  (IF_ARCH_x86_64(OpReg,) dst)
4255   -- in
4256   return (Any pk code)
4257
4258 #endif
4259
4260 -------------
4261
4262 trivialUFCode rep instr x = do
4263   (x_reg, x_code) <- getSomeReg x
4264   let
4265      code dst =
4266         x_code `snocOL`
4267         instr x_reg dst
4268   -- in
4269   return (Any rep code)
4270
4271 #endif /* i386_TARGET_ARCH */
4272
4273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4274
4275 #if sparc_TARGET_ARCH
4276
4277 trivialCode instr x (StInt y)
4278   | fits13Bits y
4279   = getRegister x               `thenNat` \ register ->
4280     getNewRegNat IntRep         `thenNat` \ tmp ->
4281     let
4282         code = registerCode register tmp
4283         src1 = registerName register tmp
4284         src2 = ImmInt (fromInteger y)
4285         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4286     in
4287     return (Any IntRep code__2)
4288
4289 trivialCode instr x y
4290   = getRegister x               `thenNat` \ register1 ->
4291     getRegister y               `thenNat` \ register2 ->
4292     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4293     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4294     let
4295         code1 = registerCode register1 tmp1
4296         src1  = registerName register1 tmp1
4297         code2 = registerCode register2 tmp2
4298         src2  = registerName register2 tmp2
4299         code__2 dst = code1 `appOL` code2 `snocOL`
4300                       instr src1 (RIReg src2) dst
4301     in
4302     return (Any IntRep code__2)
4303
4304 ------------
4305 trivialFCode pk instr x y
4306   = getRegister x               `thenNat` \ register1 ->
4307     getRegister y               `thenNat` \ register2 ->
4308     getNewRegNat (registerRep register1)
4309                                 `thenNat` \ tmp1 ->
4310     getNewRegNat (registerRep register2)
4311                                 `thenNat` \ tmp2 ->
4312     getNewRegNat F64    `thenNat` \ tmp ->
4313     let
4314         promote x = FxTOy F DF x tmp
4315
4316         pk1   = registerRep register1
4317         code1 = registerCode register1 tmp1
4318         src1  = registerName register1 tmp1
4319
4320         pk2   = registerRep register2
4321         code2 = registerCode register2 tmp2
4322         src2  = registerName register2 tmp2
4323
4324         code__2 dst =
4325                 if pk1 == pk2 then
4326                     code1 `appOL` code2 `snocOL`
4327                     instr (primRepToSize pk) src1 src2 dst
4328                 else if pk1 == F32 then
4329                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4330                     instr DF tmp src2 dst
4331                 else
4332                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4333                     instr DF src1 tmp dst
4334     in
4335     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4336
4337 ------------
4338 trivialUCode instr x
4339   = getRegister x               `thenNat` \ register ->
4340     getNewRegNat IntRep         `thenNat` \ tmp ->
4341     let
4342         code = registerCode register tmp
4343         src  = registerName register tmp
4344         code__2 dst = code `snocOL` instr (RIReg src) dst
4345     in
4346     return (Any IntRep code__2)
4347
4348 -------------
4349 trivialUFCode pk instr x
4350   = getRegister x               `thenNat` \ register ->
4351     getNewRegNat pk             `thenNat` \ tmp ->
4352     let
4353         code = registerCode register tmp
4354         src  = registerName register tmp
4355         code__2 dst = code `snocOL` instr src dst
4356     in
4357     return (Any pk code__2)
4358
4359 #endif /* sparc_TARGET_ARCH */
4360
4361 #if powerpc_TARGET_ARCH
4362
4363 {-
4364 Wolfgang's PowerPC version of The Rules:
4365
4366 A slightly modified version of The Rules to take advantage of the fact
4367 that PowerPC instructions work on all registers and don't implicitly
4368 clobber any fixed registers.
4369
4370 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4371
4372 * If getRegister returns Any, then the code it generates may modify only:
4373         (a) fresh temporaries
4374         (b) the destination register
4375   It may *not* modify global registers, unless the global
4376   register happens to be the destination register.
4377   It may not clobber any other registers. In fact, only ccalls clobber any
4378   fixed registers.
4379   Also, it may not modify the counter register (used by genCCall).
4380
4381   Corollary: If a getRegister for a subexpression returns Fixed, you need
4382   not move it to a fresh temporary before evaluating the next subexpression.
4383   The Fixed register won't be modified.
4384   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4385
4386 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4387   the value of the destination register.
4388 -}
4389
4390 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4391     | Just imm <- makeImmediate rep signed y
4392     = do
4393         (src1, code1) <- getSomeReg x
4394         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4395         return (Any rep code)
4396
4397 trivialCode rep signed instr x y = do
4398     (src1, code1) <- getSomeReg x
4399     (src2, code2) <- getSomeReg y
4400     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4401     return (Any rep code)
4402
4403 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4404     -> CmmExpr -> CmmExpr -> NatM Register
4405 trivialCodeNoImm rep instr x y = do
4406     (src1, code1) <- getSomeReg x
4407     (src2, code2) <- getSomeReg y
4408     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4409     return (Any rep code)
4410
4411 trivialUCode rep instr x = do
4412     (src, code) <- getSomeReg x
4413     let code' dst = code `snocOL` instr dst src
4414     return (Any rep code')
4415
4416 -- There is no "remainder" instruction on the PPC, so we have to do
4417 -- it the hard way.
4418 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4419
4420 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4421     -> CmmExpr -> CmmExpr -> NatM Register
4422 remainderCode rep div x y = do
4423     (src1, code1) <- getSomeReg x
4424     (src2, code2) <- getSomeReg y
4425     let code dst = code1 `appOL` code2 `appOL` toOL [
4426                 div dst src1 src2,
4427                 MULLW dst dst (RIReg src2),
4428                 SUBF dst dst src1
4429             ]
4430     return (Any rep code)
4431
4432 #endif /* powerpc_TARGET_ARCH */
4433
4434
4435 -- -----------------------------------------------------------------------------
4436 --  Coercing to/from integer/floating-point...
4437
4438 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4439 -- conversions.  We have to store temporaries in memory to move
4440 -- between the integer and the floating point register sets.
4441
4442 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4443 -- pretend, on sparc at least, that double and float regs are seperate
4444 -- kinds, so the value has to be computed into one kind before being
4445 -- explicitly "converted" to live in the other kind.
4446
4447 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4448 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4449
4450 #if sparc_TARGET_ARCH
4451 coerceDbl2Flt :: CmmExpr -> NatM Register
4452 coerceFlt2Dbl :: CmmExpr -> NatM Register
4453 #endif
4454
4455 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4456
4457 #if alpha_TARGET_ARCH
4458
4459 coerceInt2FP _ x
4460   = getRegister x               `thenNat` \ register ->
4461     getNewRegNat IntRep         `thenNat` \ reg ->
4462     let
4463         code = registerCode register reg
4464         src  = registerName register reg
4465
4466         code__2 dst = code . mkSeqInstrs [
4467             ST Q src (spRel 0),
4468             LD TF dst (spRel 0),
4469             CVTxy Q TF dst dst]
4470     in
4471     return (Any F64 code__2)
4472
4473 -------------
4474 coerceFP2Int x
4475   = getRegister x               `thenNat` \ register ->
4476     getNewRegNat F64    `thenNat` \ tmp ->
4477     let
4478         code = registerCode register tmp
4479         src  = registerName register tmp
4480
4481         code__2 dst = code . mkSeqInstrs [
4482             CVTxy TF Q src tmp,
4483             ST TF tmp (spRel 0),
4484             LD Q dst (spRel 0)]
4485     in
4486     return (Any IntRep code__2)
4487
4488 #endif /* alpha_TARGET_ARCH */
4489
4490 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4491
4492 #if i386_TARGET_ARCH
4493
4494 coerceInt2FP from to x = do
4495   (x_reg, x_code) <- getSomeReg x
4496   let
4497         opc  = case to of F32 -> GITOF; F64 -> GITOD
4498         code dst = x_code `snocOL` opc x_reg dst
4499         -- ToDo: works for non-I32 reps?
4500   -- in
4501   return (Any to code)
4502
4503 ------------
4504
4505 coerceFP2Int from to x = do
4506   (x_reg, x_code) <- getSomeReg x
4507   let
4508         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4509         code dst = x_code `snocOL` opc x_reg dst
4510         -- ToDo: works for non-I32 reps?
4511   -- in
4512   return (Any to code)
4513
4514 #endif /* i386_TARGET_ARCH */
4515
4516 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4517
4518 #if x86_64_TARGET_ARCH
4519
4520 coerceFP2Int from to x = do
4521   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4522   let
4523         opc  = case to of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4524         code dst = x_code `snocOL` opc x_op dst
4525   -- in
4526   return (Any to code) -- works even if the destination rep is <I32
4527
4528 coerceInt2FP from to x = do
4529   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4530   let
4531         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4532         code dst = x_code `snocOL` opc x_op dst
4533   -- in
4534   return (Any to code) -- works even if the destination rep is <I32
4535
4536 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4537 coerceFP2FP to x = do
4538   (x_reg, x_code) <- getSomeReg x
4539   let
4540         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4541         code dst = x_code `snocOL` opc x_reg dst
4542   -- in
4543   return (Any to code)
4544
4545 #endif
4546
4547 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4548
4549 #if sparc_TARGET_ARCH
4550
4551 coerceInt2FP pk x
4552   = getRegister x               `thenNat` \ register ->
4553     getNewRegNat IntRep         `thenNat` \ reg ->
4554     let
4555         code = registerCode register reg
4556         src  = registerName register reg
4557
4558         code__2 dst = code `appOL` toOL [
4559             ST W src (spRel (-2)),
4560             LD W (spRel (-2)) dst,
4561             FxTOy W (primRepToSize pk) dst dst]
4562     in
4563     return (Any pk code__2)
4564
4565 ------------
4566 coerceFP2Int fprep x
4567   = ASSERT(fprep == F64 || fprep == F32)
4568     getRegister x               `thenNat` \ register ->
4569     getNewRegNat fprep          `thenNat` \ reg ->
4570     getNewRegNat F32    `thenNat` \ tmp ->
4571     let
4572         code = registerCode register reg
4573         src  = registerName register reg
4574         code__2 dst = code `appOL` toOL [
4575             FxTOy (primRepToSize fprep) W src tmp,
4576             ST W tmp (spRel (-2)),
4577             LD W (spRel (-2)) dst]
4578     in
4579     return (Any IntRep code__2)
4580
4581 ------------
4582 coerceDbl2Flt x
4583   = getRegister x               `thenNat` \ register ->
4584     getNewRegNat F64    `thenNat` \ tmp ->
4585     let code = registerCode register tmp
4586         src  = registerName register tmp
4587     in
4588         return (Any F32
4589                        (\dst -> code `snocOL` FxTOy DF F src dst))
4590
4591 ------------
4592 coerceFlt2Dbl x
4593   = getRegister x               `thenNat` \ register ->
4594     getNewRegNat F32    `thenNat` \ tmp ->
4595     let code = registerCode register tmp
4596         src  = registerName register tmp
4597     in
4598         return (Any F64
4599                        (\dst -> code `snocOL` FxTOy F DF src dst))
4600
4601 #endif /* sparc_TARGET_ARCH */
4602
4603 #if powerpc_TARGET_ARCH
4604 coerceInt2FP fromRep toRep x = do
4605     (src, code) <- getSomeReg x
4606     lbl <- getNewLabelNat
4607     itmp <- getNewRegNat I32
4608     ftmp <- getNewRegNat F64
4609     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4610     Amode addr addr_code <- getAmode dynRef
4611     let
4612         code' dst = code `appOL` maybe_exts `appOL` toOL [
4613                 LDATA ReadOnlyData
4614                                 [CmmDataLabel lbl,
4615                                  CmmStaticLit (CmmInt 0x43300000 I32),
4616                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4617                 XORIS itmp src (ImmInt 0x8000),
4618                 ST I32 itmp (spRel 3),
4619                 LIS itmp (ImmInt 0x4330),
4620                 ST I32 itmp (spRel 2),
4621                 LD F64 ftmp (spRel 2)
4622             ] `appOL` addr_code `appOL` toOL [
4623                 LD F64 dst addr,
4624                 FSUB F64 dst ftmp dst
4625             ] `appOL` maybe_frsp dst
4626
4627         maybe_exts = case fromRep of
4628                         I8 ->  unitOL $ EXTS I8 src src
4629                         I16 -> unitOL $ EXTS I16 src src
4630                         I32 -> nilOL
4631         maybe_frsp dst = case toRep of
4632                         F32 -> unitOL $ FRSP dst dst
4633                         F64 -> nilOL
4634     return (Any toRep code')
4635
4636 coerceFP2Int fromRep toRep x = do
4637     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4638     (src, code) <- getSomeReg x
4639     tmp <- getNewRegNat F64
4640     let
4641         code' dst = code `appOL` toOL [
4642                 -- convert to int in FP reg
4643             FCTIWZ tmp src,
4644                 -- store value (64bit) from FP to stack
4645             ST F64 tmp (spRel 2),
4646                 -- read low word of value (high word is undefined)
4647             LD I32 dst (spRel 3)]
4648     return (Any toRep code')
4649 #endif /* powerpc_TARGET_ARCH */
4650
4651
4652 -- -----------------------------------------------------------------------------
4653 -- eXTRA_STK_ARGS_HERE
4654
4655 -- We (allegedly) put the first six C-call arguments in registers;
4656 -- where do we start putting the rest of them?
4657
4658 -- Moved from MachInstrs (SDM):
4659
4660 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4661 eXTRA_STK_ARGS_HERE :: Int
4662 eXTRA_STK_ARGS_HERE
4663   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4664 #endif
4665