compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32
  33 -- The rest:
  34 import StaticFlags      ( opt_PIC )
  35 import ForeignCall      ( CCallConv(..) )
  36 import OrdList
  37 import Pretty
  38 import Outputable
  39 import FastString
  40 import FastTypes        ( isFastTrue )
  41 import Constants        ( wORD_SIZE )
  42
  43 #ifdef DEBUG
  44 import Outputable       ( assertPanic )
  45 import TRACE            ( trace )
  46 #endif
  47
  48 import Control.Monad    ( mapAndUnzipM )
  49 import Maybe            ( fromJust )
  50 import DATA_BITS
  51 import DATA_WORD
  52
  53 -- -----------------------------------------------------------------------------
  54 -- Top-level of the instruction selector
  55
  56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  57 -- They are really trees of insns to facilitate fast appending, where a
  58 -- left-to-right traversal (pre-order?) yields the insns in the correct
  59 -- order.
  60
  61 type InstrBlock = OrdList Instr
  62
  63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  64 cmmTopCodeGen (CmmProc info lab params blocks) = do
  65   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  66   picBaseMb <- getPicBaseMaybeNat
  67   let proc = CmmProc info lab params (concat nat_blocks)
  68       tops = proc : concat statics
  69   case picBaseMb of
  70       Just picBase -> initializePicBase picBase tops
  71       Nothing -> return tops
  72
  73 cmmTopCodeGen (CmmData sec dat) = do
  74   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  75
  76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  77 basicBlockCodeGen (BasicBlock id stmts) = do
  78   instrs <- stmtsToInstrs stmts
  79   -- code generation may introduce new basic block boundaries, which
  80   -- are indicated by the NEWBLOCK instruction.  We must split up the
  81   -- instruction stream into basic blocks again.  Also, we extract
  82   -- LDATAs here too.
  83   let
  84         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  85
  86         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  87           = ([], BasicBlock id instrs : blocks, statics)
  88         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  89           = (instrs, blocks, CmmData sec dat:statics)
  90         mkBlocks instr (instrs,blocks,statics)
  91           = (instr:instrs, blocks, statics)
  92   -- in
  93   return (BasicBlock id top : other_blocks, statics)
  94
  95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  96 stmtsToInstrs stmts
  97    = do instrss <- mapM stmtToInstrs stmts
  98         return (concatOL instrss)
  99
 100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 101 stmtToInstrs stmt = case stmt of
 102     CmmNop         -> return nilOL
 103     CmmComment s   -> return (unitOL (COMMENT s))
 104
 105     CmmAssign reg src
 106       | isFloatingRep kind -> assignReg_FltCode kind reg src
 107 #if WORD_SIZE_IN_BITS==32
 108       | kind == I64        -> assignReg_I64Code      reg src
 109 #endif
 110       | otherwise          -> assignReg_IntCode kind reg src
 111         where kind = cmmRegRep reg
 112
 113     CmmStore addr src
 114       | isFloatingRep kind -> assignMem_FltCode kind addr src
 115 #if WORD_SIZE_IN_BITS==32
 116       | kind == I64      -> assignMem_I64Code      addr src
 117 #endif
 118       | otherwise        -> assignMem_IntCode kind addr src
 119         where kind = cmmExprRep src
 120
 121     CmmCall target result_regs args vols
 122        -> genCCall target result_regs args vols
 123
 124     CmmBranch id          -> genBranch id
 125     CmmCondBranch arg id  -> genCondJump id arg
 126     CmmSwitch arg ids     -> genSwitch arg ids
 127     CmmJump arg params    -> genJump arg
 128
 129 -- -----------------------------------------------------------------------------
 130 -- General things for putting together code sequences
 131
 132 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 133 -- CmmExprs into CmmRegOff?
 134 mangleIndexTree :: CmmExpr -> CmmExpr
 135 mangleIndexTree (CmmRegOff reg off)
 136   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 137   where rep = cmmRegRep reg
 138
 139 -- -----------------------------------------------------------------------------
 140 --  Code gen for 64-bit arithmetic on 32-bit platforms
 141
 142 {-
 143 Simple support for generating 64-bit code (ie, 64 bit values and 64
 144 bit assignments) on 32-bit platforms.  Unlike the main code generator
 145 we merely shoot for generating working code as simply as possible, and
 146 pay little attention to code quality.  Specifically, there is no
 147 attempt to deal cleverly with the fixed-vs-floating register
 148 distinction; all values are generated into (pairs of) floating
 149 registers, even if this would mean some redundant reg-reg moves as a
 150 result.  Only one of the VRegUniques is returned, since it will be
 151 of the VRegUniqueLo form, and the upper-half VReg can be determined
 152 by applying getHiVRegFromLo to it.
 153 -}
 154
 155 data ChildCode64        -- a.k.a "Register64"
 156    = ChildCode64
 157         InstrBlock      -- code
 158         Reg             -- the lower 32-bit temporary which contains the
 159                         -- result; use getHiVRegFromLo to find the other
 160                         -- VRegUnique.  Rules of this simplified insn
 161                         -- selection game are therefore that the returned
 162                         -- Reg may be modified
 163
 164 #if WORD_SIZE_IN_BITS==32
 165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 166 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 167 #endif
 168
 169 #ifndef x86_64_TARGET_ARCH
 170 iselExpr64        :: CmmExpr -> NatM ChildCode64
 171 #endif
 172
 173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 174
 175 #if i386_TARGET_ARCH
 176
 177 assignMem_I64Code addrTree valueTree = do
 178   Amode addr addr_code <- getAmode addrTree
 179   ChildCode64 vcode rlo <- iselExpr64 valueTree
 180   let
 181         rhi = getHiVRegFromLo rlo
 182
 183         -- Little-endian store
 184         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 185         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 186   -- in
 187   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 188
 189
 190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 191    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 192    let
 193          r_dst_lo = mkVReg u_dst I32
 194          r_dst_hi = getHiVRegFromLo r_dst_lo
 195          r_src_hi = getHiVRegFromLo r_src_lo
 196          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 197          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 198    -- in
 199    return (
 200         vcode `snocOL` mov_lo `snocOL` mov_hi
 201      )
 202
 203 assignReg_I64Code lvalue valueTree
 204    = panic "assignReg_I64Code(i386): invalid lvalue"
 205
 206 ------------
 207
 208 iselExpr64 (CmmLit (CmmInt i _)) = do
 209   (rlo,rhi) <- getNewRegPairNat I32
 210   let
 211         r = fromIntegral (fromIntegral i :: Word32)
 212         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 213         code = toOL [
 214                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 215                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 216                 ]
 217   -- in
 218   return (ChildCode64 code rlo)
 219
 220 iselExpr64 (CmmLoad addrTree I64) = do
 221    Amode addr addr_code <- getAmode addrTree
 222    (rlo,rhi) <- getNewRegPairNat I32
 223    let
 224         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 225         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 226    -- in
 227    return (
 228             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 229                         rlo
 230      )
 231
 232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 233    = return (ChildCode64 nilOL (mkVReg vu I32))
 234
 235 -- we handle addition, but rather badly
 236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 237    ChildCode64 code1 r1lo <- iselExpr64 e1
 238    (rlo,rhi) <- getNewRegPairNat I32
 239    let
 240         r = fromIntegral (fromIntegral i :: Word32)
 241         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 242         r1hi = getHiVRegFromLo r1lo
 243         code =  code1 `appOL`
 244                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 245                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 246                        MOV I32 (OpReg r1hi) (OpReg rhi),
 247                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 248    -- in
 249    return (ChildCode64 code rlo)
 250
 251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 252    ChildCode64 code1 r1lo <- iselExpr64 e1
 253    ChildCode64 code2 r2lo <- iselExpr64 e2
 254    (rlo,rhi) <- getNewRegPairNat I32
 255    let
 256         r1hi = getHiVRegFromLo r1lo
 257         r2hi = getHiVRegFromLo r2lo
 258         code =  code1 `appOL`
 259                 code2 `appOL`
 260                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 261                        ADD I32 (OpReg r2lo) (OpReg rlo),
 262                        MOV I32 (OpReg r1hi) (OpReg rhi),
 263                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 264    -- in
 265    return (ChildCode64 code rlo)
 266
 267 iselExpr64 expr
 268    = pprPanic "iselExpr64(i386)" (ppr expr)
 269
 270 #endif /* i386_TARGET_ARCH */
 271
 272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 273
 274 #if sparc_TARGET_ARCH
 275
 276 assignMem_I64Code addrTree valueTree = do
 277      Amode addr addr_code <- getAmode addrTree
 278      ChildCode64 vcode rlo <- iselExpr64 valueTree
 279      (src, code) <- getSomeReg addrTree
 280      let
 281          rhi = getHiVRegFromLo rlo
 282          -- Big-endian store
 283          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 284          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 285      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 286
 287 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 288      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 289      let
 290          r_dst_lo = mkVReg u_dst pk
 291          r_dst_hi = getHiVRegFromLo r_dst_lo
 292          r_src_hi = getHiVRegFromLo r_src_lo
 293          mov_lo = mkMOV r_src_lo r_dst_lo
 294          mov_hi = mkMOV r_src_hi r_dst_hi
 295          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 296      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 297 assignReg_I64Code lvalue valueTree
 298    = panic "assignReg_I64Code(sparc): invalid lvalue"
 299
 300
 301 -- Don't delete this -- it's very handy for debugging.
 302 --iselExpr64 expr
 303 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 304 --   = panic "iselExpr64(???)"
 305
 306 iselExpr64 (CmmLoad addrTree I64) = do
 307      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 308      rlo <- getNewRegNat I32
 309      let rhi = getHiVRegFromLo rlo
 310          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 311          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 312      return (
 313             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 314                          rlo
 315           )
 316
 317 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 318      r_dst_lo <-  getNewRegNat I32
 319      let r_dst_hi = getHiVRegFromLo r_dst_lo
 320          r_src_lo = mkVReg uq I32
 321          r_src_hi = getHiVRegFromLo r_src_lo
 322          mov_lo = mkMOV r_src_lo r_dst_lo
 323          mov_hi = mkMOV r_src_hi r_dst_hi
 324          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 325      return (
 326             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 327          )
 328
 329 iselExpr64 expr
 330    = pprPanic "iselExpr64(sparc)" (ppr expr)
 331
 332 #endif /* sparc_TARGET_ARCH */
 333
 334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 335
 336 #if powerpc_TARGET_ARCH
 337
 338 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 339 getI64Amodes addrTree = do
 340     Amode hi_addr addr_code <- getAmode addrTree
 341     case addrOffset hi_addr 4 of
 342         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 343         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 344                            return (AddrRegImm hi_ptr (ImmInt 0),
 345                                    AddrRegImm hi_ptr (ImmInt 4),
 346                                    code)
 347
 348 assignMem_I64Code addrTree valueTree = do
 349         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 350         ChildCode64 vcode rlo <- iselExpr64 valueTree
 351         let
 352                 rhi = getHiVRegFromLo rlo
 353
 354                 -- Big-endian store
 355                 mov_hi = ST I32 rhi hi_addr
 356                 mov_lo = ST I32 rlo lo_addr
 357         -- in
 358         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 359
 360 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 361    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 362    let
 363          r_dst_lo = mkVReg u_dst I32
 364          r_dst_hi = getHiVRegFromLo r_dst_lo
 365          r_src_hi = getHiVRegFromLo r_src_lo
 366          mov_lo = MR r_dst_lo r_src_lo
 367          mov_hi = MR r_dst_hi r_src_hi
 368    -- in
 369    return (
 370         vcode `snocOL` mov_lo `snocOL` mov_hi
 371      )
 372
 373 assignReg_I64Code lvalue valueTree
 374    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 375
 376
 377 -- Don't delete this -- it's very handy for debugging.
 378 --iselExpr64 expr
 379 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 380 --   = panic "iselExpr64(???)"
 381
 382 iselExpr64 (CmmLoad addrTree I64) = do
 383     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 384     (rlo, rhi) <- getNewRegPairNat I32
 385     let mov_hi = LD I32 rhi hi_addr
 386         mov_lo = LD I32 rlo lo_addr
 387     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 388                          rlo
 389
 390 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 391    = return (ChildCode64 nilOL (mkVReg vu I32))
 392
 393 iselExpr64 (CmmLit (CmmInt i _)) = do
 394   (rlo,rhi) <- getNewRegPairNat I32
 395   let
 396         half0 = fromIntegral (fromIntegral i :: Word16)
 397         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 398         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 399         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 400
 401         code = toOL [
 402                 LIS rlo (ImmInt half1),
 403                 OR rlo rlo (RIImm $ ImmInt half0),
 404                 LIS rhi (ImmInt half3),
 405                 OR rlo rlo (RIImm $ ImmInt half2)
 406                 ]
 407   -- in
 408   return (ChildCode64 code rlo)
 409
 410 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 411    ChildCode64 code1 r1lo <- iselExpr64 e1
 412    ChildCode64 code2 r2lo <- iselExpr64 e2
 413    (rlo,rhi) <- getNewRegPairNat I32
 414    let
 415         r1hi = getHiVRegFromLo r1lo
 416         r2hi = getHiVRegFromLo r2lo
 417         code =  code1 `appOL`
 418                 code2 `appOL`
 419                 toOL [ ADDC rlo r1lo r2lo,
 420                        ADDE rhi r1hi r2hi ]
 421    -- in
 422    return (ChildCode64 code rlo)
 423
 424 iselExpr64 expr
 425    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 426
 427 #endif /* powerpc_TARGET_ARCH */
 428
 429
 430 -- -----------------------------------------------------------------------------
 431 -- The 'Register' type
 432
 433 -- 'Register's passed up the tree.  If the stix code forces the register
 434 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 435 -- otherwise, it comes out as @Any@, and the parent can decide which
 436 -- register to put it in.
 437
 438 data Register
 439   = Fixed   MachRep Reg InstrBlock
 440   | Any     MachRep (Reg -> InstrBlock)
 441
 442 swizzleRegisterRep :: Register -> MachRep -> Register
 443 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 444 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 445
 446
 447 -- -----------------------------------------------------------------------------
 448 -- Utils based on getRegister, below
 449
 450 -- The dual to getAnyReg: compute an expression into a register, but
 451 -- we don't mind which one it is.
 452 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 453 getSomeReg expr = do
 454   r <- getRegister expr
 455   case r of
 456     Any rep code -> do
 457         tmp <- getNewRegNat rep
 458         return (tmp, code tmp)
 459     Fixed _ reg code ->
 460         return (reg, code)
 461
 462 -- -----------------------------------------------------------------------------
 463 -- Grab the Reg for a CmmReg
 464
 465 getRegisterReg :: CmmReg -> Reg
 466
 467 getRegisterReg (CmmLocal (LocalReg u pk))
 468   = mkVReg u pk
 469
 470 getRegisterReg (CmmGlobal mid)
 471   = case get_GlobalReg_reg_or_addr mid of
 472        Left (RealReg rrno) -> RealReg rrno
 473        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 474           -- By this stage, the only MagicIds remaining should be the
 475           -- ones which map to a real machine register on this
 476           -- platform.  Hence ...
 477
 478
 479 -- -----------------------------------------------------------------------------
 480 -- Generate code to get a subtree into a Register
 481
 482 -- Don't delete this -- it's very handy for debugging.
 483 --getRegister expr
 484 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 485 --   = panic "getRegister(???)"
 486
 487 getRegister :: CmmExpr -> NatM Register
 488
 489 getRegister (CmmReg (CmmGlobal PicBaseReg))
 490   = do
 491       reg <- getPicBaseNat wordRep
 492       return (Fixed wordRep reg nilOL)
 493
 494 getRegister (CmmReg reg)
 495   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 496
 497 getRegister tree@(CmmRegOff _ _)
 498   = getRegister (mangleIndexTree tree)
 499
 500 -- end of machine-"independent" bit; here we go on the rest...
 501
 502 #if alpha_TARGET_ARCH
 503
 504 getRegister (StDouble d)
 505   = getBlockIdNat                   `thenNat` \ lbl ->
 506     getNewRegNat PtrRep             `thenNat` \ tmp ->
 507     let code dst = mkSeqInstrs [
 508             LDATA RoDataSegment lbl [
 509                     DATA TF [ImmLab (rational d)]
 510                 ],
 511             LDA tmp (AddrImm (ImmCLbl lbl)),
 512             LD TF dst (AddrReg tmp)]
 513     in
 514         return (Any F64 code)
 515
 516 getRegister (StPrim primop [x]) -- unary PrimOps
 517   = case primop of
 518       IntNegOp -> trivialUCode (NEG Q False) x
 519
 520       NotOp    -> trivialUCode NOT x
 521
 522       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 523       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 524
 525       OrdOp -> coerceIntCode IntRep x
 526       ChrOp -> chrCode x
 527
 528       Float2IntOp  -> coerceFP2Int    x
 529       Int2FloatOp  -> coerceInt2FP pr x
 530       Double2IntOp -> coerceFP2Int    x
 531       Int2DoubleOp -> coerceInt2FP pr x
 532
 533       Double2FloatOp -> coerceFltCode x
 534       Float2DoubleOp -> coerceFltCode x
 535
 536       other_op -> getRegister (StCall fn CCallConv F64 [x])
 537         where
 538           fn = case other_op of
 539                  FloatExpOp    -> FSLIT("exp")
 540                  FloatLogOp    -> FSLIT("log")
 541                  FloatSqrtOp   -> FSLIT("sqrt")
 542                  FloatSinOp    -> FSLIT("sin")
 543                  FloatCosOp    -> FSLIT("cos")
 544                  FloatTanOp    -> FSLIT("tan")
 545                  FloatAsinOp   -> FSLIT("asin")
 546                  FloatAcosOp   -> FSLIT("acos")
 547                  FloatAtanOp   -> FSLIT("atan")
 548                  FloatSinhOp   -> FSLIT("sinh")
 549                  FloatCoshOp   -> FSLIT("cosh")
 550                  FloatTanhOp   -> FSLIT("tanh")
 551                  DoubleExpOp   -> FSLIT("exp")
 552                  DoubleLogOp   -> FSLIT("log")
 553                  DoubleSqrtOp  -> FSLIT("sqrt")
 554                  DoubleSinOp   -> FSLIT("sin")
 555                  DoubleCosOp   -> FSLIT("cos")
 556                  DoubleTanOp   -> FSLIT("tan")
 557                  DoubleAsinOp  -> FSLIT("asin")
 558                  DoubleAcosOp  -> FSLIT("acos")
 559                  DoubleAtanOp  -> FSLIT("atan")
 560                  DoubleSinhOp  -> FSLIT("sinh")
 561                  DoubleCoshOp  -> FSLIT("cosh")
 562                  DoubleTanhOp  -> FSLIT("tanh")
 563   where
 564     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 565
 566 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 567   = case primop of
 568       CharGtOp -> trivialCode (CMP LTT) y x
 569       CharGeOp -> trivialCode (CMP LE) y x
 570       CharEqOp -> trivialCode (CMP EQQ) x y
 571       CharNeOp -> int_NE_code x y
 572       CharLtOp -> trivialCode (CMP LTT) x y
 573       CharLeOp -> trivialCode (CMP LE) x y
 574
 575       IntGtOp  -> trivialCode (CMP LTT) y x
 576       IntGeOp  -> trivialCode (CMP LE) y x
 577       IntEqOp  -> trivialCode (CMP EQQ) x y
 578       IntNeOp  -> int_NE_code x y
 579       IntLtOp  -> trivialCode (CMP LTT) x y
 580       IntLeOp  -> trivialCode (CMP LE) x y
 581
 582       WordGtOp -> trivialCode (CMP ULT) y x
 583       WordGeOp -> trivialCode (CMP ULE) x y
 584       WordEqOp -> trivialCode (CMP EQQ)  x y
 585       WordNeOp -> int_NE_code x y
 586       WordLtOp -> trivialCode (CMP ULT) x y
 587       WordLeOp -> trivialCode (CMP ULE) x y
 588
 589       AddrGtOp -> trivialCode (CMP ULT) y x
 590       AddrGeOp -> trivialCode (CMP ULE) y x
 591       AddrEqOp -> trivialCode (CMP EQQ)  x y
 592       AddrNeOp -> int_NE_code x y
 593       AddrLtOp -> trivialCode (CMP ULT) x y
 594       AddrLeOp -> trivialCode (CMP ULE) x y
 595
 596       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 597       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 598       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 599       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 600       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 601       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 602
 603       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 604       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 605       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 606       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 607       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 608       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 609
 610       IntAddOp  -> trivialCode (ADD Q False) x y
 611       IntSubOp  -> trivialCode (SUB Q False) x y
 612       IntMulOp  -> trivialCode (MUL Q False) x y
 613       IntQuotOp -> trivialCode (DIV Q False) x y
 614       IntRemOp  -> trivialCode (REM Q False) x y
 615
 616       WordAddOp  -> trivialCode (ADD Q False) x y
 617       WordSubOp  -> trivialCode (SUB Q False) x y
 618       WordMulOp  -> trivialCode (MUL Q False) x y
 619       WordQuotOp -> trivialCode (DIV Q True) x y
 620       WordRemOp  -> trivialCode (REM Q True) x y
 621
 622       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 623       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 624       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 625       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 626
 627       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 628       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 629       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 630       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 631
 632       AddrAddOp  -> trivialCode (ADD Q False) x y
 633       AddrSubOp  -> trivialCode (SUB Q False) x y
 634       AddrRemOp  -> trivialCode (REM Q True) x y
 635
 636       AndOp  -> trivialCode AND x y
 637       OrOp   -> trivialCode OR  x y
 638       XorOp  -> trivialCode XOR x y
 639       SllOp  -> trivialCode SLL x y
 640       SrlOp  -> trivialCode SRL x y
 641
 642       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 643       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 644       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 645
 646       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 647       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 648   where
 649     {- ------------------------------------------------------------
 650         Some bizarre special code for getting condition codes into
 651         registers.  Integer non-equality is a test for equality
 652         followed by an XOR with 1.  (Integer comparisons always set
 653         the result register to 0 or 1.)  Floating point comparisons of
 654         any kind leave the result in a floating point register, so we
 655         need to wrangle an integer register out of things.
 656     -}
 657     int_NE_code :: StixTree -> StixTree -> NatM Register
 658
 659     int_NE_code x y
 660       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 661         getNewRegNat IntRep             `thenNat` \ tmp ->
 662         let
 663             code = registerCode register tmp
 664             src  = registerName register tmp
 665             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 666         in
 667         return (Any IntRep code__2)
 668
 669     {- ------------------------------------------------------------
 670         Comments for int_NE_code also apply to cmpF_code
 671     -}
 672     cmpF_code
 673         :: (Reg -> Reg -> Reg -> Instr)
 674         -> Cond
 675         -> StixTree -> StixTree
 676         -> NatM Register
 677
 678     cmpF_code instr cond x y
 679       = trivialFCode pr instr x y       `thenNat` \ register ->
 680         getNewRegNat F64                `thenNat` \ tmp ->
 681         getBlockIdNat                   `thenNat` \ lbl ->
 682         let
 683             code = registerCode register tmp
 684             result  = registerName register tmp
 685
 686             code__2 dst = code . mkSeqInstrs [
 687                 OR zeroh (RIImm (ImmInt 1)) dst,
 688                 BF cond  result (ImmCLbl lbl),
 689                 OR zeroh (RIReg zeroh) dst,
 690                 NEWBLOCK lbl]
 691         in
 692         return (Any IntRep code__2)
 693       where
 694         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 695       ------------------------------------------------------------
 696
 697 getRegister (CmmLoad pk mem)
 698   = getAmode mem                    `thenNat` \ amode ->
 699     let
 700         code = amodeCode amode
 701         src   = amodeAddr amode
 702         size = primRepToSize pk
 703         code__2 dst = code . mkSeqInstr (LD size dst src)
 704     in
 705     return (Any pk code__2)
 706
 707 getRegister (StInt i)
 708   | fits8Bits i
 709   = let
 710         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 711     in
 712     return (Any IntRep code)
 713   | otherwise
 714   = let
 715         code dst = mkSeqInstr (LDI Q dst src)
 716     in
 717     return (Any IntRep code)
 718   where
 719     src = ImmInt (fromInteger i)
 720
 721 getRegister leaf
 722   | isJust imm
 723   = let
 724         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 725     in
 726     return (Any PtrRep code)
 727   where
 728     imm = maybeImm leaf
 729     imm__2 = case imm of Just x -> x
 730
 731 #endif /* alpha_TARGET_ARCH */
 732
 733 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 734
 735 #if i386_TARGET_ARCH
 736
 737 getRegister (CmmLit (CmmFloat f F32)) = do
 738     lbl <- getNewLabelNat
 739     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 740     Amode addr addr_code <- getAmode dynRef
 741     let code dst =
 742             LDATA ReadOnlyData
 743                         [CmmDataLabel lbl,
 744                          CmmStaticLit (CmmFloat f F32)]
 745             `consOL` (addr_code `snocOL`
 746             GLD F32 addr dst)
 747     -- in
 748     return (Any F32 code)
 749
 750
 751 getRegister (CmmLit (CmmFloat d F64))
 752   | d == 0.0
 753   = let code dst = unitOL (GLDZ dst)
 754     in  return (Any F64 code)
 755
 756   | d == 1.0
 757   = let code dst = unitOL (GLD1 dst)
 758     in  return (Any F64 code)
 759
 760   | otherwise = do
 761     lbl <- getNewLabelNat
 762     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 763     Amode addr addr_code <- getAmode dynRef
 764     let code dst =
 765             LDATA ReadOnlyData
 766                         [CmmDataLabel lbl,
 767                          CmmStaticLit (CmmFloat d F64)]
 768             `consOL` (addr_code `snocOL`
 769             GLD F64 addr dst)
 770     -- in
 771     return (Any F64 code)
 772
 773 #endif /* i386_TARGET_ARCH */
 774
 775 #if x86_64_TARGET_ARCH
 776
 777 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 778    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 779         -- I don't know why there are xorpd, xorps, and pxor instructions.
 780         -- They all appear to do the same thing --SDM
 781    return (Any rep code)
 782
 783 getRegister (CmmLit (CmmFloat f rep)) = do
 784     lbl <- getNewLabelNat
 785     let code dst = toOL [
 786             LDATA ReadOnlyData
 787                         [CmmDataLabel lbl,
 788                          CmmStaticLit (CmmFloat f rep)],
 789             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 790             ]
 791     -- in
 792     return (Any rep code)
 793
 794 #endif /* x86_64_TARGET_ARCH */
 795
 796 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 797
 798 -- catch simple cases of zero- or sign-extended load
 799 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 800   code <- intLoadCode (MOVZxL I8) addr
 801   return (Any I32 code)
 802
 803 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 804   code <- intLoadCode (MOVSxL I8) addr
 805   return (Any I32 code)
 806
 807 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 808   code <- intLoadCode (MOVZxL I16) addr
 809   return (Any I32 code)
 810
 811 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 812   code <- intLoadCode (MOVSxL I16) addr
 813   return (Any I32 code)
 814
 815 #endif
 816
 817 #if x86_64_TARGET_ARCH
 818
 819 -- catch simple cases of zero- or sign-extended load
 820 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 821   code <- intLoadCode (MOVZxL I8) addr
 822   return (Any I64 code)
 823
 824 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 825   code <- intLoadCode (MOVSxL I8) addr
 826   return (Any I64 code)
 827
 828 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 829   code <- intLoadCode (MOVZxL I16) addr
 830   return (Any I64 code)
 831
 832 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 833   code <- intLoadCode (MOVSxL I16) addr
 834   return (Any I64 code)
 835
 836 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 837   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 838   return (Any I64 code)
 839
 840 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 841   code <- intLoadCode (MOVSxL I32) addr
 842   return (Any I64 code)
 843
 844 #endif
 845
 846 #if x86_64_TARGET_ARCH
 847 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 848   x_code <- getAnyReg x
 849   lbl <- getNewLabelNat
 850   let
 851     code dst = x_code dst `appOL` toOL [
 852         -- This is how gcc does it, so it can't be that bad:
 853         LDATA ReadOnlyData16 [
 854                 CmmAlign 16,
 855                 CmmDataLabel lbl,
 856                 CmmStaticLit (CmmInt 0x80000000 I32),
 857                 CmmStaticLit (CmmInt 0 I32),
 858                 CmmStaticLit (CmmInt 0 I32),
 859                 CmmStaticLit (CmmInt 0 I32)
 860         ],
 861         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 862                 -- xorps, so we need the 128-bit constant
 863                 -- ToDo: rip-relative
 864         ]
 865   --
 866   return (Any F32 code)
 867
 868 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 869   x_code <- getAnyReg x
 870   lbl <- getNewLabelNat
 871   let
 872         -- This is how gcc does it, so it can't be that bad:
 873     code dst = x_code dst `appOL` toOL [
 874         LDATA ReadOnlyData16 [
 875                 CmmAlign 16,
 876                 CmmDataLabel lbl,
 877                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 878                 CmmStaticLit (CmmInt 0 I64)
 879         ],
 880                 -- gcc puts an unpck here.  Wonder if we need it.
 881         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 882                 -- xorpd, so we need the 128-bit constant
 883         ]
 884   --
 885   return (Any F64 code)
 886 #endif
 887
 888 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 889
 890 getRegister (CmmMachOp mop [x]) -- unary MachOps
 891   = case mop of
 892 #if i386_TARGET_ARCH
 893       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 894       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 895 #endif
 896
 897       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 898       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 899
 900       -- Nop conversions
 901       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 902       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 903       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 904       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 905       MO_U_Conv I32 I16 -> toI16Reg I32 x
 906       MO_S_Conv I32 I16 -> toI16Reg I32 x
 907 #if x86_64_TARGET_ARCH
 908       MO_U_Conv I64 I32 -> conversionNop I64 x
 909       MO_S_Conv I64 I32 -> conversionNop I64 x
 910       MO_U_Conv I64 I16 -> toI16Reg I64 x
 911       MO_S_Conv I64 I16 -> toI16Reg I64 x
 912       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 913       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 914 #endif
 915
 916       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 917       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 918
 919       -- widenings
 920       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 921       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 922       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 923
 924       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 925       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 926       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 927
 928 #if x86_64_TARGET_ARCH
 929       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 930       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 931       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 932       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 933       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 934       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 935         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 936         -- However, we don't want the register allocator to throw it
 937         -- away as an unnecessary reg-to-reg move, so we keep it in
 938         -- the form of a movzl and print it as a movl later.
 939 #endif
 940
 941 #if i386_TARGET_ARCH
 942       MO_S_Conv F32 F64 -> conversionNop F64 x
 943       MO_S_Conv F64 F32 -> conversionNop F32 x
 944 #else
 945       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 946       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 947 #endif
 948
 949       MO_S_Conv from to
 950         | isFloatingRep from -> coerceFP2Int from to x
 951         | isFloatingRep to   -> coerceInt2FP from to x
 952
 953       other -> pprPanic "getRegister" (pprMachOp mop)
 954    where
 955         -- signed or unsigned extension.
 956         integerExtend from to instr expr = do
 957             (reg,e_code) <- if from == I8 then getByteReg expr
 958                                           else getSomeReg expr
 959             let
 960                 code dst =
 961                   e_code `snocOL`
 962                   instr from (OpReg reg) (OpReg dst)
 963             return (Any to code)
 964
 965         toI8Reg new_rep expr
 966             = do codefn <- getAnyReg expr
 967                  return (Any new_rep codefn)
 968                 -- HACK: use getAnyReg to get a byte-addressable register.
 969                 -- If the source was a Fixed register, this will add the
 970                 -- mov instruction to put it into the desired destination.
 971                 -- We're assuming that the destination won't be a fixed
 972                 -- non-byte-addressable register; it won't be, because all
 973                 -- fixed registers are word-sized.
 974
 975         toI16Reg = toI8Reg -- for now
 976
 977         conversionNop new_rep expr
 978             = do e_code <- getRegister expr
 979                  return (swizzleRegisterRep e_code new_rep)
 980
 981
 982 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 983   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 984     case mop of
 985       MO_Eq F32   -> condFltReg EQQ x y
 986       MO_Ne F32   -> condFltReg NE x y
 987       MO_S_Gt F32 -> condFltReg GTT x y
 988       MO_S_Ge F32 -> condFltReg GE x y
 989       MO_S_Lt F32 -> condFltReg LTT x y
 990       MO_S_Le F32 -> condFltReg LE x y
 991
 992       MO_Eq F64   -> condFltReg EQQ x y
 993       MO_Ne F64   -> condFltReg NE x y
 994       MO_S_Gt F64 -> condFltReg GTT x y
 995       MO_S_Ge F64 -> condFltReg GE x y
 996       MO_S_Lt F64 -> condFltReg LTT x y
 997       MO_S_Le F64 -> condFltReg LE x y
 998
 999       MO_Eq rep   -> condIntReg EQQ x y
1000       MO_Ne rep   -> condIntReg NE x y
1001
1002       MO_S_Gt rep -> condIntReg GTT x y
1003       MO_S_Ge rep -> condIntReg GE x y
1004       MO_S_Lt rep -> condIntReg LTT x y
1005       MO_S_Le rep -> condIntReg LE x y
1006
1007       MO_U_Gt rep -> condIntReg GU  x y
1008       MO_U_Ge rep -> condIntReg GEU x y
1009       MO_U_Lt rep -> condIntReg LU  x y
1010       MO_U_Le rep -> condIntReg LEU x y
1011
1012 #if i386_TARGET_ARCH
1013       MO_Add F32 -> trivialFCode F32 GADD x y
1014       MO_Sub F32 -> trivialFCode F32 GSUB x y
1015
1016       MO_Add F64 -> trivialFCode F64 GADD x y
1017       MO_Sub F64 -> trivialFCode F64 GSUB x y
1018
1019       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1020       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1021 #endif
1022
1023 #if x86_64_TARGET_ARCH
1024       MO_Add F32 -> trivialFCode F32 ADD x y
1025       MO_Sub F32 -> trivialFCode F32 SUB x y
1026
1027       MO_Add F64 -> trivialFCode F64 ADD x y
1028       MO_Sub F64 -> trivialFCode F64 SUB x y
1029
1030       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1031       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1032 #endif
1033
1034       MO_Add rep -> add_code rep x y
1035       MO_Sub rep -> sub_code rep x y
1036
1037       MO_S_Quot rep -> div_code rep True  True  x y
1038       MO_S_Rem  rep -> div_code rep True  False x y
1039       MO_U_Quot rep -> div_code rep False True  x y
1040       MO_U_Rem  rep -> div_code rep False False x y
1041
1042 #if i386_TARGET_ARCH
1043       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1044       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1045 #endif
1046
1047 #if x86_64_TARGET_ARCH
1048       MO_Mul   F32 -> trivialFCode F32 MUL x y
1049       MO_Mul   F64 -> trivialFCode F64 MUL x y
1050 #endif
1051
1052       MO_Mul   rep -> let op = IMUL rep in
1053                       trivialCode rep op (Just op) x y
1054
1055       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1056
1057       MO_And rep -> let op = AND rep in
1058                     trivialCode rep op (Just op) x y
1059       MO_Or  rep -> let op = OR  rep in
1060                     trivialCode rep op (Just op) x y
1061       MO_Xor rep -> let op = XOR rep in
1062                     trivialCode rep op (Just op) x y
1063
1064         {- Shift ops on x86s have constraints on their source, it
1065            either has to be Imm, CL or 1
1066             => trivialCode is not restrictive enough (sigh.)
1067         -}
1068       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1069       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1070       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1071
1072       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1073   where
1074     --------------------
1075     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1076     imulMayOflo rep a b = do
1077          (a_reg, a_code) <- getNonClobberedReg a
1078          b_code <- getAnyReg b
1079          let
1080              shift_amt  = case rep of
1081                            I32 -> 31
1082                            I64 -> 63
1083                            _ -> panic "shift_amt"
1084
1085              code = a_code `appOL` b_code eax `appOL`
1086                         toOL [
1087                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1088                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1089                                 -- sign extend lower part
1090                            SUB rep (OpReg edx) (OpReg eax)
1091                                 -- compare against upper
1092                            -- eax==0 if high part == sign extended low part
1093                         ]
1094          -- in
1095          return (Fixed rep eax code)
1096
1097     --------------------
1098     shift_code :: MachRep
1099                -> (Operand -> Operand -> Instr)
1100                -> CmmExpr
1101                -> CmmExpr
1102                -> NatM Register
1103
1104     {- Case1: shift length as immediate -}
1105     shift_code rep instr x y@(CmmLit lit) = do
1106           x_code <- getAnyReg x
1107           let
1108                code dst
1109                   = x_code dst `snocOL`
1110                     instr (OpImm (litToImm lit)) (OpReg dst)
1111           -- in
1112           return (Any rep code)
1113
1114     {- Case2: shift length is complex (non-immediate) -}
1115     shift_code rep instr x y{-amount-} = do
1116         (x_reg, x_code) <- getNonClobberedReg x
1117         y_code <- getAnyReg y
1118         let
1119            code = x_code `appOL`
1120                   y_code ecx `snocOL`
1121                   instr (OpReg ecx) (OpReg x_reg)
1122         -- in
1123         return (Fixed rep x_reg code)
1124
1125     --------------------
1126     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1127     add_code rep x (CmmLit (CmmInt y _))
1128         | not (is64BitInteger y) = add_int rep x y
1129     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1130
1131     --------------------
1132     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1133     sub_code rep x (CmmLit (CmmInt y _))
1134         | not (is64BitInteger (-y)) = add_int rep x (-y)
1135     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1136
1137     -- our three-operand add instruction:
1138     add_int rep x y = do
1139         (x_reg, x_code) <- getSomeReg x
1140         let
1141             imm = ImmInt (fromInteger y)
1142             code dst
1143                = x_code `snocOL`
1144                  LEA rep
1145                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1146                         (OpReg dst)
1147         --
1148         return (Any rep code)
1149
1150     ----------------------
1151     div_code rep signed quotient x y = do
1152            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1153            x_code <- getAnyReg x
1154            let
1155              widen | signed    = CLTD rep
1156                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1157
1158              instr | signed    = IDIV
1159                    | otherwise = DIV
1160
1161              code = y_code `appOL`
1162                     x_code eax `appOL`
1163                     toOL [widen, instr rep y_op]
1164
1165              result | quotient  = eax
1166                     | otherwise = edx
1167
1168            -- in
1169            return (Fixed rep result code)
1170
1171
1172 getRegister (CmmLoad mem pk)
1173   | isFloatingRep pk
1174   = do
1175     Amode src mem_code <- getAmode mem
1176     let
1177         code dst = mem_code `snocOL`
1178                    IF_ARCH_i386(GLD pk src dst,
1179                                 MOV pk (OpAddr src) (OpReg dst))
1180     --
1181     return (Any pk code)
1182
1183 #if i386_TARGET_ARCH
1184 getRegister (CmmLoad mem pk)
1185   | pk /= I64
1186   = do
1187     code <- intLoadCode (instr pk) mem
1188     return (Any pk code)
1189   where
1190         instr I8  = MOVZxL pk
1191         instr I16 = MOV I16
1192         instr I32 = MOV I32
1193         -- we always zero-extend 8-bit loads, if we
1194         -- can't think of anything better.  This is because
1195         -- we can't guarantee access to an 8-bit variant of every register
1196         -- (esi and edi don't have 8-bit variants), so to make things
1197         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1198 #endif
1199
1200 #if x86_64_TARGET_ARCH
1201 -- Simpler memory load code on x86_64
1202 getRegister (CmmLoad mem pk)
1203   = do
1204     code <- intLoadCode (MOV pk) mem
1205     return (Any pk code)
1206 #endif
1207
1208 getRegister (CmmLit (CmmInt 0 rep))
1209   = let
1210         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1211         adj_rep = case rep of I64 -> I32; _ -> rep
1212         rep1 = IF_ARCH_i386( rep, adj_rep )
1213         code dst
1214            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1215     in
1216         return (Any rep code)
1217
1218 #if x86_64_TARGET_ARCH
1219   -- optimisation for loading small literals on x86_64: take advantage
1220   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1221   -- instruction forms are shorter.
1222 getRegister (CmmLit lit)
1223   | I64 <- cmmLitRep lit, not (isBigLit lit)
1224   = let
1225         imm = litToImm lit
1226         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1227     in
1228         return (Any I64 code)
1229   where
1230    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1231    isBigLit _ = False
1232         -- note1: not the same as is64BitLit, because that checks for
1233         -- signed literals that fit in 32 bits, but we want unsigned
1234         -- literals here.
1235         -- note2: all labels are small, because we're assuming the
1236         -- small memory model (see gcc docs, -mcmodel=small).
1237 #endif
1238
1239 getRegister (CmmLit lit)
1240   = let
1241         rep = cmmLitRep lit
1242         imm = litToImm lit
1243         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1244     in
1245         return (Any rep code)
1246
1247 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1248
1249
1250 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1251    -> NatM (Reg -> InstrBlock)
1252 intLoadCode instr mem = do
1253   Amode src mem_code <- getAmode mem
1254   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1255
1256 -- Compute an expression into *any* register, adding the appropriate
1257 -- move instruction if necessary.
1258 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1259 getAnyReg expr = do
1260   r <- getRegister expr
1261   anyReg r
1262
1263 anyReg :: Register -> NatM (Reg -> InstrBlock)
1264 anyReg (Any _ code)          = return code
1265 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1266
1267 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1268 -- Fixed registers might not be byte-addressable, so we make sure we've
1269 -- got a temporary, inserting an extra reg copy if necessary.
1270 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1271 #if x86_64_TARGET_ARCH
1272 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1273 #else
1274 getByteReg expr = do
1275   r <- getRegister expr
1276   case r of
1277     Any rep code -> do
1278         tmp <- getNewRegNat rep
1279         return (tmp, code tmp)
1280     Fixed rep reg code
1281         | isVirtualReg reg -> return (reg,code)
1282         | otherwise -> do
1283             tmp <- getNewRegNat rep
1284             return (tmp, code `snocOL` reg2reg rep reg tmp)
1285         -- ToDo: could optimise slightly by checking for byte-addressable
1286         -- real registers, but that will happen very rarely if at all.
1287 #endif
1288
1289 -- Another variant: this time we want the result in a register that cannot
1290 -- be modified by code to evaluate an arbitrary expression.
1291 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1292 getNonClobberedReg expr = do
1293   r <- getRegister expr
1294   case r of
1295     Any rep code -> do
1296         tmp <- getNewRegNat rep
1297         return (tmp, code tmp)
1298     Fixed rep reg code
1299         -- only free regs can be clobbered
1300         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1301                 tmp <- getNewRegNat rep
1302                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1303         | otherwise ->
1304                 return (reg, code)
1305
1306 reg2reg :: MachRep -> Reg -> Reg -> Instr
1307 reg2reg rep src dst
1308 #if i386_TARGET_ARCH
1309   | isFloatingRep rep = GMOV src dst
1310 #endif
1311   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1312
1313 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1314
1315 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1316
1317 #if sparc_TARGET_ARCH
1318
1319 getRegister (CmmLit (CmmFloat f F32)) = do
1320     lbl <- getNewLabelNat
1321     let code dst = toOL [
1322             LDATA ReadOnlyData
1323                         [CmmDataLabel lbl,
1324                          CmmStaticLit (CmmFloat f F32)],
1325             SETHI (HI (ImmCLbl lbl)) dst,
1326             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1327     return (Any F32 code)
1328
1329 getRegister (CmmLit (CmmFloat d F64)) = do
1330     lbl <- getNewLabelNat
1331     let code dst = toOL [
1332             LDATA ReadOnlyData
1333                         [CmmDataLabel lbl,
1334                          CmmStaticLit (CmmFloat d F64)],
1335             SETHI (HI (ImmCLbl lbl)) dst,
1336             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1337     return (Any F64 code)
1338
1339 getRegister (CmmMachOp mop [x]) -- unary MachOps
1340   = case mop of
1341       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1342       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1343
1344       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1345       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1346
1347       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1348
1349       MO_U_Conv F64 F32-> coerceDbl2Flt x
1350       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1351
1352       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1353       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1354       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1355       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1356
1357       -- Conversions which are a nop on sparc
1358       MO_U_Conv from to
1359         | from == to   -> conversionNop to   x
1360       MO_U_Conv I32 to -> conversionNop to   x
1361       MO_S_Conv I32 to -> conversionNop to   x
1362
1363       -- widenings
1364       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1365       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1366       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1367       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1368
1369       other_op -> panic "Unknown unary mach op"
1370     where
1371         -- XXX SLL/SRL?
1372         integerExtend signed from to expr = do
1373            (reg, e_code) <- getSomeReg expr
1374            let
1375                code dst =
1376                    e_code `snocOL`
1377                    ((if signed then SRA else SRL)
1378                           reg (RIImm (ImmInt 0)) dst)
1379            return (Any to code)
1380         conversionNop new_rep expr
1381             = do e_code <- getRegister expr
1382                  return (swizzleRegisterRep e_code new_rep)
1383
1384 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1385   = case mop of
1386       MO_Eq F32 -> condFltReg EQQ x y
1387       MO_Ne F32 -> condFltReg NE x y
1388
1389       MO_S_Gt F32 -> condFltReg GTT x y
1390       MO_S_Ge F32 -> condFltReg GE x y
1391       MO_S_Lt F32 -> condFltReg LTT x y
1392       MO_S_Le F32 -> condFltReg LE x y
1393
1394       MO_Eq F64 -> condFltReg EQQ x y
1395       MO_Ne F64 -> condFltReg NE x y
1396
1397       MO_S_Gt F64 -> condFltReg GTT x y
1398       MO_S_Ge F64 -> condFltReg GE x y
1399       MO_S_Lt F64 -> condFltReg LTT x y
1400       MO_S_Le F64 -> condFltReg LE x y
1401
1402       MO_Eq rep -> condIntReg EQQ x y
1403       MO_Ne rep -> condIntReg NE x y
1404
1405       MO_S_Gt rep -> condIntReg GTT x y
1406       MO_S_Ge rep -> condIntReg GE x y
1407       MO_S_Lt rep -> condIntReg LTT x y
1408       MO_S_Le rep -> condIntReg LE x y
1409
1410       MO_U_Gt I32  -> condIntReg GTT x y
1411       MO_U_Ge I32  -> condIntReg GE x y
1412       MO_U_Lt I32  -> condIntReg LTT x y
1413       MO_U_Le I32  -> condIntReg LE x y
1414
1415       MO_U_Gt I16 -> condIntReg GU  x y
1416       MO_U_Ge I16 -> condIntReg GEU x y
1417       MO_U_Lt I16 -> condIntReg LU  x y
1418       MO_U_Le I16 -> condIntReg LEU x y
1419
1420       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1421       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1422
1423       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1424 {-
1425       -- ToDo: teach about V8+ SPARC div instructions
1426       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1427       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1428       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1429       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1430 -}
1431       MO_Add F32  -> trivialFCode F32 FADD  x y
1432       MO_Sub F32   -> trivialFCode F32  FSUB x y
1433       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1434       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1435
1436       MO_Add F64   -> trivialFCode F64 FADD  x y
1437       MO_Sub F64   -> trivialFCode F64  FSUB x y
1438       MO_Mul F64   -> trivialFCode F64  FMUL x y
1439       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1440
1441       MO_And rep   -> trivialCode rep (AND False) x y
1442       MO_Or rep    -> trivialCode rep (OR  False) x y
1443       MO_Xor rep   -> trivialCode rep (XOR False) x y
1444
1445       MO_Mul rep -> trivialCode rep (SMUL False) x y
1446
1447       MO_Shl rep   -> trivialCode rep SLL  x y
1448       MO_U_Shr rep   -> trivialCode rep SRL x y
1449       MO_S_Shr rep   -> trivialCode rep SRA x y
1450
1451 {-
1452       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1453                                          [promote x, promote y])
1454                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1455       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1456                                         [x, y])
1457 -}
1458       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1459   where
1460     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1461
1462     --------------------
1463     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1464     imulMayOflo rep a b = do
1465          (a_reg, a_code) <- getSomeReg a
1466          (b_reg, b_code) <- getSomeReg b
1467          res_lo <- getNewRegNat I32
1468          res_hi <- getNewRegNat I32
1469          let
1470             shift_amt  = case rep of
1471                           I32 -> 31
1472                           I64 -> 63
1473                           _ -> panic "shift_amt"
1474             code dst = a_code `appOL` b_code `appOL`
1475                        toOL [
1476                            SMUL False a_reg (RIReg b_reg) res_lo,
1477                            RDY res_hi,
1478                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1479                            SUB False False res_lo (RIReg res_hi) dst
1480                         ]
1481          return (Any I32 code)
1482
1483 getRegister (CmmLoad mem pk) = do
1484     Amode src code <- getAmode mem
1485     let
1486         code__2 dst = code `snocOL` LD pk src dst
1487     return (Any pk code__2)
1488
1489 getRegister (CmmLit (CmmInt i _))
1490   | fits13Bits i
1491   = let
1492         src = ImmInt (fromInteger i)
1493         code dst = unitOL (OR False g0 (RIImm src) dst)
1494     in
1495         return (Any I32 code)
1496
1497 getRegister (CmmLit lit)
1498   = let rep = cmmLitRep lit
1499         imm = litToImm lit
1500         code dst = toOL [
1501             SETHI (HI imm) dst,
1502             OR False dst (RIImm (LO imm)) dst]
1503     in return (Any I32 code)
1504
1505 #endif /* sparc_TARGET_ARCH */
1506
1507 #if powerpc_TARGET_ARCH
1508 getRegister (CmmLoad mem pk)
1509   | pk /= I64
1510   = do
1511         Amode addr addr_code <- getAmode mem
1512         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1513                        addr_code `snocOL` LD pk dst addr
1514         return (Any pk code)
1515
1516 -- catch simple cases of zero- or sign-extended load
1517 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1518     Amode addr addr_code <- getAmode mem
1519     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1520
1521 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1522
1523 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1524     Amode addr addr_code <- getAmode mem
1525     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1526
1527 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1528     Amode addr addr_code <- getAmode mem
1529     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1530
1531 getRegister (CmmMachOp mop [x]) -- unary MachOps
1532   = case mop of
1533       MO_Not rep   -> trivialUCode rep NOT x
1534
1535       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1536       MO_S_Conv F32 F64 -> conversionNop F64 x
1537
1538       MO_S_Conv from to
1539         | from == to         -> conversionNop to x
1540         | isFloatingRep from -> coerceFP2Int from to x
1541         | isFloatingRep to   -> coerceInt2FP from to x
1542
1543         -- narrowing is a nop: we treat the high bits as undefined
1544       MO_S_Conv I32 to -> conversionNop to x
1545       MO_S_Conv I16 I8 -> conversionNop I8 x
1546       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1547       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1548
1549       MO_U_Conv from to
1550         | from == to -> conversionNop to x
1551         -- narrowing is a nop: we treat the high bits as undefined
1552       MO_U_Conv I32 to -> conversionNop to x
1553       MO_U_Conv I16 I8 -> conversionNop I8 x
1554       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1555       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1556
1557       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1558       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1559       MO_S_Neg rep      -> trivialUCode rep NEG x
1560
1561     where
1562         conversionNop new_rep expr
1563             = do e_code <- getRegister expr
1564                  return (swizzleRegisterRep e_code new_rep)
1565
1566 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1567   = case mop of
1568       MO_Eq F32 -> condFltReg EQQ x y
1569       MO_Ne F32 -> condFltReg NE  x y
1570
1571       MO_S_Gt F32 -> condFltReg GTT x y
1572       MO_S_Ge F32 -> condFltReg GE  x y
1573       MO_S_Lt F32 -> condFltReg LTT x y
1574       MO_S_Le F32 -> condFltReg LE  x y
1575
1576       MO_Eq F64 -> condFltReg EQQ x y
1577       MO_Ne F64 -> condFltReg NE  x y
1578
1579       MO_S_Gt F64 -> condFltReg GTT x y
1580       MO_S_Ge F64 -> condFltReg GE  x y
1581       MO_S_Lt F64 -> condFltReg LTT x y
1582       MO_S_Le F64 -> condFltReg LE  x y
1583
1584       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1585       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1586
1587       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1588       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1589       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1590       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1591
1592       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1593       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1594       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1595       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1596
1597       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1598       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1599       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1600       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1601
1602       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1603       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1604       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1605       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1606
1607          -- optimize addition with 32-bit immediate
1608          -- (needed for PIC)
1609       MO_Add I32 ->
1610         case y of
1611           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1612             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1613           CmmLit lit
1614             -> do
1615                 (src, srcCode) <- getSomeReg x
1616                 let imm = litToImm lit
1617                     code dst = srcCode `appOL` toOL [
1618                                     ADDIS dst src (HA imm),
1619                                     ADD dst dst (RIImm (LO imm))
1620                                 ]
1621                 return (Any I32 code)
1622           _ -> trivialCode I32 True ADD x y
1623
1624       MO_Add rep -> trivialCode rep True ADD x y
1625       MO_Sub rep ->
1626         case y of    -- subfi ('substract from' with immediate) doesn't exist
1627           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1628             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1629           _ -> trivialCodeNoImm rep SUBF y x
1630
1631       MO_Mul rep -> trivialCode rep True MULLW x y
1632
1633       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1634
1635       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1636       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1637
1638       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1639       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1640
1641       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1642       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1643
1644       MO_And rep   -> trivialCode rep False AND x y
1645       MO_Or rep    -> trivialCode rep False OR x y
1646       MO_Xor rep   -> trivialCode rep False XOR x y
1647
1648       MO_Shl rep   -> trivialCode rep False SLW x y
1649       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1650       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1651
1652 getRegister (CmmLit (CmmInt i rep))
1653   | Just imm <- makeImmediate rep True i
1654   = let
1655         code dst = unitOL (LI dst imm)
1656     in
1657         return (Any rep code)
1658
1659 getRegister (CmmLit (CmmFloat f frep)) = do
1660     lbl <- getNewLabelNat
1661     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1662     Amode addr addr_code <- getAmode dynRef
1663     let code dst =
1664             LDATA ReadOnlyData  [CmmDataLabel lbl,
1665                                  CmmStaticLit (CmmFloat f frep)]
1666             `consOL` (addr_code `snocOL` LD frep dst addr)
1667     return (Any frep code)
1668
1669 getRegister (CmmLit lit)
1670   = let rep = cmmLitRep lit
1671         imm = litToImm lit
1672         code dst = toOL [
1673               LIS dst (HI imm),
1674               OR dst dst (RIImm (LO imm))
1675           ]
1676     in return (Any rep code)
1677
1678 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1679
1680     -- extend?Rep: wrap integer expression of type rep
1681     -- in a conversion to I32
1682 extendSExpr I32 x = x
1683 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1684 extendUExpr I32 x = x
1685 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1686
1687 #endif /* powerpc_TARGET_ARCH */
1688
1689
1690 -- -----------------------------------------------------------------------------
1691 --  The 'Amode' type: Memory addressing modes passed up the tree.
1692
1693 data Amode = Amode AddrMode InstrBlock
1694
1695 {-
1696 Now, given a tree (the argument to an CmmLoad) that references memory,
1697 produce a suitable addressing mode.
1698
1699 A Rule of the Game (tm) for Amodes: use of the addr bit must
1700 immediately follow use of the code part, since the code part puts
1701 values in registers which the addr then refers to.  So you can't put
1702 anything in between, lest it overwrite some of those registers.  If
1703 you need to do some other computation between the code part and use of
1704 the addr bit, first store the effective address from the amode in a
1705 temporary, then do the other computation, and then use the temporary:
1706
1707     code
1708     LEA amode, tmp
1709     ... other computation ...
1710     ... (tmp) ...
1711 -}
1712
1713 getAmode :: CmmExpr -> NatM Amode
1714 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1715
1716 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1717
1718 #if alpha_TARGET_ARCH
1719
1720 getAmode (StPrim IntSubOp [x, StInt i])
1721   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1722     getRegister x               `thenNat` \ register ->
1723     let
1724         code = registerCode register tmp
1725         reg  = registerName register tmp
1726         off  = ImmInt (-(fromInteger i))
1727     in
1728     return (Amode (AddrRegImm reg off) code)
1729
1730 getAmode (StPrim IntAddOp [x, StInt i])
1731   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1732     getRegister x               `thenNat` \ register ->
1733     let
1734         code = registerCode register tmp
1735         reg  = registerName register tmp
1736         off  = ImmInt (fromInteger i)
1737     in
1738     return (Amode (AddrRegImm reg off) code)
1739
1740 getAmode leaf
1741   | isJust imm
1742   = return (Amode (AddrImm imm__2) id)
1743   where
1744     imm = maybeImm leaf
1745     imm__2 = case imm of Just x -> x
1746
1747 getAmode other
1748   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1749     getRegister other           `thenNat` \ register ->
1750     let
1751         code = registerCode register tmp
1752         reg  = registerName register tmp
1753     in
1754     return (Amode (AddrReg reg) code)
1755
1756 #endif /* alpha_TARGET_ARCH */
1757
1758 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1759
1760 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1761
1762 -- This is all just ridiculous, since it carefully undoes
1763 -- what mangleIndexTree has just done.
1764 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1765   | not (is64BitLit lit)
1766   -- ASSERT(rep == I32)???
1767   = do (x_reg, x_code) <- getSomeReg x
1768        let off = ImmInt (-(fromInteger i))
1769        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1770
1771 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1772   | not (is64BitLit lit)
1773   -- ASSERT(rep == I32)???
1774   = do (x_reg, x_code) <- getSomeReg x
1775        let off = ImmInt (fromInteger i)
1776        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1777
1778 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1779 -- recognised by the next rule.
1780 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1781                                   b@(CmmLit _)])
1782   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1783
1784 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1785                                         [y, CmmLit (CmmInt shift _)]])
1786   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1787   = do (x_reg, x_code) <- getNonClobberedReg x
1788         -- x must be in a temp, because it has to stay live over y_code
1789         -- we could compre x_reg and y_reg and do something better here...
1790        (y_reg, y_code) <- getSomeReg y
1791        let
1792            code = x_code `appOL` y_code
1793            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1794        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1795                code)
1796
1797 getAmode (CmmLit lit) | not (is64BitLit lit)
1798   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1799
1800 getAmode expr = do
1801   (reg,code) <- getSomeReg expr
1802   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1803
1804 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1805
1806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1807
1808 #if sparc_TARGET_ARCH
1809
1810 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1811   | fits13Bits (-i)
1812   = do
1813        (reg, code) <- getSomeReg x
1814        let
1815          off  = ImmInt (-(fromInteger i))
1816        return (Amode (AddrRegImm reg off) code)
1817
1818
1819 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1820   | fits13Bits i
1821   = do
1822        (reg, code) <- getSomeReg x
1823        let
1824          off  = ImmInt (fromInteger i)
1825        return (Amode (AddrRegImm reg off) code)
1826
1827 getAmode (CmmMachOp (MO_Add rep) [x, y])
1828   = do
1829     (regX, codeX) <- getSomeReg x
1830     (regY, codeY) <- getSomeReg y
1831     let
1832         code = codeX `appOL` codeY
1833     return (Amode (AddrRegReg regX regY) code)
1834
1835 -- XXX Is this same as "leaf" in Stix?
1836 getAmode (CmmLit lit)
1837   = do
1838       tmp <- getNewRegNat I32
1839       let
1840         code = unitOL (SETHI (HI imm__2) tmp)
1841       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1842       where
1843          imm__2 = litToImm lit
1844
1845 getAmode other
1846   = do
1847        (reg, code) <- getSomeReg other
1848        let
1849             off  = ImmInt 0
1850        return (Amode (AddrRegImm reg off) code)
1851
1852 #endif /* sparc_TARGET_ARCH */
1853
1854 #ifdef powerpc_TARGET_ARCH
1855 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1856   | Just off <- makeImmediate I32 True (-i)
1857   = do
1858         (reg, code) <- getSomeReg x
1859         return (Amode (AddrRegImm reg off) code)
1860
1861
1862 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1863   | Just off <- makeImmediate I32 True i
1864   = do
1865         (reg, code) <- getSomeReg x
1866         return (Amode (AddrRegImm reg off) code)
1867
1868    -- optimize addition with 32-bit immediate
1869    -- (needed for PIC)
1870 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1871   = do
1872         tmp <- getNewRegNat I32
1873         (src, srcCode) <- getSomeReg x
1874         let imm = litToImm lit
1875             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1876         return (Amode (AddrRegImm tmp (LO imm)) code)
1877
1878 getAmode (CmmLit lit)
1879   = do
1880         tmp <- getNewRegNat I32
1881         let imm = litToImm lit
1882             code = unitOL (LIS tmp (HA imm))
1883         return (Amode (AddrRegImm tmp (LO imm)) code)
1884
1885 getAmode (CmmMachOp (MO_Add I32) [x, y])
1886   = do
1887         (regX, codeX) <- getSomeReg x
1888         (regY, codeY) <- getSomeReg y
1889         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1890
1891 getAmode other
1892   = do
1893         (reg, code) <- getSomeReg other
1894         let
1895             off  = ImmInt 0
1896         return (Amode (AddrRegImm reg off) code)
1897 #endif /* powerpc_TARGET_ARCH */
1898
1899 -- -----------------------------------------------------------------------------
1900 -- getOperand: sometimes any operand will do.
1901
1902 -- getNonClobberedOperand: the value of the operand will remain valid across
1903 -- the computation of an arbitrary expression, unless the expression
1904 -- is computed directly into a register which the operand refers to
1905 -- (see trivialCode where this function is used for an example).
1906
1907 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1908
1909 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1910 #if x86_64_TARGET_ARCH
1911 getNonClobberedOperand (CmmLit lit)
1912   | isSuitableFloatingPointLit lit = do
1913     lbl <- getNewLabelNat
1914     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1915                                            CmmStaticLit lit])
1916     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1917 #endif
1918 getNonClobberedOperand (CmmLit lit)
1919   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1920     return (OpImm (litToImm lit), nilOL)
1921 getNonClobberedOperand (CmmLoad mem pk)
1922   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1923     Amode src mem_code <- getAmode mem
1924     (src',save_code) <-
1925         if (amodeCouldBeClobbered src)
1926                 then do
1927                    tmp <- getNewRegNat wordRep
1928                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1929                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1930                 else
1931                    return (src, nilOL)
1932     return (OpAddr src', save_code `appOL` mem_code)
1933 getNonClobberedOperand e = do
1934     (reg, code) <- getNonClobberedReg e
1935     return (OpReg reg, code)
1936
1937 amodeCouldBeClobbered :: AddrMode -> Bool
1938 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1939
1940 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1941 regClobbered _ = False
1942
1943 -- getOperand: the operand is not required to remain valid across the
1944 -- computation of an arbitrary expression.
1945 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1946 #if x86_64_TARGET_ARCH
1947 getOperand (CmmLit lit)
1948   | isSuitableFloatingPointLit lit = do
1949     lbl <- getNewLabelNat
1950     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1951                                            CmmStaticLit lit])
1952     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1953 #endif
1954 getOperand (CmmLit lit)
1955   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1956     return (OpImm (litToImm lit), nilOL)
1957 getOperand (CmmLoad mem pk)
1958   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1959     Amode src mem_code <- getAmode mem
1960     return (OpAddr src, mem_code)
1961 getOperand e = do
1962     (reg, code) <- getSomeReg e
1963     return (OpReg reg, code)
1964
1965 isOperand :: CmmExpr -> Bool
1966 isOperand (CmmLoad _ _) = True
1967 isOperand (CmmLit lit)  = not (is64BitLit lit)
1968                           || isSuitableFloatingPointLit lit
1969 isOperand _             = False
1970
1971 -- if we want a floating-point literal as an operand, we can
1972 -- use it directly from memory.  However, if the literal is
1973 -- zero, we're better off generating it into a register using
1974 -- xor.
1975 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1976 isSuitableFloatingPointLit _ = False
1977
1978 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1979 getRegOrMem (CmmLoad mem pk)
1980   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1981     Amode src mem_code <- getAmode mem
1982     return (OpAddr src, mem_code)
1983 getRegOrMem e = do
1984     (reg, code) <- getNonClobberedReg e
1985     return (OpReg reg, code)
1986
1987 #if x86_64_TARGET_ARCH
1988 is64BitLit (CmmInt i I64) = is64BitInteger i
1989    -- assume that labels are in the range 0-2^31-1: this assumes the
1990    -- small memory model (see gcc docs, -mcmodel=small).
1991 #endif
1992 is64BitLit x = False
1993 #endif
1994
1995 is64BitInteger :: Integer -> Bool
1996 is64BitInteger i = i > 0x7fffffff || i < -0x80000000
1997
1998 -- -----------------------------------------------------------------------------
1999 --  The 'CondCode' type:  Condition codes passed up the tree.
2000
2001 data CondCode = CondCode Bool Cond InstrBlock
2002
2003 -- Set up a condition code for a conditional branch.
2004
2005 getCondCode :: CmmExpr -> NatM CondCode
2006
2007 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2008
2009 #if alpha_TARGET_ARCH
2010 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2011 #endif /* alpha_TARGET_ARCH */
2012
2013 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2014
2015 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2016 -- yes, they really do seem to want exactly the same!
2017
2018 getCondCode (CmmMachOp mop [x, y])
2019   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2020     case mop of
2021       MO_Eq F32 -> condFltCode EQQ x y
2022       MO_Ne F32 -> condFltCode NE  x y
2023
2024       MO_S_Gt F32 -> condFltCode GTT x y
2025       MO_S_Ge F32 -> condFltCode GE  x y
2026       MO_S_Lt F32 -> condFltCode LTT x y
2027       MO_S_Le F32 -> condFltCode LE  x y
2028
2029       MO_Eq F64 -> condFltCode EQQ x y
2030       MO_Ne F64 -> condFltCode NE  x y
2031
2032       MO_S_Gt F64 -> condFltCode GTT x y
2033       MO_S_Ge F64 -> condFltCode GE  x y
2034       MO_S_Lt F64 -> condFltCode LTT x y
2035       MO_S_Le F64 -> condFltCode LE  x y
2036
2037       MO_Eq rep -> condIntCode EQQ  x y
2038       MO_Ne rep -> condIntCode NE   x y
2039
2040       MO_S_Gt rep -> condIntCode GTT  x y
2041       MO_S_Ge rep -> condIntCode GE   x y
2042       MO_S_Lt rep -> condIntCode LTT  x y
2043       MO_S_Le rep -> condIntCode LE   x y
2044
2045       MO_U_Gt rep -> condIntCode GU   x y
2046       MO_U_Ge rep -> condIntCode GEU  x y
2047       MO_U_Lt rep -> condIntCode LU   x y
2048       MO_U_Le rep -> condIntCode LEU  x y
2049
2050       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2051
2052 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2053
2054 #elif powerpc_TARGET_ARCH
2055
2056 -- almost the same as everywhere else - but we need to
2057 -- extend small integers to 32 bit first
2058
2059 getCondCode (CmmMachOp mop [x, y])
2060   = case mop of
2061       MO_Eq F32 -> condFltCode EQQ x y
2062       MO_Ne F32 -> condFltCode NE  x y
2063
2064       MO_S_Gt F32 -> condFltCode GTT x y
2065       MO_S_Ge F32 -> condFltCode GE  x y
2066       MO_S_Lt F32 -> condFltCode LTT x y
2067       MO_S_Le F32 -> condFltCode LE  x y
2068
2069       MO_Eq F64 -> condFltCode EQQ x y
2070       MO_Ne F64 -> condFltCode NE  x y
2071
2072       MO_S_Gt F64 -> condFltCode GTT x y
2073       MO_S_Ge F64 -> condFltCode GE  x y
2074       MO_S_Lt F64 -> condFltCode LTT x y
2075       MO_S_Le F64 -> condFltCode LE  x y
2076
2077       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2078       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2079
2080       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2081       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2082       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2083       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2084
2085       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2086       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2087       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2088       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2089
2090       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2091
2092 getCondCode other =  panic "getCondCode(2)(powerpc)"
2093
2094
2095 #endif
2096
2097
2098 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2099 -- passed back up the tree.
2100
2101 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2102
2103 #if alpha_TARGET_ARCH
2104 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2105 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2106 #endif /* alpha_TARGET_ARCH */
2107
2108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2109 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2110
2111 -- memory vs immediate
2112 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2113     Amode x_addr x_code <- getAmode x
2114     let
2115         imm  = litToImm lit
2116         code = x_code `snocOL`
2117                   CMP pk (OpImm imm) (OpAddr x_addr)
2118     --
2119     return (CondCode False cond code)
2120
2121 -- anything vs zero
2122 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2123     (x_reg, x_code) <- getSomeReg x
2124     let
2125         code = x_code `snocOL`
2126                   TEST pk (OpReg x_reg) (OpReg x_reg)
2127     --
2128     return (CondCode False cond code)
2129
2130 -- anything vs operand
2131 condIntCode cond x y | isOperand y = do
2132     (x_reg, x_code) <- getNonClobberedReg x
2133     (y_op,  y_code) <- getOperand y
2134     let
2135         code = x_code `appOL` y_code `snocOL`
2136                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2137     -- in
2138     return (CondCode False cond code)
2139
2140 -- anything vs anything
2141 condIntCode cond x y = do
2142   (y_reg, y_code) <- getNonClobberedReg y
2143   (x_op, x_code) <- getRegOrMem x
2144   let
2145         code = y_code `appOL`
2146                x_code `snocOL`
2147                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2148   -- in
2149   return (CondCode False cond code)
2150 #endif
2151
2152 #if i386_TARGET_ARCH
2153 condFltCode cond x y
2154   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2155   (x_reg, x_code) <- getNonClobberedReg x
2156   (y_reg, y_code) <- getSomeReg y
2157   let
2158         code = x_code `appOL` y_code `snocOL`
2159                 GCMP cond x_reg y_reg
2160   -- The GCMP insn does the test and sets the zero flag if comparable
2161   -- and true.  Hence we always supply EQQ as the condition to test.
2162   return (CondCode True EQQ code)
2163 #endif /* i386_TARGET_ARCH */
2164
2165 #if x86_64_TARGET_ARCH
2166 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2167 -- an operand, but the right must be a reg.  We can probably do better
2168 -- than this general case...
2169 condFltCode cond x y = do
2170   (x_reg, x_code) <- getNonClobberedReg x
2171   (y_op, y_code) <- getOperand y
2172   let
2173         code = x_code `appOL`
2174                y_code `snocOL`
2175                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2176         -- NB(1): we need to use the unsigned comparison operators on the
2177         -- result of this comparison.
2178   -- in
2179   return (CondCode True (condToUnsigned cond) code)
2180 #endif
2181
2182 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2183
2184 #if sparc_TARGET_ARCH
2185
2186 condIntCode cond x (CmmLit (CmmInt y rep))
2187   | fits13Bits y
2188   = do
2189        (src1, code) <- getSomeReg x
2190        let
2191            src2 = ImmInt (fromInteger y)
2192            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2193        return (CondCode False cond code')
2194
2195 condIntCode cond x y = do
2196     (src1, code1) <- getSomeReg x
2197     (src2, code2) <- getSomeReg y
2198     let
2199         code__2 = code1 `appOL` code2 `snocOL`
2200                   SUB False True src1 (RIReg src2) g0
2201     return (CondCode False cond code__2)
2202
2203 -----------
2204 condFltCode cond x y = do
2205     (src1, code1) <- getSomeReg x
2206     (src2, code2) <- getSomeReg y
2207     tmp <- getNewRegNat F64
2208     let
2209         promote x = FxTOy F32 F64 x tmp
2210
2211         pk1   = cmmExprRep x
2212         pk2   = cmmExprRep y
2213
2214         code__2 =
2215                 if pk1 == pk2 then
2216                     code1 `appOL` code2 `snocOL`
2217                     FCMP True pk1 src1 src2
2218                 else if pk1 == F32 then
2219                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2220                     FCMP True F64 tmp src2
2221                 else
2222                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2223                     FCMP True F64 src1 tmp
2224     return (CondCode True cond code__2)
2225
2226 #endif /* sparc_TARGET_ARCH */
2227
2228 #if powerpc_TARGET_ARCH
2229 --  ###FIXME: I16 and I8!
2230 condIntCode cond x (CmmLit (CmmInt y rep))
2231   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2232   = do
2233         (src1, code) <- getSomeReg x
2234         let
2235             code' = code `snocOL`
2236                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2237         return (CondCode False cond code')
2238
2239 condIntCode cond x y = do
2240     (src1, code1) <- getSomeReg x
2241     (src2, code2) <- getSomeReg y
2242     let
2243         code' = code1 `appOL` code2 `snocOL`
2244                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2245     return (CondCode False cond code')
2246
2247 condFltCode cond x y = do
2248     (src1, code1) <- getSomeReg x
2249     (src2, code2) <- getSomeReg y
2250     let
2251         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2252         code'' = case cond of -- twiddle CR to handle unordered case
2253                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2254                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2255                     _ -> code'
2256                  where
2257                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2258     return (CondCode True cond code'')
2259
2260 #endif /* powerpc_TARGET_ARCH */
2261
2262 -- -----------------------------------------------------------------------------
2263 -- Generating assignments
2264
2265 -- Assignments are really at the heart of the whole code generation
2266 -- business.  Almost all top-level nodes of any real importance are
2267 -- assignments, which correspond to loads, stores, or register
2268 -- transfers.  If we're really lucky, some of the register transfers
2269 -- will go away, because we can use the destination register to
2270 -- complete the code generation for the right hand side.  This only
2271 -- fails when the right hand side is forced into a fixed register
2272 -- (e.g. the result of a call).
2273
2274 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2275 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2276
2277 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2278 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2279
2280 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2281
2282 #if alpha_TARGET_ARCH
2283
2284 assignIntCode pk (CmmLoad dst _) src
2285   = getNewRegNat IntRep             `thenNat` \ tmp ->
2286     getAmode dst                    `thenNat` \ amode ->
2287     getRegister src                 `thenNat` \ register ->
2288     let
2289         code1   = amodeCode amode []
2290         dst__2  = amodeAddr amode
2291         code2   = registerCode register tmp []
2292         src__2  = registerName register tmp
2293         sz      = primRepToSize pk
2294         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2295     in
2296     return code__2
2297
2298 assignIntCode pk dst src
2299   = getRegister dst                         `thenNat` \ register1 ->
2300     getRegister src                         `thenNat` \ register2 ->
2301     let
2302         dst__2  = registerName register1 zeroh
2303         code    = registerCode register2 dst__2
2304         src__2  = registerName register2 dst__2
2305         code__2 = if isFixed register2
2306                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2307                   else code
2308     in
2309     return code__2
2310
2311 #endif /* alpha_TARGET_ARCH */
2312
2313 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2314
2315 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2316
2317 -- integer assignment to memory
2318 assignMem_IntCode pk addr src = do
2319     Amode addr code_addr <- getAmode addr
2320     (code_src, op_src)   <- get_op_RI src
2321     let
2322         code = code_src `appOL`
2323                code_addr `snocOL`
2324                   MOV pk op_src (OpAddr addr)
2325         -- NOTE: op_src is stable, so it will still be valid
2326         -- after code_addr.  This may involve the introduction
2327         -- of an extra MOV to a temporary register, but we hope
2328         -- the register allocator will get rid of it.
2329     --
2330     return code
2331   where
2332     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2333     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2334       = return (nilOL, OpImm (litToImm lit))
2335     get_op_RI op
2336       = do (reg,code) <- getNonClobberedReg op
2337            return (code, OpReg reg)
2338
2339
2340 -- Assign; dst is a reg, rhs is mem
2341 assignReg_IntCode pk reg (CmmLoad src _) = do
2342   load_code <- intLoadCode (MOV pk) src
2343   return (load_code (getRegisterReg reg))
2344
2345 -- dst is a reg, but src could be anything
2346 assignReg_IntCode pk reg src = do
2347   code <- getAnyReg src
2348   return (code (getRegisterReg reg))
2349
2350 #endif /* i386_TARGET_ARCH */
2351
2352 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2353
2354 #if sparc_TARGET_ARCH
2355
2356 assignMem_IntCode pk addr src = do
2357     (srcReg, code) <- getSomeReg src
2358     Amode dstAddr addr_code <- getAmode addr
2359     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2360
2361 assignReg_IntCode pk reg src = do
2362     r <- getRegister src
2363     return $ case r of
2364         Any _ code         -> code dst
2365         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2366     where
2367       dst = getRegisterReg reg
2368
2369
2370 #endif /* sparc_TARGET_ARCH */
2371
2372 #if powerpc_TARGET_ARCH
2373
2374 assignMem_IntCode pk addr src = do
2375     (srcReg, code) <- getSomeReg src
2376     Amode dstAddr addr_code <- getAmode addr
2377     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2378
2379 -- dst is a reg, but src could be anything
2380 assignReg_IntCode pk reg src
2381     = do
2382         r <- getRegister src
2383         return $ case r of
2384             Any _ code         -> code dst
2385             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2386     where
2387         dst = getRegisterReg reg
2388
2389 #endif /* powerpc_TARGET_ARCH */
2390
2391
2392 -- -----------------------------------------------------------------------------
2393 -- Floating-point assignments
2394
2395 #if alpha_TARGET_ARCH
2396
2397 assignFltCode pk (CmmLoad dst _) src
2398   = getNewRegNat pk                 `thenNat` \ tmp ->
2399     getAmode dst                    `thenNat` \ amode ->
2400     getRegister src                         `thenNat` \ register ->
2401     let
2402         code1   = amodeCode amode []
2403         dst__2  = amodeAddr amode
2404         code2   = registerCode register tmp []
2405         src__2  = registerName register tmp
2406         sz      = primRepToSize pk
2407         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2408     in
2409     return code__2
2410
2411 assignFltCode pk dst src
2412   = getRegister dst                         `thenNat` \ register1 ->
2413     getRegister src                         `thenNat` \ register2 ->
2414     let
2415         dst__2  = registerName register1 zeroh
2416         code    = registerCode register2 dst__2
2417         src__2  = registerName register2 dst__2
2418         code__2 = if isFixed register2
2419                   then code . mkSeqInstr (FMOV src__2 dst__2)
2420                   else code
2421     in
2422     return code__2
2423
2424 #endif /* alpha_TARGET_ARCH */
2425
2426 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2427
2428 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2429
2430 -- Floating point assignment to memory
2431 assignMem_FltCode pk addr src = do
2432   (src_reg, src_code) <- getNonClobberedReg src
2433   Amode addr addr_code <- getAmode addr
2434   let
2435         code = src_code `appOL`
2436                addr_code `snocOL`
2437                 IF_ARCH_i386(GST pk src_reg addr,
2438                              MOV pk (OpReg src_reg) (OpAddr addr))
2439   return code
2440
2441 -- Floating point assignment to a register/temporary
2442 assignReg_FltCode pk reg src = do
2443   src_code <- getAnyReg src
2444   return (src_code (getRegisterReg reg))
2445
2446 #endif /* i386_TARGET_ARCH */
2447
2448 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2449
2450 #if sparc_TARGET_ARCH
2451
2452 -- Floating point assignment to memory
2453 assignMem_FltCode pk addr src = do
2454     Amode dst__2 code1 <- getAmode addr
2455     (src__2, code2) <- getSomeReg src
2456     tmp1 <- getNewRegNat pk
2457     let
2458         pk__2   = cmmExprRep src
2459         code__2 = code1 `appOL` code2 `appOL`
2460             if   pk == pk__2
2461             then unitOL (ST pk src__2 dst__2)
2462             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2463     return code__2
2464
2465 -- Floating point assignment to a register/temporary
2466 -- ToDo: Verify correctness
2467 assignReg_FltCode pk reg src = do
2468     r <- getRegister src
2469     v1 <- getNewRegNat pk
2470     return $ case r of
2471         Any _ code         -> code dst
2472         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2473     where
2474       dst = getRegisterReg reg
2475
2476 #endif /* sparc_TARGET_ARCH */
2477
2478 #if powerpc_TARGET_ARCH
2479
2480 -- Easy, isn't it?
2481 assignMem_FltCode = assignMem_IntCode
2482 assignReg_FltCode = assignReg_IntCode
2483
2484 #endif /* powerpc_TARGET_ARCH */
2485
2486
2487 -- -----------------------------------------------------------------------------
2488 -- Generating an non-local jump
2489
2490 -- (If applicable) Do not fill the delay slots here; you will confuse the
2491 -- register allocator.
2492
2493 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2494
2495 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2496
2497 #if alpha_TARGET_ARCH
2498
2499 genJump (CmmLabel lbl)
2500   | isAsmTemp lbl = returnInstr (BR target)
2501   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2502   where
2503     target = ImmCLbl lbl
2504
2505 genJump tree
2506   = getRegister tree                `thenNat` \ register ->
2507     getNewRegNat PtrRep             `thenNat` \ tmp ->
2508     let
2509         dst    = registerName register pv
2510         code   = registerCode register pv
2511         target = registerName register pv
2512     in
2513     if isFixed register then
2514         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2515     else
2516     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2517
2518 #endif /* alpha_TARGET_ARCH */
2519
2520 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2521
2522 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2523
2524 genJump (CmmLoad mem pk) = do
2525   Amode target code <- getAmode mem
2526   return (code `snocOL` JMP (OpAddr target))
2527
2528 genJump (CmmLit lit) = do
2529   return (unitOL (JMP (OpImm (litToImm lit))))
2530
2531 genJump expr = do
2532   (reg,code) <- getSomeReg expr
2533   return (code `snocOL` JMP (OpReg reg))
2534
2535 #endif /* i386_TARGET_ARCH */
2536
2537 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2538
2539 #if sparc_TARGET_ARCH
2540
2541 genJump (CmmLit (CmmLabel lbl))
2542   = return (toOL [CALL (Left target) 0 True, NOP])
2543   where
2544     target = ImmCLbl lbl
2545
2546 genJump tree
2547   = do
2548         (target, code) <- getSomeReg tree
2549         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2550
2551 #endif /* sparc_TARGET_ARCH */
2552
2553 #if powerpc_TARGET_ARCH
2554 genJump (CmmLit (CmmLabel lbl))
2555   = return (unitOL $ JMP lbl)
2556
2557 genJump tree
2558   = do
2559         (target,code) <- getSomeReg tree
2560         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2561 #endif /* powerpc_TARGET_ARCH */
2562
2563
2564 -- -----------------------------------------------------------------------------
2565 --  Unconditional branches
2566
2567 genBranch :: BlockId -> NatM InstrBlock
2568
2569 genBranch = return . toOL . mkBranchInstr
2570
2571 -- -----------------------------------------------------------------------------
2572 --  Conditional jumps
2573
2574 {-
2575 Conditional jumps are always to local labels, so we can use branch
2576 instructions.  We peek at the arguments to decide what kind of
2577 comparison to do.
2578
2579 ALPHA: For comparisons with 0, we're laughing, because we can just do
2580 the desired conditional branch.
2581
2582 I386: First, we have to ensure that the condition
2583 codes are set according to the supplied comparison operation.
2584
2585 SPARC: First, we have to ensure that the condition codes are set
2586 according to the supplied comparison operation.  We generate slightly
2587 different code for floating point comparisons, because a floating
2588 point operation cannot directly precede a @BF@.  We assume the worst
2589 and fill that slot with a @NOP@.
2590
2591 SPARC: Do not fill the delay slots here; you will confuse the register
2592 allocator.
2593 -}
2594
2595
2596 genCondJump
2597     :: BlockId      -- the branch target
2598     -> CmmExpr      -- the condition on which to branch
2599     -> NatM InstrBlock
2600
2601 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2602
2603 #if alpha_TARGET_ARCH
2604
2605 genCondJump id (StPrim op [x, StInt 0])
2606   = getRegister x                           `thenNat` \ register ->
2607     getNewRegNat (registerRep register)
2608                                     `thenNat` \ tmp ->
2609     let
2610         code   = registerCode register tmp
2611         value  = registerName register tmp
2612         pk     = registerRep register
2613         target = ImmCLbl lbl
2614     in
2615     returnSeq code [BI (cmpOp op) value target]
2616   where
2617     cmpOp CharGtOp = GTT
2618     cmpOp CharGeOp = GE
2619     cmpOp CharEqOp = EQQ
2620     cmpOp CharNeOp = NE
2621     cmpOp CharLtOp = LTT
2622     cmpOp CharLeOp = LE
2623     cmpOp IntGtOp = GTT
2624     cmpOp IntGeOp = GE
2625     cmpOp IntEqOp = EQQ
2626     cmpOp IntNeOp = NE
2627     cmpOp IntLtOp = LTT
2628     cmpOp IntLeOp = LE
2629     cmpOp WordGtOp = NE
2630     cmpOp WordGeOp = ALWAYS
2631     cmpOp WordEqOp = EQQ
2632     cmpOp WordNeOp = NE
2633     cmpOp WordLtOp = NEVER
2634     cmpOp WordLeOp = EQQ
2635     cmpOp AddrGtOp = NE
2636     cmpOp AddrGeOp = ALWAYS
2637     cmpOp AddrEqOp = EQQ
2638     cmpOp AddrNeOp = NE
2639     cmpOp AddrLtOp = NEVER
2640     cmpOp AddrLeOp = EQQ
2641
2642 genCondJump lbl (StPrim op [x, StDouble 0.0])
2643   = getRegister x                           `thenNat` \ register ->
2644     getNewRegNat (registerRep register)
2645                                     `thenNat` \ tmp ->
2646     let
2647         code   = registerCode register tmp
2648         value  = registerName register tmp
2649         pk     = registerRep register
2650         target = ImmCLbl lbl
2651     in
2652     return (code . mkSeqInstr (BF (cmpOp op) value target))
2653   where
2654     cmpOp FloatGtOp = GTT
2655     cmpOp FloatGeOp = GE
2656     cmpOp FloatEqOp = EQQ
2657     cmpOp FloatNeOp = NE
2658     cmpOp FloatLtOp = LTT
2659     cmpOp FloatLeOp = LE
2660     cmpOp DoubleGtOp = GTT
2661     cmpOp DoubleGeOp = GE
2662     cmpOp DoubleEqOp = EQQ
2663     cmpOp DoubleNeOp = NE
2664     cmpOp DoubleLtOp = LTT
2665     cmpOp DoubleLeOp = LE
2666
2667 genCondJump lbl (StPrim op [x, y])
2668   | fltCmpOp op
2669   = trivialFCode pr instr x y       `thenNat` \ register ->
2670     getNewRegNat F64                `thenNat` \ tmp ->
2671     let
2672         code   = registerCode register tmp
2673         result = registerName register tmp
2674         target = ImmCLbl lbl
2675     in
2676     return (code . mkSeqInstr (BF cond result target))
2677   where
2678     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2679
2680     fltCmpOp op = case op of
2681         FloatGtOp -> True
2682         FloatGeOp -> True
2683         FloatEqOp -> True
2684         FloatNeOp -> True
2685         FloatLtOp -> True
2686         FloatLeOp -> True
2687         DoubleGtOp -> True
2688         DoubleGeOp -> True
2689         DoubleEqOp -> True
2690         DoubleNeOp -> True
2691         DoubleLtOp -> True
2692         DoubleLeOp -> True
2693         _ -> False
2694     (instr, cond) = case op of
2695         FloatGtOp -> (FCMP TF LE, EQQ)
2696         FloatGeOp -> (FCMP TF LTT, EQQ)
2697         FloatEqOp -> (FCMP TF EQQ, NE)
2698         FloatNeOp -> (FCMP TF EQQ, EQQ)
2699         FloatLtOp -> (FCMP TF LTT, NE)
2700         FloatLeOp -> (FCMP TF LE, NE)
2701         DoubleGtOp -> (FCMP TF LE, EQQ)
2702         DoubleGeOp -> (FCMP TF LTT, EQQ)
2703         DoubleEqOp -> (FCMP TF EQQ, NE)
2704         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2705         DoubleLtOp -> (FCMP TF LTT, NE)
2706         DoubleLeOp -> (FCMP TF LE, NE)
2707
2708 genCondJump lbl (StPrim op [x, y])
2709   = trivialCode instr x y           `thenNat` \ register ->
2710     getNewRegNat IntRep             `thenNat` \ tmp ->
2711     let
2712         code   = registerCode register tmp
2713         result = registerName register tmp
2714         target = ImmCLbl lbl
2715     in
2716     return (code . mkSeqInstr (BI cond result target))
2717   where
2718     (instr, cond) = case op of
2719         CharGtOp -> (CMP LE, EQQ)
2720         CharGeOp -> (CMP LTT, EQQ)
2721         CharEqOp -> (CMP EQQ, NE)
2722         CharNeOp -> (CMP EQQ, EQQ)
2723         CharLtOp -> (CMP LTT, NE)
2724         CharLeOp -> (CMP LE, NE)
2725         IntGtOp -> (CMP LE, EQQ)
2726         IntGeOp -> (CMP LTT, EQQ)
2727         IntEqOp -> (CMP EQQ, NE)
2728         IntNeOp -> (CMP EQQ, EQQ)
2729         IntLtOp -> (CMP LTT, NE)
2730         IntLeOp -> (CMP LE, NE)
2731         WordGtOp -> (CMP ULE, EQQ)
2732         WordGeOp -> (CMP ULT, EQQ)
2733         WordEqOp -> (CMP EQQ, NE)
2734         WordNeOp -> (CMP EQQ, EQQ)
2735         WordLtOp -> (CMP ULT, NE)
2736         WordLeOp -> (CMP ULE, NE)
2737         AddrGtOp -> (CMP ULE, EQQ)
2738         AddrGeOp -> (CMP ULT, EQQ)
2739         AddrEqOp -> (CMP EQQ, NE)
2740         AddrNeOp -> (CMP EQQ, EQQ)
2741         AddrLtOp -> (CMP ULT, NE)
2742         AddrLeOp -> (CMP ULE, NE)
2743
2744 #endif /* alpha_TARGET_ARCH */
2745
2746 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2747
2748 #if i386_TARGET_ARCH
2749
2750 genCondJump id bool = do
2751   CondCode _ cond code <- getCondCode bool
2752   return (code `snocOL` JXX cond id)
2753
2754 #endif
2755
2756 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2757
2758 #if x86_64_TARGET_ARCH
2759
2760 genCondJump id bool = do
2761   CondCode is_float cond cond_code <- getCondCode bool
2762   if not is_float
2763     then
2764         return (cond_code `snocOL` JXX cond id)
2765     else do
2766         lbl <- getBlockIdNat
2767
2768         -- see comment with condFltReg
2769         let code = case cond of
2770                         NE  -> or_unordered
2771                         GU  -> plain_test
2772                         GEU -> plain_test
2773                         _   -> and_ordered
2774
2775             plain_test = unitOL (
2776                   JXX cond id
2777                 )
2778             or_unordered = toOL [
2779                   JXX cond id,
2780                   JXX PARITY id
2781                 ]
2782             and_ordered = toOL [
2783                   JXX PARITY lbl,
2784                   JXX cond id,
2785                   JXX ALWAYS lbl,
2786                   NEWBLOCK lbl
2787                 ]
2788         return (cond_code `appOL` code)
2789
2790 #endif
2791
2792 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2793
2794 #if sparc_TARGET_ARCH
2795
2796 genCondJump (BlockId id) bool = do
2797   CondCode is_float cond code <- getCondCode bool
2798   return (
2799        code `appOL`
2800        toOL (
2801          if   is_float
2802          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2803          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2804        )
2805     )
2806
2807 #endif /* sparc_TARGET_ARCH */
2808
2809
2810 #if powerpc_TARGET_ARCH
2811
2812 genCondJump id bool = do
2813   CondCode is_float cond code <- getCondCode bool
2814   return (code `snocOL` BCC cond id)
2815
2816 #endif /* powerpc_TARGET_ARCH */
2817
2818
2819 -- -----------------------------------------------------------------------------
2820 --  Generating C calls
2821
2822 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2823 -- @get_arg@, which moves the arguments to the correct registers/stack
2824 -- locations.  Apart from that, the code is easy.
2825 --
2826 -- (If applicable) Do not fill the delay slots here; you will confuse the
2827 -- register allocator.
2828
2829 genCCall
2830     :: CmmCallTarget            -- function to call
2831     -> [(CmmReg,MachHint)]      -- where to put the result
2832     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2833     -> Maybe [GlobalReg]        -- volatile regs to save
2834     -> NatM InstrBlock
2835
2836 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2837
2838 #if alpha_TARGET_ARCH
2839
2840 ccallResultRegs =
2841
2842 genCCall fn cconv result_regs args
2843   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2844                           `thenNat` \ ((unused,_), argCode) ->
2845     let
2846         nRegs = length allArgRegs - length unused
2847         code = asmSeqThen (map ($ []) argCode)
2848     in
2849         returnSeq code [
2850             LDA pv (AddrImm (ImmLab (ptext fn))),
2851             JSR ra (AddrReg pv) nRegs,
2852             LDGP gp (AddrReg ra)]
2853   where
2854     ------------------------
2855     {-  Try to get a value into a specific register (or registers) for
2856         a call.  The first 6 arguments go into the appropriate
2857         argument register (separate registers for integer and floating
2858         point arguments, but used in lock-step), and the remaining
2859         arguments are dumped to the stack, beginning at 0(sp).  Our
2860         first argument is a pair of the list of remaining argument
2861         registers to be assigned for this call and the next stack
2862         offset to use for overflowing arguments.  This way,
2863         @get_Arg@ can be applied to all of a call's arguments using
2864         @mapAccumLNat@.
2865     -}
2866     get_arg
2867         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2868         -> StixTree             -- Current argument
2869         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2870
2871     -- We have to use up all of our argument registers first...
2872
2873     get_arg ((iDst,fDst):dsts, offset) arg
2874       = getRegister arg                     `thenNat` \ register ->
2875         let
2876             reg  = if isFloatingRep pk then fDst else iDst
2877             code = registerCode register reg
2878             src  = registerName register reg
2879             pk   = registerRep register
2880         in
2881         return (
2882             if isFloatingRep pk then
2883                 ((dsts, offset), if isFixed register then
2884                     code . mkSeqInstr (FMOV src fDst)
2885                     else code)
2886             else
2887                 ((dsts, offset), if isFixed register then
2888                     code . mkSeqInstr (OR src (RIReg src) iDst)
2889                     else code))
2890
2891     -- Once we have run out of argument registers, we move to the
2892     -- stack...
2893
2894     get_arg ([], offset) arg
2895       = getRegister arg                 `thenNat` \ register ->
2896         getNewRegNat (registerRep register)
2897                                         `thenNat` \ tmp ->
2898         let
2899             code = registerCode register tmp
2900             src  = registerName register tmp
2901             pk   = registerRep register
2902             sz   = primRepToSize pk
2903         in
2904         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2905
2906 #endif /* alpha_TARGET_ARCH */
2907
2908 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2909
2910 #if i386_TARGET_ARCH
2911
2912 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
2913         -- write barrier compiles to no code on x86/x86-64;
2914         -- we keep it this long in order to prevent earlier optimisations.
2915
2916 -- we only cope with a single result for foreign calls
2917 genCCall (CmmPrim op) [(r,_)] args vols = do
2918   case op of
2919         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2920         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2921
2922         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2923         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2924
2925         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2926         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2927
2928         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2929         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2930
2931         other_op    -> outOfLineFloatOp op r args vols
2932  where
2933   actuallyInlineFloatOp rep instr [(x,_)]
2934         = do res <- trivialUFCode rep instr x
2935              any <- anyReg res
2936              return (any (getRegisterReg r))
2937
2938 genCCall target dest_regs args vols = do
2939     let
2940         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
2941 #if !darwin_TARGET_OS
2942         tot_arg_size        = sum sizes
2943 #else
2944         raw_arg_size        = sum sizes
2945         tot_arg_size        = roundTo 16 raw_arg_size
2946         arg_pad_size        = tot_arg_size - raw_arg_size
2947     delta0 <- getDeltaNat
2948     setDeltaNat (delta0 - arg_pad_size)
2949 #endif
2950
2951     push_codes <- mapM push_arg (reverse args)
2952     delta <- getDeltaNat
2953
2954     -- in
2955     -- deal with static vs dynamic call targets
2956     (callinsns,cconv) <-
2957       case target of
2958         -- CmmPrim -> ...
2959         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2960            -> -- ToDo: stdcall arg sizes
2961               return (unitOL (CALL (Left fn_imm) []), conv)
2962            where fn_imm = ImmCLbl lbl
2963         CmmForeignCall expr conv
2964            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2965                  ASSERT(dyn_rep == I32)
2966                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2967
2968     let push_code
2969 #if darwin_TARGET_OS
2970             | arg_pad_size /= 0
2971             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2972                     DELTA (delta0 - arg_pad_size)]
2973               `appOL` concatOL push_codes
2974             | otherwise
2975 #endif
2976             = concatOL push_codes
2977         call = callinsns `appOL`
2978                toOL (
2979                         -- Deallocate parameters after call for ccall;
2980                         -- but not for stdcall (callee does it)
2981                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
2982                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2983                   ++
2984                   [DELTA (delta + tot_arg_size)]
2985                )
2986     -- in
2987     setDeltaNat (delta + tot_arg_size)
2988
2989     let
2990         -- assign the results, if necessary
2991         assign_code []     = nilOL
2992         assign_code [(dest,_hint)] =
2993           case rep of
2994                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2995                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2996                 F32 -> unitOL (GMOV fake0 r_dest)
2997                 F64 -> unitOL (GMOV fake0 r_dest)
2998                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2999           where
3000                 r_dest_hi = getHiVRegFromLo r_dest
3001                 rep = cmmRegRep dest
3002                 r_dest = getRegisterReg dest
3003         assign_code many = panic "genCCall.assign_code many"
3004
3005     return (push_code `appOL`
3006             call `appOL`
3007             assign_code dest_regs)
3008
3009   where
3010     arg_size F64 = 8
3011     arg_size F32 = 4
3012     arg_size I64 = 8
3013     arg_size _   = 4
3014
3015     roundTo a x | x `mod` a == 0 = x
3016                 | otherwise = x + a - (x `mod` a)
3017
3018
3019     push_arg :: (CmmExpr,MachHint){-current argument-}
3020                     -> NatM InstrBlock  -- code
3021
3022     push_arg (arg,_hint) -- we don't need the hints on x86
3023       | arg_rep == I64 = do
3024         ChildCode64 code r_lo <- iselExpr64 arg
3025         delta <- getDeltaNat
3026         setDeltaNat (delta - 8)
3027         let
3028             r_hi = getHiVRegFromLo r_lo
3029         -- in
3030         return (       code `appOL`
3031                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3032                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3033                              DELTA (delta-8)]
3034             )
3035
3036       | otherwise = do
3037         (code, reg, sz) <- get_op arg
3038         delta <- getDeltaNat
3039         let size = arg_size sz
3040         setDeltaNat (delta-size)
3041         if (case sz of F64 -> True; F32 -> True; _ -> False)
3042            then return (code `appOL`
3043                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3044                               DELTA (delta-size),
3045                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3046                                                         EAIndexNone
3047                                                         (ImmInt 0))]
3048                        )
3049            else return (code `snocOL`
3050                         PUSH I32 (OpReg reg) `snocOL`
3051                         DELTA (delta-size)
3052                        )
3053       where
3054          arg_rep = cmmExprRep arg
3055
3056     ------------
3057     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3058     get_op op = do
3059         (reg,code) <- getSomeReg op
3060         return (code, reg, cmmExprRep op)
3061
3062 #endif /* i386_TARGET_ARCH */
3063
3064 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3065
3066 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3067   -> Maybe [GlobalReg] -> NatM InstrBlock
3068 outOfLineFloatOp mop res args vols
3069   = do
3070       targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3071       let target = CmmForeignCall targetExpr CCallConv
3072
3073       if cmmRegRep res == F64
3074         then
3075           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3076         else do
3077           uq <- getUniqueNat
3078           let
3079             tmp = CmmLocal (LocalReg uq F64)
3080           -- in
3081           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3082           code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3083           return (code1 `appOL` code2)
3084   where
3085         lbl = mkForeignLabel fn Nothing False
3086
3087         fn = case mop of
3088               MO_F32_Sqrt  -> FSLIT("sqrtf")
3089               MO_F32_Sin   -> FSLIT("sinf")
3090               MO_F32_Cos   -> FSLIT("cosf")
3091               MO_F32_Tan   -> FSLIT("tanf")
3092               MO_F32_Exp   -> FSLIT("expf")
3093               MO_F32_Log   -> FSLIT("logf")
3094
3095               MO_F32_Asin  -> FSLIT("asinf")
3096               MO_F32_Acos  -> FSLIT("acosf")
3097               MO_F32_Atan  -> FSLIT("atanf")
3098
3099               MO_F32_Sinh  -> FSLIT("sinhf")
3100               MO_F32_Cosh  -> FSLIT("coshf")
3101               MO_F32_Tanh  -> FSLIT("tanhf")
3102               MO_F32_Pwr   -> FSLIT("powf")
3103
3104               MO_F64_Sqrt  -> FSLIT("sqrt")
3105               MO_F64_Sin   -> FSLIT("sin")
3106               MO_F64_Cos   -> FSLIT("cos")
3107               MO_F64_Tan   -> FSLIT("tan")
3108               MO_F64_Exp   -> FSLIT("exp")
3109               MO_F64_Log   -> FSLIT("log")
3110
3111               MO_F64_Asin  -> FSLIT("asin")
3112               MO_F64_Acos  -> FSLIT("acos")
3113               MO_F64_Atan  -> FSLIT("atan")
3114
3115               MO_F64_Sinh  -> FSLIT("sinh")
3116               MO_F64_Cosh  -> FSLIT("cosh")
3117               MO_F64_Tanh  -> FSLIT("tanh")
3118               MO_F64_Pwr   -> FSLIT("pow")
3119
3120 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3121
3122 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3123
3124 #if x86_64_TARGET_ARCH
3125
3126 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3127         -- write barrier compiles to no code on x86/x86-64;
3128         -- we keep it this long in order to prevent earlier optimisations.
3129
3130 genCCall (CmmPrim op) [(r,_)] args vols =
3131   outOfLineFloatOp op r args vols
3132
3133 genCCall target dest_regs args vols = do
3134
3135         -- load up the register arguments
3136     (stack_args, aregs, fregs, load_args_code)
3137          <- load_args args allArgRegs allFPArgRegs nilOL
3138
3139     let
3140         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3141         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3142         arg_regs = int_regs_used ++ fp_regs_used
3143                 -- for annotating the call instruction with
3144
3145         sse_regs = length fp_regs_used
3146
3147         tot_arg_size = arg_size * length stack_args
3148
3149         -- On entry to the called function, %rsp should be aligned
3150         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3151         -- the return address is 16-byte aligned).  In STG land
3152         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3153         -- need to make sure we push a multiple of 16-bytes of args,
3154         -- plus the return address, to get the correct alignment.
3155         -- Urg, this is hard.  We need to feed the delta back into
3156         -- the arg pushing code.
3157     (real_size, adjust_rsp) <-
3158         if tot_arg_size `rem` 16 == 0
3159             then return (tot_arg_size, nilOL)
3160             else do -- we need to adjust...
3161                 delta <- getDeltaNat
3162                 setDeltaNat (delta-8)
3163                 return (tot_arg_size+8, toOL [
3164                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3165                                 DELTA (delta-8)
3166                         ])
3167
3168         -- push the stack args, right to left
3169     push_code <- push_args (reverse stack_args) nilOL
3170     delta <- getDeltaNat
3171
3172     -- deal with static vs dynamic call targets
3173     (callinsns,cconv) <-
3174       case target of
3175         -- CmmPrim -> ...
3176         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3177            -> -- ToDo: stdcall arg sizes
3178               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3179            where fn_imm = ImmCLbl lbl
3180         CmmForeignCall expr conv
3181            -> do (dyn_r, dyn_c) <- getSomeReg expr
3182                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3183
3184     let
3185         -- The x86_64 ABI requires us to set %al to the number of SSE
3186         -- registers that contain arguments, if the called routine
3187         -- is a varargs function.  We don't know whether it's a
3188         -- varargs function or not, so we have to assume it is.
3189         --
3190         -- It's not safe to omit this assignment, even if the number
3191         -- of SSE regs in use is zero.  If %al is larger than 8
3192         -- on entry to a varargs function, seg faults ensue.
3193         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3194
3195     let call = callinsns `appOL`
3196                toOL (
3197                         -- Deallocate parameters after call for ccall;
3198                         -- but not for stdcall (callee does it)
3199                   (if cconv == StdCallConv || real_size==0 then [] else
3200                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3201                   ++
3202                   [DELTA (delta + real_size)]
3203                )
3204     -- in
3205     setDeltaNat (delta + real_size)
3206
3207     let
3208         -- assign the results, if necessary
3209         assign_code []     = nilOL
3210         assign_code [(dest,_hint)] =
3211           case rep of
3212                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3213                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3214                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3215           where
3216                 rep = cmmRegRep dest
3217                 r_dest = getRegisterReg dest
3218         assign_code many = panic "genCCall.assign_code many"
3219
3220     return (load_args_code      `appOL`
3221             adjust_rsp          `appOL`
3222             push_code           `appOL`
3223             assign_eax sse_regs `appOL`
3224             call                `appOL`
3225             assign_code dest_regs)
3226
3227   where
3228     arg_size = 8 -- always, at the mo
3229
3230     load_args :: [(CmmExpr,MachHint)]
3231               -> [Reg]                  -- int regs avail for args
3232               -> [Reg]                  -- FP regs avail for args
3233               -> InstrBlock
3234               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3235     load_args args [] [] code     =  return (args, [], [], code)
3236         -- no more regs to use
3237     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3238         -- no more args to push
3239     load_args ((arg,hint) : rest) aregs fregs code
3240         | isFloatingRep arg_rep =
3241         case fregs of
3242           [] -> push_this_arg
3243           (r:rs) -> do
3244              arg_code <- getAnyReg arg
3245              load_args rest aregs rs (code `appOL` arg_code r)
3246         | otherwise =
3247         case aregs of
3248           [] -> push_this_arg
3249           (r:rs) -> do
3250              arg_code <- getAnyReg arg
3251              load_args rest rs fregs (code `appOL` arg_code r)
3252         where
3253           arg_rep = cmmExprRep arg
3254
3255           push_this_arg = do
3256             (args',ars,frs,code') <- load_args rest aregs fregs code
3257             return ((arg,hint):args', ars, frs, code')
3258
3259     push_args [] code = return code
3260     push_args ((arg,hint):rest) code
3261        | isFloatingRep arg_rep = do
3262          (arg_reg, arg_code) <- getSomeReg arg
3263          delta <- getDeltaNat
3264          setDeltaNat (delta-arg_size)
3265          let code' = code `appOL` toOL [
3266                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3267                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3268                         DELTA (delta-arg_size)]
3269          push_args rest code'
3270
3271        | otherwise = do
3272        -- we only ever generate word-sized function arguments.  Promotion
3273        -- has already happened: our Int8# type is kept sign-extended
3274        -- in an Int#, for example.
3275          ASSERT(arg_rep == I64) return ()
3276          (arg_op, arg_code) <- getOperand arg
3277          delta <- getDeltaNat
3278          setDeltaNat (delta-arg_size)
3279          let code' = code `appOL` toOL [PUSH I64 arg_op,
3280                                         DELTA (delta-arg_size)]
3281          push_args rest code'
3282         where
3283           arg_rep = cmmExprRep arg
3284 #endif
3285
3286 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3287
3288 #if sparc_TARGET_ARCH
3289 {-
3290    The SPARC calling convention is an absolute
3291    nightmare.  The first 6x32 bits of arguments are mapped into
3292    %o0 through %o5, and the remaining arguments are dumped to the
3293    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3294
3295    If we have to put args on the stack, move %o6==%sp down by
3296    the number of words to go on the stack, to ensure there's enough space.
3297
3298    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3299    16 words above the stack pointer is a word for the address of
3300    a structure return value.  I use this as a temporary location
3301    for moving values from float to int regs.  Certainly it isn't
3302    safe to put anything in the 16 words starting at %sp, since
3303    this area can get trashed at any time due to window overflows
3304    caused by signal handlers.
3305
3306    A final complication (if the above isn't enough) is that
3307    we can't blithely calculate the arguments one by one into
3308    %o0 .. %o5.  Consider the following nested calls:
3309
3310        fff a (fff b c)
3311
3312    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3313    the inner call will itself use %o0, which trashes the value put there
3314    in preparation for the outer call.  Upshot: we need to calculate the
3315    args into temporary regs, and move those to arg regs or onto the
3316    stack only immediately prior to the call proper.  Sigh.
3317 -}
3318
3319 genCCall target dest_regs argsAndHints vols = do
3320     let
3321         args = map fst argsAndHints
3322     argcode_and_vregs <- mapM arg_to_int_vregs args
3323     let
3324         (argcodes, vregss) = unzip argcode_and_vregs
3325         n_argRegs          = length allArgRegs
3326         n_argRegs_used     = min (length vregs) n_argRegs
3327         vregs              = concat vregss
3328     -- deal with static vs dynamic call targets
3329     callinsns <- (case target of
3330         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3331                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3332         CmmForeignCall expr conv -> do
3333                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3334                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3335         CmmPrim mop -> do
3336                   (res, reduce) <- outOfLineFloatOp mop
3337                   lblOrMopExpr <- case res of
3338                        Left lbl -> do
3339                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3340                        Right mopExpr -> do
3341                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3342                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3343                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3344
3345       )
3346     let
3347         argcode = concatOL argcodes
3348         (move_sp_down, move_sp_up)
3349            = let diff = length vregs - n_argRegs
3350                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3351              in  if   nn <= 0
3352                  then (nilOL, nilOL)
3353                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3354         transfer_code
3355            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3356     return (argcode       `appOL`
3357             move_sp_down  `appOL`
3358             transfer_code `appOL`
3359             callinsns     `appOL`
3360             unitOL NOP    `appOL`
3361             move_sp_up)
3362   where
3363      -- move args from the integer vregs into which they have been
3364      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3365      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3366
3367      move_final [] _ offset          -- all args done
3368         = []
3369
3370      move_final (v:vs) [] offset     -- out of aregs; move to stack
3371         = ST I32 v (spRel offset)
3372           : move_final vs [] (offset+1)
3373
3374      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3375         = OR False g0 (RIReg v) a
3376           : move_final vs az offset
3377
3378      -- generate code to calculate an argument, and move it into one
3379      -- or two integer vregs.
3380      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3381      arg_to_int_vregs arg
3382         | (cmmExprRep arg) == I64
3383         = do
3384           (ChildCode64 code r_lo) <- iselExpr64 arg
3385           let
3386               r_hi = getHiVRegFromLo r_lo
3387           return (code, [r_hi, r_lo])
3388         | otherwise
3389         = do
3390           (src, code) <- getSomeReg arg
3391           tmp <- getNewRegNat (cmmExprRep arg)
3392           let
3393               pk   = cmmExprRep arg
3394           case pk of
3395              F64 -> do
3396                       v1 <- getNewRegNat I32
3397                       v2 <- getNewRegNat I32
3398                       return (
3399                         code                          `snocOL`
3400                         FMOV F64 src f0                `snocOL`
3401                         ST   F32  f0 (spRel 16)         `snocOL`
3402                         LD   I32  (spRel 16) v1         `snocOL`
3403                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3404                         LD   I32  (spRel 16) v2
3405                         ,
3406                         [v1,v2]
3407                        )
3408              F32 -> do
3409                       v1 <- getNewRegNat I32
3410                       return (
3411                         code                    `snocOL`
3412                         ST   F32  src (spRel 16)  `snocOL`
3413                         LD   I32  (spRel 16) v1
3414                         ,
3415                         [v1]
3416                        )
3417              other -> do
3418                         v1 <- getNewRegNat I32
3419                         return (
3420                           code `snocOL` OR False g0 (RIReg src) v1
3421                           ,
3422                           [v1]
3423                          )
3424 outOfLineFloatOp mop =
3425     do
3426       mopExpr <- cmmMakeDynamicReference addImportNat True $
3427                   mkForeignLabel functionName Nothing True
3428       let mopLabelOrExpr = case mopExpr of
3429                         CmmLit (CmmLabel lbl) -> Left lbl
3430                         _ -> Right mopExpr
3431       return (mopLabelOrExpr, reduce)
3432             where
3433                 (reduce, functionName) = case mop of
3434                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3435                   MO_F32_Log    -> (True,  FSLIT("log"))
3436                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3437
3438                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3439                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3440                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3441
3442                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3443                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3444                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3445
3446                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3447                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3448                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3449
3450                   MO_F64_Exp    -> (False, FSLIT("exp"))
3451                   MO_F64_Log    -> (False, FSLIT("log"))
3452                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3453
3454                   MO_F64_Sin    -> (False, FSLIT("sin"))
3455                   MO_F64_Cos    -> (False, FSLIT("cos"))
3456                   MO_F64_Tan    -> (False, FSLIT("tan"))
3457
3458                   MO_F64_Asin   -> (False, FSLIT("asin"))
3459                   MO_F64_Acos   -> (False, FSLIT("acos"))
3460                   MO_F64_Atan   -> (False, FSLIT("atan"))
3461
3462                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3463                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3464                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3465
3466                   other -> pprPanic "outOfLineFloatOp(sparc) "
3467                                 (pprCallishMachOp mop)
3468
3469 #endif /* sparc_TARGET_ARCH */
3470
3471 #if powerpc_TARGET_ARCH
3472
3473 #if darwin_TARGET_OS || linux_TARGET_OS
3474 {-
3475     The PowerPC calling convention for Darwin/Mac OS X
3476     is described in Apple's document
3477     "Inside Mac OS X - Mach-O Runtime Architecture".
3478
3479     PowerPC Linux uses the System V Release 4 Calling Convention
3480     for PowerPC. It is described in the
3481     "System V Application Binary Interface PowerPC Processor Supplement".
3482
3483     Both conventions are similar:
3484     Parameters may be passed in general-purpose registers starting at r3, in
3485     floating point registers starting at f1, or on the stack.
3486
3487     But there are substantial differences:
3488     * The number of registers used for parameter passing and the exact set of
3489       nonvolatile registers differs (see MachRegs.lhs).
3490     * On Darwin, stack space is always reserved for parameters, even if they are
3491       passed in registers. The called routine may choose to save parameters from
3492       registers to the corresponding space on the stack.
3493     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3494       parameter is passed in an FPR.
3495     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3496       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3497       Darwin just treats an I64 like two separate I32s (high word first).
3498     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3499       4-byte aligned like everything else on Darwin.
3500     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3501       PowerPC Linux does not agree, so neither do we.
3502
3503     According to both conventions, The parameter area should be part of the
3504     caller's stack frame, allocated in the caller's prologue code (large enough
3505     to hold the parameter lists for all called routines). The NCG already
3506     uses the stack for register spilling, leaving 64 bytes free at the top.
3507     If we need a larger parameter area than that, we just allocate a new stack
3508     frame just before ccalling.
3509 -}
3510
3511 genCCall target dest_regs argsAndHints vols
3512   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3513         -- we rely on argument promotion in the codeGen
3514     do
3515         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3516                                                         (zip args argReps)
3517                                                         allArgRegs allFPArgRegs
3518                                                         initialStackOffset
3519                                                         (toOL []) []
3520
3521         (labelOrExpr, reduceToF32) <- case target of
3522             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3523             CmmForeignCall expr conv -> return  (Right expr, False)
3524             CmmPrim mop -> outOfLineFloatOp mop
3525
3526         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3527             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3528
3529         case labelOrExpr of
3530             Left lbl -> do
3531                 return (         codeBefore
3532                         `snocOL` BL lbl usedRegs
3533                         `appOL`  codeAfter)
3534             Right dyn -> do
3535                 (dynReg, dynCode) <- getSomeReg dyn
3536                 return (         dynCode
3537                         `snocOL` MTCTR dynReg
3538                         `appOL`  codeBefore
3539                         `snocOL` BCTRL usedRegs
3540                         `appOL`  codeAfter)
3541     where
3542 #if darwin_TARGET_OS
3543         initialStackOffset = 24
3544             -- size of linkage area + size of arguments, in bytes
3545         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3546                                        map machRepByteWidth argReps
3547 #elif linux_TARGET_OS
3548         initialStackOffset = 8
3549         stackDelta finalStack = roundTo 16 finalStack
3550 #endif
3551         args = map fst argsAndHints
3552         argReps = map cmmExprRep args
3553
3554         roundTo a x | x `mod` a == 0 = x
3555                     | otherwise = x + a - (x `mod` a)
3556
3557         move_sp_down finalStack
3558                | delta > 64 =
3559                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3560                               DELTA (-delta)]
3561                | otherwise = nilOL
3562                where delta = stackDelta finalStack
3563         move_sp_up finalStack
3564                | delta > 64 =
3565                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3566                               DELTA 0]
3567                | otherwise = nilOL
3568                where delta = stackDelta finalStack
3569
3570
3571         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3572         passArguments ((arg,I64):args) gprs fprs stackOffset
3573                accumCode accumUsed =
3574             do
3575                 ChildCode64 code vr_lo <- iselExpr64 arg
3576                 let vr_hi = getHiVRegFromLo vr_lo
3577
3578 #if darwin_TARGET_OS
3579                 passArguments args
3580                               (drop 2 gprs)
3581                               fprs
3582                               (stackOffset+8)
3583                               (accumCode `appOL` code
3584                                     `snocOL` storeWord vr_hi gprs stackOffset
3585                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3586                               ((take 2 gprs) ++ accumUsed)
3587             where
3588                 storeWord vr (gpr:_) offset = MR gpr vr
3589                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3590
3591 #elif linux_TARGET_OS
3592                 let stackOffset' = roundTo 8 stackOffset
3593                     stackCode = accumCode `appOL` code
3594                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3595                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3596                     regCode hireg loreg =
3597                         accumCode `appOL` code
3598                             `snocOL` MR hireg vr_hi
3599                             `snocOL` MR loreg vr_lo
3600
3601                 case gprs of
3602                     hireg : loreg : regs | even (length gprs) ->
3603                         passArguments args regs fprs stackOffset
3604                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3605                     _skipped : hireg : loreg : regs ->
3606                         passArguments args regs fprs stackOffset
3607                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3608                     _ -> -- only one or no regs left
3609                         passArguments args [] fprs (stackOffset'+8)
3610                                       stackCode accumUsed
3611 #endif
3612
3613         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3614             | reg : _ <- regs = do
3615                 register <- getRegister arg
3616                 let code = case register of
3617                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3618                             Any _ acode -> acode reg
3619                 passArguments args
3620                               (drop nGprs gprs)
3621                               (drop nFprs fprs)
3622 #if darwin_TARGET_OS
3623         -- The Darwin ABI requires that we reserve stack slots for register parameters
3624                               (stackOffset + stackBytes)
3625 #elif linux_TARGET_OS
3626         -- ... the SysV ABI doesn't.
3627                               stackOffset
3628 #endif
3629                               (accumCode `appOL` code)
3630                               (reg : accumUsed)
3631             | otherwise = do
3632                 (vr, code) <- getSomeReg arg
3633                 passArguments args
3634                               (drop nGprs gprs)
3635                               (drop nFprs fprs)
3636                               (stackOffset' + stackBytes)
3637                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3638                               accumUsed
3639             where
3640 #if darwin_TARGET_OS
3641         -- stackOffset is at least 4-byte aligned
3642         -- The Darwin ABI is happy with that.
3643                 stackOffset' = stackOffset
3644 #else
3645         -- ... the SysV ABI requires 8-byte alignment for doubles.
3646                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3647                              | otherwise  =           stackOffset
3648 #endif
3649                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3650                 (nGprs, nFprs, stackBytes, regs) = case rep of
3651                     I32 -> (1, 0, 4, gprs)
3652 #if darwin_TARGET_OS
3653         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3654         -- we use the FPRs.
3655                     F32 -> (1, 1, 4, fprs)
3656                     F64 -> (2, 1, 8, fprs)
3657 #elif linux_TARGET_OS
3658         -- ... the SysV ABI doesn't.
3659                     F32 -> (0, 1, 4, fprs)
3660                     F64 -> (0, 1, 8, fprs)
3661 #endif
3662
3663         moveResult reduceToF32 =
3664             case dest_regs of
3665                 [] -> nilOL
3666                 [(dest, _hint)]
3667                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3668                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3669                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3670                                           MR r_dest r4]
3671                     | otherwise -> unitOL (MR r_dest r3)
3672                     where rep = cmmRegRep dest
3673                           r_dest = getRegisterReg dest
3674
3675         outOfLineFloatOp mop =
3676             do
3677                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3678                               mkForeignLabel functionName Nothing True
3679                 let mopLabelOrExpr = case mopExpr of
3680                         CmmLit (CmmLabel lbl) -> Left lbl
3681                         _ -> Right mopExpr
3682                 return (mopLabelOrExpr, reduce)
3683             where
3684                 (functionName, reduce) = case mop of
3685                     MO_F32_Exp   -> (FSLIT("exp"), True)
3686                     MO_F32_Log   -> (FSLIT("log"), True)
3687                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3688
3689                     MO_F32_Sin   -> (FSLIT("sin"), True)
3690                     MO_F32_Cos   -> (FSLIT("cos"), True)
3691                     MO_F32_Tan   -> (FSLIT("tan"), True)
3692
3693                     MO_F32_Asin  -> (FSLIT("asin"), True)
3694                     MO_F32_Acos  -> (FSLIT("acos"), True)
3695                     MO_F32_Atan  -> (FSLIT("atan"), True)
3696
3697                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3698                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3699                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3700                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3701
3702                     MO_F64_Exp   -> (FSLIT("exp"), False)
3703                     MO_F64_Log   -> (FSLIT("log"), False)
3704                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3705
3706                     MO_F64_Sin   -> (FSLIT("sin"), False)
3707                     MO_F64_Cos   -> (FSLIT("cos"), False)
3708                     MO_F64_Tan   -> (FSLIT("tan"), False)
3709
3710                     MO_F64_Asin  -> (FSLIT("asin"), False)
3711                     MO_F64_Acos  -> (FSLIT("acos"), False)
3712                     MO_F64_Atan  -> (FSLIT("atan"), False)
3713
3714                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3715                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3716                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3717                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3718                     other -> pprPanic "genCCall(ppc): unknown callish op"
3719                                     (pprCallishMachOp other)
3720
3721 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3722
3723 #endif /* powerpc_TARGET_ARCH */
3724
3725
3726 -- -----------------------------------------------------------------------------
3727 -- Generating a table-branch
3728
3729 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3730
3731 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3732 genSwitch expr ids
3733   | opt_PIC
3734   = do
3735         (reg,e_code) <- getSomeReg expr
3736         lbl <- getNewLabelNat
3737         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3738         (tableReg,t_code) <- getSomeReg $ dynRef
3739         let
3740             jumpTable = map jumpTableEntryRel ids
3741
3742             jumpTableEntryRel Nothing
3743                 = CmmStaticLit (CmmInt 0 wordRep)
3744             jumpTableEntryRel (Just (BlockId id))
3745                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3746                 where blockLabel = mkAsmTempLabel id
3747
3748             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3749                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3750
3751             code = e_code `appOL` t_code `appOL` toOL [
3752                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3753                             ADD wordRep op (OpReg tableReg),
3754                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3755                     ]
3756         return code
3757   | otherwise
3758   = do
3759         (reg,e_code) <- getSomeReg expr
3760         lbl <- getNewLabelNat
3761         let
3762             jumpTable = map jumpTableEntry ids
3763             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3764             code = e_code `appOL` toOL [
3765                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3766                     JMP_TBL op [ id | Just id <- ids ]
3767                  ]
3768         -- in
3769         return code
3770 #elif powerpc_TARGET_ARCH
3771 genSwitch expr ids
3772   | opt_PIC
3773   = do
3774         (reg,e_code) <- getSomeReg expr
3775         tmp <- getNewRegNat I32
3776         lbl <- getNewLabelNat
3777         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3778         (tableReg,t_code) <- getSomeReg $ dynRef
3779         let
3780             jumpTable = map jumpTableEntryRel ids
3781
3782             jumpTableEntryRel Nothing
3783                 = CmmStaticLit (CmmInt 0 wordRep)
3784             jumpTableEntryRel (Just (BlockId id))
3785                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3786                 where blockLabel = mkAsmTempLabel id
3787
3788             code = e_code `appOL` t_code `appOL` toOL [
3789                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3790                             SLW tmp reg (RIImm (ImmInt 2)),
3791                             LD I32 tmp (AddrRegReg tableReg tmp),
3792                             ADD tmp tmp (RIReg tableReg),
3793                             MTCTR tmp,
3794                             BCTR [ id | Just id <- ids ]
3795                     ]
3796         return code
3797   | otherwise
3798   = do
3799         (reg,e_code) <- getSomeReg expr
3800         tmp <- getNewRegNat I32
3801         lbl <- getNewLabelNat
3802         let
3803             jumpTable = map jumpTableEntry ids
3804
3805             code = e_code `appOL` toOL [
3806                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3807                             SLW tmp reg (RIImm (ImmInt 2)),
3808                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3809                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3810                             MTCTR tmp,
3811                             BCTR [ id | Just id <- ids ]
3812                     ]
3813         return code
3814 #else
3815 genSwitch expr ids = panic "ToDo: genSwitch"
3816 #endif
3817
3818 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3819 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3820     where blockLabel = mkAsmTempLabel id
3821
3822 -- -----------------------------------------------------------------------------
3823 -- Support bits
3824 -- -----------------------------------------------------------------------------
3825
3826
3827 -- -----------------------------------------------------------------------------
3828 -- 'condIntReg' and 'condFltReg': condition codes into registers
3829
3830 -- Turn those condition codes into integers now (when they appear on
3831 -- the right hand side of an assignment).
3832 --
3833 -- (If applicable) Do not fill the delay slots here; you will confuse the
3834 -- register allocator.
3835
3836 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3837
3838 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3839
3840 #if alpha_TARGET_ARCH
3841 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3842 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3843 #endif /* alpha_TARGET_ARCH */
3844
3845 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3846
3847 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3848
3849 condIntReg cond x y = do
3850   CondCode _ cond cond_code <- condIntCode cond x y
3851   tmp <- getNewRegNat I8
3852   let
3853         code dst = cond_code `appOL` toOL [
3854                     SETCC cond (OpReg tmp),
3855                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3856                   ]
3857   -- in
3858   return (Any I32 code)
3859
3860 #endif
3861
3862 #if i386_TARGET_ARCH
3863
3864 condFltReg cond x y = do
3865   CondCode _ cond cond_code <- condFltCode cond x y
3866   tmp <- getNewRegNat I8
3867   let
3868         code dst = cond_code `appOL` toOL [
3869                     SETCC cond (OpReg tmp),
3870                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3871                   ]
3872   -- in
3873   return (Any I32 code)
3874
3875 #endif
3876
3877 #if x86_64_TARGET_ARCH
3878
3879 condFltReg cond x y = do
3880   CondCode _ cond cond_code <- condFltCode cond x y
3881   tmp1 <- getNewRegNat wordRep
3882   tmp2 <- getNewRegNat wordRep
3883   let
3884         -- We have to worry about unordered operands (eg. comparisons
3885         -- against NaN).  If the operands are unordered, the comparison
3886         -- sets the parity flag, carry flag and zero flag.
3887         -- All comparisons are supposed to return false for unordered
3888         -- operands except for !=, which returns true.
3889         --
3890         -- Optimisation: we don't have to test the parity flag if we
3891         -- know the test has already excluded the unordered case: eg >
3892         -- and >= test for a zero carry flag, which can only occur for
3893         -- ordered operands.
3894         --
3895         -- ToDo: by reversing comparisons we could avoid testing the
3896         -- parity flag in more cases.
3897
3898         code dst =
3899            cond_code `appOL`
3900              (case cond of
3901                 NE  -> or_unordered dst
3902                 GU  -> plain_test   dst
3903                 GEU -> plain_test   dst
3904                 _   -> and_ordered  dst)
3905
3906         plain_test dst = toOL [
3907                     SETCC cond (OpReg tmp1),
3908                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3909                  ]
3910         or_unordered dst = toOL [
3911                     SETCC cond (OpReg tmp1),
3912                     SETCC PARITY (OpReg tmp2),
3913                     OR I8 (OpReg tmp1) (OpReg tmp2),
3914                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3915                   ]
3916         and_ordered dst = toOL [
3917                     SETCC cond (OpReg tmp1),
3918                     SETCC NOTPARITY (OpReg tmp2),
3919                     AND I8 (OpReg tmp1) (OpReg tmp2),
3920                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3921                   ]
3922   -- in
3923   return (Any I32 code)
3924
3925 #endif
3926
3927 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3928
3929 #if sparc_TARGET_ARCH
3930
3931 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3932     (src, code) <- getSomeReg x
3933     tmp <- getNewRegNat I32
3934     let
3935         code__2 dst = code `appOL` toOL [
3936             SUB False True g0 (RIReg src) g0,
3937             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3938     return (Any I32 code__2)
3939
3940 condIntReg EQQ x y = do
3941     (src1, code1) <- getSomeReg x
3942     (src2, code2) <- getSomeReg y
3943     tmp1 <- getNewRegNat I32
3944     tmp2 <- getNewRegNat I32
3945     let
3946         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3947             XOR False src1 (RIReg src2) dst,
3948             SUB False True g0 (RIReg dst) g0,
3949             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3950     return (Any I32 code__2)
3951
3952 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3953     (src, code) <- getSomeReg x
3954     tmp <- getNewRegNat I32
3955     let
3956         code__2 dst = code `appOL` toOL [
3957             SUB False True g0 (RIReg src) g0,
3958             ADD True False g0 (RIImm (ImmInt 0)) dst]
3959     return (Any I32 code__2)
3960
3961 condIntReg NE x y = do
3962     (src1, code1) <- getSomeReg x
3963     (src2, code2) <- getSomeReg y
3964     tmp1 <- getNewRegNat I32
3965     tmp2 <- getNewRegNat I32
3966     let
3967         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3968             XOR False src1 (RIReg src2) dst,
3969             SUB False True g0 (RIReg dst) g0,
3970             ADD True False g0 (RIImm (ImmInt 0)) dst]
3971     return (Any I32 code__2)
3972
3973 condIntReg cond x y = do
3974     BlockId lbl1 <- getBlockIdNat
3975     BlockId lbl2 <- getBlockIdNat
3976     CondCode _ cond cond_code <- condIntCode cond x y
3977     let
3978         code__2 dst = cond_code `appOL` toOL [
3979             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3980             OR False g0 (RIImm (ImmInt 0)) dst,
3981             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3982             NEWBLOCK (BlockId lbl1),
3983             OR False g0 (RIImm (ImmInt 1)) dst,
3984             NEWBLOCK (BlockId lbl2)]
3985     return (Any I32 code__2)
3986
3987 condFltReg cond x y = do
3988     BlockId lbl1 <- getBlockIdNat
3989     BlockId lbl2 <- getBlockIdNat
3990     CondCode _ cond cond_code <- condFltCode cond x y
3991     let
3992         code__2 dst = cond_code `appOL` toOL [
3993             NOP,
3994             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3995             OR False g0 (RIImm (ImmInt 0)) dst,
3996             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3997             NEWBLOCK (BlockId lbl1),
3998             OR False g0 (RIImm (ImmInt 1)) dst,
3999             NEWBLOCK (BlockId lbl2)]
4000     return (Any I32 code__2)
4001
4002 #endif /* sparc_TARGET_ARCH */
4003
4004 #if powerpc_TARGET_ARCH
4005 condReg getCond = do
4006     lbl1 <- getBlockIdNat
4007     lbl2 <- getBlockIdNat
4008     CondCode _ cond cond_code <- getCond
4009     let
4010 {-        code dst = cond_code `appOL` toOL [
4011                 BCC cond lbl1,
4012                 LI dst (ImmInt 0),
4013                 BCC ALWAYS lbl2,
4014                 NEWBLOCK lbl1,
4015                 LI dst (ImmInt 1),
4016                 BCC ALWAYS lbl2,
4017                 NEWBLOCK lbl2
4018             ]-}
4019         code dst = cond_code
4020             `appOL` negate_code
4021             `appOL` toOL [
4022                 MFCR dst,
4023                 RLWINM dst dst (bit + 1) 31 31
4024             ]
4025
4026         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4027                     | otherwise = nilOL
4028
4029         (bit, do_negate) = case cond of
4030             LTT -> (0, False)
4031             LE  -> (1, True)
4032             EQQ -> (2, False)
4033             GE  -> (0, True)
4034             GTT -> (1, False)
4035
4036             NE  -> (2, True)
4037
4038             LU  -> (0, False)
4039             LEU -> (1, True)
4040             GEU -> (0, True)
4041             GU  -> (1, False)
4042
4043     return (Any I32 code)
4044
4045 condIntReg cond x y = condReg (condIntCode cond x y)
4046 condFltReg cond x y = condReg (condFltCode cond x y)
4047 #endif /* powerpc_TARGET_ARCH */
4048
4049
4050 -- -----------------------------------------------------------------------------
4051 -- 'trivial*Code': deal with trivial instructions
4052
4053 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4054 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4055 -- Only look for constants on the right hand side, because that's
4056 -- where the generic optimizer will have put them.
4057
4058 -- Similarly, for unary instructions, we don't have to worry about
4059 -- matching an StInt as the argument, because genericOpt will already
4060 -- have handled the constant-folding.
4061
4062 trivialCode
4063     :: MachRep
4064     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4065       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4066                      -> Maybe (Operand -> Operand -> Instr)
4067       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4068                      -> Maybe (Operand -> Operand -> Instr)
4069       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4070       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4071       ,)))))
4072     -> CmmExpr -> CmmExpr -- the two arguments
4073     -> NatM Register
4074
4075 #ifndef powerpc_TARGET_ARCH
4076 trivialFCode
4077     :: MachRep
4078     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4079       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4080       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4081       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4082       ,))))
4083     -> CmmExpr -> CmmExpr -- the two arguments
4084     -> NatM Register
4085 #endif
4086
4087 trivialUCode
4088     :: MachRep
4089     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4090       ,IF_ARCH_i386 ((Operand -> Instr)
4091       ,IF_ARCH_x86_64 ((Operand -> Instr)
4092       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4093       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4094       ,)))))
4095     -> CmmExpr  -- the one argument
4096     -> NatM Register
4097
4098 #ifndef powerpc_TARGET_ARCH
4099 trivialUFCode
4100     :: MachRep
4101     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4102       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4103       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4104       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4105       ,))))
4106     -> CmmExpr -- the one argument
4107     -> NatM Register
4108 #endif
4109
4110 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4111
4112 #if alpha_TARGET_ARCH
4113
4114 trivialCode instr x (StInt y)
4115   | fits8Bits y
4116   = getRegister x               `thenNat` \ register ->
4117     getNewRegNat IntRep         `thenNat` \ tmp ->
4118     let
4119         code = registerCode register tmp
4120         src1 = registerName register tmp
4121         src2 = ImmInt (fromInteger y)
4122         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4123     in
4124     return (Any IntRep code__2)
4125
4126 trivialCode instr x y
4127   = getRegister x               `thenNat` \ register1 ->
4128     getRegister y               `thenNat` \ register2 ->
4129     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4130     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4131     let
4132         code1 = registerCode register1 tmp1 []
4133         src1  = registerName register1 tmp1
4134         code2 = registerCode register2 tmp2 []
4135         src2  = registerName register2 tmp2
4136         code__2 dst = asmSeqThen [code1, code2] .
4137                      mkSeqInstr (instr src1 (RIReg src2) dst)
4138     in
4139     return (Any IntRep code__2)
4140
4141 ------------
4142 trivialUCode instr x
4143   = getRegister x               `thenNat` \ register ->
4144     getNewRegNat IntRep         `thenNat` \ tmp ->
4145     let
4146         code = registerCode register tmp
4147         src  = registerName register tmp
4148         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4149     in
4150     return (Any IntRep code__2)
4151
4152 ------------
4153 trivialFCode _ instr x y
4154   = getRegister x               `thenNat` \ register1 ->
4155     getRegister y               `thenNat` \ register2 ->
4156     getNewRegNat F64    `thenNat` \ tmp1 ->
4157     getNewRegNat F64    `thenNat` \ tmp2 ->
4158     let
4159         code1 = registerCode register1 tmp1
4160         src1  = registerName register1 tmp1
4161
4162         code2 = registerCode register2 tmp2
4163         src2  = registerName register2 tmp2
4164
4165         code__2 dst = asmSeqThen [code1 [], code2 []] .
4166                       mkSeqInstr (instr src1 src2 dst)
4167     in
4168     return (Any F64 code__2)
4169
4170 trivialUFCode _ instr x
4171   = getRegister x               `thenNat` \ register ->
4172     getNewRegNat F64    `thenNat` \ tmp ->
4173     let
4174         code = registerCode register tmp
4175         src  = registerName register tmp
4176         code__2 dst = code . mkSeqInstr (instr src dst)
4177     in
4178     return (Any F64 code__2)
4179
4180 #endif /* alpha_TARGET_ARCH */
4181
4182 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4183
4184 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4185
4186 {-
4187 The Rules of the Game are:
4188
4189 * You cannot assume anything about the destination register dst;
4190   it may be anything, including a fixed reg.
4191
4192 * You may compute an operand into a fixed reg, but you may not
4193   subsequently change the contents of that fixed reg.  If you
4194   want to do so, first copy the value either to a temporary
4195   or into dst.  You are free to modify dst even if it happens
4196   to be a fixed reg -- that's not your problem.
4197
4198 * You cannot assume that a fixed reg will stay live over an
4199   arbitrary computation.  The same applies to the dst reg.
4200
4201 * Temporary regs obtained from getNewRegNat are distinct from
4202   each other and from all other regs, and stay live over
4203   arbitrary computations.
4204
4205 --------------------
4206
4207 SDM's version of The Rules:
4208
4209 * If getRegister returns Any, that means it can generate correct
4210   code which places the result in any register, period.  Even if that
4211   register happens to be read during the computation.
4212
4213   Corollary #1: this means that if you are generating code for an
4214   operation with two arbitrary operands, you cannot assign the result
4215   of the first operand into the destination register before computing
4216   the second operand.  The second operand might require the old value
4217   of the destination register.
4218
4219   Corollary #2: A function might be able to generate more efficient
4220   code if it knows the destination register is a new temporary (and
4221   therefore not read by any of the sub-computations).
4222
4223 * If getRegister returns Any, then the code it generates may modify only:
4224         (a) fresh temporaries
4225         (b) the destination register
4226         (c) known registers (eg. %ecx is used by shifts)
4227   In particular, it may *not* modify global registers, unless the global
4228   register happens to be the destination register.
4229 -}
4230
4231 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4232   | not (is64BitLit lit_a) = do
4233   b_code <- getAnyReg b
4234   let
4235        code dst
4236          = b_code dst `snocOL`
4237            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4238   -- in
4239   return (Any rep code)
4240
4241 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4242
4243 -- This is re-used for floating pt instructions too.
4244 genTrivialCode rep instr a b = do
4245   (b_op, b_code) <- getNonClobberedOperand b
4246   a_code <- getAnyReg a
4247   tmp <- getNewRegNat rep
4248   let
4249      -- We want the value of b to stay alive across the computation of a.
4250      -- But, we want to calculate a straight into the destination register,
4251      -- because the instruction only has two operands (dst := dst `op` src).
4252      -- The troublesome case is when the result of b is in the same register
4253      -- as the destination reg.  In this case, we have to save b in a
4254      -- new temporary across the computation of a.
4255      code dst
4256         | dst `regClashesWithOp` b_op =
4257                 b_code `appOL`
4258                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4259                 a_code dst `snocOL`
4260                 instr (OpReg tmp) (OpReg dst)
4261         | otherwise =
4262                 b_code `appOL`
4263                 a_code dst `snocOL`
4264                 instr b_op (OpReg dst)
4265   -- in
4266   return (Any rep code)
4267
4268 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4269 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4270 reg `regClashesWithOp` _            = False
4271
4272 -----------
4273
4274 trivialUCode rep instr x = do
4275   x_code <- getAnyReg x
4276   let
4277      code dst =
4278         x_code dst `snocOL`
4279         instr (OpReg dst)
4280   -- in
4281   return (Any rep code)
4282
4283 -----------
4284
4285 #if i386_TARGET_ARCH
4286
4287 trivialFCode pk instr x y = do
4288   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4289   (y_reg, y_code) <- getSomeReg y
4290   let
4291      code dst =
4292         x_code `appOL`
4293         y_code `snocOL`
4294         instr pk x_reg y_reg dst
4295   -- in
4296   return (Any pk code)
4297
4298 #endif
4299
4300 #if x86_64_TARGET_ARCH
4301
4302 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4303
4304 #endif
4305
4306 -------------
4307
4308 trivialUFCode rep instr x = do
4309   (x_reg, x_code) <- getSomeReg x
4310   let
4311      code dst =
4312         x_code `snocOL`
4313         instr x_reg dst
4314   -- in
4315   return (Any rep code)
4316
4317 #endif /* i386_TARGET_ARCH */
4318
4319 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4320
4321 #if sparc_TARGET_ARCH
4322
4323 trivialCode pk instr x (CmmLit (CmmInt y d))
4324   | fits13Bits y
4325   = do
4326       (src1, code) <- getSomeReg x
4327       tmp <- getNewRegNat I32
4328       let
4329         src2 = ImmInt (fromInteger y)
4330         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4331       return (Any I32 code__2)
4332
4333 trivialCode pk instr x y = do
4334     (src1, code1) <- getSomeReg x
4335     (src2, code2) <- getSomeReg y
4336     tmp1 <- getNewRegNat I32
4337     tmp2 <- getNewRegNat I32
4338     let
4339         code__2 dst = code1 `appOL` code2 `snocOL`
4340                       instr src1 (RIReg src2) dst
4341     return (Any I32 code__2)
4342
4343 ------------
4344 trivialFCode pk instr x y = do
4345     (src1, code1) <- getSomeReg x
4346     (src2, code2) <- getSomeReg y
4347     tmp1 <- getNewRegNat (cmmExprRep x)
4348     tmp2 <- getNewRegNat (cmmExprRep y)
4349     tmp <- getNewRegNat F64
4350     let
4351         promote x = FxTOy F32 F64 x tmp
4352
4353         pk1   = cmmExprRep x
4354         pk2   = cmmExprRep y
4355
4356         code__2 dst =
4357                 if pk1 == pk2 then
4358                     code1 `appOL` code2 `snocOL`
4359                     instr pk src1 src2 dst
4360                 else if pk1 == F32 then
4361                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4362                     instr F64 tmp src2 dst
4363                 else
4364                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4365                     instr F64 src1 tmp dst
4366     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4367
4368 ------------
4369 trivialUCode pk instr x = do
4370     (src, code) <- getSomeReg x
4371     tmp <- getNewRegNat pk
4372     let
4373         code__2 dst = code `snocOL` instr (RIReg src) dst
4374     return (Any pk code__2)
4375
4376 -------------
4377 trivialUFCode pk instr x = do
4378     (src, code) <- getSomeReg x
4379     tmp <- getNewRegNat pk
4380     let
4381         code__2 dst = code `snocOL` instr src dst
4382     return (Any pk code__2)
4383
4384 #endif /* sparc_TARGET_ARCH */
4385
4386 #if powerpc_TARGET_ARCH
4387
4388 {-
4389 Wolfgang's PowerPC version of The Rules:
4390
4391 A slightly modified version of The Rules to take advantage of the fact
4392 that PowerPC instructions work on all registers and don't implicitly
4393 clobber any fixed registers.
4394
4395 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4396
4397 * If getRegister returns Any, then the code it generates may modify only:
4398         (a) fresh temporaries
4399         (b) the destination register
4400   It may *not* modify global registers, unless the global
4401   register happens to be the destination register.
4402   It may not clobber any other registers. In fact, only ccalls clobber any
4403   fixed registers.
4404   Also, it may not modify the counter register (used by genCCall).
4405
4406   Corollary: If a getRegister for a subexpression returns Fixed, you need
4407   not move it to a fresh temporary before evaluating the next subexpression.
4408   The Fixed register won't be modified.
4409   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4410
4411 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4412   the value of the destination register.
4413 -}
4414
4415 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4416     | Just imm <- makeImmediate rep signed y
4417     = do
4418         (src1, code1) <- getSomeReg x
4419         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4420         return (Any rep code)
4421
4422 trivialCode rep signed instr x y = do
4423     (src1, code1) <- getSomeReg x
4424     (src2, code2) <- getSomeReg y
4425     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4426     return (Any rep code)
4427
4428 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4429     -> CmmExpr -> CmmExpr -> NatM Register
4430 trivialCodeNoImm rep instr x y = do
4431     (src1, code1) <- getSomeReg x
4432     (src2, code2) <- getSomeReg y
4433     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4434     return (Any rep code)
4435
4436 trivialUCode rep instr x = do
4437     (src, code) <- getSomeReg x
4438     let code' dst = code `snocOL` instr dst src
4439     return (Any rep code')
4440
4441 -- There is no "remainder" instruction on the PPC, so we have to do
4442 -- it the hard way.
4443 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4444
4445 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4446     -> CmmExpr -> CmmExpr -> NatM Register
4447 remainderCode rep div x y = do
4448     (src1, code1) <- getSomeReg x
4449     (src2, code2) <- getSomeReg y
4450     let code dst = code1 `appOL` code2 `appOL` toOL [
4451                 div dst src1 src2,
4452                 MULLW dst dst (RIReg src2),
4453                 SUBF dst dst src1
4454             ]
4455     return (Any rep code)
4456
4457 #endif /* powerpc_TARGET_ARCH */
4458
4459
4460 -- -----------------------------------------------------------------------------
4461 --  Coercing to/from integer/floating-point...
4462
4463 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4464 -- conversions.  We have to store temporaries in memory to move
4465 -- between the integer and the floating point register sets.
4466
4467 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4468 -- pretend, on sparc at least, that double and float regs are seperate
4469 -- kinds, so the value has to be computed into one kind before being
4470 -- explicitly "converted" to live in the other kind.
4471
4472 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4473 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4474
4475 #if sparc_TARGET_ARCH
4476 coerceDbl2Flt :: CmmExpr -> NatM Register
4477 coerceFlt2Dbl :: CmmExpr -> NatM Register
4478 #endif
4479
4480 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4481
4482 #if alpha_TARGET_ARCH
4483
4484 coerceInt2FP _ x
4485   = getRegister x               `thenNat` \ register ->
4486     getNewRegNat IntRep         `thenNat` \ reg ->
4487     let
4488         code = registerCode register reg
4489         src  = registerName register reg
4490
4491         code__2 dst = code . mkSeqInstrs [
4492             ST Q src (spRel 0),
4493             LD TF dst (spRel 0),
4494             CVTxy Q TF dst dst]
4495     in
4496     return (Any F64 code__2)
4497
4498 -------------
4499 coerceFP2Int x
4500   = getRegister x               `thenNat` \ register ->
4501     getNewRegNat F64    `thenNat` \ tmp ->
4502     let
4503         code = registerCode register tmp
4504         src  = registerName register tmp
4505
4506         code__2 dst = code . mkSeqInstrs [
4507             CVTxy TF Q src tmp,
4508             ST TF tmp (spRel 0),
4509             LD Q dst (spRel 0)]
4510     in
4511     return (Any IntRep code__2)
4512
4513 #endif /* alpha_TARGET_ARCH */
4514
4515 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4516
4517 #if i386_TARGET_ARCH
4518
4519 coerceInt2FP from to x = do
4520   (x_reg, x_code) <- getSomeReg x
4521   let
4522         opc  = case to of F32 -> GITOF; F64 -> GITOD
4523         code dst = x_code `snocOL` opc x_reg dst
4524         -- ToDo: works for non-I32 reps?
4525   -- in
4526   return (Any to code)
4527
4528 ------------
4529
4530 coerceFP2Int from to x = do
4531   (x_reg, x_code) <- getSomeReg x
4532   let
4533         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4534         code dst = x_code `snocOL` opc x_reg dst
4535         -- ToDo: works for non-I32 reps?
4536   -- in
4537   return (Any to code)
4538
4539 #endif /* i386_TARGET_ARCH */
4540
4541 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4542
4543 #if x86_64_TARGET_ARCH
4544
4545 coerceFP2Int from to x = do
4546   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4547   let
4548         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4549         code dst = x_code `snocOL` opc x_op dst
4550   -- in
4551   return (Any to code) -- works even if the destination rep is <I32
4552
4553 coerceInt2FP from to x = do
4554   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4555   let
4556         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4557         code dst = x_code `snocOL` opc x_op dst
4558   -- in
4559   return (Any to code) -- works even if the destination rep is <I32
4560
4561 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4562 coerceFP2FP to x = do
4563   (x_reg, x_code) <- getSomeReg x
4564   let
4565         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4566         code dst = x_code `snocOL` opc x_reg dst
4567   -- in
4568   return (Any to code)
4569
4570 #endif
4571
4572 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4573
4574 #if sparc_TARGET_ARCH
4575
4576 coerceInt2FP pk1 pk2 x = do
4577     (src, code) <- getSomeReg x
4578     let
4579         code__2 dst = code `appOL` toOL [
4580             ST pk1 src (spRel (-2)),
4581             LD pk1 (spRel (-2)) dst,
4582             FxTOy pk1 pk2 dst dst]
4583     return (Any pk2 code__2)
4584
4585 ------------
4586 coerceFP2Int pk fprep x = do
4587     (src, code) <- getSomeReg x
4588     reg <- getNewRegNat fprep
4589     tmp <- getNewRegNat pk
4590     let
4591         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4592             code `appOL` toOL [
4593             FxTOy fprep pk src tmp,
4594             ST pk tmp (spRel (-2)),
4595             LD pk (spRel (-2)) dst]
4596     return (Any pk code__2)
4597
4598 ------------
4599 coerceDbl2Flt x = do
4600     (src, code) <- getSomeReg x
4601     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4602
4603 ------------
4604 coerceFlt2Dbl x = do
4605     (src, code) <- getSomeReg x
4606     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4607
4608 #endif /* sparc_TARGET_ARCH */
4609
4610 #if powerpc_TARGET_ARCH
4611 coerceInt2FP fromRep toRep x = do
4612     (src, code) <- getSomeReg x
4613     lbl <- getNewLabelNat
4614     itmp <- getNewRegNat I32
4615     ftmp <- getNewRegNat F64
4616     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4617     Amode addr addr_code <- getAmode dynRef
4618     let
4619         code' dst = code `appOL` maybe_exts `appOL` toOL [
4620                 LDATA ReadOnlyData
4621                                 [CmmDataLabel lbl,
4622                                  CmmStaticLit (CmmInt 0x43300000 I32),
4623                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4624                 XORIS itmp src (ImmInt 0x8000),
4625                 ST I32 itmp (spRel 3),
4626                 LIS itmp (ImmInt 0x4330),
4627                 ST I32 itmp (spRel 2),
4628                 LD F64 ftmp (spRel 2)
4629             ] `appOL` addr_code `appOL` toOL [
4630                 LD F64 dst addr,
4631                 FSUB F64 dst ftmp dst
4632             ] `appOL` maybe_frsp dst
4633
4634         maybe_exts = case fromRep of
4635                         I8 ->  unitOL $ EXTS I8 src src
4636                         I16 -> unitOL $ EXTS I16 src src
4637                         I32 -> nilOL
4638         maybe_frsp dst = case toRep of
4639                         F32 -> unitOL $ FRSP dst dst
4640                         F64 -> nilOL
4641     return (Any toRep code')
4642
4643 coerceFP2Int fromRep toRep x = do
4644     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4645     (src, code) <- getSomeReg x
4646     tmp <- getNewRegNat F64
4647     let
4648         code' dst = code `appOL` toOL [
4649                 -- convert to int in FP reg
4650             FCTIWZ tmp src,
4651                 -- store value (64bit) from FP to stack
4652             ST F64 tmp (spRel 2),
4653                 -- read low word of value (high word is undefined)
4654             LD I32 dst (spRel 3)]
4655     return (Any toRep code')
4656 #endif /* powerpc_TARGET_ARCH */
4657
4658
4659 -- -----------------------------------------------------------------------------
4660 -- eXTRA_STK_ARGS_HERE
4661
4662 -- We (allegedly) put the first six C-call arguments in registers;
4663 -- where do we start putting the rest of them?
4664
4665 -- Moved from MachInstrs (SDM):
4666
4667 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4668 eXTRA_STK_ARGS_HERE :: Int
4669 eXTRA_STK_ARGS_HERE
4670   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4671 #endif
4672