ghc/compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  25
  26 -- Our intermediate code:
  27 import PprCmm           ( pprExpr )
  28 import Cmm
  29 import MachOp
  30 import CLabel
  31
  32 -- The rest:
  33 import StaticFlags      ( opt_PIC )
  34 import ForeignCall      ( CCallConv(..) )
  35 import OrdList
  36 import Pretty
  37 import Outputable
  38 import FastString
  39 import FastTypes        ( isFastTrue )
  40 import Constants        ( wORD_SIZE )
  41
  42 #ifdef DEBUG
  43 import Outputable       ( assertPanic )
  44 import TRACE            ( trace )
  45 #endif
  46
  47 import Control.Monad    ( mapAndUnzipM )
  48 import Maybe            ( fromJust )
  49 import DATA_BITS
  50 import DATA_WORD
  51
  52 -- -----------------------------------------------------------------------------
  53 -- Top-level of the instruction selector
  54
  55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  56 -- They are really trees of insns to facilitate fast appending, where a
  57 -- left-to-right traversal (pre-order?) yields the insns in the correct
  58 -- order.
  59
  60 type InstrBlock = OrdList Instr
  61
  62 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  63 cmmTopCodeGen (CmmProc info lab params blocks) = do
  64   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  65   picBaseMb <- getPicBaseMaybeNat
  66   let proc = CmmProc info lab params (concat nat_blocks)
  67       tops = proc : concat statics
  68   case picBaseMb of
  69       Just picBase -> initializePicBase picBase tops
  70       Nothing -> return tops
  71
  72 cmmTopCodeGen (CmmData sec dat) = do
  73   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  74
  75 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  76 basicBlockCodeGen (BasicBlock id stmts) = do
  77   instrs <- stmtsToInstrs stmts
  78   -- code generation may introduce new basic block boundaries, which
  79   -- are indicated by the NEWBLOCK instruction.  We must split up the
  80   -- instruction stream into basic blocks again.  Also, we extract
  81   -- LDATAs here too.
  82   let
  83         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  84
  85         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  86           = ([], BasicBlock id instrs : blocks, statics)
  87         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  88           = (instrs, blocks, CmmData sec dat:statics)
  89         mkBlocks instr (instrs,blocks,statics)
  90           = (instr:instrs, blocks, statics)
  91   -- in
  92   return (BasicBlock id top : other_blocks, statics)
  93
  94 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  95 stmtsToInstrs stmts
  96    = do instrss <- mapM stmtToInstrs stmts
  97         return (concatOL instrss)
  98
  99 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 100 stmtToInstrs stmt = case stmt of
 101     CmmNop         -> return nilOL
 102     CmmComment s   -> return (unitOL (COMMENT s))
 103
 104     CmmAssign reg src
 105       | isFloatingRep kind -> assignReg_FltCode kind reg src
 106 #if WORD_SIZE_IN_BITS==32
 107       | kind == I64        -> assignReg_I64Code      reg src
 108 #endif
 109       | otherwise          -> assignReg_IntCode kind reg src
 110         where kind = cmmRegRep reg
 111
 112     CmmStore addr src
 113       | isFloatingRep kind -> assignMem_FltCode kind addr src
 114 #if WORD_SIZE_IN_BITS==32
 115       | kind == I64      -> assignMem_I64Code      addr src
 116 #endif
 117       | otherwise        -> assignMem_IntCode kind addr src
 118         where kind = cmmExprRep src
 119
 120     CmmCall target result_regs args vols
 121        -> genCCall target result_regs args vols
 122
 123     CmmBranch id          -> genBranch id
 124     CmmCondBranch arg id  -> genCondJump id arg
 125     CmmSwitch arg ids     -> genSwitch arg ids
 126     CmmJump arg params    -> genJump arg
 127
 128 -- -----------------------------------------------------------------------------
 129 -- General things for putting together code sequences
 130
 131 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 132 -- CmmExprs into CmmRegOff?
 133 mangleIndexTree :: CmmExpr -> CmmExpr
 134 mangleIndexTree (CmmRegOff reg off)
 135   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 136   where rep = cmmRegRep reg
 137
 138 -- -----------------------------------------------------------------------------
 139 --  Code gen for 64-bit arithmetic on 32-bit platforms
 140
 141 {-
 142 Simple support for generating 64-bit code (ie, 64 bit values and 64
 143 bit assignments) on 32-bit platforms.  Unlike the main code generator
 144 we merely shoot for generating working code as simply as possible, and
 145 pay little attention to code quality.  Specifically, there is no
 146 attempt to deal cleverly with the fixed-vs-floating register
 147 distinction; all values are generated into (pairs of) floating
 148 registers, even if this would mean some redundant reg-reg moves as a
 149 result.  Only one of the VRegUniques is returned, since it will be
 150 of the VRegUniqueLo form, and the upper-half VReg can be determined
 151 by applying getHiVRegFromLo to it.
 152 -}
 153
 154 data ChildCode64        -- a.k.a "Register64"
 155    = ChildCode64
 156         InstrBlock      -- code
 157         Reg             -- the lower 32-bit temporary which contains the
 158                         -- result; use getHiVRegFromLo to find the other
 159                         -- VRegUnique.  Rules of this simplified insn
 160                         -- selection game are therefore that the returned
 161                         -- Reg may be modified
 162
 163 #if WORD_SIZE_IN_BITS==32
 164 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 165 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 166 #endif
 167
 168 #ifndef x86_64_TARGET_ARCH
 169 iselExpr64        :: CmmExpr -> NatM ChildCode64
 170 #endif
 171
 172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 173
 174 #if i386_TARGET_ARCH
 175
 176 assignMem_I64Code addrTree valueTree = do
 177   Amode addr addr_code <- getAmode addrTree
 178   ChildCode64 vcode rlo <- iselExpr64 valueTree
 179   let
 180         rhi = getHiVRegFromLo rlo
 181
 182         -- Little-endian store
 183         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 184         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 185   -- in
 186   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 187
 188
 189 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 190    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 191    let
 192          r_dst_lo = mkVReg u_dst I32
 193          r_dst_hi = getHiVRegFromLo r_dst_lo
 194          r_src_hi = getHiVRegFromLo r_src_lo
 195          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 196          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 197    -- in
 198    return (
 199         vcode `snocOL` mov_lo `snocOL` mov_hi
 200      )
 201
 202 assignReg_I64Code lvalue valueTree
 203    = panic "assignReg_I64Code(i386): invalid lvalue"
 204
 205 ------------
 206
 207 iselExpr64 (CmmLit (CmmInt i _)) = do
 208   (rlo,rhi) <- getNewRegPairNat I32
 209   let
 210         r = fromIntegral (fromIntegral i :: Word32)
 211         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 212         code = toOL [
 213                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 214                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 215                 ]
 216   -- in
 217   return (ChildCode64 code rlo)
 218
 219 iselExpr64 (CmmLoad addrTree I64) = do
 220    Amode addr addr_code <- getAmode addrTree
 221    (rlo,rhi) <- getNewRegPairNat I32
 222    let
 223         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 224         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 225    -- in
 226    return (
 227             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 228                         rlo
 229      )
 230
 231 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 232    = return (ChildCode64 nilOL (mkVReg vu I32))
 233
 234 -- we handle addition, but rather badly
 235 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 236    ChildCode64 code1 r1lo <- iselExpr64 e1
 237    (rlo,rhi) <- getNewRegPairNat I32
 238    let
 239         r = fromIntegral (fromIntegral i :: Word32)
 240         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 241         r1hi = getHiVRegFromLo r1lo
 242         code =  code1 `appOL`
 243                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 244                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 245                        MOV I32 (OpReg r1hi) (OpReg rhi),
 246                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 247    -- in
 248    return (ChildCode64 code rlo)
 249
 250 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 251    ChildCode64 code1 r1lo <- iselExpr64 e1
 252    ChildCode64 code2 r2lo <- iselExpr64 e2
 253    (rlo,rhi) <- getNewRegPairNat I32
 254    let
 255         r1hi = getHiVRegFromLo r1lo
 256         r2hi = getHiVRegFromLo r2lo
 257         code =  code1 `appOL`
 258                 code2 `appOL`
 259                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 260                        ADD I32 (OpReg r2lo) (OpReg rlo),
 261                        MOV I32 (OpReg r1hi) (OpReg rhi),
 262                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 263    -- in
 264    return (ChildCode64 code rlo)
 265
 266 iselExpr64 expr
 267    = pprPanic "iselExpr64(i386)" (ppr expr)
 268
 269 #endif /* i386_TARGET_ARCH */
 270
 271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 272
 273 #if sparc_TARGET_ARCH
 274
 275 assignMem_I64Code addrTree valueTree = do
 276      Amode addr addr_code <- getAmode addrTree
 277      ChildCode64 vcode rlo <- iselExpr64 valueTree
 278      (src, code) <- getSomeReg addrTree
 279      let
 280          rhi = getHiVRegFromLo rlo
 281          -- Big-endian store
 282          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 283          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 284      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 285
 286 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 287      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 288      let
 289          r_dst_lo = mkVReg u_dst pk
 290          r_dst_hi = getHiVRegFromLo r_dst_lo
 291          r_src_hi = getHiVRegFromLo r_src_lo
 292          mov_lo = mkMOV r_src_lo r_dst_lo
 293          mov_hi = mkMOV r_src_hi r_dst_hi
 294          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 295      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 296 assignReg_I64Code lvalue valueTree
 297    = panic "assignReg_I64Code(sparc): invalid lvalue"
 298
 299
 300 -- Don't delete this -- it's very handy for debugging.
 301 --iselExpr64 expr
 302 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 303 --   = panic "iselExpr64(???)"
 304
 305 iselExpr64 (CmmLoad addrTree I64) = do
 306      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 307      rlo <- getNewRegNat I32
 308      let rhi = getHiVRegFromLo rlo
 309          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 310          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 311      return (
 312             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 313                          rlo
 314           )
 315
 316 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 317      r_dst_lo <-  getNewRegNat I32
 318      let r_dst_hi = getHiVRegFromLo r_dst_lo
 319          r_src_lo = mkVReg uq I32
 320          r_src_hi = getHiVRegFromLo r_src_lo
 321          mov_lo = mkMOV r_src_lo r_dst_lo
 322          mov_hi = mkMOV r_src_hi r_dst_hi
 323          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 324      return (
 325             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 326          )
 327
 328 iselExpr64 expr
 329    = pprPanic "iselExpr64(sparc)" (ppr expr)
 330
 331 #endif /* sparc_TARGET_ARCH */
 332
 333 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 334
 335 #if powerpc_TARGET_ARCH
 336
 337 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 338 getI64Amodes addrTree = do
 339     Amode hi_addr addr_code <- getAmode addrTree
 340     case addrOffset hi_addr 4 of
 341         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 342         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 343                            return (AddrRegImm hi_ptr (ImmInt 0),
 344                                    AddrRegImm hi_ptr (ImmInt 4),
 345                                    code)
 346
 347 assignMem_I64Code addrTree valueTree = do
 348         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 349         ChildCode64 vcode rlo <- iselExpr64 valueTree
 350         let
 351                 rhi = getHiVRegFromLo rlo
 352
 353                 -- Big-endian store
 354                 mov_hi = ST I32 rhi hi_addr
 355                 mov_lo = ST I32 rlo lo_addr
 356         -- in
 357         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 358
 359 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 360    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 361    let
 362          r_dst_lo = mkVReg u_dst I32
 363          r_dst_hi = getHiVRegFromLo r_dst_lo
 364          r_src_hi = getHiVRegFromLo r_src_lo
 365          mov_lo = MR r_dst_lo r_src_lo
 366          mov_hi = MR r_dst_hi r_src_hi
 367    -- in
 368    return (
 369         vcode `snocOL` mov_lo `snocOL` mov_hi
 370      )
 371
 372 assignReg_I64Code lvalue valueTree
 373    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 374
 375
 376 -- Don't delete this -- it's very handy for debugging.
 377 --iselExpr64 expr
 378 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 379 --   = panic "iselExpr64(???)"
 380
 381 iselExpr64 (CmmLoad addrTree I64) = do
 382     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 383     (rlo, rhi) <- getNewRegPairNat I32
 384     let mov_hi = LD I32 rhi hi_addr
 385         mov_lo = LD I32 rlo lo_addr
 386     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 387                          rlo
 388
 389 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
 390    = return (ChildCode64 nilOL (mkVReg vu I32))
 391
 392 iselExpr64 (CmmLit (CmmInt i _)) = do
 393   (rlo,rhi) <- getNewRegPairNat I32
 394   let
 395         half0 = fromIntegral (fromIntegral i :: Word16)
 396         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 397         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 398         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 399
 400         code = toOL [
 401                 LIS rlo (ImmInt half1),
 402                 OR rlo rlo (RIImm $ ImmInt half0),
 403                 LIS rhi (ImmInt half3),
 404                 OR rlo rlo (RIImm $ ImmInt half2)
 405                 ]
 406   -- in
 407   return (ChildCode64 code rlo)
 408
 409 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 410    ChildCode64 code1 r1lo <- iselExpr64 e1
 411    ChildCode64 code2 r2lo <- iselExpr64 e2
 412    (rlo,rhi) <- getNewRegPairNat I32
 413    let
 414         r1hi = getHiVRegFromLo r1lo
 415         r2hi = getHiVRegFromLo r2lo
 416         code =  code1 `appOL`
 417                 code2 `appOL`
 418                 toOL [ ADDC rlo r1lo r2lo,
 419                        ADDE rhi r1hi r2hi ]
 420    -- in
 421    return (ChildCode64 code rlo)
 422
 423 iselExpr64 expr
 424    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 425
 426 #endif /* powerpc_TARGET_ARCH */
 427
 428
 429 -- -----------------------------------------------------------------------------
 430 -- The 'Register' type
 431
 432 -- 'Register's passed up the tree.  If the stix code forces the register
 433 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 434 -- otherwise, it comes out as @Any@, and the parent can decide which
 435 -- register to put it in.
 436
 437 data Register
 438   = Fixed   MachRep Reg InstrBlock
 439   | Any     MachRep (Reg -> InstrBlock)
 440
 441 swizzleRegisterRep :: Register -> MachRep -> Register
 442 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 443 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 444
 445
 446 -- -----------------------------------------------------------------------------
 447 -- Utils based on getRegister, below
 448
 449 -- The dual to getAnyReg: compute an expression into a register, but
 450 -- we don't mind which one it is.
 451 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 452 getSomeReg expr = do
 453   r <- getRegister expr
 454   case r of
 455     Any rep code -> do
 456         tmp <- getNewRegNat rep
 457         return (tmp, code tmp)
 458     Fixed _ reg code ->
 459         return (reg, code)
 460
 461 -- -----------------------------------------------------------------------------
 462 -- Grab the Reg for a CmmReg
 463
 464 getRegisterReg :: CmmReg -> Reg
 465
 466 getRegisterReg (CmmLocal (LocalReg u pk))
 467   = mkVReg u pk
 468
 469 getRegisterReg (CmmGlobal mid)
 470   = case get_GlobalReg_reg_or_addr mid of
 471        Left (RealReg rrno) -> RealReg rrno
 472        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 473           -- By this stage, the only MagicIds remaining should be the
 474           -- ones which map to a real machine register on this
 475           -- platform.  Hence ...
 476
 477
 478 -- -----------------------------------------------------------------------------
 479 -- Generate code to get a subtree into a Register
 480
 481 -- Don't delete this -- it's very handy for debugging.
 482 --getRegister expr
 483 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 484 --   = panic "getRegister(???)"
 485
 486 getRegister :: CmmExpr -> NatM Register
 487
 488 getRegister (CmmReg (CmmGlobal PicBaseReg))
 489   = do
 490       reg <- getPicBaseNat wordRep
 491       return (Fixed wordRep reg nilOL)
 492
 493 getRegister (CmmReg reg)
 494   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 495
 496 getRegister tree@(CmmRegOff _ _)
 497   = getRegister (mangleIndexTree tree)
 498
 499 -- end of machine-"independent" bit; here we go on the rest...
 500
 501 #if alpha_TARGET_ARCH
 502
 503 getRegister (StDouble d)
 504   = getBlockIdNat                   `thenNat` \ lbl ->
 505     getNewRegNat PtrRep             `thenNat` \ tmp ->
 506     let code dst = mkSeqInstrs [
 507             LDATA RoDataSegment lbl [
 508                     DATA TF [ImmLab (rational d)]
 509                 ],
 510             LDA tmp (AddrImm (ImmCLbl lbl)),
 511             LD TF dst (AddrReg tmp)]
 512     in
 513         return (Any F64 code)
 514
 515 getRegister (StPrim primop [x]) -- unary PrimOps
 516   = case primop of
 517       IntNegOp -> trivialUCode (NEG Q False) x
 518
 519       NotOp    -> trivialUCode NOT x
 520
 521       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 522       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 523
 524       OrdOp -> coerceIntCode IntRep x
 525       ChrOp -> chrCode x
 526
 527       Float2IntOp  -> coerceFP2Int    x
 528       Int2FloatOp  -> coerceInt2FP pr x
 529       Double2IntOp -> coerceFP2Int    x
 530       Int2DoubleOp -> coerceInt2FP pr x
 531
 532       Double2FloatOp -> coerceFltCode x
 533       Float2DoubleOp -> coerceFltCode x
 534
 535       other_op -> getRegister (StCall fn CCallConv F64 [x])
 536         where
 537           fn = case other_op of
 538                  FloatExpOp    -> FSLIT("exp")
 539                  FloatLogOp    -> FSLIT("log")
 540                  FloatSqrtOp   -> FSLIT("sqrt")
 541                  FloatSinOp    -> FSLIT("sin")
 542                  FloatCosOp    -> FSLIT("cos")
 543                  FloatTanOp    -> FSLIT("tan")
 544                  FloatAsinOp   -> FSLIT("asin")
 545                  FloatAcosOp   -> FSLIT("acos")
 546                  FloatAtanOp   -> FSLIT("atan")
 547                  FloatSinhOp   -> FSLIT("sinh")
 548                  FloatCoshOp   -> FSLIT("cosh")
 549                  FloatTanhOp   -> FSLIT("tanh")
 550                  DoubleExpOp   -> FSLIT("exp")
 551                  DoubleLogOp   -> FSLIT("log")
 552                  DoubleSqrtOp  -> FSLIT("sqrt")
 553                  DoubleSinOp   -> FSLIT("sin")
 554                  DoubleCosOp   -> FSLIT("cos")
 555                  DoubleTanOp   -> FSLIT("tan")
 556                  DoubleAsinOp  -> FSLIT("asin")
 557                  DoubleAcosOp  -> FSLIT("acos")
 558                  DoubleAtanOp  -> FSLIT("atan")
 559                  DoubleSinhOp  -> FSLIT("sinh")
 560                  DoubleCoshOp  -> FSLIT("cosh")
 561                  DoubleTanhOp  -> FSLIT("tanh")
 562   where
 563     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 564
 565 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 566   = case primop of
 567       CharGtOp -> trivialCode (CMP LTT) y x
 568       CharGeOp -> trivialCode (CMP LE) y x
 569       CharEqOp -> trivialCode (CMP EQQ) x y
 570       CharNeOp -> int_NE_code x y
 571       CharLtOp -> trivialCode (CMP LTT) x y
 572       CharLeOp -> trivialCode (CMP LE) x y
 573
 574       IntGtOp  -> trivialCode (CMP LTT) y x
 575       IntGeOp  -> trivialCode (CMP LE) y x
 576       IntEqOp  -> trivialCode (CMP EQQ) x y
 577       IntNeOp  -> int_NE_code x y
 578       IntLtOp  -> trivialCode (CMP LTT) x y
 579       IntLeOp  -> trivialCode (CMP LE) x y
 580
 581       WordGtOp -> trivialCode (CMP ULT) y x
 582       WordGeOp -> trivialCode (CMP ULE) x y
 583       WordEqOp -> trivialCode (CMP EQQ)  x y
 584       WordNeOp -> int_NE_code x y
 585       WordLtOp -> trivialCode (CMP ULT) x y
 586       WordLeOp -> trivialCode (CMP ULE) x y
 587
 588       AddrGtOp -> trivialCode (CMP ULT) y x
 589       AddrGeOp -> trivialCode (CMP ULE) y x
 590       AddrEqOp -> trivialCode (CMP EQQ)  x y
 591       AddrNeOp -> int_NE_code x y
 592       AddrLtOp -> trivialCode (CMP ULT) x y
 593       AddrLeOp -> trivialCode (CMP ULE) x y
 594
 595       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 596       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 597       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 598       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 599       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 600       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 601
 602       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 603       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 604       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 605       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 606       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 607       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 608
 609       IntAddOp  -> trivialCode (ADD Q False) x y
 610       IntSubOp  -> trivialCode (SUB Q False) x y
 611       IntMulOp  -> trivialCode (MUL Q False) x y
 612       IntQuotOp -> trivialCode (DIV Q False) x y
 613       IntRemOp  -> trivialCode (REM Q False) x y
 614
 615       WordAddOp  -> trivialCode (ADD Q False) x y
 616       WordSubOp  -> trivialCode (SUB Q False) x y
 617       WordMulOp  -> trivialCode (MUL Q False) x y
 618       WordQuotOp -> trivialCode (DIV Q True) x y
 619       WordRemOp  -> trivialCode (REM Q True) x y
 620
 621       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 622       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 623       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 624       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 625
 626       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 627       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 628       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 629       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 630
 631       AddrAddOp  -> trivialCode (ADD Q False) x y
 632       AddrSubOp  -> trivialCode (SUB Q False) x y
 633       AddrRemOp  -> trivialCode (REM Q True) x y
 634
 635       AndOp  -> trivialCode AND x y
 636       OrOp   -> trivialCode OR  x y
 637       XorOp  -> trivialCode XOR x y
 638       SllOp  -> trivialCode SLL x y
 639       SrlOp  -> trivialCode SRL x y
 640
 641       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 642       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 643       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 644
 645       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 646       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 647   where
 648     {- ------------------------------------------------------------
 649         Some bizarre special code for getting condition codes into
 650         registers.  Integer non-equality is a test for equality
 651         followed by an XOR with 1.  (Integer comparisons always set
 652         the result register to 0 or 1.)  Floating point comparisons of
 653         any kind leave the result in a floating point register, so we
 654         need to wrangle an integer register out of things.
 655     -}
 656     int_NE_code :: StixTree -> StixTree -> NatM Register
 657
 658     int_NE_code x y
 659       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 660         getNewRegNat IntRep             `thenNat` \ tmp ->
 661         let
 662             code = registerCode register tmp
 663             src  = registerName register tmp
 664             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 665         in
 666         return (Any IntRep code__2)
 667
 668     {- ------------------------------------------------------------
 669         Comments for int_NE_code also apply to cmpF_code
 670     -}
 671     cmpF_code
 672         :: (Reg -> Reg -> Reg -> Instr)
 673         -> Cond
 674         -> StixTree -> StixTree
 675         -> NatM Register
 676
 677     cmpF_code instr cond x y
 678       = trivialFCode pr instr x y       `thenNat` \ register ->
 679         getNewRegNat F64                `thenNat` \ tmp ->
 680         getBlockIdNat                   `thenNat` \ lbl ->
 681         let
 682             code = registerCode register tmp
 683             result  = registerName register tmp
 684
 685             code__2 dst = code . mkSeqInstrs [
 686                 OR zeroh (RIImm (ImmInt 1)) dst,
 687                 BF cond  result (ImmCLbl lbl),
 688                 OR zeroh (RIReg zeroh) dst,
 689                 NEWBLOCK lbl]
 690         in
 691         return (Any IntRep code__2)
 692       where
 693         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 694       ------------------------------------------------------------
 695
 696 getRegister (CmmLoad pk mem)
 697   = getAmode mem                    `thenNat` \ amode ->
 698     let
 699         code = amodeCode amode
 700         src   = amodeAddr amode
 701         size = primRepToSize pk
 702         code__2 dst = code . mkSeqInstr (LD size dst src)
 703     in
 704     return (Any pk code__2)
 705
 706 getRegister (StInt i)
 707   | fits8Bits i
 708   = let
 709         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 710     in
 711     return (Any IntRep code)
 712   | otherwise
 713   = let
 714         code dst = mkSeqInstr (LDI Q dst src)
 715     in
 716     return (Any IntRep code)
 717   where
 718     src = ImmInt (fromInteger i)
 719
 720 getRegister leaf
 721   | isJust imm
 722   = let
 723         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 724     in
 725     return (Any PtrRep code)
 726   where
 727     imm = maybeImm leaf
 728     imm__2 = case imm of Just x -> x
 729
 730 #endif /* alpha_TARGET_ARCH */
 731
 732 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 733
 734 #if i386_TARGET_ARCH
 735
 736 getRegister (CmmLit (CmmFloat f F32)) = do
 737     lbl <- getNewLabelNat
 738     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 739     Amode addr addr_code <- getAmode dynRef
 740     let code dst =
 741             LDATA ReadOnlyData
 742                         [CmmDataLabel lbl,
 743                          CmmStaticLit (CmmFloat f F32)]
 744             `consOL` (addr_code `snocOL`
 745             GLD F32 addr dst)
 746     -- in
 747     return (Any F32 code)
 748
 749
 750 getRegister (CmmLit (CmmFloat d F64))
 751   | d == 0.0
 752   = let code dst = unitOL (GLDZ dst)
 753     in  return (Any F64 code)
 754
 755   | d == 1.0
 756   = let code dst = unitOL (GLD1 dst)
 757     in  return (Any F64 code)
 758
 759   | otherwise = do
 760     lbl <- getNewLabelNat
 761     dynRef <- cmmMakeDynamicReference addImportNat False lbl
 762     Amode addr addr_code <- getAmode dynRef
 763     let code dst =
 764             LDATA ReadOnlyData
 765                         [CmmDataLabel lbl,
 766                          CmmStaticLit (CmmFloat d F64)]
 767             `consOL` (addr_code `snocOL`
 768             GLD F64 addr dst)
 769     -- in
 770     return (Any F64 code)
 771
 772 #endif /* i386_TARGET_ARCH */
 773
 774 #if x86_64_TARGET_ARCH
 775
 776 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 777    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 778         -- I don't know why there are xorpd, xorps, and pxor instructions.
 779         -- They all appear to do the same thing --SDM
 780    return (Any rep code)
 781
 782 getRegister (CmmLit (CmmFloat f rep)) = do
 783     lbl <- getNewLabelNat
 784     let code dst = toOL [
 785             LDATA ReadOnlyData
 786                         [CmmDataLabel lbl,
 787                          CmmStaticLit (CmmFloat f rep)],
 788             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 789             ]
 790     -- in
 791     return (Any rep code)
 792
 793 #endif /* x86_64_TARGET_ARCH */
 794
 795 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 796
 797 -- catch simple cases of zero- or sign-extended load
 798 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 799   code <- intLoadCode (MOVZxL I8) addr
 800   return (Any I32 code)
 801
 802 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 803   code <- intLoadCode (MOVSxL I8) addr
 804   return (Any I32 code)
 805
 806 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 807   code <- intLoadCode (MOVZxL I16) addr
 808   return (Any I32 code)
 809
 810 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 811   code <- intLoadCode (MOVSxL I16) addr
 812   return (Any I32 code)
 813
 814 #endif
 815
 816 #if x86_64_TARGET_ARCH
 817
 818 -- catch simple cases of zero- or sign-extended load
 819 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 820   code <- intLoadCode (MOVZxL I8) addr
 821   return (Any I64 code)
 822
 823 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 824   code <- intLoadCode (MOVSxL I8) addr
 825   return (Any I64 code)
 826
 827 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 828   code <- intLoadCode (MOVZxL I16) addr
 829   return (Any I64 code)
 830
 831 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 832   code <- intLoadCode (MOVSxL I16) addr
 833   return (Any I64 code)
 834
 835 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 836   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 837   return (Any I64 code)
 838
 839 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 840   code <- intLoadCode (MOVSxL I32) addr
 841   return (Any I64 code)
 842
 843 #endif
 844
 845 #if x86_64_TARGET_ARCH
 846 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 847   x_code <- getAnyReg x
 848   lbl <- getNewLabelNat
 849   let
 850     code dst = x_code dst `appOL` toOL [
 851         -- This is how gcc does it, so it can't be that bad:
 852         LDATA ReadOnlyData16 [
 853                 CmmAlign 16,
 854                 CmmDataLabel lbl,
 855                 CmmStaticLit (CmmInt 0x80000000 I32),
 856                 CmmStaticLit (CmmInt 0 I32),
 857                 CmmStaticLit (CmmInt 0 I32),
 858                 CmmStaticLit (CmmInt 0 I32)
 859         ],
 860         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 861                 -- xorps, so we need the 128-bit constant
 862                 -- ToDo: rip-relative
 863         ]
 864   --
 865   return (Any F32 code)
 866
 867 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 868   x_code <- getAnyReg x
 869   lbl <- getNewLabelNat
 870   let
 871         -- This is how gcc does it, so it can't be that bad:
 872     code dst = x_code dst `appOL` toOL [
 873         LDATA ReadOnlyData16 [
 874                 CmmAlign 16,
 875                 CmmDataLabel lbl,
 876                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 877                 CmmStaticLit (CmmInt 0 I64)
 878         ],
 879                 -- gcc puts an unpck here.  Wonder if we need it.
 880         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 881                 -- xorpd, so we need the 128-bit constant
 882         ]
 883   --
 884   return (Any F64 code)
 885 #endif
 886
 887 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 888
 889 getRegister (CmmMachOp mop [x]) -- unary MachOps
 890   = case mop of
 891 #if i386_TARGET_ARCH
 892       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 893       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 894 #endif
 895
 896       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 897       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 898
 899       -- Nop conversions
 900       -- TODO: these are only nops if the arg is not a fixed register that
 901       -- can't be byte-addressed.
 902       MO_U_Conv I32 I8  -> conversionNop I32 x
 903       MO_S_Conv I32 I8  -> conversionNop I32 x
 904       MO_U_Conv I16 I8  -> conversionNop I16 x
 905       MO_S_Conv I16 I8  -> conversionNop I16 x
 906       MO_U_Conv I32 I16 -> conversionNop I32 x
 907       MO_S_Conv I32 I16 -> conversionNop I32 x
 908 #if x86_64_TARGET_ARCH
 909       MO_U_Conv I64 I32 -> conversionNop I64 x
 910       MO_S_Conv I64 I32 -> conversionNop I64 x
 911       MO_U_Conv I64 I16 -> conversionNop I64 x
 912       MO_S_Conv I64 I16 -> conversionNop I64 x
 913       MO_U_Conv I64 I8  -> conversionNop I64 x
 914       MO_S_Conv I64 I8  -> conversionNop I64 x
 915 #endif
 916
 917       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 918       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 919
 920       -- widenings
 921       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 922       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 923       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 924
 925       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 926       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 927       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 928
 929 #if x86_64_TARGET_ARCH
 930       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 931       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 932       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 933       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 934       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 935       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 936         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 937         -- However, we don't want the register allocator to throw it
 938         -- away as an unnecessary reg-to-reg move, so we keep it in
 939         -- the form of a movzl and print it as a movl later.
 940 #endif
 941
 942 #if i386_TARGET_ARCH
 943       MO_S_Conv F32 F64 -> conversionNop F64 x
 944       MO_S_Conv F64 F32 -> conversionNop F32 x
 945 #else
 946       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 947       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 948 #endif
 949
 950       MO_S_Conv from to
 951         | isFloatingRep from -> coerceFP2Int from to x
 952         | isFloatingRep to   -> coerceInt2FP from to x
 953
 954       other -> pprPanic "getRegister" (pprMachOp mop)
 955    where
 956         -- signed or unsigned extension.
 957         integerExtend from to instr expr = do
 958             (reg,e_code) <- if from == I8 then getByteReg expr
 959                                           else getSomeReg expr
 960             let
 961                 code dst =
 962                   e_code `snocOL`
 963                   instr from (OpReg reg) (OpReg dst)
 964             return (Any to code)
 965
 966         conversionNop new_rep expr
 967             = do e_code <- getRegister expr
 968                  return (swizzleRegisterRep e_code new_rep)
 969
 970
 971 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 972   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
 973     case mop of
 974       MO_Eq F32   -> condFltReg EQQ x y
 975       MO_Ne F32   -> condFltReg NE x y
 976       MO_S_Gt F32 -> condFltReg GTT x y
 977       MO_S_Ge F32 -> condFltReg GE x y
 978       MO_S_Lt F32 -> condFltReg LTT x y
 979       MO_S_Le F32 -> condFltReg LE x y
 980
 981       MO_Eq F64   -> condFltReg EQQ x y
 982       MO_Ne F64   -> condFltReg NE x y
 983       MO_S_Gt F64 -> condFltReg GTT x y
 984       MO_S_Ge F64 -> condFltReg GE x y
 985       MO_S_Lt F64 -> condFltReg LTT x y
 986       MO_S_Le F64 -> condFltReg LE x y
 987
 988       MO_Eq rep   -> condIntReg EQQ x y
 989       MO_Ne rep   -> condIntReg NE x y
 990
 991       MO_S_Gt rep -> condIntReg GTT x y
 992       MO_S_Ge rep -> condIntReg GE x y
 993       MO_S_Lt rep -> condIntReg LTT x y
 994       MO_S_Le rep -> condIntReg LE x y
 995
 996       MO_U_Gt rep -> condIntReg GU  x y
 997       MO_U_Ge rep -> condIntReg GEU x y
 998       MO_U_Lt rep -> condIntReg LU  x y
 999       MO_U_Le rep -> condIntReg LEU x y
1000
1001 #if i386_TARGET_ARCH
1002       MO_Add F32 -> trivialFCode F32 GADD x y
1003       MO_Sub F32 -> trivialFCode F32 GSUB x y
1004
1005       MO_Add F64 -> trivialFCode F64 GADD x y
1006       MO_Sub F64 -> trivialFCode F64 GSUB x y
1007
1008       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1009       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1010 #endif
1011
1012 #if x86_64_TARGET_ARCH
1013       MO_Add F32 -> trivialFCode F32 ADD x y
1014       MO_Sub F32 -> trivialFCode F32 SUB x y
1015
1016       MO_Add F64 -> trivialFCode F64 ADD x y
1017       MO_Sub F64 -> trivialFCode F64 SUB x y
1018
1019       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1020       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1021 #endif
1022
1023       MO_Add rep -> add_code rep x y
1024       MO_Sub rep -> sub_code rep x y
1025
1026       MO_S_Quot rep -> div_code rep True  True  x y
1027       MO_S_Rem  rep -> div_code rep True  False x y
1028       MO_U_Quot rep -> div_code rep False True  x y
1029       MO_U_Rem  rep -> div_code rep False False x y
1030
1031 #if i386_TARGET_ARCH
1032       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1033       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1034 #endif
1035
1036 #if x86_64_TARGET_ARCH
1037       MO_Mul   F32 -> trivialFCode F32 MUL x y
1038       MO_Mul   F64 -> trivialFCode F64 MUL x y
1039 #endif
1040
1041       MO_Mul   rep -> let op = IMUL rep in
1042                       trivialCode rep op (Just op) x y
1043
1044       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1045
1046       MO_And rep -> let op = AND rep in
1047                     trivialCode rep op (Just op) x y
1048       MO_Or  rep -> let op = OR  rep in
1049                     trivialCode rep op (Just op) x y
1050       MO_Xor rep -> let op = XOR rep in
1051                     trivialCode rep op (Just op) x y
1052
1053         {- Shift ops on x86s have constraints on their source, it
1054            either has to be Imm, CL or 1
1055             => trivialCode is not restrictive enough (sigh.)
1056         -}
1057       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1058       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1059       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1060
1061       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1062   where
1063     --------------------
1064     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1065     imulMayOflo rep a b = do
1066          (a_reg, a_code) <- getNonClobberedReg a
1067          b_code <- getAnyReg b
1068          let
1069              shift_amt  = case rep of
1070                            I32 -> 31
1071                            I64 -> 63
1072                            _ -> panic "shift_amt"
1073
1074              code = a_code `appOL` b_code eax `appOL`
1075                         toOL [
1076                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1077                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1078                                 -- sign extend lower part
1079                            SUB rep (OpReg edx) (OpReg eax)
1080                                 -- compare against upper
1081                            -- eax==0 if high part == sign extended low part
1082                         ]
1083          -- in
1084          return (Fixed rep eax code)
1085
1086     --------------------
1087     shift_code :: MachRep
1088                -> (Operand -> Operand -> Instr)
1089                -> CmmExpr
1090                -> CmmExpr
1091                -> NatM Register
1092
1093     {- Case1: shift length as immediate -}
1094     shift_code rep instr x y@(CmmLit lit) = do
1095           x_code <- getAnyReg x
1096           let
1097                code dst
1098                   = x_code dst `snocOL`
1099                     instr (OpImm (litToImm lit)) (OpReg dst)
1100           -- in
1101           return (Any rep code)
1102
1103     {- Case2: shift length is complex (non-immediate) -}
1104     shift_code rep instr x y{-amount-} = do
1105         (x_reg, x_code) <- getNonClobberedReg x
1106         y_code <- getAnyReg y
1107         let
1108            code = x_code `appOL`
1109                   y_code ecx `snocOL`
1110                   instr (OpReg ecx) (OpReg x_reg)
1111         -- in
1112         return (Fixed rep x_reg code)
1113
1114     --------------------
1115     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1116     add_code rep x (CmmLit (CmmInt y _))
1117         | not (is64BitInteger y) = add_int rep x y
1118     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1119
1120     --------------------
1121     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1122     sub_code rep x (CmmLit (CmmInt y _))
1123         | not (is64BitInteger (-y)) = add_int rep x (-y)
1124     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1125
1126     -- our three-operand add instruction:
1127     add_int rep x y = do
1128         (x_reg, x_code) <- getSomeReg x
1129         let
1130             imm = ImmInt (fromInteger y)
1131             code dst
1132                = x_code `snocOL`
1133                  LEA rep
1134                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1135                         (OpReg dst)
1136         --
1137         return (Any rep code)
1138
1139     ----------------------
1140     div_code rep signed quotient x y = do
1141            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1142            x_code <- getAnyReg x
1143            let
1144              widen | signed    = CLTD rep
1145                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1146
1147              instr | signed    = IDIV
1148                    | otherwise = DIV
1149
1150              code = y_code `appOL`
1151                     x_code eax `appOL`
1152                     toOL [widen, instr rep y_op]
1153
1154              result | quotient  = eax
1155                     | otherwise = edx
1156
1157            -- in
1158            return (Fixed rep result code)
1159
1160
1161 getRegister (CmmLoad mem pk)
1162   | isFloatingRep pk
1163   = do
1164     Amode src mem_code <- getAmode mem
1165     let
1166         code dst = mem_code `snocOL`
1167                    IF_ARCH_i386(GLD pk src dst,
1168                                 MOV pk (OpAddr src) (OpReg dst))
1169     --
1170     return (Any pk code)
1171
1172 #if i386_TARGET_ARCH
1173 getRegister (CmmLoad mem pk)
1174   | pk /= I64
1175   = do
1176     code <- intLoadCode (instr pk) mem
1177     return (Any pk code)
1178   where
1179         instr I8  = MOVZxL pk
1180         instr I16 = MOV I16
1181         instr I32 = MOV I32
1182         -- we always zero-extend 8-bit loads, if we
1183         -- can't think of anything better.  This is because
1184         -- we can't guarantee access to an 8-bit variant of every register
1185         -- (esi and edi don't have 8-bit variants), so to make things
1186         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1187 #endif
1188
1189 #if x86_64_TARGET_ARCH
1190 -- Simpler memory load code on x86_64
1191 getRegister (CmmLoad mem pk)
1192   = do
1193     code <- intLoadCode (MOV pk) mem
1194     return (Any pk code)
1195 #endif
1196
1197 getRegister (CmmLit (CmmInt 0 rep))
1198   = let
1199         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1200         adj_rep = case rep of I64 -> I32; _ -> rep
1201         rep1 = IF_ARCH_i386( rep, adj_rep )
1202         code dst
1203            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1204     in
1205         return (Any rep code)
1206
1207 #if x86_64_TARGET_ARCH
1208   -- optimisation for loading small literals on x86_64: take advantage
1209   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1210   -- instruction forms are shorter.
1211 getRegister (CmmLit lit)
1212   | I64 <- cmmLitRep lit, not (isBigLit lit)
1213   = let
1214         imm = litToImm lit
1215         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1216     in
1217         return (Any I64 code)
1218   where
1219    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1220    isBigLit _ = False
1221         -- note1: not the same as is64BitLit, because that checks for
1222         -- signed literals that fit in 32 bits, but we want unsigned
1223         -- literals here.
1224         -- note2: all labels are small, because we're assuming the
1225         -- small memory model (see gcc docs, -mcmodel=small).
1226 #endif
1227
1228 getRegister (CmmLit lit)
1229   = let
1230         rep = cmmLitRep lit
1231         imm = litToImm lit
1232         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1233     in
1234         return (Any rep code)
1235
1236 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1237
1238
1239 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1240    -> NatM (Reg -> InstrBlock)
1241 intLoadCode instr mem = do
1242   Amode src mem_code <- getAmode mem
1243   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1244
1245 -- Compute an expression into *any* register, adding the appropriate
1246 -- move instruction if necessary.
1247 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1248 getAnyReg expr = do
1249   r <- getRegister expr
1250   anyReg r
1251
1252 anyReg :: Register -> NatM (Reg -> InstrBlock)
1253 anyReg (Any _ code)          = return code
1254 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1255
1256 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1257 -- Fixed registers might not be byte-addressable, so we make sure we've
1258 -- got a temporary, inserting an extra reg copy if necessary.
1259 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1260 #if x86_64_TARGET_ARCH
1261 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1262 #else
1263 getByteReg expr = do
1264   r <- getRegister expr
1265   case r of
1266     Any rep code -> do
1267         tmp <- getNewRegNat rep
1268         return (tmp, code tmp)
1269     Fixed rep reg code
1270         | isVirtualReg reg -> return (reg,code)
1271         | otherwise -> do
1272             tmp <- getNewRegNat rep
1273             return (tmp, code `snocOL` reg2reg rep reg tmp)
1274         -- ToDo: could optimise slightly by checking for byte-addressable
1275         -- real registers, but that will happen very rarely if at all.
1276 #endif
1277
1278 -- Another variant: this time we want the result in a register that cannot
1279 -- be modified by code to evaluate an arbitrary expression.
1280 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1281 getNonClobberedReg expr = do
1282   r <- getRegister expr
1283   case r of
1284     Any rep code -> do
1285         tmp <- getNewRegNat rep
1286         return (tmp, code tmp)
1287     Fixed rep reg code
1288         -- only free regs can be clobbered
1289         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1290                 tmp <- getNewRegNat rep
1291                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1292         | otherwise ->
1293                 return (reg, code)
1294
1295 reg2reg :: MachRep -> Reg -> Reg -> Instr
1296 reg2reg rep src dst
1297 #if i386_TARGET_ARCH
1298   | isFloatingRep rep = GMOV src dst
1299 #endif
1300   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1301
1302 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1303
1304 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1305
1306 #if sparc_TARGET_ARCH
1307
1308 getRegister (CmmLit (CmmFloat f F32)) = do
1309     lbl <- getNewLabelNat
1310     let code dst = toOL [
1311             LDATA ReadOnlyData
1312                         [CmmDataLabel lbl,
1313                          CmmStaticLit (CmmFloat f F32)],
1314             SETHI (HI (ImmCLbl lbl)) dst,
1315             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1316     return (Any F32 code)
1317
1318 getRegister (CmmLit (CmmFloat d F64)) = do
1319     lbl <- getNewLabelNat
1320     let code dst = toOL [
1321             LDATA ReadOnlyData
1322                         [CmmDataLabel lbl,
1323                          CmmStaticLit (CmmFloat d F64)],
1324             SETHI (HI (ImmCLbl lbl)) dst,
1325             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1326     return (Any F64 code)
1327
1328 getRegister (CmmMachOp mop [x]) -- unary MachOps
1329   = case mop of
1330       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1331       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1332
1333       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1334       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1335
1336       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1337
1338       MO_U_Conv F64 F32-> coerceDbl2Flt x
1339       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1340
1341       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1342       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1343       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1344       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1345
1346       -- Conversions which are a nop on sparc
1347       MO_U_Conv from to
1348         | from == to   -> conversionNop to   x
1349       MO_U_Conv I32 to -> conversionNop to   x
1350       MO_S_Conv I32 to -> conversionNop to   x
1351
1352       -- widenings
1353       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1354       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1355       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1356       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1357
1358       other_op -> panic "Unknown unary mach op"
1359     where
1360         -- XXX SLL/SRL?
1361         integerExtend signed from to expr = do
1362            (reg, e_code) <- getSomeReg expr
1363            let
1364                code dst =
1365                    e_code `snocOL`
1366                    ((if signed then SRA else SRL)
1367                           reg (RIImm (ImmInt 0)) dst)
1368            return (Any to code)
1369         conversionNop new_rep expr
1370             = do e_code <- getRegister expr
1371                  return (swizzleRegisterRep e_code new_rep)
1372
1373 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1374   = case mop of
1375       MO_Eq F32 -> condFltReg EQQ x y
1376       MO_Ne F32 -> condFltReg NE x y
1377
1378       MO_S_Gt F32 -> condFltReg GTT x y
1379       MO_S_Ge F32 -> condFltReg GE x y
1380       MO_S_Lt F32 -> condFltReg LTT x y
1381       MO_S_Le F32 -> condFltReg LE x y
1382
1383       MO_Eq F64 -> condFltReg EQQ x y
1384       MO_Ne F64 -> condFltReg NE x y
1385
1386       MO_S_Gt F64 -> condFltReg GTT x y
1387       MO_S_Ge F64 -> condFltReg GE x y
1388       MO_S_Lt F64 -> condFltReg LTT x y
1389       MO_S_Le F64 -> condFltReg LE x y
1390
1391       MO_Eq rep -> condIntReg EQQ x y
1392       MO_Ne rep -> condIntReg NE x y
1393
1394       MO_S_Gt rep -> condIntReg GTT x y
1395       MO_S_Ge rep -> condIntReg GE x y
1396       MO_S_Lt rep -> condIntReg LTT x y
1397       MO_S_Le rep -> condIntReg LE x y
1398
1399       MO_U_Gt I32  -> condIntReg GTT x y
1400       MO_U_Ge I32  -> condIntReg GE x y
1401       MO_U_Lt I32  -> condIntReg LTT x y
1402       MO_U_Le I32  -> condIntReg LE x y
1403
1404       MO_U_Gt I16 -> condIntReg GU  x y
1405       MO_U_Ge I16 -> condIntReg GEU x y
1406       MO_U_Lt I16 -> condIntReg LU  x y
1407       MO_U_Le I16 -> condIntReg LEU x y
1408
1409       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1410       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1411
1412       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1413 {-
1414       -- ToDo: teach about V8+ SPARC div instructions
1415       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1416       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1417       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1418       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1419 -}
1420       MO_Add F32  -> trivialFCode F32 FADD  x y
1421       MO_Sub F32   -> trivialFCode F32  FSUB x y
1422       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1423       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1424
1425       MO_Add F64   -> trivialFCode F64 FADD  x y
1426       MO_Sub F64   -> trivialFCode F64  FSUB x y
1427       MO_Mul F64   -> trivialFCode F64  FMUL x y
1428       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1429
1430       MO_And rep   -> trivialCode rep (AND False) x y
1431       MO_Or rep    -> trivialCode rep (OR  False) x y
1432       MO_Xor rep   -> trivialCode rep (XOR False) x y
1433
1434       MO_Mul rep -> trivialCode rep (SMUL False) x y
1435
1436       MO_Shl rep   -> trivialCode rep SLL  x y
1437       MO_U_Shr rep   -> trivialCode rep SRL x y
1438       MO_S_Shr rep   -> trivialCode rep SRA x y
1439
1440 {-
1441       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1442                                          [promote x, promote y])
1443                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1444       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1445                                         [x, y])
1446 -}
1447       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1448   where
1449     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1450
1451     --------------------
1452     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1453     imulMayOflo rep a b = do
1454          (a_reg, a_code) <- getSomeReg a
1455          (b_reg, b_code) <- getSomeReg b
1456          res_lo <- getNewRegNat I32
1457          res_hi <- getNewRegNat I32
1458          let
1459             shift_amt  = case rep of
1460                           I32 -> 31
1461                           I64 -> 63
1462                           _ -> panic "shift_amt"
1463             code dst = a_code `appOL` b_code `appOL`
1464                        toOL [
1465                            SMUL False a_reg (RIReg b_reg) res_lo,
1466                            RDY res_hi,
1467                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1468                            SUB False False res_lo (RIReg res_hi) dst
1469                         ]
1470          return (Any I32 code)
1471
1472 getRegister (CmmLoad mem pk) = do
1473     Amode src code <- getAmode mem
1474     let
1475         code__2 dst = code `snocOL` LD pk src dst
1476     return (Any pk code__2)
1477
1478 getRegister (CmmLit (CmmInt i _))
1479   | fits13Bits i
1480   = let
1481         src = ImmInt (fromInteger i)
1482         code dst = unitOL (OR False g0 (RIImm src) dst)
1483     in
1484         return (Any I32 code)
1485
1486 getRegister (CmmLit lit)
1487   = let rep = cmmLitRep lit
1488         imm = litToImm lit
1489         code dst = toOL [
1490             SETHI (HI imm) dst,
1491             OR False dst (RIImm (LO imm)) dst]
1492     in return (Any I32 code)
1493
1494 #endif /* sparc_TARGET_ARCH */
1495
1496 #if powerpc_TARGET_ARCH
1497 getRegister (CmmLoad mem pk)
1498   | pk /= I64
1499   = do
1500         Amode addr addr_code <- getAmode mem
1501         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1502                        addr_code `snocOL` LD pk dst addr
1503         return (Any pk code)
1504
1505 -- catch simple cases of zero- or sign-extended load
1506 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1507     Amode addr addr_code <- getAmode mem
1508     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1509
1510 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1511
1512 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1513     Amode addr addr_code <- getAmode mem
1514     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1515
1516 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1517     Amode addr addr_code <- getAmode mem
1518     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1519
1520 getRegister (CmmMachOp mop [x]) -- unary MachOps
1521   = case mop of
1522       MO_Not rep   -> trivialUCode rep NOT x
1523
1524       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1525       MO_S_Conv F32 F64 -> conversionNop F64 x
1526
1527       MO_S_Conv from to
1528         | from == to         -> conversionNop to x
1529         | isFloatingRep from -> coerceFP2Int from to x
1530         | isFloatingRep to   -> coerceInt2FP from to x
1531
1532         -- narrowing is a nop: we treat the high bits as undefined
1533       MO_S_Conv I32 to -> conversionNop to x
1534       MO_S_Conv I16 I8 -> conversionNop I8 x
1535       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1536       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1537
1538       MO_U_Conv from to
1539         | from == to -> conversionNop to x
1540         -- narrowing is a nop: we treat the high bits as undefined
1541       MO_U_Conv I32 to -> conversionNop to x
1542       MO_U_Conv I16 I8 -> conversionNop I8 x
1543       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1544       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1545
1546       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1547       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1548       MO_S_Neg rep      -> trivialUCode rep NEG x
1549
1550     where
1551         conversionNop new_rep expr
1552             = do e_code <- getRegister expr
1553                  return (swizzleRegisterRep e_code new_rep)
1554
1555 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1556   = case mop of
1557       MO_Eq F32 -> condFltReg EQQ x y
1558       MO_Ne F32 -> condFltReg NE  x y
1559
1560       MO_S_Gt F32 -> condFltReg GTT x y
1561       MO_S_Ge F32 -> condFltReg GE  x y
1562       MO_S_Lt F32 -> condFltReg LTT x y
1563       MO_S_Le F32 -> condFltReg LE  x y
1564
1565       MO_Eq F64 -> condFltReg EQQ x y
1566       MO_Ne F64 -> condFltReg NE  x y
1567
1568       MO_S_Gt F64 -> condFltReg GTT x y
1569       MO_S_Ge F64 -> condFltReg GE  x y
1570       MO_S_Lt F64 -> condFltReg LTT x y
1571       MO_S_Le F64 -> condFltReg LE  x y
1572
1573       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1574       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1575
1576       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1577       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1578       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1579       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1580
1581       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1582       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1583       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1584       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1585
1586       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1587       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1588       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1589       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1590
1591       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1592       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1593       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1594       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1595
1596          -- optimize addition with 32-bit immediate
1597          -- (needed for PIC)
1598       MO_Add I32 ->
1599         case y of
1600           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1601             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1602           CmmLit lit
1603             -> do
1604                 (src, srcCode) <- getSomeReg x
1605                 let imm = litToImm lit
1606                     code dst = srcCode `appOL` toOL [
1607                                     ADDIS dst src (HA imm),
1608                                     ADD dst dst (RIImm (LO imm))
1609                                 ]
1610                 return (Any I32 code)
1611           _ -> trivialCode I32 True ADD x y
1612
1613       MO_Add rep -> trivialCode rep True ADD x y
1614       MO_Sub rep ->
1615         case y of    -- subfi ('substract from' with immediate) doesn't exist
1616           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1617             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1618           _ -> trivialCodeNoImm rep SUBF y x
1619
1620       MO_Mul rep -> trivialCode rep True MULLW x y
1621
1622       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1623
1624       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1625       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1626
1627       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1628       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1629
1630       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1631       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1632
1633       MO_And rep   -> trivialCode rep False AND x y
1634       MO_Or rep    -> trivialCode rep False OR x y
1635       MO_Xor rep   -> trivialCode rep False XOR x y
1636
1637       MO_Shl rep   -> trivialCode rep False SLW x y
1638       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1639       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1640
1641 getRegister (CmmLit (CmmInt i rep))
1642   | Just imm <- makeImmediate rep True i
1643   = let
1644         code dst = unitOL (LI dst imm)
1645     in
1646         return (Any rep code)
1647
1648 getRegister (CmmLit (CmmFloat f frep)) = do
1649     lbl <- getNewLabelNat
1650     dynRef <- cmmMakeDynamicReference addImportNat False lbl
1651     Amode addr addr_code <- getAmode dynRef
1652     let code dst =
1653             LDATA ReadOnlyData  [CmmDataLabel lbl,
1654                                  CmmStaticLit (CmmFloat f frep)]
1655             `consOL` (addr_code `snocOL` LD frep dst addr)
1656     return (Any frep code)
1657
1658 getRegister (CmmLit lit)
1659   = let rep = cmmLitRep lit
1660         imm = litToImm lit
1661         code dst = toOL [
1662               LIS dst (HI imm),
1663               OR dst dst (RIImm (LO imm))
1664           ]
1665     in return (Any rep code)
1666
1667 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1668
1669     -- extend?Rep: wrap integer expression of type rep
1670     -- in a conversion to I32
1671 extendSExpr I32 x = x
1672 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1673 extendUExpr I32 x = x
1674 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1675
1676 #endif /* powerpc_TARGET_ARCH */
1677
1678
1679 -- -----------------------------------------------------------------------------
1680 --  The 'Amode' type: Memory addressing modes passed up the tree.
1681
1682 data Amode = Amode AddrMode InstrBlock
1683
1684 {-
1685 Now, given a tree (the argument to an CmmLoad) that references memory,
1686 produce a suitable addressing mode.
1687
1688 A Rule of the Game (tm) for Amodes: use of the addr bit must
1689 immediately follow use of the code part, since the code part puts
1690 values in registers which the addr then refers to.  So you can't put
1691 anything in between, lest it overwrite some of those registers.  If
1692 you need to do some other computation between the code part and use of
1693 the addr bit, first store the effective address from the amode in a
1694 temporary, then do the other computation, and then use the temporary:
1695
1696     code
1697     LEA amode, tmp
1698     ... other computation ...
1699     ... (tmp) ...
1700 -}
1701
1702 getAmode :: CmmExpr -> NatM Amode
1703 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1704
1705 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1706
1707 #if alpha_TARGET_ARCH
1708
1709 getAmode (StPrim IntSubOp [x, StInt i])
1710   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1711     getRegister x               `thenNat` \ register ->
1712     let
1713         code = registerCode register tmp
1714         reg  = registerName register tmp
1715         off  = ImmInt (-(fromInteger i))
1716     in
1717     return (Amode (AddrRegImm reg off) code)
1718
1719 getAmode (StPrim IntAddOp [x, StInt i])
1720   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1721     getRegister x               `thenNat` \ register ->
1722     let
1723         code = registerCode register tmp
1724         reg  = registerName register tmp
1725         off  = ImmInt (fromInteger i)
1726     in
1727     return (Amode (AddrRegImm reg off) code)
1728
1729 getAmode leaf
1730   | isJust imm
1731   = return (Amode (AddrImm imm__2) id)
1732   where
1733     imm = maybeImm leaf
1734     imm__2 = case imm of Just x -> x
1735
1736 getAmode other
1737   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1738     getRegister other           `thenNat` \ register ->
1739     let
1740         code = registerCode register tmp
1741         reg  = registerName register tmp
1742     in
1743     return (Amode (AddrReg reg) code)
1744
1745 #endif /* alpha_TARGET_ARCH */
1746
1747 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1748
1749 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1750
1751 -- This is all just ridiculous, since it carefully undoes
1752 -- what mangleIndexTree has just done.
1753 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1754   | not (is64BitLit lit)
1755   -- ASSERT(rep == I32)???
1756   = do (x_reg, x_code) <- getSomeReg x
1757        let off = ImmInt (-(fromInteger i))
1758        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1759
1760 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1761   | not (is64BitLit lit)
1762   -- ASSERT(rep == I32)???
1763   = do (x_reg, x_code) <- getSomeReg x
1764        let off = ImmInt (fromInteger i)
1765        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1766
1767 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1768 -- recognised by the next rule.
1769 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1770                                   b@(CmmLit _)])
1771   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1772
1773 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1774                                         [y, CmmLit (CmmInt shift _)]])
1775   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1776   = do (x_reg, x_code) <- getNonClobberedReg x
1777         -- x must be in a temp, because it has to stay live over y_code
1778         -- we could compre x_reg and y_reg and do something better here...
1779        (y_reg, y_code) <- getSomeReg y
1780        let
1781            code = x_code `appOL` y_code
1782            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1783        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1784                code)
1785
1786 getAmode (CmmLit lit) | not (is64BitLit lit)
1787   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1788
1789 getAmode expr = do
1790   (reg,code) <- getSomeReg expr
1791   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1792
1793 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1794
1795 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1796
1797 #if sparc_TARGET_ARCH
1798
1799 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1800   | fits13Bits (-i)
1801   = do
1802        (reg, code) <- getSomeReg x
1803        let
1804          off  = ImmInt (-(fromInteger i))
1805        return (Amode (AddrRegImm reg off) code)
1806
1807
1808 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1809   | fits13Bits i
1810   = do
1811        (reg, code) <- getSomeReg x
1812        let
1813          off  = ImmInt (fromInteger i)
1814        return (Amode (AddrRegImm reg off) code)
1815
1816 getAmode (CmmMachOp (MO_Add rep) [x, y])
1817   = do
1818     (regX, codeX) <- getSomeReg x
1819     (regY, codeY) <- getSomeReg y
1820     let
1821         code = codeX `appOL` codeY
1822     return (Amode (AddrRegReg regX regY) code)
1823
1824 -- XXX Is this same as "leaf" in Stix?
1825 getAmode (CmmLit lit)
1826   = do
1827       tmp <- getNewRegNat I32
1828       let
1829         code = unitOL (SETHI (HI imm__2) tmp)
1830       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1831       where
1832          imm__2 = litToImm lit
1833
1834 getAmode other
1835   = do
1836        (reg, code) <- getSomeReg other
1837        let
1838             off  = ImmInt 0
1839        return (Amode (AddrRegImm reg off) code)
1840
1841 #endif /* sparc_TARGET_ARCH */
1842
1843 #ifdef powerpc_TARGET_ARCH
1844 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1845   | Just off <- makeImmediate I32 True (-i)
1846   = do
1847         (reg, code) <- getSomeReg x
1848         return (Amode (AddrRegImm reg off) code)
1849
1850
1851 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1852   | Just off <- makeImmediate I32 True i
1853   = do
1854         (reg, code) <- getSomeReg x
1855         return (Amode (AddrRegImm reg off) code)
1856
1857    -- optimize addition with 32-bit immediate
1858    -- (needed for PIC)
1859 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1860   = do
1861         tmp <- getNewRegNat I32
1862         (src, srcCode) <- getSomeReg x
1863         let imm = litToImm lit
1864             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1865         return (Amode (AddrRegImm tmp (LO imm)) code)
1866
1867 getAmode (CmmLit lit)
1868   = do
1869         tmp <- getNewRegNat I32
1870         let imm = litToImm lit
1871             code = unitOL (LIS tmp (HA imm))
1872         return (Amode (AddrRegImm tmp (LO imm)) code)
1873
1874 getAmode (CmmMachOp (MO_Add I32) [x, y])
1875   = do
1876         (regX, codeX) <- getSomeReg x
1877         (regY, codeY) <- getSomeReg y
1878         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1879
1880 getAmode other
1881   = do
1882         (reg, code) <- getSomeReg other
1883         let
1884             off  = ImmInt 0
1885         return (Amode (AddrRegImm reg off) code)
1886 #endif /* powerpc_TARGET_ARCH */
1887
1888 -- -----------------------------------------------------------------------------
1889 -- getOperand: sometimes any operand will do.
1890
1891 -- getNonClobberedOperand: the value of the operand will remain valid across
1892 -- the computation of an arbitrary expression, unless the expression
1893 -- is computed directly into a register which the operand refers to
1894 -- (see trivialCode where this function is used for an example).
1895
1896 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1897
1898 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1899 #if x86_64_TARGET_ARCH
1900 getNonClobberedOperand (CmmLit lit)
1901   | isSuitableFloatingPointLit lit = do
1902     lbl <- getNewLabelNat
1903     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1904                                            CmmStaticLit lit])
1905     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1906 #endif
1907 getNonClobberedOperand (CmmLit lit)
1908   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1909     return (OpImm (litToImm lit), nilOL)
1910 getNonClobberedOperand (CmmLoad mem pk)
1911   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1912     Amode src mem_code <- getAmode mem
1913     (src',save_code) <-
1914         if (amodeCouldBeClobbered src)
1915                 then do
1916                    tmp <- getNewRegNat wordRep
1917                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1918                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1919                 else
1920                    return (src, nilOL)
1921     return (OpAddr src', save_code `appOL` mem_code)
1922 getNonClobberedOperand e = do
1923     (reg, code) <- getNonClobberedReg e
1924     return (OpReg reg, code)
1925
1926 amodeCouldBeClobbered :: AddrMode -> Bool
1927 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1928
1929 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1930 regClobbered _ = False
1931
1932 -- getOperand: the operand is not required to remain valid across the
1933 -- computation of an arbitrary expression.
1934 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1935 #if x86_64_TARGET_ARCH
1936 getOperand (CmmLit lit)
1937   | isSuitableFloatingPointLit lit = do
1938     lbl <- getNewLabelNat
1939     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
1940                                            CmmStaticLit lit])
1941     return (OpAddr (ripRel (ImmCLbl lbl)), code)
1942 #endif
1943 getOperand (CmmLit lit)
1944   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1945     return (OpImm (litToImm lit), nilOL)
1946 getOperand (CmmLoad mem pk)
1947   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1948     Amode src mem_code <- getAmode mem
1949     return (OpAddr src, mem_code)
1950 getOperand e = do
1951     (reg, code) <- getSomeReg e
1952     return (OpReg reg, code)
1953
1954 isOperand :: CmmExpr -> Bool
1955 isOperand (CmmLoad _ _) = True
1956 isOperand (CmmLit lit)  = not (is64BitLit lit)
1957                           || isSuitableFloatingPointLit lit
1958 isOperand _             = False
1959
1960 -- if we want a floating-point literal as an operand, we can
1961 -- use it directly from memory.  However, if the literal is
1962 -- zero, we're better off generating it into a register using
1963 -- xor.
1964 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1965 isSuitableFloatingPointLit _ = False
1966
1967 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1968 getRegOrMem (CmmLoad mem pk)
1969   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1970     Amode src mem_code <- getAmode mem
1971     return (OpAddr src, mem_code)
1972 getRegOrMem e = do
1973     (reg, code) <- getNonClobberedReg e
1974     return (OpReg reg, code)
1975
1976 #if x86_64_TARGET_ARCH
1977 is64BitLit (CmmInt i I64) = is64BitInteger i
1978    -- assume that labels are in the range 0-2^31-1: this assumes the
1979    -- small memory model (see gcc docs, -mcmodel=small).
1980 #endif
1981 is64BitLit x = False
1982 #endif
1983
1984 is64BitInteger :: Integer -> Bool
1985 is64BitInteger i = i > 0x7fffffff || i < -0x80000000
1986
1987 -- -----------------------------------------------------------------------------
1988 --  The 'CondCode' type:  Condition codes passed up the tree.
1989
1990 data CondCode = CondCode Bool Cond InstrBlock
1991
1992 -- Set up a condition code for a conditional branch.
1993
1994 getCondCode :: CmmExpr -> NatM CondCode
1995
1996 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1997
1998 #if alpha_TARGET_ARCH
1999 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2000 #endif /* alpha_TARGET_ARCH */
2001
2002 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2003
2004 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2005 -- yes, they really do seem to want exactly the same!
2006
2007 getCondCode (CmmMachOp mop [x, y])
2008   = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2009     case mop of
2010       MO_Eq F32 -> condFltCode EQQ x y
2011       MO_Ne F32 -> condFltCode NE  x y
2012
2013       MO_S_Gt F32 -> condFltCode GTT x y
2014       MO_S_Ge F32 -> condFltCode GE  x y
2015       MO_S_Lt F32 -> condFltCode LTT x y
2016       MO_S_Le F32 -> condFltCode LE  x y
2017
2018       MO_Eq F64 -> condFltCode EQQ x y
2019       MO_Ne F64 -> condFltCode NE  x y
2020
2021       MO_S_Gt F64 -> condFltCode GTT x y
2022       MO_S_Ge F64 -> condFltCode GE  x y
2023       MO_S_Lt F64 -> condFltCode LTT x y
2024       MO_S_Le F64 -> condFltCode LE  x y
2025
2026       MO_Eq rep -> condIntCode EQQ  x y
2027       MO_Ne rep -> condIntCode NE   x y
2028
2029       MO_S_Gt rep -> condIntCode GTT  x y
2030       MO_S_Ge rep -> condIntCode GE   x y
2031       MO_S_Lt rep -> condIntCode LTT  x y
2032       MO_S_Le rep -> condIntCode LE   x y
2033
2034       MO_U_Gt rep -> condIntCode GU   x y
2035       MO_U_Ge rep -> condIntCode GEU  x y
2036       MO_U_Lt rep -> condIntCode LU   x y
2037       MO_U_Le rep -> condIntCode LEU  x y
2038
2039       other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2040
2041 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2042
2043 #elif powerpc_TARGET_ARCH
2044
2045 -- almost the same as everywhere else - but we need to
2046 -- extend small integers to 32 bit first
2047
2048 getCondCode (CmmMachOp mop [x, y])
2049   = case mop of
2050       MO_Eq F32 -> condFltCode EQQ x y
2051       MO_Ne F32 -> condFltCode NE  x y
2052
2053       MO_S_Gt F32 -> condFltCode GTT x y
2054       MO_S_Ge F32 -> condFltCode GE  x y
2055       MO_S_Lt F32 -> condFltCode LTT x y
2056       MO_S_Le F32 -> condFltCode LE  x y
2057
2058       MO_Eq F64 -> condFltCode EQQ x y
2059       MO_Ne F64 -> condFltCode NE  x y
2060
2061       MO_S_Gt F64 -> condFltCode GTT x y
2062       MO_S_Ge F64 -> condFltCode GE  x y
2063       MO_S_Lt F64 -> condFltCode LTT x y
2064       MO_S_Le F64 -> condFltCode LE  x y
2065
2066       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2067       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2068
2069       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2070       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2071       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2072       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2073
2074       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2075       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2076       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2077       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2078
2079       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2080
2081 getCondCode other =  panic "getCondCode(2)(powerpc)"
2082
2083
2084 #endif
2085
2086
2087 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2088 -- passed back up the tree.
2089
2090 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2091
2092 #if alpha_TARGET_ARCH
2093 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2094 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2095 #endif /* alpha_TARGET_ARCH */
2096
2097 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2098 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2099
2100 -- memory vs immediate
2101 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2102     Amode x_addr x_code <- getAmode x
2103     let
2104         imm  = litToImm lit
2105         code = x_code `snocOL`
2106                   CMP pk (OpImm imm) (OpAddr x_addr)
2107     --
2108     return (CondCode False cond code)
2109
2110 -- anything vs zero
2111 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2112     (x_reg, x_code) <- getSomeReg x
2113     let
2114         code = x_code `snocOL`
2115                   TEST pk (OpReg x_reg) (OpReg x_reg)
2116     --
2117     return (CondCode False cond code)
2118
2119 -- anything vs operand
2120 condIntCode cond x y | isOperand y = do
2121     (x_reg, x_code) <- getNonClobberedReg x
2122     (y_op,  y_code) <- getOperand y
2123     let
2124         code = x_code `appOL` y_code `snocOL`
2125                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2126     -- in
2127     return (CondCode False cond code)
2128
2129 -- anything vs anything
2130 condIntCode cond x y = do
2131   (y_reg, y_code) <- getNonClobberedReg y
2132   (x_op, x_code) <- getRegOrMem x
2133   let
2134         code = y_code `appOL`
2135                x_code `snocOL`
2136                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2137   -- in
2138   return (CondCode False cond code)
2139 #endif
2140
2141 #if i386_TARGET_ARCH
2142 condFltCode cond x y
2143   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2144   (x_reg, x_code) <- getNonClobberedReg x
2145   (y_reg, y_code) <- getSomeReg y
2146   let
2147         code = x_code `appOL` y_code `snocOL`
2148                 GCMP cond x_reg y_reg
2149   -- The GCMP insn does the test and sets the zero flag if comparable
2150   -- and true.  Hence we always supply EQQ as the condition to test.
2151   return (CondCode True EQQ code)
2152 #endif /* i386_TARGET_ARCH */
2153
2154 #if x86_64_TARGET_ARCH
2155 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2156 -- an operand, but the right must be a reg.  We can probably do better
2157 -- than this general case...
2158 condFltCode cond x y = do
2159   (x_reg, x_code) <- getNonClobberedReg x
2160   (y_op, y_code) <- getOperand y
2161   let
2162         code = x_code `appOL`
2163                y_code `snocOL`
2164                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2165         -- NB(1): we need to use the unsigned comparison operators on the
2166         -- result of this comparison.
2167   -- in
2168   return (CondCode True (condToUnsigned cond) code)
2169 #endif
2170
2171 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2172
2173 #if sparc_TARGET_ARCH
2174
2175 condIntCode cond x (CmmLit (CmmInt y rep))
2176   | fits13Bits y
2177   = do
2178        (src1, code) <- getSomeReg x
2179        let
2180            src2 = ImmInt (fromInteger y)
2181            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2182        return (CondCode False cond code')
2183
2184 condIntCode cond x y = do
2185     (src1, code1) <- getSomeReg x
2186     (src2, code2) <- getSomeReg y
2187     let
2188         code__2 = code1 `appOL` code2 `snocOL`
2189                   SUB False True src1 (RIReg src2) g0
2190     return (CondCode False cond code__2)
2191
2192 -----------
2193 condFltCode cond x y = do
2194     (src1, code1) <- getSomeReg x
2195     (src2, code2) <- getSomeReg y
2196     tmp <- getNewRegNat F64
2197     let
2198         promote x = FxTOy F32 F64 x tmp
2199
2200         pk1   = cmmExprRep x
2201         pk2   = cmmExprRep y
2202
2203         code__2 =
2204                 if pk1 == pk2 then
2205                     code1 `appOL` code2 `snocOL`
2206                     FCMP True pk1 src1 src2
2207                 else if pk1 == F32 then
2208                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2209                     FCMP True F64 tmp src2
2210                 else
2211                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2212                     FCMP True F64 src1 tmp
2213     return (CondCode True cond code__2)
2214
2215 #endif /* sparc_TARGET_ARCH */
2216
2217 #if powerpc_TARGET_ARCH
2218 --  ###FIXME: I16 and I8!
2219 condIntCode cond x (CmmLit (CmmInt y rep))
2220   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2221   = do
2222         (src1, code) <- getSomeReg x
2223         let
2224             code' = code `snocOL`
2225                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2226         return (CondCode False cond code')
2227
2228 condIntCode cond x y = do
2229     (src1, code1) <- getSomeReg x
2230     (src2, code2) <- getSomeReg y
2231     let
2232         code' = code1 `appOL` code2 `snocOL`
2233                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2234     return (CondCode False cond code')
2235
2236 condFltCode cond x y = do
2237     (src1, code1) <- getSomeReg x
2238     (src2, code2) <- getSomeReg y
2239     let
2240         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2241         code'' = case cond of -- twiddle CR to handle unordered case
2242                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2243                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2244                     _ -> code'
2245                  where
2246                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2247     return (CondCode True cond code'')
2248
2249 #endif /* powerpc_TARGET_ARCH */
2250
2251 -- -----------------------------------------------------------------------------
2252 -- Generating assignments
2253
2254 -- Assignments are really at the heart of the whole code generation
2255 -- business.  Almost all top-level nodes of any real importance are
2256 -- assignments, which correspond to loads, stores, or register
2257 -- transfers.  If we're really lucky, some of the register transfers
2258 -- will go away, because we can use the destination register to
2259 -- complete the code generation for the right hand side.  This only
2260 -- fails when the right hand side is forced into a fixed register
2261 -- (e.g. the result of a call).
2262
2263 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2264 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2265
2266 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2267 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2268
2269 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2270
2271 #if alpha_TARGET_ARCH
2272
2273 assignIntCode pk (CmmLoad dst _) src
2274   = getNewRegNat IntRep             `thenNat` \ tmp ->
2275     getAmode dst                    `thenNat` \ amode ->
2276     getRegister src                 `thenNat` \ register ->
2277     let
2278         code1   = amodeCode amode []
2279         dst__2  = amodeAddr amode
2280         code2   = registerCode register tmp []
2281         src__2  = registerName register tmp
2282         sz      = primRepToSize pk
2283         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2284     in
2285     return code__2
2286
2287 assignIntCode pk dst src
2288   = getRegister dst                         `thenNat` \ register1 ->
2289     getRegister src                         `thenNat` \ register2 ->
2290     let
2291         dst__2  = registerName register1 zeroh
2292         code    = registerCode register2 dst__2
2293         src__2  = registerName register2 dst__2
2294         code__2 = if isFixed register2
2295                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2296                   else code
2297     in
2298     return code__2
2299
2300 #endif /* alpha_TARGET_ARCH */
2301
2302 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2303
2304 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2305
2306 -- integer assignment to memory
2307 assignMem_IntCode pk addr src = do
2308     Amode addr code_addr <- getAmode addr
2309     (code_src, op_src)   <- get_op_RI src
2310     let
2311         code = code_src `appOL`
2312                code_addr `snocOL`
2313                   MOV pk op_src (OpAddr addr)
2314         -- NOTE: op_src is stable, so it will still be valid
2315         -- after code_addr.  This may involve the introduction
2316         -- of an extra MOV to a temporary register, but we hope
2317         -- the register allocator will get rid of it.
2318     --
2319     return code
2320   where
2321     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2322     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2323       = return (nilOL, OpImm (litToImm lit))
2324     get_op_RI op
2325       = do (reg,code) <- getNonClobberedReg op
2326            return (code, OpReg reg)
2327
2328
2329 -- Assign; dst is a reg, rhs is mem
2330 assignReg_IntCode pk reg (CmmLoad src _) = do
2331   load_code <- intLoadCode (MOV pk) src
2332   return (load_code (getRegisterReg reg))
2333
2334 -- dst is a reg, but src could be anything
2335 assignReg_IntCode pk reg src = do
2336   code <- getAnyReg src
2337   return (code (getRegisterReg reg))
2338
2339 #endif /* i386_TARGET_ARCH */
2340
2341 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2342
2343 #if sparc_TARGET_ARCH
2344
2345 assignMem_IntCode pk addr src = do
2346     (srcReg, code) <- getSomeReg src
2347     Amode dstAddr addr_code <- getAmode addr
2348     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2349
2350 assignReg_IntCode pk reg src = do
2351     r <- getRegister src
2352     return $ case r of
2353         Any _ code         -> code dst
2354         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2355     where
2356       dst = getRegisterReg reg
2357
2358
2359 #endif /* sparc_TARGET_ARCH */
2360
2361 #if powerpc_TARGET_ARCH
2362
2363 assignMem_IntCode pk addr src = do
2364     (srcReg, code) <- getSomeReg src
2365     Amode dstAddr addr_code <- getAmode addr
2366     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2367
2368 -- dst is a reg, but src could be anything
2369 assignReg_IntCode pk reg src
2370     = do
2371         r <- getRegister src
2372         return $ case r of
2373             Any _ code         -> code dst
2374             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2375     where
2376         dst = getRegisterReg reg
2377
2378 #endif /* powerpc_TARGET_ARCH */
2379
2380
2381 -- -----------------------------------------------------------------------------
2382 -- Floating-point assignments
2383
2384 #if alpha_TARGET_ARCH
2385
2386 assignFltCode pk (CmmLoad dst _) src
2387   = getNewRegNat pk                 `thenNat` \ tmp ->
2388     getAmode dst                    `thenNat` \ amode ->
2389     getRegister src                         `thenNat` \ register ->
2390     let
2391         code1   = amodeCode amode []
2392         dst__2  = amodeAddr amode
2393         code2   = registerCode register tmp []
2394         src__2  = registerName register tmp
2395         sz      = primRepToSize pk
2396         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2397     in
2398     return code__2
2399
2400 assignFltCode pk dst src
2401   = getRegister dst                         `thenNat` \ register1 ->
2402     getRegister src                         `thenNat` \ register2 ->
2403     let
2404         dst__2  = registerName register1 zeroh
2405         code    = registerCode register2 dst__2
2406         src__2  = registerName register2 dst__2
2407         code__2 = if isFixed register2
2408                   then code . mkSeqInstr (FMOV src__2 dst__2)
2409                   else code
2410     in
2411     return code__2
2412
2413 #endif /* alpha_TARGET_ARCH */
2414
2415 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2416
2417 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2418
2419 -- Floating point assignment to memory
2420 assignMem_FltCode pk addr src = do
2421   (src_reg, src_code) <- getNonClobberedReg src
2422   Amode addr addr_code <- getAmode addr
2423   let
2424         code = src_code `appOL`
2425                addr_code `snocOL`
2426                 IF_ARCH_i386(GST pk src_reg addr,
2427                              MOV pk (OpReg src_reg) (OpAddr addr))
2428   return code
2429
2430 -- Floating point assignment to a register/temporary
2431 assignReg_FltCode pk reg src = do
2432   src_code <- getAnyReg src
2433   return (src_code (getRegisterReg reg))
2434
2435 #endif /* i386_TARGET_ARCH */
2436
2437 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2438
2439 #if sparc_TARGET_ARCH
2440
2441 -- Floating point assignment to memory
2442 assignMem_FltCode pk addr src = do
2443     Amode dst__2 code1 <- getAmode addr
2444     (src__2, code2) <- getSomeReg src
2445     tmp1 <- getNewRegNat pk
2446     let
2447         pk__2   = cmmExprRep src
2448         code__2 = code1 `appOL` code2 `appOL`
2449             if   pk == pk__2
2450             then unitOL (ST pk src__2 dst__2)
2451             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2452     return code__2
2453
2454 -- Floating point assignment to a register/temporary
2455 -- ToDo: Verify correctness
2456 assignReg_FltCode pk reg src = do
2457     r <- getRegister src
2458     v1 <- getNewRegNat pk
2459     return $ case r of
2460         Any _ code         -> code dst
2461         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2462     where
2463       dst = getRegisterReg reg
2464
2465 #endif /* sparc_TARGET_ARCH */
2466
2467 #if powerpc_TARGET_ARCH
2468
2469 -- Easy, isn't it?
2470 assignMem_FltCode = assignMem_IntCode
2471 assignReg_FltCode = assignReg_IntCode
2472
2473 #endif /* powerpc_TARGET_ARCH */
2474
2475
2476 -- -----------------------------------------------------------------------------
2477 -- Generating an non-local jump
2478
2479 -- (If applicable) Do not fill the delay slots here; you will confuse the
2480 -- register allocator.
2481
2482 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2483
2484 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2485
2486 #if alpha_TARGET_ARCH
2487
2488 genJump (CmmLabel lbl)
2489   | isAsmTemp lbl = returnInstr (BR target)
2490   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2491   where
2492     target = ImmCLbl lbl
2493
2494 genJump tree
2495   = getRegister tree                `thenNat` \ register ->
2496     getNewRegNat PtrRep             `thenNat` \ tmp ->
2497     let
2498         dst    = registerName register pv
2499         code   = registerCode register pv
2500         target = registerName register pv
2501     in
2502     if isFixed register then
2503         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2504     else
2505     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2506
2507 #endif /* alpha_TARGET_ARCH */
2508
2509 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2510
2511 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2512
2513 genJump (CmmLoad mem pk) = do
2514   Amode target code <- getAmode mem
2515   return (code `snocOL` JMP (OpAddr target))
2516
2517 genJump (CmmLit lit) = do
2518   return (unitOL (JMP (OpImm (litToImm lit))))
2519
2520 genJump expr = do
2521   (reg,code) <- getSomeReg expr
2522   return (code `snocOL` JMP (OpReg reg))
2523
2524 #endif /* i386_TARGET_ARCH */
2525
2526 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2527
2528 #if sparc_TARGET_ARCH
2529
2530 genJump (CmmLit (CmmLabel lbl))
2531   = return (toOL [CALL (Left target) 0 True, NOP])
2532   where
2533     target = ImmCLbl lbl
2534
2535 genJump tree
2536   = do
2537         (target, code) <- getSomeReg tree
2538         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2539
2540 #endif /* sparc_TARGET_ARCH */
2541
2542 #if powerpc_TARGET_ARCH
2543 genJump (CmmLit (CmmLabel lbl))
2544   = return (unitOL $ JMP lbl)
2545
2546 genJump tree
2547   = do
2548         (target,code) <- getSomeReg tree
2549         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2550 #endif /* powerpc_TARGET_ARCH */
2551
2552
2553 -- -----------------------------------------------------------------------------
2554 --  Unconditional branches
2555
2556 genBranch :: BlockId -> NatM InstrBlock
2557
2558 #if alpha_TARGET_ARCH
2559 genBranch id = return (unitOL (BR id))
2560 #endif
2561
2562 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2563 genBranch id = return (unitOL (JXX ALWAYS id))
2564 #endif
2565
2566 #if sparc_TARGET_ARCH
2567 genBranch (BlockId id) = return (toOL [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP])
2568 #endif
2569
2570 #if powerpc_TARGET_ARCH
2571 genBranch id = return (unitOL (BCC ALWAYS id))
2572 #endif
2573
2574
2575 -- -----------------------------------------------------------------------------
2576 --  Conditional jumps
2577
2578 {-
2579 Conditional jumps are always to local labels, so we can use branch
2580 instructions.  We peek at the arguments to decide what kind of
2581 comparison to do.
2582
2583 ALPHA: For comparisons with 0, we're laughing, because we can just do
2584 the desired conditional branch.
2585
2586 I386: First, we have to ensure that the condition
2587 codes are set according to the supplied comparison operation.
2588
2589 SPARC: First, we have to ensure that the condition codes are set
2590 according to the supplied comparison operation.  We generate slightly
2591 different code for floating point comparisons, because a floating
2592 point operation cannot directly precede a @BF@.  We assume the worst
2593 and fill that slot with a @NOP@.
2594
2595 SPARC: Do not fill the delay slots here; you will confuse the register
2596 allocator.
2597 -}
2598
2599
2600 genCondJump
2601     :: BlockId      -- the branch target
2602     -> CmmExpr      -- the condition on which to branch
2603     -> NatM InstrBlock
2604
2605 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2606
2607 #if alpha_TARGET_ARCH
2608
2609 genCondJump id (StPrim op [x, StInt 0])
2610   = getRegister x                           `thenNat` \ register ->
2611     getNewRegNat (registerRep register)
2612                                     `thenNat` \ tmp ->
2613     let
2614         code   = registerCode register tmp
2615         value  = registerName register tmp
2616         pk     = registerRep register
2617         target = ImmCLbl lbl
2618     in
2619     returnSeq code [BI (cmpOp op) value target]
2620   where
2621     cmpOp CharGtOp = GTT
2622     cmpOp CharGeOp = GE
2623     cmpOp CharEqOp = EQQ
2624     cmpOp CharNeOp = NE
2625     cmpOp CharLtOp = LTT
2626     cmpOp CharLeOp = LE
2627     cmpOp IntGtOp = GTT
2628     cmpOp IntGeOp = GE
2629     cmpOp IntEqOp = EQQ
2630     cmpOp IntNeOp = NE
2631     cmpOp IntLtOp = LTT
2632     cmpOp IntLeOp = LE
2633     cmpOp WordGtOp = NE
2634     cmpOp WordGeOp = ALWAYS
2635     cmpOp WordEqOp = EQQ
2636     cmpOp WordNeOp = NE
2637     cmpOp WordLtOp = NEVER
2638     cmpOp WordLeOp = EQQ
2639     cmpOp AddrGtOp = NE
2640     cmpOp AddrGeOp = ALWAYS
2641     cmpOp AddrEqOp = EQQ
2642     cmpOp AddrNeOp = NE
2643     cmpOp AddrLtOp = NEVER
2644     cmpOp AddrLeOp = EQQ
2645
2646 genCondJump lbl (StPrim op [x, StDouble 0.0])
2647   = getRegister x                           `thenNat` \ register ->
2648     getNewRegNat (registerRep register)
2649                                     `thenNat` \ tmp ->
2650     let
2651         code   = registerCode register tmp
2652         value  = registerName register tmp
2653         pk     = registerRep register
2654         target = ImmCLbl lbl
2655     in
2656     return (code . mkSeqInstr (BF (cmpOp op) value target))
2657   where
2658     cmpOp FloatGtOp = GTT
2659     cmpOp FloatGeOp = GE
2660     cmpOp FloatEqOp = EQQ
2661     cmpOp FloatNeOp = NE
2662     cmpOp FloatLtOp = LTT
2663     cmpOp FloatLeOp = LE
2664     cmpOp DoubleGtOp = GTT
2665     cmpOp DoubleGeOp = GE
2666     cmpOp DoubleEqOp = EQQ
2667     cmpOp DoubleNeOp = NE
2668     cmpOp DoubleLtOp = LTT
2669     cmpOp DoubleLeOp = LE
2670
2671 genCondJump lbl (StPrim op [x, y])
2672   | fltCmpOp op
2673   = trivialFCode pr instr x y       `thenNat` \ register ->
2674     getNewRegNat F64                `thenNat` \ tmp ->
2675     let
2676         code   = registerCode register tmp
2677         result = registerName register tmp
2678         target = ImmCLbl lbl
2679     in
2680     return (code . mkSeqInstr (BF cond result target))
2681   where
2682     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2683
2684     fltCmpOp op = case op of
2685         FloatGtOp -> True
2686         FloatGeOp -> True
2687         FloatEqOp -> True
2688         FloatNeOp -> True
2689         FloatLtOp -> True
2690         FloatLeOp -> True
2691         DoubleGtOp -> True
2692         DoubleGeOp -> True
2693         DoubleEqOp -> True
2694         DoubleNeOp -> True
2695         DoubleLtOp -> True
2696         DoubleLeOp -> True
2697         _ -> False
2698     (instr, cond) = case op of
2699         FloatGtOp -> (FCMP TF LE, EQQ)
2700         FloatGeOp -> (FCMP TF LTT, EQQ)
2701         FloatEqOp -> (FCMP TF EQQ, NE)
2702         FloatNeOp -> (FCMP TF EQQ, EQQ)
2703         FloatLtOp -> (FCMP TF LTT, NE)
2704         FloatLeOp -> (FCMP TF LE, NE)
2705         DoubleGtOp -> (FCMP TF LE, EQQ)
2706         DoubleGeOp -> (FCMP TF LTT, EQQ)
2707         DoubleEqOp -> (FCMP TF EQQ, NE)
2708         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2709         DoubleLtOp -> (FCMP TF LTT, NE)
2710         DoubleLeOp -> (FCMP TF LE, NE)
2711
2712 genCondJump lbl (StPrim op [x, y])
2713   = trivialCode instr x y           `thenNat` \ register ->
2714     getNewRegNat IntRep             `thenNat` \ tmp ->
2715     let
2716         code   = registerCode register tmp
2717         result = registerName register tmp
2718         target = ImmCLbl lbl
2719     in
2720     return (code . mkSeqInstr (BI cond result target))
2721   where
2722     (instr, cond) = case op of
2723         CharGtOp -> (CMP LE, EQQ)
2724         CharGeOp -> (CMP LTT, EQQ)
2725         CharEqOp -> (CMP EQQ, NE)
2726         CharNeOp -> (CMP EQQ, EQQ)
2727         CharLtOp -> (CMP LTT, NE)
2728         CharLeOp -> (CMP LE, NE)
2729         IntGtOp -> (CMP LE, EQQ)
2730         IntGeOp -> (CMP LTT, EQQ)
2731         IntEqOp -> (CMP EQQ, NE)
2732         IntNeOp -> (CMP EQQ, EQQ)
2733         IntLtOp -> (CMP LTT, NE)
2734         IntLeOp -> (CMP LE, NE)
2735         WordGtOp -> (CMP ULE, EQQ)
2736         WordGeOp -> (CMP ULT, EQQ)
2737         WordEqOp -> (CMP EQQ, NE)
2738         WordNeOp -> (CMP EQQ, EQQ)
2739         WordLtOp -> (CMP ULT, NE)
2740         WordLeOp -> (CMP ULE, NE)
2741         AddrGtOp -> (CMP ULE, EQQ)
2742         AddrGeOp -> (CMP ULT, EQQ)
2743         AddrEqOp -> (CMP EQQ, NE)
2744         AddrNeOp -> (CMP EQQ, EQQ)
2745         AddrLtOp -> (CMP ULT, NE)
2746         AddrLeOp -> (CMP ULE, NE)
2747
2748 #endif /* alpha_TARGET_ARCH */
2749
2750 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2751
2752 #if i386_TARGET_ARCH
2753
2754 genCondJump id bool = do
2755   CondCode _ cond code <- getCondCode bool
2756   return (code `snocOL` JXX cond id)
2757
2758 #endif
2759
2760 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2761
2762 #if x86_64_TARGET_ARCH
2763
2764 genCondJump id bool = do
2765   CondCode is_float cond cond_code <- getCondCode bool
2766   if not is_float
2767     then
2768         return (cond_code `snocOL` JXX cond id)
2769     else do
2770         lbl <- getBlockIdNat
2771
2772         -- see comment with condFltReg
2773         let code = case cond of
2774                         NE  -> or_unordered
2775                         GU  -> plain_test
2776                         GEU -> plain_test
2777                         _   -> and_ordered
2778
2779             plain_test = unitOL (
2780                   JXX cond id
2781                 )
2782             or_unordered = toOL [
2783                   JXX cond id,
2784                   JXX PARITY id
2785                 ]
2786             and_ordered = toOL [
2787                   JXX PARITY lbl,
2788                   JXX cond id,
2789                   JXX ALWAYS lbl,
2790                   NEWBLOCK lbl
2791                 ]
2792         return (cond_code `appOL` code)
2793
2794 #endif
2795
2796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2797
2798 #if sparc_TARGET_ARCH
2799
2800 genCondJump (BlockId id) bool = do
2801   CondCode is_float cond code <- getCondCode bool
2802   return (
2803        code `appOL`
2804        toOL (
2805          if   is_float
2806          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2807          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2808        )
2809     )
2810
2811 #endif /* sparc_TARGET_ARCH */
2812
2813
2814 #if powerpc_TARGET_ARCH
2815
2816 genCondJump id bool = do
2817   CondCode is_float cond code <- getCondCode bool
2818   return (code `snocOL` BCC cond id)
2819
2820 #endif /* powerpc_TARGET_ARCH */
2821
2822
2823 -- -----------------------------------------------------------------------------
2824 --  Generating C calls
2825
2826 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2827 -- @get_arg@, which moves the arguments to the correct registers/stack
2828 -- locations.  Apart from that, the code is easy.
2829 --
2830 -- (If applicable) Do not fill the delay slots here; you will confuse the
2831 -- register allocator.
2832
2833 genCCall
2834     :: CmmCallTarget            -- function to call
2835     -> [(CmmReg,MachHint)]      -- where to put the result
2836     -> [(CmmExpr,MachHint)]     -- arguments (of mixed type)
2837     -> Maybe [GlobalReg]        -- volatile regs to save
2838     -> NatM InstrBlock
2839
2840 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2841
2842 #if alpha_TARGET_ARCH
2843
2844 ccallResultRegs =
2845
2846 genCCall fn cconv result_regs args
2847   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2848                           `thenNat` \ ((unused,_), argCode) ->
2849     let
2850         nRegs = length allArgRegs - length unused
2851         code = asmSeqThen (map ($ []) argCode)
2852     in
2853         returnSeq code [
2854             LDA pv (AddrImm (ImmLab (ptext fn))),
2855             JSR ra (AddrReg pv) nRegs,
2856             LDGP gp (AddrReg ra)]
2857   where
2858     ------------------------
2859     {-  Try to get a value into a specific register (or registers) for
2860         a call.  The first 6 arguments go into the appropriate
2861         argument register (separate registers for integer and floating
2862         point arguments, but used in lock-step), and the remaining
2863         arguments are dumped to the stack, beginning at 0(sp).  Our
2864         first argument is a pair of the list of remaining argument
2865         registers to be assigned for this call and the next stack
2866         offset to use for overflowing arguments.  This way,
2867         @get_Arg@ can be applied to all of a call's arguments using
2868         @mapAccumLNat@.
2869     -}
2870     get_arg
2871         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2872         -> StixTree             -- Current argument
2873         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2874
2875     -- We have to use up all of our argument registers first...
2876
2877     get_arg ((iDst,fDst):dsts, offset) arg
2878       = getRegister arg                     `thenNat` \ register ->
2879         let
2880             reg  = if isFloatingRep pk then fDst else iDst
2881             code = registerCode register reg
2882             src  = registerName register reg
2883             pk   = registerRep register
2884         in
2885         return (
2886             if isFloatingRep pk then
2887                 ((dsts, offset), if isFixed register then
2888                     code . mkSeqInstr (FMOV src fDst)
2889                     else code)
2890             else
2891                 ((dsts, offset), if isFixed register then
2892                     code . mkSeqInstr (OR src (RIReg src) iDst)
2893                     else code))
2894
2895     -- Once we have run out of argument registers, we move to the
2896     -- stack...
2897
2898     get_arg ([], offset) arg
2899       = getRegister arg                 `thenNat` \ register ->
2900         getNewRegNat (registerRep register)
2901                                         `thenNat` \ tmp ->
2902         let
2903             code = registerCode register tmp
2904             src  = registerName register tmp
2905             pk   = registerRep register
2906             sz   = primRepToSize pk
2907         in
2908         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2909
2910 #endif /* alpha_TARGET_ARCH */
2911
2912 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2913
2914 #if i386_TARGET_ARCH
2915
2916 -- we only cope with a single result for foreign calls
2917 genCCall (CmmPrim op) [(r,_)] args vols = do
2918   case op of
2919         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
2920         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2921
2922         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
2923         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
2924
2925         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
2926         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
2927
2928         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
2929         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
2930
2931         other_op    -> outOfLineFloatOp op r args vols
2932  where
2933   actuallyInlineFloatOp rep instr [(x,_)]
2934         = do res <- trivialUFCode rep instr x
2935              any <- anyReg res
2936              return (any (getRegisterReg r))
2937
2938 genCCall target dest_regs args vols = do
2939     let
2940         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
2941 #if !darwin_TARGET_OS
2942         tot_arg_size        = sum sizes
2943 #else
2944         raw_arg_size        = sum sizes
2945         tot_arg_size        = roundTo 16 raw_arg_size
2946         arg_pad_size        = tot_arg_size - raw_arg_size
2947     delta0 <- getDeltaNat
2948     setDeltaNat (delta0 - arg_pad_size)
2949 #endif
2950
2951     push_codes <- mapM push_arg (reverse args)
2952     delta <- getDeltaNat
2953
2954     -- in
2955     -- deal with static vs dynamic call targets
2956     (callinsns,cconv) <-
2957       case target of
2958         -- CmmPrim -> ...
2959         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2960            -> -- ToDo: stdcall arg sizes
2961               return (unitOL (CALL (Left fn_imm) []), conv)
2962            where fn_imm = ImmCLbl lbl
2963         CmmForeignCall expr conv
2964            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2965                  ASSERT(dyn_rep == I32)
2966                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2967
2968     let push_code
2969 #if darwin_TARGET_OS
2970             | arg_pad_size /= 0
2971             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2972                     DELTA (delta0 - arg_pad_size)]
2973               `appOL` concatOL push_codes
2974             | otherwise
2975 #endif
2976             = concatOL push_codes
2977         call = callinsns `appOL`
2978                toOL (
2979                         -- Deallocate parameters after call for ccall;
2980                         -- but not for stdcall (callee does it)
2981                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
2982                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2983                   ++
2984                   [DELTA (delta + tot_arg_size)]
2985                )
2986     -- in
2987     setDeltaNat (delta + tot_arg_size)
2988
2989     let
2990         -- assign the results, if necessary
2991         assign_code []     = nilOL
2992         assign_code [(dest,_hint)] =
2993           case rep of
2994                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2995                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2996                 F32 -> unitOL (GMOV fake0 r_dest)
2997                 F64 -> unitOL (GMOV fake0 r_dest)
2998                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
2999           where
3000                 r_dest_hi = getHiVRegFromLo r_dest
3001                 rep = cmmRegRep dest
3002                 r_dest = getRegisterReg dest
3003         assign_code many = panic "genCCall.assign_code many"
3004
3005     return (push_code `appOL`
3006             call `appOL`
3007             assign_code dest_regs)
3008
3009   where
3010     arg_size F64 = 8
3011     arg_size F32 = 4
3012     arg_size I64 = 8
3013     arg_size _   = 4
3014
3015     roundTo a x | x `mod` a == 0 = x
3016                 | otherwise = x + a - (x `mod` a)
3017
3018
3019     push_arg :: (CmmExpr,MachHint){-current argument-}
3020                     -> NatM InstrBlock  -- code
3021
3022     push_arg (arg,_hint) -- we don't need the hints on x86
3023       | arg_rep == I64 = do
3024         ChildCode64 code r_lo <- iselExpr64 arg
3025         delta <- getDeltaNat
3026         setDeltaNat (delta - 8)
3027         let
3028             r_hi = getHiVRegFromLo r_lo
3029         -- in
3030         return (       code `appOL`
3031                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3032                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3033                              DELTA (delta-8)]
3034             )
3035
3036       | otherwise = do
3037         (code, reg, sz) <- get_op arg
3038         delta <- getDeltaNat
3039         let size = arg_size sz
3040         setDeltaNat (delta-size)
3041         if (case sz of F64 -> True; F32 -> True; _ -> False)
3042            then return (code `appOL`
3043                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3044                               DELTA (delta-size),
3045                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3046                                                         EAIndexNone
3047                                                         (ImmInt 0))]
3048                        )
3049            else return (code `snocOL`
3050                         PUSH I32 (OpReg reg) `snocOL`
3051                         DELTA (delta-size)
3052                        )
3053       where
3054          arg_rep = cmmExprRep arg
3055
3056     ------------
3057     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3058     get_op op = do
3059         (reg,code) <- getSomeReg op
3060         return (code, reg, cmmExprRep op)
3061
3062 #endif /* i386_TARGET_ARCH */
3063
3064 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3065
3066 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3067   -> Maybe [GlobalReg] -> NatM InstrBlock
3068 outOfLineFloatOp mop res args vols
3069   = do
3070       targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3071       let target = CmmForeignCall targetExpr CCallConv
3072
3073       if cmmRegRep res == F64
3074         then
3075           stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3076         else do
3077           uq <- getUniqueNat
3078           let
3079             tmp = CmmLocal (LocalReg uq F64)
3080           -- in
3081           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)]
3082                                          (map promote args) vols)
3083           code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3084           return (code1 `appOL` code2)
3085   where
3086 #if i386_TARGET_ARCH
3087         promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3088         demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
3089 #else
3090         promote (x,hint) = (x,hint)
3091         demote  x = x
3092 #endif
3093
3094         lbl = mkForeignLabel fn Nothing True
3095
3096         fn = case mop of
3097               MO_F32_Sqrt  -> FSLIT("sqrtf")
3098               MO_F32_Sin   -> FSLIT("sinf")
3099               MO_F32_Cos   -> FSLIT("cosf")
3100               MO_F32_Tan   -> FSLIT("tanf")
3101               MO_F32_Exp   -> FSLIT("expf")
3102               MO_F32_Log   -> FSLIT("logf")
3103
3104               MO_F32_Asin  -> FSLIT("asinf")
3105               MO_F32_Acos  -> FSLIT("acosf")
3106               MO_F32_Atan  -> FSLIT("atanf")
3107
3108               MO_F32_Sinh  -> FSLIT("sinhf")
3109               MO_F32_Cosh  -> FSLIT("coshf")
3110               MO_F32_Tanh  -> FSLIT("tanhf")
3111               MO_F32_Pwr   -> FSLIT("powf")
3112
3113               MO_F64_Sqrt  -> FSLIT("sqrt")
3114               MO_F64_Sin   -> FSLIT("sin")
3115               MO_F64_Cos   -> FSLIT("cos")
3116               MO_F64_Tan   -> FSLIT("tan")
3117               MO_F64_Exp   -> FSLIT("exp")
3118               MO_F64_Log   -> FSLIT("log")
3119
3120               MO_F64_Asin  -> FSLIT("asin")
3121               MO_F64_Acos  -> FSLIT("acos")
3122               MO_F64_Atan  -> FSLIT("atan")
3123
3124               MO_F64_Sinh  -> FSLIT("sinh")
3125               MO_F64_Cosh  -> FSLIT("cosh")
3126               MO_F64_Tanh  -> FSLIT("tanh")
3127               MO_F64_Pwr   -> FSLIT("pow")
3128
3129 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3130
3131 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3132
3133 #if x86_64_TARGET_ARCH
3134
3135 genCCall (CmmPrim op) [(r,_)] args vols =
3136   outOfLineFloatOp op r args vols
3137
3138 genCCall target dest_regs args vols = do
3139
3140         -- load up the register arguments
3141     (stack_args, aregs, fregs, load_args_code)
3142          <- load_args args allArgRegs allFPArgRegs nilOL
3143
3144     let
3145         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3146         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3147         arg_regs = int_regs_used ++ fp_regs_used
3148                 -- for annotating the call instruction with
3149
3150         sse_regs = length fp_regs_used
3151
3152         tot_arg_size = arg_size * length stack_args
3153
3154         -- On entry to the called function, %rsp should be aligned
3155         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3156         -- the return address is 16-byte aligned).  In STG land
3157         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3158         -- need to make sure we push a multiple of 16-bytes of args,
3159         -- plus the return address, to get the correct alignment.
3160         -- Urg, this is hard.  We need to feed the delta back into
3161         -- the arg pushing code.
3162     (real_size, adjust_rsp) <-
3163         if tot_arg_size `rem` 16 == 0
3164             then return (tot_arg_size, nilOL)
3165             else do -- we need to adjust...
3166                 delta <- getDeltaNat
3167                 setDeltaNat (delta-8)
3168                 return (tot_arg_size+8, toOL [
3169                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3170                                 DELTA (delta-8)
3171                         ])
3172
3173         -- push the stack args, right to left
3174     push_code <- push_args (reverse stack_args) nilOL
3175     delta <- getDeltaNat
3176
3177     -- deal with static vs dynamic call targets
3178     (callinsns,cconv) <-
3179       case target of
3180         -- CmmPrim -> ...
3181         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3182            -> -- ToDo: stdcall arg sizes
3183               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3184            where fn_imm = ImmCLbl lbl
3185         CmmForeignCall expr conv
3186            -> do (dyn_r, dyn_c) <- getSomeReg expr
3187                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3188
3189     let
3190         -- The x86_64 ABI requires us to set %al to the number of SSE
3191         -- registers that contain arguments, if the called routine
3192         -- is a varargs function.  We don't know whether it's a
3193         -- varargs function or not, so we have to assume it is.
3194         --
3195         -- It's not safe to omit this assignment, even if the number
3196         -- of SSE regs in use is zero.  If %al is larger than 8
3197         -- on entry to a varargs function, seg faults ensue.
3198         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3199
3200     let call = callinsns `appOL`
3201                toOL (
3202                         -- Deallocate parameters after call for ccall;
3203                         -- but not for stdcall (callee does it)
3204                   (if cconv == StdCallConv || real_size==0 then [] else
3205                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3206                   ++
3207                   [DELTA (delta + real_size)]
3208                )
3209     -- in
3210     setDeltaNat (delta + real_size)
3211
3212     let
3213         -- assign the results, if necessary
3214         assign_code []     = nilOL
3215         assign_code [(dest,_hint)] =
3216           case rep of
3217                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3218                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3219                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3220           where
3221                 rep = cmmRegRep dest
3222                 r_dest = getRegisterReg dest
3223         assign_code many = panic "genCCall.assign_code many"
3224
3225     return (load_args_code      `appOL`
3226             adjust_rsp          `appOL`
3227             push_code           `appOL`
3228             assign_eax sse_regs `appOL`
3229             call                `appOL`
3230             assign_code dest_regs)
3231
3232   where
3233     arg_size = 8 -- always, at the mo
3234
3235     load_args :: [(CmmExpr,MachHint)]
3236               -> [Reg]                  -- int regs avail for args
3237               -> [Reg]                  -- FP regs avail for args
3238               -> InstrBlock
3239               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3240     load_args args [] [] code     =  return (args, [], [], code)
3241         -- no more regs to use
3242     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3243         -- no more args to push
3244     load_args ((arg,hint) : rest) aregs fregs code
3245         | isFloatingRep arg_rep =
3246         case fregs of
3247           [] -> push_this_arg
3248           (r:rs) -> do
3249              arg_code <- getAnyReg arg
3250              load_args rest aregs rs (code `appOL` arg_code r)
3251         | otherwise =
3252         case aregs of
3253           [] -> push_this_arg
3254           (r:rs) -> do
3255              arg_code <- getAnyReg arg
3256              load_args rest rs fregs (code `appOL` arg_code r)
3257         where
3258           arg_rep = cmmExprRep arg
3259
3260           push_this_arg = do
3261             (args',ars,frs,code') <- load_args rest aregs fregs code
3262             return ((arg,hint):args', ars, frs, code')
3263
3264     push_args [] code = return code
3265     push_args ((arg,hint):rest) code
3266        | isFloatingRep arg_rep = do
3267          (arg_reg, arg_code) <- getSomeReg arg
3268          delta <- getDeltaNat
3269          setDeltaNat (delta-arg_size)
3270          let code' = code `appOL` toOL [
3271                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
3272                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3273                         DELTA (delta-arg_size)]
3274          push_args rest code'
3275
3276        | otherwise = do
3277        -- we only ever generate word-sized function arguments.  Promotion
3278        -- has already happened: our Int8# type is kept sign-extended
3279        -- in an Int#, for example.
3280          ASSERT(arg_rep == I64) return ()
3281          (arg_op, arg_code) <- getOperand arg
3282          delta <- getDeltaNat
3283          setDeltaNat (delta-arg_size)
3284          let code' = code `appOL` toOL [PUSH I64 arg_op,
3285                                         DELTA (delta-arg_size)]
3286          push_args rest code'
3287         where
3288           arg_rep = cmmExprRep arg
3289 #endif
3290
3291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3292
3293 #if sparc_TARGET_ARCH
3294 {-
3295    The SPARC calling convention is an absolute
3296    nightmare.  The first 6x32 bits of arguments are mapped into
3297    %o0 through %o5, and the remaining arguments are dumped to the
3298    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3299
3300    If we have to put args on the stack, move %o6==%sp down by
3301    the number of words to go on the stack, to ensure there's enough space.
3302
3303    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3304    16 words above the stack pointer is a word for the address of
3305    a structure return value.  I use this as a temporary location
3306    for moving values from float to int regs.  Certainly it isn't
3307    safe to put anything in the 16 words starting at %sp, since
3308    this area can get trashed at any time due to window overflows
3309    caused by signal handlers.
3310
3311    A final complication (if the above isn't enough) is that
3312    we can't blithely calculate the arguments one by one into
3313    %o0 .. %o5.  Consider the following nested calls:
3314
3315        fff a (fff b c)
3316
3317    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3318    the inner call will itself use %o0, which trashes the value put there
3319    in preparation for the outer call.  Upshot: we need to calculate the
3320    args into temporary regs, and move those to arg regs or onto the
3321    stack only immediately prior to the call proper.  Sigh.
3322 -}
3323
3324 genCCall target dest_regs argsAndHints vols = do
3325     let
3326         args = map fst argsAndHints
3327     argcode_and_vregs <- mapM arg_to_int_vregs args
3328     let
3329         (argcodes, vregss) = unzip argcode_and_vregs
3330         n_argRegs          = length allArgRegs
3331         n_argRegs_used     = min (length vregs) n_argRegs
3332         vregs              = concat vregss
3333     -- deal with static vs dynamic call targets
3334     callinsns <- (case target of
3335         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3336                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3337         CmmForeignCall expr conv -> do
3338                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3339                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3340         CmmPrim mop -> do
3341                   (res, reduce) <- outOfLineFloatOp mop
3342                   lblOrMopExpr <- case res of
3343                        Left lbl -> do
3344                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3345                        Right mopExpr -> do
3346                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3347                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3348                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3349
3350       )
3351     let
3352         argcode = concatOL argcodes
3353         (move_sp_down, move_sp_up)
3354            = let diff = length vregs - n_argRegs
3355                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3356              in  if   nn <= 0
3357                  then (nilOL, nilOL)
3358                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3359         transfer_code
3360            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3361     return (argcode       `appOL`
3362             move_sp_down  `appOL`
3363             transfer_code `appOL`
3364             callinsns     `appOL`
3365             unitOL NOP    `appOL`
3366             move_sp_up)
3367   where
3368      -- move args from the integer vregs into which they have been
3369      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3370      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3371
3372      move_final [] _ offset          -- all args done
3373         = []
3374
3375      move_final (v:vs) [] offset     -- out of aregs; move to stack
3376         = ST I32 v (spRel offset)
3377           : move_final vs [] (offset+1)
3378
3379      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3380         = OR False g0 (RIReg v) a
3381           : move_final vs az offset
3382
3383      -- generate code to calculate an argument, and move it into one
3384      -- or two integer vregs.
3385      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3386      arg_to_int_vregs arg
3387         | (cmmExprRep arg) == I64
3388         = do
3389           (ChildCode64 code r_lo) <- iselExpr64 arg
3390           let
3391               r_hi = getHiVRegFromLo r_lo
3392           return (code, [r_hi, r_lo])
3393         | otherwise
3394         = do
3395           (src, code) <- getSomeReg arg
3396           tmp <- getNewRegNat (cmmExprRep arg)
3397           let
3398               pk   = cmmExprRep arg
3399           case pk of
3400              F64 -> do
3401                       v1 <- getNewRegNat I32
3402                       v2 <- getNewRegNat I32
3403                       return (
3404                         code                          `snocOL`
3405                         FMOV F64 src f0                `snocOL`
3406                         ST   F32  f0 (spRel 16)         `snocOL`
3407                         LD   I32  (spRel 16) v1         `snocOL`
3408                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3409                         LD   I32  (spRel 16) v2
3410                         ,
3411                         [v1,v2]
3412                        )
3413              F32 -> do
3414                       v1 <- getNewRegNat I32
3415                       return (
3416                         code                    `snocOL`
3417                         ST   F32  src (spRel 16)  `snocOL`
3418                         LD   I32  (spRel 16) v1
3419                         ,
3420                         [v1]
3421                        )
3422              other -> do
3423                         v1 <- getNewRegNat I32
3424                         return (
3425                           code `snocOL` OR False g0 (RIReg src) v1
3426                           ,
3427                           [v1]
3428                          )
3429 outOfLineFloatOp mop =
3430     do
3431       mopExpr <- cmmMakeDynamicReference addImportNat True $
3432                   mkForeignLabel functionName Nothing True
3433       let mopLabelOrExpr = case mopExpr of
3434                         CmmLit (CmmLabel lbl) -> Left lbl
3435                         _ -> Right mopExpr
3436       return (mopLabelOrExpr, reduce)
3437             where
3438                 (reduce, functionName) = case mop of
3439                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3440                   MO_F32_Log    -> (True,  FSLIT("log"))
3441                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3442
3443                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3444                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3445                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3446
3447                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3448                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3449                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3450
3451                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3452                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3453                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3454
3455                   MO_F64_Exp    -> (False, FSLIT("exp"))
3456                   MO_F64_Log    -> (False, FSLIT("log"))
3457                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3458
3459                   MO_F64_Sin    -> (False, FSLIT("sin"))
3460                   MO_F64_Cos    -> (False, FSLIT("cos"))
3461                   MO_F64_Tan    -> (False, FSLIT("tan"))
3462
3463                   MO_F64_Asin   -> (False, FSLIT("asin"))
3464                   MO_F64_Acos   -> (False, FSLIT("acos"))
3465                   MO_F64_Atan   -> (False, FSLIT("atan"))
3466
3467                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3468                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3469                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3470
3471                   other -> pprPanic "outOfLineFloatOp(sparc) "
3472                                 (pprCallishMachOp mop)
3473
3474 #endif /* sparc_TARGET_ARCH */
3475
3476 #if powerpc_TARGET_ARCH
3477
3478 #if darwin_TARGET_OS || linux_TARGET_OS
3479 {-
3480     The PowerPC calling convention for Darwin/Mac OS X
3481     is described in Apple's document
3482     "Inside Mac OS X - Mach-O Runtime Architecture".
3483
3484     PowerPC Linux uses the System V Release 4 Calling Convention
3485     for PowerPC. It is described in the
3486     "System V Application Binary Interface PowerPC Processor Supplement".
3487
3488     Both conventions are similar:
3489     Parameters may be passed in general-purpose registers starting at r3, in
3490     floating point registers starting at f1, or on the stack.
3491
3492     But there are substantial differences:
3493     * The number of registers used for parameter passing and the exact set of
3494       nonvolatile registers differs (see MachRegs.lhs).
3495     * On Darwin, stack space is always reserved for parameters, even if they are
3496       passed in registers. The called routine may choose to save parameters from
3497       registers to the corresponding space on the stack.
3498     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3499       parameter is passed in an FPR.
3500     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3501       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3502       Darwin just treats an I64 like two separate I32s (high word first).
3503     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3504       4-byte aligned like everything else on Darwin.
3505     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3506       PowerPC Linux does not agree, so neither do we.
3507
3508     According to both conventions, The parameter area should be part of the
3509     caller's stack frame, allocated in the caller's prologue code (large enough
3510     to hold the parameter lists for all called routines). The NCG already
3511     uses the stack for register spilling, leaving 64 bytes free at the top.
3512     If we need a larger parameter area than that, we just allocate a new stack
3513     frame just before ccalling.
3514 -}
3515
3516 genCCall target dest_regs argsAndHints vols
3517   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3518         -- we rely on argument promotion in the codeGen
3519     do
3520         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3521                                                         (zip args argReps)
3522                                                         allArgRegs allFPArgRegs
3523                                                         initialStackOffset
3524                                                         (toOL []) []
3525
3526         (labelOrExpr, reduceToF32) <- case target of
3527             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3528             CmmForeignCall expr conv -> return  (Right expr, False)
3529             CmmPrim mop -> outOfLineFloatOp mop
3530
3531         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3532             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3533
3534         case labelOrExpr of
3535             Left lbl -> do
3536                 return (         codeBefore
3537                         `snocOL` BL lbl usedRegs
3538                         `appOL`  codeAfter)
3539             Right dyn -> do
3540                 (dynReg, dynCode) <- getSomeReg dyn
3541                 return (         dynCode
3542                         `snocOL` MTCTR dynReg
3543                         `appOL`  codeBefore
3544                         `snocOL` BCTRL usedRegs
3545                         `appOL`  codeAfter)
3546     where
3547 #if darwin_TARGET_OS
3548         initialStackOffset = 24
3549             -- size of linkage area + size of arguments, in bytes
3550         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3551                                        map machRepByteWidth argReps
3552 #elif linux_TARGET_OS
3553         initialStackOffset = 8
3554         stackDelta finalStack = roundTo 16 finalStack
3555 #endif
3556         args = map fst argsAndHints
3557         argReps = map cmmExprRep args
3558
3559         roundTo a x | x `mod` a == 0 = x
3560                     | otherwise = x + a - (x `mod` a)
3561
3562         move_sp_down finalStack
3563                | delta > 64 =
3564                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3565                               DELTA (-delta)]
3566                | otherwise = nilOL
3567                where delta = stackDelta finalStack
3568         move_sp_up finalStack
3569                | delta > 64 =
3570                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3571                               DELTA 0]
3572                | otherwise = nilOL
3573                where delta = stackDelta finalStack
3574
3575
3576         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3577         passArguments ((arg,I64):args) gprs fprs stackOffset
3578                accumCode accumUsed =
3579             do
3580                 ChildCode64 code vr_lo <- iselExpr64 arg
3581                 let vr_hi = getHiVRegFromLo vr_lo
3582
3583 #if darwin_TARGET_OS
3584                 passArguments args
3585                               (drop 2 gprs)
3586                               fprs
3587                               (stackOffset+8)
3588                               (accumCode `appOL` code
3589                                     `snocOL` storeWord vr_hi gprs stackOffset
3590                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3591                               ((take 2 gprs) ++ accumUsed)
3592             where
3593                 storeWord vr (gpr:_) offset = MR gpr vr
3594                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3595
3596 #elif linux_TARGET_OS
3597                 let stackOffset' = roundTo 8 stackOffset
3598                     stackCode = accumCode `appOL` code
3599                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3600                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3601                     regCode hireg loreg =
3602                         accumCode `appOL` code
3603                             `snocOL` MR hireg vr_hi
3604                             `snocOL` MR loreg vr_lo
3605
3606                 case gprs of
3607                     hireg : loreg : regs | even (length gprs) ->
3608                         passArguments args regs fprs stackOffset
3609                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3610                     _skipped : hireg : loreg : regs ->
3611                         passArguments args regs fprs stackOffset
3612                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3613                     _ -> -- only one or no regs left
3614                         passArguments args [] fprs (stackOffset'+8)
3615                                       stackCode accumUsed
3616 #endif
3617
3618         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3619             | reg : _ <- regs = do
3620                 register <- getRegister arg
3621                 let code = case register of
3622                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3623                             Any _ acode -> acode reg
3624                 passArguments args
3625                               (drop nGprs gprs)
3626                               (drop nFprs fprs)
3627 #if darwin_TARGET_OS
3628         -- The Darwin ABI requires that we reserve stack slots for register parameters
3629                               (stackOffset + stackBytes)
3630 #elif linux_TARGET_OS
3631         -- ... the SysV ABI doesn't.
3632                               stackOffset
3633 #endif
3634                               (accumCode `appOL` code)
3635                               (reg : accumUsed)
3636             | otherwise = do
3637                 (vr, code) <- getSomeReg arg
3638                 passArguments args
3639                               (drop nGprs gprs)
3640                               (drop nFprs fprs)
3641                               (stackOffset' + stackBytes)
3642                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3643                               accumUsed
3644             where
3645 #if darwin_TARGET_OS
3646         -- stackOffset is at least 4-byte aligned
3647         -- The Darwin ABI is happy with that.
3648                 stackOffset' = stackOffset
3649 #else
3650         -- ... the SysV ABI requires 8-byte alignment for doubles.
3651                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3652                              | otherwise  =           stackOffset
3653 #endif
3654                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3655                 (nGprs, nFprs, stackBytes, regs) = case rep of
3656                     I32 -> (1, 0, 4, gprs)
3657 #if darwin_TARGET_OS
3658         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3659         -- we use the FPRs.
3660                     F32 -> (1, 1, 4, fprs)
3661                     F64 -> (2, 1, 8, fprs)
3662 #elif linux_TARGET_OS
3663         -- ... the SysV ABI doesn't.
3664                     F32 -> (0, 1, 4, fprs)
3665                     F64 -> (0, 1, 8, fprs)
3666 #endif
3667
3668         moveResult reduceToF32 =
3669             case dest_regs of
3670                 [] -> nilOL
3671                 [(dest, _hint)]
3672                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3673                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3674                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3675                                           MR r_dest r4]
3676                     | otherwise -> unitOL (MR r_dest r3)
3677                     where rep = cmmRegRep dest
3678                           r_dest = getRegisterReg dest
3679
3680         outOfLineFloatOp mop =
3681             do
3682                 mopExpr <- cmmMakeDynamicReference addImportNat True $
3683                               mkForeignLabel functionName Nothing True
3684                 let mopLabelOrExpr = case mopExpr of
3685                         CmmLit (CmmLabel lbl) -> Left lbl
3686                         _ -> Right mopExpr
3687                 return (mopLabelOrExpr, reduce)
3688             where
3689                 (functionName, reduce) = case mop of
3690                     MO_F32_Exp   -> (FSLIT("exp"), True)
3691                     MO_F32_Log   -> (FSLIT("log"), True)
3692                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3693
3694                     MO_F32_Sin   -> (FSLIT("sin"), True)
3695                     MO_F32_Cos   -> (FSLIT("cos"), True)
3696                     MO_F32_Tan   -> (FSLIT("tan"), True)
3697
3698                     MO_F32_Asin  -> (FSLIT("asin"), True)
3699                     MO_F32_Acos  -> (FSLIT("acos"), True)
3700                     MO_F32_Atan  -> (FSLIT("atan"), True)
3701
3702                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3703                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3704                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3705                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3706
3707                     MO_F64_Exp   -> (FSLIT("exp"), False)
3708                     MO_F64_Log   -> (FSLIT("log"), False)
3709                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3710
3711                     MO_F64_Sin   -> (FSLIT("sin"), False)
3712                     MO_F64_Cos   -> (FSLIT("cos"), False)
3713                     MO_F64_Tan   -> (FSLIT("tan"), False)
3714
3715                     MO_F64_Asin  -> (FSLIT("asin"), False)
3716                     MO_F64_Acos  -> (FSLIT("acos"), False)
3717                     MO_F64_Atan  -> (FSLIT("atan"), False)
3718
3719                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3720                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3721                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3722                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3723                     other -> pprPanic "genCCall(ppc): unknown callish op"
3724                                     (pprCallishMachOp other)
3725
3726 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3727
3728 #endif /* powerpc_TARGET_ARCH */
3729
3730
3731 -- -----------------------------------------------------------------------------
3732 -- Generating a table-branch
3733
3734 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3735
3736 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3737 genSwitch expr ids
3738   | opt_PIC
3739   = do
3740         (reg,e_code) <- getSomeReg expr
3741         lbl <- getNewLabelNat
3742         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3743         (tableReg,t_code) <- getSomeReg $ dynRef
3744         let
3745             jumpTable = map jumpTableEntryRel ids
3746
3747             jumpTableEntryRel Nothing
3748                 = CmmStaticLit (CmmInt 0 wordRep)
3749             jumpTableEntryRel (Just (BlockId id))
3750                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3751                 where blockLabel = mkAsmTempLabel id
3752
3753             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3754                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3755
3756             code = e_code `appOL` t_code `appOL` toOL [
3757                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3758                             ADD wordRep op (OpReg tableReg),
3759                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3760                     ]
3761         return code
3762   | otherwise
3763   = do
3764         (reg,e_code) <- getSomeReg expr
3765         lbl <- getNewLabelNat
3766         let
3767             jumpTable = map jumpTableEntry ids
3768             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3769             code = e_code `appOL` toOL [
3770                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3771                     JMP_TBL op [ id | Just id <- ids ]
3772                  ]
3773         -- in
3774         return code
3775 #elif powerpc_TARGET_ARCH
3776 genSwitch expr ids
3777   | opt_PIC
3778   = do
3779         (reg,e_code) <- getSomeReg expr
3780         tmp <- getNewRegNat I32
3781         lbl <- getNewLabelNat
3782         dynRef <- cmmMakeDynamicReference addImportNat False lbl
3783         (tableReg,t_code) <- getSomeReg $ dynRef
3784         let
3785             jumpTable = map jumpTableEntryRel ids
3786
3787             jumpTableEntryRel Nothing
3788                 = CmmStaticLit (CmmInt 0 wordRep)
3789             jumpTableEntryRel (Just (BlockId id))
3790                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3791                 where blockLabel = mkAsmTempLabel id
3792
3793             code = e_code `appOL` t_code `appOL` toOL [
3794                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3795                             SLW tmp reg (RIImm (ImmInt 2)),
3796                             LD I32 tmp (AddrRegReg tableReg tmp),
3797                             ADD tmp tmp (RIReg tableReg),
3798                             MTCTR tmp,
3799                             BCTR [ id | Just id <- ids ]
3800                     ]
3801         return code
3802   | otherwise
3803   = do
3804         (reg,e_code) <- getSomeReg expr
3805         tmp <- getNewRegNat I32
3806         lbl <- getNewLabelNat
3807         let
3808             jumpTable = map jumpTableEntry ids
3809
3810             code = e_code `appOL` toOL [
3811                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3812                             SLW tmp reg (RIImm (ImmInt 2)),
3813                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3814                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3815                             MTCTR tmp,
3816                             BCTR [ id | Just id <- ids ]
3817                     ]
3818         return code
3819 #else
3820 genSwitch expr ids = panic "ToDo: genSwitch"
3821 #endif
3822
3823 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3824 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3825     where blockLabel = mkAsmTempLabel id
3826
3827 -- -----------------------------------------------------------------------------
3828 -- Support bits
3829 -- -----------------------------------------------------------------------------
3830
3831
3832 -- -----------------------------------------------------------------------------
3833 -- 'condIntReg' and 'condFltReg': condition codes into registers
3834
3835 -- Turn those condition codes into integers now (when they appear on
3836 -- the right hand side of an assignment).
3837 --
3838 -- (If applicable) Do not fill the delay slots here; you will confuse the
3839 -- register allocator.
3840
3841 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3842
3843 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3844
3845 #if alpha_TARGET_ARCH
3846 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3847 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3848 #endif /* alpha_TARGET_ARCH */
3849
3850 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3851
3852 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3853
3854 condIntReg cond x y = do
3855   CondCode _ cond cond_code <- condIntCode cond x y
3856   tmp <- getNewRegNat I8
3857   let
3858         code dst = cond_code `appOL` toOL [
3859                     SETCC cond (OpReg tmp),
3860                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3861                   ]
3862   -- in
3863   return (Any I32 code)
3864
3865 #endif
3866
3867 #if i386_TARGET_ARCH
3868
3869 condFltReg cond x y = do
3870   CondCode _ cond cond_code <- condFltCode cond x y
3871   tmp <- getNewRegNat I8
3872   let
3873         code dst = cond_code `appOL` toOL [
3874                     SETCC cond (OpReg tmp),
3875                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3876                   ]
3877   -- in
3878   return (Any I32 code)
3879
3880 #endif
3881
3882 #if x86_64_TARGET_ARCH
3883
3884 condFltReg cond x y = do
3885   CondCode _ cond cond_code <- condFltCode cond x y
3886   tmp1 <- getNewRegNat wordRep
3887   tmp2 <- getNewRegNat wordRep
3888   let
3889         -- We have to worry about unordered operands (eg. comparisons
3890         -- against NaN).  If the operands are unordered, the comparison
3891         -- sets the parity flag, carry flag and zero flag.
3892         -- All comparisons are supposed to return false for unordered
3893         -- operands except for !=, which returns true.
3894         --
3895         -- Optimisation: we don't have to test the parity flag if we
3896         -- know the test has already excluded the unordered case: eg >
3897         -- and >= test for a zero carry flag, which can only occur for
3898         -- ordered operands.
3899         --
3900         -- ToDo: by reversing comparisons we could avoid testing the
3901         -- parity flag in more cases.
3902
3903         code dst =
3904            cond_code `appOL`
3905              (case cond of
3906                 NE  -> or_unordered dst
3907                 GU  -> plain_test   dst
3908                 GEU -> plain_test   dst
3909                 _   -> and_ordered  dst)
3910
3911         plain_test dst = toOL [
3912                     SETCC cond (OpReg tmp1),
3913                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
3914                  ]
3915         or_unordered dst = toOL [
3916                     SETCC cond (OpReg tmp1),
3917                     SETCC PARITY (OpReg tmp2),
3918                     OR I8 (OpReg tmp1) (OpReg tmp2),
3919                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3920                   ]
3921         and_ordered dst = toOL [
3922                     SETCC cond (OpReg tmp1),
3923                     SETCC NOTPARITY (OpReg tmp2),
3924                     AND I8 (OpReg tmp1) (OpReg tmp2),
3925                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
3926                   ]
3927   -- in
3928   return (Any I32 code)
3929
3930 #endif
3931
3932 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3933
3934 #if sparc_TARGET_ARCH
3935
3936 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3937     (src, code) <- getSomeReg x
3938     tmp <- getNewRegNat I32
3939     let
3940         code__2 dst = code `appOL` toOL [
3941             SUB False True g0 (RIReg src) g0,
3942             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3943     return (Any I32 code__2)
3944
3945 condIntReg EQQ x y = do
3946     (src1, code1) <- getSomeReg x
3947     (src2, code2) <- getSomeReg y
3948     tmp1 <- getNewRegNat I32
3949     tmp2 <- getNewRegNat I32
3950     let
3951         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3952             XOR False src1 (RIReg src2) dst,
3953             SUB False True g0 (RIReg dst) g0,
3954             SUB True False g0 (RIImm (ImmInt (-1))) dst]
3955     return (Any I32 code__2)
3956
3957 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3958     (src, code) <- getSomeReg x
3959     tmp <- getNewRegNat I32
3960     let
3961         code__2 dst = code `appOL` toOL [
3962             SUB False True g0 (RIReg src) g0,
3963             ADD True False g0 (RIImm (ImmInt 0)) dst]
3964     return (Any I32 code__2)
3965
3966 condIntReg NE x y = do
3967     (src1, code1) <- getSomeReg x
3968     (src2, code2) <- getSomeReg y
3969     tmp1 <- getNewRegNat I32
3970     tmp2 <- getNewRegNat I32
3971     let
3972         code__2 dst = code1 `appOL` code2 `appOL` toOL [
3973             XOR False src1 (RIReg src2) dst,
3974             SUB False True g0 (RIReg dst) g0,
3975             ADD True False g0 (RIImm (ImmInt 0)) dst]
3976     return (Any I32 code__2)
3977
3978 condIntReg cond x y = do
3979     BlockId lbl1 <- getBlockIdNat
3980     BlockId lbl2 <- getBlockIdNat
3981     CondCode _ cond cond_code <- condIntCode cond x y
3982     let
3983         code__2 dst = cond_code `appOL` toOL [
3984             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3985             OR False g0 (RIImm (ImmInt 0)) dst,
3986             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3987             NEWBLOCK (BlockId lbl1),
3988             OR False g0 (RIImm (ImmInt 1)) dst,
3989             NEWBLOCK (BlockId lbl2)]
3990     return (Any I32 code__2)
3991
3992 condFltReg cond x y = do
3993     BlockId lbl1 <- getBlockIdNat
3994     BlockId lbl2 <- getBlockIdNat
3995     CondCode _ cond cond_code <- condFltCode cond x y
3996     let
3997         code__2 dst = cond_code `appOL` toOL [
3998             NOP,
3999             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4000             OR False g0 (RIImm (ImmInt 0)) dst,
4001             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4002             NEWBLOCK (BlockId lbl1),
4003             OR False g0 (RIImm (ImmInt 1)) dst,
4004             NEWBLOCK (BlockId lbl2)]
4005     return (Any I32 code__2)
4006
4007 #endif /* sparc_TARGET_ARCH */
4008
4009 #if powerpc_TARGET_ARCH
4010 condReg getCond = do
4011     lbl1 <- getBlockIdNat
4012     lbl2 <- getBlockIdNat
4013     CondCode _ cond cond_code <- getCond
4014     let
4015 {-        code dst = cond_code `appOL` toOL [
4016                 BCC cond lbl1,
4017                 LI dst (ImmInt 0),
4018                 BCC ALWAYS lbl2,
4019                 NEWBLOCK lbl1,
4020                 LI dst (ImmInt 1),
4021                 BCC ALWAYS lbl2,
4022                 NEWBLOCK lbl2
4023             ]-}
4024         code dst = cond_code
4025             `appOL` negate_code
4026             `appOL` toOL [
4027                 MFCR dst,
4028                 RLWINM dst dst (bit + 1) 31 31
4029             ]
4030
4031         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4032                     | otherwise = nilOL
4033
4034         (bit, do_negate) = case cond of
4035             LTT -> (0, False)
4036             LE  -> (1, True)
4037             EQQ -> (2, False)
4038             GE  -> (0, True)
4039             GTT -> (1, False)
4040
4041             NE  -> (2, True)
4042
4043             LU  -> (0, False)
4044             LEU -> (1, True)
4045             GEU -> (0, True)
4046             GU  -> (1, False)
4047
4048     return (Any I32 code)
4049
4050 condIntReg cond x y = condReg (condIntCode cond x y)
4051 condFltReg cond x y = condReg (condFltCode cond x y)
4052 #endif /* powerpc_TARGET_ARCH */
4053
4054
4055 -- -----------------------------------------------------------------------------
4056 -- 'trivial*Code': deal with trivial instructions
4057
4058 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4059 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4060 -- Only look for constants on the right hand side, because that's
4061 -- where the generic optimizer will have put them.
4062
4063 -- Similarly, for unary instructions, we don't have to worry about
4064 -- matching an StInt as the argument, because genericOpt will already
4065 -- have handled the constant-folding.
4066
4067 trivialCode
4068     :: MachRep
4069     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4070       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4071                      -> Maybe (Operand -> Operand -> Instr)
4072       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4073                      -> Maybe (Operand -> Operand -> Instr)
4074       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4075       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4076       ,)))))
4077     -> CmmExpr -> CmmExpr -- the two arguments
4078     -> NatM Register
4079
4080 #ifndef powerpc_TARGET_ARCH
4081 trivialFCode
4082     :: MachRep
4083     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4084       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4085       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4086       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4087       ,))))
4088     -> CmmExpr -> CmmExpr -- the two arguments
4089     -> NatM Register
4090 #endif
4091
4092 trivialUCode
4093     :: MachRep
4094     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4095       ,IF_ARCH_i386 ((Operand -> Instr)
4096       ,IF_ARCH_x86_64 ((Operand -> Instr)
4097       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4098       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4099       ,)))))
4100     -> CmmExpr  -- the one argument
4101     -> NatM Register
4102
4103 #ifndef powerpc_TARGET_ARCH
4104 trivialUFCode
4105     :: MachRep
4106     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4107       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4108       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4109       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4110       ,))))
4111     -> CmmExpr -- the one argument
4112     -> NatM Register
4113 #endif
4114
4115 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4116
4117 #if alpha_TARGET_ARCH
4118
4119 trivialCode instr x (StInt y)
4120   | fits8Bits y
4121   = getRegister x               `thenNat` \ register ->
4122     getNewRegNat IntRep         `thenNat` \ tmp ->
4123     let
4124         code = registerCode register tmp
4125         src1 = registerName register tmp
4126         src2 = ImmInt (fromInteger y)
4127         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4128     in
4129     return (Any IntRep code__2)
4130
4131 trivialCode instr x y
4132   = getRegister x               `thenNat` \ register1 ->
4133     getRegister y               `thenNat` \ register2 ->
4134     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4135     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4136     let
4137         code1 = registerCode register1 tmp1 []
4138         src1  = registerName register1 tmp1
4139         code2 = registerCode register2 tmp2 []
4140         src2  = registerName register2 tmp2
4141         code__2 dst = asmSeqThen [code1, code2] .
4142                      mkSeqInstr (instr src1 (RIReg src2) dst)
4143     in
4144     return (Any IntRep code__2)
4145
4146 ------------
4147 trivialUCode instr x
4148   = getRegister x               `thenNat` \ register ->
4149     getNewRegNat IntRep         `thenNat` \ tmp ->
4150     let
4151         code = registerCode register tmp
4152         src  = registerName register tmp
4153         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4154     in
4155     return (Any IntRep code__2)
4156
4157 ------------
4158 trivialFCode _ instr x y
4159   = getRegister x               `thenNat` \ register1 ->
4160     getRegister y               `thenNat` \ register2 ->
4161     getNewRegNat F64    `thenNat` \ tmp1 ->
4162     getNewRegNat F64    `thenNat` \ tmp2 ->
4163     let
4164         code1 = registerCode register1 tmp1
4165         src1  = registerName register1 tmp1
4166
4167         code2 = registerCode register2 tmp2
4168         src2  = registerName register2 tmp2
4169
4170         code__2 dst = asmSeqThen [code1 [], code2 []] .
4171                       mkSeqInstr (instr src1 src2 dst)
4172     in
4173     return (Any F64 code__2)
4174
4175 trivialUFCode _ instr x
4176   = getRegister x               `thenNat` \ register ->
4177     getNewRegNat F64    `thenNat` \ tmp ->
4178     let
4179         code = registerCode register tmp
4180         src  = registerName register tmp
4181         code__2 dst = code . mkSeqInstr (instr src dst)
4182     in
4183     return (Any F64 code__2)
4184
4185 #endif /* alpha_TARGET_ARCH */
4186
4187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4188
4189 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4190
4191 {-
4192 The Rules of the Game are:
4193
4194 * You cannot assume anything about the destination register dst;
4195   it may be anything, including a fixed reg.
4196
4197 * You may compute an operand into a fixed reg, but you may not
4198   subsequently change the contents of that fixed reg.  If you
4199   want to do so, first copy the value either to a temporary
4200   or into dst.  You are free to modify dst even if it happens
4201   to be a fixed reg -- that's not your problem.
4202
4203 * You cannot assume that a fixed reg will stay live over an
4204   arbitrary computation.  The same applies to the dst reg.
4205
4206 * Temporary regs obtained from getNewRegNat are distinct from
4207   each other and from all other regs, and stay live over
4208   arbitrary computations.
4209
4210 --------------------
4211
4212 SDM's version of The Rules:
4213
4214 * If getRegister returns Any, that means it can generate correct
4215   code which places the result in any register, period.  Even if that
4216   register happens to be read during the computation.
4217
4218   Corollary #1: this means that if you are generating code for an
4219   operation with two arbitrary operands, you cannot assign the result
4220   of the first operand into the destination register before computing
4221   the second operand.  The second operand might require the old value
4222   of the destination register.
4223
4224   Corollary #2: A function might be able to generate more efficient
4225   code if it knows the destination register is a new temporary (and
4226   therefore not read by any of the sub-computations).
4227
4228 * If getRegister returns Any, then the code it generates may modify only:
4229         (a) fresh temporaries
4230         (b) the destination register
4231         (c) known registers (eg. %ecx is used by shifts)
4232   In particular, it may *not* modify global registers, unless the global
4233   register happens to be the destination register.
4234 -}
4235
4236 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4237   | not (is64BitLit lit_a) = do
4238   b_code <- getAnyReg b
4239   let
4240        code dst
4241          = b_code dst `snocOL`
4242            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4243   -- in
4244   return (Any rep code)
4245
4246 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4247
4248 -- This is re-used for floating pt instructions too.
4249 genTrivialCode rep instr a b = do
4250   (b_op, b_code) <- getNonClobberedOperand b
4251   a_code <- getAnyReg a
4252   tmp <- getNewRegNat rep
4253   let
4254      -- We want the value of b to stay alive across the computation of a.
4255      -- But, we want to calculate a straight into the destination register,
4256      -- because the instruction only has two operands (dst := dst `op` src).
4257      -- The troublesome case is when the result of b is in the same register
4258      -- as the destination reg.  In this case, we have to save b in a
4259      -- new temporary across the computation of a.
4260      code dst
4261         | dst `regClashesWithOp` b_op =
4262                 b_code `appOL`
4263                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4264                 a_code dst `snocOL`
4265                 instr (OpReg tmp) (OpReg dst)
4266         | otherwise =
4267                 b_code `appOL`
4268                 a_code dst `snocOL`
4269                 instr b_op (OpReg dst)
4270   -- in
4271   return (Any rep code)
4272
4273 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4274 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4275 reg `regClashesWithOp` _            = False
4276
4277 -----------
4278
4279 trivialUCode rep instr x = do
4280   x_code <- getAnyReg x
4281   let
4282      code dst =
4283         x_code dst `snocOL`
4284         instr (OpReg dst)
4285   -- in
4286   return (Any rep code)
4287
4288 -----------
4289
4290 #if i386_TARGET_ARCH
4291
4292 trivialFCode pk instr x y = do
4293   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4294   (y_reg, y_code) <- getSomeReg y
4295   let
4296      code dst =
4297         x_code `appOL`
4298         y_code `snocOL`
4299         instr pk x_reg y_reg dst
4300   -- in
4301   return (Any pk code)
4302
4303 #endif
4304
4305 #if x86_64_TARGET_ARCH
4306
4307 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4308
4309 #endif
4310
4311 -------------
4312
4313 trivialUFCode rep instr x = do
4314   (x_reg, x_code) <- getSomeReg x
4315   let
4316      code dst =
4317         x_code `snocOL`
4318         instr x_reg dst
4319   -- in
4320   return (Any rep code)
4321
4322 #endif /* i386_TARGET_ARCH */
4323
4324 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4325
4326 #if sparc_TARGET_ARCH
4327
4328 trivialCode pk instr x (CmmLit (CmmInt y d))
4329   | fits13Bits y
4330   = do
4331       (src1, code) <- getSomeReg x
4332       tmp <- getNewRegNat I32
4333       let
4334         src2 = ImmInt (fromInteger y)
4335         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4336       return (Any I32 code__2)
4337
4338 trivialCode pk instr x y = do
4339     (src1, code1) <- getSomeReg x
4340     (src2, code2) <- getSomeReg y
4341     tmp1 <- getNewRegNat I32
4342     tmp2 <- getNewRegNat I32
4343     let
4344         code__2 dst = code1 `appOL` code2 `snocOL`
4345                       instr src1 (RIReg src2) dst
4346     return (Any I32 code__2)
4347
4348 ------------
4349 trivialFCode pk instr x y = do
4350     (src1, code1) <- getSomeReg x
4351     (src2, code2) <- getSomeReg y
4352     tmp1 <- getNewRegNat (cmmExprRep x)
4353     tmp2 <- getNewRegNat (cmmExprRep y)
4354     tmp <- getNewRegNat F64
4355     let
4356         promote x = FxTOy F32 F64 x tmp
4357
4358         pk1   = cmmExprRep x
4359         pk2   = cmmExprRep y
4360
4361         code__2 dst =
4362                 if pk1 == pk2 then
4363                     code1 `appOL` code2 `snocOL`
4364                     instr pk src1 src2 dst
4365                 else if pk1 == F32 then
4366                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4367                     instr F64 tmp src2 dst
4368                 else
4369                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4370                     instr F64 src1 tmp dst
4371     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4372
4373 ------------
4374 trivialUCode pk instr x = do
4375     (src, code) <- getSomeReg x
4376     tmp <- getNewRegNat pk
4377     let
4378         code__2 dst = code `snocOL` instr (RIReg src) dst
4379     return (Any pk code__2)
4380
4381 -------------
4382 trivialUFCode pk instr x = do
4383     (src, code) <- getSomeReg x
4384     tmp <- getNewRegNat pk
4385     let
4386         code__2 dst = code `snocOL` instr src dst
4387     return (Any pk code__2)
4388
4389 #endif /* sparc_TARGET_ARCH */
4390
4391 #if powerpc_TARGET_ARCH
4392
4393 {-
4394 Wolfgang's PowerPC version of The Rules:
4395
4396 A slightly modified version of The Rules to take advantage of the fact
4397 that PowerPC instructions work on all registers and don't implicitly
4398 clobber any fixed registers.
4399
4400 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4401
4402 * If getRegister returns Any, then the code it generates may modify only:
4403         (a) fresh temporaries
4404         (b) the destination register
4405   It may *not* modify global registers, unless the global
4406   register happens to be the destination register.
4407   It may not clobber any other registers. In fact, only ccalls clobber any
4408   fixed registers.
4409   Also, it may not modify the counter register (used by genCCall).
4410
4411   Corollary: If a getRegister for a subexpression returns Fixed, you need
4412   not move it to a fresh temporary before evaluating the next subexpression.
4413   The Fixed register won't be modified.
4414   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4415
4416 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4417   the value of the destination register.
4418 -}
4419
4420 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4421     | Just imm <- makeImmediate rep signed y
4422     = do
4423         (src1, code1) <- getSomeReg x
4424         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4425         return (Any rep code)
4426
4427 trivialCode rep signed instr x y = do
4428     (src1, code1) <- getSomeReg x
4429     (src2, code2) <- getSomeReg y
4430     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4431     return (Any rep code)
4432
4433 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4434     -> CmmExpr -> CmmExpr -> NatM Register
4435 trivialCodeNoImm rep instr x y = do
4436     (src1, code1) <- getSomeReg x
4437     (src2, code2) <- getSomeReg y
4438     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4439     return (Any rep code)
4440
4441 trivialUCode rep instr x = do
4442     (src, code) <- getSomeReg x
4443     let code' dst = code `snocOL` instr dst src
4444     return (Any rep code')
4445
4446 -- There is no "remainder" instruction on the PPC, so we have to do
4447 -- it the hard way.
4448 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4449
4450 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4451     -> CmmExpr -> CmmExpr -> NatM Register
4452 remainderCode rep div x y = do
4453     (src1, code1) <- getSomeReg x
4454     (src2, code2) <- getSomeReg y
4455     let code dst = code1 `appOL` code2 `appOL` toOL [
4456                 div dst src1 src2,
4457                 MULLW dst dst (RIReg src2),
4458                 SUBF dst dst src1
4459             ]
4460     return (Any rep code)
4461
4462 #endif /* powerpc_TARGET_ARCH */
4463
4464
4465 -- -----------------------------------------------------------------------------
4466 --  Coercing to/from integer/floating-point...
4467
4468 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4469 -- conversions.  We have to store temporaries in memory to move
4470 -- between the integer and the floating point register sets.
4471
4472 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4473 -- pretend, on sparc at least, that double and float regs are seperate
4474 -- kinds, so the value has to be computed into one kind before being
4475 -- explicitly "converted" to live in the other kind.
4476
4477 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4478 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4479
4480 #if sparc_TARGET_ARCH
4481 coerceDbl2Flt :: CmmExpr -> NatM Register
4482 coerceFlt2Dbl :: CmmExpr -> NatM Register
4483 #endif
4484
4485 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4486
4487 #if alpha_TARGET_ARCH
4488
4489 coerceInt2FP _ x
4490   = getRegister x               `thenNat` \ register ->
4491     getNewRegNat IntRep         `thenNat` \ reg ->
4492     let
4493         code = registerCode register reg
4494         src  = registerName register reg
4495
4496         code__2 dst = code . mkSeqInstrs [
4497             ST Q src (spRel 0),
4498             LD TF dst (spRel 0),
4499             CVTxy Q TF dst dst]
4500     in
4501     return (Any F64 code__2)
4502
4503 -------------
4504 coerceFP2Int x
4505   = getRegister x               `thenNat` \ register ->
4506     getNewRegNat F64    `thenNat` \ tmp ->
4507     let
4508         code = registerCode register tmp
4509         src  = registerName register tmp
4510
4511         code__2 dst = code . mkSeqInstrs [
4512             CVTxy TF Q src tmp,
4513             ST TF tmp (spRel 0),
4514             LD Q dst (spRel 0)]
4515     in
4516     return (Any IntRep code__2)
4517
4518 #endif /* alpha_TARGET_ARCH */
4519
4520 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4521
4522 #if i386_TARGET_ARCH
4523
4524 coerceInt2FP from to x = do
4525   (x_reg, x_code) <- getSomeReg x
4526   let
4527         opc  = case to of F32 -> GITOF; F64 -> GITOD
4528         code dst = x_code `snocOL` opc x_reg dst
4529         -- ToDo: works for non-I32 reps?
4530   -- in
4531   return (Any to code)
4532
4533 ------------
4534
4535 coerceFP2Int from to x = do
4536   (x_reg, x_code) <- getSomeReg x
4537   let
4538         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4539         code dst = x_code `snocOL` opc x_reg dst
4540         -- ToDo: works for non-I32 reps?
4541   -- in
4542   return (Any to code)
4543
4544 #endif /* i386_TARGET_ARCH */
4545
4546 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4547
4548 #if x86_64_TARGET_ARCH
4549
4550 coerceFP2Int from to x = do
4551   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4552   let
4553         opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4554         code dst = x_code `snocOL` opc x_op dst
4555   -- in
4556   return (Any to code) -- works even if the destination rep is <I32
4557
4558 coerceInt2FP from to x = do
4559   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4560   let
4561         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4562         code dst = x_code `snocOL` opc x_op dst
4563   -- in
4564   return (Any to code) -- works even if the destination rep is <I32
4565
4566 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4567 coerceFP2FP to x = do
4568   (x_reg, x_code) <- getSomeReg x
4569   let
4570         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4571         code dst = x_code `snocOL` opc x_reg dst
4572   -- in
4573   return (Any to code)
4574
4575 #endif
4576
4577 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4578
4579 #if sparc_TARGET_ARCH
4580
4581 coerceInt2FP pk1 pk2 x = do
4582     (src, code) <- getSomeReg x
4583     let
4584         code__2 dst = code `appOL` toOL [
4585             ST pk1 src (spRel (-2)),
4586             LD pk1 (spRel (-2)) dst,
4587             FxTOy pk1 pk2 dst dst]
4588     return (Any pk2 code__2)
4589
4590 ------------
4591 coerceFP2Int pk fprep x = do
4592     (src, code) <- getSomeReg x
4593     reg <- getNewRegNat fprep
4594     tmp <- getNewRegNat pk
4595     let
4596         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4597             code `appOL` toOL [
4598             FxTOy fprep pk src tmp,
4599             ST pk tmp (spRel (-2)),
4600             LD pk (spRel (-2)) dst]
4601     return (Any pk code__2)
4602
4603 ------------
4604 coerceDbl2Flt x = do
4605     (src, code) <- getSomeReg x
4606     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4607
4608 ------------
4609 coerceFlt2Dbl x = do
4610     (src, code) <- getSomeReg x
4611     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4612
4613 #endif /* sparc_TARGET_ARCH */
4614
4615 #if powerpc_TARGET_ARCH
4616 coerceInt2FP fromRep toRep x = do
4617     (src, code) <- getSomeReg x
4618     lbl <- getNewLabelNat
4619     itmp <- getNewRegNat I32
4620     ftmp <- getNewRegNat F64
4621     dynRef <- cmmMakeDynamicReference addImportNat False lbl
4622     Amode addr addr_code <- getAmode dynRef
4623     let
4624         code' dst = code `appOL` maybe_exts `appOL` toOL [
4625                 LDATA ReadOnlyData
4626                                 [CmmDataLabel lbl,
4627                                  CmmStaticLit (CmmInt 0x43300000 I32),
4628                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4629                 XORIS itmp src (ImmInt 0x8000),
4630                 ST I32 itmp (spRel 3),
4631                 LIS itmp (ImmInt 0x4330),
4632                 ST I32 itmp (spRel 2),
4633                 LD F64 ftmp (spRel 2)
4634             ] `appOL` addr_code `appOL` toOL [
4635                 LD F64 dst addr,
4636                 FSUB F64 dst ftmp dst
4637             ] `appOL` maybe_frsp dst
4638
4639         maybe_exts = case fromRep of
4640                         I8 ->  unitOL $ EXTS I8 src src
4641                         I16 -> unitOL $ EXTS I16 src src
4642                         I32 -> nilOL
4643         maybe_frsp dst = case toRep of
4644                         F32 -> unitOL $ FRSP dst dst
4645                         F64 -> nilOL
4646     return (Any toRep code')
4647
4648 coerceFP2Int fromRep toRep x = do
4649     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4650     (src, code) <- getSomeReg x
4651     tmp <- getNewRegNat F64
4652     let
4653         code' dst = code `appOL` toOL [
4654                 -- convert to int in FP reg
4655             FCTIWZ tmp src,
4656                 -- store value (64bit) from FP to stack
4657             ST F64 tmp (spRel 2),
4658                 -- read low word of value (high word is undefined)
4659             LD I32 dst (spRel 3)]
4660     return (Any toRep code')
4661 #endif /* powerpc_TARGET_ARCH */
4662
4663
4664 -- -----------------------------------------------------------------------------
4665 -- eXTRA_STK_ARGS_HERE
4666
4667 -- We (allegedly) put the first six C-call arguments in registers;
4668 -- where do we start putting the rest of them?
4669
4670 -- Moved from MachInstrs (SDM):
4671
4672 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4673 eXTRA_STK_ARGS_HERE :: Int
4674 eXTRA_STK_ARGS_HERE
4675   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4676 #endif
4677