1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
26 -- Our intermediate code:
27 import PprCmm ( pprExpr )
33 import StaticFlags ( opt_PIC )
34 import ForeignCall ( CCallConv(..) )
39 import FastTypes ( isFastTrue )
40 import Constants ( wORD_SIZE )
43 import Outputable ( assertPanic )
44 import TRACE ( trace )
47 import Control.Monad ( mapAndUnzipM )
48 import Maybe ( fromJust )
52 -- -----------------------------------------------------------------------------
53 -- Top-level of the instruction selector
55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
56 -- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct order.
60 type InstrBlock = OrdList Instr
-- | Translate one top-level Cmm item into native-code top-level items.
--
-- A 'CmmProc' is translated block by block; selecting code for one block
-- may itself produce extra blocks and static data (see
-- 'basicBlockCodeGen'), so the pieces are concatenated back together.
-- If a PIC base register is in use, the procedure's entry code is
-- patched via 'initializePicBase'.
cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): the scrutinee line of this case (presumably
  -- @case picBaseMb of@) is not visible in this excerpt.
        Just picBase -> initializePicBase picBase tops
        Nothing -> return tops

-- Static data needs no instruction selection; it is passed through.
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Generate native code for one Cmm basic block.  Returns the native
-- basic blocks produced (possibly more than one) together with any
-- static data (LDATA) the instruction selector decided to emit.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- NOTE(review): the keyword introducing these bindings (presumably
  -- @let@) is not visible in this excerpt.
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
        -- A NEWBLOCK marker closes the run of instructions accumulated
        -- so far and opens a fresh basic block with the given id.
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        -- Static data is hoisted out of the instruction stream.
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        -- Ordinary instructions accumulate; foldrOL keeps source order.
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Flatten a list of statements into one instruction stream by
-- translating each statement and concatenating the results.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the equation head (presumably @stmtsToInstrs stmts@)
-- is not visible in this excerpt.
 = do instrss <- mapM stmtToInstrs stmts
      return (concatOL instrss)
-- | Translate a single Cmm statement to native instructions,
-- dispatching on the statement shape.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    -- NOTE(review): the CmmAssign alternative's head is not visible in
    -- this excerpt; these guards dispatch on the target register's rep.
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      -- 64-bit assignments need the special register-pair path on
      -- 32-bit targets (the matching #endif is not visible here).
      | kind == I64 -> assignReg_I64Code reg src
      | otherwise -> assignReg_IntCode kind reg src
        where kind = cmmRegRep reg

    -- NOTE(review): the CmmStore alternative's head is likewise not
    -- visible; same dispatch, keyed on the rep of the stored value.
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64 -> assignMem_I64Code addr src
      | otherwise -> assignMem_IntCode kind addr src
        where kind = cmmExprRep src

    CmmCall target result_regs args vols
       -> genCCall target result_regs args vols

    CmmBranch id         -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids    -> genSwitch arg ids
    CmmJump arg params   -> genJump arg
128 -- -----------------------------------------------------------------------------
129 -- General things for putting together code sequences
-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?

-- | Rewrite a 'CmmRegOff' node into the equivalent explicit addition
-- @reg + off@, performed at the register's own 'MachRep'.  The code
-- generator only applies this to 'CmmRegOff' trees (see 'getRegister');
-- any other shape is a compiler bug, reported explicitly below rather
-- than as an opaque pattern-match failure.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
  where rep = cmmRegRep reg
mangleIndexTree _
  = panic "MachCodeGen.mangleIndexTree: no match"
138 -- -----------------------------------------------------------------------------
139 -- Code gen for 64-bit arithmetic on 32-bit platforms
142 Simple support for generating 64-bit code (ie, 64 bit values and 64
143 bit assignments) on 32-bit platforms. Unlike the main code generator
144 we merely shoot for generating working code as simply as possible, and
145 pay little attention to code quality. Specifically, there is no
146 attempt to deal cleverly with the fixed-vs-floating register
147 distinction; all values are generated into (pairs of) floating
148 registers, even if this would mean some redundant reg-reg moves as a
149 result. Only one of the VRegUniques is returned, since it will be
150 of the VRegUniqueLo form, and the upper-half VReg can be determined
151 by applying getHiVRegFromLo to it.
-- | The result of 64-bit instruction selection on a 32-bit target.
data ChildCode64   -- a.k.a "Register64"
-- NOTE(review): the constructor line (presumably
-- @= ChildCode64 InstrBlock@) is not visible in this excerpt.
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified

#if WORD_SIZE_IN_BITS==32

-- | Store a 64-bit value (2nd argument) at the given address.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- | Assign a 64-bit value to a register (pair).
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
-- | Select instructions computing a 64-bit value into a register pair.
iselExpr64 :: CmmExpr -> NatM ChildCode64
172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value as two 32-bit moves; little-endian, so
-- the low word goes at @addr@ and the high word at @addr+4@.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  -- NOTE(review): the @let@ introducing these bindings is not visible
  -- in this excerpt.
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))

  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair with two
-- reg-reg moves (one per 32-bit half).
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
         r_dst_lo = mkVReg u_dst I32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers can hold an I64 here; anything else is a bug
-- in the caller.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: load each 32-bit half with an immediate move.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  -- r = low 32 bits, q = high 32 bits of the literal.
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
        MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- 64-bit load: two 32-bit loads; little-endian, low word at addr.
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- A 64-bit local already lives in a virtual register pair; no code.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))

-- we handle addition, but rather badly
-- Add a literal: low words with ADD (sets carry), high words with ADC.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General 64-bit addition: copy one operand into the result pair,
-- then ADD/ADC the other in (carry propagates low -> high).
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpReg r2lo) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Fallback: anything else is unsupported here.
-- NOTE(review): the equation head (presumably @iselExpr64 expr@) is
-- not visible in this excerpt.
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */
271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
273 #if sparc_TARGET_ARCH
-- sparc: store a 64-bit value; big-endian, so the HIGH word goes at
-- offset 0 and the low word at offset 4.
-- NOTE(review): this sparc section still uses the Stix-era API
-- (thenNat, registerCode/registerName, VirtualRegI) and looks
-- bit-rotted relative to the Cmm-based code above — confirm it builds.
assignMem_I64Code addrTree valueTree
   = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vrlo) ->
     getRegister addrTree `thenNat` \ register_addr ->
     getNewRegNat IntRep `thenNat` \ t_addr ->
     let rlo = VirtualRegI vrlo
         rhi = getHiVRegFromLo rlo
         code_addr = registerCode register_addr t_addr
         reg_addr = registerName register_addr t_addr

         mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
         mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
     return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: assign a 64-bit value to a register pair, using OR-with-%g0
-- as the register-to-register move idiom.
-- NOTE(review): the StixTemp/StixVReg pattern is Stix-era and does not
-- match the CmmLocal patterns used by the other architectures here.
assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
   = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
         r_dst_lo = mkVReg u_dst IntRep
         r_src_lo = VirtualRegI vr_src_lo
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- OR sreg with %g0 (always zero) into dreg == plain move.
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         vcode `snocOL` mov_hi `snocOL` mov_lo

assignReg_I64Code lvalue valueTree
   = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "iselExpr64(???)"

-- sparc 64-bit load: big-endian, high word at offset 0, low at 4.
-- NOTE(review): the argument order (CmmLoad I64 addrTree) is the
-- reverse of the i386/ppc clauses in this file — looks like bit-rot;
-- confirm against the CmmLoad constructor's field order.
iselExpr64 (CmmLoad I64 addrTree)
   = getRegister addrTree `thenNat` \ register_addr ->
     getNewRegNat IntRep `thenNat` \ t_addr ->
     getNewRegNat IntRep `thenNat` \ rlo ->
     let rhi = getHiVRegFromLo rlo
         code_addr = registerCode register_addr t_addr
         reg_addr = registerName register_addr t_addr
         mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
         mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
         ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)

-- Reading a 64-bit local: copy both halves into a fresh pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
   = getNewRegNat IntRep `thenNat` \ r_dst_lo ->
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         -- NOTE(review): 'vu' is unbound here — the pattern above binds
         -- 'uq'.  Almost certainly bit-rot; should be @mkVReg uq IntRep@.
         r_src_lo = mkVReg vu IntRep
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)

-- 64-bit C-call result: the two halves come back in %o0 and %o1;
-- copy them into a fresh register pair.
-- NOTE(review): StCall is a Stix-era pattern and 'kind' is not bound
-- in this excerpt — further bit-rot.
iselExpr64 (StCall fn cconv I64 args)
  = genCCall fn cconv kind args `thenNat` \ call ->
    getNewRegNat IntRep `thenNat` \ r_dst_lo ->
    let r_dst_hi = getHiVRegFromLo r_dst_lo
        mov_lo = mkMOV o0 r_dst_lo
        mov_hi = mkMOV o1 r_dst_hi
        mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
                    (getVRegUnique r_dst_lo)

-- Fallback: anything else is unsupported on sparc.
-- NOTE(review): the equation head is not visible in this excerpt.
   = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)

#endif /* sparc_TARGET_ARCH */
360 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
362 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for both halves of a 64-bit memory
-- operand: the address itself (high word, big-endian) and the address
-- 4 bytes above it (low word), plus the code to set them up.  If the
-- +4 offset cannot be folded into the amode, fall back to materialising
-- the base pointer in a register.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                          return (AddrRegImm hi_ptr (ImmInt 0),
                                  AddrRegImm hi_ptr (ImmInt 4),
                          -- NOTE(review): the final component of this
                          -- tuple is not visible in this excerpt.
-- powerpc: store a 64-bit value as two 32-bit stores, one per half,
-- using the amode pair from getI64Amodes.
assignMem_I64Code addrTree valueTree = do
   (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
   ChildCode64 vcode rlo <- iselExpr64 valueTree
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
        rhi = getHiVRegFromLo rlo

        -- Big-endian store: high word at the lower address.
        mov_hi = ST I32 rhi hi_addr
        mov_lo = ST I32 rlo lo_addr

   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- powerpc: assign a 64-bit value to a local register pair using two
-- MR (move register) instructions, one per half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
         r_dst_lo = mkVReg u_dst I32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MR r_dst_lo r_src_lo
         mov_hi = MR r_dst_hi r_src_hi
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers can hold an I64 here.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "iselExpr64(???)"

-- powerpc 64-bit load: two 32-bit loads; big-endian, so the high word
-- lives at the lower address and the low word 4 bytes above it.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    -- NOTE(review): the continuation of this expression (the low
    -- register argument to ChildCode64) is not visible in this excerpt.
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- A 64-bit local already lives in a virtual register pair; no code.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- | Build a 64-bit literal in a register pair, 16 bits at a time:
-- LIS places a halfword in the upper half of a 32-bit register, and an
-- OR-immediate fills in the lower halfword.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- The four 16-bit chunks of the literal, low to high.
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: this OR must target the HIGH-word register.
                -- It previously read @OR rlo rlo (RIImm $ ImmInt half2)@,
                -- clobbering the low word and leaving the upper 32 bits
                -- as just (half3 << 16).
                OR rhi rhi (RIImm $ ImmInt half2)
                ]

  return (ChildCode64 code rlo)
-- 64-bit addition: add the low words with carry-out (ADDC); the high
-- words must then be added with carry-in.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
         r1hi = getHiVRegFromLo r1lo
         r2hi = getHiVRegFromLo r2lo
         toOL [ ADDC rlo r1lo r2lo,
   -- NOTE(review): the matching high-word instruction (presumably
   -- @ADDE rhi r1hi r2hi@) is not visible in this excerpt.
   return (ChildCode64 code rlo)

-- Fallback: anything else is unsupported on powerpc.
-- NOTE(review): the equation head is not visible in this excerpt.
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */
-- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.

-- NOTE(review): the @data Register@ head line is not visible in this
-- excerpt.
   = Fixed   MachRep Reg InstrBlock      -- value lives in this register,
                                         -- produced by this code
   | Any     MachRep (Reg -> InstrBlock) -- caller chooses the destination
                                         -- register and gets code for it
-- | Replace the 'MachRep' carried by a 'Register' without touching the
-- register choice or the code that computes the value.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep =
  case register of
    Fixed _ reg code -> Fixed rep reg code
    Any   _ codefn   -> Any   rep codefn
-- -----------------------------------------------------------------------------
-- Utils based on getRegister, below

-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head and the case analysis on the
-- 'Register' result are only partially visible in this excerpt; in the
-- visible 'Any' branch a fresh register is allocated and handed to the
-- code generator.
  r <- getRegister expr
    tmp <- getNewRegNat rep
    return (tmp, code tmp)
-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg

-- | Map a 'CmmReg' to the (virtual or real) machine register that
-- holds it.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))
-- NOTE(review): this clause's right-hand side (presumably
-- @= mkVReg u pk@) is not visible in this excerpt.

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "getRegister(???)"

-- | Select instructions computing a Cmm expression, yielding either a
-- 'Fixed' register or an 'Any' code generator (see the 'Register' type).
getRegister :: CmmExpr -> NatM Register

-- The PIC base register needs no code once it has been initialised.
-- NOTE(review): the @= do@ of this equation is not visible in this
-- excerpt.
getRegister (CmmReg (CmmGlobal PicBaseReg))
      reg <- getPicBaseNat wordRep
      return (Fixed wordRep reg nilOL)

-- Any other register read: the value is already in a known register.
getRegister (CmmReg reg)
  = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)

-- Reg+offset is first rewritten into an explicit addition.
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

-- end of machine-"independent" bit; here we go on the rest...
528 #if alpha_TARGET_ARCH
530 getRegister (StDouble d)
531 = getBlockIdNat `thenNat` \ lbl ->
532 getNewRegNat PtrRep `thenNat` \ tmp ->
533 let code dst = mkSeqInstrs [
534 LDATA RoDataSegment lbl [
535 DATA TF [ImmLab (rational d)]
537 LDA tmp (AddrImm (ImmCLbl lbl)),
538 LD TF dst (AddrReg tmp)]
540 return (Any F64 code)
542 getRegister (StPrim primop [x]) -- unary PrimOps
544 IntNegOp -> trivialUCode (NEG Q False) x
546 NotOp -> trivialUCode NOT x
548 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
549 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
551 OrdOp -> coerceIntCode IntRep x
554 Float2IntOp -> coerceFP2Int x
555 Int2FloatOp -> coerceInt2FP pr x
556 Double2IntOp -> coerceFP2Int x
557 Int2DoubleOp -> coerceInt2FP pr x
559 Double2FloatOp -> coerceFltCode x
560 Float2DoubleOp -> coerceFltCode x
562 other_op -> getRegister (StCall fn CCallConv F64 [x])
564 fn = case other_op of
565 FloatExpOp -> FSLIT("exp")
566 FloatLogOp -> FSLIT("log")
567 FloatSqrtOp -> FSLIT("sqrt")
568 FloatSinOp -> FSLIT("sin")
569 FloatCosOp -> FSLIT("cos")
570 FloatTanOp -> FSLIT("tan")
571 FloatAsinOp -> FSLIT("asin")
572 FloatAcosOp -> FSLIT("acos")
573 FloatAtanOp -> FSLIT("atan")
574 FloatSinhOp -> FSLIT("sinh")
575 FloatCoshOp -> FSLIT("cosh")
576 FloatTanhOp -> FSLIT("tanh")
577 DoubleExpOp -> FSLIT("exp")
578 DoubleLogOp -> FSLIT("log")
579 DoubleSqrtOp -> FSLIT("sqrt")
580 DoubleSinOp -> FSLIT("sin")
581 DoubleCosOp -> FSLIT("cos")
582 DoubleTanOp -> FSLIT("tan")
583 DoubleAsinOp -> FSLIT("asin")
584 DoubleAcosOp -> FSLIT("acos")
585 DoubleAtanOp -> FSLIT("atan")
586 DoubleSinhOp -> FSLIT("sinh")
587 DoubleCoshOp -> FSLIT("cosh")
588 DoubleTanhOp -> FSLIT("tanh")
590 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
592 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
594 CharGtOp -> trivialCode (CMP LTT) y x
595 CharGeOp -> trivialCode (CMP LE) y x
596 CharEqOp -> trivialCode (CMP EQQ) x y
597 CharNeOp -> int_NE_code x y
598 CharLtOp -> trivialCode (CMP LTT) x y
599 CharLeOp -> trivialCode (CMP LE) x y
601 IntGtOp -> trivialCode (CMP LTT) y x
602 IntGeOp -> trivialCode (CMP LE) y x
603 IntEqOp -> trivialCode (CMP EQQ) x y
604 IntNeOp -> int_NE_code x y
605 IntLtOp -> trivialCode (CMP LTT) x y
606 IntLeOp -> trivialCode (CMP LE) x y
608 WordGtOp -> trivialCode (CMP ULT) y x
609 WordGeOp -> trivialCode (CMP ULE) x y
610 WordEqOp -> trivialCode (CMP EQQ) x y
611 WordNeOp -> int_NE_code x y
612 WordLtOp -> trivialCode (CMP ULT) x y
613 WordLeOp -> trivialCode (CMP ULE) x y
615 AddrGtOp -> trivialCode (CMP ULT) y x
616 AddrGeOp -> trivialCode (CMP ULE) y x
617 AddrEqOp -> trivialCode (CMP EQQ) x y
618 AddrNeOp -> int_NE_code x y
619 AddrLtOp -> trivialCode (CMP ULT) x y
620 AddrLeOp -> trivialCode (CMP ULE) x y
622 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
623 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
624 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
625 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
626 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
627 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
629 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
630 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
631 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
632 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
633 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
634 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
636 IntAddOp -> trivialCode (ADD Q False) x y
637 IntSubOp -> trivialCode (SUB Q False) x y
638 IntMulOp -> trivialCode (MUL Q False) x y
639 IntQuotOp -> trivialCode (DIV Q False) x y
640 IntRemOp -> trivialCode (REM Q False) x y
642 WordAddOp -> trivialCode (ADD Q False) x y
643 WordSubOp -> trivialCode (SUB Q False) x y
644 WordMulOp -> trivialCode (MUL Q False) x y
645 WordQuotOp -> trivialCode (DIV Q True) x y
646 WordRemOp -> trivialCode (REM Q True) x y
648 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
649 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
650 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
651 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
653 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
654 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
655 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
656 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
658 AddrAddOp -> trivialCode (ADD Q False) x y
659 AddrSubOp -> trivialCode (SUB Q False) x y
660 AddrRemOp -> trivialCode (REM Q True) x y
662 AndOp -> trivialCode AND x y
663 OrOp -> trivialCode OR x y
664 XorOp -> trivialCode XOR x y
665 SllOp -> trivialCode SLL x y
666 SrlOp -> trivialCode SRL x y
668 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
669 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
670 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
672 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
673 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
675 {- ------------------------------------------------------------
676 Some bizarre special code for getting condition codes into
677 registers. Integer non-equality is a test for equality
678 followed by an XOR with 1. (Integer comparisons always set
679 the result register to 0 or 1.) Floating point comparisons of
680 any kind leave the result in a floating point register, so we
681 need to wrangle an integer register out of things.
683 int_NE_code :: StixTree -> StixTree -> NatM Register
686 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
687 getNewRegNat IntRep `thenNat` \ tmp ->
689 code = registerCode register tmp
690 src = registerName register tmp
691 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
693 return (Any IntRep code__2)
695 {- ------------------------------------------------------------
696 Comments for int_NE_code also apply to cmpF_code
699 :: (Reg -> Reg -> Reg -> Instr)
701 -> StixTree -> StixTree
704 cmpF_code instr cond x y
705 = trivialFCode pr instr x y `thenNat` \ register ->
706 getNewRegNat F64 `thenNat` \ tmp ->
707 getBlockIdNat `thenNat` \ lbl ->
709 code = registerCode register tmp
710 result = registerName register tmp
712 code__2 dst = code . mkSeqInstrs [
713 OR zeroh (RIImm (ImmInt 1)) dst,
714 BF cond result (ImmCLbl lbl),
715 OR zeroh (RIReg zeroh) dst,
718 return (Any IntRep code__2)
720 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
721 ------------------------------------------------------------
723 getRegister (CmmLoad pk mem)
724 = getAmode mem `thenNat` \ amode ->
726 code = amodeCode amode
727 src = amodeAddr amode
728 size = primRepToSize pk
729 code__2 dst = code . mkSeqInstr (LD size dst src)
731 return (Any pk code__2)
733 getRegister (StInt i)
736 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
738 return (Any IntRep code)
741 code dst = mkSeqInstr (LDI Q dst src)
743 return (Any IntRep code)
745 src = ImmInt (fromInteger i)
750 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
752 return (Any PtrRep code)
755 imm__2 = case imm of Just x -> x
757 #endif /* alpha_TARGET_ARCH */
759 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
763 getRegister (CmmLit (CmmFloat f F32)) = do
764 lbl <- getNewLabelNat
765 let code dst = toOL [
768 CmmStaticLit (CmmFloat f F32)],
769 GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
772 return (Any F32 code)
775 getRegister (CmmLit (CmmFloat d F64))
777 = let code dst = unitOL (GLDZ dst)
778 in return (Any F64 code)
781 = let code dst = unitOL (GLD1 dst)
782 in return (Any F64 code)
785 lbl <- getNewLabelNat
786 let code dst = toOL [
789 CmmStaticLit (CmmFloat d F64)],
790 GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
793 return (Any F64 code)
795 #endif /* i386_TARGET_ARCH */
797 #if x86_64_TARGET_ARCH
799 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
800 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
801 -- I don't know why there are xorpd, xorps, and pxor instructions.
802 -- They all appear to do the same thing --SDM
803 return (Any rep code)
805 getRegister (CmmLit (CmmFloat f rep)) = do
806 lbl <- getNewLabelNat
807 let code dst = toOL [
810 CmmStaticLit (CmmFloat f rep)],
811 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
814 return (Any rep code)
816 #endif /* x86_64_TARGET_ARCH */
818 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
820 -- catch simple cases of zero- or sign-extended load
821 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
822 code <- intLoadCode (MOVZxL I8) addr
823 return (Any I32 code)
825 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
826 code <- intLoadCode (MOVSxL I8) addr
827 return (Any I32 code)
829 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
830 code <- intLoadCode (MOVZxL I16) addr
831 return (Any I32 code)
833 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
834 code <- intLoadCode (MOVSxL I16) addr
835 return (Any I32 code)
839 #if x86_64_TARGET_ARCH
841 -- catch simple cases of zero- or sign-extended load
842 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
843 code <- intLoadCode (MOVZxL I8) addr
844 return (Any I64 code)
846 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
847 code <- intLoadCode (MOVSxL I8) addr
848 return (Any I64 code)
850 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
851 code <- intLoadCode (MOVZxL I16) addr
852 return (Any I64 code)
854 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
855 code <- intLoadCode (MOVSxL I16) addr
856 return (Any I64 code)
858 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
859 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
860 return (Any I64 code)
862 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
863 code <- intLoadCode (MOVSxL I32) addr
864 return (Any I64 code)
868 #if x86_64_TARGET_ARCH
869 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
870 x_code <- getAnyReg x
871 lbl <- getNewLabelNat
873 code dst = x_code dst `appOL` toOL [
874 -- This is how gcc does it, so it can't be that bad:
875 LDATA ReadOnlyData16 [
878 CmmStaticLit (CmmInt 0x80000000 I32),
879 CmmStaticLit (CmmInt 0 I32),
880 CmmStaticLit (CmmInt 0 I32),
881 CmmStaticLit (CmmInt 0 I32)
883 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
884 -- xorps, so we need the 128-bit constant
885 -- ToDo: rip-relative
888 return (Any F32 code)
890 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
891 x_code <- getAnyReg x
892 lbl <- getNewLabelNat
894 -- This is how gcc does it, so it can't be that bad:
895 code dst = x_code dst `appOL` toOL [
896 LDATA ReadOnlyData16 [
899 CmmStaticLit (CmmInt 0x8000000000000000 I64),
900 CmmStaticLit (CmmInt 0 I64)
902 -- gcc puts an unpck here. Wonder if we need it.
903 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
904 -- xorpd, so we need the 128-bit constant
907 return (Any F64 code)
910 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
912 getRegister (CmmMachOp mop [x]) -- unary MachOps
915 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
916 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
919 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
920 MO_Not rep -> trivialUCode rep (NOT rep) x
923 -- TODO: these are only nops if the arg is not a fixed register that
924 -- can't be byte-addressed.
925 MO_U_Conv I32 I8 -> conversionNop I32 x
926 MO_S_Conv I32 I8 -> conversionNop I32 x
927 MO_U_Conv I16 I8 -> conversionNop I16 x
928 MO_S_Conv I16 I8 -> conversionNop I16 x
929 MO_U_Conv I32 I16 -> conversionNop I32 x
930 MO_S_Conv I32 I16 -> conversionNop I32 x
931 #if x86_64_TARGET_ARCH
932 MO_U_Conv I64 I32 -> conversionNop I64 x
933 MO_S_Conv I64 I32 -> conversionNop I64 x
934 MO_U_Conv I64 I16 -> conversionNop I64 x
935 MO_S_Conv I64 I16 -> conversionNop I64 x
936 MO_U_Conv I64 I8 -> conversionNop I64 x
937 MO_S_Conv I64 I8 -> conversionNop I64 x
940 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
941 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
944 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
945 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
946 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
948 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
949 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
950 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
952 #if x86_64_TARGET_ARCH
953 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
954 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
955 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
956 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
957 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
958 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
959 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
960 -- However, we don't want the register allocator to throw it
961 -- away as an unnecessary reg-to-reg move, so we keep it in
962 -- the form of a movzl and print it as a movl later.
966 MO_S_Conv F32 F64 -> conversionNop F64 x
967 MO_S_Conv F64 F32 -> conversionNop F32 x
969 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
970 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
974 | isFloatingRep from -> coerceFP2Int from to x
975 | isFloatingRep to -> coerceInt2FP from to x
977 other -> pprPanic "getRegister" (pprMachOp mop)
979 -- signed or unsigned extension.
980 integerExtend from to instr expr = do
981 (reg,e_code) <- if from == I8 then getByteReg expr
986 instr from (OpReg reg) (OpReg dst)
989 conversionNop new_rep expr
990 = do e_code <- getRegister expr
991 return (swizzleRegisterRep e_code new_rep)
994 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
995 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
997 MO_Eq F32 -> condFltReg EQQ x y
998 MO_Ne F32 -> condFltReg NE x y
999 MO_S_Gt F32 -> condFltReg GTT x y
1000 MO_S_Ge F32 -> condFltReg GE x y
1001 MO_S_Lt F32 -> condFltReg LTT x y
1002 MO_S_Le F32 -> condFltReg LE x y
1004 MO_Eq F64 -> condFltReg EQQ x y
1005 MO_Ne F64 -> condFltReg NE x y
1006 MO_S_Gt F64 -> condFltReg GTT x y
1007 MO_S_Ge F64 -> condFltReg GE x y
1008 MO_S_Lt F64 -> condFltReg LTT x y
1009 MO_S_Le F64 -> condFltReg LE x y
1011 MO_Eq rep -> condIntReg EQQ x y
1012 MO_Ne rep -> condIntReg NE x y
1014 MO_S_Gt rep -> condIntReg GTT x y
1015 MO_S_Ge rep -> condIntReg GE x y
1016 MO_S_Lt rep -> condIntReg LTT x y
1017 MO_S_Le rep -> condIntReg LE x y
1019 MO_U_Gt rep -> condIntReg GU x y
1020 MO_U_Ge rep -> condIntReg GEU x y
1021 MO_U_Lt rep -> condIntReg LU x y
1022 MO_U_Le rep -> condIntReg LEU x y
1024 #if i386_TARGET_ARCH
1025 MO_Add F32 -> trivialFCode F32 GADD x y
1026 MO_Sub F32 -> trivialFCode F32 GSUB x y
1028 MO_Add F64 -> trivialFCode F64 GADD x y
1029 MO_Sub F64 -> trivialFCode F64 GSUB x y
1031 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1032 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1035 #if x86_64_TARGET_ARCH
1036 MO_Add F32 -> trivialFCode F32 ADD x y
1037 MO_Sub F32 -> trivialFCode F32 SUB x y
1039 MO_Add F64 -> trivialFCode F64 ADD x y
1040 MO_Sub F64 -> trivialFCode F64 SUB x y
1042 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1043 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1046 MO_Add rep -> add_code rep x y
1047 MO_Sub rep -> sub_code rep x y
1049 MO_S_Quot rep -> div_code rep True True x y
1050 MO_S_Rem rep -> div_code rep True False x y
1051 MO_U_Quot rep -> div_code rep False True x y
1052 MO_U_Rem rep -> div_code rep False False x y
1054 #if i386_TARGET_ARCH
1055 MO_Mul F32 -> trivialFCode F32 GMUL x y
1056 MO_Mul F64 -> trivialFCode F64 GMUL x y
1059 #if x86_64_TARGET_ARCH
1060 MO_Mul F32 -> trivialFCode F32 MUL x y
1061 MO_Mul F64 -> trivialFCode F64 MUL x y
1064 MO_Mul rep -> let op = IMUL rep in
1065 trivialCode rep op (Just op) x y
1067 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1069 MO_And rep -> let op = AND rep in
1070 trivialCode rep op (Just op) x y
1071 MO_Or rep -> let op = OR rep in
1072 trivialCode rep op (Just op) x y
1073 MO_Xor rep -> let op = XOR rep in
1074 trivialCode rep op (Just op) x y
1076 {- Shift ops on x86s have constraints on their source, it
1077 either has to be Imm, CL or 1
1078 => trivialCode is not restrictive enough (sigh.)
1080 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1081 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1082 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1084 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1086 --------------------
-- | Signed multiply with overflow detection (x86): performs a widening
-- IMUL2 leaving the double-width product in %edx:%eax, sign-extends the
-- low half, and subtracts it from the high half, so the result register
-- is zero exactly when no overflow occurred.
-- NOTE(review): the instruction result is pinned to eax (Fixed).
1087 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1088 imulMayOflo rep a b = do
1089 (a_reg, a_code) <- getNonClobberedReg a
1090 b_code <- getAnyReg b
-- shift_amt is the bit width minus one for the given rep; the visible
-- alternatives of this case are elided in this chunk.
1092 shift_amt = case rep of
1095 _ -> panic "shift_amt"
1097 code = a_code `appOL` b_code eax `appOL`
1099 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1100 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1101 -- sign extend lower part
1102 SUB rep (OpReg edx) (OpReg eax)
1103 -- compare against upper
1104 -- eax==0 if high part == sign extended low part
1107 return (Fixed rep eax code)
1109 --------------------
-- | Shift-instruction selection.  x86 shifts only accept an immediate,
-- %cl, or 1 as the shift count, so trivialCode cannot be reused.
1110 shift_code :: MachRep
1111 -> (Operand -> Operand -> Instr)
1116 {- Case1: shift length as immediate -}
1117 shift_code rep instr x y@(CmmLit lit) = do
1118 x_code <- getAnyReg x
1121 = x_code dst `snocOL`
1122 instr (OpImm (litToImm lit)) (OpReg dst)
1124 return (Any rep code)
1126 {- Case2: shift length is complex (non-immediate) -}
-- The shift amount is computed into %ecx (only %cl is usable as a
-- variable count), so x must live in a non-clobberable register.
1127 shift_code rep instr x y{-amount-} = do
1128 (x_reg, x_code) <- getNonClobberedReg x
1129 y_code <- getAnyReg y
1131 code = x_code `appOL`
1133 instr (OpReg ecx) (OpReg x_reg)
1135 return (Fixed rep x_reg code)
1137 --------------------
-- | Integer addition: a literal second operand goes through the
-- three-operand LEA form (add_int); otherwise plain ADD via trivialCode.
1138 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1139 add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1140 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1142 --------------------
-- | Integer subtraction: subtracting a literal is addition of its
-- negation (reusing add_int/LEA); SUB is not commutative, hence Nothing
-- for the reversed-operand variant.
1143 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1144 sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1145 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1147 -- our three-operand add instruction:
-- Implements reg+immediate via an addressing-mode (LEA-style) operand,
-- leaving the destination register free (Any).
1148 add_int rep x y = do
1149 (x_reg, x_code) <- getSomeReg x
1151 imm = ImmInt (fromInteger y)
1155 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1158 return (Any rep code)
1160 ----------------------
-- | Division/remainder.  signed selects IDIV vs DIV and how %edx is
-- widened (CLTD sign-extends %eax into %edx; unsigned zeroes %edx with
-- XOR).  quotient selects the result register: quotient in %eax,
-- remainder in %edx.
1161 div_code rep signed quotient x y = do
1162 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1163 x_code <- getAnyReg x
1165 widen | signed = CLTD rep
1166 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1168 instr | signed = IDIV
1171 code = y_code `appOL`
1173 toOL [widen, instr rep y_op]
1175 result | quotient = eax
1179 return (Fixed rep result code)
-- Memory-load and literal equations of getRegister for i386/x86_64.
-- On i386 floating loads use the x87 GLD pseudo-instruction.
1182 getRegister (CmmLoad mem pk)
1185 Amode src mem_code <- getAmode mem
1187 code dst = mem_code `snocOL`
1188 IF_ARCH_i386(GLD pk src dst,
1189 MOV pk (OpAddr src) (OpReg dst))
1191 return (Any pk code)
1193 #if i386_TARGET_ARCH
1194 getRegister (CmmLoad mem pk)
1197 code <- intLoadCode (instr pk) mem
1198 return (Any pk code)
1200 instr I8 = MOVZxL pk
1203 -- we always zero-extend 8-bit loads, if we
1204 -- can't think of anything better. This is because
1205 -- we can't guarantee access to an 8-bit variant of every register
1206 -- (esi and edi don't have 8-bit variants), so to make things
1207 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1210 #if x86_64_TARGET_ARCH
1211 -- Simpler memory load code on x86_64
1212 getRegister (CmmLoad mem pk)
1214 code <- intLoadCode (MOV pk) mem
1215 return (Any pk code)
-- A zero literal is generated with xor reg,reg (shorter than mov $0).
1218 getRegister (CmmLit (CmmInt 0 rep))
1220 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1221 adj_rep = case rep of I64 -> I32; _ -> rep
1222 rep1 = IF_ARCH_i386( rep, adj_rep )
1224 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1226 return (Any rep code)
1228 #if x86_64_TARGET_ARCH
1229 -- optimisation for loading small literals on x86_64: take advantage
1230 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1231 -- instruction forms are shorter.
1232 getRegister (CmmLit lit)
1233 | I64 <- cmmLitRep lit, not (isBigLit lit)
1236 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1238 return (Any I64 code)
-- isBigLit: true when the value does not fit in an unsigned 32-bit
-- immediate.  NOTE(review): the catch-all equation for non-CmmInt
-- literals is not visible in this chunk.
1240 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1242 -- note1: not the same as is64BitLit, because that checks for
1243 -- signed literals that fit in 32 bits, but we want unsigned
1245 -- note2: all labels are small, because we're assuming the
1246 -- small memory model (see gcc docs, -mcmodel=small).
1249 getRegister (CmmLit lit)
1253 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1255 return (Any rep code)
1257 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build a loader for @mem@: first the address computation, then one
-- 'instr' reading from that address into a caller-chosen destination
-- register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode addr addr_code <- getAmode mem
    let load dst = addr_code `snocOL` instr (OpAddr addr) (OpReg dst)
    return load
1266 -- Compute an expression into *any* register, adding the appropriate
1267 -- move instruction if necessary.
-- NOTE(review): the equation head and the final call to anyReg are not
-- visible in this chunk; only the signature and one bind line remain.
1268 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1270 r <- getRegister expr
-- | Convert a 'Register' into a function producing code that leaves the
-- value in any caller-chosen register; a value pinned to a fixed
-- register gets an extra register-to-register move appended.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
    Any _ code        -> return code
    Fixed rep reg pre -> return (\dst -> pre `snocOL` reg2reg rep reg dst)
1277 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1278 -- Fixed registers might not be byte-addressable, so we make sure we've
1279 -- got a temporary, inserting an extra reg copy if necessary.
1280 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1281 #if x86_64_TARGET_ARCH
1282 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 version: virtual registers are fine (the allocator will pick a
-- byte-addressable one); a real fixed register is copied to a temp.
1284 getByteReg expr = do
1285 r <- getRegister expr
1288 tmp <- getNewRegNat rep
1289 return (tmp, code tmp)
1291 | isVirtualReg reg -> return (reg,code)
1293 tmp <- getNewRegNat rep
1294 return (tmp, code `snocOL` reg2reg rep reg tmp)
1295 -- ToDo: could optimise slightly by checking for byte-addressable
1296 -- real registers, but that will happen very rarely if at all.
1299 -- Another variant: this time we want the result in a register that cannot
1300 -- be modified by code to evaluate an arbitrary expression.
1301 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1302 getNonClobberedReg expr = do
1303 r <- getRegister expr
1306 tmp <- getNewRegNat rep
1307 return (tmp, code tmp)
1309 -- only free regs can be clobbered
-- A value sitting in a free (allocatable) real register is copied to a
-- fresh temporary so later code cannot clobber it.
1310 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1311 tmp <- getNewRegNat rep
1312 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Register-to-register move for the given rep.  On i386 floating
-- values live on the x87 stack and need GMOV; integers use MOV.
1316 reg2reg :: MachRep -> Reg -> Reg -> Instr
1318 #if i386_TARGET_ARCH
1319 | isFloatingRep rep = GMOV src dst
1321 | otherwise = MOV rep (OpReg src) (OpReg dst)
1323 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1325 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1327 #if sparc_TARGET_ARCH
-- SPARC: float/double literals are emitted into the data segment and
-- loaded via SETHI/LD with the label's high/low immediate halves.
1329 getRegister (StFloat d)
1330 = getBlockIdNat `thenNat` \ lbl ->
1331 getNewRegNat PtrRep `thenNat` \ tmp ->
1332 let code dst = toOL [
1333 SEGMENT DataSegment,
1335 DATA F [ImmFloat d],
1336 SEGMENT TextSegment,
1337 SETHI (HI (ImmCLbl lbl)) tmp,
1338 LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1340 return (Any F32 code)
1342 getRegister (StDouble d)
1343 = getBlockIdNat `thenNat` \ lbl ->
1344 getNewRegNat PtrRep `thenNat` \ tmp ->
1345 let code dst = toOL [
1346 SEGMENT DataSegment,
1348 DATA DF [ImmDouble d],
1349 SEGMENT TextSegment,
1350 SETHI (HI (ImmCLbl lbl)) tmp,
1351 LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1353 return (Any F64 code)
-- SPARC unary MachOps: negation/not via g0-based ALU forms, float
-- conversions, no-op conversions, shift-pair sign/zero extensions, and
-- transcendental float ops lowered to C library calls (promoted to
-- double first when is_float_op).
1356 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1358 MO_NatS_Neg -> trivialUCode (SUB False False g0) x
1359 MO_Nat_Not -> trivialUCode (XNOR False g0) x
1360 MO_32U_to_8U -> trivialCode (AND False) x (StInt 255)
1362 MO_F32_Neg -> trivialUFCode F32 (FNEG F) x
1363 MO_F64_Neg -> trivialUFCode F64 (FNEG DF) x
1365 MO_F64_to_Flt -> coerceDbl2Flt x
1366 MO_F32_to_Dbl -> coerceFlt2Dbl x
1368 MO_F32_to_NatS -> coerceFP2Int F32 x
1369 MO_NatS_to_Flt -> coerceInt2FP F32 x
1370 MO_F64_to_NatS -> coerceFP2Int F64 x
1371 MO_NatS_to_Dbl -> coerceInt2FP F64 x
1373 -- Conversions which are a nop on sparc
1374 MO_32U_to_NatS -> conversionNop IntRep x
1375 MO_32S_to_NatS -> conversionNop IntRep x
1376 MO_NatS_to_32U -> conversionNop WordRep x
1377 MO_32U_to_NatU -> conversionNop WordRep x
1379 MO_NatU_to_NatS -> conversionNop IntRep x
1380 MO_NatS_to_NatU -> conversionNop WordRep x
1381 MO_NatP_to_NatU -> conversionNop WordRep x
1382 MO_NatU_to_NatP -> conversionNop PtrRep x
1383 MO_NatS_to_NatP -> conversionNop PtrRep x
1384 MO_NatP_to_NatS -> conversionNop IntRep x
1386 -- sign-extending widenings
1387 MO_8U_to_32U -> integerExtend False 24 x
1388 MO_8U_to_NatU -> integerExtend False 24 x
1389 MO_8S_to_NatS -> integerExtend True 24 x
1390 MO_16U_to_NatU -> integerExtend False 16 x
1391 MO_16S_to_NatS -> integerExtend True 16 x
1394 let fixed_x = if is_float_op -- promote to double
1395 then CmmMachOp MO_F32_to_Dbl [x]
1398 getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
-- integerExtend: shift left then (arithmetic|logical) shift right by
-- the same amount to sign/zero extend a sub-word value.
1400 integerExtend signed nBits x
1402 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1403 [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1405 conversionNop new_rep expr
1406 = getRegister expr `thenNat` \ e_code ->
1407 return (swizzleRegisterRep e_code new_rep)
-- Table of transcendental ops: (needs-promotion-to-double, libm name).
1411 MO_F32_Exp -> (True, FSLIT("exp"))
1412 MO_F32_Log -> (True, FSLIT("log"))
1413 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
1415 MO_F32_Sin -> (True, FSLIT("sin"))
1416 MO_F32_Cos -> (True, FSLIT("cos"))
1417 MO_F32_Tan -> (True, FSLIT("tan"))
1419 MO_F32_Asin -> (True, FSLIT("asin"))
1420 MO_F32_Acos -> (True, FSLIT("acos"))
1421 MO_F32_Atan -> (True, FSLIT("atan"))
1423 MO_F32_Sinh -> (True, FSLIT("sinh"))
1424 MO_F32_Cosh -> (True, FSLIT("cosh"))
1425 MO_F32_Tanh -> (True, FSLIT("tanh"))
1427 MO_F64_Exp -> (False, FSLIT("exp"))
1428 MO_F64_Log -> (False, FSLIT("log"))
1429 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
1431 MO_F64_Sin -> (False, FSLIT("sin"))
1432 MO_F64_Cos -> (False, FSLIT("cos"))
1433 MO_F64_Tan -> (False, FSLIT("tan"))
1435 MO_F64_Asin -> (False, FSLIT("asin"))
1436 MO_F64_Acos -> (False, FSLIT("acos"))
1437 MO_F64_Atan -> (False, FSLIT("atan"))
1439 MO_F64_Sinh -> (False, FSLIT("sinh"))
1440 MO_F64_Cosh -> (False, FSLIT("cosh"))
1441 MO_F64_Tanh -> (False, FSLIT("tanh"))
-- NOTE(review): this panic message says "binary CmmMachOp (2)" but it
-- sits in the unary ([x]) equation -- message looks wrong; confirm and
-- fix upstream (a doc-only change cannot touch the runtime string).
1443 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
-- SPARC dyadic MachOps: comparisons via condIntReg/condFltReg,
-- arithmetic via trivialCode/trivialFCode, and division/power lowered
-- to C library calls (no native divide used here; see the V8+ ToDo).
1447 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1449 MO_32U_Gt -> condIntReg GTT x y
1450 MO_32U_Ge -> condIntReg GE x y
1451 MO_32U_Eq -> condIntReg EQQ x y
1452 MO_32U_Ne -> condIntReg NE x y
1453 MO_32U_Lt -> condIntReg LTT x y
1454 MO_32U_Le -> condIntReg LE x y
1456 MO_Nat_Eq -> condIntReg EQQ x y
1457 MO_Nat_Ne -> condIntReg NE x y
1459 MO_NatS_Gt -> condIntReg GTT x y
1460 MO_NatS_Ge -> condIntReg GE x y
1461 MO_NatS_Lt -> condIntReg LTT x y
1462 MO_NatS_Le -> condIntReg LE x y
1464 MO_NatU_Gt -> condIntReg GU x y
1465 MO_NatU_Ge -> condIntReg GEU x y
1466 MO_NatU_Lt -> condIntReg LU x y
1467 MO_NatU_Le -> condIntReg LEU x y
1469 MO_F32_Gt -> condFltReg GTT x y
1470 MO_F32_Ge -> condFltReg GE x y
1471 MO_F32_Eq -> condFltReg EQQ x y
1472 MO_F32_Ne -> condFltReg NE x y
1473 MO_F32_Lt -> condFltReg LTT x y
1474 MO_F32_Le -> condFltReg LE x y
1476 MO_F64_Gt -> condFltReg GTT x y
1477 MO_F64_Ge -> condFltReg GE x y
1478 MO_F64_Eq -> condFltReg EQQ x y
1479 MO_F64_Ne -> condFltReg NE x y
1480 MO_F64_Lt -> condFltReg LTT x y
1481 MO_F64_Le -> condFltReg LE x y
1483 MO_Nat_Add -> trivialCode (ADD False False) x y
1484 MO_Nat_Sub -> trivialCode (SUB False False) x y
1486 MO_NatS_Mul -> trivialCode (SMUL False) x y
1487 MO_NatU_Mul -> trivialCode (UMUL False) x y
1488 MO_NatS_MulMayOflo -> imulMayOflo x y
1490 -- ToDo: teach about V8+ SPARC div instructions
1491 MO_NatS_Quot -> idiv FSLIT(".div") x y
1492 MO_NatS_Rem -> idiv FSLIT(".rem") x y
1493 MO_NatU_Quot -> idiv FSLIT(".udiv") x y
1494 MO_NatU_Rem -> idiv FSLIT(".urem") x y
1496 MO_F32_Add -> trivialFCode F32 FADD x y
1497 MO_F32_Sub -> trivialFCode F32 FSUB x y
1498 MO_F32_Mul -> trivialFCode F32 FMUL x y
1499 MO_F32_Div -> trivialFCode F32 FDIV x y
1501 MO_F64_Add -> trivialFCode F64 FADD x y
1502 MO_F64_Sub -> trivialFCode F64 FSUB x y
1503 MO_F64_Mul -> trivialFCode F64 FMUL x y
1504 MO_F64_Div -> trivialFCode F64 FDIV x y
1506 MO_Nat_And -> trivialCode (AND False) x y
1507 MO_Nat_Or -> trivialCode (OR False) x y
1508 MO_Nat_Xor -> trivialCode (XOR False) x y
1510 MO_Nat_Shl -> trivialCode SLL x y
1511 MO_Nat_Shr -> trivialCode SRL x y
1512 MO_Nat_Sar -> trivialCode SRA x y
1514 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1515 [promote x, promote y])
1516 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1517 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1520 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
-- idiv: lower integer division to a call to the named millicode routine.
1522 idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1524 --------------------
-- SPARC overflow-checking multiply: widening SMUL, then compare the
-- high word against the sign-extension of the low word (SRA by 31);
-- the result is the difference (zero when no overflow).
1525 imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1527 = getNewRegNat IntRep `thenNat` \ t1 ->
1528 getNewRegNat IntRep `thenNat` \ t2 ->
1529 getNewRegNat IntRep `thenNat` \ res_lo ->
1530 getNewRegNat IntRep `thenNat` \ res_hi ->
1531 getRegister a1 `thenNat` \ reg1 ->
1532 getRegister a2 `thenNat` \ reg2 ->
1533 let code1 = registerCode reg1 t1
1534 code2 = registerCode reg2 t2
1535 src1 = registerName reg1 t1
1536 src2 = registerName reg2 t2
1537 code dst = code1 `appOL` code2 `appOL`
1539 SMUL False src1 (RIReg src2) res_lo,
1541 SRA res_lo (RIImm (ImmInt 31)) res_lo,
1542 SUB False False res_lo (RIReg res_hi) dst
1545 return (Any IntRep code)
-- Simple load: one LD of the rep-appropriate size from the amode.
1547 getRegister (CmmLoad pk mem) = do
1548 Amode src code <- getAmode mem
1550 size = primRepToSize pk
1551 code__2 dst = code `snocOL` LD size src dst
1553 return (Any pk code__2)
-- Small integer literal: OR with %g0 (which always reads as zero).
1555 getRegister (StInt i)
1558 src = ImmInt (fromInteger i)
1559 code dst = unitOL (OR False g0 (RIImm src) dst)
1561 return (Any IntRep code)
-- Large immediates are built with SETHI (high 22 bits) + OR (low bits).
-- NOTE(review): the equation heads for the fragments below are elided
-- in this chunk.
1567 SETHI (HI imm__2) dst,
1568 OR False dst (RIImm (LO imm__2)) dst]
1570 return (Any PtrRep code)
1572 = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1575 imm__2 = case imm of Just x -> x
1577 #endif /* sparc_TARGET_ARCH */
1579 #if powerpc_TARGET_ARCH
-- PowerPC loads: plain LD, plus fused zero-/sign-extending loads for
-- the common 8/16-bit widening patterns (LA = load algebraic, i.e.
-- sign-extending; there is no byte-sized algebraic load).
1580 getRegister (CmmLoad mem pk)
1583 Amode addr addr_code <- getAmode mem
1584 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1585 addr_code `snocOL` LD pk dst addr
1586 return (Any pk code)
1588 -- catch simple cases of zero- or sign-extended load
1589 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1590 Amode addr addr_code <- getAmode mem
1591 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1593 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1595 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1596 Amode addr addr_code <- getAmode mem
1597 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1599 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1600 Amode addr addr_code <- getAmode mem
1601 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- PowerPC unary MachOps: NOT/NEG, float conversions, and widening /
-- narrowing integer conversions.  Narrowing is a no-op because the high
-- bits are treated as undefined; signed widening uses EXTS, unsigned
-- widening masks with AND.
1603 getRegister (CmmMachOp mop [x]) -- unary MachOps
1605 MO_Not rep -> trivialUCode rep NOT x
1607 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1608 MO_S_Conv F32 F64 -> conversionNop F64 x
1611 | from == to -> conversionNop to x
1612 | isFloatingRep from -> coerceFP2Int from to x
1613 | isFloatingRep to -> coerceInt2FP from to x
1615 -- narrowing is a nop: we treat the high bits as undefined
1616 MO_S_Conv I32 to -> conversionNop to x
1617 MO_S_Conv I16 I8 -> conversionNop I8 x
1618 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1619 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1622 | from == to -> conversionNop to x
1623 -- narrowing is a nop: we treat the high bits as undefined
1624 MO_U_Conv I32 to -> conversionNop to x
1625 MO_U_Conv I16 I8 -> conversionNop I8 x
1626 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1627 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1629 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1630 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1631 MO_S_Neg rep -> trivialUCode rep NEG x
-- conversionNop: re-tag the register with the new rep, emitting no code.
1634 conversionNop new_rep expr
1635 = do e_code <- getRegister expr
1636 return (swizzleRegisterRep e_code new_rep)
-- PowerPC dyadic MachOps.  Sub-word integer operands are first widened
-- to I32 (extendSExpr/extendUExpr) since the ALU works on full words.
1638 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1640 MO_Eq F32 -> condFltReg EQQ x y
1641 MO_Ne F32 -> condFltReg NE x y
1643 MO_S_Gt F32 -> condFltReg GTT x y
1644 MO_S_Ge F32 -> condFltReg GE x y
1645 MO_S_Lt F32 -> condFltReg LTT x y
1646 MO_S_Le F32 -> condFltReg LE x y
1648 MO_Eq F64 -> condFltReg EQQ x y
1649 MO_Ne F64 -> condFltReg NE x y
1651 MO_S_Gt F64 -> condFltReg GTT x y
1652 MO_S_Ge F64 -> condFltReg GE x y
1653 MO_S_Lt F64 -> condFltReg LTT x y
1654 MO_S_Le F64 -> condFltReg LE x y
1656 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1657 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1659 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1660 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1661 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1662 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1664 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1665 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1667 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1669 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1670 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1671 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1672 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1674 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1675 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1676 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1677 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1679 -- optimize addition with 32-bit immediate
-- Non-immediate-fitting literal adds use ADDIS with the high-adjusted
-- half (HA) followed by ADD of the low half.
1683 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1684 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1687 (src, srcCode) <- getSomeReg x
1688 let imm = litToImm lit
1689 code dst = srcCode `appOL` toOL [
1690 ADDIS dst src (HA imm),
1691 ADD dst dst (RIImm (LO imm))
1693 return (Any I32 code)
1694 _ -> trivialCode I32 True ADD x y
1696 MO_Add rep -> trivialCode rep True ADD x y
1698 case y of -- subfi ('subtract from' with immediate) doesn't exist
1699 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1700 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1701 _ -> trivialCodeNoImm rep SUBF y x
1703 MO_Mul rep -> trivialCode rep True MULLW x y
1705 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1707 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1708 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1710 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1711 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1713 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1714 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1716 MO_And rep -> trivialCode rep False AND x y
1717 MO_Or rep -> trivialCode rep False OR x y
1718 MO_Xor rep -> trivialCode rep False XOR x y
1720 MO_Shl rep -> trivialCode rep False SLW x y
1721 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1722 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- PowerPC literals: small ints via LI; floats via a read-only data
-- label loaded through a (possibly PIC-indirected) amode; other wide
-- literals via LIS/OR of the high and low immediate halves.
1724 getRegister (CmmLit (CmmInt i rep))
1725 | Just imm <- makeImmediate rep True i
1727 code dst = unitOL (LI dst imm)
1729 return (Any rep code)
1731 getRegister (CmmLit (CmmFloat f frep)) = do
1732 lbl <- getNewLabelNat
1733 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1734 Amode addr addr_code <- getAmode dynRef
1736 LDATA ReadOnlyData [CmmDataLabel lbl,
1737 CmmStaticLit (CmmFloat f frep)]
1738 `consOL` (addr_code `snocOL` LD frep dst addr)
1739 return (Any frep code)
1741 getRegister (CmmLit lit)
1742 = let rep = cmmLitRep lit
1746 OR dst dst (RIImm (LO imm))
1748 in return (Any rep code)
1750 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- Widen a sub-word integer expression to I32 (the PowerPC ALU works on
-- full words); an expression already at I32 passes through unchanged.
extendSExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]

extendUExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1759 #endif /* powerpc_TARGET_ARCH */
1762 -- -----------------------------------------------------------------------------
1763 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An addressing mode paired with the code needed to set it up.
1765 data Amode = Amode AddrMode InstrBlock
1768 Now, given a tree (the argument to an CmmLoad) that references memory,
1769 produce a suitable addressing mode.
1771 A Rule of the Game (tm) for Amodes: use of the addr bit must
1772 immediately follow use of the code part, since the code part puts
1773 values in registers which the addr then refers to. So you can't put
1774 anything in between, lest it overwrite some of those registers. If
1775 you need to do some other computation between the code part and use of
1776 the addr bit, first store the effective address from the amode in a
1777 temporary, then do the other computation, and then use the temporary:
1781 ... other computation ...
1785 getAmode :: CmmExpr -> NatM Amode
-- Reg+offset trees are normalised first so the per-arch equations below
-- only need to handle MachOp shapes.
1786 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1788 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1790 #if alpha_TARGET_ARCH
-- Alpha amodes: base-register plus small immediate offset for +/- of a
-- literal; anything else is computed into a register.
1792 getAmode (StPrim IntSubOp [x, StInt i])
1793 = getNewRegNat PtrRep `thenNat` \ tmp ->
1794 getRegister x `thenNat` \ register ->
1796 code = registerCode register tmp
1797 reg = registerName register tmp
1798 off = ImmInt (-(fromInteger i))
1800 return (Amode (AddrRegImm reg off) code)
1802 getAmode (StPrim IntAddOp [x, StInt i])
1803 = getNewRegNat PtrRep `thenNat` \ tmp ->
1804 getRegister x `thenNat` \ register ->
1806 code = registerCode register tmp
1807 reg = registerName register tmp
1808 off = ImmInt (fromInteger i)
1810 return (Amode (AddrRegImm reg off) code)
-- NOTE(review): the equation heads for the next two cases are elided
-- in this chunk.
1814 = return (Amode (AddrImm imm__2) id)
1817 imm__2 = case imm of Just x -> x
1820 = getNewRegNat PtrRep `thenNat` \ tmp ->
1821 getRegister other `thenNat` \ register ->
1823 code = registerCode register tmp
1824 reg = registerName register tmp
1826 return (Amode (AddrReg reg) code)
1828 #endif /* alpha_TARGET_ARCH */
1830 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1832 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1834 -- This is all just ridiculous, since it carefully undoes
1835 -- what mangleIndexTree has just done.
1836 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1837 | not (is64BitLit lit)
1838 -- ASSERT(rep == I32)???
1839 = do (x_reg, x_code) <- getSomeReg x
1840 let off = ImmInt (-(fromInteger i))
1841 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1843 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1844 | not (is64BitLit lit)
1845 -- ASSERT(rep == I32)???
1846 = do (x_reg, x_code) <- getSomeReg x
1847 let off = ImmInt (fromInteger i)
1848 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1850 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1851 -- recognised by the next rule.
1852 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1854 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << shift) maps onto the scaled-index addressing mode
-- for scale factors 1, 2, 4, 8 (shift 0..3).
1856 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1857 [y, CmmLit (CmmInt shift _)]])
1858 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1859 = do (x_reg, x_code) <- getNonClobberedReg x
1860 -- x must be in a temp, because it has to stay live over y_code
1861 -- we could compare x_reg and y_reg and do something better here...
1862 (y_reg, y_code) <- getSomeReg y
1864 code = x_code `appOL` y_code
1865 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1866 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-- A small literal can be an absolute address; everything else is
-- computed into a register used as the base.
1869 getAmode (CmmLit lit) | not (is64BitLit lit)
1870 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1873 (reg,code) <- getSomeReg expr
1874 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1876 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1878 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1880 #if sparc_TARGET_ARCH
-- SPARC amodes: reg+imm for add/sub of a (fitting) literal, reg+reg
-- for a general add, SETHI+low-offset for bare immediates, and a
-- register fallback for everything else.
1882 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1884 = getNewRegNat PtrRep `thenNat` \ tmp ->
1885 getRegister x `thenNat` \ register ->
1887 code = registerCode register tmp
1888 reg = registerName register tmp
1889 off = ImmInt (-(fromInteger i))
1891 return (Amode (AddrRegImm reg off) code)
1894 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1896 = getNewRegNat PtrRep `thenNat` \ tmp ->
1897 getRegister x `thenNat` \ register ->
1899 code = registerCode register tmp
1900 reg = registerName register tmp
1901 off = ImmInt (fromInteger i)
1903 return (Amode (AddrRegImm reg off) code)
1905 getAmode (CmmMachOp MO_Nat_Add [x, y])
1906 = getNewRegNat PtrRep `thenNat` \ tmp1 ->
1907 getNewRegNat IntRep `thenNat` \ tmp2 ->
1908 getRegister x `thenNat` \ register1 ->
1909 getRegister y `thenNat` \ register2 ->
1911 code1 = registerCode register1 tmp1
1912 reg1 = registerName register1 tmp1
1913 code2 = registerCode register2 tmp2
1914 reg2 = registerName register2 tmp2
1915 code__2 = code1 `appOL` code2
1917 return (Amode (AddrRegReg reg1 reg2) code__2)
-- NOTE(review): the equation heads for the last two cases are elided
-- in this chunk.
1921 = getNewRegNat PtrRep `thenNat` \ tmp ->
1923 code = unitOL (SETHI (HI imm__2) tmp)
1925 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1928 imm__2 = case imm of Just x -> x
1931 = getNewRegNat PtrRep `thenNat` \ tmp ->
1932 getRegister other `thenNat` \ register ->
1934 code = registerCode register tmp
1935 reg = registerName register tmp
1938 return (Amode (AddrRegImm reg off) code)
1940 #endif /* sparc_TARGET_ARCH */
1942 #ifdef powerpc_TARGET_ARCH
-- PowerPC amodes: reg+imm when the literal fits a D-form displacement,
-- ADDIS/LIS high-half tricks for larger literals, reg+reg for a
-- general add, and a register fallback otherwise.
1943 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1944 | Just off <- makeImmediate I32 True (-i)
1946 (reg, code) <- getSomeReg x
1947 return (Amode (AddrRegImm reg off) code)
1950 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1951 | Just off <- makeImmediate I32 True i
1953 (reg, code) <- getSomeReg x
1954 return (Amode (AddrRegImm reg off) code)
1956 -- optimize addition with 32-bit immediate
1958 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1960 tmp <- getNewRegNat I32
1961 (src, srcCode) <- getSomeReg x
1962 let imm = litToImm lit
1963 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1964 return (Amode (AddrRegImm tmp (LO imm)) code)
1966 getAmode (CmmLit lit)
1968 tmp <- getNewRegNat I32
1969 let imm = litToImm lit
1970 code = unitOL (LIS tmp (HA imm))
1971 return (Amode (AddrRegImm tmp (LO imm)) code)
1973 getAmode (CmmMachOp (MO_Add I32) [x, y])
1975 (regX, codeX) <- getSomeReg x
1976 (regY, codeY) <- getSomeReg y
1977 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1981 (reg, code) <- getSomeReg other
1984 return (Amode (AddrRegImm reg off) code)
1985 #endif /* powerpc_TARGET_ARCH */
1987 -- -----------------------------------------------------------------------------
1988 -- getOperand: sometimes any operand will do.
1990 -- getNonClobberedOperand: the value of the operand will remain valid across
1991 -- the computation of an arbitrary expression, unless the expression
1992 -- is computed directly into a register which the operand refers to
1993 -- (see trivialCode where this function is used for an example).
1995 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1997 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1998 #if x86_64_TARGET_ARCH
-- x86_64: a non-zero FP literal is referenced rip-relative from a
-- read-only data label.
1999 getNonClobberedOperand (CmmLit lit)
2000 | isSuitableFloatingPointLit lit = do
2001 lbl <- getNewLabelNat
2002 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2004 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2006 getNonClobberedOperand (CmmLit lit)
2007 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2008 return (OpImm (litToImm lit), nilOL)
-- A memory operand is safe only if its amode uses no clobberable real
-- register; otherwise the effective address is first saved with LEA.
2009 getNonClobberedOperand (CmmLoad mem pk)
2010 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2011 Amode src mem_code <- getAmode mem
2013 if (amodeCouldBeClobbered src)
2015 tmp <- getNewRegNat wordRep
2016 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2017 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2020 return (OpAddr src', save_code `appOL` mem_code)
2021 getNonClobberedOperand e = do
2022 (reg, code) <- getNonClobberedReg e
2023 return (OpReg reg, code)
-- True when any real register in the amode is allocatable (free) and
-- could therefore be overwritten by later code.
2025 amodeCouldBeClobbered :: AddrMode -> Bool
2026 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2028 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2029 regClobbered _ = False
2031 -- getOperand: the operand is not required to remain valid across the
2032 -- computation of an arbitrary expression.
2033 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2034 #if x86_64_TARGET_ARCH
2035 getOperand (CmmLit lit)
2036 | isSuitableFloatingPointLit lit = do
2037 lbl <- getNewLabelNat
2038 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2040 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2042 getOperand (CmmLit lit)
2043 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2044 return (OpImm (litToImm lit), nilOL)
2045 getOperand (CmmLoad mem pk)
2046 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2047 Amode src mem_code <- getAmode mem
2048 return (OpAddr src, mem_code)
-- Fallback: compute the expression into some register.
2050 (reg, code) <- getSomeReg e
2051 return (OpReg reg, code)
-- | Can this expression be used directly as an instruction operand
-- (a memory load, or a literal that fits)?
2053 isOperand :: CmmExpr -> Bool
2054 isOperand (CmmLoad _ _) = True
2055 isOperand (CmmLit lit) = not (is64BitLit lit)
2056 || isSuitableFloatingPointLit lit
2059 -- if we want a floating-point literal as an operand, we can
2060 -- use it directly from memory. However, if the literal is
2061 -- zero, we're better off generating it into a register using
2063 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2064 isSuitableFloatingPointLit _ = False
-- | Like getOperand but never returns an immediate: either a memory
-- operand or a non-clobberable register (used e.g. for the divisor of
-- IDIV, which cannot take an immediate).
2066 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2067 getRegOrMem (CmmLoad mem pk)
2068 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2069 Amode src mem_code <- getAmode mem
2070 return (OpAddr src, mem_code)
2072 (reg, code) <- getNonClobberedReg e
2073 return (OpReg reg, code)
2075 #if x86_64_TARGET_ARCH
-- True for I64 literals outside the signed 32-bit immediate range.
2076 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2077 -- assume that labels are in the range 0-2^31-1: this assumes the
2078 -- small memory model (see gcc docs, -mcmodel=small).
2080 is64BitLit x = False
2083 -- -----------------------------------------------------------------------------
2084 -- The 'CondCode' type: Condition codes passed up the tree.
-- CondCode is-float cond code-to-set-the-flags.
2086 data CondCode = CondCode Bool Cond InstrBlock
2088 -- Set up a condition code for a conditional branch.
2090 getCondCode :: CmmExpr -> NatM CondCode
2092 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2094 #if alpha_TARGET_ARCH
2095 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2096 #endif /* alpha_TARGET_ARCH */
2098 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2101 -- yes, they really do seem to want exactly the same!
2103 getCondCode (CmmMachOp mop [x, y])
2104 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2106 MO_Eq F32 -> condFltCode EQQ x y
2107 MO_Ne F32 -> condFltCode NE x y
2109 MO_S_Gt F32 -> condFltCode GTT x y
2110 MO_S_Ge F32 -> condFltCode GE x y
2111 MO_S_Lt F32 -> condFltCode LTT x y
2112 MO_S_Le F32 -> condFltCode LE x y
2114 MO_Eq F64 -> condFltCode EQQ x y
2115 MO_Ne F64 -> condFltCode NE x y
2117 MO_S_Gt F64 -> condFltCode GTT x y
2118 MO_S_Ge F64 -> condFltCode GE x y
2119 MO_S_Lt F64 -> condFltCode LTT x y
2120 MO_S_Le F64 -> condFltCode LE x y
2122 MO_Eq rep -> condIntCode EQQ x y
2123 MO_Ne rep -> condIntCode NE x y
2125 MO_S_Gt rep -> condIntCode GTT x y
2126 MO_S_Ge rep -> condIntCode GE x y
2127 MO_S_Lt rep -> condIntCode LTT x y
2128 MO_S_Le rep -> condIntCode LE x y
2130 MO_U_Gt rep -> condIntCode GU x y
2131 MO_U_Ge rep -> condIntCode GEU x y
2132 MO_U_Lt rep -> condIntCode LU x y
2133 MO_U_Le rep -> condIntCode LEU x y
2135 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2137 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2139 #elif powerpc_TARGET_ARCH
2141 -- almost the same as everywhere else - but we need to
2142 -- extend small integers to 32 bit first
2144 getCondCode (CmmMachOp mop [x, y])
2146 MO_Eq F32 -> condFltCode EQQ x y
2147 MO_Ne F32 -> condFltCode NE x y
2149 MO_S_Gt F32 -> condFltCode GTT x y
2150 MO_S_Ge F32 -> condFltCode GE x y
2151 MO_S_Lt F32 -> condFltCode LTT x y
2152 MO_S_Le F32 -> condFltCode LE x y
2154 MO_Eq F64 -> condFltCode EQQ x y
2155 MO_Ne F64 -> condFltCode NE x y
2157 MO_S_Gt F64 -> condFltCode GTT x y
2158 MO_S_Ge F64 -> condFltCode GE x y
2159 MO_S_Lt F64 -> condFltCode LTT x y
2160 MO_S_Le F64 -> condFltCode LE x y
-- Integer comparisons widen sub-word operands first (signed compares
-- via sign extension, unsigned/equality via zero extension).
2162 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2163 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2165 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2166 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2167 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2168 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2170 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2171 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2172 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2173 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2175 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2177 getCondCode other = panic "getCondCode(2)(powerpc)"
2183 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2184 -- passed back up the tree.
-- | Turn @cond x y@ into a 'CondCode': instructions that perform the
-- comparison and set flags, plus the 'Cond' to test afterwards.
-- 'condIntCode' handles integer comparisons, 'condFltCode' floating point.
-- Per-architecture implementations follow, selected by CPP.
2186 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2188 #if alpha_TARGET_ARCH
2189 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2190 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2191 #endif /* alpha_TARGET_ARCH */
2193 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2194 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: special-case operand shapes to avoid loading both sides into
-- registers. CMP mem,imm / TEST reg,reg / CMP reg,op / CMP op,reg.
2196 -- memory vs immediate
2197 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2198 Amode x_addr x_code <- getAmode x
2201 code = x_code `snocOL`
2202 CMP pk (OpImm imm) (OpAddr x_addr)
2204 return (CondCode False cond code)
-- Comparison against zero: TEST sets the same flags and is shorter.
2207 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2208 (x_reg, x_code) <- getSomeReg x
2210 code = x_code `snocOL`
2211 TEST pk (OpReg x_reg) (OpReg x_reg)
2213 return (CondCode False cond code)
2215 -- anything vs operand
2216 condIntCode cond x y | isOperand y = do
2217 (x_reg, x_code) <- getNonClobberedReg x
2218 (y_op, y_code) <- getOperand y
2220 code = x_code `appOL` y_code `snocOL`
2221 CMP (cmmExprRep x) y_op (OpReg x_reg)
2223 return (CondCode False cond code)
2225 -- anything vs anything
2226 condIntCode cond x y = do
2227 (y_reg, y_code) <- getNonClobberedReg y
2228 (x_op, x_code) <- getRegOrMem x
2230 code = y_code `appOL`
2232 CMP (cmmExprRep x) (OpReg y_reg) x_op
2234 return (CondCode False cond code)
2237 #if i386_TARGET_ARCH
-- i386 (x87): GCMP folds the whole comparison; the chosen cond is baked
-- into the instruction and the caller only tests the zero flag.
2238 condFltCode cond x y
2239 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2240 (x_reg, x_code) <- getNonClobberedReg x
2241 (y_reg, y_code) <- getSomeReg y
2243 code = x_code `appOL` y_code `snocOL`
2244 GCMP cond x_reg y_reg
2245 -- The GCMP insn does the test and sets the zero flag if comparable
2246 -- and true. Hence we always supply EQQ as the condition to test.
2247 return (CondCode True EQQ code)
2248 #endif /* i386_TARGET_ARCH */
2250 #if x86_64_TARGET_ARCH
2251 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2252 -- an operand, but the right must be a reg. We can probably do better
2253 -- than this general case...
2254 condFltCode cond x y = do
2255 (x_reg, x_code) <- getNonClobberedReg x
2256 (y_op, y_code) <- getOperand y
2258 code = x_code `appOL`
2260 CMP (cmmExprRep x) y_op (OpReg x_reg)
2261 -- NB(1): we need to use the unsigned comparison operators on the
2262 -- result of this comparison.
2264 return (CondCode True (condToUnsigned cond) code)
2267 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2269 #if sparc_TARGET_ARCH
-- SPARC: SUB with the cc flag set (subcc) into g0 performs the compare.
2271 condIntCode cond x (StInt y)
2273 = getRegister x `thenNat` \ register ->
2274 getNewRegNat IntRep `thenNat` \ tmp ->
2276 code = registerCode register tmp
2277 src1 = registerName register tmp
2278 src2 = ImmInt (fromInteger y)
2279 code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2281 return (CondCode False cond code__2)
2283 condIntCode cond x y
2284 = getRegister x `thenNat` \ register1 ->
2285 getRegister y `thenNat` \ register2 ->
2286 getNewRegNat IntRep `thenNat` \ tmp1 ->
2287 getNewRegNat IntRep `thenNat` \ tmp2 ->
2289 code1 = registerCode register1 tmp1
2290 src1 = registerName register1 tmp1
2291 code2 = registerCode register2 tmp2
2292 src2 = registerName register2 tmp2
2293 code__2 = code1 `appOL` code2 `snocOL`
2294 SUB False True src1 (RIReg src2) g0
2296 return (CondCode False cond code__2)
-- If the two operands have different float widths, promote the F32 side
-- to F64 (FxTOy) before the FCMP so both sides agree.
2299 condFltCode cond x y
2300 = getRegister x `thenNat` \ register1 ->
2301 getRegister y `thenNat` \ register2 ->
2302 getNewRegNat (registerRep register1)
2304 getNewRegNat (registerRep register2)
2306 getNewRegNat F64 `thenNat` \ tmp ->
2308 promote x = FxTOy F DF x tmp
2310 pk1 = registerRep register1
2311 code1 = registerCode register1 tmp1
2312 src1 = registerName register1 tmp1
2314 pk2 = registerRep register2
2315 code2 = registerCode register2 tmp2
2316 src2 = registerName register2 tmp2
2320 code1 `appOL` code2 `snocOL`
2321 FCMP True (primRepToSize pk1) src1 src2
2322 else if pk1 == F32 then
2323 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2324 FCMP True DF tmp src2
2326 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2327 FCMP True DF src1 tmp
2329 return (CondCode True cond code__2)
2331 #endif /* sparc_TARGET_ARCH */
2333 #if powerpc_TARGET_ARCH
-- PowerPC: cmp/cmpl on 32-bit regs; callers are expected to have
-- extended narrower operands first (see getCondCode above).
2334 -- ###FIXME: I16 and I8!
2335 condIntCode cond x (CmmLit (CmmInt y rep))
2336 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2338 (src1, code) <- getSomeReg x
2340 code' = code `snocOL`
2341 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2342 return (CondCode False cond code')
2344 condIntCode cond x y = do
2345 (src1, code1) <- getSomeReg x
2346 (src2, code2) <- getSomeReg y
2348 code' = code1 `appOL` code2 `snocOL`
2349 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2350 return (CondCode False cond code')
2352 condFltCode cond x y = do
2353 (src1, code1) <- getSomeReg x
2354 (src2, code2) <- getSomeReg y
2356 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2357 code'' = case cond of -- twiddle CR to handle unordered case
2358 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2359 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR bit positions used by the CRNOR fixups above.
2362 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2363 return (CondCode True cond code'')
2365 #endif /* powerpc_TARGET_ARCH */
2367 -- -----------------------------------------------------------------------------
2368 -- Generating assignments
2370 -- Assignments are really at the heart of the whole code generation
2371 -- business. Almost all top-level nodes of any real importance are
2372 -- assignments, which correspond to loads, stores, or register
2373 -- transfers. If we're really lucky, some of the register transfers
2374 -- will go away, because we can use the destination register to
2375 -- complete the code generation for the right hand side. This only
2376 -- fails when the right hand side is forced into a fixed register
2377 -- (e.g. the result of a call).
-- | Code generators for assignments: integer/float, to memory or to a
-- register. One implementation per target architecture, selected by CPP.
2379 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2380 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2382 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2383 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2385 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2387 #if alpha_TARGET_ARCH
2389 assignIntCode pk (CmmLoad dst _) src
2390 = getNewRegNat IntRep `thenNat` \ tmp ->
2391 getAmode dst `thenNat` \ amode ->
2392 getRegister src `thenNat` \ register ->
2394 code1 = amodeCode amode []
2395 dst__2 = amodeAddr amode
2396 code2 = registerCode register tmp []
2397 src__2 = registerName register tmp
2398 sz = primRepToSize pk
2399 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2403 assignIntCode pk dst src
2404 = getRegister dst `thenNat` \ register1 ->
2405 getRegister src `thenNat` \ register2 ->
2407 dst__2 = registerName register1 zeroh
2408 code = registerCode register2 dst__2
2409 src__2 = registerName register2 dst__2
-- Fixed registers need an explicit move (OR with itself) into dst.
2410 code__2 = if isFixed register2
2411 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2416 #endif /* alpha_TARGET_ARCH */
2418 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2420 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2422 -- integer assignment to memory
2423 assignMem_IntCode pk addr src = do
2424 Amode addr code_addr <- getAmode addr
2425 (code_src, op_src) <- get_op_RI src
2427 code = code_src `appOL`
2429 MOV pk op_src (OpAddr addr)
2430 -- NOTE: op_src is stable, so it will still be valid
2431 -- after code_addr. This may involve the introduction
2432 -- of an extra MOV to a temporary register, but we hope
2433 -- the register allocator will get rid of it.
-- Helper: materialise the RHS as an immediate when it fits, otherwise
-- as a non-clobbered register.
2437 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2438 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2439 = return (nilOL, OpImm (litToImm lit))
2441 = do (reg,code) <- getNonClobberedReg op
2442 return (code, OpReg reg)
2445 -- Assign; dst is a reg, rhs is mem
2446 assignReg_IntCode pk reg (CmmLoad src _) = do
2447 load_code <- intLoadCode (MOV pk) src
2448 return (load_code (getRegisterReg reg))
2450 -- dst is a reg, but src could be anything
2451 assignReg_IntCode pk reg src = do
2452 code <- getAnyReg src
2453 return (code (getRegisterReg reg))
2455 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2457 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2459 #if sparc_TARGET_ARCH
2461 assignMem_IntCode pk addr src
2462 = getNewRegNat IntRep `thenNat` \ tmp ->
2463 getAmode addr `thenNat` \ amode ->
2464 getRegister src `thenNat` \ register ->
2466 code1 = amodeCode amode
2467 dst__2 = amodeAddr amode
2468 code2 = registerCode register tmp
2469 src__2 = registerName register tmp
2470 sz = primRepToSize pk
2471 code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2475 assignReg_IntCode pk reg src
2476 = getRegister src `thenNat` \ register2 ->
2477 getRegisterReg reg `thenNat` \ register1 ->
2478 getNewRegNat IntRep `thenNat` \ tmp ->
2480 dst__2 = registerName register1 tmp
2481 code = registerCode register2 dst__2
2482 src__2 = registerName register2 dst__2
-- OR with g0 (hardwired zero) is the SPARC register-to-register move.
2483 code__2 = if isFixed register2
2484 then code `snocOL` OR False g0 (RIReg src__2) dst__2
2489 #endif /* sparc_TARGET_ARCH */
2491 #if powerpc_TARGET_ARCH
2493 assignMem_IntCode pk addr src = do
2494 (srcReg, code) <- getSomeReg src
2495 Amode dstAddr addr_code <- getAmode addr
2496 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2498 -- dst is a reg, but src could be anything
2499 assignReg_IntCode pk reg src
-- Any: let the RHS write straight into dst; Fixed: MR (move register).
2501 r <- getRegister src
2503 Any _ code -> code dst
2504 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2506 dst = getRegisterReg reg
2508 #endif /* powerpc_TARGET_ARCH */
2511 -- -----------------------------------------------------------------------------
2512 -- Floating-point assignments
2514 #if alpha_TARGET_ARCH
2516 assignFltCode pk (CmmLoad dst _) src
2517 = getNewRegNat pk `thenNat` \ tmp ->
2518 getAmode dst `thenNat` \ amode ->
2519 getRegister src `thenNat` \ register ->
2521 code1 = amodeCode amode []
2522 dst__2 = amodeAddr amode
2523 code2 = registerCode register tmp []
2524 src__2 = registerName register tmp
2525 sz = primRepToSize pk
2526 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2530 assignFltCode pk dst src
2531 = getRegister dst `thenNat` \ register1 ->
2532 getRegister src `thenNat` \ register2 ->
2534 dst__2 = registerName register1 zeroh
2535 code = registerCode register2 dst__2
2536 src__2 = registerName register2 dst__2
2537 code__2 = if isFixed register2
2538 then code . mkSeqInstr (FMOV src__2 dst__2)
2543 #endif /* alpha_TARGET_ARCH */
2545 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2547 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2549 -- Floating point assignment to memory
2550 assignMem_FltCode pk addr src = do
2551 (src_reg, src_code) <- getNonClobberedReg src
2552 Amode addr addr_code <- getAmode addr
2554 code = src_code `appOL`
-- i386 uses the x87 store (GST); x86_64 uses a plain SSE MOV.
2556 IF_ARCH_i386(GST pk src_reg addr,
2557 MOV pk (OpReg src_reg) (OpAddr addr))
2560 -- Floating point assignment to a register/temporary
2561 assignReg_FltCode pk reg src = do
2562 src_code <- getAnyReg src
2563 return (src_code (getRegisterReg reg))
2565 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2567 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2569 #if sparc_TARGET_ARCH
2571 -- Floating point assignment to memory
2572 assignMem_FltCode pk addr src
2573 = getNewRegNat pk `thenNat` \ tmp1 ->
2574 getAmode addr `thenNat` \ amode ->
2575 getRegister src `thenNat` \ register ->
2577 sz = primRepToSize pk
2578 dst__2 = amodeAddr amode
2580 code1 = amodeCode amode
2581 code2 = registerCode register tmp1
2583 src__2 = registerName register tmp1
2584 pk__2 = registerRep register
2585 sz__2 = primRepToSize pk__2
-- If source and destination widths differ, convert (FxTOy) through
-- tmp1 before the store.
2587 code__2 = code1 `appOL` code2 `appOL`
2589 then unitOL (ST sz src__2 dst__2)
2590 else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2594 -- Floating point assignment to a register/temporary
2595 -- Why is this so bizarrely ugly?
2596 assignReg_FltCode pk reg src
2597 = getRegisterReg reg `thenNat` \ register1 ->
2598 getRegister src `thenNat` \ register2 ->
2600 pk__2 = registerRep register2
2601 sz__2 = primRepToSize pk__2
2603 getNewRegNat pk__2 `thenNat` \ tmp ->
2605 sz = primRepToSize pk
2606 dst__2 = registerName register1 g0 -- must be Fixed
-- Route through tmp only when a width conversion is required.
2607 reg__2 = if pk /= pk__2 then tmp else dst__2
2608 code = registerCode register2 reg__2
2609 src__2 = registerName register2 reg__2
2612 code `snocOL` FxTOy sz__2 sz src__2 dst__2
2613 else if isFixed register2 then
2614 code `snocOL` FMOV sz src__2 dst__2
2620 #endif /* sparc_TARGET_ARCH */
2622 #if powerpc_TARGET_ARCH
-- PowerPC stores/moves are uniform across int and float registers,
-- so the integer versions serve for floats too.
2625 assignMem_FltCode = assignMem_IntCode
2626 assignReg_FltCode = assignReg_IntCode
2628 #endif /* powerpc_TARGET_ARCH */
2631 -- -----------------------------------------------------------------------------
2632 -- Generating a non-local jump
2634 -- (If applicable) Do not fill the delay slots here; you will confuse the
2635 -- register allocator.
-- | Generate an unconditional, non-local jump to the given target
-- expression (a code label or a computed address).
2637 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2639 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2641 #if alpha_TARGET_ARCH
2643 genJump (CmmLabel lbl)
2644 | isAsmTemp lbl = returnInstr (BR target)
2645 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2647 target = ImmCLbl lbl
2650 = getRegister tree `thenNat` \ register ->
2651 getNewRegNat PtrRep `thenNat` \ tmp ->
2653 dst = registerName register pv
2654 code = registerCode register pv
2655 target = registerName register pv
2657 if isFixed register then
2658 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2660 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2662 #endif /* alpha_TARGET_ARCH */
2664 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2666 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump via memory operand, immediate label, or computed register.
2668 genJump (CmmLoad mem pk) = do
2669 Amode target code <- getAmode mem
2670 return (code `snocOL` JMP (OpAddr target))
2672 genJump (CmmLit lit) = do
2673 return (unitOL (JMP (OpImm (litToImm lit))))
2676 (reg,code) <- getSomeReg expr
2677 return (code `snocOL` JMP (OpReg reg))
2679 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2681 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2683 #if sparc_TARGET_ARCH
-- SPARC: trailing NOPs fill branch delay slots (see section comment).
2685 genJump (CmmLabel lbl)
2686 = return (toOL [CALL (Left target) 0 True, NOP])
2688 target = ImmCLbl lbl
2691 = getRegister tree `thenNat` \ register ->
2692 getNewRegNat PtrRep `thenNat` \ tmp ->
2694 code = registerCode register tmp
2695 target = registerName register tmp
2697 return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2699 #endif /* sparc_TARGET_ARCH */
2701 #if powerpc_TARGET_ARCH
2702 genJump (CmmLit (CmmLabel lbl))
2703 = return (unitOL $ JMP lbl)
-- Computed jumps go via the count register: mtctr then bctr.
2707 (target,code) <- getSomeReg tree
2708 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2709 #endif /* powerpc_TARGET_ARCH */
2712 -- -----------------------------------------------------------------------------
2713 -- Unconditional branches
-- | Generate an unconditional branch to a local basic block.
-- NOTE(review): the closing #endif lines for these CPP sections appear
-- to have been elided from this listing.
2715 genBranch :: BlockId -> NatM InstrBlock
2717 #if alpha_TARGET_ARCH
2718 genBranch id = return (unitOL (BR id))
2721 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2722 genBranch id = return (unitOL (JXX ALWAYS id))
2725 #if sparc_TARGET_ARCH
-- SPARC: NOP fills the branch delay slot.
2726 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2729 #if powerpc_TARGET_ARCH
2730 genBranch id = return (unitOL (BCC ALWAYS id))
2734 -- -----------------------------------------------------------------------------
2735 -- Conditional jumps
2738 Conditional jumps are always to local labels, so we can use branch
2739 instructions. We peek at the arguments to decide what kind of
2742 ALPHA: For comparisons with 0, we're laughing, because we can just do
2743 the desired conditional branch.
2745 I386: First, we have to ensure that the condition
2746 codes are set according to the supplied comparison operation.
2748 SPARC: First, we have to ensure that the condition codes are set
2749 according to the supplied comparison operation. We generate slightly
2750 different code for floating point comparisons, because a floating
2751 point operation cannot directly precede a @BF@. We assume the worst
2752 and fill that slot with a @NOP@.
2754 SPARC: Do not fill the delay slots here; you will confuse the register
-- | genCondJump: branch to the given block when the condition
-- expression evaluates to true. (The name line of this signature was
-- elided from this listing; the type follows.)
2760 :: BlockId -- the branch target
2761 -> CmmExpr -- the condition on which to branch
2764 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2766 #if alpha_TARGET_ARCH
-- Alpha: old Stix-based code. Compare-with-zero uses BI/BF directly
-- with a Cond derived from the primop.
2768 genCondJump id (StPrim op [x, StInt 0])
2769 = getRegister x `thenNat` \ register ->
2770 getNewRegNat (registerRep register)
2773 code = registerCode register tmp
2774 value = registerName register tmp
2775 pk = registerRep register
2776 target = ImmCLbl lbl
2778 returnSeq code [BI (cmpOp op) value target]
2780 cmpOp CharGtOp = GTT
2782 cmpOp CharEqOp = EQQ
2784 cmpOp CharLtOp = LTT
-- NOTE(review): unsigned compares with zero collapse to constant
-- outcomes (>=0 is ALWAYS, <0 is NEVER), hence these mappings.
2793 cmpOp WordGeOp = ALWAYS
2794 cmpOp WordEqOp = EQQ
2796 cmpOp WordLtOp = NEVER
2797 cmpOp WordLeOp = EQQ
2799 cmpOp AddrGeOp = ALWAYS
2800 cmpOp AddrEqOp = EQQ
2802 cmpOp AddrLtOp = NEVER
2803 cmpOp AddrLeOp = EQQ
2805 genCondJump lbl (StPrim op [x, StDouble 0.0])
2806 = getRegister x `thenNat` \ register ->
2807 getNewRegNat (registerRep register)
2810 code = registerCode register tmp
2811 value = registerName register tmp
2812 pk = registerRep register
2813 target = ImmCLbl lbl
2815 return (code . mkSeqInstr (BF (cmpOp op) value target))
2817 cmpOp FloatGtOp = GTT
2818 cmpOp FloatGeOp = GE
2819 cmpOp FloatEqOp = EQQ
2820 cmpOp FloatNeOp = NE
2821 cmpOp FloatLtOp = LTT
2822 cmpOp FloatLeOp = LE
2823 cmpOp DoubleGtOp = GTT
2824 cmpOp DoubleGeOp = GE
2825 cmpOp DoubleEqOp = EQQ
2826 cmpOp DoubleNeOp = NE
2827 cmpOp DoubleLtOp = LTT
2828 cmpOp DoubleLeOp = LE
-- General float comparison: FCMP computes the *complement* relation and
-- the branch condition is inverted accordingly (see table below).
2830 genCondJump lbl (StPrim op [x, y])
2832 = trivialFCode pr instr x y `thenNat` \ register ->
2833 getNewRegNat F64 `thenNat` \ tmp ->
2835 code = registerCode register tmp
2836 result = registerName register tmp
2837 target = ImmCLbl lbl
2839 return (code . mkSeqInstr (BF cond result target))
2841 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2843 fltCmpOp op = case op of
2857 (instr, cond) = case op of
2858 FloatGtOp -> (FCMP TF LE, EQQ)
2859 FloatGeOp -> (FCMP TF LTT, EQQ)
2860 FloatEqOp -> (FCMP TF EQQ, NE)
2861 FloatNeOp -> (FCMP TF EQQ, EQQ)
2862 FloatLtOp -> (FCMP TF LTT, NE)
2863 FloatLeOp -> (FCMP TF LE, NE)
2864 DoubleGtOp -> (FCMP TF LE, EQQ)
2865 DoubleGeOp -> (FCMP TF LTT, EQQ)
2866 DoubleEqOp -> (FCMP TF EQQ, NE)
2867 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2868 DoubleLtOp -> (FCMP TF LTT, NE)
2869 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, with the same complement-and-invert trick.
2871 genCondJump lbl (StPrim op [x, y])
2872 = trivialCode instr x y `thenNat` \ register ->
2873 getNewRegNat IntRep `thenNat` \ tmp ->
2875 code = registerCode register tmp
2876 result = registerName register tmp
2877 target = ImmCLbl lbl
2879 return (code . mkSeqInstr (BI cond result target))
2881 (instr, cond) = case op of
2882 CharGtOp -> (CMP LE, EQQ)
2883 CharGeOp -> (CMP LTT, EQQ)
2884 CharEqOp -> (CMP EQQ, NE)
2885 CharNeOp -> (CMP EQQ, EQQ)
2886 CharLtOp -> (CMP LTT, NE)
2887 CharLeOp -> (CMP LE, NE)
2888 IntGtOp -> (CMP LE, EQQ)
2889 IntGeOp -> (CMP LTT, EQQ)
2890 IntEqOp -> (CMP EQQ, NE)
2891 IntNeOp -> (CMP EQQ, EQQ)
2892 IntLtOp -> (CMP LTT, NE)
2893 IntLeOp -> (CMP LE, NE)
2894 WordGtOp -> (CMP ULE, EQQ)
2895 WordGeOp -> (CMP ULT, EQQ)
2896 WordEqOp -> (CMP EQQ, NE)
2897 WordNeOp -> (CMP EQQ, EQQ)
2898 WordLtOp -> (CMP ULT, NE)
2899 WordLeOp -> (CMP ULE, NE)
2900 AddrGtOp -> (CMP ULE, EQQ)
2901 AddrGeOp -> (CMP ULT, EQQ)
2902 AddrEqOp -> (CMP EQQ, NE)
2903 AddrNeOp -> (CMP EQQ, EQQ)
2904 AddrLtOp -> (CMP ULT, NE)
2905 AddrLeOp -> (CMP ULE, NE)
2907 #endif /* alpha_TARGET_ARCH */
2909 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2911 #if i386_TARGET_ARCH
-- i386: getCondCode does all the work; just emit the conditional jump.
2913 genCondJump id bool = do
2914 CondCode _ cond code <- getCondCode bool
2915 return (code `snocOL` JXX cond id)
2919 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2921 #if x86_64_TARGET_ARCH
-- x86_64: float comparisons need extra care for the unordered (NaN)
-- case; some conditions expand to two jumps via a fresh label.
2923 genCondJump id bool = do
2924 CondCode is_float cond cond_code <- getCondCode bool
2927 return (cond_code `snocOL` JXX cond id)
2929 lbl <- getBlockIdNat
2931 -- see comment with condFltReg
2932 let code = case cond of
2938 plain_test = unitOL (
2941 or_unordered = toOL [
2945 and_ordered = toOL [
2951 return (cond_code `appOL` code)
2955 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2957 #if sparc_TARGET_ARCH
-- SPARC: BF (branch-on-float-cond) cannot directly follow a float op,
-- so a NOP is inserted before it; NOPs also fill the delay slots.
2959 genCondJump id bool = do
2960 CondCode is_float cond code <- getCondCode bool
2965 then [NOP, BF cond False id, NOP]
2966 else [BI cond False id, NOP]
2970 #endif /* sparc_TARGET_ARCH */
2973 #if powerpc_TARGET_ARCH
2975 genCondJump id bool = do
2976 CondCode is_float cond code <- getCondCode bool
2977 return (code `snocOL` BCC cond id)
2979 #endif /* powerpc_TARGET_ARCH */
2982 -- -----------------------------------------------------------------------------
2983 -- Generating C calls
2985 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2986 -- @get_arg@, which moves the arguments to the correct registers/stack
2987 -- locations. Apart from that, the code is easy.
2989 -- (If applicable) Do not fill the delay slots here; you will confuse the
2990 -- register allocator.
-- | genCCall: generate code for a foreign C call. (The name line of
-- this signature was elided from this listing; the type follows.)
2993 :: CmmCallTarget -- function to call
2994 -> [(CmmReg,MachHint)] -- where to put the result
2995 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2996 -> Maybe [GlobalReg] -- volatile regs to save
2999 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3001 #if alpha_TARGET_ARCH
3005 genCCall fn cconv result_regs args
3006 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3007 `thenNat` \ ((unused,_), argCode) ->
3009 nRegs = length allArgRegs - length unused
3010 code = asmSeqThen (map ($ []) argCode)
3013 LDA pv (AddrImm (ImmLab (ptext fn))),
3014 JSR ra (AddrReg pv) nRegs,
3015 LDGP gp (AddrReg ra)]
3017 ------------------------
3018 {- Try to get a value into a specific register (or registers) for
3019 a call. The first 6 arguments go into the appropriate
3020 argument register (separate registers for integer and floating
3021 point arguments, but used in lock-step), and the remaining
3022 arguments are dumped to the stack, beginning at 0(sp). Our
3023 first argument is a pair of the list of remaining argument
3024 registers to be assigned for this call and the next stack
3025 offset to use for overflowing arguments. This way,
3026 @get_Arg@ can be applied to all of a call's arguments using
3030 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3031 -> StixTree -- Current argument
3032 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3034 -- We have to use up all of our argument registers first...
3036 get_arg ((iDst,fDst):dsts, offset) arg
3037 = getRegister arg `thenNat` \ register ->
3039 reg = if isFloatingRep pk then fDst else iDst
3040 code = registerCode register reg
3041 src = registerName register reg
3042 pk = registerRep register
3045 if isFloatingRep pk then
3046 ((dsts, offset), if isFixed register then
3047 code . mkSeqInstr (FMOV src fDst)
3050 ((dsts, offset), if isFixed register then
3051 code . mkSeqInstr (OR src (RIReg src) iDst)
3054 -- Once we have run out of argument registers, we move to the
3057 get_arg ([], offset) arg
3058 = getRegister arg `thenNat` \ register ->
3059 getNewRegNat (registerRep register)
3062 code = registerCode register tmp
3063 src = registerName register tmp
3064 pk = registerRep register
3065 sz = primRepToSize pk
3067 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3069 #endif /* alpha_TARGET_ARCH */
3071 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3073 #if i386_TARGET_ARCH
-- i386: a few float "callish" ops are inlined as x87 instructions
-- rather than real calls; all others go out of line.
3075 -- we only cope with a single result for foreign calls
3076 genCCall (CmmPrim op) [(r,_)] args vols = do
3078 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3079 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3081 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3082 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3084 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3085 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3087 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3088 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3090 other_op -> outOfLineFloatOp op r args vols
3092 actuallyInlineFloatOp rep instr [(x,_)]
3093 = do res <- trivialUFCode rep instr x
3095 return (any (getRegisterReg r))
-- General case: push args right-to-left (C convention), call, then pop.
3097 genCCall target dest_regs args vols = do
3098 sizes_n_codes <- mapM push_arg (reverse args)
3099 delta <- getDeltaNat
3101 (sizes, push_codes) = unzip sizes_n_codes
3102 tot_arg_size = sum sizes
3104 -- deal with static vs dynamic call targets
3105 (callinsns,cconv) <-
3108 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3109 -> -- ToDo: stdcall arg sizes
3110 return (unitOL (CALL (Left fn_imm) []), conv)
3111 where fn_imm = ImmCLbl lbl
3112 CmmForeignCall expr conv
3113 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3114 ASSERT(dyn_rep == I32)
3115 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3117 let push_code = concatOL push_codes
3118 call = callinsns `appOL`
3120 -- Deallocate parameters after call for ccall;
3121 -- but not for stdcall (callee does it)
3122 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3123 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3125 [DELTA (delta + tot_arg_size)]
3128 setDeltaNat (delta + tot_arg_size)
3131 -- assign the results, if necessary
-- Results per the i386 C ABI: I64 in eax:edx, floats on the x87 stack
-- top (fake0), everything else in eax.
3132 assign_code [] = nilOL
3133 assign_code [(dest,_hint)] =
3135 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3136 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3137 F32 -> unitOL (GMOV fake0 r_dest)
3138 F64 -> unitOL (GMOV fake0 r_dest)
3139 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3141 r_dest_hi = getHiVRegFromLo r_dest
3142 rep = cmmRegRep dest
3143 r_dest = getRegisterReg dest
3144 assign_code many = panic "genCCall.assign_code many"
3146 return (push_code `appOL`
3148 assign_code dest_regs)
-- Push one argument; DELTA pseudo-instrs track the stack-pointer
-- adjustment for the register allocator/spiller.
3155 push_arg :: (CmmExpr,MachHint){-current argument-}
3156 -> NatM (Int, InstrBlock) -- argsz, code
3158 push_arg (arg,_hint) -- we don't need the hints on x86
3159 | arg_rep == I64 = do
3160 ChildCode64 code r_lo <- iselExpr64 arg
3161 delta <- getDeltaNat
3162 setDeltaNat (delta - 8)
3164 r_hi = getHiVRegFromLo r_lo
3166 return (8, code `appOL`
3167 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3168 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3173 (code, reg, sz) <- get_op arg
3174 delta <- getDeltaNat
3175 let size = arg_size sz
3176 setDeltaNat (delta-size)
-- Floats cannot be PUSHed from x87 registers: make room on the
-- stack explicitly and store (GST) into it.
3177 if (case sz of F64 -> True; F32 -> True; _ -> False)
3180 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3182 GST sz reg (AddrBaseIndex (EABaseReg esp)
3188 PUSH I32 (OpReg reg) `snocOL`
3192 arg_rep = cmmExprRep arg
3195 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3197 (reg,code) <- getSomeReg op
3198 return (code, reg, cmmExprRep op)
3200 #endif /* i386_TARGET_ARCH */
3202 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Implement a float 'CallishMachOp' by calling the corresponding
-- C math-library routine (sqrt, sin, cos, ...). The libm entry points
-- are double-precision, so F32 operations promote their arguments to
-- F64, call through an F64 temporary, then demote the result.
3204 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3205 -> Maybe [GlobalReg] -> NatM InstrBlock
3206 outOfLineFloatOp mop res args vols
3207 | cmmRegRep res == F64
3208 = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3211 = do uq <- getUniqueNat
3213 tmp = CmmLocal (LocalReg uq F64)
3215 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3216 code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3217 return (code1 `appOL` code2)
3219 promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3220 demote x = CmmMachOp (MO_S_Conv F64 F32) [x]
3222 target = CmmForeignCall (CmmLit lbl) CCallConv
3223 lbl = CmmLabel (mkForeignLabel fn Nothing False)
-- Map each MachOp to its libm function name; note F32 variants call
-- the same double-precision routines as F64 (arguments were promoted).
3226 MO_F32_Sqrt -> FSLIT("sqrt")
3227 MO_F32_Sin -> FSLIT("sin")
3228 MO_F32_Cos -> FSLIT("cos")
3229 MO_F32_Tan -> FSLIT("tan")
3230 MO_F32_Exp -> FSLIT("exp")
3231 MO_F32_Log -> FSLIT("log")
3233 MO_F32_Asin -> FSLIT("asin")
3234 MO_F32_Acos -> FSLIT("acos")
3235 MO_F32_Atan -> FSLIT("atan")
3237 MO_F32_Sinh -> FSLIT("sinh")
3238 MO_F32_Cosh -> FSLIT("cosh")
3239 MO_F32_Tanh -> FSLIT("tanh")
3240 MO_F32_Pwr -> FSLIT("pow")
3242 MO_F64_Sqrt -> FSLIT("sqrt")
3243 MO_F64_Sin -> FSLIT("sin")
3244 MO_F64_Cos -> FSLIT("cos")
3245 MO_F64_Tan -> FSLIT("tan")
3246 MO_F64_Exp -> FSLIT("exp")
3247 MO_F64_Log -> FSLIT("log")
3249 MO_F64_Asin -> FSLIT("asin")
3250 MO_F64_Acos -> FSLIT("acos")
3251 MO_F64_Atan -> FSLIT("atan")
3253 MO_F64_Sinh -> FSLIT("sinh")
3254 MO_F64_Cosh -> FSLIT("cosh")
3255 MO_F64_Tanh -> FSLIT("tanh")
3256 MO_F64_Pwr -> FSLIT("pow")
3258 other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3260 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3264 #if x86_64_TARGET_ARCH
-- x86_64: C calls.  A CmmPrim op with a single result register is turned
-- into an out-of-line call to the corresponding libm routine.
3266 genCCall (CmmPrim op) [(r,_)] args vols =
3267 outOfLineFloatOp op r args vols
-- General case: load register arguments, push overflow arguments on the
-- stack right-to-left, keep %rsp 16-byte aligned, emit the call, then
-- deallocate the stack arguments and move results out of %rax/%xmm0.
-- (NOTE(review): some interleaving lines are elided in this view.)
3269 genCCall target dest_regs args vols = do
3271 -- load up the register arguments
3272 (stack_args, aregs, fregs, load_args_code)
3273 <- load_args args allArgRegs allFPArgRegs nilOL
-- aregs/fregs are the registers *left over*; the ones actually used are
-- recovered by dropping that many from the end of the full register lists.
3276 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3277 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3278 arg_regs = int_regs_used ++ fp_regs_used
3279 -- for annotating the call instruction with
3281 sse_regs = length fp_regs_used
3283 tot_arg_size = arg_size * length stack_args
3285 -- On entry to the called function, %rsp should be aligned
3286 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3287 -- the return address is 16-byte aligned). In STG land
3288 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3289 -- need to make sure we push a multiple of 16-bytes of args,
3290 -- plus the return address, to get the correct alignment.
3291 -- Urg, this is hard. We need to feed the delta back into
3292 -- the arg pushing code.
3293 (real_size, adjust_rsp) <-
3294 if tot_arg_size `rem` 16 == 0
3295 then return (tot_arg_size, nilOL)
3296 else do -- we need to adjust...
3297 delta <- getDeltaNat
3298 setDeltaNat (delta-8)
3299 return (tot_arg_size+8, toOL [
3300 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3304 -- push the stack args, right to left
3305 push_code <- push_args (reverse stack_args) nilOL
3306 delta <- getDeltaNat
3308 -- deal with static vs dynamic call targets
3309 (callinsns,cconv) <-
3312 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3313 -> -- ToDo: stdcall arg sizes
3314 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3315 where fn_imm = ImmCLbl lbl
3316 CmmForeignCall expr conv
3317 -> do (dyn_r, dyn_c) <- getSomeReg expr
3318 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3321 -- The x86_64 ABI requires us to set %al to the number of SSE
3322 -- registers that contain arguments, if the called routine
3323 -- is a varargs function. We don't know whether it's a
3324 -- varargs function or not, so we have to assume it is.
3326 -- It's not safe to omit this assignment, even if the number
3327 -- of SSE regs in use is zero. If %al is larger than 8
3328 -- on entry to a varargs function, seg faults ensue.
3329 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3331 let call = callinsns `appOL`
3333 -- Deallocate parameters after call for ccall;
3334 -- but not for stdcall (callee does it)
3335 (if cconv == StdCallConv || real_size==0 then [] else
3336 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3338 [DELTA (delta + real_size)]
3341 setDeltaNat (delta + real_size)
3344 -- assign the results, if necessary
-- Floating results come back in %xmm0, everything else in %rax;
-- more than one result register is not supported.
3345 assign_code [] = nilOL
3346 assign_code [(dest,_hint)] =
3348 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3349 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3350 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3352 rep = cmmRegRep dest
3353 r_dest = getRegisterReg dest
3354 assign_code many = panic "genCCall.assign_code many"
3356 return (load_args_code `appOL`
3359 assign_eax sse_regs `appOL`
3361 assign_code dest_regs)
3364 arg_size = 8 -- always, at the mo
-- load_args: greedily assign each argument to the next free FP or int
-- argument register; once a class of registers runs out, the remaining
-- arguments of that class are returned for pushing on the stack.
3366 load_args :: [(CmmExpr,MachHint)]
3367 -> [Reg] -- int regs avail for args
3368 -> [Reg] -- FP regs avail for args
3370 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3371 load_args args [] [] code = return (args, [], [], code)
3372 -- no more regs to use
3373 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3374 -- no more args to push
3375 load_args ((arg,hint) : rest) aregs fregs code
3376 | isFloatingRep arg_rep =
3380 arg_code <- getAnyReg arg
3381 load_args rest aregs rs (code `appOL` arg_code r)
3386 arg_code <- getAnyReg arg
3387 load_args rest rs fregs (code `appOL` arg_code r)
3389 arg_rep = cmmExprRep arg
3392 (args',ars,frs,code') <- load_args rest aregs fregs code
3393 return ((arg,hint):args', ars, frs, code')
-- push_args: emit stack pushes for the leftover arguments, keeping the
-- NCG's stack-delta annotations (DELTA) in sync with every %rsp change.
3395 push_args [] code = return code
3396 push_args ((arg,hint):rest) code
3397 | isFloatingRep arg_rep = do
-- Floats can't be PUSHed directly: store to 0(%rsp) then bump %rsp.
3398 (arg_reg, arg_code) <- getSomeReg arg
3399 delta <- getDeltaNat
3400 setDeltaNat (delta-arg_size)
3401 let code' = code `appOL` toOL [
3402 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3403 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3404 DELTA (delta-arg_size)]
3405 push_args rest code'
3408 -- we only ever generate word-sized function arguments. Promotion
3409 -- has already happened: our Int8# type is kept sign-extended
3410 -- in an Int#, for example.
3411 ASSERT(arg_rep == I64) return ()
3412 (arg_op, arg_code) <- getOperand arg
3413 delta <- getDeltaNat
3414 setDeltaNat (delta-arg_size)
3415 let code' = code `appOL` toOL [PUSH I64 arg_op,
3416 DELTA (delta-arg_size)]
3417 push_args rest code'
3419 arg_rep = cmmExprRep arg
3422 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3424 #if sparc_TARGET_ARCH
3426 The SPARC calling convention is an absolute
3427 nightmare. The first 6x32 bits of arguments are mapped into
3428 %o0 through %o5, and the remaining arguments are dumped to the
3429 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3431 If we have to put args on the stack, move %o6==%sp down by
3432 the number of words to go on the stack, to ensure there's enough space.
3434 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3435 16 words above the stack pointer is a word for the address of
3436 a structure return value. I use this as a temporary location
3437 for moving values from float to int regs. Certainly it isn't
3438 safe to put anything in the 16 words starting at %sp, since
3439 this area can get trashed at any time due to window overflows
3440 caused by signal handlers.
3442 A final complication (if the above isn't enough) is that
3443 we can't blithely calculate the arguments one by one into
3444 %o0 .. %o5. Consider the following nested calls:
3448 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3449 the inner call will itself use %o0, which trashes the value put there
3450 in preparation for the outer call. Upshot: we need to calculate the
3451 args into temporary regs, and move those to arg regs or onto the
3452 stack only immediately prior to the call proper. Sigh.
-- SPARC: C calls.  All arguments are first computed into fresh integer
-- vregs (see the long comment above on why we can't target %o0..%o5
-- directly), then moved into the arg registers / stack slots immediately
-- before the call.
3455 genCCall fn cconv kind args
3456 = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3458 (argcodes, vregss) = unzip argcode_and_vregs
3459 n_argRegs = length allArgRegs
3460 n_argRegs_used = min (length vregs) n_argRegs
3461 vregs = concat vregss
3463 -- deal with static vs dynamic call targets
3466 -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3468 -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3469 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3471 `thenNat` \ callinsns ->
3473 argcode = concatOL argcodes
-- If any args overflow onto the stack, move %sp down first (rounded to
-- an even number of words to keep 8-byte alignment) and back up after.
3474 (move_sp_down, move_sp_up)
3475 = let diff = length vregs - n_argRegs
3476 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3479 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3481 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3483 return (argcode `appOL`
3484 move_sp_down `appOL`
3485 transfer_code `appOL`
3490 -- function names that begin with '.' are assumed to be special
3491 -- internally generated names like '.mul,' which don't get an
3492 -- underscore prefix
3493 -- ToDo:needed (WDP 96/03) ???
3494 fn_static = unLeft fn
3495 fn__2 = case (headFS fn_static) of
3496 '.' -> ImmLit (ftext fn_static)
3497 _ -> ImmCLbl (mkForeignLabel fn_static False)
3499 -- move args from the integer vregs into which they have been
3500 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3501 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3503 move_final [] _ offset -- all args done
3506 move_final (v:vs) [] offset -- out of aregs; move to stack
3507 = ST W v (spRel offset)
3508 : move_final vs [] (offset+1)
3510 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3511 = OR False g0 (RIReg v) a
3512 : move_final vs az offset
3514 -- generate code to calculate an argument, and move it into one
3515 -- or two integer vregs.
3516 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3517 arg_to_int_vregs arg
-- 64-bit values occupy two vregs: high word first, then low word.
3518 | is64BitRep (repOfCmmExpr arg)
3519 = iselExpr64 arg `thenNat` \ (ChildCode64 code vr_lo) ->
3520 let r_lo = VirtualRegI vr_lo
3521 r_hi = getHiVRegFromLo r_lo
3522 in return (code, [r_hi, r_lo])
3524 = getRegister arg `thenNat` \ register ->
3525 getNewRegNat (registerRep register) `thenNat` \ tmp ->
3526 let code = registerCode register tmp
3527 src = registerName register tmp
3528 pk = registerRep register
3530 -- the value is in src. Get it into 1 or 2 int vregs.
-- Float values are bounced through the scratch slot at [%sp+16]
-- (see the comment above re. the structure-return word) to move
-- them from float regs into integer vregs.
3533 getNewRegNat WordRep `thenNat` \ v1 ->
3534 getNewRegNat WordRep `thenNat` \ v2 ->
3537 FMOV DF src f0 `snocOL`
3538 ST F f0 (spRel 16) `snocOL`
3539 LD W (spRel 16) v1 `snocOL`
3540 ST F (fPair f0) (spRel 16) `snocOL`
3546 getNewRegNat WordRep `thenNat` \ v1 ->
3549 ST F src (spRel 16) `snocOL`
-- Plain integer case: just copy src into a fresh vreg via OR with %g0.
3555 getNewRegNat WordRep `thenNat` \ v1 ->
3557 code `snocOL` OR False g0 (RIReg src) v1
3561 #endif /* sparc_TARGET_ARCH */
3563 #if powerpc_TARGET_ARCH
3565 #if darwin_TARGET_OS || linux_TARGET_OS
3567 The PowerPC calling convention for Darwin/Mac OS X
3568 is described in Apple's document
3569 "Inside Mac OS X - Mach-O Runtime Architecture".
3571 PowerPC Linux uses the System V Release 4 Calling Convention
3572 for PowerPC. It is described in the
3573 "System V Application Binary Interface PowerPC Processor Supplement".
3575 Both conventions are similar:
3576 Parameters may be passed in general-purpose registers starting at r3, in
3577 floating point registers starting at f1, or on the stack.
3579 But there are substantial differences:
3580 * The number of registers used for parameter passing and the exact set of
3581 nonvolatile registers differs (see MachRegs.lhs).
3582 * On Darwin, stack space is always reserved for parameters, even if they are
3583 passed in registers. The called routine may choose to save parameters from
3584 registers to the corresponding space on the stack.
3585 * On Darwin, a corresponding number of GPRs is skipped when a floating point
3586 parameter is passed in an FPR.
3587 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3588 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3589 Darwin just treats an I64 like two separate I32s (high word first).
3590 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3591 4-byte aligned like everything else on Darwin.
3592 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3593 PowerPC Linux does not agree, so neither do we.
3595 According to both conventions, the parameter area should be part of the
3596 caller's stack frame, allocated in the caller's prologue code (large enough
3597 to hold the parameter lists for all called routines). The NCG already
3598 uses the stack for register spilling, leaving 64 bytes free at the top.
3599 If we need a larger parameter area than that, we just allocate a new stack
3600 frame just before ccalling.
-- PowerPC (Darwin/Linux): C calls.  Arguments are passed per the two ABIs
-- described in the comment above; the differences are confined to CPP
-- blocks below (initial stack offset, GPR skipping, 8-byte alignment).
3603 genCCall target dest_regs argsAndHints vols
3604 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3605 -- we rely on argument promotion in the codeGen
3607 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3609 allArgRegs allFPArgRegs
-- Static calls become BL to a label; dynamic calls go via CTR (MTCTR +
-- BCTRL); CmmPrim ops are routed to out-of-line libm calls.
3613 (labelOrExpr, reduceToF32) <- case target of
3614 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3615 CmmForeignCall expr conv -> return (Right expr, False)
3616 CmmPrim mop -> outOfLineFloatOp mop
3618 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3619 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3624 `snocOL` BL lbl usedRegs
3627 (dynReg, dynCode) <- getSomeReg dyn
3629 `snocOL` MTCTR dynReg
3631 `snocOL` BCTRL usedRegs
3634 #if darwin_TARGET_OS
3635 initialStackOffset = 24
3636 -- size of linkage area + size of arguments, in bytes
3637 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3638 map machRepByteWidth argReps
3639 #elif linux_TARGET_OS
3640 initialStackOffset = 8
3641 stackDelta finalStack = roundTo 16 finalStack
3643 args = map fst argsAndHints
3644 argReps = map cmmExprRep args
-- roundTo a x: round x up to the next multiple of a.
3646 roundTo a x | x `mod` a == 0 = x
3647 | otherwise = x + a - (x `mod` a)
-- Allocate / deallocate a fresh stack frame around the call when the
-- parameter area is too big for the 64 bytes the NCG leaves free.
3649 move_sp_down finalStack
3651 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3654 where delta = stackDelta finalStack
3655 move_sp_up finalStack
3657 toOL [ADD sp sp (RIImm (ImmInt delta)),
3660 where delta = stackDelta finalStack
-- passArguments: thread (remaining gprs, remaining fprs, stack offset,
-- accumulated code, registers used so far) through the argument list.
3663 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 arguments: two words, high word first (Darwin) or an odd-even GPR
-- pair / 8-byte-aligned stack slot (SysV).
3664 passArguments ((arg,I64):args) gprs fprs stackOffset
3665 accumCode accumUsed =
3667 ChildCode64 code vr_lo <- iselExpr64 arg
3668 let vr_hi = getHiVRegFromLo vr_lo
3670 #if darwin_TARGET_OS
3675 (accumCode `appOL` code
3676 `snocOL` storeWord vr_hi gprs stackOffset
3677 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3678 ((take 2 gprs) ++ accumUsed)
3680 storeWord vr (gpr:_) offset = MR gpr vr
3681 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3683 #elif linux_TARGET_OS
3684 let stackOffset' = roundTo 8 stackOffset
3685 stackCode = accumCode `appOL` code
3686 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3687 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3688 regCode hireg loreg =
3689 accumCode `appOL` code
3690 `snocOL` MR hireg vr_hi
3691 `snocOL` MR loreg vr_lo
-- SysV: the pair must start in an odd-numbered GPR, so skip one
-- register if needed ("even (length gprs)" encodes the parity here).
3694 hireg : loreg : regs | even (length gprs) ->
3695 passArguments args regs fprs stackOffset
3696 (regCode hireg loreg) (hireg : loreg : accumUsed)
3697 _skipped : hireg : loreg : regs ->
3698 passArguments args regs fprs stackOffset
3699 (regCode hireg loreg) (hireg : loreg : accumUsed)
3700 _ -> -- only one or no regs left
3701 passArguments args [] fprs (stackOffset'+8)
-- All other reps: use the next register of the right class if one is
-- free, otherwise store to the stack slot at stackOffset'.
3705 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3706 | reg : _ <- regs = do
3707 register <- getRegister arg
3708 let code = case register of
3709 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3710 Any _ acode -> acode reg
3714 #if darwin_TARGET_OS
3715 -- The Darwin ABI requires that we reserve stack slots for register parameters
3716 (stackOffset + stackBytes)
3717 #elif linux_TARGET_OS
3718 -- ... the SysV ABI doesn't.
3721 (accumCode `appOL` code)
3724 (vr, code) <- getSomeReg arg
3728 (stackOffset' + stackBytes)
3729 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3732 #if darwin_TARGET_OS
3733 -- stackOffset is at least 4-byte aligned
3734 -- The Darwin ABI is happy with that.
3735 stackOffset' = stackOffset
3737 -- ... the SysV ABI requires 8-byte alignment for doubles.
3738 stackOffset' | rep == F64 = roundTo 8 stackOffset
3739 | otherwise = stackOffset
3741 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep resource usage: (GPRs consumed, FPRs consumed, stack bytes,
-- which register pool this rep draws from).
3742 (nGprs, nFprs, stackBytes, regs) = case rep of
3743 I32 -> (1, 0, 4, gprs)
3744 #if darwin_TARGET_OS
3745 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3747 F32 -> (1, 1, 4, fprs)
3748 F64 -> (2, 1, 8, fprs)
3749 #elif linux_TARGET_OS
3750 -- ... the SysV ABI doesn't.
3751 F32 -> (0, 1, 4, fprs)
3752 F64 -> (0, 1, 8, fprs)
-- moveResult: copy the C return value (f1 for floats, r3[/r4] for ints)
-- into the destination register; FRSP narrows F64->F32 for the
-- out-of-line float ops that compute in double precision.
3755 moveResult reduceToF32 =
3759 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3760 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3761 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3763 | otherwise -> unitOL (MR r_dest r3)
3764 where rep = cmmRegRep dest
3765 r_dest = getRegisterReg dest
-- outOfLineFloatOp: map a CallishMachOp onto a libm function name.  The
-- Bool says whether the result must be reduced back to F32 (the F32 ops
-- call the double-precision routine and then FRSP the result).
3767 outOfLineFloatOp mop =
3769 mopExpr <- cmmMakeDynamicReference addImportNat True $
3770 mkForeignLabel functionName Nothing True
3771 let mopLabelOrExpr = case mopExpr of
3772 CmmLit (CmmLabel lbl) -> Left lbl
3774 return (mopLabelOrExpr, reduce)
3776 (functionName, reduce) = case mop of
3777 MO_F32_Exp -> (FSLIT("exp"), True)
3778 MO_F32_Log -> (FSLIT("log"), True)
3779 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3781 MO_F32_Sin -> (FSLIT("sin"), True)
3782 MO_F32_Cos -> (FSLIT("cos"), True)
3783 MO_F32_Tan -> (FSLIT("tan"), True)
3785 MO_F32_Asin -> (FSLIT("asin"), True)
3786 MO_F32_Acos -> (FSLIT("acos"), True)
3787 MO_F32_Atan -> (FSLIT("atan"), True)
3789 MO_F32_Sinh -> (FSLIT("sinh"), True)
3790 MO_F32_Cosh -> (FSLIT("cosh"), True)
3791 MO_F32_Tanh -> (FSLIT("tanh"), True)
3792 MO_F32_Pwr -> (FSLIT("pow"), True)
3794 MO_F64_Exp -> (FSLIT("exp"), False)
3795 MO_F64_Log -> (FSLIT("log"), False)
3796 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3798 MO_F64_Sin -> (FSLIT("sin"), False)
3799 MO_F64_Cos -> (FSLIT("cos"), False)
3800 MO_F64_Tan -> (FSLIT("tan"), False)
3802 MO_F64_Asin -> (FSLIT("asin"), False)
3803 MO_F64_Acos -> (FSLIT("acos"), False)
3804 MO_F64_Atan -> (FSLIT("atan"), False)
3806 MO_F64_Sinh -> (FSLIT("sinh"), False)
3807 MO_F64_Cosh -> (FSLIT("cosh"), False)
3808 MO_F64_Tanh -> (FSLIT("tanh"), False)
3809 MO_F64_Pwr -> (FSLIT("pow"), False)
3810 other -> pprPanic "genCCall(ppc): unknown callish op"
3811 (pprCallishMachOp other)
3813 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3815 #endif /* powerpc_TARGET_ARCH */
3818 -- -----------------------------------------------------------------------------
3819 -- Generating a table-branch
-- genSwitch: compile a table branch.  Emits a read-only jump table and an
-- indexed indirect jump.  x86 uses an absolute-address table and a single
-- JMP_TBL; PowerPC (PIC and non-PIC variants) scales the index by 4, loads
-- the entry, and jumps via CTR.
3821 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3823 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3824 genSwitch expr ids = do
3825 (reg,e_code) <- getSomeReg expr
3826 lbl <- getNewLabelNat
3828 jumpTable = map jumpTableEntry ids
-- Scaled, label-based addressing: table[reg * wORD_SIZE].
3829 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3830 code = e_code `appOL` toOL [
3831 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3832 JMP_TBL op [ id | Just id <- ids ]
3836 #elif powerpc_TARGET_ARCH
-- PIC variant: table entries are offsets relative to the table label, so
-- the loaded word is added to the (dynamically obtained) table address.
3840 (reg,e_code) <- getSomeReg expr
3841 tmp <- getNewRegNat I32
3842 lbl <- getNewLabelNat
3843 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3844 (tableReg,t_code) <- getSomeReg $ dynRef
3846 jumpTable = map jumpTableEntryRel ids
3848 jumpTableEntryRel Nothing
3849 = CmmStaticLit (CmmInt 0 wordRep)
3850 jumpTableEntryRel (Just (BlockId id))
3851 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3852 where blockLabel = mkAsmTempLabel id
3854 code = e_code `appOL` t_code `appOL` toOL [
3855 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3856 SLW tmp reg (RIImm (ImmInt 2)),
3857 LD I32 tmp (AddrRegReg tableReg tmp),
3858 ADD tmp tmp (RIReg tableReg),
3860 BCTR [ id | Just id <- ids ]
-- Non-PIC variant: absolute entries, table address built with HA/LO
-- relocations on the table label.
3865 (reg,e_code) <- getSomeReg expr
3866 tmp <- getNewRegNat I32
3867 lbl <- getNewLabelNat
3869 jumpTable = map jumpTableEntry ids
3871 code = e_code `appOL` toOL [
3872 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3873 SLW tmp reg (RIImm (ImmInt 2)),
3874 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3875 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3877 BCTR [ id | Just id <- ids ]
3881 genSwitch expr ids = panic "ToDo: genSwitch"
-- Shared helper: absolute jump-table entry; Nothing (a hole in the
-- switch) becomes a zero word.
3884 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3885 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3886 where blockLabel = mkAsmTempLabel id
3888 -- -----------------------------------------------------------------------------
3890 -- -----------------------------------------------------------------------------
3893 -- -----------------------------------------------------------------------------
3894 -- 'condIntReg' and 'condFltReg': condition codes into registers
3896 -- Turn those condition codes into integers now (when they appear on
3897 -- the right hand side of an assignment).
3899 -- (If applicable) Do not fill the delay slots here; you will confuse the
3900 -- register allocator.
-- Materialise a comparison result as 0/1 in a register (used when a
-- condition appears on the RHS of an assignment rather than in a branch).
3902 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3904 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3906 #if alpha_TARGET_ARCH
-- Alpha: not implemented in this NCG.
3907 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3908 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3909 #endif /* alpha_TARGET_ARCH */
3911 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3913 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- i386/x86_64: SETcc into a byte-sized temp, then zero-extend to 32 bits.
3915 condIntReg cond x y = do
3916 CondCode _ cond cond_code <- condIntCode cond x y
3917 tmp <- getNewRegNat I8
3919 code dst = cond_code `appOL` toOL [
3920 SETCC cond (OpReg tmp),
3921 MOVZxL I8 (OpReg tmp) (OpReg dst)
3924 return (Any I32 code)
3928 #if i386_TARGET_ARCH
-- i386: float comparisons use the same SETcc + zero-extend scheme.
3930 condFltReg cond x y = do
3931 CondCode _ cond cond_code <- condFltCode cond x y
3932 tmp <- getNewRegNat I8
3934 code dst = cond_code `appOL` toOL [
3935 SETCC cond (OpReg tmp),
3936 MOVZxL I8 (OpReg tmp) (OpReg dst)
3939 return (Any I32 code)
3943 #if x86_64_TARGET_ARCH
3945 condFltReg cond x y = do
3946 CondCode _ cond cond_code <- condFltCode cond x y
3947 tmp1 <- getNewRegNat wordRep
3948 tmp2 <- getNewRegNat wordRep
3950 -- We have to worry about unordered operands (eg. comparisons
3951 -- against NaN). If the operands are unordered, the comparison
3952 -- sets the parity flag, carry flag and zero flag.
3953 -- All comparisons are supposed to return false for unordered
3954 -- operands except for !=, which returns true.
3956 -- Optimisation: we don't have to test the parity flag if we
3957 -- know the test has already excluded the unordered case: eg >
3958 -- and >= test for a zero carry flag, which can only occur for
3959 -- ordered operands.
3961 -- ToDo: by reversing comparisons we could avoid testing the
3962 -- parity flag in more cases.
3967 NE -> or_unordered dst
3968 GU -> plain_test dst
3969 GEU -> plain_test dst
3970 _ -> and_ordered dst)
-- plain_test: the condition already excludes the unordered case.
3972 plain_test dst = toOL [
3973 SETCC cond (OpReg tmp1),
3974 MOVZxL I8 (OpReg tmp1) (OpReg dst)
-- or_unordered: NE must also be true when operands are unordered,
-- so OR in the parity flag.
3976 or_unordered dst = toOL [
3977 SETCC cond (OpReg tmp1),
3978 SETCC PARITY (OpReg tmp2),
3979 OR I8 (OpReg tmp1) (OpReg tmp2),
3980 MOVZxL I8 (OpReg tmp2) (OpReg dst)
-- and_ordered: all other comparisons must be false when unordered,
-- so AND with NOT-parity.
3982 and_ordered dst = toOL [
3983 SETCC cond (OpReg tmp1),
3984 SETCC NOTPARITY (OpReg tmp2),
3985 AND I8 (OpReg tmp1) (OpReg tmp2),
3986 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3989 return (Any I32 code)
3993 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3995 #if sparc_TARGET_ARCH
-- SPARC: materialise comparisons as 0/1.  EQ/NE against zero (and the
-- XOR-combined two-operand forms) are done branch-free using the carry
-- trick: SUBcc against %g0 sets carry iff src /= 0, and a subsequent
-- SUBC/ADDC folds the carry into 0 or 1 (hedged reading — TODO confirm
-- against SPARC SUBcc/ADDX semantics).
3997 condIntReg EQQ x (StInt 0)
3998 = getRegister x `thenNat` \ register ->
3999 getNewRegNat IntRep `thenNat` \ tmp ->
4001 code = registerCode register tmp
4002 src = registerName register tmp
4003 code__2 dst = code `appOL` toOL [
4004 SUB False True g0 (RIReg src) g0,
4005 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4007 return (Any IntRep code__2)
-- EQ of two registers: XOR them first, then test the result against 0.
4010 = getRegister x `thenNat` \ register1 ->
4011 getRegister y `thenNat` \ register2 ->
4012 getNewRegNat IntRep `thenNat` \ tmp1 ->
4013 getNewRegNat IntRep `thenNat` \ tmp2 ->
4015 code1 = registerCode register1 tmp1
4016 src1 = registerName register1 tmp1
4017 code2 = registerCode register2 tmp2
4018 src2 = registerName register2 tmp2
4019 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4020 XOR False src1 (RIReg src2) dst,
4021 SUB False True g0 (RIReg dst) g0,
4022 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4024 return (Any IntRep code__2)
4026 condIntReg NE x (StInt 0)
4027 = getRegister x `thenNat` \ register ->
4028 getNewRegNat IntRep `thenNat` \ tmp ->
4030 code = registerCode register tmp
4031 src = registerName register tmp
4032 code__2 dst = code `appOL` toOL [
4033 SUB False True g0 (RIReg src) g0,
4034 ADD True False g0 (RIImm (ImmInt 0)) dst]
4036 return (Any IntRep code__2)
-- NE of two registers: XOR then test against 0, as for EQ above.
4039 = getRegister x `thenNat` \ register1 ->
4040 getRegister y `thenNat` \ register2 ->
4041 getNewRegNat IntRep `thenNat` \ tmp1 ->
4042 getNewRegNat IntRep `thenNat` \ tmp2 ->
4044 code1 = registerCode register1 tmp1
4045 src1 = registerName register1 tmp1
4046 code2 = registerCode register2 tmp2
4047 src2 = registerName register2 tmp2
4048 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4049 XOR False src1 (RIReg src2) dst,
4050 SUB False True g0 (RIReg dst) g0,
4051 ADD True False g0 (RIImm (ImmInt 0)) dst]
4053 return (Any IntRep code__2)
-- General case: fall back to a two-label branch diamond that writes
-- 0 on one path and 1 on the other (with delay-slot NOPs).
4056 = getBlockIdNat `thenNat` \ lbl1 ->
4057 getBlockIdNat `thenNat` \ lbl2 ->
4058 condIntCode cond x y `thenNat` \ condition ->
4060 code = condCode condition
4061 cond = condName condition
4062 code__2 dst = code `appOL` toOL [
4063 BI cond False (ImmCLbl lbl1), NOP,
4064 OR False g0 (RIImm (ImmInt 0)) dst,
4065 BI ALWAYS False (ImmCLbl lbl2), NOP,
4067 OR False g0 (RIImm (ImmInt 1)) dst,
4070 return (Any IntRep code__2)
-- Float comparisons: same branch-diamond scheme, using BF instead of BI.
4073 = getBlockIdNat `thenNat` \ lbl1 ->
4074 getBlockIdNat `thenNat` \ lbl2 ->
4075 condFltCode cond x y `thenNat` \ condition ->
4077 code = condCode condition
4078 cond = condName condition
4079 code__2 dst = code `appOL` toOL [
4081 BF cond False (ImmCLbl lbl1), NOP,
4082 OR False g0 (RIImm (ImmInt 0)) dst,
4083 BI ALWAYS False (ImmCLbl lbl2), NOP,
4085 OR False g0 (RIImm (ImmInt 1)) dst,
4088 return (Any IntRep code__2)
4090 #endif /* sparc_TARGET_ARCH */
4092 #if powerpc_TARGET_ARCH
-- PowerPC: turn a condition into 0/1 branch-free.  The generated CondCode
-- leaves the result in a CR bit; the bit is extracted with MFCR + RLWINM
-- (and CRNOR first when the wanted sense is the negation of the bit).
4093 condReg getCond = do
4094 lbl1 <- getBlockIdNat
4095 lbl2 <- getBlockIdNat
4096 CondCode _ cond cond_code <- getCond
4098 {- code dst = cond_code `appOL` toOL [
-- (The branchy version above is kept commented out; the live version
-- below extracts the CR bit directly.)
4107 code dst = cond_code
4111 RLWINM dst dst (bit + 1) 31 31
4114 negate_code | do_negate = unitOL (CRNOR bit bit bit)
-- Map the Cond onto (which CR bit to read, whether to negate it first).
4117 (bit, do_negate) = case cond of
4131 return (Any I32 code)
-- Both int and float conditions share the CR-bit extraction above.
4133 condIntReg cond x y = condReg (condIntCode cond x y)
4134 condFltReg cond x y = condReg (condFltCode cond x y)
4135 #endif /* powerpc_TARGET_ARCH */
4138 -- -----------------------------------------------------------------------------
4139 -- 'trivial*Code': deal with trivial instructions
4141 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4142 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4143 -- Only look for constants on the right hand side, because that's
4144 -- where the generic optimizer will have put them.
4146 -- Similarly, for unary instructions, we don't have to worry about
4147 -- matching an StInt as the argument, because genericOpt will already
4148 -- have handled the constant-folding.
4152 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4153 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4154 -> Maybe (Operand -> Operand -> Instr)
4155 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4156 -> Maybe (Operand -> Operand -> Instr)
4157 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4158 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4160 -> CmmExpr -> CmmExpr -- the two arguments
4163 #ifndef powerpc_TARGET_ARCH
4166 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4167 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4168 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4169 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4171 -> CmmExpr -> CmmExpr -- the two arguments
4177 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4178 ,IF_ARCH_i386 ((Operand -> Instr)
4179 ,IF_ARCH_x86_64 ((Operand -> Instr)
4180 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4181 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4183 -> CmmExpr -- the one argument
4186 #ifndef powerpc_TARGET_ARCH
4189 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4190 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4191 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4192 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4194 -> CmmExpr -- the one argument
4198 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4200 #if alpha_TARGET_ARCH
-- Alpha: trivial two-operand instructions.  Immediate second operand is
-- folded directly into the instruction (RIImm); otherwise both operands
-- are computed into registers first.
4202 trivialCode instr x (StInt y)
4204 = getRegister x `thenNat` \ register ->
4205 getNewRegNat IntRep `thenNat` \ tmp ->
4207 code = registerCode register tmp
4208 src1 = registerName register tmp
4209 src2 = ImmInt (fromInteger y)
4210 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4212 return (Any IntRep code__2)
4214 trivialCode instr x y
4215 = getRegister x `thenNat` \ register1 ->
4216 getRegister y `thenNat` \ register2 ->
4217 getNewRegNat IntRep `thenNat` \ tmp1 ->
4218 getNewRegNat IntRep `thenNat` \ tmp2 ->
4220 code1 = registerCode register1 tmp1 []
4221 src1 = registerName register1 tmp1
4222 code2 = registerCode register2 tmp2 []
4223 src2 = registerName register2 tmp2
4224 code__2 dst = asmSeqThen [code1, code2] .
4225 mkSeqInstr (instr src1 (RIReg src2) dst)
4227 return (Any IntRep code__2)
-- Unary integer op: compute operand, apply instr into dst.
4230 trivialUCode instr x
4231 = getRegister x `thenNat` \ register ->
4232 getNewRegNat IntRep `thenNat` \ tmp ->
4234 code = registerCode register tmp
4235 src = registerName register tmp
4236 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4238 return (Any IntRep code__2)
-- Binary float op; the first (size) argument is ignored on Alpha.
4241 trivialFCode _ instr x y
4242 = getRegister x `thenNat` \ register1 ->
4243 getRegister y `thenNat` \ register2 ->
4244 getNewRegNat F64 `thenNat` \ tmp1 ->
4245 getNewRegNat F64 `thenNat` \ tmp2 ->
4247 code1 = registerCode register1 tmp1
4248 src1 = registerName register1 tmp1
4250 code2 = registerCode register2 tmp2
4251 src2 = registerName register2 tmp2
4253 code__2 dst = asmSeqThen [code1 [], code2 []] .
4254 mkSeqInstr (instr src1 src2 dst)
4256 return (Any F64 code__2)
-- Unary float op; size argument likewise ignored.
4258 trivialUFCode _ instr x
4259 = getRegister x `thenNat` \ register ->
4260 getNewRegNat F64 `thenNat` \ tmp ->
4262 code = registerCode register tmp
4263 src = registerName register tmp
4264 code__2 dst = code . mkSeqInstr (instr src dst)
4266 return (Any F64 code__2)
4268 #endif /* alpha_TARGET_ARCH */
4270 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4272 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4275 The Rules of the Game are:
4277 * You cannot assume anything about the destination register dst;
4278 it may be anything, including a fixed reg.
4280 * You may compute an operand into a fixed reg, but you may not
4281 subsequently change the contents of that fixed reg. If you
4282 want to do so, first copy the value either to a temporary
4283 or into dst. You are free to modify dst even if it happens
4284 to be a fixed reg -- that's not your problem.
4286 * You cannot assume that a fixed reg will stay live over an
4287 arbitrary computation. The same applies to the dst reg.
4289 * Temporary regs obtained from getNewRegNat are distinct from
4290 each other and from all other regs, and stay live over
4291 arbitrary computations.
4293 --------------------
4295 SDM's version of The Rules:
4297 * If getRegister returns Any, that means it can generate correct
4298 code which places the result in any register, period. Even if that
4299 register happens to be read during the computation.
4301 Corollary #1: this means that if you are generating code for an
4302 operation with two arbitrary operands, you cannot assign the result
4303 of the first operand into the destination register before computing
4304 the second operand. The second operand might require the old value
4305 of the destination register.
4307 Corollary #2: A function might be able to generate more efficient
4308 code if it knows the destination register is a new temporary (and
4309 therefore not read by any of the sub-computations).
4311 * If getRegister returns Any, then the code it generates may modify only:
4312 (a) fresh temporaries
4313 (b) the destination register
4314 (c) known registers (eg. %ecx is used by shifts)
4315 In particular, it may *not* modify global registers, unless the global
4316 register happens to be the destination register.
-- i386/x86_64: trivial two-operand instructions (dst := dst `op` src).
-- If a reversed-operand form is available and the *first* operand is a
-- small literal, emit literal-as-source directly using revinstr.
4319 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4320 | not (is64BitLit lit_a) = do
4321 b_code <- getAnyReg b
4324 = b_code dst `snocOL`
4325 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4327 return (Any rep code)
4329 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4331 -- This is re-used for floating pt instructions too.
4332 genTrivialCode rep instr a b = do
4333 (b_op, b_code) <- getNonClobberedOperand b
4334 a_code <- getAnyReg a
4335 tmp <- getNewRegNat rep
4337 -- We want the value of b to stay alive across the computation of a.
4338 -- But, we want to calculate a straight into the destination register,
4339 -- because the instruction only has two operands (dst := dst `op` src).
4340 -- The troublesome case is when the result of b is in the same register
4341 -- as the destination reg. In this case, we have to save b in a
4342 -- new temporary across the computation of a.
4344 | dst `regClashesWithOp` b_op =
4346 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4348 instr (OpReg tmp) (OpReg dst)
4352 instr b_op (OpReg dst)
4354 return (Any rep code)
-- A register clashes with an operand if it *is* that operand's register,
-- or appears anywhere in the operand's addressing mode.
4356 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4357 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4358 reg `regClashesWithOp` _ = False
-- Unary op: compute x straight into dst, then apply instr in place.
4362 trivialUCode rep instr x = do
4363 x_code <- getAnyReg x
4369 return (Any rep code)
4373 #if i386_TARGET_ARCH
-- i386 float op: x must not be clobbered by the computation of y.
4375 trivialFCode pk instr x y = do
4376 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4377 (y_reg, y_code) <- getSomeReg y
4382 instr pk x_reg y_reg dst
4384 return (Any pk code)
4388 #if x86_64_TARGET_ARCH
-- x86_64 SSE float ops are ordinary two-operand instructions, so reuse
-- the integer scheme.
4390 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4396 trivialUFCode rep instr x = do
4397 (x_reg, x_code) <- getSomeReg x
4403 return (Any rep code)
4405 #endif /* i386_TARGET_ARCH */
4407 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4409 #if sparc_TARGET_ARCH
-- | SPARC integer two-operand code. The first equation uses the
-- immediate (RIImm) form when the right operand is an integer literal;
-- the second evaluates both operands into registers and uses RIReg.
-- NOTE(review): the "let"/"in" layout lines around the local bindings
-- are missing from this extract; the bindings themselves are as shown.
trivialCode instr x (StInt y)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
        -- reuse the operand's code; the literal rides as an immediate
        code = registerCode register tmp
        src1 = registerName register tmp
        src2 = ImmInt (fromInteger y)
        code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
    return (Any IntRep code__2)

trivialCode instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
        code1 = registerCode register1 tmp1
        src1 = registerName register1 tmp1
        code2 = registerCode register2 tmp2
        src2 = registerName register2 tmp2
        -- run both operand computations, then the register-register op
        code__2 dst = code1 `appOL` code2 `snocOL`
                      instr src1 (RIReg src2) dst
    return (Any IntRep code__2)
-- | SPARC floating-point two-operand code. Float (F) and double (DF)
-- live in notionally separate register kinds, so when the operand kinds
-- differ the F32 side is first widened via @promote@ (FxTOy F DF) and
-- the double-precision form of @instr@ is used; the result kind is F64
-- in that case (see the final 'return').
-- NOTE(review): this extract drops several lines (the `thenNat`
-- continuations binding tmp1/tmp2, the "let"/"in" keywords, and the
-- "if pk1 == pk2 then" line); surviving lines reproduced as-is.
trivialFCode pk instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat (registerRep register1)
    getNewRegNat (registerRep register2)
    getNewRegNat F64 `thenNat` \ tmp ->
        -- widen an F32 value into the shared F64 temporary
        promote x = FxTOy F DF x tmp

        pk1   = registerRep register1
        code1 = registerCode register1 tmp1
        src1  = registerName register1 tmp1

        pk2   = registerRep register2
        code2 = registerCode register2 tmp2
        src2  = registerName register2 tmp2

            -- same kinds: emit the instruction at that precision
            code1 `appOL` code2 `snocOL`
            instr (primRepToSize pk) src1 src2 dst
          else if pk1 == F32 then
            -- left operand is single: promote it, operate at DF
            code1 `snocOL` promote src1 `appOL` code2 `snocOL`
            instr DF tmp src2 dst
            -- right operand is single: promote it, operate at DF
            code1 `appOL` code2 `snocOL` promote src2 `snocOL`
            instr DF src1 tmp dst
    return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- | SPARC unary integer op: evaluate the operand, then apply @instr@
-- with the operand passed in register/immediate (RIReg) form.
trivialUCode instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
        -- NOTE(review): the "let"/"in" layout lines are missing from
        -- this extract; bindings reproduced as-is.
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any IntRep code__2)
-- | SPARC unary floating-point op at precision @pk@: evaluate the
-- operand and apply @instr@ register-to-register.
trivialUFCode pk instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat pk `thenNat` \ tmp ->
        -- NOTE(review): the "let"/"in" layout lines are missing from
        -- this extract; bindings reproduced as-is.
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)
4493 #endif /* sparc_TARGET_ARCH */
4495 #if powerpc_TARGET_ARCH
4498 Wolfgang's PowerPC version of The Rules:
4500 A slightly modified version of The Rules to take advantage of the fact
4501 that PowerPC instructions work on all registers and don't implicitly
4502 clobber any fixed registers.
4504 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4506 * If getRegister returns Any, then the code it generates may modify only:
4507 (a) fresh temporaries
4508 (b) the destination register
4509 It may *not* modify global registers, unless the global
4510 register happens to be the destination register.
4511 It may not clobber any other registers. In fact, only ccalls clobber any
4513 Also, it may not modify the counter register (used by genCCall).
4515 Corollary: If a getRegister for a subexpression returns Fixed, you need
4516 not move it to a fresh temporary before evaluating the next subexpression.
4517 The Fixed register won't be modified.
4518 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4520 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4521 the value of the destination register.
-- | PowerPC integer two-operand code. When the right operand is an
-- integer literal that fits the (signedness-dependent) immediate field
-- ('makeImmediate'), use the RIImm form; otherwise evaluate both
-- operands into registers and use RIReg.
-- NOTE(review): the "= do" line of the first equation is not visible in
-- this extract.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
        (src1, code1) <- getSomeReg x
        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
        return (Any rep code)

trivialCode rep signed instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
    return (Any rep code)
-- | Generate code for a PowerPC three-register instruction that has no
-- immediate form: evaluate both operand expressions into registers and
-- emit @instr dst src1 src2@ after their preparation code.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (reg_l, prep_l) <- getSomeReg x
    (reg_r, prep_r) <- getSomeReg y
    let assemble dst = (prep_l `appOL` prep_r) `snocOL` instr dst reg_l reg_r
    return (Any rep assemble)
-- | PowerPC unary op: evaluate the operand into a register, then apply
-- @instr@ from that register into the destination.
trivialUCode rep instr x = do
    (operand, prep) <- getSomeReg x
    let gen dst = prep `snocOL` instr dst operand
    return (Any rep gen)
-- There is no "remainder" instruction on the PPC, so we have to compute
-- it by hand as  x - (x `div` y) * y.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
                -- NOTE(review): the divide (div dst src1 src2) and the
                -- final subtract appear to be missing from this extract;
                -- only the multiply of the quotient survives. Verify
                -- against the full source.
                MULLW dst dst (RIReg src2),
    return (Any rep code)
4566 #endif /* powerpc_TARGET_ARCH */
4569 -- -----------------------------------------------------------------------------
4570 -- Coercing to/from integer/floating-point...
4572 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4573 -- conversions. We have to store temporaries in memory to move
4574 -- between the integer and the floating point register sets.
4576 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4578 -- kinds, so the value has to be computed into one kind before being
4579 -- explicitly "converted" to live in the other kind.
-- | Convert an integer of the first 'MachRep' to a float of the second.
coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
-- | Convert a float of the first 'MachRep' to an integer of the second.
coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register

#if sparc_TARGET_ARCH
-- SPARC treats float and double as separate register kinds, so it also
-- needs explicit double<->float conversions (see the FxTOy uses below).
coerceDbl2Flt :: CmmExpr -> NatM Register
coerceFlt2Dbl :: CmmExpr -> NatM Register
4589 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH

-- NOTE(review): the equation heads (name + arguments) of both
-- conversions below are missing from this extract; by position these
-- should be coerceInt2FP (first) and coerceFP2Int (second). Both bounce
-- the value through memory at (spRel 0) to move between the integer and
-- floating-point register files — confirm against the full source.

  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ reg ->
        code = registerCode register reg
        src = registerName register reg
        code__2 dst = code . mkSeqInstrs [
            -- reload from the stack slot into an FP register
            LD TF dst (spRel 0),
    return (Any F64 code__2)

  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code . mkSeqInstrs [
            -- spill the FP value so the integer side can load it
            ST TF tmp (spRel 0),
    return (Any IntRep code__2)

#endif /* alpha_TARGET_ARCH */
4624 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4626 #if i386_TARGET_ARCH
-- | i386: int -> float using the x87 GITOF/GITOD pseudo-instructions,
-- selected by the destination precision.
coerceInt2FP from to x = do
  (x_reg, x_code) <- getSomeReg x
  -- NOTE(review): the "let" introducing these bindings is not visible
  -- in this extract.
  opc  = case to of F32 -> GITOF; F64 -> GITOD
  code dst = x_code `snocOL` opc x_reg dst
  -- ToDo: works for non-I32 reps?
  return (Any to code)
-- | i386: float -> int using the x87 GFTOI/GDTOI pseudo-instructions,
-- selected by the source precision.
coerceFP2Int from to x = do
  (x_reg, x_code) <- getSomeReg x
  -- NOTE(review): the "let" introducing these bindings is not visible
  -- in this extract.
  opc  = case from of F32 -> GFTOI; F64 -> GDTOI
  code dst = x_code `snocOL` opc x_reg dst
  -- ToDo: works for non-I32 reps?
  return (Any to code)
4648 #endif /* i386_TARGET_ARCH */
4650 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4652 #if x86_64_TARGET_ARCH
-- | x86_64: float -> int via the SSE convert instructions
-- (CVTSS2SI / CVTSD2SI), selected by the source precision.
coerceFP2Int from to x = do
  (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
  -- NOTE(review): the "let" line is not visible in this extract.
  opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
  code dst = x_code `snocOL` opc x_op dst
  return (Any to code) -- works even if the destination rep is <I32
-- | x86_64: int -> float via the SSE convert instructions
-- (CVTSI2SS / CVTSI2SD), selected by the destination precision.
coerceInt2FP from to x = do
  (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
  -- NOTE(review): the "let" line is not visible in this extract.
  opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
  code dst = x_code `snocOL` opc x_op dst
  return (Any to code) -- works even if the destination rep is <I32
-- | x86_64: float <-> double via CVTSD2SS / CVTSS2SD, selected by the
-- destination precision (the source precision is implied).
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
  (x_reg, x_code) <- getSomeReg x
  -- NOTE(review): the "let" line is not visible in this extract.
  opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
  code dst = x_code `snocOL` opc x_reg dst
  return (Any to code)
4681 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

-- NOTE(review): the equation head (presumably coerceInt2FP's
-- name/arguments) and the let/in layout lines are missing from this
-- extract. The visible code stores the int to a stack slot, reloads it
-- into an FP register, and converts in place with FxTOy.
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ reg ->
        code = registerCode register reg
        src = registerName register reg
        code__2 dst = code `appOL` toOL [
            ST W src (spRel (-2)),
            LD W (spRel (-2)) dst,
            FxTOy W (primRepToSize pk) dst dst]
    return (Any pk code__2)
-- | SPARC: float -> int. Convert inside the FP register file
-- (FxTOy ... W), then bounce the result through memory at spRel (-2)
-- to reach the integer registers.
coerceFP2Int fprep x
  = ASSERT(fprep == F64 || fprep == F32)
    getRegister x `thenNat` \ register ->
    getNewRegNat fprep `thenNat` \ reg ->
    getNewRegNat F32 `thenNat` \ tmp ->
        -- NOTE(review): the let/in layout lines are missing from this
        -- extract; bindings reproduced as-is.
        code = registerCode register reg
        src = registerName register reg
        code__2 dst = code `appOL` toOL [
            FxTOy (primRepToSize fprep) W src tmp,
            ST W tmp (spRel (-2)),
            LD W (spRel (-2)) dst]
    return (Any IntRep code__2)
-- NOTE(review): the heads of the next two equations are missing from
-- this extract; judging by the FxTOy directions these are coerceDbl2Flt
-- (DF -> F) followed by coerceFlt2Dbl (F -> DF) — confirm against the
-- full source.
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
    let code = registerCode register tmp
        src = registerName register tmp
        -- single FxTOy converts between the two FP kinds in-register
        (\dst -> code `snocOL` FxTOy DF F src dst))

  = getRegister x `thenNat` \ register ->
    getNewRegNat F32 `thenNat` \ tmp ->
    let code = registerCode register tmp
        src = registerName register tmp
        (\dst -> code `snocOL` FxTOy F DF src dst))

#endif /* sparc_TARGET_ARCH */
4737 #if powerpc_TARGET_ARCH
-- | PowerPC: int -> float. There is no direct int->FP move, so this
-- builds the classic "magic double": 0x43300000 in the high word and
-- (src XOR 0x80000000) in the low word are stored to the stack, loaded
-- as an F64, and the bias constant (fetched via @lbl@/@dynRef@) is
-- subtracted off to recover the signed value; if the target is F32 the
-- result is rounded to single with FRSP.
-- NOTE(review): several lines are missing from this extract (the "let"
-- introducing code', the static-data emission around the two
-- CmmStaticLit lines, and the FP load of the bias constant) — verify
-- against the full source.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat I32
    ftmp <- getNewRegNat F64
    dynRef <- cmmMakeDynamicReference addImportNat False lbl
    Amode addr addr_code <- getAmode dynRef
        code' dst = code `appOL` maybe_exts `appOL` toOL [
                -- the bias constant, as two 32-bit static words
                CmmStaticLit (CmmInt 0x43300000 I32),
                CmmStaticLit (CmmInt 0x80000000 I32)],
            XORIS itmp src (ImmInt 0x8000),
            ST I32 itmp (spRel 3),
            LIS itmp (ImmInt 0x4330),
            ST I32 itmp (spRel 2),
            LD F64 ftmp (spRel 2)
            ] `appOL` addr_code `appOL` toOL [
            FSUB F64 dst ftmp dst
            ] `appOL` maybe_frsp dst

        -- sub-word ints must be sign-extended before the trick works
        maybe_exts = case fromRep of
                        I8 -> unitOL $ EXTS I8 src src
                        I16 -> unitOL $ EXTS I16 src src

        -- narrow to single precision only when the target is F32
        maybe_frsp dst = case toRep of
                            F32 -> unitOL $ FRSP dst dst
    return (Any toRep code')
-- | PowerPC: float -> int. Convert inside the FP register file, spill
-- the 64-bit result to the stack, and read back the low 32-bit word.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
    -- NOTE(review): the "let" line and the convert-to-int instruction
    -- (between "convert to int in FP reg" and the store) are missing
    -- from this extract — verify against the full source.
        code' dst = code `appOL` toOL [
            -- convert to int in FP reg
            -- store value (64bit) from FP to stack
            ST F64 tmp (spRel 2),
            -- read low word of value (high word is undefined)
            LD I32 dst (spRel 3)]
    return (Any toRep code')
4783 #endif /* powerpc_TARGET_ARCH */
4786 -- -----------------------------------------------------------------------------
4787 -- eXTRA_STK_ARGS_HERE
4789 -- We (allegedly) put the first six C-call arguments in registers;
4790 -- where do we start putting the rest of them?
4792 -- Moved from MachInstrs (SDM):
#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- | Stack offset (in words) at which C-call arguments beyond the
-- register-passed ones are placed: 0 on Alpha, 23 on SPARC; any other
-- arch falls into the (deliberately undefined) ??? branch.
-- NOTE(review): the line carrying the binder name is missing from this
-- extract; only the signature and right-hand side are visible.
eXTRA_STK_ARGS_HERE :: Int
  = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))