compiler/nativeGen/MachCodeGen.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Generating machine code (instruction selection)
   4 --
   5 -- (c) The University of Glasgow 1996-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 -- This is a big module, but, if you pay attention to
  10 -- (a) the sectioning, (b) the type signatures, and
  11 -- (c) the #if blah_TARGET_ARCH} things, the
  12 -- structure should not be too overwhelming.
  13
  14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  15
  16 #include "HsVersions.h"
  17 #include "nativeGen/NCG.h"
  18 #include "MachDeps.h"
  19
  20 -- NCG stuff:
  21 import MachInstrs
  22 import MachRegs
  23 import NCGMonad
  24 import PositionIndependentCode
  25 import RegAllocInfo ( mkBranchInstr )
  26
  27 -- Our intermediate code:
  28 import PprCmm           ( pprExpr )
  29 import Cmm
  30 import MachOp
  31 import CLabel
  32 import ClosureInfo      ( C_SRT(..) )
  33
  34 -- The rest:
  35 import StaticFlags      ( opt_PIC )
  36 import ForeignCall      ( CCallConv(..) )
  37 import OrdList
  38 import Pretty
  39 import Outputable
  40 import FastString
  41 import FastTypes        ( isFastTrue )
  42 import Constants        ( wORD_SIZE )
  43
  44 #ifdef DEBUG
  45 import Outputable       ( assertPanic )
  46 import Debug.Trace      ( trace )
  47 #endif
  48
  49 import Control.Monad    ( mapAndUnzipM )
  50 import Data.Maybe       ( fromJust )
  51 import Data.Bits
  52 import Data.Word
  53 import Data.Int
  54
  55 -- -----------------------------------------------------------------------------
  56 -- Top-level of the instruction selector
  57
  58 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  59 -- They are really trees of insns to facilitate fast appending, where a
  60 -- left-to-right traversal (pre-order?) yields the insns in the correct
  61 -- order.
  62
  63 type InstrBlock = OrdList Instr
  64
  65 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  66 cmmTopCodeGen (CmmProc info lab params blocks) = do
  67   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  68   picBaseMb <- getPicBaseMaybeNat
  69   let proc = CmmProc info lab params (concat nat_blocks)
  70       tops = proc : concat statics
  71   case picBaseMb of
  72       Just picBase -> initializePicBase picBase tops
  73       Nothing -> return tops
  74
  75 cmmTopCodeGen (CmmData sec dat) = do
  76   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  77
  78 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  79 basicBlockCodeGen (BasicBlock id stmts) = do
  80   instrs <- stmtsToInstrs stmts
  81   -- code generation may introduce new basic block boundaries, which
  82   -- are indicated by the NEWBLOCK instruction.  We must split up the
  83   -- instruction stream into basic blocks again.  Also, we extract
  84   -- LDATAs here too.
  85   let
  86         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  87
  88         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  89           = ([], BasicBlock id instrs : blocks, statics)
  90         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  91           = (instrs, blocks, CmmData sec dat:statics)
  92         mkBlocks instr (instrs,blocks,statics)
  93           = (instr:instrs, blocks, statics)
  94   -- in
  95   return (BasicBlock id top : other_blocks, statics)
  96
  97 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  98 stmtsToInstrs stmts
  99    = do instrss <- mapM stmtToInstrs stmts
 100         return (concatOL instrss)
 101
 102 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 103 stmtToInstrs stmt = case stmt of
 104     CmmNop         -> return nilOL
 105     CmmComment s   -> return (unitOL (COMMENT s))
 106
 107     CmmAssign reg src
 108       | isFloatingRep kind -> assignReg_FltCode kind reg src
 109 #if WORD_SIZE_IN_BITS==32
 110       | kind == I64        -> assignReg_I64Code      reg src
 111 #endif
 112       | otherwise          -> assignReg_IntCode kind reg src
 113         where kind = cmmRegRep reg
 114
 115     CmmStore addr src
 116       | isFloatingRep kind -> assignMem_FltCode kind addr src
 117 #if WORD_SIZE_IN_BITS==32
 118       | kind == I64      -> assignMem_I64Code      addr src
 119 #endif
 120       | otherwise        -> assignMem_IntCode kind addr src
 121         where kind = cmmExprRep src
 122
 123     CmmCall target result_regs args _
 124        -> genCCall target result_regs args
 125
 126     CmmBranch id          -> genBranch id
 127     CmmCondBranch arg id  -> genCondJump id arg
 128     CmmSwitch arg ids     -> genSwitch arg ids
 129     CmmJump arg params    -> genJump arg
 130
 131 -- -----------------------------------------------------------------------------
 132 -- General things for putting together code sequences
 133
 134 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 135 -- CmmExprs into CmmRegOff?
 136 mangleIndexTree :: CmmExpr -> CmmExpr
 137 mangleIndexTree (CmmRegOff reg off)
 138   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 139   where rep = cmmRegRep reg
 140
 141 -- -----------------------------------------------------------------------------
 142 --  Code gen for 64-bit arithmetic on 32-bit platforms
 143
 144 {-
 145 Simple support for generating 64-bit code (ie, 64 bit values and 64
 146 bit assignments) on 32-bit platforms.  Unlike the main code generator
 147 we merely shoot for generating working code as simply as possible, and
 148 pay little attention to code quality.  Specifically, there is no
 149 attempt to deal cleverly with the fixed-vs-floating register
 150 distinction; all values are generated into (pairs of) floating
 151 registers, even if this would mean some redundant reg-reg moves as a
 152 result.  Only one of the VRegUniques is returned, since it will be
 153 of the VRegUniqueLo form, and the upper-half VReg can be determined
 154 by applying getHiVRegFromLo to it.
 155 -}
 156
 157 data ChildCode64        -- a.k.a "Register64"
 158    = ChildCode64
 159         InstrBlock      -- code
 160         Reg             -- the lower 32-bit temporary which contains the
 161                         -- result; use getHiVRegFromLo to find the other
 162                         -- VRegUnique.  Rules of this simplified insn
 163                         -- selection game are therefore that the returned
 164                         -- Reg may be modified
 165
 166 #if WORD_SIZE_IN_BITS==32
 167 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 168 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 169 #endif
 170
 171 #ifndef x86_64_TARGET_ARCH
 172 iselExpr64        :: CmmExpr -> NatM ChildCode64
 173 #endif
 174
 175 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 176
 177 #if i386_TARGET_ARCH
 178
 179 assignMem_I64Code addrTree valueTree = do
 180   Amode addr addr_code <- getAmode addrTree
 181   ChildCode64 vcode rlo <- iselExpr64 valueTree
 182   let
 183         rhi = getHiVRegFromLo rlo
 184
 185         -- Little-endian store
 186         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 187         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 188   -- in
 189   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 190
 191
 192 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 193    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 194    let
 195          r_dst_lo = mkVReg u_dst I32
 196          r_dst_hi = getHiVRegFromLo r_dst_lo
 197          r_src_hi = getHiVRegFromLo r_src_lo
 198          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 199          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 200    -- in
 201    return (
 202         vcode `snocOL` mov_lo `snocOL` mov_hi
 203      )
 204
 205 assignReg_I64Code lvalue valueTree
 206    = panic "assignReg_I64Code(i386): invalid lvalue"
 207
 208 ------------
 209
 210 iselExpr64 (CmmLit (CmmInt i _)) = do
 211   (rlo,rhi) <- getNewRegPairNat I32
 212   let
 213         r = fromIntegral (fromIntegral i :: Word32)
 214         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 215         code = toOL [
 216                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 217                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 218                 ]
 219   -- in
 220   return (ChildCode64 code rlo)
 221
 222 iselExpr64 (CmmLoad addrTree I64) = do
 223    Amode addr addr_code <- getAmode addrTree
 224    (rlo,rhi) <- getNewRegPairNat I32
 225    let
 226         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 227         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 228    -- in
 229    return (
 230             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 231                         rlo
 232      )
 233
 234 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 235    = return (ChildCode64 nilOL (mkVReg vu I32))
 236
 237 -- we handle addition, but rather badly
 238 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 239    ChildCode64 code1 r1lo <- iselExpr64 e1
 240    (rlo,rhi) <- getNewRegPairNat I32
 241    let
 242         r = fromIntegral (fromIntegral i :: Word32)
 243         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 244         r1hi = getHiVRegFromLo r1lo
 245         code =  code1 `appOL`
 246                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 247                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 248                        MOV I32 (OpReg r1hi) (OpReg rhi),
 249                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 250    -- in
 251    return (ChildCode64 code rlo)
 252
 253 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 254    ChildCode64 code1 r1lo <- iselExpr64 e1
 255    ChildCode64 code2 r2lo <- iselExpr64 e2
 256    (rlo,rhi) <- getNewRegPairNat I32
 257    let
 258         r1hi = getHiVRegFromLo r1lo
 259         r2hi = getHiVRegFromLo r2lo
 260         code =  code1 `appOL`
 261                 code2 `appOL`
 262                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 263                        ADD I32 (OpReg r2lo) (OpReg rlo),
 264                        MOV I32 (OpReg r1hi) (OpReg rhi),
 265                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 266    -- in
 267    return (ChildCode64 code rlo)
 268
 269 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 270      fn <- getAnyReg expr
 271      r_dst_lo <-  getNewRegNat I32
 272      let r_dst_hi = getHiVRegFromLo r_dst_lo
 273          code = fn r_dst_lo
 274      return (
 275              ChildCode64 (code `snocOL`
 276                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 277                           r_dst_lo
 278             )
 279
 280 iselExpr64 expr
 281    = pprPanic "iselExpr64(i386)" (ppr expr)
 282
 283 #endif /* i386_TARGET_ARCH */
 284
 285 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 286
 287 #if sparc_TARGET_ARCH
 288
 289 assignMem_I64Code addrTree valueTree = do
 290      Amode addr addr_code <- getAmode addrTree
 291      ChildCode64 vcode rlo <- iselExpr64 valueTree
 292      (src, code) <- getSomeReg addrTree
 293      let
 294          rhi = getHiVRegFromLo rlo
 295          -- Big-endian store
 296          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 297          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 298      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 299
 300 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 301      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 302      let
 303          r_dst_lo = mkVReg u_dst pk
 304          r_dst_hi = getHiVRegFromLo r_dst_lo
 305          r_src_hi = getHiVRegFromLo r_src_lo
 306          mov_lo = mkMOV r_src_lo r_dst_lo
 307          mov_hi = mkMOV r_src_hi r_dst_hi
 308          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 309      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 310 assignReg_I64Code lvalue valueTree
 311    = panic "assignReg_I64Code(sparc): invalid lvalue"
 312
 313
 314 -- Don't delete this -- it's very handy for debugging.
 315 --iselExpr64 expr
 316 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 317 --   = panic "iselExpr64(???)"
 318
 319 iselExpr64 (CmmLoad addrTree I64) = do
 320      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 321      rlo <- getNewRegNat I32
 322      let rhi = getHiVRegFromLo rlo
 323          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 324          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 325      return (
 326             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 327                          rlo
 328           )
 329
 330 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
 331      r_dst_lo <-  getNewRegNat I32
 332      let r_dst_hi = getHiVRegFromLo r_dst_lo
 333          r_src_lo = mkVReg uq I32
 334          r_src_hi = getHiVRegFromLo r_src_lo
 335          mov_lo = mkMOV r_src_lo r_dst_lo
 336          mov_hi = mkMOV r_src_hi r_dst_hi
 337          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 338      return (
 339             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 340          )
 341
 342 iselExpr64 expr
 343    = pprPanic "iselExpr64(sparc)" (ppr expr)
 344
 345 #endif /* sparc_TARGET_ARCH */
 346
 347 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 348
 349 #if powerpc_TARGET_ARCH
 350
 351 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 352 getI64Amodes addrTree = do
 353     Amode hi_addr addr_code <- getAmode addrTree
 354     case addrOffset hi_addr 4 of
 355         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 356         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 357                            return (AddrRegImm hi_ptr (ImmInt 0),
 358                                    AddrRegImm hi_ptr (ImmInt 4),
 359                                    code)
 360
 361 assignMem_I64Code addrTree valueTree = do
 362         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 363         ChildCode64 vcode rlo <- iselExpr64 valueTree
 364         let
 365                 rhi = getHiVRegFromLo rlo
 366
 367                 -- Big-endian store
 368                 mov_hi = ST I32 rhi hi_addr
 369                 mov_lo = ST I32 rlo lo_addr
 370         -- in
 371         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 372
 373 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 374    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 375    let
 376          r_dst_lo = mkVReg u_dst I32
 377          r_dst_hi = getHiVRegFromLo r_dst_lo
 378          r_src_hi = getHiVRegFromLo r_src_lo
 379          mov_lo = MR r_dst_lo r_src_lo
 380          mov_hi = MR r_dst_hi r_src_hi
 381    -- in
 382    return (
 383         vcode `snocOL` mov_lo `snocOL` mov_hi
 384      )
 385
 386 assignReg_I64Code lvalue valueTree
 387    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 388
 389
 390 -- Don't delete this -- it's very handy for debugging.
 391 --iselExpr64 expr
 392 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 393 --   = panic "iselExpr64(???)"
 394
 395 iselExpr64 (CmmLoad addrTree I64) = do
 396     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 397     (rlo, rhi) <- getNewRegPairNat I32
 398     let mov_hi = LD I32 rhi hi_addr
 399         mov_lo = LD I32 rlo lo_addr
 400     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 401                          rlo
 402
 403 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 404    = return (ChildCode64 nilOL (mkVReg vu I32))
 405
 406 iselExpr64 (CmmLit (CmmInt i _)) = do
 407   (rlo,rhi) <- getNewRegPairNat I32
 408   let
 409         half0 = fromIntegral (fromIntegral i :: Word16)
 410         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 411         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 412         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 413
 414         code = toOL [
 415                 LIS rlo (ImmInt half1),
 416                 OR rlo rlo (RIImm $ ImmInt half0),
 417                 LIS rhi (ImmInt half3),
 418                 OR rlo rlo (RIImm $ ImmInt half2)
 419                 ]
 420   -- in
 421   return (ChildCode64 code rlo)
 422
 423 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 424    ChildCode64 code1 r1lo <- iselExpr64 e1
 425    ChildCode64 code2 r2lo <- iselExpr64 e2
 426    (rlo,rhi) <- getNewRegPairNat I32
 427    let
 428         r1hi = getHiVRegFromLo r1lo
 429         r2hi = getHiVRegFromLo r2lo
 430         code =  code1 `appOL`
 431                 code2 `appOL`
 432                 toOL [ ADDC rlo r1lo r2lo,
 433                        ADDE rhi r1hi r2hi ]
 434    -- in
 435    return (ChildCode64 code rlo)
 436
 437 iselExpr64 expr
 438    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 439
 440 #endif /* powerpc_TARGET_ARCH */
 441
 442
 443 -- -----------------------------------------------------------------------------
 444 -- The 'Register' type
 445
 446 -- 'Register's passed up the tree.  If the stix code forces the register
 447 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 448 -- otherwise, it comes out as @Any@, and the parent can decide which
 449 -- register to put it in.
 450
 451 data Register
 452   = Fixed   MachRep Reg InstrBlock
 453   | Any     MachRep (Reg -> InstrBlock)
 454
 455 swizzleRegisterRep :: Register -> MachRep -> Register
 456 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 457 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 458
 459
 460 -- -----------------------------------------------------------------------------
 461 -- Utils based on getRegister, below
 462
 463 -- The dual to getAnyReg: compute an expression into a register, but
 464 -- we don't mind which one it is.
 465 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 466 getSomeReg expr = do
 467   r <- getRegister expr
 468   case r of
 469     Any rep code -> do
 470         tmp <- getNewRegNat rep
 471         return (tmp, code tmp)
 472     Fixed _ reg code ->
 473         return (reg, code)
 474
 475 -- -----------------------------------------------------------------------------
 476 -- Grab the Reg for a CmmReg
 477
 478 getRegisterReg :: CmmReg -> Reg
 479
 480 getRegisterReg (CmmLocal (LocalReg u pk _))
 481   = mkVReg u pk
 482
 483 getRegisterReg (CmmGlobal mid)
 484   = case get_GlobalReg_reg_or_addr mid of
 485        Left (RealReg rrno) -> RealReg rrno
 486        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 487           -- By this stage, the only MagicIds remaining should be the
 488           -- ones which map to a real machine register on this
 489           -- platform.  Hence ...
 490
 491
 492 -- -----------------------------------------------------------------------------
 493 -- Generate code to get a subtree into a Register
 494
 495 -- Don't delete this -- it's very handy for debugging.
 496 --getRegister expr
 497 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 498 --   = panic "getRegister(???)"
 499
 500 getRegister :: CmmExpr -> NatM Register
 501
 502 #if !x86_64_TARGET_ARCH
 503     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 504     -- register, it can only be used for rip-relative addressing.
 505 getRegister (CmmReg (CmmGlobal PicBaseReg))
 506   = do
 507       reg <- getPicBaseNat wordRep
 508       return (Fixed wordRep reg nilOL)
 509 #endif
 510
 511 getRegister (CmmReg reg)
 512   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 513
 514 getRegister tree@(CmmRegOff _ _)
 515   = getRegister (mangleIndexTree tree)
 516
 517
 518 #if WORD_SIZE_IN_BITS==32
 519     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 520     -- TO_W_(x), TO_W_(x >> 32)
 521
 522 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 523              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 524   ChildCode64 code rlo <- iselExpr64 x
 525   return $ Fixed I32 (getHiVRegFromLo rlo) code
 526
 527 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 528              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 529   ChildCode64 code rlo <- iselExpr64 x
 530   return $ Fixed I32 (getHiVRegFromLo rlo) code
 531
 532 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 533   ChildCode64 code rlo <- iselExpr64 x
 534   return $ Fixed I32 rlo code
 535
 536 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 537   ChildCode64 code rlo <- iselExpr64 x
 538   return $ Fixed I32 rlo code
 539
 540 #endif
 541
 542 -- end of machine-"independent" bit; here we go on the rest...
 543
 544 #if alpha_TARGET_ARCH
 545
 546 getRegister (StDouble d)
 547   = getBlockIdNat                   `thenNat` \ lbl ->
 548     getNewRegNat PtrRep             `thenNat` \ tmp ->
 549     let code dst = mkSeqInstrs [
 550             LDATA RoDataSegment lbl [
 551                     DATA TF [ImmLab (rational d)]
 552                 ],
 553             LDA tmp (AddrImm (ImmCLbl lbl)),
 554             LD TF dst (AddrReg tmp)]
 555     in
 556         return (Any F64 code)
 557
 558 getRegister (StPrim primop [x]) -- unary PrimOps
 559   = case primop of
 560       IntNegOp -> trivialUCode (NEG Q False) x
 561
 562       NotOp    -> trivialUCode NOT x
 563
 564       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 565       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 566
 567       OrdOp -> coerceIntCode IntRep x
 568       ChrOp -> chrCode x
 569
 570       Float2IntOp  -> coerceFP2Int    x
 571       Int2FloatOp  -> coerceInt2FP pr x
 572       Double2IntOp -> coerceFP2Int    x
 573       Int2DoubleOp -> coerceInt2FP pr x
 574
 575       Double2FloatOp -> coerceFltCode x
 576       Float2DoubleOp -> coerceFltCode x
 577
 578       other_op -> getRegister (StCall fn CCallConv F64 [x])
 579         where
 580           fn = case other_op of
 581                  FloatExpOp    -> FSLIT("exp")
 582                  FloatLogOp    -> FSLIT("log")
 583                  FloatSqrtOp   -> FSLIT("sqrt")
 584                  FloatSinOp    -> FSLIT("sin")
 585                  FloatCosOp    -> FSLIT("cos")
 586                  FloatTanOp    -> FSLIT("tan")
 587                  FloatAsinOp   -> FSLIT("asin")
 588                  FloatAcosOp   -> FSLIT("acos")
 589                  FloatAtanOp   -> FSLIT("atan")
 590                  FloatSinhOp   -> FSLIT("sinh")
 591                  FloatCoshOp   -> FSLIT("cosh")
 592                  FloatTanhOp   -> FSLIT("tanh")
 593                  DoubleExpOp   -> FSLIT("exp")
 594                  DoubleLogOp   -> FSLIT("log")
 595                  DoubleSqrtOp  -> FSLIT("sqrt")
 596                  DoubleSinOp   -> FSLIT("sin")
 597                  DoubleCosOp   -> FSLIT("cos")
 598                  DoubleTanOp   -> FSLIT("tan")
 599                  DoubleAsinOp  -> FSLIT("asin")
 600                  DoubleAcosOp  -> FSLIT("acos")
 601                  DoubleAtanOp  -> FSLIT("atan")
 602                  DoubleSinhOp  -> FSLIT("sinh")
 603                  DoubleCoshOp  -> FSLIT("cosh")
 604                  DoubleTanhOp  -> FSLIT("tanh")
 605   where
 606     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 607
 608 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 609   = case primop of
 610       CharGtOp -> trivialCode (CMP LTT) y x
 611       CharGeOp -> trivialCode (CMP LE) y x
 612       CharEqOp -> trivialCode (CMP EQQ) x y
 613       CharNeOp -> int_NE_code x y
 614       CharLtOp -> trivialCode (CMP LTT) x y
 615       CharLeOp -> trivialCode (CMP LE) x y
 616
 617       IntGtOp  -> trivialCode (CMP LTT) y x
 618       IntGeOp  -> trivialCode (CMP LE) y x
 619       IntEqOp  -> trivialCode (CMP EQQ) x y
 620       IntNeOp  -> int_NE_code x y
 621       IntLtOp  -> trivialCode (CMP LTT) x y
 622       IntLeOp  -> trivialCode (CMP LE) x y
 623
 624       WordGtOp -> trivialCode (CMP ULT) y x
 625       WordGeOp -> trivialCode (CMP ULE) x y
 626       WordEqOp -> trivialCode (CMP EQQ)  x y
 627       WordNeOp -> int_NE_code x y
 628       WordLtOp -> trivialCode (CMP ULT) x y
 629       WordLeOp -> trivialCode (CMP ULE) x y
 630
 631       AddrGtOp -> trivialCode (CMP ULT) y x
 632       AddrGeOp -> trivialCode (CMP ULE) y x
 633       AddrEqOp -> trivialCode (CMP EQQ)  x y
 634       AddrNeOp -> int_NE_code x y
 635       AddrLtOp -> trivialCode (CMP ULT) x y
 636       AddrLeOp -> trivialCode (CMP ULE) x y
 637
 638       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 639       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 640       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 641       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 642       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 643       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 644
 645       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 646       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 647       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 648       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 649       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 650       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 651
 652       IntAddOp  -> trivialCode (ADD Q False) x y
 653       IntSubOp  -> trivialCode (SUB Q False) x y
 654       IntMulOp  -> trivialCode (MUL Q False) x y
 655       IntQuotOp -> trivialCode (DIV Q False) x y
 656       IntRemOp  -> trivialCode (REM Q False) x y
 657
 658       WordAddOp  -> trivialCode (ADD Q False) x y
 659       WordSubOp  -> trivialCode (SUB Q False) x y
 660       WordMulOp  -> trivialCode (MUL Q False) x y
 661       WordQuotOp -> trivialCode (DIV Q True) x y
 662       WordRemOp  -> trivialCode (REM Q True) x y
 663
 664       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 665       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 666       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 667       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 668
 669       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 670       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 671       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 672       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 673
 674       AddrAddOp  -> trivialCode (ADD Q False) x y
 675       AddrSubOp  -> trivialCode (SUB Q False) x y
 676       AddrRemOp  -> trivialCode (REM Q True) x y
 677
 678       AndOp  -> trivialCode AND x y
 679       OrOp   -> trivialCode OR  x y
 680       XorOp  -> trivialCode XOR x y
 681       SllOp  -> trivialCode SLL x y
 682       SrlOp  -> trivialCode SRL x y
 683
 684       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 685       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 686       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 687
 688       FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 689       DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
 690   where
 691     {- ------------------------------------------------------------
 692         Some bizarre special code for getting condition codes into
 693         registers.  Integer non-equality is a test for equality
 694         followed by an XOR with 1.  (Integer comparisons always set
 695         the result register to 0 or 1.)  Floating point comparisons of
 696         any kind leave the result in a floating point register, so we
 697         need to wrangle an integer register out of things.
 698     -}
 699     int_NE_code :: StixTree -> StixTree -> NatM Register
 700
 701     int_NE_code x y
 702       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 703         getNewRegNat IntRep             `thenNat` \ tmp ->
 704         let
 705             code = registerCode register tmp
 706             src  = registerName register tmp
 707             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 708         in
 709         return (Any IntRep code__2)
 710
 711     {- ------------------------------------------------------------
 712         Comments for int_NE_code also apply to cmpF_code
 713     -}
 714     cmpF_code
 715         :: (Reg -> Reg -> Reg -> Instr)
 716         -> Cond
 717         -> StixTree -> StixTree
 718         -> NatM Register
 719
 720     cmpF_code instr cond x y
 721       = trivialFCode pr instr x y       `thenNat` \ register ->
 722         getNewRegNat F64                `thenNat` \ tmp ->
 723         getBlockIdNat                   `thenNat` \ lbl ->
 724         let
 725             code = registerCode register tmp
 726             result  = registerName register tmp
 727
 728             code__2 dst = code . mkSeqInstrs [
 729                 OR zeroh (RIImm (ImmInt 1)) dst,
 730                 BF cond  result (ImmCLbl lbl),
 731                 OR zeroh (RIReg zeroh) dst,
 732                 NEWBLOCK lbl]
 733         in
 734         return (Any IntRep code__2)
 735       where
 736         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 737       ------------------------------------------------------------
 738
 739 getRegister (CmmLoad pk mem)
 740   = getAmode mem                    `thenNat` \ amode ->
 741     let
 742         code = amodeCode amode
 743         src   = amodeAddr amode
 744         size = primRepToSize pk
 745         code__2 dst = code . mkSeqInstr (LD size dst src)
 746     in
 747     return (Any pk code__2)
 748
 749 getRegister (StInt i)
 750   | fits8Bits i
 751   = let
 752         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 753     in
 754     return (Any IntRep code)
 755   | otherwise
 756   = let
 757         code dst = mkSeqInstr (LDI Q dst src)
 758     in
 759     return (Any IntRep code)
 760   where
 761     src = ImmInt (fromInteger i)
 762
 763 getRegister leaf
 764   | isJust imm
 765   = let
 766         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 767     in
 768     return (Any PtrRep code)
 769   where
 770     imm = maybeImm leaf
 771     imm__2 = case imm of Just x -> x
 772
 773 #endif /* alpha_TARGET_ARCH */
 774
 775 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 776
 777 #if i386_TARGET_ARCH
 778
 779 getRegister (CmmLit (CmmFloat f F32)) = do
 780     lbl <- getNewLabelNat
 781     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 782     Amode addr addr_code <- getAmode dynRef
 783     let code dst =
 784             LDATA ReadOnlyData
 785                         [CmmDataLabel lbl,
 786                          CmmStaticLit (CmmFloat f F32)]
 787             `consOL` (addr_code `snocOL`
 788             GLD F32 addr dst)
 789     -- in
 790     return (Any F32 code)
 791
 792
 793 getRegister (CmmLit (CmmFloat d F64))
 794   | d == 0.0
 795   = let code dst = unitOL (GLDZ dst)
 796     in  return (Any F64 code)
 797
 798   | d == 1.0
 799   = let code dst = unitOL (GLD1 dst)
 800     in  return (Any F64 code)
 801
 802   | otherwise = do
 803     lbl <- getNewLabelNat
 804     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
 805     Amode addr addr_code <- getAmode dynRef
 806     let code dst =
 807             LDATA ReadOnlyData
 808                         [CmmDataLabel lbl,
 809                          CmmStaticLit (CmmFloat d F64)]
 810             `consOL` (addr_code `snocOL`
 811             GLD F64 addr dst)
 812     -- in
 813     return (Any F64 code)
 814
 815 #endif /* i386_TARGET_ARCH */
 816
 817 #if x86_64_TARGET_ARCH
 818
 819 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 820    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 821         -- I don't know why there are xorpd, xorps, and pxor instructions.
 822         -- They all appear to do the same thing --SDM
 823    return (Any rep code)
 824
 825 getRegister (CmmLit (CmmFloat f rep)) = do
 826     lbl <- getNewLabelNat
 827     let code dst = toOL [
 828             LDATA ReadOnlyData
 829                         [CmmDataLabel lbl,
 830                          CmmStaticLit (CmmFloat f rep)],
 831             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 832             ]
 833     -- in
 834     return (Any rep code)
 835
 836 #endif /* x86_64_TARGET_ARCH */
 837
 838 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 839
 840 -- catch simple cases of zero- or sign-extended load
 841 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 842   code <- intLoadCode (MOVZxL I8) addr
 843   return (Any I32 code)
 844
 845 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 846   code <- intLoadCode (MOVSxL I8) addr
 847   return (Any I32 code)
 848
 849 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 850   code <- intLoadCode (MOVZxL I16) addr
 851   return (Any I32 code)
 852
 853 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 854   code <- intLoadCode (MOVSxL I16) addr
 855   return (Any I32 code)
 856
 857 #endif
 858
 859 #if x86_64_TARGET_ARCH
 860
 861 -- catch simple cases of zero- or sign-extended load
 862 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 863   code <- intLoadCode (MOVZxL I8) addr
 864   return (Any I64 code)
 865
 866 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 867   code <- intLoadCode (MOVSxL I8) addr
 868   return (Any I64 code)
 869
 870 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 871   code <- intLoadCode (MOVZxL I16) addr
 872   return (Any I64 code)
 873
 874 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 875   code <- intLoadCode (MOVSxL I16) addr
 876   return (Any I64 code)
 877
 878 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 879   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 880   return (Any I64 code)
 881
 882 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 883   code <- intLoadCode (MOVSxL I32) addr
 884   return (Any I64 code)
 885
 886 #endif
 887
 888 #if x86_64_TARGET_ARCH
 889 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 890                                      CmmLit displacement])
 891     = return $ Any I64 (\dst -> unitOL $
 892         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 893 #endif
 894
 895 #if x86_64_TARGET_ARCH
 896 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 897   x_code <- getAnyReg x
 898   lbl <- getNewLabelNat
 899   let
 900     code dst = x_code dst `appOL` toOL [
 901         -- This is how gcc does it, so it can't be that bad:
 902         LDATA ReadOnlyData16 [
 903                 CmmAlign 16,
 904                 CmmDataLabel lbl,
 905                 CmmStaticLit (CmmInt 0x80000000 I32),
 906                 CmmStaticLit (CmmInt 0 I32),
 907                 CmmStaticLit (CmmInt 0 I32),
 908                 CmmStaticLit (CmmInt 0 I32)
 909         ],
 910         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 911                 -- xorps, so we need the 128-bit constant
 912                 -- ToDo: rip-relative
 913         ]
 914   --
 915   return (Any F32 code)
 916
 917 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 918   x_code <- getAnyReg x
 919   lbl <- getNewLabelNat
 920   let
 921         -- This is how gcc does it, so it can't be that bad:
 922     code dst = x_code dst `appOL` toOL [
 923         LDATA ReadOnlyData16 [
 924                 CmmAlign 16,
 925                 CmmDataLabel lbl,
 926                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 927                 CmmStaticLit (CmmInt 0 I64)
 928         ],
 929                 -- gcc puts an unpck here.  Wonder if we need it.
 930         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 931                 -- xorpd, so we need the 128-bit constant
 932         ]
 933   --
 934   return (Any F64 code)
 935 #endif
 936
 937 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 938
 939 getRegister (CmmMachOp mop [x]) -- unary MachOps
 940   = case mop of
 941 #if i386_TARGET_ARCH
 942       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 943       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 944 #endif
 945
 946       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 947       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 948
 949       -- Nop conversions
 950       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 951       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 952       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 953       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 954       MO_U_Conv I32 I16 -> toI16Reg I32 x
 955       MO_S_Conv I32 I16 -> toI16Reg I32 x
 956 #if x86_64_TARGET_ARCH
 957       MO_U_Conv I64 I32 -> conversionNop I64 x
 958       MO_S_Conv I64 I32 -> conversionNop I64 x
 959       MO_U_Conv I64 I16 -> toI16Reg I64 x
 960       MO_S_Conv I64 I16 -> toI16Reg I64 x
 961       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 962       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 963 #endif
 964
 965       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 966       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 967
 968       -- widenings
 969       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 970       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 971       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 972
 973       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 974       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 975       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 976
 977 #if x86_64_TARGET_ARCH
 978       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 979       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 980       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 981       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 982       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 983       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 984         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 985         -- However, we don't want the register allocator to throw it
 986         -- away as an unnecessary reg-to-reg move, so we keep it in
 987         -- the form of a movzl and print it as a movl later.
 988 #endif
 989
 990 #if i386_TARGET_ARCH
 991       MO_S_Conv F32 F64 -> conversionNop F64 x
 992       MO_S_Conv F64 F32 -> conversionNop F32 x
 993 #else
 994       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
 995       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
 996 #endif
 997
 998       MO_S_Conv from to
 999         | isFloatingRep from -> coerceFP2Int from to x
1000         | isFloatingRep to   -> coerceInt2FP from to x
1001
1002       other -> pprPanic "getRegister" (pprMachOp mop)
1003    where
1004         -- signed or unsigned extension.
1005         integerExtend from to instr expr = do
1006             (reg,e_code) <- if from == I8 then getByteReg expr
1007                                           else getSomeReg expr
1008             let
1009                 code dst =
1010                   e_code `snocOL`
1011                   instr from (OpReg reg) (OpReg dst)
1012             return (Any to code)
1013
1014         toI8Reg new_rep expr
1015             = do codefn <- getAnyReg expr
1016                  return (Any new_rep codefn)
1017                 -- HACK: use getAnyReg to get a byte-addressable register.
1018                 -- If the source was a Fixed register, this will add the
1019                 -- mov instruction to put it into the desired destination.
1020                 -- We're assuming that the destination won't be a fixed
1021                 -- non-byte-addressable register; it won't be, because all
1022                 -- fixed registers are word-sized.
1023
1024         toI16Reg = toI8Reg -- for now
1025
1026         conversionNop new_rep expr
1027             = do e_code <- getRegister expr
1028                  return (swizzleRegisterRep e_code new_rep)
1029
1030
1031 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1032   = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1033     case mop of
1034       MO_Eq F32   -> condFltReg EQQ x y
1035       MO_Ne F32   -> condFltReg NE x y
1036       MO_S_Gt F32 -> condFltReg GTT x y
1037       MO_S_Ge F32 -> condFltReg GE x y
1038       MO_S_Lt F32 -> condFltReg LTT x y
1039       MO_S_Le F32 -> condFltReg LE x y
1040
1041       MO_Eq F64   -> condFltReg EQQ x y
1042       MO_Ne F64   -> condFltReg NE x y
1043       MO_S_Gt F64 -> condFltReg GTT x y
1044       MO_S_Ge F64 -> condFltReg GE x y
1045       MO_S_Lt F64 -> condFltReg LTT x y
1046       MO_S_Le F64 -> condFltReg LE x y
1047
1048       MO_Eq rep   -> condIntReg EQQ x y
1049       MO_Ne rep   -> condIntReg NE x y
1050
1051       MO_S_Gt rep -> condIntReg GTT x y
1052       MO_S_Ge rep -> condIntReg GE x y
1053       MO_S_Lt rep -> condIntReg LTT x y
1054       MO_S_Le rep -> condIntReg LE x y
1055
1056       MO_U_Gt rep -> condIntReg GU  x y
1057       MO_U_Ge rep -> condIntReg GEU x y
1058       MO_U_Lt rep -> condIntReg LU  x y
1059       MO_U_Le rep -> condIntReg LEU x y
1060
1061 #if i386_TARGET_ARCH
1062       MO_Add F32 -> trivialFCode F32 GADD x y
1063       MO_Sub F32 -> trivialFCode F32 GSUB x y
1064
1065       MO_Add F64 -> trivialFCode F64 GADD x y
1066       MO_Sub F64 -> trivialFCode F64 GSUB x y
1067
1068       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1069       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1070 #endif
1071
1072 #if x86_64_TARGET_ARCH
1073       MO_Add F32 -> trivialFCode F32 ADD x y
1074       MO_Sub F32 -> trivialFCode F32 SUB x y
1075
1076       MO_Add F64 -> trivialFCode F64 ADD x y
1077       MO_Sub F64 -> trivialFCode F64 SUB x y
1078
1079       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1080       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1081 #endif
1082
1083       MO_Add rep -> add_code rep x y
1084       MO_Sub rep -> sub_code rep x y
1085
1086       MO_S_Quot rep -> div_code rep True  True  x y
1087       MO_S_Rem  rep -> div_code rep True  False x y
1088       MO_U_Quot rep -> div_code rep False True  x y
1089       MO_U_Rem  rep -> div_code rep False False x y
1090
1091 #if i386_TARGET_ARCH
1092       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1093       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1094 #endif
1095
1096 #if x86_64_TARGET_ARCH
1097       MO_Mul   F32 -> trivialFCode F32 MUL x y
1098       MO_Mul   F64 -> trivialFCode F64 MUL x y
1099 #endif
1100
1101       MO_Mul   rep -> let op = IMUL rep in
1102                       trivialCode rep op (Just op) x y
1103
1104       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1105
1106       MO_And rep -> let op = AND rep in
1107                     trivialCode rep op (Just op) x y
1108       MO_Or  rep -> let op = OR  rep in
1109                     trivialCode rep op (Just op) x y
1110       MO_Xor rep -> let op = XOR rep in
1111                     trivialCode rep op (Just op) x y
1112
1113         {- Shift ops on x86s have constraints on their source, it
1114            either has to be Imm, CL or 1
1115             => trivialCode is not restrictive enough (sigh.)
1116         -}
1117       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1118       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1119       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1120
1121       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1122   where
1123     --------------------
1124     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1125     imulMayOflo rep a b = do
1126          (a_reg, a_code) <- getNonClobberedReg a
1127          b_code <- getAnyReg b
1128          let
1129              shift_amt  = case rep of
1130                            I32 -> 31
1131                            I64 -> 63
1132                            _ -> panic "shift_amt"
1133
1134              code = a_code `appOL` b_code eax `appOL`
1135                         toOL [
1136                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1137                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1138                                 -- sign extend lower part
1139                            SUB rep (OpReg edx) (OpReg eax)
1140                                 -- compare against upper
1141                            -- eax==0 if high part == sign extended low part
1142                         ]
1143          -- in
1144          return (Fixed rep eax code)
1145
1146     --------------------
1147     shift_code :: MachRep
1148                -> (Operand -> Operand -> Instr)
1149                -> CmmExpr
1150                -> CmmExpr
1151                -> NatM Register
1152
1153     {- Case1: shift length as immediate -}
1154     shift_code rep instr x y@(CmmLit lit) = do
1155           x_code <- getAnyReg x
1156           let
1157                code dst
1158                   = x_code dst `snocOL`
1159                     instr (OpImm (litToImm lit)) (OpReg dst)
1160           -- in
1161           return (Any rep code)
1162
1163     {- Case2: shift length is complex (non-immediate)
1164       * y must go in %ecx.
1165       * we cannot do y first *and* put its result in %ecx, because
1166         %ecx might be clobbered by x.
1167       * if we do y second, then x cannot be
1168         in a clobbered reg.  Also, we cannot clobber x's reg
1169         with the instruction itself.
1170       * so we can either:
1171         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1172         - do y second and put its result into %ecx.  x gets placed in a fresh
1173           tmp.  This is likely to be better, becuase the reg alloc can
1174           eliminate this reg->reg move here (it won't eliminate the other one,
1175           because the move is into the fixed %ecx).
1176     -}
1177     shift_code rep instr x y{-amount-} = do
1178         x_code <- getAnyReg x
1179         tmp <- getNewRegNat rep
1180         y_code <- getAnyReg y
1181         let
1182            code = x_code tmp `appOL`
1183                   y_code ecx `snocOL`
1184                   instr (OpReg ecx) (OpReg tmp)
1185         -- in
1186         return (Fixed rep tmp code)
1187
1188     --------------------
1189     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1190     add_code rep x (CmmLit (CmmInt y _))
1191         | not (is64BitInteger y) = add_int rep x y
1192     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1193
1194     --------------------
1195     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1196     sub_code rep x (CmmLit (CmmInt y _))
1197         | not (is64BitInteger (-y)) = add_int rep x (-y)
1198     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1199
1200     -- our three-operand add instruction:
1201     add_int rep x y = do
1202         (x_reg, x_code) <- getSomeReg x
1203         let
1204             imm = ImmInt (fromInteger y)
1205             code dst
1206                = x_code `snocOL`
1207                  LEA rep
1208                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1209                         (OpReg dst)
1210         --
1211         return (Any rep code)
1212
1213     ----------------------
1214     div_code rep signed quotient x y = do
1215            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1216            x_code <- getAnyReg x
1217            let
1218              widen | signed    = CLTD rep
1219                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1220
1221              instr | signed    = IDIV
1222                    | otherwise = DIV
1223
1224              code = y_code `appOL`
1225                     x_code eax `appOL`
1226                     toOL [widen, instr rep y_op]
1227
1228              result | quotient  = eax
1229                     | otherwise = edx
1230
1231            -- in
1232            return (Fixed rep result code)
1233
1234
1235 getRegister (CmmLoad mem pk)
1236   | isFloatingRep pk
1237   = do
1238     Amode src mem_code <- getAmode mem
1239     let
1240         code dst = mem_code `snocOL`
1241                    IF_ARCH_i386(GLD pk src dst,
1242                                 MOV pk (OpAddr src) (OpReg dst))
1243     --
1244     return (Any pk code)
1245
1246 #if i386_TARGET_ARCH
1247 getRegister (CmmLoad mem pk)
1248   | pk /= I64
1249   = do
1250     code <- intLoadCode (instr pk) mem
1251     return (Any pk code)
1252   where
1253         instr I8  = MOVZxL pk
1254         instr I16 = MOV I16
1255         instr I32 = MOV I32
1256         -- we always zero-extend 8-bit loads, if we
1257         -- can't think of anything better.  This is because
1258         -- we can't guarantee access to an 8-bit variant of every register
1259         -- (esi and edi don't have 8-bit variants), so to make things
1260         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1261 #endif
1262
1263 #if x86_64_TARGET_ARCH
1264 -- Simpler memory load code on x86_64
1265 getRegister (CmmLoad mem pk)
1266   = do
1267     code <- intLoadCode (MOV pk) mem
1268     return (Any pk code)
1269 #endif
1270
1271 getRegister (CmmLit (CmmInt 0 rep))
1272   = let
1273         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1274         adj_rep = case rep of I64 -> I32; _ -> rep
1275         rep1 = IF_ARCH_i386( rep, adj_rep )
1276         code dst
1277            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1278     in
1279         return (Any rep code)
1280
1281 #if x86_64_TARGET_ARCH
1282   -- optimisation for loading small literals on x86_64: take advantage
1283   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1284   -- instruction forms are shorter.
1285 getRegister (CmmLit lit)
1286   | I64 <- cmmLitRep lit, not (isBigLit lit)
1287   = let
1288         imm = litToImm lit
1289         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1290     in
1291         return (Any I64 code)
1292   where
1293    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1294    isBigLit _ = False
1295         -- note1: not the same as is64BitLit, because that checks for
1296         -- signed literals that fit in 32 bits, but we want unsigned
1297         -- literals here.
1298         -- note2: all labels are small, because we're assuming the
1299         -- small memory model (see gcc docs, -mcmodel=small).
1300 #endif
1301
1302 getRegister (CmmLit lit)
1303   = let
1304         rep = cmmLitRep lit
1305         imm = litToImm lit
1306         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1307     in
1308         return (Any rep code)
1309
1310 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1311
1312
1313 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1314    -> NatM (Reg -> InstrBlock)
1315 intLoadCode instr mem = do
1316   Amode src mem_code <- getAmode mem
1317   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1318
1319 -- Compute an expression into *any* register, adding the appropriate
1320 -- move instruction if necessary.
1321 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1322 getAnyReg expr = do
1323   r <- getRegister expr
1324   anyReg r
1325
1326 anyReg :: Register -> NatM (Reg -> InstrBlock)
1327 anyReg (Any _ code)          = return code
1328 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1329
1330 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1331 -- Fixed registers might not be byte-addressable, so we make sure we've
1332 -- got a temporary, inserting an extra reg copy if necessary.
1333 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1334 #if x86_64_TARGET_ARCH
1335 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1336 #else
1337 getByteReg expr = do
1338   r <- getRegister expr
1339   case r of
1340     Any rep code -> do
1341         tmp <- getNewRegNat rep
1342         return (tmp, code tmp)
1343     Fixed rep reg code
1344         | isVirtualReg reg -> return (reg,code)
1345         | otherwise -> do
1346             tmp <- getNewRegNat rep
1347             return (tmp, code `snocOL` reg2reg rep reg tmp)
1348         -- ToDo: could optimise slightly by checking for byte-addressable
1349         -- real registers, but that will happen very rarely if at all.
1350 #endif
1351
1352 -- Another variant: this time we want the result in a register that cannot
1353 -- be modified by code to evaluate an arbitrary expression.
1354 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1355 getNonClobberedReg expr = do
1356   r <- getRegister expr
1357   case r of
1358     Any rep code -> do
1359         tmp <- getNewRegNat rep
1360         return (tmp, code tmp)
1361     Fixed rep reg code
1362         -- only free regs can be clobbered
1363         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1364                 tmp <- getNewRegNat rep
1365                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1366         | otherwise ->
1367                 return (reg, code)
1368
1369 reg2reg :: MachRep -> Reg -> Reg -> Instr
1370 reg2reg rep src dst
1371 #if i386_TARGET_ARCH
1372   | isFloatingRep rep = GMOV src dst
1373 #endif
1374   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1375
1376 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1377
1378 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1379
1380 #if sparc_TARGET_ARCH
1381
1382 getRegister (CmmLit (CmmFloat f F32)) = do
1383     lbl <- getNewLabelNat
1384     let code dst = toOL [
1385             LDATA ReadOnlyData
1386                         [CmmDataLabel lbl,
1387                          CmmStaticLit (CmmFloat f F32)],
1388             SETHI (HI (ImmCLbl lbl)) dst,
1389             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1390     return (Any F32 code)
1391
1392 getRegister (CmmLit (CmmFloat d F64)) = do
1393     lbl <- getNewLabelNat
1394     let code dst = toOL [
1395             LDATA ReadOnlyData
1396                         [CmmDataLabel lbl,
1397                          CmmStaticLit (CmmFloat d F64)],
1398             SETHI (HI (ImmCLbl lbl)) dst,
1399             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1400     return (Any F64 code)
1401
1402 getRegister (CmmMachOp mop [x]) -- unary MachOps
1403   = case mop of
1404       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1405       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1406
1407       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1408       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1409
1410       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1411
1412       MO_U_Conv F64 F32-> coerceDbl2Flt x
1413       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1414
1415       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1416       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1417       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1418       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1419
1420       -- Conversions which are a nop on sparc
1421       MO_U_Conv from to
1422         | from == to   -> conversionNop to   x
1423       MO_U_Conv I32 to -> conversionNop to   x
1424       MO_S_Conv I32 to -> conversionNop to   x
1425
1426       -- widenings
1427       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1428       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1429       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1430       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1431
1432       other_op -> panic "Unknown unary mach op"
1433     where
1434         -- XXX SLL/SRL?
1435         integerExtend signed from to expr = do
1436            (reg, e_code) <- getSomeReg expr
1437            let
1438                code dst =
1439                    e_code `snocOL`
1440                    ((if signed then SRA else SRL)
1441                           reg (RIImm (ImmInt 0)) dst)
1442            return (Any to code)
1443         conversionNop new_rep expr
1444             = do e_code <- getRegister expr
1445                  return (swizzleRegisterRep e_code new_rep)
1446
1447 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1448   = case mop of
1449       MO_Eq F32 -> condFltReg EQQ x y
1450       MO_Ne F32 -> condFltReg NE x y
1451
1452       MO_S_Gt F32 -> condFltReg GTT x y
1453       MO_S_Ge F32 -> condFltReg GE x y
1454       MO_S_Lt F32 -> condFltReg LTT x y
1455       MO_S_Le F32 -> condFltReg LE x y
1456
1457       MO_Eq F64 -> condFltReg EQQ x y
1458       MO_Ne F64 -> condFltReg NE x y
1459
1460       MO_S_Gt F64 -> condFltReg GTT x y
1461       MO_S_Ge F64 -> condFltReg GE x y
1462       MO_S_Lt F64 -> condFltReg LTT x y
1463       MO_S_Le F64 -> condFltReg LE x y
1464
1465       MO_Eq rep -> condIntReg EQQ x y
1466       MO_Ne rep -> condIntReg NE x y
1467
1468       MO_S_Gt rep -> condIntReg GTT x y
1469       MO_S_Ge rep -> condIntReg GE x y
1470       MO_S_Lt rep -> condIntReg LTT x y
1471       MO_S_Le rep -> condIntReg LE x y
1472
1473       MO_U_Gt I32  -> condIntReg GTT x y
1474       MO_U_Ge I32  -> condIntReg GE x y
1475       MO_U_Lt I32  -> condIntReg LTT x y
1476       MO_U_Le I32  -> condIntReg LE x y
1477
1478       MO_U_Gt I16 -> condIntReg GU  x y
1479       MO_U_Ge I16 -> condIntReg GEU x y
1480       MO_U_Lt I16 -> condIntReg LU  x y
1481       MO_U_Le I16 -> condIntReg LEU x y
1482
1483       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1484       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1485
1486       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1487 {-
1488       -- ToDo: teach about V8+ SPARC div instructions
1489       MO_S_Quot I32 -> idiv FSLIT(".div")  x y
1490       MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
1491       MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
1492       MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
1493 -}
1494       MO_Add F32  -> trivialFCode F32 FADD  x y
1495       MO_Sub F32   -> trivialFCode F32  FSUB x y
1496       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1497       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1498
1499       MO_Add F64   -> trivialFCode F64 FADD  x y
1500       MO_Sub F64   -> trivialFCode F64  FSUB x y
1501       MO_Mul F64   -> trivialFCode F64  FMUL x y
1502       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1503
1504       MO_And rep   -> trivialCode rep (AND False) x y
1505       MO_Or rep    -> trivialCode rep (OR  False) x y
1506       MO_Xor rep   -> trivialCode rep (XOR False) x y
1507
1508       MO_Mul rep -> trivialCode rep (SMUL False) x y
1509
1510       MO_Shl rep   -> trivialCode rep SLL  x y
1511       MO_U_Shr rep   -> trivialCode rep SRL x y
1512       MO_S_Shr rep   -> trivialCode rep SRA x y
1513
1514 {-
1515       MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1516                                          [promote x, promote y])
1517                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1518       MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1519                                         [x, y])
1520 -}
1521       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1522   where
1523     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1524
1525     --------------------
1526     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1527     imulMayOflo rep a b = do
1528          (a_reg, a_code) <- getSomeReg a
1529          (b_reg, b_code) <- getSomeReg b
1530          res_lo <- getNewRegNat I32
1531          res_hi <- getNewRegNat I32
1532          let
1533             shift_amt  = case rep of
1534                           I32 -> 31
1535                           I64 -> 63
1536                           _ -> panic "shift_amt"
1537             code dst = a_code `appOL` b_code `appOL`
1538                        toOL [
1539                            SMUL False a_reg (RIReg b_reg) res_lo,
1540                            RDY res_hi,
1541                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1542                            SUB False False res_lo (RIReg res_hi) dst
1543                         ]
1544          return (Any I32 code)
1545
1546 getRegister (CmmLoad mem pk) = do
1547     Amode src code <- getAmode mem
1548     let
1549         code__2 dst = code `snocOL` LD pk src dst
1550     return (Any pk code__2)
1551
1552 getRegister (CmmLit (CmmInt i _))
1553   | fits13Bits i
1554   = let
1555         src = ImmInt (fromInteger i)
1556         code dst = unitOL (OR False g0 (RIImm src) dst)
1557     in
1558         return (Any I32 code)
1559
1560 getRegister (CmmLit lit)
1561   = let rep = cmmLitRep lit
1562         imm = litToImm lit
1563         code dst = toOL [
1564             SETHI (HI imm) dst,
1565             OR False dst (RIImm (LO imm)) dst]
1566     in return (Any I32 code)
1567
1568 #endif /* sparc_TARGET_ARCH */
1569
1570 #if powerpc_TARGET_ARCH
1571 getRegister (CmmLoad mem pk)
1572   | pk /= I64
1573   = do
1574         Amode addr addr_code <- getAmode mem
1575         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1576                        addr_code `snocOL` LD pk dst addr
1577         return (Any pk code)
1578
1579 -- catch simple cases of zero- or sign-extended load
1580 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1581     Amode addr addr_code <- getAmode mem
1582     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1583
1584 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1585
1586 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1587     Amode addr addr_code <- getAmode mem
1588     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1589
1590 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1591     Amode addr addr_code <- getAmode mem
1592     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1593
1594 getRegister (CmmMachOp mop [x]) -- unary MachOps
1595   = case mop of
1596       MO_Not rep   -> trivialUCode rep NOT x
1597
1598       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1599       MO_S_Conv F32 F64 -> conversionNop F64 x
1600
1601       MO_S_Conv from to
1602         | from == to         -> conversionNop to x
1603         | isFloatingRep from -> coerceFP2Int from to x
1604         | isFloatingRep to   -> coerceInt2FP from to x
1605
1606         -- narrowing is a nop: we treat the high bits as undefined
1607       MO_S_Conv I32 to -> conversionNop to x
1608       MO_S_Conv I16 I8 -> conversionNop I8 x
1609       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1610       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1611
1612       MO_U_Conv from to
1613         | from == to -> conversionNop to x
1614         -- narrowing is a nop: we treat the high bits as undefined
1615       MO_U_Conv I32 to -> conversionNop to x
1616       MO_U_Conv I16 I8 -> conversionNop I8 x
1617       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1618       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1619
1620       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1621       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1622       MO_S_Neg rep      -> trivialUCode rep NEG x
1623
1624     where
1625         conversionNop new_rep expr
1626             = do e_code <- getRegister expr
1627                  return (swizzleRegisterRep e_code new_rep)
1628
1629 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1630   = case mop of
1631       MO_Eq F32 -> condFltReg EQQ x y
1632       MO_Ne F32 -> condFltReg NE  x y
1633
1634       MO_S_Gt F32 -> condFltReg GTT x y
1635       MO_S_Ge F32 -> condFltReg GE  x y
1636       MO_S_Lt F32 -> condFltReg LTT x y
1637       MO_S_Le F32 -> condFltReg LE  x y
1638
1639       MO_Eq F64 -> condFltReg EQQ x y
1640       MO_Ne F64 -> condFltReg NE  x y
1641
1642       MO_S_Gt F64 -> condFltReg GTT x y
1643       MO_S_Ge F64 -> condFltReg GE  x y
1644       MO_S_Lt F64 -> condFltReg LTT x y
1645       MO_S_Le F64 -> condFltReg LE  x y
1646
1647       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1648       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1649
1650       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1651       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1652       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1653       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1654
1655       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1656       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1657       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1658       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1659
1660       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1661       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1662       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1663       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1664
1665       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1666       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1667       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1668       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1669
1670          -- optimize addition with 32-bit immediate
1671          -- (needed for PIC)
1672       MO_Add I32 ->
1673         case y of
1674           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1675             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1676           CmmLit lit
1677             -> do
1678                 (src, srcCode) <- getSomeReg x
1679                 let imm = litToImm lit
1680                     code dst = srcCode `appOL` toOL [
1681                                     ADDIS dst src (HA imm),
1682                                     ADD dst dst (RIImm (LO imm))
1683                                 ]
1684                 return (Any I32 code)
1685           _ -> trivialCode I32 True ADD x y
1686
1687       MO_Add rep -> trivialCode rep True ADD x y
1688       MO_Sub rep ->
1689         case y of    -- subfi ('substract from' with immediate) doesn't exist
1690           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1691             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1692           _ -> trivialCodeNoImm rep SUBF y x
1693
1694       MO_Mul rep -> trivialCode rep True MULLW x y
1695
1696       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1697
1698       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1699       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1700
1701       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1702       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1703
1704       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1705       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1706
1707       MO_And rep   -> trivialCode rep False AND x y
1708       MO_Or rep    -> trivialCode rep False OR x y
1709       MO_Xor rep   -> trivialCode rep False XOR x y
1710
1711       MO_Shl rep   -> trivialCode rep False SLW x y
1712       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1713       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1714
1715 getRegister (CmmLit (CmmInt i rep))
1716   | Just imm <- makeImmediate rep True i
1717   = let
1718         code dst = unitOL (LI dst imm)
1719     in
1720         return (Any rep code)
1721
1722 getRegister (CmmLit (CmmFloat f frep)) = do
1723     lbl <- getNewLabelNat
1724     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1725     Amode addr addr_code <- getAmode dynRef
1726     let code dst =
1727             LDATA ReadOnlyData  [CmmDataLabel lbl,
1728                                  CmmStaticLit (CmmFloat f frep)]
1729             `consOL` (addr_code `snocOL` LD frep dst addr)
1730     return (Any frep code)
1731
1732 getRegister (CmmLit lit)
1733   = let rep = cmmLitRep lit
1734         imm = litToImm lit
1735         code dst = toOL [
1736               LIS dst (HI imm),
1737               OR dst dst (RIImm (LO imm))
1738           ]
1739     in return (Any rep code)
1740
1741 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1742
1743     -- extend?Rep: wrap integer expression of type rep
1744     -- in a conversion to I32
1745 extendSExpr I32 x = x
1746 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1747 extendUExpr I32 x = x
1748 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1749
1750 #endif /* powerpc_TARGET_ARCH */
1751
1752
1753 -- -----------------------------------------------------------------------------
1754 --  The 'Amode' type: Memory addressing modes passed up the tree.
1755
1756 data Amode = Amode AddrMode InstrBlock
1757
1758 {-
1759 Now, given a tree (the argument to an CmmLoad) that references memory,
1760 produce a suitable addressing mode.
1761
1762 A Rule of the Game (tm) for Amodes: use of the addr bit must
1763 immediately follow use of the code part, since the code part puts
1764 values in registers which the addr then refers to.  So you can't put
1765 anything in between, lest it overwrite some of those registers.  If
1766 you need to do some other computation between the code part and use of
1767 the addr bit, first store the effective address from the amode in a
1768 temporary, then do the other computation, and then use the temporary:
1769
1770     code
1771     LEA amode, tmp
1772     ... other computation ...
1773     ... (tmp) ...
1774 -}
1775
1776 getAmode :: CmmExpr -> NatM Amode
1777 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1778
1779 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1780
1781 #if alpha_TARGET_ARCH
1782
1783 getAmode (StPrim IntSubOp [x, StInt i])
1784   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1785     getRegister x               `thenNat` \ register ->
1786     let
1787         code = registerCode register tmp
1788         reg  = registerName register tmp
1789         off  = ImmInt (-(fromInteger i))
1790     in
1791     return (Amode (AddrRegImm reg off) code)
1792
1793 getAmode (StPrim IntAddOp [x, StInt i])
1794   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1795     getRegister x               `thenNat` \ register ->
1796     let
1797         code = registerCode register tmp
1798         reg  = registerName register tmp
1799         off  = ImmInt (fromInteger i)
1800     in
1801     return (Amode (AddrRegImm reg off) code)
1802
1803 getAmode leaf
1804   | isJust imm
1805   = return (Amode (AddrImm imm__2) id)
1806   where
1807     imm = maybeImm leaf
1808     imm__2 = case imm of Just x -> x
1809
1810 getAmode other
1811   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1812     getRegister other           `thenNat` \ register ->
1813     let
1814         code = registerCode register tmp
1815         reg  = registerName register tmp
1816     in
1817     return (Amode (AddrReg reg) code)
1818
1819 #endif /* alpha_TARGET_ARCH */
1820
1821 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1822
1823 #if x86_64_TARGET_ARCH
1824
1825 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1826                                      CmmLit displacement])
1827     = return $ Amode (ripRel (litToImm displacement)) nilOL
1828
1829 #endif
1830
1831 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1832
1833 -- This is all just ridiculous, since it carefully undoes
1834 -- what mangleIndexTree has just done.
1835 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1836   | not (is64BitLit lit)
1837   -- ASSERT(rep == I32)???
1838   = do (x_reg, x_code) <- getSomeReg x
1839        let off = ImmInt (-(fromInteger i))
1840        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1841
1842 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1843   | not (is64BitLit lit)
1844   -- ASSERT(rep == I32)???
1845   = do (x_reg, x_code) <- getSomeReg x
1846        let off = ImmInt (fromInteger i)
1847        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1848
1849 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1850 -- recognised by the next rule.
1851 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1852                                   b@(CmmLit _)])
1853   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1854
1855 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1856                                         [y, CmmLit (CmmInt shift _)]])
1857   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1858   = x86_complex_amode x y shift 0
1859
1860 getAmode (CmmMachOp (MO_Add rep)
1861                 [x, CmmMachOp (MO_Add _)
1862                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1863                          CmmLit (CmmInt offset _)]])
1864   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1865   && not (is64BitInteger offset)
1866   = x86_complex_amode x y shift offset
1867
1868 getAmode (CmmMachOp (MO_Add rep) [x,y])
1869   = x86_complex_amode x y 0 0
1870
1871 getAmode (CmmLit lit) | not (is64BitLit lit)
1872   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1873
1874 getAmode expr = do
1875   (reg,code) <- getSomeReg expr
1876   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1877
1878
1879 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1880 x86_complex_amode base index shift offset
1881   = do (x_reg, x_code) <- getNonClobberedReg base
1882         -- x must be in a temp, because it has to stay live over y_code
1883         -- we could compre x_reg and y_reg and do something better here...
1884        (y_reg, y_code) <- getSomeReg index
1885        let
1886            code = x_code `appOL` y_code
1887            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1888        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1889                code)
1890
1891 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1892
1893 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1894
1895 #if sparc_TARGET_ARCH
1896
1897 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1898   | fits13Bits (-i)
1899   = do
1900        (reg, code) <- getSomeReg x
1901        let
1902          off  = ImmInt (-(fromInteger i))
1903        return (Amode (AddrRegImm reg off) code)
1904
1905
1906 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1907   | fits13Bits i
1908   = do
1909        (reg, code) <- getSomeReg x
1910        let
1911          off  = ImmInt (fromInteger i)
1912        return (Amode (AddrRegImm reg off) code)
1913
1914 getAmode (CmmMachOp (MO_Add rep) [x, y])
1915   = do
1916     (regX, codeX) <- getSomeReg x
1917     (regY, codeY) <- getSomeReg y
1918     let
1919         code = codeX `appOL` codeY
1920     return (Amode (AddrRegReg regX regY) code)
1921
1922 -- XXX Is this same as "leaf" in Stix?
1923 getAmode (CmmLit lit)
1924   = do
1925       tmp <- getNewRegNat I32
1926       let
1927         code = unitOL (SETHI (HI imm__2) tmp)
1928       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1929       where
1930          imm__2 = litToImm lit
1931
1932 getAmode other
1933   = do
1934        (reg, code) <- getSomeReg other
1935        let
1936             off  = ImmInt 0
1937        return (Amode (AddrRegImm reg off) code)
1938
1939 #endif /* sparc_TARGET_ARCH */
1940
1941 #ifdef powerpc_TARGET_ARCH
1942 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1943   | Just off <- makeImmediate I32 True (-i)
1944   = do
1945         (reg, code) <- getSomeReg x
1946         return (Amode (AddrRegImm reg off) code)
1947
1948
1949 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1950   | Just off <- makeImmediate I32 True i
1951   = do
1952         (reg, code) <- getSomeReg x
1953         return (Amode (AddrRegImm reg off) code)
1954
1955    -- optimize addition with 32-bit immediate
1956    -- (needed for PIC)
1957 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1958   = do
1959         tmp <- getNewRegNat I32
1960         (src, srcCode) <- getSomeReg x
1961         let imm = litToImm lit
1962             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1963         return (Amode (AddrRegImm tmp (LO imm)) code)
1964
1965 getAmode (CmmLit lit)
1966   = do
1967         tmp <- getNewRegNat I32
1968         let imm = litToImm lit
1969             code = unitOL (LIS tmp (HA imm))
1970         return (Amode (AddrRegImm tmp (LO imm)) code)
1971
1972 getAmode (CmmMachOp (MO_Add I32) [x, y])
1973   = do
1974         (regX, codeX) <- getSomeReg x
1975         (regY, codeY) <- getSomeReg y
1976         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1977
1978 getAmode other
1979   = do
1980         (reg, code) <- getSomeReg other
1981         let
1982             off  = ImmInt 0
1983         return (Amode (AddrRegImm reg off) code)
1984 #endif /* powerpc_TARGET_ARCH */
1985
1986 -- -----------------------------------------------------------------------------
1987 -- getOperand: sometimes any operand will do.
1988
1989 -- getNonClobberedOperand: the value of the operand will remain valid across
1990 -- the computation of an arbitrary expression, unless the expression
1991 -- is computed directly into a register which the operand refers to
1992 -- (see trivialCode where this function is used for an example).
1993
1994 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1995
1996 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1997 #if x86_64_TARGET_ARCH
1998 getNonClobberedOperand (CmmLit lit)
1999   | isSuitableFloatingPointLit lit = do
2000     lbl <- getNewLabelNat
2001     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2002                                            CmmStaticLit lit])
2003     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2004 #endif
2005 getNonClobberedOperand (CmmLit lit)
2006   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2007     return (OpImm (litToImm lit), nilOL)
2008 getNonClobberedOperand (CmmLoad mem pk)
2009   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2010     Amode src mem_code <- getAmode mem
2011     (src',save_code) <-
2012         if (amodeCouldBeClobbered src)
2013                 then do
2014                    tmp <- getNewRegNat wordRep
2015                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2016                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2017                 else
2018                    return (src, nilOL)
2019     return (OpAddr src', save_code `appOL` mem_code)
2020 getNonClobberedOperand e = do
2021     (reg, code) <- getNonClobberedReg e
2022     return (OpReg reg, code)
2023
2024 amodeCouldBeClobbered :: AddrMode -> Bool
2025 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2026
2027 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2028 regClobbered _ = False
2029
2030 -- getOperand: the operand is not required to remain valid across the
2031 -- computation of an arbitrary expression.
2032 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2033 #if x86_64_TARGET_ARCH
2034 getOperand (CmmLit lit)
2035   | isSuitableFloatingPointLit lit = do
2036     lbl <- getNewLabelNat
2037     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2038                                            CmmStaticLit lit])
2039     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2040 #endif
2041 getOperand (CmmLit lit)
2042   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2043     return (OpImm (litToImm lit), nilOL)
2044 getOperand (CmmLoad mem pk)
2045   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2046     Amode src mem_code <- getAmode mem
2047     return (OpAddr src, mem_code)
2048 getOperand e = do
2049     (reg, code) <- getSomeReg e
2050     return (OpReg reg, code)
2051
2052 isOperand :: CmmExpr -> Bool
2053 isOperand (CmmLoad _ _) = True
2054 isOperand (CmmLit lit)  = not (is64BitLit lit)
2055                           || isSuitableFloatingPointLit lit
2056 isOperand _             = False
2057
2058 -- if we want a floating-point literal as an operand, we can
2059 -- use it directly from memory.  However, if the literal is
2060 -- zero, we're better off generating it into a register using
2061 -- xor.
2062 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2063 isSuitableFloatingPointLit _ = False
2064
2065 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2066 getRegOrMem (CmmLoad mem pk)
2067   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2068     Amode src mem_code <- getAmode mem
2069     return (OpAddr src, mem_code)
2070 getRegOrMem e = do
2071     (reg, code) <- getNonClobberedReg e
2072     return (OpReg reg, code)
2073
2074 #if x86_64_TARGET_ARCH
2075 is64BitLit (CmmInt i I64) = is64BitInteger i
2076    -- assume that labels are in the range 0-2^31-1: this assumes the
2077    -- small memory model (see gcc docs, -mcmodel=small).
2078 #endif
2079 is64BitLit x = False
2080 #endif
2081
2082 is64BitInteger :: Integer -> Bool
2083 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2084   where i64 = fromIntegral i :: Int64
2085   -- a CmmInt is intended to be truncated to the appropriate
2086   -- number of bits, so here we truncate it to Int64.  This is
2087   -- important because e.g. -1 as a CmmInt might be either
2088   -- -1 or 18446744073709551615.
2089
2090 -- -----------------------------------------------------------------------------
2091 --  The 'CondCode' type:  Condition codes passed up the tree.
2092
2093 data CondCode = CondCode Bool Cond InstrBlock
2094
2095 -- Set up a condition code for a conditional branch.
2096
2097 getCondCode :: CmmExpr -> NatM CondCode
2098
2099 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100
2101 #if alpha_TARGET_ARCH
2102 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2103 #endif /* alpha_TARGET_ARCH */
2104
2105 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2106
2107 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2108 -- yes, they really do seem to want exactly the same!
2109
2110 getCondCode (CmmMachOp mop [x, y])
2111   =
2112     case mop of
2113       MO_Eq F32 -> condFltCode EQQ x y
2114       MO_Ne F32 -> condFltCode NE  x y
2115
2116       MO_S_Gt F32 -> condFltCode GTT x y
2117       MO_S_Ge F32 -> condFltCode GE  x y
2118       MO_S_Lt F32 -> condFltCode LTT x y
2119       MO_S_Le F32 -> condFltCode LE  x y
2120
2121       MO_Eq F64 -> condFltCode EQQ x y
2122       MO_Ne F64 -> condFltCode NE  x y
2123
2124       MO_S_Gt F64 -> condFltCode GTT x y
2125       MO_S_Ge F64 -> condFltCode GE  x y
2126       MO_S_Lt F64 -> condFltCode LTT x y
2127       MO_S_Le F64 -> condFltCode LE  x y
2128
2129       MO_Eq rep -> condIntCode EQQ  x y
2130       MO_Ne rep -> condIntCode NE   x y
2131
2132       MO_S_Gt rep -> condIntCode GTT  x y
2133       MO_S_Ge rep -> condIntCode GE   x y
2134       MO_S_Lt rep -> condIntCode LTT  x y
2135       MO_S_Le rep -> condIntCode LE   x y
2136
2137       MO_U_Gt rep -> condIntCode GU   x y
2138       MO_U_Ge rep -> condIntCode GEU  x y
2139       MO_U_Lt rep -> condIntCode LU   x y
2140       MO_U_Le rep -> condIntCode LEU  x y
2141
2142       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2143
2144 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2145
2146 #elif powerpc_TARGET_ARCH
2147
2148 -- almost the same as everywhere else - but we need to
2149 -- extend small integers to 32 bit first
2150
2151 getCondCode (CmmMachOp mop [x, y])
2152   = case mop of
2153       MO_Eq F32 -> condFltCode EQQ x y
2154       MO_Ne F32 -> condFltCode NE  x y
2155
2156       MO_S_Gt F32 -> condFltCode GTT x y
2157       MO_S_Ge F32 -> condFltCode GE  x y
2158       MO_S_Lt F32 -> condFltCode LTT x y
2159       MO_S_Le F32 -> condFltCode LE  x y
2160
2161       MO_Eq F64 -> condFltCode EQQ x y
2162       MO_Ne F64 -> condFltCode NE  x y
2163
2164       MO_S_Gt F64 -> condFltCode GTT x y
2165       MO_S_Ge F64 -> condFltCode GE  x y
2166       MO_S_Lt F64 -> condFltCode LTT x y
2167       MO_S_Le F64 -> condFltCode LE  x y
2168
2169       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2170       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2171
2172       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2173       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2174       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2175       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2176
2177       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2178       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2179       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2180       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2181
2182       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2183
2184 getCondCode other =  panic "getCondCode(2)(powerpc)"
2185
2186
2187 #endif
2188
2189
2190 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2191 -- passed back up the tree.
2192
2193 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2194
2195 #if alpha_TARGET_ARCH
2196 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2197 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2198 #endif /* alpha_TARGET_ARCH */
2199
2200 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2201 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2202
2203 -- memory vs immediate
2204 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2205     Amode x_addr x_code <- getAmode x
2206     let
2207         imm  = litToImm lit
2208         code = x_code `snocOL`
2209                   CMP pk (OpImm imm) (OpAddr x_addr)
2210     --
2211     return (CondCode False cond code)
2212
2213 -- anything vs zero
2214 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2215     (x_reg, x_code) <- getSomeReg x
2216     let
2217         code = x_code `snocOL`
2218                   TEST pk (OpReg x_reg) (OpReg x_reg)
2219     --
2220     return (CondCode False cond code)
2221
2222 -- anything vs operand
2223 condIntCode cond x y | isOperand y = do
2224     (x_reg, x_code) <- getNonClobberedReg x
2225     (y_op,  y_code) <- getOperand y
2226     let
2227         code = x_code `appOL` y_code `snocOL`
2228                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2229     -- in
2230     return (CondCode False cond code)
2231
2232 -- anything vs anything
2233 condIntCode cond x y = do
2234   (y_reg, y_code) <- getNonClobberedReg y
2235   (x_op, x_code) <- getRegOrMem x
2236   let
2237         code = y_code `appOL`
2238                x_code `snocOL`
2239                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2240   -- in
2241   return (CondCode False cond code)
2242 #endif
2243
2244 #if i386_TARGET_ARCH
2245 condFltCode cond x y
2246   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2247   (x_reg, x_code) <- getNonClobberedReg x
2248   (y_reg, y_code) <- getSomeReg y
2249   let
2250         code = x_code `appOL` y_code `snocOL`
2251                 GCMP cond x_reg y_reg
2252   -- The GCMP insn does the test and sets the zero flag if comparable
2253   -- and true.  Hence we always supply EQQ as the condition to test.
2254   return (CondCode True EQQ code)
2255 #endif /* i386_TARGET_ARCH */
2256
2257 #if x86_64_TARGET_ARCH
2258 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2259 -- an operand, but the right must be a reg.  We can probably do better
2260 -- than this general case...
2261 condFltCode cond x y = do
2262   (x_reg, x_code) <- getNonClobberedReg x
2263   (y_op, y_code) <- getOperand y
2264   let
2265         code = x_code `appOL`
2266                y_code `snocOL`
2267                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2268         -- NB(1): we need to use the unsigned comparison operators on the
2269         -- result of this comparison.
2270   -- in
2271   return (CondCode True (condToUnsigned cond) code)
2272 #endif
2273
2274 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2275
2276 #if sparc_TARGET_ARCH
2277
2278 condIntCode cond x (CmmLit (CmmInt y rep))
2279   | fits13Bits y
2280   = do
2281        (src1, code) <- getSomeReg x
2282        let
2283            src2 = ImmInt (fromInteger y)
2284            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2285        return (CondCode False cond code')
2286
2287 condIntCode cond x y = do
2288     (src1, code1) <- getSomeReg x
2289     (src2, code2) <- getSomeReg y
2290     let
2291         code__2 = code1 `appOL` code2 `snocOL`
2292                   SUB False True src1 (RIReg src2) g0
2293     return (CondCode False cond code__2)
2294
2295 -----------
2296 condFltCode cond x y = do
2297     (src1, code1) <- getSomeReg x
2298     (src2, code2) <- getSomeReg y
2299     tmp <- getNewRegNat F64
2300     let
2301         promote x = FxTOy F32 F64 x tmp
2302
2303         pk1   = cmmExprRep x
2304         pk2   = cmmExprRep y
2305
2306         code__2 =
2307                 if pk1 == pk2 then
2308                     code1 `appOL` code2 `snocOL`
2309                     FCMP True pk1 src1 src2
2310                 else if pk1 == F32 then
2311                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2312                     FCMP True F64 tmp src2
2313                 else
2314                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2315                     FCMP True F64 src1 tmp
2316     return (CondCode True cond code__2)
2317
2318 #endif /* sparc_TARGET_ARCH */
2319
2320 #if powerpc_TARGET_ARCH
2321 --  ###FIXME: I16 and I8!
2322 condIntCode cond x (CmmLit (CmmInt y rep))
2323   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2324   = do
2325         (src1, code) <- getSomeReg x
2326         let
2327             code' = code `snocOL`
2328                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2329         return (CondCode False cond code')
2330
2331 condIntCode cond x y = do
2332     (src1, code1) <- getSomeReg x
2333     (src2, code2) <- getSomeReg y
2334     let
2335         code' = code1 `appOL` code2 `snocOL`
2336                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2337     return (CondCode False cond code')
2338
2339 condFltCode cond x y = do
2340     (src1, code1) <- getSomeReg x
2341     (src2, code2) <- getSomeReg y
2342     let
2343         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2344         code'' = case cond of -- twiddle CR to handle unordered case
2345                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2346                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2347                     _ -> code'
2348                  where
2349                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2350     return (CondCode True cond code'')
2351
2352 #endif /* powerpc_TARGET_ARCH */
2353
2354 -- -----------------------------------------------------------------------------
2355 -- Generating assignments
2356
2357 -- Assignments are really at the heart of the whole code generation
2358 -- business.  Almost all top-level nodes of any real importance are
2359 -- assignments, which correspond to loads, stores, or register
2360 -- transfers.  If we're really lucky, some of the register transfers
2361 -- will go away, because we can use the destination register to
2362 -- complete the code generation for the right hand side.  This only
2363 -- fails when the right hand side is forced into a fixed register
2364 -- (e.g. the result of a call).
2365
2366 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2367 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2368
2369 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2370 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2371
2372 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2373
2374 #if alpha_TARGET_ARCH
2375
2376 assignIntCode pk (CmmLoad dst _) src
2377   = getNewRegNat IntRep             `thenNat` \ tmp ->
2378     getAmode dst                    `thenNat` \ amode ->
2379     getRegister src                 `thenNat` \ register ->
2380     let
2381         code1   = amodeCode amode []
2382         dst__2  = amodeAddr amode
2383         code2   = registerCode register tmp []
2384         src__2  = registerName register tmp
2385         sz      = primRepToSize pk
2386         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2387     in
2388     return code__2
2389
2390 assignIntCode pk dst src
2391   = getRegister dst                         `thenNat` \ register1 ->
2392     getRegister src                         `thenNat` \ register2 ->
2393     let
2394         dst__2  = registerName register1 zeroh
2395         code    = registerCode register2 dst__2
2396         src__2  = registerName register2 dst__2
2397         code__2 = if isFixed register2
2398                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2399                   else code
2400     in
2401     return code__2
2402
2403 #endif /* alpha_TARGET_ARCH */
2404
2405 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2406
2407 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2408
2409 -- integer assignment to memory
2410
2411 -- specific case of adding/subtracting an integer to a particular address.
2412 -- ToDo: catch other cases where we can use an operation directly on a memory
2413 -- address.
2414 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2415                                                  CmmLit (CmmInt i _)])
2416    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2417      Just instr <- check op
2418    = do Amode amode code_addr <- getAmode addr
2419         let code = code_addr `snocOL`
2420                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2421         return code
2422    where
2423         check (MO_Add _) = Just ADD
2424         check (MO_Sub _) = Just SUB
2425         check _ = Nothing
2426         -- ToDo: more?
2427
2428 -- general case
2429 assignMem_IntCode pk addr src = do
2430     Amode addr code_addr <- getAmode addr
2431     (code_src, op_src)   <- get_op_RI src
2432     let
2433         code = code_src `appOL`
2434                code_addr `snocOL`
2435                   MOV pk op_src (OpAddr addr)
2436         -- NOTE: op_src is stable, so it will still be valid
2437         -- after code_addr.  This may involve the introduction
2438         -- of an extra MOV to a temporary register, but we hope
2439         -- the register allocator will get rid of it.
2440     --
2441     return code
2442   where
2443     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2444     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2445       = return (nilOL, OpImm (litToImm lit))
2446     get_op_RI op
2447       = do (reg,code) <- getNonClobberedReg op
2448            return (code, OpReg reg)
2449
2450
2451 -- Assign; dst is a reg, rhs is mem
2452 assignReg_IntCode pk reg (CmmLoad src _) = do
2453   load_code <- intLoadCode (MOV pk) src
2454   return (load_code (getRegisterReg reg))
2455
2456 -- dst is a reg, but src could be anything
2457 assignReg_IntCode pk reg src = do
2458   code <- getAnyReg src
2459   return (code (getRegisterReg reg))
2460
2461 #endif /* i386_TARGET_ARCH */
2462
2463 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2464
2465 #if sparc_TARGET_ARCH
2466
2467 assignMem_IntCode pk addr src = do
2468     (srcReg, code) <- getSomeReg src
2469     Amode dstAddr addr_code <- getAmode addr
2470     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2471
2472 assignReg_IntCode pk reg src = do
2473     r <- getRegister src
2474     return $ case r of
2475         Any _ code         -> code dst
2476         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2477     where
2478       dst = getRegisterReg reg
2479
2480
2481 #endif /* sparc_TARGET_ARCH */
2482
2483 #if powerpc_TARGET_ARCH
2484
2485 assignMem_IntCode pk addr src = do
2486     (srcReg, code) <- getSomeReg src
2487     Amode dstAddr addr_code <- getAmode addr
2488     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2489
2490 -- dst is a reg, but src could be anything
2491 assignReg_IntCode pk reg src
2492     = do
2493         r <- getRegister src
2494         return $ case r of
2495             Any _ code         -> code dst
2496             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2497     where
2498         dst = getRegisterReg reg
2499
2500 #endif /* powerpc_TARGET_ARCH */
2501
2502
2503 -- -----------------------------------------------------------------------------
2504 -- Floating-point assignments
2505
2506 #if alpha_TARGET_ARCH
2507
2508 assignFltCode pk (CmmLoad dst _) src
2509   = getNewRegNat pk                 `thenNat` \ tmp ->
2510     getAmode dst                    `thenNat` \ amode ->
2511     getRegister src                         `thenNat` \ register ->
2512     let
2513         code1   = amodeCode amode []
2514         dst__2  = amodeAddr amode
2515         code2   = registerCode register tmp []
2516         src__2  = registerName register tmp
2517         sz      = primRepToSize pk
2518         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2519     in
2520     return code__2
2521
2522 assignFltCode pk dst src
2523   = getRegister dst                         `thenNat` \ register1 ->
2524     getRegister src                         `thenNat` \ register2 ->
2525     let
2526         dst__2  = registerName register1 zeroh
2527         code    = registerCode register2 dst__2
2528         src__2  = registerName register2 dst__2
2529         code__2 = if isFixed register2
2530                   then code . mkSeqInstr (FMOV src__2 dst__2)
2531                   else code
2532     in
2533     return code__2
2534
2535 #endif /* alpha_TARGET_ARCH */
2536
2537 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2538
2539 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2540
2541 -- Floating point assignment to memory
2542 assignMem_FltCode pk addr src = do
2543   (src_reg, src_code) <- getNonClobberedReg src
2544   Amode addr addr_code <- getAmode addr
2545   let
2546         code = src_code `appOL`
2547                addr_code `snocOL`
2548                 IF_ARCH_i386(GST pk src_reg addr,
2549                              MOV pk (OpReg src_reg) (OpAddr addr))
2550   return code
2551
2552 -- Floating point assignment to a register/temporary
2553 assignReg_FltCode pk reg src = do
2554   src_code <- getAnyReg src
2555   return (src_code (getRegisterReg reg))
2556
2557 #endif /* i386_TARGET_ARCH */
2558
2559 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2560
2561 #if sparc_TARGET_ARCH
2562
2563 -- Floating point assignment to memory
2564 assignMem_FltCode pk addr src = do
2565     Amode dst__2 code1 <- getAmode addr
2566     (src__2, code2) <- getSomeReg src
2567     tmp1 <- getNewRegNat pk
2568     let
2569         pk__2   = cmmExprRep src
2570         code__2 = code1 `appOL` code2 `appOL`
2571             if   pk == pk__2
2572             then unitOL (ST pk src__2 dst__2)
2573             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2574     return code__2
2575
2576 -- Floating point assignment to a register/temporary
2577 -- ToDo: Verify correctness
2578 assignReg_FltCode pk reg src = do
2579     r <- getRegister src
2580     v1 <- getNewRegNat pk
2581     return $ case r of
2582         Any _ code         -> code dst
2583         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2584     where
2585       dst = getRegisterReg reg
2586
2587 #endif /* sparc_TARGET_ARCH */
2588
2589 #if powerpc_TARGET_ARCH
2590
2591 -- Easy, isn't it?
2592 assignMem_FltCode = assignMem_IntCode
2593 assignReg_FltCode = assignReg_IntCode
2594
2595 #endif /* powerpc_TARGET_ARCH */
2596
2597
2598 -- -----------------------------------------------------------------------------
2599 -- Generating an non-local jump
2600
2601 -- (If applicable) Do not fill the delay slots here; you will confuse the
2602 -- register allocator.
2603
2604 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2605
2606 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2607
2608 #if alpha_TARGET_ARCH
2609
2610 genJump (CmmLabel lbl)
2611   | isAsmTemp lbl = returnInstr (BR target)
2612   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2613   where
2614     target = ImmCLbl lbl
2615
2616 genJump tree
2617   = getRegister tree                `thenNat` \ register ->
2618     getNewRegNat PtrRep             `thenNat` \ tmp ->
2619     let
2620         dst    = registerName register pv
2621         code   = registerCode register pv
2622         target = registerName register pv
2623     in
2624     if isFixed register then
2625         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2626     else
2627     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2628
2629 #endif /* alpha_TARGET_ARCH */
2630
2631 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2632
2633 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2634
2635 genJump (CmmLoad mem pk) = do
2636   Amode target code <- getAmode mem
2637   return (code `snocOL` JMP (OpAddr target))
2638
2639 genJump (CmmLit lit) = do
2640   return (unitOL (JMP (OpImm (litToImm lit))))
2641
2642 genJump expr = do
2643   (reg,code) <- getSomeReg expr
2644   return (code `snocOL` JMP (OpReg reg))
2645
2646 #endif /* i386_TARGET_ARCH */
2647
2648 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2649
2650 #if sparc_TARGET_ARCH
2651
2652 genJump (CmmLit (CmmLabel lbl))
2653   = return (toOL [CALL (Left target) 0 True, NOP])
2654   where
2655     target = ImmCLbl lbl
2656
2657 genJump tree
2658   = do
2659         (target, code) <- getSomeReg tree
2660         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2661
2662 #endif /* sparc_TARGET_ARCH */
2663
2664 #if powerpc_TARGET_ARCH
2665 genJump (CmmLit (CmmLabel lbl))
2666   = return (unitOL $ JMP lbl)
2667
2668 genJump tree
2669   = do
2670         (target,code) <- getSomeReg tree
2671         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2672 #endif /* powerpc_TARGET_ARCH */
2673
2674
2675 -- -----------------------------------------------------------------------------
2676 --  Unconditional branches
2677
2678 genBranch :: BlockId -> NatM InstrBlock
2679
2680 genBranch = return . toOL . mkBranchInstr
2681
2682 -- -----------------------------------------------------------------------------
2683 --  Conditional jumps
2684
2685 {-
2686 Conditional jumps are always to local labels, so we can use branch
2687 instructions.  We peek at the arguments to decide what kind of
2688 comparison to do.
2689
2690 ALPHA: For comparisons with 0, we're laughing, because we can just do
2691 the desired conditional branch.
2692
2693 I386: First, we have to ensure that the condition
2694 codes are set according to the supplied comparison operation.
2695
2696 SPARC: First, we have to ensure that the condition codes are set
2697 according to the supplied comparison operation.  We generate slightly
2698 different code for floating point comparisons, because a floating
2699 point operation cannot directly precede a @BF@.  We assume the worst
2700 and fill that slot with a @NOP@.
2701
2702 SPARC: Do not fill the delay slots here; you will confuse the register
2703 allocator.
2704 -}
2705
2706
2707 genCondJump
2708     :: BlockId      -- the branch target
2709     -> CmmExpr      -- the condition on which to branch
2710     -> NatM InstrBlock
2711
2712 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2713
2714 #if alpha_TARGET_ARCH
2715
2716 genCondJump id (StPrim op [x, StInt 0])
2717   = getRegister x                           `thenNat` \ register ->
2718     getNewRegNat (registerRep register)
2719                                     `thenNat` \ tmp ->
2720     let
2721         code   = registerCode register tmp
2722         value  = registerName register tmp
2723         pk     = registerRep register
2724         target = ImmCLbl lbl
2725     in
2726     returnSeq code [BI (cmpOp op) value target]
2727   where
2728     cmpOp CharGtOp = GTT
2729     cmpOp CharGeOp = GE
2730     cmpOp CharEqOp = EQQ
2731     cmpOp CharNeOp = NE
2732     cmpOp CharLtOp = LTT
2733     cmpOp CharLeOp = LE
2734     cmpOp IntGtOp = GTT
2735     cmpOp IntGeOp = GE
2736     cmpOp IntEqOp = EQQ
2737     cmpOp IntNeOp = NE
2738     cmpOp IntLtOp = LTT
2739     cmpOp IntLeOp = LE
2740     cmpOp WordGtOp = NE
2741     cmpOp WordGeOp = ALWAYS
2742     cmpOp WordEqOp = EQQ
2743     cmpOp WordNeOp = NE
2744     cmpOp WordLtOp = NEVER
2745     cmpOp WordLeOp = EQQ
2746     cmpOp AddrGtOp = NE
2747     cmpOp AddrGeOp = ALWAYS
2748     cmpOp AddrEqOp = EQQ
2749     cmpOp AddrNeOp = NE
2750     cmpOp AddrLtOp = NEVER
2751     cmpOp AddrLeOp = EQQ
2752
2753 genCondJump lbl (StPrim op [x, StDouble 0.0])
2754   = getRegister x                           `thenNat` \ register ->
2755     getNewRegNat (registerRep register)
2756                                     `thenNat` \ tmp ->
2757     let
2758         code   = registerCode register tmp
2759         value  = registerName register tmp
2760         pk     = registerRep register
2761         target = ImmCLbl lbl
2762     in
2763     return (code . mkSeqInstr (BF (cmpOp op) value target))
2764   where
2765     cmpOp FloatGtOp = GTT
2766     cmpOp FloatGeOp = GE
2767     cmpOp FloatEqOp = EQQ
2768     cmpOp FloatNeOp = NE
2769     cmpOp FloatLtOp = LTT
2770     cmpOp FloatLeOp = LE
2771     cmpOp DoubleGtOp = GTT
2772     cmpOp DoubleGeOp = GE
2773     cmpOp DoubleEqOp = EQQ
2774     cmpOp DoubleNeOp = NE
2775     cmpOp DoubleLtOp = LTT
2776     cmpOp DoubleLeOp = LE
2777
2778 genCondJump lbl (StPrim op [x, y])
2779   | fltCmpOp op
2780   = trivialFCode pr instr x y       `thenNat` \ register ->
2781     getNewRegNat F64                `thenNat` \ tmp ->
2782     let
2783         code   = registerCode register tmp
2784         result = registerName register tmp
2785         target = ImmCLbl lbl
2786     in
2787     return (code . mkSeqInstr (BF cond result target))
2788   where
2789     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2790
2791     fltCmpOp op = case op of
2792         FloatGtOp -> True
2793         FloatGeOp -> True
2794         FloatEqOp -> True
2795         FloatNeOp -> True
2796         FloatLtOp -> True
2797         FloatLeOp -> True
2798         DoubleGtOp -> True
2799         DoubleGeOp -> True
2800         DoubleEqOp -> True
2801         DoubleNeOp -> True
2802         DoubleLtOp -> True
2803         DoubleLeOp -> True
2804         _ -> False
2805     (instr, cond) = case op of
2806         FloatGtOp -> (FCMP TF LE, EQQ)
2807         FloatGeOp -> (FCMP TF LTT, EQQ)
2808         FloatEqOp -> (FCMP TF EQQ, NE)
2809         FloatNeOp -> (FCMP TF EQQ, EQQ)
2810         FloatLtOp -> (FCMP TF LTT, NE)
2811         FloatLeOp -> (FCMP TF LE, NE)
2812         DoubleGtOp -> (FCMP TF LE, EQQ)
2813         DoubleGeOp -> (FCMP TF LTT, EQQ)
2814         DoubleEqOp -> (FCMP TF EQQ, NE)
2815         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2816         DoubleLtOp -> (FCMP TF LTT, NE)
2817         DoubleLeOp -> (FCMP TF LE, NE)
2818
2819 genCondJump lbl (StPrim op [x, y])
2820   = trivialCode instr x y           `thenNat` \ register ->
2821     getNewRegNat IntRep             `thenNat` \ tmp ->
2822     let
2823         code   = registerCode register tmp
2824         result = registerName register tmp
2825         target = ImmCLbl lbl
2826     in
2827     return (code . mkSeqInstr (BI cond result target))
2828   where
2829     (instr, cond) = case op of
2830         CharGtOp -> (CMP LE, EQQ)
2831         CharGeOp -> (CMP LTT, EQQ)
2832         CharEqOp -> (CMP EQQ, NE)
2833         CharNeOp -> (CMP EQQ, EQQ)
2834         CharLtOp -> (CMP LTT, NE)
2835         CharLeOp -> (CMP LE, NE)
2836         IntGtOp -> (CMP LE, EQQ)
2837         IntGeOp -> (CMP LTT, EQQ)
2838         IntEqOp -> (CMP EQQ, NE)
2839         IntNeOp -> (CMP EQQ, EQQ)
2840         IntLtOp -> (CMP LTT, NE)
2841         IntLeOp -> (CMP LE, NE)
2842         WordGtOp -> (CMP ULE, EQQ)
2843         WordGeOp -> (CMP ULT, EQQ)
2844         WordEqOp -> (CMP EQQ, NE)
2845         WordNeOp -> (CMP EQQ, EQQ)
2846         WordLtOp -> (CMP ULT, NE)
2847         WordLeOp -> (CMP ULE, NE)
2848         AddrGtOp -> (CMP ULE, EQQ)
2849         AddrGeOp -> (CMP ULT, EQQ)
2850         AddrEqOp -> (CMP EQQ, NE)
2851         AddrNeOp -> (CMP EQQ, EQQ)
2852         AddrLtOp -> (CMP ULT, NE)
2853         AddrLeOp -> (CMP ULE, NE)
2854
2855 #endif /* alpha_TARGET_ARCH */
2856
2857 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2858
2859 #if i386_TARGET_ARCH
2860
2861 genCondJump id bool = do
2862   CondCode _ cond code <- getCondCode bool
2863   return (code `snocOL` JXX cond id)
2864
2865 #endif
2866
2867 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2868
2869 #if x86_64_TARGET_ARCH
2870
2871 genCondJump id bool = do
2872   CondCode is_float cond cond_code <- getCondCode bool
2873   if not is_float
2874     then
2875         return (cond_code `snocOL` JXX cond id)
2876     else do
2877         lbl <- getBlockIdNat
2878
2879         -- see comment with condFltReg
2880         let code = case cond of
2881                         NE  -> or_unordered
2882                         GU  -> plain_test
2883                         GEU -> plain_test
2884                         _   -> and_ordered
2885
2886             plain_test = unitOL (
2887                   JXX cond id
2888                 )
2889             or_unordered = toOL [
2890                   JXX cond id,
2891                   JXX PARITY id
2892                 ]
2893             and_ordered = toOL [
2894                   JXX PARITY lbl,
2895                   JXX cond id,
2896                   JXX ALWAYS lbl,
2897                   NEWBLOCK lbl
2898                 ]
2899         return (cond_code `appOL` code)
2900
2901 #endif
2902
2903 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2904
2905 #if sparc_TARGET_ARCH
2906
2907 genCondJump (BlockId id) bool = do
2908   CondCode is_float cond code <- getCondCode bool
2909   return (
2910        code `appOL`
2911        toOL (
2912          if   is_float
2913          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2914          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2915        )
2916     )
2917
2918 #endif /* sparc_TARGET_ARCH */
2919
2920
2921 #if powerpc_TARGET_ARCH
2922
2923 genCondJump id bool = do
2924   CondCode is_float cond code <- getCondCode bool
2925   return (code `snocOL` BCC cond id)
2926
2927 #endif /* powerpc_TARGET_ARCH */
2928
2929
2930 -- -----------------------------------------------------------------------------
2931 --  Generating C calls
2932
2933 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2934 -- @get_arg@, which moves the arguments to the correct registers/stack
2935 -- locations.  Apart from that, the code is easy.
2936 --
2937 -- (If applicable) Do not fill the delay slots here; you will confuse the
2938 -- register allocator.
2939
2940 genCCall
2941     :: CmmCallTarget            -- function to call
2942     -> CmmHintFormals           -- where to put the result
2943     -> CmmActuals               -- arguments (of mixed type)
2944     -> NatM InstrBlock
2945
2946 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2947
2948 #if alpha_TARGET_ARCH
2949
2950 ccallResultRegs =
2951
2952 genCCall fn cconv result_regs args
2953   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2954                           `thenNat` \ ((unused,_), argCode) ->
2955     let
2956         nRegs = length allArgRegs - length unused
2957         code = asmSeqThen (map ($ []) argCode)
2958     in
2959         returnSeq code [
2960             LDA pv (AddrImm (ImmLab (ptext fn))),
2961             JSR ra (AddrReg pv) nRegs,
2962             LDGP gp (AddrReg ra)]
2963   where
2964     ------------------------
2965     {-  Try to get a value into a specific register (or registers) for
2966         a call.  The first 6 arguments go into the appropriate
2967         argument register (separate registers for integer and floating
2968         point arguments, but used in lock-step), and the remaining
2969         arguments are dumped to the stack, beginning at 0(sp).  Our
2970         first argument is a pair of the list of remaining argument
2971         registers to be assigned for this call and the next stack
2972         offset to use for overflowing arguments.  This way,
2973         @get_Arg@ can be applied to all of a call's arguments using
2974         @mapAccumLNat@.
2975     -}
2976     get_arg
2977         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
2978         -> StixTree             -- Current argument
2979         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2980
2981     -- We have to use up all of our argument registers first...
2982
2983     get_arg ((iDst,fDst):dsts, offset) arg
2984       = getRegister arg                     `thenNat` \ register ->
2985         let
2986             reg  = if isFloatingRep pk then fDst else iDst
2987             code = registerCode register reg
2988             src  = registerName register reg
2989             pk   = registerRep register
2990         in
2991         return (
2992             if isFloatingRep pk then
2993                 ((dsts, offset), if isFixed register then
2994                     code . mkSeqInstr (FMOV src fDst)
2995                     else code)
2996             else
2997                 ((dsts, offset), if isFixed register then
2998                     code . mkSeqInstr (OR src (RIReg src) iDst)
2999                     else code))
3000
3001     -- Once we have run out of argument registers, we move to the
3002     -- stack...
3003
3004     get_arg ([], offset) arg
3005       = getRegister arg                 `thenNat` \ register ->
3006         getNewRegNat (registerRep register)
3007                                         `thenNat` \ tmp ->
3008         let
3009             code = registerCode register tmp
3010             src  = registerName register tmp
3011             pk   = registerRep register
3012             sz   = primRepToSize pk
3013         in
3014         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3015
3016 #endif /* alpha_TARGET_ARCH */
3017
3018 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3019
3020 #if i386_TARGET_ARCH
3021
3022 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3023         -- write barrier compiles to no code on x86/x86-64;
3024         -- we keep it this long in order to prevent earlier optimisations.
3025
3026 -- we only cope with a single result for foreign calls
3027 genCCall (CmmPrim op) [(r,_)] args = do
3028   case op of
3029         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3030         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3031
3032         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3033         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3034
3035         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3036         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3037
3038         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3039         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3040
3041         other_op    -> outOfLineFloatOp op r args
3042  where
3043   actuallyInlineFloatOp rep instr [(x,_)]
3044         = do res <- trivialUFCode rep instr x
3045              any <- anyReg res
3046              return (any (getRegisterReg (CmmLocal r)))
3047
3048 genCCall target dest_regs args = do
3049     let
3050         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
3051 #if !darwin_TARGET_OS
3052         tot_arg_size        = sum sizes
3053 #else
3054         raw_arg_size        = sum sizes
3055         tot_arg_size        = roundTo 16 raw_arg_size
3056         arg_pad_size        = tot_arg_size - raw_arg_size
3057     delta0 <- getDeltaNat
3058     setDeltaNat (delta0 - arg_pad_size)
3059 #endif
3060
3061     push_codes <- mapM push_arg (reverse args)
3062     delta <- getDeltaNat
3063
3064     -- in
3065     -- deal with static vs dynamic call targets
3066     (callinsns,cconv) <-
3067       case target of
3068         -- CmmPrim -> ...
3069         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3070            -> -- ToDo: stdcall arg sizes
3071               return (unitOL (CALL (Left fn_imm) []), conv)
3072            where fn_imm = ImmCLbl lbl
3073         CmmForeignCall expr conv
3074            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3075                  ASSERT(dyn_rep == I32)
3076                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3077
3078     let push_code
3079 #if darwin_TARGET_OS
3080             | arg_pad_size /= 0
3081             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3082                     DELTA (delta0 - arg_pad_size)]
3083               `appOL` concatOL push_codes
3084             | otherwise
3085 #endif
3086             = concatOL push_codes
3087         call = callinsns `appOL`
3088                toOL (
3089                         -- Deallocate parameters after call for ccall;
3090                         -- but not for stdcall (callee does it)
3091                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3092                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3093                   ++
3094                   [DELTA (delta + tot_arg_size)]
3095                )
3096     -- in
3097     setDeltaNat (delta + tot_arg_size)
3098
3099     let
3100         -- assign the results, if necessary
3101         assign_code []     = nilOL
3102         assign_code [(dest,_hint)] =
3103           case rep of
3104                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3105                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3106                 F32 -> unitOL (GMOV fake0 r_dest)
3107                 F64 -> unitOL (GMOV fake0 r_dest)
3108                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3109           where
3110                 r_dest_hi = getHiVRegFromLo r_dest
3111                 rep = localRegRep dest
3112                 r_dest = getRegisterReg (CmmLocal dest)
3113         assign_code many = panic "genCCall.assign_code many"
3114
3115     return (push_code `appOL`
3116             call `appOL`
3117             assign_code dest_regs)
3118
3119   where
3120     arg_size F64 = 8
3121     arg_size F32 = 4
3122     arg_size I64 = 8
3123     arg_size _   = 4
3124
3125     roundTo a x | x `mod` a == 0 = x
3126                 | otherwise = x + a - (x `mod` a)
3127
3128
3129     push_arg :: (CmmExpr,MachHint){-current argument-}
3130                     -> NatM InstrBlock  -- code
3131
3132     push_arg (arg,_hint) -- we don't need the hints on x86
3133       | arg_rep == I64 = do
3134         ChildCode64 code r_lo <- iselExpr64 arg
3135         delta <- getDeltaNat
3136         setDeltaNat (delta - 8)
3137         let
3138             r_hi = getHiVRegFromLo r_lo
3139         -- in
3140         return (       code `appOL`
3141                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3142                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3143                              DELTA (delta-8)]
3144             )
3145
3146       | otherwise = do
3147         (code, reg, sz) <- get_op arg
3148         delta <- getDeltaNat
3149         let size = arg_size sz
3150         setDeltaNat (delta-size)
3151         if (case sz of F64 -> True; F32 -> True; _ -> False)
3152            then return (code `appOL`
3153                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3154                               DELTA (delta-size),
3155                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3156                                                         EAIndexNone
3157                                                         (ImmInt 0))]
3158                        )
3159            else return (code `snocOL`
3160                         PUSH I32 (OpReg reg) `snocOL`
3161                         DELTA (delta-size)
3162                        )
3163       where
3164          arg_rep = cmmExprRep arg
3165
3166     ------------
3167     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3168     get_op op = do
3169         (reg,code) <- getSomeReg op
3170         return (code, reg, cmmExprRep op)
3171
3172 #endif /* i386_TARGET_ARCH */
3173
3174 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3175
3176 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
3177   -> NatM InstrBlock
3178 outOfLineFloatOp mop res args
3179   = do
3180       targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3181       let target = CmmForeignCall targetExpr CCallConv
3182
3183       if localRegRep res == F64
3184         then
3185           stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe)
3186         else do
3187           uq <- getUniqueNat
3188           let
3189             tmp = LocalReg uq F64 KindNonPtr
3190           -- in
3191           code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe)
3192           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3193           return (code1 `appOL` code2)
3194   where
3195         lbl = mkForeignLabel fn Nothing False
3196
3197         fn = case mop of
3198               MO_F32_Sqrt  -> FSLIT("sqrtf")
3199               MO_F32_Sin   -> FSLIT("sinf")
3200               MO_F32_Cos   -> FSLIT("cosf")
3201               MO_F32_Tan   -> FSLIT("tanf")
3202               MO_F32_Exp   -> FSLIT("expf")
3203               MO_F32_Log   -> FSLIT("logf")
3204
3205               MO_F32_Asin  -> FSLIT("asinf")
3206               MO_F32_Acos  -> FSLIT("acosf")
3207               MO_F32_Atan  -> FSLIT("atanf")
3208
3209               MO_F32_Sinh  -> FSLIT("sinhf")
3210               MO_F32_Cosh  -> FSLIT("coshf")
3211               MO_F32_Tanh  -> FSLIT("tanhf")
3212               MO_F32_Pwr   -> FSLIT("powf")
3213
3214               MO_F64_Sqrt  -> FSLIT("sqrt")
3215               MO_F64_Sin   -> FSLIT("sin")
3216               MO_F64_Cos   -> FSLIT("cos")
3217               MO_F64_Tan   -> FSLIT("tan")
3218               MO_F64_Exp   -> FSLIT("exp")
3219               MO_F64_Log   -> FSLIT("log")
3220
3221               MO_F64_Asin  -> FSLIT("asin")
3222               MO_F64_Acos  -> FSLIT("acos")
3223               MO_F64_Atan  -> FSLIT("atan")
3224
3225               MO_F64_Sinh  -> FSLIT("sinh")
3226               MO_F64_Cosh  -> FSLIT("cosh")
3227               MO_F64_Tanh  -> FSLIT("tanh")
3228               MO_F64_Pwr   -> FSLIT("pow")
3229
3230 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3231
3232 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3233
3234 #if x86_64_TARGET_ARCH
3235
3236 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3237         -- write barrier compiles to no code on x86/x86-64;
3238         -- we keep it this long in order to prevent earlier optimisations.
3239
3240 genCCall (CmmPrim op) [(r,_)] args =
3241   outOfLineFloatOp op r args
3242
3243 genCCall target dest_regs args = do
3244
3245         -- load up the register arguments
3246     (stack_args, aregs, fregs, load_args_code)
3247          <- load_args args allArgRegs allFPArgRegs nilOL
3248
3249     let
3250         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3251         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3252         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3253                 -- for annotating the call instruction with
3254
3255         sse_regs = length fp_regs_used
3256
3257         tot_arg_size = arg_size * length stack_args
3258
3259         -- On entry to the called function, %rsp should be aligned
3260         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3261         -- the return address is 16-byte aligned).  In STG land
3262         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3263         -- need to make sure we push a multiple of 16-bytes of args,
3264         -- plus the return address, to get the correct alignment.
3265         -- Urg, this is hard.  We need to feed the delta back into
3266         -- the arg pushing code.
3267     (real_size, adjust_rsp) <-
3268         if tot_arg_size `rem` 16 == 0
3269             then return (tot_arg_size, nilOL)
3270             else do -- we need to adjust...
3271                 delta <- getDeltaNat
3272                 setDeltaNat (delta-8)
3273                 return (tot_arg_size+8, toOL [
3274                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3275                                 DELTA (delta-8)
3276                         ])
3277
3278         -- push the stack args, right to left
3279     push_code <- push_args (reverse stack_args) nilOL
3280     delta <- getDeltaNat
3281
3282     -- deal with static vs dynamic call targets
3283     (callinsns,cconv) <-
3284       case target of
3285         -- CmmPrim -> ...
3286         CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3287            -> -- ToDo: stdcall arg sizes
3288               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3289            where fn_imm = ImmCLbl lbl
3290         CmmForeignCall expr conv
3291            -> do (dyn_r, dyn_c) <- getSomeReg expr
3292                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3293
3294     let
3295         -- The x86_64 ABI requires us to set %al to the number of SSE
3296         -- registers that contain arguments, if the called routine
3297         -- is a varargs function.  We don't know whether it's a
3298         -- varargs function or not, so we have to assume it is.
3299         --
3300         -- It's not safe to omit this assignment, even if the number
3301         -- of SSE regs in use is zero.  If %al is larger than 8
3302         -- on entry to a varargs function, seg faults ensue.
3303         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3304
3305     let call = callinsns `appOL`
3306                toOL (
3307                         -- Deallocate parameters after call for ccall;
3308                         -- but not for stdcall (callee does it)
3309                   (if cconv == StdCallConv || real_size==0 then [] else
3310                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3311                   ++
3312                   [DELTA (delta + real_size)]
3313                )
3314     -- in
3315     setDeltaNat (delta + real_size)
3316
3317     let
3318         -- assign the results, if necessary
3319         assign_code []     = nilOL
3320         assign_code [(dest,_hint)] =
3321           case rep of
3322                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3323                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3324                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3325           where
3326                 rep = localRegRep dest
3327                 r_dest = getRegisterReg (CmmLocal dest)
3328         assign_code many = panic "genCCall.assign_code many"
3329
3330     return (load_args_code      `appOL`
3331             adjust_rsp          `appOL`
3332             push_code           `appOL`
3333             assign_eax sse_regs `appOL`
3334             call                `appOL`
3335             assign_code dest_regs)
3336
3337   where
3338     arg_size = 8 -- always, at the mo
3339
3340     load_args :: [(CmmExpr,MachHint)]
3341               -> [Reg]                  -- int regs avail for args
3342               -> [Reg]                  -- FP regs avail for args
3343               -> InstrBlock
3344               -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3345     load_args args [] [] code     =  return (args, [], [], code)
3346         -- no more regs to use
3347     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3348         -- no more args to push
3349     load_args ((arg,hint) : rest) aregs fregs code
3350         | isFloatingRep arg_rep =
3351         case fregs of
3352           [] -> push_this_arg
3353           (r:rs) -> do
3354              arg_code <- getAnyReg arg
3355              load_args rest aregs rs (code `appOL` arg_code r)
3356         | otherwise =
3357         case aregs of
3358           [] -> push_this_arg
3359           (r:rs) -> do
3360              arg_code <- getAnyReg arg
3361              load_args rest rs fregs (code `appOL` arg_code r)
3362         where
3363           arg_rep = cmmExprRep arg
3364
3365           push_this_arg = do
3366             (args',ars,frs,code') <- load_args rest aregs fregs code
3367             return ((arg,hint):args', ars, frs, code')
3368
3369     push_args [] code = return code
3370     push_args ((arg,hint):rest) code
3371        | isFloatingRep arg_rep = do
3372          (arg_reg, arg_code) <- getSomeReg arg
3373          delta <- getDeltaNat
3374          setDeltaNat (delta-arg_size)
3375          let code' = code `appOL` arg_code `appOL` toOL [
3376                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3377                         DELTA (delta-arg_size),
3378                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3379          push_args rest code'
3380
3381        | otherwise = do
3382        -- we only ever generate word-sized function arguments.  Promotion
3383        -- has already happened: our Int8# type is kept sign-extended
3384        -- in an Int#, for example.
3385          ASSERT(arg_rep == I64) return ()
3386          (arg_op, arg_code) <- getOperand arg
3387          delta <- getDeltaNat
3388          setDeltaNat (delta-arg_size)
3389          let code' = code `appOL` toOL [PUSH I64 arg_op,
3390                                         DELTA (delta-arg_size)]
3391          push_args rest code'
3392         where
3393           arg_rep = cmmExprRep arg
3394 #endif
3395
3396 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3397
3398 #if sparc_TARGET_ARCH
3399 {-
3400    The SPARC calling convention is an absolute
3401    nightmare.  The first 6x32 bits of arguments are mapped into
3402    %o0 through %o5, and the remaining arguments are dumped to the
3403    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3404
3405    If we have to put args on the stack, move %o6==%sp down by
3406    the number of words to go on the stack, to ensure there's enough space.
3407
3408    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3409    16 words above the stack pointer is a word for the address of
3410    a structure return value.  I use this as a temporary location
3411    for moving values from float to int regs.  Certainly it isn't
3412    safe to put anything in the 16 words starting at %sp, since
3413    this area can get trashed at any time due to window overflows
3414    caused by signal handlers.
3415
3416    A final complication (if the above isn't enough) is that
3417    we can't blithely calculate the arguments one by one into
3418    %o0 .. %o5.  Consider the following nested calls:
3419
3420        fff a (fff b c)
3421
3422    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3423    the inner call will itself use %o0, which trashes the value put there
3424    in preparation for the outer call.  Upshot: we need to calculate the
3425    args into temporary regs, and move those to arg regs or onto the
3426    stack only immediately prior to the call proper.  Sigh.
3427 -}
3428
3429 genCCall target dest_regs argsAndHints = do
3430     let
3431         args = map fst argsAndHints
3432     argcode_and_vregs <- mapM arg_to_int_vregs args
3433     let
3434         (argcodes, vregss) = unzip argcode_and_vregs
3435         n_argRegs          = length allArgRegs
3436         n_argRegs_used     = min (length vregs) n_argRegs
3437         vregs              = concat vregss
3438     -- deal with static vs dynamic call targets
3439     callinsns <- (case target of
3440         CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3441                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3442         CmmForeignCall expr conv -> do
3443                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3444                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3445         CmmPrim mop -> do
3446                   (res, reduce) <- outOfLineFloatOp mop
3447                   lblOrMopExpr <- case res of
3448                        Left lbl -> do
3449                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3450                        Right mopExpr -> do
3451                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3452                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3453                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3454
3455       )
3456     let
3457         argcode = concatOL argcodes
3458         (move_sp_down, move_sp_up)
3459            = let diff = length vregs - n_argRegs
3460                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3461              in  if   nn <= 0
3462                  then (nilOL, nilOL)
3463                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3464         transfer_code
3465            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3466     return (argcode       `appOL`
3467             move_sp_down  `appOL`
3468             transfer_code `appOL`
3469             callinsns     `appOL`
3470             unitOL NOP    `appOL`
3471             move_sp_up)
3472   where
3473      -- move args from the integer vregs into which they have been
3474      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3475      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3476
3477      move_final [] _ offset          -- all args done
3478         = []
3479
3480      move_final (v:vs) [] offset     -- out of aregs; move to stack
3481         = ST I32 v (spRel offset)
3482           : move_final vs [] (offset+1)
3483
3484      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3485         = OR False g0 (RIReg v) a
3486           : move_final vs az offset
3487
3488      -- generate code to calculate an argument, and move it into one
3489      -- or two integer vregs.
3490      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3491      arg_to_int_vregs arg
3492         | (cmmExprRep arg) == I64
3493         = do
3494           (ChildCode64 code r_lo) <- iselExpr64 arg
3495           let
3496               r_hi = getHiVRegFromLo r_lo
3497           return (code, [r_hi, r_lo])
3498         | otherwise
3499         = do
3500           (src, code) <- getSomeReg arg
3501           tmp <- getNewRegNat (cmmExprRep arg)
3502           let
3503               pk   = cmmExprRep arg
3504           case pk of
3505              F64 -> do
3506                       v1 <- getNewRegNat I32
3507                       v2 <- getNewRegNat I32
3508                       return (
3509                         code                          `snocOL`
3510                         FMOV F64 src f0                `snocOL`
3511                         ST   F32  f0 (spRel 16)         `snocOL`
3512                         LD   I32  (spRel 16) v1         `snocOL`
3513                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3514                         LD   I32  (spRel 16) v2
3515                         ,
3516                         [v1,v2]
3517                        )
3518              F32 -> do
3519                       v1 <- getNewRegNat I32
3520                       return (
3521                         code                    `snocOL`
3522                         ST   F32  src (spRel 16)  `snocOL`
3523                         LD   I32  (spRel 16) v1
3524                         ,
3525                         [v1]
3526                        )
3527              other -> do
3528                         v1 <- getNewRegNat I32
3529                         return (
3530                           code `snocOL` OR False g0 (RIReg src) v1
3531                           ,
3532                           [v1]
3533                          )
3534 outOfLineFloatOp mop =
3535     do
3536       mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3537                   mkForeignLabel functionName Nothing True
3538       let mopLabelOrExpr = case mopExpr of
3539                         CmmLit (CmmLabel lbl) -> Left lbl
3540                         _ -> Right mopExpr
3541       return (mopLabelOrExpr, reduce)
3542             where
3543                 (reduce, functionName) = case mop of
3544                   MO_F32_Exp    -> (True,  FSLIT("exp"))
3545                   MO_F32_Log    -> (True,  FSLIT("log"))
3546                   MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
3547
3548                   MO_F32_Sin    -> (True,  FSLIT("sin"))
3549                   MO_F32_Cos    -> (True,  FSLIT("cos"))
3550                   MO_F32_Tan    -> (True,  FSLIT("tan"))
3551
3552                   MO_F32_Asin   -> (True,  FSLIT("asin"))
3553                   MO_F32_Acos   -> (True,  FSLIT("acos"))
3554                   MO_F32_Atan   -> (True,  FSLIT("atan"))
3555
3556                   MO_F32_Sinh   -> (True,  FSLIT("sinh"))
3557                   MO_F32_Cosh   -> (True,  FSLIT("cosh"))
3558                   MO_F32_Tanh   -> (True,  FSLIT("tanh"))
3559
3560                   MO_F64_Exp    -> (False, FSLIT("exp"))
3561                   MO_F64_Log    -> (False, FSLIT("log"))
3562                   MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
3563
3564                   MO_F64_Sin    -> (False, FSLIT("sin"))
3565                   MO_F64_Cos    -> (False, FSLIT("cos"))
3566                   MO_F64_Tan    -> (False, FSLIT("tan"))
3567
3568                   MO_F64_Asin   -> (False, FSLIT("asin"))
3569                   MO_F64_Acos   -> (False, FSLIT("acos"))
3570                   MO_F64_Atan   -> (False, FSLIT("atan"))
3571
3572                   MO_F64_Sinh   -> (False, FSLIT("sinh"))
3573                   MO_F64_Cosh   -> (False, FSLIT("cosh"))
3574                   MO_F64_Tanh   -> (False, FSLIT("tanh"))
3575
3576                   other -> pprPanic "outOfLineFloatOp(sparc) "
3577                                 (pprCallishMachOp mop)
3578
3579 #endif /* sparc_TARGET_ARCH */
3580
3581 #if powerpc_TARGET_ARCH
3582
3583 #if darwin_TARGET_OS || linux_TARGET_OS
3584 {-
3585     The PowerPC calling convention for Darwin/Mac OS X
3586     is described in Apple's document
3587     "Inside Mac OS X - Mach-O Runtime Architecture".
3588
3589     PowerPC Linux uses the System V Release 4 Calling Convention
3590     for PowerPC. It is described in the
3591     "System V Application Binary Interface PowerPC Processor Supplement".
3592
3593     Both conventions are similar:
3594     Parameters may be passed in general-purpose registers starting at r3, in
3595     floating point registers starting at f1, or on the stack.
3596
3597     But there are substantial differences:
3598     * The number of registers used for parameter passing and the exact set of
3599       nonvolatile registers differs (see MachRegs.lhs).
3600     * On Darwin, stack space is always reserved for parameters, even if they are
3601       passed in registers. The called routine may choose to save parameters from
3602       registers to the corresponding space on the stack.
3603     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3604       parameter is passed in an FPR.
3605     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3606       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3607       Darwin just treats an I64 like two separate I32s (high word first).
3608     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3609       4-byte aligned like everything else on Darwin.
3610     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3611       PowerPC Linux does not agree, so neither do we.
3612
3613     According to both conventions, The parameter area should be part of the
3614     caller's stack frame, allocated in the caller's prologue code (large enough
3615     to hold the parameter lists for all called routines). The NCG already
3616     uses the stack for register spilling, leaving 64 bytes free at the top.
3617     If we need a larger parameter area than that, we just allocate a new stack
3618     frame just before ccalling.
3619 -}
3620
3621
3622 genCCall (CmmPrim MO_WriteBarrier) _ _ _
3623  = return $ unitOL LWSYNC
3624
3625 genCCall target dest_regs argsAndHints
3626   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3627         -- we rely on argument promotion in the codeGen
3628     do
3629         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3630                                                         (zip args argReps)
3631                                                         allArgRegs allFPArgRegs
3632                                                         initialStackOffset
3633                                                         (toOL []) []
3634
3635         (labelOrExpr, reduceToF32) <- case target of
3636             CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3637             CmmForeignCall expr conv -> return  (Right expr, False)
3638             CmmPrim mop -> outOfLineFloatOp mop
3639
3640         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3641             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3642
3643         case labelOrExpr of
3644             Left lbl -> do
3645                 return (         codeBefore
3646                         `snocOL` BL lbl usedRegs
3647                         `appOL`  codeAfter)
3648             Right dyn -> do
3649                 (dynReg, dynCode) <- getSomeReg dyn
3650                 return (         dynCode
3651                         `snocOL` MTCTR dynReg
3652                         `appOL`  codeBefore
3653                         `snocOL` BCTRL usedRegs
3654                         `appOL`  codeAfter)
3655     where
3656 #if darwin_TARGET_OS
3657         initialStackOffset = 24
3658             -- size of linkage area + size of arguments, in bytes
3659         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3660                                        map machRepByteWidth argReps
3661 #elif linux_TARGET_OS
3662         initialStackOffset = 8
3663         stackDelta finalStack = roundTo 16 finalStack
3664 #endif
3665         args = map fst argsAndHints
3666         argReps = map cmmExprRep args
3667
3668         roundTo a x | x `mod` a == 0 = x
3669                     | otherwise = x + a - (x `mod` a)
3670
3671         move_sp_down finalStack
3672                | delta > 64 =
3673                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3674                               DELTA (-delta)]
3675                | otherwise = nilOL
3676                where delta = stackDelta finalStack
3677         move_sp_up finalStack
3678                | delta > 64 =
3679                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3680                               DELTA 0]
3681                | otherwise = nilOL
3682                where delta = stackDelta finalStack
3683
3684
3685         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3686         passArguments ((arg,I64):args) gprs fprs stackOffset
3687                accumCode accumUsed =
3688             do
3689                 ChildCode64 code vr_lo <- iselExpr64 arg
3690                 let vr_hi = getHiVRegFromLo vr_lo
3691
3692 #if darwin_TARGET_OS
3693                 passArguments args
3694                               (drop 2 gprs)
3695                               fprs
3696                               (stackOffset+8)
3697                               (accumCode `appOL` code
3698                                     `snocOL` storeWord vr_hi gprs stackOffset
3699                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3700                               ((take 2 gprs) ++ accumUsed)
3701             where
3702                 storeWord vr (gpr:_) offset = MR gpr vr
3703                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3704
3705 #elif linux_TARGET_OS
3706                 let stackOffset' = roundTo 8 stackOffset
3707                     stackCode = accumCode `appOL` code
3708                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3709                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3710                     regCode hireg loreg =
3711                         accumCode `appOL` code
3712                             `snocOL` MR hireg vr_hi
3713                             `snocOL` MR loreg vr_lo
3714
3715                 case gprs of
3716                     hireg : loreg : regs | even (length gprs) ->
3717                         passArguments args regs fprs stackOffset
3718                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3719                     _skipped : hireg : loreg : regs ->
3720                         passArguments args regs fprs stackOffset
3721                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3722                     _ -> -- only one or no regs left
3723                         passArguments args [] fprs (stackOffset'+8)
3724                                       stackCode accumUsed
3725 #endif
3726
3727         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3728             | reg : _ <- regs = do
3729                 register <- getRegister arg
3730                 let code = case register of
3731                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3732                             Any _ acode -> acode reg
3733                 passArguments args
3734                               (drop nGprs gprs)
3735                               (drop nFprs fprs)
3736 #if darwin_TARGET_OS
3737         -- The Darwin ABI requires that we reserve stack slots for register parameters
3738                               (stackOffset + stackBytes)
3739 #elif linux_TARGET_OS
3740         -- ... the SysV ABI doesn't.
3741                               stackOffset
3742 #endif
3743                               (accumCode `appOL` code)
3744                               (reg : accumUsed)
3745             | otherwise = do
3746                 (vr, code) <- getSomeReg arg
3747                 passArguments args
3748                               (drop nGprs gprs)
3749                               (drop nFprs fprs)
3750                               (stackOffset' + stackBytes)
3751                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3752                               accumUsed
3753             where
3754 #if darwin_TARGET_OS
3755         -- stackOffset is at least 4-byte aligned
3756         -- The Darwin ABI is happy with that.
3757                 stackOffset' = stackOffset
3758 #else
3759         -- ... the SysV ABI requires 8-byte alignment for doubles.
3760                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3761                              | otherwise  =           stackOffset
3762 #endif
3763                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3764                 (nGprs, nFprs, stackBytes, regs) = case rep of
3765                     I32 -> (1, 0, 4, gprs)
3766 #if darwin_TARGET_OS
3767         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3768         -- we use the FPRs.
3769                     F32 -> (1, 1, 4, fprs)
3770                     F64 -> (2, 1, 8, fprs)
3771 #elif linux_TARGET_OS
3772         -- ... the SysV ABI doesn't.
3773                     F32 -> (0, 1, 4, fprs)
3774                     F64 -> (0, 1, 8, fprs)
3775 #endif
3776
3777         moveResult reduceToF32 =
3778             case dest_regs of
3779                 [] -> nilOL
3780                 [(dest, _hint)]
3781                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3782                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3783                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3784                                           MR r_dest r4]
3785                     | otherwise -> unitOL (MR r_dest r3)
3786                     where rep = cmmRegRep dest
3787                           r_dest = getRegisterReg dest
3788
3789         outOfLineFloatOp mop =
3790             do
3791                 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3792                               mkForeignLabel functionName Nothing True
3793                 let mopLabelOrExpr = case mopExpr of
3794                         CmmLit (CmmLabel lbl) -> Left lbl
3795                         _ -> Right mopExpr
3796                 return (mopLabelOrExpr, reduce)
3797             where
3798                 (functionName, reduce) = case mop of
3799                     MO_F32_Exp   -> (FSLIT("exp"), True)
3800                     MO_F32_Log   -> (FSLIT("log"), True)
3801                     MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
3802
3803                     MO_F32_Sin   -> (FSLIT("sin"), True)
3804                     MO_F32_Cos   -> (FSLIT("cos"), True)
3805                     MO_F32_Tan   -> (FSLIT("tan"), True)
3806
3807                     MO_F32_Asin  -> (FSLIT("asin"), True)
3808                     MO_F32_Acos  -> (FSLIT("acos"), True)
3809                     MO_F32_Atan  -> (FSLIT("atan"), True)
3810
3811                     MO_F32_Sinh  -> (FSLIT("sinh"), True)
3812                     MO_F32_Cosh  -> (FSLIT("cosh"), True)
3813                     MO_F32_Tanh  -> (FSLIT("tanh"), True)
3814                     MO_F32_Pwr   -> (FSLIT("pow"), True)
3815
3816                     MO_F64_Exp   -> (FSLIT("exp"), False)
3817                     MO_F64_Log   -> (FSLIT("log"), False)
3818                     MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
3819
3820                     MO_F64_Sin   -> (FSLIT("sin"), False)
3821                     MO_F64_Cos   -> (FSLIT("cos"), False)
3822                     MO_F64_Tan   -> (FSLIT("tan"), False)
3823
3824                     MO_F64_Asin  -> (FSLIT("asin"), False)
3825                     MO_F64_Acos  -> (FSLIT("acos"), False)
3826                     MO_F64_Atan  -> (FSLIT("atan"), False)
3827
3828                     MO_F64_Sinh  -> (FSLIT("sinh"), False)
3829                     MO_F64_Cosh  -> (FSLIT("cosh"), False)
3830                     MO_F64_Tanh  -> (FSLIT("tanh"), False)
3831                     MO_F64_Pwr   -> (FSLIT("pow"), False)
3832                     other -> pprPanic "genCCall(ppc): unknown callish op"
3833                                     (pprCallishMachOp other)
3834
3835 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3836
3837 #endif /* powerpc_TARGET_ARCH */
3838
3839
3840 -- -----------------------------------------------------------------------------
3841 -- Generating a table-branch
3842
3843 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3844
3845 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3846 genSwitch expr ids
3847   | opt_PIC
3848   = do
3849         (reg,e_code) <- getSomeReg expr
3850         lbl <- getNewLabelNat
3851         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3852         (tableReg,t_code) <- getSomeReg $ dynRef
3853         let
3854             jumpTable = map jumpTableEntryRel ids
3855
3856             jumpTableEntryRel Nothing
3857                 = CmmStaticLit (CmmInt 0 wordRep)
3858             jumpTableEntryRel (Just (BlockId id))
3859                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3860                 where blockLabel = mkAsmTempLabel id
3861
3862             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3863                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3864
3865 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3866     -- on Mac OS X/x86_64, put the jump table in the text section
3867     -- to work around a limitation of the linker.
3868     -- ld64 is unable to handle the relocations for
3869     --     .quad L1 - L0
3870     -- if L0 is not preceded by a non-anonymous label in its section.
3871
3872             code = e_code `appOL` t_code `appOL` toOL [
3873                             ADD wordRep op (OpReg tableReg),
3874                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3875                             LDATA Text (CmmDataLabel lbl : jumpTable)
3876                     ]
3877 #else
3878             code = e_code `appOL` t_code `appOL` toOL [
3879                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3880                             ADD wordRep op (OpReg tableReg),
3881                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3882                     ]
3883 #endif
3884         return code
3885   | otherwise
3886   = do
3887         (reg,e_code) <- getSomeReg expr
3888         lbl <- getNewLabelNat
3889         let
3890             jumpTable = map jumpTableEntry ids
3891             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3892             code = e_code `appOL` toOL [
3893                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3894                     JMP_TBL op [ id | Just id <- ids ]
3895                  ]
3896         -- in
3897         return code
3898 #elif powerpc_TARGET_ARCH
3899 genSwitch expr ids
3900   | opt_PIC
3901   = do
3902         (reg,e_code) <- getSomeReg expr
3903         tmp <- getNewRegNat I32
3904         lbl <- getNewLabelNat
3905         dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3906         (tableReg,t_code) <- getSomeReg $ dynRef
3907         let
3908             jumpTable = map jumpTableEntryRel ids
3909
3910             jumpTableEntryRel Nothing
3911                 = CmmStaticLit (CmmInt 0 wordRep)
3912             jumpTableEntryRel (Just (BlockId id))
3913                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3914                 where blockLabel = mkAsmTempLabel id
3915
3916             code = e_code `appOL` t_code `appOL` toOL [
3917                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3918                             SLW tmp reg (RIImm (ImmInt 2)),
3919                             LD I32 tmp (AddrRegReg tableReg tmp),
3920                             ADD tmp tmp (RIReg tableReg),
3921                             MTCTR tmp,
3922                             BCTR [ id | Just id <- ids ]
3923                     ]
3924         return code
3925   | otherwise
3926   = do
3927         (reg,e_code) <- getSomeReg expr
3928         tmp <- getNewRegNat I32
3929         lbl <- getNewLabelNat
3930         let
3931             jumpTable = map jumpTableEntry ids
3932
3933             code = e_code `appOL` toOL [
3934                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3935                             SLW tmp reg (RIImm (ImmInt 2)),
3936                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3937                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3938                             MTCTR tmp,
3939                             BCTR [ id | Just id <- ids ]
3940                     ]
3941         return code
3942 #else
3943 genSwitch expr ids = panic "ToDo: genSwitch"
3944 #endif
3945
3946 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3947 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3948     where blockLabel = mkAsmTempLabel id
3949
3950 -- -----------------------------------------------------------------------------
3951 -- Support bits
3952 -- -----------------------------------------------------------------------------
3953
3954
3955 -- -----------------------------------------------------------------------------
3956 -- 'condIntReg' and 'condFltReg': condition codes into registers
3957
3958 -- Turn those condition codes into integers now (when they appear on
3959 -- the right hand side of an assignment).
3960 --
3961 -- (If applicable) Do not fill the delay slots here; you will confuse the
3962 -- register allocator.
3963
3964 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3965
3966 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3967
3968 #if alpha_TARGET_ARCH
3969 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3970 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3971 #endif /* alpha_TARGET_ARCH */
3972
3973 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3974
3975 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3976
3977 condIntReg cond x y = do
3978   CondCode _ cond cond_code <- condIntCode cond x y
3979   tmp <- getNewRegNat I8
3980   let
3981         code dst = cond_code `appOL` toOL [
3982                     SETCC cond (OpReg tmp),
3983                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3984                   ]
3985   -- in
3986   return (Any I32 code)
3987
3988 #endif
3989
3990 #if i386_TARGET_ARCH
3991
3992 condFltReg cond x y = do
3993   CondCode _ cond cond_code <- condFltCode cond x y
3994   tmp <- getNewRegNat I8
3995   let
3996         code dst = cond_code `appOL` toOL [
3997                     SETCC cond (OpReg tmp),
3998                     MOVZxL I8 (OpReg tmp) (OpReg dst)
3999                   ]
4000   -- in
4001   return (Any I32 code)
4002
4003 #endif
4004
4005 #if x86_64_TARGET_ARCH
4006
4007 condFltReg cond x y = do
4008   CondCode _ cond cond_code <- condFltCode cond x y
4009   tmp1 <- getNewRegNat wordRep
4010   tmp2 <- getNewRegNat wordRep
4011   let
4012         -- We have to worry about unordered operands (eg. comparisons
4013         -- against NaN).  If the operands are unordered, the comparison
4014         -- sets the parity flag, carry flag and zero flag.
4015         -- All comparisons are supposed to return false for unordered
4016         -- operands except for !=, which returns true.
4017         --
4018         -- Optimisation: we don't have to test the parity flag if we
4019         -- know the test has already excluded the unordered case: eg >
4020         -- and >= test for a zero carry flag, which can only occur for
4021         -- ordered operands.
4022         --
4023         -- ToDo: by reversing comparisons we could avoid testing the
4024         -- parity flag in more cases.
4025
4026         code dst =
4027            cond_code `appOL`
4028              (case cond of
4029                 NE  -> or_unordered dst
4030                 GU  -> plain_test   dst
4031                 GEU -> plain_test   dst
4032                 _   -> and_ordered  dst)
4033
4034         plain_test dst = toOL [
4035                     SETCC cond (OpReg tmp1),
4036                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4037                  ]
4038         or_unordered dst = toOL [
4039                     SETCC cond (OpReg tmp1),
4040                     SETCC PARITY (OpReg tmp2),
4041                     OR I8 (OpReg tmp1) (OpReg tmp2),
4042                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4043                   ]
4044         and_ordered dst = toOL [
4045                     SETCC cond (OpReg tmp1),
4046                     SETCC NOTPARITY (OpReg tmp2),
4047                     AND I8 (OpReg tmp1) (OpReg tmp2),
4048                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4049                   ]
4050   -- in
4051   return (Any I32 code)
4052
4053 #endif
4054
4055 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4056
4057 #if sparc_TARGET_ARCH
4058
4059 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4060     (src, code) <- getSomeReg x
4061     tmp <- getNewRegNat I32
4062     let
4063         code__2 dst = code `appOL` toOL [
4064             SUB False True g0 (RIReg src) g0,
4065             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4066     return (Any I32 code__2)
4067
4068 condIntReg EQQ x y = do
4069     (src1, code1) <- getSomeReg x
4070     (src2, code2) <- getSomeReg y
4071     tmp1 <- getNewRegNat I32
4072     tmp2 <- getNewRegNat I32
4073     let
4074         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4075             XOR False src1 (RIReg src2) dst,
4076             SUB False True g0 (RIReg dst) g0,
4077             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4078     return (Any I32 code__2)
4079
4080 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4081     (src, code) <- getSomeReg x
4082     tmp <- getNewRegNat I32
4083     let
4084         code__2 dst = code `appOL` toOL [
4085             SUB False True g0 (RIReg src) g0,
4086             ADD True False g0 (RIImm (ImmInt 0)) dst]
4087     return (Any I32 code__2)
4088
4089 condIntReg NE x y = do
4090     (src1, code1) <- getSomeReg x
4091     (src2, code2) <- getSomeReg y
4092     tmp1 <- getNewRegNat I32
4093     tmp2 <- getNewRegNat I32
4094     let
4095         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4096             XOR False src1 (RIReg src2) dst,
4097             SUB False True g0 (RIReg dst) g0,
4098             ADD True False g0 (RIImm (ImmInt 0)) dst]
4099     return (Any I32 code__2)
4100
4101 condIntReg cond x y = do
4102     BlockId lbl1 <- getBlockIdNat
4103     BlockId lbl2 <- getBlockIdNat
4104     CondCode _ cond cond_code <- condIntCode cond x y
4105     let
4106         code__2 dst = cond_code `appOL` toOL [
4107             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4108             OR False g0 (RIImm (ImmInt 0)) dst,
4109             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4110             NEWBLOCK (BlockId lbl1),
4111             OR False g0 (RIImm (ImmInt 1)) dst,
4112             NEWBLOCK (BlockId lbl2)]
4113     return (Any I32 code__2)
4114
4115 condFltReg cond x y = do
4116     BlockId lbl1 <- getBlockIdNat
4117     BlockId lbl2 <- getBlockIdNat
4118     CondCode _ cond cond_code <- condFltCode cond x y
4119     let
4120         code__2 dst = cond_code `appOL` toOL [
4121             NOP,
4122             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4123             OR False g0 (RIImm (ImmInt 0)) dst,
4124             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4125             NEWBLOCK (BlockId lbl1),
4126             OR False g0 (RIImm (ImmInt 1)) dst,
4127             NEWBLOCK (BlockId lbl2)]
4128     return (Any I32 code__2)
4129
4130 #endif /* sparc_TARGET_ARCH */
4131
4132 #if powerpc_TARGET_ARCH
4133 condReg getCond = do
4134     lbl1 <- getBlockIdNat
4135     lbl2 <- getBlockIdNat
4136     CondCode _ cond cond_code <- getCond
4137     let
4138 {-        code dst = cond_code `appOL` toOL [
4139                 BCC cond lbl1,
4140                 LI dst (ImmInt 0),
4141                 BCC ALWAYS lbl2,
4142                 NEWBLOCK lbl1,
4143                 LI dst (ImmInt 1),
4144                 BCC ALWAYS lbl2,
4145                 NEWBLOCK lbl2
4146             ]-}
4147         code dst = cond_code
4148             `appOL` negate_code
4149             `appOL` toOL [
4150                 MFCR dst,
4151                 RLWINM dst dst (bit + 1) 31 31
4152             ]
4153
4154         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4155                     | otherwise = nilOL
4156
4157         (bit, do_negate) = case cond of
4158             LTT -> (0, False)
4159             LE  -> (1, True)
4160             EQQ -> (2, False)
4161             GE  -> (0, True)
4162             GTT -> (1, False)
4163
4164             NE  -> (2, True)
4165
4166             LU  -> (0, False)
4167             LEU -> (1, True)
4168             GEU -> (0, True)
4169             GU  -> (1, False)
4170
4171     return (Any I32 code)
4172
4173 condIntReg cond x y = condReg (condIntCode cond x y)
4174 condFltReg cond x y = condReg (condFltCode cond x y)
4175 #endif /* powerpc_TARGET_ARCH */
4176
4177
4178 -- -----------------------------------------------------------------------------
4179 -- 'trivial*Code': deal with trivial instructions
4180
4181 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4182 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4183 -- Only look for constants on the right hand side, because that's
4184 -- where the generic optimizer will have put them.
4185
4186 -- Similarly, for unary instructions, we don't have to worry about
4187 -- matching an StInt as the argument, because genericOpt will already
4188 -- have handled the constant-folding.
4189
4190 trivialCode
4191     :: MachRep
4192     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4193       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4194                      -> Maybe (Operand -> Operand -> Instr)
4195       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4196                      -> Maybe (Operand -> Operand -> Instr)
4197       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4198       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4199       ,)))))
4200     -> CmmExpr -> CmmExpr -- the two arguments
4201     -> NatM Register
4202
4203 #ifndef powerpc_TARGET_ARCH
4204 trivialFCode
4205     :: MachRep
4206     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4207       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4208       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4209       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4210       ,))))
4211     -> CmmExpr -> CmmExpr -- the two arguments
4212     -> NatM Register
4213 #endif
4214
4215 trivialUCode
4216     :: MachRep
4217     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4218       ,IF_ARCH_i386 ((Operand -> Instr)
4219       ,IF_ARCH_x86_64 ((Operand -> Instr)
4220       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4221       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4222       ,)))))
4223     -> CmmExpr  -- the one argument
4224     -> NatM Register
4225
4226 #ifndef powerpc_TARGET_ARCH
4227 trivialUFCode
4228     :: MachRep
4229     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4230       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4231       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4232       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4233       ,))))
4234     -> CmmExpr -- the one argument
4235     -> NatM Register
4236 #endif
4237
4238 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4239
4240 #if alpha_TARGET_ARCH
4241
4242 trivialCode instr x (StInt y)
4243   | fits8Bits y
4244   = getRegister x               `thenNat` \ register ->
4245     getNewRegNat IntRep         `thenNat` \ tmp ->
4246     let
4247         code = registerCode register tmp
4248         src1 = registerName register tmp
4249         src2 = ImmInt (fromInteger y)
4250         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4251     in
4252     return (Any IntRep code__2)
4253
4254 trivialCode instr x y
4255   = getRegister x               `thenNat` \ register1 ->
4256     getRegister y               `thenNat` \ register2 ->
4257     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4258     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4259     let
4260         code1 = registerCode register1 tmp1 []
4261         src1  = registerName register1 tmp1
4262         code2 = registerCode register2 tmp2 []
4263         src2  = registerName register2 tmp2
4264         code__2 dst = asmSeqThen [code1, code2] .
4265                      mkSeqInstr (instr src1 (RIReg src2) dst)
4266     in
4267     return (Any IntRep code__2)
4268
4269 ------------
4270 trivialUCode instr x
4271   = getRegister x               `thenNat` \ register ->
4272     getNewRegNat IntRep         `thenNat` \ tmp ->
4273     let
4274         code = registerCode register tmp
4275         src  = registerName register tmp
4276         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4277     in
4278     return (Any IntRep code__2)
4279
4280 ------------
4281 trivialFCode _ instr x y
4282   = getRegister x               `thenNat` \ register1 ->
4283     getRegister y               `thenNat` \ register2 ->
4284     getNewRegNat F64    `thenNat` \ tmp1 ->
4285     getNewRegNat F64    `thenNat` \ tmp2 ->
4286     let
4287         code1 = registerCode register1 tmp1
4288         src1  = registerName register1 tmp1
4289
4290         code2 = registerCode register2 tmp2
4291         src2  = registerName register2 tmp2
4292
4293         code__2 dst = asmSeqThen [code1 [], code2 []] .
4294                       mkSeqInstr (instr src1 src2 dst)
4295     in
4296     return (Any F64 code__2)
4297
4298 trivialUFCode _ instr x
4299   = getRegister x               `thenNat` \ register ->
4300     getNewRegNat F64    `thenNat` \ tmp ->
4301     let
4302         code = registerCode register tmp
4303         src  = registerName register tmp
4304         code__2 dst = code . mkSeqInstr (instr src dst)
4305     in
4306     return (Any F64 code__2)
4307
4308 #endif /* alpha_TARGET_ARCH */
4309
4310 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4311
4312 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4313
4314 {-
4315 The Rules of the Game are:
4316
4317 * You cannot assume anything about the destination register dst;
4318   it may be anything, including a fixed reg.
4319
4320 * You may compute an operand into a fixed reg, but you may not
4321   subsequently change the contents of that fixed reg.  If you
4322   want to do so, first copy the value either to a temporary
4323   or into dst.  You are free to modify dst even if it happens
4324   to be a fixed reg -- that's not your problem.
4325
4326 * You cannot assume that a fixed reg will stay live over an
4327   arbitrary computation.  The same applies to the dst reg.
4328
4329 * Temporary regs obtained from getNewRegNat are distinct from
4330   each other and from all other regs, and stay live over
4331   arbitrary computations.
4332
4333 --------------------
4334
4335 SDM's version of The Rules:
4336
4337 * If getRegister returns Any, that means it can generate correct
4338   code which places the result in any register, period.  Even if that
4339   register happens to be read during the computation.
4340
4341   Corollary #1: this means that if you are generating code for an
4342   operation with two arbitrary operands, you cannot assign the result
4343   of the first operand into the destination register before computing
4344   the second operand.  The second operand might require the old value
4345   of the destination register.
4346
4347   Corollary #2: A function might be able to generate more efficient
4348   code if it knows the destination register is a new temporary (and
4349   therefore not read by any of the sub-computations).
4350
4351 * If getRegister returns Any, then the code it generates may modify only:
4352         (a) fresh temporaries
4353         (b) the destination register
4354         (c) known registers (eg. %ecx is used by shifts)
4355   In particular, it may *not* modify global registers, unless the global
4356   register happens to be the destination register.
4357 -}
4358
4359 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4360   | not (is64BitLit lit_a) = do
4361   b_code <- getAnyReg b
4362   let
4363        code dst
4364          = b_code dst `snocOL`
4365            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4366   -- in
4367   return (Any rep code)
4368
4369 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4370
4371 -- This is re-used for floating pt instructions too.
4372 genTrivialCode rep instr a b = do
4373   (b_op, b_code) <- getNonClobberedOperand b
4374   a_code <- getAnyReg a
4375   tmp <- getNewRegNat rep
4376   let
4377      -- We want the value of b to stay alive across the computation of a.
4378      -- But, we want to calculate a straight into the destination register,
4379      -- because the instruction only has two operands (dst := dst `op` src).
4380      -- The troublesome case is when the result of b is in the same register
4381      -- as the destination reg.  In this case, we have to save b in a
4382      -- new temporary across the computation of a.
4383      code dst
4384         | dst `regClashesWithOp` b_op =
4385                 b_code `appOL`
4386                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4387                 a_code dst `snocOL`
4388                 instr (OpReg tmp) (OpReg dst)
4389         | otherwise =
4390                 b_code `appOL`
4391                 a_code dst `snocOL`
4392                 instr b_op (OpReg dst)
4393   -- in
4394   return (Any rep code)
4395
4396 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4397 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4398 reg `regClashesWithOp` _            = False
4399
4400 -----------
4401
4402 trivialUCode rep instr x = do
4403   x_code <- getAnyReg x
4404   let
4405      code dst =
4406         x_code dst `snocOL`
4407         instr (OpReg dst)
4408   -- in
4409   return (Any rep code)
4410
4411 -----------
4412
4413 #if i386_TARGET_ARCH
4414
4415 trivialFCode pk instr x y = do
4416   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4417   (y_reg, y_code) <- getSomeReg y
4418   let
4419      code dst =
4420         x_code `appOL`
4421         y_code `snocOL`
4422         instr pk x_reg y_reg dst
4423   -- in
4424   return (Any pk code)
4425
4426 #endif
4427
4428 #if x86_64_TARGET_ARCH
4429
4430 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4431
4432 #endif
4433
4434 -------------
4435
4436 trivialUFCode rep instr x = do
4437   (x_reg, x_code) <- getSomeReg x
4438   let
4439      code dst =
4440         x_code `snocOL`
4441         instr x_reg dst
4442   -- in
4443   return (Any rep code)
4444
4445 #endif /* i386_TARGET_ARCH */
4446
4447 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4448
4449 #if sparc_TARGET_ARCH
4450
4451 trivialCode pk instr x (CmmLit (CmmInt y d))
4452   | fits13Bits y
4453   = do
4454       (src1, code) <- getSomeReg x
4455       tmp <- getNewRegNat I32
4456       let
4457         src2 = ImmInt (fromInteger y)
4458         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4459       return (Any I32 code__2)
4460
4461 trivialCode pk instr x y = do
4462     (src1, code1) <- getSomeReg x
4463     (src2, code2) <- getSomeReg y
4464     tmp1 <- getNewRegNat I32
4465     tmp2 <- getNewRegNat I32
4466     let
4467         code__2 dst = code1 `appOL` code2 `snocOL`
4468                       instr src1 (RIReg src2) dst
4469     return (Any I32 code__2)
4470
4471 ------------
4472 trivialFCode pk instr x y = do
4473     (src1, code1) <- getSomeReg x
4474     (src2, code2) <- getSomeReg y
4475     tmp1 <- getNewRegNat (cmmExprRep x)
4476     tmp2 <- getNewRegNat (cmmExprRep y)
4477     tmp <- getNewRegNat F64
4478     let
4479         promote x = FxTOy F32 F64 x tmp
4480
4481         pk1   = cmmExprRep x
4482         pk2   = cmmExprRep y
4483
4484         code__2 dst =
4485                 if pk1 == pk2 then
4486                     code1 `appOL` code2 `snocOL`
4487                     instr pk src1 src2 dst
4488                 else if pk1 == F32 then
4489                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4490                     instr F64 tmp src2 dst
4491                 else
4492                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4493                     instr F64 src1 tmp dst
4494     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4495
4496 ------------
4497 trivialUCode pk instr x = do
4498     (src, code) <- getSomeReg x
4499     tmp <- getNewRegNat pk
4500     let
4501         code__2 dst = code `snocOL` instr (RIReg src) dst
4502     return (Any pk code__2)
4503
4504 -------------
4505 trivialUFCode pk instr x = do
4506     (src, code) <- getSomeReg x
4507     tmp <- getNewRegNat pk
4508     let
4509         code__2 dst = code `snocOL` instr src dst
4510     return (Any pk code__2)
4511
4512 #endif /* sparc_TARGET_ARCH */
4513
4514 #if powerpc_TARGET_ARCH
4515
4516 {-
4517 Wolfgang's PowerPC version of The Rules:
4518
4519 A slightly modified version of The Rules to take advantage of the fact
4520 that PowerPC instructions work on all registers and don't implicitly
4521 clobber any fixed registers.
4522
4523 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4524
4525 * If getRegister returns Any, then the code it generates may modify only:
4526         (a) fresh temporaries
4527         (b) the destination register
4528   It may *not* modify global registers, unless the global
4529   register happens to be the destination register.
4530   It may not clobber any other registers. In fact, only ccalls clobber any
4531   fixed registers.
4532   Also, it may not modify the counter register (used by genCCall).
4533
4534   Corollary: If a getRegister for a subexpression returns Fixed, you need
4535   not move it to a fresh temporary before evaluating the next subexpression.
4536   The Fixed register won't be modified.
4537   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4538
4539 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4540   the value of the destination register.
4541 -}
4542
4543 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4544     | Just imm <- makeImmediate rep signed y
4545     = do
4546         (src1, code1) <- getSomeReg x
4547         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4548         return (Any rep code)
4549
4550 trivialCode rep signed instr x y = do
4551     (src1, code1) <- getSomeReg x
4552     (src2, code2) <- getSomeReg y
4553     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4554     return (Any rep code)
4555
4556 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4557     -> CmmExpr -> CmmExpr -> NatM Register
4558 trivialCodeNoImm rep instr x y = do
4559     (src1, code1) <- getSomeReg x
4560     (src2, code2) <- getSomeReg y
4561     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4562     return (Any rep code)
4563
4564 trivialUCode rep instr x = do
4565     (src, code) <- getSomeReg x
4566     let code' dst = code `snocOL` instr dst src
4567     return (Any rep code')
4568
4569 -- There is no "remainder" instruction on the PPC, so we have to do
4570 -- it the hard way.
4571 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4572
4573 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4574     -> CmmExpr -> CmmExpr -> NatM Register
4575 remainderCode rep div x y = do
4576     (src1, code1) <- getSomeReg x
4577     (src2, code2) <- getSomeReg y
4578     let code dst = code1 `appOL` code2 `appOL` toOL [
4579                 div dst src1 src2,
4580                 MULLW dst dst (RIReg src2),
4581                 SUBF dst dst src1
4582             ]
4583     return (Any rep code)
4584
4585 #endif /* powerpc_TARGET_ARCH */
4586
4587
4588 -- -----------------------------------------------------------------------------
4589 --  Coercing to/from integer/floating-point...
4590
4591 -- When going to integer, we truncate (round towards 0).
4592
4593 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4594 -- conversions.  We have to store temporaries in memory to move
4595 -- between the integer and the floating point register sets.
4596
4597 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4598 -- pretend, on sparc at least, that double and float regs are seperate
4599 -- kinds, so the value has to be computed into one kind before being
4600 -- explicitly "converted" to live in the other kind.
4601
4602 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4603 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4604
4605 #if sparc_TARGET_ARCH
4606 coerceDbl2Flt :: CmmExpr -> NatM Register
4607 coerceFlt2Dbl :: CmmExpr -> NatM Register
4608 #endif
4609
4610 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4611
4612 #if alpha_TARGET_ARCH
4613
4614 coerceInt2FP _ x
4615   = getRegister x               `thenNat` \ register ->
4616     getNewRegNat IntRep         `thenNat` \ reg ->
4617     let
4618         code = registerCode register reg
4619         src  = registerName register reg
4620
4621         code__2 dst = code . mkSeqInstrs [
4622             ST Q src (spRel 0),
4623             LD TF dst (spRel 0),
4624             CVTxy Q TF dst dst]
4625     in
4626     return (Any F64 code__2)
4627
4628 -------------
4629 coerceFP2Int x
4630   = getRegister x               `thenNat` \ register ->
4631     getNewRegNat F64    `thenNat` \ tmp ->
4632     let
4633         code = registerCode register tmp
4634         src  = registerName register tmp
4635
4636         code__2 dst = code . mkSeqInstrs [
4637             CVTxy TF Q src tmp,
4638             ST TF tmp (spRel 0),
4639             LD Q dst (spRel 0)]
4640     in
4641     return (Any IntRep code__2)
4642
4643 #endif /* alpha_TARGET_ARCH */
4644
4645 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4646
4647 #if i386_TARGET_ARCH
4648
4649 coerceInt2FP from to x = do
4650   (x_reg, x_code) <- getSomeReg x
4651   let
4652         opc  = case to of F32 -> GITOF; F64 -> GITOD
4653         code dst = x_code `snocOL` opc x_reg dst
4654         -- ToDo: works for non-I32 reps?
4655   -- in
4656   return (Any to code)
4657
4658 ------------
4659
4660 coerceFP2Int from to x = do
4661   (x_reg, x_code) <- getSomeReg x
4662   let
4663         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4664         code dst = x_code `snocOL` opc x_reg dst
4665         -- ToDo: works for non-I32 reps?
4666   -- in
4667   return (Any to code)
4668
4669 #endif /* i386_TARGET_ARCH */
4670
4671 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4672
4673 #if x86_64_TARGET_ARCH
4674
4675 coerceFP2Int from to x = do
4676   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4677   let
4678         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4679         code dst = x_code `snocOL` opc x_op dst
4680   -- in
4681   return (Any to code) -- works even if the destination rep is <I32
4682
4683 coerceInt2FP from to x = do
4684   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4685   let
4686         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4687         code dst = x_code `snocOL` opc x_op dst
4688   -- in
4689   return (Any to code) -- works even if the destination rep is <I32
4690
4691 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4692 coerceFP2FP to x = do
4693   (x_reg, x_code) <- getSomeReg x
4694   let
4695         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4696         code dst = x_code `snocOL` opc x_reg dst
4697   -- in
4698   return (Any to code)
4699
4700 #endif
4701
4702 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4703
4704 #if sparc_TARGET_ARCH
4705
4706 coerceInt2FP pk1 pk2 x = do
4707     (src, code) <- getSomeReg x
4708     let
4709         code__2 dst = code `appOL` toOL [
4710             ST pk1 src (spRel (-2)),
4711             LD pk1 (spRel (-2)) dst,
4712             FxTOy pk1 pk2 dst dst]
4713     return (Any pk2 code__2)
4714
4715 ------------
4716 coerceFP2Int pk fprep x = do
4717     (src, code) <- getSomeReg x
4718     reg <- getNewRegNat fprep
4719     tmp <- getNewRegNat pk
4720     let
4721         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4722             code `appOL` toOL [
4723             FxTOy fprep pk src tmp,
4724             ST pk tmp (spRel (-2)),
4725             LD pk (spRel (-2)) dst]
4726     return (Any pk code__2)
4727
4728 ------------
4729 coerceDbl2Flt x = do
4730     (src, code) <- getSomeReg x
4731     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4732
4733 ------------
4734 coerceFlt2Dbl x = do
4735     (src, code) <- getSomeReg x
4736     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4737
4738 #endif /* sparc_TARGET_ARCH */
4739
4740 #if powerpc_TARGET_ARCH
4741 coerceInt2FP fromRep toRep x = do
4742     (src, code) <- getSomeReg x
4743     lbl <- getNewLabelNat
4744     itmp <- getNewRegNat I32
4745     ftmp <- getNewRegNat F64
4746     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4747     Amode addr addr_code <- getAmode dynRef
4748     let
4749         code' dst = code `appOL` maybe_exts `appOL` toOL [
4750                 LDATA ReadOnlyData
4751                                 [CmmDataLabel lbl,
4752                                  CmmStaticLit (CmmInt 0x43300000 I32),
4753                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4754                 XORIS itmp src (ImmInt 0x8000),
4755                 ST I32 itmp (spRel 3),
4756                 LIS itmp (ImmInt 0x4330),
4757                 ST I32 itmp (spRel 2),
4758                 LD F64 ftmp (spRel 2)
4759             ] `appOL` addr_code `appOL` toOL [
4760                 LD F64 dst addr,
4761                 FSUB F64 dst ftmp dst
4762             ] `appOL` maybe_frsp dst
4763
4764         maybe_exts = case fromRep of
4765                         I8 ->  unitOL $ EXTS I8 src src
4766                         I16 -> unitOL $ EXTS I16 src src
4767                         I32 -> nilOL
4768         maybe_frsp dst = case toRep of
4769                         F32 -> unitOL $ FRSP dst dst
4770                         F64 -> nilOL
4771     return (Any toRep code')
4772
4773 coerceFP2Int fromRep toRep x = do
4774     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4775     (src, code) <- getSomeReg x
4776     tmp <- getNewRegNat F64
4777     let
4778         code' dst = code `appOL` toOL [
4779                 -- convert to int in FP reg
4780             FCTIWZ tmp src,
4781                 -- store value (64bit) from FP to stack
4782             ST F64 tmp (spRel 2),
4783                 -- read low word of value (high word is undefined)
4784             LD I32 dst (spRel 3)]
4785     return (Any toRep code')
4786 #endif /* powerpc_TARGET_ARCH */
4787
4788
4789 -- -----------------------------------------------------------------------------
4790 -- eXTRA_STK_ARGS_HERE
4791
4792 -- We (allegedly) put the first six C-call arguments in registers;
4793 -- where do we start putting the rest of them?
4794
4795 -- Moved from MachInstrs (SDM):
4796
4797 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4798 eXTRA_STK_ARGS_HERE :: Int
4799 eXTRA_STK_ARGS_HERE
4800   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4801 #endif
4802