compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo ( mkBranchInstr )
  33
  34 -- Our intermediate code:
  35 import PprCmm           ( pprExpr )
  36 import Cmm
  37 import MachOp
  38 import CLabel
  39 import ClosureInfo      ( C_SRT(..) )
  40
  41 -- The rest:
  42 import StaticFlags      ( opt_PIC )
  43 import ForeignCall      ( CCallConv(..) )
  44 import OrdList
  45 import Pretty
  46 import Outputable
  47 import FastString
  48 import FastBool         ( isFastTrue )
  49 import Constants        ( wORD_SIZE )
  50
  51 import Debug.Trace      ( trace )
  52
  53 import Control.Monad    ( mapAndUnzipM )
  54 import Data.Maybe       ( fromJust )
  55 import Data.Bits
  56 import Data.Word
  57 import Data.Int
  58
  59 -- -----------------------------------------------------------------------------
  60 -- Top-level of the instruction selector
  61
  62 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  63 -- They are really trees of insns to facilitate fast appending, where a
  64 -- left-to-right traversal (pre-order?) yields the insns in the correct
  65 -- order.
  66
  67 type InstrBlock = OrdList Instr
  68
  69 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  70 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  71   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  72   picBaseMb <- getPicBaseMaybeNat
  73   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  74       tops = proc : concat statics
  75   case picBaseMb of
  76       Just picBase -> initializePicBase picBase tops
  77       Nothing -> return tops
  78
  79 cmmTopCodeGen (CmmData sec dat) = do
  80   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  81
  82 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  83 basicBlockCodeGen (BasicBlock id stmts) = do
  84   instrs <- stmtsToInstrs stmts
  85   -- code generation may introduce new basic block boundaries, which
  86   -- are indicated by the NEWBLOCK instruction.  We must split up the
  87   -- instruction stream into basic blocks again.  Also, we extract
  88   -- LDATAs here too.
  89   let
  90         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  91
  92         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  93           = ([], BasicBlock id instrs : blocks, statics)
  94         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  95           = (instrs, blocks, CmmData sec dat:statics)
  96         mkBlocks instr (instrs,blocks,statics)
  97           = (instr:instrs, blocks, statics)
  98   -- in
  99   return (BasicBlock id top : other_blocks, statics)
 100
 101 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 102 stmtsToInstrs stmts
 103    = do instrss <- mapM stmtToInstrs stmts
 104         return (concatOL instrss)
 105
 106 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 107 stmtToInstrs stmt = case stmt of
 108     CmmNop         -> return nilOL
 109     CmmComment s   -> return (unitOL (COMMENT s))
 110
 111     CmmAssign reg src
 112       | isFloatingRep kind -> assignReg_FltCode kind reg src
 113 #if WORD_SIZE_IN_BITS==32
 114       | kind == I64        -> assignReg_I64Code      reg src
 115 #endif
 116       | otherwise          -> assignReg_IntCode kind reg src
 117         where kind = cmmRegRep reg
 118
 119     CmmStore addr src
 120       | isFloatingRep kind -> assignMem_FltCode kind addr src
 121 #if WORD_SIZE_IN_BITS==32
 122       | kind == I64      -> assignMem_I64Code      addr src
 123 #endif
 124       | otherwise        -> assignMem_IntCode kind addr src
 125         where kind = cmmExprRep src
 126
 127     CmmCall target result_regs args _ _
 128        -> genCCall target result_regs args
 129
 130     CmmBranch id          -> genBranch id
 131     CmmCondBranch arg id  -> genCondJump id arg
 132     CmmSwitch arg ids     -> genSwitch arg ids
 133     CmmJump arg params    -> genJump arg
 134
 135 -- -----------------------------------------------------------------------------
 136 -- General things for putting together code sequences
 137
 138 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 139 -- CmmExprs into CmmRegOff?
 140 mangleIndexTree :: CmmExpr -> CmmExpr
 141 mangleIndexTree (CmmRegOff reg off)
 142   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 143   where rep = cmmRegRep reg
 144
 145 -- -----------------------------------------------------------------------------
 146 --  Code gen for 64-bit arithmetic on 32-bit platforms
 147
 148 {-
 149 Simple support for generating 64-bit code (ie, 64 bit values and 64
 150 bit assignments) on 32-bit platforms.  Unlike the main code generator
 151 we merely shoot for generating working code as simply as possible, and
 152 pay little attention to code quality.  Specifically, there is no
 153 attempt to deal cleverly with the fixed-vs-floating register
 154 distinction; all values are generated into (pairs of) floating
 155 registers, even if this would mean some redundant reg-reg moves as a
 156 result.  Only one of the VRegUniques is returned, since it will be
 157 of the VRegUniqueLo form, and the upper-half VReg can be determined
 158 by applying getHiVRegFromLo to it.
 159 -}
 160
 161 data ChildCode64        -- a.k.a "Register64"
 162    = ChildCode64
 163         InstrBlock      -- code
 164         Reg             -- the lower 32-bit temporary which contains the
 165                         -- result; use getHiVRegFromLo to find the other
 166                         -- VRegUnique.  Rules of this simplified insn
 167                         -- selection game are therefore that the returned
 168                         -- Reg may be modified
 169
 170 #if WORD_SIZE_IN_BITS==32
 171 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 172 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 173 #endif
 174
 175 #ifndef x86_64_TARGET_ARCH
 176 iselExpr64        :: CmmExpr -> NatM ChildCode64
 177 #endif
 178
 179 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 180
 181 #if i386_TARGET_ARCH
 182
 183 assignMem_I64Code addrTree valueTree = do
 184   Amode addr addr_code <- getAmode addrTree
 185   ChildCode64 vcode rlo <- iselExpr64 valueTree
 186   let
 187         rhi = getHiVRegFromLo rlo
 188
 189         -- Little-endian store
 190         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 191         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 192   -- in
 193   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 194
 195
 196 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 197    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 198    let
 199          r_dst_lo = mkVReg u_dst I32
 200          r_dst_hi = getHiVRegFromLo r_dst_lo
 201          r_src_hi = getHiVRegFromLo r_src_lo
 202          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 203          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 204    -- in
 205    return (
 206         vcode `snocOL` mov_lo `snocOL` mov_hi
 207      )
 208
 209 assignReg_I64Code lvalue valueTree
 210    = panic "assignReg_I64Code(i386): invalid lvalue"
 211
 212 ------------
 213
 214 iselExpr64 (CmmLit (CmmInt i _)) = do
 215   (rlo,rhi) <- getNewRegPairNat I32
 216   let
 217         r = fromIntegral (fromIntegral i :: Word32)
 218         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 219         code = toOL [
 220                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 221                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 222                 ]
 223   -- in
 224   return (ChildCode64 code rlo)
 225
 226 iselExpr64 (CmmLoad addrTree I64) = do
 227    Amode addr addr_code <- getAmode addrTree
 228    (rlo,rhi) <- getNewRegPairNat I32
 229    let
 230         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 231         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 232    -- in
 233    return (
 234             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 235                         rlo
 236      )
 237
 238 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 239    = return (ChildCode64 nilOL (mkVReg vu I32))
 240
 241 -- we handle addition, but rather badly
 242 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 243    ChildCode64 code1 r1lo <- iselExpr64 e1
 244    (rlo,rhi) <- getNewRegPairNat I32
 245    let
 246         r = fromIntegral (fromIntegral i :: Word32)
 247         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 248         r1hi = getHiVRegFromLo r1lo
 249         code =  code1 `appOL`
 250                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 251                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 252                        MOV I32 (OpReg r1hi) (OpReg rhi),
 253                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 254    -- in
 255    return (ChildCode64 code rlo)
 256
 257 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 258    ChildCode64 code1 r1lo <- iselExpr64 e1
 259    ChildCode64 code2 r2lo <- iselExpr64 e2
 260    (rlo,rhi) <- getNewRegPairNat I32
 261    let
 262         r1hi = getHiVRegFromLo r1lo
 263         r2hi = getHiVRegFromLo r2lo
 264         code =  code1 `appOL`
 265                 code2 `appOL`
 266                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 267                        ADD I32 (OpReg r2lo) (OpReg rlo),
 268                        MOV I32 (OpReg r1hi) (OpReg rhi),
 269                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 270    -- in
 271    return (ChildCode64 code rlo)
 272
 273 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 274      fn <- getAnyReg expr
 275      r_dst_lo <-  getNewRegNat I32
 276      let r_dst_hi = getHiVRegFromLo r_dst_lo
 277          code = fn r_dst_lo
 278      return (
 279              ChildCode64 (code `snocOL`
 280                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 281                           r_dst_lo
 282             )
 283
 284 iselExpr64 expr
 285    = pprPanic "iselExpr64(i386)" (ppr expr)
 286
 287 #endif /* i386_TARGET_ARCH */
 288
 289 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 290
 291 #if sparc_TARGET_ARCH
 292
 293 assignMem_I64Code addrTree valueTree = do
 294      Amode addr addr_code <- getAmode addrTree
 295      ChildCode64 vcode rlo <- iselExpr64 valueTree
 296      (src, code) <- getSomeReg addrTree
 297      let
 298          rhi = getHiVRegFromLo rlo
 299          -- Big-endian store
 300          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 301          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 302      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 303
 304 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 305      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 306      let
 307          r_dst_lo = mkVReg u_dst pk
 308          r_dst_hi = getHiVRegFromLo r_dst_lo
 309          r_src_hi = getHiVRegFromLo r_src_lo
 310          mov_lo = mkMOV r_src_lo r_dst_lo
 311          mov_hi = mkMOV r_src_hi r_dst_hi
 312          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 313      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 314 assignReg_I64Code lvalue valueTree
 315    = panic "assignReg_I64Code(sparc): invalid lvalue"
 316
 317
 318 -- Don't delete this -- it's very handy for debugging.
 319 --iselExpr64 expr
 320 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 321 --   = panic "iselExpr64(???)"
 322
 323 iselExpr64 (CmmLoad addrTree I64) = do
 324      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 325      rlo <- getNewRegNat I32
 326      let rhi = getHiVRegFromLo rlo
 327          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 328          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 329      return (
 330             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 331                          rlo
 332           )
 333
 334 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64 _))) = do
 335      r_dst_lo <-  getNewRegNat I32
 336      let r_dst_hi = getHiVRegFromLo r_dst_lo
 337          r_src_lo = mkVReg uq I32
 338          r_src_hi = getHiVRegFromLo r_src_lo
 339          mov_lo = mkMOV r_src_lo r_dst_lo
 340          mov_hi = mkMOV r_src_hi r_dst_hi
 341          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 342      return (
 343             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 344          )
 345
 346 iselExpr64 expr
 347    = pprPanic "iselExpr64(sparc)" (ppr expr)
 348
 349 #endif /* sparc_TARGET_ARCH */
 350
 351 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 352
 353 #if powerpc_TARGET_ARCH
 354
 355 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 356 getI64Amodes addrTree = do
 357     Amode hi_addr addr_code <- getAmode addrTree
 358     case addrOffset hi_addr 4 of
 359         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 360         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 361                            return (AddrRegImm hi_ptr (ImmInt 0),
 362                                    AddrRegImm hi_ptr (ImmInt 4),
 363                                    code)
 364
 365 assignMem_I64Code addrTree valueTree = do
 366         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 367         ChildCode64 vcode rlo <- iselExpr64 valueTree
 368         let
 369                 rhi = getHiVRegFromLo rlo
 370
 371                 -- Big-endian store
 372                 mov_hi = ST I32 rhi hi_addr
 373                 mov_lo = ST I32 rlo lo_addr
 374         -- in
 375         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 376
 377 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 378    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 379    let
 380          r_dst_lo = mkVReg u_dst I32
 381          r_dst_hi = getHiVRegFromLo r_dst_lo
 382          r_src_hi = getHiVRegFromLo r_src_lo
 383          mov_lo = MR r_dst_lo r_src_lo
 384          mov_hi = MR r_dst_hi r_src_hi
 385    -- in
 386    return (
 387         vcode `snocOL` mov_lo `snocOL` mov_hi
 388      )
 389
 390 assignReg_I64Code lvalue valueTree
 391    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 392
 393
 394 -- Don't delete this -- it's very handy for debugging.
 395 --iselExpr64 expr
 396 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 397 --   = panic "iselExpr64(???)"
 398
 399 iselExpr64 (CmmLoad addrTree I64) = do
 400     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 401     (rlo, rhi) <- getNewRegPairNat I32
 402     let mov_hi = LD I32 rhi hi_addr
 403         mov_lo = LD I32 rlo lo_addr
 404     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 405                          rlo
 406
 407 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 408    = return (ChildCode64 nilOL (mkVReg vu I32))
 409
 410 iselExpr64 (CmmLit (CmmInt i _)) = do
 411   (rlo,rhi) <- getNewRegPairNat I32
 412   let
 413         half0 = fromIntegral (fromIntegral i :: Word16)
 414         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 415         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 416         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 417
 418         code = toOL [
 419                 LIS rlo (ImmInt half1),
 420                 OR rlo rlo (RIImm $ ImmInt half0),
 421                 LIS rhi (ImmInt half3),
 422                 OR rlo rlo (RIImm $ ImmInt half2)
 423                 ]
 424   -- in
 425   return (ChildCode64 code rlo)
 426
 427 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 428    ChildCode64 code1 r1lo <- iselExpr64 e1
 429    ChildCode64 code2 r2lo <- iselExpr64 e2
 430    (rlo,rhi) <- getNewRegPairNat I32
 431    let
 432         r1hi = getHiVRegFromLo r1lo
 433         r2hi = getHiVRegFromLo r2lo
 434         code =  code1 `appOL`
 435                 code2 `appOL`
 436                 toOL [ ADDC rlo r1lo r2lo,
 437                        ADDE rhi r1hi r2hi ]
 438    -- in
 439    return (ChildCode64 code rlo)
 440
 441 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
 442     (expr_reg,expr_code) <- getSomeReg expr
 443     (rlo, rhi) <- getNewRegPairNat I32
 444     let mov_hi = LI rhi (ImmInt 0)
 445         mov_lo = MR rlo expr_reg
 446     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 447                          rlo
 448 iselExpr64 expr
 449    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 450
 451 #endif /* powerpc_TARGET_ARCH */
 452
 453
 454 -- -----------------------------------------------------------------------------
 455 -- The 'Register' type
 456
 457 -- 'Register's passed up the tree.  If the stix code forces the register
 458 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 459 -- otherwise, it comes out as @Any@, and the parent can decide which
 460 -- register to put it in.
 461
 462 data Register
 463   = Fixed   MachRep Reg InstrBlock
 464   | Any     MachRep (Reg -> InstrBlock)
 465
 466 swizzleRegisterRep :: Register -> MachRep -> Register
 467 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 468 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 469
 470
 471 -- -----------------------------------------------------------------------------
 472 -- Utils based on getRegister, below
 473
 474 -- The dual to getAnyReg: compute an expression into a register, but
 475 -- we don't mind which one it is.
 476 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 477 getSomeReg expr = do
 478   r <- getRegister expr
 479   case r of
 480     Any rep code -> do
 481         tmp <- getNewRegNat rep
 482         return (tmp, code tmp)
 483     Fixed _ reg code ->
 484         return (reg, code)
 485
 486 -- -----------------------------------------------------------------------------
 487 -- Grab the Reg for a CmmReg
 488
 489 getRegisterReg :: CmmReg -> Reg
 490
 491 getRegisterReg (CmmLocal (LocalReg u pk _))
 492   = mkVReg u pk
 493
 494 getRegisterReg (CmmGlobal mid)
 495   = case get_GlobalReg_reg_or_addr mid of
 496        Left (RealReg rrno) -> RealReg rrno
 497        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 498           -- By this stage, the only MagicIds remaining should be the
 499           -- ones which map to a real machine register on this
 500           -- platform.  Hence ...
 501
 502
 503 -- -----------------------------------------------------------------------------
 504 -- Generate code to get a subtree into a Register
 505
 506 -- Don't delete this -- it's very handy for debugging.
 507 --getRegister expr
 508 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 509 --   = panic "getRegister(???)"
 510
 511 getRegister :: CmmExpr -> NatM Register
 512
 513 #if !x86_64_TARGET_ARCH
 514     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 515     -- register, it can only be used for rip-relative addressing.
 516 getRegister (CmmReg (CmmGlobal PicBaseReg))
 517   = do
 518       reg <- getPicBaseNat wordRep
 519       return (Fixed wordRep reg nilOL)
 520 #endif
 521
 522 getRegister (CmmReg reg)
 523   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 524
 525 getRegister tree@(CmmRegOff _ _)
 526   = getRegister (mangleIndexTree tree)
 527
 528
 529 #if WORD_SIZE_IN_BITS==32
 530     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 531     -- TO_W_(x), TO_W_(x >> 32)
 532
 533 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 534              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 535   ChildCode64 code rlo <- iselExpr64 x
 536   return $ Fixed I32 (getHiVRegFromLo rlo) code
 537
 538 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 539              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 540   ChildCode64 code rlo <- iselExpr64 x
 541   return $ Fixed I32 (getHiVRegFromLo rlo) code
 542
 543 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 544   ChildCode64 code rlo <- iselExpr64 x
 545   return $ Fixed I32 rlo code
 546
 547 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 548   ChildCode64 code rlo <- iselExpr64 x
 549   return $ Fixed I32 rlo code
 550
 551 #endif
 552
 553 -- end of machine-"independent" bit; here we go on the rest...
 554
 555 #if alpha_TARGET_ARCH
 556
 557 getRegister (StDouble d)
 558   = getBlockIdNat                   `thenNat` \ lbl ->
 559     getNewRegNat PtrRep             `thenNat` \ tmp ->
 560     let code dst = mkSeqInstrs [
 561             LDATA RoDataSegment lbl [
 562                     DATA TF [ImmLab (rational d)]
 563                 ],
 564             LDA tmp (AddrImm (ImmCLbl lbl)),
 565             LD TF dst (AddrReg tmp)]
 566     in
 567         return (Any F64 code)
 568
 569 getRegister (StPrim primop [x]) -- unary PrimOps
 570   = case primop of
 571       IntNegOp -> trivialUCode (NEG Q False) x
 572
 573       NotOp    -> trivialUCode NOT x
 574
 575       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 576       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 577
 578       OrdOp -> coerceIntCode IntRep x
 579       ChrOp -> chrCode x
 580
 581       Float2IntOp  -> coerceFP2Int    x
 582       Int2FloatOp  -> coerceInt2FP pr x
 583       Double2IntOp -> coerceFP2Int    x
 584       Int2DoubleOp -> coerceInt2FP pr x
 585
 586       Double2FloatOp -> coerceFltCode x
 587       Float2DoubleOp -> coerceFltCode x
 588
 589       other_op -> getRegister (StCall fn CCallConv F64 [x])
 590         where
 591           fn = case other_op of
 592                  FloatExpOp    -> fsLit "exp"
 593                  FloatLogOp    -> fsLit "log"
 594                  FloatSqrtOp   -> fsLit "sqrt"
 595                  FloatSinOp    -> fsLit "sin"
 596                  FloatCosOp    -> fsLit "cos"
 597                  FloatTanOp    -> fsLit "tan"
 598                  FloatAsinOp   -> fsLit "asin"
 599                  FloatAcosOp   -> fsLit "acos"
 600                  FloatAtanOp   -> fsLit "atan"
 601                  FloatSinhOp   -> fsLit "sinh"
 602                  FloatCoshOp   -> fsLit "cosh"
 603                  FloatTanhOp   -> fsLit "tanh"
 604                  DoubleExpOp   -> fsLit "exp"
 605                  DoubleLogOp   -> fsLit "log"
 606                  DoubleSqrtOp  -> fsLit "sqrt"
 607                  DoubleSinOp   -> fsLit "sin"
 608                  DoubleCosOp   -> fsLit "cos"
 609                  DoubleTanOp   -> fsLit "tan"
 610                  DoubleAsinOp  -> fsLit "asin"
 611                  DoubleAcosOp  -> fsLit "acos"
 612                  DoubleAtanOp  -> fsLit "atan"
 613                  DoubleSinhOp  -> fsLit "sinh"
 614                  DoubleCoshOp  -> fsLit "cosh"
 615                  DoubleTanhOp  -> fsLit "tanh"
 616   where
 617     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 618
 619 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 620   = case primop of
 621       CharGtOp -> trivialCode (CMP LTT) y x
 622       CharGeOp -> trivialCode (CMP LE) y x
 623       CharEqOp -> trivialCode (CMP EQQ) x y
 624       CharNeOp -> int_NE_code x y
 625       CharLtOp -> trivialCode (CMP LTT) x y
 626       CharLeOp -> trivialCode (CMP LE) x y
 627
 628       IntGtOp  -> trivialCode (CMP LTT) y x
 629       IntGeOp  -> trivialCode (CMP LE) y x
 630       IntEqOp  -> trivialCode (CMP EQQ) x y
 631       IntNeOp  -> int_NE_code x y
 632       IntLtOp  -> trivialCode (CMP LTT) x y
 633       IntLeOp  -> trivialCode (CMP LE) x y
 634
 635       WordGtOp -> trivialCode (CMP ULT) y x
 636       WordGeOp -> trivialCode (CMP ULE) x y
 637       WordEqOp -> trivialCode (CMP EQQ)  x y
 638       WordNeOp -> int_NE_code x y
 639       WordLtOp -> trivialCode (CMP ULT) x y
 640       WordLeOp -> trivialCode (CMP ULE) x y
 641
 642       AddrGtOp -> trivialCode (CMP ULT) y x
 643       AddrGeOp -> trivialCode (CMP ULE) y x
 644       AddrEqOp -> trivialCode (CMP EQQ)  x y
 645       AddrNeOp -> int_NE_code x y
 646       AddrLtOp -> trivialCode (CMP ULT) x y
 647       AddrLeOp -> trivialCode (CMP ULE) x y
 648
 649       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 650       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 651       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 652       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 653       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 654       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 655
 656       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 657       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 658       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 659       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 660       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 661       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 662
 663       IntAddOp  -> trivialCode (ADD Q False) x y
 664       IntSubOp  -> trivialCode (SUB Q False) x y
 665       IntMulOp  -> trivialCode (MUL Q False) x y
 666       IntQuotOp -> trivialCode (DIV Q False) x y
 667       IntRemOp  -> trivialCode (REM Q False) x y
 668
 669       WordAddOp  -> trivialCode (ADD Q False) x y
 670       WordSubOp  -> trivialCode (SUB Q False) x y
 671       WordMulOp  -> trivialCode (MUL Q False) x y
 672       WordQuotOp -> trivialCode (DIV Q True) x y
 673       WordRemOp  -> trivialCode (REM Q True) x y
 674
 675       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 676       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 677       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 678       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 679
 680       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 681       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 682       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 683       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 684
 685       AddrAddOp  -> trivialCode (ADD Q False) x y
 686       AddrSubOp  -> trivialCode (SUB Q False) x y
 687       AddrRemOp  -> trivialCode (REM Q True) x y
 688
 689       AndOp  -> trivialCode AND x y
 690       OrOp   -> trivialCode OR  x y
 691       XorOp  -> trivialCode XOR x y
 692       SllOp  -> trivialCode SLL x y
 693       SrlOp  -> trivialCode SRL x y
 694
 695       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 696       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 697       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 698
 699       FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
 700       DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
 701   where
 702     {- ------------------------------------------------------------
 703         Some bizarre special code for getting condition codes into
 704         registers.  Integer non-equality is a test for equality
 705         followed by an XOR with 1.  (Integer comparisons always set
 706         the result register to 0 or 1.)  Floating point comparisons of
 707         any kind leave the result in a floating point register, so we
 708         need to wrangle an integer register out of things.
 709     -}
 710     int_NE_code :: StixTree -> StixTree -> NatM Register
 711
 712     int_NE_code x y
 713       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 714         getNewRegNat IntRep             `thenNat` \ tmp ->
 715         let
 716             code = registerCode register tmp
 717             src  = registerName register tmp
 718             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 719         in
 720         return (Any IntRep code__2)
 721
 722     {- ------------------------------------------------------------
 723         Comments for int_NE_code also apply to cmpF_code
 724     -}
 725     cmpF_code
 726         :: (Reg -> Reg -> Reg -> Instr)
 727         -> Cond
 728         -> StixTree -> StixTree
 729         -> NatM Register
 730
 731     cmpF_code instr cond x y
 732       = trivialFCode pr instr x y       `thenNat` \ register ->
 733         getNewRegNat F64                `thenNat` \ tmp ->
 734         getBlockIdNat                   `thenNat` \ lbl ->
 735         let
 736             code = registerCode register tmp
 737             result  = registerName register tmp
 738
 739             code__2 dst = code . mkSeqInstrs [
 740                 OR zeroh (RIImm (ImmInt 1)) dst,
 741                 BF cond  result (ImmCLbl lbl),
 742                 OR zeroh (RIReg zeroh) dst,
 743                 NEWBLOCK lbl]
 744         in
 745         return (Any IntRep code__2)
 746       where
 747         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 748       ------------------------------------------------------------
 749
 750 getRegister (CmmLoad pk mem)
 751   = getAmode mem                    `thenNat` \ amode ->
 752     let
 753         code = amodeCode amode
 754         src   = amodeAddr amode
 755         size = primRepToSize pk
 756         code__2 dst = code . mkSeqInstr (LD size dst src)
 757     in
 758     return (Any pk code__2)
 759
 760 getRegister (StInt i)
 761   | fits8Bits i
 762   = let
 763         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 764     in
 765     return (Any IntRep code)
 766   | otherwise
 767   = let
 768         code dst = mkSeqInstr (LDI Q dst src)
 769     in
 770     return (Any IntRep code)
 771   where
 772     src = ImmInt (fromInteger i)
 773
 774 getRegister leaf
 775   | isJust imm
 776   = let
 777         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 778     in
 779     return (Any PtrRep code)
 780   where
 781     imm = maybeImm leaf
 782     imm__2 = case imm of Just x -> x
 783
 784 #endif /* alpha_TARGET_ARCH */
 785
 786 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 787
 788 #if i386_TARGET_ARCH
 789
 790 getRegister (CmmLit (CmmFloat f F32)) = do
 791     lbl <- getNewLabelNat
 792     dflags <- getDynFlagsNat
 793     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 794     Amode addr addr_code <- getAmode dynRef
 795     let code dst =
 796             LDATA ReadOnlyData
 797                         [CmmDataLabel lbl,
 798                          CmmStaticLit (CmmFloat f F32)]
 799             `consOL` (addr_code `snocOL`
 800             GLD F32 addr dst)
 801     -- in
 802     return (Any F32 code)
 803
 804
 805 getRegister (CmmLit (CmmFloat d F64))
 806   | d == 0.0
 807   = let code dst = unitOL (GLDZ dst)
 808     in  return (Any F64 code)
 809
 810   | d == 1.0
 811   = let code dst = unitOL (GLD1 dst)
 812     in  return (Any F64 code)
 813
 814   | otherwise = do
 815     lbl <- getNewLabelNat
 816     dflags <- getDynFlagsNat
 817     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 818     Amode addr addr_code <- getAmode dynRef
 819     let code dst =
 820             LDATA ReadOnlyData
 821                         [CmmDataLabel lbl,
 822                          CmmStaticLit (CmmFloat d F64)]
 823             `consOL` (addr_code `snocOL`
 824             GLD F64 addr dst)
 825     -- in
 826     return (Any F64 code)
 827
 828 #endif /* i386_TARGET_ARCH */
 829
 830 #if x86_64_TARGET_ARCH
 831
 832 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 833    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 834         -- I don't know why there are xorpd, xorps, and pxor instructions.
 835         -- They all appear to do the same thing --SDM
 836    return (Any rep code)
 837
 838 getRegister (CmmLit (CmmFloat f rep)) = do
 839     lbl <- getNewLabelNat
 840     let code dst = toOL [
 841             LDATA ReadOnlyData
 842                         [CmmDataLabel lbl,
 843                          CmmStaticLit (CmmFloat f rep)],
 844             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 845             ]
 846     -- in
 847     return (Any rep code)
 848
 849 #endif /* x86_64_TARGET_ARCH */
 850
 851 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 852
 853 -- catch simple cases of zero- or sign-extended load
 854 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 855   code <- intLoadCode (MOVZxL I8) addr
 856   return (Any I32 code)
 857
 858 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 859   code <- intLoadCode (MOVSxL I8) addr
 860   return (Any I32 code)
 861
 862 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 863   code <- intLoadCode (MOVZxL I16) addr
 864   return (Any I32 code)
 865
 866 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 867   code <- intLoadCode (MOVSxL I16) addr
 868   return (Any I32 code)
 869
 870 #endif
 871
 872 #if x86_64_TARGET_ARCH
 873
 874 -- catch simple cases of zero- or sign-extended load
 875 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 876   code <- intLoadCode (MOVZxL I8) addr
 877   return (Any I64 code)
 878
 879 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 880   code <- intLoadCode (MOVSxL I8) addr
 881   return (Any I64 code)
 882
 883 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 884   code <- intLoadCode (MOVZxL I16) addr
 885   return (Any I64 code)
 886
 887 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 888   code <- intLoadCode (MOVSxL I16) addr
 889   return (Any I64 code)
 890
 891 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 892   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 893   return (Any I64 code)
 894
 895 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 896   code <- intLoadCode (MOVSxL I32) addr
 897   return (Any I64 code)
 898
 899 #endif
 900
 901 #if x86_64_TARGET_ARCH
 902 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 903                                      CmmLit displacement])
 904     = return $ Any I64 (\dst -> unitOL $
 905         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 906 #endif
 907
 908 #if x86_64_TARGET_ARCH
 909 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 910   x_code <- getAnyReg x
 911   lbl <- getNewLabelNat
 912   let
 913     code dst = x_code dst `appOL` toOL [
 914         -- This is how gcc does it, so it can't be that bad:
 915         LDATA ReadOnlyData16 [
 916                 CmmAlign 16,
 917                 CmmDataLabel lbl,
 918                 CmmStaticLit (CmmInt 0x80000000 I32),
 919                 CmmStaticLit (CmmInt 0 I32),
 920                 CmmStaticLit (CmmInt 0 I32),
 921                 CmmStaticLit (CmmInt 0 I32)
 922         ],
 923         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 924                 -- xorps, so we need the 128-bit constant
 925                 -- ToDo: rip-relative
 926         ]
 927   --
 928   return (Any F32 code)
 929
 930 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 931   x_code <- getAnyReg x
 932   lbl <- getNewLabelNat
 933   let
 934         -- This is how gcc does it, so it can't be that bad:
 935     code dst = x_code dst `appOL` toOL [
 936         LDATA ReadOnlyData16 [
 937                 CmmAlign 16,
 938                 CmmDataLabel lbl,
 939                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 940                 CmmStaticLit (CmmInt 0 I64)
 941         ],
 942                 -- gcc puts an unpck here.  Wonder if we need it.
 943         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 944                 -- xorpd, so we need the 128-bit constant
 945         ]
 946   --
 947   return (Any F64 code)
 948 #endif
 949
 950 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 951
 952 getRegister (CmmMachOp mop [x]) -- unary MachOps
 953   = case mop of
 954 #if i386_TARGET_ARCH
 955       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 956       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 957 #endif
 958
 959       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 960       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 961
 962       -- Nop conversions
 963       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 964       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 965       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 966       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 967       MO_U_Conv I32 I16 -> toI16Reg I32 x
 968       MO_S_Conv I32 I16 -> toI16Reg I32 x
 969 #if x86_64_TARGET_ARCH
 970       MO_U_Conv I64 I32 -> conversionNop I64 x
 971       MO_S_Conv I64 I32 -> conversionNop I64 x
 972       MO_U_Conv I64 I16 -> toI16Reg I64 x
 973       MO_S_Conv I64 I16 -> toI16Reg I64 x
 974       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 975       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 976 #endif
 977
 978       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 979       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 980
 981       -- widenings
 982       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 983       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 984       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 985
 986       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 987       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 988       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 989
 990 #if x86_64_TARGET_ARCH
 991       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 992       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 993       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 994       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 995       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 996       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 997         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
 998         -- However, we don't want the register allocator to throw it
 999         -- away as an unnecessary reg-to-reg move, so we keep it in
1000         -- the form of a movzl and print it as a movl later.
1001 #endif
1002
1003 #if i386_TARGET_ARCH
1004       MO_S_Conv F32 F64 -> conversionNop F64 x
1005       MO_S_Conv F64 F32 -> conversionNop F32 x
1006 #else
1007       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1008       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1009 #endif
1010
1011       MO_S_Conv from to
1012         | isFloatingRep from -> coerceFP2Int from to x
1013         | isFloatingRep to   -> coerceInt2FP from to x
1014
1015       other -> pprPanic "getRegister" (pprMachOp mop)
1016    where
1017         -- signed or unsigned extension.
1018         integerExtend from to instr expr = do
1019             (reg,e_code) <- if from == I8 then getByteReg expr
1020                                           else getSomeReg expr
1021             let
1022                 code dst =
1023                   e_code `snocOL`
1024                   instr from (OpReg reg) (OpReg dst)
1025             return (Any to code)
1026
1027         toI8Reg new_rep expr
1028             = do codefn <- getAnyReg expr
1029                  return (Any new_rep codefn)
1030                 -- HACK: use getAnyReg to get a byte-addressable register.
1031                 -- If the source was a Fixed register, this will add the
1032                 -- mov instruction to put it into the desired destination.
1033                 -- We're assuming that the destination won't be a fixed
1034                 -- non-byte-addressable register; it won't be, because all
1035                 -- fixed registers are word-sized.
1036
1037         toI16Reg = toI8Reg -- for now
1038
1039         conversionNop new_rep expr
1040             = do e_code <- getRegister expr
1041                  return (swizzleRegisterRep e_code new_rep)
1042
1043
1044 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1045   = case mop of
1046       MO_Eq F32   -> condFltReg EQQ x y
1047       MO_Ne F32   -> condFltReg NE x y
1048       MO_S_Gt F32 -> condFltReg GTT x y
1049       MO_S_Ge F32 -> condFltReg GE x y
1050       MO_S_Lt F32 -> condFltReg LTT x y
1051       MO_S_Le F32 -> condFltReg LE x y
1052
1053       MO_Eq F64   -> condFltReg EQQ x y
1054       MO_Ne F64   -> condFltReg NE x y
1055       MO_S_Gt F64 -> condFltReg GTT x y
1056       MO_S_Ge F64 -> condFltReg GE x y
1057       MO_S_Lt F64 -> condFltReg LTT x y
1058       MO_S_Le F64 -> condFltReg LE x y
1059
1060       MO_Eq rep   -> condIntReg EQQ x y
1061       MO_Ne rep   -> condIntReg NE x y
1062
1063       MO_S_Gt rep -> condIntReg GTT x y
1064       MO_S_Ge rep -> condIntReg GE x y
1065       MO_S_Lt rep -> condIntReg LTT x y
1066       MO_S_Le rep -> condIntReg LE x y
1067
1068       MO_U_Gt rep -> condIntReg GU  x y
1069       MO_U_Ge rep -> condIntReg GEU x y
1070       MO_U_Lt rep -> condIntReg LU  x y
1071       MO_U_Le rep -> condIntReg LEU x y
1072
1073 #if i386_TARGET_ARCH
1074       MO_Add F32 -> trivialFCode F32 GADD x y
1075       MO_Sub F32 -> trivialFCode F32 GSUB x y
1076
1077       MO_Add F64 -> trivialFCode F64 GADD x y
1078       MO_Sub F64 -> trivialFCode F64 GSUB x y
1079
1080       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1081       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1082 #endif
1083
1084 #if x86_64_TARGET_ARCH
1085       MO_Add F32 -> trivialFCode F32 ADD x y
1086       MO_Sub F32 -> trivialFCode F32 SUB x y
1087
1088       MO_Add F64 -> trivialFCode F64 ADD x y
1089       MO_Sub F64 -> trivialFCode F64 SUB x y
1090
1091       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1092       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1093 #endif
1094
1095       MO_Add rep -> add_code rep x y
1096       MO_Sub rep -> sub_code rep x y
1097
1098       MO_S_Quot rep -> div_code rep True  True  x y
1099       MO_S_Rem  rep -> div_code rep True  False x y
1100       MO_U_Quot rep -> div_code rep False True  x y
1101       MO_U_Rem  rep -> div_code rep False False x y
1102
1103 #if i386_TARGET_ARCH
1104       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1105       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1106 #endif
1107
1108 #if x86_64_TARGET_ARCH
1109       MO_Mul   F32 -> trivialFCode F32 MUL x y
1110       MO_Mul   F64 -> trivialFCode F64 MUL x y
1111 #endif
1112
1113       MO_Mul   rep -> let op = IMUL rep in
1114                       trivialCode rep op (Just op) x y
1115
1116       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1117
1118       MO_And rep -> let op = AND rep in
1119                     trivialCode rep op (Just op) x y
1120       MO_Or  rep -> let op = OR  rep in
1121                     trivialCode rep op (Just op) x y
1122       MO_Xor rep -> let op = XOR rep in
1123                     trivialCode rep op (Just op) x y
1124
1125         {- Shift ops on x86s have constraints on their source, it
1126            either has to be Imm, CL or 1
1127             => trivialCode is not restrictive enough (sigh.)
1128         -}
1129       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1130       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1131       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1132
1133       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1134   where
1135     --------------------
1136     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1137     imulMayOflo rep a b = do
1138          (a_reg, a_code) <- getNonClobberedReg a
1139          b_code <- getAnyReg b
1140          let
1141              shift_amt  = case rep of
1142                            I32 -> 31
1143                            I64 -> 63
1144                            _ -> panic "shift_amt"
1145
1146              code = a_code `appOL` b_code eax `appOL`
1147                         toOL [
1148                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1149                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1150                                 -- sign extend lower part
1151                            SUB rep (OpReg edx) (OpReg eax)
1152                                 -- compare against upper
1153                            -- eax==0 if high part == sign extended low part
1154                         ]
1155          -- in
1156          return (Fixed rep eax code)
1157
1158     --------------------
1159     shift_code :: MachRep
1160                -> (Operand -> Operand -> Instr)
1161                -> CmmExpr
1162                -> CmmExpr
1163                -> NatM Register
1164
1165     {- Case1: shift length as immediate -}
1166     shift_code rep instr x y@(CmmLit lit) = do
1167           x_code <- getAnyReg x
1168           let
1169                code dst
1170                   = x_code dst `snocOL`
1171                     instr (OpImm (litToImm lit)) (OpReg dst)
1172           -- in
1173           return (Any rep code)
1174
1175     {- Case2: shift length is complex (non-immediate)
1176       * y must go in %ecx.
1177       * we cannot do y first *and* put its result in %ecx, because
1178         %ecx might be clobbered by x.
1179       * if we do y second, then x cannot be
1180         in a clobbered reg.  Also, we cannot clobber x's reg
1181         with the instruction itself.
1182       * so we can either:
1183         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1184         - do y second and put its result into %ecx.  x gets placed in a fresh
1185           tmp.  This is likely to be better, becuase the reg alloc can
1186           eliminate this reg->reg move here (it won't eliminate the other one,
1187           because the move is into the fixed %ecx).
1188     -}
1189     shift_code rep instr x y{-amount-} = do
1190         x_code <- getAnyReg x
1191         tmp <- getNewRegNat rep
1192         y_code <- getAnyReg y
1193         let
1194            code = x_code tmp `appOL`
1195                   y_code ecx `snocOL`
1196                   instr (OpReg ecx) (OpReg tmp)
1197         -- in
1198         return (Fixed rep tmp code)
1199
1200     --------------------
1201     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1202     add_code rep x (CmmLit (CmmInt y _))
1203         | not (is64BitInteger y) = add_int rep x y
1204     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1205
1206     --------------------
1207     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1208     sub_code rep x (CmmLit (CmmInt y _))
1209         | not (is64BitInteger (-y)) = add_int rep x (-y)
1210     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1211
1212     -- our three-operand add instruction:
1213     add_int rep x y = do
1214         (x_reg, x_code) <- getSomeReg x
1215         let
1216             imm = ImmInt (fromInteger y)
1217             code dst
1218                = x_code `snocOL`
1219                  LEA rep
1220                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1221                         (OpReg dst)
1222         --
1223         return (Any rep code)
1224
1225     ----------------------
1226     div_code rep signed quotient x y = do
1227            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1228            x_code <- getAnyReg x
1229            let
1230              widen | signed    = CLTD rep
1231                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1232
1233              instr | signed    = IDIV
1234                    | otherwise = DIV
1235
1236              code = y_code `appOL`
1237                     x_code eax `appOL`
1238                     toOL [widen, instr rep y_op]
1239
1240              result | quotient  = eax
1241                     | otherwise = edx
1242
1243            -- in
1244            return (Fixed rep result code)
1245
1246
1247 getRegister (CmmLoad mem pk)
1248   | isFloatingRep pk
1249   = do
1250     Amode src mem_code <- getAmode mem
1251     let
1252         code dst = mem_code `snocOL`
1253                    IF_ARCH_i386(GLD pk src dst,
1254                                 MOV pk (OpAddr src) (OpReg dst))
1255     --
1256     return (Any pk code)
1257
1258 #if i386_TARGET_ARCH
1259 getRegister (CmmLoad mem pk)
1260   | pk /= I64
1261   = do
1262     code <- intLoadCode (instr pk) mem
1263     return (Any pk code)
1264   where
1265         instr I8  = MOVZxL pk
1266         instr I16 = MOV I16
1267         instr I32 = MOV I32
1268         -- we always zero-extend 8-bit loads, if we
1269         -- can't think of anything better.  This is because
1270         -- we can't guarantee access to an 8-bit variant of every register
1271         -- (esi and edi don't have 8-bit variants), so to make things
1272         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1273 #endif
1274
1275 #if x86_64_TARGET_ARCH
1276 -- Simpler memory load code on x86_64
1277 getRegister (CmmLoad mem pk)
1278   = do
1279     code <- intLoadCode (MOV pk) mem
1280     return (Any pk code)
1281 #endif
1282
1283 getRegister (CmmLit (CmmInt 0 rep))
1284   = let
1285         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1286         adj_rep = case rep of I64 -> I32; _ -> rep
1287         rep1 = IF_ARCH_i386( rep, adj_rep )
1288         code dst
1289            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1290     in
1291         return (Any rep code)
1292
1293 #if x86_64_TARGET_ARCH
1294   -- optimisation for loading small literals on x86_64: take advantage
1295   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1296   -- instruction forms are shorter.
1297 getRegister (CmmLit lit)
1298   | I64 <- cmmLitRep lit, not (isBigLit lit)
1299   = let
1300         imm = litToImm lit
1301         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1302     in
1303         return (Any I64 code)
1304   where
1305    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1306    isBigLit _ = False
1307         -- note1: not the same as is64BitLit, because that checks for
1308         -- signed literals that fit in 32 bits, but we want unsigned
1309         -- literals here.
1310         -- note2: all labels are small, because we're assuming the
1311         -- small memory model (see gcc docs, -mcmodel=small).
1312 #endif
1313
1314 getRegister (CmmLit lit)
1315   = let
1316         rep = cmmLitRep lit
1317         imm = litToImm lit
1318         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1319     in
1320         return (Any rep code)
1321
1322 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1323
1324
1325 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1326    -> NatM (Reg -> InstrBlock)
1327 intLoadCode instr mem = do
1328   Amode src mem_code <- getAmode mem
1329   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1330
1331 -- Compute an expression into *any* register, adding the appropriate
1332 -- move instruction if necessary.
1333 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1334 getAnyReg expr = do
1335   r <- getRegister expr
1336   anyReg r
1337
1338 anyReg :: Register -> NatM (Reg -> InstrBlock)
1339 anyReg (Any _ code)          = return code
1340 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1341
1342 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1343 -- Fixed registers might not be byte-addressable, so we make sure we've
1344 -- got a temporary, inserting an extra reg copy if necessary.
1345 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1346 #if x86_64_TARGET_ARCH
1347 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1348 #else
1349 getByteReg expr = do
1350   r <- getRegister expr
1351   case r of
1352     Any rep code -> do
1353         tmp <- getNewRegNat rep
1354         return (tmp, code tmp)
1355     Fixed rep reg code
1356         | isVirtualReg reg -> return (reg,code)
1357         | otherwise -> do
1358             tmp <- getNewRegNat rep
1359             return (tmp, code `snocOL` reg2reg rep reg tmp)
1360         -- ToDo: could optimise slightly by checking for byte-addressable
1361         -- real registers, but that will happen very rarely if at all.
1362 #endif
1363
1364 -- Another variant: this time we want the result in a register that cannot
1365 -- be modified by code to evaluate an arbitrary expression.
1366 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1367 getNonClobberedReg expr = do
1368   r <- getRegister expr
1369   case r of
1370     Any rep code -> do
1371         tmp <- getNewRegNat rep
1372         return (tmp, code tmp)
1373     Fixed rep reg code
1374         -- only free regs can be clobbered
1375         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1376                 tmp <- getNewRegNat rep
1377                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1378         | otherwise ->
1379                 return (reg, code)
1380
1381 reg2reg :: MachRep -> Reg -> Reg -> Instr
1382 reg2reg rep src dst
1383 #if i386_TARGET_ARCH
1384   | isFloatingRep rep = GMOV src dst
1385 #endif
1386   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1387
1388 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1389
1390 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1391
1392 #if sparc_TARGET_ARCH
1393
1394 getRegister (CmmLit (CmmFloat f F32)) = do
1395     lbl <- getNewLabelNat
1396     let code dst = toOL [
1397             LDATA ReadOnlyData
1398                         [CmmDataLabel lbl,
1399                          CmmStaticLit (CmmFloat f F32)],
1400             SETHI (HI (ImmCLbl lbl)) dst,
1401             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1402     return (Any F32 code)
1403
1404 getRegister (CmmLit (CmmFloat d F64)) = do
1405     lbl <- getNewLabelNat
1406     let code dst = toOL [
1407             LDATA ReadOnlyData
1408                         [CmmDataLabel lbl,
1409                          CmmStaticLit (CmmFloat d F64)],
1410             SETHI (HI (ImmCLbl lbl)) dst,
1411             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1412     return (Any F64 code)
1413
1414 getRegister (CmmMachOp mop [x]) -- unary MachOps
1415   = case mop of
1416       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1417       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1418
1419       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1420       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1421
1422       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1423
1424       MO_U_Conv F64 F32-> coerceDbl2Flt x
1425       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1426
1427       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1428       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1429       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1430       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1431
1432       -- Conversions which are a nop on sparc
1433       MO_U_Conv from to
1434         | from == to   -> conversionNop to   x
1435       MO_U_Conv I32 to -> conversionNop to   x
1436       MO_S_Conv I32 to -> conversionNop to   x
1437
1438       -- widenings
1439       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1440       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1441       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1442       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1443
1444       other_op -> panic "Unknown unary mach op"
1445     where
1446         -- XXX SLL/SRL?
1447         integerExtend signed from to expr = do
1448            (reg, e_code) <- getSomeReg expr
1449            let
1450                code dst =
1451                    e_code `snocOL`
1452                    ((if signed then SRA else SRL)
1453                           reg (RIImm (ImmInt 0)) dst)
1454            return (Any to code)
1455         conversionNop new_rep expr
1456             = do e_code <- getRegister expr
1457                  return (swizzleRegisterRep e_code new_rep)
1458
1459 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1460   = case mop of
1461       MO_Eq F32 -> condFltReg EQQ x y
1462       MO_Ne F32 -> condFltReg NE x y
1463
1464       MO_S_Gt F32 -> condFltReg GTT x y
1465       MO_S_Ge F32 -> condFltReg GE x y
1466       MO_S_Lt F32 -> condFltReg LTT x y
1467       MO_S_Le F32 -> condFltReg LE x y
1468
1469       MO_Eq F64 -> condFltReg EQQ x y
1470       MO_Ne F64 -> condFltReg NE x y
1471
1472       MO_S_Gt F64 -> condFltReg GTT x y
1473       MO_S_Ge F64 -> condFltReg GE x y
1474       MO_S_Lt F64 -> condFltReg LTT x y
1475       MO_S_Le F64 -> condFltReg LE x y
1476
1477       MO_Eq rep -> condIntReg EQQ x y
1478       MO_Ne rep -> condIntReg NE x y
1479
1480       MO_S_Gt rep -> condIntReg GTT x y
1481       MO_S_Ge rep -> condIntReg GE x y
1482       MO_S_Lt rep -> condIntReg LTT x y
1483       MO_S_Le rep -> condIntReg LE x y
1484
1485       MO_U_Gt I32  -> condIntReg GTT x y
1486       MO_U_Ge I32  -> condIntReg GE x y
1487       MO_U_Lt I32  -> condIntReg LTT x y
1488       MO_U_Le I32  -> condIntReg LE x y
1489
1490       MO_U_Gt I16 -> condIntReg GU  x y
1491       MO_U_Ge I16 -> condIntReg GEU x y
1492       MO_U_Lt I16 -> condIntReg LU  x y
1493       MO_U_Le I16 -> condIntReg LEU x y
1494
1495       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1496       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1497
1498       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1499 {-
1500       -- ToDo: teach about V8+ SPARC div instructions
1501       MO_S_Quot I32 -> idiv (fsLit ".div")  x y
1502       MO_S_Rem I32  -> idiv (fsLit ".rem")  x y
1503       MO_U_Quot I32 -> idiv (fsLit ".udiv")  x y
1504       MO_U_Rem I32  -> idiv (fsLit ".urem")  x y
1505 -}
1506       MO_Add F32  -> trivialFCode F32 FADD  x y
1507       MO_Sub F32   -> trivialFCode F32  FSUB x y
1508       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1509       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1510
1511       MO_Add F64   -> trivialFCode F64 FADD  x y
1512       MO_Sub F64   -> trivialFCode F64  FSUB x y
1513       MO_Mul F64   -> trivialFCode F64  FMUL x y
1514       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1515
1516       MO_And rep   -> trivialCode rep (AND False) x y
1517       MO_Or rep    -> trivialCode rep (OR  False) x y
1518       MO_Xor rep   -> trivialCode rep (XOR False) x y
1519
1520       MO_Mul rep -> trivialCode rep (SMUL False) x y
1521
1522       MO_Shl rep   -> trivialCode rep SLL  x y
1523       MO_U_Shr rep   -> trivialCode rep SRL x y
1524       MO_S_Shr rep   -> trivialCode rep SRA x y
1525
1526 {-
1527       MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1528                                          [promote x, promote y])
1529                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1530       MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1531                                         [x, y])
1532 -}
1533       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1534   where
1535     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1536
1537     --------------------
1538     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1539     imulMayOflo rep a b = do
1540          (a_reg, a_code) <- getSomeReg a
1541          (b_reg, b_code) <- getSomeReg b
1542          res_lo <- getNewRegNat I32
1543          res_hi <- getNewRegNat I32
1544          let
1545             shift_amt  = case rep of
1546                           I32 -> 31
1547                           I64 -> 63
1548                           _ -> panic "shift_amt"
1549             code dst = a_code `appOL` b_code `appOL`
1550                        toOL [
1551                            SMUL False a_reg (RIReg b_reg) res_lo,
1552                            RDY res_hi,
1553                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1554                            SUB False False res_lo (RIReg res_hi) dst
1555                         ]
1556          return (Any I32 code)
1557
1558 getRegister (CmmLoad mem pk) = do
1559     Amode src code <- getAmode mem
1560     let
1561         code__2 dst = code `snocOL` LD pk src dst
1562     return (Any pk code__2)
1563
1564 getRegister (CmmLit (CmmInt i _))
1565   | fits13Bits i
1566   = let
1567         src = ImmInt (fromInteger i)
1568         code dst = unitOL (OR False g0 (RIImm src) dst)
1569     in
1570         return (Any I32 code)
1571
1572 getRegister (CmmLit lit)
1573   = let rep = cmmLitRep lit
1574         imm = litToImm lit
1575         code dst = toOL [
1576             SETHI (HI imm) dst,
1577             OR False dst (RIImm (LO imm)) dst]
1578     in return (Any I32 code)
1579
1580 #endif /* sparc_TARGET_ARCH */
1581
1582 #if powerpc_TARGET_ARCH
1583 getRegister (CmmLoad mem pk)
1584   | pk /= I64
1585   = do
1586         Amode addr addr_code <- getAmode mem
1587         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1588                        addr_code `snocOL` LD pk dst addr
1589         return (Any pk code)
1590
1591 -- catch simple cases of zero- or sign-extended load
1592 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1593     Amode addr addr_code <- getAmode mem
1594     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1595
1596 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1597
1598 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1599     Amode addr addr_code <- getAmode mem
1600     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1601
1602 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1603     Amode addr addr_code <- getAmode mem
1604     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1605
1606 getRegister (CmmMachOp mop [x]) -- unary MachOps
1607   = case mop of
1608       MO_Not rep   -> trivialUCode rep NOT x
1609
1610       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1611       MO_S_Conv F32 F64 -> conversionNop F64 x
1612
1613       MO_S_Conv from to
1614         | from == to         -> conversionNop to x
1615         | isFloatingRep from -> coerceFP2Int from to x
1616         | isFloatingRep to   -> coerceInt2FP from to x
1617
1618         -- narrowing is a nop: we treat the high bits as undefined
1619       MO_S_Conv I32 to -> conversionNop to x
1620       MO_S_Conv I16 I8 -> conversionNop I8 x
1621       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1622       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1623
1624       MO_U_Conv from to
1625         | from == to -> conversionNop to x
1626         -- narrowing is a nop: we treat the high bits as undefined
1627       MO_U_Conv I32 to -> conversionNop to x
1628       MO_U_Conv I16 I8 -> conversionNop I8 x
1629       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1630       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1631
1632       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1633       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1634       MO_S_Neg rep      -> trivialUCode rep NEG x
1635
1636     where
1637         conversionNop new_rep expr
1638             = do e_code <- getRegister expr
1639                  return (swizzleRegisterRep e_code new_rep)
1640
1641 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1642   = case mop of
1643       MO_Eq F32 -> condFltReg EQQ x y
1644       MO_Ne F32 -> condFltReg NE  x y
1645
1646       MO_S_Gt F32 -> condFltReg GTT x y
1647       MO_S_Ge F32 -> condFltReg GE  x y
1648       MO_S_Lt F32 -> condFltReg LTT x y
1649       MO_S_Le F32 -> condFltReg LE  x y
1650
1651       MO_Eq F64 -> condFltReg EQQ x y
1652       MO_Ne F64 -> condFltReg NE  x y
1653
1654       MO_S_Gt F64 -> condFltReg GTT x y
1655       MO_S_Ge F64 -> condFltReg GE  x y
1656       MO_S_Lt F64 -> condFltReg LTT x y
1657       MO_S_Le F64 -> condFltReg LE  x y
1658
1659       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1660       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1661
1662       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1663       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1664       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1665       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1666
1667       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1668       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1669       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1670       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1671
1672       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1673       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1674       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1675       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1676
1677       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1678       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1679       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1680       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1681
1682          -- optimize addition with 32-bit immediate
1683          -- (needed for PIC)
1684       MO_Add I32 ->
1685         case y of
1686           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1687             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1688           CmmLit lit
1689             -> do
1690                 (src, srcCode) <- getSomeReg x
1691                 let imm = litToImm lit
1692                     code dst = srcCode `appOL` toOL [
1693                                     ADDIS dst src (HA imm),
1694                                     ADD dst dst (RIImm (LO imm))
1695                                 ]
1696                 return (Any I32 code)
1697           _ -> trivialCode I32 True ADD x y
1698
1699       MO_Add rep -> trivialCode rep True ADD x y
1700       MO_Sub rep ->
1701         case y of    -- subfi ('substract from' with immediate) doesn't exist
1702           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1703             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1704           _ -> trivialCodeNoImm rep SUBF y x
1705
1706       MO_Mul rep -> trivialCode rep True MULLW x y
1707
1708       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1709
1710       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1711       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1712
1713       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1714       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1715
1716       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1717       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1718
1719       MO_And rep   -> trivialCode rep False AND x y
1720       MO_Or rep    -> trivialCode rep False OR x y
1721       MO_Xor rep   -> trivialCode rep False XOR x y
1722
1723       MO_Shl rep   -> trivialCode rep False SLW x y
1724       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1725       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1726
1727 getRegister (CmmLit (CmmInt i rep))
1728   | Just imm <- makeImmediate rep True i
1729   = let
1730         code dst = unitOL (LI dst imm)
1731     in
1732         return (Any rep code)
1733
1734 getRegister (CmmLit (CmmFloat f frep)) = do
1735     lbl <- getNewLabelNat
1736     dflags <- getDynFlagsNat
1737     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1738     Amode addr addr_code <- getAmode dynRef
1739     let code dst =
1740             LDATA ReadOnlyData  [CmmDataLabel lbl,
1741                                  CmmStaticLit (CmmFloat f frep)]
1742             `consOL` (addr_code `snocOL` LD frep dst addr)
1743     return (Any frep code)
1744
1745 getRegister (CmmLit lit)
1746   = let rep = cmmLitRep lit
1747         imm = litToImm lit
1748         code dst = toOL [
1749               LIS dst (HA imm),
1750               ADD dst dst (RIImm (LO imm))
1751           ]
1752     in return (Any rep code)
1753
1754 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1755
1756     -- extend?Rep: wrap integer expression of type rep
1757     -- in a conversion to I32
1758 extendSExpr I32 x = x
1759 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1760 extendUExpr I32 x = x
1761 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1762
1763 #endif /* powerpc_TARGET_ARCH */
1764
1765
1766 -- -----------------------------------------------------------------------------
1767 --  The 'Amode' type: Memory addressing modes passed up the tree.
1768
1769 data Amode = Amode AddrMode InstrBlock
1770
1771 {-
1772 Now, given a tree (the argument to an CmmLoad) that references memory,
1773 produce a suitable addressing mode.
1774
1775 A Rule of the Game (tm) for Amodes: use of the addr bit must
1776 immediately follow use of the code part, since the code part puts
1777 values in registers which the addr then refers to.  So you can't put
1778 anything in between, lest it overwrite some of those registers.  If
1779 you need to do some other computation between the code part and use of
1780 the addr bit, first store the effective address from the amode in a
1781 temporary, then do the other computation, and then use the temporary:
1782
1783     code
1784     LEA amode, tmp
1785     ... other computation ...
1786     ... (tmp) ...
1787 -}
1788
1789 getAmode :: CmmExpr -> NatM Amode
1790 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1791
1792 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1793
1794 #if alpha_TARGET_ARCH
1795
1796 getAmode (StPrim IntSubOp [x, StInt i])
1797   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1798     getRegister x               `thenNat` \ register ->
1799     let
1800         code = registerCode register tmp
1801         reg  = registerName register tmp
1802         off  = ImmInt (-(fromInteger i))
1803     in
1804     return (Amode (AddrRegImm reg off) code)
1805
1806 getAmode (StPrim IntAddOp [x, StInt i])
1807   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1808     getRegister x               `thenNat` \ register ->
1809     let
1810         code = registerCode register tmp
1811         reg  = registerName register tmp
1812         off  = ImmInt (fromInteger i)
1813     in
1814     return (Amode (AddrRegImm reg off) code)
1815
1816 getAmode leaf
1817   | isJust imm
1818   = return (Amode (AddrImm imm__2) id)
1819   where
1820     imm = maybeImm leaf
1821     imm__2 = case imm of Just x -> x
1822
1823 getAmode other
1824   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1825     getRegister other           `thenNat` \ register ->
1826     let
1827         code = registerCode register tmp
1828         reg  = registerName register tmp
1829     in
1830     return (Amode (AddrReg reg) code)
1831
1832 #endif /* alpha_TARGET_ARCH */
1833
1834 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1835
1836 #if x86_64_TARGET_ARCH
1837
1838 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1839                                      CmmLit displacement])
1840     = return $ Amode (ripRel (litToImm displacement)) nilOL
1841
1842 #endif
1843
1844 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1845
1846 -- This is all just ridiculous, since it carefully undoes
1847 -- what mangleIndexTree has just done.
1848 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1849   | not (is64BitLit lit)
1850   -- ASSERT(rep == I32)???
1851   = do (x_reg, x_code) <- getSomeReg x
1852        let off = ImmInt (-(fromInteger i))
1853        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1854
1855 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1856   | not (is64BitLit lit)
1857   -- ASSERT(rep == I32)???
1858   = do (x_reg, x_code) <- getSomeReg x
1859        let off = ImmInt (fromInteger i)
1860        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1861
1862 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1863 -- recognised by the next rule.
1864 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1865                                   b@(CmmLit _)])
1866   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1867
1868 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1869                                         [y, CmmLit (CmmInt shift _)]])
1870   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1871   = x86_complex_amode x y shift 0
1872
1873 getAmode (CmmMachOp (MO_Add rep)
1874                 [x, CmmMachOp (MO_Add _)
1875                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1876                          CmmLit (CmmInt offset _)]])
1877   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1878   && not (is64BitInteger offset)
1879   = x86_complex_amode x y shift offset
1880
1881 getAmode (CmmMachOp (MO_Add rep) [x,y])
1882   = x86_complex_amode x y 0 0
1883
1884 getAmode (CmmLit lit) | not (is64BitLit lit)
1885   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1886
1887 getAmode expr = do
1888   (reg,code) <- getSomeReg expr
1889   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1890
1891
1892 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1893 x86_complex_amode base index shift offset
1894   = do (x_reg, x_code) <- getNonClobberedReg base
1895         -- x must be in a temp, because it has to stay live over y_code
1896         -- we could compre x_reg and y_reg and do something better here...
1897        (y_reg, y_code) <- getSomeReg index
1898        let
1899            code = x_code `appOL` y_code
1900            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1901        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1902                code)
1903
1904 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1905
1906 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1907
1908 #if sparc_TARGET_ARCH
1909
1910 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1911   | fits13Bits (-i)
1912   = do
1913        (reg, code) <- getSomeReg x
1914        let
1915          off  = ImmInt (-(fromInteger i))
1916        return (Amode (AddrRegImm reg off) code)
1917
1918
1919 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1920   | fits13Bits i
1921   = do
1922        (reg, code) <- getSomeReg x
1923        let
1924          off  = ImmInt (fromInteger i)
1925        return (Amode (AddrRegImm reg off) code)
1926
1927 getAmode (CmmMachOp (MO_Add rep) [x, y])
1928   = do
1929     (regX, codeX) <- getSomeReg x
1930     (regY, codeY) <- getSomeReg y
1931     let
1932         code = codeX `appOL` codeY
1933     return (Amode (AddrRegReg regX regY) code)
1934
1935 -- XXX Is this same as "leaf" in Stix?
1936 getAmode (CmmLit lit)
1937   = do
1938       tmp <- getNewRegNat I32
1939       let
1940         code = unitOL (SETHI (HI imm__2) tmp)
1941       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1942       where
1943          imm__2 = litToImm lit
1944
1945 getAmode other
1946   = do
1947        (reg, code) <- getSomeReg other
1948        let
1949             off  = ImmInt 0
1950        return (Amode (AddrRegImm reg off) code)
1951
1952 #endif /* sparc_TARGET_ARCH */
1953
1954 #ifdef powerpc_TARGET_ARCH
1955 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1956   | Just off <- makeImmediate I32 True (-i)
1957   = do
1958         (reg, code) <- getSomeReg x
1959         return (Amode (AddrRegImm reg off) code)
1960
1961
1962 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1963   | Just off <- makeImmediate I32 True i
1964   = do
1965         (reg, code) <- getSomeReg x
1966         return (Amode (AddrRegImm reg off) code)
1967
1968    -- optimize addition with 32-bit immediate
1969    -- (needed for PIC)
1970 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1971   = do
1972         tmp <- getNewRegNat I32
1973         (src, srcCode) <- getSomeReg x
1974         let imm = litToImm lit
1975             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1976         return (Amode (AddrRegImm tmp (LO imm)) code)
1977
1978 getAmode (CmmLit lit)
1979   = do
1980         tmp <- getNewRegNat I32
1981         let imm = litToImm lit
1982             code = unitOL (LIS tmp (HA imm))
1983         return (Amode (AddrRegImm tmp (LO imm)) code)
1984
1985 getAmode (CmmMachOp (MO_Add I32) [x, y])
1986   = do
1987         (regX, codeX) <- getSomeReg x
1988         (regY, codeY) <- getSomeReg y
1989         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1990
1991 getAmode other
1992   = do
1993         (reg, code) <- getSomeReg other
1994         let
1995             off  = ImmInt 0
1996         return (Amode (AddrRegImm reg off) code)
1997 #endif /* powerpc_TARGET_ARCH */
1998
1999 -- -----------------------------------------------------------------------------
2000 -- getOperand: sometimes any operand will do.
2001
2002 -- getNonClobberedOperand: the value of the operand will remain valid across
2003 -- the computation of an arbitrary expression, unless the expression
2004 -- is computed directly into a register which the operand refers to
2005 -- (see trivialCode where this function is used for an example).
2006
2007 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2008
2009 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2010 #if x86_64_TARGET_ARCH
2011 getNonClobberedOperand (CmmLit lit)
2012   | isSuitableFloatingPointLit lit = do
2013     lbl <- getNewLabelNat
2014     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2015                                            CmmStaticLit lit])
2016     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2017 #endif
2018 getNonClobberedOperand (CmmLit lit)
2019   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2020     return (OpImm (litToImm lit), nilOL)
2021 getNonClobberedOperand (CmmLoad mem pk)
2022   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2023     Amode src mem_code <- getAmode mem
2024     (src',save_code) <-
2025         if (amodeCouldBeClobbered src)
2026                 then do
2027                    tmp <- getNewRegNat wordRep
2028                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2029                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2030                 else
2031                    return (src, nilOL)
2032     return (OpAddr src', save_code `appOL` mem_code)
2033 getNonClobberedOperand e = do
2034     (reg, code) <- getNonClobberedReg e
2035     return (OpReg reg, code)
2036
2037 amodeCouldBeClobbered :: AddrMode -> Bool
2038 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2039
2040 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2041 regClobbered _ = False
2042
2043 -- getOperand: the operand is not required to remain valid across the
2044 -- computation of an arbitrary expression.
2045 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2046 #if x86_64_TARGET_ARCH
2047 getOperand (CmmLit lit)
2048   | isSuitableFloatingPointLit lit = do
2049     lbl <- getNewLabelNat
2050     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2051                                            CmmStaticLit lit])
2052     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2053 #endif
2054 getOperand (CmmLit lit)
2055   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2056     return (OpImm (litToImm lit), nilOL)
2057 getOperand (CmmLoad mem pk)
2058   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2059     Amode src mem_code <- getAmode mem
2060     return (OpAddr src, mem_code)
2061 getOperand e = do
2062     (reg, code) <- getSomeReg e
2063     return (OpReg reg, code)
2064
2065 isOperand :: CmmExpr -> Bool
2066 isOperand (CmmLoad _ _) = True
2067 isOperand (CmmLit lit)  = not (is64BitLit lit)
2068                           || isSuitableFloatingPointLit lit
2069 isOperand _             = False
2070
2071 -- if we want a floating-point literal as an operand, we can
2072 -- use it directly from memory.  However, if the literal is
2073 -- zero, we're better off generating it into a register using
2074 -- xor.
2075 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2076 isSuitableFloatingPointLit _ = False
2077
2078 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2079 getRegOrMem (CmmLoad mem pk)
2080   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2081     Amode src mem_code <- getAmode mem
2082     return (OpAddr src, mem_code)
2083 getRegOrMem e = do
2084     (reg, code) <- getNonClobberedReg e
2085     return (OpReg reg, code)
2086
2087 #if x86_64_TARGET_ARCH
2088 is64BitLit (CmmInt i I64) = is64BitInteger i
2089    -- assume that labels are in the range 0-2^31-1: this assumes the
2090    -- small memory model (see gcc docs, -mcmodel=small).
2091 #endif
2092 is64BitLit x = False
2093 #endif
2094
2095 is64BitInteger :: Integer -> Bool
2096 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2097   where i64 = fromIntegral i :: Int64
2098   -- a CmmInt is intended to be truncated to the appropriate
2099   -- number of bits, so here we truncate it to Int64.  This is
2100   -- important because e.g. -1 as a CmmInt might be either
2101   -- -1 or 18446744073709551615.
2102
2103 -- -----------------------------------------------------------------------------
2104 --  The 'CondCode' type:  Condition codes passed up the tree.
2105
2106 data CondCode = CondCode Bool Cond InstrBlock
2107
2108 -- Set up a condition code for a conditional branch.
2109
2110 getCondCode :: CmmExpr -> NatM CondCode
2111
2112 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2113
2114 #if alpha_TARGET_ARCH
2115 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2116 #endif /* alpha_TARGET_ARCH */
2117
2118 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2119
2120 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2121 -- yes, they really do seem to want exactly the same!
2122
2123 getCondCode (CmmMachOp mop [x, y])
2124   =
2125     case mop of
2126       MO_Eq F32 -> condFltCode EQQ x y
2127       MO_Ne F32 -> condFltCode NE  x y
2128
2129       MO_S_Gt F32 -> condFltCode GTT x y
2130       MO_S_Ge F32 -> condFltCode GE  x y
2131       MO_S_Lt F32 -> condFltCode LTT x y
2132       MO_S_Le F32 -> condFltCode LE  x y
2133
2134       MO_Eq F64 -> condFltCode EQQ x y
2135       MO_Ne F64 -> condFltCode NE  x y
2136
2137       MO_S_Gt F64 -> condFltCode GTT x y
2138       MO_S_Ge F64 -> condFltCode GE  x y
2139       MO_S_Lt F64 -> condFltCode LTT x y
2140       MO_S_Le F64 -> condFltCode LE  x y
2141
2142       MO_Eq rep -> condIntCode EQQ  x y
2143       MO_Ne rep -> condIntCode NE   x y
2144
2145       MO_S_Gt rep -> condIntCode GTT  x y
2146       MO_S_Ge rep -> condIntCode GE   x y
2147       MO_S_Lt rep -> condIntCode LTT  x y
2148       MO_S_Le rep -> condIntCode LE   x y
2149
2150       MO_U_Gt rep -> condIntCode GU   x y
2151       MO_U_Ge rep -> condIntCode GEU  x y
2152       MO_U_Lt rep -> condIntCode LU   x y
2153       MO_U_Le rep -> condIntCode LEU  x y
2154
2155       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2156
2157 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2158
2159 #elif powerpc_TARGET_ARCH
2160
2161 -- almost the same as everywhere else - but we need to
2162 -- extend small integers to 32 bit first
2163
2164 getCondCode (CmmMachOp mop [x, y])
2165   = case mop of
2166       MO_Eq F32 -> condFltCode EQQ x y
2167       MO_Ne F32 -> condFltCode NE  x y
2168
2169       MO_S_Gt F32 -> condFltCode GTT x y
2170       MO_S_Ge F32 -> condFltCode GE  x y
2171       MO_S_Lt F32 -> condFltCode LTT x y
2172       MO_S_Le F32 -> condFltCode LE  x y
2173
2174       MO_Eq F64 -> condFltCode EQQ x y
2175       MO_Ne F64 -> condFltCode NE  x y
2176
2177       MO_S_Gt F64 -> condFltCode GTT x y
2178       MO_S_Ge F64 -> condFltCode GE  x y
2179       MO_S_Lt F64 -> condFltCode LTT x y
2180       MO_S_Le F64 -> condFltCode LE  x y
2181
2182       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2183       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2184
2185       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2186       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2187       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2188       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2189
2190       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2191       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2192       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2193       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2194
2195       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2196
2197 getCondCode other =  panic "getCondCode(2)(powerpc)"
2198
2199
2200 #endif
2201
2202
2203 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2204 -- passed back up the tree.
2205
2206 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2207
2208 #if alpha_TARGET_ARCH
2209 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2210 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2211 #endif /* alpha_TARGET_ARCH */
2212
2213 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2214 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2215
2216 -- memory vs immediate
2217 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2218     Amode x_addr x_code <- getAmode x
2219     let
2220         imm  = litToImm lit
2221         code = x_code `snocOL`
2222                   CMP pk (OpImm imm) (OpAddr x_addr)
2223     --
2224     return (CondCode False cond code)
2225
2226 -- anything vs zero, using a mask
2227 -- TODO: Add some sanity checking!!!!
2228 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2229     | (CmmLit (CmmInt mask pk2)) <- o2
2230     = do
2231       (x_reg, x_code) <- getSomeReg x
2232       let
2233          code = x_code `snocOL`
2234                 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2235       --
2236       return (CondCode False cond code)
2237
2238 -- anything vs zero
2239 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2240     (x_reg, x_code) <- getSomeReg x
2241     let
2242         code = x_code `snocOL`
2243                   TEST pk (OpReg x_reg) (OpReg x_reg)
2244     --
2245     return (CondCode False cond code)
2246
2247 -- anything vs operand
2248 condIntCode cond x y | isOperand y = do
2249     (x_reg, x_code) <- getNonClobberedReg x
2250     (y_op,  y_code) <- getOperand y
2251     let
2252         code = x_code `appOL` y_code `snocOL`
2253                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2254     -- in
2255     return (CondCode False cond code)
2256
2257 -- anything vs anything
2258 condIntCode cond x y = do
2259   (y_reg, y_code) <- getNonClobberedReg y
2260   (x_op, x_code) <- getRegOrMem x
2261   let
2262         code = y_code `appOL`
2263                x_code `snocOL`
2264                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2265   -- in
2266   return (CondCode False cond code)
2267 #endif
2268
2269 #if i386_TARGET_ARCH
2270 condFltCode cond x y
2271   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2272   (x_reg, x_code) <- getNonClobberedReg x
2273   (y_reg, y_code) <- getSomeReg y
2274   let
2275         code = x_code `appOL` y_code `snocOL`
2276                 GCMP cond x_reg y_reg
2277   -- The GCMP insn does the test and sets the zero flag if comparable
2278   -- and true.  Hence we always supply EQQ as the condition to test.
2279   return (CondCode True EQQ code)
2280 #endif /* i386_TARGET_ARCH */
2281
2282 #if x86_64_TARGET_ARCH
2283 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2284 -- an operand, but the right must be a reg.  We can probably do better
2285 -- than this general case...
2286 condFltCode cond x y = do
2287   (x_reg, x_code) <- getNonClobberedReg x
2288   (y_op, y_code) <- getOperand y
2289   let
2290         code = x_code `appOL`
2291                y_code `snocOL`
2292                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2293         -- NB(1): we need to use the unsigned comparison operators on the
2294         -- result of this comparison.
2295   -- in
2296   return (CondCode True (condToUnsigned cond) code)
2297 #endif
2298
2299 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2300
2301 #if sparc_TARGET_ARCH
2302
2303 condIntCode cond x (CmmLit (CmmInt y rep))
2304   | fits13Bits y
2305   = do
2306        (src1, code) <- getSomeReg x
2307        let
2308            src2 = ImmInt (fromInteger y)
2309            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2310        return (CondCode False cond code')
2311
2312 condIntCode cond x y = do
2313     (src1, code1) <- getSomeReg x
2314     (src2, code2) <- getSomeReg y
2315     let
2316         code__2 = code1 `appOL` code2 `snocOL`
2317                   SUB False True src1 (RIReg src2) g0
2318     return (CondCode False cond code__2)
2319
2320 -----------
2321 condFltCode cond x y = do
2322     (src1, code1) <- getSomeReg x
2323     (src2, code2) <- getSomeReg y
2324     tmp <- getNewRegNat F64
2325     let
2326         promote x = FxTOy F32 F64 x tmp
2327
2328         pk1   = cmmExprRep x
2329         pk2   = cmmExprRep y
2330
2331         code__2 =
2332                 if pk1 == pk2 then
2333                     code1 `appOL` code2 `snocOL`
2334                     FCMP True pk1 src1 src2
2335                 else if pk1 == F32 then
2336                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2337                     FCMP True F64 tmp src2
2338                 else
2339                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2340                     FCMP True F64 src1 tmp
2341     return (CondCode True cond code__2)
2342
2343 #endif /* sparc_TARGET_ARCH */
2344
2345 #if powerpc_TARGET_ARCH
2346 --  ###FIXME: I16 and I8!
2347 condIntCode cond x (CmmLit (CmmInt y rep))
2348   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2349   = do
2350         (src1, code) <- getSomeReg x
2351         let
2352             code' = code `snocOL`
2353                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2354         return (CondCode False cond code')
2355
2356 condIntCode cond x y = do
2357     (src1, code1) <- getSomeReg x
2358     (src2, code2) <- getSomeReg y
2359     let
2360         code' = code1 `appOL` code2 `snocOL`
2361                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2362     return (CondCode False cond code')
2363
2364 condFltCode cond x y = do
2365     (src1, code1) <- getSomeReg x
2366     (src2, code2) <- getSomeReg y
2367     let
2368         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2369         code'' = case cond of -- twiddle CR to handle unordered case
2370                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2371                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2372                     _ -> code'
2373                  where
2374                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2375     return (CondCode True cond code'')
2376
2377 #endif /* powerpc_TARGET_ARCH */
2378
2379 -- -----------------------------------------------------------------------------
2380 -- Generating assignments
2381
2382 -- Assignments are really at the heart of the whole code generation
2383 -- business.  Almost all top-level nodes of any real importance are
2384 -- assignments, which correspond to loads, stores, or register
2385 -- transfers.  If we're really lucky, some of the register transfers
2386 -- will go away, because we can use the destination register to
2387 -- complete the code generation for the right hand side.  This only
2388 -- fails when the right hand side is forced into a fixed register
2389 -- (e.g. the result of a call).
2390
2391 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2392 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2393
2394 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2395 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2396
2397 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2398
2399 #if alpha_TARGET_ARCH
2400
2401 assignIntCode pk (CmmLoad dst _) src
2402   = getNewRegNat IntRep             `thenNat` \ tmp ->
2403     getAmode dst                    `thenNat` \ amode ->
2404     getRegister src                 `thenNat` \ register ->
2405     let
2406         code1   = amodeCode amode []
2407         dst__2  = amodeAddr amode
2408         code2   = registerCode register tmp []
2409         src__2  = registerName register tmp
2410         sz      = primRepToSize pk
2411         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2412     in
2413     return code__2
2414
2415 assignIntCode pk dst src
2416   = getRegister dst                         `thenNat` \ register1 ->
2417     getRegister src                         `thenNat` \ register2 ->
2418     let
2419         dst__2  = registerName register1 zeroh
2420         code    = registerCode register2 dst__2
2421         src__2  = registerName register2 dst__2
2422         code__2 = if isFixed register2
2423                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2424                   else code
2425     in
2426     return code__2
2427
2428 #endif /* alpha_TARGET_ARCH */
2429
2430 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2431
2432 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2433
2434 -- integer assignment to memory
2435
2436 -- specific case of adding/subtracting an integer to a particular address.
2437 -- ToDo: catch other cases where we can use an operation directly on a memory
2438 -- address.
2439 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2440                                                  CmmLit (CmmInt i _)])
2441    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2442      Just instr <- check op
2443    = do Amode amode code_addr <- getAmode addr
2444         let code = code_addr `snocOL`
2445                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2446         return code
2447    where
2448         check (MO_Add _) = Just ADD
2449         check (MO_Sub _) = Just SUB
2450         check _ = Nothing
2451         -- ToDo: more?
2452
2453 -- general case
2454 assignMem_IntCode pk addr src = do
2455     Amode addr code_addr <- getAmode addr
2456     (code_src, op_src)   <- get_op_RI src
2457     let
2458         code = code_src `appOL`
2459                code_addr `snocOL`
2460                   MOV pk op_src (OpAddr addr)
2461         -- NOTE: op_src is stable, so it will still be valid
2462         -- after code_addr.  This may involve the introduction
2463         -- of an extra MOV to a temporary register, but we hope
2464         -- the register allocator will get rid of it.
2465     --
2466     return code
2467   where
2468     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2469     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2470       = return (nilOL, OpImm (litToImm lit))
2471     get_op_RI op
2472       = do (reg,code) <- getNonClobberedReg op
2473            return (code, OpReg reg)
2474
2475
2476 -- Assign; dst is a reg, rhs is mem
2477 assignReg_IntCode pk reg (CmmLoad src _) = do
2478   load_code <- intLoadCode (MOV pk) src
2479   return (load_code (getRegisterReg reg))
2480
2481 -- dst is a reg, but src could be anything
2482 assignReg_IntCode pk reg src = do
2483   code <- getAnyReg src
2484   return (code (getRegisterReg reg))
2485
2486 #endif /* i386_TARGET_ARCH */
2487
2488 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2489
2490 #if sparc_TARGET_ARCH
2491
2492 assignMem_IntCode pk addr src = do
2493     (srcReg, code) <- getSomeReg src
2494     Amode dstAddr addr_code <- getAmode addr
2495     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2496
2497 assignReg_IntCode pk reg src = do
2498     r <- getRegister src
2499     return $ case r of
2500         Any _ code         -> code dst
2501         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2502     where
2503       dst = getRegisterReg reg
2504
2505
2506 #endif /* sparc_TARGET_ARCH */
2507
2508 #if powerpc_TARGET_ARCH
2509
2510 assignMem_IntCode pk addr src = do
2511     (srcReg, code) <- getSomeReg src
2512     Amode dstAddr addr_code <- getAmode addr
2513     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2514
2515 -- dst is a reg, but src could be anything
2516 assignReg_IntCode pk reg src
2517     = do
2518         r <- getRegister src
2519         return $ case r of
2520             Any _ code         -> code dst
2521             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2522     where
2523         dst = getRegisterReg reg
2524
2525 #endif /* powerpc_TARGET_ARCH */
2526
2527
2528 -- -----------------------------------------------------------------------------
2529 -- Floating-point assignments
2530
2531 #if alpha_TARGET_ARCH
2532
2533 assignFltCode pk (CmmLoad dst _) src
2534   = getNewRegNat pk                 `thenNat` \ tmp ->
2535     getAmode dst                    `thenNat` \ amode ->
2536     getRegister src                         `thenNat` \ register ->
2537     let
2538         code1   = amodeCode amode []
2539         dst__2  = amodeAddr amode
2540         code2   = registerCode register tmp []
2541         src__2  = registerName register tmp
2542         sz      = primRepToSize pk
2543         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2544     in
2545     return code__2
2546
2547 assignFltCode pk dst src
2548   = getRegister dst                         `thenNat` \ register1 ->
2549     getRegister src                         `thenNat` \ register2 ->
2550     let
2551         dst__2  = registerName register1 zeroh
2552         code    = registerCode register2 dst__2
2553         src__2  = registerName register2 dst__2
2554         code__2 = if isFixed register2
2555                   then code . mkSeqInstr (FMOV src__2 dst__2)
2556                   else code
2557     in
2558     return code__2
2559
2560 #endif /* alpha_TARGET_ARCH */
2561
2562 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2563
2564 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2565
2566 -- Floating point assignment to memory
2567 assignMem_FltCode pk addr src = do
2568   (src_reg, src_code) <- getNonClobberedReg src
2569   Amode addr addr_code <- getAmode addr
2570   let
2571         code = src_code `appOL`
2572                addr_code `snocOL`
2573                 IF_ARCH_i386(GST pk src_reg addr,
2574                              MOV pk (OpReg src_reg) (OpAddr addr))
2575   return code
2576
2577 -- Floating point assignment to a register/temporary
2578 assignReg_FltCode pk reg src = do
2579   src_code <- getAnyReg src
2580   return (src_code (getRegisterReg reg))
2581
2582 #endif /* i386_TARGET_ARCH */
2583
2584 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2585
2586 #if sparc_TARGET_ARCH
2587
2588 -- Floating point assignment to memory
2589 assignMem_FltCode pk addr src = do
2590     Amode dst__2 code1 <- getAmode addr
2591     (src__2, code2) <- getSomeReg src
2592     tmp1 <- getNewRegNat pk
2593     let
2594         pk__2   = cmmExprRep src
2595         code__2 = code1 `appOL` code2 `appOL`
2596             if   pk == pk__2
2597             then unitOL (ST pk src__2 dst__2)
2598             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2599     return code__2
2600
2601 -- Floating point assignment to a register/temporary
2602 -- ToDo: Verify correctness
2603 assignReg_FltCode pk reg src = do
2604     r <- getRegister src
2605     v1 <- getNewRegNat pk
2606     return $ case r of
2607         Any _ code         -> code dst
2608         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2609     where
2610       dst = getRegisterReg reg
2611
2612 #endif /* sparc_TARGET_ARCH */
2613
2614 #if powerpc_TARGET_ARCH
2615
2616 -- Easy, isn't it?
2617 assignMem_FltCode = assignMem_IntCode
2618 assignReg_FltCode = assignReg_IntCode
2619
2620 #endif /* powerpc_TARGET_ARCH */
2621
2622
2623 -- -----------------------------------------------------------------------------
2624 -- Generating an non-local jump
2625
2626 -- (If applicable) Do not fill the delay slots here; you will confuse the
2627 -- register allocator.
2628
2629 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2630
2631 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2632
2633 #if alpha_TARGET_ARCH
2634
2635 genJump (CmmLabel lbl)
2636   | isAsmTemp lbl = returnInstr (BR target)
2637   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2638   where
2639     target = ImmCLbl lbl
2640
2641 genJump tree
2642   = getRegister tree                `thenNat` \ register ->
2643     getNewRegNat PtrRep             `thenNat` \ tmp ->
2644     let
2645         dst    = registerName register pv
2646         code   = registerCode register pv
2647         target = registerName register pv
2648     in
2649     if isFixed register then
2650         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2651     else
2652     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2653
2654 #endif /* alpha_TARGET_ARCH */
2655
2656 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2657
2658 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2659
2660 genJump (CmmLoad mem pk) = do
2661   Amode target code <- getAmode mem
2662   return (code `snocOL` JMP (OpAddr target))
2663
2664 genJump (CmmLit lit) = do
2665   return (unitOL (JMP (OpImm (litToImm lit))))
2666
2667 genJump expr = do
2668   (reg,code) <- getSomeReg expr
2669   return (code `snocOL` JMP (OpReg reg))
2670
2671 #endif /* i386_TARGET_ARCH */
2672
2673 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2674
2675 #if sparc_TARGET_ARCH
2676
2677 genJump (CmmLit (CmmLabel lbl))
2678   = return (toOL [CALL (Left target) 0 True, NOP])
2679   where
2680     target = ImmCLbl lbl
2681
2682 genJump tree
2683   = do
2684         (target, code) <- getSomeReg tree
2685         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2686
2687 #endif /* sparc_TARGET_ARCH */
2688
2689 #if powerpc_TARGET_ARCH
2690 genJump (CmmLit (CmmLabel lbl))
2691   = return (unitOL $ JMP lbl)
2692
2693 genJump tree
2694   = do
2695         (target,code) <- getSomeReg tree
2696         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2697 #endif /* powerpc_TARGET_ARCH */
2698
2699
2700 -- -----------------------------------------------------------------------------
2701 --  Unconditional branches
2702
2703 genBranch :: BlockId -> NatM InstrBlock
2704
2705 genBranch = return . toOL . mkBranchInstr
2706
2707 -- -----------------------------------------------------------------------------
2708 --  Conditional jumps
2709
2710 {-
2711 Conditional jumps are always to local labels, so we can use branch
2712 instructions.  We peek at the arguments to decide what kind of
2713 comparison to do.
2714
2715 ALPHA: For comparisons with 0, we're laughing, because we can just do
2716 the desired conditional branch.
2717
2718 I386: First, we have to ensure that the condition
2719 codes are set according to the supplied comparison operation.
2720
2721 SPARC: First, we have to ensure that the condition codes are set
2722 according to the supplied comparison operation.  We generate slightly
2723 different code for floating point comparisons, because a floating
2724 point operation cannot directly precede a @BF@.  We assume the worst
2725 and fill that slot with a @NOP@.
2726
2727 SPARC: Do not fill the delay slots here; you will confuse the register
2728 allocator.
2729 -}
2730
2731
2732 genCondJump
2733     :: BlockId      -- the branch target
2734     -> CmmExpr      -- the condition on which to branch
2735     -> NatM InstrBlock
2736
2737 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2738
2739 #if alpha_TARGET_ARCH
2740
2741 genCondJump id (StPrim op [x, StInt 0])
2742   = getRegister x                           `thenNat` \ register ->
2743     getNewRegNat (registerRep register)
2744                                     `thenNat` \ tmp ->
2745     let
2746         code   = registerCode register tmp
2747         value  = registerName register tmp
2748         pk     = registerRep register
2749         target = ImmCLbl lbl
2750     in
2751     returnSeq code [BI (cmpOp op) value target]
2752   where
2753     cmpOp CharGtOp = GTT
2754     cmpOp CharGeOp = GE
2755     cmpOp CharEqOp = EQQ
2756     cmpOp CharNeOp = NE
2757     cmpOp CharLtOp = LTT
2758     cmpOp CharLeOp = LE
2759     cmpOp IntGtOp = GTT
2760     cmpOp IntGeOp = GE
2761     cmpOp IntEqOp = EQQ
2762     cmpOp IntNeOp = NE
2763     cmpOp IntLtOp = LTT
2764     cmpOp IntLeOp = LE
2765     cmpOp WordGtOp = NE
2766     cmpOp WordGeOp = ALWAYS
2767     cmpOp WordEqOp = EQQ
2768     cmpOp WordNeOp = NE
2769     cmpOp WordLtOp = NEVER
2770     cmpOp WordLeOp = EQQ
2771     cmpOp AddrGtOp = NE
2772     cmpOp AddrGeOp = ALWAYS
2773     cmpOp AddrEqOp = EQQ
2774     cmpOp AddrNeOp = NE
2775     cmpOp AddrLtOp = NEVER
2776     cmpOp AddrLeOp = EQQ
2777
2778 genCondJump lbl (StPrim op [x, StDouble 0.0])
2779   = getRegister x                           `thenNat` \ register ->
2780     getNewRegNat (registerRep register)
2781                                     `thenNat` \ tmp ->
2782     let
2783         code   = registerCode register tmp
2784         value  = registerName register tmp
2785         pk     = registerRep register
2786         target = ImmCLbl lbl
2787     in
2788     return (code . mkSeqInstr (BF (cmpOp op) value target))
2789   where
2790     cmpOp FloatGtOp = GTT
2791     cmpOp FloatGeOp = GE
2792     cmpOp FloatEqOp = EQQ
2793     cmpOp FloatNeOp = NE
2794     cmpOp FloatLtOp = LTT
2795     cmpOp FloatLeOp = LE
2796     cmpOp DoubleGtOp = GTT
2797     cmpOp DoubleGeOp = GE
2798     cmpOp DoubleEqOp = EQQ
2799     cmpOp DoubleNeOp = NE
2800     cmpOp DoubleLtOp = LTT
2801     cmpOp DoubleLeOp = LE
2802
2803 genCondJump lbl (StPrim op [x, y])
2804   | fltCmpOp op
2805   = trivialFCode pr instr x y       `thenNat` \ register ->
2806     getNewRegNat F64                `thenNat` \ tmp ->
2807     let
2808         code   = registerCode register tmp
2809         result = registerName register tmp
2810         target = ImmCLbl lbl
2811     in
2812     return (code . mkSeqInstr (BF cond result target))
2813   where
2814     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2815
2816     fltCmpOp op = case op of
2817         FloatGtOp -> True
2818         FloatGeOp -> True
2819         FloatEqOp -> True
2820         FloatNeOp -> True
2821         FloatLtOp -> True
2822         FloatLeOp -> True
2823         DoubleGtOp -> True
2824         DoubleGeOp -> True
2825         DoubleEqOp -> True
2826         DoubleNeOp -> True
2827         DoubleLtOp -> True
2828         DoubleLeOp -> True
2829         _ -> False
2830     (instr, cond) = case op of
2831         FloatGtOp -> (FCMP TF LE, EQQ)
2832         FloatGeOp -> (FCMP TF LTT, EQQ)
2833         FloatEqOp -> (FCMP TF EQQ, NE)
2834         FloatNeOp -> (FCMP TF EQQ, EQQ)
2835         FloatLtOp -> (FCMP TF LTT, NE)
2836         FloatLeOp -> (FCMP TF LE, NE)
2837         DoubleGtOp -> (FCMP TF LE, EQQ)
2838         DoubleGeOp -> (FCMP TF LTT, EQQ)
2839         DoubleEqOp -> (FCMP TF EQQ, NE)
2840         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2841         DoubleLtOp -> (FCMP TF LTT, NE)
2842         DoubleLeOp -> (FCMP TF LE, NE)
2843
2844 genCondJump lbl (StPrim op [x, y])
2845   = trivialCode instr x y           `thenNat` \ register ->
2846     getNewRegNat IntRep             `thenNat` \ tmp ->
2847     let
2848         code   = registerCode register tmp
2849         result = registerName register tmp
2850         target = ImmCLbl lbl
2851     in
2852     return (code . mkSeqInstr (BI cond result target))
2853   where
2854     (instr, cond) = case op of
2855         CharGtOp -> (CMP LE, EQQ)
2856         CharGeOp -> (CMP LTT, EQQ)
2857         CharEqOp -> (CMP EQQ, NE)
2858         CharNeOp -> (CMP EQQ, EQQ)
2859         CharLtOp -> (CMP LTT, NE)
2860         CharLeOp -> (CMP LE, NE)
2861         IntGtOp -> (CMP LE, EQQ)
2862         IntGeOp -> (CMP LTT, EQQ)
2863         IntEqOp -> (CMP EQQ, NE)
2864         IntNeOp -> (CMP EQQ, EQQ)
2865         IntLtOp -> (CMP LTT, NE)
2866         IntLeOp -> (CMP LE, NE)
2867         WordGtOp -> (CMP ULE, EQQ)
2868         WordGeOp -> (CMP ULT, EQQ)
2869         WordEqOp -> (CMP EQQ, NE)
2870         WordNeOp -> (CMP EQQ, EQQ)
2871         WordLtOp -> (CMP ULT, NE)
2872         WordLeOp -> (CMP ULE, NE)
2873         AddrGtOp -> (CMP ULE, EQQ)
2874         AddrGeOp -> (CMP ULT, EQQ)
2875         AddrEqOp -> (CMP EQQ, NE)
2876         AddrNeOp -> (CMP EQQ, EQQ)
2877         AddrLtOp -> (CMP ULT, NE)
2878         AddrLeOp -> (CMP ULE, NE)
2879
2880 #endif /* alpha_TARGET_ARCH */
2881
2882 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2883
2884 #if i386_TARGET_ARCH
2885
2886 genCondJump id bool = do
2887   CondCode _ cond code <- getCondCode bool
2888   return (code `snocOL` JXX cond id)
2889
2890 #endif
2891
2892 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2893
2894 #if x86_64_TARGET_ARCH
2895
2896 genCondJump id bool = do
2897   CondCode is_float cond cond_code <- getCondCode bool
2898   if not is_float
2899     then
2900         return (cond_code `snocOL` JXX cond id)
2901     else do
2902         lbl <- getBlockIdNat
2903
2904         -- see comment with condFltReg
2905         let code = case cond of
2906                         NE  -> or_unordered
2907                         GU  -> plain_test
2908                         GEU -> plain_test
2909                         _   -> and_ordered
2910
2911             plain_test = unitOL (
2912                   JXX cond id
2913                 )
2914             or_unordered = toOL [
2915                   JXX cond id,
2916                   JXX PARITY id
2917                 ]
2918             and_ordered = toOL [
2919                   JXX PARITY lbl,
2920                   JXX cond id,
2921                   JXX ALWAYS lbl,
2922                   NEWBLOCK lbl
2923                 ]
2924         return (cond_code `appOL` code)
2925
2926 #endif
2927
2928 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2929
2930 #if sparc_TARGET_ARCH
2931
2932 genCondJump (BlockId id) bool = do
2933   CondCode is_float cond code <- getCondCode bool
2934   return (
2935        code `appOL`
2936        toOL (
2937          if   is_float
2938          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2939          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2940        )
2941     )
2942
2943 #endif /* sparc_TARGET_ARCH */
2944
2945
2946 #if powerpc_TARGET_ARCH
2947
2948 genCondJump id bool = do
2949   CondCode is_float cond code <- getCondCode bool
2950   return (code `snocOL` BCC cond id)
2951
2952 #endif /* powerpc_TARGET_ARCH */
2953
2954
2955 -- -----------------------------------------------------------------------------
2956 --  Generating C calls
2957
2958 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2959 -- @get_arg@, which moves the arguments to the correct registers/stack
2960 -- locations.  Apart from that, the code is easy.
2961 --
2962 -- (If applicable) Do not fill the delay slots here; you will confuse the
2963 -- register allocator.
2964
2965 genCCall
2966     :: CmmCallTarget            -- function to call
2967     -> CmmFormals               -- where to put the result
2968     -> CmmActuals               -- arguments (of mixed type)
2969     -> NatM InstrBlock
2970
2971 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2972
2973 #if alpha_TARGET_ARCH
2974
2975 ccallResultRegs =
2976
2977 genCCall fn cconv result_regs args
2978   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2979                           `thenNat` \ ((unused,_), argCode) ->
2980     let
2981         nRegs = length allArgRegs - length unused
2982         code = asmSeqThen (map ($ []) argCode)
2983     in
2984         returnSeq code [
2985             LDA pv (AddrImm (ImmLab (ptext fn))),
2986             JSR ra (AddrReg pv) nRegs,
2987             LDGP gp (AddrReg ra)]
2988   where
2989     ------------------------
2990     {-  Try to get a value into a specific register (or registers) for
2991         a call.  The first 6 arguments go into the appropriate
2992         argument register (separate registers for integer and floating
2993         point arguments, but used in lock-step), and the remaining
2994         arguments are dumped to the stack, beginning at 0(sp).  Our
2995         first argument is a pair of the list of remaining argument
2996         registers to be assigned for this call and the next stack
2997         offset to use for overflowing arguments.  This way,
2998         @get_Arg@ can be applied to all of a call's arguments using
2999         @mapAccumLNat@.
3000     -}
3001     get_arg
3002         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3003         -> StixTree             -- Current argument
3004         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3005
3006     -- We have to use up all of our argument registers first...
3007
3008     get_arg ((iDst,fDst):dsts, offset) arg
3009       = getRegister arg                     `thenNat` \ register ->
3010         let
3011             reg  = if isFloatingRep pk then fDst else iDst
3012             code = registerCode register reg
3013             src  = registerName register reg
3014             pk   = registerRep register
3015         in
3016         return (
3017             if isFloatingRep pk then
3018                 ((dsts, offset), if isFixed register then
3019                     code . mkSeqInstr (FMOV src fDst)
3020                     else code)
3021             else
3022                 ((dsts, offset), if isFixed register then
3023                     code . mkSeqInstr (OR src (RIReg src) iDst)
3024                     else code))
3025
3026     -- Once we have run out of argument registers, we move to the
3027     -- stack...
3028
3029     get_arg ([], offset) arg
3030       = getRegister arg                 `thenNat` \ register ->
3031         getNewRegNat (registerRep register)
3032                                         `thenNat` \ tmp ->
3033         let
3034             code = registerCode register tmp
3035             src  = registerName register tmp
3036             pk   = registerRep register
3037             sz   = primRepToSize pk
3038         in
3039         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3040
3041 #endif /* alpha_TARGET_ARCH */
3042
3043 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3044
3045 #if i386_TARGET_ARCH
3046
3047 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3048         -- write barrier compiles to no code on x86/x86-64;
3049         -- we keep it this long in order to prevent earlier optimisations.
3050
3051 -- we only cope with a single result for foreign calls
3052 genCCall (CmmPrim op) [CmmKinded r _] args = do
3053   case op of
3054         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3055         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3056
3057         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32) args
3058         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args
3059
3060         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32) args
3061         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args
3062
3063         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
3064         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
3065
3066         other_op    -> outOfLineFloatOp op r args
3067  where
3068   actuallyInlineFloatOp rep instr [CmmKinded x _]
3069         = do res <- trivialUFCode rep instr x
3070              any <- anyReg res
3071              return (any (getRegisterReg (CmmLocal r)))
3072
3073 genCCall target dest_regs args = do
3074     let
3075         sizes               = map (arg_size . cmmExprRep . kindlessCmm) (reverse args)
3076 #if !darwin_TARGET_OS
3077         tot_arg_size        = sum sizes
3078 #else
3079         raw_arg_size        = sum sizes
3080         tot_arg_size        = roundTo 16 raw_arg_size
3081         arg_pad_size        = tot_arg_size - raw_arg_size
3082     delta0 <- getDeltaNat
3083     setDeltaNat (delta0 - arg_pad_size)
3084 #endif
3085
3086     push_codes <- mapM push_arg (reverse args)
3087     delta <- getDeltaNat
3088
3089     -- in
3090     -- deal with static vs dynamic call targets
3091     (callinsns,cconv) <-
3092       case target of
3093         -- CmmPrim -> ...
3094         CmmCallee (CmmLit (CmmLabel lbl)) conv
3095            -> -- ToDo: stdcall arg sizes
3096               return (unitOL (CALL (Left fn_imm) []), conv)
3097            where fn_imm = ImmCLbl lbl
3098         CmmCallee expr conv
3099            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3100                  ASSERT(dyn_rep == I32)
3101                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3102
3103     let push_code
3104 #if darwin_TARGET_OS
3105             | arg_pad_size /= 0
3106             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3107                     DELTA (delta0 - arg_pad_size)]
3108               `appOL` concatOL push_codes
3109             | otherwise
3110 #endif
3111             = concatOL push_codes
3112         call = callinsns `appOL`
3113                toOL (
3114                         -- Deallocate parameters after call for ccall;
3115                         -- but not for stdcall (callee does it)
3116                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3117                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3118                   ++
3119                   [DELTA (delta + tot_arg_size)]
3120                )
3121     -- in
3122     setDeltaNat (delta + tot_arg_size)
3123
3124     let
3125         -- assign the results, if necessary
3126         assign_code []     = nilOL
3127         assign_code [CmmKinded dest _hint] =
3128           case rep of
3129                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3130                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3131                 F32 -> unitOL (GMOV fake0 r_dest)
3132                 F64 -> unitOL (GMOV fake0 r_dest)
3133                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3134           where
3135                 r_dest_hi = getHiVRegFromLo r_dest
3136                 rep = localRegRep dest
3137                 r_dest = getRegisterReg (CmmLocal dest)
3138         assign_code many = panic "genCCall.assign_code many"
3139
3140     return (push_code `appOL`
3141             call `appOL`
3142             assign_code dest_regs)
3143
3144   where
3145     arg_size F64 = 8
3146     arg_size F32 = 4
3147     arg_size I64 = 8
3148     arg_size _   = 4
3149
3150     roundTo a x | x `mod` a == 0 = x
3151                 | otherwise = x + a - (x `mod` a)
3152
3153
3154     push_arg :: (CmmKinded CmmExpr){-current argument-}
3155                     -> NatM InstrBlock  -- code
3156
3157     push_arg (CmmKinded arg _hint) -- we don't need the hints on x86
3158       | arg_rep == I64 = do
3159         ChildCode64 code r_lo <- iselExpr64 arg
3160         delta <- getDeltaNat
3161         setDeltaNat (delta - 8)
3162         let
3163             r_hi = getHiVRegFromLo r_lo
3164         -- in
3165         return (       code `appOL`
3166                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3167                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3168                              DELTA (delta-8)]
3169             )
3170
3171       | otherwise = do
3172         (code, reg, sz) <- get_op arg
3173         delta <- getDeltaNat
3174         let size = arg_size sz
3175         setDeltaNat (delta-size)
3176         if (case sz of F64 -> True; F32 -> True; _ -> False)
3177            then return (code `appOL`
3178                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3179                               DELTA (delta-size),
3180                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3181                                                         EAIndexNone
3182                                                         (ImmInt 0))]
3183                        )
3184            else return (code `snocOL`
3185                         PUSH I32 (OpReg reg) `snocOL`
3186                         DELTA (delta-size)
3187                        )
3188       where
3189          arg_rep = cmmExprRep arg
3190
3191     ------------
3192     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3193     get_op op = do
3194         (reg,code) <- getSomeReg op
3195         return (code, reg, cmmExprRep op)
3196
3197 #endif /* i386_TARGET_ARCH */
3198
3199 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3200
3201 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3202   -> NatM InstrBlock
3203 outOfLineFloatOp mop res args
3204   = do
3205       dflags <- getDynFlagsNat
3206       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3207       let target = CmmCallee targetExpr CCallConv
3208
3209       if localRegRep res == F64
3210         then
3211           stmtToInstrs (CmmCall target [CmmKinded res FloatHint] args CmmUnsafe CmmMayReturn)
3212         else do
3213           uq <- getUniqueNat
3214           let
3215             tmp = LocalReg uq F64 GCKindNonPtr
3216           -- in
3217           code1 <- stmtToInstrs (CmmCall target [CmmKinded tmp FloatHint] args CmmUnsafe CmmMayReturn)
3218           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3219           return (code1 `appOL` code2)
3220   where
3221         lbl = mkForeignLabel fn Nothing False
3222
3223         fn = case mop of
3224               MO_F32_Sqrt  -> fsLit "sqrtf"
3225               MO_F32_Sin   -> fsLit "sinf"
3226               MO_F32_Cos   -> fsLit "cosf"
3227               MO_F32_Tan   -> fsLit "tanf"
3228               MO_F32_Exp   -> fsLit "expf"
3229               MO_F32_Log   -> fsLit "logf"
3230
3231               MO_F32_Asin  -> fsLit "asinf"
3232               MO_F32_Acos  -> fsLit "acosf"
3233               MO_F32_Atan  -> fsLit "atanf"
3234
3235               MO_F32_Sinh  -> fsLit "sinhf"
3236               MO_F32_Cosh  -> fsLit "coshf"
3237               MO_F32_Tanh  -> fsLit "tanhf"
3238               MO_F32_Pwr   -> fsLit "powf"
3239
3240               MO_F64_Sqrt  -> fsLit "sqrt"
3241               MO_F64_Sin   -> fsLit "sin"
3242               MO_F64_Cos   -> fsLit "cos"
3243               MO_F64_Tan   -> fsLit "tan"
3244               MO_F64_Exp   -> fsLit "exp"
3245               MO_F64_Log   -> fsLit "log"
3246
3247               MO_F64_Asin  -> fsLit "asin"
3248               MO_F64_Acos  -> fsLit "acos"
3249               MO_F64_Atan  -> fsLit "atan"
3250
3251               MO_F64_Sinh  -> fsLit "sinh"
3252               MO_F64_Cosh  -> fsLit "cosh"
3253               MO_F64_Tanh  -> fsLit "tanh"
3254               MO_F64_Pwr   -> fsLit "pow"
3255
3256 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3257
3258 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3259
3260 #if x86_64_TARGET_ARCH
3261
3262 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3263         -- write barrier compiles to no code on x86/x86-64;
3264         -- we keep it this long in order to prevent earlier optimisations.
3265
3266
3267 genCCall (CmmPrim op) [CmmKinded r _] args =
3268   outOfLineFloatOp op r args
3269
3270 genCCall target dest_regs args = do
3271
3272         -- load up the register arguments
3273     (stack_args, aregs, fregs, load_args_code)
3274          <- load_args args allArgRegs allFPArgRegs nilOL
3275
3276     let
3277         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3278         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3279         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3280                 -- for annotating the call instruction with
3281
3282         sse_regs = length fp_regs_used
3283
3284         tot_arg_size = arg_size * length stack_args
3285
3286         -- On entry to the called function, %rsp should be aligned
3287         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3288         -- the return address is 16-byte aligned).  In STG land
3289         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3290         -- need to make sure we push a multiple of 16-bytes of args,
3291         -- plus the return address, to get the correct alignment.
3292         -- Urg, this is hard.  We need to feed the delta back into
3293         -- the arg pushing code.
3294     (real_size, adjust_rsp) <-
3295         if tot_arg_size `rem` 16 == 0
3296             then return (tot_arg_size, nilOL)
3297             else do -- we need to adjust...
3298                 delta <- getDeltaNat
3299                 setDeltaNat (delta-8)
3300                 return (tot_arg_size+8, toOL [
3301                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3302                                 DELTA (delta-8)
3303                         ])
3304
3305         -- push the stack args, right to left
3306     push_code <- push_args (reverse stack_args) nilOL
3307     delta <- getDeltaNat
3308
3309     -- deal with static vs dynamic call targets
3310     (callinsns,cconv) <-
3311       case target of
3312         -- CmmPrim -> ...
3313         CmmCallee (CmmLit (CmmLabel lbl)) conv
3314            -> -- ToDo: stdcall arg sizes
3315               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3316            where fn_imm = ImmCLbl lbl
3317         CmmCallee expr conv
3318            -> do (dyn_r, dyn_c) <- getSomeReg expr
3319                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3320
3321     let
3322         -- The x86_64 ABI requires us to set %al to the number of SSE
3323         -- registers that contain arguments, if the called routine
3324         -- is a varargs function.  We don't know whether it's a
3325         -- varargs function or not, so we have to assume it is.
3326         --
3327         -- It's not safe to omit this assignment, even if the number
3328         -- of SSE regs in use is zero.  If %al is larger than 8
3329         -- on entry to a varargs function, seg faults ensue.
3330         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3331
3332     let call = callinsns `appOL`
3333                toOL (
3334                         -- Deallocate parameters after call for ccall;
3335                         -- but not for stdcall (callee does it)
3336                   (if cconv == StdCallConv || real_size==0 then [] else
3337                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3338                   ++
3339                   [DELTA (delta + real_size)]
3340                )
3341     -- in
3342     setDeltaNat (delta + real_size)
3343
3344     let
3345         -- assign the results, if necessary
3346         assign_code []     = nilOL
3347         assign_code [CmmKinded dest _hint] =
3348           case rep of
3349                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3350                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3351                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3352           where
3353                 rep = localRegRep dest
3354                 r_dest = getRegisterReg (CmmLocal dest)
3355         assign_code many = panic "genCCall.assign_code many"
3356
3357     return (load_args_code      `appOL`
3358             adjust_rsp          `appOL`
3359             push_code           `appOL`
3360             assign_eax sse_regs `appOL`
3361             call                `appOL`
3362             assign_code dest_regs)
3363
3364   where
3365     arg_size = 8 -- always, at the mo
3366
3367     load_args :: [CmmKinded CmmExpr]
3368               -> [Reg]                  -- int regs avail for args
3369               -> [Reg]                  -- FP regs avail for args
3370               -> InstrBlock
3371               -> NatM ([CmmKinded CmmExpr],[Reg],[Reg],InstrBlock)
3372     load_args args [] [] code     =  return (args, [], [], code)
3373         -- no more regs to use
3374     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3375         -- no more args to push
3376     load_args ((CmmKinded arg hint) : rest) aregs fregs code
3377         | isFloatingRep arg_rep =
3378         case fregs of
3379           [] -> push_this_arg
3380           (r:rs) -> do
3381              arg_code <- getAnyReg arg
3382              load_args rest aregs rs (code `appOL` arg_code r)
3383         | otherwise =
3384         case aregs of
3385           [] -> push_this_arg
3386           (r:rs) -> do
3387              arg_code <- getAnyReg arg
3388              load_args rest rs fregs (code `appOL` arg_code r)
3389         where
3390           arg_rep = cmmExprRep arg
3391
3392           push_this_arg = do
3393             (args',ars,frs,code') <- load_args rest aregs fregs code
3394             return ((CmmKinded arg hint):args', ars, frs, code')
3395
3396     push_args [] code = return code
3397     push_args ((CmmKinded arg hint):rest) code
3398        | isFloatingRep arg_rep = do
3399          (arg_reg, arg_code) <- getSomeReg arg
3400          delta <- getDeltaNat
3401          setDeltaNat (delta-arg_size)
3402          let code' = code `appOL` arg_code `appOL` toOL [
3403                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3404                         DELTA (delta-arg_size),
3405                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3406          push_args rest code'
3407
3408        | otherwise = do
3409        -- we only ever generate word-sized function arguments.  Promotion
3410        -- has already happened: our Int8# type is kept sign-extended
3411        -- in an Int#, for example.
3412          ASSERT(arg_rep == I64) return ()
3413          (arg_op, arg_code) <- getOperand arg
3414          delta <- getDeltaNat
3415          setDeltaNat (delta-arg_size)
3416          let code' = code `appOL` toOL [PUSH I64 arg_op,
3417                                         DELTA (delta-arg_size)]
3418          push_args rest code'
3419         where
3420           arg_rep = cmmExprRep arg
3421 #endif
3422
3423 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3424
3425 #if sparc_TARGET_ARCH
3426 {-
3427    The SPARC calling convention is an absolute
3428    nightmare.  The first 6x32 bits of arguments are mapped into
3429    %o0 through %o5, and the remaining arguments are dumped to the
3430    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3431
3432    If we have to put args on the stack, move %o6==%sp down by
3433    the number of words to go on the stack, to ensure there's enough space.
3434
3435    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3436    16 words above the stack pointer is a word for the address of
3437    a structure return value.  I use this as a temporary location
3438    for moving values from float to int regs.  Certainly it isn't
3439    safe to put anything in the 16 words starting at %sp, since
3440    this area can get trashed at any time due to window overflows
3441    caused by signal handlers.
3442
3443    A final complication (if the above isn't enough) is that
3444    we can't blithely calculate the arguments one by one into
3445    %o0 .. %o5.  Consider the following nested calls:
3446
3447        fff a (fff b c)
3448
3449    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3450    the inner call will itself use %o0, which trashes the value put there
3451    in preparation for the outer call.  Upshot: we need to calculate the
3452    args into temporary regs, and move those to arg regs or onto the
3453    stack only immediately prior to the call proper.  Sigh.
3454 -}
3455
3456 genCCall target dest_regs argsAndHints = do
3457     let
3458         args = map kindlessCmm argsAndHints
3459     argcode_and_vregs <- mapM arg_to_int_vregs args
3460     let
3461         (argcodes, vregss) = unzip argcode_and_vregs
3462         n_argRegs          = length allArgRegs
3463         n_argRegs_used     = min (length vregs) n_argRegs
3464         vregs              = concat vregss
3465     -- deal with static vs dynamic call targets
3466     callinsns <- (case target of
3467         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3468                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3469         CmmCallee expr conv -> do
3470                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3471                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3472         CmmPrim mop -> do
3473                   (res, reduce) <- outOfLineFloatOp mop
3474                   lblOrMopExpr <- case res of
3475                        Left lbl -> do
3476                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3477                        Right mopExpr -> do
3478                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3479                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3480                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3481
3482       )
3483     let
3484         argcode = concatOL argcodes
3485         (move_sp_down, move_sp_up)
3486            = let diff = length vregs - n_argRegs
3487                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3488              in  if   nn <= 0
3489                  then (nilOL, nilOL)
3490                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3491         transfer_code
3492            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3493     return (argcode       `appOL`
3494             move_sp_down  `appOL`
3495             transfer_code `appOL`
3496             callinsns     `appOL`
3497             unitOL NOP    `appOL`
3498             move_sp_up)
3499   where
3500      -- move args from the integer vregs into which they have been
3501      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3502      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3503
3504      move_final [] _ offset          -- all args done
3505         = []
3506
3507      move_final (v:vs) [] offset     -- out of aregs; move to stack
3508         = ST I32 v (spRel offset)
3509           : move_final vs [] (offset+1)
3510
3511      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3512         = OR False g0 (RIReg v) a
3513           : move_final vs az offset
3514
3515      -- generate code to calculate an argument, and move it into one
3516      -- or two integer vregs.
3517      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3518      arg_to_int_vregs arg
3519         | (cmmExprRep arg) == I64
3520         = do
3521           (ChildCode64 code r_lo) <- iselExpr64 arg
3522           let
3523               r_hi = getHiVRegFromLo r_lo
3524           return (code, [r_hi, r_lo])
3525         | otherwise
3526         = do
3527           (src, code) <- getSomeReg arg
3528           tmp <- getNewRegNat (cmmExprRep arg)
3529           let
3530               pk   = cmmExprRep arg
3531           case pk of
3532              F64 -> do
3533                       v1 <- getNewRegNat I32
3534                       v2 <- getNewRegNat I32
3535                       return (
3536                         code                          `snocOL`
3537                         FMOV F64 src f0                `snocOL`
3538                         ST   F32  f0 (spRel 16)         `snocOL`
3539                         LD   I32  (spRel 16) v1         `snocOL`
3540                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3541                         LD   I32  (spRel 16) v2
3542                         ,
3543                         [v1,v2]
3544                        )
3545              F32 -> do
3546                       v1 <- getNewRegNat I32
3547                       return (
3548                         code                    `snocOL`
3549                         ST   F32  src (spRel 16)  `snocOL`
3550                         LD   I32  (spRel 16) v1
3551                         ,
3552                         [v1]
3553                        )
3554              other -> do
3555                         v1 <- getNewRegNat I32
3556                         return (
3557                           code `snocOL` OR False g0 (RIReg src) v1
3558                           ,
3559                           [v1]
3560                          )
3561 outOfLineFloatOp mop =
3562     do
3563       dflags <- getDynFlagsNat
3564       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3565                   mkForeignLabel functionName Nothing True
3566       let mopLabelOrExpr = case mopExpr of
3567                         CmmLit (CmmLabel lbl) -> Left lbl
3568                         _ -> Right mopExpr
3569       return (mopLabelOrExpr, reduce)
3570             where
3571                 (reduce, functionName) = case mop of
3572                   MO_F32_Exp    -> (True,  fsLit "exp")
3573                   MO_F32_Log    -> (True,  fsLit "log")
3574                   MO_F32_Sqrt   -> (True,  fsLit "sqrt")
3575
3576                   MO_F32_Sin    -> (True,  fsLit "sin")
3577                   MO_F32_Cos    -> (True,  fsLit "cos")
3578                   MO_F32_Tan    -> (True,  fsLit "tan")
3579
3580                   MO_F32_Asin   -> (True,  fsLit "asin")
3581                   MO_F32_Acos   -> (True,  fsLit "acos")
3582                   MO_F32_Atan   -> (True,  fsLit "atan")
3583
3584                   MO_F32_Sinh   -> (True,  fsLit "sinh")
3585                   MO_F32_Cosh   -> (True,  fsLit "cosh")
3586                   MO_F32_Tanh   -> (True,  fsLit "tanh")
3587
3588                   MO_F64_Exp    -> (False, fsLit "exp")
3589                   MO_F64_Log    -> (False, fsLit "log")
3590                   MO_F64_Sqrt   -> (False, fsLit "sqrt")
3591
3592                   MO_F64_Sin    -> (False, fsLit "sin")
3593                   MO_F64_Cos    -> (False, fsLit "cos")
3594                   MO_F64_Tan    -> (False, fsLit "tan")
3595
3596                   MO_F64_Asin   -> (False, fsLit "asin")
3597                   MO_F64_Acos   -> (False, fsLit "acos")
3598                   MO_F64_Atan   -> (False, fsLit "atan")
3599
3600                   MO_F64_Sinh   -> (False, fsLit "sinh")
3601                   MO_F64_Cosh   -> (False, fsLit "cosh")
3602                   MO_F64_Tanh   -> (False, fsLit "tanh")
3603
3604                   other -> pprPanic "outOfLineFloatOp(sparc) "
3605                                 (pprCallishMachOp mop)
3606
3607 #endif /* sparc_TARGET_ARCH */
3608
3609 #if powerpc_TARGET_ARCH
3610
3611 #if darwin_TARGET_OS || linux_TARGET_OS
3612 {-
3613     The PowerPC calling convention for Darwin/Mac OS X
3614     is described in Apple's document
3615     "Inside Mac OS X - Mach-O Runtime Architecture".
3616
3617     PowerPC Linux uses the System V Release 4 Calling Convention
3618     for PowerPC. It is described in the
3619     "System V Application Binary Interface PowerPC Processor Supplement".
3620
3621     Both conventions are similar:
3622     Parameters may be passed in general-purpose registers starting at r3, in
3623     floating point registers starting at f1, or on the stack.
3624
3625     But there are substantial differences:
3626     * The number of registers used for parameter passing and the exact set of
3627       nonvolatile registers differs (see MachRegs.lhs).
3628     * On Darwin, stack space is always reserved for parameters, even if they are
3629       passed in registers. The called routine may choose to save parameters from
3630       registers to the corresponding space on the stack.
3631     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3632       parameter is passed in an FPR.
3633     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3634       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3635       Darwin just treats an I64 like two separate I32s (high word first).
3636     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3637       4-byte aligned like everything else on Darwin.
3638     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3639       PowerPC Linux does not agree, so neither do we.
3640
3641     According to both conventions, The parameter area should be part of the
3642     caller's stack frame, allocated in the caller's prologue code (large enough
3643     to hold the parameter lists for all called routines). The NCG already
3644     uses the stack for register spilling, leaving 64 bytes free at the top.
3645     If we need a larger parameter area than that, we just allocate a new stack
3646     frame just before ccalling.
3647 -}
3648
3649
3650 genCCall (CmmPrim MO_WriteBarrier) _ _
3651  = return $ unitOL LWSYNC
3652
3653 genCCall target dest_regs argsAndHints
3654   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3655         -- we rely on argument promotion in the codeGen
3656     do
3657         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3658                                                         (zip args argReps)
3659                                                         allArgRegs allFPArgRegs
3660                                                         initialStackOffset
3661                                                         (toOL []) []
3662
3663         (labelOrExpr, reduceToF32) <- case target of
3664             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3665             CmmCallee expr conv -> return  (Right expr, False)
3666             CmmPrim mop -> outOfLineFloatOp mop
3667
3668         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3669             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3670
3671         case labelOrExpr of
3672             Left lbl -> do
3673                 return (         codeBefore
3674                         `snocOL` BL lbl usedRegs
3675                         `appOL`  codeAfter)
3676             Right dyn -> do
3677                 (dynReg, dynCode) <- getSomeReg dyn
3678                 return (         dynCode
3679                         `snocOL` MTCTR dynReg
3680                         `appOL`  codeBefore
3681                         `snocOL` BCTRL usedRegs
3682                         `appOL`  codeAfter)
3683     where
3684 #if darwin_TARGET_OS
3685         initialStackOffset = 24
3686             -- size of linkage area + size of arguments, in bytes
3687         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3688                                        map machRepByteWidth argReps
3689 #elif linux_TARGET_OS
3690         initialStackOffset = 8
3691         stackDelta finalStack = roundTo 16 finalStack
3692 #endif
3693         args = map kindlessCmm argsAndHints
3694         argReps = map cmmExprRep args
3695
3696         roundTo a x | x `mod` a == 0 = x
3697                     | otherwise = x + a - (x `mod` a)
3698
3699         move_sp_down finalStack
3700                | delta > 64 =
3701                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3702                               DELTA (-delta)]
3703                | otherwise = nilOL
3704                where delta = stackDelta finalStack
3705         move_sp_up finalStack
3706                | delta > 64 =
3707                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3708                               DELTA 0]
3709                | otherwise = nilOL
3710                where delta = stackDelta finalStack
3711
3712
3713         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3714         passArguments ((arg,I64):args) gprs fprs stackOffset
3715                accumCode accumUsed =
3716             do
3717                 ChildCode64 code vr_lo <- iselExpr64 arg
3718                 let vr_hi = getHiVRegFromLo vr_lo
3719
3720 #if darwin_TARGET_OS
3721                 passArguments args
3722                               (drop 2 gprs)
3723                               fprs
3724                               (stackOffset+8)
3725                               (accumCode `appOL` code
3726                                     `snocOL` storeWord vr_hi gprs stackOffset
3727                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3728                               ((take 2 gprs) ++ accumUsed)
3729             where
3730                 storeWord vr (gpr:_) offset = MR gpr vr
3731                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3732
3733 #elif linux_TARGET_OS
3734                 let stackOffset' = roundTo 8 stackOffset
3735                     stackCode = accumCode `appOL` code
3736                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3737                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3738                     regCode hireg loreg =
3739                         accumCode `appOL` code
3740                             `snocOL` MR hireg vr_hi
3741                             `snocOL` MR loreg vr_lo
3742
3743                 case gprs of
3744                     hireg : loreg : regs | even (length gprs) ->
3745                         passArguments args regs fprs stackOffset
3746                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3747                     _skipped : hireg : loreg : regs ->
3748                         passArguments args regs fprs stackOffset
3749                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3750                     _ -> -- only one or no regs left
3751                         passArguments args [] fprs (stackOffset'+8)
3752                                       stackCode accumUsed
3753 #endif
3754
3755         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3756             | reg : _ <- regs = do
3757                 register <- getRegister arg
3758                 let code = case register of
3759                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3760                             Any _ acode -> acode reg
3761                 passArguments args
3762                               (drop nGprs gprs)
3763                               (drop nFprs fprs)
3764 #if darwin_TARGET_OS
3765         -- The Darwin ABI requires that we reserve stack slots for register parameters
3766                               (stackOffset + stackBytes)
3767 #elif linux_TARGET_OS
3768         -- ... the SysV ABI doesn't.
3769                               stackOffset
3770 #endif
3771                               (accumCode `appOL` code)
3772                               (reg : accumUsed)
3773             | otherwise = do
3774                 (vr, code) <- getSomeReg arg
3775                 passArguments args
3776                               (drop nGprs gprs)
3777                               (drop nFprs fprs)
3778                               (stackOffset' + stackBytes)
3779                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3780                               accumUsed
3781             where
3782 #if darwin_TARGET_OS
3783         -- stackOffset is at least 4-byte aligned
3784         -- The Darwin ABI is happy with that.
3785                 stackOffset' = stackOffset
3786 #else
3787         -- ... the SysV ABI requires 8-byte alignment for doubles.
3788                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3789                              | otherwise  =           stackOffset
3790 #endif
3791                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3792                 (nGprs, nFprs, stackBytes, regs) = case rep of
3793                     I32 -> (1, 0, 4, gprs)
3794 #if darwin_TARGET_OS
3795         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3796         -- we use the FPRs.
3797                     F32 -> (1, 1, 4, fprs)
3798                     F64 -> (2, 1, 8, fprs)
3799 #elif linux_TARGET_OS
3800         -- ... the SysV ABI doesn't.
3801                     F32 -> (0, 1, 4, fprs)
3802                     F64 -> (0, 1, 8, fprs)
3803 #endif
3804
3805         moveResult reduceToF32 =
3806             case dest_regs of
3807                 [] -> nilOL
3808                 [CmmKinded dest _hint]
3809                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3810                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3811                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3812                                           MR r_dest r4]
3813                     | otherwise -> unitOL (MR r_dest r3)
3814                     where rep = cmmRegRep (CmmLocal dest)
3815                           r_dest = getRegisterReg (CmmLocal dest)
3816
3817         outOfLineFloatOp mop =
3818             do
3819                 dflags <- getDynFlagsNat
3820                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3821                               mkForeignLabel functionName Nothing True
3822                 let mopLabelOrExpr = case mopExpr of
3823                         CmmLit (CmmLabel lbl) -> Left lbl
3824                         _ -> Right mopExpr
3825                 return (mopLabelOrExpr, reduce)
3826             where
3827                 (functionName, reduce) = case mop of
3828                     MO_F32_Exp   -> (fsLit "exp", True)
3829                     MO_F32_Log   -> (fsLit "log", True)
3830                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
3831
3832                     MO_F32_Sin   -> (fsLit "sin", True)
3833                     MO_F32_Cos   -> (fsLit "cos", True)
3834                     MO_F32_Tan   -> (fsLit "tan", True)
3835
3836                     MO_F32_Asin  -> (fsLit "asin", True)
3837                     MO_F32_Acos  -> (fsLit "acos", True)
3838                     MO_F32_Atan  -> (fsLit "atan", True)
3839
3840                     MO_F32_Sinh  -> (fsLit "sinh", True)
3841                     MO_F32_Cosh  -> (fsLit "cosh", True)
3842                     MO_F32_Tanh  -> (fsLit "tanh", True)
3843                     MO_F32_Pwr   -> (fsLit "pow", True)
3844
3845                     MO_F64_Exp   -> (fsLit "exp", False)
3846                     MO_F64_Log   -> (fsLit "log", False)
3847                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
3848
3849                     MO_F64_Sin   -> (fsLit "sin", False)
3850                     MO_F64_Cos   -> (fsLit "cos", False)
3851                     MO_F64_Tan   -> (fsLit "tan", False)
3852
3853                     MO_F64_Asin  -> (fsLit "asin", False)
3854                     MO_F64_Acos  -> (fsLit "acos", False)
3855                     MO_F64_Atan  -> (fsLit "atan", False)
3856
3857                     MO_F64_Sinh  -> (fsLit "sinh", False)
3858                     MO_F64_Cosh  -> (fsLit "cosh", False)
3859                     MO_F64_Tanh  -> (fsLit "tanh", False)
3860                     MO_F64_Pwr   -> (fsLit "pow", False)
3861                     other -> pprPanic "genCCall(ppc): unknown callish op"
3862                                     (pprCallishMachOp other)
3863
3864 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3865
3866 #endif /* powerpc_TARGET_ARCH */
3867
3868
3869 -- -----------------------------------------------------------------------------
3870 -- Generating a table-branch
3871
3872 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3873
3874 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3875 genSwitch expr ids
3876   | opt_PIC
3877   = do
3878         (reg,e_code) <- getSomeReg expr
3879         lbl <- getNewLabelNat
3880         dflags <- getDynFlagsNat
3881         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3882         (tableReg,t_code) <- getSomeReg $ dynRef
3883         let
3884             jumpTable = map jumpTableEntryRel ids
3885
3886             jumpTableEntryRel Nothing
3887                 = CmmStaticLit (CmmInt 0 wordRep)
3888             jumpTableEntryRel (Just (BlockId id))
3889                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3890                 where blockLabel = mkAsmTempLabel id
3891
3892             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3893                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3894
3895 #if x86_64_TARGET_ARCH
3896 #if darwin_TARGET_OS
3897     -- on Mac OS X/x86_64, put the jump table in the text section
3898     -- to work around a limitation of the linker.
3899     -- ld64 is unable to handle the relocations for
3900     --     .quad L1 - L0
3901     -- if L0 is not preceded by a non-anonymous label in its section.
3902
3903             code = e_code `appOL` t_code `appOL` toOL [
3904                             ADD wordRep op (OpReg tableReg),
3905                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3906                             LDATA Text (CmmDataLabel lbl : jumpTable)
3907                     ]
3908 #else
3909     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3910     -- relocations, hence we only get 32-bit offsets in the jump
3911     -- table. As these offsets are always negative we need to properly
3912     -- sign extend them to 64-bit. This hack should be removed in
3913     -- conjunction with the hack in PprMach.hs/pprDataItem once
3914     -- binutils 2.17 is standard.
3915             code = e_code `appOL` t_code `appOL` toOL [
3916                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3917                             MOVSxL I32
3918                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3919                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
3920                                    (OpReg reg),
3921                             ADD wordRep (OpReg reg) (OpReg tableReg),
3922                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3923                    ]
3924 #endif
3925 #else
3926             code = e_code `appOL` t_code `appOL` toOL [
3927                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3928                             ADD wordRep op (OpReg tableReg),
3929                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3930                     ]
3931 #endif
3932         return code
3933   | otherwise
3934   = do
3935         (reg,e_code) <- getSomeReg expr
3936         lbl <- getNewLabelNat
3937         let
3938             jumpTable = map jumpTableEntry ids
3939             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3940             code = e_code `appOL` toOL [
3941                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3942                     JMP_TBL op [ id | Just id <- ids ]
3943                  ]
3944         -- in
3945         return code
3946 #elif powerpc_TARGET_ARCH
3947 genSwitch expr ids
3948   | opt_PIC
3949   = do
3950         (reg,e_code) <- getSomeReg expr
3951         tmp <- getNewRegNat I32
3952         lbl <- getNewLabelNat
3953         dflags <- getDynFlagsNat
3954         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3955         (tableReg,t_code) <- getSomeReg $ dynRef
3956         let
3957             jumpTable = map jumpTableEntryRel ids
3958
3959             jumpTableEntryRel Nothing
3960                 = CmmStaticLit (CmmInt 0 wordRep)
3961             jumpTableEntryRel (Just (BlockId id))
3962                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3963                 where blockLabel = mkAsmTempLabel id
3964
3965             code = e_code `appOL` t_code `appOL` toOL [
3966                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3967                             SLW tmp reg (RIImm (ImmInt 2)),
3968                             LD I32 tmp (AddrRegReg tableReg tmp),
3969                             ADD tmp tmp (RIReg tableReg),
3970                             MTCTR tmp,
3971                             BCTR [ id | Just id <- ids ]
3972                     ]
3973         return code
3974   | otherwise
3975   = do
3976         (reg,e_code) <- getSomeReg expr
3977         tmp <- getNewRegNat I32
3978         lbl <- getNewLabelNat
3979         let
3980             jumpTable = map jumpTableEntry ids
3981
3982             code = e_code `appOL` toOL [
3983                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3984                             SLW tmp reg (RIImm (ImmInt 2)),
3985                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3986                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3987                             MTCTR tmp,
3988                             BCTR [ id | Just id <- ids ]
3989                     ]
3990         return code
3991 #else
3992 #error "ToDo: genSwitch"
3993 #endif
3994
3995 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3996 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3997     where blockLabel = mkAsmTempLabel id
3998
3999 -- -----------------------------------------------------------------------------
4000 -- Support bits
4001 -- -----------------------------------------------------------------------------
4002
4003
4004 -- -----------------------------------------------------------------------------
4005 -- 'condIntReg' and 'condFltReg': condition codes into registers
4006
4007 -- Turn those condition codes into integers now (when they appear on
4008 -- the right hand side of an assignment).
4009 --
4010 -- (If applicable) Do not fill the delay slots here; you will confuse the
4011 -- register allocator.
4012
4013 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4014
4015 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4016
4017 #if alpha_TARGET_ARCH
4018 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4019 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4020 #endif /* alpha_TARGET_ARCH */
4021
4022 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4023
4024 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4025
4026 condIntReg cond x y = do
4027   CondCode _ cond cond_code <- condIntCode cond x y
4028   tmp <- getNewRegNat I8
4029   let
4030         code dst = cond_code `appOL` toOL [
4031                     SETCC cond (OpReg tmp),
4032                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4033                   ]
4034   -- in
4035   return (Any I32 code)
4036
4037 #endif
4038
4039 #if i386_TARGET_ARCH
4040
4041 condFltReg cond x y = do
4042   CondCode _ cond cond_code <- condFltCode cond x y
4043   tmp <- getNewRegNat I8
4044   let
4045         code dst = cond_code `appOL` toOL [
4046                     SETCC cond (OpReg tmp),
4047                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4048                   ]
4049   -- in
4050   return (Any I32 code)
4051
4052 #endif
4053
4054 #if x86_64_TARGET_ARCH
4055
4056 condFltReg cond x y = do
4057   CondCode _ cond cond_code <- condFltCode cond x y
4058   tmp1 <- getNewRegNat wordRep
4059   tmp2 <- getNewRegNat wordRep
4060   let
4061         -- We have to worry about unordered operands (eg. comparisons
4062         -- against NaN).  If the operands are unordered, the comparison
4063         -- sets the parity flag, carry flag and zero flag.
4064         -- All comparisons are supposed to return false for unordered
4065         -- operands except for !=, which returns true.
4066         --
4067         -- Optimisation: we don't have to test the parity flag if we
4068         -- know the test has already excluded the unordered case: eg >
4069         -- and >= test for a zero carry flag, which can only occur for
4070         -- ordered operands.
4071         --
4072         -- ToDo: by reversing comparisons we could avoid testing the
4073         -- parity flag in more cases.
4074
4075         code dst =
4076            cond_code `appOL`
4077              (case cond of
4078                 NE  -> or_unordered dst
4079                 GU  -> plain_test   dst
4080                 GEU -> plain_test   dst
4081                 _   -> and_ordered  dst)
4082
4083         plain_test dst = toOL [
4084                     SETCC cond (OpReg tmp1),
4085                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4086                  ]
4087         or_unordered dst = toOL [
4088                     SETCC cond (OpReg tmp1),
4089                     SETCC PARITY (OpReg tmp2),
4090                     OR I8 (OpReg tmp1) (OpReg tmp2),
4091                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4092                   ]
4093         and_ordered dst = toOL [
4094                     SETCC cond (OpReg tmp1),
4095                     SETCC NOTPARITY (OpReg tmp2),
4096                     AND I8 (OpReg tmp1) (OpReg tmp2),
4097                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4098                   ]
4099   -- in
4100   return (Any I32 code)
4101
4102 #endif
4103
4104 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4105
4106 #if sparc_TARGET_ARCH
4107
4108 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4109     (src, code) <- getSomeReg x
4110     tmp <- getNewRegNat I32
4111     let
4112         code__2 dst = code `appOL` toOL [
4113             SUB False True g0 (RIReg src) g0,
4114             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4115     return (Any I32 code__2)
4116
4117 condIntReg EQQ x y = do
4118     (src1, code1) <- getSomeReg x
4119     (src2, code2) <- getSomeReg y
4120     tmp1 <- getNewRegNat I32
4121     tmp2 <- getNewRegNat I32
4122     let
4123         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4124             XOR False src1 (RIReg src2) dst,
4125             SUB False True g0 (RIReg dst) g0,
4126             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4127     return (Any I32 code__2)
4128
4129 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4130     (src, code) <- getSomeReg x
4131     tmp <- getNewRegNat I32
4132     let
4133         code__2 dst = code `appOL` toOL [
4134             SUB False True g0 (RIReg src) g0,
4135             ADD True False g0 (RIImm (ImmInt 0)) dst]
4136     return (Any I32 code__2)
4137
4138 condIntReg NE x y = do
4139     (src1, code1) <- getSomeReg x
4140     (src2, code2) <- getSomeReg y
4141     tmp1 <- getNewRegNat I32
4142     tmp2 <- getNewRegNat I32
4143     let
4144         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4145             XOR False src1 (RIReg src2) dst,
4146             SUB False True g0 (RIReg dst) g0,
4147             ADD True False g0 (RIImm (ImmInt 0)) dst]
4148     return (Any I32 code__2)
4149
4150 condIntReg cond x y = do
4151     BlockId lbl1 <- getBlockIdNat
4152     BlockId lbl2 <- getBlockIdNat
4153     CondCode _ cond cond_code <- condIntCode cond x y
4154     let
4155         code__2 dst = cond_code `appOL` toOL [
4156             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4157             OR False g0 (RIImm (ImmInt 0)) dst,
4158             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4159             NEWBLOCK (BlockId lbl1),
4160             OR False g0 (RIImm (ImmInt 1)) dst,
4161             NEWBLOCK (BlockId lbl2)]
4162     return (Any I32 code__2)
4163
4164 condFltReg cond x y = do
4165     BlockId lbl1 <- getBlockIdNat
4166     BlockId lbl2 <- getBlockIdNat
4167     CondCode _ cond cond_code <- condFltCode cond x y
4168     let
4169         code__2 dst = cond_code `appOL` toOL [
4170             NOP,
4171             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4172             OR False g0 (RIImm (ImmInt 0)) dst,
4173             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4174             NEWBLOCK (BlockId lbl1),
4175             OR False g0 (RIImm (ImmInt 1)) dst,
4176             NEWBLOCK (BlockId lbl2)]
4177     return (Any I32 code__2)
4178
4179 #endif /* sparc_TARGET_ARCH */
4180
4181 #if powerpc_TARGET_ARCH
4182 condReg getCond = do
4183     lbl1 <- getBlockIdNat
4184     lbl2 <- getBlockIdNat
4185     CondCode _ cond cond_code <- getCond
4186     let
4187 {-        code dst = cond_code `appOL` toOL [
4188                 BCC cond lbl1,
4189                 LI dst (ImmInt 0),
4190                 BCC ALWAYS lbl2,
4191                 NEWBLOCK lbl1,
4192                 LI dst (ImmInt 1),
4193                 BCC ALWAYS lbl2,
4194                 NEWBLOCK lbl2
4195             ]-}
4196         code dst = cond_code
4197             `appOL` negate_code
4198             `appOL` toOL [
4199                 MFCR dst,
4200                 RLWINM dst dst (bit + 1) 31 31
4201             ]
4202
4203         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4204                     | otherwise = nilOL
4205
4206         (bit, do_negate) = case cond of
4207             LTT -> (0, False)
4208             LE  -> (1, True)
4209             EQQ -> (2, False)
4210             GE  -> (0, True)
4211             GTT -> (1, False)
4212
4213             NE  -> (2, True)
4214
4215             LU  -> (0, False)
4216             LEU -> (1, True)
4217             GEU -> (0, True)
4218             GU  -> (1, False)
4219
4220     return (Any I32 code)
4221
4222 condIntReg cond x y = condReg (condIntCode cond x y)
4223 condFltReg cond x y = condReg (condFltCode cond x y)
4224 #endif /* powerpc_TARGET_ARCH */
4225
4226
4227 -- -----------------------------------------------------------------------------
4228 -- 'trivial*Code': deal with trivial instructions
4229
4230 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4231 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4232 -- Only look for constants on the right hand side, because that's
4233 -- where the generic optimizer will have put them.
4234
4235 -- Similarly, for unary instructions, we don't have to worry about
4236 -- matching an StInt as the argument, because genericOpt will already
4237 -- have handled the constant-folding.
4238
4239 trivialCode
4240     :: MachRep
4241     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4242       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4243                      -> Maybe (Operand -> Operand -> Instr)
4244       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4245                      -> Maybe (Operand -> Operand -> Instr)
4246       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4247       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4248       ,)))))
4249     -> CmmExpr -> CmmExpr -- the two arguments
4250     -> NatM Register
4251
4252 #ifndef powerpc_TARGET_ARCH
4253 trivialFCode
4254     :: MachRep
4255     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4256       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4257       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4258       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4259       ,))))
4260     -> CmmExpr -> CmmExpr -- the two arguments
4261     -> NatM Register
4262 #endif
4263
4264 trivialUCode
4265     :: MachRep
4266     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4267       ,IF_ARCH_i386 ((Operand -> Instr)
4268       ,IF_ARCH_x86_64 ((Operand -> Instr)
4269       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4270       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4271       ,)))))
4272     -> CmmExpr  -- the one argument
4273     -> NatM Register
4274
4275 #ifndef powerpc_TARGET_ARCH
4276 trivialUFCode
4277     :: MachRep
4278     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4279       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4280       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4281       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4282       ,))))
4283     -> CmmExpr -- the one argument
4284     -> NatM Register
4285 #endif
4286
4287 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4288
4289 #if alpha_TARGET_ARCH
4290
4291 trivialCode instr x (StInt y)
4292   | fits8Bits y
4293   = getRegister x               `thenNat` \ register ->
4294     getNewRegNat IntRep         `thenNat` \ tmp ->
4295     let
4296         code = registerCode register tmp
4297         src1 = registerName register tmp
4298         src2 = ImmInt (fromInteger y)
4299         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4300     in
4301     return (Any IntRep code__2)
4302
4303 trivialCode instr x y
4304   = getRegister x               `thenNat` \ register1 ->
4305     getRegister y               `thenNat` \ register2 ->
4306     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4307     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4308     let
4309         code1 = registerCode register1 tmp1 []
4310         src1  = registerName register1 tmp1
4311         code2 = registerCode register2 tmp2 []
4312         src2  = registerName register2 tmp2
4313         code__2 dst = asmSeqThen [code1, code2] .
4314                      mkSeqInstr (instr src1 (RIReg src2) dst)
4315     in
4316     return (Any IntRep code__2)
4317
4318 ------------
4319 trivialUCode instr x
4320   = getRegister x               `thenNat` \ register ->
4321     getNewRegNat IntRep         `thenNat` \ tmp ->
4322     let
4323         code = registerCode register tmp
4324         src  = registerName register tmp
4325         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4326     in
4327     return (Any IntRep code__2)
4328
4329 ------------
4330 trivialFCode _ instr x y
4331   = getRegister x               `thenNat` \ register1 ->
4332     getRegister y               `thenNat` \ register2 ->
4333     getNewRegNat F64    `thenNat` \ tmp1 ->
4334     getNewRegNat F64    `thenNat` \ tmp2 ->
4335     let
4336         code1 = registerCode register1 tmp1
4337         src1  = registerName register1 tmp1
4338
4339         code2 = registerCode register2 tmp2
4340         src2  = registerName register2 tmp2
4341
4342         code__2 dst = asmSeqThen [code1 [], code2 []] .
4343                       mkSeqInstr (instr src1 src2 dst)
4344     in
4345     return (Any F64 code__2)
4346
4347 trivialUFCode _ instr x
4348   = getRegister x               `thenNat` \ register ->
4349     getNewRegNat F64    `thenNat` \ tmp ->
4350     let
4351         code = registerCode register tmp
4352         src  = registerName register tmp
4353         code__2 dst = code . mkSeqInstr (instr src dst)
4354     in
4355     return (Any F64 code__2)
4356
4357 #endif /* alpha_TARGET_ARCH */
4358
4359 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4360
4361 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4362
4363 {-
4364 The Rules of the Game are:
4365
4366 * You cannot assume anything about the destination register dst;
4367   it may be anything, including a fixed reg.
4368
4369 * You may compute an operand into a fixed reg, but you may not
4370   subsequently change the contents of that fixed reg.  If you
4371   want to do so, first copy the value either to a temporary
4372   or into dst.  You are free to modify dst even if it happens
4373   to be a fixed reg -- that's not your problem.
4374
4375 * You cannot assume that a fixed reg will stay live over an
4376   arbitrary computation.  The same applies to the dst reg.
4377
4378 * Temporary regs obtained from getNewRegNat are distinct from
4379   each other and from all other regs, and stay live over
4380   arbitrary computations.
4381
4382 --------------------
4383
4384 SDM's version of The Rules:
4385
4386 * If getRegister returns Any, that means it can generate correct
4387   code which places the result in any register, period.  Even if that
4388   register happens to be read during the computation.
4389
4390   Corollary #1: this means that if you are generating code for an
4391   operation with two arbitrary operands, you cannot assign the result
4392   of the first operand into the destination register before computing
4393   the second operand.  The second operand might require the old value
4394   of the destination register.
4395
4396   Corollary #2: A function might be able to generate more efficient
4397   code if it knows the destination register is a new temporary (and
4398   therefore not read by any of the sub-computations).
4399
4400 * If getRegister returns Any, then the code it generates may modify only:
4401         (a) fresh temporaries
4402         (b) the destination register
4403         (c) known registers (eg. %ecx is used by shifts)
4404   In particular, it may *not* modify global registers, unless the global
4405   register happens to be the destination register.
4406 -}
4407
4408 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4409   | not (is64BitLit lit_a) = do
4410   b_code <- getAnyReg b
4411   let
4412        code dst
4413          = b_code dst `snocOL`
4414            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4415   -- in
4416   return (Any rep code)
4417
4418 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4419
4420 -- This is re-used for floating pt instructions too.
4421 genTrivialCode rep instr a b = do
4422   (b_op, b_code) <- getNonClobberedOperand b
4423   a_code <- getAnyReg a
4424   tmp <- getNewRegNat rep
4425   let
4426      -- We want the value of b to stay alive across the computation of a.
4427      -- But, we want to calculate a straight into the destination register,
4428      -- because the instruction only has two operands (dst := dst `op` src).
4429      -- The troublesome case is when the result of b is in the same register
4430      -- as the destination reg.  In this case, we have to save b in a
4431      -- new temporary across the computation of a.
4432      code dst
4433         | dst `regClashesWithOp` b_op =
4434                 b_code `appOL`
4435                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4436                 a_code dst `snocOL`
4437                 instr (OpReg tmp) (OpReg dst)
4438         | otherwise =
4439                 b_code `appOL`
4440                 a_code dst `snocOL`
4441                 instr b_op (OpReg dst)
4442   -- in
4443   return (Any rep code)
4444
4445 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4446 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4447 reg `regClashesWithOp` _            = False
4448
4449 -----------
4450
4451 trivialUCode rep instr x = do
4452   x_code <- getAnyReg x
4453   let
4454      code dst =
4455         x_code dst `snocOL`
4456         instr (OpReg dst)
4457   -- in
4458   return (Any rep code)
4459
4460 -----------
4461
4462 #if i386_TARGET_ARCH
4463
4464 trivialFCode pk instr x y = do
4465   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4466   (y_reg, y_code) <- getSomeReg y
4467   let
4468      code dst =
4469         x_code `appOL`
4470         y_code `snocOL`
4471         instr pk x_reg y_reg dst
4472   -- in
4473   return (Any pk code)
4474
4475 #endif
4476
4477 #if x86_64_TARGET_ARCH
4478
4479 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4480
4481 #endif
4482
4483 -------------
4484
4485 trivialUFCode rep instr x = do
4486   (x_reg, x_code) <- getSomeReg x
4487   let
4488      code dst =
4489         x_code `snocOL`
4490         instr x_reg dst
4491   -- in
4492   return (Any rep code)
4493
4494 #endif /* i386_TARGET_ARCH */
4495
4496 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4497
4498 #if sparc_TARGET_ARCH
4499
4500 trivialCode pk instr x (CmmLit (CmmInt y d))
4501   | fits13Bits y
4502   = do
4503       (src1, code) <- getSomeReg x
4504       tmp <- getNewRegNat I32
4505       let
4506         src2 = ImmInt (fromInteger y)
4507         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4508       return (Any I32 code__2)
4509
4510 trivialCode pk instr x y = do
4511     (src1, code1) <- getSomeReg x
4512     (src2, code2) <- getSomeReg y
4513     tmp1 <- getNewRegNat I32
4514     tmp2 <- getNewRegNat I32
4515     let
4516         code__2 dst = code1 `appOL` code2 `snocOL`
4517                       instr src1 (RIReg src2) dst
4518     return (Any I32 code__2)
4519
4520 ------------
4521 trivialFCode pk instr x y = do
4522     (src1, code1) <- getSomeReg x
4523     (src2, code2) <- getSomeReg y
4524     tmp1 <- getNewRegNat (cmmExprRep x)
4525     tmp2 <- getNewRegNat (cmmExprRep y)
4526     tmp <- getNewRegNat F64
4527     let
4528         promote x = FxTOy F32 F64 x tmp
4529
4530         pk1   = cmmExprRep x
4531         pk2   = cmmExprRep y
4532
4533         code__2 dst =
4534                 if pk1 == pk2 then
4535                     code1 `appOL` code2 `snocOL`
4536                     instr pk src1 src2 dst
4537                 else if pk1 == F32 then
4538                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4539                     instr F64 tmp src2 dst
4540                 else
4541                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4542                     instr F64 src1 tmp dst
4543     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4544
4545 ------------
4546 trivialUCode pk instr x = do
4547     (src, code) <- getSomeReg x
4548     tmp <- getNewRegNat pk
4549     let
4550         code__2 dst = code `snocOL` instr (RIReg src) dst
4551     return (Any pk code__2)
4552
4553 -------------
4554 trivialUFCode pk instr x = do
4555     (src, code) <- getSomeReg x
4556     tmp <- getNewRegNat pk
4557     let
4558         code__2 dst = code `snocOL` instr src dst
4559     return (Any pk code__2)
4560
4561 #endif /* sparc_TARGET_ARCH */
4562
4563 #if powerpc_TARGET_ARCH
4564
4565 {-
4566 Wolfgang's PowerPC version of The Rules:
4567
4568 A slightly modified version of The Rules to take advantage of the fact
4569 that PowerPC instructions work on all registers and don't implicitly
4570 clobber any fixed registers.
4571
4572 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4573
4574 * If getRegister returns Any, then the code it generates may modify only:
4575         (a) fresh temporaries
4576         (b) the destination register
4577   It may *not* modify global registers, unless the global
4578   register happens to be the destination register.
4579   It may not clobber any other registers. In fact, only ccalls clobber any
4580   fixed registers.
4581   Also, it may not modify the counter register (used by genCCall).
4582
4583   Corollary: If a getRegister for a subexpression returns Fixed, you need
4584   not move it to a fresh temporary before evaluating the next subexpression.
4585   The Fixed register won't be modified.
4586   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4587
4588 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4589   the value of the destination register.
4590 -}
4591
4592 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4593     | Just imm <- makeImmediate rep signed y
4594     = do
4595         (src1, code1) <- getSomeReg x
4596         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4597         return (Any rep code)
4598
4599 trivialCode rep signed instr x y = do
4600     (src1, code1) <- getSomeReg x
4601     (src2, code2) <- getSomeReg y
4602     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4603     return (Any rep code)
4604
4605 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4606     -> CmmExpr -> CmmExpr -> NatM Register
4607 trivialCodeNoImm rep instr x y = do
4608     (src1, code1) <- getSomeReg x
4609     (src2, code2) <- getSomeReg y
4610     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4611     return (Any rep code)
4612
4613 trivialUCode rep instr x = do
4614     (src, code) <- getSomeReg x
4615     let code' dst = code `snocOL` instr dst src
4616     return (Any rep code')
4617
4618 -- There is no "remainder" instruction on the PPC, so we have to do
4619 -- it the hard way.
4620 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4621
4622 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4623     -> CmmExpr -> CmmExpr -> NatM Register
4624 remainderCode rep div x y = do
4625     (src1, code1) <- getSomeReg x
4626     (src2, code2) <- getSomeReg y
4627     let code dst = code1 `appOL` code2 `appOL` toOL [
4628                 div dst src1 src2,
4629                 MULLW dst dst (RIReg src2),
4630                 SUBF dst dst src1
4631             ]
4632     return (Any rep code)
4633
4634 #endif /* powerpc_TARGET_ARCH */
4635
4636
4637 -- -----------------------------------------------------------------------------
4638 --  Coercing to/from integer/floating-point...
4639
4640 -- When going to integer, we truncate (round towards 0).
4641
4642 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4643 -- conversions.  We have to store temporaries in memory to move
4644 -- between the integer and the floating point register sets.
4645
4646 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4647 -- pretend, on sparc at least, that double and float regs are seperate
4648 -- kinds, so the value has to be computed into one kind before being
4649 -- explicitly "converted" to live in the other kind.
4650
4651 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4652 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4653
4654 #if sparc_TARGET_ARCH
4655 coerceDbl2Flt :: CmmExpr -> NatM Register
4656 coerceFlt2Dbl :: CmmExpr -> NatM Register
4657 #endif
4658
4659 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4660
4661 #if alpha_TARGET_ARCH
4662
4663 coerceInt2FP _ x
4664   = getRegister x               `thenNat` \ register ->
4665     getNewRegNat IntRep         `thenNat` \ reg ->
4666     let
4667         code = registerCode register reg
4668         src  = registerName register reg
4669
4670         code__2 dst = code . mkSeqInstrs [
4671             ST Q src (spRel 0),
4672             LD TF dst (spRel 0),
4673             CVTxy Q TF dst dst]
4674     in
4675     return (Any F64 code__2)
4676
4677 -------------
4678 coerceFP2Int x
4679   = getRegister x               `thenNat` \ register ->
4680     getNewRegNat F64    `thenNat` \ tmp ->
4681     let
4682         code = registerCode register tmp
4683         src  = registerName register tmp
4684
4685         code__2 dst = code . mkSeqInstrs [
4686             CVTxy TF Q src tmp,
4687             ST TF tmp (spRel 0),
4688             LD Q dst (spRel 0)]
4689     in
4690     return (Any IntRep code__2)
4691
4692 #endif /* alpha_TARGET_ARCH */
4693
4694 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4695
4696 #if i386_TARGET_ARCH
4697
4698 coerceInt2FP from to x = do
4699   (x_reg, x_code) <- getSomeReg x
4700   let
4701         opc  = case to of F32 -> GITOF; F64 -> GITOD
4702         code dst = x_code `snocOL` opc x_reg dst
4703         -- ToDo: works for non-I32 reps?
4704   -- in
4705   return (Any to code)
4706
4707 ------------
4708
4709 coerceFP2Int from to x = do
4710   (x_reg, x_code) <- getSomeReg x
4711   let
4712         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4713         code dst = x_code `snocOL` opc x_reg dst
4714         -- ToDo: works for non-I32 reps?
4715   -- in
4716   return (Any to code)
4717
4718 #endif /* i386_TARGET_ARCH */
4719
4720 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4721
4722 #if x86_64_TARGET_ARCH
4723
4724 coerceFP2Int from to x = do
4725   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4726   let
4727         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4728         code dst = x_code `snocOL` opc x_op dst
4729   -- in
4730   return (Any to code) -- works even if the destination rep is <I32
4731
4732 coerceInt2FP from to x = do
4733   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4734   let
4735         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4736         code dst = x_code `snocOL` opc x_op dst
4737   -- in
4738   return (Any to code) -- works even if the destination rep is <I32
4739
4740 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4741 coerceFP2FP to x = do
4742   (x_reg, x_code) <- getSomeReg x
4743   let
4744         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4745         code dst = x_code `snocOL` opc x_reg dst
4746   -- in
4747   return (Any to code)
4748
4749 #endif
4750
4751 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4752
4753 #if sparc_TARGET_ARCH
4754
4755 coerceInt2FP pk1 pk2 x = do
4756     (src, code) <- getSomeReg x
4757     let
4758         code__2 dst = code `appOL` toOL [
4759             ST pk1 src (spRel (-2)),
4760             LD pk1 (spRel (-2)) dst,
4761             FxTOy pk1 pk2 dst dst]
4762     return (Any pk2 code__2)
4763
4764 ------------
4765 coerceFP2Int pk fprep x = do
4766     (src, code) <- getSomeReg x
4767     reg <- getNewRegNat fprep
4768     tmp <- getNewRegNat pk
4769     let
4770         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4771             code `appOL` toOL [
4772             FxTOy fprep pk src tmp,
4773             ST pk tmp (spRel (-2)),
4774             LD pk (spRel (-2)) dst]
4775     return (Any pk code__2)
4776
4777 ------------
4778 coerceDbl2Flt x = do
4779     (src, code) <- getSomeReg x
4780     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4781
4782 ------------
4783 coerceFlt2Dbl x = do
4784     (src, code) <- getSomeReg x
4785     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4786
4787 #endif /* sparc_TARGET_ARCH */
4788
4789 #if powerpc_TARGET_ARCH
4790 coerceInt2FP fromRep toRep x = do
4791     (src, code) <- getSomeReg x
4792     lbl <- getNewLabelNat
4793     itmp <- getNewRegNat I32
4794     ftmp <- getNewRegNat F64
4795     dflags <- getDynFlagsNat
4796     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4797     Amode addr addr_code <- getAmode dynRef
4798     let
4799         code' dst = code `appOL` maybe_exts `appOL` toOL [
4800                 LDATA ReadOnlyData
4801                                 [CmmDataLabel lbl,
4802                                  CmmStaticLit (CmmInt 0x43300000 I32),
4803                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4804                 XORIS itmp src (ImmInt 0x8000),
4805                 ST I32 itmp (spRel 3),
4806                 LIS itmp (ImmInt 0x4330),
4807                 ST I32 itmp (spRel 2),
4808                 LD F64 ftmp (spRel 2)
4809             ] `appOL` addr_code `appOL` toOL [
4810                 LD F64 dst addr,
4811                 FSUB F64 dst ftmp dst
4812             ] `appOL` maybe_frsp dst
4813
4814         maybe_exts = case fromRep of
4815                         I8 ->  unitOL $ EXTS I8 src src
4816                         I16 -> unitOL $ EXTS I16 src src
4817                         I32 -> nilOL
4818         maybe_frsp dst = case toRep of
4819                         F32 -> unitOL $ FRSP dst dst
4820                         F64 -> nilOL
4821     return (Any toRep code')
4822
4823 coerceFP2Int fromRep toRep x = do
4824     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4825     (src, code) <- getSomeReg x
4826     tmp <- getNewRegNat F64
4827     let
4828         code' dst = code `appOL` toOL [
4829                 -- convert to int in FP reg
4830             FCTIWZ tmp src,
4831                 -- store value (64bit) from FP to stack
4832             ST F64 tmp (spRel 2),
4833                 -- read low word of value (high word is undefined)
4834             LD I32 dst (spRel 3)]
4835     return (Any toRep code')
4836 #endif /* powerpc_TARGET_ARCH */
4837
4838
4839 -- -----------------------------------------------------------------------------
4840 -- eXTRA_STK_ARGS_HERE
4841
4842 -- We (allegedly) put the first six C-call arguments in registers;
4843 -- where do we start putting the rest of them?
4844
4845 -- Moved from MachInstrs (SDM):
4846
4847 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4848 eXTRA_STK_ARGS_HERE :: Int
4849 eXTRA_STK_ARGS_HERE
4850   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4851 #endif
4852