compiler/nativeGen/MachCodeGen.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning supression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Generating machine code (instruction selection)
  11 --
  12 -- (c) The University of Glasgow 1996-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 -- This is a big module, but, if you pay attention to
  17 -- (a) the sectioning, (b) the type signatures, and
  18 -- (c) the #if blah_TARGET_ARCH} things, the
  19 -- structure should not be too overwhelming.
  20
  21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import MachInstrs
  29 import MachRegs
  30 import NCGMonad
  31 import PositionIndependentCode
  32 import RegAllocInfo ( mkBranchInstr )
  33
  34 -- Our intermediate code:
  35 import PprCmm           ( pprExpr )
  36 import Cmm
  37 import MachOp
  38 import CLabel
  39 import ClosureInfo      ( C_SRT(..) )
  40
  41 -- The rest:
  42 import StaticFlags      ( opt_PIC )
  43 import ForeignCall      ( CCallConv(..) )
  44 import OrdList
  45 import Pretty
  46 import Outputable
  47 import FastString
  48 import FastBool         ( isFastTrue )
  49 import Constants        ( wORD_SIZE )
  50
  51 import Debug.Trace      ( trace )
  52
  53 import Control.Monad    ( mapAndUnzipM )
  54 import Data.Maybe       ( fromJust )
  55 import Data.Bits
  56 import Data.Word
  57 import Data.Int
  58
  59 -- -----------------------------------------------------------------------------
  60 -- Top-level of the instruction selector
  61
  62 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  63 -- They are really trees of insns to facilitate fast appending, where a
  64 -- left-to-right traversal (pre-order?) yields the insns in the correct
  65 -- order.
  66
  67 type InstrBlock = OrdList Instr
  68
  69 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
  70 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  71   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  72   picBaseMb <- getPicBaseMaybeNat
  73   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  74       tops = proc : concat statics
  75   case picBaseMb of
  76       Just picBase -> initializePicBase picBase tops
  77       Nothing -> return tops
  78
  79 cmmTopCodeGen (CmmData sec dat) = do
  80   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  81
  82 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
  83 basicBlockCodeGen (BasicBlock id stmts) = do
  84   instrs <- stmtsToInstrs stmts
  85   -- code generation may introduce new basic block boundaries, which
  86   -- are indicated by the NEWBLOCK instruction.  We must split up the
  87   -- instruction stream into basic blocks again.  Also, we extract
  88   -- LDATAs here too.
  89   let
  90         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
  91
  92         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
  93           = ([], BasicBlock id instrs : blocks, statics)
  94         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
  95           = (instrs, blocks, CmmData sec dat:statics)
  96         mkBlocks instr (instrs,blocks,statics)
  97           = (instr:instrs, blocks, statics)
  98   -- in
  99   return (BasicBlock id top : other_blocks, statics)
 100
 101 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 102 stmtsToInstrs stmts
 103    = do instrss <- mapM stmtToInstrs stmts
 104         return (concatOL instrss)
 105
 106 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 107 stmtToInstrs stmt = case stmt of
 108     CmmNop         -> return nilOL
 109     CmmComment s   -> return (unitOL (COMMENT s))
 110
 111     CmmAssign reg src
 112       | isFloatingRep kind -> assignReg_FltCode kind reg src
 113 #if WORD_SIZE_IN_BITS==32
 114       | kind == I64        -> assignReg_I64Code      reg src
 115 #endif
 116       | otherwise          -> assignReg_IntCode kind reg src
 117         where kind = cmmRegRep reg
 118
 119     CmmStore addr src
 120       | isFloatingRep kind -> assignMem_FltCode kind addr src
 121 #if WORD_SIZE_IN_BITS==32
 122       | kind == I64      -> assignMem_I64Code      addr src
 123 #endif
 124       | otherwise        -> assignMem_IntCode kind addr src
 125         where kind = cmmExprRep src
 126
 127     CmmCall target result_regs args _ _
 128        -> genCCall target result_regs args
 129
 130     CmmBranch id          -> genBranch id
 131     CmmCondBranch arg id  -> genCondJump id arg
 132     CmmSwitch arg ids     -> genSwitch arg ids
 133     CmmJump arg params    -> genJump arg
 134     CmmReturn params      ->
 135       panic "stmtToInstrs: return statement should have been cps'd away"
 136
 137 -- -----------------------------------------------------------------------------
 138 -- General things for putting together code sequences
 139
 140 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 141 -- CmmExprs into CmmRegOff?
 142 mangleIndexTree :: CmmExpr -> CmmExpr
 143 mangleIndexTree (CmmRegOff reg off)
 144   = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
 145   where rep = cmmRegRep reg
 146
 147 -- -----------------------------------------------------------------------------
 148 --  Code gen for 64-bit arithmetic on 32-bit platforms
 149
 150 {-
 151 Simple support for generating 64-bit code (ie, 64 bit values and 64
 152 bit assignments) on 32-bit platforms.  Unlike the main code generator
 153 we merely shoot for generating working code as simply as possible, and
 154 pay little attention to code quality.  Specifically, there is no
 155 attempt to deal cleverly with the fixed-vs-floating register
 156 distinction; all values are generated into (pairs of) floating
 157 registers, even if this would mean some redundant reg-reg moves as a
 158 result.  Only one of the VRegUniques is returned, since it will be
 159 of the VRegUniqueLo form, and the upper-half VReg can be determined
 160 by applying getHiVRegFromLo to it.
 161 -}
 162
 163 data ChildCode64        -- a.k.a "Register64"
 164    = ChildCode64
 165         InstrBlock      -- code
 166         Reg             -- the lower 32-bit temporary which contains the
 167                         -- result; use getHiVRegFromLo to find the other
 168                         -- VRegUnique.  Rules of this simplified insn
 169                         -- selection game are therefore that the returned
 170                         -- Reg may be modified
 171
 172 #if WORD_SIZE_IN_BITS==32
 173 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 174 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 175 #endif
 176
 177 #ifndef x86_64_TARGET_ARCH
 178 iselExpr64        :: CmmExpr -> NatM ChildCode64
 179 #endif
 180
 181 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 182
 183 #if i386_TARGET_ARCH
 184
 185 assignMem_I64Code addrTree valueTree = do
 186   Amode addr addr_code <- getAmode addrTree
 187   ChildCode64 vcode rlo <- iselExpr64 valueTree
 188   let
 189         rhi = getHiVRegFromLo rlo
 190
 191         -- Little-endian store
 192         mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
 193         mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
 194   -- in
 195   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 196
 197
 198 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 199    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 200    let
 201          r_dst_lo = mkVReg u_dst I32
 202          r_dst_hi = getHiVRegFromLo r_dst_lo
 203          r_src_hi = getHiVRegFromLo r_src_lo
 204          mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
 205          mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
 206    -- in
 207    return (
 208         vcode `snocOL` mov_lo `snocOL` mov_hi
 209      )
 210
 211 assignReg_I64Code lvalue valueTree
 212    = panic "assignReg_I64Code(i386): invalid lvalue"
 213
 214 ------------
 215
 216 iselExpr64 (CmmLit (CmmInt i _)) = do
 217   (rlo,rhi) <- getNewRegPairNat I32
 218   let
 219         r = fromIntegral (fromIntegral i :: Word32)
 220         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 221         code = toOL [
 222                 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
 223                 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
 224                 ]
 225   -- in
 226   return (ChildCode64 code rlo)
 227
 228 iselExpr64 (CmmLoad addrTree I64) = do
 229    Amode addr addr_code <- getAmode addrTree
 230    (rlo,rhi) <- getNewRegPairNat I32
 231    let
 232         mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
 233         mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
 234    -- in
 235    return (
 236             ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 237                         rlo
 238      )
 239
 240 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 241    = return (ChildCode64 nilOL (mkVReg vu I32))
 242
 243 -- we handle addition, but rather badly
 244 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
 245    ChildCode64 code1 r1lo <- iselExpr64 e1
 246    (rlo,rhi) <- getNewRegPairNat I32
 247    let
 248         r = fromIntegral (fromIntegral i :: Word32)
 249         q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
 250         r1hi = getHiVRegFromLo r1lo
 251         code =  code1 `appOL`
 252                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 253                        ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
 254                        MOV I32 (OpReg r1hi) (OpReg rhi),
 255                        ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
 256    -- in
 257    return (ChildCode64 code rlo)
 258
 259 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 260    ChildCode64 code1 r1lo <- iselExpr64 e1
 261    ChildCode64 code2 r2lo <- iselExpr64 e2
 262    (rlo,rhi) <- getNewRegPairNat I32
 263    let
 264         r1hi = getHiVRegFromLo r1lo
 265         r2hi = getHiVRegFromLo r2lo
 266         code =  code1 `appOL`
 267                 code2 `appOL`
 268                 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
 269                        ADD I32 (OpReg r2lo) (OpReg rlo),
 270                        MOV I32 (OpReg r1hi) (OpReg rhi),
 271                        ADC I32 (OpReg r2hi) (OpReg rhi) ]
 272    -- in
 273    return (ChildCode64 code rlo)
 274
 275 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
 276      fn <- getAnyReg expr
 277      r_dst_lo <-  getNewRegNat I32
 278      let r_dst_hi = getHiVRegFromLo r_dst_lo
 279          code = fn r_dst_lo
 280      return (
 281              ChildCode64 (code `snocOL`
 282                           MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
 283                           r_dst_lo
 284             )
 285
 286 iselExpr64 expr
 287    = pprPanic "iselExpr64(i386)" (ppr expr)
 288
 289 #endif /* i386_TARGET_ARCH */
 290
 291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 292
 293 #if sparc_TARGET_ARCH
 294
 295 assignMem_I64Code addrTree valueTree = do
 296      Amode addr addr_code <- getAmode addrTree
 297      ChildCode64 vcode rlo <- iselExpr64 valueTree
 298      (src, code) <- getSomeReg addrTree
 299      let
 300          rhi = getHiVRegFromLo rlo
 301          -- Big-endian store
 302          mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
 303          mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
 304      return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
 305
 306 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 307      ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 308      let
 309          r_dst_lo = mkVReg u_dst pk
 310          r_dst_hi = getHiVRegFromLo r_dst_lo
 311          r_src_hi = getHiVRegFromLo r_src_lo
 312          mov_lo = mkMOV r_src_lo r_dst_lo
 313          mov_hi = mkMOV r_src_hi r_dst_hi
 314          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 315      return (vcode `snocOL` mov_hi `snocOL` mov_lo)
 316 assignReg_I64Code lvalue valueTree
 317    = panic "assignReg_I64Code(sparc): invalid lvalue"
 318
 319
 320 -- Don't delete this -- it's very handy for debugging.
 321 --iselExpr64 expr
 322 --   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
 323 --   = panic "iselExpr64(???)"
 324
 325 iselExpr64 (CmmLoad addrTree I64) = do
 326      Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
 327      rlo <- getNewRegNat I32
 328      let rhi = getHiVRegFromLo rlo
 329          mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
 330          mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
 331      return (
 332             ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
 333                          rlo
 334           )
 335
 336 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64 _))) = do
 337      r_dst_lo <-  getNewRegNat I32
 338      let r_dst_hi = getHiVRegFromLo r_dst_lo
 339          r_src_lo = mkVReg uq I32
 340          r_src_hi = getHiVRegFromLo r_src_lo
 341          mov_lo = mkMOV r_src_lo r_dst_lo
 342          mov_hi = mkMOV r_src_hi r_dst_hi
 343          mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
 344      return (
 345             ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
 346          )
 347
 348 iselExpr64 expr
 349    = pprPanic "iselExpr64(sparc)" (ppr expr)
 350
 351 #endif /* sparc_TARGET_ARCH */
 352
 353 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 354
 355 #if powerpc_TARGET_ARCH
 356
 357 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 358 getI64Amodes addrTree = do
 359     Amode hi_addr addr_code <- getAmode addrTree
 360     case addrOffset hi_addr 4 of
 361         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 362         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 363                            return (AddrRegImm hi_ptr (ImmInt 0),
 364                                    AddrRegImm hi_ptr (ImmInt 4),
 365                                    code)
 366
 367 assignMem_I64Code addrTree valueTree = do
 368         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 369         ChildCode64 vcode rlo <- iselExpr64 valueTree
 370         let
 371                 rhi = getHiVRegFromLo rlo
 372
 373                 -- Big-endian store
 374                 mov_hi = ST I32 rhi hi_addr
 375                 mov_lo = ST I32 rlo lo_addr
 376         -- in
 377         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 378
 379 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
 380    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 381    let
 382          r_dst_lo = mkVReg u_dst I32
 383          r_dst_hi = getHiVRegFromLo r_dst_lo
 384          r_src_hi = getHiVRegFromLo r_src_lo
 385          mov_lo = MR r_dst_lo r_src_lo
 386          mov_hi = MR r_dst_hi r_src_hi
 387    -- in
 388    return (
 389         vcode `snocOL` mov_lo `snocOL` mov_hi
 390      )
 391
 392 assignReg_I64Code lvalue valueTree
 393    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 394
 395
 396 -- Don't delete this -- it's very handy for debugging.
 397 --iselExpr64 expr
 398 --   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
 399 --   = panic "iselExpr64(???)"
 400
 401 iselExpr64 (CmmLoad addrTree I64) = do
 402     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 403     (rlo, rhi) <- getNewRegPairNat I32
 404     let mov_hi = LD I32 rhi hi_addr
 405         mov_lo = LD I32 rlo lo_addr
 406     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 407                          rlo
 408
 409 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
 410    = return (ChildCode64 nilOL (mkVReg vu I32))
 411
 412 iselExpr64 (CmmLit (CmmInt i _)) = do
 413   (rlo,rhi) <- getNewRegPairNat I32
 414   let
 415         half0 = fromIntegral (fromIntegral i :: Word16)
 416         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 417         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 418         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 419
 420         code = toOL [
 421                 LIS rlo (ImmInt half1),
 422                 OR rlo rlo (RIImm $ ImmInt half0),
 423                 LIS rhi (ImmInt half3),
 424                 OR rlo rlo (RIImm $ ImmInt half2)
 425                 ]
 426   -- in
 427   return (ChildCode64 code rlo)
 428
 429 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 430    ChildCode64 code1 r1lo <- iselExpr64 e1
 431    ChildCode64 code2 r2lo <- iselExpr64 e2
 432    (rlo,rhi) <- getNewRegPairNat I32
 433    let
 434         r1hi = getHiVRegFromLo r1lo
 435         r2hi = getHiVRegFromLo r2lo
 436         code =  code1 `appOL`
 437                 code2 `appOL`
 438                 toOL [ ADDC rlo r1lo r2lo,
 439                        ADDE rhi r1hi r2hi ]
 440    -- in
 441    return (ChildCode64 code rlo)
 442
 443 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
 444     (expr_reg,expr_code) <- getSomeReg expr
 445     (rlo, rhi) <- getNewRegPairNat I32
 446     let mov_hi = LI rhi (ImmInt 0)
 447         mov_lo = MR rlo expr_reg
 448     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 449                          rlo
 450 iselExpr64 expr
 451    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 452
 453 #endif /* powerpc_TARGET_ARCH */
 454
 455
 456 -- -----------------------------------------------------------------------------
 457 -- The 'Register' type
 458
 459 -- 'Register's passed up the tree.  If the stix code forces the register
 460 -- to live in a pre-decided machine register, it comes out as @Fixed@;
 461 -- otherwise, it comes out as @Any@, and the parent can decide which
 462 -- register to put it in.
 463
 464 data Register
 465   = Fixed   MachRep Reg InstrBlock
 466   | Any     MachRep (Reg -> InstrBlock)
 467
 468 swizzleRegisterRep :: Register -> MachRep -> Register
 469 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
 470 swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
 471
 472
 473 -- -----------------------------------------------------------------------------
 474 -- Utils based on getRegister, below
 475
 476 -- The dual to getAnyReg: compute an expression into a register, but
 477 -- we don't mind which one it is.
 478 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 479 getSomeReg expr = do
 480   r <- getRegister expr
 481   case r of
 482     Any rep code -> do
 483         tmp <- getNewRegNat rep
 484         return (tmp, code tmp)
 485     Fixed _ reg code ->
 486         return (reg, code)
 487
 488 -- -----------------------------------------------------------------------------
 489 -- Grab the Reg for a CmmReg
 490
 491 getRegisterReg :: CmmReg -> Reg
 492
 493 getRegisterReg (CmmLocal (LocalReg u pk _))
 494   = mkVReg u pk
 495
 496 getRegisterReg (CmmGlobal mid)
 497   = case get_GlobalReg_reg_or_addr mid of
 498        Left (RealReg rrno) -> RealReg rrno
 499        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 500           -- By this stage, the only MagicIds remaining should be the
 501           -- ones which map to a real machine register on this
 502           -- platform.  Hence ...
 503
 504
 505 -- -----------------------------------------------------------------------------
 506 -- Generate code to get a subtree into a Register
 507
 508 -- Don't delete this -- it's very handy for debugging.
 509 --getRegister expr
 510 --   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
 511 --   = panic "getRegister(???)"
 512
 513 getRegister :: CmmExpr -> NatM Register
 514
 515 #if !x86_64_TARGET_ARCH
 516     -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
 517     -- register, it can only be used for rip-relative addressing.
 518 getRegister (CmmReg (CmmGlobal PicBaseReg))
 519   = do
 520       reg <- getPicBaseNat wordRep
 521       return (Fixed wordRep reg nilOL)
 522 #endif
 523
 524 getRegister (CmmReg reg)
 525   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
 526
 527 getRegister tree@(CmmRegOff _ _)
 528   = getRegister (mangleIndexTree tree)
 529
 530
 531 #if WORD_SIZE_IN_BITS==32
 532     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 533     -- TO_W_(x), TO_W_(x >> 32)
 534
 535 getRegister (CmmMachOp (MO_U_Conv I64 I32)
 536              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 537   ChildCode64 code rlo <- iselExpr64 x
 538   return $ Fixed I32 (getHiVRegFromLo rlo) code
 539
 540 getRegister (CmmMachOp (MO_S_Conv I64 I32)
 541              [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
 542   ChildCode64 code rlo <- iselExpr64 x
 543   return $ Fixed I32 (getHiVRegFromLo rlo) code
 544
 545 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
 546   ChildCode64 code rlo <- iselExpr64 x
 547   return $ Fixed I32 rlo code
 548
 549 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
 550   ChildCode64 code rlo <- iselExpr64 x
 551   return $ Fixed I32 rlo code
 552
 553 #endif
 554
 555 -- end of machine-"independent" bit; here we go on the rest...
 556
 557 #if alpha_TARGET_ARCH
 558
 559 getRegister (StDouble d)
 560   = getBlockIdNat                   `thenNat` \ lbl ->
 561     getNewRegNat PtrRep             `thenNat` \ tmp ->
 562     let code dst = mkSeqInstrs [
 563             LDATA RoDataSegment lbl [
 564                     DATA TF [ImmLab (rational d)]
 565                 ],
 566             LDA tmp (AddrImm (ImmCLbl lbl)),
 567             LD TF dst (AddrReg tmp)]
 568     in
 569         return (Any F64 code)
 570
 571 getRegister (StPrim primop [x]) -- unary PrimOps
 572   = case primop of
 573       IntNegOp -> trivialUCode (NEG Q False) x
 574
 575       NotOp    -> trivialUCode NOT x
 576
 577       FloatNegOp  -> trivialUFCode FloatRep  (FNEG TF) x
 578       DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
 579
 580       OrdOp -> coerceIntCode IntRep x
 581       ChrOp -> chrCode x
 582
 583       Float2IntOp  -> coerceFP2Int    x
 584       Int2FloatOp  -> coerceInt2FP pr x
 585       Double2IntOp -> coerceFP2Int    x
 586       Int2DoubleOp -> coerceInt2FP pr x
 587
 588       Double2FloatOp -> coerceFltCode x
 589       Float2DoubleOp -> coerceFltCode x
 590
 591       other_op -> getRegister (StCall fn CCallConv F64 [x])
 592         where
 593           fn = case other_op of
 594                  FloatExpOp    -> fsLit "exp"
 595                  FloatLogOp    -> fsLit "log"
 596                  FloatSqrtOp   -> fsLit "sqrt"
 597                  FloatSinOp    -> fsLit "sin"
 598                  FloatCosOp    -> fsLit "cos"
 599                  FloatTanOp    -> fsLit "tan"
 600                  FloatAsinOp   -> fsLit "asin"
 601                  FloatAcosOp   -> fsLit "acos"
 602                  FloatAtanOp   -> fsLit "atan"
 603                  FloatSinhOp   -> fsLit "sinh"
 604                  FloatCoshOp   -> fsLit "cosh"
 605                  FloatTanhOp   -> fsLit "tanh"
 606                  DoubleExpOp   -> fsLit "exp"
 607                  DoubleLogOp   -> fsLit "log"
 608                  DoubleSqrtOp  -> fsLit "sqrt"
 609                  DoubleSinOp   -> fsLit "sin"
 610                  DoubleCosOp   -> fsLit "cos"
 611                  DoubleTanOp   -> fsLit "tan"
 612                  DoubleAsinOp  -> fsLit "asin"
 613                  DoubleAcosOp  -> fsLit "acos"
 614                  DoubleAtanOp  -> fsLit "atan"
 615                  DoubleSinhOp  -> fsLit "sinh"
 616                  DoubleCoshOp  -> fsLit "cosh"
 617                  DoubleTanhOp  -> fsLit "tanh"
 618   where
 619     pr = panic "MachCode.getRegister: no primrep needed for Alpha"
 620
 621 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 622   = case primop of
 623       CharGtOp -> trivialCode (CMP LTT) y x
 624       CharGeOp -> trivialCode (CMP LE) y x
 625       CharEqOp -> trivialCode (CMP EQQ) x y
 626       CharNeOp -> int_NE_code x y
 627       CharLtOp -> trivialCode (CMP LTT) x y
 628       CharLeOp -> trivialCode (CMP LE) x y
 629
 630       IntGtOp  -> trivialCode (CMP LTT) y x
 631       IntGeOp  -> trivialCode (CMP LE) y x
 632       IntEqOp  -> trivialCode (CMP EQQ) x y
 633       IntNeOp  -> int_NE_code x y
 634       IntLtOp  -> trivialCode (CMP LTT) x y
 635       IntLeOp  -> trivialCode (CMP LE) x y
 636
 637       WordGtOp -> trivialCode (CMP ULT) y x
 638       WordGeOp -> trivialCode (CMP ULE) x y
 639       WordEqOp -> trivialCode (CMP EQQ)  x y
 640       WordNeOp -> int_NE_code x y
 641       WordLtOp -> trivialCode (CMP ULT) x y
 642       WordLeOp -> trivialCode (CMP ULE) x y
 643
 644       AddrGtOp -> trivialCode (CMP ULT) y x
 645       AddrGeOp -> trivialCode (CMP ULE) y x
 646       AddrEqOp -> trivialCode (CMP EQQ)  x y
 647       AddrNeOp -> int_NE_code x y
 648       AddrLtOp -> trivialCode (CMP ULT) x y
 649       AddrLeOp -> trivialCode (CMP ULE) x y
 650
 651       FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 652       FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 653       FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 654       FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 655       FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
 656       FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
 657
 658       DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
 659       DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
 660       DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
 661       DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
 662       DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
 663       DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
 664
 665       IntAddOp  -> trivialCode (ADD Q False) x y
 666       IntSubOp  -> trivialCode (SUB Q False) x y
 667       IntMulOp  -> trivialCode (MUL Q False) x y
 668       IntQuotOp -> trivialCode (DIV Q False) x y
 669       IntRemOp  -> trivialCode (REM Q False) x y
 670
 671       WordAddOp  -> trivialCode (ADD Q False) x y
 672       WordSubOp  -> trivialCode (SUB Q False) x y
 673       WordMulOp  -> trivialCode (MUL Q False) x y
 674       WordQuotOp -> trivialCode (DIV Q True) x y
 675       WordRemOp  -> trivialCode (REM Q True) x y
 676
 677       FloatAddOp -> trivialFCode  FloatRep (FADD TF) x y
 678       FloatSubOp -> trivialFCode  FloatRep (FSUB TF) x y
 679       FloatMulOp -> trivialFCode  FloatRep (FMUL TF) x y
 680       FloatDivOp -> trivialFCode  FloatRep (FDIV TF) x y
 681
 682       DoubleAddOp -> trivialFCode  F64 (FADD TF) x y
 683       DoubleSubOp -> trivialFCode  F64 (FSUB TF) x y
 684       DoubleMulOp -> trivialFCode  F64 (FMUL TF) x y
 685       DoubleDivOp -> trivialFCode  F64 (FDIV TF) x y
 686
 687       AddrAddOp  -> trivialCode (ADD Q False) x y
 688       AddrSubOp  -> trivialCode (SUB Q False) x y
 689       AddrRemOp  -> trivialCode (REM Q True) x y
 690
 691       AndOp  -> trivialCode AND x y
 692       OrOp   -> trivialCode OR  x y
 693       XorOp  -> trivialCode XOR x y
 694       SllOp  -> trivialCode SLL x y
 695       SrlOp  -> trivialCode SRL x y
 696
 697       ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
 698       ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
 699       ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
 700
 701       FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
 702       DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
 703   where
 704     {- ------------------------------------------------------------
 705         Some bizarre special code for getting condition codes into
 706         registers.  Integer non-equality is a test for equality
 707         followed by an XOR with 1.  (Integer comparisons always set
 708         the result register to 0 or 1.)  Floating point comparisons of
 709         any kind leave the result in a floating point register, so we
 710         need to wrangle an integer register out of things.
 711     -}
 712     int_NE_code :: StixTree -> StixTree -> NatM Register
 713
 714     int_NE_code x y
 715       = trivialCode (CMP EQQ) x y       `thenNat` \ register ->
 716         getNewRegNat IntRep             `thenNat` \ tmp ->
 717         let
 718             code = registerCode register tmp
 719             src  = registerName register tmp
 720             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
 721         in
 722         return (Any IntRep code__2)
 723
 724     {- ------------------------------------------------------------
 725         Comments for int_NE_code also apply to cmpF_code
 726     -}
 727     cmpF_code
 728         :: (Reg -> Reg -> Reg -> Instr)
 729         -> Cond
 730         -> StixTree -> StixTree
 731         -> NatM Register
 732
 733     cmpF_code instr cond x y
 734       = trivialFCode pr instr x y       `thenNat` \ register ->
 735         getNewRegNat F64                `thenNat` \ tmp ->
 736         getBlockIdNat                   `thenNat` \ lbl ->
 737         let
 738             code = registerCode register tmp
 739             result  = registerName register tmp
 740
 741             code__2 dst = code . mkSeqInstrs [
 742                 OR zeroh (RIImm (ImmInt 1)) dst,
 743                 BF cond  result (ImmCLbl lbl),
 744                 OR zeroh (RIReg zeroh) dst,
 745                 NEWBLOCK lbl]
 746         in
 747         return (Any IntRep code__2)
 748       where
 749         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 750       ------------------------------------------------------------
 751
 752 getRegister (CmmLoad pk mem)
 753   = getAmode mem                    `thenNat` \ amode ->
 754     let
 755         code = amodeCode amode
 756         src   = amodeAddr amode
 757         size = primRepToSize pk
 758         code__2 dst = code . mkSeqInstr (LD size dst src)
 759     in
 760     return (Any pk code__2)
 761
 762 getRegister (StInt i)
 763   | fits8Bits i
 764   = let
 765         code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
 766     in
 767     return (Any IntRep code)
 768   | otherwise
 769   = let
 770         code dst = mkSeqInstr (LDI Q dst src)
 771     in
 772     return (Any IntRep code)
 773   where
 774     src = ImmInt (fromInteger i)
 775
 776 getRegister leaf
 777   | isJust imm
 778   = let
 779         code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
 780     in
 781     return (Any PtrRep code)
 782   where
 783     imm = maybeImm leaf
 784     imm__2 = case imm of Just x -> x
 785
 786 #endif /* alpha_TARGET_ARCH */
 787
 788 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 789
 790 #if i386_TARGET_ARCH
 791
 792 getRegister (CmmLit (CmmFloat f F32)) = do
 793     lbl <- getNewLabelNat
 794     dflags <- getDynFlagsNat
 795     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 796     Amode addr addr_code <- getAmode dynRef
 797     let code dst =
 798             LDATA ReadOnlyData
 799                         [CmmDataLabel lbl,
 800                          CmmStaticLit (CmmFloat f F32)]
 801             `consOL` (addr_code `snocOL`
 802             GLD F32 addr dst)
 803     -- in
 804     return (Any F32 code)
 805
 806
 807 getRegister (CmmLit (CmmFloat d F64))
 808   | d == 0.0
 809   = let code dst = unitOL (GLDZ dst)
 810     in  return (Any F64 code)
 811
 812   | d == 1.0
 813   = let code dst = unitOL (GLD1 dst)
 814     in  return (Any F64 code)
 815
 816   | otherwise = do
 817     lbl <- getNewLabelNat
 818     dflags <- getDynFlagsNat
 819     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 820     Amode addr addr_code <- getAmode dynRef
 821     let code dst =
 822             LDATA ReadOnlyData
 823                         [CmmDataLabel lbl,
 824                          CmmStaticLit (CmmFloat d F64)]
 825             `consOL` (addr_code `snocOL`
 826             GLD F64 addr dst)
 827     -- in
 828     return (Any F64 code)
 829
 830 #endif /* i386_TARGET_ARCH */
 831
 832 #if x86_64_TARGET_ARCH
 833
 834 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
 835    let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
 836         -- I don't know why there are xorpd, xorps, and pxor instructions.
 837         -- They all appear to do the same thing --SDM
 838    return (Any rep code)
 839
 840 getRegister (CmmLit (CmmFloat f rep)) = do
 841     lbl <- getNewLabelNat
 842     let code dst = toOL [
 843             LDATA ReadOnlyData
 844                         [CmmDataLabel lbl,
 845                          CmmStaticLit (CmmFloat f rep)],
 846             MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 847             ]
 848     -- in
 849     return (Any rep code)
 850
 851 #endif /* x86_64_TARGET_ARCH */
 852
 853 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 854
 855 -- catch simple cases of zero- or sign-extended load
 856 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
 857   code <- intLoadCode (MOVZxL I8) addr
 858   return (Any I32 code)
 859
 860 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
 861   code <- intLoadCode (MOVSxL I8) addr
 862   return (Any I32 code)
 863
 864 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
 865   code <- intLoadCode (MOVZxL I16) addr
 866   return (Any I32 code)
 867
 868 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
 869   code <- intLoadCode (MOVSxL I16) addr
 870   return (Any I32 code)
 871
 872 #endif
 873
 874 #if x86_64_TARGET_ARCH
 875
 876 -- catch simple cases of zero- or sign-extended load
 877 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
 878   code <- intLoadCode (MOVZxL I8) addr
 879   return (Any I64 code)
 880
 881 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
 882   code <- intLoadCode (MOVSxL I8) addr
 883   return (Any I64 code)
 884
 885 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
 886   code <- intLoadCode (MOVZxL I16) addr
 887   return (Any I64 code)
 888
 889 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
 890   code <- intLoadCode (MOVSxL I16) addr
 891   return (Any I64 code)
 892
 893 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
 894   code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
 895   return (Any I64 code)
 896
 897 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 898   code <- intLoadCode (MOVSxL I32) addr
 899   return (Any I64 code)
 900
 901 #endif
 902
 903 #if x86_64_TARGET_ARCH
 904 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
 905                                      CmmLit displacement])
 906     = return $ Any I64 (\dst -> unitOL $
 907         LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 908 #endif
 909
 910 #if x86_64_TARGET_ARCH
 911 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 912   x_code <- getAnyReg x
 913   lbl <- getNewLabelNat
 914   let
 915     code dst = x_code dst `appOL` toOL [
 916         -- This is how gcc does it, so it can't be that bad:
 917         LDATA ReadOnlyData16 [
 918                 CmmAlign 16,
 919                 CmmDataLabel lbl,
 920                 CmmStaticLit (CmmInt 0x80000000 I32),
 921                 CmmStaticLit (CmmInt 0 I32),
 922                 CmmStaticLit (CmmInt 0 I32),
 923                 CmmStaticLit (CmmInt 0 I32)
 924         ],
 925         XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 926                 -- xorps, so we need the 128-bit constant
 927                 -- ToDo: rip-relative
 928         ]
 929   --
 930   return (Any F32 code)
 931
 932 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 933   x_code <- getAnyReg x
 934   lbl <- getNewLabelNat
 935   let
 936         -- This is how gcc does it, so it can't be that bad:
 937     code dst = x_code dst `appOL` toOL [
 938         LDATA ReadOnlyData16 [
 939                 CmmAlign 16,
 940                 CmmDataLabel lbl,
 941                 CmmStaticLit (CmmInt 0x8000000000000000 I64),
 942                 CmmStaticLit (CmmInt 0 I64)
 943         ],
 944                 -- gcc puts an unpck here.  Wonder if we need it.
 945         XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 946                 -- xorpd, so we need the 128-bit constant
 947         ]
 948   --
 949   return (Any F64 code)
 950 #endif
 951
 952 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 953
 954 getRegister (CmmMachOp mop [x]) -- unary MachOps
 955   = case mop of
 956 #if i386_TARGET_ARCH
 957       MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
 958       MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
 959 #endif
 960
 961       MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
 962       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 963
 964       -- Nop conversions
 965       MO_U_Conv I32 I8  -> toI8Reg  I32 x
 966       MO_S_Conv I32 I8  -> toI8Reg  I32 x
 967       MO_U_Conv I16 I8  -> toI8Reg  I16 x
 968       MO_S_Conv I16 I8  -> toI8Reg  I16 x
 969       MO_U_Conv I32 I16 -> toI16Reg I32 x
 970       MO_S_Conv I32 I16 -> toI16Reg I32 x
 971 #if x86_64_TARGET_ARCH
 972       MO_U_Conv I64 I32 -> conversionNop I64 x
 973       MO_S_Conv I64 I32 -> conversionNop I64 x
 974       MO_U_Conv I64 I16 -> toI16Reg I64 x
 975       MO_S_Conv I64 I16 -> toI16Reg I64 x
 976       MO_U_Conv I64 I8  -> toI8Reg  I64 x
 977       MO_S_Conv I64 I8  -> toI8Reg  I64 x
 978 #endif
 979
 980       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 981       MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
 982
 983       -- widenings
 984       MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
 985       MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
 986       MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x
 987
 988       MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
 989       MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
 990       MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
 991
 992 #if x86_64_TARGET_ARCH
 993       MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
 994       MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
 995       MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
 996       MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
 997       MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
 998       MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
 999         -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1000         -- However, we don't want the register allocator to throw it
1001         -- away as an unnecessary reg-to-reg move, so we keep it in
1002         -- the form of a movzl and print it as a movl later.
1003 #endif
1004
1005 #if i386_TARGET_ARCH
1006       MO_S_Conv F32 F64 -> conversionNop F64 x
1007       MO_S_Conv F64 F32 -> conversionNop F32 x
1008 #else
1009       MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1010       MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1011 #endif
1012
1013       MO_S_Conv from to
1014         | isFloatingRep from -> coerceFP2Int from to x
1015         | isFloatingRep to   -> coerceInt2FP from to x
1016
1017       other -> pprPanic "getRegister" (pprMachOp mop)
1018    where
1019         -- signed or unsigned extension.
1020         integerExtend from to instr expr = do
1021             (reg,e_code) <- if from == I8 then getByteReg expr
1022                                           else getSomeReg expr
1023             let
1024                 code dst =
1025                   e_code `snocOL`
1026                   instr from (OpReg reg) (OpReg dst)
1027             return (Any to code)
1028
1029         toI8Reg new_rep expr
1030             = do codefn <- getAnyReg expr
1031                  return (Any new_rep codefn)
1032                 -- HACK: use getAnyReg to get a byte-addressable register.
1033                 -- If the source was a Fixed register, this will add the
1034                 -- mov instruction to put it into the desired destination.
1035                 -- We're assuming that the destination won't be a fixed
1036                 -- non-byte-addressable register; it won't be, because all
1037                 -- fixed registers are word-sized.
1038
1039         toI16Reg = toI8Reg -- for now
1040
1041         conversionNop new_rep expr
1042             = do e_code <- getRegister expr
1043                  return (swizzleRegisterRep e_code new_rep)
1044
1045
1046 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1047   = case mop of
1048       MO_Eq F32   -> condFltReg EQQ x y
1049       MO_Ne F32   -> condFltReg NE x y
1050       MO_S_Gt F32 -> condFltReg GTT x y
1051       MO_S_Ge F32 -> condFltReg GE x y
1052       MO_S_Lt F32 -> condFltReg LTT x y
1053       MO_S_Le F32 -> condFltReg LE x y
1054
1055       MO_Eq F64   -> condFltReg EQQ x y
1056       MO_Ne F64   -> condFltReg NE x y
1057       MO_S_Gt F64 -> condFltReg GTT x y
1058       MO_S_Ge F64 -> condFltReg GE x y
1059       MO_S_Lt F64 -> condFltReg LTT x y
1060       MO_S_Le F64 -> condFltReg LE x y
1061
1062       MO_Eq rep   -> condIntReg EQQ x y
1063       MO_Ne rep   -> condIntReg NE x y
1064
1065       MO_S_Gt rep -> condIntReg GTT x y
1066       MO_S_Ge rep -> condIntReg GE x y
1067       MO_S_Lt rep -> condIntReg LTT x y
1068       MO_S_Le rep -> condIntReg LE x y
1069
1070       MO_U_Gt rep -> condIntReg GU  x y
1071       MO_U_Ge rep -> condIntReg GEU x y
1072       MO_U_Lt rep -> condIntReg LU  x y
1073       MO_U_Le rep -> condIntReg LEU x y
1074
1075 #if i386_TARGET_ARCH
1076       MO_Add F32 -> trivialFCode F32 GADD x y
1077       MO_Sub F32 -> trivialFCode F32 GSUB x y
1078
1079       MO_Add F64 -> trivialFCode F64 GADD x y
1080       MO_Sub F64 -> trivialFCode F64 GSUB x y
1081
1082       MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1083       MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1084 #endif
1085
1086 #if x86_64_TARGET_ARCH
1087       MO_Add F32 -> trivialFCode F32 ADD x y
1088       MO_Sub F32 -> trivialFCode F32 SUB x y
1089
1090       MO_Add F64 -> trivialFCode F64 ADD x y
1091       MO_Sub F64 -> trivialFCode F64 SUB x y
1092
1093       MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1094       MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1095 #endif
1096
1097       MO_Add rep -> add_code rep x y
1098       MO_Sub rep -> sub_code rep x y
1099
1100       MO_S_Quot rep -> div_code rep True  True  x y
1101       MO_S_Rem  rep -> div_code rep True  False x y
1102       MO_U_Quot rep -> div_code rep False True  x y
1103       MO_U_Rem  rep -> div_code rep False False x y
1104
1105 #if i386_TARGET_ARCH
1106       MO_Mul   F32 -> trivialFCode F32 GMUL x y
1107       MO_Mul   F64 -> trivialFCode F64 GMUL x y
1108 #endif
1109
1110 #if x86_64_TARGET_ARCH
1111       MO_Mul   F32 -> trivialFCode F32 MUL x y
1112       MO_Mul   F64 -> trivialFCode F64 MUL x y
1113 #endif
1114
1115       MO_Mul   rep -> let op = IMUL rep in
1116                       trivialCode rep op (Just op) x y
1117
1118       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1119
1120       MO_And rep -> let op = AND rep in
1121                     trivialCode rep op (Just op) x y
1122       MO_Or  rep -> let op = OR  rep in
1123                     trivialCode rep op (Just op) x y
1124       MO_Xor rep -> let op = XOR rep in
1125                     trivialCode rep op (Just op) x y
1126
1127         {- Shift ops on x86s have constraints on their source, it
1128            either has to be Imm, CL or 1
1129             => trivialCode is not restrictive enough (sigh.)
1130         -}
1131       MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
1132       MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1133       MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1134
1135       other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1136   where
1137     --------------------
1138     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1139     imulMayOflo rep a b = do
1140          (a_reg, a_code) <- getNonClobberedReg a
1141          b_code <- getAnyReg b
1142          let
1143              shift_amt  = case rep of
1144                            I32 -> 31
1145                            I64 -> 63
1146                            _ -> panic "shift_amt"
1147
1148              code = a_code `appOL` b_code eax `appOL`
1149                         toOL [
1150                            IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
1151                            SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1152                                 -- sign extend lower part
1153                            SUB rep (OpReg edx) (OpReg eax)
1154                                 -- compare against upper
1155                            -- eax==0 if high part == sign extended low part
1156                         ]
1157          -- in
1158          return (Fixed rep eax code)
1159
1160     --------------------
1161     shift_code :: MachRep
1162                -> (Operand -> Operand -> Instr)
1163                -> CmmExpr
1164                -> CmmExpr
1165                -> NatM Register
1166
1167     {- Case1: shift length as immediate -}
1168     shift_code rep instr x y@(CmmLit lit) = do
1169           x_code <- getAnyReg x
1170           let
1171                code dst
1172                   = x_code dst `snocOL`
1173                     instr (OpImm (litToImm lit)) (OpReg dst)
1174           -- in
1175           return (Any rep code)
1176
1177     {- Case2: shift length is complex (non-immediate)
1178       * y must go in %ecx.
1179       * we cannot do y first *and* put its result in %ecx, because
1180         %ecx might be clobbered by x.
1181       * if we do y second, then x cannot be
1182         in a clobbered reg.  Also, we cannot clobber x's reg
1183         with the instruction itself.
1184       * so we can either:
1185         - do y first, put its result in a fresh tmp, then copy it to %ecx later
1186         - do y second and put its result into %ecx.  x gets placed in a fresh
1187           tmp.  This is likely to be better, becuase the reg alloc can
1188           eliminate this reg->reg move here (it won't eliminate the other one,
1189           because the move is into the fixed %ecx).
1190     -}
1191     shift_code rep instr x y{-amount-} = do
1192         x_code <- getAnyReg x
1193         tmp <- getNewRegNat rep
1194         y_code <- getAnyReg y
1195         let
1196            code = x_code tmp `appOL`
1197                   y_code ecx `snocOL`
1198                   instr (OpReg ecx) (OpReg tmp)
1199         -- in
1200         return (Fixed rep tmp code)
1201
1202     --------------------
1203     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1204     add_code rep x (CmmLit (CmmInt y _))
1205         | not (is64BitInteger y) = add_int rep x y
1206     add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1207
1208     --------------------
1209     sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1210     sub_code rep x (CmmLit (CmmInt y _))
1211         | not (is64BitInteger (-y)) = add_int rep x (-y)
1212     sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1213
1214     -- our three-operand add instruction:
1215     add_int rep x y = do
1216         (x_reg, x_code) <- getSomeReg x
1217         let
1218             imm = ImmInt (fromInteger y)
1219             code dst
1220                = x_code `snocOL`
1221                  LEA rep
1222                         (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1223                         (OpReg dst)
1224         --
1225         return (Any rep code)
1226
1227     ----------------------
1228     div_code rep signed quotient x y = do
1229            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1230            x_code <- getAnyReg x
1231            let
1232              widen | signed    = CLTD rep
1233                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
1234
1235              instr | signed    = IDIV
1236                    | otherwise = DIV
1237
1238              code = y_code `appOL`
1239                     x_code eax `appOL`
1240                     toOL [widen, instr rep y_op]
1241
1242              result | quotient  = eax
1243                     | otherwise = edx
1244
1245            -- in
1246            return (Fixed rep result code)
1247
1248
1249 getRegister (CmmLoad mem pk)
1250   | isFloatingRep pk
1251   = do
1252     Amode src mem_code <- getAmode mem
1253     let
1254         code dst = mem_code `snocOL`
1255                    IF_ARCH_i386(GLD pk src dst,
1256                                 MOV pk (OpAddr src) (OpReg dst))
1257     --
1258     return (Any pk code)
1259
1260 #if i386_TARGET_ARCH
1261 getRegister (CmmLoad mem pk)
1262   | pk /= I64
1263   = do
1264     code <- intLoadCode (instr pk) mem
1265     return (Any pk code)
1266   where
1267         instr I8  = MOVZxL pk
1268         instr I16 = MOV I16
1269         instr I32 = MOV I32
1270         -- we always zero-extend 8-bit loads, if we
1271         -- can't think of anything better.  This is because
1272         -- we can't guarantee access to an 8-bit variant of every register
1273         -- (esi and edi don't have 8-bit variants), so to make things
1274         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1275 #endif
1276
1277 #if x86_64_TARGET_ARCH
1278 -- Simpler memory load code on x86_64
1279 getRegister (CmmLoad mem pk)
1280   = do
1281     code <- intLoadCode (MOV pk) mem
1282     return (Any pk code)
1283 #endif
1284
1285 getRegister (CmmLit (CmmInt 0 rep))
1286   = let
1287         -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1288         adj_rep = case rep of I64 -> I32; _ -> rep
1289         rep1 = IF_ARCH_i386( rep, adj_rep )
1290         code dst
1291            = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1292     in
1293         return (Any rep code)
1294
1295 #if x86_64_TARGET_ARCH
1296   -- optimisation for loading small literals on x86_64: take advantage
1297   -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1298   -- instruction forms are shorter.
1299 getRegister (CmmLit lit)
1300   | I64 <- cmmLitRep lit, not (isBigLit lit)
1301   = let
1302         imm = litToImm lit
1303         code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1304     in
1305         return (Any I64 code)
1306   where
1307    isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1308    isBigLit _ = False
1309         -- note1: not the same as is64BitLit, because that checks for
1310         -- signed literals that fit in 32 bits, but we want unsigned
1311         -- literals here.
1312         -- note2: all labels are small, because we're assuming the
1313         -- small memory model (see gcc docs, -mcmodel=small).
1314 #endif
1315
1316 getRegister (CmmLit lit)
1317   = let
1318         rep = cmmLitRep lit
1319         imm = litToImm lit
1320         code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1321     in
1322         return (Any rep code)
1323
1324 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1325
1326
1327 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1328    -> NatM (Reg -> InstrBlock)
1329 intLoadCode instr mem = do
1330   Amode src mem_code <- getAmode mem
1331   return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1332
1333 -- Compute an expression into *any* register, adding the appropriate
1334 -- move instruction if necessary.
1335 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1336 getAnyReg expr = do
1337   r <- getRegister expr
1338   anyReg r
1339
1340 anyReg :: Register -> NatM (Reg -> InstrBlock)
1341 anyReg (Any _ code)          = return code
1342 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1343
1344 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1345 -- Fixed registers might not be byte-addressable, so we make sure we've
1346 -- got a temporary, inserting an extra reg copy if necessary.
1347 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1348 #if x86_64_TARGET_ARCH
1349 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1350 #else
1351 getByteReg expr = do
1352   r <- getRegister expr
1353   case r of
1354     Any rep code -> do
1355         tmp <- getNewRegNat rep
1356         return (tmp, code tmp)
1357     Fixed rep reg code
1358         | isVirtualReg reg -> return (reg,code)
1359         | otherwise -> do
1360             tmp <- getNewRegNat rep
1361             return (tmp, code `snocOL` reg2reg rep reg tmp)
1362         -- ToDo: could optimise slightly by checking for byte-addressable
1363         -- real registers, but that will happen very rarely if at all.
1364 #endif
1365
1366 -- Another variant: this time we want the result in a register that cannot
1367 -- be modified by code to evaluate an arbitrary expression.
1368 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1369 getNonClobberedReg expr = do
1370   r <- getRegister expr
1371   case r of
1372     Any rep code -> do
1373         tmp <- getNewRegNat rep
1374         return (tmp, code tmp)
1375     Fixed rep reg code
1376         -- only free regs can be clobbered
1377         | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1378                 tmp <- getNewRegNat rep
1379                 return (tmp, code `snocOL` reg2reg rep reg tmp)
1380         | otherwise ->
1381                 return (reg, code)
1382
1383 reg2reg :: MachRep -> Reg -> Reg -> Instr
1384 reg2reg rep src dst
1385 #if i386_TARGET_ARCH
1386   | isFloatingRep rep = GMOV src dst
1387 #endif
1388   | otherwise         = MOV rep (OpReg src) (OpReg dst)
1389
1390 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1391
1392 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1393
1394 #if sparc_TARGET_ARCH
1395
1396 getRegister (CmmLit (CmmFloat f F32)) = do
1397     lbl <- getNewLabelNat
1398     let code dst = toOL [
1399             LDATA ReadOnlyData
1400                         [CmmDataLabel lbl,
1401                          CmmStaticLit (CmmFloat f F32)],
1402             SETHI (HI (ImmCLbl lbl)) dst,
1403             LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1404     return (Any F32 code)
1405
1406 getRegister (CmmLit (CmmFloat d F64)) = do
1407     lbl <- getNewLabelNat
1408     let code dst = toOL [
1409             LDATA ReadOnlyData
1410                         [CmmDataLabel lbl,
1411                          CmmStaticLit (CmmFloat d F64)],
1412             SETHI (HI (ImmCLbl lbl)) dst,
1413             LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1414     return (Any F64 code)
1415
1416 getRegister (CmmMachOp mop [x]) -- unary MachOps
1417   = case mop of
1418       MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
1419       MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
1420
1421       MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
1422       MO_Not rep       -> trivialUCode rep (XNOR False g0) x
1423
1424       MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1425
1426       MO_U_Conv F64 F32-> coerceDbl2Flt x
1427       MO_U_Conv F32 F64-> coerceFlt2Dbl x
1428
1429       MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1430       MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1431       MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1432       MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1433
1434       -- Conversions which are a nop on sparc
1435       MO_U_Conv from to
1436         | from == to   -> conversionNop to   x
1437       MO_U_Conv I32 to -> conversionNop to   x
1438       MO_S_Conv I32 to -> conversionNop to   x
1439
1440       -- widenings
1441       MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
1442       MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1443       MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
1444       MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
1445
1446       other_op -> panic "Unknown unary mach op"
1447     where
1448         -- XXX SLL/SRL?
1449         integerExtend signed from to expr = do
1450            (reg, e_code) <- getSomeReg expr
1451            let
1452                code dst =
1453                    e_code `snocOL`
1454                    ((if signed then SRA else SRL)
1455                           reg (RIImm (ImmInt 0)) dst)
1456            return (Any to code)
1457         conversionNop new_rep expr
1458             = do e_code <- getRegister expr
1459                  return (swizzleRegisterRep e_code new_rep)
1460
1461 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1462   = case mop of
1463       MO_Eq F32 -> condFltReg EQQ x y
1464       MO_Ne F32 -> condFltReg NE x y
1465
1466       MO_S_Gt F32 -> condFltReg GTT x y
1467       MO_S_Ge F32 -> condFltReg GE x y
1468       MO_S_Lt F32 -> condFltReg LTT x y
1469       MO_S_Le F32 -> condFltReg LE x y
1470
1471       MO_Eq F64 -> condFltReg EQQ x y
1472       MO_Ne F64 -> condFltReg NE x y
1473
1474       MO_S_Gt F64 -> condFltReg GTT x y
1475       MO_S_Ge F64 -> condFltReg GE x y
1476       MO_S_Lt F64 -> condFltReg LTT x y
1477       MO_S_Le F64 -> condFltReg LE x y
1478
1479       MO_Eq rep -> condIntReg EQQ x y
1480       MO_Ne rep -> condIntReg NE x y
1481
1482       MO_S_Gt rep -> condIntReg GTT x y
1483       MO_S_Ge rep -> condIntReg GE x y
1484       MO_S_Lt rep -> condIntReg LTT x y
1485       MO_S_Le rep -> condIntReg LE x y
1486
1487       MO_U_Gt I32  -> condIntReg GTT x y
1488       MO_U_Ge I32  -> condIntReg GE x y
1489       MO_U_Lt I32  -> condIntReg LTT x y
1490       MO_U_Le I32  -> condIntReg LE x y
1491
1492       MO_U_Gt I16 -> condIntReg GU  x y
1493       MO_U_Ge I16 -> condIntReg GEU x y
1494       MO_U_Lt I16 -> condIntReg LU  x y
1495       MO_U_Le I16 -> condIntReg LEU x y
1496
1497       MO_Add I32 -> trivialCode I32 (ADD False False) x y
1498       MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1499
1500       MO_S_MulMayOflo rep -> imulMayOflo rep x y
1501 {-
1502       -- ToDo: teach about V8+ SPARC div instructions
1503       MO_S_Quot I32 -> idiv (fsLit ".div")  x y
1504       MO_S_Rem I32  -> idiv (fsLit ".rem")  x y
1505       MO_U_Quot I32 -> idiv (fsLit ".udiv")  x y
1506       MO_U_Rem I32  -> idiv (fsLit ".urem")  x y
1507 -}
1508       MO_Add F32  -> trivialFCode F32 FADD  x y
1509       MO_Sub F32   -> trivialFCode F32  FSUB x y
1510       MO_Mul F32   -> trivialFCode F32  FMUL  x y
1511       MO_S_Quot F32   -> trivialFCode F32  FDIV x y
1512
1513       MO_Add F64   -> trivialFCode F64 FADD  x y
1514       MO_Sub F64   -> trivialFCode F64  FSUB x y
1515       MO_Mul F64   -> trivialFCode F64  FMUL x y
1516       MO_S_Quot F64   -> trivialFCode F64  FDIV x y
1517
1518       MO_And rep   -> trivialCode rep (AND False) x y
1519       MO_Or rep    -> trivialCode rep (OR  False) x y
1520       MO_Xor rep   -> trivialCode rep (XOR False) x y
1521
1522       MO_Mul rep -> trivialCode rep (SMUL False) x y
1523
1524       MO_Shl rep   -> trivialCode rep SLL  x y
1525       MO_U_Shr rep   -> trivialCode rep SRL x y
1526       MO_S_Shr rep   -> trivialCode rep SRA x y
1527
1528 {-
1529       MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1530                                          [promote x, promote y])
1531                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
1532       MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1533                                         [x, y])
1534 -}
1535       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1536   where
1537     --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1538
1539     --------------------
1540     imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1541     imulMayOflo rep a b = do
1542          (a_reg, a_code) <- getSomeReg a
1543          (b_reg, b_code) <- getSomeReg b
1544          res_lo <- getNewRegNat I32
1545          res_hi <- getNewRegNat I32
1546          let
1547             shift_amt  = case rep of
1548                           I32 -> 31
1549                           I64 -> 63
1550                           _ -> panic "shift_amt"
1551             code dst = a_code `appOL` b_code `appOL`
1552                        toOL [
1553                            SMUL False a_reg (RIReg b_reg) res_lo,
1554                            RDY res_hi,
1555                            SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1556                            SUB False False res_lo (RIReg res_hi) dst
1557                         ]
1558          return (Any I32 code)
1559
1560 getRegister (CmmLoad mem pk) = do
1561     Amode src code <- getAmode mem
1562     let
1563         code__2 dst = code `snocOL` LD pk src dst
1564     return (Any pk code__2)
1565
1566 getRegister (CmmLit (CmmInt i _))
1567   | fits13Bits i
1568   = let
1569         src = ImmInt (fromInteger i)
1570         code dst = unitOL (OR False g0 (RIImm src) dst)
1571     in
1572         return (Any I32 code)
1573
1574 getRegister (CmmLit lit)
1575   = let rep = cmmLitRep lit
1576         imm = litToImm lit
1577         code dst = toOL [
1578             SETHI (HI imm) dst,
1579             OR False dst (RIImm (LO imm)) dst]
1580     in return (Any I32 code)
1581
1582 #endif /* sparc_TARGET_ARCH */
1583
1584 #if powerpc_TARGET_ARCH
1585 getRegister (CmmLoad mem pk)
1586   | pk /= I64
1587   = do
1588         Amode addr addr_code <- getAmode mem
1589         let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1590                        addr_code `snocOL` LD pk dst addr
1591         return (Any pk code)
1592
1593 -- catch simple cases of zero- or sign-extended load
1594 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1595     Amode addr addr_code <- getAmode mem
1596     return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1597
1598 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1599
1600 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1601     Amode addr addr_code <- getAmode mem
1602     return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1603
1604 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1605     Amode addr addr_code <- getAmode mem
1606     return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1607
1608 getRegister (CmmMachOp mop [x]) -- unary MachOps
1609   = case mop of
1610       MO_Not rep   -> trivialUCode rep NOT x
1611
1612       MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1613       MO_S_Conv F32 F64 -> conversionNop F64 x
1614
1615       MO_S_Conv from to
1616         | from == to         -> conversionNop to x
1617         | isFloatingRep from -> coerceFP2Int from to x
1618         | isFloatingRep to   -> coerceInt2FP from to x
1619
1620         -- narrowing is a nop: we treat the high bits as undefined
1621       MO_S_Conv I32 to -> conversionNop to x
1622       MO_S_Conv I16 I8 -> conversionNop I8 x
1623       MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1624       MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1625
1626       MO_U_Conv from to
1627         | from == to -> conversionNop to x
1628         -- narrowing is a nop: we treat the high bits as undefined
1629       MO_U_Conv I32 to -> conversionNop to x
1630       MO_U_Conv I16 I8 -> conversionNop I8 x
1631       MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1632       MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1633
1634       MO_S_Neg F32      -> trivialUCode F32 FNEG x
1635       MO_S_Neg F64      -> trivialUCode F64 FNEG x
1636       MO_S_Neg rep      -> trivialUCode rep NEG x
1637
1638     where
1639         conversionNop new_rep expr
1640             = do e_code <- getRegister expr
1641                  return (swizzleRegisterRep e_code new_rep)
1642
1643 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1644   = case mop of
1645       MO_Eq F32 -> condFltReg EQQ x y
1646       MO_Ne F32 -> condFltReg NE  x y
1647
1648       MO_S_Gt F32 -> condFltReg GTT x y
1649       MO_S_Ge F32 -> condFltReg GE  x y
1650       MO_S_Lt F32 -> condFltReg LTT x y
1651       MO_S_Le F32 -> condFltReg LE  x y
1652
1653       MO_Eq F64 -> condFltReg EQQ x y
1654       MO_Ne F64 -> condFltReg NE  x y
1655
1656       MO_S_Gt F64 -> condFltReg GTT x y
1657       MO_S_Ge F64 -> condFltReg GE  x y
1658       MO_S_Lt F64 -> condFltReg LTT x y
1659       MO_S_Le F64 -> condFltReg LE  x y
1660
1661       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1662       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1663
1664       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1665       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1666       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1667       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1668
1669       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1670       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1671       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1672       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1673
1674       MO_Add F32   -> trivialCodeNoImm F32 (FADD F32) x y
1675       MO_Sub F32   -> trivialCodeNoImm F32 (FSUB F32) x y
1676       MO_Mul F32   -> trivialCodeNoImm F32 (FMUL F32) x y
1677       MO_S_Quot F32   -> trivialCodeNoImm F32 (FDIV F32) x y
1678
1679       MO_Add F64   -> trivialCodeNoImm F64 (FADD F64) x y
1680       MO_Sub F64   -> trivialCodeNoImm F64 (FSUB F64) x y
1681       MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
1682       MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
1683
1684          -- optimize addition with 32-bit immediate
1685          -- (needed for PIC)
1686       MO_Add I32 ->
1687         case y of
1688           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1689             -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1690           CmmLit lit
1691             -> do
1692                 (src, srcCode) <- getSomeReg x
1693                 let imm = litToImm lit
1694                     code dst = srcCode `appOL` toOL [
1695                                     ADDIS dst src (HA imm),
1696                                     ADD dst dst (RIImm (LO imm))
1697                                 ]
1698                 return (Any I32 code)
1699           _ -> trivialCode I32 True ADD x y
1700
1701       MO_Add rep -> trivialCode rep True ADD x y
1702       MO_Sub rep ->
1703         case y of    -- subfi ('substract from' with immediate) doesn't exist
1704           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1705             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1706           _ -> trivialCodeNoImm rep SUBF y x
1707
1708       MO_Mul rep -> trivialCode rep True MULLW x y
1709
1710       MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1711
1712       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1713       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1714
1715       MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1716       MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1717
1718       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1719       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1720
1721       MO_And rep   -> trivialCode rep False AND x y
1722       MO_Or rep    -> trivialCode rep False OR x y
1723       MO_Xor rep   -> trivialCode rep False XOR x y
1724
1725       MO_Shl rep   -> trivialCode rep False SLW x y
1726       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1727       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1728
1729 getRegister (CmmLit (CmmInt i rep))
1730   | Just imm <- makeImmediate rep True i
1731   = let
1732         code dst = unitOL (LI dst imm)
1733     in
1734         return (Any rep code)
1735
1736 getRegister (CmmLit (CmmFloat f frep)) = do
1737     lbl <- getNewLabelNat
1738     dflags <- getDynFlagsNat
1739     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1740     Amode addr addr_code <- getAmode dynRef
1741     let code dst =
1742             LDATA ReadOnlyData  [CmmDataLabel lbl,
1743                                  CmmStaticLit (CmmFloat f frep)]
1744             `consOL` (addr_code `snocOL` LD frep dst addr)
1745     return (Any frep code)
1746
1747 getRegister (CmmLit lit)
1748   = let rep = cmmLitRep lit
1749         imm = litToImm lit
1750         code dst = toOL [
1751               LIS dst (HA imm),
1752               ADD dst dst (RIImm (LO imm))
1753           ]
1754     in return (Any rep code)
1755
1756 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1757
1758     -- extend?Rep: wrap integer expression of type rep
1759     -- in a conversion to I32
1760 extendSExpr I32 x = x
1761 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1762 extendUExpr I32 x = x
1763 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1764
1765 #endif /* powerpc_TARGET_ARCH */
1766
1767
1768 -- -----------------------------------------------------------------------------
1769 --  The 'Amode' type: Memory addressing modes passed up the tree.
1770
1771 data Amode = Amode AddrMode InstrBlock
1772
1773 {-
1774 Now, given a tree (the argument to an CmmLoad) that references memory,
1775 produce a suitable addressing mode.
1776
1777 A Rule of the Game (tm) for Amodes: use of the addr bit must
1778 immediately follow use of the code part, since the code part puts
1779 values in registers which the addr then refers to.  So you can't put
1780 anything in between, lest it overwrite some of those registers.  If
1781 you need to do some other computation between the code part and use of
1782 the addr bit, first store the effective address from the amode in a
1783 temporary, then do the other computation, and then use the temporary:
1784
1785     code
1786     LEA amode, tmp
1787     ... other computation ...
1788     ... (tmp) ...
1789 -}
1790
1791 getAmode :: CmmExpr -> NatM Amode
1792 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1793
1794 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1795
1796 #if alpha_TARGET_ARCH
1797
1798 getAmode (StPrim IntSubOp [x, StInt i])
1799   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1800     getRegister x               `thenNat` \ register ->
1801     let
1802         code = registerCode register tmp
1803         reg  = registerName register tmp
1804         off  = ImmInt (-(fromInteger i))
1805     in
1806     return (Amode (AddrRegImm reg off) code)
1807
1808 getAmode (StPrim IntAddOp [x, StInt i])
1809   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1810     getRegister x               `thenNat` \ register ->
1811     let
1812         code = registerCode register tmp
1813         reg  = registerName register tmp
1814         off  = ImmInt (fromInteger i)
1815     in
1816     return (Amode (AddrRegImm reg off) code)
1817
1818 getAmode leaf
1819   | isJust imm
1820   = return (Amode (AddrImm imm__2) id)
1821   where
1822     imm = maybeImm leaf
1823     imm__2 = case imm of Just x -> x
1824
1825 getAmode other
1826   = getNewRegNat PtrRep         `thenNat` \ tmp ->
1827     getRegister other           `thenNat` \ register ->
1828     let
1829         code = registerCode register tmp
1830         reg  = registerName register tmp
1831     in
1832     return (Amode (AddrReg reg) code)
1833
1834 #endif /* alpha_TARGET_ARCH */
1835
1836 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1837
1838 #if x86_64_TARGET_ARCH
1839
1840 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1841                                      CmmLit displacement])
1842     = return $ Amode (ripRel (litToImm displacement)) nilOL
1843
1844 #endif
1845
1846 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1847
1848 -- This is all just ridiculous, since it carefully undoes
1849 -- what mangleIndexTree has just done.
1850 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1851   | not (is64BitLit lit)
1852   -- ASSERT(rep == I32)???
1853   = do (x_reg, x_code) <- getSomeReg x
1854        let off = ImmInt (-(fromInteger i))
1855        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1856
1857 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1858   | not (is64BitLit lit)
1859   -- ASSERT(rep == I32)???
1860   = do (x_reg, x_code) <- getSomeReg x
1861        let off = ImmInt (fromInteger i)
1862        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1863
1864 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
1865 -- recognised by the next rule.
1866 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1867                                   b@(CmmLit _)])
1868   = getAmode (CmmMachOp (MO_Add rep) [b,a])
1869
1870 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1871                                         [y, CmmLit (CmmInt shift _)]])
1872   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1873   = x86_complex_amode x y shift 0
1874
1875 getAmode (CmmMachOp (MO_Add rep)
1876                 [x, CmmMachOp (MO_Add _)
1877                         [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1878                          CmmLit (CmmInt offset _)]])
1879   | shift == 0 || shift == 1 || shift == 2 || shift == 3
1880   && not (is64BitInteger offset)
1881   = x86_complex_amode x y shift offset
1882
1883 getAmode (CmmMachOp (MO_Add rep) [x,y])
1884   = x86_complex_amode x y 0 0
1885
1886 getAmode (CmmLit lit) | not (is64BitLit lit)
1887   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1888
1889 getAmode expr = do
1890   (reg,code) <- getSomeReg expr
1891   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1892
1893
1894 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1895 x86_complex_amode base index shift offset
1896   = do (x_reg, x_code) <- getNonClobberedReg base
1897         -- x must be in a temp, because it has to stay live over y_code
1898         -- we could compre x_reg and y_reg and do something better here...
1899        (y_reg, y_code) <- getSomeReg index
1900        let
1901            code = x_code `appOL` y_code
1902            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1903        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1904                code)
1905
1906 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1907
1908 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1909
1910 #if sparc_TARGET_ARCH
1911
1912 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1913   | fits13Bits (-i)
1914   = do
1915        (reg, code) <- getSomeReg x
1916        let
1917          off  = ImmInt (-(fromInteger i))
1918        return (Amode (AddrRegImm reg off) code)
1919
1920
1921 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1922   | fits13Bits i
1923   = do
1924        (reg, code) <- getSomeReg x
1925        let
1926          off  = ImmInt (fromInteger i)
1927        return (Amode (AddrRegImm reg off) code)
1928
1929 getAmode (CmmMachOp (MO_Add rep) [x, y])
1930   = do
1931     (regX, codeX) <- getSomeReg x
1932     (regY, codeY) <- getSomeReg y
1933     let
1934         code = codeX `appOL` codeY
1935     return (Amode (AddrRegReg regX regY) code)
1936
1937 -- XXX Is this same as "leaf" in Stix?
1938 getAmode (CmmLit lit)
1939   = do
1940       tmp <- getNewRegNat I32
1941       let
1942         code = unitOL (SETHI (HI imm__2) tmp)
1943       return (Amode (AddrRegImm tmp (LO imm__2)) code)
1944       where
1945          imm__2 = litToImm lit
1946
1947 getAmode other
1948   = do
1949        (reg, code) <- getSomeReg other
1950        let
1951             off  = ImmInt 0
1952        return (Amode (AddrRegImm reg off) code)
1953
1954 #endif /* sparc_TARGET_ARCH */
1955
1956 #ifdef powerpc_TARGET_ARCH
1957 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1958   | Just off <- makeImmediate I32 True (-i)
1959   = do
1960         (reg, code) <- getSomeReg x
1961         return (Amode (AddrRegImm reg off) code)
1962
1963
1964 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1965   | Just off <- makeImmediate I32 True i
1966   = do
1967         (reg, code) <- getSomeReg x
1968         return (Amode (AddrRegImm reg off) code)
1969
1970    -- optimize addition with 32-bit immediate
1971    -- (needed for PIC)
1972 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1973   = do
1974         tmp <- getNewRegNat I32
1975         (src, srcCode) <- getSomeReg x
1976         let imm = litToImm lit
1977             code = srcCode `snocOL` ADDIS tmp src (HA imm)
1978         return (Amode (AddrRegImm tmp (LO imm)) code)
1979
1980 getAmode (CmmLit lit)
1981   = do
1982         tmp <- getNewRegNat I32
1983         let imm = litToImm lit
1984             code = unitOL (LIS tmp (HA imm))
1985         return (Amode (AddrRegImm tmp (LO imm)) code)
1986
1987 getAmode (CmmMachOp (MO_Add I32) [x, y])
1988   = do
1989         (regX, codeX) <- getSomeReg x
1990         (regY, codeY) <- getSomeReg y
1991         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1992
1993 getAmode other
1994   = do
1995         (reg, code) <- getSomeReg other
1996         let
1997             off  = ImmInt 0
1998         return (Amode (AddrRegImm reg off) code)
1999 #endif /* powerpc_TARGET_ARCH */
2000
2001 -- -----------------------------------------------------------------------------
2002 -- getOperand: sometimes any operand will do.
2003
2004 -- getNonClobberedOperand: the value of the operand will remain valid across
2005 -- the computation of an arbitrary expression, unless the expression
2006 -- is computed directly into a register which the operand refers to
2007 -- (see trivialCode where this function is used for an example).
2008
2009 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2010
2011 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2012 #if x86_64_TARGET_ARCH
2013 getNonClobberedOperand (CmmLit lit)
2014   | isSuitableFloatingPointLit lit = do
2015     lbl <- getNewLabelNat
2016     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2017                                            CmmStaticLit lit])
2018     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2019 #endif
2020 getNonClobberedOperand (CmmLit lit)
2021   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2022     return (OpImm (litToImm lit), nilOL)
2023 getNonClobberedOperand (CmmLoad mem pk)
2024   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2025     Amode src mem_code <- getAmode mem
2026     (src',save_code) <-
2027         if (amodeCouldBeClobbered src)
2028                 then do
2029                    tmp <- getNewRegNat wordRep
2030                    return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2031                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2032                 else
2033                    return (src, nilOL)
2034     return (OpAddr src', save_code `appOL` mem_code)
2035 getNonClobberedOperand e = do
2036     (reg, code) <- getNonClobberedReg e
2037     return (OpReg reg, code)
2038
2039 amodeCouldBeClobbered :: AddrMode -> Bool
2040 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2041
2042 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2043 regClobbered _ = False
2044
2045 -- getOperand: the operand is not required to remain valid across the
2046 -- computation of an arbitrary expression.
2047 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2048 #if x86_64_TARGET_ARCH
2049 getOperand (CmmLit lit)
2050   | isSuitableFloatingPointLit lit = do
2051     lbl <- getNewLabelNat
2052     let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
2053                                            CmmStaticLit lit])
2054     return (OpAddr (ripRel (ImmCLbl lbl)), code)
2055 #endif
2056 getOperand (CmmLit lit)
2057   | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2058     return (OpImm (litToImm lit), nilOL)
2059 getOperand (CmmLoad mem pk)
2060   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2061     Amode src mem_code <- getAmode mem
2062     return (OpAddr src, mem_code)
2063 getOperand e = do
2064     (reg, code) <- getSomeReg e
2065     return (OpReg reg, code)
2066
2067 isOperand :: CmmExpr -> Bool
2068 isOperand (CmmLoad _ _) = True
2069 isOperand (CmmLit lit)  = not (is64BitLit lit)
2070                           || isSuitableFloatingPointLit lit
2071 isOperand _             = False
2072
2073 -- if we want a floating-point literal as an operand, we can
2074 -- use it directly from memory.  However, if the literal is
2075 -- zero, we're better off generating it into a register using
2076 -- xor.
2077 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2078 isSuitableFloatingPointLit _ = False
2079
2080 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2081 getRegOrMem (CmmLoad mem pk)
2082   | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2083     Amode src mem_code <- getAmode mem
2084     return (OpAddr src, mem_code)
2085 getRegOrMem e = do
2086     (reg, code) <- getNonClobberedReg e
2087     return (OpReg reg, code)
2088
2089 #if x86_64_TARGET_ARCH
2090 is64BitLit (CmmInt i I64) = is64BitInteger i
2091    -- assume that labels are in the range 0-2^31-1: this assumes the
2092    -- small memory model (see gcc docs, -mcmodel=small).
2093 #endif
2094 is64BitLit x = False
2095 #endif
2096
2097 is64BitInteger :: Integer -> Bool
2098 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2099   where i64 = fromIntegral i :: Int64
2100   -- a CmmInt is intended to be truncated to the appropriate
2101   -- number of bits, so here we truncate it to Int64.  This is
2102   -- important because e.g. -1 as a CmmInt might be either
2103   -- -1 or 18446744073709551615.
2104
2105 -- -----------------------------------------------------------------------------
2106 --  The 'CondCode' type:  Condition codes passed up the tree.
2107
2108 data CondCode = CondCode Bool Cond InstrBlock
2109
2110 -- Set up a condition code for a conditional branch.
2111
2112 getCondCode :: CmmExpr -> NatM CondCode
2113
2114 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2115
2116 #if alpha_TARGET_ARCH
2117 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2118 #endif /* alpha_TARGET_ARCH */
2119
2120 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2121
2122 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2123 -- yes, they really do seem to want exactly the same!
2124
2125 getCondCode (CmmMachOp mop [x, y])
2126   =
2127     case mop of
2128       MO_Eq F32 -> condFltCode EQQ x y
2129       MO_Ne F32 -> condFltCode NE  x y
2130
2131       MO_S_Gt F32 -> condFltCode GTT x y
2132       MO_S_Ge F32 -> condFltCode GE  x y
2133       MO_S_Lt F32 -> condFltCode LTT x y
2134       MO_S_Le F32 -> condFltCode LE  x y
2135
2136       MO_Eq F64 -> condFltCode EQQ x y
2137       MO_Ne F64 -> condFltCode NE  x y
2138
2139       MO_S_Gt F64 -> condFltCode GTT x y
2140       MO_S_Ge F64 -> condFltCode GE  x y
2141       MO_S_Lt F64 -> condFltCode LTT x y
2142       MO_S_Le F64 -> condFltCode LE  x y
2143
2144       MO_Eq rep -> condIntCode EQQ  x y
2145       MO_Ne rep -> condIntCode NE   x y
2146
2147       MO_S_Gt rep -> condIntCode GTT  x y
2148       MO_S_Ge rep -> condIntCode GE   x y
2149       MO_S_Lt rep -> condIntCode LTT  x y
2150       MO_S_Le rep -> condIntCode LE   x y
2151
2152       MO_U_Gt rep -> condIntCode GU   x y
2153       MO_U_Ge rep -> condIntCode GEU  x y
2154       MO_U_Lt rep -> condIntCode LU   x y
2155       MO_U_Le rep -> condIntCode LEU  x y
2156
2157       other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2158
2159 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2160
2161 #elif powerpc_TARGET_ARCH
2162
2163 -- almost the same as everywhere else - but we need to
2164 -- extend small integers to 32 bit first
2165
2166 getCondCode (CmmMachOp mop [x, y])
2167   = case mop of
2168       MO_Eq F32 -> condFltCode EQQ x y
2169       MO_Ne F32 -> condFltCode NE  x y
2170
2171       MO_S_Gt F32 -> condFltCode GTT x y
2172       MO_S_Ge F32 -> condFltCode GE  x y
2173       MO_S_Lt F32 -> condFltCode LTT x y
2174       MO_S_Le F32 -> condFltCode LE  x y
2175
2176       MO_Eq F64 -> condFltCode EQQ x y
2177       MO_Ne F64 -> condFltCode NE  x y
2178
2179       MO_S_Gt F64 -> condFltCode GTT x y
2180       MO_S_Ge F64 -> condFltCode GE  x y
2181       MO_S_Lt F64 -> condFltCode LTT x y
2182       MO_S_Le F64 -> condFltCode LE  x y
2183
2184       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
2185       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
2186
2187       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
2188       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
2189       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
2190       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
2191
2192       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
2193       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
2194       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
2195       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
2196
2197       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2198
2199 getCondCode other =  panic "getCondCode(2)(powerpc)"
2200
2201
2202 #endif
2203
2204
2205 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2206 -- passed back up the tree.
2207
2208 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2209
2210 #if alpha_TARGET_ARCH
2211 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2212 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2213 #endif /* alpha_TARGET_ARCH */
2214
2215 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2216 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2217
2218 -- memory vs immediate
2219 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2220     Amode x_addr x_code <- getAmode x
2221     let
2222         imm  = litToImm lit
2223         code = x_code `snocOL`
2224                   CMP pk (OpImm imm) (OpAddr x_addr)
2225     --
2226     return (CondCode False cond code)
2227
2228 -- anything vs zero, using a mask
2229 -- TODO: Add some sanity checking!!!!
2230 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2231     | (CmmLit (CmmInt mask pk2)) <- o2
2232     = do
2233       (x_reg, x_code) <- getSomeReg x
2234       let
2235          code = x_code `snocOL`
2236                 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2237       --
2238       return (CondCode False cond code)
2239
2240 -- anything vs zero
2241 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2242     (x_reg, x_code) <- getSomeReg x
2243     let
2244         code = x_code `snocOL`
2245                   TEST pk (OpReg x_reg) (OpReg x_reg)
2246     --
2247     return (CondCode False cond code)
2248
2249 -- anything vs operand
2250 condIntCode cond x y | isOperand y = do
2251     (x_reg, x_code) <- getNonClobberedReg x
2252     (y_op,  y_code) <- getOperand y
2253     let
2254         code = x_code `appOL` y_code `snocOL`
2255                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2256     -- in
2257     return (CondCode False cond code)
2258
2259 -- anything vs anything
2260 condIntCode cond x y = do
2261   (y_reg, y_code) <- getNonClobberedReg y
2262   (x_op, x_code) <- getRegOrMem x
2263   let
2264         code = y_code `appOL`
2265                x_code `snocOL`
2266                   CMP (cmmExprRep x) (OpReg y_reg) x_op
2267   -- in
2268   return (CondCode False cond code)
2269 #endif
2270
2271 #if i386_TARGET_ARCH
2272 condFltCode cond x y
2273   = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2274   (x_reg, x_code) <- getNonClobberedReg x
2275   (y_reg, y_code) <- getSomeReg y
2276   let
2277         code = x_code `appOL` y_code `snocOL`
2278                 GCMP cond x_reg y_reg
2279   -- The GCMP insn does the test and sets the zero flag if comparable
2280   -- and true.  Hence we always supply EQQ as the condition to test.
2281   return (CondCode True EQQ code)
2282 #endif /* i386_TARGET_ARCH */
2283
2284 #if x86_64_TARGET_ARCH
2285 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2286 -- an operand, but the right must be a reg.  We can probably do better
2287 -- than this general case...
2288 condFltCode cond x y = do
2289   (x_reg, x_code) <- getNonClobberedReg x
2290   (y_op, y_code) <- getOperand y
2291   let
2292         code = x_code `appOL`
2293                y_code `snocOL`
2294                   CMP (cmmExprRep x) y_op (OpReg x_reg)
2295         -- NB(1): we need to use the unsigned comparison operators on the
2296         -- result of this comparison.
2297   -- in
2298   return (CondCode True (condToUnsigned cond) code)
2299 #endif
2300
2301 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2302
2303 #if sparc_TARGET_ARCH
2304
2305 condIntCode cond x (CmmLit (CmmInt y rep))
2306   | fits13Bits y
2307   = do
2308        (src1, code) <- getSomeReg x
2309        let
2310            src2 = ImmInt (fromInteger y)
2311            code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2312        return (CondCode False cond code')
2313
2314 condIntCode cond x y = do
2315     (src1, code1) <- getSomeReg x
2316     (src2, code2) <- getSomeReg y
2317     let
2318         code__2 = code1 `appOL` code2 `snocOL`
2319                   SUB False True src1 (RIReg src2) g0
2320     return (CondCode False cond code__2)
2321
2322 -----------
2323 condFltCode cond x y = do
2324     (src1, code1) <- getSomeReg x
2325     (src2, code2) <- getSomeReg y
2326     tmp <- getNewRegNat F64
2327     let
2328         promote x = FxTOy F32 F64 x tmp
2329
2330         pk1   = cmmExprRep x
2331         pk2   = cmmExprRep y
2332
2333         code__2 =
2334                 if pk1 == pk2 then
2335                     code1 `appOL` code2 `snocOL`
2336                     FCMP True pk1 src1 src2
2337                 else if pk1 == F32 then
2338                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2339                     FCMP True F64 tmp src2
2340                 else
2341                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2342                     FCMP True F64 src1 tmp
2343     return (CondCode True cond code__2)
2344
2345 #endif /* sparc_TARGET_ARCH */
2346
2347 #if powerpc_TARGET_ARCH
2348 --  ###FIXME: I16 and I8!
2349 condIntCode cond x (CmmLit (CmmInt y rep))
2350   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2351   = do
2352         (src1, code) <- getSomeReg x
2353         let
2354             code' = code `snocOL`
2355                 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2356         return (CondCode False cond code')
2357
2358 condIntCode cond x y = do
2359     (src1, code1) <- getSomeReg x
2360     (src2, code2) <- getSomeReg y
2361     let
2362         code' = code1 `appOL` code2 `snocOL`
2363                   (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2364     return (CondCode False cond code')
2365
2366 condFltCode cond x y = do
2367     (src1, code1) <- getSomeReg x
2368     (src2, code2) <- getSomeReg y
2369     let
2370         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
2371         code'' = case cond of -- twiddle CR to handle unordered case
2372                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2373                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2374                     _ -> code'
2375                  where
2376                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
2377     return (CondCode True cond code'')
2378
2379 #endif /* powerpc_TARGET_ARCH */
2380
2381 -- -----------------------------------------------------------------------------
2382 -- Generating assignments
2383
2384 -- Assignments are really at the heart of the whole code generation
2385 -- business.  Almost all top-level nodes of any real importance are
2386 -- assignments, which correspond to loads, stores, or register
2387 -- transfers.  If we're really lucky, some of the register transfers
2388 -- will go away, because we can use the destination register to
2389 -- complete the code generation for the right hand side.  This only
2390 -- fails when the right hand side is forced into a fixed register
2391 -- (e.g. the result of a call).
2392
2393 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2394 assignReg_IntCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2395
2396 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2397 assignReg_FltCode :: MachRep -> CmmReg  -> CmmExpr -> NatM InstrBlock
2398
2399 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2400
2401 #if alpha_TARGET_ARCH
2402
2403 assignIntCode pk (CmmLoad dst _) src
2404   = getNewRegNat IntRep             `thenNat` \ tmp ->
2405     getAmode dst                    `thenNat` \ amode ->
2406     getRegister src                 `thenNat` \ register ->
2407     let
2408         code1   = amodeCode amode []
2409         dst__2  = amodeAddr amode
2410         code2   = registerCode register tmp []
2411         src__2  = registerName register tmp
2412         sz      = primRepToSize pk
2413         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2414     in
2415     return code__2
2416
2417 assignIntCode pk dst src
2418   = getRegister dst                         `thenNat` \ register1 ->
2419     getRegister src                         `thenNat` \ register2 ->
2420     let
2421         dst__2  = registerName register1 zeroh
2422         code    = registerCode register2 dst__2
2423         src__2  = registerName register2 dst__2
2424         code__2 = if isFixed register2
2425                   then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2426                   else code
2427     in
2428     return code__2
2429
2430 #endif /* alpha_TARGET_ARCH */
2431
2432 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2433
2434 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2435
2436 -- integer assignment to memory
2437
2438 -- specific case of adding/subtracting an integer to a particular address.
2439 -- ToDo: catch other cases where we can use an operation directly on a memory
2440 -- address.
2441 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2442                                                  CmmLit (CmmInt i _)])
2443    | addr == addr2, pk /= I64 || not (is64BitInteger i),
2444      Just instr <- check op
2445    = do Amode amode code_addr <- getAmode addr
2446         let code = code_addr `snocOL`
2447                    instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2448         return code
2449    where
2450         check (MO_Add _) = Just ADD
2451         check (MO_Sub _) = Just SUB
2452         check _ = Nothing
2453         -- ToDo: more?
2454
2455 -- general case
2456 assignMem_IntCode pk addr src = do
2457     Amode addr code_addr <- getAmode addr
2458     (code_src, op_src)   <- get_op_RI src
2459     let
2460         code = code_src `appOL`
2461                code_addr `snocOL`
2462                   MOV pk op_src (OpAddr addr)
2463         -- NOTE: op_src is stable, so it will still be valid
2464         -- after code_addr.  This may involve the introduction
2465         -- of an extra MOV to a temporary register, but we hope
2466         -- the register allocator will get rid of it.
2467     --
2468     return code
2469   where
2470     get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
2471     get_op_RI (CmmLit lit) | not (is64BitLit lit)
2472       = return (nilOL, OpImm (litToImm lit))
2473     get_op_RI op
2474       = do (reg,code) <- getNonClobberedReg op
2475            return (code, OpReg reg)
2476
2477
2478 -- Assign; dst is a reg, rhs is mem
2479 assignReg_IntCode pk reg (CmmLoad src _) = do
2480   load_code <- intLoadCode (MOV pk) src
2481   return (load_code (getRegisterReg reg))
2482
2483 -- dst is a reg, but src could be anything
2484 assignReg_IntCode pk reg src = do
2485   code <- getAnyReg src
2486   return (code (getRegisterReg reg))
2487
2488 #endif /* i386_TARGET_ARCH */
2489
2490 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2491
2492 #if sparc_TARGET_ARCH
2493
2494 assignMem_IntCode pk addr src = do
2495     (srcReg, code) <- getSomeReg src
2496     Amode dstAddr addr_code <- getAmode addr
2497     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2498
2499 assignReg_IntCode pk reg src = do
2500     r <- getRegister src
2501     return $ case r of
2502         Any _ code         -> code dst
2503         Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2504     where
2505       dst = getRegisterReg reg
2506
2507
2508 #endif /* sparc_TARGET_ARCH */
2509
2510 #if powerpc_TARGET_ARCH
2511
2512 assignMem_IntCode pk addr src = do
2513     (srcReg, code) <- getSomeReg src
2514     Amode dstAddr addr_code <- getAmode addr
2515     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2516
2517 -- dst is a reg, but src could be anything
2518 assignReg_IntCode pk reg src
2519     = do
2520         r <- getRegister src
2521         return $ case r of
2522             Any _ code         -> code dst
2523             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2524     where
2525         dst = getRegisterReg reg
2526
2527 #endif /* powerpc_TARGET_ARCH */
2528
2529
2530 -- -----------------------------------------------------------------------------
2531 -- Floating-point assignments
2532
2533 #if alpha_TARGET_ARCH
2534
2535 assignFltCode pk (CmmLoad dst _) src
2536   = getNewRegNat pk                 `thenNat` \ tmp ->
2537     getAmode dst                    `thenNat` \ amode ->
2538     getRegister src                         `thenNat` \ register ->
2539     let
2540         code1   = amodeCode amode []
2541         dst__2  = amodeAddr amode
2542         code2   = registerCode register tmp []
2543         src__2  = registerName register tmp
2544         sz      = primRepToSize pk
2545         code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2546     in
2547     return code__2
2548
2549 assignFltCode pk dst src
2550   = getRegister dst                         `thenNat` \ register1 ->
2551     getRegister src                         `thenNat` \ register2 ->
2552     let
2553         dst__2  = registerName register1 zeroh
2554         code    = registerCode register2 dst__2
2555         src__2  = registerName register2 dst__2
2556         code__2 = if isFixed register2
2557                   then code . mkSeqInstr (FMOV src__2 dst__2)
2558                   else code
2559     in
2560     return code__2
2561
2562 #endif /* alpha_TARGET_ARCH */
2563
2564 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2565
2566 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2567
2568 -- Floating point assignment to memory
2569 assignMem_FltCode pk addr src = do
2570   (src_reg, src_code) <- getNonClobberedReg src
2571   Amode addr addr_code <- getAmode addr
2572   let
2573         code = src_code `appOL`
2574                addr_code `snocOL`
2575                 IF_ARCH_i386(GST pk src_reg addr,
2576                              MOV pk (OpReg src_reg) (OpAddr addr))
2577   return code
2578
2579 -- Floating point assignment to a register/temporary
2580 assignReg_FltCode pk reg src = do
2581   src_code <- getAnyReg src
2582   return (src_code (getRegisterReg reg))
2583
2584 #endif /* i386_TARGET_ARCH */
2585
2586 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2587
2588 #if sparc_TARGET_ARCH
2589
2590 -- Floating point assignment to memory
2591 assignMem_FltCode pk addr src = do
2592     Amode dst__2 code1 <- getAmode addr
2593     (src__2, code2) <- getSomeReg src
2594     tmp1 <- getNewRegNat pk
2595     let
2596         pk__2   = cmmExprRep src
2597         code__2 = code1 `appOL` code2 `appOL`
2598             if   pk == pk__2
2599             then unitOL (ST pk src__2 dst__2)
2600             else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2601     return code__2
2602
2603 -- Floating point assignment to a register/temporary
2604 -- ToDo: Verify correctness
2605 assignReg_FltCode pk reg src = do
2606     r <- getRegister src
2607     v1 <- getNewRegNat pk
2608     return $ case r of
2609         Any _ code         -> code dst
2610         Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2611     where
2612       dst = getRegisterReg reg
2613
2614 #endif /* sparc_TARGET_ARCH */
2615
2616 #if powerpc_TARGET_ARCH
2617
2618 -- Easy, isn't it?
2619 assignMem_FltCode = assignMem_IntCode
2620 assignReg_FltCode = assignReg_IntCode
2621
2622 #endif /* powerpc_TARGET_ARCH */
2623
2624
2625 -- -----------------------------------------------------------------------------
2626 -- Generating an non-local jump
2627
2628 -- (If applicable) Do not fill the delay slots here; you will confuse the
2629 -- register allocator.
2630
2631 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2632
2633 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2634
2635 #if alpha_TARGET_ARCH
2636
2637 genJump (CmmLabel lbl)
2638   | isAsmTemp lbl = returnInstr (BR target)
2639   | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2640   where
2641     target = ImmCLbl lbl
2642
2643 genJump tree
2644   = getRegister tree                `thenNat` \ register ->
2645     getNewRegNat PtrRep             `thenNat` \ tmp ->
2646     let
2647         dst    = registerName register pv
2648         code   = registerCode register pv
2649         target = registerName register pv
2650     in
2651     if isFixed register then
2652         returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2653     else
2654     return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2655
2656 #endif /* alpha_TARGET_ARCH */
2657
2658 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2659
2660 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2661
2662 genJump (CmmLoad mem pk) = do
2663   Amode target code <- getAmode mem
2664   return (code `snocOL` JMP (OpAddr target))
2665
2666 genJump (CmmLit lit) = do
2667   return (unitOL (JMP (OpImm (litToImm lit))))
2668
2669 genJump expr = do
2670   (reg,code) <- getSomeReg expr
2671   return (code `snocOL` JMP (OpReg reg))
2672
2673 #endif /* i386_TARGET_ARCH */
2674
2675 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2676
2677 #if sparc_TARGET_ARCH
2678
2679 genJump (CmmLit (CmmLabel lbl))
2680   = return (toOL [CALL (Left target) 0 True, NOP])
2681   where
2682     target = ImmCLbl lbl
2683
2684 genJump tree
2685   = do
2686         (target, code) <- getSomeReg tree
2687         return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2688
2689 #endif /* sparc_TARGET_ARCH */
2690
2691 #if powerpc_TARGET_ARCH
2692 genJump (CmmLit (CmmLabel lbl))
2693   = return (unitOL $ JMP lbl)
2694
2695 genJump tree
2696   = do
2697         (target,code) <- getSomeReg tree
2698         return (code `snocOL` MTCTR target `snocOL` BCTR [])
2699 #endif /* powerpc_TARGET_ARCH */
2700
2701
2702 -- -----------------------------------------------------------------------------
2703 --  Unconditional branches
2704
2705 genBranch :: BlockId -> NatM InstrBlock
2706
2707 genBranch = return . toOL . mkBranchInstr
2708
2709 -- -----------------------------------------------------------------------------
2710 --  Conditional jumps
2711
2712 {-
2713 Conditional jumps are always to local labels, so we can use branch
2714 instructions.  We peek at the arguments to decide what kind of
2715 comparison to do.
2716
2717 ALPHA: For comparisons with 0, we're laughing, because we can just do
2718 the desired conditional branch.
2719
2720 I386: First, we have to ensure that the condition
2721 codes are set according to the supplied comparison operation.
2722
2723 SPARC: First, we have to ensure that the condition codes are set
2724 according to the supplied comparison operation.  We generate slightly
2725 different code for floating point comparisons, because a floating
2726 point operation cannot directly precede a @BF@.  We assume the worst
2727 and fill that slot with a @NOP@.
2728
2729 SPARC: Do not fill the delay slots here; you will confuse the register
2730 allocator.
2731 -}
2732
2733
2734 genCondJump
2735     :: BlockId      -- the branch target
2736     -> CmmExpr      -- the condition on which to branch
2737     -> NatM InstrBlock
2738
2739 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2740
2741 #if alpha_TARGET_ARCH
2742
2743 genCondJump id (StPrim op [x, StInt 0])
2744   = getRegister x                           `thenNat` \ register ->
2745     getNewRegNat (registerRep register)
2746                                     `thenNat` \ tmp ->
2747     let
2748         code   = registerCode register tmp
2749         value  = registerName register tmp
2750         pk     = registerRep register
2751         target = ImmCLbl lbl
2752     in
2753     returnSeq code [BI (cmpOp op) value target]
2754   where
2755     cmpOp CharGtOp = GTT
2756     cmpOp CharGeOp = GE
2757     cmpOp CharEqOp = EQQ
2758     cmpOp CharNeOp = NE
2759     cmpOp CharLtOp = LTT
2760     cmpOp CharLeOp = LE
2761     cmpOp IntGtOp = GTT
2762     cmpOp IntGeOp = GE
2763     cmpOp IntEqOp = EQQ
2764     cmpOp IntNeOp = NE
2765     cmpOp IntLtOp = LTT
2766     cmpOp IntLeOp = LE
2767     cmpOp WordGtOp = NE
2768     cmpOp WordGeOp = ALWAYS
2769     cmpOp WordEqOp = EQQ
2770     cmpOp WordNeOp = NE
2771     cmpOp WordLtOp = NEVER
2772     cmpOp WordLeOp = EQQ
2773     cmpOp AddrGtOp = NE
2774     cmpOp AddrGeOp = ALWAYS
2775     cmpOp AddrEqOp = EQQ
2776     cmpOp AddrNeOp = NE
2777     cmpOp AddrLtOp = NEVER
2778     cmpOp AddrLeOp = EQQ
2779
2780 genCondJump lbl (StPrim op [x, StDouble 0.0])
2781   = getRegister x                           `thenNat` \ register ->
2782     getNewRegNat (registerRep register)
2783                                     `thenNat` \ tmp ->
2784     let
2785         code   = registerCode register tmp
2786         value  = registerName register tmp
2787         pk     = registerRep register
2788         target = ImmCLbl lbl
2789     in
2790     return (code . mkSeqInstr (BF (cmpOp op) value target))
2791   where
2792     cmpOp FloatGtOp = GTT
2793     cmpOp FloatGeOp = GE
2794     cmpOp FloatEqOp = EQQ
2795     cmpOp FloatNeOp = NE
2796     cmpOp FloatLtOp = LTT
2797     cmpOp FloatLeOp = LE
2798     cmpOp DoubleGtOp = GTT
2799     cmpOp DoubleGeOp = GE
2800     cmpOp DoubleEqOp = EQQ
2801     cmpOp DoubleNeOp = NE
2802     cmpOp DoubleLtOp = LTT
2803     cmpOp DoubleLeOp = LE
2804
2805 genCondJump lbl (StPrim op [x, y])
2806   | fltCmpOp op
2807   = trivialFCode pr instr x y       `thenNat` \ register ->
2808     getNewRegNat F64                `thenNat` \ tmp ->
2809     let
2810         code   = registerCode register tmp
2811         result = registerName register tmp
2812         target = ImmCLbl lbl
2813     in
2814     return (code . mkSeqInstr (BF cond result target))
2815   where
2816     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2817
2818     fltCmpOp op = case op of
2819         FloatGtOp -> True
2820         FloatGeOp -> True
2821         FloatEqOp -> True
2822         FloatNeOp -> True
2823         FloatLtOp -> True
2824         FloatLeOp -> True
2825         DoubleGtOp -> True
2826         DoubleGeOp -> True
2827         DoubleEqOp -> True
2828         DoubleNeOp -> True
2829         DoubleLtOp -> True
2830         DoubleLeOp -> True
2831         _ -> False
2832     (instr, cond) = case op of
2833         FloatGtOp -> (FCMP TF LE, EQQ)
2834         FloatGeOp -> (FCMP TF LTT, EQQ)
2835         FloatEqOp -> (FCMP TF EQQ, NE)
2836         FloatNeOp -> (FCMP TF EQQ, EQQ)
2837         FloatLtOp -> (FCMP TF LTT, NE)
2838         FloatLeOp -> (FCMP TF LE, NE)
2839         DoubleGtOp -> (FCMP TF LE, EQQ)
2840         DoubleGeOp -> (FCMP TF LTT, EQQ)
2841         DoubleEqOp -> (FCMP TF EQQ, NE)
2842         DoubleNeOp -> (FCMP TF EQQ, EQQ)
2843         DoubleLtOp -> (FCMP TF LTT, NE)
2844         DoubleLeOp -> (FCMP TF LE, NE)
2845
2846 genCondJump lbl (StPrim op [x, y])
2847   = trivialCode instr x y           `thenNat` \ register ->
2848     getNewRegNat IntRep             `thenNat` \ tmp ->
2849     let
2850         code   = registerCode register tmp
2851         result = registerName register tmp
2852         target = ImmCLbl lbl
2853     in
2854     return (code . mkSeqInstr (BI cond result target))
2855   where
2856     (instr, cond) = case op of
2857         CharGtOp -> (CMP LE, EQQ)
2858         CharGeOp -> (CMP LTT, EQQ)
2859         CharEqOp -> (CMP EQQ, NE)
2860         CharNeOp -> (CMP EQQ, EQQ)
2861         CharLtOp -> (CMP LTT, NE)
2862         CharLeOp -> (CMP LE, NE)
2863         IntGtOp -> (CMP LE, EQQ)
2864         IntGeOp -> (CMP LTT, EQQ)
2865         IntEqOp -> (CMP EQQ, NE)
2866         IntNeOp -> (CMP EQQ, EQQ)
2867         IntLtOp -> (CMP LTT, NE)
2868         IntLeOp -> (CMP LE, NE)
2869         WordGtOp -> (CMP ULE, EQQ)
2870         WordGeOp -> (CMP ULT, EQQ)
2871         WordEqOp -> (CMP EQQ, NE)
2872         WordNeOp -> (CMP EQQ, EQQ)
2873         WordLtOp -> (CMP ULT, NE)
2874         WordLeOp -> (CMP ULE, NE)
2875         AddrGtOp -> (CMP ULE, EQQ)
2876         AddrGeOp -> (CMP ULT, EQQ)
2877         AddrEqOp -> (CMP EQQ, NE)
2878         AddrNeOp -> (CMP EQQ, EQQ)
2879         AddrLtOp -> (CMP ULT, NE)
2880         AddrLeOp -> (CMP ULE, NE)
2881
2882 #endif /* alpha_TARGET_ARCH */
2883
2884 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2885
2886 #if i386_TARGET_ARCH
2887
2888 genCondJump id bool = do
2889   CondCode _ cond code <- getCondCode bool
2890   return (code `snocOL` JXX cond id)
2891
2892 #endif
2893
2894 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2895
2896 #if x86_64_TARGET_ARCH
2897
2898 genCondJump id bool = do
2899   CondCode is_float cond cond_code <- getCondCode bool
2900   if not is_float
2901     then
2902         return (cond_code `snocOL` JXX cond id)
2903     else do
2904         lbl <- getBlockIdNat
2905
2906         -- see comment with condFltReg
2907         let code = case cond of
2908                         NE  -> or_unordered
2909                         GU  -> plain_test
2910                         GEU -> plain_test
2911                         _   -> and_ordered
2912
2913             plain_test = unitOL (
2914                   JXX cond id
2915                 )
2916             or_unordered = toOL [
2917                   JXX cond id,
2918                   JXX PARITY id
2919                 ]
2920             and_ordered = toOL [
2921                   JXX PARITY lbl,
2922                   JXX cond id,
2923                   JXX ALWAYS lbl,
2924                   NEWBLOCK lbl
2925                 ]
2926         return (cond_code `appOL` code)
2927
2928 #endif
2929
2930 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2931
2932 #if sparc_TARGET_ARCH
2933
2934 genCondJump (BlockId id) bool = do
2935   CondCode is_float cond code <- getCondCode bool
2936   return (
2937        code `appOL`
2938        toOL (
2939          if   is_float
2940          then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2941          else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2942        )
2943     )
2944
2945 #endif /* sparc_TARGET_ARCH */
2946
2947
2948 #if powerpc_TARGET_ARCH
2949
2950 genCondJump id bool = do
2951   CondCode is_float cond code <- getCondCode bool
2952   return (code `snocOL` BCC cond id)
2953
2954 #endif /* powerpc_TARGET_ARCH */
2955
2956
2957 -- -----------------------------------------------------------------------------
2958 --  Generating C calls
2959
2960 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
2961 -- @get_arg@, which moves the arguments to the correct registers/stack
2962 -- locations.  Apart from that, the code is easy.
2963 --
2964 -- (If applicable) Do not fill the delay slots here; you will confuse the
2965 -- register allocator.
2966
2967 genCCall
2968     :: CmmCallTarget            -- function to call
2969     -> CmmFormals               -- where to put the result
2970     -> CmmActuals               -- arguments (of mixed type)
2971     -> NatM InstrBlock
2972
2973 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2974
2975 #if alpha_TARGET_ARCH
2976
2977 ccallResultRegs =
2978
2979 genCCall fn cconv result_regs args
2980   = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2981                           `thenNat` \ ((unused,_), argCode) ->
2982     let
2983         nRegs = length allArgRegs - length unused
2984         code = asmSeqThen (map ($ []) argCode)
2985     in
2986         returnSeq code [
2987             LDA pv (AddrImm (ImmLab (ptext fn))),
2988             JSR ra (AddrReg pv) nRegs,
2989             LDGP gp (AddrReg ra)]
2990   where
2991     ------------------------
2992     {-  Try to get a value into a specific register (or registers) for
2993         a call.  The first 6 arguments go into the appropriate
2994         argument register (separate registers for integer and floating
2995         point arguments, but used in lock-step), and the remaining
2996         arguments are dumped to the stack, beginning at 0(sp).  Our
2997         first argument is a pair of the list of remaining argument
2998         registers to be assigned for this call and the next stack
2999         offset to use for overflowing arguments.  This way,
3000         @get_Arg@ can be applied to all of a call's arguments using
3001         @mapAccumLNat@.
3002     -}
3003     get_arg
3004         :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
3005         -> StixTree             -- Current argument
3006         -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3007
3008     -- We have to use up all of our argument registers first...
3009
3010     get_arg ((iDst,fDst):dsts, offset) arg
3011       = getRegister arg                     `thenNat` \ register ->
3012         let
3013             reg  = if isFloatingRep pk then fDst else iDst
3014             code = registerCode register reg
3015             src  = registerName register reg
3016             pk   = registerRep register
3017         in
3018         return (
3019             if isFloatingRep pk then
3020                 ((dsts, offset), if isFixed register then
3021                     code . mkSeqInstr (FMOV src fDst)
3022                     else code)
3023             else
3024                 ((dsts, offset), if isFixed register then
3025                     code . mkSeqInstr (OR src (RIReg src) iDst)
3026                     else code))
3027
3028     -- Once we have run out of argument registers, we move to the
3029     -- stack...
3030
3031     get_arg ([], offset) arg
3032       = getRegister arg                 `thenNat` \ register ->
3033         getNewRegNat (registerRep register)
3034                                         `thenNat` \ tmp ->
3035         let
3036             code = registerCode register tmp
3037             src  = registerName register tmp
3038             pk   = registerRep register
3039             sz   = primRepToSize pk
3040         in
3041         return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3042
3043 #endif /* alpha_TARGET_ARCH */
3044
3045 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3046
3047 #if i386_TARGET_ARCH
3048
3049 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3050         -- write barrier compiles to no code on x86/x86-64;
3051         -- we keep it this long in order to prevent earlier optimisations.
3052
3053 -- we only cope with a single result for foreign calls
3054 genCCall (CmmPrim op) [CmmKinded r _] args = do
3055   l1 <- getNewLabelNat
3056   l2 <- getNewLabelNat
3057   case op of
3058         MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
3059         MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3060
3061         MO_F32_Sin  -> actuallyInlineFloatOp F32  (GSIN F32 l1 l2) args
3062         MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args
3063
3064         MO_F32_Cos  -> actuallyInlineFloatOp F32  (GCOS F32 l1 l2) args
3065         MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args
3066
3067         MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32 l1 l2) args
3068         MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args
3069
3070         other_op    -> outOfLineFloatOp op r args
3071  where
3072   actuallyInlineFloatOp rep instr [CmmKinded x _]
3073         = do res <- trivialUFCode rep instr x
3074              any <- anyReg res
3075              return (any (getRegisterReg (CmmLocal r)))
3076
3077 genCCall target dest_regs args = do
3078     let
3079         sizes               = map (arg_size . cmmExprRep . kindlessCmm) (reverse args)
3080 #if !darwin_TARGET_OS
3081         tot_arg_size        = sum sizes
3082 #else
3083         raw_arg_size        = sum sizes
3084         tot_arg_size        = roundTo 16 raw_arg_size
3085         arg_pad_size        = tot_arg_size - raw_arg_size
3086     delta0 <- getDeltaNat
3087     setDeltaNat (delta0 - arg_pad_size)
3088 #endif
3089
3090     push_codes <- mapM push_arg (reverse args)
3091     delta <- getDeltaNat
3092
3093     -- in
3094     -- deal with static vs dynamic call targets
3095     (callinsns,cconv) <-
3096       case target of
3097         -- CmmPrim -> ...
3098         CmmCallee (CmmLit (CmmLabel lbl)) conv
3099            -> -- ToDo: stdcall arg sizes
3100               return (unitOL (CALL (Left fn_imm) []), conv)
3101            where fn_imm = ImmCLbl lbl
3102         CmmCallee expr conv
3103            -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3104                  ASSERT(dyn_rep == I32)
3105                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3106
3107     let push_code
3108 #if darwin_TARGET_OS
3109             | arg_pad_size /= 0
3110             = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3111                     DELTA (delta0 - arg_pad_size)]
3112               `appOL` concatOL push_codes
3113             | otherwise
3114 #endif
3115             = concatOL push_codes
3116         call = callinsns `appOL`
3117                toOL (
3118                         -- Deallocate parameters after call for ccall;
3119                         -- but not for stdcall (callee does it)
3120                   (if cconv == StdCallConv || tot_arg_size==0 then [] else
3121                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3122                   ++
3123                   [DELTA (delta + tot_arg_size)]
3124                )
3125     -- in
3126     setDeltaNat (delta + tot_arg_size)
3127
3128     let
3129         -- assign the results, if necessary
3130         assign_code []     = nilOL
3131         assign_code [CmmKinded dest _hint] =
3132           case rep of
3133                 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3134                              MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3135                 F32 -> unitOL (GMOV fake0 r_dest)
3136                 F64 -> unitOL (GMOV fake0 r_dest)
3137                 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3138           where
3139                 r_dest_hi = getHiVRegFromLo r_dest
3140                 rep = localRegRep dest
3141                 r_dest = getRegisterReg (CmmLocal dest)
3142         assign_code many = panic "genCCall.assign_code many"
3143
3144     return (push_code `appOL`
3145             call `appOL`
3146             assign_code dest_regs)
3147
3148   where
3149     arg_size F64 = 8
3150     arg_size F32 = 4
3151     arg_size I64 = 8
3152     arg_size _   = 4
3153
3154     roundTo a x | x `mod` a == 0 = x
3155                 | otherwise = x + a - (x `mod` a)
3156
3157
3158     push_arg :: (CmmKinded CmmExpr){-current argument-}
3159                     -> NatM InstrBlock  -- code
3160
3161     push_arg (CmmKinded arg _hint) -- we don't need the hints on x86
3162       | arg_rep == I64 = do
3163         ChildCode64 code r_lo <- iselExpr64 arg
3164         delta <- getDeltaNat
3165         setDeltaNat (delta - 8)
3166         let
3167             r_hi = getHiVRegFromLo r_lo
3168         -- in
3169         return (       code `appOL`
3170                        toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3171                              PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3172                              DELTA (delta-8)]
3173             )
3174
3175       | otherwise = do
3176         (code, reg, sz) <- get_op arg
3177         delta <- getDeltaNat
3178         let size = arg_size sz
3179         setDeltaNat (delta-size)
3180         if (case sz of F64 -> True; F32 -> True; _ -> False)
3181            then return (code `appOL`
3182                         toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3183                               DELTA (delta-size),
3184                               GST sz reg (AddrBaseIndex (EABaseReg esp)
3185                                                         EAIndexNone
3186                                                         (ImmInt 0))]
3187                        )
3188            else return (code `snocOL`
3189                         PUSH I32 (OpReg reg) `snocOL`
3190                         DELTA (delta-size)
3191                        )
3192       where
3193          arg_rep = cmmExprRep arg
3194
3195     ------------
3196     get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3197     get_op op = do
3198         (reg,code) <- getSomeReg op
3199         return (code, reg, cmmExprRep op)
3200
3201 #endif /* i386_TARGET_ARCH */
3202
3203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3204
3205 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3206   -> NatM InstrBlock
3207 outOfLineFloatOp mop res args
3208   = do
3209       dflags <- getDynFlagsNat
3210       targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3211       let target = CmmCallee targetExpr CCallConv
3212
3213       if localRegRep res == F64
3214         then
3215           stmtToInstrs (CmmCall target [CmmKinded res FloatHint] args CmmUnsafe CmmMayReturn)
3216         else do
3217           uq <- getUniqueNat
3218           let
3219             tmp = LocalReg uq F64 GCKindNonPtr
3220           -- in
3221           code1 <- stmtToInstrs (CmmCall target [CmmKinded tmp FloatHint] args CmmUnsafe CmmMayReturn)
3222           code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3223           return (code1 `appOL` code2)
3224   where
3225         lbl = mkForeignLabel fn Nothing False
3226
3227         fn = case mop of
3228               MO_F32_Sqrt  -> fsLit "sqrtf"
3229               MO_F32_Sin   -> fsLit "sinf"
3230               MO_F32_Cos   -> fsLit "cosf"
3231               MO_F32_Tan   -> fsLit "tanf"
3232               MO_F32_Exp   -> fsLit "expf"
3233               MO_F32_Log   -> fsLit "logf"
3234
3235               MO_F32_Asin  -> fsLit "asinf"
3236               MO_F32_Acos  -> fsLit "acosf"
3237               MO_F32_Atan  -> fsLit "atanf"
3238
3239               MO_F32_Sinh  -> fsLit "sinhf"
3240               MO_F32_Cosh  -> fsLit "coshf"
3241               MO_F32_Tanh  -> fsLit "tanhf"
3242               MO_F32_Pwr   -> fsLit "powf"
3243
3244               MO_F64_Sqrt  -> fsLit "sqrt"
3245               MO_F64_Sin   -> fsLit "sin"
3246               MO_F64_Cos   -> fsLit "cos"
3247               MO_F64_Tan   -> fsLit "tan"
3248               MO_F64_Exp   -> fsLit "exp"
3249               MO_F64_Log   -> fsLit "log"
3250
3251               MO_F64_Asin  -> fsLit "asin"
3252               MO_F64_Acos  -> fsLit "acos"
3253               MO_F64_Atan  -> fsLit "atan"
3254
3255               MO_F64_Sinh  -> fsLit "sinh"
3256               MO_F64_Cosh  -> fsLit "cosh"
3257               MO_F64_Tanh  -> fsLit "tanh"
3258               MO_F64_Pwr   -> fsLit "pow"
3259
3260 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3261
3262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3263
3264 #if x86_64_TARGET_ARCH
3265
3266 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3267         -- write barrier compiles to no code on x86/x86-64;
3268         -- we keep it this long in order to prevent earlier optimisations.
3269
3270
3271 genCCall (CmmPrim op) [CmmKinded r _] args =
3272   outOfLineFloatOp op r args
3273
3274 genCCall target dest_regs args = do
3275
3276         -- load up the register arguments
3277     (stack_args, aregs, fregs, load_args_code)
3278          <- load_args args allArgRegs allFPArgRegs nilOL
3279
3280     let
3281         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
3282         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3283         arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3284                 -- for annotating the call instruction with
3285
3286         sse_regs = length fp_regs_used
3287
3288         tot_arg_size = arg_size * length stack_args
3289
3290         -- On entry to the called function, %rsp should be aligned
3291         -- on a 16-byte boundary +8 (i.e. the first stack arg after
3292         -- the return address is 16-byte aligned).  In STG land
3293         -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3294         -- need to make sure we push a multiple of 16-bytes of args,
3295         -- plus the return address, to get the correct alignment.
3296         -- Urg, this is hard.  We need to feed the delta back into
3297         -- the arg pushing code.
3298     (real_size, adjust_rsp) <-
3299         if tot_arg_size `rem` 16 == 0
3300             then return (tot_arg_size, nilOL)
3301             else do -- we need to adjust...
3302                 delta <- getDeltaNat
3303                 setDeltaNat (delta-8)
3304                 return (tot_arg_size+8, toOL [
3305                                 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3306                                 DELTA (delta-8)
3307                         ])
3308
3309         -- push the stack args, right to left
3310     push_code <- push_args (reverse stack_args) nilOL
3311     delta <- getDeltaNat
3312
3313     -- deal with static vs dynamic call targets
3314     (callinsns,cconv) <-
3315       case target of
3316         -- CmmPrim -> ...
3317         CmmCallee (CmmLit (CmmLabel lbl)) conv
3318            -> -- ToDo: stdcall arg sizes
3319               return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3320            where fn_imm = ImmCLbl lbl
3321         CmmCallee expr conv
3322            -> do (dyn_r, dyn_c) <- getSomeReg expr
3323                  return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3324
3325     let
3326         -- The x86_64 ABI requires us to set %al to the number of SSE
3327         -- registers that contain arguments, if the called routine
3328         -- is a varargs function.  We don't know whether it's a
3329         -- varargs function or not, so we have to assume it is.
3330         --
3331         -- It's not safe to omit this assignment, even if the number
3332         -- of SSE regs in use is zero.  If %al is larger than 8
3333         -- on entry to a varargs function, seg faults ensue.
3334         assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3335
3336     let call = callinsns `appOL`
3337                toOL (
3338                         -- Deallocate parameters after call for ccall;
3339                         -- but not for stdcall (callee does it)
3340                   (if cconv == StdCallConv || real_size==0 then [] else
3341                    [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3342                   ++
3343                   [DELTA (delta + real_size)]
3344                )
3345     -- in
3346     setDeltaNat (delta + real_size)
3347
3348     let
3349         -- assign the results, if necessary
3350         assign_code []     = nilOL
3351         assign_code [CmmKinded dest _hint] =
3352           case rep of
3353                 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3354                 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355                 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3356           where
3357                 rep = localRegRep dest
3358                 r_dest = getRegisterReg (CmmLocal dest)
3359         assign_code many = panic "genCCall.assign_code many"
3360
3361     return (load_args_code      `appOL`
3362             adjust_rsp          `appOL`
3363             push_code           `appOL`
3364             assign_eax sse_regs `appOL`
3365             call                `appOL`
3366             assign_code dest_regs)
3367
3368   where
3369     arg_size = 8 -- always, at the mo
3370
3371     load_args :: [CmmKinded CmmExpr]
3372               -> [Reg]                  -- int regs avail for args
3373               -> [Reg]                  -- FP regs avail for args
3374               -> InstrBlock
3375               -> NatM ([CmmKinded CmmExpr],[Reg],[Reg],InstrBlock)
3376     load_args args [] [] code     =  return (args, [], [], code)
3377         -- no more regs to use
3378     load_args [] aregs fregs code =  return ([], aregs, fregs, code)
3379         -- no more args to push
3380     load_args ((CmmKinded arg hint) : rest) aregs fregs code
3381         | isFloatingRep arg_rep =
3382         case fregs of
3383           [] -> push_this_arg
3384           (r:rs) -> do
3385              arg_code <- getAnyReg arg
3386              load_args rest aregs rs (code `appOL` arg_code r)
3387         | otherwise =
3388         case aregs of
3389           [] -> push_this_arg
3390           (r:rs) -> do
3391              arg_code <- getAnyReg arg
3392              load_args rest rs fregs (code `appOL` arg_code r)
3393         where
3394           arg_rep = cmmExprRep arg
3395
3396           push_this_arg = do
3397             (args',ars,frs,code') <- load_args rest aregs fregs code
3398             return ((CmmKinded arg hint):args', ars, frs, code')
3399
3400     push_args [] code = return code
3401     push_args ((CmmKinded arg hint):rest) code
3402        | isFloatingRep arg_rep = do
3403          (arg_reg, arg_code) <- getSomeReg arg
3404          delta <- getDeltaNat
3405          setDeltaNat (delta-arg_size)
3406          let code' = code `appOL` arg_code `appOL` toOL [
3407                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3408                         DELTA (delta-arg_size),
3409                         MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
3410          push_args rest code'
3411
3412        | otherwise = do
3413        -- we only ever generate word-sized function arguments.  Promotion
3414        -- has already happened: our Int8# type is kept sign-extended
3415        -- in an Int#, for example.
3416          ASSERT(arg_rep == I64) return ()
3417          (arg_op, arg_code) <- getOperand arg
3418          delta <- getDeltaNat
3419          setDeltaNat (delta-arg_size)
3420          let code' = code `appOL` toOL [PUSH I64 arg_op,
3421                                         DELTA (delta-arg_size)]
3422          push_args rest code'
3423         where
3424           arg_rep = cmmExprRep arg
3425 #endif
3426
3427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3428
3429 #if sparc_TARGET_ARCH
3430 {-
3431    The SPARC calling convention is an absolute
3432    nightmare.  The first 6x32 bits of arguments are mapped into
3433    %o0 through %o5, and the remaining arguments are dumped to the
3434    stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
3435
3436    If we have to put args on the stack, move %o6==%sp down by
3437    the number of words to go on the stack, to ensure there's enough space.
3438
3439    According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3440    16 words above the stack pointer is a word for the address of
3441    a structure return value.  I use this as a temporary location
3442    for moving values from float to int regs.  Certainly it isn't
3443    safe to put anything in the 16 words starting at %sp, since
3444    this area can get trashed at any time due to window overflows
3445    caused by signal handlers.
3446
3447    A final complication (if the above isn't enough) is that
3448    we can't blithely calculate the arguments one by one into
3449    %o0 .. %o5.  Consider the following nested calls:
3450
3451        fff a (fff b c)
3452
3453    Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
3454    the inner call will itself use %o0, which trashes the value put there
3455    in preparation for the outer call.  Upshot: we need to calculate the
3456    args into temporary regs, and move those to arg regs or onto the
3457    stack only immediately prior to the call proper.  Sigh.
3458 -}
3459
3460 genCCall target dest_regs argsAndHints = do
3461     let
3462         args = map kindlessCmm argsAndHints
3463     argcode_and_vregs <- mapM arg_to_int_vregs args
3464     let
3465         (argcodes, vregss) = unzip argcode_and_vregs
3466         n_argRegs          = length allArgRegs
3467         n_argRegs_used     = min (length vregs) n_argRegs
3468         vregs              = concat vregss
3469     -- deal with static vs dynamic call targets
3470     callinsns <- (case target of
3471         CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3472                 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3473         CmmCallee expr conv -> do
3474                 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3475                 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3476         CmmPrim mop -> do
3477                   (res, reduce) <- outOfLineFloatOp mop
3478                   lblOrMopExpr <- case res of
3479                        Left lbl -> do
3480                             return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3481                        Right mopExpr -> do
3482                             (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3483                             return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3484                   if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3485
3486       )
3487     let
3488         argcode = concatOL argcodes
3489         (move_sp_down, move_sp_up)
3490            = let diff = length vregs - n_argRegs
3491                  nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3492              in  if   nn <= 0
3493                  then (nilOL, nilOL)
3494                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3495         transfer_code
3496            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3497     return (argcode       `appOL`
3498             move_sp_down  `appOL`
3499             transfer_code `appOL`
3500             callinsns     `appOL`
3501             unitOL NOP    `appOL`
3502             move_sp_up)
3503   where
3504      -- move args from the integer vregs into which they have been
3505      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3506      move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3507
3508      move_final [] _ offset          -- all args done
3509         = []
3510
3511      move_final (v:vs) [] offset     -- out of aregs; move to stack
3512         = ST I32 v (spRel offset)
3513           : move_final vs [] (offset+1)
3514
3515      move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3516         = OR False g0 (RIReg v) a
3517           : move_final vs az offset
3518
3519      -- generate code to calculate an argument, and move it into one
3520      -- or two integer vregs.
3521      arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3522      arg_to_int_vregs arg
3523         | (cmmExprRep arg) == I64
3524         = do
3525           (ChildCode64 code r_lo) <- iselExpr64 arg
3526           let
3527               r_hi = getHiVRegFromLo r_lo
3528           return (code, [r_hi, r_lo])
3529         | otherwise
3530         = do
3531           (src, code) <- getSomeReg arg
3532           tmp <- getNewRegNat (cmmExprRep arg)
3533           let
3534               pk   = cmmExprRep arg
3535           case pk of
3536              F64 -> do
3537                       v1 <- getNewRegNat I32
3538                       v2 <- getNewRegNat I32
3539                       return (
3540                         code                          `snocOL`
3541                         FMOV F64 src f0                `snocOL`
3542                         ST   F32  f0 (spRel 16)         `snocOL`
3543                         LD   I32  (spRel 16) v1         `snocOL`
3544                         ST   F32  (fPair f0) (spRel 16) `snocOL`
3545                         LD   I32  (spRel 16) v2
3546                         ,
3547                         [v1,v2]
3548                        )
3549              F32 -> do
3550                       v1 <- getNewRegNat I32
3551                       return (
3552                         code                    `snocOL`
3553                         ST   F32  src (spRel 16)  `snocOL`
3554                         LD   I32  (spRel 16) v1
3555                         ,
3556                         [v1]
3557                        )
3558              other -> do
3559                         v1 <- getNewRegNat I32
3560                         return (
3561                           code `snocOL` OR False g0 (RIReg src) v1
3562                           ,
3563                           [v1]
3564                          )
3565 outOfLineFloatOp mop =
3566     do
3567       dflags <- getDynFlagsNat
3568       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3569                   mkForeignLabel functionName Nothing True
3570       let mopLabelOrExpr = case mopExpr of
3571                         CmmLit (CmmLabel lbl) -> Left lbl
3572                         _ -> Right mopExpr
3573       return (mopLabelOrExpr, reduce)
3574             where
3575                 (reduce, functionName) = case mop of
3576                   MO_F32_Exp    -> (True,  fsLit "exp")
3577                   MO_F32_Log    -> (True,  fsLit "log")
3578                   MO_F32_Sqrt   -> (True,  fsLit "sqrt")
3579
3580                   MO_F32_Sin    -> (True,  fsLit "sin")
3581                   MO_F32_Cos    -> (True,  fsLit "cos")
3582                   MO_F32_Tan    -> (True,  fsLit "tan")
3583
3584                   MO_F32_Asin   -> (True,  fsLit "asin")
3585                   MO_F32_Acos   -> (True,  fsLit "acos")
3586                   MO_F32_Atan   -> (True,  fsLit "atan")
3587
3588                   MO_F32_Sinh   -> (True,  fsLit "sinh")
3589                   MO_F32_Cosh   -> (True,  fsLit "cosh")
3590                   MO_F32_Tanh   -> (True,  fsLit "tanh")
3591
3592                   MO_F64_Exp    -> (False, fsLit "exp")
3593                   MO_F64_Log    -> (False, fsLit "log")
3594                   MO_F64_Sqrt   -> (False, fsLit "sqrt")
3595
3596                   MO_F64_Sin    -> (False, fsLit "sin")
3597                   MO_F64_Cos    -> (False, fsLit "cos")
3598                   MO_F64_Tan    -> (False, fsLit "tan")
3599
3600                   MO_F64_Asin   -> (False, fsLit "asin")
3601                   MO_F64_Acos   -> (False, fsLit "acos")
3602                   MO_F64_Atan   -> (False, fsLit "atan")
3603
3604                   MO_F64_Sinh   -> (False, fsLit "sinh")
3605                   MO_F64_Cosh   -> (False, fsLit "cosh")
3606                   MO_F64_Tanh   -> (False, fsLit "tanh")
3607
3608                   other -> pprPanic "outOfLineFloatOp(sparc) "
3609                                 (pprCallishMachOp mop)
3610
3611 #endif /* sparc_TARGET_ARCH */
3612
3613 #if powerpc_TARGET_ARCH
3614
3615 #if darwin_TARGET_OS || linux_TARGET_OS
3616 {-
3617     The PowerPC calling convention for Darwin/Mac OS X
3618     is described in Apple's document
3619     "Inside Mac OS X - Mach-O Runtime Architecture".
3620
3621     PowerPC Linux uses the System V Release 4 Calling Convention
3622     for PowerPC. It is described in the
3623     "System V Application Binary Interface PowerPC Processor Supplement".
3624
3625     Both conventions are similar:
3626     Parameters may be passed in general-purpose registers starting at r3, in
3627     floating point registers starting at f1, or on the stack.
3628
3629     But there are substantial differences:
3630     * The number of registers used for parameter passing and the exact set of
3631       nonvolatile registers differs (see MachRegs.lhs).
3632     * On Darwin, stack space is always reserved for parameters, even if they are
3633       passed in registers. The called routine may choose to save parameters from
3634       registers to the corresponding space on the stack.
3635     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3636       parameter is passed in an FPR.
3637     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3638       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3639       Darwin just treats an I64 like two separate I32s (high word first).
3640     * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3641       4-byte aligned like everything else on Darwin.
3642     * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3643       PowerPC Linux does not agree, so neither do we.
3644
3645     According to both conventions, The parameter area should be part of the
3646     caller's stack frame, allocated in the caller's prologue code (large enough
3647     to hold the parameter lists for all called routines). The NCG already
3648     uses the stack for register spilling, leaving 64 bytes free at the top.
3649     If we need a larger parameter area than that, we just allocate a new stack
3650     frame just before ccalling.
3651 -}
3652
3653
3654 genCCall (CmmPrim MO_WriteBarrier) _ _
3655  = return $ unitOL LWSYNC
3656
3657 genCCall target dest_regs argsAndHints
3658   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3659         -- we rely on argument promotion in the codeGen
3660     do
3661         (finalStack,passArgumentsCode,usedRegs) <- passArguments
3662                                                         (zip args argReps)
3663                                                         allArgRegs allFPArgRegs
3664                                                         initialStackOffset
3665                                                         (toOL []) []
3666
3667         (labelOrExpr, reduceToF32) <- case target of
3668             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3669             CmmCallee expr conv -> return  (Right expr, False)
3670             CmmPrim mop -> outOfLineFloatOp mop
3671
3672         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3673             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3674
3675         case labelOrExpr of
3676             Left lbl -> do
3677                 return (         codeBefore
3678                         `snocOL` BL lbl usedRegs
3679                         `appOL`  codeAfter)
3680             Right dyn -> do
3681                 (dynReg, dynCode) <- getSomeReg dyn
3682                 return (         dynCode
3683                         `snocOL` MTCTR dynReg
3684                         `appOL`  codeBefore
3685                         `snocOL` BCTRL usedRegs
3686                         `appOL`  codeAfter)
3687     where
3688 #if darwin_TARGET_OS
3689         initialStackOffset = 24
3690             -- size of linkage area + size of arguments, in bytes
3691         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3692                                        map machRepByteWidth argReps
3693 #elif linux_TARGET_OS
3694         initialStackOffset = 8
3695         stackDelta finalStack = roundTo 16 finalStack
3696 #endif
3697         args = map kindlessCmm argsAndHints
3698         argReps = map cmmExprRep args
3699
3700         roundTo a x | x `mod` a == 0 = x
3701                     | otherwise = x + a - (x `mod` a)
3702
3703         move_sp_down finalStack
3704                | delta > 64 =
3705                         toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3706                               DELTA (-delta)]
3707                | otherwise = nilOL
3708                where delta = stackDelta finalStack
3709         move_sp_up finalStack
3710                | delta > 64 =
3711                         toOL [ADD sp sp (RIImm (ImmInt delta)),
3712                               DELTA 0]
3713                | otherwise = nilOL
3714                where delta = stackDelta finalStack
3715
3716
3717         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3718         passArguments ((arg,I64):args) gprs fprs stackOffset
3719                accumCode accumUsed =
3720             do
3721                 ChildCode64 code vr_lo <- iselExpr64 arg
3722                 let vr_hi = getHiVRegFromLo vr_lo
3723
3724 #if darwin_TARGET_OS
3725                 passArguments args
3726                               (drop 2 gprs)
3727                               fprs
3728                               (stackOffset+8)
3729                               (accumCode `appOL` code
3730                                     `snocOL` storeWord vr_hi gprs stackOffset
3731                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3732                               ((take 2 gprs) ++ accumUsed)
3733             where
3734                 storeWord vr (gpr:_) offset = MR gpr vr
3735                 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3736
3737 #elif linux_TARGET_OS
3738                 let stackOffset' = roundTo 8 stackOffset
3739                     stackCode = accumCode `appOL` code
3740                         `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3741                         `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3742                     regCode hireg loreg =
3743                         accumCode `appOL` code
3744                             `snocOL` MR hireg vr_hi
3745                             `snocOL` MR loreg vr_lo
3746
3747                 case gprs of
3748                     hireg : loreg : regs | even (length gprs) ->
3749                         passArguments args regs fprs stackOffset
3750                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3751                     _skipped : hireg : loreg : regs ->
3752                         passArguments args regs fprs stackOffset
3753                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
3754                     _ -> -- only one or no regs left
3755                         passArguments args [] fprs (stackOffset'+8)
3756                                       stackCode accumUsed
3757 #endif
3758
3759         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3760             | reg : _ <- regs = do
3761                 register <- getRegister arg
3762                 let code = case register of
3763                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3764                             Any _ acode -> acode reg
3765                 passArguments args
3766                               (drop nGprs gprs)
3767                               (drop nFprs fprs)
3768 #if darwin_TARGET_OS
3769         -- The Darwin ABI requires that we reserve stack slots for register parameters
3770                               (stackOffset + stackBytes)
3771 #elif linux_TARGET_OS
3772         -- ... the SysV ABI doesn't.
3773                               stackOffset
3774 #endif
3775                               (accumCode `appOL` code)
3776                               (reg : accumUsed)
3777             | otherwise = do
3778                 (vr, code) <- getSomeReg arg
3779                 passArguments args
3780                               (drop nGprs gprs)
3781                               (drop nFprs fprs)
3782                               (stackOffset' + stackBytes)
3783                               (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3784                               accumUsed
3785             where
3786 #if darwin_TARGET_OS
3787         -- stackOffset is at least 4-byte aligned
3788         -- The Darwin ABI is happy with that.
3789                 stackOffset' = stackOffset
3790 #else
3791         -- ... the SysV ABI requires 8-byte alignment for doubles.
3792                 stackOffset' | rep == F64 = roundTo 8 stackOffset
3793                              | otherwise  =           stackOffset
3794 #endif
3795                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3796                 (nGprs, nFprs, stackBytes, regs) = case rep of
3797                     I32 -> (1, 0, 4, gprs)
3798 #if darwin_TARGET_OS
3799         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3800         -- we use the FPRs.
3801                     F32 -> (1, 1, 4, fprs)
3802                     F64 -> (2, 1, 8, fprs)
3803 #elif linux_TARGET_OS
3804         -- ... the SysV ABI doesn't.
3805                     F32 -> (0, 1, 4, fprs)
3806                     F64 -> (0, 1, 8, fprs)
3807 #endif
3808
3809         moveResult reduceToF32 =
3810             case dest_regs of
3811                 [] -> nilOL
3812                 [CmmKinded dest _hint]
3813                     | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3814                     | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3815                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3816                                           MR r_dest r4]
3817                     | otherwise -> unitOL (MR r_dest r3)
3818                     where rep = cmmRegRep (CmmLocal dest)
3819                           r_dest = getRegisterReg (CmmLocal dest)
3820
3821         outOfLineFloatOp mop =
3822             do
3823                 dflags <- getDynFlagsNat
3824                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3825                               mkForeignLabel functionName Nothing True
3826                 let mopLabelOrExpr = case mopExpr of
3827                         CmmLit (CmmLabel lbl) -> Left lbl
3828                         _ -> Right mopExpr
3829                 return (mopLabelOrExpr, reduce)
3830             where
3831                 (functionName, reduce) = case mop of
3832                     MO_F32_Exp   -> (fsLit "exp", True)
3833                     MO_F32_Log   -> (fsLit "log", True)
3834                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
3835
3836                     MO_F32_Sin   -> (fsLit "sin", True)
3837                     MO_F32_Cos   -> (fsLit "cos", True)
3838                     MO_F32_Tan   -> (fsLit "tan", True)
3839
3840                     MO_F32_Asin  -> (fsLit "asin", True)
3841                     MO_F32_Acos  -> (fsLit "acos", True)
3842                     MO_F32_Atan  -> (fsLit "atan", True)
3843
3844                     MO_F32_Sinh  -> (fsLit "sinh", True)
3845                     MO_F32_Cosh  -> (fsLit "cosh", True)
3846                     MO_F32_Tanh  -> (fsLit "tanh", True)
3847                     MO_F32_Pwr   -> (fsLit "pow", True)
3848
3849                     MO_F64_Exp   -> (fsLit "exp", False)
3850                     MO_F64_Log   -> (fsLit "log", False)
3851                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
3852
3853                     MO_F64_Sin   -> (fsLit "sin", False)
3854                     MO_F64_Cos   -> (fsLit "cos", False)
3855                     MO_F64_Tan   -> (fsLit "tan", False)
3856
3857                     MO_F64_Asin  -> (fsLit "asin", False)
3858                     MO_F64_Acos  -> (fsLit "acos", False)
3859                     MO_F64_Atan  -> (fsLit "atan", False)
3860
3861                     MO_F64_Sinh  -> (fsLit "sinh", False)
3862                     MO_F64_Cosh  -> (fsLit "cosh", False)
3863                     MO_F64_Tanh  -> (fsLit "tanh", False)
3864                     MO_F64_Pwr   -> (fsLit "pow", False)
3865                     other -> pprPanic "genCCall(ppc): unknown callish op"
3866                                     (pprCallishMachOp other)
3867
3868 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3869
3870 #endif /* powerpc_TARGET_ARCH */
3871
3872
3873 -- -----------------------------------------------------------------------------
3874 -- Generating a table-branch
3875
3876 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3877
3878 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3879 genSwitch expr ids
3880   | opt_PIC
3881   = do
3882         (reg,e_code) <- getSomeReg expr
3883         lbl <- getNewLabelNat
3884         dflags <- getDynFlagsNat
3885         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3886         (tableReg,t_code) <- getSomeReg $ dynRef
3887         let
3888             jumpTable = map jumpTableEntryRel ids
3889
3890             jumpTableEntryRel Nothing
3891                 = CmmStaticLit (CmmInt 0 wordRep)
3892             jumpTableEntryRel (Just (BlockId id))
3893                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3894                 where blockLabel = mkAsmTempLabel id
3895
3896             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3897                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
3898
3899 #if x86_64_TARGET_ARCH
3900 #if darwin_TARGET_OS
3901     -- on Mac OS X/x86_64, put the jump table in the text section
3902     -- to work around a limitation of the linker.
3903     -- ld64 is unable to handle the relocations for
3904     --     .quad L1 - L0
3905     -- if L0 is not preceded by a non-anonymous label in its section.
3906
3907             code = e_code `appOL` t_code `appOL` toOL [
3908                             ADD wordRep op (OpReg tableReg),
3909                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3910                             LDATA Text (CmmDataLabel lbl : jumpTable)
3911                     ]
3912 #else
3913     -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3914     -- relocations, hence we only get 32-bit offsets in the jump
3915     -- table. As these offsets are always negative we need to properly
3916     -- sign extend them to 64-bit. This hack should be removed in
3917     -- conjunction with the hack in PprMach.hs/pprDataItem once
3918     -- binutils 2.17 is standard.
3919             code = e_code `appOL` t_code `appOL` toOL [
3920                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3921                             MOVSxL I32
3922                                    (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3923                                                           (EAIndex reg wORD_SIZE) (ImmInt 0)))
3924                                    (OpReg reg),
3925                             ADD wordRep (OpReg reg) (OpReg tableReg),
3926                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3927                    ]
3928 #endif
3929 #else
3930             code = e_code `appOL` t_code `appOL` toOL [
3931                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3932                             ADD wordRep op (OpReg tableReg),
3933                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3934                     ]
3935 #endif
3936         return code
3937   | otherwise
3938   = do
3939         (reg,e_code) <- getSomeReg expr
3940         lbl <- getNewLabelNat
3941         let
3942             jumpTable = map jumpTableEntry ids
3943             op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3944             code = e_code `appOL` toOL [
3945                     LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946                     JMP_TBL op [ id | Just id <- ids ]
3947                  ]
3948         -- in
3949         return code
3950 #elif powerpc_TARGET_ARCH
3951 genSwitch expr ids
3952   | opt_PIC
3953   = do
3954         (reg,e_code) <- getSomeReg expr
3955         tmp <- getNewRegNat I32
3956         lbl <- getNewLabelNat
3957         dflags <- getDynFlagsNat
3958         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3959         (tableReg,t_code) <- getSomeReg $ dynRef
3960         let
3961             jumpTable = map jumpTableEntryRel ids
3962
3963             jumpTableEntryRel Nothing
3964                 = CmmStaticLit (CmmInt 0 wordRep)
3965             jumpTableEntryRel (Just (BlockId id))
3966                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3967                 where blockLabel = mkAsmTempLabel id
3968
3969             code = e_code `appOL` t_code `appOL` toOL [
3970                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3971                             SLW tmp reg (RIImm (ImmInt 2)),
3972                             LD I32 tmp (AddrRegReg tableReg tmp),
3973                             ADD tmp tmp (RIReg tableReg),
3974                             MTCTR tmp,
3975                             BCTR [ id | Just id <- ids ]
3976                     ]
3977         return code
3978   | otherwise
3979   = do
3980         (reg,e_code) <- getSomeReg expr
3981         tmp <- getNewRegNat I32
3982         lbl <- getNewLabelNat
3983         let
3984             jumpTable = map jumpTableEntry ids
3985
3986             code = e_code `appOL` toOL [
3987                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3988                             SLW tmp reg (RIImm (ImmInt 2)),
3989                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
3990                             LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3991                             MTCTR tmp,
3992                             BCTR [ id | Just id <- ids ]
3993                     ]
3994         return code
3995 #else
3996 #error "ToDo: genSwitch"
3997 #endif
3998
3999 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
4000 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4001     where blockLabel = mkAsmTempLabel id
4002
4003 -- -----------------------------------------------------------------------------
4004 -- Support bits
4005 -- -----------------------------------------------------------------------------
4006
4007
4008 -- -----------------------------------------------------------------------------
4009 -- 'condIntReg' and 'condFltReg': condition codes into registers
4010
4011 -- Turn those condition codes into integers now (when they appear on
4012 -- the right hand side of an assignment).
4013 --
4014 -- (If applicable) Do not fill the delay slots here; you will confuse the
4015 -- register allocator.
4016
4017 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4018
4019 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4020
4021 #if alpha_TARGET_ARCH
4022 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4023 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4024 #endif /* alpha_TARGET_ARCH */
4025
4026 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4027
4028 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4029
4030 condIntReg cond x y = do
4031   CondCode _ cond cond_code <- condIntCode cond x y
4032   tmp <- getNewRegNat I8
4033   let
4034         code dst = cond_code `appOL` toOL [
4035                     SETCC cond (OpReg tmp),
4036                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4037                   ]
4038   -- in
4039   return (Any I32 code)
4040
4041 #endif
4042
4043 #if i386_TARGET_ARCH
4044
4045 condFltReg cond x y = do
4046   CondCode _ cond cond_code <- condFltCode cond x y
4047   tmp <- getNewRegNat I8
4048   let
4049         code dst = cond_code `appOL` toOL [
4050                     SETCC cond (OpReg tmp),
4051                     MOVZxL I8 (OpReg tmp) (OpReg dst)
4052                   ]
4053   -- in
4054   return (Any I32 code)
4055
4056 #endif
4057
4058 #if x86_64_TARGET_ARCH
4059
4060 condFltReg cond x y = do
4061   CondCode _ cond cond_code <- condFltCode cond x y
4062   tmp1 <- getNewRegNat wordRep
4063   tmp2 <- getNewRegNat wordRep
4064   let
4065         -- We have to worry about unordered operands (eg. comparisons
4066         -- against NaN).  If the operands are unordered, the comparison
4067         -- sets the parity flag, carry flag and zero flag.
4068         -- All comparisons are supposed to return false for unordered
4069         -- operands except for !=, which returns true.
4070         --
4071         -- Optimisation: we don't have to test the parity flag if we
4072         -- know the test has already excluded the unordered case: eg >
4073         -- and >= test for a zero carry flag, which can only occur for
4074         -- ordered operands.
4075         --
4076         -- ToDo: by reversing comparisons we could avoid testing the
4077         -- parity flag in more cases.
4078
4079         code dst =
4080            cond_code `appOL`
4081              (case cond of
4082                 NE  -> or_unordered dst
4083                 GU  -> plain_test   dst
4084                 GEU -> plain_test   dst
4085                 _   -> and_ordered  dst)
4086
4087         plain_test dst = toOL [
4088                     SETCC cond (OpReg tmp1),
4089                     MOVZxL I8 (OpReg tmp1) (OpReg dst)
4090                  ]
4091         or_unordered dst = toOL [
4092                     SETCC cond (OpReg tmp1),
4093                     SETCC PARITY (OpReg tmp2),
4094                     OR I8 (OpReg tmp1) (OpReg tmp2),
4095                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4096                   ]
4097         and_ordered dst = toOL [
4098                     SETCC cond (OpReg tmp1),
4099                     SETCC NOTPARITY (OpReg tmp2),
4100                     AND I8 (OpReg tmp1) (OpReg tmp2),
4101                     MOVZxL I8 (OpReg tmp2) (OpReg dst)
4102                   ]
4103   -- in
4104   return (Any I32 code)
4105
4106 #endif
4107
4108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4109
4110 #if sparc_TARGET_ARCH
4111
4112 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4113     (src, code) <- getSomeReg x
4114     tmp <- getNewRegNat I32
4115     let
4116         code__2 dst = code `appOL` toOL [
4117             SUB False True g0 (RIReg src) g0,
4118             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4119     return (Any I32 code__2)
4120
4121 condIntReg EQQ x y = do
4122     (src1, code1) <- getSomeReg x
4123     (src2, code2) <- getSomeReg y
4124     tmp1 <- getNewRegNat I32
4125     tmp2 <- getNewRegNat I32
4126     let
4127         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4128             XOR False src1 (RIReg src2) dst,
4129             SUB False True g0 (RIReg dst) g0,
4130             SUB True False g0 (RIImm (ImmInt (-1))) dst]
4131     return (Any I32 code__2)
4132
4133 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4134     (src, code) <- getSomeReg x
4135     tmp <- getNewRegNat I32
4136     let
4137         code__2 dst = code `appOL` toOL [
4138             SUB False True g0 (RIReg src) g0,
4139             ADD True False g0 (RIImm (ImmInt 0)) dst]
4140     return (Any I32 code__2)
4141
4142 condIntReg NE x y = do
4143     (src1, code1) <- getSomeReg x
4144     (src2, code2) <- getSomeReg y
4145     tmp1 <- getNewRegNat I32
4146     tmp2 <- getNewRegNat I32
4147     let
4148         code__2 dst = code1 `appOL` code2 `appOL` toOL [
4149             XOR False src1 (RIReg src2) dst,
4150             SUB False True g0 (RIReg dst) g0,
4151             ADD True False g0 (RIImm (ImmInt 0)) dst]
4152     return (Any I32 code__2)
4153
4154 condIntReg cond x y = do
4155     BlockId lbl1 <- getBlockIdNat
4156     BlockId lbl2 <- getBlockIdNat
4157     CondCode _ cond cond_code <- condIntCode cond x y
4158     let
4159         code__2 dst = cond_code `appOL` toOL [
4160             BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4161             OR False g0 (RIImm (ImmInt 0)) dst,
4162             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4163             NEWBLOCK (BlockId lbl1),
4164             OR False g0 (RIImm (ImmInt 1)) dst,
4165             NEWBLOCK (BlockId lbl2)]
4166     return (Any I32 code__2)
4167
4168 condFltReg cond x y = do
4169     BlockId lbl1 <- getBlockIdNat
4170     BlockId lbl2 <- getBlockIdNat
4171     CondCode _ cond cond_code <- condFltCode cond x y
4172     let
4173         code__2 dst = cond_code `appOL` toOL [
4174             NOP,
4175             BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4176             OR False g0 (RIImm (ImmInt 0)) dst,
4177             BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4178             NEWBLOCK (BlockId lbl1),
4179             OR False g0 (RIImm (ImmInt 1)) dst,
4180             NEWBLOCK (BlockId lbl2)]
4181     return (Any I32 code__2)
4182
4183 #endif /* sparc_TARGET_ARCH */
4184
4185 #if powerpc_TARGET_ARCH
4186 condReg getCond = do
4187     lbl1 <- getBlockIdNat
4188     lbl2 <- getBlockIdNat
4189     CondCode _ cond cond_code <- getCond
4190     let
4191 {-        code dst = cond_code `appOL` toOL [
4192                 BCC cond lbl1,
4193                 LI dst (ImmInt 0),
4194                 BCC ALWAYS lbl2,
4195                 NEWBLOCK lbl1,
4196                 LI dst (ImmInt 1),
4197                 BCC ALWAYS lbl2,
4198                 NEWBLOCK lbl2
4199             ]-}
4200         code dst = cond_code
4201             `appOL` negate_code
4202             `appOL` toOL [
4203                 MFCR dst,
4204                 RLWINM dst dst (bit + 1) 31 31
4205             ]
4206
4207         negate_code | do_negate = unitOL (CRNOR bit bit bit)
4208                     | otherwise = nilOL
4209
4210         (bit, do_negate) = case cond of
4211             LTT -> (0, False)
4212             LE  -> (1, True)
4213             EQQ -> (2, False)
4214             GE  -> (0, True)
4215             GTT -> (1, False)
4216
4217             NE  -> (2, True)
4218
4219             LU  -> (0, False)
4220             LEU -> (1, True)
4221             GEU -> (0, True)
4222             GU  -> (1, False)
4223
4224     return (Any I32 code)
4225
4226 condIntReg cond x y = condReg (condIntCode cond x y)
4227 condFltReg cond x y = condReg (condFltCode cond x y)
4228 #endif /* powerpc_TARGET_ARCH */
4229
4230
4231 -- -----------------------------------------------------------------------------
4232 -- 'trivial*Code': deal with trivial instructions
4233
4234 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4235 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4236 -- Only look for constants on the right hand side, because that's
4237 -- where the generic optimizer will have put them.
4238
4239 -- Similarly, for unary instructions, we don't have to worry about
4240 -- matching an StInt as the argument, because genericOpt will already
4241 -- have handled the constant-folding.
4242
4243 trivialCode
4244     :: MachRep
4245     -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4246       ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4247                      -> Maybe (Operand -> Operand -> Instr)
4248       ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4249                      -> Maybe (Operand -> Operand -> Instr)
4250       ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4251       ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4252       ,)))))
4253     -> CmmExpr -> CmmExpr -- the two arguments
4254     -> NatM Register
4255
4256 #ifndef powerpc_TARGET_ARCH
4257 trivialFCode
4258     :: MachRep
4259     -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4260       ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4261       ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4262       ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4263       ,))))
4264     -> CmmExpr -> CmmExpr -- the two arguments
4265     -> NatM Register
4266 #endif
4267
4268 trivialUCode
4269     :: MachRep
4270     -> IF_ARCH_alpha((RI -> Reg -> Instr)
4271       ,IF_ARCH_i386 ((Operand -> Instr)
4272       ,IF_ARCH_x86_64 ((Operand -> Instr)
4273       ,IF_ARCH_sparc((RI -> Reg -> Instr)
4274       ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4275       ,)))))
4276     -> CmmExpr  -- the one argument
4277     -> NatM Register
4278
4279 #ifndef powerpc_TARGET_ARCH
4280 trivialUFCode
4281     :: MachRep
4282     -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4283       ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4284       ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4285       ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4286       ,))))
4287     -> CmmExpr -- the one argument
4288     -> NatM Register
4289 #endif
4290
4291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4292
4293 #if alpha_TARGET_ARCH
4294
4295 trivialCode instr x (StInt y)
4296   | fits8Bits y
4297   = getRegister x               `thenNat` \ register ->
4298     getNewRegNat IntRep         `thenNat` \ tmp ->
4299     let
4300         code = registerCode register tmp
4301         src1 = registerName register tmp
4302         src2 = ImmInt (fromInteger y)
4303         code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4304     in
4305     return (Any IntRep code__2)
4306
4307 trivialCode instr x y
4308   = getRegister x               `thenNat` \ register1 ->
4309     getRegister y               `thenNat` \ register2 ->
4310     getNewRegNat IntRep         `thenNat` \ tmp1 ->
4311     getNewRegNat IntRep         `thenNat` \ tmp2 ->
4312     let
4313         code1 = registerCode register1 tmp1 []
4314         src1  = registerName register1 tmp1
4315         code2 = registerCode register2 tmp2 []
4316         src2  = registerName register2 tmp2
4317         code__2 dst = asmSeqThen [code1, code2] .
4318                      mkSeqInstr (instr src1 (RIReg src2) dst)
4319     in
4320     return (Any IntRep code__2)
4321
4322 ------------
4323 trivialUCode instr x
4324   = getRegister x               `thenNat` \ register ->
4325     getNewRegNat IntRep         `thenNat` \ tmp ->
4326     let
4327         code = registerCode register tmp
4328         src  = registerName register tmp
4329         code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4330     in
4331     return (Any IntRep code__2)
4332
4333 ------------
4334 trivialFCode _ instr x y
4335   = getRegister x               `thenNat` \ register1 ->
4336     getRegister y               `thenNat` \ register2 ->
4337     getNewRegNat F64    `thenNat` \ tmp1 ->
4338     getNewRegNat F64    `thenNat` \ tmp2 ->
4339     let
4340         code1 = registerCode register1 tmp1
4341         src1  = registerName register1 tmp1
4342
4343         code2 = registerCode register2 tmp2
4344         src2  = registerName register2 tmp2
4345
4346         code__2 dst = asmSeqThen [code1 [], code2 []] .
4347                       mkSeqInstr (instr src1 src2 dst)
4348     in
4349     return (Any F64 code__2)
4350
4351 trivialUFCode _ instr x
4352   = getRegister x               `thenNat` \ register ->
4353     getNewRegNat F64    `thenNat` \ tmp ->
4354     let
4355         code = registerCode register tmp
4356         src  = registerName register tmp
4357         code__2 dst = code . mkSeqInstr (instr src dst)
4358     in
4359     return (Any F64 code__2)
4360
4361 #endif /* alpha_TARGET_ARCH */
4362
4363 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4364
4365 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4366
4367 {-
4368 The Rules of the Game are:
4369
4370 * You cannot assume anything about the destination register dst;
4371   it may be anything, including a fixed reg.
4372
4373 * You may compute an operand into a fixed reg, but you may not
4374   subsequently change the contents of that fixed reg.  If you
4375   want to do so, first copy the value either to a temporary
4376   or into dst.  You are free to modify dst even if it happens
4377   to be a fixed reg -- that's not your problem.
4378
4379 * You cannot assume that a fixed reg will stay live over an
4380   arbitrary computation.  The same applies to the dst reg.
4381
4382 * Temporary regs obtained from getNewRegNat are distinct from
4383   each other and from all other regs, and stay live over
4384   arbitrary computations.
4385
4386 --------------------
4387
4388 SDM's version of The Rules:
4389
4390 * If getRegister returns Any, that means it can generate correct
4391   code which places the result in any register, period.  Even if that
4392   register happens to be read during the computation.
4393
4394   Corollary #1: this means that if you are generating code for an
4395   operation with two arbitrary operands, you cannot assign the result
4396   of the first operand into the destination register before computing
4397   the second operand.  The second operand might require the old value
4398   of the destination register.
4399
4400   Corollary #2: A function might be able to generate more efficient
4401   code if it knows the destination register is a new temporary (and
4402   therefore not read by any of the sub-computations).
4403
4404 * If getRegister returns Any, then the code it generates may modify only:
4405         (a) fresh temporaries
4406         (b) the destination register
4407         (c) known registers (eg. %ecx is used by shifts)
4408   In particular, it may *not* modify global registers, unless the global
4409   register happens to be the destination register.
4410 -}
4411
4412 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4413   | not (is64BitLit lit_a) = do
4414   b_code <- getAnyReg b
4415   let
4416        code dst
4417          = b_code dst `snocOL`
4418            revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4419   -- in
4420   return (Any rep code)
4421
4422 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4423
4424 -- This is re-used for floating pt instructions too.
4425 genTrivialCode rep instr a b = do
4426   (b_op, b_code) <- getNonClobberedOperand b
4427   a_code <- getAnyReg a
4428   tmp <- getNewRegNat rep
4429   let
4430      -- We want the value of b to stay alive across the computation of a.
4431      -- But, we want to calculate a straight into the destination register,
4432      -- because the instruction only has two operands (dst := dst `op` src).
4433      -- The troublesome case is when the result of b is in the same register
4434      -- as the destination reg.  In this case, we have to save b in a
4435      -- new temporary across the computation of a.
4436      code dst
4437         | dst `regClashesWithOp` b_op =
4438                 b_code `appOL`
4439                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4440                 a_code dst `snocOL`
4441                 instr (OpReg tmp) (OpReg dst)
4442         | otherwise =
4443                 b_code `appOL`
4444                 a_code dst `snocOL`
4445                 instr b_op (OpReg dst)
4446   -- in
4447   return (Any rep code)
4448
4449 reg `regClashesWithOp` OpReg reg2   = reg == reg2
4450 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4451 reg `regClashesWithOp` _            = False
4452
4453 -----------
4454
4455 trivialUCode rep instr x = do
4456   x_code <- getAnyReg x
4457   let
4458      code dst =
4459         x_code dst `snocOL`
4460         instr (OpReg dst)
4461   -- in
4462   return (Any rep code)
4463
4464 -----------
4465
4466 #if i386_TARGET_ARCH
4467
4468 trivialFCode pk instr x y = do
4469   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4470   (y_reg, y_code) <- getSomeReg y
4471   let
4472      code dst =
4473         x_code `appOL`
4474         y_code `snocOL`
4475         instr pk x_reg y_reg dst
4476   -- in
4477   return (Any pk code)
4478
4479 #endif
4480
4481 #if x86_64_TARGET_ARCH
4482
4483 trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
4484
4485 #endif
4486
4487 -------------
4488
4489 trivialUFCode rep instr x = do
4490   (x_reg, x_code) <- getSomeReg x
4491   let
4492      code dst =
4493         x_code `snocOL`
4494         instr x_reg dst
4495   -- in
4496   return (Any rep code)
4497
4498 #endif /* i386_TARGET_ARCH */
4499
4500 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4501
4502 #if sparc_TARGET_ARCH
4503
4504 trivialCode pk instr x (CmmLit (CmmInt y d))
4505   | fits13Bits y
4506   = do
4507       (src1, code) <- getSomeReg x
4508       tmp <- getNewRegNat I32
4509       let
4510         src2 = ImmInt (fromInteger y)
4511         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4512       return (Any I32 code__2)
4513
4514 trivialCode pk instr x y = do
4515     (src1, code1) <- getSomeReg x
4516     (src2, code2) <- getSomeReg y
4517     tmp1 <- getNewRegNat I32
4518     tmp2 <- getNewRegNat I32
4519     let
4520         code__2 dst = code1 `appOL` code2 `snocOL`
4521                       instr src1 (RIReg src2) dst
4522     return (Any I32 code__2)
4523
4524 ------------
4525 trivialFCode pk instr x y = do
4526     (src1, code1) <- getSomeReg x
4527     (src2, code2) <- getSomeReg y
4528     tmp1 <- getNewRegNat (cmmExprRep x)
4529     tmp2 <- getNewRegNat (cmmExprRep y)
4530     tmp <- getNewRegNat F64
4531     let
4532         promote x = FxTOy F32 F64 x tmp
4533
4534         pk1   = cmmExprRep x
4535         pk2   = cmmExprRep y
4536
4537         code__2 dst =
4538                 if pk1 == pk2 then
4539                     code1 `appOL` code2 `snocOL`
4540                     instr pk src1 src2 dst
4541                 else if pk1 == F32 then
4542                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4543                     instr F64 tmp src2 dst
4544                 else
4545                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4546                     instr F64 src1 tmp dst
4547     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4548
4549 ------------
4550 trivialUCode pk instr x = do
4551     (src, code) <- getSomeReg x
4552     tmp <- getNewRegNat pk
4553     let
4554         code__2 dst = code `snocOL` instr (RIReg src) dst
4555     return (Any pk code__2)
4556
4557 -------------
4558 trivialUFCode pk instr x = do
4559     (src, code) <- getSomeReg x
4560     tmp <- getNewRegNat pk
4561     let
4562         code__2 dst = code `snocOL` instr src dst
4563     return (Any pk code__2)
4564
4565 #endif /* sparc_TARGET_ARCH */
4566
4567 #if powerpc_TARGET_ARCH
4568
4569 {-
4570 Wolfgang's PowerPC version of The Rules:
4571
4572 A slightly modified version of The Rules to take advantage of the fact
4573 that PowerPC instructions work on all registers and don't implicitly
4574 clobber any fixed registers.
4575
4576 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4577
4578 * If getRegister returns Any, then the code it generates may modify only:
4579         (a) fresh temporaries
4580         (b) the destination register
4581   It may *not* modify global registers, unless the global
4582   register happens to be the destination register.
4583   It may not clobber any other registers. In fact, only ccalls clobber any
4584   fixed registers.
4585   Also, it may not modify the counter register (used by genCCall).
4586
4587   Corollary: If a getRegister for a subexpression returns Fixed, you need
4588   not move it to a fresh temporary before evaluating the next subexpression.
4589   The Fixed register won't be modified.
4590   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4591
4592 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4593   the value of the destination register.
4594 -}
4595
4596 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4597     | Just imm <- makeImmediate rep signed y
4598     = do
4599         (src1, code1) <- getSomeReg x
4600         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4601         return (Any rep code)
4602
4603 trivialCode rep signed instr x y = do
4604     (src1, code1) <- getSomeReg x
4605     (src2, code2) <- getSomeReg y
4606     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4607     return (Any rep code)
4608
4609 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4610     -> CmmExpr -> CmmExpr -> NatM Register
4611 trivialCodeNoImm rep instr x y = do
4612     (src1, code1) <- getSomeReg x
4613     (src2, code2) <- getSomeReg y
4614     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4615     return (Any rep code)
4616
4617 trivialUCode rep instr x = do
4618     (src, code) <- getSomeReg x
4619     let code' dst = code `snocOL` instr dst src
4620     return (Any rep code')
4621
4622 -- There is no "remainder" instruction on the PPC, so we have to do
4623 -- it the hard way.
4624 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4625
4626 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4627     -> CmmExpr -> CmmExpr -> NatM Register
4628 remainderCode rep div x y = do
4629     (src1, code1) <- getSomeReg x
4630     (src2, code2) <- getSomeReg y
4631     let code dst = code1 `appOL` code2 `appOL` toOL [
4632                 div dst src1 src2,
4633                 MULLW dst dst (RIReg src2),
4634                 SUBF dst dst src1
4635             ]
4636     return (Any rep code)
4637
4638 #endif /* powerpc_TARGET_ARCH */
4639
4640
4641 -- -----------------------------------------------------------------------------
4642 --  Coercing to/from integer/floating-point...
4643
4644 -- When going to integer, we truncate (round towards 0).
4645
4646 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4647 -- conversions.  We have to store temporaries in memory to move
4648 -- between the integer and the floating point register sets.
4649
4650 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4651 -- pretend, on sparc at least, that double and float regs are seperate
4652 -- kinds, so the value has to be computed into one kind before being
4653 -- explicitly "converted" to live in the other kind.
4654
4655 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4656 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4657
4658 #if sparc_TARGET_ARCH
4659 coerceDbl2Flt :: CmmExpr -> NatM Register
4660 coerceFlt2Dbl :: CmmExpr -> NatM Register
4661 #endif
4662
4663 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4664
4665 #if alpha_TARGET_ARCH
4666
4667 coerceInt2FP _ x
4668   = getRegister x               `thenNat` \ register ->
4669     getNewRegNat IntRep         `thenNat` \ reg ->
4670     let
4671         code = registerCode register reg
4672         src  = registerName register reg
4673
4674         code__2 dst = code . mkSeqInstrs [
4675             ST Q src (spRel 0),
4676             LD TF dst (spRel 0),
4677             CVTxy Q TF dst dst]
4678     in
4679     return (Any F64 code__2)
4680
4681 -------------
4682 coerceFP2Int x
4683   = getRegister x               `thenNat` \ register ->
4684     getNewRegNat F64    `thenNat` \ tmp ->
4685     let
4686         code = registerCode register tmp
4687         src  = registerName register tmp
4688
4689         code__2 dst = code . mkSeqInstrs [
4690             CVTxy TF Q src tmp,
4691             ST TF tmp (spRel 0),
4692             LD Q dst (spRel 0)]
4693     in
4694     return (Any IntRep code__2)
4695
4696 #endif /* alpha_TARGET_ARCH */
4697
4698 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4699
4700 #if i386_TARGET_ARCH
4701
4702 coerceInt2FP from to x = do
4703   (x_reg, x_code) <- getSomeReg x
4704   let
4705         opc  = case to of F32 -> GITOF; F64 -> GITOD
4706         code dst = x_code `snocOL` opc x_reg dst
4707         -- ToDo: works for non-I32 reps?
4708   -- in
4709   return (Any to code)
4710
4711 ------------
4712
4713 coerceFP2Int from to x = do
4714   (x_reg, x_code) <- getSomeReg x
4715   let
4716         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4717         code dst = x_code `snocOL` opc x_reg dst
4718         -- ToDo: works for non-I32 reps?
4719   -- in
4720   return (Any to code)
4721
4722 #endif /* i386_TARGET_ARCH */
4723
4724 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4725
4726 #if x86_64_TARGET_ARCH
4727
4728 coerceFP2Int from to x = do
4729   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4730   let
4731         opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4732         code dst = x_code `snocOL` opc x_op dst
4733   -- in
4734   return (Any to code) -- works even if the destination rep is <I32
4735
4736 coerceInt2FP from to x = do
4737   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4738   let
4739         opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4740         code dst = x_code `snocOL` opc x_op dst
4741   -- in
4742   return (Any to code) -- works even if the destination rep is <I32
4743
4744 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4745 coerceFP2FP to x = do
4746   (x_reg, x_code) <- getSomeReg x
4747   let
4748         opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4749         code dst = x_code `snocOL` opc x_reg dst
4750   -- in
4751   return (Any to code)
4752
4753 #endif
4754
4755 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4756
4757 #if sparc_TARGET_ARCH
4758
4759 coerceInt2FP pk1 pk2 x = do
4760     (src, code) <- getSomeReg x
4761     let
4762         code__2 dst = code `appOL` toOL [
4763             ST pk1 src (spRel (-2)),
4764             LD pk1 (spRel (-2)) dst,
4765             FxTOy pk1 pk2 dst dst]
4766     return (Any pk2 code__2)
4767
4768 ------------
4769 coerceFP2Int pk fprep x = do
4770     (src, code) <- getSomeReg x
4771     reg <- getNewRegNat fprep
4772     tmp <- getNewRegNat pk
4773     let
4774         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4775             code `appOL` toOL [
4776             FxTOy fprep pk src tmp,
4777             ST pk tmp (spRel (-2)),
4778             LD pk (spRel (-2)) dst]
4779     return (Any pk code__2)
4780
4781 ------------
4782 coerceDbl2Flt x = do
4783     (src, code) <- getSomeReg x
4784     return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4785
4786 ------------
4787 coerceFlt2Dbl x = do
4788     (src, code) <- getSomeReg x
4789     return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4790
4791 #endif /* sparc_TARGET_ARCH */
4792
4793 #if powerpc_TARGET_ARCH
4794 coerceInt2FP fromRep toRep x = do
4795     (src, code) <- getSomeReg x
4796     lbl <- getNewLabelNat
4797     itmp <- getNewRegNat I32
4798     ftmp <- getNewRegNat F64
4799     dflags <- getDynFlagsNat
4800     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4801     Amode addr addr_code <- getAmode dynRef
4802     let
4803         code' dst = code `appOL` maybe_exts `appOL` toOL [
4804                 LDATA ReadOnlyData
4805                                 [CmmDataLabel lbl,
4806                                  CmmStaticLit (CmmInt 0x43300000 I32),
4807                                  CmmStaticLit (CmmInt 0x80000000 I32)],
4808                 XORIS itmp src (ImmInt 0x8000),
4809                 ST I32 itmp (spRel 3),
4810                 LIS itmp (ImmInt 0x4330),
4811                 ST I32 itmp (spRel 2),
4812                 LD F64 ftmp (spRel 2)
4813             ] `appOL` addr_code `appOL` toOL [
4814                 LD F64 dst addr,
4815                 FSUB F64 dst ftmp dst
4816             ] `appOL` maybe_frsp dst
4817
4818         maybe_exts = case fromRep of
4819                         I8 ->  unitOL $ EXTS I8 src src
4820                         I16 -> unitOL $ EXTS I16 src src
4821                         I32 -> nilOL
4822         maybe_frsp dst = case toRep of
4823                         F32 -> unitOL $ FRSP dst dst
4824                         F64 -> nilOL
4825     return (Any toRep code')
4826
4827 coerceFP2Int fromRep toRep x = do
4828     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4829     (src, code) <- getSomeReg x
4830     tmp <- getNewRegNat F64
4831     let
4832         code' dst = code `appOL` toOL [
4833                 -- convert to int in FP reg
4834             FCTIWZ tmp src,
4835                 -- store value (64bit) from FP to stack
4836             ST F64 tmp (spRel 2),
4837                 -- read low word of value (high word is undefined)
4838             LD I32 dst (spRel 3)]
4839     return (Any toRep code')
4840 #endif /* powerpc_TARGET_ARCH */
4841
4842
4843 -- -----------------------------------------------------------------------------
4844 -- eXTRA_STK_ARGS_HERE
4845
4846 -- We (allegedly) put the first six C-call arguments in registers;
4847 -- where do we start putting the rest of them?
4848
4849 -- Moved from MachInstrs (SDM):
4850
4851 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4852 eXTRA_STK_ARGS_HERE :: Int
4853 eXTRA_STK_ARGS_HERE
4854   = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
4855 #endif
4856