compiler/nativeGen/PPC/CodeGen.hs

   1 {-# OPTIONS -w #-}
   2
   3 -----------------------------------------------------------------------------
   4 --
   5 -- Generating machine code (instruction selection)
   6 --
   7 -- (c) The University of Glasgow 1996-2004
   8 --
   9 -----------------------------------------------------------------------------
  10
  11 -- This is a big module, but, if you pay attention to
  12 -- (a) the sectioning, (b) the type signatures, and
   13 -- (c) the #if blah_TARGET_ARCH things, the
  14 -- structure should not be too overwhelming.
  15
  16 module PPC.CodeGen (
  17         cmmTopCodeGen,
  18         InstrBlock
  19 )
  20
  21 where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "../includes/MachDeps.h"
  26
  27 -- NCG stuff:
  28 import PPC.Instr
  29 import PPC.Cond
  30 import PPC.Regs
  31 import PPC.RegInfo
  32 import NCGMonad
  33 import Instruction
  34 import PIC
  35 import Size
  36 import RegClass
  37 import Reg
  38 import TargetReg
  39 import Platform
  40
  41 -- Our intermediate code:
  42 import BlockId
  43 import PprCmm           ( pprExpr )
  44 import Cmm
  45 import CLabel
  46
  47 -- The rest:
  48 import StaticFlags      ( opt_PIC )
  49 import OrdList
  50 import qualified Outputable as O
  51 import Outputable
  52 import DynFlags
  53
  54 import Control.Monad    ( mapAndUnzipM )
  55 import Data.Bits
  56 import Data.Int
  57 import Data.Word
  58
  59 #if darwin_TARGET_OS || linux_TARGET_OS
  60 import BasicTypes
  61 import FastString
  62 #endif
  63
  64 -- -----------------------------------------------------------------------------
  65 -- Top-level of the instruction selector
  66
   67 -- Note: 'InstrBlock's (the type is defined further down in this module)
   68 -- are the insn sequences generated by the insn selectors.  They are
   69 -- really trees of insns to facilitate fast appending, where a
   70 -- left-to-right traversal yields the insns in the correct order.
  71
  72 cmmTopCodeGen
  73         :: DynFlags
  74         -> RawCmmTop
  75         -> NatM [NatCmmTop Instr]
  76
  77 cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do
  78   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  79   picBaseMb <- getPicBaseMaybeNat
  80   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  81       tops = proc : concat statics
  82       os   = platformOS $ targetPlatform dflags
  83   case picBaseMb of
  84       Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
  85       Nothing -> return tops
  86
  87 cmmTopCodeGen dflags (CmmData sec dat) = do
  88   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  89
  90 basicBlockCodeGen
  91         :: CmmBasicBlock
  92         -> NatM ( [NatBasicBlock Instr]
  93                 , [NatCmmTop Instr])
  94
  95 basicBlockCodeGen (BasicBlock id stmts) = do
  96   instrs <- stmtsToInstrs stmts
  97   -- code generation may introduce new basic block boundaries, which
  98   -- are indicated by the NEWBLOCK instruction.  We must split up the
  99   -- instruction stream into basic blocks again.  Also, we extract
 100   -- LDATAs here too.
 101   let
 102         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
 103
 104         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
 105           = ([], BasicBlock id instrs : blocks, statics)
 106         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
 107           = (instrs, blocks, CmmData sec dat:statics)
 108         mkBlocks instr (instrs,blocks,statics)
 109           = (instr:instrs, blocks, statics)
 110   -- in
 111   return (BasicBlock id top : other_blocks, statics)
 112
 113 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 114 stmtsToInstrs stmts
 115    = do instrss <- mapM stmtToInstrs stmts
 116         return (concatOL instrss)
 117
 118 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 119 stmtToInstrs stmt = case stmt of
 120     CmmNop         -> return nilOL
 121     CmmComment s   -> return (unitOL (COMMENT s))
 122
 123     CmmAssign reg src
 124       | isFloatType ty -> assignReg_FltCode size reg src
 125 #if WORD_SIZE_IN_BITS==32
 126       | isWord64 ty    -> assignReg_I64Code      reg src
 127 #endif
 128       | otherwise        -> assignReg_IntCode size reg src
 129         where ty = cmmRegType reg
 130               size = cmmTypeSize ty
 131
 132     CmmStore addr src
 133       | isFloatType ty -> assignMem_FltCode size addr src
 134 #if WORD_SIZE_IN_BITS==32
 135       | isWord64 ty      -> assignMem_I64Code      addr src
 136 #endif
 137       | otherwise        -> assignMem_IntCode size addr src
 138         where ty = cmmExprType src
 139               size = cmmTypeSize ty
 140
 141     CmmCall target result_regs args _ _
 142        -> genCCall target result_regs args
 143
 144     CmmBranch id          -> genBranch id
 145     CmmCondBranch arg id  -> genCondJump id arg
 146     CmmSwitch arg ids     -> genSwitch arg ids
 147     CmmJump arg params    -> genJump arg
 148     CmmReturn params      ->
 149       panic "stmtToInstrs: return statement should have been cps'd away"
 150
 151
 152 --------------------------------------------------------------------------------
 153 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
 154 --      They are really trees of insns to facilitate fast appending, where a
 155 --      left-to-right traversal yields the insns in the correct order.
 156 --
 157 type InstrBlock
 158         = OrdList Instr
 159
 160
 161 -- | Register's passed up the tree.  If the stix code forces the register
 162 --      to live in a pre-decided machine register, it comes out as @Fixed@;
 163 --      otherwise, it comes out as @Any@, and the parent can decide which
 164 --      register to put it in.
 165 --
 166 data Register
 167         = Fixed Size Reg InstrBlock
 168         | Any   Size (Reg -> InstrBlock)
 169
 170
 171 swizzleRegisterRep :: Register -> Size -> Register
 172 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 173 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 174
 175
 176 -- | Grab the Reg for a CmmReg
 177 getRegisterReg :: CmmReg -> Reg
 178
 179 getRegisterReg (CmmLocal (LocalReg u pk))
 180   = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
 181
 182 getRegisterReg (CmmGlobal mid)
 183   = case get_GlobalReg_reg_or_addr mid of
 184        Left reg -> reg
 185        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 186           -- By this stage, the only MagicIds remaining should be the
 187           -- ones which map to a real machine register on this
 188           -- platform.  Hence ...
 189
 190
 191 {-
  192 Now, given a tree (the argument to a CmmLoad) that references memory,
 193 produce a suitable addressing mode.
 194
 195 A Rule of the Game (tm) for Amodes: use of the addr bit must
 196 immediately follow use of the code part, since the code part puts
 197 values in registers which the addr then refers to.  So you can't put
 198 anything in between, lest it overwrite some of those registers.  If
 199 you need to do some other computation between the code part and use of
 200 the addr bit, first store the effective address from the amode in a
 201 temporary, then do the other computation, and then use the temporary:
 202
 203     code
 204     LEA amode, tmp
 205     ... other computation ...
 206     ... (tmp) ...
 207 -}
 208
 209
 210 -- | Check whether an integer will fit in 32 bits.
 211 --      A CmmInt is intended to be truncated to the appropriate
 212 --      number of bits, so here we truncate it to Int64.  This is
 213 --      important because e.g. -1 as a CmmInt might be either
 214 --      -1 or 18446744073709551615.
 215 --
 216 is32BitInteger :: Integer -> Bool
 217 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
 218   where i64 = fromIntegral i :: Int64
 219
 220
 221 -- | Convert a BlockId to some CmmStatic data
 222 jumpTableEntry :: Maybe BlockId -> CmmStatic
 223 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
 224 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
 225     where blockLabel = mkAsmTempLabel id
 226
 227
 228
 229 -- -----------------------------------------------------------------------------
 230 -- General things for putting together code sequences
 231
 232 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 233 -- CmmExprs into CmmRegOff?
 234 mangleIndexTree :: CmmExpr -> CmmExpr
 235 mangleIndexTree (CmmRegOff reg off)
 236   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 237   where width = typeWidth (cmmRegType reg)
 238
 239 mangleIndexTree _
 240         = panic "PPC.CodeGen.mangleIndexTree: no match"
 241
 242 -- -----------------------------------------------------------------------------
 243 --  Code gen for 64-bit arithmetic on 32-bit platforms
 244
 245 {-
 246 Simple support for generating 64-bit code (ie, 64 bit values and 64
 247 bit assignments) on 32-bit platforms.  Unlike the main code generator
 248 we merely shoot for generating working code as simply as possible, and
 249 pay little attention to code quality.  Specifically, there is no
 250 attempt to deal cleverly with the fixed-vs-floating register
 251 distinction; all values are generated into (pairs of) floating
 252 registers, even if this would mean some redundant reg-reg moves as a
 253 result.  Only one of the VRegUniques is returned, since it will be
 254 of the VRegUniqueLo form, and the upper-half VReg can be determined
 255 by applying getHiVRegFromLo to it.
 256 -}
 257
 258 data ChildCode64        -- a.k.a "Register64"
 259       = ChildCode64
 260            InstrBlock   -- code
 261            Reg          -- the lower 32-bit temporary which contains the
 262                         -- result; use getHiVRegFromLo to find the other
 263                         -- VRegUnique.  Rules of this simplified insn
 264                         -- selection game are therefore that the returned
 265                         -- Reg may be modified
 266
 267
 268 -- | The dual to getAnyReg: compute an expression into a register, but
 269 --      we don't mind which one it is.
 270 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 271 getSomeReg expr = do
 272   r <- getRegister expr
 273   case r of
 274     Any rep code -> do
 275         tmp <- getNewRegNat rep
 276         return (tmp, code tmp)
 277     Fixed _ reg code ->
 278         return (reg, code)
 279
 280 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 281 getI64Amodes addrTree = do
 282     Amode hi_addr addr_code <- getAmode addrTree
 283     case addrOffset hi_addr 4 of
 284         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 285         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 286                            return (AddrRegImm hi_ptr (ImmInt 0),
 287                                    AddrRegImm hi_ptr (ImmInt 4),
 288                                    code)
 289
 290
 291 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 292 assignMem_I64Code addrTree valueTree = do
 293         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 294         ChildCode64 vcode rlo <- iselExpr64 valueTree
 295         let
 296                 rhi = getHiVRegFromLo rlo
 297
 298                 -- Big-endian store
 299                 mov_hi = ST II32 rhi hi_addr
 300                 mov_lo = ST II32 rlo lo_addr
 301         -- in
 302         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 303
 304
 305 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 306 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 307    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 308    let
 309          r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
 310          r_dst_hi = getHiVRegFromLo r_dst_lo
 311          r_src_hi = getHiVRegFromLo r_src_lo
 312          mov_lo = MR r_dst_lo r_src_lo
 313          mov_hi = MR r_dst_hi r_src_hi
 314    -- in
 315    return (
 316         vcode `snocOL` mov_lo `snocOL` mov_hi
 317      )
 318
 319 assignReg_I64Code lvalue valueTree
 320    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 321
 322
 323 iselExpr64        :: CmmExpr -> NatM ChildCode64
 324 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 325     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 326     (rlo, rhi) <- getNewRegPairNat II32
 327     let mov_hi = LD II32 rhi hi_addr
 328         mov_lo = LD II32 rlo lo_addr
 329     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 330                          rlo
 331
 332 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 333    = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
 334
 335 iselExpr64 (CmmLit (CmmInt i _)) = do
 336   (rlo,rhi) <- getNewRegPairNat II32
 337   let
 338         half0 = fromIntegral (fromIntegral i :: Word16)
 339         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 340         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 341         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 342
 343         code = toOL [
 344                 LIS rlo (ImmInt half1),
 345                 OR rlo rlo (RIImm $ ImmInt half0),
 346                 LIS rhi (ImmInt half3),
 347                 OR rlo rlo (RIImm $ ImmInt half2)
 348                 ]
 349   -- in
 350   return (ChildCode64 code rlo)
 351
 352 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 353    ChildCode64 code1 r1lo <- iselExpr64 e1
 354    ChildCode64 code2 r2lo <- iselExpr64 e2
 355    (rlo,rhi) <- getNewRegPairNat II32
 356    let
 357         r1hi = getHiVRegFromLo r1lo
 358         r2hi = getHiVRegFromLo r2lo
 359         code =  code1 `appOL`
 360                 code2 `appOL`
 361                 toOL [ ADDC rlo r1lo r2lo,
 362                        ADDE rhi r1hi r2hi ]
 363    -- in
 364    return (ChildCode64 code rlo)
 365
 366 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
 367     (expr_reg,expr_code) <- getSomeReg expr
 368     (rlo, rhi) <- getNewRegPairNat II32
 369     let mov_hi = LI rhi (ImmInt 0)
 370         mov_lo = MR rlo expr_reg
 371     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 372                          rlo
 373 iselExpr64 expr
 374    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 375
 376
 377
 378 getRegister :: CmmExpr -> NatM Register
 379
 380 getRegister (CmmReg reg)
 381   = return (Fixed (cmmTypeSize (cmmRegType reg))
 382                   (getRegisterReg reg) nilOL)
 383
 384 getRegister tree@(CmmRegOff _ _)
 385   = getRegister (mangleIndexTree tree)
 386
 387
 388 #if WORD_SIZE_IN_BITS==32
  389     -- for 32-bit architectures, support some 64 -> 32 bit conversions:
 390     -- TO_W_(x), TO_W_(x >> 32)
 391
 392 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 393              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 394   ChildCode64 code rlo <- iselExpr64 x
 395   return $ Fixed II32 (getHiVRegFromLo rlo) code
 396
 397 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 398              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 399   ChildCode64 code rlo <- iselExpr64 x
 400   return $ Fixed II32 (getHiVRegFromLo rlo) code
 401
 402 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 403   ChildCode64 code rlo <- iselExpr64 x
 404   return $ Fixed II32 rlo code
 405
 406 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 407   ChildCode64 code rlo <- iselExpr64 x
 408   return $ Fixed II32 rlo code
 409
 410 #endif
 411
 412
 413 getRegister (CmmLoad mem pk)
 414   | not (isWord64 pk)
 415   = do
 416         Amode addr addr_code <- getAmode mem
 417         let code dst = ASSERT((targetClassOfReg dst == RcDouble) == isFloatType pk)
 418                        addr_code `snocOL` LD size dst addr
 419         return (Any size code)
 420           where size = cmmTypeSize pk
 421
 422 -- catch simple cases of zero- or sign-extended load
 423 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
 424     Amode addr addr_code <- getAmode mem
 425     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
 426
 427 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
 428
 429 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
 430     Amode addr addr_code <- getAmode mem
 431     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
 432
 433 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
 434     Amode addr addr_code <- getAmode mem
 435     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
 436
 437 getRegister (CmmMachOp mop [x]) -- unary MachOps
 438   = case mop of
 439       MO_Not rep   -> triv_ucode_int rep NOT
 440
 441       MO_F_Neg w   -> triv_ucode_float w FNEG
 442       MO_S_Neg w   -> triv_ucode_int   w NEG
 443
 444       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
 445       MO_FF_Conv W32 W64 -> conversionNop FF64 x
 446
 447       MO_FS_Conv from to -> coerceFP2Int from to x
 448       MO_SF_Conv from to -> coerceInt2FP from to x
 449
 450       MO_SS_Conv from to
 451         | from == to    -> conversionNop (intSize to) x
 452
 453         -- narrowing is a nop: we treat the high bits as undefined
 454       MO_SS_Conv W32 to -> conversionNop (intSize to) x
 455       MO_SS_Conv W16 W8 -> conversionNop II8 x
 456       MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
 457       MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
 458
 459       MO_UU_Conv from to
 460         | from == to -> conversionNop (intSize to) x
 461         -- narrowing is a nop: we treat the high bits as undefined
 462       MO_UU_Conv W32 to -> conversionNop (intSize to) x
 463       MO_UU_Conv W16 W8 -> conversionNop II8 x
 464       MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
 465       MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
 466       _ -> panic "PPC.CodeGen.getRegister: no match"
 467
 468     where
 469         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
 470         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
 471
 472         conversionNop new_size expr
 473             = do e_code <- getRegister expr
 474                  return (swizzleRegisterRep e_code new_size)
 475
 476 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
 477   = case mop of
 478       MO_F_Eq w -> condFltReg EQQ x y
 479       MO_F_Ne w -> condFltReg NE  x y
 480       MO_F_Gt w -> condFltReg GTT x y
 481       MO_F_Ge w -> condFltReg GE  x y
 482       MO_F_Lt w -> condFltReg LTT x y
 483       MO_F_Le w -> condFltReg LE  x y
 484
 485       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
 486       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
 487
 488       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
 489       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
 490       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
 491       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
 492
 493       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
 494       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
 495       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
 496       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
 497
 498       MO_F_Add w  -> triv_float w FADD
 499       MO_F_Sub w  -> triv_float w FSUB
 500       MO_F_Mul w  -> triv_float w FMUL
 501       MO_F_Quot w -> triv_float w FDIV
 502
 503          -- optimize addition with 32-bit immediate
 504          -- (needed for PIC)
 505       MO_Add W32 ->
 506         case y of
 507           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
 508             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
 509           CmmLit lit
 510             -> do
 511                 (src, srcCode) <- getSomeReg x
 512                 let imm = litToImm lit
 513                     code dst = srcCode `appOL` toOL [
 514                                     ADDIS dst src (HA imm),
 515                                     ADD dst dst (RIImm (LO imm))
 516                                 ]
 517                 return (Any II32 code)
 518           _ -> trivialCode W32 True ADD x y
 519
 520       MO_Add rep -> trivialCode rep True ADD x y
 521       MO_Sub rep ->
 522         case y of    -- subfi ('substract from' with immediate) doesn't exist
 523           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
 524             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
 525           _ -> trivialCodeNoImm' (intSize rep) SUBF y x
 526
 527       MO_Mul rep -> trivialCode rep True MULLW x y
 528
 529       MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
 530
 531       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
 532       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
 533
 534       MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
 535       MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
 536
 537       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
 538       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
 539
 540       MO_And rep   -> trivialCode rep False AND x y
 541       MO_Or rep    -> trivialCode rep False OR x y
 542       MO_Xor rep   -> trivialCode rep False XOR x y
 543
 544       MO_Shl rep   -> trivialCode rep False SLW x y
 545       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
 546       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
 547       _         -> panic "PPC.CodeGen.getRegister: no match"
 548
 549   where
 550     triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
 551     triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
 552
 553 getRegister (CmmLit (CmmInt i rep))
 554   | Just imm <- makeImmediate rep True i
 555   = let
 556         code dst = unitOL (LI dst imm)
 557     in
 558         return (Any (intSize rep) code)
 559
 560 getRegister (CmmLit (CmmFloat f frep)) = do
 561     lbl <- getNewLabelNat
 562     dflags <- getDynFlagsNat
 563     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 564     Amode addr addr_code <- getAmode dynRef
 565     let size = floatSize frep
 566         code dst =
 567             LDATA ReadOnlyData  [CmmDataLabel lbl,
 568                                  CmmStaticLit (CmmFloat f frep)]
 569             `consOL` (addr_code `snocOL` LD size dst addr)
 570     return (Any size code)
 571
 572 getRegister (CmmLit lit)
 573   = let rep = cmmLitType lit
 574         imm = litToImm lit
 575         code dst = toOL [
 576               LIS dst (HA imm),
 577               ADD dst dst (RIImm (LO imm))
 578           ]
 579     in return (Any (cmmTypeSize rep) code)
 580
 581 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
 582
 583     -- extend?Rep: wrap integer expression of type rep
 584     -- in a conversion to II32
 585 extendSExpr W32 x = x
 586 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
 587 extendUExpr W32 x = x
 588 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
 589
 590 -- -----------------------------------------------------------------------------
 591 --  The 'Amode' type: Memory addressing modes passed up the tree.
 592
 593 data Amode
 594         = Amode AddrMode InstrBlock
 595
 596 {-
  597 Now, given a tree (the argument to a CmmLoad) that references memory,
 598 produce a suitable addressing mode.
 599
 600 A Rule of the Game (tm) for Amodes: use of the addr bit must
 601 immediately follow use of the code part, since the code part puts
 602 values in registers which the addr then refers to.  So you can't put
 603 anything in between, lest it overwrite some of those registers.  If
 604 you need to do some other computation between the code part and use of
 605 the addr bit, first store the effective address from the amode in a
 606 temporary, then do the other computation, and then use the temporary:
 607
 608     code
 609     LEA amode, tmp
 610     ... other computation ...
 611     ... (tmp) ...
 612 -}
 613
 614 getAmode :: CmmExpr -> NatM Amode
 615 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
 616
 617 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
 618   | Just off <- makeImmediate W32 True (-i)
 619   = do
 620         (reg, code) <- getSomeReg x
 621         return (Amode (AddrRegImm reg off) code)
 622
 623
 624 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
 625   | Just off <- makeImmediate W32 True i
 626   = do
 627         (reg, code) <- getSomeReg x
 628         return (Amode (AddrRegImm reg off) code)
 629
 630    -- optimize addition with 32-bit immediate
 631    -- (needed for PIC)
 632 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
 633   = do
 634         tmp <- getNewRegNat II32
 635         (src, srcCode) <- getSomeReg x
 636         let imm = litToImm lit
 637             code = srcCode `snocOL` ADDIS tmp src (HA imm)
 638         return (Amode (AddrRegImm tmp (LO imm)) code)
 639
 640 getAmode (CmmLit lit)
 641   = do
 642         tmp <- getNewRegNat II32
 643         let imm = litToImm lit
 644             code = unitOL (LIS tmp (HA imm))
 645         return (Amode (AddrRegImm tmp (LO imm)) code)
 646
 647 getAmode (CmmMachOp (MO_Add W32) [x, y])
 648   = do
 649         (regX, codeX) <- getSomeReg x
 650         (regY, codeY) <- getSomeReg y
 651         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
 652
 653 getAmode other
 654   = do
 655         (reg, code) <- getSomeReg other
 656         let
 657             off  = ImmInt 0
 658         return (Amode (AddrRegImm reg off) code)
 659
 660
 661
 662 --  The 'CondCode' type:  Condition codes passed up the tree.
 663 data CondCode
 664         = CondCode Bool Cond InstrBlock
 665
 666 -- Set up a condition code for a conditional branch.
 667
 668 getCondCode :: CmmExpr -> NatM CondCode
 669
 670 -- almost the same as everywhere else - but we need to
 671 -- extend small integers to 32 bit first
 672
 673 getCondCode (CmmMachOp mop [x, y])
 674   = case mop of
 675       MO_F_Eq W32 -> condFltCode EQQ x y
 676       MO_F_Ne W32 -> condFltCode NE  x y
 677       MO_F_Gt W32 -> condFltCode GTT x y
 678       MO_F_Ge W32 -> condFltCode GE  x y
 679       MO_F_Lt W32 -> condFltCode LTT x y
 680       MO_F_Le W32 -> condFltCode LE  x y
 681
 682       MO_F_Eq W64 -> condFltCode EQQ x y
 683       MO_F_Ne W64 -> condFltCode NE  x y
 684       MO_F_Gt W64 -> condFltCode GTT x y
 685       MO_F_Ge W64 -> condFltCode GE  x y
 686       MO_F_Lt W64 -> condFltCode LTT x y
 687       MO_F_Le W64 -> condFltCode LE  x y
 688
 689       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
 690       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
 691
 692       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
 693       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
 694       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
 695       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
 696
 697       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
 698       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
 699       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
 700       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
 701
 702       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
 703
 704 getCondCode other =  panic "getCondCode(2)(powerpc)"
 705
 706
 707
 708 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
 709 -- passed back up the tree.
 710
 711 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
 712
 713 --  ###FIXME: I16 and I8!
 714 condIntCode cond x (CmmLit (CmmInt y rep))
 715   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
 716   = do
 717         (src1, code) <- getSomeReg x
 718         let
 719             code' = code `snocOL`
 720                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
 721         return (CondCode False cond code')
 722
 723 condIntCode cond x y = do
 724     (src1, code1) <- getSomeReg x
 725     (src2, code2) <- getSomeReg y
 726     let
 727         code' = code1 `appOL` code2 `snocOL`
 728                   (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
 729     return (CondCode False cond code')
 730
 731 condFltCode cond x y = do
 732     (src1, code1) <- getSomeReg x
 733     (src2, code2) <- getSomeReg y
 734     let
 735         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
 736         code'' = case cond of -- twiddle CR to handle unordered case
 737                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
 738                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
 739                     _ -> code'
 740                  where
 741                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
 742     return (CondCode True cond code'')
 743
 744
 745
 746 -- -----------------------------------------------------------------------------
 747 -- Generating assignments
 748
 749 -- Assignments are really at the heart of the whole code generation
 750 -- business.  Almost all top-level nodes of any real importance are
 751 -- assignments, which correspond to loads, stores, or register
 752 -- transfers.  If we're really lucky, some of the register transfers
 753 -- will go away, because we can use the destination register to
 754 -- complete the code generation for the right hand side.  This only
 755 -- fails when the right hand side is forced into a fixed register
 756 -- (e.g. the result of a call).
 757
 758 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 759 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 760
 761 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 762 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 763
 764 assignMem_IntCode pk addr src = do
 765     (srcReg, code) <- getSomeReg src
 766     Amode dstAddr addr_code <- getAmode addr
 767     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
 768
 769 -- dst is a reg, but src could be anything
 770 assignReg_IntCode _ reg src
 771     = do
 772         r <- getRegister src
 773         return $ case r of
 774             Any _ code         -> code dst
 775             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
 776     where
 777         dst = getRegisterReg reg
 778
 779
 780
 781 -- Easy, isn't it?
 782 assignMem_FltCode = assignMem_IntCode
 783 assignReg_FltCode = assignReg_IntCode
 784
 785
 786
 787 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
 788
 789 genJump (CmmLit (CmmLabel lbl))
 790   = return (unitOL $ JMP lbl)
 791
 792 genJump tree
 793   = do
 794         (target,code) <- getSomeReg tree
 795         return (code `snocOL` MTCTR target `snocOL` BCTR [])
 796
 797
 798 -- -----------------------------------------------------------------------------
 799 --  Unconditional branches
 800 genBranch :: BlockId -> NatM InstrBlock
 801 genBranch = return . toOL . mkJumpInstr
 802
 803
 804 -- -----------------------------------------------------------------------------
 805 --  Conditional jumps
 806
 807 {-
 808 Conditional jumps are always to local labels, so we can use branch
 809 instructions.  We peek at the arguments to decide what kind of
 810 comparison to do.
 811
 812 SPARC: First, we have to ensure that the condition codes are set
 813 according to the supplied comparison operation.  We generate slightly
 814 different code for floating point comparisons, because a floating
 815 point operation cannot directly precede a @BF@.  We assume the worst
 816 and fill that slot with a @NOP@.
 817
 818 SPARC: Do not fill the delay slots here; you will confuse the register
 819 allocator.
 820 -}
 821
 822
 823 genCondJump
 824     :: BlockId      -- the branch target
 825     -> CmmExpr      -- the condition on which to branch
 826     -> NatM InstrBlock
 827
 828 genCondJump id bool = do
 829   CondCode _ cond code <- getCondCode bool
 830   return (code `snocOL` BCC cond id)
 831
 832
 833
 834 -- -----------------------------------------------------------------------------
 835 --  Generating C calls
 836
 837 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
 838 -- @get_arg@, which moves the arguments to the correct registers/stack
 839 -- locations.  Apart from that, the code is easy.
 840 --
 841 -- (If applicable) Do not fill the delay slots here; you will confuse the
 842 -- register allocator.
 843
 844 genCCall
 845     :: CmmCallTarget            -- function to call
 846     -> HintedCmmFormals         -- where to put the result
 847     -> HintedCmmActuals         -- arguments (of mixed type)
 848     -> NatM InstrBlock
     -- Overview: the arguments are marshalled into argument registers and/or
     -- stack slots (passArguments), the target is called with BL (known label)
     -- or through the counter register with BCTRL (computed address), and the
     -- result, if any, is copied out of r3/r4 or f1 (moveResult).
     -- Only Darwin and Linux are supported; on any other OS genCCall panics.
 849
 850
 851 #if darwin_TARGET_OS || linux_TARGET_OS
 852 {-
 853     The PowerPC calling convention for Darwin/Mac OS X
 854     is described in Apple's document
 855     "Inside Mac OS X - Mach-O Runtime Architecture".
 856
 857     PowerPC Linux uses the System V Release 4 Calling Convention
 858     for PowerPC. It is described in the
 859     "System V Application Binary Interface PowerPC Processor Supplement".
 860
 861     Both conventions are similar:
 862     Parameters may be passed in general-purpose registers starting at r3, in
 863     floating point registers starting at f1, or on the stack.
 864
 865     But there are substantial differences:
 866     * The number of registers used for parameter passing and the exact set of
 867       nonvolatile registers differs (see MachRegs.lhs).
 868     * On Darwin, stack space is always reserved for parameters, even if they are
 869       passed in registers. The called routine may choose to save parameters from
 870       registers to the corresponding space on the stack.
 871     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
 872       parameter is passed in an FPR.
 873     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
 874       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
 875       Darwin just treats an I64 like two separate II32s (high word first).
 876     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
 877       4-byte aligned like everything else on Darwin.
 878     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
 879       PowerPC Linux does not agree, so neither do we.
 880
 881     According to both conventions, The parameter area should be part of the
 882     caller's stack frame, allocated in the caller's prologue code (large enough
 883     to hold the parameter lists for all called routines). The NCG already
 884     uses the stack for register spilling, leaving 64 bytes free at the top.
 885     If we need a larger parameter area than that, we just allocate a new stack
 886     frame just before ccalling.
 887 -}
 888
 889
     -- A write barrier needs no call at all: it is a single LWSYNC instruction.
 890 genCCall (CmmPrim MO_WriteBarrier) _ _
 891  = return $ unitOL LWSYNC
 892
 893 genCCall target dest_regs argsAndHints
 894   = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
 895         -- we rely on argument promotion in the codeGen
 896     do
             -- Marshal every argument, starting with all argument registers
             -- free and empty accumulators for code and used registers.
 897         (finalStack,passArgumentsCode,usedRegs) <- passArguments
 898                                                         (zip args argReps)
 899                                                         allArgRegs allFPArgRegs
 900                                                         initialStackOffset
 901                                                         (toOL []) []
 902
             -- Left = statically known label, Right = address expression.
             -- Only out-of-line float primops can ask for the result to be
             -- reduced to single precision.
 903         (labelOrExpr, reduceToFF32) <- case target of
 904             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
 905             CmmCallee expr conv -> return  (Right expr, False)
 906             CmmPrim mop -> outOfLineFloatOp mop
 907
 908         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
 909             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
 910
 911         case labelOrExpr of
 912             Left lbl -> do
 913                 return (         codeBefore
 914                         `snocOL` BL lbl usedRegs
 915                         `appOL`  codeAfter)
 916             Right dyn -> do
                     -- The target address is moved to CTR before the argument
                     -- code runs, so that code must leave CTR untouched.
 917                 (dynReg, dynCode) <- getSomeReg dyn
 918                 return (         dynCode
 919                         `snocOL` MTCTR dynReg
 920                         `appOL`  codeBefore
 921                         `snocOL` BCTRL usedRegs
 922                         `appOL`  codeAfter)
 923     where
 924 #if darwin_TARGET_OS
 925         initialStackOffset = 24
 926             -- size of linkage area + size of arguments, in bytes
             -- (the argument part is never taken to be less than 32 bytes; the
             -- total is rounded up to a multiple of 16)
 927         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
 928                                  map (widthInBytes . typeWidth) argReps
 929 #elif linux_TARGET_OS
 930         initialStackOffset = 8
             -- here the frame size is derived from the offset reached by
             -- passArguments, rounded up to a multiple of 16
 931         stackDelta finalStack = roundTo 16 finalStack
 932 #endif
 933         args = map hintlessCmm argsAndHints
 934         argReps = map cmmExprType args
 935
             -- Round x up to the next multiple of a (x itself if already one).
 936         roundTo a x | x `mod` a == 0 = x
 937                     | otherwise = x + a - (x `mod` a)
 938
             -- A new frame is only pushed when more than 64 bytes are needed;
             -- otherwise no stack adjustment code is emitted at all.
             -- NOTE(review): DELTA presumably records the sp displacement for
             -- later passes - confirm against PPC.Instr.
 939         move_sp_down finalStack
 940                | delta > 64 =
 941                         toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
 942                               DELTA (-delta)]
 943                | otherwise = nilOL
 944                where delta = stackDelta finalStack
             -- Inverse of move_sp_down: pops the frame again under the same
             -- condition.
 945         move_sp_up finalStack
 946                | delta > 64 =
 947                         toOL [ADD sp sp (RIImm (ImmInt delta)),
 948                               DELTA 0]
 949                | otherwise = nilOL
 950                where delta = stackDelta finalStack
 951
 952
             -- passArguments consumes the (argument, type) list one element at
             -- a time.  Its parameters are: remaining arguments, free GPRs,
             -- free FPRs, current stack offset, code accumulated so far and
             -- registers used so far.  It returns the final stack offset, the
             -- complete argument-passing code and the list of used registers.
 953         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
             -- 64-bit integer arguments: iselExpr64 yields the vreg holding the
             -- low word; the vreg for the high word is derived from it.
 954         passArguments ((arg,arg_ty):args) gprs fprs stackOffset
 955                accumCode accumUsed | isWord64 arg_ty =
 956             do
 957                 ChildCode64 code vr_lo <- iselExpr64 arg
 958                 let vr_hi = getHiVRegFromLo vr_lo
 959
 960 #if darwin_TARGET_OS
                     -- Darwin: treated as two consecutive 32-bit words (high
                     -- first); each goes to the next free GPR if there is one,
                     -- otherwise to its stack slot.  8 bytes of stack are
                     -- accounted for in either case.
 961                 passArguments args
 962                               (drop 2 gprs)
 963                               fprs
 964                               (stackOffset+8)
 965                               (accumCode `appOL` code
 966                                     `snocOL` storeWord vr_hi gprs stackOffset
 967                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
 968                               ((take 2 gprs) ++ accumUsed)
 969             where
 970                 storeWord vr (gpr:_) offset = MR gpr vr
 971                 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
 972
 973 #elif linux_TARGET_OS
                     -- SysV: either a register pair or an 8-byte aligned stack
                     -- slot.  stackCode is the stack variant, regCode the
                     -- register-pair variant.
 974                 let stackOffset' = roundTo 8 stackOffset
 975                     stackCode = accumCode `appOL` code
 976                         `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
 977                         `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
 978                     regCode hireg loreg =
 979                         accumCode `appOL` code
 980                             `snocOL` MR hireg vr_hi
 981                             `snocOL` MR loreg vr_lo
 982
                     -- The parity of the number of remaining GPRs decides
                     -- whether one GPR is skipped before the pair is taken.
 983                 case gprs of
 984                     hireg : loreg : regs | even (length gprs) ->
 985                         passArguments args regs fprs stackOffset
 986                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
 987                     _skipped : hireg : loreg : regs ->
 988                         passArguments args regs fprs stackOffset
 989                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
 990                     _ -> -- only one or no regs left
 991                         passArguments args [] fprs (stackOffset'+8)
 992                                       stackCode accumUsed
 993 #endif
 994
             -- All remaining argument types (32-bit integers, floats, doubles):
             -- use the next register of the appropriate class if one is free,
             -- otherwise evaluate into some register and store to the stack.
 995         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
 996             | reg : _ <- regs = do
 997                 register <- getRegister arg
 998                 let code = case register of
 999                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1000                             Any _ acode -> acode reg
1001                 passArguments args
1002                               (drop nGprs gprs)
1003                               (drop nFprs fprs)
1004 #if darwin_TARGET_OS
1005         -- The Darwin ABI requires that we reserve stack slots for register parameters
1006                               (stackOffset + stackBytes)
1007 #elif linux_TARGET_OS
1008         -- ... the SysV ABI doesn't.
1009                               stackOffset
1010 #endif
1011                               (accumCode `appOL` code)
1012                               (reg : accumUsed)
1013             | otherwise = do
1014                 (vr, code) <- getSomeReg arg
1015                 passArguments args
1016                               (drop nGprs gprs)
1017                               (drop nFprs fprs)
1018                               (stackOffset' + stackBytes)
1019                               (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
1020                               accumUsed
1021             where
1022 #if darwin_TARGET_OS
1023         -- stackOffset is at least 4-byte aligned
1024         -- The Darwin ABI is happy with that.
1025                 stackOffset' = stackOffset
1026 #else
1027         -- ... the SysV ABI requires 8-byte alignment for doubles.
1028                 stackOffset' | isFloatType rep && typeWidth rep == W64 =
1029                                  roundTo 8 stackOffset
1030                              | otherwise  =           stackOffset
1031 #endif
1032                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
                     -- Per argument size: number of GPRs consumed, number of
                     -- FPRs consumed, bytes of stack occupied, and the register
                     -- pool the argument is taken from.  Only II32, FF32 and
                     -- FF64 are matched here.
1033                 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
1034                     II32 -> (1, 0, 4, gprs)
1035 #if darwin_TARGET_OS
1036         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
1037         -- we use the FPRs.
1038                     FF32 -> (1, 1, 4, fprs)
1039                     FF64 -> (2, 1, 8, fprs)
1040 #elif linux_TARGET_OS
1041         -- ... the SysV ABI doesn't.
1042                     FF32 -> (0, 1, 4, fprs)
1043                     FF64 -> (0, 1, 8, fprs)
1044 #endif
1045
             -- Copy the call result into the destination register: floats come
             -- from f1 (through FRSP when a single-precision result was
             -- requested), 64-bit integers from r3 (high) and r4 (low),
             -- everything else from r3.  Only zero or one destination is
             -- matched.
1046         moveResult reduceToFF32 =
1047             case dest_regs of
1048                 [] -> nilOL
1049                 [CmmHinted dest _hint]
1050                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
1051                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1052                     | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
1053                                           MR r_dest r4]
1054                     | otherwise -> unitOL (MR r_dest r3)
1055                     where rep = cmmRegType (CmmLocal dest)
1056                           r_dest = getRegisterReg (CmmLocal dest)
1057
             -- Map a floating-point primop to a foreign function of the given
             -- name.  The F32 and F64 variants share one function name; the
             -- Bool is True for the F32 variants and ends up as reduceToFF32.
1058         outOfLineFloatOp mop =
1059             do
1060                 dflags <- getDynFlagsNat
1061                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
1062                               mkForeignLabel functionName Nothing True IsFunction
1063                 let mopLabelOrExpr = case mopExpr of
1064                         CmmLit (CmmLabel lbl) -> Left lbl
1065                         _ -> Right mopExpr
1066                 return (mopLabelOrExpr, reduce)
1067             where
1068                 (functionName, reduce) = case mop of
1069                     MO_F32_Exp   -> (fsLit "exp", True)
1070                     MO_F32_Log   -> (fsLit "log", True)
1071                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
1072
1073                     MO_F32_Sin   -> (fsLit "sin", True)
1074                     MO_F32_Cos   -> (fsLit "cos", True)
1075                     MO_F32_Tan   -> (fsLit "tan", True)
1076
1077                     MO_F32_Asin  -> (fsLit "asin", True)
1078                     MO_F32_Acos  -> (fsLit "acos", True)
1079                     MO_F32_Atan  -> (fsLit "atan", True)
1080
1081                     MO_F32_Sinh  -> (fsLit "sinh", True)
1082                     MO_F32_Cosh  -> (fsLit "cosh", True)
1083                     MO_F32_Tanh  -> (fsLit "tanh", True)
1084                     MO_F32_Pwr   -> (fsLit "pow", True)
1085
1086                     MO_F64_Exp   -> (fsLit "exp", False)
1087                     MO_F64_Log   -> (fsLit "log", False)
1088                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
1089
1090                     MO_F64_Sin   -> (fsLit "sin", False)
1091                     MO_F64_Cos   -> (fsLit "cos", False)
1092                     MO_F64_Tan   -> (fsLit "tan", False)
1093
1094                     MO_F64_Asin  -> (fsLit "asin", False)
1095                     MO_F64_Acos  -> (fsLit "acos", False)
1096                     MO_F64_Atan  -> (fsLit "atan", False)
1097
1098                     MO_F64_Sinh  -> (fsLit "sinh", False)
1099                     MO_F64_Cosh  -> (fsLit "cosh", False)
1100                     MO_F64_Tanh  -> (fsLit "tanh", False)
1101                     MO_F64_Pwr   -> (fsLit "pow", False)
1102                     other -> pprPanic "genCCall(ppc): unknown callish op"
1103                                     (pprCallishMachOp other)
1104
1105 #else /* darwin_TARGET_OS || linux_TARGET_OS */
1106 genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
1107 #endif
1108
1109
1110 -- -----------------------------------------------------------------------------
1111 -- Generating a table-branch
1112
-- | Generate a jump through a table of block addresses, indexed by the value
-- of @expr@.  The index is scaled by 4 (shift left by 2), the table entry is
-- loaded and the branch goes through the counter register.
--
-- With position-independent code the table holds offsets relative to the
-- table's own label, and the table base is added back after the load; a
-- missing entry is emitted as a zero word.  Otherwise the table is built
-- with 'jumpTableEntry' and addressed through the HA/LO halves of its label.
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
genSwitch expr ids
  | opt_PIC = do
        (idxReg, idxCode) <- getSomeReg expr
        scratch <- getNewRegNat II32
        tableLbl <- getNewLabelNat
        dflags <- getDynFlagsNat
        tableRef <- cmmMakeDynamicReference dflags addImportNat DataReference tableLbl
        (baseReg, baseCode) <- getSomeReg tableRef
        let relEntry Nothing
                = CmmStaticLit (CmmInt 0 wordWidth)
            relEntry (Just (BlockId uniq))
                = CmmStaticLit (CmmLabelDiffOff (mkAsmTempLabel uniq) tableLbl 0)
        return $ idxCode `appOL` baseCode `appOL` toOL
            [ LDATA ReadOnlyData (CmmDataLabel tableLbl : map relEntry ids)
            , SLW scratch idxReg (RIImm (ImmInt 2))
            , LD II32 scratch (AddrRegReg baseReg scratch)
            , ADD scratch scratch (RIReg baseReg)
            , MTCTR scratch
            , BCTR targets
            ]
  | otherwise = do
        (idxReg, idxCode) <- getSomeReg expr
        scratch <- getNewRegNat II32
        tableLbl <- getNewLabelNat
        let tableImm = ImmCLbl tableLbl
        return $ idxCode `appOL` toOL
            [ LDATA ReadOnlyData (CmmDataLabel tableLbl : map jumpTableEntry ids)
            , SLW scratch idxReg (RIImm (ImmInt 2))
            , ADDIS scratch scratch (HA tableImm)
            , LD II32 scratch (AddrRegImm scratch (LO tableImm))
            , MTCTR scratch
            , BCTR targets
            ]
  where
    -- every block the table can dispatch to
    targets = [ blk | Just blk <- ids ]
1158
1159
1160 -- -----------------------------------------------------------------------------
1161 -- 'condIntReg' and 'condFltReg': condition codes into registers
1162
1163 -- Turn those condition codes into integers now (when they appear on
1164 -- the right hand side of an assignment).
1165 --
1166 -- (If applicable) Do not fill the delay slots here; you will confuse the
1167 -- register allocator.
1168
-- | Materialise the result of an integer ('condIntReg') or floating-point
-- ('condFltReg') comparison as a value in a register.
condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg cond x y = condReg (condIntCode cond x y)
condFltReg cond x y = condReg (condFltCode cond x y)

-- | Turn a condition code into a register value without branching.
--
-- After the comparison code has run, the condition-register bit that
-- encodes the condition is optionally negated in place (CRNOR of the bit
-- with itself), the condition register is copied to the destination with
-- MFCR, and RLWINM rotates the wanted bit into the lowest position and
-- masks everything else away.
--
-- (An earlier variant selected 0 or 1 with BCC/LI/NEWBLOCK; it has been
-- superseded by this branch-free sequence.)
--
-- Panics for any 'Cond' not listed in the table below.
condReg :: NatM CondCode -> NatM Register
condReg getCond = do
    CondCode _ cond cond_code <- getCond
    let
        code dst = cond_code
            `appOL` negate_code
            `appOL` toOL [
                MFCR dst,
                RLWINM dst dst (bit + 1) 31 31
            ]

        negate_code | do_negate = unitOL (CRNOR bit bit bit)
                    | otherwise = nilOL

        -- (condition-register bit to extract, whether to negate it first).
        -- NOTE(review): assumes the comparison set CR field 0, whose bits
        -- 0/1/2 are LT/GT/EQ - confirm against condIntCode/condFltCode.
        (bit, do_negate) = case cond of
            LTT -> (0, False)
            LE  -> (1, True)
            EQQ -> (2, False)
            GE  -> (0, True)
            GTT -> (1, False)

            NE  -> (2, True)

            LU  -> (0, False)
            LEU -> (1, True)
            GEU -> (0, True)
            GU  -> (1, False)
            -- the message now names this function (it used to say "codeReg")
            _   -> panic "PPC.CodeGen.condReg: no match"

    return (Any II32 code)
1213
1214
1215
1216 -- -----------------------------------------------------------------------------
1217 -- 'trivial*Code': deal with trivial instructions
1218
1219 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1220 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1221 -- Only look for constants on the right hand side, because that's
1222 -- where the generic optimizer will have put them.
1223
1224 -- Similarly, for unary instructions, we don't have to worry about
1225 -- matching an StInt as the argument, because genericOpt will already
1226 -- have handled the constant-folding.
1227
1228
1229
1230 {-
1231 Wolfgang's PowerPC version of The Rules:
1232
1233 A slightly modified version of The Rules to take advantage of the fact
1234 that PowerPC instructions work on all registers and don't implicitly
1235 clobber any fixed registers.
1236
1237 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1238
1239 * If getRegister returns Any, then the code it generates may modify only:
1240         (a) fresh temporaries
1241         (b) the destination register
1242   It may *not* modify global registers, unless the global
1243   register happens to be the destination register.
1244   It may not clobber any other registers. In fact, only ccalls clobber any
1245   fixed registers.
1246   Also, it may not modify the counter register (used by genCCall).
1247
1248   Corollary: If a getRegister for a subexpression returns Fixed, you need
1249   not move it to a fresh temporary before evaluating the next subexpression.
1250   The Fixed register won't be modified.
1251   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1252
1253 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1254   the value of the destination register.
1255 -}
1256
-- | Code for a dyadic integer instruction.  When the right operand is an
-- integer literal that 'makeImmediate' accepts for this width/signedness,
-- the immediate form of the instruction is used; otherwise both operands
-- are evaluated into registers (left operand first).
trivialCode
        :: Width
        -> Bool
        -> (Reg -> Reg -> RI -> Instr)
        -> CmmExpr
        -> CmmExpr
        -> NatM Register
trivialCode rep signed instr lhs rhs
    | CmmLit (CmmInt n _) <- rhs
    , Just imm <- makeImmediate rep signed n
    = do
        (lhsReg, lhsCode) <- getSomeReg lhs
        return $ Any resultSize $ \dst ->
            lhsCode `snocOL` instr dst lhsReg (RIImm imm)
    | otherwise
    = do
        (lhsReg, lhsCode) <- getSomeReg lhs
        (rhsReg, rhsCode) <- getSomeReg rhs
        return $ Any resultSize $ \dst ->
            lhsCode `appOL` rhsCode `snocOL` instr dst lhsReg (RIReg rhsReg)
  where
    resultSize = intSize rep
1277
-- | Code for a dyadic instruction that only has a register-register form:
-- both operands are evaluated into registers (left operand first) and the
-- instruction is applied to destination, left and right register.
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' size instr x y = do
    (lhsReg, lhsCode) <- getSomeReg x
    (rhsReg, rhsCode) <- getSomeReg y
    let operands = lhsCode `appOL` rhsCode
    return $ Any size $ \dst -> operands `snocOL` instr dst lhsReg rhsReg

-- | Like trivialCodeNoImm', for instructions that additionally take the
-- operand size as their first argument.
trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm size instr = trivialCodeNoImm' size (instr size)
1289
1290
-- | Code for a unary instruction: evaluate the operand into a register and
-- apply the instruction to the destination and that register.
trivialUCode
        :: Size
        -> (Reg -> Reg -> Instr)
        -> CmmExpr
        -> NatM Register
trivialUCode rep instr x = do
    (operandReg, operandCode) <- getSomeReg x
    return $ Any rep $ \dst -> operandCode `snocOL` instr dst operandReg
1300
-- | There is no "remainder" instruction on the PPC, so we have to do
-- it the hard way: @x - (x `div` y) * y@.
--
-- The "div" parameter is the division instruction to use (DIVW or DIVWU).
--
-- The quotient and the product are computed in a fresh temporary rather
-- than in the destination register.  The destination may be the very
-- register that holds one of the operands (subexpressions are allowed to
-- depend on the value of the destination register), and both operands are
-- still read after the division, so writing the intermediate results to
-- the destination would clobber them.
remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp <- getNewRegNat (intSize rep)
    let code dst = code1 `appOL` code2 `appOL` toOL [
                div tmp src1 src2,              -- tmp = x / y
                MULLW tmp tmp (RIReg src2),     -- tmp = (x / y) * y
                SUBF dst tmp src1               -- dst = x - tmp
            ]
    return (Any (intSize rep) code)
1316
1317
-- | Convert an integer of width @fromRep@ to a float of width @toRep@.
--
-- The conversion goes through memory: the integer, with its top bit
-- flipped (XORIS with 0x8000), is stored at @spRel 3@ and the word
-- 0x43300000 at @spRel 2@; a double is loaded from @spRel 2@, and the
-- read-only constant made of the words 0x43300000 and 0x80000000 is
-- subtracted from it.  A single-precision result is rounded with FRSP.
--
-- Sub-word sources are sign-extended first.
-- NOTE(review): that extension is performed in place on the source
-- register returned by 'getSomeReg'.
--
-- Panics if @fromRep@ is not W8/W16/W32 or @toRep@ is not W32/W64.
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP fromRep toRep x = do
    (src, srcCode) <- getSomeReg x
    magicLbl <- getNewLabelNat
    intTmp <- getNewRegNat II32
    fltTmp <- getNewRegNat FF64
    dflags <- getDynFlagsNat
    magicRef <- cmmMakeDynamicReference dflags addImportNat DataReference magicLbl
    Amode magicAddr magicAddrCode <- getAmode magicRef
    let
        signExtend = case fromRep of
            W8  -> unitOL (EXTS II8 src src)
            W16 -> unitOL (EXTS II16 src src)
            W32 -> nilOL
            _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

        roundResult dst = case toRep of
            W32 -> unitOL (FRSP dst dst)
            W64 -> nilOL
            _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

        magicConstant = LDATA ReadOnlyData
            [ CmmDataLabel magicLbl
            , CmmStaticLit (CmmInt 0x43300000 W32)
            , CmmStaticLit (CmmInt 0x80000000 W32)
            ]

        -- assemble the biased double on the stack and load it
        buildBiased = toOL
            [ magicConstant
            , XORIS intTmp src (ImmInt 0x8000)
            , ST II32 intTmp (spRel 3)
            , LIS intTmp (ImmInt 0x4330)
            , ST II32 intTmp (spRel 2)
            , LD FF64 fltTmp (spRel 2)
            ]

        -- load the constant and subtract it from the biased value
        removeBias dst = toOL
            [ LD FF64 dst magicAddr
            , FSUB FF64 dst fltTmp dst
            ]

        convert dst = srcCode
            `appOL` signExtend
            `appOL` buildBiased
            `appOL` magicAddrCode
            `appOL` removeBias dst
            `appOL` roundResult dst

    return (Any (floatSize toRep) convert)
1356
-- | Convert a float to an integer of width @toRep@.  The source width is
-- ignored: the reps don't really matter, F*->FF64 and II32->I* are no-ops.
--
-- The value is converted to an integer inside a floating-point register
-- (FCTIWZ), stored to the stack as a 64-bit quantity at @spRel 2@, and the
-- low word is then read back from @spRel 3@ (the high word is undefined).
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int _ toRep x = do
    (fpSrc, srcCode) <- getSomeReg x
    fpTmp <- getNewRegNat FF64
    let viaStack dst = toOL
            [ FCTIWZ fpTmp fpSrc
            , ST FF64 fpTmp (spRel 2)
            , LD II32 dst (spRel 3)
            ]
    return $ Any (intSize toRep) $ \dst -> srcCode `appOL` viaStack dst