compiler/nativeGen/PPC/CodeGen.hs

   1 {-# OPTIONS -w #-}
   2
   3 -----------------------------------------------------------------------------
   4 --
   5 -- Generating machine code (instruction selection)
   6 --
   7 -- (c) The University of Glasgow 1996-2004
   8 --
   9 -----------------------------------------------------------------------------
  10
  11 -- This is a big module, but, if you pay attention to
  12 -- (a) the sectioning, (b) the type signatures, and
  13 -- (c) the #if blah_TARGET_ARCH} things, the
  14 -- structure should not be too overwhelming.
  15
  16 module PPC.CodeGen (
  17         cmmTopCodeGen,
  18         InstrBlock
  19 )
  20
  21 where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "MachDeps.h"
  26
  27 -- NCG stuff:
  28 import PPC.Instr
  29 import PPC.Cond
  30 import PPC.Regs
  31 import PPC.RegInfo
  32 import NCGMonad
  33 import Instruction
  34 import PIC
  35 import Size
  36 import RegClass
  37 import Reg
  38 import Platform
  39
  40 -- Our intermediate code:
  41 import BlockId
  42 import PprCmm           ( pprExpr )
  43 import Cmm
  44 import CLabel
  45
  46 -- The rest:
  47 import StaticFlags      ( opt_PIC )
  48 import OrdList
  49 import qualified Outputable as O
  50 import Outputable
  51 import DynFlags
  52
  53 import Control.Monad    ( mapAndUnzipM )
  54 import Data.Bits
  55 import Data.Int
  56 import Data.Word
  57
  58 #if darwin_TARGET_OS || linux_TARGET_OS
  59 import BasicTypes
  60 import FastString
  61 #endif
  62
  63 -- -----------------------------------------------------------------------------
  64 -- Top-level of the instruction selector
  65
  66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  67 -- They are really trees of insns to facilitate fast appending, where a
  68 -- left-to-right traversal (pre-order?) yields the insns in the correct
  69 -- order.
  70
  71 cmmTopCodeGen
  72         :: DynFlags
  73         -> RawCmmTop
  74         -> NatM [NatCmmTop Instr]
  75
  76 cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do
  77   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  78   picBaseMb <- getPicBaseMaybeNat
  79   let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
  80       tops = proc : concat statics
  81       os   = platformOS $ targetPlatform dflags
  82   case picBaseMb of
  83       Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
  84       Nothing -> return tops
  85
  86 cmmTopCodeGen dflags (CmmData sec dat) = do
  87   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  88
  89 basicBlockCodeGen
  90         :: CmmBasicBlock
  91         -> NatM ( [NatBasicBlock Instr]
  92                 , [NatCmmTop Instr])
  93
  94 basicBlockCodeGen (BasicBlock id stmts) = do
  95   instrs <- stmtsToInstrs stmts
  96   -- code generation may introduce new basic block boundaries, which
  97   -- are indicated by the NEWBLOCK instruction.  We must split up the
  98   -- instruction stream into basic blocks again.  Also, we extract
  99   -- LDATAs here too.
 100   let
 101         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
 102
 103         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
 104           = ([], BasicBlock id instrs : blocks, statics)
 105         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
 106           = (instrs, blocks, CmmData sec dat:statics)
 107         mkBlocks instr (instrs,blocks,statics)
 108           = (instr:instrs, blocks, statics)
 109   -- in
 110   return (BasicBlock id top : other_blocks, statics)
 111
 112 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 113 stmtsToInstrs stmts
 114    = do instrss <- mapM stmtToInstrs stmts
 115         return (concatOL instrss)
 116
 117 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 118 stmtToInstrs stmt = case stmt of
 119     CmmNop         -> return nilOL
 120     CmmComment s   -> return (unitOL (COMMENT s))
 121
 122     CmmAssign reg src
 123       | isFloatType ty -> assignReg_FltCode size reg src
 124 #if WORD_SIZE_IN_BITS==32
 125       | isWord64 ty    -> assignReg_I64Code      reg src
 126 #endif
 127       | otherwise        -> assignReg_IntCode size reg src
 128         where ty = cmmRegType reg
 129               size = cmmTypeSize ty
 130
 131     CmmStore addr src
 132       | isFloatType ty -> assignMem_FltCode size addr src
 133 #if WORD_SIZE_IN_BITS==32
 134       | isWord64 ty      -> assignMem_I64Code      addr src
 135 #endif
 136       | otherwise        -> assignMem_IntCode size addr src
 137         where ty = cmmExprType src
 138               size = cmmTypeSize ty
 139
 140     CmmCall target result_regs args _ _
 141        -> genCCall target result_regs args
 142
 143     CmmBranch id          -> genBranch id
 144     CmmCondBranch arg id  -> genCondJump id arg
 145     CmmSwitch arg ids     -> genSwitch arg ids
 146     CmmJump arg params    -> genJump arg
 147     CmmReturn params      ->
 148       panic "stmtToInstrs: return statement should have been cps'd away"
 149
 150
 151 --------------------------------------------------------------------------------
 152 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
 153 --      They are really trees of insns to facilitate fast appending, where a
 154 --      left-to-right traversal yields the insns in the correct order.
 155 --
 156 type InstrBlock
 157         = OrdList Instr
 158
 159
 160 -- | Register's passed up the tree.  If the stix code forces the register
 161 --      to live in a pre-decided machine register, it comes out as @Fixed@;
 162 --      otherwise, it comes out as @Any@, and the parent can decide which
 163 --      register to put it in.
 164 --
 165 data Register
 166         = Fixed Size Reg InstrBlock
 167         | Any   Size (Reg -> InstrBlock)
 168
 169
 170 swizzleRegisterRep :: Register -> Size -> Register
 171 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 172 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 173
 174
 175 -- | Grab the Reg for a CmmReg
 176 getRegisterReg :: CmmReg -> Reg
 177
 178 getRegisterReg (CmmLocal (LocalReg u pk))
 179   = mkVReg u (cmmTypeSize pk)
 180
 181 getRegisterReg (CmmGlobal mid)
 182   = case get_GlobalReg_reg_or_addr mid of
 183        Left (RealReg rrno) -> RealReg rrno
 184        _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 185           -- By this stage, the only MagicIds remaining should be the
 186           -- ones which map to a real machine register on this
 187           -- platform.  Hence ...
 188
 189
 190 {-
 191 Now, given a tree (the argument to an CmmLoad) that references memory,
 192 produce a suitable addressing mode.
 193
 194 A Rule of the Game (tm) for Amodes: use of the addr bit must
 195 immediately follow use of the code part, since the code part puts
 196 values in registers which the addr then refers to.  So you can't put
 197 anything in between, lest it overwrite some of those registers.  If
 198 you need to do some other computation between the code part and use of
 199 the addr bit, first store the effective address from the amode in a
 200 temporary, then do the other computation, and then use the temporary:
 201
 202     code
 203     LEA amode, tmp
 204     ... other computation ...
 205     ... (tmp) ...
 206 -}
 207
 208
 209 -- | Check whether an integer will fit in 32 bits.
 210 --      A CmmInt is intended to be truncated to the appropriate
 211 --      number of bits, so here we truncate it to Int64.  This is
 212 --      important because e.g. -1 as a CmmInt might be either
 213 --      -1 or 18446744073709551615.
 214 --
 215 is32BitInteger :: Integer -> Bool
 216 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
 217   where i64 = fromIntegral i :: Int64
 218
 219
 220 -- | Convert a BlockId to some CmmStatic data
 221 jumpTableEntry :: Maybe BlockId -> CmmStatic
 222 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
 223 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
 224     where blockLabel = mkAsmTempLabel id
 225
 226
 227
 228 -- -----------------------------------------------------------------------------
 229 -- General things for putting together code sequences
 230
 231 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 232 -- CmmExprs into CmmRegOff?
 233 mangleIndexTree :: CmmExpr -> CmmExpr
 234 mangleIndexTree (CmmRegOff reg off)
 235   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 236   where width = typeWidth (cmmRegType reg)
 237
 238 mangleIndexTree _
 239         = panic "PPC.CodeGen.mangleIndexTree: no match"
 240
 241 -- -----------------------------------------------------------------------------
 242 --  Code gen for 64-bit arithmetic on 32-bit platforms
 243
 244 {-
 245 Simple support for generating 64-bit code (ie, 64 bit values and 64
 246 bit assignments) on 32-bit platforms.  Unlike the main code generator
 247 we merely shoot for generating working code as simply as possible, and
 248 pay little attention to code quality.  Specifically, there is no
 249 attempt to deal cleverly with the fixed-vs-floating register
 250 distinction; all values are generated into (pairs of) floating
 251 registers, even if this would mean some redundant reg-reg moves as a
 252 result.  Only one of the VRegUniques is returned, since it will be
 253 of the VRegUniqueLo form, and the upper-half VReg can be determined
 254 by applying getHiVRegFromLo to it.
 255 -}
 256
 257 data ChildCode64        -- a.k.a "Register64"
 258       = ChildCode64
 259            InstrBlock   -- code
 260            Reg          -- the lower 32-bit temporary which contains the
 261                         -- result; use getHiVRegFromLo to find the other
 262                         -- VRegUnique.  Rules of this simplified insn
 263                         -- selection game are therefore that the returned
 264                         -- Reg may be modified
 265
 266
 267 -- | The dual to getAnyReg: compute an expression into a register, but
 268 --      we don't mind which one it is.
 269 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 270 getSomeReg expr = do
 271   r <- getRegister expr
 272   case r of
 273     Any rep code -> do
 274         tmp <- getNewRegNat rep
 275         return (tmp, code tmp)
 276     Fixed _ reg code ->
 277         return (reg, code)
 278
 279 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 280 getI64Amodes addrTree = do
 281     Amode hi_addr addr_code <- getAmode addrTree
 282     case addrOffset hi_addr 4 of
 283         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 284         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 285                            return (AddrRegImm hi_ptr (ImmInt 0),
 286                                    AddrRegImm hi_ptr (ImmInt 4),
 287                                    code)
 288
 289
 290 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 291 assignMem_I64Code addrTree valueTree = do
 292         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 293         ChildCode64 vcode rlo <- iselExpr64 valueTree
 294         let
 295                 rhi = getHiVRegFromLo rlo
 296
 297                 -- Big-endian store
 298                 mov_hi = ST II32 rhi hi_addr
 299                 mov_lo = ST II32 rlo lo_addr
 300         -- in
 301         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 302
 303
 304 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 305 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 306    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 307    let
 308          r_dst_lo = mkVReg u_dst II32
 309          r_dst_hi = getHiVRegFromLo r_dst_lo
 310          r_src_hi = getHiVRegFromLo r_src_lo
 311          mov_lo = MR r_dst_lo r_src_lo
 312          mov_hi = MR r_dst_hi r_src_hi
 313    -- in
 314    return (
 315         vcode `snocOL` mov_lo `snocOL` mov_hi
 316      )
 317
 318 assignReg_I64Code lvalue valueTree
 319    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 320
 321
 322 iselExpr64        :: CmmExpr -> NatM ChildCode64
 323 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 324     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 325     (rlo, rhi) <- getNewRegPairNat II32
 326     let mov_hi = LD II32 rhi hi_addr
 327         mov_lo = LD II32 rlo lo_addr
 328     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 329                          rlo
 330
 331 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 332    = return (ChildCode64 nilOL (mkVReg vu II32))
 333
 334 iselExpr64 (CmmLit (CmmInt i _)) = do
 335   (rlo,rhi) <- getNewRegPairNat II32
 336   let
 337         half0 = fromIntegral (fromIntegral i :: Word16)
 338         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 339         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 340         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 341
 342         code = toOL [
 343                 LIS rlo (ImmInt half1),
 344                 OR rlo rlo (RIImm $ ImmInt half0),
 345                 LIS rhi (ImmInt half3),
 346                 OR rlo rlo (RIImm $ ImmInt half2)
 347                 ]
 348   -- in
 349   return (ChildCode64 code rlo)
 350
 351 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 352    ChildCode64 code1 r1lo <- iselExpr64 e1
 353    ChildCode64 code2 r2lo <- iselExpr64 e2
 354    (rlo,rhi) <- getNewRegPairNat II32
 355    let
 356         r1hi = getHiVRegFromLo r1lo
 357         r2hi = getHiVRegFromLo r2lo
 358         code =  code1 `appOL`
 359                 code2 `appOL`
 360                 toOL [ ADDC rlo r1lo r2lo,
 361                        ADDE rhi r1hi r2hi ]
 362    -- in
 363    return (ChildCode64 code rlo)
 364
 365 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
 366     (expr_reg,expr_code) <- getSomeReg expr
 367     (rlo, rhi) <- getNewRegPairNat II32
 368     let mov_hi = LI rhi (ImmInt 0)
 369         mov_lo = MR rlo expr_reg
 370     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 371                          rlo
 372 iselExpr64 expr
 373    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 374
 375
 376
 377 getRegister :: CmmExpr -> NatM Register
 378
 379 getRegister (CmmReg reg)
 380   = return (Fixed (cmmTypeSize (cmmRegType reg))
 381                   (getRegisterReg reg) nilOL)
 382
 383 getRegister tree@(CmmRegOff _ _)
 384   = getRegister (mangleIndexTree tree)
 385
 386
 387 #if WORD_SIZE_IN_BITS==32
 388     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 389     -- TO_W_(x), TO_W_(x >> 32)
 390
 391 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 392              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 393   ChildCode64 code rlo <- iselExpr64 x
 394   return $ Fixed II32 (getHiVRegFromLo rlo) code
 395
 396 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 397              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 398   ChildCode64 code rlo <- iselExpr64 x
 399   return $ Fixed II32 (getHiVRegFromLo rlo) code
 400
 401 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 402   ChildCode64 code rlo <- iselExpr64 x
 403   return $ Fixed II32 rlo code
 404
 405 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 406   ChildCode64 code rlo <- iselExpr64 x
 407   return $ Fixed II32 rlo code
 408
 409 #endif
 410
 411
 412 getRegister (CmmLoad mem pk)
 413   | not (isWord64 pk)
 414   = do
 415         Amode addr addr_code <- getAmode mem
 416         let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
 417                        addr_code `snocOL` LD size dst addr
 418         return (Any size code)
 419           where size = cmmTypeSize pk
 420
 421 -- catch simple cases of zero- or sign-extended load
 422 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
 423     Amode addr addr_code <- getAmode mem
 424     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
 425
 426 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
 427
 428 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
 429     Amode addr addr_code <- getAmode mem
 430     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
 431
 432 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
 433     Amode addr addr_code <- getAmode mem
 434     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
 435
 436 getRegister (CmmMachOp mop [x]) -- unary MachOps
 437   = case mop of
 438       MO_Not rep   -> triv_ucode_int rep NOT
 439
 440       MO_F_Neg w   -> triv_ucode_float w FNEG
 441       MO_S_Neg w   -> triv_ucode_int   w NEG
 442
 443       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
 444       MO_FF_Conv W32 W64 -> conversionNop FF64 x
 445
 446       MO_FS_Conv from to -> coerceFP2Int from to x
 447       MO_SF_Conv from to -> coerceInt2FP from to x
 448
 449       MO_SS_Conv from to
 450         | from == to    -> conversionNop (intSize to) x
 451
 452         -- narrowing is a nop: we treat the high bits as undefined
 453       MO_SS_Conv W32 to -> conversionNop (intSize to) x
 454       MO_SS_Conv W16 W8 -> conversionNop II8 x
 455       MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
 456       MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
 457
 458       MO_UU_Conv from to
 459         | from == to -> conversionNop (intSize to) x
 460         -- narrowing is a nop: we treat the high bits as undefined
 461       MO_UU_Conv W32 to -> conversionNop (intSize to) x
 462       MO_UU_Conv W16 W8 -> conversionNop II8 x
 463       MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
 464       MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
 465       _ -> panic "PPC.CodeGen.getRegister: no match"
 466
 467     where
 468         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
 469         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
 470
 471         conversionNop new_size expr
 472             = do e_code <- getRegister expr
 473                  return (swizzleRegisterRep e_code new_size)
 474
 475 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
 476   = case mop of
 477       MO_F_Eq w -> condFltReg EQQ x y
 478       MO_F_Ne w -> condFltReg NE  x y
 479       MO_F_Gt w -> condFltReg GTT x y
 480       MO_F_Ge w -> condFltReg GE  x y
 481       MO_F_Lt w -> condFltReg LTT x y
 482       MO_F_Le w -> condFltReg LE  x y
 483
 484       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
 485       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
 486
 487       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
 488       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
 489       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
 490       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
 491
 492       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
 493       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
 494       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
 495       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
 496
 497       MO_F_Add w  -> triv_float w FADD
 498       MO_F_Sub w  -> triv_float w FSUB
 499       MO_F_Mul w  -> triv_float w FMUL
 500       MO_F_Quot w -> triv_float w FDIV
 501
 502          -- optimize addition with 32-bit immediate
 503          -- (needed for PIC)
 504       MO_Add W32 ->
 505         case y of
 506           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
 507             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
 508           CmmLit lit
 509             -> do
 510                 (src, srcCode) <- getSomeReg x
 511                 let imm = litToImm lit
 512                     code dst = srcCode `appOL` toOL [
 513                                     ADDIS dst src (HA imm),
 514                                     ADD dst dst (RIImm (LO imm))
 515                                 ]
 516                 return (Any II32 code)
 517           _ -> trivialCode W32 True ADD x y
 518
 519       MO_Add rep -> trivialCode rep True ADD x y
 520       MO_Sub rep ->
 521         case y of    -- subfi ('substract from' with immediate) doesn't exist
 522           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
 523             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
 524           _ -> trivialCodeNoImm' (intSize rep) SUBF y x
 525
 526       MO_Mul rep -> trivialCode rep True MULLW x y
 527
 528       MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
 529
 530       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
 531       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
 532
 533       MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
 534       MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
 535
 536       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
 537       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
 538
 539       MO_And rep   -> trivialCode rep False AND x y
 540       MO_Or rep    -> trivialCode rep False OR x y
 541       MO_Xor rep   -> trivialCode rep False XOR x y
 542
 543       MO_Shl rep   -> trivialCode rep False SLW x y
 544       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
 545       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
 546       _         -> panic "PPC.CodeGen.getRegister: no match"
 547
 548   where
 549     triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
 550     triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
 551
 552 getRegister (CmmLit (CmmInt i rep))
 553   | Just imm <- makeImmediate rep True i
 554   = let
 555         code dst = unitOL (LI dst imm)
 556     in
 557         return (Any (intSize rep) code)
 558
 559 getRegister (CmmLit (CmmFloat f frep)) = do
 560     lbl <- getNewLabelNat
 561     dflags <- getDynFlagsNat
 562     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 563     Amode addr addr_code <- getAmode dynRef
 564     let size = floatSize frep
 565         code dst =
 566             LDATA ReadOnlyData  [CmmDataLabel lbl,
 567                                  CmmStaticLit (CmmFloat f frep)]
 568             `consOL` (addr_code `snocOL` LD size dst addr)
 569     return (Any size code)
 570
 571 getRegister (CmmLit lit)
 572   = let rep = cmmLitType lit
 573         imm = litToImm lit
 574         code dst = toOL [
 575               LIS dst (HA imm),
 576               ADD dst dst (RIImm (LO imm))
 577           ]
 578     in return (Any (cmmTypeSize rep) code)
 579
 580 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
 581
 582     -- extend?Rep: wrap integer expression of type rep
 583     -- in a conversion to II32
 584 extendSExpr W32 x = x
 585 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
 586 extendUExpr W32 x = x
 587 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
 588
 589 -- -----------------------------------------------------------------------------
 590 --  The 'Amode' type: Memory addressing modes passed up the tree.
 591
 592 data Amode
 593         = Amode AddrMode InstrBlock
 594
 595 {-
 596 Now, given a tree (the argument to an CmmLoad) that references memory,
 597 produce a suitable addressing mode.
 598
 599 A Rule of the Game (tm) for Amodes: use of the addr bit must
 600 immediately follow use of the code part, since the code part puts
 601 values in registers which the addr then refers to.  So you can't put
 602 anything in between, lest it overwrite some of those registers.  If
 603 you need to do some other computation between the code part and use of
 604 the addr bit, first store the effective address from the amode in a
 605 temporary, then do the other computation, and then use the temporary:
 606
 607     code
 608     LEA amode, tmp
 609     ... other computation ...
 610     ... (tmp) ...
 611 -}
 612
 613 getAmode :: CmmExpr -> NatM Amode
 614 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
 615
 616 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
 617   | Just off <- makeImmediate W32 True (-i)
 618   = do
 619         (reg, code) <- getSomeReg x
 620         return (Amode (AddrRegImm reg off) code)
 621
 622
 623 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
 624   | Just off <- makeImmediate W32 True i
 625   = do
 626         (reg, code) <- getSomeReg x
 627         return (Amode (AddrRegImm reg off) code)
 628
 629    -- optimize addition with 32-bit immediate
 630    -- (needed for PIC)
 631 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
 632   = do
 633         tmp <- getNewRegNat II32
 634         (src, srcCode) <- getSomeReg x
 635         let imm = litToImm lit
 636             code = srcCode `snocOL` ADDIS tmp src (HA imm)
 637         return (Amode (AddrRegImm tmp (LO imm)) code)
 638
 639 getAmode (CmmLit lit)
 640   = do
 641         tmp <- getNewRegNat II32
 642         let imm = litToImm lit
 643             code = unitOL (LIS tmp (HA imm))
 644         return (Amode (AddrRegImm tmp (LO imm)) code)
 645
 646 getAmode (CmmMachOp (MO_Add W32) [x, y])
 647   = do
 648         (regX, codeX) <- getSomeReg x
 649         (regY, codeY) <- getSomeReg y
 650         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
 651
 652 getAmode other
 653   = do
 654         (reg, code) <- getSomeReg other
 655         let
 656             off  = ImmInt 0
 657         return (Amode (AddrRegImm reg off) code)
 658
 659
 660
 661 --  The 'CondCode' type:  Condition codes passed up the tree.
 662 data CondCode
 663         = CondCode Bool Cond InstrBlock
 664
 665 -- Set up a condition code for a conditional branch.
 666
 667 getCondCode :: CmmExpr -> NatM CondCode
 668
 669 -- almost the same as everywhere else - but we need to
 670 -- extend small integers to 32 bit first
 671
 672 getCondCode (CmmMachOp mop [x, y])
 673   = case mop of
 674       MO_F_Eq W32 -> condFltCode EQQ x y
 675       MO_F_Ne W32 -> condFltCode NE  x y
 676       MO_F_Gt W32 -> condFltCode GTT x y
 677       MO_F_Ge W32 -> condFltCode GE  x y
 678       MO_F_Lt W32 -> condFltCode LTT x y
 679       MO_F_Le W32 -> condFltCode LE  x y
 680
 681       MO_F_Eq W64 -> condFltCode EQQ x y
 682       MO_F_Ne W64 -> condFltCode NE  x y
 683       MO_F_Gt W64 -> condFltCode GTT x y
 684       MO_F_Ge W64 -> condFltCode GE  x y
 685       MO_F_Lt W64 -> condFltCode LTT x y
 686       MO_F_Le W64 -> condFltCode LE  x y
 687
 688       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
 689       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
 690
 691       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
 692       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
 693       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
 694       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
 695
 696       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
 697       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
 698       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
 699       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
 700
 701       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
 702
 703 getCondCode other =  panic "getCondCode(2)(powerpc)"
 704
 705
 706
 707 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
 708 -- passed back up the tree.
 709
 710 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
 711
 712 --  ###FIXME: I16 and I8!
 713 condIntCode cond x (CmmLit (CmmInt y rep))
 714   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
 715   = do
 716         (src1, code) <- getSomeReg x
 717         let
 718             code' = code `snocOL`
 719                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
 720         return (CondCode False cond code')
 721
 722 condIntCode cond x y = do
 723     (src1, code1) <- getSomeReg x
 724     (src2, code2) <- getSomeReg y
 725     let
 726         code' = code1 `appOL` code2 `snocOL`
 727                   (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
 728     return (CondCode False cond code')
 729
 730 condFltCode cond x y = do
 731     (src1, code1) <- getSomeReg x
 732     (src2, code2) <- getSomeReg y
 733     let
 734         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
 735         code'' = case cond of -- twiddle CR to handle unordered case
 736                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
 737                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
 738                     _ -> code'
 739                  where
 740                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
 741     return (CondCode True cond code'')
 742
 743
 744
 745 -- -----------------------------------------------------------------------------
 746 -- Generating assignments
 747
 748 -- Assignments are really at the heart of the whole code generation
 749 -- business.  Almost all top-level nodes of any real importance are
 750 -- assignments, which correspond to loads, stores, or register
 751 -- transfers.  If we're really lucky, some of the register transfers
 752 -- will go away, because we can use the destination register to
 753 -- complete the code generation for the right hand side.  This only
 754 -- fails when the right hand side is forced into a fixed register
 755 -- (e.g. the result of a call).
 756
 757 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 758 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 759
 760 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 761 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 762
 763 assignMem_IntCode pk addr src = do
 764     (srcReg, code) <- getSomeReg src
 765     Amode dstAddr addr_code <- getAmode addr
 766     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
 767
 768 -- dst is a reg, but src could be anything
 769 assignReg_IntCode _ reg src
 770     = do
 771         r <- getRegister src
 772         return $ case r of
 773             Any _ code         -> code dst
 774             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
 775     where
 776         dst = getRegisterReg reg
 777
 778
 779
 780 -- Easy, isn't it?
 781 assignMem_FltCode = assignMem_IntCode
 782 assignReg_FltCode = assignReg_IntCode
 783
 784
 785
 786 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
 787
 788 genJump (CmmLit (CmmLabel lbl))
 789   = return (unitOL $ JMP lbl)
 790
 791 genJump tree
 792   = do
 793         (target,code) <- getSomeReg tree
 794         return (code `snocOL` MTCTR target `snocOL` BCTR [])
 795
 796
 797 -- -----------------------------------------------------------------------------
 798 --  Unconditional branches
 799 genBranch :: BlockId -> NatM InstrBlock
 800 genBranch = return . toOL . mkJumpInstr
 801
 802
 803 -- -----------------------------------------------------------------------------
 804 --  Conditional jumps
 805
 806 {-
 807 Conditional jumps are always to local labels, so we can use branch
 808 instructions.  We peek at the arguments to decide what kind of
 809 comparison to do.
 810
 811 SPARC: First, we have to ensure that the condition codes are set
 812 according to the supplied comparison operation.  We generate slightly
 813 different code for floating point comparisons, because a floating
 814 point operation cannot directly precede a @BF@.  We assume the worst
 815 and fill that slot with a @NOP@.
 816
 817 SPARC: Do not fill the delay slots here; you will confuse the register
 818 allocator.
 819 -}
 820
 821
 822 genCondJump
 823     :: BlockId      -- the branch target
 824     -> CmmExpr      -- the condition on which to branch
 825     -> NatM InstrBlock
 826
 827 genCondJump id bool = do
 828   CondCode _ cond code <- getCondCode bool
 829   return (code `snocOL` BCC cond id)
 830
 831
 832
 833 -- -----------------------------------------------------------------------------
 834 --  Generating C calls
 835
 836 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
 837 -- @get_arg@, which moves the arguments to the correct registers/stack
 838 -- locations.  Apart from that, the code is easy.
 839 --
 840 -- (If applicable) Do not fill the delay slots here; you will confuse the
 841 -- register allocator.
 842
 843 genCCall
 844     :: CmmCallTarget            -- function to call
 845     -> HintedCmmFormals         -- where to put the result
 846     -> HintedCmmActuals         -- arguments (of mixed type)
 847     -> NatM InstrBlock
 848
 849
 850 #if darwin_TARGET_OS || linux_TARGET_OS
 851 {-
 852     The PowerPC calling convention for Darwin/Mac OS X
 853     is described in Apple's document
 854     "Inside Mac OS X - Mach-O Runtime Architecture".
 855
 856     PowerPC Linux uses the System V Release 4 Calling Convention
 857     for PowerPC. It is described in the
 858     "System V Application Binary Interface PowerPC Processor Supplement".
 859
 860     Both conventions are similar:
 861     Parameters may be passed in general-purpose registers starting at r3, in
 862     floating point registers starting at f1, or on the stack.
 863
 864     But there are substantial differences:
 865     * The number of registers used for parameter passing and the exact set of
 866       nonvolatile registers differs (see MachRegs.lhs).
 867     * On Darwin, stack space is always reserved for parameters, even if they are
 868       passed in registers. The called routine may choose to save parameters from
 869       registers to the corresponding space on the stack.
 870     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
 871       parameter is passed in an FPR.
 872     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
 873       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
 874       Darwin just treats an I64 like two separate II32s (high word first).
 875     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
 876       4-byte aligned like everything else on Darwin.
 877     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
 878       PowerPC Linux does not agree, so neither do we.
 879
 880     According to both conventions, The parameter area should be part of the
 881     caller's stack frame, allocated in the caller's prologue code (large enough
 882     to hold the parameter lists for all called routines). The NCG already
 883     uses the stack for register spilling, leaving 64 bytes free at the top.
 884     If we need a larger parameter area than that, we just allocate a new stack
 885     frame just before ccalling.
 886 -}
 887
 888
 889 genCCall (CmmPrim MO_WriteBarrier) _ _
 890  = return $ unitOL LWSYNC
 891
 892 genCCall target dest_regs argsAndHints
 893   = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
 894         -- we rely on argument promotion in the codeGen
 895     do
 896         (finalStack,passArgumentsCode,usedRegs) <- passArguments
 897                                                         (zip args argReps)
 898                                                         allArgRegs allFPArgRegs
 899                                                         initialStackOffset
 900                                                         (toOL []) []
 901
 902         (labelOrExpr, reduceToFF32) <- case target of
 903             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
 904             CmmCallee expr conv -> return  (Right expr, False)
 905             CmmPrim mop -> outOfLineFloatOp mop
 906
 907         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
 908             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
 909
 910         case labelOrExpr of
 911             Left lbl -> do
 912                 return (         codeBefore
 913                         `snocOL` BL lbl usedRegs
 914                         `appOL`  codeAfter)
 915             Right dyn -> do
 916                 (dynReg, dynCode) <- getSomeReg dyn
 917                 return (         dynCode
 918                         `snocOL` MTCTR dynReg
 919                         `appOL`  codeBefore
 920                         `snocOL` BCTRL usedRegs
 921                         `appOL`  codeAfter)
 922     where
 923 #if darwin_TARGET_OS
 924         initialStackOffset = 24
 925             -- size of linkage area + size of arguments, in bytes
 926         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
 927                                  map (widthInBytes . typeWidth) argReps
 928 #elif linux_TARGET_OS
 929         initialStackOffset = 8
 930         stackDelta finalStack = roundTo 16 finalStack
 931 #endif
 932         args = map hintlessCmm argsAndHints
 933         argReps = map cmmExprType args
 934
 935         roundTo a x | x `mod` a == 0 = x
 936                     | otherwise = x + a - (x `mod` a)
 937
 938         move_sp_down finalStack
 939                | delta > 64 =
 940                         toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
 941                               DELTA (-delta)]
 942                | otherwise = nilOL
 943                where delta = stackDelta finalStack
 944         move_sp_up finalStack
 945                | delta > 64 =
 946                         toOL [ADD sp sp (RIImm (ImmInt delta)),
 947                               DELTA 0]
 948                | otherwise = nilOL
 949                where delta = stackDelta finalStack
 950
 951
 952         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
 953         passArguments ((arg,arg_ty):args) gprs fprs stackOffset
 954                accumCode accumUsed | isWord64 arg_ty =
 955             do
 956                 ChildCode64 code vr_lo <- iselExpr64 arg
 957                 let vr_hi = getHiVRegFromLo vr_lo
 958
 959 #if darwin_TARGET_OS
 960                 passArguments args
 961                               (drop 2 gprs)
 962                               fprs
 963                               (stackOffset+8)
 964                               (accumCode `appOL` code
 965                                     `snocOL` storeWord vr_hi gprs stackOffset
 966                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
 967                               ((take 2 gprs) ++ accumUsed)
 968             where
 969                 storeWord vr (gpr:_) offset = MR gpr vr
 970                 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
 971
 972 #elif linux_TARGET_OS
 973                 let stackOffset' = roundTo 8 stackOffset
 974                     stackCode = accumCode `appOL` code
 975                         `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
 976                         `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
 977                     regCode hireg loreg =
 978                         accumCode `appOL` code
 979                             `snocOL` MR hireg vr_hi
 980                             `snocOL` MR loreg vr_lo
 981
 982                 case gprs of
 983                     hireg : loreg : regs | even (length gprs) ->
 984                         passArguments args regs fprs stackOffset
 985                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
 986                     _skipped : hireg : loreg : regs ->
 987                         passArguments args regs fprs stackOffset
 988                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
 989                     _ -> -- only one or no regs left
 990                         passArguments args [] fprs (stackOffset'+8)
 991                                       stackCode accumUsed
 992 #endif
 993
 994         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
 995             | reg : _ <- regs = do
 996                 register <- getRegister arg
 997                 let code = case register of
 998                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
 999                             Any _ acode -> acode reg
1000                 passArguments args
1001                               (drop nGprs gprs)
1002                               (drop nFprs fprs)
1003 #if darwin_TARGET_OS
1004         -- The Darwin ABI requires that we reserve stack slots for register parameters
1005                               (stackOffset + stackBytes)
1006 #elif linux_TARGET_OS
1007         -- ... the SysV ABI doesn't.
1008                               stackOffset
1009 #endif
1010                               (accumCode `appOL` code)
1011                               (reg : accumUsed)
1012             | otherwise = do
1013                 (vr, code) <- getSomeReg arg
1014                 passArguments args
1015                               (drop nGprs gprs)
1016                               (drop nFprs fprs)
1017                               (stackOffset' + stackBytes)
1018                               (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
1019                               accumUsed
1020             where
1021 #if darwin_TARGET_OS
1022         -- stackOffset is at least 4-byte aligned
1023         -- The Darwin ABI is happy with that.
1024                 stackOffset' = stackOffset
1025 #else
1026         -- ... the SysV ABI requires 8-byte alignment for doubles.
1027                 stackOffset' | isFloatType rep && typeWidth rep == W64 =
1028                                  roundTo 8 stackOffset
1029                              | otherwise  =           stackOffset
1030 #endif
1031                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1032                 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
1033                     II32 -> (1, 0, 4, gprs)
1034 #if darwin_TARGET_OS
1035         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
1036         -- we use the FPRs.
1037                     FF32 -> (1, 1, 4, fprs)
1038                     FF64 -> (2, 1, 8, fprs)
1039 #elif linux_TARGET_OS
1040         -- ... the SysV ABI doesn't.
1041                     FF32 -> (0, 1, 4, fprs)
1042                     FF64 -> (0, 1, 8, fprs)
1043 #endif
1044
1045         moveResult reduceToFF32 =
1046             case dest_regs of
1047                 [] -> nilOL
1048                 [CmmHinted dest _hint]
1049                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
1050                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1051                     | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
1052                                           MR r_dest r4]
1053                     | otherwise -> unitOL (MR r_dest r3)
1054                     where rep = cmmRegType (CmmLocal dest)
1055                           r_dest = getRegisterReg (CmmLocal dest)
1056
1057         outOfLineFloatOp mop =
1058             do
1059                 dflags <- getDynFlagsNat
1060                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
1061                               mkForeignLabel functionName Nothing True IsFunction
1062                 let mopLabelOrExpr = case mopExpr of
1063                         CmmLit (CmmLabel lbl) -> Left lbl
1064                         _ -> Right mopExpr
1065                 return (mopLabelOrExpr, reduce)
1066             where
1067                 (functionName, reduce) = case mop of
1068                     MO_F32_Exp   -> (fsLit "exp", True)
1069                     MO_F32_Log   -> (fsLit "log", True)
1070                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
1071
1072                     MO_F32_Sin   -> (fsLit "sin", True)
1073                     MO_F32_Cos   -> (fsLit "cos", True)
1074                     MO_F32_Tan   -> (fsLit "tan", True)
1075
1076                     MO_F32_Asin  -> (fsLit "asin", True)
1077                     MO_F32_Acos  -> (fsLit "acos", True)
1078                     MO_F32_Atan  -> (fsLit "atan", True)
1079
1080                     MO_F32_Sinh  -> (fsLit "sinh", True)
1081                     MO_F32_Cosh  -> (fsLit "cosh", True)
1082                     MO_F32_Tanh  -> (fsLit "tanh", True)
1083                     MO_F32_Pwr   -> (fsLit "pow", True)
1084
1085                     MO_F64_Exp   -> (fsLit "exp", False)
1086                     MO_F64_Log   -> (fsLit "log", False)
1087                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
1088
1089                     MO_F64_Sin   -> (fsLit "sin", False)
1090                     MO_F64_Cos   -> (fsLit "cos", False)
1091                     MO_F64_Tan   -> (fsLit "tan", False)
1092
1093                     MO_F64_Asin  -> (fsLit "asin", False)
1094                     MO_F64_Acos  -> (fsLit "acos", False)
1095                     MO_F64_Atan  -> (fsLit "atan", False)
1096
1097                     MO_F64_Sinh  -> (fsLit "sinh", False)
1098                     MO_F64_Cosh  -> (fsLit "cosh", False)
1099                     MO_F64_Tanh  -> (fsLit "tanh", False)
1100                     MO_F64_Pwr   -> (fsLit "pow", False)
1101                     other -> pprPanic "genCCall(ppc): unknown callish op"
1102                                     (pprCallishMachOp other)
1103
1104 #else /* darwin_TARGET_OS || linux_TARGET_OS */
1105 genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
1106 #endif
1107
1108
1109 -- -----------------------------------------------------------------------------
1110 -- Generating a table-branch
1111
1112 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1113 genSwitch expr ids
1114   | opt_PIC
1115   = do
1116         (reg,e_code) <- getSomeReg expr
1117         tmp <- getNewRegNat II32
1118         lbl <- getNewLabelNat
1119         dflags <- getDynFlagsNat
1120         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1121         (tableReg,t_code) <- getSomeReg $ dynRef
1122         let
1123             jumpTable = map jumpTableEntryRel ids
1124
1125             jumpTableEntryRel Nothing
1126                 = CmmStaticLit (CmmInt 0 wordWidth)
1127             jumpTableEntryRel (Just (BlockId id))
1128                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1129                 where blockLabel = mkAsmTempLabel id
1130
1131             code = e_code `appOL` t_code `appOL` toOL [
1132                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1133                             SLW tmp reg (RIImm (ImmInt 2)),
1134                             LD II32 tmp (AddrRegReg tableReg tmp),
1135                             ADD tmp tmp (RIReg tableReg),
1136                             MTCTR tmp,
1137                             BCTR [ id | Just id <- ids ]
1138                     ]
1139         return code
1140   | otherwise
1141   = do
1142         (reg,e_code) <- getSomeReg expr
1143         tmp <- getNewRegNat II32
1144         lbl <- getNewLabelNat
1145         let
1146             jumpTable = map jumpTableEntry ids
1147
1148             code = e_code `appOL` toOL [
1149                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1150                             SLW tmp reg (RIImm (ImmInt 2)),
1151                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
1152                             LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1153                             MTCTR tmp,
1154                             BCTR [ id | Just id <- ids ]
1155                     ]
1156         return code
1157
1158
1159 -- -----------------------------------------------------------------------------
1160 -- 'condIntReg' and 'condFltReg': condition codes into registers
1161
1162 -- Turn those condition codes into integers now (when they appear on
1163 -- the right hand side of an assignment).
1164 --
1165 -- (If applicable) Do not fill the delay slots here; you will confuse the
1166 -- register allocator.
1167
1168 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1169
1170 condReg :: NatM CondCode -> NatM Register
1171 condReg getCond = do
1172     CondCode _ cond cond_code <- getCond
1173     let
1174 {-        code dst = cond_code `appOL` toOL [
1175                 BCC cond lbl1,
1176                 LI dst (ImmInt 0),
1177                 BCC ALWAYS lbl2,
1178                 NEWBLOCK lbl1,
1179                 LI dst (ImmInt 1),
1180                 BCC ALWAYS lbl2,
1181                 NEWBLOCK lbl2
1182             ]-}
1183         code dst = cond_code
1184             `appOL` negate_code
1185             `appOL` toOL [
1186                 MFCR dst,
1187                 RLWINM dst dst (bit + 1) 31 31
1188             ]
1189
1190         negate_code | do_negate = unitOL (CRNOR bit bit bit)
1191                     | otherwise = nilOL
1192
1193         (bit, do_negate) = case cond of
1194             LTT -> (0, False)
1195             LE  -> (1, True)
1196             EQQ -> (2, False)
1197             GE  -> (0, True)
1198             GTT -> (1, False)
1199
1200             NE  -> (2, True)
1201
1202             LU  -> (0, False)
1203             LEU -> (1, True)
1204             GEU -> (0, True)
1205             GU  -> (1, False)
1206             _   -> panic "PPC.CodeGen.codeReg: no match"
1207
1208     return (Any II32 code)
1209
1210 condIntReg cond x y = condReg (condIntCode cond x y)
1211 condFltReg cond x y = condReg (condFltCode cond x y)
1212
1213
1214
1215 -- -----------------------------------------------------------------------------
1216 -- 'trivial*Code': deal with trivial instructions
1217
1218 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1219 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1220 -- Only look for constants on the right hand side, because that's
1221 -- where the generic optimizer will have put them.
1222
1223 -- Similarly, for unary instructions, we don't have to worry about
1224 -- matching an StInt as the argument, because genericOpt will already
1225 -- have handled the constant-folding.
1226
1227
1228
1229 {-
1230 Wolfgang's PowerPC version of The Rules:
1231
1232 A slightly modified version of The Rules to take advantage of the fact
1233 that PowerPC instructions work on all registers and don't implicitly
1234 clobber any fixed registers.
1235
1236 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1237
1238 * If getRegister returns Any, then the code it generates may modify only:
1239         (a) fresh temporaries
1240         (b) the destination register
1241   It may *not* modify global registers, unless the global
1242   register happens to be the destination register.
1243   It may not clobber any other registers. In fact, only ccalls clobber any
1244   fixed registers.
1245   Also, it may not modify the counter register (used by genCCall).
1246
1247   Corollary: If a getRegister for a subexpression returns Fixed, you need
1248   not move it to a fresh temporary before evaluating the next subexpression.
1249   The Fixed register won't be modified.
1250   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1251
1252 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1253   the value of the destination register.
1254 -}
1255
1256 trivialCode
1257         :: Width
1258         -> Bool
1259         -> (Reg -> Reg -> RI -> Instr)
1260         -> CmmExpr
1261         -> CmmExpr
1262         -> NatM Register
1263
1264 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1265     | Just imm <- makeImmediate rep signed y
1266     = do
1267         (src1, code1) <- getSomeReg x
1268         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1269         return (Any (intSize rep) code)
1270
1271 trivialCode rep _ instr x y = do
1272     (src1, code1) <- getSomeReg x
1273     (src2, code2) <- getSomeReg y
1274     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1275     return (Any (intSize rep) code)
1276
1277 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
1278                  -> CmmExpr -> CmmExpr -> NatM Register
1279 trivialCodeNoImm' size instr x y = do
1280     (src1, code1) <- getSomeReg x
1281     (src2, code2) <- getSomeReg y
1282     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1283     return (Any size code)
1284
1285 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
1286                  -> CmmExpr -> CmmExpr -> NatM Register
1287 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
1288
1289
1290 trivialUCode
1291         :: Size
1292         -> (Reg -> Reg -> Instr)
1293         -> CmmExpr
1294         -> NatM Register
1295 trivialUCode rep instr x = do
1296     (src, code) <- getSomeReg x
1297     let code' dst = code `snocOL` instr dst src
1298     return (Any rep code')
1299
1300 -- There is no "remainder" instruction on the PPC, so we have to do
1301 -- it the hard way.
1302 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1303
1304 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1305     -> CmmExpr -> CmmExpr -> NatM Register
1306 remainderCode rep div x y = do
1307     (src1, code1) <- getSomeReg x
1308     (src2, code2) <- getSomeReg y
1309     let code dst = code1 `appOL` code2 `appOL` toOL [
1310                 div dst src1 src2,
1311                 MULLW dst dst (RIReg src2),
1312                 SUBF dst dst src1
1313             ]
1314     return (Any (intSize rep) code)
1315
1316
1317 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1318 coerceInt2FP fromRep toRep x = do
1319     (src, code) <- getSomeReg x
1320     lbl <- getNewLabelNat
1321     itmp <- getNewRegNat II32
1322     ftmp <- getNewRegNat FF64
1323     dflags <- getDynFlagsNat
1324     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1325     Amode addr addr_code <- getAmode dynRef
1326     let
1327         code' dst = code `appOL` maybe_exts `appOL` toOL [
1328                 LDATA ReadOnlyData
1329                                 [CmmDataLabel lbl,
1330                                  CmmStaticLit (CmmInt 0x43300000 W32),
1331                                  CmmStaticLit (CmmInt 0x80000000 W32)],
1332                 XORIS itmp src (ImmInt 0x8000),
1333                 ST II32 itmp (spRel 3),
1334                 LIS itmp (ImmInt 0x4330),
1335                 ST II32 itmp (spRel 2),
1336                 LD FF64 ftmp (spRel 2)
1337             ] `appOL` addr_code `appOL` toOL [
1338                 LD FF64 dst addr,
1339                 FSUB FF64 dst ftmp dst
1340             ] `appOL` maybe_frsp dst
1341
1342         maybe_exts = case fromRep of
1343                         W8 ->  unitOL $ EXTS II8 src src
1344                         W16 -> unitOL $ EXTS II16 src src
1345                         W32 -> nilOL
1346                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
1347
1348         maybe_frsp dst
1349                 = case toRep of
1350                         W32 -> unitOL $ FRSP dst dst
1351                         W64 -> nilOL
1352                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
1353
1354     return (Any (floatSize toRep) code')
1355
1356 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1357 coerceFP2Int _ toRep x = do
1358     -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1359     (src, code) <- getSomeReg x
1360     tmp <- getNewRegNat FF64
1361     let
1362         code' dst = code `appOL` toOL [
1363                 -- convert to int in FP reg
1364             FCTIWZ tmp src,
1365                 -- store value (64bit) from FP to stack
1366             ST FF64 tmp (spRel 2),
1367                 -- read low word of value (high word is undefined)
1368             LD II32 dst (spRel 3)]
1369     return (Any (intSize toRep) code')