compiler/nativeGen/PPC/CodeGen.hs

   1 {-# OPTIONS -w #-}
   2
   3 -----------------------------------------------------------------------------
   4 --
   5 -- Generating machine code (instruction selection)
   6 --
   7 -- (c) The University of Glasgow 1996-2004
   8 --
   9 -----------------------------------------------------------------------------
  10
  11 -- This is a big module, but, if you pay attention to
  12 -- (a) the sectioning, (b) the type signatures, and
  13 -- (c) the #if blah_TARGET_ARCH} things, the
  14 -- structure should not be too overwhelming.
  15
  16 module PPC.CodeGen (
  17         cmmTopCodeGen,
  18         generateJumpTableForInstr,
  19         InstrBlock
  20 )
  21
  22 where
  23
  24 #include "HsVersions.h"
  25 #include "nativeGen/NCG.h"
  26 #include "../includes/MachDeps.h"
  27
  28 -- NCG stuff:
  29 import PPC.Instr
  30 import PPC.Cond
  31 import PPC.Regs
  32 import PPC.RegInfo
  33 import NCGMonad
  34 import Instruction
  35 import PIC
  36 import Size
  37 import RegClass
  38 import Reg
  39 import TargetReg
  40 import Platform
  41
  42 -- Our intermediate code:
  43 import BlockId
  44 import PprCmm           ( pprExpr )
  45 import OldCmm
  46 import CLabel
  47
  48 -- The rest:
  49 import StaticFlags      ( opt_PIC )
  50 import OrdList
  51 import qualified Outputable as O
  52 import Outputable
  53 import Unique
  54 import DynFlags
  55
  56 import Control.Monad    ( mapAndUnzipM )
  57 import Data.Bits
  58 import Data.Int
  59 import Data.Word
  60
  61 #if darwin_TARGET_OS || linux_TARGET_OS
  62 import BasicTypes
  63 import FastString
  64 #endif
  65
  66 -- -----------------------------------------------------------------------------
  67 -- Top-level of the instruction selector
  68
  69 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  70 -- They are really trees of insns to facilitate fast appending, where a
  71 -- left-to-right traversal (pre-order?) yields the insns in the correct
  72 -- order.
  73
  74 cmmTopCodeGen
  75         :: DynFlags
  76         -> RawCmmTop
  77         -> NatM [NatCmmTop Instr]
  78
  79 cmmTopCodeGen dflags (CmmProc info lab (ListGraph blocks)) = do
  80   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  81   picBaseMb <- getPicBaseMaybeNat
  82   let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
  83       tops = proc : concat statics
  84       os   = platformOS $ targetPlatform dflags
  85   case picBaseMb of
  86       Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
  87       Nothing -> return tops
  88
  89 cmmTopCodeGen dflags (CmmData sec dat) = do
  90   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  91
  92 basicBlockCodeGen
  93         :: CmmBasicBlock
  94         -> NatM ( [NatBasicBlock Instr]
  95                 , [NatCmmTop Instr])
  96
  97 basicBlockCodeGen (BasicBlock id stmts) = do
  98   instrs <- stmtsToInstrs stmts
  99   -- code generation may introduce new basic block boundaries, which
 100   -- are indicated by the NEWBLOCK instruction.  We must split up the
 101   -- instruction stream into basic blocks again.  Also, we extract
 102   -- LDATAs here too.
 103   let
 104         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
 105
 106         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
 107           = ([], BasicBlock id instrs : blocks, statics)
 108         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
 109           = (instrs, blocks, CmmData sec dat:statics)
 110         mkBlocks instr (instrs,blocks,statics)
 111           = (instr:instrs, blocks, statics)
 112   -- in
 113   return (BasicBlock id top : other_blocks, statics)
 114
 115 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 116 stmtsToInstrs stmts
 117    = do instrss <- mapM stmtToInstrs stmts
 118         return (concatOL instrss)
 119
 120 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 121 stmtToInstrs stmt = case stmt of
 122     CmmNop         -> return nilOL
 123     CmmComment s   -> return (unitOL (COMMENT s))
 124
 125     CmmAssign reg src
 126       | isFloatType ty -> assignReg_FltCode size reg src
 127 #if WORD_SIZE_IN_BITS==32
 128       | isWord64 ty    -> assignReg_I64Code      reg src
 129 #endif
 130       | otherwise        -> assignReg_IntCode size reg src
 131         where ty = cmmRegType reg
 132               size = cmmTypeSize ty
 133
 134     CmmStore addr src
 135       | isFloatType ty -> assignMem_FltCode size addr src
 136 #if WORD_SIZE_IN_BITS==32
 137       | isWord64 ty      -> assignMem_I64Code      addr src
 138 #endif
 139       | otherwise        -> assignMem_IntCode size addr src
 140         where ty = cmmExprType src
 141               size = cmmTypeSize ty
 142
 143     CmmCall target result_regs args _ _
 144        -> genCCall target result_regs args
 145
 146     CmmBranch id          -> genBranch id
 147     CmmCondBranch arg id  -> genCondJump id arg
 148     CmmSwitch arg ids     -> genSwitch arg ids
 149     CmmJump arg params    -> genJump arg
 150     CmmReturn params      ->
 151       panic "stmtToInstrs: return statement should have been cps'd away"
 152
 153
 154 --------------------------------------------------------------------------------
 155 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
 156 --      They are really trees of insns to facilitate fast appending, where a
 157 --      left-to-right traversal yields the insns in the correct order.
 158 --
 159 type InstrBlock
 160         = OrdList Instr
 161
 162
 163 -- | Register's passed up the tree.  If the stix code forces the register
 164 --      to live in a pre-decided machine register, it comes out as @Fixed@;
 165 --      otherwise, it comes out as @Any@, and the parent can decide which
 166 --      register to put it in.
 167 --
 168 data Register
 169         = Fixed Size Reg InstrBlock
 170         | Any   Size (Reg -> InstrBlock)
 171
 172
 173 swizzleRegisterRep :: Register -> Size -> Register
 174 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 175 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 176
 177
 178 -- | Grab the Reg for a CmmReg
 179 getRegisterReg :: CmmReg -> Reg
 180
 181 getRegisterReg (CmmLocal (LocalReg u pk))
 182   = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
 183
 184 getRegisterReg (CmmGlobal mid)
 185   = case globalRegMaybe mid of
 186         Just reg -> reg
 187         Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 188         -- By this stage, the only MagicIds remaining should be the
 189         -- ones which map to a real machine register on this
 190         -- platform.  Hence ...
 191
 192
 193 {-
 194 Now, given a tree (the argument to an CmmLoad) that references memory,
 195 produce a suitable addressing mode.
 196
 197 A Rule of the Game (tm) for Amodes: use of the addr bit must
 198 immediately follow use of the code part, since the code part puts
 199 values in registers which the addr then refers to.  So you can't put
 200 anything in between, lest it overwrite some of those registers.  If
 201 you need to do some other computation between the code part and use of
 202 the addr bit, first store the effective address from the amode in a
 203 temporary, then do the other computation, and then use the temporary:
 204
 205     code
 206     LEA amode, tmp
 207     ... other computation ...
 208     ... (tmp) ...
 209 -}
 210
 211
 212 -- | Check whether an integer will fit in 32 bits.
 213 --      A CmmInt is intended to be truncated to the appropriate
 214 --      number of bits, so here we truncate it to Int64.  This is
 215 --      important because e.g. -1 as a CmmInt might be either
 216 --      -1 or 18446744073709551615.
 217 --
 218 is32BitInteger :: Integer -> Bool
 219 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
 220   where i64 = fromIntegral i :: Int64
 221
 222
 223 -- | Convert a BlockId to some CmmStatic data
 224 jumpTableEntry :: Maybe BlockId -> CmmStatic
 225 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
 226 jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
 227     where blockLabel = mkAsmTempLabel (getUnique blockid)
 228
 229
 230
 231 -- -----------------------------------------------------------------------------
 232 -- General things for putting together code sequences
 233
 234 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 235 -- CmmExprs into CmmRegOff?
 236 mangleIndexTree :: CmmExpr -> CmmExpr
 237 mangleIndexTree (CmmRegOff reg off)
 238   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 239   where width = typeWidth (cmmRegType reg)
 240
 241 mangleIndexTree _
 242         = panic "PPC.CodeGen.mangleIndexTree: no match"
 243
 244 -- -----------------------------------------------------------------------------
 245 --  Code gen for 64-bit arithmetic on 32-bit platforms
 246
 247 {-
 248 Simple support for generating 64-bit code (ie, 64 bit values and 64
 249 bit assignments) on 32-bit platforms.  Unlike the main code generator
 250 we merely shoot for generating working code as simply as possible, and
 251 pay little attention to code quality.  Specifically, there is no
 252 attempt to deal cleverly with the fixed-vs-floating register
 253 distinction; all values are generated into (pairs of) floating
 254 registers, even if this would mean some redundant reg-reg moves as a
 255 result.  Only one of the VRegUniques is returned, since it will be
 256 of the VRegUniqueLo form, and the upper-half VReg can be determined
 257 by applying getHiVRegFromLo to it.
 258 -}
 259
 260 data ChildCode64        -- a.k.a "Register64"
 261       = ChildCode64
 262            InstrBlock   -- code
 263            Reg          -- the lower 32-bit temporary which contains the
 264                         -- result; use getHiVRegFromLo to find the other
 265                         -- VRegUnique.  Rules of this simplified insn
 266                         -- selection game are therefore that the returned
 267                         -- Reg may be modified
 268
 269
 270 -- | The dual to getAnyReg: compute an expression into a register, but
 271 --      we don't mind which one it is.
 272 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 273 getSomeReg expr = do
 274   r <- getRegister expr
 275   case r of
 276     Any rep code -> do
 277         tmp <- getNewRegNat rep
 278         return (tmp, code tmp)
 279     Fixed _ reg code ->
 280         return (reg, code)
 281
 282 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 283 getI64Amodes addrTree = do
 284     Amode hi_addr addr_code <- getAmode addrTree
 285     case addrOffset hi_addr 4 of
 286         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 287         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 288                            return (AddrRegImm hi_ptr (ImmInt 0),
 289                                    AddrRegImm hi_ptr (ImmInt 4),
 290                                    code)
 291
 292
 293 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 294 assignMem_I64Code addrTree valueTree = do
 295         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 296         ChildCode64 vcode rlo <- iselExpr64 valueTree
 297         let
 298                 rhi = getHiVRegFromLo rlo
 299
 300                 -- Big-endian store
 301                 mov_hi = ST II32 rhi hi_addr
 302                 mov_lo = ST II32 rlo lo_addr
 303         -- in
 304         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 305
 306
 307 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 308 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 309    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 310    let
 311          r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
 312          r_dst_hi = getHiVRegFromLo r_dst_lo
 313          r_src_hi = getHiVRegFromLo r_src_lo
 314          mov_lo = MR r_dst_lo r_src_lo
 315          mov_hi = MR r_dst_hi r_src_hi
 316    -- in
 317    return (
 318         vcode `snocOL` mov_lo `snocOL` mov_hi
 319      )
 320
 321 assignReg_I64Code lvalue valueTree
 322    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 323
 324
 325 iselExpr64        :: CmmExpr -> NatM ChildCode64
 326 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 327     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 328     (rlo, rhi) <- getNewRegPairNat II32
 329     let mov_hi = LD II32 rhi hi_addr
 330         mov_lo = LD II32 rlo lo_addr
 331     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 332                          rlo
 333
 334 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 335    = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
 336
 337 iselExpr64 (CmmLit (CmmInt i _)) = do
 338   (rlo,rhi) <- getNewRegPairNat II32
 339   let
 340         half0 = fromIntegral (fromIntegral i :: Word16)
 341         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 342         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 343         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 344
 345         code = toOL [
 346                 LIS rlo (ImmInt half1),
 347                 OR rlo rlo (RIImm $ ImmInt half0),
 348                 LIS rhi (ImmInt half3),
 349                 OR rlo rlo (RIImm $ ImmInt half2)
 350                 ]
 351   -- in
 352   return (ChildCode64 code rlo)
 353
 354 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 355    ChildCode64 code1 r1lo <- iselExpr64 e1
 356    ChildCode64 code2 r2lo <- iselExpr64 e2
 357    (rlo,rhi) <- getNewRegPairNat II32
 358    let
 359         r1hi = getHiVRegFromLo r1lo
 360         r2hi = getHiVRegFromLo r2lo
 361         code =  code1 `appOL`
 362                 code2 `appOL`
 363                 toOL [ ADDC rlo r1lo r2lo,
 364                        ADDE rhi r1hi r2hi ]
 365    -- in
 366    return (ChildCode64 code rlo)
 367
 368 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
 369     (expr_reg,expr_code) <- getSomeReg expr
 370     (rlo, rhi) <- getNewRegPairNat II32
 371     let mov_hi = LI rhi (ImmInt 0)
 372         mov_lo = MR rlo expr_reg
 373     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 374                          rlo
 375 iselExpr64 expr
 376    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 377
 378
 379
 380 getRegister :: CmmExpr -> NatM Register
 381
 382 getRegister (CmmReg (CmmGlobal PicBaseReg))
 383   = do
 384       reg <- getPicBaseNat archWordSize
 385       return (Fixed archWordSize reg nilOL)
 386
 387 getRegister (CmmReg reg)
 388   = return (Fixed (cmmTypeSize (cmmRegType reg))
 389                   (getRegisterReg reg) nilOL)
 390
 391 getRegister tree@(CmmRegOff _ _)
 392   = getRegister (mangleIndexTree tree)
 393
 394
 395 #if WORD_SIZE_IN_BITS==32
 396     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 397     -- TO_W_(x), TO_W_(x >> 32)
 398
 399 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 400              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 401   ChildCode64 code rlo <- iselExpr64 x
 402   return $ Fixed II32 (getHiVRegFromLo rlo) code
 403
 404 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 405              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 406   ChildCode64 code rlo <- iselExpr64 x
 407   return $ Fixed II32 (getHiVRegFromLo rlo) code
 408
 409 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 410   ChildCode64 code rlo <- iselExpr64 x
 411   return $ Fixed II32 rlo code
 412
 413 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 414   ChildCode64 code rlo <- iselExpr64 x
 415   return $ Fixed II32 rlo code
 416
 417 #endif
 418
 419
 420 getRegister (CmmLoad mem pk)
 421   | not (isWord64 pk)
 422   = do
 423         Amode addr addr_code <- getAmode mem
 424         let code dst = ASSERT((targetClassOfReg dst == RcDouble) == isFloatType pk)
 425                        addr_code `snocOL` LD size dst addr
 426         return (Any size code)
 427           where size = cmmTypeSize pk
 428
 429 -- catch simple cases of zero- or sign-extended load
 430 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
 431     Amode addr addr_code <- getAmode mem
 432     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
 433
 434 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
 435
 436 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
 437     Amode addr addr_code <- getAmode mem
 438     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
 439
 440 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
 441     Amode addr addr_code <- getAmode mem
 442     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
 443
 444 getRegister (CmmMachOp mop [x]) -- unary MachOps
 445   = case mop of
 446       MO_Not rep   -> triv_ucode_int rep NOT
 447
 448       MO_F_Neg w   -> triv_ucode_float w FNEG
 449       MO_S_Neg w   -> triv_ucode_int   w NEG
 450
 451       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
 452       MO_FF_Conv W32 W64 -> conversionNop FF64 x
 453
 454       MO_FS_Conv from to -> coerceFP2Int from to x
 455       MO_SF_Conv from to -> coerceInt2FP from to x
 456
 457       MO_SS_Conv from to
 458         | from == to    -> conversionNop (intSize to) x
 459
 460         -- narrowing is a nop: we treat the high bits as undefined
 461       MO_SS_Conv W32 to -> conversionNop (intSize to) x
 462       MO_SS_Conv W16 W8 -> conversionNop II8 x
 463       MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
 464       MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
 465
 466       MO_UU_Conv from to
 467         | from == to -> conversionNop (intSize to) x
 468         -- narrowing is a nop: we treat the high bits as undefined
 469       MO_UU_Conv W32 to -> conversionNop (intSize to) x
 470       MO_UU_Conv W16 W8 -> conversionNop II8 x
 471       MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
 472       MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
 473       _ -> panic "PPC.CodeGen.getRegister: no match"
 474
 475     where
 476         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
 477         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
 478
 479         conversionNop new_size expr
 480             = do e_code <- getRegister expr
 481                  return (swizzleRegisterRep e_code new_size)
 482
 483 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
 484   = case mop of
 485       MO_F_Eq w -> condFltReg EQQ x y
 486       MO_F_Ne w -> condFltReg NE  x y
 487       MO_F_Gt w -> condFltReg GTT x y
 488       MO_F_Ge w -> condFltReg GE  x y
 489       MO_F_Lt w -> condFltReg LTT x y
 490       MO_F_Le w -> condFltReg LE  x y
 491
 492       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
 493       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
 494
 495       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
 496       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
 497       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
 498       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
 499
 500       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
 501       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
 502       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
 503       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
 504
 505       MO_F_Add w  -> triv_float w FADD
 506       MO_F_Sub w  -> triv_float w FSUB
 507       MO_F_Mul w  -> triv_float w FMUL
 508       MO_F_Quot w -> triv_float w FDIV
 509
 510          -- optimize addition with 32-bit immediate
 511          -- (needed for PIC)
 512       MO_Add W32 ->
 513         case y of
 514           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
 515             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
 516           CmmLit lit
 517             -> do
 518                 (src, srcCode) <- getSomeReg x
 519                 let imm = litToImm lit
 520                     code dst = srcCode `appOL` toOL [
 521                                     ADDIS dst src (HA imm),
 522                                     ADD dst dst (RIImm (LO imm))
 523                                 ]
 524                 return (Any II32 code)
 525           _ -> trivialCode W32 True ADD x y
 526
 527       MO_Add rep -> trivialCode rep True ADD x y
 528       MO_Sub rep ->
 529         case y of    -- subfi ('substract from' with immediate) doesn't exist
 530           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
 531             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
 532           _ -> trivialCodeNoImm' (intSize rep) SUBF y x
 533
 534       MO_Mul rep -> trivialCode rep True MULLW x y
 535
 536       MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
 537
 538       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
 539       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
 540
 541       MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
 542       MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
 543
 544       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
 545       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
 546
 547       MO_And rep   -> trivialCode rep False AND x y
 548       MO_Or rep    -> trivialCode rep False OR x y
 549       MO_Xor rep   -> trivialCode rep False XOR x y
 550
 551       MO_Shl rep   -> trivialCode rep False SLW x y
 552       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
 553       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
 554       _         -> panic "PPC.CodeGen.getRegister: no match"
 555
 556   where
 557     triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
 558     triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
 559
 560 getRegister (CmmLit (CmmInt i rep))
 561   | Just imm <- makeImmediate rep True i
 562   = let
 563         code dst = unitOL (LI dst imm)
 564     in
 565         return (Any (intSize rep) code)
 566
 567 getRegister (CmmLit (CmmFloat f frep)) = do
 568     lbl <- getNewLabelNat
 569     dflags <- getDynFlagsNat
 570     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 571     Amode addr addr_code <- getAmode dynRef
 572     let size = floatSize frep
 573         code dst =
 574             LDATA ReadOnlyData  [CmmDataLabel lbl,
 575                                  CmmStaticLit (CmmFloat f frep)]
 576             `consOL` (addr_code `snocOL` LD size dst addr)
 577     return (Any size code)
 578
 579 getRegister (CmmLit lit)
 580   = let rep = cmmLitType lit
 581         imm = litToImm lit
 582         code dst = toOL [
 583               LIS dst (HA imm),
 584               ADD dst dst (RIImm (LO imm))
 585           ]
 586     in return (Any (cmmTypeSize rep) code)
 587
 588 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
 589
 590     -- extend?Rep: wrap integer expression of type rep
 591     -- in a conversion to II32
 592 extendSExpr W32 x = x
 593 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
 594 extendUExpr W32 x = x
 595 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
 596
 597 -- -----------------------------------------------------------------------------
 598 --  The 'Amode' type: Memory addressing modes passed up the tree.
 599
 600 data Amode
 601         = Amode AddrMode InstrBlock
 602
 603 {-
 604 Now, given a tree (the argument to an CmmLoad) that references memory,
 605 produce a suitable addressing mode.
 606
 607 A Rule of the Game (tm) for Amodes: use of the addr bit must
 608 immediately follow use of the code part, since the code part puts
 609 values in registers which the addr then refers to.  So you can't put
 610 anything in between, lest it overwrite some of those registers.  If
 611 you need to do some other computation between the code part and use of
 612 the addr bit, first store the effective address from the amode in a
 613 temporary, then do the other computation, and then use the temporary:
 614
 615     code
 616     LEA amode, tmp
 617     ... other computation ...
 618     ... (tmp) ...
 619 -}
 620
 621 getAmode :: CmmExpr -> NatM Amode
 622 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
 623
 624 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
 625   | Just off <- makeImmediate W32 True (-i)
 626   = do
 627         (reg, code) <- getSomeReg x
 628         return (Amode (AddrRegImm reg off) code)
 629
 630
 631 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
 632   | Just off <- makeImmediate W32 True i
 633   = do
 634         (reg, code) <- getSomeReg x
 635         return (Amode (AddrRegImm reg off) code)
 636
 637    -- optimize addition with 32-bit immediate
 638    -- (needed for PIC)
 639 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
 640   = do
 641         tmp <- getNewRegNat II32
 642         (src, srcCode) <- getSomeReg x
 643         let imm = litToImm lit
 644             code = srcCode `snocOL` ADDIS tmp src (HA imm)
 645         return (Amode (AddrRegImm tmp (LO imm)) code)
 646
 647 getAmode (CmmLit lit)
 648   = do
 649         tmp <- getNewRegNat II32
 650         let imm = litToImm lit
 651             code = unitOL (LIS tmp (HA imm))
 652         return (Amode (AddrRegImm tmp (LO imm)) code)
 653
 654 getAmode (CmmMachOp (MO_Add W32) [x, y])
 655   = do
 656         (regX, codeX) <- getSomeReg x
 657         (regY, codeY) <- getSomeReg y
 658         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
 659
 660 getAmode other
 661   = do
 662         (reg, code) <- getSomeReg other
 663         let
 664             off  = ImmInt 0
 665         return (Amode (AddrRegImm reg off) code)
 666
 667
 668
 669 --  The 'CondCode' type:  Condition codes passed up the tree.
 670 data CondCode
 671         = CondCode Bool Cond InstrBlock
 672
 673 -- Set up a condition code for a conditional branch.
 674
 675 getCondCode :: CmmExpr -> NatM CondCode
 676
 677 -- almost the same as everywhere else - but we need to
 678 -- extend small integers to 32 bit first
 679
 680 getCondCode (CmmMachOp mop [x, y])
 681   = case mop of
 682       MO_F_Eq W32 -> condFltCode EQQ x y
 683       MO_F_Ne W32 -> condFltCode NE  x y
 684       MO_F_Gt W32 -> condFltCode GTT x y
 685       MO_F_Ge W32 -> condFltCode GE  x y
 686       MO_F_Lt W32 -> condFltCode LTT x y
 687       MO_F_Le W32 -> condFltCode LE  x y
 688
 689       MO_F_Eq W64 -> condFltCode EQQ x y
 690       MO_F_Ne W64 -> condFltCode NE  x y
 691       MO_F_Gt W64 -> condFltCode GTT x y
 692       MO_F_Ge W64 -> condFltCode GE  x y
 693       MO_F_Lt W64 -> condFltCode LTT x y
 694       MO_F_Le W64 -> condFltCode LE  x y
 695
 696       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
 697       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
 698
 699       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
 700       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
 701       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
 702       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
 703
 704       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
 705       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
 706       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
 707       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
 708
 709       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
 710
 711 getCondCode other =  panic "getCondCode(2)(powerpc)"
 712
 713
 714
 715 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
 716 -- passed back up the tree.
 717
 718 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
 719
 720 --  ###FIXME: I16 and I8!
 721 condIntCode cond x (CmmLit (CmmInt y rep))
 722   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
 723   = do
 724         (src1, code) <- getSomeReg x
 725         let
 726             code' = code `snocOL`
 727                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
 728         return (CondCode False cond code')
 729
 730 condIntCode cond x y = do
 731     (src1, code1) <- getSomeReg x
 732     (src2, code2) <- getSomeReg y
 733     let
 734         code' = code1 `appOL` code2 `snocOL`
 735                   (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
 736     return (CondCode False cond code')
 737
 738 condFltCode cond x y = do
 739     (src1, code1) <- getSomeReg x
 740     (src2, code2) <- getSomeReg y
 741     let
 742         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
 743         code'' = case cond of -- twiddle CR to handle unordered case
 744                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
 745                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
 746                     _ -> code'
 747                  where
 748                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
 749     return (CondCode True cond code'')
 750
 751
 752
 753 -- -----------------------------------------------------------------------------
 754 -- Generating assignments
 755
 756 -- Assignments are really at the heart of the whole code generation
 757 -- business.  Almost all top-level nodes of any real importance are
 758 -- assignments, which correspond to loads, stores, or register
 759 -- transfers.  If we're really lucky, some of the register transfers
 760 -- will go away, because we can use the destination register to
 761 -- complete the code generation for the right hand side.  This only
 762 -- fails when the right hand side is forced into a fixed register
 763 -- (e.g. the result of a call).
 764
 765 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 766 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 767
 768 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 769 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 770
 771 assignMem_IntCode pk addr src = do
 772     (srcReg, code) <- getSomeReg src
 773     Amode dstAddr addr_code <- getAmode addr
 774     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
 775
 776 -- dst is a reg, but src could be anything
 777 assignReg_IntCode _ reg src
 778     = do
 779         r <- getRegister src
 780         return $ case r of
 781             Any _ code         -> code dst
 782             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
 783     where
 784         dst = getRegisterReg reg
 785
 786
 787
 788 -- Easy, isn't it?
 789 assignMem_FltCode = assignMem_IntCode
 790 assignReg_FltCode = assignReg_IntCode
 791
 792
 793
 794 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
 795
 796 genJump (CmmLit (CmmLabel lbl))
 797   = return (unitOL $ JMP lbl)
 798
 799 genJump tree
 800   = do
 801         (target,code) <- getSomeReg tree
 802         return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
 803
 804
 805 -- -----------------------------------------------------------------------------
 806 --  Unconditional branches
 807 genBranch :: BlockId -> NatM InstrBlock
 808 genBranch = return . toOL . mkJumpInstr
 809
 810
 811 -- -----------------------------------------------------------------------------
 812 --  Conditional jumps
 813
 814 {-
 815 Conditional jumps are always to local labels, so we can use branch
 816 instructions.  We peek at the arguments to decide what kind of
 817 comparison to do.
 818
 819 SPARC: First, we have to ensure that the condition codes are set
 820 according to the supplied comparison operation.  We generate slightly
 821 different code for floating point comparisons, because a floating
 822 point operation cannot directly precede a @BF@.  We assume the worst
 823 and fill that slot with a @NOP@.
 824
 825 SPARC: Do not fill the delay slots here; you will confuse the register
 826 allocator.
 827 -}
 828
 829
 830 genCondJump
 831     :: BlockId      -- the branch target
 832     -> CmmExpr      -- the condition on which to branch
 833     -> NatM InstrBlock
 834
 835 genCondJump id bool = do
 836   CondCode _ cond code <- getCondCode bool
 837   return (code `snocOL` BCC cond id)
 838
 839
 840
 841 -- -----------------------------------------------------------------------------
 842 --  Generating C calls
 843
 844 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
 845 -- @get_arg@, which moves the arguments to the correct registers/stack
 846 -- locations.  Apart from that, the code is easy.
 847 --
 848 -- (If applicable) Do not fill the delay slots here; you will confuse the
 849 -- register allocator.
 850
 851 genCCall
 852     :: CmmCallTarget            -- function to call
 853     -> HintedCmmFormals         -- where to put the result
 854     -> HintedCmmActuals         -- arguments (of mixed type)
 855     -> NatM InstrBlock
 856
 857
 858 #if darwin_TARGET_OS || linux_TARGET_OS
 859 {-
 860     The PowerPC calling convention for Darwin/Mac OS X
 861     is described in Apple's document
 862     "Inside Mac OS X - Mach-O Runtime Architecture".
 863
 864     PowerPC Linux uses the System V Release 4 Calling Convention
 865     for PowerPC. It is described in the
 866     "System V Application Binary Interface PowerPC Processor Supplement".
 867
 868     Both conventions are similar:
 869     Parameters may be passed in general-purpose registers starting at r3, in
 870     floating point registers starting at f1, or on the stack.
 871
 872     But there are substantial differences:
 873     * The number of registers used for parameter passing and the exact set of
 874       nonvolatile registers differs (see MachRegs.lhs).
 875     * On Darwin, stack space is always reserved for parameters, even if they are
 876       passed in registers. The called routine may choose to save parameters from
 877       registers to the corresponding space on the stack.
 878     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
 879       parameter is passed in an FPR.
 880     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
 881       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
 882       Darwin just treats an I64 like two separate II32s (high word first).
 883     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
 884       4-byte aligned like everything else on Darwin.
 885     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
 886       PowerPC Linux does not agree, so neither do we.
 887
 888     According to both conventions, The parameter area should be part of the
 889     caller's stack frame, allocated in the caller's prologue code (large enough
 890     to hold the parameter lists for all called routines). The NCG already
 891     uses the stack for register spilling, leaving 64 bytes free at the top.
 892     If we need a larger parameter area than that, we just allocate a new stack
 893     frame just before ccalling.
 894 -}
 895
 896
 897 genCCall (CmmPrim MO_WriteBarrier) _ _
 898  = return $ unitOL LWSYNC
 899
 900 genCCall target dest_regs argsAndHints
 901   = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
 902         -- we rely on argument promotion in the codeGen
 903     do
 904         (finalStack,passArgumentsCode,usedRegs) <- passArguments
 905                                                         (zip args argReps)
 906                                                         allArgRegs allFPArgRegs
 907                                                         initialStackOffset
 908                                                         (toOL []) []
 909
 910         (labelOrExpr, reduceToFF32) <- case target of
 911             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
 912             CmmCallee expr conv -> return  (Right expr, False)
 913             CmmPrim mop -> outOfLineMachOp mop
 914
 915         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
 916             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
 917
 918         case labelOrExpr of
 919             Left lbl -> do
 920                 return (         codeBefore
 921                         `snocOL` BL lbl usedRegs
 922                         `appOL`  codeAfter)
 923             Right dyn -> do
 924                 (dynReg, dynCode) <- getSomeReg dyn
 925                 return (         dynCode
 926                         `snocOL` MTCTR dynReg
 927                         `appOL`  codeBefore
 928                         `snocOL` BCTRL usedRegs
 929                         `appOL`  codeAfter)
 930     where
 931 #if darwin_TARGET_OS
 932         initialStackOffset = 24
 933             -- size of linkage area + size of arguments, in bytes
 934         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
 935                                  map (widthInBytes . typeWidth) argReps
 936 #elif linux_TARGET_OS
 937         initialStackOffset = 8
 938         stackDelta finalStack = roundTo 16 finalStack
 939 #endif
 940         -- need to remove alignment information
 941         argsAndHints' | (CmmPrim mop) <- target,
 942                         (mop == MO_Memcpy ||
 943                          mop == MO_Memset ||
 944                          mop == MO_Memmove)
 945                       = init argsAndHints
 946
 947                       | otherwise
 948                       = argsAndHints
 949
 950         args = map hintlessCmm argsAndHints'
 951         argReps = map cmmExprType args
 952
 953         roundTo a x | x `mod` a == 0 = x
 954                     | otherwise = x + a - (x `mod` a)
 955
 956         move_sp_down finalStack
 957                | delta > 64 =
 958                         toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
 959                               DELTA (-delta)]
 960                | otherwise = nilOL
 961                where delta = stackDelta finalStack
 962         move_sp_up finalStack
 963                | delta > 64 =
 964                         toOL [ADD sp sp (RIImm (ImmInt delta)),
 965                               DELTA 0]
 966                | otherwise = nilOL
 967                where delta = stackDelta finalStack
 968
 969
 970         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
 971         passArguments ((arg,arg_ty):args) gprs fprs stackOffset
 972                accumCode accumUsed | isWord64 arg_ty =
 973             do
 974                 ChildCode64 code vr_lo <- iselExpr64 arg
 975                 let vr_hi = getHiVRegFromLo vr_lo
 976
 977 #if darwin_TARGET_OS
 978                 passArguments args
 979                               (drop 2 gprs)
 980                               fprs
 981                               (stackOffset+8)
 982                               (accumCode `appOL` code
 983                                     `snocOL` storeWord vr_hi gprs stackOffset
 984                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
 985                               ((take 2 gprs) ++ accumUsed)
 986             where
 987                 storeWord vr (gpr:_) offset = MR gpr vr
 988                 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
 989
 990 #elif linux_TARGET_OS
 991                 let stackOffset' = roundTo 8 stackOffset
 992                     stackCode = accumCode `appOL` code
 993                         `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
 994                         `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
 995                     regCode hireg loreg =
 996                         accumCode `appOL` code
 997                             `snocOL` MR hireg vr_hi
 998                             `snocOL` MR loreg vr_lo
 999
1000                 case gprs of
1001                     hireg : loreg : regs | even (length gprs) ->
1002                         passArguments args regs fprs stackOffset
1003                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
1004                     _skipped : hireg : loreg : regs ->
1005                         passArguments args regs fprs stackOffset
1006                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
1007                     _ -> -- only one or no regs left
1008                         passArguments args [] fprs (stackOffset'+8)
1009                                       stackCode accumUsed
1010 #endif
1011
1012         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1013             | reg : _ <- regs = do
1014                 register <- getRegister arg
1015                 let code = case register of
1016                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1017                             Any _ acode -> acode reg
1018                 passArguments args
1019                               (drop nGprs gprs)
1020                               (drop nFprs fprs)
1021 #if darwin_TARGET_OS
1022         -- The Darwin ABI requires that we reserve stack slots for register parameters
1023                               (stackOffset + stackBytes)
1024 #elif linux_TARGET_OS
1025         -- ... the SysV ABI doesn't.
1026                               stackOffset
1027 #endif
1028                               (accumCode `appOL` code)
1029                               (reg : accumUsed)
1030             | otherwise = do
1031                 (vr, code) <- getSomeReg arg
1032                 passArguments args
1033                               (drop nGprs gprs)
1034                               (drop nFprs fprs)
1035                               (stackOffset' + stackBytes)
1036                               (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
1037                               accumUsed
1038             where
1039 #if darwin_TARGET_OS
1040         -- stackOffset is at least 4-byte aligned
1041         -- The Darwin ABI is happy with that.
1042                 stackOffset' = stackOffset
1043 #else
1044         -- ... the SysV ABI requires 8-byte alignment for doubles.
1045                 stackOffset' | isFloatType rep && typeWidth rep == W64 =
1046                                  roundTo 8 stackOffset
1047                              | otherwise  =           stackOffset
1048 #endif
1049                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1050                 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
1051                     II32 -> (1, 0, 4, gprs)
1052 #if darwin_TARGET_OS
1053         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
1054         -- we use the FPRs.
1055                     FF32 -> (1, 1, 4, fprs)
1056                     FF64 -> (2, 1, 8, fprs)
1057 #elif linux_TARGET_OS
1058         -- ... the SysV ABI doesn't.
1059                     FF32 -> (0, 1, 4, fprs)
1060                     FF64 -> (0, 1, 8, fprs)
1061 #endif
1062
1063         moveResult reduceToFF32 =
1064             case dest_regs of
1065                 [] -> nilOL
1066                 [CmmHinted dest _hint]
1067                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
1068                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1069                     | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
1070                                           MR r_dest r4]
1071                     | otherwise -> unitOL (MR r_dest r3)
1072                     where rep = cmmRegType (CmmLocal dest)
1073                           r_dest = getRegisterReg (CmmLocal dest)
1074
1075         outOfLineMachOp mop =
1076             do
1077                 dflags <- getDynFlagsNat
1078                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
1079                               mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1080                 let mopLabelOrExpr = case mopExpr of
1081                         CmmLit (CmmLabel lbl) -> Left lbl
1082                         _ -> Right mopExpr
1083                 return (mopLabelOrExpr, reduce)
1084             where
1085                 (functionName, reduce) = case mop of
1086                     MO_F32_Exp   -> (fsLit "exp", True)
1087                     MO_F32_Log   -> (fsLit "log", True)
1088                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
1089
1090                     MO_F32_Sin   -> (fsLit "sin", True)
1091                     MO_F32_Cos   -> (fsLit "cos", True)
1092                     MO_F32_Tan   -> (fsLit "tan", True)
1093
1094                     MO_F32_Asin  -> (fsLit "asin", True)
1095                     MO_F32_Acos  -> (fsLit "acos", True)
1096                     MO_F32_Atan  -> (fsLit "atan", True)
1097
1098                     MO_F32_Sinh  -> (fsLit "sinh", True)
1099                     MO_F32_Cosh  -> (fsLit "cosh", True)
1100                     MO_F32_Tanh  -> (fsLit "tanh", True)
1101                     MO_F32_Pwr   -> (fsLit "pow", True)
1102
1103                     MO_F64_Exp   -> (fsLit "exp", False)
1104                     MO_F64_Log   -> (fsLit "log", False)
1105                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
1106
1107                     MO_F64_Sin   -> (fsLit "sin", False)
1108                     MO_F64_Cos   -> (fsLit "cos", False)
1109                     MO_F64_Tan   -> (fsLit "tan", False)
1110
1111                     MO_F64_Asin  -> (fsLit "asin", False)
1112                     MO_F64_Acos  -> (fsLit "acos", False)
1113                     MO_F64_Atan  -> (fsLit "atan", False)
1114
1115                     MO_F64_Sinh  -> (fsLit "sinh", False)
1116                     MO_F64_Cosh  -> (fsLit "cosh", False)
1117                     MO_F64_Tanh  -> (fsLit "tanh", False)
1118                     MO_F64_Pwr   -> (fsLit "pow", False)
1119
1120                     MO_Memcpy    -> (fsLit "memcpy", False)
1121                     MO_Memset    -> (fsLit "memset", False)
1122                     MO_Memmove   -> (fsLit "memmove", False)
1123
1124                     other -> pprPanic "genCCall(ppc): unknown callish op"
1125                                     (pprCallishMachOp other)
1126
1127 #else /* darwin_TARGET_OS || linux_TARGET_OS */
1128 genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
1129 #endif
1130
1131
1132 -- -----------------------------------------------------------------------------
1133 -- Generating a table-branch
1134
1135 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1136 genSwitch expr ids
1137   | opt_PIC
1138   = do
1139         (reg,e_code) <- getSomeReg expr
1140         tmp <- getNewRegNat II32
1141         lbl <- getNewLabelNat
1142         dflags <- getDynFlagsNat
1143         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1144         (tableReg,t_code) <- getSomeReg $ dynRef
1145         let code = e_code `appOL` t_code `appOL` toOL [
1146                             SLW tmp reg (RIImm (ImmInt 2)),
1147                             LD II32 tmp (AddrRegReg tableReg tmp),
1148                             ADD tmp tmp (RIReg tableReg),
1149                             MTCTR tmp,
1150                             BCTR ids (Just lbl)
1151                     ]
1152         return code
1153   | otherwise
1154   = do
1155         (reg,e_code) <- getSomeReg expr
1156         tmp <- getNewRegNat II32
1157         lbl <- getNewLabelNat
1158         let code = e_code `appOL` toOL [
1159                             SLW tmp reg (RIImm (ImmInt 2)),
1160                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
1161                             LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1162                             MTCTR tmp,
1163                             BCTR ids (Just lbl)
1164                     ]
1165         return code
1166
1167 generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr)
1168 generateJumpTableForInstr (BCTR ids (Just lbl)) =
1169     let jumpTable
1170             | opt_PIC   = map jumpTableEntryRel ids
1171             | otherwise = map jumpTableEntry ids
1172                 where jumpTableEntryRel Nothing
1173                         = CmmStaticLit (CmmInt 0 wordWidth)
1174                       jumpTableEntryRel (Just blockid)
1175                         = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1176                             where blockLabel = mkAsmTempLabel (getUnique blockid)
1177     in Just (CmmData ReadOnlyData (CmmDataLabel lbl : jumpTable))
1178 generateJumpTableForInstr _ = Nothing
1179
1180 -- -----------------------------------------------------------------------------
1181 -- 'condIntReg' and 'condFltReg': condition codes into registers
1182
1183 -- Turn those condition codes into integers now (when they appear on
1184 -- the right hand side of an assignment).
1185 --
1186 -- (If applicable) Do not fill the delay slots here; you will confuse the
1187 -- register allocator.
1188
1189 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1190
1191 condReg :: NatM CondCode -> NatM Register
1192 condReg getCond = do
1193     CondCode _ cond cond_code <- getCond
1194     let
1195 {-        code dst = cond_code `appOL` toOL [
1196                 BCC cond lbl1,
1197                 LI dst (ImmInt 0),
1198                 BCC ALWAYS lbl2,
1199                 NEWBLOCK lbl1,
1200                 LI dst (ImmInt 1),
1201                 BCC ALWAYS lbl2,
1202                 NEWBLOCK lbl2
1203             ]-}
1204         code dst = cond_code
1205             `appOL` negate_code
1206             `appOL` toOL [
1207                 MFCR dst,
1208                 RLWINM dst dst (bit + 1) 31 31
1209             ]
1210
1211         negate_code | do_negate = unitOL (CRNOR bit bit bit)
1212                     | otherwise = nilOL
1213
1214         (bit, do_negate) = case cond of
1215             LTT -> (0, False)
1216             LE  -> (1, True)
1217             EQQ -> (2, False)
1218             GE  -> (0, True)
1219             GTT -> (1, False)
1220
1221             NE  -> (2, True)
1222
1223             LU  -> (0, False)
1224             LEU -> (1, True)
1225             GEU -> (0, True)
1226             GU  -> (1, False)
1227             _   -> panic "PPC.CodeGen.codeReg: no match"
1228
1229     return (Any II32 code)
1230
1231 condIntReg cond x y = condReg (condIntCode cond x y)
1232 condFltReg cond x y = condReg (condFltCode cond x y)
1233
1234
1235
1236 -- -----------------------------------------------------------------------------
1237 -- 'trivial*Code': deal with trivial instructions
1238
1239 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1240 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1241 -- Only look for constants on the right hand side, because that's
1242 -- where the generic optimizer will have put them.
1243
1244 -- Similarly, for unary instructions, we don't have to worry about
1245 -- matching an StInt as the argument, because genericOpt will already
1246 -- have handled the constant-folding.
1247
1248
1249
1250 {-
1251 Wolfgang's PowerPC version of The Rules:
1252
1253 A slightly modified version of The Rules to take advantage of the fact
1254 that PowerPC instructions work on all registers and don't implicitly
1255 clobber any fixed registers.
1256
1257 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1258
1259 * If getRegister returns Any, then the code it generates may modify only:
1260         (a) fresh temporaries
1261         (b) the destination register
1262   It may *not* modify global registers, unless the global
1263   register happens to be the destination register.
1264   It may not clobber any other registers. In fact, only ccalls clobber any
1265   fixed registers.
1266   Also, it may not modify the counter register (used by genCCall).
1267
1268   Corollary: If a getRegister for a subexpression returns Fixed, you need
1269   not move it to a fresh temporary before evaluating the next subexpression.
1270   The Fixed register won't be modified.
1271   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1272
1273 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1274   the value of the destination register.
1275 -}
1276
1277 trivialCode
1278         :: Width
1279         -> Bool
1280         -> (Reg -> Reg -> RI -> Instr)
1281         -> CmmExpr
1282         -> CmmExpr
1283         -> NatM Register
1284
1285 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1286     | Just imm <- makeImmediate rep signed y
1287     = do
1288         (src1, code1) <- getSomeReg x
1289         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1290         return (Any (intSize rep) code)
1291
1292 trivialCode rep _ instr x y = do
1293     (src1, code1) <- getSomeReg x
1294     (src2, code2) <- getSomeReg y
1295     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1296     return (Any (intSize rep) code)
1297
1298 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
1299                  -> CmmExpr -> CmmExpr -> NatM Register
1300 trivialCodeNoImm' size instr x y = do
1301     (src1, code1) <- getSomeReg x
1302     (src2, code2) <- getSomeReg y
1303     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1304     return (Any size code)
1305
1306 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
1307                  -> CmmExpr -> CmmExpr -> NatM Register
1308 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
1309
1310
1311 trivialUCode
1312         :: Size
1313         -> (Reg -> Reg -> Instr)
1314         -> CmmExpr
1315         -> NatM Register
1316 trivialUCode rep instr x = do
1317     (src, code) <- getSomeReg x
1318     let code' dst = code `snocOL` instr dst src
1319     return (Any rep code')
1320
1321 -- There is no "remainder" instruction on the PPC, so we have to do
1322 -- it the hard way.
1323 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1324
1325 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1326     -> CmmExpr -> CmmExpr -> NatM Register
1327 remainderCode rep div x y = do
1328     (src1, code1) <- getSomeReg x
1329     (src2, code2) <- getSomeReg y
1330     let code dst = code1 `appOL` code2 `appOL` toOL [
1331                 div dst src1 src2,
1332                 MULLW dst dst (RIReg src2),
1333                 SUBF dst dst src1
1334             ]
1335     return (Any (intSize rep) code)
1336
1337
1338 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1339 coerceInt2FP fromRep toRep x = do
1340     (src, code) <- getSomeReg x
1341     lbl <- getNewLabelNat
1342     itmp <- getNewRegNat II32
1343     ftmp <- getNewRegNat FF64
1344     dflags <- getDynFlagsNat
1345     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1346     Amode addr addr_code <- getAmode dynRef
1347     let
1348         code' dst = code `appOL` maybe_exts `appOL` toOL [
1349                 LDATA ReadOnlyData
1350                                 [CmmDataLabel lbl,
1351                                  CmmStaticLit (CmmInt 0x43300000 W32),
1352                                  CmmStaticLit (CmmInt 0x80000000 W32)],
1353                 XORIS itmp src (ImmInt 0x8000),
1354                 ST II32 itmp (spRel 3),
1355                 LIS itmp (ImmInt 0x4330),
1356                 ST II32 itmp (spRel 2),
1357                 LD FF64 ftmp (spRel 2)
1358             ] `appOL` addr_code `appOL` toOL [
1359                 LD FF64 dst addr,
1360                 FSUB FF64 dst ftmp dst
1361             ] `appOL` maybe_frsp dst
1362
1363         maybe_exts = case fromRep of
1364                         W8 ->  unitOL $ EXTS II8 src src
1365                         W16 -> unitOL $ EXTS II16 src src
1366                         W32 -> nilOL
1367                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
1368
1369         maybe_frsp dst
1370                 = case toRep of
1371                         W32 -> unitOL $ FRSP dst dst
1372                         W64 -> nilOL
1373                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
1374
1375     return (Any (floatSize toRep) code')
1376
1377 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1378 coerceFP2Int _ toRep x = do
1379     -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1380     (src, code) <- getSomeReg x
1381     tmp <- getNewRegNat FF64
1382     let
1383         code' dst = code `appOL` toOL [
1384                 -- convert to int in FP reg
1385             FCTIWZ tmp src,
1386                 -- store value (64bit) from FP to stack
1387             ST FF64 tmp (spRel 2),
1388                 -- read low word of value (high word is undefined)
1389             LD II32 dst (spRel 3)]
1390     return (Any (intSize toRep) code')