compiler/nativeGen/PPC/CodeGen.hs

   1 {-# OPTIONS -w #-}
   2
   3 -----------------------------------------------------------------------------
   4 --
   5 -- Generating machine code (instruction selection)
   6 --
   7 -- (c) The University of Glasgow 1996-2004
   8 --
   9 -----------------------------------------------------------------------------
  10
  11 -- This is a big module, but, if you pay attention to
  12 -- (a) the sectioning, (b) the type signatures, and
  13 -- (c) the #if blah_TARGET_ARCH} things, the
  14 -- structure should not be too overwhelming.
  15
  16 module PPC.CodeGen (
  17         cmmTopCodeGen,
  18         InstrBlock
  19 )
  20
  21 where
  22
  23 #include "HsVersions.h"
  24 #include "nativeGen/NCG.h"
  25 #include "../includes/MachDeps.h"
  26
  27 -- NCG stuff:
  28 import PPC.Instr
  29 import PPC.Cond
  30 import PPC.Regs
  31 import PPC.RegInfo
  32 import NCGMonad
  33 import Instruction
  34 import PIC
  35 import Size
  36 import RegClass
  37 import Reg
  38 import TargetReg
  39 import Platform
  40
  41 -- Our intermediate code:
  42 import BlockId
  43 import PprCmm           ( pprExpr )
  44 import OldCmm
  45 import CLabel
  46
  47 -- The rest:
  48 import StaticFlags      ( opt_PIC )
  49 import OrdList
  50 import qualified Outputable as O
  51 import Outputable
  52 import Unique
  53 import DynFlags
  54
  55 import Control.Monad    ( mapAndUnzipM )
  56 import Data.Bits
  57 import Data.Int
  58 import Data.Word
  59
  60 #if darwin_TARGET_OS || linux_TARGET_OS
  61 import BasicTypes
  62 import FastString
  63 #endif
  64
  65 -- -----------------------------------------------------------------------------
  66 -- Top-level of the instruction selector
  67
  68 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
  69 -- They are really trees of insns to facilitate fast appending, where a
  70 -- left-to-right traversal (pre-order?) yields the insns in the correct
  71 -- order.
  72
  73 cmmTopCodeGen
  74         :: DynFlags
  75         -> RawCmmTop
  76         -> NatM [NatCmmTop Instr]
  77
  78 cmmTopCodeGen dflags (CmmProc info lab (ListGraph blocks)) = do
  79   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  80   picBaseMb <- getPicBaseMaybeNat
  81   let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
  82       tops = proc : concat statics
  83       os   = platformOS $ targetPlatform dflags
  84   case picBaseMb of
  85       Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
  86       Nothing -> return tops
  87
  88 cmmTopCodeGen dflags (CmmData sec dat) = do
  89   return [CmmData sec dat]  -- no translation, we just use CmmStatic
  90
  91 basicBlockCodeGen
  92         :: CmmBasicBlock
  93         -> NatM ( [NatBasicBlock Instr]
  94                 , [NatCmmTop Instr])
  95
  96 basicBlockCodeGen (BasicBlock id stmts) = do
  97   instrs <- stmtsToInstrs stmts
  98   -- code generation may introduce new basic block boundaries, which
  99   -- are indicated by the NEWBLOCK instruction.  We must split up the
 100   -- instruction stream into basic blocks again.  Also, we extract
 101   -- LDATAs here too.
 102   let
 103         (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
 104
 105         mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
 106           = ([], BasicBlock id instrs : blocks, statics)
 107         mkBlocks (LDATA sec dat) (instrs,blocks,statics)
 108           = (instrs, blocks, CmmData sec dat:statics)
 109         mkBlocks instr (instrs,blocks,statics)
 110           = (instr:instrs, blocks, statics)
 111   -- in
 112   return (BasicBlock id top : other_blocks, statics)
 113
 114 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 115 stmtsToInstrs stmts
 116    = do instrss <- mapM stmtToInstrs stmts
 117         return (concatOL instrss)
 118
 119 stmtToInstrs :: CmmStmt -> NatM InstrBlock
 120 stmtToInstrs stmt = case stmt of
 121     CmmNop         -> return nilOL
 122     CmmComment s   -> return (unitOL (COMMENT s))
 123
 124     CmmAssign reg src
 125       | isFloatType ty -> assignReg_FltCode size reg src
 126 #if WORD_SIZE_IN_BITS==32
 127       | isWord64 ty    -> assignReg_I64Code      reg src
 128 #endif
 129       | otherwise        -> assignReg_IntCode size reg src
 130         where ty = cmmRegType reg
 131               size = cmmTypeSize ty
 132
 133     CmmStore addr src
 134       | isFloatType ty -> assignMem_FltCode size addr src
 135 #if WORD_SIZE_IN_BITS==32
 136       | isWord64 ty      -> assignMem_I64Code      addr src
 137 #endif
 138       | otherwise        -> assignMem_IntCode size addr src
 139         where ty = cmmExprType src
 140               size = cmmTypeSize ty
 141
 142     CmmCall target result_regs args _ _
 143        -> genCCall target result_regs args
 144
 145     CmmBranch id          -> genBranch id
 146     CmmCondBranch arg id  -> genCondJump id arg
 147     CmmSwitch arg ids     -> genSwitch arg ids
 148     CmmJump arg params    -> genJump arg
 149     CmmReturn params      ->
 150       panic "stmtToInstrs: return statement should have been cps'd away"
 151
 152
 153 --------------------------------------------------------------------------------
 154 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
 155 --      They are really trees of insns to facilitate fast appending, where a
 156 --      left-to-right traversal yields the insns in the correct order.
 157 --
 158 type InstrBlock
 159         = OrdList Instr
 160
 161
 162 -- | Register's passed up the tree.  If the stix code forces the register
 163 --      to live in a pre-decided machine register, it comes out as @Fixed@;
 164 --      otherwise, it comes out as @Any@, and the parent can decide which
 165 --      register to put it in.
 166 --
 167 data Register
 168         = Fixed Size Reg InstrBlock
 169         | Any   Size (Reg -> InstrBlock)
 170
 171
 172 swizzleRegisterRep :: Register -> Size -> Register
 173 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
 174 swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
 175
 176
 177 -- | Grab the Reg for a CmmReg
 178 getRegisterReg :: CmmReg -> Reg
 179
 180 getRegisterReg (CmmLocal (LocalReg u pk))
 181   = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
 182
 183 getRegisterReg (CmmGlobal mid)
 184   = case globalRegMaybe mid of
 185         Just reg -> reg
 186         Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
 187         -- By this stage, the only MagicIds remaining should be the
 188         -- ones which map to a real machine register on this
 189         -- platform.  Hence ...
 190
 191
 192 {-
 193 Now, given a tree (the argument to an CmmLoad) that references memory,
 194 produce a suitable addressing mode.
 195
 196 A Rule of the Game (tm) for Amodes: use of the addr bit must
 197 immediately follow use of the code part, since the code part puts
 198 values in registers which the addr then refers to.  So you can't put
 199 anything in between, lest it overwrite some of those registers.  If
 200 you need to do some other computation between the code part and use of
 201 the addr bit, first store the effective address from the amode in a
 202 temporary, then do the other computation, and then use the temporary:
 203
 204     code
 205     LEA amode, tmp
 206     ... other computation ...
 207     ... (tmp) ...
 208 -}
 209
 210
 211 -- | Check whether an integer will fit in 32 bits.
 212 --      A CmmInt is intended to be truncated to the appropriate
 213 --      number of bits, so here we truncate it to Int64.  This is
 214 --      important because e.g. -1 as a CmmInt might be either
 215 --      -1 or 18446744073709551615.
 216 --
 217 is32BitInteger :: Integer -> Bool
 218 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
 219   where i64 = fromIntegral i :: Int64
 220
 221
 222 -- | Convert a BlockId to some CmmStatic data
 223 jumpTableEntry :: Maybe BlockId -> CmmStatic
 224 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
 225 jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
 226     where blockLabel = mkAsmTempLabel (getUnique blockid)
 227
 228
 229
 230 -- -----------------------------------------------------------------------------
 231 -- General things for putting together code sequences
 232
 233 -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
 234 -- CmmExprs into CmmRegOff?
 235 mangleIndexTree :: CmmExpr -> CmmExpr
 236 mangleIndexTree (CmmRegOff reg off)
 237   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
 238   where width = typeWidth (cmmRegType reg)
 239
 240 mangleIndexTree _
 241         = panic "PPC.CodeGen.mangleIndexTree: no match"
 242
 243 -- -----------------------------------------------------------------------------
 244 --  Code gen for 64-bit arithmetic on 32-bit platforms
 245
 246 {-
 247 Simple support for generating 64-bit code (ie, 64 bit values and 64
 248 bit assignments) on 32-bit platforms.  Unlike the main code generator
 249 we merely shoot for generating working code as simply as possible, and
 250 pay little attention to code quality.  Specifically, there is no
 251 attempt to deal cleverly with the fixed-vs-floating register
 252 distinction; all values are generated into (pairs of) floating
 253 registers, even if this would mean some redundant reg-reg moves as a
 254 result.  Only one of the VRegUniques is returned, since it will be
 255 of the VRegUniqueLo form, and the upper-half VReg can be determined
 256 by applying getHiVRegFromLo to it.
 257 -}
 258
 259 data ChildCode64        -- a.k.a "Register64"
 260       = ChildCode64
 261            InstrBlock   -- code
 262            Reg          -- the lower 32-bit temporary which contains the
 263                         -- result; use getHiVRegFromLo to find the other
 264                         -- VRegUnique.  Rules of this simplified insn
 265                         -- selection game are therefore that the returned
 266                         -- Reg may be modified
 267
 268
 269 -- | The dual to getAnyReg: compute an expression into a register, but
 270 --      we don't mind which one it is.
 271 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
 272 getSomeReg expr = do
 273   r <- getRegister expr
 274   case r of
 275     Any rep code -> do
 276         tmp <- getNewRegNat rep
 277         return (tmp, code tmp)
 278     Fixed _ reg code ->
 279         return (reg, code)
 280
 281 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
 282 getI64Amodes addrTree = do
 283     Amode hi_addr addr_code <- getAmode addrTree
 284     case addrOffset hi_addr 4 of
 285         Just lo_addr -> return (hi_addr, lo_addr, addr_code)
 286         Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
 287                            return (AddrRegImm hi_ptr (ImmInt 0),
 288                                    AddrRegImm hi_ptr (ImmInt 4),
 289                                    code)
 290
 291
 292 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
 293 assignMem_I64Code addrTree valueTree = do
 294         (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 295         ChildCode64 vcode rlo <- iselExpr64 valueTree
 296         let
 297                 rhi = getHiVRegFromLo rlo
 298
 299                 -- Big-endian store
 300                 mov_hi = ST II32 rhi hi_addr
 301                 mov_lo = ST II32 rlo lo_addr
 302         -- in
 303         return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 304
 305
 306 assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
 307 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
 308    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
 309    let
 310          r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
 311          r_dst_hi = getHiVRegFromLo r_dst_lo
 312          r_src_hi = getHiVRegFromLo r_src_lo
 313          mov_lo = MR r_dst_lo r_src_lo
 314          mov_hi = MR r_dst_hi r_src_hi
 315    -- in
 316    return (
 317         vcode `snocOL` mov_lo `snocOL` mov_hi
 318      )
 319
 320 assignReg_I64Code lvalue valueTree
 321    = panic "assignReg_I64Code(powerpc): invalid lvalue"
 322
 323
 324 iselExpr64        :: CmmExpr -> NatM ChildCode64
 325 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
 326     (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
 327     (rlo, rhi) <- getNewRegPairNat II32
 328     let mov_hi = LD II32 rhi hi_addr
 329         mov_lo = LD II32 rlo lo_addr
 330     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
 331                          rlo
 332
 333 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
 334    = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
 335
 336 iselExpr64 (CmmLit (CmmInt i _)) = do
 337   (rlo,rhi) <- getNewRegPairNat II32
 338   let
 339         half0 = fromIntegral (fromIntegral i :: Word16)
 340         half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
 341         half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
 342         half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
 343
 344         code = toOL [
 345                 LIS rlo (ImmInt half1),
 346                 OR rlo rlo (RIImm $ ImmInt half0),
 347                 LIS rhi (ImmInt half3),
 348                 OR rlo rlo (RIImm $ ImmInt half2)
 349                 ]
 350   -- in
 351   return (ChildCode64 code rlo)
 352
 353 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
 354    ChildCode64 code1 r1lo <- iselExpr64 e1
 355    ChildCode64 code2 r2lo <- iselExpr64 e2
 356    (rlo,rhi) <- getNewRegPairNat II32
 357    let
 358         r1hi = getHiVRegFromLo r1lo
 359         r2hi = getHiVRegFromLo r2lo
 360         code =  code1 `appOL`
 361                 code2 `appOL`
 362                 toOL [ ADDC rlo r1lo r2lo,
 363                        ADDE rhi r1hi r2hi ]
 364    -- in
 365    return (ChildCode64 code rlo)
 366
 367 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
 368     (expr_reg,expr_code) <- getSomeReg expr
 369     (rlo, rhi) <- getNewRegPairNat II32
 370     let mov_hi = LI rhi (ImmInt 0)
 371         mov_lo = MR rlo expr_reg
 372     return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
 373                          rlo
 374 iselExpr64 expr
 375    = pprPanic "iselExpr64(powerpc)" (ppr expr)
 376
 377
 378
 379 getRegister :: CmmExpr -> NatM Register
 380
 381 getRegister (CmmReg (CmmGlobal PicBaseReg))
 382   = do
 383       reg <- getPicBaseNat archWordSize
 384       return (Fixed archWordSize reg nilOL)
 385
 386 getRegister (CmmReg reg)
 387   = return (Fixed (cmmTypeSize (cmmRegType reg))
 388                   (getRegisterReg reg) nilOL)
 389
 390 getRegister tree@(CmmRegOff _ _)
 391   = getRegister (mangleIndexTree tree)
 392
 393
 394 #if WORD_SIZE_IN_BITS==32
 395     -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
 396     -- TO_W_(x), TO_W_(x >> 32)
 397
 398 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
 399              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 400   ChildCode64 code rlo <- iselExpr64 x
 401   return $ Fixed II32 (getHiVRegFromLo rlo) code
 402
 403 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
 404              [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
 405   ChildCode64 code rlo <- iselExpr64 x
 406   return $ Fixed II32 (getHiVRegFromLo rlo) code
 407
 408 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
 409   ChildCode64 code rlo <- iselExpr64 x
 410   return $ Fixed II32 rlo code
 411
 412 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
 413   ChildCode64 code rlo <- iselExpr64 x
 414   return $ Fixed II32 rlo code
 415
 416 #endif
 417
 418
 419 getRegister (CmmLoad mem pk)
 420   | not (isWord64 pk)
 421   = do
 422         Amode addr addr_code <- getAmode mem
 423         let code dst = ASSERT((targetClassOfReg dst == RcDouble) == isFloatType pk)
 424                        addr_code `snocOL` LD size dst addr
 425         return (Any size code)
 426           where size = cmmTypeSize pk
 427
 428 -- catch simple cases of zero- or sign-extended load
 429 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
 430     Amode addr addr_code <- getAmode mem
 431     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
 432
 433 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
 434
 435 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
 436     Amode addr addr_code <- getAmode mem
 437     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
 438
 439 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
 440     Amode addr addr_code <- getAmode mem
 441     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
 442
 443 getRegister (CmmMachOp mop [x]) -- unary MachOps
 444   = case mop of
 445       MO_Not rep   -> triv_ucode_int rep NOT
 446
 447       MO_F_Neg w   -> triv_ucode_float w FNEG
 448       MO_S_Neg w   -> triv_ucode_int   w NEG
 449
 450       MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
 451       MO_FF_Conv W32 W64 -> conversionNop FF64 x
 452
 453       MO_FS_Conv from to -> coerceFP2Int from to x
 454       MO_SF_Conv from to -> coerceInt2FP from to x
 455
 456       MO_SS_Conv from to
 457         | from == to    -> conversionNop (intSize to) x
 458
 459         -- narrowing is a nop: we treat the high bits as undefined
 460       MO_SS_Conv W32 to -> conversionNop (intSize to) x
 461       MO_SS_Conv W16 W8 -> conversionNop II8 x
 462       MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
 463       MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
 464
 465       MO_UU_Conv from to
 466         | from == to -> conversionNop (intSize to) x
 467         -- narrowing is a nop: we treat the high bits as undefined
 468       MO_UU_Conv W32 to -> conversionNop (intSize to) x
 469       MO_UU_Conv W16 W8 -> conversionNop II8 x
 470       MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
 471       MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
 472       _ -> panic "PPC.CodeGen.getRegister: no match"
 473
 474     where
 475         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
 476         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
 477
 478         conversionNop new_size expr
 479             = do e_code <- getRegister expr
 480                  return (swizzleRegisterRep e_code new_size)
 481
 482 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
 483   = case mop of
 484       MO_F_Eq w -> condFltReg EQQ x y
 485       MO_F_Ne w -> condFltReg NE  x y
 486       MO_F_Gt w -> condFltReg GTT x y
 487       MO_F_Ge w -> condFltReg GE  x y
 488       MO_F_Lt w -> condFltReg LTT x y
 489       MO_F_Le w -> condFltReg LE  x y
 490
 491       MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
 492       MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
 493
 494       MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
 495       MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
 496       MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
 497       MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
 498
 499       MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
 500       MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
 501       MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
 502       MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
 503
 504       MO_F_Add w  -> triv_float w FADD
 505       MO_F_Sub w  -> triv_float w FSUB
 506       MO_F_Mul w  -> triv_float w FMUL
 507       MO_F_Quot w -> triv_float w FDIV
 508
 509          -- optimize addition with 32-bit immediate
 510          -- (needed for PIC)
 511       MO_Add W32 ->
 512         case y of
 513           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
 514             -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
 515           CmmLit lit
 516             -> do
 517                 (src, srcCode) <- getSomeReg x
 518                 let imm = litToImm lit
 519                     code dst = srcCode `appOL` toOL [
 520                                     ADDIS dst src (HA imm),
 521                                     ADD dst dst (RIImm (LO imm))
 522                                 ]
 523                 return (Any II32 code)
 524           _ -> trivialCode W32 True ADD x y
 525
 526       MO_Add rep -> trivialCode rep True ADD x y
 527       MO_Sub rep ->
 528         case y of    -- subfi ('substract from' with immediate) doesn't exist
 529           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
 530             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
 531           _ -> trivialCodeNoImm' (intSize rep) SUBF y x
 532
 533       MO_Mul rep -> trivialCode rep True MULLW x y
 534
 535       MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
 536
 537       MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
 538       MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
 539
 540       MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
 541       MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
 542
 543       MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
 544       MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
 545
 546       MO_And rep   -> trivialCode rep False AND x y
 547       MO_Or rep    -> trivialCode rep False OR x y
 548       MO_Xor rep   -> trivialCode rep False XOR x y
 549
 550       MO_Shl rep   -> trivialCode rep False SLW x y
 551       MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
 552       MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
 553       _         -> panic "PPC.CodeGen.getRegister: no match"
 554
 555   where
 556     triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
 557     triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
 558
 559 getRegister (CmmLit (CmmInt i rep))
 560   | Just imm <- makeImmediate rep True i
 561   = let
 562         code dst = unitOL (LI dst imm)
 563     in
 564         return (Any (intSize rep) code)
 565
 566 getRegister (CmmLit (CmmFloat f frep)) = do
 567     lbl <- getNewLabelNat
 568     dflags <- getDynFlagsNat
 569     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
 570     Amode addr addr_code <- getAmode dynRef
 571     let size = floatSize frep
 572         code dst =
 573             LDATA ReadOnlyData  [CmmDataLabel lbl,
 574                                  CmmStaticLit (CmmFloat f frep)]
 575             `consOL` (addr_code `snocOL` LD size dst addr)
 576     return (Any size code)
 577
 578 getRegister (CmmLit lit)
 579   = let rep = cmmLitType lit
 580         imm = litToImm lit
 581         code dst = toOL [
 582               LIS dst (HA imm),
 583               ADD dst dst (RIImm (LO imm))
 584           ]
 585     in return (Any (cmmTypeSize rep) code)
 586
 587 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
 588
 589     -- extend?Rep: wrap integer expression of type rep
 590     -- in a conversion to II32
 591 extendSExpr W32 x = x
 592 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
 593 extendUExpr W32 x = x
 594 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
 595
 596 -- -----------------------------------------------------------------------------
 597 --  The 'Amode' type: Memory addressing modes passed up the tree.
 598
 599 data Amode
 600         = Amode AddrMode InstrBlock
 601
 602 {-
 603 Now, given a tree (the argument to an CmmLoad) that references memory,
 604 produce a suitable addressing mode.
 605
 606 A Rule of the Game (tm) for Amodes: use of the addr bit must
 607 immediately follow use of the code part, since the code part puts
 608 values in registers which the addr then refers to.  So you can't put
 609 anything in between, lest it overwrite some of those registers.  If
 610 you need to do some other computation between the code part and use of
 611 the addr bit, first store the effective address from the amode in a
 612 temporary, then do the other computation, and then use the temporary:
 613
 614     code
 615     LEA amode, tmp
 616     ... other computation ...
 617     ... (tmp) ...
 618 -}
 619
 620 getAmode :: CmmExpr -> NatM Amode
 621 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
 622
 623 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
 624   | Just off <- makeImmediate W32 True (-i)
 625   = do
 626         (reg, code) <- getSomeReg x
 627         return (Amode (AddrRegImm reg off) code)
 628
 629
 630 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
 631   | Just off <- makeImmediate W32 True i
 632   = do
 633         (reg, code) <- getSomeReg x
 634         return (Amode (AddrRegImm reg off) code)
 635
 636    -- optimize addition with 32-bit immediate
 637    -- (needed for PIC)
 638 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
 639   = do
 640         tmp <- getNewRegNat II32
 641         (src, srcCode) <- getSomeReg x
 642         let imm = litToImm lit
 643             code = srcCode `snocOL` ADDIS tmp src (HA imm)
 644         return (Amode (AddrRegImm tmp (LO imm)) code)
 645
 646 getAmode (CmmLit lit)
 647   = do
 648         tmp <- getNewRegNat II32
 649         let imm = litToImm lit
 650             code = unitOL (LIS tmp (HA imm))
 651         return (Amode (AddrRegImm tmp (LO imm)) code)
 652
 653 getAmode (CmmMachOp (MO_Add W32) [x, y])
 654   = do
 655         (regX, codeX) <- getSomeReg x
 656         (regY, codeY) <- getSomeReg y
 657         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
 658
 659 getAmode other
 660   = do
 661         (reg, code) <- getSomeReg other
 662         let
 663             off  = ImmInt 0
 664         return (Amode (AddrRegImm reg off) code)
 665
 666
 667
 668 --  The 'CondCode' type:  Condition codes passed up the tree.
 669 data CondCode
 670         = CondCode Bool Cond InstrBlock
 671
 672 -- Set up a condition code for a conditional branch.
 673
 674 getCondCode :: CmmExpr -> NatM CondCode
 675
 676 -- almost the same as everywhere else - but we need to
 677 -- extend small integers to 32 bit first
 678
 679 getCondCode (CmmMachOp mop [x, y])
 680   = case mop of
 681       MO_F_Eq W32 -> condFltCode EQQ x y
 682       MO_F_Ne W32 -> condFltCode NE  x y
 683       MO_F_Gt W32 -> condFltCode GTT x y
 684       MO_F_Ge W32 -> condFltCode GE  x y
 685       MO_F_Lt W32 -> condFltCode LTT x y
 686       MO_F_Le W32 -> condFltCode LE  x y
 687
 688       MO_F_Eq W64 -> condFltCode EQQ x y
 689       MO_F_Ne W64 -> condFltCode NE  x y
 690       MO_F_Gt W64 -> condFltCode GTT x y
 691       MO_F_Ge W64 -> condFltCode GE  x y
 692       MO_F_Lt W64 -> condFltCode LTT x y
 693       MO_F_Le W64 -> condFltCode LE  x y
 694
 695       MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
 696       MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
 697
 698       MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
 699       MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
 700       MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
 701       MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
 702
 703       MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
 704       MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
 705       MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
 706       MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
 707
 708       other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
 709
 710 getCondCode other =  panic "getCondCode(2)(powerpc)"
 711
 712
 713
 714 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
 715 -- passed back up the tree.
 716
 717 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
 718
 719 --  ###FIXME: I16 and I8!
 720 condIntCode cond x (CmmLit (CmmInt y rep))
 721   | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
 722   = do
 723         (src1, code) <- getSomeReg x
 724         let
 725             code' = code `snocOL`
 726                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
 727         return (CondCode False cond code')
 728
 729 condIntCode cond x y = do
 730     (src1, code1) <- getSomeReg x
 731     (src2, code2) <- getSomeReg y
 732     let
 733         code' = code1 `appOL` code2 `snocOL`
 734                   (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
 735     return (CondCode False cond code')
 736
 737 condFltCode cond x y = do
 738     (src1, code1) <- getSomeReg x
 739     (src2, code2) <- getSomeReg y
 740     let
 741         code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
 742         code'' = case cond of -- twiddle CR to handle unordered case
 743                     GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
 744                     LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
 745                     _ -> code'
 746                  where
 747                     ltbit = 0 ; eqbit = 2 ; gtbit = 1
 748     return (CondCode True cond code'')
 749
 750
 751
 752 -- -----------------------------------------------------------------------------
 753 -- Generating assignments
 754
 755 -- Assignments are really at the heart of the whole code generation
 756 -- business.  Almost all top-level nodes of any real importance are
 757 -- assignments, which correspond to loads, stores, or register
 758 -- transfers.  If we're really lucky, some of the register transfers
 759 -- will go away, because we can use the destination register to
 760 -- complete the code generation for the right hand side.  This only
 761 -- fails when the right hand side is forced into a fixed register
 762 -- (e.g. the result of a call).
 763
 764 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 765 assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 766
 767 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
 768 assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
 769
 770 assignMem_IntCode pk addr src = do
 771     (srcReg, code) <- getSomeReg src
 772     Amode dstAddr addr_code <- getAmode addr
 773     return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
 774
 775 -- dst is a reg, but src could be anything
 776 assignReg_IntCode _ reg src
 777     = do
 778         r <- getRegister src
 779         return $ case r of
 780             Any _ code         -> code dst
 781             Fixed _ freg fcode -> fcode `snocOL` MR dst freg
 782     where
 783         dst = getRegisterReg reg
 784
 785
 786
 787 -- Easy, isn't it?
 788 assignMem_FltCode = assignMem_IntCode
 789 assignReg_FltCode = assignReg_IntCode
 790
 791
 792
 793 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
 794
 795 genJump (CmmLit (CmmLabel lbl))
 796   = return (unitOL $ JMP lbl)
 797
 798 genJump tree
 799   = do
 800         (target,code) <- getSomeReg tree
 801         return (code `snocOL` MTCTR target `snocOL` BCTR [])
 802
 803
 804 -- -----------------------------------------------------------------------------
 805 --  Unconditional branches
 806 genBranch :: BlockId -> NatM InstrBlock
 807 genBranch = return . toOL . mkJumpInstr
 808
 809
 810 -- -----------------------------------------------------------------------------
 811 --  Conditional jumps
 812
 813 {-
 814 Conditional jumps are always to local labels, so we can use branch
 815 instructions.  We peek at the arguments to decide what kind of
 816 comparison to do.
 817
 818 SPARC: First, we have to ensure that the condition codes are set
 819 according to the supplied comparison operation.  We generate slightly
 820 different code for floating point comparisons, because a floating
 821 point operation cannot directly precede a @BF@.  We assume the worst
 822 and fill that slot with a @NOP@.
 823
 824 SPARC: Do not fill the delay slots here; you will confuse the register
 825 allocator.
 826 -}
 827
 828
 829 genCondJump
 830     :: BlockId      -- the branch target
 831     -> CmmExpr      -- the condition on which to branch
 832     -> NatM InstrBlock
 833
 834 genCondJump id bool = do
 835   CondCode _ cond code <- getCondCode bool
 836   return (code `snocOL` BCC cond id)
 837
 838
 839
 840 -- -----------------------------------------------------------------------------
 841 --  Generating C calls
 842
 843 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
 844 -- @get_arg@, which moves the arguments to the correct registers/stack
 845 -- locations.  Apart from that, the code is easy.
 846 --
 847 -- (If applicable) Do not fill the delay slots here; you will confuse the
 848 -- register allocator.
 849
 850 genCCall
 851     :: CmmCallTarget            -- function to call
 852     -> HintedCmmFormals         -- where to put the result
 853     -> HintedCmmActuals         -- arguments (of mixed type)
 854     -> NatM InstrBlock
 855
 856
 857 #if darwin_TARGET_OS || linux_TARGET_OS
 858 {-
 859     The PowerPC calling convention for Darwin/Mac OS X
 860     is described in Apple's document
 861     "Inside Mac OS X - Mach-O Runtime Architecture".
 862
 863     PowerPC Linux uses the System V Release 4 Calling Convention
 864     for PowerPC. It is described in the
 865     "System V Application Binary Interface PowerPC Processor Supplement".
 866
 867     Both conventions are similar:
 868     Parameters may be passed in general-purpose registers starting at r3, in
 869     floating point registers starting at f1, or on the stack.
 870
 871     But there are substantial differences:
 872     * The number of registers used for parameter passing and the exact set of
 873       nonvolatile registers differs (see MachRegs.lhs).
 874     * On Darwin, stack space is always reserved for parameters, even if they are
 875       passed in registers. The called routine may choose to save parameters from
 876       registers to the corresponding space on the stack.
 877     * On Darwin, a corresponding amount of GPRs is skipped when a floating point
 878       parameter is passed in an FPR.
 879     * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
 880       starting with an odd-numbered GPR. It may skip a GPR to achieve this.
 881       Darwin just treats an I64 like two separate II32s (high word first).
 882     * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
 883       4-byte aligned like everything else on Darwin.
 884     * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
 885       PowerPC Linux does not agree, so neither do we.
 886
 887     According to both conventions, The parameter area should be part of the
 888     caller's stack frame, allocated in the caller's prologue code (large enough
 889     to hold the parameter lists for all called routines). The NCG already
 890     uses the stack for register spilling, leaving 64 bytes free at the top.
 891     If we need a larger parameter area than that, we just allocate a new stack
 892     frame just before ccalling.
 893 -}
 894
 895
 896 genCCall (CmmPrim MO_WriteBarrier) _ _
 897  = return $ unitOL LWSYNC
 898
 899 genCCall target dest_regs argsAndHints
 900   = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
 901         -- we rely on argument promotion in the codeGen
 902     do
 903         (finalStack,passArgumentsCode,usedRegs) <- passArguments
 904                                                         (zip args argReps)
 905                                                         allArgRegs allFPArgRegs
 906                                                         initialStackOffset
 907                                                         (toOL []) []
 908
 909         (labelOrExpr, reduceToFF32) <- case target of
 910             CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
 911             CmmCallee expr conv -> return  (Right expr, False)
 912             CmmPrim mop -> outOfLineFloatOp mop
 913
 914         let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
 915             codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
 916
 917         case labelOrExpr of
 918             Left lbl -> do
 919                 return (         codeBefore
 920                         `snocOL` BL lbl usedRegs
 921                         `appOL`  codeAfter)
 922             Right dyn -> do
 923                 (dynReg, dynCode) <- getSomeReg dyn
 924                 return (         dynCode
 925                         `snocOL` MTCTR dynReg
 926                         `appOL`  codeBefore
 927                         `snocOL` BCTRL usedRegs
 928                         `appOL`  codeAfter)
 929     where
 930 #if darwin_TARGET_OS
 931         initialStackOffset = 24
 932             -- size of linkage area + size of arguments, in bytes
 933         stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
 934                                  map (widthInBytes . typeWidth) argReps
 935 #elif linux_TARGET_OS
 936         initialStackOffset = 8
 937         stackDelta finalStack = roundTo 16 finalStack
 938 #endif
 939         args = map hintlessCmm argsAndHints
 940         argReps = map cmmExprType args
 941
 942         roundTo a x | x `mod` a == 0 = x
 943                     | otherwise = x + a - (x `mod` a)
 944
 945         move_sp_down finalStack
 946                | delta > 64 =
 947                         toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
 948                               DELTA (-delta)]
 949                | otherwise = nilOL
 950                where delta = stackDelta finalStack
 951         move_sp_up finalStack
 952                | delta > 64 =
 953                         toOL [ADD sp sp (RIImm (ImmInt delta)),
 954                               DELTA 0]
 955                | otherwise = nilOL
 956                where delta = stackDelta finalStack
 957
 958
 959         passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
 960         passArguments ((arg,arg_ty):args) gprs fprs stackOffset
 961                accumCode accumUsed | isWord64 arg_ty =
 962             do
 963                 ChildCode64 code vr_lo <- iselExpr64 arg
 964                 let vr_hi = getHiVRegFromLo vr_lo
 965
 966 #if darwin_TARGET_OS
 967                 passArguments args
 968                               (drop 2 gprs)
 969                               fprs
 970                               (stackOffset+8)
 971                               (accumCode `appOL` code
 972                                     `snocOL` storeWord vr_hi gprs stackOffset
 973                                     `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
 974                               ((take 2 gprs) ++ accumUsed)
 975             where
 976                 storeWord vr (gpr:_) offset = MR gpr vr
 977                 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
 978
 979 #elif linux_TARGET_OS
 980                 let stackOffset' = roundTo 8 stackOffset
 981                     stackCode = accumCode `appOL` code
 982                         `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
 983                         `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
 984                     regCode hireg loreg =
 985                         accumCode `appOL` code
 986                             `snocOL` MR hireg vr_hi
 987                             `snocOL` MR loreg vr_lo
 988
 989                 case gprs of
 990                     hireg : loreg : regs | even (length gprs) ->
 991                         passArguments args regs fprs stackOffset
 992                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
 993                     _skipped : hireg : loreg : regs ->
 994                         passArguments args regs fprs stackOffset
 995                                       (regCode hireg loreg) (hireg : loreg : accumUsed)
 996                     _ -> -- only one or no regs left
 997                         passArguments args [] fprs (stackOffset'+8)
 998                                       stackCode accumUsed
 999 #endif
1000
1001         passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1002             | reg : _ <- regs = do
1003                 register <- getRegister arg
1004                 let code = case register of
1005                             Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1006                             Any _ acode -> acode reg
1007                 passArguments args
1008                               (drop nGprs gprs)
1009                               (drop nFprs fprs)
1010 #if darwin_TARGET_OS
1011         -- The Darwin ABI requires that we reserve stack slots for register parameters
1012                               (stackOffset + stackBytes)
1013 #elif linux_TARGET_OS
1014         -- ... the SysV ABI doesn't.
1015                               stackOffset
1016 #endif
1017                               (accumCode `appOL` code)
1018                               (reg : accumUsed)
1019             | otherwise = do
1020                 (vr, code) <- getSomeReg arg
1021                 passArguments args
1022                               (drop nGprs gprs)
1023                               (drop nFprs fprs)
1024                               (stackOffset' + stackBytes)
1025                               (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
1026                               accumUsed
1027             where
1028 #if darwin_TARGET_OS
1029         -- stackOffset is at least 4-byte aligned
1030         -- The Darwin ABI is happy with that.
1031                 stackOffset' = stackOffset
1032 #else
1033         -- ... the SysV ABI requires 8-byte alignment for doubles.
1034                 stackOffset' | isFloatType rep && typeWidth rep == W64 =
1035                                  roundTo 8 stackOffset
1036                              | otherwise  =           stackOffset
1037 #endif
1038                 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1039                 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
1040                     II32 -> (1, 0, 4, gprs)
1041 #if darwin_TARGET_OS
1042         -- The Darwin ABI requires that we skip a corresponding number of GPRs when
1043         -- we use the FPRs.
1044                     FF32 -> (1, 1, 4, fprs)
1045                     FF64 -> (2, 1, 8, fprs)
1046 #elif linux_TARGET_OS
1047         -- ... the SysV ABI doesn't.
1048                     FF32 -> (0, 1, 4, fprs)
1049                     FF64 -> (0, 1, 8, fprs)
1050 #endif
1051
1052         moveResult reduceToFF32 =
1053             case dest_regs of
1054                 [] -> nilOL
1055                 [CmmHinted dest _hint]
1056                     | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
1057                     | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1058                     | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
1059                                           MR r_dest r4]
1060                     | otherwise -> unitOL (MR r_dest r3)
1061                     where rep = cmmRegType (CmmLocal dest)
1062                           r_dest = getRegisterReg (CmmLocal dest)
1063
1064         outOfLineFloatOp mop =
1065             do
1066                 dflags <- getDynFlagsNat
1067                 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
1068                               mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1069                 let mopLabelOrExpr = case mopExpr of
1070                         CmmLit (CmmLabel lbl) -> Left lbl
1071                         _ -> Right mopExpr
1072                 return (mopLabelOrExpr, reduce)
1073             where
1074                 (functionName, reduce) = case mop of
1075                     MO_F32_Exp   -> (fsLit "exp", True)
1076                     MO_F32_Log   -> (fsLit "log", True)
1077                     MO_F32_Sqrt  -> (fsLit "sqrt", True)
1078
1079                     MO_F32_Sin   -> (fsLit "sin", True)
1080                     MO_F32_Cos   -> (fsLit "cos", True)
1081                     MO_F32_Tan   -> (fsLit "tan", True)
1082
1083                     MO_F32_Asin  -> (fsLit "asin", True)
1084                     MO_F32_Acos  -> (fsLit "acos", True)
1085                     MO_F32_Atan  -> (fsLit "atan", True)
1086
1087                     MO_F32_Sinh  -> (fsLit "sinh", True)
1088                     MO_F32_Cosh  -> (fsLit "cosh", True)
1089                     MO_F32_Tanh  -> (fsLit "tanh", True)
1090                     MO_F32_Pwr   -> (fsLit "pow", True)
1091
1092                     MO_F64_Exp   -> (fsLit "exp", False)
1093                     MO_F64_Log   -> (fsLit "log", False)
1094                     MO_F64_Sqrt  -> (fsLit "sqrt", False)
1095
1096                     MO_F64_Sin   -> (fsLit "sin", False)
1097                     MO_F64_Cos   -> (fsLit "cos", False)
1098                     MO_F64_Tan   -> (fsLit "tan", False)
1099
1100                     MO_F64_Asin  -> (fsLit "asin", False)
1101                     MO_F64_Acos  -> (fsLit "acos", False)
1102                     MO_F64_Atan  -> (fsLit "atan", False)
1103
1104                     MO_F64_Sinh  -> (fsLit "sinh", False)
1105                     MO_F64_Cosh  -> (fsLit "cosh", False)
1106                     MO_F64_Tanh  -> (fsLit "tanh", False)
1107                     MO_F64_Pwr   -> (fsLit "pow", False)
1108                     other -> pprPanic "genCCall(ppc): unknown callish op"
1109                                     (pprCallishMachOp other)
1110
1111 #else /* darwin_TARGET_OS || linux_TARGET_OS */
1112 genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
1113 #endif
1114
1115
1116 -- -----------------------------------------------------------------------------
1117 -- Generating a table-branch
1118
1119 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1120 genSwitch expr ids
1121   | opt_PIC
1122   = do
1123         (reg,e_code) <- getSomeReg expr
1124         tmp <- getNewRegNat II32
1125         lbl <- getNewLabelNat
1126         dflags <- getDynFlagsNat
1127         dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1128         (tableReg,t_code) <- getSomeReg $ dynRef
1129         let
1130             jumpTable = map jumpTableEntryRel ids
1131
1132             jumpTableEntryRel Nothing
1133                 = CmmStaticLit (CmmInt 0 wordWidth)
1134             jumpTableEntryRel (Just blockid)
1135                 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1136                 where blockLabel = mkAsmTempLabel (getUnique blockid)
1137
1138             code = e_code `appOL` t_code `appOL` toOL [
1139                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1140                             SLW tmp reg (RIImm (ImmInt 2)),
1141                             LD II32 tmp (AddrRegReg tableReg tmp),
1142                             ADD tmp tmp (RIReg tableReg),
1143                             MTCTR tmp,
1144                             BCTR [ id | Just id <- ids ]
1145                     ]
1146         return code
1147   | otherwise
1148   = do
1149         (reg,e_code) <- getSomeReg expr
1150         tmp <- getNewRegNat II32
1151         lbl <- getNewLabelNat
1152         let
1153             jumpTable = map jumpTableEntry ids
1154
1155             code = e_code `appOL` toOL [
1156                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
1157                             SLW tmp reg (RIImm (ImmInt 2)),
1158                             ADDIS tmp tmp (HA (ImmCLbl lbl)),
1159                             LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1160                             MTCTR tmp,
1161                             BCTR [ id | Just id <- ids ]
1162                     ]
1163         return code
1164
1165
1166 -- -----------------------------------------------------------------------------
1167 -- 'condIntReg' and 'condFltReg': condition codes into registers
1168
1169 -- Turn those condition codes into integers now (when they appear on
1170 -- the right hand side of an assignment).
1171 --
1172 -- (If applicable) Do not fill the delay slots here; you will confuse the
1173 -- register allocator.
1174
1175 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1176
1177 condReg :: NatM CondCode -> NatM Register
1178 condReg getCond = do
1179     CondCode _ cond cond_code <- getCond
1180     let
1181 {-        code dst = cond_code `appOL` toOL [
1182                 BCC cond lbl1,
1183                 LI dst (ImmInt 0),
1184                 BCC ALWAYS lbl2,
1185                 NEWBLOCK lbl1,
1186                 LI dst (ImmInt 1),
1187                 BCC ALWAYS lbl2,
1188                 NEWBLOCK lbl2
1189             ]-}
1190         code dst = cond_code
1191             `appOL` negate_code
1192             `appOL` toOL [
1193                 MFCR dst,
1194                 RLWINM dst dst (bit + 1) 31 31
1195             ]
1196
1197         negate_code | do_negate = unitOL (CRNOR bit bit bit)
1198                     | otherwise = nilOL
1199
1200         (bit, do_negate) = case cond of
1201             LTT -> (0, False)
1202             LE  -> (1, True)
1203             EQQ -> (2, False)
1204             GE  -> (0, True)
1205             GTT -> (1, False)
1206
1207             NE  -> (2, True)
1208
1209             LU  -> (0, False)
1210             LEU -> (1, True)
1211             GEU -> (0, True)
1212             GU  -> (1, False)
1213             _   -> panic "PPC.CodeGen.codeReg: no match"
1214
1215     return (Any II32 code)
1216
1217 condIntReg cond x y = condReg (condIntCode cond x y)
1218 condFltReg cond x y = condReg (condFltCode cond x y)
1219
1220
1221
1222 -- -----------------------------------------------------------------------------
1223 -- 'trivial*Code': deal with trivial instructions
1224
1225 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1226 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1227 -- Only look for constants on the right hand side, because that's
1228 -- where the generic optimizer will have put them.
1229
1230 -- Similarly, for unary instructions, we don't have to worry about
1231 -- matching an StInt as the argument, because genericOpt will already
1232 -- have handled the constant-folding.
1233
1234
1235
1236 {-
1237 Wolfgang's PowerPC version of The Rules:
1238
1239 A slightly modified version of The Rules to take advantage of the fact
1240 that PowerPC instructions work on all registers and don't implicitly
1241 clobber any fixed registers.
1242
1243 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1244
1245 * If getRegister returns Any, then the code it generates may modify only:
1246         (a) fresh temporaries
1247         (b) the destination register
1248   It may *not* modify global registers, unless the global
1249   register happens to be the destination register.
1250   It may not clobber any other registers. In fact, only ccalls clobber any
1251   fixed registers.
1252   Also, it may not modify the counter register (used by genCCall).
1253
1254   Corollary: If a getRegister for a subexpression returns Fixed, you need
1255   not move it to a fresh temporary before evaluating the next subexpression.
1256   The Fixed register won't be modified.
1257   Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1258
1259 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1260   the value of the destination register.
1261 -}
1262
1263 trivialCode
1264         :: Width
1265         -> Bool
1266         -> (Reg -> Reg -> RI -> Instr)
1267         -> CmmExpr
1268         -> CmmExpr
1269         -> NatM Register
1270
1271 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1272     | Just imm <- makeImmediate rep signed y
1273     = do
1274         (src1, code1) <- getSomeReg x
1275         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1276         return (Any (intSize rep) code)
1277
1278 trivialCode rep _ instr x y = do
1279     (src1, code1) <- getSomeReg x
1280     (src2, code2) <- getSomeReg y
1281     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1282     return (Any (intSize rep) code)
1283
1284 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
1285                  -> CmmExpr -> CmmExpr -> NatM Register
1286 trivialCodeNoImm' size instr x y = do
1287     (src1, code1) <- getSomeReg x
1288     (src2, code2) <- getSomeReg y
1289     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1290     return (Any size code)
1291
1292 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
1293                  -> CmmExpr -> CmmExpr -> NatM Register
1294 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
1295
1296
1297 trivialUCode
1298         :: Size
1299         -> (Reg -> Reg -> Instr)
1300         -> CmmExpr
1301         -> NatM Register
1302 trivialUCode rep instr x = do
1303     (src, code) <- getSomeReg x
1304     let code' dst = code `snocOL` instr dst src
1305     return (Any rep code')
1306
1307 -- There is no "remainder" instruction on the PPC, so we have to do
1308 -- it the hard way.
1309 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1310
1311 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1312     -> CmmExpr -> CmmExpr -> NatM Register
1313 remainderCode rep div x y = do
1314     (src1, code1) <- getSomeReg x
1315     (src2, code2) <- getSomeReg y
1316     let code dst = code1 `appOL` code2 `appOL` toOL [
1317                 div dst src1 src2,
1318                 MULLW dst dst (RIReg src2),
1319                 SUBF dst dst src1
1320             ]
1321     return (Any (intSize rep) code)
1322
1323
1324 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1325 coerceInt2FP fromRep toRep x = do
1326     (src, code) <- getSomeReg x
1327     lbl <- getNewLabelNat
1328     itmp <- getNewRegNat II32
1329     ftmp <- getNewRegNat FF64
1330     dflags <- getDynFlagsNat
1331     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1332     Amode addr addr_code <- getAmode dynRef
1333     let
1334         code' dst = code `appOL` maybe_exts `appOL` toOL [
1335                 LDATA ReadOnlyData
1336                                 [CmmDataLabel lbl,
1337                                  CmmStaticLit (CmmInt 0x43300000 W32),
1338                                  CmmStaticLit (CmmInt 0x80000000 W32)],
1339                 XORIS itmp src (ImmInt 0x8000),
1340                 ST II32 itmp (spRel 3),
1341                 LIS itmp (ImmInt 0x4330),
1342                 ST II32 itmp (spRel 2),
1343                 LD FF64 ftmp (spRel 2)
1344             ] `appOL` addr_code `appOL` toOL [
1345                 LD FF64 dst addr,
1346                 FSUB FF64 dst ftmp dst
1347             ] `appOL` maybe_frsp dst
1348
1349         maybe_exts = case fromRep of
1350                         W8 ->  unitOL $ EXTS II8 src src
1351                         W16 -> unitOL $ EXTS II16 src src
1352                         W32 -> nilOL
1353                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
1354
1355         maybe_frsp dst
1356                 = case toRep of
1357                         W32 -> unitOL $ FRSP dst dst
1358                         W64 -> nilOL
1359                         _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
1360
1361     return (Any (floatSize toRep) code')
1362
1363 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1364 coerceFP2Int _ toRep x = do
1365     -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1366     (src, code) <- getSomeReg x
1367     tmp <- getNewRegNat FF64
1368     let
1369         code' dst = code `appOL` toOL [
1370                 -- convert to int in FP reg
1371             FCTIWZ tmp src,
1372                 -- store value (64bit) from FP to stack
1373             ST FF64 tmp (spRel 2),
1374                 -- read low word of value (high word is undefined)
1375             LD II32 dst (spRel 3)]
1376     return (Any (intSize toRep) code')