3 -----------------------------------------------------------------------------
5 -- Generating machine code (instruction selection)
7 -- (c) The University of Glasgow 1996-2004
9 -----------------------------------------------------------------------------
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, (b) the type signatures, and
13 -- (c) the #if blah_TARGET_ARCH} things, the
14 -- structure should not be too overwhelming.
18 generateJumpTableForInstr,
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26 #include "../includes/MachDeps.h"
42 -- Our intermediate code:
44 import PprCmm ( pprExpr )
49 import StaticFlags ( opt_PIC )
51 import qualified Outputable as O
56 import Control.Monad ( mapAndUnzipM )
64 -- -----------------------------------------------------------------------------
65 -- Top-level of the instruction selector
67 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
68 -- They are really trees of insns to facilitate fast appending, where a
69 -- left-to-right traversal (pre-order?) yields the insns in the correct
74 -> NatM [NatCmmTop Instr]
76 cmmTopCodeGen (CmmProc info lab (ListGraph blocks)) = do
77 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
78 picBaseMb <- getPicBaseMaybeNat
79 dflags <- getDynFlagsNat
80 let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
81 tops = proc : concat statics
82 os = platformOS $ targetPlatform dflags
84 Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
85 Nothing -> return tops
87 cmmTopCodeGen (CmmData sec dat) = do
88 return [CmmData sec dat] -- no translation, we just use CmmStatic
92 -> NatM ( [NatBasicBlock Instr]
95 basicBlockCodeGen (BasicBlock id stmts) = do
96 instrs <- stmtsToInstrs stmts
97 -- code generation may introduce new basic block boundaries, which
98 -- are indicated by the NEWBLOCK instruction. We must split up the
99 -- instruction stream into basic blocks again. Also, we extract
102 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
104 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
105 = ([], BasicBlock id instrs : blocks, statics)
106 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
107 = (instrs, blocks, CmmData sec dat:statics)
108 mkBlocks instr (instrs,blocks,statics)
109 = (instr:instrs, blocks, statics)
111 return (BasicBlock id top : other_blocks, statics)
113 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
115 = do instrss <- mapM stmtToInstrs stmts
116 return (concatOL instrss)
118 stmtToInstrs :: CmmStmt -> NatM InstrBlock
119 stmtToInstrs stmt = do
120 dflags <- getDynFlagsNat
122 CmmNop -> return nilOL
123 CmmComment s -> return (unitOL (COMMENT s))
126 | isFloatType ty -> assignReg_FltCode size reg src
127 | target32Bit (targetPlatform dflags) &&
128 isWord64 ty -> assignReg_I64Code reg src
129 | otherwise -> assignReg_IntCode size reg src
130 where ty = cmmRegType reg
131 size = cmmTypeSize ty
134 | isFloatType ty -> assignMem_FltCode size addr src
135 | target32Bit (targetPlatform dflags) &&
136 isWord64 ty -> assignMem_I64Code addr src
137 | otherwise -> assignMem_IntCode size addr src
138 where ty = cmmExprType src
139 size = cmmTypeSize ty
141 CmmCall target result_regs args _ _
142 -> genCCall target result_regs args
144 CmmBranch id -> genBranch id
145 CmmCondBranch arg id -> genCondJump id arg
146 CmmSwitch arg ids -> genSwitch arg ids
147 CmmJump arg params -> genJump arg
149 panic "stmtToInstrs: return statement should have been cps'd away"
152 --------------------------------------------------------------------------------
153 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
154 -- They are really trees of insns to facilitate fast appending, where a
155 -- left-to-right traversal yields the insns in the correct order.
161 -- | Register's passed up the tree. If the stix code forces the register
162 -- to live in a pre-decided machine register, it comes out as @Fixed@;
163 -- otherwise, it comes out as @Any@, and the parent can decide which
164 -- register to put it in.
167 = Fixed Size Reg InstrBlock
168 | Any Size (Reg -> InstrBlock)
171 swizzleRegisterRep :: Register -> Size -> Register
172 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
173 swizzleRegisterRep (Any _ codefn) size = Any size codefn
176 -- | Grab the Reg for a CmmReg
177 getRegisterReg :: CmmReg -> Reg
179 getRegisterReg (CmmLocal (LocalReg u pk))
180 = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
182 getRegisterReg (CmmGlobal mid)
183 = case globalRegMaybe mid of
185 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
186 -- By this stage, the only MagicIds remaining should be the
187 -- ones which map to a real machine register on this
188 -- platform. Hence ...
192 Now, given a tree (the argument to an CmmLoad) that references memory,
193 produce a suitable addressing mode.
195 A Rule of the Game (tm) for Amodes: use of the addr bit must
196 immediately follow use of the code part, since the code part puts
197 values in registers which the addr then refers to. So you can't put
198 anything in between, lest it overwrite some of those registers. If
199 you need to do some other computation between the code part and use of
200 the addr bit, first store the effective address from the amode in a
201 temporary, then do the other computation, and then use the temporary:
205 ... other computation ...
210 -- | Check whether an integer will fit in 32 bits.
211 -- A CmmInt is intended to be truncated to the appropriate
212 -- number of bits, so here we truncate it to Int64. This is
213 -- important because e.g. -1 as a CmmInt might be either
214 -- -1 or 18446744073709551615.
216 is32BitInteger :: Integer -> Bool
217 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
218 where i64 = fromIntegral i :: Int64
221 -- | Convert a BlockId to some CmmStatic data
222 jumpTableEntry :: Maybe BlockId -> CmmStatic
223 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
224 jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
225 where blockLabel = mkAsmTempLabel (getUnique blockid)
229 -- -----------------------------------------------------------------------------
230 -- General things for putting together code sequences
232 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
233 -- CmmExprs into CmmRegOff?
234 mangleIndexTree :: CmmExpr -> CmmExpr
235 mangleIndexTree (CmmRegOff reg off)
236 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
237 where width = typeWidth (cmmRegType reg)
240 = panic "PPC.CodeGen.mangleIndexTree: no match"
242 -- -----------------------------------------------------------------------------
243 -- Code gen for 64-bit arithmetic on 32-bit platforms
246 Simple support for generating 64-bit code (ie, 64 bit values and 64
247 bit assignments) on 32-bit platforms. Unlike the main code generator
248 we merely shoot for generating working code as simply as possible, and
249 pay little attention to code quality. Specifically, there is no
250 attempt to deal cleverly with the fixed-vs-floating register
251 distinction; all values are generated into (pairs of) floating
252 registers, even if this would mean some redundant reg-reg moves as a
253 result. Only one of the VRegUniques is returned, since it will be
254 of the VRegUniqueLo form, and the upper-half VReg can be determined
255 by applying getHiVRegFromLo to it.
258 data ChildCode64 -- a.k.a "Register64"
261 Reg -- the lower 32-bit temporary which contains the
262 -- result; use getHiVRegFromLo to find the other
263 -- VRegUnique. Rules of this simplified insn
264 -- selection game are therefore that the returned
265 -- Reg may be modified
268 -- | The dual to getAnyReg: compute an expression into a register, but
269 -- we don't mind which one it is.
270 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
272 r <- getRegister expr
275 tmp <- getNewRegNat rep
276 return (tmp, code tmp)
280 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
281 getI64Amodes addrTree = do
282 Amode hi_addr addr_code <- getAmode addrTree
283 case addrOffset hi_addr 4 of
284 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
285 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
286 return (AddrRegImm hi_ptr (ImmInt 0),
287 AddrRegImm hi_ptr (ImmInt 4),
291 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
292 assignMem_I64Code addrTree valueTree = do
293 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
294 ChildCode64 vcode rlo <- iselExpr64 valueTree
296 rhi = getHiVRegFromLo rlo
299 mov_hi = ST II32 rhi hi_addr
300 mov_lo = ST II32 rlo lo_addr
302 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
305 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
306 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
307 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
309 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
310 r_dst_hi = getHiVRegFromLo r_dst_lo
311 r_src_hi = getHiVRegFromLo r_src_lo
312 mov_lo = MR r_dst_lo r_src_lo
313 mov_hi = MR r_dst_hi r_src_hi
316 vcode `snocOL` mov_lo `snocOL` mov_hi
319 assignReg_I64Code lvalue valueTree
320 = panic "assignReg_I64Code(powerpc): invalid lvalue"
323 iselExpr64 :: CmmExpr -> NatM ChildCode64
324 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
325 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
326 (rlo, rhi) <- getNewRegPairNat II32
327 let mov_hi = LD II32 rhi hi_addr
328 mov_lo = LD II32 rlo lo_addr
329 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
332 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
333 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
335 iselExpr64 (CmmLit (CmmInt i _)) = do
336 (rlo,rhi) <- getNewRegPairNat II32
338 half0 = fromIntegral (fromIntegral i :: Word16)
339 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
340 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
341 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
344 LIS rlo (ImmInt half1),
345 OR rlo rlo (RIImm $ ImmInt half0),
346 LIS rhi (ImmInt half3),
347 OR rlo rlo (RIImm $ ImmInt half2)
350 return (ChildCode64 code rlo)
352 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
353 ChildCode64 code1 r1lo <- iselExpr64 e1
354 ChildCode64 code2 r2lo <- iselExpr64 e2
355 (rlo,rhi) <- getNewRegPairNat II32
357 r1hi = getHiVRegFromLo r1lo
358 r2hi = getHiVRegFromLo r2lo
361 toOL [ ADDC rlo r1lo r2lo,
364 return (ChildCode64 code rlo)
366 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
367 (expr_reg,expr_code) <- getSomeReg expr
368 (rlo, rhi) <- getNewRegPairNat II32
369 let mov_hi = LI rhi (ImmInt 0)
370 mov_lo = MR rlo expr_reg
371 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
374 = pprPanic "iselExpr64(powerpc)" (ppr expr)
378 getRegister :: CmmExpr -> NatM Register
379 getRegister e = do dflags <- getDynFlagsNat
380 getRegister' dflags e
382 getRegister' :: DynFlags -> CmmExpr -> NatM Register
384 getRegister' _ (CmmReg (CmmGlobal PicBaseReg))
386 reg <- getPicBaseNat archWordSize
387 return (Fixed archWordSize reg nilOL)
389 getRegister' _ (CmmReg reg)
390 = return (Fixed (cmmTypeSize (cmmRegType reg))
391 (getRegisterReg reg) nilOL)
393 getRegister' dflags tree@(CmmRegOff _ _)
394 = getRegister' dflags (mangleIndexTree tree)
396 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
397 -- TO_W_(x), TO_W_(x >> 32)
399 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
400 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
401 | target32Bit (targetPlatform dflags) = do
402 ChildCode64 code rlo <- iselExpr64 x
403 return $ Fixed II32 (getHiVRegFromLo rlo) code
405 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
406 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
407 | target32Bit (targetPlatform dflags) = do
408 ChildCode64 code rlo <- iselExpr64 x
409 return $ Fixed II32 (getHiVRegFromLo rlo) code
411 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
412 | target32Bit (targetPlatform dflags) = do
413 ChildCode64 code rlo <- iselExpr64 x
414 return $ Fixed II32 rlo code
416 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
417 | target32Bit (targetPlatform dflags) = do
418 ChildCode64 code rlo <- iselExpr64 x
419 return $ Fixed II32 rlo code
421 getRegister' _ (CmmLoad mem pk)
424 Amode addr addr_code <- getAmode mem
425 let code dst = ASSERT((targetClassOfReg dst == RcDouble) == isFloatType pk)
426 addr_code `snocOL` LD size dst addr
427 return (Any size code)
428 where size = cmmTypeSize pk
430 -- catch simple cases of zero- or sign-extended load
431 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
432 Amode addr addr_code <- getAmode mem
433 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
435 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
437 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
438 Amode addr addr_code <- getAmode mem
439 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
441 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
442 Amode addr addr_code <- getAmode mem
443 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
445 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
447 MO_Not rep -> triv_ucode_int rep NOT
449 MO_F_Neg w -> triv_ucode_float w FNEG
450 MO_S_Neg w -> triv_ucode_int w NEG
452 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
453 MO_FF_Conv W32 W64 -> conversionNop FF64 x
455 MO_FS_Conv from to -> coerceFP2Int from to x
456 MO_SF_Conv from to -> coerceInt2FP from to x
459 | from == to -> conversionNop (intSize to) x
461 -- narrowing is a nop: we treat the high bits as undefined
462 MO_SS_Conv W32 to -> conversionNop (intSize to) x
463 MO_SS_Conv W16 W8 -> conversionNop II8 x
464 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
465 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
468 | from == to -> conversionNop (intSize to) x
469 -- narrowing is a nop: we treat the high bits as undefined
470 MO_UU_Conv W32 to -> conversionNop (intSize to) x
471 MO_UU_Conv W16 W8 -> conversionNop II8 x
472 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
473 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
474 _ -> panic "PPC.CodeGen.getRegister: no match"
477 triv_ucode_int width instr = trivialUCode (intSize width) instr x
478 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
480 conversionNop new_size expr
481 = do e_code <- getRegister' dflags expr
482 return (swizzleRegisterRep e_code new_size)
484 getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
486 MO_F_Eq w -> condFltReg EQQ x y
487 MO_F_Ne w -> condFltReg NE x y
488 MO_F_Gt w -> condFltReg GTT x y
489 MO_F_Ge w -> condFltReg GE x y
490 MO_F_Lt w -> condFltReg LTT x y
491 MO_F_Le w -> condFltReg LE x y
493 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
494 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
496 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
497 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
498 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
499 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
501 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
502 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
503 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
504 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
506 MO_F_Add w -> triv_float w FADD
507 MO_F_Sub w -> triv_float w FSUB
508 MO_F_Mul w -> triv_float w FMUL
509 MO_F_Quot w -> triv_float w FDIV
511 -- optimize addition with 32-bit immediate
515 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
516 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
519 (src, srcCode) <- getSomeReg x
520 let imm = litToImm lit
521 code dst = srcCode `appOL` toOL [
522 ADDIS dst src (HA imm),
523 ADD dst dst (RIImm (LO imm))
525 return (Any II32 code)
526 _ -> trivialCode W32 True ADD x y
528 MO_Add rep -> trivialCode rep True ADD x y
530 case y of -- subfi ('substract from' with immediate) doesn't exist
531 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
532 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
533 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
535 MO_Mul rep -> trivialCode rep True MULLW x y
537 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
539 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
540 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
542 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
543 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
545 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
546 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
548 MO_And rep -> trivialCode rep False AND x y
549 MO_Or rep -> trivialCode rep False OR x y
550 MO_Xor rep -> trivialCode rep False XOR x y
552 MO_Shl rep -> trivialCode rep False SLW x y
553 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
554 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
555 _ -> panic "PPC.CodeGen.getRegister: no match"
558 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
559 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
561 getRegister' _ (CmmLit (CmmInt i rep))
562 | Just imm <- makeImmediate rep True i
564 code dst = unitOL (LI dst imm)
566 return (Any (intSize rep) code)
568 getRegister' _ (CmmLit (CmmFloat f frep)) = do
569 lbl <- getNewLabelNat
570 dflags <- getDynFlagsNat
571 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
572 Amode addr addr_code <- getAmode dynRef
573 let size = floatSize frep
575 LDATA ReadOnlyData [CmmDataLabel lbl,
576 CmmStaticLit (CmmFloat f frep)]
577 `consOL` (addr_code `snocOL` LD size dst addr)
578 return (Any size code)
580 getRegister' _ (CmmLit lit)
581 = let rep = cmmLitType lit
585 ADD dst dst (RIImm (LO imm))
587 in return (Any (cmmTypeSize rep) code)
589 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
591 -- extend?Rep: wrap integer expression of type rep
592 -- in a conversion to II32
593 extendSExpr W32 x = x
594 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
595 extendUExpr W32 x = x
596 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
598 -- -----------------------------------------------------------------------------
599 -- The 'Amode' type: Memory addressing modes passed up the tree.
602 = Amode AddrMode InstrBlock
605 Now, given a tree (the argument to an CmmLoad) that references memory,
606 produce a suitable addressing mode.
608 A Rule of the Game (tm) for Amodes: use of the addr bit must
609 immediately follow use of the code part, since the code part puts
610 values in registers which the addr then refers to. So you can't put
611 anything in between, lest it overwrite some of those registers. If
612 you need to do some other computation between the code part and use of
613 the addr bit, first store the effective address from the amode in a
614 temporary, then do the other computation, and then use the temporary:
618 ... other computation ...
622 getAmode :: CmmExpr -> NatM Amode
623 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
625 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
626 | Just off <- makeImmediate W32 True (-i)
628 (reg, code) <- getSomeReg x
629 return (Amode (AddrRegImm reg off) code)
632 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
633 | Just off <- makeImmediate W32 True i
635 (reg, code) <- getSomeReg x
636 return (Amode (AddrRegImm reg off) code)
638 -- optimize addition with 32-bit immediate
640 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
642 tmp <- getNewRegNat II32
643 (src, srcCode) <- getSomeReg x
644 let imm = litToImm lit
645 code = srcCode `snocOL` ADDIS tmp src (HA imm)
646 return (Amode (AddrRegImm tmp (LO imm)) code)
648 getAmode (CmmLit lit)
650 tmp <- getNewRegNat II32
651 let imm = litToImm lit
652 code = unitOL (LIS tmp (HA imm))
653 return (Amode (AddrRegImm tmp (LO imm)) code)
655 getAmode (CmmMachOp (MO_Add W32) [x, y])
657 (regX, codeX) <- getSomeReg x
658 (regY, codeY) <- getSomeReg y
659 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
663 (reg, code) <- getSomeReg other
666 return (Amode (AddrRegImm reg off) code)
670 -- The 'CondCode' type: Condition codes passed up the tree.
672 = CondCode Bool Cond InstrBlock
674 -- Set up a condition code for a conditional branch.
676 getCondCode :: CmmExpr -> NatM CondCode
678 -- almost the same as everywhere else - but we need to
679 -- extend small integers to 32 bit first
681 getCondCode (CmmMachOp mop [x, y])
683 MO_F_Eq W32 -> condFltCode EQQ x y
684 MO_F_Ne W32 -> condFltCode NE x y
685 MO_F_Gt W32 -> condFltCode GTT x y
686 MO_F_Ge W32 -> condFltCode GE x y
687 MO_F_Lt W32 -> condFltCode LTT x y
688 MO_F_Le W32 -> condFltCode LE x y
690 MO_F_Eq W64 -> condFltCode EQQ x y
691 MO_F_Ne W64 -> condFltCode NE x y
692 MO_F_Gt W64 -> condFltCode GTT x y
693 MO_F_Ge W64 -> condFltCode GE x y
694 MO_F_Lt W64 -> condFltCode LTT x y
695 MO_F_Le W64 -> condFltCode LE x y
697 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
698 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
700 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
701 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
702 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
703 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
705 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
706 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
707 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
708 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
710 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
712 getCondCode other = panic "getCondCode(2)(powerpc)"
716 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
717 -- passed back up the tree.
719 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
721 -- ###FIXME: I16 and I8!
722 condIntCode cond x (CmmLit (CmmInt y rep))
723 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
725 (src1, code) <- getSomeReg x
727 code' = code `snocOL`
728 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
729 return (CondCode False cond code')
731 condIntCode cond x y = do
732 (src1, code1) <- getSomeReg x
733 (src2, code2) <- getSomeReg y
735 code' = code1 `appOL` code2 `snocOL`
736 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
737 return (CondCode False cond code')
739 condFltCode cond x y = do
740 (src1, code1) <- getSomeReg x
741 (src2, code2) <- getSomeReg y
743 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
744 code'' = case cond of -- twiddle CR to handle unordered case
745 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
746 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
749 ltbit = 0 ; eqbit = 2 ; gtbit = 1
750 return (CondCode True cond code'')
754 -- -----------------------------------------------------------------------------
755 -- Generating assignments
757 -- Assignments are really at the heart of the whole code generation
758 -- business. Almost all top-level nodes of any real importance are
759 -- assignments, which correspond to loads, stores, or register
760 -- transfers. If we're really lucky, some of the register transfers
761 -- will go away, because we can use the destination register to
762 -- complete the code generation for the right hand side. This only
763 -- fails when the right hand side is forced into a fixed register
764 -- (e.g. the result of a call).
766 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
767 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
769 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
770 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
772 assignMem_IntCode pk addr src = do
773 (srcReg, code) <- getSomeReg src
774 Amode dstAddr addr_code <- getAmode addr
775 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
777 -- dst is a reg, but src could be anything
778 assignReg_IntCode _ reg src
782 Any _ code -> code dst
783 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
785 dst = getRegisterReg reg
790 assignMem_FltCode = assignMem_IntCode
791 assignReg_FltCode = assignReg_IntCode
795 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
797 genJump (CmmLit (CmmLabel lbl))
798 = return (unitOL $ JMP lbl)
802 (target,code) <- getSomeReg tree
803 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
806 -- -----------------------------------------------------------------------------
807 -- Unconditional branches
808 genBranch :: BlockId -> NatM InstrBlock
809 genBranch = return . toOL . mkJumpInstr
812 -- -----------------------------------------------------------------------------
816 Conditional jumps are always to local labels, so we can use branch
817 instructions. We peek at the arguments to decide what kind of
820 SPARC: First, we have to ensure that the condition codes are set
821 according to the supplied comparison operation. We generate slightly
822 different code for floating point comparisons, because a floating
823 point operation cannot directly precede a @BF@. We assume the worst
824 and fill that slot with a @NOP@.
826 SPARC: Do not fill the delay slots here; you will confuse the register
832 :: BlockId -- the branch target
833 -> CmmExpr -- the condition on which to branch
836 genCondJump id bool = do
837 CondCode _ cond code <- getCondCode bool
838 return (code `snocOL` BCC cond id)
842 -- -----------------------------------------------------------------------------
843 -- Generating C calls
845 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
846 -- @get_arg@, which moves the arguments to the correct registers/stack
847 -- locations. Apart from that, the code is easy.
849 -- (If applicable) Do not fill the delay slots here; you will confuse the
850 -- register allocator.
852 genCCall :: CmmCallTarget -- function to call
853 -> HintedCmmFormals -- where to put the result
854 -> HintedCmmActuals -- arguments (of mixed type)
856 genCCall target dest_regs argsAndHints
857 = do dflags <- getDynFlagsNat
858 case platformOS (targetPlatform dflags) of
859 OSLinux -> genCCall' GCPLinux target dest_regs argsAndHints
860 OSDarwin -> genCCall' GCPDarwin target dest_regs argsAndHints
861 OSSolaris2 -> panic "PPC.CodeGen.genCCall: not defined for this os"
862 OSMinGW32 -> panic "PPC.CodeGen.genCCall: not defined for this os"
863 OSFreeBSD -> panic "PPC.CodeGen.genCCall: not defined for this os"
864 OSOpenBSD -> panic "PPC.CodeGen.genCCall: not defined for this os"
865 OSUnknown -> panic "PPC.CodeGen.genCCall: not defined for this os"
867 data GenCCallPlatform = GCPLinux | GCPDarwin
871 -> CmmCallTarget -- function to call
872 -> HintedCmmFormals -- where to put the result
873 -> HintedCmmActuals -- arguments (of mixed type)
877 The PowerPC calling convention for Darwin/Mac OS X
878 is described in Apple's document
879 "Inside Mac OS X - Mach-O Runtime Architecture".
881 PowerPC Linux uses the System V Release 4 Calling Convention
882 for PowerPC. It is described in the
883 "System V Application Binary Interface PowerPC Processor Supplement".
885 Both conventions are similar:
886 Parameters may be passed in general-purpose registers starting at r3, in
887 floating point registers starting at f1, or on the stack.
889 But there are substantial differences:
890 * The number of registers used for parameter passing and the exact set of
891 nonvolatile registers differs (see MachRegs.lhs).
892 * On Darwin, stack space is always reserved for parameters, even if they are
893 passed in registers. The called routine may choose to save parameters from
894 registers to the corresponding space on the stack.
895 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
896 parameter is passed in an FPR.
897 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
898 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
899 Darwin just treats an I64 like two separate II32s (high word first).
900 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
901 4-byte aligned like everything else on Darwin.
902 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
903 PowerPC Linux does not agree, so neither do we.
905 According to both conventions, The parameter area should be part of the
906 caller's stack frame, allocated in the caller's prologue code (large enough
907 to hold the parameter lists for all called routines). The NCG already
908 uses the stack for register spilling, leaving 64 bytes free at the top.
909 If we need a larger parameter area than that, we just allocate a new stack
910 frame just before ccalling.
914 genCCall' _ (CmmPrim MO_WriteBarrier) _ _
915 = return $ unitOL LWSYNC
917 genCCall' gcp target dest_regs argsAndHints
918 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
919 -- we rely on argument promotion in the codeGen
921 (finalStack,passArgumentsCode,usedRegs) <- passArguments
923 allArgRegs allFPArgRegs
927 (labelOrExpr, reduceToFF32) <- case target of
928 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
929 CmmCallee expr conv -> return (Right expr, False)
930 CmmPrim mop -> outOfLineMachOp mop
932 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
933 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
938 `snocOL` BL lbl usedRegs
941 (dynReg, dynCode) <- getSomeReg dyn
943 `snocOL` MTCTR dynReg
945 `snocOL` BCTRL usedRegs
948 initialStackOffset = case gcp of
951 -- size of linkage area + size of arguments, in bytes
952 stackDelta finalStack = case gcp of
954 roundTo 16 $ (24 +) $ max 32 $ sum $
955 map (widthInBytes . typeWidth) argReps
956 GCPLinux -> roundTo 16 finalStack
958 -- need to remove alignment information
959 argsAndHints' | (CmmPrim mop) <- target,
968 args = map hintlessCmm argsAndHints'
969 argReps = map cmmExprType args
971 roundTo a x | x `mod` a == 0 = x
972 | otherwise = x + a - (x `mod` a)
974 move_sp_down finalStack
976 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
979 where delta = stackDelta finalStack
980 move_sp_up finalStack
982 toOL [ADD sp sp (RIImm (ImmInt delta)),
985 where delta = stackDelta finalStack
988 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
989 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
990 accumCode accumUsed | isWord64 arg_ty =
992 ChildCode64 code vr_lo <- iselExpr64 arg
993 let vr_hi = getHiVRegFromLo vr_lo
997 do let storeWord vr (gpr:_) offset = MR gpr vr
998 storeWord vr [] offset
999 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1004 (accumCode `appOL` code
1005 `snocOL` storeWord vr_hi gprs stackOffset
1006 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1007 ((take 2 gprs) ++ accumUsed)
1009 do let stackOffset' = roundTo 8 stackOffset
1010 stackCode = accumCode `appOL` code
1011 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1012 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1013 regCode hireg loreg =
1014 accumCode `appOL` code
1015 `snocOL` MR hireg vr_hi
1016 `snocOL` MR loreg vr_lo
1019 hireg : loreg : regs | even (length gprs) ->
1020 passArguments args regs fprs stackOffset
1021 (regCode hireg loreg) (hireg : loreg : accumUsed)
1022 _skipped : hireg : loreg : regs ->
1023 passArguments args regs fprs stackOffset
1024 (regCode hireg loreg) (hireg : loreg : accumUsed)
1025 _ -> -- only one or no regs left
1026 passArguments args [] fprs (stackOffset'+8)
1029 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1030 | reg : _ <- regs = do
1031 register <- getRegister arg
1032 let code = case register of
1033 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1034 Any _ acode -> acode reg
1035 stackOffsetRes = case gcp of
1036 -- The Darwin ABI requires that we reserve
1037 -- stack slots for register parameters
1038 GCPDarwin -> stackOffset + stackBytes
1039 -- ... the SysV ABI doesn't.
1040 GCPLinux -> stackOffset
1045 (accumCode `appOL` code)
1048 (vr, code) <- getSomeReg arg
1052 (stackOffset' + stackBytes)
1053 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
1056 stackOffset' = case gcp of
1058 -- stackOffset is at least 4-byte aligned
1059 -- The Darwin ABI is happy with that.
1062 -- ... the SysV ABI requires 8-byte
1063 -- alignment for doubles.
1064 | isFloatType rep && typeWidth rep == W64 ->
1065 roundTo 8 stackOffset
1068 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1069 (nGprs, nFprs, stackBytes, regs)
1072 case cmmTypeSize rep of
1073 II32 -> (1, 0, 4, gprs)
1074 -- The Darwin ABI requires that we skip a
1075 -- corresponding number of GPRs when we use
1077 FF32 -> (1, 1, 4, fprs)
1078 FF64 -> (2, 1, 8, fprs)
1080 case cmmTypeSize rep of
1081 II32 -> (1, 0, 4, gprs)
1082 -- ... the SysV ABI doesn't.
1083 FF32 -> (0, 1, 4, fprs)
1084 FF64 -> (0, 1, 8, fprs)
1086 moveResult reduceToFF32 =
1089 [CmmHinted dest _hint]
1090 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1091 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1092 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
1094 | otherwise -> unitOL (MR r_dest r3)
1095 where rep = cmmRegType (CmmLocal dest)
1096 r_dest = getRegisterReg (CmmLocal dest)
1098 outOfLineMachOp mop =
1100 dflags <- getDynFlagsNat
1101 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
1102 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1103 let mopLabelOrExpr = case mopExpr of
1104 CmmLit (CmmLabel lbl) -> Left lbl
1106 return (mopLabelOrExpr, reduce)
1108 (functionName, reduce) = case mop of
1109 MO_F32_Exp -> (fsLit "exp", True)
1110 MO_F32_Log -> (fsLit "log", True)
1111 MO_F32_Sqrt -> (fsLit "sqrt", True)
1113 MO_F32_Sin -> (fsLit "sin", True)
1114 MO_F32_Cos -> (fsLit "cos", True)
1115 MO_F32_Tan -> (fsLit "tan", True)
1117 MO_F32_Asin -> (fsLit "asin", True)
1118 MO_F32_Acos -> (fsLit "acos", True)
1119 MO_F32_Atan -> (fsLit "atan", True)
1121 MO_F32_Sinh -> (fsLit "sinh", True)
1122 MO_F32_Cosh -> (fsLit "cosh", True)
1123 MO_F32_Tanh -> (fsLit "tanh", True)
1124 MO_F32_Pwr -> (fsLit "pow", True)
1126 MO_F64_Exp -> (fsLit "exp", False)
1127 MO_F64_Log -> (fsLit "log", False)
1128 MO_F64_Sqrt -> (fsLit "sqrt", False)
1130 MO_F64_Sin -> (fsLit "sin", False)
1131 MO_F64_Cos -> (fsLit "cos", False)
1132 MO_F64_Tan -> (fsLit "tan", False)
1134 MO_F64_Asin -> (fsLit "asin", False)
1135 MO_F64_Acos -> (fsLit "acos", False)
1136 MO_F64_Atan -> (fsLit "atan", False)
1138 MO_F64_Sinh -> (fsLit "sinh", False)
1139 MO_F64_Cosh -> (fsLit "cosh", False)
1140 MO_F64_Tanh -> (fsLit "tanh", False)
1141 MO_F64_Pwr -> (fsLit "pow", False)
1143 MO_Memcpy -> (fsLit "memcpy", False)
1144 MO_Memset -> (fsLit "memset", False)
1145 MO_Memmove -> (fsLit "memmove", False)
1147 other -> pprPanic "genCCall(ppc): unknown callish op"
1148 (pprCallishMachOp other)
1151 -- -----------------------------------------------------------------------------
1152 -- Generating a table-branch
1154 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1158 (reg,e_code) <- getSomeReg expr
1159 tmp <- getNewRegNat II32
1160 lbl <- getNewLabelNat
1161 dflags <- getDynFlagsNat
1162 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1163 (tableReg,t_code) <- getSomeReg $ dynRef
1164 let code = e_code `appOL` t_code `appOL` toOL [
1165 SLW tmp reg (RIImm (ImmInt 2)),
1166 LD II32 tmp (AddrRegReg tableReg tmp),
1167 ADD tmp tmp (RIReg tableReg),
1174 (reg,e_code) <- getSomeReg expr
1175 tmp <- getNewRegNat II32
1176 lbl <- getNewLabelNat
1177 let code = e_code `appOL` toOL [
1178 SLW tmp reg (RIImm (ImmInt 2)),
1179 ADDIS tmp tmp (HA (ImmCLbl lbl)),
1180 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1186 generateJumpTableForInstr :: Instr -> Maybe (NatCmmTop Instr)
1187 generateJumpTableForInstr (BCTR ids (Just lbl)) =
1189 | opt_PIC = map jumpTableEntryRel ids
1190 | otherwise = map jumpTableEntry ids
1191 where jumpTableEntryRel Nothing
1192 = CmmStaticLit (CmmInt 0 wordWidth)
1193 jumpTableEntryRel (Just blockid)
1194 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1195 where blockLabel = mkAsmTempLabel (getUnique blockid)
1196 in Just (CmmData ReadOnlyData (CmmDataLabel lbl : jumpTable))
1197 generateJumpTableForInstr _ = Nothing
1199 -- -----------------------------------------------------------------------------
1200 -- 'condIntReg' and 'condFltReg': condition codes into registers
1202 -- Turn those condition codes into integers now (when they appear on
1203 -- the right hand side of an assignment).
1205 -- (If applicable) Do not fill the delay slots here; you will confuse the
1206 -- register allocator.
1208 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1210 condReg :: NatM CondCode -> NatM Register
1211 condReg getCond = do
1212 CondCode _ cond cond_code <- getCond
1214 {- code dst = cond_code `appOL` toOL [
1223 code dst = cond_code
1227 RLWINM dst dst (bit + 1) 31 31
1230 negate_code | do_negate = unitOL (CRNOR bit bit bit)
1233 (bit, do_negate) = case cond of
1246 _ -> panic "PPC.CodeGen.codeReg: no match"
1248 return (Any II32 code)
1250 condIntReg cond x y = condReg (condIntCode cond x y)
1251 condFltReg cond x y = condReg (condFltCode cond x y)
1255 -- -----------------------------------------------------------------------------
1256 -- 'trivial*Code': deal with trivial instructions
1258 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1259 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1260 -- Only look for constants on the right hand side, because that's
1261 -- where the generic optimizer will have put them.
1263 -- Similarly, for unary instructions, we don't have to worry about
1264 -- matching an StInt as the argument, because genericOpt will already
1265 -- have handled the constant-folding.
1270 Wolfgang's PowerPC version of The Rules:
1272 A slightly modified version of The Rules to take advantage of the fact
1273 that PowerPC instructions work on all registers and don't implicitly
1274 clobber any fixed registers.
1276 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1278 * If getRegister returns Any, then the code it generates may modify only:
1279 (a) fresh temporaries
1280 (b) the destination register
1281 It may *not* modify global registers, unless the global
1282 register happens to be the destination register.
1283 It may not clobber any other registers. In fact, only ccalls clobber any
1285 Also, it may not modify the counter register (used by genCCall).
1287 Corollary: If a getRegister for a subexpression returns Fixed, you need
1288 not move it to a fresh temporary before evaluating the next subexpression.
1289 The Fixed register won't be modified.
1290 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1292 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1293 the value of the destination register.
1299 -> (Reg -> Reg -> RI -> Instr)
1304 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1305 | Just imm <- makeImmediate rep signed y
1307 (src1, code1) <- getSomeReg x
1308 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1309 return (Any (intSize rep) code)
1311 trivialCode rep _ instr x y = do
1312 (src1, code1) <- getSomeReg x
1313 (src2, code2) <- getSomeReg y
1314 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1315 return (Any (intSize rep) code)
1317 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
1318 -> CmmExpr -> CmmExpr -> NatM Register
1319 trivialCodeNoImm' size instr x y = do
1320 (src1, code1) <- getSomeReg x
1321 (src2, code2) <- getSomeReg y
1322 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1323 return (Any size code)
1325 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
1326 -> CmmExpr -> CmmExpr -> NatM Register
1327 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
1332 -> (Reg -> Reg -> Instr)
1335 trivialUCode rep instr x = do
1336 (src, code) <- getSomeReg x
1337 let code' dst = code `snocOL` instr dst src
1338 return (Any rep code')
1340 -- There is no "remainder" instruction on the PPC, so we have to do
1342 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1344 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1345 -> CmmExpr -> CmmExpr -> NatM Register
1346 remainderCode rep div x y = do
1347 (src1, code1) <- getSomeReg x
1348 (src2, code2) <- getSomeReg y
1349 let code dst = code1 `appOL` code2 `appOL` toOL [
1351 MULLW dst dst (RIReg src2),
1354 return (Any (intSize rep) code)
1357 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1358 coerceInt2FP fromRep toRep x = do
1359 (src, code) <- getSomeReg x
1360 lbl <- getNewLabelNat
1361 itmp <- getNewRegNat II32
1362 ftmp <- getNewRegNat FF64
1363 dflags <- getDynFlagsNat
1364 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1365 Amode addr addr_code <- getAmode dynRef
1367 code' dst = code `appOL` maybe_exts `appOL` toOL [
1370 CmmStaticLit (CmmInt 0x43300000 W32),
1371 CmmStaticLit (CmmInt 0x80000000 W32)],
1372 XORIS itmp src (ImmInt 0x8000),
1373 ST II32 itmp (spRel 3),
1374 LIS itmp (ImmInt 0x4330),
1375 ST II32 itmp (spRel 2),
1376 LD FF64 ftmp (spRel 2)
1377 ] `appOL` addr_code `appOL` toOL [
1379 FSUB FF64 dst ftmp dst
1380 ] `appOL` maybe_frsp dst
1382 maybe_exts = case fromRep of
1383 W8 -> unitOL $ EXTS II8 src src
1384 W16 -> unitOL $ EXTS II16 src src
1386 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1390 W32 -> unitOL $ FRSP dst dst
1392 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1394 return (Any (floatSize toRep) code')
1396 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1397 coerceFP2Int _ toRep x = do
1398 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1399 (src, code) <- getSomeReg x
1400 tmp <- getNewRegNat FF64
1402 code' dst = code `appOL` toOL [
1403 -- convert to int in FP reg
1405 -- store value (64bit) from FP to stack
1406 ST FF64 tmp (spRel 2),
1407 -- read low word of value (high word is undefined)
1408 LD II32 dst (spRel 3)]
1409 return (Any (intSize toRep) code')