-- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
35 -- Our intermediate code:
37 import PprCmm ( pprExpr )
40 import ClosureInfo ( C_SRT(..) )
43 import StaticFlags ( opt_PIC )
44 import ForeignCall ( CCallConv(..) )
47 import qualified Outputable as O
50 import FastBool ( isFastTrue )
51 import Constants ( wORD_SIZE )
53 import Debug.Trace ( trace )
55 import Control.Monad ( mapAndUnzipM )
56 import Data.Maybe ( fromJust )
62 -- -----------------------------------------------------------------------------
63 -- Top-level of the instruction selector
65 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
66 -- They are really trees of insns to facilitate fast appending, where a
67 -- left-to-right traversal (pre-order?) yields the insns in the correct
70 type InstrBlock = OrdList Instr
72 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
73 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
74 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
75 picBaseMb <- getPicBaseMaybeNat
76 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
77 tops = proc : concat statics
79 Just picBase -> initializePicBase picBase tops
80 Nothing -> return tops
82 cmmTopCodeGen (CmmData sec dat) = do
83 return [CmmData sec dat] -- no translation, we just use CmmStatic
85 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
86 basicBlockCodeGen (BasicBlock id stmts) = do
87 instrs <- stmtsToInstrs stmts
88 -- code generation may introduce new basic block boundaries, which
89 -- are indicated by the NEWBLOCK instruction. We must split up the
90 -- instruction stream into basic blocks again. Also, we extract
93 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
95 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
96 = ([], BasicBlock id instrs : blocks, statics)
97 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
98 = (instrs, blocks, CmmData sec dat:statics)
99 mkBlocks instr (instrs,blocks,statics)
100 = (instr:instrs, blocks, statics)
102 return (BasicBlock id top : other_blocks, statics)
104 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
106 = do instrss <- mapM stmtToInstrs stmts
107 return (concatOL instrss)
109 stmtToInstrs :: CmmStmt -> NatM InstrBlock
110 stmtToInstrs stmt = case stmt of
111 CmmNop -> return nilOL
112 CmmComment s -> return (unitOL (COMMENT s))
115 | isFloatType ty -> assignReg_FltCode size reg src
116 #if WORD_SIZE_IN_BITS==32
117 | isWord64 ty -> assignReg_I64Code reg src
119 | otherwise -> assignReg_IntCode size reg src
120 where ty = cmmRegType reg
121 size = cmmTypeSize ty
124 | isFloatType ty -> assignMem_FltCode size addr src
125 #if WORD_SIZE_IN_BITS==32
126 | isWord64 ty -> assignMem_I64Code addr src
128 | otherwise -> assignMem_IntCode size addr src
129 where ty = cmmExprType src
130 size = cmmTypeSize ty
132 CmmCall target result_regs args _ _
133 -> genCCall target result_regs args
135 CmmBranch id -> genBranch id
136 CmmCondBranch arg id -> genCondJump id arg
137 CmmSwitch arg ids -> genSwitch arg ids
138 CmmJump arg params -> genJump arg
140 panic "stmtToInstrs: return statement should have been cps'd away"
142 -- -----------------------------------------------------------------------------
143 -- General things for putting together code sequences
145 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
146 -- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)
-- Defensive catch-all: the function was previously partial, so any
-- non-CmmRegOff argument died with an opaque pattern-match failure.
-- Panic with the offending expression instead, so the caller's mistake
-- is diagnosable.
mangleIndexTree expr
  = pprPanic "mangleIndexTree: unexpected argument" (pprExpr expr)
152 -- -----------------------------------------------------------------------------
153 -- Code gen for 64-bit arithmetic on 32-bit platforms
156 Simple support for generating 64-bit code (ie, 64 bit values and 64
157 bit assignments) on 32-bit platforms. Unlike the main code generator
158 we merely shoot for generating working code as simply as possible, and
159 pay little attention to code quality. Specifically, there is no
160 attempt to deal cleverly with the fixed-vs-floating register
161 distinction; all values are generated into (pairs of) floating
162 registers, even if this would mean some redundant reg-reg moves as a
163 result. Only one of the VRegUniques is returned, since it will be
164 of the VRegUniqueLo form, and the upper-half VReg can be determined
165 by applying getHiVRegFromLo to it.
168 data ChildCode64 -- a.k.a "Register64"
171 Reg -- the lower 32-bit temporary which contains the
172 -- result; use getHiVRegFromLo to find the other
173 -- VRegUnique. Rules of this simplified insn
174 -- selection game are therefore that the returned
175 -- Reg may be modified
177 #if WORD_SIZE_IN_BITS==32
178 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
179 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
182 #ifndef x86_64_TARGET_ARCH
183 iselExpr64 :: CmmExpr -> NatM ChildCode64
186 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
190 assignMem_I64Code addrTree valueTree = do
191 Amode addr addr_code <- getAmode addrTree
192 ChildCode64 vcode rlo <- iselExpr64 valueTree
194 rhi = getHiVRegFromLo rlo
196 -- Little-endian store
197 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
198 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
200 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
203 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
204 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
206 r_dst_lo = mkVReg u_dst II32
207 r_dst_hi = getHiVRegFromLo r_dst_lo
208 r_src_hi = getHiVRegFromLo r_src_lo
209 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
210 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
213 vcode `snocOL` mov_lo `snocOL` mov_hi
216 assignReg_I64Code lvalue valueTree
217 = panic "assignReg_I64Code(i386): invalid lvalue"
221 iselExpr64 (CmmLit (CmmInt i _)) = do
222 (rlo,rhi) <- getNewRegPairNat II32
224 r = fromIntegral (fromIntegral i :: Word32)
225 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
227 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
228 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
231 return (ChildCode64 code rlo)
233 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
234 Amode addr addr_code <- getAmode addrTree
235 (rlo,rhi) <- getNewRegPairNat II32
237 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
238 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
241 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit-wide local already lives in a virtual register pair: emit no
-- code and hand back the low half (keyed on the local's unique); the
-- high half is recoverable from it with getHiVRegFromLo.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
248 -- we handle addition, but rather badly
249 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
250 ChildCode64 code1 r1lo <- iselExpr64 e1
251 (rlo,rhi) <- getNewRegPairNat II32
253 r = fromIntegral (fromIntegral i :: Word32)
254 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
255 r1hi = getHiVRegFromLo r1lo
257 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
258 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
259 MOV II32 (OpReg r1hi) (OpReg rhi),
260 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
262 return (ChildCode64 code rlo)
264 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
265 ChildCode64 code1 r1lo <- iselExpr64 e1
266 ChildCode64 code2 r2lo <- iselExpr64 e2
267 (rlo,rhi) <- getNewRegPairNat II32
269 r1hi = getHiVRegFromLo r1lo
270 r2hi = getHiVRegFromLo r2lo
273 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
274 ADD II32 (OpReg r2lo) (OpReg rlo),
275 MOV II32 (OpReg r1hi) (OpReg rhi),
276 ADC II32 (OpReg r2hi) (OpReg rhi) ]
278 return (ChildCode64 code rlo)
280 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
282 r_dst_lo <- getNewRegNat II32
283 let r_dst_hi = getHiVRegFromLo r_dst_lo
286 ChildCode64 (code `snocOL`
287 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
292 = pprPanic "iselExpr64(i386)" (ppr expr)
294 #endif /* i386_TARGET_ARCH */
296 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
298 #if sparc_TARGET_ARCH
300 assignMem_I64Code addrTree valueTree = do
301 Amode addr addr_code <- getAmode addrTree
302 ChildCode64 vcode rlo <- iselExpr64 valueTree
303 (src, code) <- getSomeReg addrTree
305 rhi = getHiVRegFromLo rlo
307 mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
308 mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
309 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
311 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
312 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
314 r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
315 r_dst_hi = getHiVRegFromLo r_dst_lo
316 r_src_hi = getHiVRegFromLo r_src_lo
317 mov_lo = mkMOV r_src_lo r_dst_lo
318 mov_hi = mkMOV r_src_hi r_dst_hi
319 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
320 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
321 assignReg_I64Code lvalue valueTree
322 = panic "assignReg_I64Code(sparc): invalid lvalue"
325 -- Load a 64 bit word
326 iselExpr64 (CmmLoad addrTree ty)
328 = do Amode amode addr_code <- getAmode addrTree
331 | AddrRegReg r1 r2 <- amode
332 = do rlo <- getNewRegNat II32
333 tmp <- getNewRegNat II32
334 let rhi = getHiVRegFromLo rlo
339 [ ADD False False r1 (RIReg r2) tmp
340 , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
341 , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
344 | AddrRegImm r1 (ImmInt i) <- amode
345 = do rlo <- getNewRegNat II32
346 let rhi = getHiVRegFromLo rlo
351 [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
352 , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
358 -- Add a literal to a 64 bit integer
359 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
360 = do ChildCode64 code1 r1_lo <- iselExpr64 e1
361 let r1_hi = getHiVRegFromLo r1_lo
363 r_dst_lo <- getNewRegNat II32
364 let r_dst_hi = getHiVRegFromLo r_dst_lo
368 [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
369 , ADD True False r1_hi (RIReg g0) r_dst_hi ])
374 iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
375 = do ChildCode64 code1 r1_lo <- iselExpr64 e1
376 let r1_hi = getHiVRegFromLo r1_lo
378 ChildCode64 code2 r2_lo <- iselExpr64 e2
379 let r2_hi = getHiVRegFromLo r2_lo
381 r_dst_lo <- getNewRegNat II32
382 let r_dst_hi = getHiVRegFromLo r_dst_lo
387 [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
388 , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ]
390 return $ ChildCode64 code r_dst_lo
393 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
394 r_dst_lo <- getNewRegNat II32
395 let r_dst_hi = getHiVRegFromLo r_dst_lo
396 r_src_lo = mkVReg uq II32
397 r_src_hi = getHiVRegFromLo r_src_lo
398 mov_lo = mkMOV r_src_lo r_dst_lo
399 mov_hi = mkMOV r_src_hi r_dst_hi
400 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
402 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
405 -- Convert something into II64
406 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
408 r_dst_lo <- getNewRegNat II32
409 let r_dst_hi = getHiVRegFromLo r_dst_lo
411 -- compute expr and load it into r_dst_lo
412 (a_reg, a_code) <- getSomeReg expr
416 [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits
417 , mkRegRegMoveInstr a_reg r_dst_lo ]
419 return $ ChildCode64 code r_dst_lo
423 = pprPanic "iselExpr64(sparc)" (ppr expr)
425 #endif /* sparc_TARGET_ARCH */
427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
429 #if powerpc_TARGET_ARCH
431 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
432 getI64Amodes addrTree = do
433 Amode hi_addr addr_code <- getAmode addrTree
434 case addrOffset hi_addr 4 of
435 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
436 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
437 return (AddrRegImm hi_ptr (ImmInt 0),
438 AddrRegImm hi_ptr (ImmInt 4),
441 assignMem_I64Code addrTree valueTree = do
442 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
443 ChildCode64 vcode rlo <- iselExpr64 valueTree
445 rhi = getHiVRegFromLo rlo
448 mov_hi = ST II32 rhi hi_addr
449 mov_lo = ST II32 rlo lo_addr
451 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
453 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
454 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
456 r_dst_lo = mkVReg u_dst II32
457 r_dst_hi = getHiVRegFromLo r_dst_lo
458 r_src_hi = getHiVRegFromLo r_src_lo
459 mov_lo = MR r_dst_lo r_src_lo
460 mov_hi = MR r_dst_hi r_src_hi
463 vcode `snocOL` mov_lo `snocOL` mov_hi
466 assignReg_I64Code lvalue valueTree
467 = panic "assignReg_I64Code(powerpc): invalid lvalue"
470 -- Don't delete this -- it's very handy for debugging.
472 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
473 -- = panic "iselExpr64(???)"
475 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
476 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
477 (rlo, rhi) <- getNewRegPairNat II32
478 let mov_hi = LD II32 rhi hi_addr
479 mov_lo = LD II32 rlo lo_addr
480 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- As on the other 32-bit targets: a 64-bit local is already a virtual
-- register pair, so no code is needed — just return the low half.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
-- Materialise a 64-bit literal into a fresh register pair, 16 bits at a
-- time: LIS sets the upper half-word of a 32-bit register, and an
-- OR-with-immediate fills in its lower half-word.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUGFIX: this OR must target the *high* register.  It
                -- previously OR'd half2 into rlo, leaving the low 16
                -- bits of the high word permanently zero.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
503 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
504 ChildCode64 code1 r1lo <- iselExpr64 e1
505 ChildCode64 code2 r2lo <- iselExpr64 e2
506 (rlo,rhi) <- getNewRegPairNat II32
508 r1hi = getHiVRegFromLo r1lo
509 r2hi = getHiVRegFromLo r2lo
512 toOL [ ADDC rlo r1lo r2lo,
515 return (ChildCode64 code rlo)
517 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
518 (expr_reg,expr_code) <- getSomeReg expr
519 (rlo, rhi) <- getNewRegPairNat II32
520 let mov_hi = LI rhi (ImmInt 0)
521 mov_lo = MR rlo expr_reg
522 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
525 = pprPanic "iselExpr64(powerpc)" (ppr expr)
527 #endif /* powerpc_TARGET_ARCH */
530 -- -----------------------------------------------------------------------------
531 -- The 'Register' type
533 -- 'Register's passed up the tree. If the stix code forces the register
534 -- to live in a pre-decided machine register, it comes out as @Fixed@;
535 -- otherwise, it comes out as @Any@, and the parent can decide which
536 -- register to put it in.
539 = Fixed Size Reg InstrBlock
540 | Any Size (Reg -> InstrBlock)
-- | Relabel a 'Register' with a different width.  Purely a size change:
-- the underlying register (or code-generating continuation) is kept
-- untouched, so this emits no instructions.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep register size = case register of
    Fixed _ reg code -> Fixed size reg code
    Any   _ codefn   -> Any   size codefn
548 -- -----------------------------------------------------------------------------
549 -- Utils based on getRegister, below
551 -- The dual to getAnyReg: compute an expression into a register, but
552 -- we don't mind which one it is.
553 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
555 r <- getRegister expr
558 tmp <- getNewRegNat rep
559 return (tmp, code tmp)
563 -- -----------------------------------------------------------------------------
564 -- Grab the Reg for a CmmReg
-- | Grab the 'Reg' for a 'CmmReg'.
getRegisterReg :: CmmReg -> Reg

-- A local maps straight to a virtual register keyed on its unique.
getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u (cmmTypeSize pk)

-- A global must map to a real machine register on this platform;
-- memory-mapped globals are a panic here.
getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
580 -- -----------------------------------------------------------------------------
581 -- Generate code to get a subtree into a Register
583 -- Don't delete this -- it's very handy for debugging.
585 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
586 -- = panic "getRegister(???)"
588 getRegister :: CmmExpr -> NatM Register
590 #if !x86_64_TARGET_ARCH
591 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
592 -- register, it can only be used for rip-relative addressing.
593 getRegister (CmmReg (CmmGlobal PicBaseReg))
595 reg <- getPicBaseNat wordSize
596 return (Fixed wordSize reg nilOL)
599 getRegister (CmmReg reg)
600 = return (Fixed (cmmTypeSize (cmmRegType reg))
601 (getRegisterReg reg) nilOL)
603 getRegister tree@(CmmRegOff _ _)
604 = getRegister (mangleIndexTree tree)
607 #if WORD_SIZE_IN_BITS==32
-- for 32-bit architectures, support some 64 -> 32 bit conversions:
609 -- TO_W_(x), TO_W_(x >> 32)
611 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
612 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
613 ChildCode64 code rlo <- iselExpr64 x
614 return $ Fixed II32 (getHiVRegFromLo rlo) code
616 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
617 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
618 ChildCode64 code rlo <- iselExpr64 x
619 return $ Fixed II32 (getHiVRegFromLo rlo) code
621 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
622 ChildCode64 code rlo <- iselExpr64 x
623 return $ Fixed II32 rlo code
625 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
626 ChildCode64 code rlo <- iselExpr64 x
627 return $ Fixed II32 rlo code
631 -- end of machine-"independent" bit; here we go on the rest...
633 #if alpha_TARGET_ARCH
635 getRegister (StDouble d)
636 = getBlockIdNat `thenNat` \ lbl ->
637 getNewRegNat PtrRep `thenNat` \ tmp ->
638 let code dst = mkSeqInstrs [
639 LDATA RoDataSegment lbl [
640 DATA TF [ImmLab (rational d)]
642 LDA tmp (AddrImm (ImmCLbl lbl)),
643 LD TF dst (AddrReg tmp)]
645 return (Any FF64 code)
647 getRegister (StPrim primop [x]) -- unary PrimOps
649 IntNegOp -> trivialUCode (NEG Q False) x
651 NotOp -> trivialUCode NOT x
653 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
654 DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
656 OrdOp -> coerceIntCode IntRep x
659 Float2IntOp -> coerceFP2Int x
660 Int2FloatOp -> coerceInt2FP pr x
661 Double2IntOp -> coerceFP2Int x
662 Int2DoubleOp -> coerceInt2FP pr x
664 Double2FloatOp -> coerceFltCode x
665 Float2DoubleOp -> coerceFltCode x
667 other_op -> getRegister (StCall fn CCallConv FF64 [x])
669 fn = case other_op of
670 FloatExpOp -> fsLit "exp"
671 FloatLogOp -> fsLit "log"
672 FloatSqrtOp -> fsLit "sqrt"
673 FloatSinOp -> fsLit "sin"
674 FloatCosOp -> fsLit "cos"
675 FloatTanOp -> fsLit "tan"
676 FloatAsinOp -> fsLit "asin"
677 FloatAcosOp -> fsLit "acos"
678 FloatAtanOp -> fsLit "atan"
679 FloatSinhOp -> fsLit "sinh"
680 FloatCoshOp -> fsLit "cosh"
681 FloatTanhOp -> fsLit "tanh"
682 DoubleExpOp -> fsLit "exp"
683 DoubleLogOp -> fsLit "log"
684 DoubleSqrtOp -> fsLit "sqrt"
685 DoubleSinOp -> fsLit "sin"
686 DoubleCosOp -> fsLit "cos"
687 DoubleTanOp -> fsLit "tan"
688 DoubleAsinOp -> fsLit "asin"
689 DoubleAcosOp -> fsLit "acos"
690 DoubleAtanOp -> fsLit "atan"
691 DoubleSinhOp -> fsLit "sinh"
692 DoubleCoshOp -> fsLit "cosh"
693 DoubleTanhOp -> fsLit "tanh"
695 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
697 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
699 CharGtOp -> trivialCode (CMP LTT) y x
700 CharGeOp -> trivialCode (CMP LE) y x
701 CharEqOp -> trivialCode (CMP EQQ) x y
702 CharNeOp -> int_NE_code x y
703 CharLtOp -> trivialCode (CMP LTT) x y
704 CharLeOp -> trivialCode (CMP LE) x y
706 IntGtOp -> trivialCode (CMP LTT) y x
707 IntGeOp -> trivialCode (CMP LE) y x
708 IntEqOp -> trivialCode (CMP EQQ) x y
709 IntNeOp -> int_NE_code x y
710 IntLtOp -> trivialCode (CMP LTT) x y
711 IntLeOp -> trivialCode (CMP LE) x y
713 WordGtOp -> trivialCode (CMP ULT) y x
714 WordGeOp -> trivialCode (CMP ULE) x y
715 WordEqOp -> trivialCode (CMP EQQ) x y
716 WordNeOp -> int_NE_code x y
717 WordLtOp -> trivialCode (CMP ULT) x y
718 WordLeOp -> trivialCode (CMP ULE) x y
720 AddrGtOp -> trivialCode (CMP ULT) y x
721 AddrGeOp -> trivialCode (CMP ULE) y x
722 AddrEqOp -> trivialCode (CMP EQQ) x y
723 AddrNeOp -> int_NE_code x y
724 AddrLtOp -> trivialCode (CMP ULT) x y
725 AddrLeOp -> trivialCode (CMP ULE) x y
727 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
728 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
729 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
730 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
731 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
732 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
734 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
735 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
736 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
737 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
738 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
739 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
741 IntAddOp -> trivialCode (ADD Q False) x y
742 IntSubOp -> trivialCode (SUB Q False) x y
743 IntMulOp -> trivialCode (MUL Q False) x y
744 IntQuotOp -> trivialCode (DIV Q False) x y
745 IntRemOp -> trivialCode (REM Q False) x y
747 WordAddOp -> trivialCode (ADD Q False) x y
748 WordSubOp -> trivialCode (SUB Q False) x y
749 WordMulOp -> trivialCode (MUL Q False) x y
750 WordQuotOp -> trivialCode (DIV Q True) x y
751 WordRemOp -> trivialCode (REM Q True) x y
753 FloatAddOp -> trivialFCode W32 (FADD TF) x y
754 FloatSubOp -> trivialFCode W32 (FSUB TF) x y
755 FloatMulOp -> trivialFCode W32 (FMUL TF) x y
756 FloatDivOp -> trivialFCode W32 (FDIV TF) x y
758 DoubleAddOp -> trivialFCode W64 (FADD TF) x y
759 DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
760 DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
761 DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
763 AddrAddOp -> trivialCode (ADD Q False) x y
764 AddrSubOp -> trivialCode (SUB Q False) x y
765 AddrRemOp -> trivialCode (REM Q True) x y
767 AndOp -> trivialCode AND x y
768 OrOp -> trivialCode OR x y
769 XorOp -> trivialCode XOR x y
770 SllOp -> trivialCode SLL x y
771 SrlOp -> trivialCode SRL x y
773 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
774 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
775 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
777 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
778 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
780 {- ------------------------------------------------------------
781 Some bizarre special code for getting condition codes into
782 registers. Integer non-equality is a test for equality
783 followed by an XOR with 1. (Integer comparisons always set
784 the result register to 0 or 1.) Floating point comparisons of
785 any kind leave the result in a floating point register, so we
786 need to wrangle an integer register out of things.
788 int_NE_code :: StixTree -> StixTree -> NatM Register
791 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
792 getNewRegNat IntRep `thenNat` \ tmp ->
794 code = registerCode register tmp
795 src = registerName register tmp
796 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
798 return (Any IntRep code__2)
800 {- ------------------------------------------------------------
801 Comments for int_NE_code also apply to cmpF_code
804 :: (Reg -> Reg -> Reg -> Instr)
806 -> StixTree -> StixTree
809 cmpF_code instr cond x y
810 = trivialFCode pr instr x y `thenNat` \ register ->
811 getNewRegNat FF64 `thenNat` \ tmp ->
812 getBlockIdNat `thenNat` \ lbl ->
814 code = registerCode register tmp
815 result = registerName register tmp
817 code__2 dst = code . mkSeqInstrs [
818 OR zeroh (RIImm (ImmInt 1)) dst,
819 BF cond result (ImmCLbl lbl),
820 OR zeroh (RIReg zeroh) dst,
823 return (Any IntRep code__2)
825 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
826 ------------------------------------------------------------
828 getRegister (CmmLoad pk mem)
829 = getAmode mem `thenNat` \ amode ->
831 code = amodeCode amode
832 src = amodeAddr amode
833 size = primRepToSize pk
834 code__2 dst = code . mkSeqInstr (LD size dst src)
836 return (Any pk code__2)
838 getRegister (StInt i)
841 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
843 return (Any IntRep code)
846 code dst = mkSeqInstr (LDI Q dst src)
848 return (Any IntRep code)
850 src = ImmInt (fromInteger i)
855 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
857 return (Any PtrRep code)
860 imm__2 = case imm of Just x -> x
862 #endif /* alpha_TARGET_ARCH */
864 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
868 getRegister (CmmLit (CmmFloat f W32)) = do
869 lbl <- getNewLabelNat
870 dflags <- getDynFlagsNat
871 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
872 Amode addr addr_code <- getAmode dynRef
876 CmmStaticLit (CmmFloat f W32)]
877 `consOL` (addr_code `snocOL`
880 return (Any FF32 code)
883 getRegister (CmmLit (CmmFloat d W64))
885 = let code dst = unitOL (GLDZ dst)
886 in return (Any FF64 code)
889 = let code dst = unitOL (GLD1 dst)
890 in return (Any FF64 code)
893 lbl <- getNewLabelNat
894 dflags <- getDynFlagsNat
895 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
896 Amode addr addr_code <- getAmode dynRef
900 CmmStaticLit (CmmFloat d W64)]
901 `consOL` (addr_code `snocOL`
904 return (Any FF64 code)
906 #endif /* i386_TARGET_ARCH */
908 #if x86_64_TARGET_ARCH
910 getRegister (CmmLit (CmmFloat 0.0 w)) = do
911 let size = floatSize w
912 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
913 -- I don't know why there are xorpd, xorps, and pxor instructions.
914 -- They all appear to do the same thing --SDM
915 return (Any size code)
917 getRegister (CmmLit (CmmFloat f w)) = do
918 lbl <- getNewLabelNat
919 let code dst = toOL [
922 CmmStaticLit (CmmFloat f w)],
923 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
926 return (Any size code)
927 where size = floatSize w
929 #endif /* x86_64_TARGET_ARCH */
931 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
933 -- catch simple cases of zero- or sign-extended load
934 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
935 code <- intLoadCode (MOVZxL II8) addr
936 return (Any II32 code)
938 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
939 code <- intLoadCode (MOVSxL II8) addr
940 return (Any II32 code)
942 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
943 code <- intLoadCode (MOVZxL II16) addr
944 return (Any II32 code)
946 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
947 code <- intLoadCode (MOVSxL II16) addr
948 return (Any II32 code)
952 #if x86_64_TARGET_ARCH
954 -- catch simple cases of zero- or sign-extended load
955 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
956 code <- intLoadCode (MOVZxL II8) addr
957 return (Any II64 code)
959 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
960 code <- intLoadCode (MOVSxL II8) addr
961 return (Any II64 code)
963 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
964 code <- intLoadCode (MOVZxL II16) addr
965 return (Any II64 code)
967 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
968 code <- intLoadCode (MOVSxL II16) addr
969 return (Any II64 code)
971 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
972 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
973 return (Any II64 code)
975 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
976 code <- intLoadCode (MOVSxL II32) addr
977 return (Any II64 code)
981 #if x86_64_TARGET_ARCH
982 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
983 CmmLit displacement])
984 = return $ Any II64 (\dst -> unitOL $
985 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
988 #if x86_64_TARGET_ARCH
989 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
990 x_code <- getAnyReg x
991 lbl <- getNewLabelNat
993 code dst = x_code dst `appOL` toOL [
994 -- This is how gcc does it, so it can't be that bad:
995 LDATA ReadOnlyData16 [
998 CmmStaticLit (CmmInt 0x80000000 W32),
999 CmmStaticLit (CmmInt 0 W32),
1000 CmmStaticLit (CmmInt 0 W32),
1001 CmmStaticLit (CmmInt 0 W32)
1003 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1004 -- xorps, so we need the 128-bit constant
1005 -- ToDo: rip-relative
1008 return (Any FF32 code)
1010 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
1011 x_code <- getAnyReg x
1012 lbl <- getNewLabelNat
1014 -- This is how gcc does it, so it can't be that bad:
1015 code dst = x_code dst `appOL` toOL [
1016 LDATA ReadOnlyData16 [
1019 CmmStaticLit (CmmInt 0x8000000000000000 W64),
1020 CmmStaticLit (CmmInt 0 W64)
1022 -- gcc puts an unpck here. Wonder if we need it.
1023 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1024 -- xorpd, so we need the 128-bit constant
1027 return (Any FF64 code)
1030 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1032 getRegister (CmmMachOp mop [x]) -- unary MachOps
1034 #if i386_TARGET_ARCH
1035 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
1036 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
1039 MO_S_Neg w -> triv_ucode NEGI (intSize w)
1040 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
1041 MO_Not w -> triv_ucode NOT (intSize w)
1044 MO_UU_Conv W32 W8 -> toI8Reg W32 x
1045 MO_SS_Conv W32 W8 -> toI8Reg W32 x
1046 MO_UU_Conv W16 W8 -> toI8Reg W16 x
1047 MO_SS_Conv W16 W8 -> toI8Reg W16 x
1048 MO_UU_Conv W32 W16 -> toI16Reg W32 x
1049 MO_SS_Conv W32 W16 -> toI16Reg W32 x
1051 #if x86_64_TARGET_ARCH
1052 MO_UU_Conv W64 W32 -> conversionNop II64 x
1053 MO_SS_Conv W64 W32 -> conversionNop II64 x
1054 MO_UU_Conv W64 W16 -> toI16Reg W64 x
1055 MO_SS_Conv W64 W16 -> toI16Reg W64 x
1056 MO_UU_Conv W64 W8 -> toI8Reg W64 x
1057 MO_SS_Conv W64 W8 -> toI8Reg W64 x
1060 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1061 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1064 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
1065 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
1066 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
1068 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
1069 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
1070 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
1072 #if x86_64_TARGET_ARCH
1073 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
1074 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1075 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1076 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
1077 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1078 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1079 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1080 -- However, we don't want the register allocator to throw it
1081 -- away as an unnecessary reg-to-reg move, so we keep it in
1082 -- the form of a movzl and print it as a movl later.
1085 #if i386_TARGET_ARCH
1086 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1087 MO_FF_Conv W64 W32 -> conversionNop FF32 x
1089 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1090 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1093 MO_FS_Conv from to -> coerceFP2Int from to x
1094 MO_SF_Conv from to -> coerceInt2FP from to x
1096 other -> pprPanic "getRegister" (pprMachOp mop)
1098 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1099 triv_ucode instr size = trivialUCode size (instr size) x
1101 -- signed or unsigned extension.
1102 integerExtend :: Width -> Width
1103 -> (Size -> Operand -> Operand -> Instr)
1104 -> CmmExpr -> NatM Register
1105 integerExtend from to instr expr = do
1106 (reg,e_code) <- if from == W8 then getByteReg expr
1107 else getSomeReg expr
1111 instr (intSize from) (OpReg reg) (OpReg dst)
1112 return (Any (intSize to) code)
1114 toI8Reg :: Width -> CmmExpr -> NatM Register
1115 toI8Reg new_rep expr
1116 = do codefn <- getAnyReg expr
1117 return (Any (intSize new_rep) codefn)
1118 -- HACK: use getAnyReg to get a byte-addressable register.
1119 -- If the source was a Fixed register, this will add the
1120 -- mov instruction to put it into the desired destination.
1121 -- We're assuming that the destination won't be a fixed
1122 -- non-byte-addressable register; it won't be, because all
1123 -- fixed registers are word-sized.
1125 toI16Reg = toI8Reg -- for now
1127 conversionNop :: Size -> CmmExpr -> NatM Register
1128 conversionNop new_size expr
1129 = do e_code <- getRegister expr
1130 return (swizzleRegisterRep e_code new_size)
-- x86/x86_64: dyadic (two-argument) MachOps.  Comparisons are lowered via
-- condFltReg/condIntReg (signed vs unsigned integer comparisons pick
-- different condition codes); arithmetic, shifts and division go through
-- the local helpers defined in the where-clause that follows.
1133 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1135     MO_F_Eq w -> condFltReg EQQ x y
1136     MO_F_Ne w -> condFltReg NE x y
1137     MO_F_Gt w -> condFltReg GTT x y
1138     MO_F_Ge w -> condFltReg GE x y
1139     MO_F_Lt w -> condFltReg LTT x y
1140     MO_F_Le w -> condFltReg LE x y
1142     MO_Eq rep   -> condIntReg EQQ x y
1143     MO_Ne rep   -> condIntReg NE x y
1145     MO_S_Gt rep -> condIntReg GTT x y
1146     MO_S_Ge rep -> condIntReg GE x y
1147     MO_S_Lt rep -> condIntReg LTT x y
1148     MO_S_Le rep -> condIntReg LE x y
1150     MO_U_Gt rep -> condIntReg GU x y
1151     MO_U_Ge rep -> condIntReg GEU x y
1152     MO_U_Lt rep -> condIntReg LU x y
1153     MO_U_Le rep -> condIntReg LEU x y
-- i386 uses the x87-style G* float instructions; x86_64 uses the SSE forms.
1155 #if i386_TARGET_ARCH
1156     MO_F_Add w -> trivialFCode w GADD x y
1157     MO_F_Sub w -> trivialFCode w GSUB x y
1158     MO_F_Quot w -> trivialFCode w GDIV x y
1159     MO_F_Mul w -> trivialFCode w GMUL x y
1162 #if x86_64_TARGET_ARCH
1163     MO_F_Add w -> trivialFCode w ADD x y
1164     MO_F_Sub w -> trivialFCode w SUB x y
1165     MO_F_Quot w -> trivialFCode w FDIV x y
1166     MO_F_Mul w -> trivialFCode w MUL x y
1169     MO_Add rep -> add_code rep x y
1170     MO_Sub rep -> sub_code rep x y
-- div_code's Bool flags: (signed?, want-quotient?) -- remainder otherwise.
1172     MO_S_Quot rep -> div_code rep True True x y
1173     MO_S_Rem rep -> div_code rep True False x y
1174     MO_U_Quot rep -> div_code rep False True x y
1175     MO_U_Rem rep -> div_code rep False False x y
1177     MO_S_MulMayOflo rep -> imulMayOflo rep x y
1179     MO_Mul rep -> triv_op rep IMUL
1180     MO_And rep -> triv_op rep AND
1181     MO_Or rep -> triv_op rep OR
1182     MO_Xor rep -> triv_op rep XOR
1184     {- Shift ops on x86s have constraints on their source, it
1185        either has to be Imm, CL or 1
1186        => trivialCode is not restrictive enough (sigh.)
1188     MO_Shl rep -> shift_code rep SHL x y {-False-}
1189     MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1190     MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1192     other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1194 --------------------
-- Commutative two-operand ops: the same instruction is usable with the
-- operands swapped, hence (Just op) as the "reversed" form for trivialCode.
1195     triv_op width instr = trivialCode width op (Just op) x y
1196         where op = instr (intSize width)
-- Signed multiply that also detects overflow: widening IMUL2 leaves the
-- double-length product in edx:eax; we sign-extend the low half and
-- subtract it from the high half, so the result register is zero iff
-- the product fit in the original width.
1198     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1199     imulMayOflo rep a b = do
1200          (a_reg, a_code) <- getNonClobberedReg a
1201          b_code <- getAnyReg b
-- shift_amt = width - 1: the amount needed to smear the sign bit across
-- the low word.  (The W32/W64 alternatives are on elided lines.)
1203              shift_amt  = case rep of
1206                           _ -> panic "shift_amt"
1209              code = a_code `appOL` b_code eax `appOL`
1211                         IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
1212                         SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1213                                 -- sign extend lower part
1214                         SUB size (OpReg edx) (OpReg eax)
1215                                 -- compare against upper
1216                                 -- eax==0 if high part == sign extended low part
-- IMUL2 implicitly uses %eax, so the result is Fixed in eax.
1219          return (Fixed size eax code)
1221     --------------------
-- Shift instructions take their count either as an immediate, or
-- implicitly in %cl -- hence the two cases below.
1223              -> (Size -> Operand -> Operand -> Instr)
1228     {- Case1: shift length as immediate -}
1229     shift_code width instr x y@(CmmLit lit) = do
1230          x_code <- getAnyReg x
1232                size = intSize width
1234                     = x_code dst `snocOL`
1235                       instr size (OpImm (litToImm lit)) (OpReg dst)
1237          return (Any size code)
1239     {- Case2: shift length is complex (non-immediate)
1240        * y must go in %ecx.
1241        * we cannot do y first *and* put its result in %ecx, because
1242          %ecx might be clobbered by x.
1243        * if we do y second, then x cannot be
1244          in a clobbered reg.  Also, we cannot clobber x's reg
1245          with the instruction itself.
1247          - do y first, put its result in a fresh tmp, then copy it to %ecx later
1248          - do y second and put its result into %ecx.  x gets placed in a fresh
1249            tmp.  This is likely to be better, because the reg alloc can
1250            eliminate this reg->reg move here (it won't eliminate the other one,
1251            because the move is into the fixed %ecx).
1253     shift_code width instr x y{-amount-} = do
1254          x_code <- getAnyReg x
1255          let size = intSize width
1256          tmp <- getNewRegNat size
-- y_code is materialised directly into %ecx (see comment above); the
-- instruction then reads the count from %cl.
1257          y_code <- getAnyReg y
1259                 code = x_code tmp `appOL`
1261                            instr size (OpReg ecx) (OpReg tmp)
-- Fixed: the result really lives in tmp, which the instruction wrote.
1263          return (Fixed size tmp code)
1265     --------------------
-- Addition: small literal addends go through the three-operand LEA form
-- (add_int); everything else through the generic two-operand ADD.
1266     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1267     add_code rep x (CmmLit (CmmInt y _))
1268         | is32BitInteger y = add_int rep x y
1269     add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1270       where size = intSize rep
1272     --------------------
-- Subtraction of a literal is rewritten as addition of its negation so it
-- can also use LEA; SUB is not commutative, hence Nothing for the
-- reversed-operand form.
1273     sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1274     sub_code rep x (CmmLit (CmmInt y _))
1275         | is32BitInteger (-y) = add_int rep x (-y)
1276     sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1278     -- our three-operand add instruction:
-- LEA dst, [x_reg + imm] computes x + y without clobbering x_reg and
-- without touching the flags.  (The LEA line itself is elided here.)
1279     add_int width x y = do
1280            (x_reg, x_code) <- getSomeReg x
1282                size = intSize width
1283                imm = ImmInt (fromInteger y)
1287                       (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1290            return (Any size code)
1292     ----------------------
-- x86 division: the dividend lives in %edx:%eax.  `widen` fills %edx --
-- CLTD sign-extends %eax for signed division, XOR zeroes it for unsigned.
-- IDIV/DIV then leave quotient in %eax and remainder in %edx; `result`
-- picks whichever the caller asked for.  (The unsigned-instruction and
-- remainder alternatives are on elided lines -- presumably DIV and edx.)
1293     div_code width signed quotient x y = do
-- y (the divisor) must survive the evaluation of x, hence getRegOrMem.
1294            (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1295            x_code <- getAnyReg x
1297                size = intSize width
1298                widen | signed    = CLTD size
1299                      | otherwise = XOR size (OpReg edx) (OpReg edx)
1301                instr | signed    = IDIV
1304                code = y_code `appOL`
1306                       toOL [widen, instr size y_op]
1308                result | quotient  = eax
1312            return (Fixed size result code)
-- Memory loads.  Float loads differ per arch: i386 uses the x87 GLD,
-- x86_64 a plain MOV of the appropriate SSE size.
1315 getRegister (CmmLoad mem pk)
1318       Amode src mem_code <- getAmode mem
1320 	size = cmmTypeSize pk
1321     	code dst = mem_code `snocOL`
1322 		   IF_ARCH_i386(GLD size src dst,
1323 			        MOV size (OpAddr src) (OpReg dst))
1324     return (Any size code)
1326 #if i386_TARGET_ARCH
-- i386 integer loads: sub-word loads are widened to 32 bits (see the
-- comment below about 8-bit registers); the MOVZxL/MOV choice is on an
-- elided line.
1327 getRegister (CmmLoad mem pk)
1330       code <- intLoadCode instr mem
1331       return (Any size code)
1333 	width = typeWidth pk
1334 	size = intSize width
1335 	instr = case width of
1338 		-- We always zero-extend 8-bit loads, if we
1339 		-- can't think of anything better.  This is because
1340 		-- we can't guarantee access to an 8-bit variant of every register
1341 		-- (esi and edi don't have 8-bit variants), so to make things
1342 		-- simpler we do our 8-bit arithmetic with full 32-bit registers.
1345 #if x86_64_TARGET_ARCH
1346 -- Simpler memory load code on x86_64
1347 getRegister (CmmLoad mem pk)
1349       code <- intLoadCode (MOV size) mem
1350       return (Any size code)
1351   where size = intSize $ typeWidth pk
-- Loading zero: XOR reg,reg is shorter than MOV $0 and is the idiomatic
-- x86 way to clear a register.
1354 getRegister (CmmLit (CmmInt 0 width))
1356 	size = intSize width
1358 	-- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1359 	adj_size = case size of II64 -> II32; _ -> size
1360 	size1 = IF_ARCH_i386( size, adj_size )
1362 	    = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1364     return (Any size code)
1366 #if x86_64_TARGET_ARCH
1367 -- optimisation for loading small literals on x86_64: take advantage
1368 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1369 -- instruction forms are shorter.
1370 getRegister (CmmLit lit)
1371   | isWord64 (cmmLitType lit), not (isBigLit lit)
1374 	code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1376       return (Any II64 code)
-- "big" = does not fit in 32 bits unsigned; such literals need the full
-- 64-bit move form.
1378     isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1380 	-- note1: not the same as (not.is32BitLit), because that checks for
1381 	-- signed literals that fit in 32 bits, but we want unsigned
1383 	-- note2: all labels are small, because we're assuming the
1384 	-- small memory model (see gcc docs, -mcmodel=small).
-- Fallback literal clause: a plain immediate move at the literal's size.
1387 getRegister (CmmLit lit)
1389 	size = cmmTypeSize (cmmLitType lit)
1391 	code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1393     return (Any size code)
1395 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build code that loads from a memory expression into a destination
-- register chosen later by the caller, using the supplied load
-- instruction (e.g. a MOV of some size).
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mkLoad addrExpr = do
    Amode addr addr_code <- getAmode addrExpr
    let load dst = addr_code `snocOL` mkLoad (OpAddr addr) (OpReg dst)
    return load
1404 -- Compute an expression into *any* register, adding the appropriate
1405 -- move instruction if necessary.
-- Thin wrapper: getRegister then anyReg (the continuation lines are
-- elided here).
1406 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1408   r <- getRegister expr
-- | Turn a 'Register' into a function from destination register to code:
-- an 'Any' result already is such a function; a 'Fixed' result gets a
-- reg-to-reg move appended so the value ends up in the caller's choice
-- of destination.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
    Any _ code         -> return code
    Fixed sz src fcode -> return (\dst -> fcode `snocOL` reg2reg sz src dst)
1415 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1416 -- Fixed registers might not be byte-addressable, so we make sure we've
1417 -- got a temporary, inserting an extra reg copy if necessary.
1418 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1419 #if x86_64_TARGET_ARCH
1420 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 case (the #else and the case-scrutinee lines are elided): Any
-- results go into a fresh virtual reg; Fixed virtual regs are used as-is;
-- Fixed real regs are copied to a fresh virtual reg, which the register
-- allocator can place in a byte-addressable register.
1422 getByteReg expr = do
1423   r <- getRegister expr
1426 	tmp <- getNewRegNat rep
1427 	return (tmp, code tmp)
1429 	 | isVirtualReg reg -> return (reg,code)
1431 	tmp <- getNewRegNat rep
1432 	return (tmp, code `snocOL` reg2reg rep reg tmp)
1433 	-- ToDo: could optimise slightly by checking for byte-addressable
1434 	-- real registers, but that will happen very rarely if at all.
1437 -- Another variant: this time we want the result in a register that cannot
1438 -- be modified by code to evaluate an arbitrary expression.
1439 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1440 getNonClobberedReg expr = do
1441   r <- getRegister expr
-- Any result: materialise into a fresh virtual reg (safe by construction).
1444 	tmp <- getNewRegNat rep
1445 	return (tmp, code tmp)
1447 	-- only free regs can be clobbered
-- A Fixed result in a *free* real register could be overwritten by later
-- code, so copy it into a fresh virtual reg first.  (The case scrutinee
-- and the pass-through alternative are on elided lines.)
1448 	| RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1449 		tmp <- getNewRegNat rep
1450 		return (tmp, code `snocOL` reg2reg rep reg tmp)
-- Register-to-register move at a given size.  On i386, x87 float values
-- need the special GMOV; integer (and x86_64 SSE) moves use plain MOV.
1454 reg2reg :: Size -> Reg -> Reg -> Instr
1455 reg2reg size src dst
1456 #if i386_TARGET_ARCH
1457     | isFloatSize size = GMOV src dst
1459     | otherwise = MOV size (OpReg src) (OpReg dst)
1461 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1463 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1465 #if sparc_TARGET_ARCH
1467 -- getRegister :: CmmExpr -> NatM Register
1469 -- Load a literal float into a float register.
1470 -- The actual literal is stored in a new data area, and we load it
-- SPARC has no float-immediate form: emit the literal into a read-only
-- data section under a fresh label, then SETHI/LD the high and low halves
-- of the label's address to fetch it.  (The LDATA lines are partly elided.)
1472 getRegister (CmmLit (CmmFloat f W32)) = do
1474     -- a label for the new data area
1475     lbl <- getNewLabelNat
1476     tmp <- getNewRegNat II32
1478     let code dst = toOL [
1482             CmmStaticLit (CmmFloat f W32)],
1485             SETHI (HI (ImmCLbl lbl)) tmp,
1486             LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1488     return (Any FF32 code)
-- Same scheme for double-precision literals.
1490 getRegister (CmmLit (CmmFloat d W64)) = do
1491     lbl <- getNewLabelNat
1492     tmp <- getNewRegNat II32
1493     let code dst = toOL [
1496             CmmStaticLit (CmmFloat d W64)],
1497             SETHI (HI (ImmCLbl lbl)) tmp,
1498             LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1499     return (Any FF64 code)
-- SPARC: unary MachOps.  Negation/not are built from SUB/XNOR against the
-- hard-wired zero register g0.
1501 getRegister (CmmMachOp mop [x]) -- unary MachOps
1503       MO_F_Neg W32		-> trivialUFCode FF32 (FNEG FF32) x
1504       MO_F_Neg W64		-> trivialUFCode FF64 (FNEG FF64) x
1506       MO_S_Neg rep		-> trivialUCode (intSize rep) (SUB False False g0) x
1507       MO_Not rep		-> trivialUCode (intSize rep) (XNOR False g0) x
1509       MO_FF_Conv W64 W32	-> coerceDbl2Flt x
1510       MO_FF_Conv W32 W64	-> coerceFlt2Dbl x
1512       MO_FS_Conv from to	-> coerceFP2Int from to x
1513       MO_SF_Conv from to	-> coerceInt2FP from to x
1515       -- Conversions which are a nop on sparc
1517       | from == to		-> conversionNop (intSize to)  x
-- Narrowing to 8 bits must actually mask; other narrowings/widenings of
-- already-32-bit-resident values are nops.
1518       MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1519       MO_UU_Conv W32 to -> conversionNop (intSize to) x
1520       MO_SS_Conv W32 to -> conversionNop (intSize to) x
1522       MO_UU_Conv W8 to@W32  -> conversionNop (intSize to)  x
1523       MO_UU_Conv W16 to@W32 -> conversionNop (intSize to)  x
1524       MO_UU_Conv W8 to@W16  -> conversionNop (intSize to)  x
1527       MO_SS_Conv W8 W32	-> integerExtend W8 W32 x
1528       MO_SS_Conv W16 W32	-> integerExtend W16 W32 x
1529       MO_SS_Conv W8 W16	-> integerExtend W8 W16 x
1531       other_op -> panic ("Unknown unary mach op: " ++ show mop)
1534 -- | sign extend and widen
1536 	:: Width		-- ^ width of source expression
1537 	-> Width		-- ^ width of result
1538 	-> CmmExpr		-- ^ source expression
-- Classic shift-left-then-arithmetic-shift-right sign extension: SLL
-- pushes the source's sign bit to bit 31, SRA smears it back down.
-- bitCount is computed from (from, to) on elided lines.
1541 integerExtend from to expr
1542  = do	-- load the expr into some register
1543 	(reg, e_code)	<- getSomeReg expr
1544 	tmp		<- getNewRegNat II32
1546 	 		= case (from, to) of
1553 		-- local shift word left to load the sign bit
1554 		`snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
1556 		-- arithmetic shift right to sign extend
1557 		`snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
1559 	return (Any (intSize to) code)
-- Width change with no code: just relabel the Register's size.
1562 conversionNop new_rep expr
1563     = do e_code <- getRegister expr
1564          return (swizzleRegisterRep e_code new_rep)
-- SPARC: dyadic MachOps.  NOTE(review): the W32 unsigned comparisons
-- use the *signed* condition codes (GTT/GE/LTT/LE) while the W16 ones use
-- the unsigned codes (GU/GEU/LU/LEU) -- looks suspicious, but preserved
-- as-is; confirm against the condIntReg implementation before changing.
1566 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1568       MO_Eq rep   -> condIntReg EQQ x y
1569       MO_Ne rep   -> condIntReg NE x y
1571       MO_S_Gt rep -> condIntReg GTT x y
1572       MO_S_Ge rep -> condIntReg GE x y
1573       MO_S_Lt rep -> condIntReg LTT x y
1574       MO_S_Le rep -> condIntReg LE x y
1576       MO_U_Gt W32  -> condIntReg GTT x y
1577       MO_U_Ge W32  -> condIntReg GE x y
1578       MO_U_Lt W32  -> condIntReg LTT x y
1579       MO_U_Le W32  -> condIntReg LE x y
1581       MO_U_Gt W16 -> condIntReg GU  x y
1582       MO_U_Ge W16 -> condIntReg GEU x y
1583       MO_U_Lt W16 -> condIntReg LU  x y
1584       MO_U_Le W16 -> condIntReg LEU x y
1586       MO_Add W32 -> trivialCode W32 (ADD False False) x y
1587       MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1589       MO_S_MulMayOflo rep -> imulMayOflo rep x y
-- Division/remainder are only handled at W32; see idiv/irem below.
1591       MO_S_Quot W32	-> idiv True  False x y
1592       MO_U_Quot W32	-> idiv False False x y
1594       MO_S_Rem  W32	-> irem True  x y
1595       MO_U_Rem  W32	-> irem False x y
1597       MO_F_Eq w -> condFltReg EQQ x y
1598       MO_F_Ne w -> condFltReg NE x y
1600       MO_F_Gt w -> condFltReg GTT x y
1601       MO_F_Ge w -> condFltReg GE x y
1602       MO_F_Lt w -> condFltReg LTT x y
1603       MO_F_Le w -> condFltReg LE x y
1605       MO_F_Add  w  -> trivialFCode w FADD x y
1606       MO_F_Sub  w  -> trivialFCode w FSUB x y
1607       MO_F_Mul  w  -> trivialFCode w FMUL x y
1608       MO_F_Quot w  -> trivialFCode w FDIV x y
1610       MO_And rep   -> trivialCode rep (AND False) x y
1611       MO_Or rep    -> trivialCode rep (OR  False) x y
1612       MO_Xor rep   -> trivialCode rep (XOR False) x y
1614       MO_Mul rep -> trivialCode rep (SMUL False) x y
1616       MO_Shl rep   -> trivialCode rep SLL  x y
1617       MO_U_Shr rep   -> trivialCode rep SRL x y
1618       MO_S_Shr rep   -> trivialCode rep SRA x y
-- Float power is punted to the C library's pow(), promoting floats to
-- doubles first.  (StCall here looks like a leftover from the old
-- stix-based NCG -- NOTE(review): verify this branch even compiles.)
1621       MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1622                                          [promote x, promote y])
1623 		       where promote x = CmmMachOp MO_F32_to_Dbl [x]
1624       MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1627       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1629     -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1632     -- | Generate an integer division instruction.
1633     idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
1635     -- For unsigned division with a 32 bit numerator,
1636     -- we can just clear the Y register.
-- (The WRY instruction that clears Y is on an elided line.)
1637     idiv False cc x y 	= do
1638 	(a_reg, a_code)		<- getSomeReg x
1639         (b_reg, b_code)		<- getSomeReg y
1646 		, UDIV cc a_reg (RIReg b_reg) dst]
1648 	return (Any II32 code)
1651     -- For _signed_ division with a 32 bit numerator,
1652     -- we have to sign extend the numerator into the Y register.
1653     idiv True cc x y 	= do
1654 	(a_reg, a_code)		<- getSomeReg x
1655         (b_reg, b_code)		<- getSomeReg y
1657 	tmp			<- getNewRegNat II32
-- Two 16-bit arithmetic shifts reproduce the numerator's sign word in
-- tmp, which is then written to Y (the WRY line is elided).
1663 		[ SRA a_reg (RIImm (ImmInt 16)) tmp		-- sign extend
1664 		, SRA tmp   (RIImm (ImmInt 16)) tmp
1667 		, SDIV cc a_reg (RIReg b_reg) dst]
1669 	return (Any II32 code)
1672     -- | Do an integer remainder.
1674     --	 NOTE:	The SPARC v8 architecture manual says that integer division
1675     --		instructions _may_ generate a remainder, depending on the implementation.
1676     --		If so it is _recommended_ that the remainder is placed in the Y register.
1678     --          The UltraSparc 2007 manual says Y is _undefined_ after division.
1680     --		The SPARC T2 doesn't store the remainder, not sure about the others.
1681     --		It's probably best not to worry about it, and just generate our own
-- ... i.e. compute rem a b = a - (a `div` b) * b by hand.
1684     irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
1686     -- For unsigned operands:
1687     --		Division is between a 64 bit numerator and a 32 bit denominator,
1688     --		so we still have to clear the Y register.
1690 	(a_reg, a_code)	<- getSomeReg x
1691 	(b_reg, b_code)	<- getSomeReg y
1693 	tmp_reg		<- getNewRegNat II32
-- quotient -> tmp, tmp*b -> tmp, a - tmp -> dst  (remainder).
1700 		, UDIV False         a_reg (RIReg b_reg) tmp_reg
1701 		, UMUL False       tmp_reg (RIReg b_reg) tmp_reg
1702 		, SUB  False False   a_reg (RIReg tmp_reg) dst]
1704 	return	(Any II32 code)
1707     -- For signed operands:
1708     --		Make sure to sign extend into the Y register, or the remainder
1709     --		will have the wrong sign when the numerator is negative.
1711     --	TODO:	When sign extending, GCC only shifts the a_reg right by 17 bits,
1712     --		not the full 32. Not sure why this is, something to do with overflow?
1713     --		If anyone cares enough about the speed of signed remainder they
1714     --		can work it out themselves (then tell me). -- BL 2009/01/20
1717 	(a_reg, a_code)	<- getSomeReg x
1718 	(b_reg, b_code)	<- getSomeReg y
1720 	tmp1_reg	<- getNewRegNat II32
1721 	tmp2_reg	<- getNewRegNat II32
1727 		[ SRA   a_reg (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
1728 		, SRA   tmp1_reg (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
-- Same quotient*divisor subtraction as the unsigned case, with SDIV/SMUL.
1731 		, SDIV  False        a_reg (RIReg b_reg) tmp2_reg
1732 		, SMUL  False     tmp2_reg (RIReg b_reg) tmp2_reg
1733 		, SUB   False False  a_reg (RIReg tmp2_reg) dst]
1735 	return (Any II32 code)
-- SPARC overflow-detecting signed multiply: SMUL gives the low 32 bits
-- (the high-word read is on an elided line); dst ends up zero iff the
-- sign-extended low word equals the high word, i.e. no overflow.
1738     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1739     imulMayOflo rep a b = do
1740          (a_reg, a_code) <- getSomeReg a
1741          (b_reg, b_code) <- getSomeReg b
1742          res_lo <- getNewRegNat II32
1743          res_hi <- getNewRegNat II32
1745              shift_amt  = case rep of
1748 			   _ -> panic "shift_amt"
1749              code dst = a_code `appOL` b_code `appOL`
1751                        SMUL False a_reg (RIReg b_reg) res_lo,
1753                        SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1754                        SUB False False res_lo (RIReg res_hi) dst
1756          return (Any II32 code)
-- SPARC: memory load at the Cmm type's natural size.
1758 getRegister (CmmLoad mem pk) = do
1759     Amode src code <- getAmode mem
1761     	code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1762     return (Any (cmmTypeSize pk) code__2)
-- Small integer literal: OR g0, imm -> dst (g0 is hard-wired zero).  The
-- guard that the literal fits an immediate is on an elided line.
1764 getRegister (CmmLit (CmmInt i _))
1767 	src = ImmInt (fromInteger i)
1768 	code dst = unitOL (OR False g0 (RIImm src) dst)
1770     return (Any II32 code)
-- General literal: SETHI high bits (elided), then OR in the low bits.
1772 getRegister (CmmLit lit)
1773   = let rep = cmmLitType lit
1777 		            OR False dst (RIImm (LO imm)) dst]
1778     in return (Any II32 code)
1780 #endif /* sparc_TARGET_ARCH */
1782 #if powerpc_TARGET_ARCH
-- PowerPC: plain load; the assertion checks that float-typed loads go to
-- a double-class register.
1783 getRegister (CmmLoad mem pk)
1786       Amode addr addr_code <- getAmode mem
1787       let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1788                      addr_code `snocOL` LD size dst addr
1789       return (Any size code)
1790     where size = cmmTypeSize pk
1792 -- catch simple cases of zero- or sign-extended load
1793 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1794     Amode addr addr_code <- getAmode mem
1795     return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1797 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1799 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1800     Amode addr addr_code <- getAmode mem
1801     return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-- LA = load halfword algebraic (sign-extending 16-bit load).
1803 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1804     Amode addr addr_code <- getAmode mem
1805     return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-- PowerPC: unary MachOps.
1807 getRegister (CmmMachOp mop [x]) -- unary MachOps
1809         MO_Not rep   -> triv_ucode_int rep NOT
1811         MO_F_Neg w   -> triv_ucode_float w FNEG
1812         MO_S_Neg w   -> triv_ucode_int w NEG
-- FRSP rounds double to single; widening float->double needs no code.
1814         MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
1815         MO_FF_Conv W32 W64 -> conversionNop FF64 x
1817         MO_FS_Conv from to -> coerceFP2Int from to x
1818         MO_SF_Conv from to -> coerceInt2FP from to x
1821                 | from == to -> conversionNop (intSize to) x
1823 	-- narrowing is a nop: we treat the high bits as undefined
1824         MO_SS_Conv W32 to -> conversionNop (intSize to) x
1825         MO_SS_Conv W16 W8 -> conversionNop II8 x
-- EXTS: sign-extend byte/halfword into a full word.
1826         MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
1827         MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1830                 | from == to -> conversionNop (intSize to) x
1831 	-- narrowing is a nop: we treat the high bits as undefined
1832         MO_UU_Conv W32 to -> conversionNop (intSize to) x
1833         MO_UU_Conv W16 W8 -> conversionNop II8 x
-- Zero extension by masking with 0xff / 0xffff.
1834         MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1835         MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1838         triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
1839         triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1841         conversionNop new_size expr
1842             = do e_code <- getRegister expr
1843                  return (swizzleRegisterRep e_code new_size)
-- PowerPC: dyadic MachOps.  Sub-word operands are first widened to W32
-- with extendSExpr/extendUExpr so comparisons and division see correctly
-- extended values.
1845 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1847         MO_F_Eq w -> condFltReg EQQ x y
1848         MO_F_Ne w -> condFltReg NE x y
1849         MO_F_Gt w -> condFltReg GTT x y
1850         MO_F_Ge w -> condFltReg GE x y
1851         MO_F_Lt w -> condFltReg LTT x y
1852         MO_F_Le w -> condFltReg LE x y
1854         MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
1855         MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
1857         MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
1858         MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
1859         MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
1860         MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
1862         MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
1863         MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
1864         MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
1865         MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
1867         MO_F_Add w -> triv_float w FADD
1868         MO_F_Sub w -> triv_float w FSUB
1869         MO_F_Mul w -> triv_float w FMUL
1870         MO_F_Quot w -> triv_float w FDIV
1872          -- optimize addition with 32-bit immediate
-- The 16-bit-immediate fast path is on elided lines; for a full 32-bit
-- immediate, ADDIS adds the high-adjusted half and ADD the low half.
1876             CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1877                 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1880                 (src, srcCode) <- getSomeReg x
1881                 let imm = litToImm lit
1882                     code dst = srcCode `appOL` toOL [
1883                                     ADDIS dst src (HA imm),
1884                                     ADD dst dst (RIImm (LO imm))
1886                 return (Any II32 code)
1887             _ -> trivialCode W32 True ADD x y
1889         MO_Add rep -> trivialCode rep True ADD x y
1891             case y of   -- subfi ('substract from' with immediate) doesn't exist
-- x - imm is rewritten as x + (-imm) when -imm fits an immediate field.
1892                 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1893                     -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1894                 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1896         MO_Mul rep -> trivialCode rep True MULLW x y
1898         MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1900         MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1901         MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1903         MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1904         MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1906         MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1907         MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1909         MO_And rep   -> trivialCode rep False AND x y
1910         MO_Or rep    -> trivialCode rep False OR x y
1911         MO_Xor rep   -> trivialCode rep False XOR x y
1913         MO_Shl rep   -> trivialCode rep False SLW x y
1914         MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1915         MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1917         triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1918         triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
-- PowerPC: integer literal that fits a (signed) 16-bit immediate: one LI.
1920 getRegister (CmmLit (CmmInt i rep))
1921   | Just imm <- makeImmediate rep True i
1923         code dst = unitOL (LI dst imm)
1925       return (Any (intSize rep) code)
-- Float literal: emit into a read-only data section and load it, going
-- through cmmMakeDynamicReference so PIC builds get the right addressing.
1927 getRegister (CmmLit (CmmFloat f frep)) = do
1928     lbl <- getNewLabelNat
1929     dflags <- getDynFlagsNat
1930     dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1931     Amode addr addr_code <- getAmode dynRef
1932     let size = floatSize frep
1934 	    	  LDATA ReadOnlyData  [CmmDataLabel lbl,
1935 				       CmmStaticLit (CmmFloat f frep)]
1936             `consOL` (addr_code `snocOL` LD size dst addr)
1937     return (Any size code)
-- General literal: high half via LIS/ADDIS (elided), low half via ADD.
1939 getRegister (CmmLit lit)
1940   = let rep = cmmLitType lit
1944                           ADD dst dst (RIImm (LO imm))
1946     in return (Any (cmmTypeSize rep) code)
1948 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- Wrap a sub-word integer expression in a sign- (extendSExpr) or
-- zero- (extendUExpr) extending conversion up to the native 32-bit
-- width; W32 operands are already full width and are returned untouched.
extendSExpr rep x = case rep of
    W32 -> x
    _   -> CmmMachOp (MO_SS_Conv rep W32) [x]
extendUExpr rep x = case rep of
    W32 -> x
    _   -> CmmMachOp (MO_UU_Conv rep W32) [x]
1957 #endif /* powerpc_TARGET_ARCH */
1960 -- -----------------------------------------------------------------------------
1961 -- The 'Amode' type: Memory addressing modes passed up the tree.
1963 data Amode = Amode AddrMode InstrBlock
1966 Now, given a tree (the argument to a CmmLoad) that references memory,
1967 produce a suitable addressing mode.
1969 A Rule of the Game (tm) for Amodes: use of the addr bit must
1970 immediately follow use of the code part, since the code part puts
1971 values in registers which the addr then refers to. So you can't put
1972 anything in between, lest it overwrite some of those registers. If
1973 you need to do some other computation between the code part and use of
1974 the addr bit, first store the effective address from the amode in a
1975 temporary, then do the other computation, and then use the temporary:
1979 ... other computation ...
-- Entry clause: CmmRegOff is first normalised to an explicit add by
-- mangleIndexTree so the arch-specific clauses below only see MachOps.
1983 getAmode :: CmmExpr -> NatM Amode
1984 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1986 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1988 #if alpha_TARGET_ARCH
1990 getAmode (StPrim IntSubOp [x, StInt i])
1991 = getNewRegNat PtrRep `thenNat` \ tmp ->
1992 getRegister x `thenNat` \ register ->
1994 code = registerCode register tmp
1995 reg = registerName register tmp
1996 off = ImmInt (-(fromInteger i))
1998 return (Amode (AddrRegImm reg off) code)
2000 getAmode (StPrim IntAddOp [x, StInt i])
2001 = getNewRegNat PtrRep `thenNat` \ tmp ->
2002 getRegister x `thenNat` \ register ->
2004 code = registerCode register tmp
2005 reg = registerName register tmp
2006 off = ImmInt (fromInteger i)
2008 return (Amode (AddrRegImm reg off) code)
2012 = return (Amode (AddrImm imm__2) id)
2015 imm__2 = case imm of Just x -> x
2018 = getNewRegNat PtrRep `thenNat` \ tmp ->
2019 getRegister other `thenNat` \ register ->
2021 code = registerCode register tmp
2022 reg = registerName register tmp
2024 return (Amode (AddrReg reg) code)
2026 #endif /* alpha_TARGET_ARCH */
2028 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2030 #if x86_64_TARGET_ARCH
-- x86_64: PIC-base-relative address becomes a RIP-relative operand.
2032 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
2033                                   CmmLit displacement])
2034     = return $ Amode (ripRel (litToImm displacement)) nilOL
2038 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2040 -- This is all just ridiculous, since it carefully undoes
2041 -- what mangleIndexTree has just done.
-- base - imm and base + imm: fold the literal into the displacement.
-- (The is32BitInteger guards are on elided lines.)
2042 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
2044   -- ASSERT(rep == II32)???
2045   = do (x_reg, x_code) <- getSomeReg x
2046        let off = ImmInt (-(fromInteger i))
2047        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2049 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
2051   -- ASSERT(rep == II32)???
2052   = do (x_reg, x_code) <- getSomeReg x
2053        let off = ImmInt (fromInteger i)
2054        return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2056 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
2057 -- recognised by the next rule.
2058 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
2060   = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << shift): SIB addressing; only scales 1,2,4,8
-- (shift 0..3) are encodable.
2062 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
2063                                         [y, CmmLit (CmmInt shift _)]])
2064   | shift == 0 || shift == 1 || shift == 2 || shift == 3
2065   = x86_complex_amode x y shift 0
-- base + (index << shift) + offset: full SIB + displacement form.
-- NOTE: the parentheses around the shift tests are essential.  Written as
--   shift == 0 || shift == 1 || shift == 2 || shift == 3 && is32BitInteger offset
-- the guard parses as (... || (shift == 3 && is32BitInteger offset))
-- because (&&) binds tighter than (||), so a 64-bit offset slipped
-- through for shifts 0..2 and was silently truncated by the
-- fromIntegral in x86_complex_amode's displacement.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset
-- Plain base + index (scale 1, no displacement).
2075 getAmode (CmmMachOp (MO_Add rep) [x,y])
2076   = x86_complex_amode x y 0 0
-- Absolute address for literals that fit a 32-bit displacement.
2078 getAmode (CmmLit lit) | is32BitLit lit
2079   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Fallback: compute the address into a register, use it with offset 0.
2082        (reg,code) <- getSomeReg expr
2083        return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- Build a base+index*scale+displacement operand.  NOTE(review): the local
-- `base` in the where/let below shadows the `base` parameter -- it is the
-- scale factor (1/2/4/8), not the base expression.  Also note the
-- fromIntegral on offset: callers must guard that offset fits 32 bits.
2086 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
2087 x86_complex_amode base index shift offset
2088   = do (x_reg, x_code) <- getNonClobberedReg base
2089 	-- x must be in a temp, because it has to stay live over y_code
2090 	-- we could compare x_reg and y_reg and do something better here...
2091        (y_reg, y_code) <- getSomeReg index
2093            code = x_code `appOL` y_code
2094            base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
2095        return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
2098 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2100 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2102 #if sparc_TARGET_ARCH
-- SPARC: reg +/- 13-bit immediate (the fits-in-immediate guards are on
-- elided lines).
2104 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
2107 	(reg, code) <- getSomeReg x
2109 	   off  = ImmInt (-(fromInteger i))
2110 	return (Amode (AddrRegImm reg off) code)
2113 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
2116 	(reg, code) <- getSomeReg x
2118 	   off  = ImmInt (fromInteger i)
2119 	return (Amode (AddrRegImm reg off) code)
-- reg + reg addressing.
2121 getAmode (CmmMachOp (MO_Add rep) [x, y])
2123 	(regX, codeX) <- getSomeReg x
2124 	(regY, codeY) <- getSomeReg y
2126 	   code = codeX `appOL` codeY
2127 	return (Amode (AddrRegReg regX regY) code)
-- Literal address: build it with SETHI/OR, then address relative to g0
-- (the zero register).
2129 getAmode (CmmLit lit)
2131 	let imm__2	= litToImm lit
2132 	tmp1	<- getNewRegNat II32
2133 	tmp2	<- getNewRegNat II32
2135 	let code = toOL [ SETHI (HI imm__2) tmp1
2136 	          	, OR    False tmp1 (RIImm (LO imm__2)) tmp2]
2138 	return (Amode (AddrRegReg tmp2 g0) code)
-- Fallback: compute address into a register (offset line elided).
2142 	(reg, code) <- getSomeReg other
2145 	return (Amode (AddrRegImm reg off) code)
2147 #endif /* sparc_TARGET_ARCH */
2149 #ifdef powerpc_TARGET_ARCH
-- PowerPC: reg +/- 16-bit immediate, checked via makeImmediate.
2150 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
2151   | Just off <- makeImmediate W32 True (-i)
2153         (reg, code) <- getSomeReg x
2154         return (Amode (AddrRegImm reg off) code)
2157 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
2158   | Just off <- makeImmediate W32 True i
2160         (reg, code) <- getSomeReg x
2161         return (Amode (AddrRegImm reg off) code)
2163    -- optimize addition with 32-bit immediate
2164    -- (needed for PIC)
-- Full 32-bit offset: ADDIS folds in the high-adjusted half, the low
-- half goes in the addressing mode's displacement.
2165 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
2167         tmp <- getNewRegNat II32
2168         (src, srcCode) <- getSomeReg x
2169         let imm = litToImm lit
2170             code = srcCode `snocOL` ADDIS tmp src (HA imm)
2171         return (Amode (AddrRegImm tmp (LO imm)) code)
-- Bare literal address: LIS high half, displacement low half.
2173 getAmode (CmmLit lit)
2175         tmp <- getNewRegNat II32
2176         let imm = litToImm lit
2177             code = unitOL (LIS tmp (HA imm))
2178         return (Amode (AddrRegImm tmp (LO imm)) code)
2180 getAmode (CmmMachOp (MO_Add W32) [x, y])
2182         (regX, codeX) <- getSomeReg x
2183         (regY, codeY) <- getSomeReg y
2184         return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Fallback: compute address into a register (offset line elided).
2188         (reg, code) <- getSomeReg other
2191         return (Amode (AddrRegImm reg off) code)
2192 #endif /* powerpc_TARGET_ARCH */
2194 -- -----------------------------------------------------------------------------
2195 -- getOperand: sometimes any operand will do.
2197 -- getNonClobberedOperand: the value of the operand will remain valid across
2198 -- the computation of an arbitrary expression, unless the expression
2199 -- is computed directly into a register which the operand refers to
2200 -- (see trivialCode where this function is used for an example).
-- getNonClobberedOperand (x86/x86_64): produce an Operand whose value
-- stays valid across evaluation of an arbitrary later expression.
-- On x86_64 a non-zero FP literal is placed in a read-only data section
-- and addressed RIP-relative; small non-float literals become
-- immediates; a load yields a memory operand, but if its address uses
-- caller-clobberable real registers the address is first copied into a
-- fresh temp with LEA.
2202 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2204 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2205 #if x86_64_TARGET_ARCH
2206 getNonClobberedOperand (CmmLit lit)
2207 | isSuitableFloatingPointLit lit = do
2208 lbl <- getNewLabelNat
-- NOTE(review): the LDATA list continues on a line elided from this listing
2209 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2211 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2213 getNonClobberedOperand (CmmLit lit)
2214 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2215 return (OpImm (litToImm lit), nilOL)
2216 getNonClobberedOperand (CmmLoad mem pk)
2217 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2218 Amode src mem_code <- getAmode mem
2220 if (amodeCouldBeClobbered src)
-- stabilise the address in a temp so later code can't clobber it
2222 tmp <- getNewRegNat wordSize
2223 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2224 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2227 return (OpAddr src', save_code `appOL` mem_code)
-- default: evaluate into a non-clobbered register
2228 getNonClobberedOperand e = do
2229 (reg, code) <- getNonClobberedReg e
2230 return (OpReg reg, code)
-- True if the amode mentions a real register that is allocatable
-- (and hence may be reused/clobbered by later code).
2232 amodeCouldBeClobbered :: AddrMode -> Bool
2233 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2235 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2236 regClobbered _ = False
2238 -- getOperand: the operand is not required to remain valid across the
2239 -- computation of an arbitrary expression.
2240 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2241 #if x86_64_TARGET_ARCH
-- non-zero FP literal: put it in read-only data, address RIP-relative
2242 getOperand (CmmLit lit)
2243 | isSuitableFloatingPointLit lit = do
2244 lbl <- getNewLabelNat
-- NOTE(review): the LDATA list continues on a line elided from this listing
2245 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2247 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2249 getOperand (CmmLit lit)
2250 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2251 return (OpImm (litToImm lit), nilOL)
2252 getOperand (CmmLoad mem pk)
2253 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2254 Amode src mem_code <- getAmode mem
2255 return (OpAddr src, mem_code)
-- fallback clause (head elided): evaluate into some register
2257 (reg, code) <- getSomeReg e
2258 return (OpReg reg, code)
-- Can this expression be used directly as an instruction operand?
2260 isOperand :: CmmExpr -> Bool
2261 isOperand (CmmLoad _ _) = True
2262 isOperand (CmmLit lit) = is32BitLit lit
2263 || isSuitableFloatingPointLit lit
2266 -- if we want a floating-point literal as an operand, we can
2267 -- use it directly from memory. However, if the literal is
2268 -- zero, we're better off generating it into a register using
-- (continuation of the comment above is elided in this listing;
-- presumably "xor reg,reg" — confirm against full source)
2270 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2271 isSuitableFloatingPointLit _ = False
-- getRegOrMem: like getOperand, but a non-load expression goes through
-- getNonClobberedReg so the result survives later evaluation.
2273 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2274 getRegOrMem (CmmLoad mem pk)
2275 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2276 Amode src mem_code <- getAmode mem
2277 return (OpAddr src, mem_code)
-- fallback clause (head elided in this listing)
2279 (reg, code) <- getNonClobberedReg e
2280 return (OpReg reg, code)
2282 #if x86_64_TARGET_ARCH
-- Does this literal fit in a (sign-extended) 32-bit immediate field?
2283 is32BitLit (CmmInt i W64) = is32BitInteger i
2284 -- assume that labels are in the range 0-2^31-1: this assumes the
2285 -- small memory model (see gcc docs, -mcmodel=small).
-- (remaining is32BitLit clauses are elided in this listing)
2290 is32BitInteger :: Integer -> Bool
2291 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
2292 where i64 = fromIntegral i :: Int64
2293 -- a CmmInt is intended to be truncated to the appropriate
2294 -- number of bits, so here we truncate it to Int64. This is
2295 -- important because e.g. -1 as a CmmInt might be either
2296 -- -1 or 18446744073709551615.
2298 -- -----------------------------------------------------------------------------
2299 -- The 'CondCode' type: Condition codes passed up the tree.
-- CondCode <is-float-comparison> <condition> <code that sets the flags>
2301 data CondCode = CondCode Bool Cond InstrBlock
2303 -- Set up a condition code for a conditional branch.
2305 getCondCode :: CmmExpr -> NatM CondCode
2307 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2309 #if alpha_TARGET_ARCH
2310 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2311 #endif /* alpha_TARGET_ARCH */
2313 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2315 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2316 -- yes, they really do seem to want exactly the same!
-- Dispatch a comparison MachOp to condFltCode (floats) or condIntCode
-- (signed/unsigned integers); anything else is a panic.
-- NOTE(review): the 'case mop of' line is elided in this listing.
2318 getCondCode (CmmMachOp mop [x, y])
2321 MO_F_Eq W32 -> condFltCode EQQ x y
2322 MO_F_Ne W32 -> condFltCode NE x y
2323 MO_F_Gt W32 -> condFltCode GTT x y
2324 MO_F_Ge W32 -> condFltCode GE x y
2325 MO_F_Lt W32 -> condFltCode LTT x y
2326 MO_F_Le W32 -> condFltCode LE x y
2328 MO_F_Eq W64 -> condFltCode EQQ x y
2329 MO_F_Ne W64 -> condFltCode NE x y
2330 MO_F_Gt W64 -> condFltCode GTT x y
2331 MO_F_Ge W64 -> condFltCode GE x y
2332 MO_F_Lt W64 -> condFltCode LTT x y
2333 MO_F_Le W64 -> condFltCode LE x y
2335 MO_Eq rep -> condIntCode EQQ x y
2336 MO_Ne rep -> condIntCode NE x y
2338 MO_S_Gt rep -> condIntCode GTT x y
2339 MO_S_Ge rep -> condIntCode GE x y
2340 MO_S_Lt rep -> condIntCode LTT x y
2341 MO_S_Le rep -> condIntCode LE x y
2343 MO_U_Gt rep -> condIntCode GU x y
2344 MO_U_Ge rep -> condIntCode GEU x y
2345 MO_U_Lt rep -> condIntCode LU x y
2346 MO_U_Le rep -> condIntCode LEU x y
2348 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2350 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2352 #elif powerpc_TARGET_ARCH
2354 -- almost the same as everywhere else - but we need to
2355 -- extend small integers to 32 bit first
-- extendSExpr/extendUExpr widen sub-word operands (sign- or zero-
-- extended to match the comparison's signedness) before comparing.
-- NOTE(review): the 'case mop of' line is elided in this listing.
2357 getCondCode (CmmMachOp mop [x, y])
2359 MO_F_Eq W32 -> condFltCode EQQ x y
2360 MO_F_Ne W32 -> condFltCode NE x y
2361 MO_F_Gt W32 -> condFltCode GTT x y
2362 MO_F_Ge W32 -> condFltCode GE x y
2363 MO_F_Lt W32 -> condFltCode LTT x y
2364 MO_F_Le W32 -> condFltCode LE x y
2366 MO_F_Eq W64 -> condFltCode EQQ x y
2367 MO_F_Ne W64 -> condFltCode NE x y
2368 MO_F_Gt W64 -> condFltCode GTT x y
2369 MO_F_Ge W64 -> condFltCode GE x y
2370 MO_F_Lt W64 -> condFltCode LTT x y
2371 MO_F_Le W64 -> condFltCode LE x y
2373 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2374 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2376 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2377 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2378 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2379 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2381 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2382 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2383 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2384 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2386 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2388 getCondCode other = panic "getCondCode(2)(powerpc)"
2394 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2395 -- passed back up the tree.
2397 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2399 #if alpha_TARGET_ARCH
-- Alpha is not supported by this code path; fail loudly if reached.
2400 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2401 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2402 #endif /* alpha_TARGET_ARCH */
2404 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2405 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- condIntCode (x86/x86_64): emit a flag-setting comparison, most
-- specific patterns first: CMP mem,imm; TEST with a literal mask;
-- TEST reg,reg for compare-against-zero; CMP against a direct
-- operand; and finally the fully general reg-vs-reg/mem case.
-- NOTE(review): 'let'/'imm' binding lines are elided in this listing.
2407 -- memory vs immediate
2408 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2409 Amode x_addr x_code <- getAmode x
2412 code = x_code `snocOL`
2413 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2415 return (CondCode False cond code)
2417 -- anything vs zero, using a mask
2418 -- TODO: Add some sanity checking!!!!
2419 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2420 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2422 (x_reg, x_code) <- getSomeReg x
2424 code = x_code `snocOL`
2425 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2427 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets the flags without a constant
2430 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2431 (x_reg, x_code) <- getSomeReg x
2433 code = x_code `snocOL`
2434 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2436 return (CondCode False cond code)
2438 -- anything vs operand
2439 condIntCode cond x y | isOperand y = do
2440 (x_reg, x_code) <- getNonClobberedReg x
2441 (y_op, y_code) <- getOperand y
2443 code = x_code `appOL` y_code `snocOL`
2444 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2446 return (CondCode False cond code)
2448 -- anything vs anything
2449 condIntCode cond x y = do
2450 (y_reg, y_code) <- getNonClobberedReg y
2451 (x_op, x_code) <- getRegOrMem x
2453 code = y_code `appOL`
2455 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2457 return (CondCode False cond code)
2460 #if i386_TARGET_ARCH
-- condFltCode (i386, x87): GCMP performs the test itself and leaves
-- the result in the zero flag, so the returned CondCode always tests EQQ.
2461 condFltCode cond x y
2462 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2463 (x_reg, x_code) <- getNonClobberedReg x
2464 (y_reg, y_code) <- getSomeReg y
2466 code = x_code `appOL` y_code `snocOL`
2467 GCMP cond x_reg y_reg
2468 -- The GCMP insn does the test and sets the zero flag if comparable
2469 -- and true. Hence we always supply EQQ as the condition to test.
2470 return (CondCode True EQQ code)
2471 #endif /* i386_TARGET_ARCH */
2473 #if x86_64_TARGET_ARCH
2474 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2475 -- an operand, but the right must be a reg. We can probably do better
2476 -- than this general case...
2477 condFltCode cond x y = do
2478 (x_reg, x_code) <- getNonClobberedReg x
2479 (y_op, y_code) <- getOperand y
2481 code = x_code `appOL`
2483 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2484 -- NB(1): we need to use the unsigned comparison operators on the
2485 -- result of this comparison.
2487 return (CondCode True (condToUnsigned cond) code)
2490 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2492 #if sparc_TARGET_ARCH
-- condIntCode (SPARC): flags are set with a discarding subtract
-- (SUB ... g0); the immediate form is used when the literal fits.
-- NOTE(review): the guard on the first clause is elided in this listing.
2494 condIntCode cond x (CmmLit (CmmInt y rep))
2497 (src1, code) <- getSomeReg x
2499 src2 = ImmInt (fromInteger y)
2500 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2501 return (CondCode False cond code')
2503 condIntCode cond x y = do
2504 (src1, code1) <- getSomeReg x
2505 (src2, code2) <- getSomeReg y
2507 code__2 = code1 `appOL` code2 `snocOL`
2508 SUB False True src1 (RIReg src2) g0
2509 return (CondCode False cond code__2)
-- condFltCode (SPARC): if the two operands differ in width, promote
-- the 32-bit one to 64-bit (FxTOy into tmp) before FCMP.
2512 condFltCode cond x y = do
2513 (src1, code1) <- getSomeReg x
2514 (src2, code2) <- getSomeReg y
2515 tmp <- getNewRegNat FF64
2517 promote x = FxTOy FF32 FF64 x tmp
-- NOTE(review): the bindings of pk1/pk2 are elided in this listing
2523 if pk1 `cmmEqType` pk2 then
2524 code1 `appOL` code2 `snocOL`
2525 FCMP True (cmmTypeSize pk1) src1 src2
2526 else if typeWidth pk1 == W32 then
2527 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2528 FCMP True FF64 tmp src2
2530 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2531 FCMP True FF64 src1 tmp
2532 return (CondCode True cond code__2)
2534 #endif /* sparc_TARGET_ARCH */
2536 #if powerpc_TARGET_ARCH
2537 -- ###FIXME: I16 and I8!
-- condIntCode (PowerPC): CMPL for unsigned comparisons, CMP for
-- signed; immediate form when makeImmediate accepts the literal.
2538 condIntCode cond x (CmmLit (CmmInt y rep))
2539 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2541 (src1, code) <- getSomeReg x
2543 code' = code `snocOL`
2544 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2545 return (CondCode False cond code')
2547 condIntCode cond x y = do
2548 (src1, code1) <- getSomeReg x
2549 (src2, code2) <- getSomeReg y
2551 code' = code1 `appOL` code2 `snocOL`
2552 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2553 return (CondCode False cond code')
-- condFltCode (PowerPC): FCMP, then patch the condition register with
-- CRNOR for GE/LE so the unordered (NaN) case behaves.
2555 condFltCode cond x y = do
2556 (src1, code1) <- getSomeReg x
2557 (src2, code2) <- getSomeReg y
2559 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2560 code'' = case cond of -- twiddle CR to handle unordered case
2561 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2562 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- NOTE(review): the default case arm is elided in this listing
2565 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2566 return (CondCode True cond code'')
2568 #endif /* powerpc_TARGET_ARCH */
2570 -- -----------------------------------------------------------------------------
2571 -- Generating assignments
2573 -- Assignments are really at the heart of the whole code generation
2574 -- business. Almost all top-level nodes of any real importance are
2575 -- assignments, which correspond to loads, stores, or register
2576 -- transfers. If we're really lucky, some of the register transfers
2577 -- will go away, because we can use the destination register to
2578 -- complete the code generation for the right hand side. This only
2579 -- fails when the right hand side is forced into a fixed register
2580 -- (e.g. the result of a call).
-- Mem variants store to an address expression; Reg variants write a
-- CmmReg. Int vs Flt selects the integer or floating-point path.
2582 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2583 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2585 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2586 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2588 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2590 #if alpha_TARGET_ARCH
-- Alpha integer assignment (legacy, pre-monadic `thenNat` style):
-- store to memory via ST, or register-to-register move via OR when the
-- source landed in a fixed register.
2592 assignIntCode pk (CmmLoad dst _) src
2593 = getNewRegNat IntRep `thenNat` \ tmp ->
2594 getAmode dst `thenNat` \ amode ->
2595 getRegister src `thenNat` \ register ->
2597 code1 = amodeCode amode []
2598 dst__2 = amodeAddr amode
2599 code2 = registerCode register tmp []
2600 src__2 = registerName register tmp
2601 sz = primRepToSize pk
2602 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2606 assignIntCode pk dst src
2607 = getRegister dst `thenNat` \ register1 ->
2608 getRegister src `thenNat` \ register2 ->
2610 dst__2 = registerName register1 zeroh
2611 code = registerCode register2 dst__2
2612 src__2 = registerName register2 dst__2
-- fixed-register result needs an explicit move (OR with itself)
2613 code__2 = if isFixed register2
2614 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
-- NOTE(review): the 'else' branch and returns are elided in this listing
2619 #endif /* alpha_TARGET_ARCH */
2621 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2623 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2625 -- integer assignment to memory
2627 -- specific case of adding/subtracting an integer to a particular address.
2628 -- ToDo: catch other cases where we can use an operation directly on a memory
-- operand (comment continuation elided in this listing)
-- Pattern: mem := mem `op` literal, same address on both sides ->
-- a single ADD/SUB directly on the memory operand.
2630 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2631 CmmLit (CmmInt i _)])
2632 | addr == addr2, pk /= II64 || is32BitInteger i,
2633 Just instr <- check op
2634 = do Amode amode code_addr <- getAmode addr
2635 let code = code_addr `snocOL`
2636 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
-- NOTE(review): the 'return code' / 'where' lines are elided here
2639 check (MO_Add _) = Just ADD
2640 check (MO_Sub _) = Just SUB
-- general case: MOV (immediate or register source) into the address
2645 assignMem_IntCode pk addr src = do
2646 Amode addr code_addr <- getAmode addr
2647 (code_src, op_src) <- get_op_RI src
2649 code = code_src `appOL`
2651 MOV pk op_src (OpAddr addr)
2652 -- NOTE: op_src is stable, so it will still be valid
2653 -- after code_addr. This may involve the introduction
2654 -- of an extra MOV to a temporary register, but we hope
2655 -- the register allocator will get rid of it.
2659 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2660 get_op_RI (CmmLit lit) | is32BitLit lit
2661 = return (nilOL, OpImm (litToImm lit))
-- fallback clause head elided: evaluate into a non-clobbered register
2663 = do (reg,code) <- getNonClobberedReg op
2664 return (code, OpReg reg)
2667 -- Assign; dst is a reg, rhs is mem
2668 assignReg_IntCode pk reg (CmmLoad src _) = do
2669 load_code <- intLoadCode (MOV pk) src
2670 return (load_code (getRegisterReg reg))
2672 -- dst is a reg, but src could be anything
2673 assignReg_IntCode pk reg src = do
2674 code <- getAnyReg src
2675 return (code (getRegisterReg reg))
2677 #endif /* i386_TARGET_ARCH */
2679 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2681 #if sparc_TARGET_ARCH
-- SPARC: store src into the computed address; register assignment uses
-- the Any/Fixed split from getRegister (OR g0,reg for a fixed source).
2683 assignMem_IntCode pk addr src = do
2684 (srcReg, code) <- getSomeReg src
2685 Amode dstAddr addr_code <- getAmode addr
2686 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2688 assignReg_IntCode pk reg src = do
2689 r <- getRegister src
-- NOTE(review): the 'return $ case r of' line is elided in this listing
2691 Any _ code -> code dst
2692 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2694 dst = getRegisterReg reg
2697 #endif /* sparc_TARGET_ARCH */
2699 #if powerpc_TARGET_ARCH
-- PowerPC: same shape as SPARC; a fixed source register is copied with MR.
2701 assignMem_IntCode pk addr src = do
2702 (srcReg, code) <- getSomeReg src
2703 Amode dstAddr addr_code <- getAmode addr
2704 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2706 -- dst is a reg, but src could be anything
2707 assignReg_IntCode pk reg src
2709 r <- getRegister src
2711 Any _ code -> code dst
2712 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2714 dst = getRegisterReg reg
2716 #endif /* powerpc_TARGET_ARCH */
2719 -- -----------------------------------------------------------------------------
2720 -- Floating-point assignments
2722 #if alpha_TARGET_ARCH
-- Alpha FP assignment (legacy style): ST for stores, FMOV for
-- fixed-register to register transfers.
2724 assignFltCode pk (CmmLoad dst _) src
2725 = getNewRegNat pk `thenNat` \ tmp ->
2726 getAmode dst `thenNat` \ amode ->
2727 getRegister src `thenNat` \ register ->
2729 code1 = amodeCode amode []
2730 dst__2 = amodeAddr amode
2731 code2 = registerCode register tmp []
2732 src__2 = registerName register tmp
2733 sz = primRepToSize pk
2734 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2738 assignFltCode pk dst src
2739 = getRegister dst `thenNat` \ register1 ->
2740 getRegister src `thenNat` \ register2 ->
2742 dst__2 = registerName register1 zeroh
2743 code = registerCode register2 dst__2
2744 src__2 = registerName register2 dst__2
2745 code__2 = if isFixed register2
2746 then code . mkSeqInstr (FMOV src__2 dst__2)
-- NOTE(review): the 'else' branch and returns are elided in this listing
2751 #endif /* alpha_TARGET_ARCH */
2753 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2755 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2757 -- Floating point assignment to memory
-- i386 uses the x87 store (GST); x86_64 a plain MOV of the SSE register.
2758 assignMem_FltCode pk addr src = do
2759 (src_reg, src_code) <- getNonClobberedReg src
2760 Amode addr addr_code <- getAmode addr
2762 code = src_code `appOL`
2764 IF_ARCH_i386(GST pk src_reg addr,
2765 MOV pk (OpReg src_reg) (OpAddr addr))
-- NOTE(review): the 'return code' line is elided in this listing
2768 -- Floating point assignment to a register/temporary
2769 assignReg_FltCode pk reg src = do
2770 src_code <- getAnyReg src
2771 return (src_code (getRegisterReg reg))
2773 #endif /* i386_TARGET_ARCH */
2775 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2777 #if sparc_TARGET_ARCH
2779 -- Floating point assignment to memory
-- SPARC: if the source's width differs from the store size, convert
-- through a temp with FxTOy before the ST.
2780 assignMem_FltCode pk addr src = do
2781 Amode dst__2 code1 <- getAmode addr
2782 (src__2, code2) <- getSomeReg src
2783 tmp1 <- getNewRegNat pk
2785 pk__2 = cmmExprType src
2786 code__2 = code1 `appOL` code2 `appOL`
2787 if sizeToWidth pk == typeWidth pk__2
2788 then unitOL (ST pk src__2 dst__2)
2789 else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2790 , ST pk tmp1 dst__2]
-- NOTE(review): the 'return code__2' line is elided in this listing
2793 -- Floating point assignment to a register/temporary
2794 assignReg_FltCode pk dstCmmReg srcCmmExpr = do
2795 srcRegister <- getRegister srcCmmExpr
2796 let dstReg = getRegisterReg dstCmmReg
2798 return $ case srcRegister of
2799 Any _ code -> code dstReg
2800 Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
2802 #endif /* sparc_TARGET_ARCH */
2804 #if powerpc_TARGET_ARCH
-- PowerPC: FP assignments reuse the integer paths unchanged.
2807 assignMem_FltCode = assignMem_IntCode
2808 assignReg_FltCode = assignReg_IntCode
2810 #endif /* powerpc_TARGET_ARCH */
2813 -- -----------------------------------------------------------------------------
2814 -- Generating an non-local jump
2816 -- (If applicable) Do not fill the delay slots here; you will confuse the
2817 -- register allocator.
2819 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2821 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2823 #if alpha_TARGET_ARCH
-- Alpha: BR for local (asm-temp) labels, otherwise load the address
-- into pv and JMP through it.
2825 genJump (CmmLabel lbl)
2826 | isAsmTemp lbl = returnInstr (BR target)
2827 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2829 target = ImmCLbl lbl
-- computed-target clause (head elided in this listing)
2832 = getRegister tree `thenNat` \ register ->
2833 getNewRegNat PtrRep `thenNat` \ tmp ->
2835 dst = registerName register pv
2836 code = registerCode register pv
2837 target = registerName register pv
2839 if isFixed register then
2840 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2842 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2844 #endif /* alpha_TARGET_ARCH */
2846 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2848 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: jump via memory operand, immediate label, or register.
2850 genJump (CmmLoad mem pk) = do
2851 Amode target code <- getAmode mem
2852 return (code `snocOL` JMP (OpAddr target))
2854 genJump (CmmLit lit) = do
2855 return (unitOL (JMP (OpImm (litToImm lit))))
-- general-expression clause (head elided in this listing)
2858 (reg,code) <- getSomeReg expr
2859 return (code `snocOL` JMP (OpReg reg))
2861 #endif /* i386_TARGET_ARCH */
2863 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2865 #if sparc_TARGET_ARCH
-- SPARC: CALL/JMP followed by a NOP for the branch delay slot.
2867 genJump (CmmLit (CmmLabel lbl))
2868 = return (toOL [CALL (Left target) 0 True, NOP])
2870 target = ImmCLbl lbl
-- general-expression clause (head elided in this listing)
2874 (target, code) <- getSomeReg tree
2875 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2877 #endif /* sparc_TARGET_ARCH */
2879 #if powerpc_TARGET_ARCH
-- PowerPC: direct JMP to a label, otherwise MTCTR then BCTR.
2880 genJump (CmmLit (CmmLabel lbl))
2881 = return (unitOL $ JMP lbl)
-- general-expression clause (head elided in this listing)
2885 (target,code) <- getSomeReg tree
2886 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2887 #endif /* powerpc_TARGET_ARCH */
2890 -- -----------------------------------------------------------------------------
2891 -- Unconditional branches
-- Delegates to the per-arch mkBranchInstr from RegAllocInfo.
2893 genBranch :: BlockId -> NatM InstrBlock
2895 genBranch = return . toOL . mkBranchInstr
2897 -- -----------------------------------------------------------------------------
2898 -- Conditional jumps
2901 Conditional jumps are always to local labels, so we can use branch
2902 instructions. We peek at the arguments to decide what kind of
2905 ALPHA: For comparisons with 0, we're laughing, because we can just do
2906 the desired conditional branch.
2908 I386: First, we have to ensure that the condition
2909 codes are set according to the supplied comparison operation.
2911 SPARC: First, we have to ensure that the condition codes are set
2912 according to the supplied comparison operation. We generate slightly
2913 different code for floating point comparisons, because a floating
2914 point operation cannot directly precede a @BF@. We assume the worst
2915 and fill that slot with a @NOP@.
2917 SPARC: Do not fill the delay slots here; you will confuse the register
-- NOTE(review): the name line of this signature ('genCondJump')
-- is elided in this listing.
2923 :: BlockId -- the branch target
2924 -> CmmExpr -- the condition on which to branch
2929 #if alpha_TARGET_ARCH
2931 genCondJump id (StPrim op [x, StInt 0])
2932 = getRegister x `thenNat` \ register ->
2933 getNewRegNat (registerRep register)
2936 code = registerCode register tmp
2937 value = registerName register tmp
2938 pk = registerRep register
2939 target = ImmCLbl lbl
2941 returnSeq code [BI (cmpOp op) value target]
2943 cmpOp CharGtOp = GTT
2945 cmpOp CharEqOp = EQQ
2947 cmpOp CharLtOp = LTT
2956 cmpOp WordGeOp = ALWAYS
2957 cmpOp WordEqOp = EQQ
2959 cmpOp WordLtOp = NEVER
2960 cmpOp WordLeOp = EQQ
2962 cmpOp AddrGeOp = ALWAYS
2963 cmpOp AddrEqOp = EQQ
2965 cmpOp AddrLtOp = NEVER
2966 cmpOp AddrLeOp = EQQ
2968 genCondJump lbl (StPrim op [x, StDouble 0.0])
2969 = getRegister x `thenNat` \ register ->
2970 getNewRegNat (registerRep register)
2973 code = registerCode register tmp
2974 value = registerName register tmp
2975 pk = registerRep register
2976 target = ImmCLbl lbl
2978 return (code . mkSeqInstr (BF (cmpOp op) value target))
2980 cmpOp FloatGtOp = GTT
2981 cmpOp FloatGeOp = GE
2982 cmpOp FloatEqOp = EQQ
2983 cmpOp FloatNeOp = NE
2984 cmpOp FloatLtOp = LTT
2985 cmpOp FloatLeOp = LE
2986 cmpOp DoubleGtOp = GTT
2987 cmpOp DoubleGeOp = GE
2988 cmpOp DoubleEqOp = EQQ
2989 cmpOp DoubleNeOp = NE
2990 cmpOp DoubleLtOp = LTT
2991 cmpOp DoubleLeOp = LE
2993 genCondJump lbl (StPrim op [x, y])
2995 = trivialFCode pr instr x y `thenNat` \ register ->
2996 getNewRegNat FF64 `thenNat` \ tmp ->
2998 code = registerCode register tmp
2999 result = registerName register tmp
3000 target = ImmCLbl lbl
3002 return (code . mkSeqInstr (BF cond result target))
3004 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
3006 fltCmpOp op = case op of
3020 (instr, cond) = case op of
3021 FloatGtOp -> (FCMP TF LE, EQQ)
3022 FloatGeOp -> (FCMP TF LTT, EQQ)
3023 FloatEqOp -> (FCMP TF EQQ, NE)
3024 FloatNeOp -> (FCMP TF EQQ, EQQ)
3025 FloatLtOp -> (FCMP TF LTT, NE)
3026 FloatLeOp -> (FCMP TF LE, NE)
3027 DoubleGtOp -> (FCMP TF LE, EQQ)
3028 DoubleGeOp -> (FCMP TF LTT, EQQ)
3029 DoubleEqOp -> (FCMP TF EQQ, NE)
3030 DoubleNeOp -> (FCMP TF EQQ, EQQ)
3031 DoubleLtOp -> (FCMP TF LTT, NE)
3032 DoubleLeOp -> (FCMP TF LE, NE)
3034 genCondJump lbl (StPrim op [x, y])
3035 = trivialCode instr x y `thenNat` \ register ->
3036 getNewRegNat IntRep `thenNat` \ tmp ->
3038 code = registerCode register tmp
3039 result = registerName register tmp
3040 target = ImmCLbl lbl
3042 return (code . mkSeqInstr (BI cond result target))
3044 (instr, cond) = case op of
3045 CharGtOp -> (CMP LE, EQQ)
3046 CharGeOp -> (CMP LTT, EQQ)
3047 CharEqOp -> (CMP EQQ, NE)
3048 CharNeOp -> (CMP EQQ, EQQ)
3049 CharLtOp -> (CMP LTT, NE)
3050 CharLeOp -> (CMP LE, NE)
3051 IntGtOp -> (CMP LE, EQQ)
3052 IntGeOp -> (CMP LTT, EQQ)
3053 IntEqOp -> (CMP EQQ, NE)
3054 IntNeOp -> (CMP EQQ, EQQ)
3055 IntLtOp -> (CMP LTT, NE)
3056 IntLeOp -> (CMP LE, NE)
3057 WordGtOp -> (CMP ULE, EQQ)
3058 WordGeOp -> (CMP ULT, EQQ)
3059 WordEqOp -> (CMP EQQ, NE)
3060 WordNeOp -> (CMP EQQ, EQQ)
3061 WordLtOp -> (CMP ULT, NE)
3062 WordLeOp -> (CMP ULE, NE)
3063 AddrGtOp -> (CMP ULE, EQQ)
3064 AddrGeOp -> (CMP ULT, EQQ)
3065 AddrEqOp -> (CMP EQQ, NE)
3066 AddrNeOp -> (CMP EQQ, EQQ)
3067 AddrLtOp -> (CMP ULT, NE)
3068 AddrLeOp -> (CMP ULE, NE)
3070 #endif /* alpha_TARGET_ARCH */
3072 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3074 #if i386_TARGET_ARCH
-- i386: flags from getCondCode, then a single conditional jump.
3076 genCondJump id bool = do
3077 CondCode _ cond code <- getCondCode bool
3078 return (code `snocOL` JXX cond id)
-- NOTE(review): the closing #endif for this section is elided in this listing
3082 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3084 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions are a single JXX; float conditions need
-- extra branching for the unordered (NaN) case (see condFltReg).
3086 genCondJump id bool = do
3087 CondCode is_float cond cond_code <- getCondCode bool
3090 return (cond_code `snocOL` JXX cond id)
3092 lbl <- getBlockIdNat
3094 -- see comment with condFltReg
3095 let code = case cond of
-- NOTE(review): the individual case arms are elided in this listing
3101 plain_test = unitOL (
3104 or_unordered = toOL [
3108 and_ordered = toOL [
3114 return (cond_code `appOL` code)
3118 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3120 #if sparc_TARGET_ARCH
-- SPARC: FP branches (BF) get a NOP before them as well as in the
-- delay slot; integer branches (BI) just get the delay-slot NOP.
3122 genCondJump bid bool = do
3123 CondCode is_float cond code <- getCondCode bool
3128 then [NOP, BF cond False bid, NOP]
3129 else [BI cond False bid, NOP]
3133 #endif /* sparc_TARGET_ARCH */
3136 #if powerpc_TARGET_ARCH
-- PowerPC: a single conditional branch instruction.
3138 genCondJump id bool = do
3139 CondCode is_float cond code <- getCondCode bool
3140 return (code `snocOL` BCC cond id)
3142 #endif /* powerpc_TARGET_ARCH */
3145 -- -----------------------------------------------------------------------------
3146 -- Generating C calls
3148 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
3149 -- @get_arg@, which moves the arguments to the correct registers/stack
3150 -- locations. Apart from that, the code is easy.
3152 -- (If applicable) Do not fill the delay slots here; you will confuse the
3153 -- register allocator.
-- NOTE(review): the name line of this signature ('genCCall') and its
-- result type are elided in this listing.
3156 :: CmmCallTarget -- function to call
3157 -> HintedCmmFormals -- where to put the result
3158 -> HintedCmmActuals -- arguments (of mixed type)
3161 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3163 #if alpha_TARGET_ARCH
-- Alpha C call: place arguments with get_arg, then LDA/JSR/LDGP
-- through the pv register. Legacy pre-monadic style; several lines
-- are elided in this listing.
3167 genCCall fn cconv result_regs args
3168 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3169 `thenNat` \ ((unused,_), argCode) ->
3171 nRegs = length allArgRegs - length unused
3172 code = asmSeqThen (map ($ []) argCode)
3175 LDA pv (AddrImm (ImmLab (ptext fn))),
3176 JSR ra (AddrReg pv) nRegs,
3177 LDGP gp (AddrReg ra)]
3179 ------------------------
3180 {- Try to get a value into a specific register (or registers) for
3181 a call. The first 6 arguments go into the appropriate
3182 argument register (separate registers for integer and floating
3183 point arguments, but used in lock-step), and the remaining
3184 arguments are dumped to the stack, beginning at 0(sp). Our
3185 first argument is a pair of the list of remaining argument
3186 registers to be assigned for this call and the next stack
3187 offset to use for overflowing arguments. This way,
3188 @get_Arg@ can be applied to all of a call's arguments using
-- (the closing of this block comment is elided in this listing)
3192 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3193 -> StixTree -- Current argument
3194 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3196 -- We have to use up all of our argument registers first...
3198 get_arg ((iDst,fDst):dsts, offset) arg
3199 = getRegister arg `thenNat` \ register ->
3201 reg = if isFloatType pk then fDst else iDst
3202 code = registerCode register reg
3203 src = registerName register reg
3204 pk = registerRep register
3207 if isFloatType pk then
3208 ((dsts, offset), if isFixed register then
3209 code . mkSeqInstr (FMOV src fDst)
3212 ((dsts, offset), if isFixed register then
3213 code . mkSeqInstr (OR src (RIReg src) iDst)
3216 -- Once we have run out of argument registers, we move to the
-- (stack-passing comment continuation elided in this listing)
3219 get_arg ([], offset) arg
3220 = getRegister arg `thenNat` \ register ->
3221 getNewRegNat (registerRep register)
3224 code = registerCode register tmp
3225 src = registerName register tmp
3226 pk = registerRep register
3227 sz = primRepToSize pk
3229 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3231 #endif /* alpha_TARGET_ARCH */
3233 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3235 #if i386_TARGET_ARCH
3237 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3238 -- write barrier compiles to no code on x86/x86-64;
3239 -- we keep it this long in order to prevent earlier optimisations.
3241 -- we only cope with a single result for foreign calls
3242 genCCall (CmmPrim op) [CmmHinted r _] args = do
3243 l1 <- getNewLabelNat
3244 l2 <- getNewLabelNat
3246 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3247 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3249 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3250 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3252 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3253 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3255 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3256 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3258 other_op -> outOfLineFloatOp op r args
3260 actuallyInlineFloatOp instr size [CmmHinted x _]
3261 = do res <- trivialUFCode size (instr size) x
3263 return (any (getRegisterReg (CmmLocal r)))
3265 genCCall target dest_regs args = do
3267 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3268 #if !darwin_TARGET_OS
3269 tot_arg_size = sum sizes
3271 raw_arg_size = sum sizes
3272 tot_arg_size = roundTo 16 raw_arg_size
3273 arg_pad_size = tot_arg_size - raw_arg_size
3274 delta0 <- getDeltaNat
3275 setDeltaNat (delta0 - arg_pad_size)
3278 push_codes <- mapM push_arg (reverse args)
3279 delta <- getDeltaNat
3282 -- deal with static vs dynamic call targets
3283 (callinsns,cconv) <-
3286 CmmCallee (CmmLit (CmmLabel lbl)) conv
3287 -> -- ToDo: stdcall arg sizes
3288 return (unitOL (CALL (Left fn_imm) []), conv)
3289 where fn_imm = ImmCLbl lbl
3291 -> do { (dyn_c, dyn_r) <- get_op expr
3292 ; ASSERT( isWord32 (cmmExprType expr) )
3293 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3296 #if darwin_TARGET_OS
3298 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3299 DELTA (delta0 - arg_pad_size)]
3300 `appOL` concatOL push_codes
3303 = concatOL push_codes
3304 call = callinsns `appOL`
3306 -- Deallocate parameters after call for ccall;
3307 -- but not for stdcall (callee does it)
3308 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3309 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3311 [DELTA (delta + tot_arg_size)]
3314 setDeltaNat (delta + tot_arg_size)
3317 -- assign the results, if necessary
3318 assign_code [] = nilOL
3319 assign_code [CmmHinted dest _hint]
3320 | isFloatType ty = unitOL (GMOV fake0 r_dest)
3321 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3322 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3323 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3325 ty = localRegType dest
3327 r_dest_hi = getHiVRegFromLo r_dest
3328 r_dest = getRegisterReg (CmmLocal dest)
3329 assign_code many = panic "genCCall.assign_code many"
3331 return (push_code `appOL`
3333 assign_code dest_regs)
-- | Size in bytes that one call argument occupies on the stack.
arg_size :: CmmType -> Int -- Width in bytes
arg_size = widthInBytes . typeWidth
-- | Round @x@ up to the next multiple of @a@ (used to keep the stack
-- pointer suitably aligned before the call).
roundTo a x = case x `mod` a of
    0 -> x
    r -> x + a - r
3343 push_arg :: HintedCmmActual {-current argument-}
3344 -> NatM InstrBlock -- code
3346 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3347 | isWord64 arg_ty = do
3348 ChildCode64 code r_lo <- iselExpr64 arg
3349 delta <- getDeltaNat
3350 setDeltaNat (delta - 8)
3352 r_hi = getHiVRegFromLo r_lo
3354 return ( code `appOL`
3355 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3356 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3361 (code, reg) <- get_op arg
3362 delta <- getDeltaNat
3363 let size = arg_size arg_ty -- Byte size
3364 setDeltaNat (delta-size)
3365 if (isFloatType arg_ty)
3366 then return (code `appOL`
3367 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3369 GST (floatSize (typeWidth arg_ty))
3370 reg (AddrBaseIndex (EABaseReg esp)
3374 else return (code `snocOL`
3375 PUSH II32 (OpReg reg) `snocOL`
3379 arg_ty = cmmExprType arg
3382 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3384 (reg,code) <- getSomeReg op
3387 #endif /* i386_TARGET_ARCH */
3389 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3391 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3393 outOfLineFloatOp mop res args
3395 dflags <- getDynFlagsNat
3396 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3397 let target = CmmCallee targetExpr CCallConv
3399 if isFloat64 (localRegType res)
3401 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3405 tmp = LocalReg uq f64
3407 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3408 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3409 return (code1 `appOL` code2)
3411 lbl = mkForeignLabel fn Nothing False
3414 MO_F32_Sqrt -> fsLit "sqrtf"
3415 MO_F32_Sin -> fsLit "sinf"
3416 MO_F32_Cos -> fsLit "cosf"
3417 MO_F32_Tan -> fsLit "tanf"
3418 MO_F32_Exp -> fsLit "expf"
3419 MO_F32_Log -> fsLit "logf"
3421 MO_F32_Asin -> fsLit "asinf"
3422 MO_F32_Acos -> fsLit "acosf"
3423 MO_F32_Atan -> fsLit "atanf"
3425 MO_F32_Sinh -> fsLit "sinhf"
3426 MO_F32_Cosh -> fsLit "coshf"
3427 MO_F32_Tanh -> fsLit "tanhf"
3428 MO_F32_Pwr -> fsLit "powf"
3430 MO_F64_Sqrt -> fsLit "sqrt"
3431 MO_F64_Sin -> fsLit "sin"
3432 MO_F64_Cos -> fsLit "cos"
3433 MO_F64_Tan -> fsLit "tan"
3434 MO_F64_Exp -> fsLit "exp"
3435 MO_F64_Log -> fsLit "log"
3437 MO_F64_Asin -> fsLit "asin"
3438 MO_F64_Acos -> fsLit "acos"
3439 MO_F64_Atan -> fsLit "atan"
3441 MO_F64_Sinh -> fsLit "sinh"
3442 MO_F64_Cosh -> fsLit "cosh"
3443 MO_F64_Tanh -> fsLit "tanh"
3444 MO_F64_Pwr -> fsLit "pow"
3446 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3448 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3450 #if x86_64_TARGET_ARCH
3452 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3453 -- write barrier compiles to no code on x86/x86-64;
3454 -- we keep it this long in order to prevent earlier optimisations.
3457 genCCall (CmmPrim op) [CmmHinted r _] args =
3458 outOfLineFloatOp op r args
3460 genCCall target dest_regs args = do
3462 -- load up the register arguments
3463 (stack_args, aregs, fregs, load_args_code)
3464 <- load_args args allArgRegs allFPArgRegs nilOL
3467 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3468 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3469 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3470 -- for annotating the call instruction with
3472 sse_regs = length fp_regs_used
3474 tot_arg_size = arg_size * length stack_args
3476 -- On entry to the called function, %rsp should be aligned
3477 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3478 -- the return address is 16-byte aligned). In STG land
3479 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3480 -- need to make sure we push a multiple of 16-bytes of args,
3481 -- plus the return address, to get the correct alignment.
3482 -- Urg, this is hard. We need to feed the delta back into
3483 -- the arg pushing code.
3484 (real_size, adjust_rsp) <-
3485 if tot_arg_size `rem` 16 == 0
3486 then return (tot_arg_size, nilOL)
3487 else do -- we need to adjust...
3488 delta <- getDeltaNat
3489 setDeltaNat (delta-8)
3490 return (tot_arg_size+8, toOL [
3491 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3495 -- push the stack args, right to left
3496 push_code <- push_args (reverse stack_args) nilOL
3497 delta <- getDeltaNat
3499 -- deal with static vs dynamic call targets
3500 (callinsns,cconv) <-
3503 CmmCallee (CmmLit (CmmLabel lbl)) conv
3504 -> -- ToDo: stdcall arg sizes
3505 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3506 where fn_imm = ImmCLbl lbl
3508 -> do (dyn_r, dyn_c) <- getSomeReg expr
3509 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3512 -- The x86_64 ABI requires us to set %al to the number of SSE
3513 -- registers that contain arguments, if the called routine
3514 -- is a varargs function. We don't know whether it's a
3515 -- varargs function or not, so we have to assume it is.
3517 -- It's not safe to omit this assignment, even if the number
3518 -- of SSE regs in use is zero. If %al is larger than 8
3519 -- on entry to a varargs function, seg faults ensue.
3520 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3522 let call = callinsns `appOL`
3524 -- Deallocate parameters after call for ccall;
3525 -- but not for stdcall (callee does it)
3526 (if cconv == StdCallConv || real_size==0 then [] else
3527 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3529 [DELTA (delta + real_size)]
3532 setDeltaNat (delta + real_size)
-- assign the results, if necessary.
-- The System V AMD64 ABI returns floating-point values in %xmm0 and
-- integer values in %rax; copy from there into the destination vreg.
assign_code [] = nilOL
assign_code [CmmHinted dest _hint] =
    case typeWidth rep of
        W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
        -- BUG FIX: this branch previously used (floatSize W32), i.e. a
        -- 32-bit float move for a 64-bit double result, truncating it.
        W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
        _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
  where
    rep    = localRegType dest
    r_dest = getRegisterReg (CmmLocal dest)
-- multiple result registers from a C call are not supported
assign_code many = panic "genCCall.assign_code many"
3547 return (load_args_code `appOL`
3550 assign_eax sse_regs `appOL`
3552 assign_code dest_regs)
3555 arg_size = 8 -- always, at the mo
3557 load_args :: [CmmHinted CmmExpr]
3558 -> [Reg] -- int regs avail for args
3559 -> [Reg] -- FP regs avail for args
3561 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3562 load_args args [] [] code = return (args, [], [], code)
3563 -- no more regs to use
3564 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3565 -- no more args to push
3566 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3567 | isFloatType arg_rep =
3571 arg_code <- getAnyReg arg
3572 load_args rest aregs rs (code `appOL` arg_code r)
3577 arg_code <- getAnyReg arg
3578 load_args rest rs fregs (code `appOL` arg_code r)
3580 arg_rep = cmmExprType arg
3583 (args',ars,frs,code') <- load_args rest aregs fregs code
3584 return ((CmmHinted arg hint):args', ars, frs, code')
3586 push_args [] code = return code
3587 push_args ((CmmHinted arg hint):rest) code
3588 | isFloatType arg_rep = do
3589 (arg_reg, arg_code) <- getSomeReg arg
3590 delta <- getDeltaNat
3591 setDeltaNat (delta-arg_size)
3592 let code' = code `appOL` arg_code `appOL` toOL [
3593 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3594 DELTA (delta-arg_size),
3595 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3596 push_args rest code'
3599 -- we only ever generate word-sized function arguments. Promotion
3600 -- has already happened: our Int8# type is kept sign-extended
3601 -- in an Int#, for example.
3602 ASSERT(width == W64) return ()
3603 (arg_op, arg_code) <- getOperand arg
3604 delta <- getDeltaNat
3605 setDeltaNat (delta-arg_size)
3606 let code' = code `appOL` arg_code `appOL` toOL [
3608 DELTA (delta-arg_size)]
3609 push_args rest code'
3611 arg_rep = cmmExprType arg
3612 width = typeWidth arg_rep
3615 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3617 #if sparc_TARGET_ARCH
3619 The SPARC calling convention is an absolute
3620 nightmare. The first 6x32 bits of arguments are mapped into
3621 %o0 through %o5, and the remaining arguments are dumped to the
3622 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3624 If we have to put args on the stack, move %o6==%sp down by
3625 the number of words to go on the stack, to ensure there's enough space.
3627 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3628 16 words above the stack pointer is a word for the address of
3629 a structure return value. I use this as a temporary location
3630 for moving values from float to int regs. Certainly it isn't
3631 safe to put anything in the 16 words starting at %sp, since
3632 this area can get trashed at any time due to window overflows
3633 caused by signal handlers.
3635 A final complication (if the above isn't enough) is that
3636 we can't blithely calculate the arguments one by one into
3637 %o0 .. %o5. Consider the following nested calls:
3641 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3642 the inner call will itself use %o0, which trashes the value put there
3643 in preparation for the outer call. Upshot: we need to calculate the
3644 args into temporary regs, and move those to arg regs or onto the
3645 stack only immediately prior to the call proper. Sigh.
3648 :: CmmCallTarget -- function to call
3649 -> HintedCmmFormals -- where to put the result
3650 -> HintedCmmActuals -- arguments (of mixed type)
3656 -- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
3657 -- are guaranteed to take place before writes afterwards (unlike on PowerPC).
3658 -- Ref: Section 8.4 of the SPARC V9 Architecture manual.
3660 -- In the SPARC case we don't need a barrier.
3662 genCCall (CmmPrim (MO_WriteBarrier)) _ _
3665 genCCall target dest_regs argsAndHints
3667 -- strip hints from the arg regs
3668 let args :: [CmmExpr]
3669 args = map hintlessCmm argsAndHints
3672 -- work out the arguments, and assign them to integer regs
3673 argcode_and_vregs <- mapM arg_to_int_vregs args
3674 let (argcodes, vregss) = unzip argcode_and_vregs
3675 let vregs = concat vregss
3677 let n_argRegs = length allArgRegs
3678 let n_argRegs_used = min (length vregs) n_argRegs
3681 -- deal with static vs dynamic call targets
3682 callinsns <- case target of
3683 CmmCallee (CmmLit (CmmLabel lbl)) conv ->
3684 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3687 -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3688 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3691 -> do res <- outOfLineFloatOp mop
3692 lblOrMopExpr <- case res of
3694 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3697 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3698 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3702 let argcode = concatOL argcodes
3704 let (move_sp_down, move_sp_up)
3705 = let diff = length vregs - n_argRegs
3706 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3709 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3712 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3716 move_sp_down `appOL`
3717 transfer_code `appOL`
3721 assign_code dest_regs
3724 -- | Generate code to calculate an argument, and move it into one
3725 -- or two integer vregs.
3726 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3727 arg_to_int_vregs arg
3729 -- If the expr produces a 64 bit int, then we can just use iselExpr64
3730 | isWord64 (cmmExprType arg)
3731 = do (ChildCode64 code r_lo) <- iselExpr64 arg
3732 let r_hi = getHiVRegFromLo r_lo
3733 return (code, [r_hi, r_lo])
3736 = do (src, code) <- getSomeReg arg
3737 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3738 let pk = cmmExprType arg
3740 case cmmTypeSize pk of
3742 -- Load a 64 bit float return value into two integer regs.
3744 v1 <- getNewRegNat II32
3745 v2 <- getNewRegNat II32
3747 let Just f0_high = fPair f0
3751 FMOV FF64 src f0 `snocOL`
3752 ST FF32 f0 (spRel 16) `snocOL`
3753 LD II32 (spRel 16) v1 `snocOL`
3754 ST FF32 f0_high (spRel 16) `snocOL`
3755 LD II32 (spRel 16) v2
3757 return (code2, [v1,v2])
3759 -- Load a 32 bit float return value into an integer reg
3761 v1 <- getNewRegNat II32
3765 ST FF32 src (spRel 16) `snocOL`
3766 LD II32 (spRel 16) v1
3768 return (code2, [v1])
3770 -- Move an integer return value into its destination reg.
3772 v1 <- getNewRegNat II32
3776 OR False g0 (RIReg src) v1
3778 return (code2, [v1])
3781 -- | Move args from the integer vregs into which they have been
3782 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3784 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3787 move_final [] _ offset
3790 -- out of aregs; move to stack
3791 move_final (v:vs) [] offset
3792 = ST II32 v (spRel offset)
3793 : move_final vs [] (offset+1)
3795 -- move into an arg (%o[0..5]) reg
3796 move_final (v:vs) (a:az) offset
3797 = OR False g0 (RIReg v) a
3798 : move_final vs az offset
3801 -- | Assign results returned from the call into their
3804 assign_code :: [CmmHinted LocalReg] -> OrdList Instr
3805 assign_code [] = nilOL
3807 assign_code [CmmHinted dest _hint]
3808 = let rep = localRegType dest
3809 width = typeWidth rep
3810 r_dest = getRegisterReg (CmmLocal dest)
3815 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3819 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3821 | not $ isFloatType rep
3823 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3825 | not $ isFloatType rep
3827 , r_dest_hi <- getHiVRegFromLo r_dest
3828 = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
3829 , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
3833 -- | Generate a call to implement an out-of-line floating point operation
3836 -> NatM (Either CLabel CmmExpr)
3838 outOfLineFloatOp mop
3839 = do let functionName
3840 = outOfLineFloatOp_table mop
3842 dflags <- getDynFlagsNat
3843 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
3844 $ mkForeignLabel functionName Nothing True
3848 CmmLit (CmmLabel lbl) -> Left lbl
3851 return mopLabelOrExpr
3854 -- | Decide what C function to use to implement a CallishMachOp
3856 outOfLineFloatOp_table
3860 outOfLineFloatOp_table mop
3862 MO_F32_Exp -> fsLit "expf"
3863 MO_F32_Log -> fsLit "logf"
3864 MO_F32_Sqrt -> fsLit "sqrtf"
3865 MO_F32_Pwr -> fsLit "powf"
3867 MO_F32_Sin -> fsLit "sinf"
3868 MO_F32_Cos -> fsLit "cosf"
3869 MO_F32_Tan -> fsLit "tanf"
3871 MO_F32_Asin -> fsLit "asinf"
3872 MO_F32_Acos -> fsLit "acosf"
3873 MO_F32_Atan -> fsLit "atanf"
3875 MO_F32_Sinh -> fsLit "sinhf"
3876 MO_F32_Cosh -> fsLit "coshf"
3877 MO_F32_Tanh -> fsLit "tanhf"
3879 MO_F64_Exp -> fsLit "exp"
3880 MO_F64_Log -> fsLit "log"
3881 MO_F64_Sqrt -> fsLit "sqrt"
3882 MO_F64_Pwr -> fsLit "pow"
3884 MO_F64_Sin -> fsLit "sin"
3885 MO_F64_Cos -> fsLit "cos"
3886 MO_F64_Tan -> fsLit "tan"
3888 MO_F64_Asin -> fsLit "asin"
3889 MO_F64_Acos -> fsLit "acos"
3890 MO_F64_Atan -> fsLit "atan"
3892 MO_F64_Sinh -> fsLit "sinh"
3893 MO_F64_Cosh -> fsLit "cosh"
3894 MO_F64_Tanh -> fsLit "tanh"
3896 other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
3897 (pprCallishMachOp mop)
3900 #endif /* sparc_TARGET_ARCH */
3902 #if powerpc_TARGET_ARCH
3904 #if darwin_TARGET_OS || linux_TARGET_OS
3906 The PowerPC calling convention for Darwin/Mac OS X
3907 is described in Apple's document
3908 "Inside Mac OS X - Mach-O Runtime Architecture".
3910 PowerPC Linux uses the System V Release 4 Calling Convention
3911 for PowerPC. It is described in the
3912 "System V Application Binary Interface PowerPC Processor Supplement".
3914 Both conventions are similar:
3915 Parameters may be passed in general-purpose registers starting at r3, in
3916 floating point registers starting at f1, or on the stack.
3918 But there are substantial differences:
3919 * The number of registers used for parameter passing and the exact set of
3920 nonvolatile registers differs (see MachRegs.lhs).
3921 * On Darwin, stack space is always reserved for parameters, even if they are
3922 passed in registers. The called routine may choose to save parameters from
3923 registers to the corresponding space on the stack.
3924 * On Darwin, a corresponding number of GPRs is skipped when a floating point
3925 parameter is passed in an FPR.
3926 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3927 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3928 Darwin just treats an I64 like two separate II32s (high word first).
3929 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3930 4-byte aligned like everything else on Darwin.
3931 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3932 PowerPC Linux does not agree, so neither do we.
3934 According to both conventions, the parameter area should be part of the
3935 caller's stack frame, allocated in the caller's prologue code (large enough
3936 to hold the parameter lists for all called routines). The NCG already
3937 uses the stack for register spilling, leaving 64 bytes free at the top.
3938 If we need a larger parameter area than that, we just allocate a new stack
3939 frame just before ccalling.
3943 genCCall (CmmPrim MO_WriteBarrier) _ _
3944 = return $ unitOL LWSYNC
3946 genCCall target dest_regs argsAndHints
3947 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3948 -- we rely on argument promotion in the codeGen
3950 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3952 allArgRegs allFPArgRegs
3956 (labelOrExpr, reduceToFF32) <- case target of
3957 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3958 CmmCallee expr conv -> return (Right expr, False)
3959 CmmPrim mop -> outOfLineFloatOp mop
3961 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3962 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3967 `snocOL` BL lbl usedRegs
3970 (dynReg, dynCode) <- getSomeReg dyn
3972 `snocOL` MTCTR dynReg
3974 `snocOL` BCTRL usedRegs
3977 #if darwin_TARGET_OS
3978 initialStackOffset = 24
3979 -- size of linkage area + size of arguments, in bytes
3980 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3981 map (widthInBytes . typeWidth) argReps
3982 #elif linux_TARGET_OS
3983 initialStackOffset = 8
3984 stackDelta finalStack = roundTo 16 finalStack
3986 args = map hintlessCmm argsAndHints
3987 argReps = map cmmExprType args
-- | Round @x@ up to the next multiple of @a@ (stack-alignment helper).
roundTo a x = case x `mod` a of
    0 -> x
    r -> x + a - r
3992 move_sp_down finalStack
3994 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3997 where delta = stackDelta finalStack
3998 move_sp_up finalStack
4000 toOL [ADD sp sp (RIImm (ImmInt delta)),
4003 where delta = stackDelta finalStack
4006 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
4007 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
4008 accumCode accumUsed | isWord64 arg_ty =
4010 ChildCode64 code vr_lo <- iselExpr64 arg
4011 let vr_hi = getHiVRegFromLo vr_lo
4013 #if darwin_TARGET_OS
4018 (accumCode `appOL` code
4019 `snocOL` storeWord vr_hi gprs stackOffset
4020 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
4021 ((take 2 gprs) ++ accumUsed)
4023 storeWord vr (gpr:_) offset = MR gpr vr
4024 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
4026 #elif linux_TARGET_OS
4027 let stackOffset' = roundTo 8 stackOffset
4028 stackCode = accumCode `appOL` code
4029 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
4030 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
4031 regCode hireg loreg =
4032 accumCode `appOL` code
4033 `snocOL` MR hireg vr_hi
4034 `snocOL` MR loreg vr_lo
4037 hireg : loreg : regs | even (length gprs) ->
4038 passArguments args regs fprs stackOffset
4039 (regCode hireg loreg) (hireg : loreg : accumUsed)
4040 _skipped : hireg : loreg : regs ->
4041 passArguments args regs fprs stackOffset
4042 (regCode hireg loreg) (hireg : loreg : accumUsed)
4043 _ -> -- only one or no regs left
4044 passArguments args [] fprs (stackOffset'+8)
4048 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
4049 | reg : _ <- regs = do
4050 register <- getRegister arg
4051 let code = case register of
4052 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
4053 Any _ acode -> acode reg
4057 #if darwin_TARGET_OS
4058 -- The Darwin ABI requires that we reserve stack slots for register parameters
4059 (stackOffset + stackBytes)
4060 #elif linux_TARGET_OS
4061 -- ... the SysV ABI doesn't.
4064 (accumCode `appOL` code)
4067 (vr, code) <- getSomeReg arg
4071 (stackOffset' + stackBytes)
4072 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
4075 #if darwin_TARGET_OS
4076 -- stackOffset is at least 4-byte aligned
4077 -- The Darwin ABI is happy with that.
4078 stackOffset' = stackOffset
4080 -- ... the SysV ABI requires 8-byte alignment for doubles.
4081 stackOffset' | isFloatType rep && typeWidth rep == W64 =
4082 roundTo 8 stackOffset
4083 | otherwise = stackOffset
4085 stackSlot = AddrRegImm sp (ImmInt stackOffset')
4086 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
4087 II32 -> (1, 0, 4, gprs)
4088 #if darwin_TARGET_OS
4089 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
4091 FF32 -> (1, 1, 4, fprs)
4092 FF64 -> (2, 1, 8, fprs)
4093 #elif linux_TARGET_OS
4094 -- ... the SysV ABI doesn't.
4095 FF32 -> (0, 1, 4, fprs)
4096 FF64 -> (0, 1, 8, fprs)
4099 moveResult reduceToFF32 =
4102 [CmmHinted dest _hint]
4103 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
4104 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
4105 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
4107 | otherwise -> unitOL (MR r_dest r3)
4108 where rep = cmmRegType (CmmLocal dest)
4109 r_dest = getRegisterReg (CmmLocal dest)
4111 outOfLineFloatOp mop =
4113 dflags <- getDynFlagsNat
4114 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
4115 mkForeignLabel functionName Nothing True
4116 let mopLabelOrExpr = case mopExpr of
4117 CmmLit (CmmLabel lbl) -> Left lbl
4119 return (mopLabelOrExpr, reduce)
4121 (functionName, reduce) = case mop of
4122 MO_F32_Exp -> (fsLit "exp", True)
4123 MO_F32_Log -> (fsLit "log", True)
4124 MO_F32_Sqrt -> (fsLit "sqrt", True)
4126 MO_F32_Sin -> (fsLit "sin", True)
4127 MO_F32_Cos -> (fsLit "cos", True)
4128 MO_F32_Tan -> (fsLit "tan", True)
4130 MO_F32_Asin -> (fsLit "asin", True)
4131 MO_F32_Acos -> (fsLit "acos", True)
4132 MO_F32_Atan -> (fsLit "atan", True)
4134 MO_F32_Sinh -> (fsLit "sinh", True)
4135 MO_F32_Cosh -> (fsLit "cosh", True)
4136 MO_F32_Tanh -> (fsLit "tanh", True)
4137 MO_F32_Pwr -> (fsLit "pow", True)
4139 MO_F64_Exp -> (fsLit "exp", False)
4140 MO_F64_Log -> (fsLit "log", False)
4141 MO_F64_Sqrt -> (fsLit "sqrt", False)
4143 MO_F64_Sin -> (fsLit "sin", False)
4144 MO_F64_Cos -> (fsLit "cos", False)
4145 MO_F64_Tan -> (fsLit "tan", False)
4147 MO_F64_Asin -> (fsLit "asin", False)
4148 MO_F64_Acos -> (fsLit "acos", False)
4149 MO_F64_Atan -> (fsLit "atan", False)
4151 MO_F64_Sinh -> (fsLit "sinh", False)
4152 MO_F64_Cosh -> (fsLit "cosh", False)
4153 MO_F64_Tanh -> (fsLit "tanh", False)
4154 MO_F64_Pwr -> (fsLit "pow", False)
4155 other -> pprPanic "genCCall(ppc): unknown callish op"
4156 (pprCallishMachOp other)
4158 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
4160 #endif /* powerpc_TARGET_ARCH */
4163 -- -----------------------------------------------------------------------------
4164 -- Generating a table-branch
4166 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
4168 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4172 (reg,e_code) <- getSomeReg expr
4173 lbl <- getNewLabelNat
4174 dflags <- getDynFlagsNat
4175 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4176 (tableReg,t_code) <- getSomeReg $ dynRef
4178 jumpTable = map jumpTableEntryRel ids
-- One position-independent jump-table slot: the distance from the
-- destination block's label to the table label 'lbl', or a zero word
-- when the slot has no destination.
jumpTableEntryRel Nothing = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntryRel (Just (BlockId id))
    = CmmStaticLit (CmmLabelDiffOff (mkAsmTempLabel id) lbl 0)
4186 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
4187 (EAIndex reg wORD_SIZE) (ImmInt 0))
4189 #if x86_64_TARGET_ARCH
4190 #if darwin_TARGET_OS
4191 -- on Mac OS X/x86_64, put the jump table in the text section
4192 -- to work around a limitation of the linker.
4193 -- ld64 is unable to handle the relocations for
4195 -- if L0 is not preceded by a non-anonymous label in its section.
4197 code = e_code `appOL` t_code `appOL` toOL [
4198 ADD (intSize wordWidth) op (OpReg tableReg),
4199 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
4200 LDATA Text (CmmDataLabel lbl : jumpTable)
4203 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
4204 -- relocations, hence we only get 32-bit offsets in the jump
4205 -- table. As these offsets are always negative we need to properly
4206 -- sign extend them to 64-bit. This hack should be removed in
4207 -- conjunction with the hack in PprMach.hs/pprDataItem once
4208 -- binutils 2.17 is standard.
4209 code = e_code `appOL` t_code `appOL` toOL [
4210 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4212 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
4213 (EAIndex reg wORD_SIZE) (ImmInt 0)))
4215 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
4216 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4220 code = e_code `appOL` t_code `appOL` toOL [
4221 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4222 ADD (intSize wordWidth) op (OpReg tableReg),
4223 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4229 (reg,e_code) <- getSomeReg expr
4230 lbl <- getNewLabelNat
4232 jumpTable = map jumpTableEntry ids
4233 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
4234 code = e_code `appOL` toOL [
4235 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4236 JMP_TBL op [ id | Just id <- ids ]
4240 #elif powerpc_TARGET_ARCH
4244 (reg,e_code) <- getSomeReg expr
4245 tmp <- getNewRegNat II32
4246 lbl <- getNewLabelNat
4247 dflags <- getDynFlagsNat
4248 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4249 (tableReg,t_code) <- getSomeReg $ dynRef
4251 jumpTable = map jumpTableEntryRel ids
-- One position-independent jump-table slot: the offset of the
-- destination block's label from the table label 'lbl' (zero word
-- for an absent destination).
jumpTableEntryRel Nothing = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntryRel (Just (BlockId id))
    = CmmStaticLit (CmmLabelDiffOff (mkAsmTempLabel id) lbl 0)
4259 code = e_code `appOL` t_code `appOL` toOL [
4260 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4261 SLW tmp reg (RIImm (ImmInt 2)),
4262 LD II32 tmp (AddrRegReg tableReg tmp),
4263 ADD tmp tmp (RIReg tableReg),
4265 BCTR [ id | Just id <- ids ]
4270 (reg,e_code) <- getSomeReg expr
4271 tmp <- getNewRegNat II32
4272 lbl <- getNewLabelNat
4274 jumpTable = map jumpTableEntry ids
4276 code = e_code `appOL` toOL [
4277 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4278 SLW tmp reg (RIImm (ImmInt 2)),
4279 ADDIS tmp tmp (HA (ImmCLbl lbl)),
4280 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4282 BCTR [ id | Just id <- ids ]
4285 #elif sparc_TARGET_ARCH
4288 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4291 = do (e_reg, e_code) <- getSomeReg expr
4293 base_reg <- getNewRegNat II32
4294 offset_reg <- getNewRegNat II32
4295 dst <- getNewRegNat II32
4297 label <- getNewLabelNat
4298 let jumpTable = map jumpTableEntry ids
4300 return $ e_code `appOL`
4303 [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
4305 -- load base of jump table
4306 , SETHI (HI (ImmCLbl label)) base_reg
4307 , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
4309 -- the addrs in the table are 32 bits wide..
4310 , SLL e_reg (RIImm $ ImmInt 2) offset_reg
4312 -- load and jump to the destination
4313 , LD II32 (AddrRegReg base_reg offset_reg) dst
4314 , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
4318 #error "ToDo: genSwitch"
-- | Render one (absolute) jump-table slot as static data: the address
-- of the destination block's assembler-temp label, or a zero word when
-- the slot has no destination.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntry (Just (BlockId id)) =
    CmmStaticLit (CmmLabel (mkAsmTempLabel id))
4328 -- -----------------------------------------------------------------------------
4330 -- -----------------------------------------------------------------------------
4333 -- -----------------------------------------------------------------------------
4334 -- 'condIntReg' and 'condFltReg': condition codes into registers
4336 -- Turn those condition codes into integers now (when they appear on
4337 -- the right hand side of an assignment).
4339 -- (If applicable) Do not fill the delay slots here; you will confuse the
4340 -- register allocator.
4342 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4344 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4346 #if alpha_TARGET_ARCH
4347 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4348 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4349 #endif /* alpha_TARGET_ARCH */
4351 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4353 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4355 condIntReg cond x y = do
4356 CondCode _ cond cond_code <- condIntCode cond x y
4357 tmp <- getNewRegNat II8
4359 code dst = cond_code `appOL` toOL [
4360 SETCC cond (OpReg tmp),
4361 MOVZxL II8 (OpReg tmp) (OpReg dst)
4364 return (Any II32 code)
4368 #if i386_TARGET_ARCH
4370 condFltReg cond x y = do
4371 CondCode _ cond cond_code <- condFltCode cond x y
4372 tmp <- getNewRegNat II8
4374 code dst = cond_code `appOL` toOL [
4375 SETCC cond (OpReg tmp),
4376 MOVZxL II8 (OpReg tmp) (OpReg dst)
4379 return (Any II32 code)
4383 #if x86_64_TARGET_ARCH
-- Floating-point comparison on x86_64 (SSE2 compares): must account for
-- unordered operands (NaN), which set the parity flag; see the long
-- comment below for the strategy.
4385 condFltReg cond x y = do
4386 CondCode _ cond cond_code <- condFltCode cond x y
4387 tmp1 <- getNewRegNat wordSize
4388 tmp2 <- getNewRegNat wordSize
4390 -- We have to worry about unordered operands (eg. comparisons
4391 -- against NaN). If the operands are unordered, the comparison
4392 -- sets the parity flag, carry flag and zero flag.
4393 -- All comparisons are supposed to return false for unordered
4394 -- operands except for !=, which returns true.
4396 -- Optimisation: we don't have to test the parity flag if we
4397 -- know the test has already excluded the unordered case: eg >
4398 -- and >= test for a zero carry flag, which can only occur for
4399 -- ordered operands.
4401 -- ToDo: by reversing comparisons we could avoid testing the
4402 -- parity flag in more cases.
-- Dispatch on the condition: NE must be true for unordered operands,
-- GU/GEU already exclude the unordered case, everything else must be
-- forced false when the operands are unordered.
4407 NE -> or_unordered dst
4408 GU -> plain_test dst
4409 GEU -> plain_test dst
4410 _ -> and_ordered dst)
-- No parity test needed: just SETcc and zero-extend.
4412 plain_test dst = toOL [
4413 SETCC cond (OpReg tmp1),
4414 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- Result is (cond OR parity): true when the compare succeeded or the
-- operands were unordered (the != case).
4416 or_unordered dst = toOL [
4417 SETCC cond (OpReg tmp1),
4418 SETCC PARITY (OpReg tmp2),
4419 OR II8 (OpReg tmp1) (OpReg tmp2),
4420 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- Result is (cond AND not-parity): forced false for unordered operands.
4422 and_ordered dst = toOL [
4423 SETCC cond (OpReg tmp1),
4424 SETCC NOTPARITY (OpReg tmp2),
4425 AND II8 (OpReg tmp1) (OpReg tmp2),
4426 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4429 return (Any II32 code)
4433 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4435 #if sparc_TARGET_ARCH
-- Special case: equality against literal zero, using a subtract-with-carry
-- sequence instead of a branch (presumably yields 0/1 via the carry bit --
-- TODO confirm against the SPARC V8 SUBcc/SUBX semantics).
4437 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4438 (src, code) <- getSomeReg x
4439 tmp <- getNewRegNat II32
4441 code__2 dst = code `appOL` toOL [
4442 SUB False True g0 (RIReg src) g0,
4443 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4444 return (Any II32 code__2)
-- General equality: XOR the operands so the result is zero iff equal,
-- then reuse the compare-against-zero trick above.
4446 condIntReg EQQ x y = do
4447 (src1, code1) <- getSomeReg x
4448 (src2, code2) <- getSomeReg y
4449 tmp1 <- getNewRegNat II32
4450 tmp2 <- getNewRegNat II32
4452 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4453 XOR False src1 (RIReg src2) dst,
4454 SUB False True g0 (RIReg dst) g0,
4455 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4456 return (Any II32 code__2)
-- Inequality against literal zero: dual of the EQQ/0 case (add-with-carry
-- instead of subtract-with-carry).
4458 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4459 (src, code) <- getSomeReg x
4460 tmp <- getNewRegNat II32
4462 code__2 dst = code `appOL` toOL [
4463 SUB False True g0 (RIReg src) g0,
4464 ADD True False g0 (RIImm (ImmInt 0)) dst]
4465 return (Any II32 code__2)
-- General inequality: XOR then compare against zero, as for EQQ.
4467 condIntReg NE x y = do
4468 (src1, code1) <- getSomeReg x
4469 (src2, code2) <- getSomeReg y
4470 tmp1 <- getNewRegNat II32
4471 tmp2 <- getNewRegNat II32
4473 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4474 XOR False src1 (RIReg src2) dst,
4475 SUB False True g0 (RIReg dst) g0,
4476 ADD True False g0 (RIImm (ImmInt 0)) dst]
4477 return (Any II32 code__2)
-- Fallback for all other conditions: branch over two blocks, loading the
-- destination with 0 on one path and 1 on the other. The NOPs fill the
-- branch delay slots (see the warning at the top of this section).
4479 condIntReg cond x y = do
4480 bid1@(BlockId lbl1) <- getBlockIdNat
4481 bid2@(BlockId lbl2) <- getBlockIdNat
4482 CondCode _ cond cond_code <- condIntCode cond x y
4484 code__2 dst = cond_code `appOL` toOL [
4485 BI cond False bid1, NOP,
4486 OR False g0 (RIImm (ImmInt 0)) dst,
4487 BI ALWAYS False bid2, NOP,
4489 OR False g0 (RIImm (ImmInt 1)) dst,
4491 return (Any II32 code__2)
-- Floating-point version of the branching fallback above, using the
-- float-branch instruction BF instead of BI.
4493 condFltReg cond x y = do
4494 bid1@(BlockId lbl1) <- getBlockIdNat
4495 bid2@(BlockId lbl2) <- getBlockIdNat
4496 CondCode _ cond cond_code <- condFltCode cond x y
4498 code__2 dst = cond_code `appOL` toOL [
4500 BF cond False bid1, NOP,
4501 OR False g0 (RIImm (ImmInt 0)) dst,
4502 BI ALWAYS False bid2, NOP,
4504 OR False g0 (RIImm (ImmInt 1)) dst,
4506 return (Any II32 code__2)
4508 #endif /* sparc_TARGET_ARCH */
4510 #if powerpc_TARGET_ARCH
-- Shared helper: materialise the result of any condition-setting
-- computation. Extracts the relevant CR bit with RLWINM (rotate-left and
-- mask), negating it first via CRNOR when the condition is the negated
-- form of a testable bit.
4511 condReg getCond = do
4512 lbl1 <- getBlockIdNat
4513 lbl2 <- getBlockIdNat
4514 CondCode _ cond cond_code <- getCond
-- An older branch-based implementation is kept commented out here.
4516 {- code dst = cond_code `appOL` toOL [
4525 code dst = cond_code
-- Shift CR bit (bit+1) into the least-significant position of dst.
4529 RLWINM dst dst (bit + 1) 31 31
4532 negate_code | do_negate = unitOL (CRNOR bit bit bit)
-- Map the condition to (CR bit index, whether it must be negated);
-- the full case alternatives are elided in this excerpt.
4535 (bit, do_negate) = case cond of
4549 return (Any II32 code)
-- Integer and float versions both go through condReg.
4551 condIntReg cond x y = condReg (condIntCode cond x y)
4552 condFltReg cond x y = condReg (condFltCode cond x y)
4553 #endif /* powerpc_TARGET_ARCH */
4556 -- -----------------------------------------------------------------------------
4557 -- 'trivial*Code': deal with trivial instructions
4559 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4560 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4561 -- Only look for constants on the right hand side, because that's
4562 -- where the generic optimizer will have put them.
4564 -- Similarly, for unary instructions, we don't have to worry about
4565 -- matching an StInt as the argument, because genericOpt will already
4566 -- have handled the constant-folding.
-- The IF_ARCH_* CPP macros below select, per target architecture, the
-- type of the instruction-constructor argument each trivial*Code takes.
4569 :: Width -- Int only
4570 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
-- On x86/x86_64 an optional "reversed" constructor is also taken, used
-- when a literal appears as the *left* operand (see trivialCode below).
4571 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4572 -> Maybe (Operand -> Operand -> Instr)
4573 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4574 -> Maybe (Operand -> Operand -> Instr)
4575 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4576 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4578 -> CmmExpr -> CmmExpr -- the two arguments
-- PowerPC defines its own trivialFCode/trivialUFCode variants, so these
-- shared signatures are excluded there.
4581 #ifndef powerpc_TARGET_ARCH
4583 :: Width -- Floating point only
4584 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4585 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4586 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4587 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4589 -> CmmExpr -> CmmExpr -- the two arguments
4595 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4596 ,IF_ARCH_i386 ((Operand -> Instr)
4597 ,IF_ARCH_x86_64 ((Operand -> Instr)
4598 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4599 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4601 -> CmmExpr -- the one argument
4604 #ifndef powerpc_TARGET_ARCH
4607 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4608 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4609 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4610 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4612 -> CmmExpr -- the one argument
4616 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4618 #if alpha_TARGET_ARCH
-- Alpha code uses the older explicit `thenNat` monadic style rather than
-- do-notation, and the registerCode/registerName accessors of the old
-- Register representation.
-- Dyadic op with an immediate second operand: emit it as RIImm directly.
4620 trivialCode instr x (StInt y)
4622 = getRegister x `thenNat` \ register ->
4623 getNewRegNat IntRep `thenNat` \ tmp ->
4625 code = registerCode register tmp
4626 src1 = registerName register tmp
4627 src2 = ImmInt (fromInteger y)
4628 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4630 return (Any IntRep code__2)
-- General dyadic op: evaluate both operands into registers first.
4632 trivialCode instr x y
4633 = getRegister x `thenNat` \ register1 ->
4634 getRegister y `thenNat` \ register2 ->
4635 getNewRegNat IntRep `thenNat` \ tmp1 ->
4636 getNewRegNat IntRep `thenNat` \ tmp2 ->
4638 code1 = registerCode register1 tmp1 []
4639 src1 = registerName register1 tmp1
4640 code2 = registerCode register2 tmp2 []
4641 src2 = registerName register2 tmp2
4642 code__2 dst = asmSeqThen [code1, code2] .
4643 mkSeqInstr (instr src1 (RIReg src2) dst)
4645 return (Any IntRep code__2)
-- Unary integer op.
4648 trivialUCode instr x
4649 = getRegister x `thenNat` \ register ->
4650 getNewRegNat IntRep `thenNat` \ tmp ->
4652 code = registerCode register tmp
4653 src = registerName register tmp
4654 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4656 return (Any IntRep code__2)
-- Dyadic floating-point op; the Width argument is ignored (everything is
-- done at FF64 here).
4659 trivialFCode _ instr x y
4660 = getRegister x `thenNat` \ register1 ->
4661 getRegister y `thenNat` \ register2 ->
4662 getNewRegNat FF64 `thenNat` \ tmp1 ->
4663 getNewRegNat FF64 `thenNat` \ tmp2 ->
4665 code1 = registerCode register1 tmp1
4666 src1 = registerName register1 tmp1
4668 code2 = registerCode register2 tmp2
4669 src2 = registerName register2 tmp2
4671 code__2 dst = asmSeqThen [code1 [], code2 []] .
4672 mkSeqInstr (instr src1 src2 dst)
4674 return (Any FF64 code__2)
-- Unary floating-point op, also fixed at FF64.
4676 trivialUFCode _ instr x
4677 = getRegister x `thenNat` \ register ->
4678 getNewRegNat FF64 `thenNat` \ tmp ->
4680 code = registerCode register tmp
4681 src = registerName register tmp
4682 code__2 dst = code . mkSeqInstr (instr src dst)
4684 return (Any FF64 code__2)
4686 #endif /* alpha_TARGET_ARCH */
4688 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4690 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4693 The Rules of the Game are:
4695 * You cannot assume anything about the destination register dst;
4696 it may be anything, including a fixed reg.
4698 * You may compute an operand into a fixed reg, but you may not
4699 subsequently change the contents of that fixed reg. If you
4700 want to do so, first copy the value either to a temporary
4701 or into dst. You are free to modify dst even if it happens
4702 to be a fixed reg -- that's not your problem.
4704 * You cannot assume that a fixed reg will stay live over an
4705 arbitrary computation. The same applies to the dst reg.
4707 * Temporary regs obtained from getNewRegNat are distinct from
4708 each other and from all other regs, and stay live over
4709 arbitrary computations.
4711 --------------------
4713 SDM's version of The Rules:
4715 * If getRegister returns Any, that means it can generate correct
4716 code which places the result in any register, period. Even if that
4717 register happens to be read during the computation.
4719 Corollary #1: this means that if you are generating code for an
4720 operation with two arbitrary operands, you cannot assign the result
4721 of the first operand into the destination register before computing
4722 the second operand. The second operand might require the old value
4723 of the destination register.
4725 Corollary #2: A function might be able to generate more efficient
4726 code if it knows the destination register is a new temporary (and
4727 therefore not read by any of the sub-computations).
4729 * If getRegister returns Any, then the code it generates may modify only:
4730 (a) fresh temporaries
4731 (b) the destination register
4732 (c) known registers (eg. %ecx is used by shifts)
4733 In particular, it may *not* modify global registers, unless the global
4734 register happens to be the destination register.
-- Literal as *left* operand and a reversed instruction form is available:
-- compute b straight into dst, then apply the reversed instruction with
-- the immediate, avoiding a register for the literal. Only valid when the
-- literal fits in 32 bits (x86 immediates are at most 32-bit).
4737 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4738 | is32BitLit lit_a = do
4739 b_code <- getAnyReg b
4742 = b_code dst `snocOL`
4743 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4745 return (Any (intSize width) code)
-- All other cases fall through to the generic two-operand generator.
4747 trivialCode width instr maybe_revinstr a b
4748 = genTrivialCode (intSize width) instr a b
4750 -- This is re-used for floating pt instructions too.
-- Generic two-operand (dst := dst `op` src) code generator.
4751 genTrivialCode rep instr a b = do
4752 (b_op, b_code) <- getNonClobberedOperand b
4753 a_code <- getAnyReg a
4754 tmp <- getNewRegNat rep
4756 -- We want the value of b to stay alive across the computation of a.
4757 -- But, we want to calculate a straight into the destination register,
4758 -- because the instruction only has two operands (dst := dst `op` src).
4759 -- The troublesome case is when the result of b is in the same register
4760 -- as the destination reg. In this case, we have to save b in a
4761 -- new temporary across the computation of a.
-- Clash case: save b into tmp before a overwrites dst.
4763 | dst `regClashesWithOp` b_op =
4765 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4767 instr (OpReg tmp) (OpReg dst)
-- No clash: use b's operand directly.
4771 instr b_op (OpReg dst)
4773 return (Any rep code)
-- Does this register appear in the given operand (directly, or as part of
-- an addressing mode)?
4775 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4776 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4777 reg `regClashesWithOp` _ = False
-- Unary op: compute x into dst, then apply the instruction in place.
4781 trivialUCode rep instr x = do
4782 x_code <- getAnyReg x
4787 return (Any rep code)
4791 #if i386_TARGET_ARCH
-- i386 (x87) floating-point dyadic op: three-register form, so the x
-- operand must survive the evaluation of y (hence getNonClobberedReg).
4793 trivialFCode width instr x y = do
4794 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4795 (y_reg, y_code) <- getSomeReg y
4797 size = floatSize width
4801 instr size x_reg y_reg dst
4802 return (Any size code)
4806 #if x86_64_TARGET_ARCH
-- x86_64 (SSE2) floating-point dyadic op: two-operand form, so it can
-- share the generic integer generator.
4807 trivialFCode pk instr x y
4808 = genTrivialCode size (instr size) x y
4809 where size = floatSize pk
-- Unary floating-point op (shared by i386 and x86_64 via the enclosing
-- CPP section).
4814 trivialUFCode size instr x = do
4815 (x_reg, x_code) <- getSomeReg x
4821 return (Any size code)
4823 #endif /* i386_TARGET_ARCH */
4825 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4827 #if sparc_TARGET_ARCH
-- Immediate second operand: emit as RIImm (guard on the immediate fitting
-- a SPARC 13-bit field is elided in this excerpt -- TODO confirm).
4829 trivialCode pk instr x (CmmLit (CmmInt y d))
4832 (src1, code) <- getSomeReg x
4833 tmp <- getNewRegNat II32
4835 src2 = ImmInt (fromInteger y)
4836 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4837 return (Any II32 code__2)
-- General dyadic integer op: both operands in registers.
4839 trivialCode pk instr x y = do
4840 (src1, code1) <- getSomeReg x
4841 (src2, code2) <- getSomeReg y
4842 tmp1 <- getNewRegNat II32
4843 tmp2 <- getNewRegNat II32
4845 code__2 dst = code1 `appOL` code2 `snocOL`
4846 instr src1 (RIReg src2) dst
4847 return (Any II32 code__2)
-- Dyadic floating-point op. SPARC treats float and double registers as
-- separate kinds, so if the operand widths differ, the 32-bit one is
-- first promoted to FF64 with FxTOy before the operation.
4850 trivialFCode pk instr x y = do
4851 (src1, code1) <- getSomeReg x
4852 (src2, code2) <- getSomeReg y
4853 tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4854 tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4855 tmp <- getNewRegNat FF64
4857 promote x = FxTOy FF32 FF64 x tmp
-- Three cases: same width (use as-is), x is FF32 (promote x), or
-- y is FF32 (promote y).
4863 if pk1 `cmmEqType` pk2 then
4864 code1 `appOL` code2 `snocOL`
4865 instr (floatSize pk) src1 src2 dst
4866 else if typeWidth pk1 == W32 then
4867 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4868 instr FF64 tmp src2 dst
4870 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4871 instr FF64 src1 tmp dst
-- Result width follows the operands: their common type, or FF64 if a
-- promotion happened.
4872 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- Unary integer op.
4876 trivialUCode size instr x = do
4877 (src, code) <- getSomeReg x
4878 tmp <- getNewRegNat size
4880 code__2 dst = code `snocOL` instr (RIReg src) dst
4881 return (Any size code__2)
-- Unary floating-point op.
4884 trivialUFCode pk instr x = do
4885 (src, code) <- getSomeReg x
4886 tmp <- getNewRegNat pk
4888 code__2 dst = code `snocOL` instr src dst
4889 return (Any pk code__2)
4891 #endif /* sparc_TARGET_ARCH */
4893 #if powerpc_TARGET_ARCH
4896 Wolfgang's PowerPC version of The Rules:
4898 A slightly modified version of The Rules to take advantage of the fact
4899 that PowerPC instructions work on all registers and don't implicitly
4900 clobber any fixed registers.
4902 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4904 * If getRegister returns Any, then the code it generates may modify only:
4905 (a) fresh temporaries
4906 (b) the destination register
4907 It may *not* modify global registers, unless the global
4908 register happens to be the destination register.
4909 It may not clobber any other registers. In fact, only ccalls clobber any
4911 Also, it may not modify the counter register (used by genCCall).
4913 Corollary: If a getRegister for a subexpression returns Fixed, you need
4914 not move it to a fresh temporary before evaluating the next subexpression.
4915 The Fixed register won't be modified.
4916 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4918 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4919 the value of the destination register.
-- Immediate second operand: only taken when the literal fits the
-- instruction's immediate field, as decided by makeImmediate (which also
-- accounts for signedness).
4922 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4923 | Just imm <- makeImmediate rep signed y
4925 (src1, code1) <- getSomeReg x
4926 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4927 return (Any (intSize rep) code)
-- General dyadic integer op: both operands in registers.
4929 trivialCode rep signed instr x y = do
4930 (src1, code1) <- getSomeReg x
4931 (src2, code2) <- getSomeReg y
4932 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4933 return (Any (intSize rep) code)
-- | Three-register op with no immediate form (explicit Size supplied).
4935 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
4936 -> CmmExpr -> CmmExpr -> NatM Register
4937 trivialCodeNoImm' size instr x y = do
4938 (src1, code1) <- getSomeReg x
4939 (src2, code2) <- getSomeReg y
4940 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4941 return (Any size code)
-- | As 'trivialCodeNoImm'', for instruction constructors that take the
-- Size themselves.
4943 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
4944 -> CmmExpr -> CmmExpr -> NatM Register
4945 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
-- Unary op.
4947 trivialUCode rep instr x = do
4948 (src, code) <- getSomeReg x
4949 let code' dst = code `snocOL` instr dst src
4950 return (Any rep code')
4952 -- There is no "remainder" instruction on the PPC, so we have to do
4954 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | Compute x `rem` y as x - (x `div` y) * y, using the supplied divide
-- instruction (visible here: the MULLW of the quotient by y; the DIVW and
-- SUBF steps are elided in this excerpt).
4956 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4957 -> CmmExpr -> CmmExpr -> NatM Register
4958 remainderCode rep div x y = do
4959 (src1, code1) <- getSomeReg x
4960 (src2, code2) <- getSomeReg y
4961 let code dst = code1 `appOL` code2 `appOL` toOL [
4963 MULLW dst dst (RIReg src2),
4966 return (Any (intSize rep) code)
4968 #endif /* powerpc_TARGET_ARCH */
4971 -- -----------------------------------------------------------------------------
4972 -- Coercing to/from integer/floating-point...
4974 -- When going to integer, we truncate (round towards 0).
4976 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4977 -- conversions. We have to store temporaries in memory to move
4978 -- between the integer and the floating point register sets.
4980 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4981 -- pretend, on sparc at least, that double and float regs are separate
4982 -- kinds, so the value has to be computed into one kind before being
4983 -- explicitly "converted" to live in the other kind.
-- | Convert an integer of the first Width to a float of the second Width
-- (coerceInt2FP) and vice versa (coerceFP2Int, truncating towards zero).
4985 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4986 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-- SPARC-only: explicit float<->double register-kind conversions.
4988 #if sparc_TARGET_ARCH
4989 coerceDbl2Flt :: CmmExpr -> NatM Register
4990 coerceFlt2Dbl :: CmmExpr -> NatM Register
4993 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4995 #if alpha_TARGET_ARCH
-- Int -> FP on Alpha: bounce the value through a stack slot (spRel 0),
-- storing from the integer register set and loading into the FP set.
4998 = getRegister x `thenNat` \ register ->
4999 getNewRegNat IntRep `thenNat` \ reg ->
5001 code = registerCode register reg
5002 src = registerName register reg
5004 code__2 dst = code . mkSeqInstrs [
5006 LD TF dst (spRel 0),
5009 return (Any FF64 code__2)
-- FP -> Int on Alpha: same memory round-trip in the other direction.
5013 = getRegister x `thenNat` \ register ->
5014 getNewRegNat FF64 `thenNat` \ tmp ->
5016 code = registerCode register tmp
5017 src = registerName register tmp
5019 code__2 dst = code . mkSeqInstrs [
5021 ST TF tmp (spRel 0),
5024 return (Any IntRep code__2)
5026 #endif /* alpha_TARGET_ARCH */
5028 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5030 #if i386_TARGET_ARCH
-- Int -> FP on i386: a single x87 pseudo-instruction (GITOF for float,
-- GITOD for double) selected by the target width.
5032 coerceInt2FP from to x = do
5033 (x_reg, x_code) <- getSomeReg x
5035 opc = case to of W32 -> GITOF; W64 -> GITOD
5036 code dst = x_code `snocOL` opc x_reg dst
5037 -- ToDo: works for non-II32 reps?
5038 return (Any (floatSize to) code)
-- FP -> Int on i386: the truncating counterparts (GFTOI/GDTOI),
-- selected by the *source* width.
5042 coerceFP2Int from to x = do
5043 (x_reg, x_code) <- getSomeReg x
5045 opc = case from of W32 -> GFTOI; W64 -> GDTOI
5046 code dst = x_code `snocOL` opc x_reg dst
5047 -- ToDo: works for non-II32 reps?
5049 return (Any (intSize to) code)
5051 #endif /* i386_TARGET_ARCH */
5053 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5055 #if x86_64_TARGET_ARCH
-- FP -> Int on x86_64: SSE2 truncating conversions to a 64-bit integer
-- (CVTTSS2SIQ from float, CVTTSD2SIQ from double).
5057 coerceFP2Int from to x = do
5058 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
5060 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
5061 code dst = x_code `snocOL` opc x_op dst
5063 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- Int -> FP on x86_64: CVTSI2SS / CVTSI2SD by target width.
5065 coerceInt2FP from to x = do
5066 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
5068 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
5069 code dst = x_code `snocOL` opc x_op dst
5071 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- | Float <-> double conversion; the opcode is chosen by the *target*
-- width (CVTSD2SS to narrow, CVTSS2SD to widen).
5073 coerceFP2FP :: Width -> CmmExpr -> NatM Register
5074 coerceFP2FP to x = do
5075 (x_reg, x_code) <- getSomeReg x
5077 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
5078 code dst = x_code `snocOL` opc x_reg dst
5080 return (Any (floatSize to) code)
5083 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5085 #if sparc_TARGET_ARCH
-- Int -> FP on SPARC: there is no direct int-reg -> fp-reg move, so the
-- value is stored to a stack slot (spRel (-2)), loaded back into an FP
-- register, and then converted in place with FxTOy.
5087 coerceInt2FP width1 width2 x = do
5088 (src, code) <- getSomeReg x
5090 code__2 dst = code `appOL` toOL [
5091 ST (intSize width1) src (spRel (-2)),
5092 LD (intSize width1) (spRel (-2)) dst,
5093 FxTOy (intSize width1) (floatSize width2) dst dst]
5094 return (Any (floatSize $ width2) code__2)
5097 -- | Coerce a floating point value to integer
5099 -- NOTE: On sparc v9 there are no instructions to move a value from an
5100 -- FP register directly to an int register, so we have to use a load/store.
5102 coerceFP2Int width1 width2 x
5103 = do let fsize1 = floatSize width1
5104 fsize2 = floatSize width2
5106 isize2 = intSize width2
5108 (fsrc, code) <- getSomeReg x
5109 fdst <- getNewRegNat fsize2
5114 -- convert float to int format, leaving it in a float reg.
5115 [ FxTOy fsize1 isize2 fsrc fdst
5117 -- store the int into mem, then load it back to move
5118 -- it into an actual int reg.
5119 , ST fsize2 fdst (spRel (-2))
5120 , LD isize2 (spRel (-2)) dst]
5122 return (Any isize2 code2)
-- Double -> float: a single FxTOy conversion (the two register kinds are
-- distinct on SPARC, so this "move" is an explicit instruction).
5125 coerceDbl2Flt x = do
5126 (src, code) <- getSomeReg x
5127 return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
-- Float -> double: the inverse conversion.
5130 coerceFlt2Dbl x = do
5131 (src, code) <- getSomeReg x
5132 return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
5134 #endif /* sparc_TARGET_ARCH */
5136 #if powerpc_TARGET_ARCH
-- Int -> FP on PowerPC (32-bit): no direct conversion instruction, so use
-- the classic magic-number trick: assemble the IEEE-754 double with bit
-- pattern 0x43300000_(x ^ 0x80000000) on the stack, load it, and subtract
-- the constant 0x43300000_80000000 (referenced via the label/dynRef) to
-- recover (double) x.
5137 coerceInt2FP fromRep toRep x = do
5138 (src, code) <- getSomeReg x
5139 lbl <- getNewLabelNat
5140 itmp <- getNewRegNat II32
5141 ftmp <- getNewRegNat FF64
5142 dflags <- getDynFlagsNat
-- The magic constant lives in the data section; may need PIC-aware
-- addressing, hence cmmMakeDynamicReference.
5143 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
5144 Amode addr addr_code <- getAmode dynRef
5146 code' dst = code `appOL` maybe_exts `appOL` toOL [
5149 CmmStaticLit (CmmInt 0x43300000 W32),
5150 CmmStaticLit (CmmInt 0x80000000 W32),
-- Flip the sign bit of x, store as the low word...
5151 XORIS itmp src (ImmInt 0x8000),
5152 ST II32 itmp (spRel 3),
-- ...put 0x4330 in the high word, then load the pair as a double.
5153 LIS itmp (ImmInt 0x4330),
5154 ST II32 itmp (spRel 2),
5155 LD FF64 ftmp (spRel 2)
5156 ] `appOL` addr_code `appOL` toOL [
5158 FSUB FF64 dst ftmp dst
5159 ] `appOL` maybe_frsp dst
-- Narrow sources must be sign-extended to 32 bits first.
5161 maybe_exts = case fromRep of
5162 W8 -> unitOL $ EXTS II8 src src
5163 W16 -> unitOL $ EXTS II16 src src
-- A float target needs a final round-to-single (FRSP).
5165 maybe_frsp dst = case toRep of
5166 W32 -> unitOL $ FRSP dst dst
5168 return (Any (floatSize toRep) code')
-- FP -> Int on PowerPC: convert in an FP register (conversion instruction
-- elided in this excerpt), spill the 64-bit result to the stack, and read
-- back only the low word.
5170 coerceFP2Int fromRep toRep x = do
5171 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
5172 (src, code) <- getSomeReg x
5173 tmp <- getNewRegNat FF64
5175 code' dst = code `appOL` toOL [
5176 -- convert to int in FP reg
5178 -- store value (64bit) from FP to stack
5179 ST FF64 tmp (spRel 2),
5180 -- read low word of value (high word is undefined)
5181 LD II32 dst (spRel 3)]
5182 return (Any (intSize toRep) code')
5183 #endif /* powerpc_TARGET_ARCH */
5186 -- -----------------------------------------------------------------------------
5187 -- eXTRA_STK_ARGS_HERE
5189 -- We (allegedly) put the first six C-call arguments in registers;
5190 -- where do we start putting the rest of them?
5192 -- Moved from Instrs (SDM):
5194 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- | Stack offset (in words) at which C-call arguments beyond the
-- register-passed ones begin: 0 on Alpha, 23 on SPARC.
5195 eXTRA_STK_ARGS_HERE :: Int
5197 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))