2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
35 -- Our intermediate code:
37 import PprCmm ( pprExpr )
40 import ClosureInfo ( C_SRT(..) )
44 import StaticFlags ( opt_PIC )
45 import ForeignCall ( CCallConv(..) )
48 import qualified Outputable as O
51 import FastBool ( isFastTrue )
52 import Constants ( wORD_SIZE )
54 import Debug.Trace ( trace )
56 import Control.Monad ( mapAndUnzipM )
57 import Data.Maybe ( fromJust )
63 -- -----------------------------------------------------------------------------
64 -- Top-level of the instruction selector
66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
67 -- They are really trees of insns to facilitate fast appending, where a
68 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.  (Consumers flatten them with a left-to-right fold, e.g. the
-- 'foldrOL' in 'basicBlockCodeGen'.)
71 type InstrBlock = OrdList Instr
-- | Entry point: select native instructions for one top-level Cmm item.
--
-- For a 'CmmProc' we translate every basic block (each may expand into
-- several native blocks plus extra data sections), rebuild the proc from
-- the translated blocks, and -- when a PIC base register is in use --
-- let 'initializePicBase' patch the proc so the base register gets set
-- up on entry.  'CmmData' needs no translation at all.
73 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
74 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
75 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
76 picBaseMb <- getPicBaseMaybeNat
77 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
78 tops = proc : concat statics
-- NOTE(review): the scrutinee line ("case picBaseMb of") appears to be
-- missing from this listing; the two alternatives below belong to it.
80 Just picBase -> initializePicBase picBase tops
81 Nothing -> return tops
83 cmmTopCodeGen (CmmData sec dat) = do
84 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Translate one Cmm basic block to native code.
--
-- The instruction selector may introduce new block boundaries (marked
-- with the NEWBLOCK pseudo-instruction) and may emit literal data
-- (marked with LDATA), so after selection we re-split the flat
-- instruction stream back into proper basic blocks and float the data
-- out as separate top-level sections.
86 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
87 basicBlockCodeGen (BasicBlock id stmts) = do
88 instrs <- stmtsToInstrs stmts
89 -- code generation may introduce new basic block boundaries, which
90 -- are indicated by the NEWBLOCK instruction. We must split up the
91 -- instruction stream into basic blocks again. Also, we extract
-- LDATA sections out of the stream here.
94 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- Right fold: NEWBLOCK closes the block accumulated so far and starts a
-- fresh (empty) one; LDATA is diverted to the statics; everything else
-- is consed onto the current block.
96 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
97 = ([], BasicBlock id instrs : blocks, statics)
98 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
99 = (instrs, blocks, CmmData sec dat:statics)
100 mkBlocks instr (instrs,blocks,statics)
101 = (instr:instrs, blocks, statics)
-- The leading run of instructions keeps the original block id.
103 return (BasicBlock id top : other_blocks, statics)
-- | Concatenate the generated code for a whole statement list.
105 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the LHS line ("stmtsToInstrs stmts") appears to be
-- missing from this listing.
107 = do instrss <- mapM stmtToInstrs stmts
108 return (concatOL instrss)
-- | Dispatch a single Cmm statement to the appropriate selector.
--
-- Assignments are split three ways on the type of the value: float,
-- 64-bit-integer-on-a-32-bit-target (handled by the *_I64Code helpers),
-- or ordinary integer/pointer.  Control transfers each have a dedicated
-- generator.  NOTE(review): several case-header lines (the CmmAssign
-- and CmmStore alternatives, the matching #endif's, and the CmmReturn
-- header) appear to be missing from this listing; the guards below
-- belong to them.
110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
111 stmtToInstrs stmt = case stmt of
112 CmmNop -> return nilOL
113 CmmComment s -> return (unitOL (COMMENT s))
-- (guards of the CmmAssign reg src alternative:)
116 | isFloatType ty -> assignReg_FltCode size reg src
117 #if WORD_SIZE_IN_BITS==32
118 | isWord64 ty -> assignReg_I64Code reg src
120 | otherwise -> assignReg_IntCode size reg src
121 where ty = cmmRegType reg
122 size = cmmTypeSize ty
-- (guards of the CmmStore addr src alternative:)
125 | isFloatType ty -> assignMem_FltCode size addr src
126 #if WORD_SIZE_IN_BITS==32
127 | isWord64 ty -> assignMem_I64Code addr src
129 | otherwise -> assignMem_IntCode size addr src
130 where ty = cmmExprType src
131 size = cmmTypeSize ty
133 CmmCall target result_regs args _ _
134 -> genCCall target result_regs args
136 CmmBranch id -> genBranch id
137 CmmCondBranch arg id -> genCondJump id arg
138 CmmSwitch arg ids -> genSwitch arg ids
139 CmmJump arg params -> genJump arg
-- CmmReturn: the CPS pass has already turned returns into jumps.
141 panic "stmtToInstrs: return statement should have been cps'd away"
143 -- -----------------------------------------------------------------------------
144 -- General things for putting together code sequences
146 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
147 -- CmmExprs into CmmRegOff?
-- | Rewrite a 'CmmRegOff' node into the equivalent explicit addition,
-- @reg + off@, so the rest of the selector only needs to understand
-- 'CmmMachOp'.  (Deliberately partial: callers only hand it
-- 'CmmRegOff' nodes.)
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let w = typeWidth (cmmRegType reg)
    in  CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) w)]
153 -- -----------------------------------------------------------------------------
154 -- Code gen for 64-bit arithmetic on 32-bit platforms
157 Simple support for generating 64-bit code (ie, 64 bit values and 64
158 bit assignments) on 32-bit platforms. Unlike the main code generator
159 we merely shoot for generating working code as simply as possible, and
160 pay little attention to code quality. Specifically, there is no
161 attempt to deal cleverly with the fixed-vs-floating register
162 distinction; all values are generated into (pairs of) floating
163 registers, even if this would mean some redundant reg-reg moves as a
164 result. Only one of the VRegUniques is returned, since it will be
165 of the VRegUniqueLo form, and the upper-half VReg can be determined
166 by applying getHiVRegFromLo to it.
169 data ChildCode64 -- a.k.a "Register64"
-- NOTE(review): the constructor line appears to be missing from this
-- listing; every use site builds/matches @ChildCode64 code rlo@, i.e.
-- an InstrBlock followed by the low-word Reg.
172 Reg -- the lower 32-bit temporary which contains the
173 -- result; use getHiVRegFromLo to find the other
174 -- VRegUnique. Rules of this simplified insn
175 -- selection game are therefore that the returned
176 -- Reg may be modified
178 #if WORD_SIZE_IN_BITS==32
-- Signatures shared by all the 32-bit targets below.
179 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
180 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
183 #ifndef x86_64_TARGET_ARCH
184 iselExpr64 :: CmmExpr -> NatM ChildCode64
187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value to memory as two 32-bit MOVs, low word at
-- the base address and high word at base+4.
191 assignMem_I64Code addrTree valueTree = do
192 Amode addr addr_code <- getAmode addrTree
193 ChildCode64 vcode rlo <- iselExpr64 valueTree
195 rhi = getHiVRegFromLo rlo
197 -- Little-endian store
198 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
199 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
201 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair with two
-- reg-reg moves.  Only local registers are legal 64-bit lvalues here;
-- anything else is a compiler bug (see the panic clause).
204 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
205 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
207 r_dst_lo = mkVReg u_dst II32
208 r_dst_hi = getHiVRegFromLo r_dst_lo
209 r_src_hi = getHiVRegFromLo r_src_lo
210 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
211 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
214 vcode `snocOL` mov_lo `snocOL` mov_hi
217 assignReg_I64Code lvalue valueTree
218 = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386 iselExpr64: compute a 64-bit expression into a fresh lo/hi
-- virtual register pair; only the low Reg is returned (the high one is
-- recovered with getHiVRegFromLo).
--
-- Integer literal: split into two 32-bit immediates and MOV each half.
222 iselExpr64 (CmmLit (CmmInt i _)) = do
223 (rlo,rhi) <- getNewRegPairNat II32
225 r = fromIntegral (fromIntegral i :: Word32)
226 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
228 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
229 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
232 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads from addr and addr+4.
234 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
235 Amode addr addr_code <- getAmode addrTree
236 (rlo,rhi) <- getNewRegPairNat II32
238 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
239 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
242 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local already lives in a known virtual-register pair.
246 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
247 = return (ChildCode64 nilOL (mkVReg vu II32))
249 -- we handle addition, but rather badly
-- Add a literal: ADD on the low words, then ADC (add-with-carry) on
-- the high words so the carry propagates.
250 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
251 ChildCode64 code1 r1lo <- iselExpr64 e1
252 (rlo,rhi) <- getNewRegPairNat II32
254 r = fromIntegral (fromIntegral i :: Word32)
255 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
256 r1hi = getHiVRegFromLo r1lo
258 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
259 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
260 MOV II32 (OpReg r1hi) (OpReg rhi),
261 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
263 return (ChildCode64 code rlo)
-- General 64-bit add: same ADD/ADC pattern, both operands in registers.
265 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
266 ChildCode64 code1 r1lo <- iselExpr64 e1
267 ChildCode64 code2 r2lo <- iselExpr64 e2
268 (rlo,rhi) <- getNewRegPairNat II32
270 r1hi = getHiVRegFromLo r1lo
271 r2hi = getHiVRegFromLo r2lo
274 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
275 ADD II32 (OpReg r2lo) (OpReg rlo),
276 MOV II32 (OpReg r1hi) (OpReg rhi),
277 ADC II32 (OpReg r2hi) (OpReg rhi) ]
279 return (ChildCode64 code rlo)
-- Zero-extension to W64: low word from the sub-expression, high word
-- cleared with an immediate 0.
281 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
283 r_dst_lo <- getNewRegNat II32
284 let r_dst_hi = getHiVRegFromLo r_dst_lo
287 ChildCode64 (code `snocOL`
288 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
-- Catch-all: any other 64-bit expression is unsupported on i386.
293 = pprPanic "iselExpr64(i386)" (ppr expr)
295 #endif /* i386_TARGET_ARCH */
297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
299 #if sparc_TARGET_ARCH
-- SPARC: store a 64-bit value as two 32-bit STs.  Note the layout is
-- the reverse of the i386 code above: the high word goes at offset 0
-- and the low word at offset 4.
-- NOTE(review): the Amode bound from getAmode here is never used in
-- the result; the address is recomputed via getSomeReg -- verify
-- whether the getAmode call is dead code.
301 assignMem_I64Code addrTree valueTree = do
302 Amode addr addr_code <- getAmode addrTree
303 ChildCode64 vcode rlo <- iselExpr64 valueTree
304 (src, code) <- getSomeReg addrTree
306 rhi = getHiVRegFromLo rlo
308 mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
309 mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
310 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- SPARC: 64-bit register-to-register assignment; moves are built as
-- OR with the zero register g0 (see mkMOV below).
312 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
313 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
315 r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
316 r_dst_hi = getHiVRegFromLo r_dst_lo
317 r_src_hi = getHiVRegFromLo r_src_lo
318 mov_lo = mkMOV r_src_lo r_dst_lo
319 mov_hi = mkMOV r_src_hi r_dst_hi
320 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
321 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
322 assignReg_I64Code lvalue valueTree
323 = panic "assignReg_I64Code(sparc): invalid lvalue"
-- SPARC iselExpr64: compute a 64-bit value into a lo/hi register pair
-- (high word at the lower address, matching the stores above).
326 -- Load a 64 bit word
327 iselExpr64 (CmmLoad addrTree ty)
329 = do Amode amode addr_code <- getAmode addrTree
-- reg+reg amode: materialise the sum in a temporary first, then do two
-- offset loads from it.
332 | AddrRegReg r1 r2 <- amode
333 = do rlo <- getNewRegNat II32
334 tmp <- getNewRegNat II32
335 let rhi = getHiVRegFromLo rlo
340 [ ADD False False r1 (RIReg r2) tmp
341 , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
342 , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
-- reg+imm amode: fold the 0/4 offsets straight into the displacement.
345 | AddrRegImm r1 (ImmInt i) <- amode
346 = do rlo <- getNewRegNat II32
347 let rhi = getHiVRegFromLo rlo
352 [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
353 , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
359 -- Add a literal to a 64 bit integer
-- NOTE(review): the first Bool of ADD presumably selects the carrying
-- variant (True on the high-word add propagates the low-word carry) --
-- confirm against the SPARC Instr definition.
360 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
361 = do ChildCode64 code1 r1_lo <- iselExpr64 e1
362 let r1_hi = getHiVRegFromLo r1_lo
364 r_dst_lo <- getNewRegNat II32
365 let r_dst_hi = getHiVRegFromLo r_dst_lo
369 [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
370 , ADD True False r1_hi (RIReg g0) r_dst_hi ])
-- General 64-bit add: plain add on the low words, carrying add on the
-- high words.
375 iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
376 = do ChildCode64 code1 r1_lo <- iselExpr64 e1
377 let r1_hi = getHiVRegFromLo r1_lo
379 ChildCode64 code2 r2_lo <- iselExpr64 e2
380 let r2_hi = getHiVRegFromLo r2_lo
382 r_dst_lo <- getNewRegNat II32
383 let r_dst_hi = getHiVRegFromLo r_dst_lo
388 [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
389 , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ]
391 return $ ChildCode64 code r_dst_lo
-- Copy a 64-bit local into a fresh pair (moves via OR with g0).
394 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
395 r_dst_lo <- getNewRegNat II32
396 let r_dst_hi = getHiVRegFromLo r_dst_lo
397 r_src_lo = mkVReg uq II32
398 r_src_hi = getHiVRegFromLo r_src_lo
399 mov_lo = mkMOV r_src_lo r_dst_lo
400 mov_hi = mkMOV r_src_hi r_dst_hi
401 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
403 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
406 -- Convert something into II64
-- Zero-extension: clear the high half via a move from g0, put the
-- computed value into the low half.
407 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
409 r_dst_lo <- getNewRegNat II32
410 let r_dst_hi = getHiVRegFromLo r_dst_lo
412 -- compute expr and load it into r_dst_lo
413 (a_reg, a_code) <- getSomeReg expr
417 [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits
418 , mkRegRegMoveInstr a_reg r_dst_lo ]
420 return $ ChildCode64 code r_dst_lo
-- Catch-all: unsupported 64-bit expression on SPARC.
424 = pprPanic "iselExpr64(sparc)" (ppr expr)
426 #endif /* sparc_TARGET_ARCH */
428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
430 #if powerpc_TARGET_ARCH
-- | PPC: turn an address expression into a pair of amodes addressing
-- the two halves of a 64-bit value (high word at offset 0, low word at
-- offset 4).  If the amode cannot absorb the +4 displacement we fall
-- back to materialising the pointer in a register and using reg+0 and
-- reg+4.
432 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
433 getI64Amodes addrTree = do
434 Amode hi_addr addr_code <- getAmode addrTree
435 case addrOffset hi_addr 4 of
436 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
437 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
438 return (AddrRegImm hi_ptr (ImmInt 0),
439 AddrRegImm hi_ptr (ImmInt 4),
-- NOTE(review): the final tuple component (the code block) appears to
-- be missing from this listing.
-- PPC: store the two halves with two STs through the amode pair built
-- by getI64Amodes.
442 assignMem_I64Code addrTree valueTree = do
443 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
444 ChildCode64 vcode rlo <- iselExpr64 valueTree
446 rhi = getHiVRegFromLo rlo
449 mov_hi = ST II32 rhi hi_addr
450 mov_lo = ST II32 rlo lo_addr
452 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- PPC: 64-bit register assignment via two MR (move-register) insns.
-- Only local registers are legal 64-bit lvalues (panic clause below).
454 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
455 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
457 r_dst_lo = mkVReg u_dst II32
458 r_dst_hi = getHiVRegFromLo r_dst_lo
459 r_src_hi = getHiVRegFromLo r_src_lo
460 mov_lo = MR r_dst_lo r_src_lo
461 mov_hi = MR r_dst_hi r_src_hi
464 vcode `snocOL` mov_lo `snocOL` mov_hi
467 assignReg_I64Code lvalue valueTree
468 = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- PPC iselExpr64: compute a 64-bit value into a lo/hi register pair.
471 -- Don't delete this -- it's very handy for debugging.
473 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
474 -- = panic "iselExpr64(???)"
-- 64-bit load: fetch both halves through the amode pair.
476 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
477 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
478 (rlo, rhi) <- getNewRegPairNat II32
479 let mov_hi = LD II32 rhi hi_addr
480 mov_lo = LD II32 rlo lo_addr
481 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local already lives in a known virtual-register pair.
484 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
485 = return (ChildCode64 nilOL (mkVReg vu II32))
-- Load a 64-bit integer literal into a fresh lo/hi register pair,
-- 16 bits at a time (PPC immediates are 16-bit): LIS sets the upper
-- half of a 32-bit register, OR-immediate fills in the lower half.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: half2 (bits 32..47) belongs in the HIGH
                -- register.  The original OR'd it into rlo, clobbering
                -- the already-assembled low word and leaving rhi's low
                -- 16 bits unset.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- 64-bit add: ADDC (add carrying) on the low words, then the high
-- words are added with the carry folded in.
504 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
505 ChildCode64 code1 r1lo <- iselExpr64 e1
506 ChildCode64 code2 r2lo <- iselExpr64 e2
507 (rlo,rhi) <- getNewRegPairNat II32
509 r1hi = getHiVRegFromLo r1lo
510 r2hi = getHiVRegFromLo r2lo
513 toOL [ ADDC rlo r1lo r2lo,
-- NOTE(review): the matching high-word add (presumably
-- "ADDE rhi r1hi r2hi ]") appears to be missing from this listing.
516 return (ChildCode64 code rlo)
-- Zero-extend a 32-bit value: low word moved in (MR), high word set to
-- an immediate 0 (LI).
518 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
519 (expr_reg,expr_code) <- getSomeReg expr
520 (rlo, rhi) <- getNewRegPairNat II32
521 let mov_hi = LI rhi (ImmInt 0)
522 mov_lo = MR rlo expr_reg
523 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Catch-all: unsupported 64-bit expression on PPC.
526 = pprPanic "iselExpr64(powerpc)" (ppr expr)
528 #endif /* powerpc_TARGET_ARCH */
531 -- -----------------------------------------------------------------------------
532 -- The 'Register' type
534 -- 'Register's passed up the tree. If the stix code forces the register
535 -- to live in a pre-decided machine register, it comes out as @Fixed@;
536 -- otherwise, it comes out as @Any@, and the parent can decide which
537 -- register to put it in.
-- NOTE(review): the "data Register" header line appears to be missing
-- from this listing; the two constructors follow.
540 = Fixed Size Reg InstrBlock
541 | Any Size (Reg -> InstrBlock)
-- | Relabel a 'Register' with a different 'Size'.  Purely a change of
-- annotation: neither the register nor its code is touched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep r size = case r of
    Fixed _ reg code -> Fixed size reg code
    Any   _ codefn   -> Any   size codefn
549 -- -----------------------------------------------------------------------------
550 -- Utils based on getRegister, below
552 -- The dual to getAnyReg: compute an expression into a register, but
553 -- we don't mind which one it is.
-- | Compute an expression into some register -- we don't care which.
554 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the LHS, the case analysis on the returned Register,
-- and the Fixed alternative appear to be missing from this listing;
-- the lines below are the Any branch, which materialises the value
-- into a fresh temporary.
556 r <- getRegister expr
559 tmp <- getNewRegNat rep
560 return (tmp, code tmp)
564 -- -----------------------------------------------------------------------------
565 -- Grab the Reg for a CmmReg
-- | Map a Cmm register to the native 'Reg' that holds it.
--
-- Local registers become virtual registers keyed on their unique.
-- Global registers must, by this stage, be the ones which map to a
-- real machine register on this platform; anything else (i.e. a
-- memory-resident global) is a compiler bug, hence the panic.
getRegisterReg :: CmmReg -> Reg
getRegisterReg (CmmLocal (LocalReg u pk)) = mkVReg u (cmmTypeSize pk)
getRegisterReg global@(CmmGlobal mid) =
    case get_GlobalReg_reg_or_addr mid of
        Left (RealReg rrno) -> RealReg rrno
        _other              -> pprPanic "getRegisterReg-memory" (ppr global)
581 -- -----------------------------------------------------------------------------
582 -- Generate code to get a subtree into a Register
584 -- Don't delete this -- it's very handy for debugging.
586 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
587 -- = panic "getRegister(???)"
589 getRegister :: CmmExpr -> NatM Register
591 #if !x86_64_TARGET_ARCH
592 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
593 -- register, it can only be used for rip-relative addressing.
594 getRegister (CmmReg (CmmGlobal PicBaseReg))
596 reg <- getPicBaseNat wordSize
597 return (Fixed wordSize reg nilOL)
600 getRegister (CmmReg reg)
601 = return (Fixed (cmmTypeSize (cmmRegType reg))
602 (getRegisterReg reg) nilOL)
604 getRegister tree@(CmmRegOff _ _)
605 = getRegister (mangleIndexTree tree)
608 #if WORD_SIZE_IN_BITS==32
609 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
610 -- TO_W_(x), TO_W_(x >> 32)
612 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
613 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
614 ChildCode64 code rlo <- iselExpr64 x
615 return $ Fixed II32 (getHiVRegFromLo rlo) code
617 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
618 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
619 ChildCode64 code rlo <- iselExpr64 x
620 return $ Fixed II32 (getHiVRegFromLo rlo) code
622 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
623 ChildCode64 code rlo <- iselExpr64 x
624 return $ Fixed II32 rlo code
626 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
627 ChildCode64 code rlo <- iselExpr64 x
628 return $ Fixed II32 rlo code
632 -- end of machine-"independent" bit; here we go on the rest...
634 #if alpha_TARGET_ARCH
636 getRegister (StDouble d)
637 = getBlockIdNat `thenNat` \ lbl ->
638 getNewRegNat PtrRep `thenNat` \ tmp ->
639 let code dst = mkSeqInstrs [
640 LDATA RoDataSegment lbl [
641 DATA TF [ImmLab (rational d)]
643 LDA tmp (AddrImm (ImmCLbl lbl)),
644 LD TF dst (AddrReg tmp)]
646 return (Any FF64 code)
648 getRegister (StPrim primop [x]) -- unary PrimOps
650 IntNegOp -> trivialUCode (NEG Q False) x
652 NotOp -> trivialUCode NOT x
654 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
655 DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
657 OrdOp -> coerceIntCode IntRep x
660 Float2IntOp -> coerceFP2Int x
661 Int2FloatOp -> coerceInt2FP pr x
662 Double2IntOp -> coerceFP2Int x
663 Int2DoubleOp -> coerceInt2FP pr x
665 Double2FloatOp -> coerceFltCode x
666 Float2DoubleOp -> coerceFltCode x
668 other_op -> getRegister (StCall fn CCallConv FF64 [x])
670 fn = case other_op of
671 FloatExpOp -> fsLit "exp"
672 FloatLogOp -> fsLit "log"
673 FloatSqrtOp -> fsLit "sqrt"
674 FloatSinOp -> fsLit "sin"
675 FloatCosOp -> fsLit "cos"
676 FloatTanOp -> fsLit "tan"
677 FloatAsinOp -> fsLit "asin"
678 FloatAcosOp -> fsLit "acos"
679 FloatAtanOp -> fsLit "atan"
680 FloatSinhOp -> fsLit "sinh"
681 FloatCoshOp -> fsLit "cosh"
682 FloatTanhOp -> fsLit "tanh"
683 DoubleExpOp -> fsLit "exp"
684 DoubleLogOp -> fsLit "log"
685 DoubleSqrtOp -> fsLit "sqrt"
686 DoubleSinOp -> fsLit "sin"
687 DoubleCosOp -> fsLit "cos"
688 DoubleTanOp -> fsLit "tan"
689 DoubleAsinOp -> fsLit "asin"
690 DoubleAcosOp -> fsLit "acos"
691 DoubleAtanOp -> fsLit "atan"
692 DoubleSinhOp -> fsLit "sinh"
693 DoubleCoshOp -> fsLit "cosh"
694 DoubleTanhOp -> fsLit "tanh"
696 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
698 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
700 CharGtOp -> trivialCode (CMP LTT) y x
701 CharGeOp -> trivialCode (CMP LE) y x
702 CharEqOp -> trivialCode (CMP EQQ) x y
703 CharNeOp -> int_NE_code x y
704 CharLtOp -> trivialCode (CMP LTT) x y
705 CharLeOp -> trivialCode (CMP LE) x y
707 IntGtOp -> trivialCode (CMP LTT) y x
708 IntGeOp -> trivialCode (CMP LE) y x
709 IntEqOp -> trivialCode (CMP EQQ) x y
710 IntNeOp -> int_NE_code x y
711 IntLtOp -> trivialCode (CMP LTT) x y
712 IntLeOp -> trivialCode (CMP LE) x y
714 WordGtOp -> trivialCode (CMP ULT) y x
715 WordGeOp -> trivialCode (CMP ULE) x y
716 WordEqOp -> trivialCode (CMP EQQ) x y
717 WordNeOp -> int_NE_code x y
718 WordLtOp -> trivialCode (CMP ULT) x y
719 WordLeOp -> trivialCode (CMP ULE) x y
721 AddrGtOp -> trivialCode (CMP ULT) y x
722 AddrGeOp -> trivialCode (CMP ULE) y x
723 AddrEqOp -> trivialCode (CMP EQQ) x y
724 AddrNeOp -> int_NE_code x y
725 AddrLtOp -> trivialCode (CMP ULT) x y
726 AddrLeOp -> trivialCode (CMP ULE) x y
728 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
729 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
730 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
731 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
732 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
733 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
735 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
736 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
737 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
738 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
739 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
740 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
742 IntAddOp -> trivialCode (ADD Q False) x y
743 IntSubOp -> trivialCode (SUB Q False) x y
744 IntMulOp -> trivialCode (MUL Q False) x y
745 IntQuotOp -> trivialCode (DIV Q False) x y
746 IntRemOp -> trivialCode (REM Q False) x y
748 WordAddOp -> trivialCode (ADD Q False) x y
749 WordSubOp -> trivialCode (SUB Q False) x y
750 WordMulOp -> trivialCode (MUL Q False) x y
751 WordQuotOp -> trivialCode (DIV Q True) x y
752 WordRemOp -> trivialCode (REM Q True) x y
754 FloatAddOp -> trivialFCode W32 (FADD TF) x y
755 FloatSubOp -> trivialFCode W32 (FSUB TF) x y
756 FloatMulOp -> trivialFCode W32 (FMUL TF) x y
757 FloatDivOp -> trivialFCode W32 (FDIV TF) x y
759 DoubleAddOp -> trivialFCode W64 (FADD TF) x y
760 DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
761 DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
762 DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
764 AddrAddOp -> trivialCode (ADD Q False) x y
765 AddrSubOp -> trivialCode (SUB Q False) x y
766 AddrRemOp -> trivialCode (REM Q True) x y
768 AndOp -> trivialCode AND x y
769 OrOp -> trivialCode OR x y
770 XorOp -> trivialCode XOR x y
771 SllOp -> trivialCode SLL x y
772 SrlOp -> trivialCode SRL x y
774 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
775 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
776 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
778 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
779 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
781 {- ------------------------------------------------------------
782 Some bizarre special code for getting condition codes into
783 registers. Integer non-equality is a test for equality
784 followed by an XOR with 1. (Integer comparisons always set
785 the result register to 0 or 1.) Floating point comparisons of
786 any kind leave the result in a floating point register, so we
787 need to wrangle an integer register out of things.
789 int_NE_code :: StixTree -> StixTree -> NatM Register
792 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
793 getNewRegNat IntRep `thenNat` \ tmp ->
795 code = registerCode register tmp
796 src = registerName register tmp
797 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
799 return (Any IntRep code__2)
801 {- ------------------------------------------------------------
802 Comments for int_NE_code also apply to cmpF_code
805 :: (Reg -> Reg -> Reg -> Instr)
807 -> StixTree -> StixTree
810 cmpF_code instr cond x y
811 = trivialFCode pr instr x y `thenNat` \ register ->
812 getNewRegNat FF64 `thenNat` \ tmp ->
813 getBlockIdNat `thenNat` \ lbl ->
815 code = registerCode register tmp
816 result = registerName register tmp
818 code__2 dst = code . mkSeqInstrs [
819 OR zeroh (RIImm (ImmInt 1)) dst,
820 BF cond result (ImmCLbl lbl),
821 OR zeroh (RIReg zeroh) dst,
824 return (Any IntRep code__2)
826 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
827 ------------------------------------------------------------
829 getRegister (CmmLoad pk mem)
830 = getAmode mem `thenNat` \ amode ->
832 code = amodeCode amode
833 src = amodeAddr amode
834 size = primRepToSize pk
835 code__2 dst = code . mkSeqInstr (LD size dst src)
837 return (Any pk code__2)
839 getRegister (StInt i)
842 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
844 return (Any IntRep code)
847 code dst = mkSeqInstr (LDI Q dst src)
849 return (Any IntRep code)
851 src = ImmInt (fromInteger i)
856 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
858 return (Any PtrRep code)
861 imm__2 = case imm of Just x -> x
863 #endif /* alpha_TARGET_ARCH */
865 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
869 getRegister (CmmLit (CmmFloat f W32)) = do
870 lbl <- getNewLabelNat
871 dflags <- getDynFlagsNat
872 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
873 Amode addr addr_code <- getAmode dynRef
877 CmmStaticLit (CmmFloat f W32)]
878 `consOL` (addr_code `snocOL`
881 return (Any FF32 code)
884 getRegister (CmmLit (CmmFloat d W64))
886 = let code dst = unitOL (GLDZ dst)
887 in return (Any FF64 code)
890 = let code dst = unitOL (GLD1 dst)
891 in return (Any FF64 code)
894 lbl <- getNewLabelNat
895 dflags <- getDynFlagsNat
896 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
897 Amode addr addr_code <- getAmode dynRef
901 CmmStaticLit (CmmFloat d W64)]
902 `consOL` (addr_code `snocOL`
905 return (Any FF64 code)
907 #endif /* i386_TARGET_ARCH */
909 #if x86_64_TARGET_ARCH
911 getRegister (CmmLit (CmmFloat 0.0 w)) = do
912 let size = floatSize w
913 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
914 -- I don't know why there are xorpd, xorps, and pxor instructions.
915 -- They all appear to do the same thing --SDM
916 return (Any size code)
918 getRegister (CmmLit (CmmFloat f w)) = do
919 lbl <- getNewLabelNat
920 let code dst = toOL [
923 CmmStaticLit (CmmFloat f w)],
924 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
927 return (Any size code)
928 where size = floatSize w
930 #endif /* x86_64_TARGET_ARCH */
932 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
934 -- catch simple cases of zero- or sign-extended load
935 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
936 code <- intLoadCode (MOVZxL II8) addr
937 return (Any II32 code)
939 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
940 code <- intLoadCode (MOVSxL II8) addr
941 return (Any II32 code)
943 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
944 code <- intLoadCode (MOVZxL II16) addr
945 return (Any II32 code)
947 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
948 code <- intLoadCode (MOVSxL II16) addr
949 return (Any II32 code)
953 #if x86_64_TARGET_ARCH
955 -- catch simple cases of zero- or sign-extended load
956 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
957 code <- intLoadCode (MOVZxL II8) addr
958 return (Any II64 code)
960 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
961 code <- intLoadCode (MOVSxL II8) addr
962 return (Any II64 code)
964 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
965 code <- intLoadCode (MOVZxL II16) addr
966 return (Any II64 code)
968 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
969 code <- intLoadCode (MOVSxL II16) addr
970 return (Any II64 code)
972 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
973 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
974 return (Any II64 code)
976 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
977 code <- intLoadCode (MOVSxL II32) addr
978 return (Any II64 code)
982 #if x86_64_TARGET_ARCH
983 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
984 CmmLit displacement])
985 = return $ Any II64 (\dst -> unitOL $
986 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
989 #if x86_64_TARGET_ARCH
990 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
991 x_code <- getAnyReg x
992 lbl <- getNewLabelNat
994 code dst = x_code dst `appOL` toOL [
995 -- This is how gcc does it, so it can't be that bad:
996 LDATA ReadOnlyData16 [
999 CmmStaticLit (CmmInt 0x80000000 W32),
1000 CmmStaticLit (CmmInt 0 W32),
1001 CmmStaticLit (CmmInt 0 W32),
1002 CmmStaticLit (CmmInt 0 W32)
1004 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1005 -- xorps, so we need the 128-bit constant
1006 -- ToDo: rip-relative
1009 return (Any FF32 code)
1011 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
1012 x_code <- getAnyReg x
1013 lbl <- getNewLabelNat
1015 -- This is how gcc does it, so it can't be that bad:
1016 code dst = x_code dst `appOL` toOL [
1017 LDATA ReadOnlyData16 [
1020 CmmStaticLit (CmmInt 0x8000000000000000 W64),
1021 CmmStaticLit (CmmInt 0 W64)
1023 -- gcc puts an unpck here. Wonder if we need it.
1024 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
1025 -- xorpd, so we need the 128-bit constant
1028 return (Any FF64 code)
1031 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1033 getRegister (CmmMachOp mop [x]) -- unary MachOps
1035 #if i386_TARGET_ARCH
1036 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
1037 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
1040 MO_S_Neg w -> triv_ucode NEGI (intSize w)
1041 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
1042 MO_Not w -> triv_ucode NOT (intSize w)
1045 MO_UU_Conv W32 W8 -> toI8Reg W32 x
1046 MO_SS_Conv W32 W8 -> toI8Reg W32 x
1047 MO_UU_Conv W16 W8 -> toI8Reg W16 x
1048 MO_SS_Conv W16 W8 -> toI8Reg W16 x
1049 MO_UU_Conv W32 W16 -> toI16Reg W32 x
1050 MO_SS_Conv W32 W16 -> toI16Reg W32 x
1052 #if x86_64_TARGET_ARCH
1053 MO_UU_Conv W64 W32 -> conversionNop II64 x
1054 MO_SS_Conv W64 W32 -> conversionNop II64 x
1055 MO_UU_Conv W64 W16 -> toI16Reg W64 x
1056 MO_SS_Conv W64 W16 -> toI16Reg W64 x
1057 MO_UU_Conv W64 W8 -> toI8Reg W64 x
1058 MO_SS_Conv W64 W8 -> toI8Reg W64 x
1061 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1062 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1065 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
1066 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
1067 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
1069 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
1070 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
1071 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
1073 #if x86_64_TARGET_ARCH
1074 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
1075 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1076 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1077 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
1078 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1079 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1080 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1081 -- However, we don't want the register allocator to throw it
1082 -- away as an unnecessary reg-to-reg move, so we keep it in
1083 -- the form of a movzl and print it as a movl later.
1086 #if i386_TARGET_ARCH
1087 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1088 MO_FF_Conv W64 W32 -> conversionNop FF32 x
1090 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1091 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1094 MO_FS_Conv from to -> coerceFP2Int from to x
1095 MO_SF_Conv from to -> coerceInt2FP from to x
1097 other -> pprPanic "getRegister" (pprMachOp mop)
1099 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1100 triv_ucode instr size = trivialUCode size (instr size) x
1102 -- signed or unsigned extension.
1103 integerExtend :: Width -> Width
1104 -> (Size -> Operand -> Operand -> Instr)
1105 -> CmmExpr -> NatM Register
1106 integerExtend from to instr expr = do
1107 (reg,e_code) <- if from == W8 then getByteReg expr
1108 else getSomeReg expr
1112 instr (intSize from) (OpReg reg) (OpReg dst)
1113 return (Any (intSize to) code)
1115 toI8Reg :: Width -> CmmExpr -> NatM Register
1116 toI8Reg new_rep expr
1117 = do codefn <- getAnyReg expr
1118 return (Any (intSize new_rep) codefn)
1119 -- HACK: use getAnyReg to get a byte-addressable register.
1120 -- If the source was a Fixed register, this will add the
1121 -- mov instruction to put it into the desired destination.
1122 -- We're assuming that the destination won't be a fixed
1123 -- non-byte-addressable register; it won't be, because all
1124 -- fixed registers are word-sized.
1126 toI16Reg = toI8Reg -- for now
1128 conversionNop :: Size -> CmmExpr -> NatM Register
1129 conversionNop new_size expr
1130 = do e_code <- getRegister expr
1131 return (swizzleRegisterRep e_code new_size)
1134 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1136 MO_F_Eq w -> condFltReg EQQ x y
1137 MO_F_Ne w -> condFltReg NE x y
1138 MO_F_Gt w -> condFltReg GTT x y
1139 MO_F_Ge w -> condFltReg GE x y
1140 MO_F_Lt w -> condFltReg LTT x y
1141 MO_F_Le w -> condFltReg LE x y
1143 MO_Eq rep -> condIntReg EQQ x y
1144 MO_Ne rep -> condIntReg NE x y
1146 MO_S_Gt rep -> condIntReg GTT x y
1147 MO_S_Ge rep -> condIntReg GE x y
1148 MO_S_Lt rep -> condIntReg LTT x y
1149 MO_S_Le rep -> condIntReg LE x y
1151 MO_U_Gt rep -> condIntReg GU x y
1152 MO_U_Ge rep -> condIntReg GEU x y
1153 MO_U_Lt rep -> condIntReg LU x y
1154 MO_U_Le rep -> condIntReg LEU x y
1156 #if i386_TARGET_ARCH
1157 MO_F_Add w -> trivialFCode w GADD x y
1158 MO_F_Sub w -> trivialFCode w GSUB x y
1159 MO_F_Quot w -> trivialFCode w GDIV x y
1160 MO_F_Mul w -> trivialFCode w GMUL x y
1163 #if x86_64_TARGET_ARCH
1164 MO_F_Add w -> trivialFCode w ADD x y
1165 MO_F_Sub w -> trivialFCode w SUB x y
1166 MO_F_Quot w -> trivialFCode w FDIV x y
1167 MO_F_Mul w -> trivialFCode w MUL x y
1170 MO_Add rep -> add_code rep x y
1171 MO_Sub rep -> sub_code rep x y
1173 MO_S_Quot rep -> div_code rep True True x y
1174 MO_S_Rem rep -> div_code rep True False x y
1175 MO_U_Quot rep -> div_code rep False True x y
1176 MO_U_Rem rep -> div_code rep False False x y
1178 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1180 MO_Mul rep -> triv_op rep IMUL
1181 MO_And rep -> triv_op rep AND
1182 MO_Or rep -> triv_op rep OR
1183 MO_Xor rep -> triv_op rep XOR
1185 {- Shift ops on x86s have constraints on their source, it
1186 either has to be Imm, CL or 1
1187 => trivialCode is not restrictive enough (sigh.)
1189 MO_Shl rep -> shift_code rep SHL x y {-False-}
1190 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1191 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1193 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1195 --------------------
1196 triv_op width instr = trivialCode width op (Just op) x y
1197 where op = instr (intSize width)
1199 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1200 imulMayOflo rep a b = do
1201 (a_reg, a_code) <- getNonClobberedReg a
1202 b_code <- getAnyReg b
1204 shift_amt = case rep of
1207 _ -> panic "shift_amt"
1210 code = a_code `appOL` b_code eax `appOL`
1212 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
1213 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1214 -- sign extend lower part
1215 SUB size (OpReg edx) (OpReg eax)
1216 -- compare against upper
1217 -- eax==0 if high part == sign extended low part
1220 return (Fixed size eax code)
1222 --------------------
1224 -> (Size -> Operand -> Operand -> Instr)
1229 {- Case1: shift length as immediate -}
1230 shift_code width instr x y@(CmmLit lit) = do
1231 x_code <- getAnyReg x
1233 size = intSize width
1235 = x_code dst `snocOL`
1236 instr size (OpImm (litToImm lit)) (OpReg dst)
1238 return (Any size code)
1240 {- Case2: shift length is complex (non-immediate)
1241 * y must go in %ecx.
1242 * we cannot do y first *and* put its result in %ecx, because
1243 %ecx might be clobbered by x.
1244 * if we do y second, then x cannot be
1245 in a clobbered reg. Also, we cannot clobber x's reg
1246 with the instruction itself.
1248 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1249 - do y second and put its result into %ecx. x gets placed in a fresh
1250 tmp. This is likely to be better, because the reg alloc can
1251 eliminate this reg->reg move here (it won't eliminate the other one,
1252 because the move is into the fixed %ecx).
1254 shift_code width instr x y{-amount-} = do
1255 x_code <- getAnyReg x
1256 let size = intSize width
1257 tmp <- getNewRegNat size
1258 y_code <- getAnyReg y
1260 code = x_code tmp `appOL`
1262 instr size (OpReg ecx) (OpReg tmp)
1264 return (Fixed size tmp code)
1266 --------------------
1267 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1268 add_code rep x (CmmLit (CmmInt y _))
1269 | is32BitInteger y = add_int rep x y
1270 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1271 where size = intSize rep
1273 --------------------
1274 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1275 sub_code rep x (CmmLit (CmmInt y _))
1276 | is32BitInteger (-y) = add_int rep x (-y)
1277 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1279 -- our three-operand add instruction:
1280 add_int width x y = do
1281 (x_reg, x_code) <- getSomeReg x
1283 size = intSize width
1284 imm = ImmInt (fromInteger y)
1288 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1291 return (Any size code)
1293 ----------------------
1294 div_code width signed quotient x y = do
1295 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1296 x_code <- getAnyReg x
1298 size = intSize width
1299 widen | signed = CLTD size
1300 | otherwise = XOR size (OpReg edx) (OpReg edx)
1302 instr | signed = IDIV
1305 code = y_code `appOL`
1307 toOL [widen, instr size y_op]
1309 result | quotient = eax
1313 return (Fixed size result code)
1316 getRegister (CmmLoad mem pk)
1319 Amode src mem_code <- getAmode mem
1321 size = cmmTypeSize pk
1322 code dst = mem_code `snocOL`
1323 IF_ARCH_i386(GLD size src dst,
1324 MOV size (OpAddr src) (OpReg dst))
1325 return (Any size code)
1327 #if i386_TARGET_ARCH
1328 getRegister (CmmLoad mem pk)
1331 code <- intLoadCode instr mem
1332 return (Any size code)
1334 width = typeWidth pk
1335 size = intSize width
1336 instr = case width of
1339 -- We always zero-extend 8-bit loads, if we
1340 -- can't think of anything better. This is because
1341 -- we can't guarantee access to an 8-bit variant of every register
1342 -- (esi and edi don't have 8-bit variants), so to make things
1343 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1346 #if x86_64_TARGET_ARCH
1347 -- Simpler memory load code on x86_64
1348 getRegister (CmmLoad mem pk)
1350 code <- intLoadCode (MOV size) mem
1351 return (Any size code)
1352 where size = intSize $ typeWidth pk
1355 getRegister (CmmLit (CmmInt 0 width))
1357 size = intSize width
1359 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1360 adj_size = case size of II64 -> II32; _ -> size
1361 size1 = IF_ARCH_i386( size, adj_size )
1363 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1365 return (Any size code)
1367 #if x86_64_TARGET_ARCH
1368 -- optimisation for loading small literals on x86_64: take advantage
1369 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1370 -- instruction forms are shorter.
1371 getRegister (CmmLit lit)
1372 | isWord64 (cmmLitType lit), not (isBigLit lit)
1375 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1377 return (Any II64 code)
1379 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1381 -- note1: not the same as (not.is32BitLit), because that checks for
1382 -- signed literals that fit in 32 bits, but we want unsigned
1384 -- note2: all labels are small, because we're assuming the
1385 -- small memory model (see gcc docs, -mcmodel=small).
1388 getRegister (CmmLit lit)
1390 size = cmmTypeSize (cmmLitType lit)
1392 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1394 return (Any size code)
1396 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build the code for an integer load from memory: compute the
-- addressing mode for @mem@, then append one @instr@ moving from that
-- address into the eventual destination register (supplied later as
-- the @dst@ argument of the returned function).
1399 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1400 -> NatM (Reg -> InstrBlock)
1401 intLoadCode instr mem = do
1402 Amode src mem_code <- getAmode mem
1403 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1405 -- Compute an expression into *any* register, adding the appropriate
1406 -- move instruction if necessary.
1407 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1409 r <- getRegister expr
-- | Turn a 'Register' into a destination-parameterised instruction
-- sequence: an 'Any' already carries such a function, while a 'Fixed'
-- register gets an extra reg-to-reg move appended so the value ends up
-- in the requested destination.
1412 anyReg :: Register -> NatM (Reg -> InstrBlock)
1413 anyReg (Any _ code) = return code
1414 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1416 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1417 -- Fixed registers might not be byte-addressable, so we make sure we've
1418 -- got a temporary, inserting an extra reg copy if necessary.
1419 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1420 #if x86_64_TARGET_ARCH
1421 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1423 getByteReg expr = do
1424 r <- getRegister expr
1427 tmp <- getNewRegNat rep
1428 return (tmp, code tmp)
1430 | isVirtualReg reg -> return (reg,code)
1432 tmp <- getNewRegNat rep
1433 return (tmp, code `snocOL` reg2reg rep reg tmp)
1434 -- ToDo: could optimise slightly by checking for byte-addressable
1435 -- real registers, but that will happen very rarely if at all.
1438 -- Another variant: this time we want the result in a register that cannot
1439 -- be modified by code to evaluate an arbitrary expression.
1440 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1441 getNonClobberedReg expr = do
1442 r <- getRegister expr
1445 tmp <- getNewRegNat rep
1446 return (tmp, code tmp)
1448 -- only free regs can be clobbered
1449 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1450 tmp <- getNewRegNat rep
1451 return (tmp, code `snocOL` reg2reg rep reg tmp)
1455 reg2reg :: Size -> Reg -> Reg -> Instr
1456 reg2reg size src dst
1457 #if i386_TARGET_ARCH
1458 | isFloatSize size = GMOV src dst
1460 | otherwise = MOV size (OpReg src) (OpReg dst)
1462 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1464 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1466 #if sparc_TARGET_ARCH
1468 -- getRegister :: CmmExpr -> NatM Register
1470 -- Load a literal float into a float register.
1471 -- The actual literal is stored in a new data area, and we load it
1473 getRegister (CmmLit (CmmFloat f W32)) = do
1475 -- a label for the new data area
1476 lbl <- getNewLabelNat
1477 tmp <- getNewRegNat II32
1479 let code dst = toOL [
1483 CmmStaticLit (CmmFloat f W32)],
1486 SETHI (HI (ImmCLbl lbl)) tmp,
1487 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1489 return (Any FF32 code)
1491 getRegister (CmmLit (CmmFloat d W64)) = do
1492 lbl <- getNewLabelNat
1493 tmp <- getNewRegNat II32
1494 let code dst = toOL [
1497 CmmStaticLit (CmmFloat d W64)],
1498 SETHI (HI (ImmCLbl lbl)) tmp,
1499 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1500 return (Any FF64 code)
1502 getRegister (CmmMachOp mop [x]) -- unary MachOps
1504 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
1505 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
1507 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
1508 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
1510 MO_FF_Conv W64 W32-> coerceDbl2Flt x
1511 MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1513 MO_FS_Conv from to -> coerceFP2Int from to x
1514 MO_SF_Conv from to -> coerceInt2FP from to x
1516 -- Conversions which are a nop on sparc
1518 | from == to -> conversionNop (intSize to) x
1519 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1520 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1521 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1523 MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
1524 MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
1525 MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
1528 MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
1529 MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
1530 MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
1532 other_op -> panic ("Unknown unary mach op: " ++ show mop)
1535 -- | sign extend and widen
1537 :: Width -- ^ width of source expression
1538 -> Width -- ^ width of result
1539 -> CmmExpr -- ^ source expression
1542 integerExtend from to expr
1543 = do -- load the expr into some register
1544 (reg, e_code) <- getSomeReg expr
1545 tmp <- getNewRegNat II32
1547 = case (from, to) of
1554 -- local shift word left to load the sign bit
1555 `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
1557 -- arithmetic shift right to sign extend
1558 `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
1560 return (Any (intSize to) code)
1563 conversionNop new_rep expr
1564 = do e_code <- getRegister expr
1565 return (swizzleRegisterRep e_code new_rep)
1567 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1569 MO_Eq rep -> condIntReg EQQ x y
1570 MO_Ne rep -> condIntReg NE x y
1572 MO_S_Gt rep -> condIntReg GTT x y
1573 MO_S_Ge rep -> condIntReg GE x y
1574 MO_S_Lt rep -> condIntReg LTT x y
1575 MO_S_Le rep -> condIntReg LE x y
1577 MO_U_Gt W32 -> condIntReg GTT x y
1578 MO_U_Ge W32 -> condIntReg GE x y
1579 MO_U_Lt W32 -> condIntReg LTT x y
1580 MO_U_Le W32 -> condIntReg LE x y
1582 MO_U_Gt W16 -> condIntReg GU x y
1583 MO_U_Ge W16 -> condIntReg GEU x y
1584 MO_U_Lt W16 -> condIntReg LU x y
1585 MO_U_Le W16 -> condIntReg LEU x y
1587 MO_Add W32 -> trivialCode W32 (ADD False False) x y
1588 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1590 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1592 MO_S_Quot W32 -> idiv True False x y
1593 MO_U_Quot W32 -> idiv False False x y
1595 MO_S_Rem W32 -> irem True x y
1596 MO_U_Rem W32 -> irem False x y
1598 MO_F_Eq w -> condFltReg EQQ x y
1599 MO_F_Ne w -> condFltReg NE x y
1601 MO_F_Gt w -> condFltReg GTT x y
1602 MO_F_Ge w -> condFltReg GE x y
1603 MO_F_Lt w -> condFltReg LTT x y
1604 MO_F_Le w -> condFltReg LE x y
1606 MO_F_Add w -> trivialFCode w FADD x y
1607 MO_F_Sub w -> trivialFCode w FSUB x y
1608 MO_F_Mul w -> trivialFCode w FMUL x y
1609 MO_F_Quot w -> trivialFCode w FDIV x y
1611 MO_And rep -> trivialCode rep (AND False) x y
1612 MO_Or rep -> trivialCode rep (OR False) x y
1613 MO_Xor rep -> trivialCode rep (XOR False) x y
1615 MO_Mul rep -> trivialCode rep (SMUL False) x y
1617 MO_Shl rep -> trivialCode rep SLL x y
1618 MO_U_Shr rep -> trivialCode rep SRL x y
1619 MO_S_Shr rep -> trivialCode rep SRA x y
1622 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1623 [promote x, promote y])
1624 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1625 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1628 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1630 -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1633 -- | Generate an integer division instruction.
1634 idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
1636 -- For unsigned division with a 32 bit numerator,
1637 -- we can just clear the Y register.
1638 idiv False cc x y = do
1639 (a_reg, a_code) <- getSomeReg x
1640 (b_reg, b_code) <- getSomeReg y
1647 , UDIV cc a_reg (RIReg b_reg) dst]
1649 return (Any II32 code)
1652 -- For _signed_ division with a 32 bit numerator,
1653 -- we have to sign extend the numerator into the Y register.
1654 idiv True cc x y = do
1655 (a_reg, a_code) <- getSomeReg x
1656 (b_reg, b_code) <- getSomeReg y
1658 tmp <- getNewRegNat II32
1664 [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
1665 , SRA tmp (RIImm (ImmInt 16)) tmp
1668 , SDIV cc a_reg (RIReg b_reg) dst]
1670 return (Any II32 code)
1673 -- | Do an integer remainder.
1675 -- NOTE: The SPARC v8 architecture manual says that integer division
1676 -- instructions _may_ generate a remainder, depending on the implementation.
1677 -- If so it is _recommended_ that the remainder is placed in the Y register.
1679 -- The UltraSparc 2007 manual says Y is _undefined_ after division.
1681 -- The SPARC T2 doesn't store the remainder, not sure about the others.
1682 -- It's probably best not to worry about it, and just generate our own
1685 irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
1687 -- For unsigned operands:
1688 -- Division is between a 64 bit numerator and a 32 bit denominator,
1689 -- so we still have to clear the Y register.
1691 (a_reg, a_code) <- getSomeReg x
1692 (b_reg, b_code) <- getSomeReg y
1694 tmp_reg <- getNewRegNat II32
1701 , UDIV False a_reg (RIReg b_reg) tmp_reg
1702 , UMUL False tmp_reg (RIReg b_reg) tmp_reg
1703 , SUB False False a_reg (RIReg tmp_reg) dst]
1705 return (Any II32 code)
1708 -- For signed operands:
1709 -- Make sure to sign extend into the Y register, or the remainder
1710 -- will have the wrong sign when the numerator is negative.
1712 -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
1713 -- not the full 32. Not sure why this is, something to do with overflow?
1714 -- If anyone cares enough about the speed of signed remainder they
1715 -- can work it out themselves (then tell me). -- BL 2009/01/20
1718 (a_reg, a_code) <- getSomeReg x
1719 (b_reg, b_code) <- getSomeReg y
1721 tmp1_reg <- getNewRegNat II32
1722 tmp2_reg <- getNewRegNat II32
1728 [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1729 , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1732 , SDIV False a_reg (RIReg b_reg) tmp2_reg
1733 , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
1734 , SUB False False a_reg (RIReg tmp2_reg) dst]
1736 return (Any II32 code)
1739 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1740 imulMayOflo rep a b = do
1741 (a_reg, a_code) <- getSomeReg a
1742 (b_reg, b_code) <- getSomeReg b
1743 res_lo <- getNewRegNat II32
1744 res_hi <- getNewRegNat II32
1746 shift_amt = case rep of
1749 _ -> panic "shift_amt"
1750 code dst = a_code `appOL` b_code `appOL`
1752 SMUL False a_reg (RIReg b_reg) res_lo,
1754 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1755 SUB False False res_lo (RIReg res_hi) dst
1757 return (Any II32 code)
1759 getRegister (CmmLoad mem pk) = do
1760 Amode src code <- getAmode mem
1762 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1763 return (Any (cmmTypeSize pk) code__2)
1765 getRegister (CmmLit (CmmInt i _))
1768 src = ImmInt (fromInteger i)
1769 code dst = unitOL (OR False g0 (RIImm src) dst)
1771 return (Any II32 code)
1773 getRegister (CmmLit lit)
1774 = let rep = cmmLitType lit
1778 OR False dst (RIImm (LO imm)) dst]
1779 in return (Any II32 code)
1781 #endif /* sparc_TARGET_ARCH */
1783 #if powerpc_TARGET_ARCH
1784 getRegister (CmmLoad mem pk)
1787 Amode addr addr_code <- getAmode mem
1788 let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1789 addr_code `snocOL` LD size dst addr
1790 return (Any size code)
1791 where size = cmmTypeSize pk
1793 -- catch simple cases of zero- or sign-extended load
1794 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1795 Amode addr addr_code <- getAmode mem
1796 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1798 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1800 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1801 Amode addr addr_code <- getAmode mem
1802 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
1804 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1805 Amode addr addr_code <- getAmode mem
1806 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1808 getRegister (CmmMachOp mop [x]) -- unary MachOps
1810 MO_Not rep -> triv_ucode_int rep NOT
1812 MO_F_Neg w -> triv_ucode_float w FNEG
1813 MO_S_Neg w -> triv_ucode_int w NEG
1815 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
1816 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1818 MO_FS_Conv from to -> coerceFP2Int from to x
1819 MO_SF_Conv from to -> coerceInt2FP from to x
1822 | from == to -> conversionNop (intSize to) x
1824 -- narrowing is a nop: we treat the high bits as undefined
1825 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1826 MO_SS_Conv W16 W8 -> conversionNop II8 x
1827 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
1828 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1831 | from == to -> conversionNop (intSize to) x
1832 -- narrowing is a nop: we treat the high bits as undefined
1833 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1834 MO_UU_Conv W16 W8 -> conversionNop II8 x
1835 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1836 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1839 triv_ucode_int width instr = trivialUCode (intSize width) instr x
1840 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1842 conversionNop new_size expr
1843 = do e_code <- getRegister expr
1844 return (swizzleRegisterRep e_code new_size)
1846 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1848 MO_F_Eq w -> condFltReg EQQ x y
1849 MO_F_Ne w -> condFltReg NE x y
1850 MO_F_Gt w -> condFltReg GTT x y
1851 MO_F_Ge w -> condFltReg GE x y
1852 MO_F_Lt w -> condFltReg LTT x y
1853 MO_F_Le w -> condFltReg LE x y
1855 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1856 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1858 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1859 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1860 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1861 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1863 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1864 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1865 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1866 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1868 MO_F_Add w -> triv_float w FADD
1869 MO_F_Sub w -> triv_float w FSUB
1870 MO_F_Mul w -> triv_float w FMUL
1871 MO_F_Quot w -> triv_float w FDIV
1873 -- optimize addition with 32-bit immediate
1877 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1878 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1881 (src, srcCode) <- getSomeReg x
1882 let imm = litToImm lit
1883 code dst = srcCode `appOL` toOL [
1884 ADDIS dst src (HA imm),
1885 ADD dst dst (RIImm (LO imm))
1887 return (Any II32 code)
1888 _ -> trivialCode W32 True ADD x y
1890 MO_Add rep -> trivialCode rep True ADD x y
1892 case y of -- subfi ('substract from' with immediate) doesn't exist
1893 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1894 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1895 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1897 MO_Mul rep -> trivialCode rep True MULLW x y
1899 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1901 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1902 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1904 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1905 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1907 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1908 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1910 MO_And rep -> trivialCode rep False AND x y
1911 MO_Or rep -> trivialCode rep False OR x y
1912 MO_Xor rep -> trivialCode rep False XOR x y
1914 MO_Shl rep -> trivialCode rep False SLW x y
1915 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1916 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1918 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1919 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1921 getRegister (CmmLit (CmmInt i rep))
1922 | Just imm <- makeImmediate rep True i
1924 code dst = unitOL (LI dst imm)
1926 return (Any (intSize rep) code)
1928 getRegister (CmmLit (CmmFloat f frep)) = do
1929 lbl <- getNewLabelNat
1930 dflags <- getDynFlagsNat
1931 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1932 Amode addr addr_code <- getAmode dynRef
1933 let size = floatSize frep
1935 LDATA ReadOnlyData [CmmDataLabel lbl,
1936 CmmStaticLit (CmmFloat f frep)]
1937 `consOL` (addr_code `snocOL` LD size dst addr)
1938 return (Any size code)
1940 getRegister (CmmLit lit)
1941 = let rep = cmmLitType lit
1945 ADD dst dst (RIImm (LO imm))
1947 in return (Any (cmmTypeSize rep) code)
1949 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1951 -- extend?Rep: wrap integer expression of type rep
1952 -- in a conversion to II32
-- 'extendSExpr' wraps in a signed (MO_SS_Conv) widening, 'extendUExpr'
-- in an unsigned (MO_UU_Conv) one; a W32 expression is already the
-- target width and is returned unchanged.
1953 extendSExpr W32 x = x
1954 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
1955 extendUExpr W32 x = x
1956 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1958 #endif /* powerpc_TARGET_ARCH */
1961 -- -----------------------------------------------------------------------------
1962 -- The 'Amode' type: Memory addressing modes passed up the tree.
1964 data Amode = Amode AddrMode InstrBlock
1967 Now, given a tree (the argument to a CmmLoad) that references memory,
1968 produce a suitable addressing mode.
1970 A Rule of the Game (tm) for Amodes: use of the addr bit must
1971 immediately follow use of the code part, since the code part puts
1972 values in registers which the addr then refers to. So you can't put
1973 anything in between, lest it overwrite some of those registers. If
1974 you need to do some other computation between the code part and use of
1975 the addr bit, first store the effective address from the amode in a
1976 temporary, then do the other computation, and then use the temporary:
1980 ... other computation ...
-- | Produce an addressing mode for a memory-referencing tree (the
-- argument of a CmmLoad).
1984 getAmode :: CmmExpr -> NatM Amode
-- NOTE(review): presumably mangleIndexTree rewrites the CmmRegOff into
-- an equivalent CmmMachOp addition so the arch-specific clauses below
-- only need to match MachOp shapes -- confirm against mangleIndexTree.
1985 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1987 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1989 #if alpha_TARGET_ARCH
1991 getAmode (StPrim IntSubOp [x, StInt i])
1992 = getNewRegNat PtrRep `thenNat` \ tmp ->
1993 getRegister x `thenNat` \ register ->
1995 code = registerCode register tmp
1996 reg = registerName register tmp
1997 off = ImmInt (-(fromInteger i))
1999 return (Amode (AddrRegImm reg off) code)
2001 getAmode (StPrim IntAddOp [x, StInt i])
2002 = getNewRegNat PtrRep `thenNat` \ tmp ->
2003 getRegister x `thenNat` \ register ->
2005 code = registerCode register tmp
2006 reg = registerName register tmp
2007 off = ImmInt (fromInteger i)
2009 return (Amode (AddrRegImm reg off) code)
2013 = return (Amode (AddrImm imm__2) id)
2016 imm__2 = case imm of Just x -> x
2019 = getNewRegNat PtrRep `thenNat` \ tmp ->
2020 getRegister other `thenNat` \ register ->
2022 code = registerCode register tmp
2023 reg = registerName register tmp
2025 return (Amode (AddrReg reg) code)
2027 #endif /* alpha_TARGET_ARCH */
2029 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2031 #if x86_64_TARGET_ARCH
2033 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
2034 CmmLit displacement])
2035 = return $ Amode (ripRel (litToImm displacement)) nilOL
2039 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2041 -- This is all just ridiculous, since it carefully undoes
2042 -- what mangleIndexTree has just done.
2043 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
2045 -- ASSERT(rep == II32)???
2046 = do (x_reg, x_code) <- getSomeReg x
2047 let off = ImmInt (-(fromInteger i))
2048 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2050 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
2052 -- ASSERT(rep == II32)???
2053 = do (x_reg, x_code) <- getSomeReg x
2054 let off = ImmInt (fromInteger i)
2055 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2057 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
2058 -- recognised by the next rule.
2059 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
2061 = getAmode (CmmMachOp (MO_Add rep) [b,a])
2063 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
2064 [y, CmmLit (CmmInt shift _)]])
2065 | shift == 0 || shift == 1 || shift == 2 || shift == 3
2066 = x86_complex_amode x y shift 0
2068 getAmode (CmmMachOp (MO_Add rep)
2069 [x, CmmMachOp (MO_Add _)
2070 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
2071 CmmLit (CmmInt offset _)]])
2072 | shift == 0 || shift == 1 || shift == 2 || shift == 3
2073 && is32BitInteger offset
2074 = x86_complex_amode x y shift offset
2076 getAmode (CmmMachOp (MO_Add rep) [x,y])
2077 = x86_complex_amode x y 0 0
2079 getAmode (CmmLit lit) | is32BitLit lit
2080 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
2083 (reg,code) <- getSomeReg expr
2084 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
2087 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
2088 x86_complex_amode base index shift offset
2089 = do (x_reg, x_code) <- getNonClobberedReg base
2090 -- x must be in a temp, because it has to stay live over y_code
2091 -- we could compre x_reg and y_reg and do something better here...
2092 (y_reg, y_code) <- getSomeReg index
2094 code = x_code `appOL` y_code
2095 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
2096 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
2099 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2101 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2103 #if sparc_TARGET_ARCH
2105 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
2108 (reg, code) <- getSomeReg x
2110 off = ImmInt (-(fromInteger i))
2111 return (Amode (AddrRegImm reg off) code)
2114 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
2117 (reg, code) <- getSomeReg x
2119 off = ImmInt (fromInteger i)
2120 return (Amode (AddrRegImm reg off) code)
2122 getAmode (CmmMachOp (MO_Add rep) [x, y])
2124 (regX, codeX) <- getSomeReg x
2125 (regY, codeY) <- getSomeReg y
2127 code = codeX `appOL` codeY
2128 return (Amode (AddrRegReg regX regY) code)
2130 getAmode (CmmLit lit)
2132 let imm__2 = litToImm lit
2133 tmp1 <- getNewRegNat II32
2134 tmp2 <- getNewRegNat II32
2136 let code = toOL [ SETHI (HI imm__2) tmp1
2137 , OR False tmp1 (RIImm (LO imm__2)) tmp2]
2139 return (Amode (AddrRegReg tmp2 g0) code)
2143 (reg, code) <- getSomeReg other
2146 return (Amode (AddrRegImm reg off) code)
2148 #endif /* sparc_TARGET_ARCH */
2150 #ifdef powerpc_TARGET_ARCH
-- getAmode (PowerPC): as on SPARC, but immediate displacements must
-- fit the 16-bit signed field; 'makeImmediate' performs that check.
-- Wider literal offsets are split ADDIS-high / low-displacement.
-- NOTE(review): guard and 'do' lines are elided in this listing;
-- code kept byte-identical.
2151 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
2152 | Just off <- makeImmediate W32 True (-i)
2154 (reg, code) <- getSomeReg x
2155 return (Amode (AddrRegImm reg off) code)
2158 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
2159 | Just off <- makeImmediate W32 True i
2161 (reg, code) <- getSomeReg x
2162 return (Amode (AddrRegImm reg off) code)
2164 -- optimize addition with 32-bit immediate
-- HA is the high half adjusted for the sign of LO, so that
-- (src + HA imm) + LO imm == src + imm.
2166 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
2168 tmp <- getNewRegNat II32
2169 (src, srcCode) <- getSomeReg x
2170 let imm = litToImm lit
2171 code = srcCode `snocOL` ADDIS tmp src (HA imm)
2172 return (Amode (AddrRegImm tmp (LO imm)) code)
-- a bare literal address: LIS loads the (adjusted) high half.
2174 getAmode (CmmLit lit)
2176 tmp <- getNewRegNat II32
2177 let imm = litToImm lit
2178 code = unitOL (LIS tmp (HA imm))
2179 return (Amode (AddrRegImm tmp (LO imm)) code)
2181 getAmode (CmmMachOp (MO_Add W32) [x, y])
2183 (regX, codeX) <- getSomeReg x
2184 (regY, codeY) <- getSomeReg y
2185 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- fallback: any other expression into a register
-- (equation head and 'off' binding elided in this listing).
2189 (reg, code) <- getSomeReg other
2192 return (Amode (AddrRegImm reg off) code)
2193 #endif /* powerpc_TARGET_ARCH */
2195 -- -----------------------------------------------------------------------------
2196 -- getOperand: sometimes any operand will do.
2198 -- getNonClobberedOperand: the value of the operand will remain valid across
2199 -- the computation of an arbitrary expression, unless the expression
2200 -- is computed directly into a register which the operand refers to
2201 -- (see trivialCode where this function is used for an example).
2203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2205 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2206 #if x86_64_TARGET_ARCH
-- x86-64 only: a non-zero floating literal becomes a RIP-relative
-- reference to a read-only data label (SSE has no FP immediates).
2207 getNonClobberedOperand (CmmLit lit)
2208 | isSuitableFloatingPointLit lit = do
2209 lbl <- getNewLabelNat
2210 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2212 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- a small non-float literal can be used directly as an immediate.
2214 getNonClobberedOperand (CmmLit lit)
2215 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2216 return (OpImm (litToImm lit), nilOL)
2217 getNonClobberedOperand (CmmLoad mem pk)
2218 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2219 Amode src mem_code <- getAmode mem
-- if the amode names a clobberable real register, LEA the address
-- into a fresh virtual register so later code cannot invalidate it.
2221 if (amodeCouldBeClobbered src)
2223 tmp <- getNewRegNat wordSize
2224 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2225 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2228 return (OpAddr src', save_code `appOL` mem_code)
-- general case: force into a non-clobbered register.
2229 getNonClobberedOperand e = do
2230 (reg, code) <- getNonClobberedReg e
2231 return (OpReg reg, code)
-- True iff the addressing mode mentions a real register that an
-- arbitrary later computation could overwrite.
2233 amodeCouldBeClobbered :: AddrMode -> Bool
2234 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2236 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2237 regClobbered _ = False
2239 -- getOperand: the operand is not required to remain valid across the
2240 -- computation of an arbitrary expression.
2241 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2242 #if x86_64_TARGET_ARCH
-- x86-64 only: floating literal via RIP-relative constant, as in
-- getNonClobberedOperand above.
2243 getOperand (CmmLit lit)
2244 | isSuitableFloatingPointLit lit = do
2245 lbl <- getNewLabelNat
2246 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2248 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2250 getOperand (CmmLit lit)
2251 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2252 return (OpImm (litToImm lit), nilOL)
-- a load can be used as a memory operand directly (no clobber
-- protection needed here).
2253 getOperand (CmmLoad mem pk)
2254 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2255 Amode src mem_code <- getAmode mem
2256 return (OpAddr src, mem_code)
-- fallback: anything else into a register
-- (equation head elided in this listing).
2258 (reg, code) <- getSomeReg e
2259 return (OpReg reg, code)
-- isOperand: can this expression be used as an x86 operand as-is?
2261 isOperand :: CmmExpr -> Bool
2262 isOperand (CmmLoad _ _) = True
2263 isOperand (CmmLit lit) = is32BitLit lit
2264 || isSuitableFloatingPointLit lit
2267 -- if we want a floating-point literal as an operand, we can
2268 -- use it directly from memory. However, if the literal is
2269 -- zero, we're better off generating it into a register using
-- NOTE(review): the tail of the comment above is elided in this
-- listing; hence the f /= 0.0 test below excludes the zero literal.
2271 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2272 isSuitableFloatingPointLit _ = False
-- getRegOrMem: prefer a memory operand for a load, otherwise a
-- non-clobbered register.
2274 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2275 getRegOrMem (CmmLoad mem pk)
2276 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2277 Amode src mem_code <- getAmode mem
2278 return (OpAddr src, mem_code)
-- fallback (equation head elided in this listing).
2280 (reg, code) <- getNonClobberedReg e
2281 return (OpReg reg, code)
2283 #if x86_64_TARGET_ARCH
-- is32BitLit: does the literal fit a 32-bit immediate field?
-- (other equations for this function are elided in this listing)
2284 is32BitLit (CmmInt i W64) = is32BitInteger i
2285 -- assume that labels are in the range 0-2^31-1: this assumes the
2286 -- small memory model (see gcc docs, -mcmodel=small).
2291 is32BitInteger :: Integer -> Bool
2292 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
2293 where i64 = fromIntegral i :: Int64
2294 -- a CmmInt is intended to be truncated to the appropriate
2295 -- number of bits, so here we truncate it to Int64. This is
2296 -- important because e.g. -1 as a CmmInt might be either
2297 -- -1 or 18446744073709551615.
2299 -- -----------------------------------------------------------------------------
2300 -- The 'CondCode' type: Condition codes passed up the tree.
-- The Bool records whether the comparison was floating-point (used by
-- genCondJump below), then the condition to branch on, then the code
-- that sets the condition flags.
2302 data CondCode = CondCode Bool Cond InstrBlock
2304 -- Set up a condition code for a conditional branch.
2306 getCondCode :: CmmExpr -> NatM CondCode
2308 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2310 #if alpha_TARGET_ARCH
2311 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2312 #endif /* alpha_TARGET_ARCH */
2314 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2316 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2317 -- yes, they really do seem to want exactly the same!
-- dispatch each comparison MachOp to condFltCode / condIntCode with
-- the corresponding native condition.
2319 getCondCode (CmmMachOp mop [x, y])
2322 MO_F_Eq W32 -> condFltCode EQQ x y
2323 MO_F_Ne W32 -> condFltCode NE x y
2324 MO_F_Gt W32 -> condFltCode GTT x y
2325 MO_F_Ge W32 -> condFltCode GE x y
2326 MO_F_Lt W32 -> condFltCode LTT x y
2327 MO_F_Le W32 -> condFltCode LE x y
2329 MO_F_Eq W64 -> condFltCode EQQ x y
2330 MO_F_Ne W64 -> condFltCode NE x y
2331 MO_F_Gt W64 -> condFltCode GTT x y
2332 MO_F_Ge W64 -> condFltCode GE x y
2333 MO_F_Lt W64 -> condFltCode LTT x y
2334 MO_F_Le W64 -> condFltCode LE x y
2336 MO_Eq rep -> condIntCode EQQ x y
2337 MO_Ne rep -> condIntCode NE x y
2339 MO_S_Gt rep -> condIntCode GTT x y
2340 MO_S_Ge rep -> condIntCode GE x y
2341 MO_S_Lt rep -> condIntCode LTT x y
2342 MO_S_Le rep -> condIntCode LE x y
-- unsigned comparisons get the unsigned condition variants.
2344 MO_U_Gt rep -> condIntCode GU x y
2345 MO_U_Ge rep -> condIntCode GEU x y
2346 MO_U_Lt rep -> condIntCode LU x y
2347 MO_U_Le rep -> condIntCode LEU x y
2349 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2351 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2353 #elif powerpc_TARGET_ARCH
2355 -- almost the same as everywhere else - but we need to
2356 -- extend small integers to 32 bit first
2358 getCondCode (CmmMachOp mop [x, y])
2360 MO_F_Eq W32 -> condFltCode EQQ x y
2361 MO_F_Ne W32 -> condFltCode NE x y
2362 MO_F_Gt W32 -> condFltCode GTT x y
2363 MO_F_Ge W32 -> condFltCode GE x y
2364 MO_F_Lt W32 -> condFltCode LTT x y
2365 MO_F_Le W32 -> condFltCode LE x y
2367 MO_F_Eq W64 -> condFltCode EQQ x y
2368 MO_F_Ne W64 -> condFltCode NE x y
2369 MO_F_Gt W64 -> condFltCode GTT x y
2370 MO_F_Ge W64 -> condFltCode GE x y
2371 MO_F_Lt W64 -> condFltCode LTT x y
2372 MO_F_Le W64 -> condFltCode LE x y
-- zero-extend for equality (sign-irrelevant), sign-extend for signed
-- compares, zero-extend for unsigned compares.
2374 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2375 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2377 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2378 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2379 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2380 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2382 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2383 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2384 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2385 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2387 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2389 getCondCode other = panic "getCondCode(2)(powerpc)"
2395 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2396 -- passed back up the tree.
2398 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2400 #if alpha_TARGET_ARCH
2401 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2402 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2403 #endif /* alpha_TARGET_ARCH */
2405 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2406 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2408 -- memory vs immediate
-- compare a loaded value against a small literal without going
-- through a register: CMP imm, mem.
2409 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2410 Amode x_addr x_code <- getAmode x
2413 code = x_code `snocOL`
2414 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2416 return (CondCode False cond code)
2418 -- anything vs zero, using a mask
2419 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) compared with 0 becomes a single TEST instruction.
2420 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2421 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2423 (x_reg, x_code) <- getSomeReg x
2425 code = x_code `snocOL`
2426 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2428 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets flags without an immediate.
2431 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2432 (x_reg, x_code) <- getSomeReg x
2434 code = x_code `snocOL`
2435 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2437 return (CondCode False cond code)
2439 -- anything vs operand
-- x must survive evaluation of y, hence getNonClobberedReg.
2440 condIntCode cond x y | isOperand y = do
2441 (x_reg, x_code) <- getNonClobberedReg x
2442 (y_op, y_code) <- getOperand y
2444 code = x_code `appOL` y_code `snocOL`
2445 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2447 return (CondCode False cond code)
2449 -- anything vs anything
2450 condIntCode cond x y = do
2451 (y_reg, y_code) <- getNonClobberedReg y
2452 (x_op, x_code) <- getRegOrMem x
2454 code = y_code `appOL`
2456 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2458 return (CondCode False cond code)
2461 #if i386_TARGET_ARCH
-- i386 (x87): GCMP only supports these six conditions.
2462 condFltCode cond x y
2463 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2464 (x_reg, x_code) <- getNonClobberedReg x
2465 (y_reg, y_code) <- getSomeReg y
2467 code = x_code `appOL` y_code `snocOL`
2468 GCMP cond x_reg y_reg
2469 -- The GCMP insn does the test and sets the zero flag if comparable
2470 -- and true. Hence we always supply EQQ as the condition to test.
2471 return (CondCode True EQQ code)
2472 #endif /* i386_TARGET_ARCH */
2474 #if x86_64_TARGET_ARCH
2475 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2476 -- an operand, but the right must be a reg. We can probably do better
2477 -- than this general case...
2478 condFltCode cond x y = do
2479 (x_reg, x_code) <- getNonClobberedReg x
2480 (y_op, y_code) <- getOperand y
2482 code = x_code `appOL`
2484 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2485 -- NB(1): we need to use the unsigned comparison operators on the
2486 -- result of this comparison.
2488 return (CondCode True (condToUnsigned cond) code)
2491 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2493 #if sparc_TARGET_ARCH
-- SPARC: SUBcc into %g0 sets the condition codes without keeping the
-- result.  (guard line elided in this listing)
2495 condIntCode cond x (CmmLit (CmmInt y rep))
2498 (src1, code) <- getSomeReg x
2500 src2 = ImmInt (fromInteger y)
2501 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2502 return (CondCode False cond code')
2504 condIntCode cond x y = do
2505 (src1, code1) <- getSomeReg x
2506 (src2, code2) <- getSomeReg y
2508 code__2 = code1 `appOL` code2 `snocOL`
2509 SUB False True src1 (RIReg src2) g0
2510 return (CondCode False cond code__2)
-- float compare: if the two operands differ in width, promote the
-- 32-bit one to FF64 with FxTOy before the FCMP.
2513 condFltCode cond x y = do
2514 (src1, code1) <- getSomeReg x
2515 (src2, code2) <- getSomeReg y
2516 tmp <- getNewRegNat FF64
2518 promote x = FxTOy FF32 FF64 x tmp
2524 if pk1 `cmmEqType` pk2 then
2525 code1 `appOL` code2 `snocOL`
2526 FCMP True (cmmTypeSize pk1) src1 src2
2527 else if typeWidth pk1 == W32 then
2528 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2529 FCMP True FF64 tmp src2
2531 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2532 FCMP True FF64 src1 tmp
2533 return (CondCode True cond code__2)
2535 #endif /* sparc_TARGET_ARCH */
2537 #if powerpc_TARGET_ARCH
2538 -- ###FIXME: I16 and I8!
-- use CMPL (logical) for unsigned conditions, CMP for signed.
2539 condIntCode cond x (CmmLit (CmmInt y rep))
2540 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2542 (src1, code) <- getSomeReg x
2544 code' = code `snocOL`
2545 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2546 return (CondCode False cond code')
2548 condIntCode cond x y = do
2549 (src1, code1) <- getSomeReg x
2550 (src2, code2) <- getSomeReg y
2552 code' = code1 `appOL` code2 `snocOL`
2553 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2554 return (CondCode False cond code')
2556 condFltCode cond x y = do
2557 (src1, code1) <- getSomeReg x
2558 (src2, code2) <- getSomeReg y
2560 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2561 code'' = case cond of -- twiddle CR to handle unordered case
2562 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2563 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR bit positions used by the CRNOR fix-up above.
2566 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2567 return (CondCode True cond code'')
2569 #endif /* powerpc_TARGET_ARCH */
2571 -- -----------------------------------------------------------------------------
2572 -- Generating assignments
2574 -- Assignments are really at the heart of the whole code generation
2575 -- business. Almost all top-level nodes of any real importance are
2576 -- assignments, which correspond to loads, stores, or register
2577 -- transfers. If we're really lucky, some of the register transfers
2578 -- will go away, because we can use the destination register to
2579 -- complete the code generation for the right hand side. This only
2580 -- fails when the right hand side is forced into a fixed register
2581 -- (e.g. the result of a call).
2583 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2584 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2586 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2587 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2589 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2591 #if alpha_TARGET_ARCH
-- Alpha (legacy, dead code): uses the old thenNat/Register API.
2593 assignIntCode pk (CmmLoad dst _) src
2594 = getNewRegNat IntRep `thenNat` \ tmp ->
2595 getAmode dst `thenNat` \ amode ->
2596 getRegister src `thenNat` \ register ->
2598 code1 = amodeCode amode []
2599 dst__2 = amodeAddr amode
2600 code2 = registerCode register tmp []
2601 src__2 = registerName register tmp
2602 sz = primRepToSize pk
2603 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2607 assignIntCode pk dst src
2608 = getRegister dst `thenNat` \ register1 ->
2609 getRegister src `thenNat` \ register2 ->
2611 dst__2 = registerName register1 zeroh
2612 code = registerCode register2 dst__2
2613 src__2 = registerName register2 dst__2
2614 code__2 = if isFixed register2
2615 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2620 #endif /* alpha_TARGET_ARCH */
2622 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2624 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2626 -- integer assignment to memory
2628 -- specific case of adding/subtracting an integer to a particular address.
2629 -- ToDo: catch other cases where we can use an operation directly on a memory
-- fuses "mem := mem op imm" (same address both sides) into a single
-- ADD/SUB on memory; only when the immediate fits the field.
2631 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2632 CmmLit (CmmInt i _)])
2633 | addr == addr2, pk /= II64 || is32BitInteger i,
2634 Just instr <- check op
2635 = do Amode amode code_addr <- getAmode addr
2636 let code = code_addr `snocOL`
2637 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2640 check (MO_Add _) = Just ADD
2641 check (MO_Sub _) = Just SUB
2646 assignMem_IntCode pk addr src = do
2647 Amode addr code_addr <- getAmode addr
2648 (code_src, op_src) <- get_op_RI src
2650 code = code_src `appOL`
2652 MOV pk op_src (OpAddr addr)
2653 -- NOTE: op_src is stable, so it will still be valid
2654 -- after code_addr. This may involve the introduction
2655 -- of an extra MOV to a temporary register, but we hope
2656 -- the register allocator will get rid of it.
2660 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2661 get_op_RI (CmmLit lit) | is32BitLit lit
2662 = return (nilOL, OpImm (litToImm lit))
2664 = do (reg,code) <- getNonClobberedReg op
2665 return (code, OpReg reg)
2668 -- Assign; dst is a reg, rhs is mem
2669 assignReg_IntCode pk reg (CmmLoad src _) = do
2670 load_code <- intLoadCode (MOV pk) src
2671 return (load_code (getRegisterReg reg))
2673 -- dst is a reg, but src could be anything
2674 assignReg_IntCode pk reg src = do
2675 code <- getAnyReg src
2676 return (code (getRegisterReg reg))
2678 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2680 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2682 #if sparc_TARGET_ARCH
2684 assignMem_IntCode pk addr src = do
2685 (srcReg, code) <- getSomeReg src
2686 Amode dstAddr addr_code <- getAmode addr
2687 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Any: generate directly into dst; Fixed: move via OR with %g0.
2689 assignReg_IntCode pk reg src = do
2690 r <- getRegister src
2692 Any _ code -> code dst
2693 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2695 dst = getRegisterReg reg
2698 #endif /* sparc_TARGET_ARCH */
2700 #if powerpc_TARGET_ARCH
2702 assignMem_IntCode pk addr src = do
2703 (srcReg, code) <- getSomeReg src
2704 Amode dstAddr addr_code <- getAmode addr
2705 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2707 -- dst is a reg, but src could be anything
2708 assignReg_IntCode pk reg src
2710 r <- getRegister src
2712 Any _ code -> code dst
2713 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2715 dst = getRegisterReg reg
2717 #endif /* powerpc_TARGET_ARCH */
2720 -- -----------------------------------------------------------------------------
2721 -- Floating-point assignments
2723 #if alpha_TARGET_ARCH
-- Alpha (legacy, dead code): old thenNat/Register API, FMOV for
-- fixed-register sources.
2725 assignFltCode pk (CmmLoad dst _) src
2726 = getNewRegNat pk `thenNat` \ tmp ->
2727 getAmode dst `thenNat` \ amode ->
2728 getRegister src `thenNat` \ register ->
2730 code1 = amodeCode amode []
2731 dst__2 = amodeAddr amode
2732 code2 = registerCode register tmp []
2733 src__2 = registerName register tmp
2734 sz = primRepToSize pk
2735 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2739 assignFltCode pk dst src
2740 = getRegister dst `thenNat` \ register1 ->
2741 getRegister src `thenNat` \ register2 ->
2743 dst__2 = registerName register1 zeroh
2744 code = registerCode register2 dst__2
2745 src__2 = registerName register2 dst__2
2746 code__2 = if isFixed register2
2747 then code . mkSeqInstr (FMOV src__2 dst__2)
2752 #endif /* alpha_TARGET_ARCH */
2754 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2756 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2758 -- Floating point assignment to memory
-- i386 uses the x87 store (GST); x86-64 a plain SSE MOV to memory.
2759 assignMem_FltCode pk addr src = do
2760 (src_reg, src_code) <- getNonClobberedReg src
2761 Amode addr addr_code <- getAmode addr
2763 code = src_code `appOL`
2765 IF_ARCH_i386(GST pk src_reg addr,
2766 MOV pk (OpReg src_reg) (OpAddr addr))
2769 -- Floating point assignment to a register/temporary
2770 assignReg_FltCode pk reg src = do
2771 src_code <- getAnyReg src
2772 return (src_code (getRegisterReg reg))
2774 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2776 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2778 #if sparc_TARGET_ARCH
2780 -- Floating point assignment to memory
-- if the source width differs from the store width, convert through
-- a temporary with FxTOy first.
2781 assignMem_FltCode pk addr src = do
2782 Amode dst__2 code1 <- getAmode addr
2783 (src__2, code2) <- getSomeReg src
2784 tmp1 <- getNewRegNat pk
2786 pk__2 = cmmExprType src
2787 code__2 = code1 `appOL` code2 `appOL`
2788 if sizeToWidth pk == typeWidth pk__2
2789 then unitOL (ST pk src__2 dst__2)
2790 else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2791 , ST pk tmp1 dst__2]
2794 -- Floating point assignment to a register/temporary
2795 assignReg_FltCode pk dstCmmReg srcCmmExpr = do
2796 srcRegister <- getRegister srcCmmExpr
2797 let dstReg = getRegisterReg dstCmmReg
2799 return $ case srcRegister of
2800 Any _ code -> code dstReg
2801 Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
2803 #endif /* sparc_TARGET_ARCH */
2805 #if powerpc_TARGET_ARCH
-- PowerPC: ST/MR work uniformly for FP registers, so the integer
-- versions are reused directly.
2808 assignMem_FltCode = assignMem_IntCode
2809 assignReg_FltCode = assignReg_IntCode
2811 #endif /* powerpc_TARGET_ARCH */
2814 -- -----------------------------------------------------------------------------
2815 -- Generating an non-local jump
2817 -- (If applicable) Do not fill the delay slots here; you will confuse the
2818 -- register allocator.
2820 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2822 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2824 #if alpha_TARGET_ARCH
-- Alpha (legacy, dead code).
2826 genJump (CmmLabel lbl)
2827 | isAsmTemp lbl = returnInstr (BR target)
2828 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2830 target = ImmCLbl lbl
2833 = getRegister tree `thenNat` \ register ->
2834 getNewRegNat PtrRep `thenNat` \ tmp ->
2836 dst = registerName register pv
2837 code = registerCode register pv
2838 target = registerName register pv
2840 if isFixed register then
2841 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2843 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2845 #endif /* alpha_TARGET_ARCH */
2847 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2849 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump via memory operand, immediate label, or register.
2851 genJump (CmmLoad mem pk) = do
2852 Amode target code <- getAmode mem
2853 return (code `snocOL` JMP (OpAddr target))
2855 genJump (CmmLit lit) = do
2856 return (unitOL (JMP (OpImm (litToImm lit))))
-- fallback (equation head elided in this listing).
2859 (reg,code) <- getSomeReg expr
2860 return (code `snocOL` JMP (OpReg reg))
2862 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2864 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2866 #if sparc_TARGET_ARCH
-- SPARC: branch/jump instructions have a delay slot, filled with NOP
-- here (see module comment: the register allocator fills no slots).
2868 genJump (CmmLit (CmmLabel lbl))
2869 = return (toOL [CALL (Left target) 0 True, NOP])
2871 target = ImmCLbl lbl
2875 (target, code) <- getSomeReg tree
2876 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2878 #endif /* sparc_TARGET_ARCH */
2880 #if powerpc_TARGET_ARCH
-- PowerPC: computed jumps go through the count register (MTCTR/BCTR).
2881 genJump (CmmLit (CmmLabel lbl))
2882 = return (unitOL $ JMP lbl)
2886 (target,code) <- getSomeReg tree
2887 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2888 #endif /* powerpc_TARGET_ARCH */
2891 -- -----------------------------------------------------------------------------
2892 -- Unconditional branches
-- genBranch: emit the arch-specific unconditional branch to a local
-- block label (mkBranchInstr comes from RegAllocInfo).
2894 genBranch :: BlockId -> NatM InstrBlock
2896 genBranch = return . toOL . mkBranchInstr
2898 -- -----------------------------------------------------------------------------
2899 -- Conditional jumps
-- NOTE(review): the lines below are the interior of a {- ... -} block
-- comment whose delimiters are elided in this listing.
2902 Conditional jumps are always to local labels, so we can use branch
2903 instructions. We peek at the arguments to decide what kind of
2906 ALPHA: For comparisons with 0, we're laughing, because we can just do
2907 the desired conditional branch.
2909 I386: First, we have to ensure that the condition
2910 codes are set according to the supplied comparison operation.
2912 SPARC: First, we have to ensure that the condition codes are set
2913 according to the supplied comparison operation. We generate slightly
2914 different code for floating point comparisons, because a floating
2915 point operation cannot directly precede a @BF@. We assume the worst
2916 and fill that slot with a @NOP@.
2918 SPARC: Do not fill the delay slots here; you will confuse the register
-- genCondJump's type signature (its name line is elided here).
2924 :: BlockId -- the branch target
2925 -> CmmExpr -- the condition on which to branch
2928 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2930 #if alpha_TARGET_ARCH
-- Alpha (legacy, dead code): operates on the pre-Cmm Stix AST
-- (StPrim/StInt/StDouble) with the old thenNat API.
-- NOTE(review): several cmpOp equations and comment lines are elided
-- in this listing; the ALWAYS/NEVER results for some Word/Addr
-- comparisons presumably came with explanatory comments that are now
-- missing -- verify against the original source before relying on them.
2932 genCondJump id (StPrim op [x, StInt 0])
2933 = getRegister x `thenNat` \ register ->
2934 getNewRegNat (registerRep register)
2937 code = registerCode register tmp
2938 value = registerName register tmp
2939 pk = registerRep register
2940 target = ImmCLbl lbl
2942 returnSeq code [BI (cmpOp op) value target]
2944 cmpOp CharGtOp = GTT
2946 cmpOp CharEqOp = EQQ
2948 cmpOp CharLtOp = LTT
2957 cmpOp WordGeOp = ALWAYS
2958 cmpOp WordEqOp = EQQ
2960 cmpOp WordLtOp = NEVER
2961 cmpOp WordLeOp = EQQ
2963 cmpOp AddrGeOp = ALWAYS
2964 cmpOp AddrEqOp = EQQ
2966 cmpOp AddrLtOp = NEVER
2967 cmpOp AddrLeOp = EQQ
-- float compare against literal 0.0: branch on FP condition (BF).
2969 genCondJump lbl (StPrim op [x, StDouble 0.0])
2970 = getRegister x `thenNat` \ register ->
2971 getNewRegNat (registerRep register)
2974 code = registerCode register tmp
2975 value = registerName register tmp
2976 pk = registerRep register
2977 target = ImmCLbl lbl
2979 return (code . mkSeqInstr (BF (cmpOp op) value target))
2981 cmpOp FloatGtOp = GTT
2982 cmpOp FloatGeOp = GE
2983 cmpOp FloatEqOp = EQQ
2984 cmpOp FloatNeOp = NE
2985 cmpOp FloatLtOp = LTT
2986 cmpOp FloatLeOp = LE
2987 cmpOp DoubleGtOp = GTT
2988 cmpOp DoubleGeOp = GE
2989 cmpOp DoubleEqOp = EQQ
2990 cmpOp DoubleNeOp = NE
2991 cmpOp DoubleLtOp = LTT
2992 cmpOp DoubleLeOp = LE
-- general float compare: FCMP computes the negated relation, so the
-- branch condition is inverted (e.g. Gt -> compare LE, branch EQQ=0).
2994 genCondJump lbl (StPrim op [x, y])
2996 = trivialFCode pr instr x y `thenNat` \ register ->
2997 getNewRegNat FF64 `thenNat` \ tmp ->
2999 code = registerCode register tmp
3000 result = registerName register tmp
3001 target = ImmCLbl lbl
3003 return (code . mkSeqInstr (BF cond result target))
3005 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
3007 fltCmpOp op = case op of
3021 (instr, cond) = case op of
3022 FloatGtOp -> (FCMP TF LE, EQQ)
3023 FloatGeOp -> (FCMP TF LTT, EQQ)
3024 FloatEqOp -> (FCMP TF EQQ, NE)
3025 FloatNeOp -> (FCMP TF EQQ, EQQ)
3026 FloatLtOp -> (FCMP TF LTT, NE)
3027 FloatLeOp -> (FCMP TF LE, NE)
3028 DoubleGtOp -> (FCMP TF LE, EQQ)
3029 DoubleGeOp -> (FCMP TF LTT, EQQ)
3030 DoubleEqOp -> (FCMP TF EQQ, NE)
3031 DoubleNeOp -> (FCMP TF EQQ, EQQ)
3032 DoubleLtOp -> (FCMP TF LTT, NE)
3033 DoubleLeOp -> (FCMP TF LE, NE)
-- general integer compare: same negated-relation scheme with CMP/BI.
3035 genCondJump lbl (StPrim op [x, y])
3036 = trivialCode instr x y `thenNat` \ register ->
3037 getNewRegNat IntRep `thenNat` \ tmp ->
3039 code = registerCode register tmp
3040 result = registerName register tmp
3041 target = ImmCLbl lbl
3043 return (code . mkSeqInstr (BI cond result target))
3045 (instr, cond) = case op of
3046 CharGtOp -> (CMP LE, EQQ)
3047 CharGeOp -> (CMP LTT, EQQ)
3048 CharEqOp -> (CMP EQQ, NE)
3049 CharNeOp -> (CMP EQQ, EQQ)
3050 CharLtOp -> (CMP LTT, NE)
3051 CharLeOp -> (CMP LE, NE)
3052 IntGtOp -> (CMP LE, EQQ)
3053 IntGeOp -> (CMP LTT, EQQ)
3054 IntEqOp -> (CMP EQQ, NE)
3055 IntNeOp -> (CMP EQQ, EQQ)
3056 IntLtOp -> (CMP LTT, NE)
3057 IntLeOp -> (CMP LE, NE)
3058 WordGtOp -> (CMP ULE, EQQ)
3059 WordGeOp -> (CMP ULT, EQQ)
3060 WordEqOp -> (CMP EQQ, NE)
3061 WordNeOp -> (CMP EQQ, EQQ)
3062 WordLtOp -> (CMP ULT, NE)
3063 WordLeOp -> (CMP ULE, NE)
3064 AddrGtOp -> (CMP ULE, EQQ)
3065 AddrGeOp -> (CMP ULT, EQQ)
3066 AddrEqOp -> (CMP EQQ, NE)
3067 AddrNeOp -> (CMP EQQ, EQQ)
3068 AddrLtOp -> (CMP ULT, NE)
3069 AddrLeOp -> (CMP ULE, NE)
3071 #endif /* alpha_TARGET_ARCH */
3073 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3075 #if i386_TARGET_ARCH
-- i386: the CondCode float flag is irrelevant here (x87 GCMP already
-- folded the result into the zero flag), so a single JXX suffices.
3077 genCondJump id bool = do
3078 CondCode _ cond code <- getCondCode bool
3079 return (code `snocOL` JXX cond id)
3083 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3085 #if x86_64_TARGET_ARCH
-- x86-64: float comparisons need extra care for the unordered (NaN)
-- case; the plain/or_unordered/and_ordered sequences handle it
-- (bodies partially elided in this listing).
3087 genCondJump id bool = do
3088 CondCode is_float cond cond_code <- getCondCode bool
3091 return (cond_code `snocOL` JXX cond id)
3093 lbl <- getBlockIdNat
3095 -- see comment with condFltReg
3096 let code = case cond of
3102 plain_test = unitOL (
3105 or_unordered = toOL [
3109 and_ordered = toOL [
3115 return (cond_code `appOL` code)
3119 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3121 #if sparc_TARGET_ARCH
-- SPARC: float branches (BF) cannot immediately follow the FP compare,
-- so a NOP is inserted first; every branch gets a NOP delay slot.
3123 genCondJump bid bool = do
3124 CondCode is_float cond code <- getCondCode bool
3129 then [NOP, BF cond False bid, NOP]
3130 else [BI cond False bid, NOP]
3134 #endif /* sparc_TARGET_ARCH */
3137 #if powerpc_TARGET_ARCH
3139 genCondJump id bool = do
3140 CondCode is_float cond code <- getCondCode bool
3141 return (code `snocOL` BCC cond id)
3143 #endif /* powerpc_TARGET_ARCH */
3146 -- -----------------------------------------------------------------------------
3147 -- Generating C calls
3149 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
3150 -- @get_arg@, which moves the arguments to the correct registers/stack
3151 -- locations. Apart from that, the code is easy.
3153 -- (If applicable) Do not fill the delay slots here; you will confuse the
3154 -- register allocator.
-- genCCall's type signature (its name line is elided in this listing).
3157 :: CmmCallTarget -- function to call
3158 -> HintedCmmFormals -- where to put the result
3159 -> HintedCmmActuals -- arguments (of mixed type)
3162 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3164 #if alpha_TARGET_ARCH
-- Alpha (legacy, dead code): thread argument registers + stack offset
-- through get_arg, then LDA/JSR/LDGP for the actual call.
3168 genCCall fn cconv result_regs args
3169 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3170 `thenNat` \ ((unused,_), argCode) ->
3172 nRegs = length allArgRegs - length unused
3173 code = asmSeqThen (map ($ []) argCode)
3176 LDA pv (AddrImm (ImmLab (ptext fn))),
3177 JSR ra (AddrReg pv) nRegs,
3178 LDGP gp (AddrReg ra)]
3180 ------------------------
3181 {- Try to get a value into a specific register (or registers) for
3182 a call. The first 6 arguments go into the appropriate
3183 argument register (separate registers for integer and floating
3184 point arguments, but used in lock-step), and the remaining
3185 arguments are dumped to the stack, beginning at 0(sp). Our
3186 first argument is a pair of the list of remaining argument
3187 registers to be assigned for this call and the next stack
3188 offset to use for overflowing arguments. This way,
3189 @get_Arg@ can be applied to all of a call's arguments using
3193 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3194 -> StixTree -- Current argument
3195 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3197 -- We have to use up all of our argument registers first...
3199 get_arg ((iDst,fDst):dsts, offset) arg
3200 = getRegister arg `thenNat` \ register ->
3202 reg = if isFloatType pk then fDst else iDst
3203 code = registerCode register reg
3204 src = registerName register reg
3205 pk = registerRep register
3208 if isFloatType pk then
3209 ((dsts, offset), if isFixed register then
3210 code . mkSeqInstr (FMOV src fDst)
3213 ((dsts, offset), if isFixed register then
3214 code . mkSeqInstr (OR src (RIReg src) iDst)
3217 -- Once we have run out of argument registers, we move to the
-- ...stack: each overflow argument is stored at the next offset.
3220 get_arg ([], offset) arg
3221 = getRegister arg `thenNat` \ register ->
3222 getNewRegNat (registerRep register)
3225 code = registerCode register tmp
3226 src = registerName register tmp
3227 pk = registerRep register
3228 sz = primRepToSize pk
3230 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3232 #endif /* alpha_TARGET_ARCH */
3234 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3236 #if i386_TARGET_ARCH
3238 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3239 -- write barrier compiles to no code on x86/x86-64;
3240 -- we keep it this long in order to prevent earlier optimisations.
3242 -- we only cope with a single result for foreign calls
3243 genCCall (CmmPrim op) [CmmHinted r _] args = do
3244 l1 <- getNewLabelNat
3245 l2 <- getNewLabelNat
3247 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3248 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3250 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3251 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3253 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3254 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3256 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3257 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3259 other_op -> outOfLineFloatOp op r args
3261 actuallyInlineFloatOp instr size [CmmHinted x _]
3262 = do res <- trivialUFCode size (instr size) x
3264 return (any (getRegisterReg (CmmLocal r)))
3266 genCCall target dest_regs args = do
3268 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3269 #if !darwin_TARGET_OS
3270 tot_arg_size = sum sizes
3272 raw_arg_size = sum sizes
3273 tot_arg_size = roundTo 16 raw_arg_size
3274 arg_pad_size = tot_arg_size - raw_arg_size
3275 delta0 <- getDeltaNat
3276 setDeltaNat (delta0 - arg_pad_size)
3279 push_codes <- mapM push_arg (reverse args)
3280 delta <- getDeltaNat
3283 -- deal with static vs dynamic call targets
3284 (callinsns,cconv) <-
3287 CmmCallee (CmmLit (CmmLabel lbl)) conv
3288 -> -- ToDo: stdcall arg sizes
3289 return (unitOL (CALL (Left fn_imm) []), conv)
3290 where fn_imm = ImmCLbl lbl
3292 -> do { (dyn_c, dyn_r) <- get_op expr
3293 ; ASSERT( isWord32 (cmmExprType expr) )
3294 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3297 #if darwin_TARGET_OS
3299 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3300 DELTA (delta0 - arg_pad_size)]
3301 `appOL` concatOL push_codes
3304 = concatOL push_codes
3305 call = callinsns `appOL`
3307 -- Deallocate parameters after call for ccall;
3308 -- but not for stdcall (callee does it)
3309 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3310 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3312 [DELTA (delta + tot_arg_size)]
3315 setDeltaNat (delta + tot_arg_size)
3318 -- assign the results, if necessary
3319 assign_code [] = nilOL
3320 assign_code [CmmHinted dest _hint]
3321 | isFloatType ty = unitOL (GMOV fake0 r_dest)
3322 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3323 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3324 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3326 ty = localRegType dest
3328 r_dest_hi = getHiVRegFromLo r_dest
3329 r_dest = getRegisterReg (CmmLocal dest)
3330 assign_code many = panic "genCCall.assign_code many"
3332 return (push_code `appOL`
3334 assign_code dest_regs)
3337 arg_size :: CmmType -> Int -- Width in bytes
3338 arg_size ty = widthInBytes (typeWidth ty)
3340 roundTo a x | x `mod` a == 0 = x
3341 | otherwise = x + a - (x `mod` a)
3344 push_arg :: HintedCmmActual {-current argument-}
3345 -> NatM InstrBlock -- code
3347 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3348 | isWord64 arg_ty = do
3349 ChildCode64 code r_lo <- iselExpr64 arg
3350 delta <- getDeltaNat
3351 setDeltaNat (delta - 8)
3353 r_hi = getHiVRegFromLo r_lo
3355 return ( code `appOL`
3356 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3357 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3362 (code, reg) <- get_op arg
3363 delta <- getDeltaNat
3364 let size = arg_size arg_ty -- Byte size
3365 setDeltaNat (delta-size)
3366 if (isFloatType arg_ty)
3367 then return (code `appOL`
3368 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3370 GST (floatSize (typeWidth arg_ty))
3371 reg (AddrBaseIndex (EABaseReg esp)
3375 else return (code `snocOL`
3376 PUSH II32 (OpReg reg) `snocOL`
3380 arg_ty = cmmExprType arg
3383 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3385 (reg,code) <- getSomeReg op
3388 #endif /* i386_TARGET_ARCH */
3390 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Implement a floating-point CallishMachOp by a call to a C library
-- routine (sinf/sin, cosf/cos, ...).  Used on x86/x86-64 for the ops we
-- do not inline.  For an FF32 result it goes via a fresh FF64 temporary,
-- then assigns down to the real destination (the C routines called for
-- the *f names still fit this scheme on these platforms).
-- NOTE(review): the leading integers are listing line numbers; gaps in
-- them (e.g. between 3398 and 3400) mark source lines elided from this
-- excerpt, so the body below is not the complete definition.
3392 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3394 outOfLineFloatOp mop res args
3396 dflags <- getDynFlagsNat
3397 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3398 let target = CmmCallee targetExpr CCallConv
-- 64-bit result: call directly into the destination register.
3400 if isFloat64 (localRegType res)
3402 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
-- 32-bit result: call into a fresh FF64 temp, then narrow by assignment.
3406 tmp = LocalReg uq f64
3408 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3409 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3410 return (code1 `appOL` code2)
3412 lbl = mkForeignLabel fn Nothing False IsFunction
-- Table of C libm entry points, one per CallishMachOp.
3415 MO_F32_Sqrt -> fsLit "sqrtf"
3416 MO_F32_Sin -> fsLit "sinf"
3417 MO_F32_Cos -> fsLit "cosf"
3418 MO_F32_Tan -> fsLit "tanf"
3419 MO_F32_Exp -> fsLit "expf"
3420 MO_F32_Log -> fsLit "logf"
3422 MO_F32_Asin -> fsLit "asinf"
3423 MO_F32_Acos -> fsLit "acosf"
3424 MO_F32_Atan -> fsLit "atanf"
3426 MO_F32_Sinh -> fsLit "sinhf"
3427 MO_F32_Cosh -> fsLit "coshf"
3428 MO_F32_Tanh -> fsLit "tanhf"
3429 MO_F32_Pwr -> fsLit "powf"
3431 MO_F64_Sqrt -> fsLit "sqrt"
3432 MO_F64_Sin -> fsLit "sin"
3433 MO_F64_Cos -> fsLit "cos"
3434 MO_F64_Tan -> fsLit "tan"
3435 MO_F64_Exp -> fsLit "exp"
3436 MO_F64_Log -> fsLit "log"
3438 MO_F64_Asin -> fsLit "asin"
3439 MO_F64_Acos -> fsLit "acos"
3440 MO_F64_Atan -> fsLit "atan"
3442 MO_F64_Sinh -> fsLit "sinh"
3443 MO_F64_Cosh -> fsLit "cosh"
3444 MO_F64_Tanh -> fsLit "tanh"
3445 MO_F64_Pwr -> fsLit "pow"
3447 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3449 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3451 #if x86_64_TARGET_ARCH
3453 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3454 -- write barrier compiles to no code on x86/x86-64;
3455 -- we keep it this long in order to prevent earlier optimisations.
3458 genCCall (CmmPrim op) [CmmHinted r _] args =
3459 outOfLineFloatOp op r args
3461 genCCall target dest_regs args = do
3463 -- load up the register arguments
3464 (stack_args, aregs, fregs, load_args_code)
3465 <- load_args args allArgRegs allFPArgRegs nilOL
3468 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3469 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3470 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3471 -- for annotating the call instruction with
3473 sse_regs = length fp_regs_used
3475 tot_arg_size = arg_size * length stack_args
3477 -- On entry to the called function, %rsp should be aligned
3478 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3479 -- the return address is 16-byte aligned). In STG land
3480 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3481 -- need to make sure we push a multiple of 16-bytes of args,
3482 -- plus the return address, to get the correct alignment.
3483 -- Urg, this is hard. We need to feed the delta back into
3484 -- the arg pushing code.
3485 (real_size, adjust_rsp) <-
3486 if tot_arg_size `rem` 16 == 0
3487 then return (tot_arg_size, nilOL)
3488 else do -- we need to adjust...
3489 delta <- getDeltaNat
3490 setDeltaNat (delta-8)
3491 return (tot_arg_size+8, toOL [
3492 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3496 -- push the stack args, right to left
3497 push_code <- push_args (reverse stack_args) nilOL
3498 delta <- getDeltaNat
3500 -- deal with static vs dynamic call targets
3501 (callinsns,cconv) <-
3504 CmmCallee (CmmLit (CmmLabel lbl)) conv
3505 -> -- ToDo: stdcall arg sizes
3506 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3507 where fn_imm = ImmCLbl lbl
3509 -> do (dyn_r, dyn_c) <- getSomeReg expr
3510 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3513 -- The x86_64 ABI requires us to set %al to the number of SSE
3514 -- registers that contain arguments, if the called routine
3515 -- is a varargs function. We don't know whether it's a
3516 -- varargs function or not, so we have to assume it is.
3518 -- It's not safe to omit this assignment, even if the number
3519 -- of SSE regs in use is zero. If %al is larger than 8
3520 -- on entry to a varargs function, seg faults ensue.
3521 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3523 let call = callinsns `appOL`
3525 -- Deallocate parameters after call for ccall;
3526 -- but not for stdcall (callee does it)
3527 (if cconv == StdCallConv || real_size==0 then [] else
3528 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3530 [DELTA (delta + real_size)]
3533 setDeltaNat (delta + real_size)
3536 -- assign the results, if necessary
-- assign_code (x86_64): move the result of a C call from its ABI return
-- register into the destination local register.  Per the System V AMD64
-- calling convention, float results come back in %xmm0 and integer
-- results in %rax.  Multiple results are not supported.
-- NOTE(review): the leading integers on each line are part of this
-- numbered listing; gaps in them mark lines elided from the excerpt.
3537 assign_code [] = nilOL
3538 assign_code [CmmHinted dest _hint] =
3539 case typeWidth rep of
3540 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
-- BUG FIX: a 64-bit float result must be moved with a 64-bit float move
-- (floatSize W64, i.e. movsd); the original used floatSize W32 here,
-- which moves only the low 32 bits of the double sitting in %xmm0.
3541 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
3542 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3544 rep = localRegType dest
3545 r_dest = getRegisterReg (CmmLocal dest)
3546 assign_code many = panic "genCCall.assign_code many"
3548 return (load_args_code `appOL`
3551 assign_eax sse_regs `appOL`
3553 assign_code dest_regs)
3556 arg_size = 8 -- always, at the mo
3558 load_args :: [CmmHinted CmmExpr]
3559 -> [Reg] -- int regs avail for args
3560 -> [Reg] -- FP regs avail for args
3562 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3563 load_args args [] [] code = return (args, [], [], code)
3564 -- no more regs to use
3565 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3566 -- no more args to push
3567 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3568 | isFloatType arg_rep =
3572 arg_code <- getAnyReg arg
3573 load_args rest aregs rs (code `appOL` arg_code r)
3578 arg_code <- getAnyReg arg
3579 load_args rest rs fregs (code `appOL` arg_code r)
3581 arg_rep = cmmExprType arg
3584 (args',ars,frs,code') <- load_args rest aregs fregs code
3585 return ((CmmHinted arg hint):args', ars, frs, code')
3587 push_args [] code = return code
3588 push_args ((CmmHinted arg hint):rest) code
3589 | isFloatType arg_rep = do
3590 (arg_reg, arg_code) <- getSomeReg arg
3591 delta <- getDeltaNat
3592 setDeltaNat (delta-arg_size)
3593 let code' = code `appOL` arg_code `appOL` toOL [
3594 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3595 DELTA (delta-arg_size),
3596 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3597 push_args rest code'
3600 -- we only ever generate word-sized function arguments. Promotion
3601 -- has already happened: our Int8# type is kept sign-extended
3602 -- in an Int#, for example.
3603 ASSERT(width == W64) return ()
3604 (arg_op, arg_code) <- getOperand arg
3605 delta <- getDeltaNat
3606 setDeltaNat (delta-arg_size)
3607 let code' = code `appOL` arg_code `appOL` toOL [
3609 DELTA (delta-arg_size)]
3610 push_args rest code'
3612 arg_rep = cmmExprType arg
3613 width = typeWidth arg_rep
3616 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3618 #if sparc_TARGET_ARCH
3620 The SPARC calling convention is an absolute
3621 nightmare. The first 6x32 bits of arguments are mapped into
3622 %o0 through %o5, and the remaining arguments are dumped to the
3623 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3625 If we have to put args on the stack, move %o6==%sp down by
3626 the number of words to go on the stack, to ensure there's enough space.
3628 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3629 16 words above the stack pointer is a word for the address of
3630 a structure return value. I use this as a temporary location
3631 for moving values from float to int regs. Certainly it isn't
3632 safe to put anything in the 16 words starting at %sp, since
3633 this area can get trashed at any time due to window overflows
3634 caused by signal handlers.
3636 A final complication (if the above isn't enough) is that
3637 we can't blithely calculate the arguments one by one into
3638 %o0 .. %o5. Consider the following nested calls:
3642 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3643 the inner call will itself use %o0, which trashes the value put there
3644 in preparation for the outer call. Upshot: we need to calculate the
3645 args into temporary regs, and move those to arg regs or onto the
3646 stack only immediately prior to the call proper. Sigh.
3649 :: CmmCallTarget -- function to call
3650 -> HintedCmmFormals -- where to put the result
3651 -> HintedCmmActuals -- arguments (of mixed type)
3657 -- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
3658 -- are guaranteed to take place before writes afterwards (unlike on PowerPC).
3659 -- Ref: Section 8.4 of the SPARC V9 Architecture manual.
3661 -- In the SPARC case we don't need a barrier.
3663 genCCall (CmmPrim (MO_WriteBarrier)) _ _
3666 genCCall target dest_regs argsAndHints
3668 -- strip hints from the arg regs
3669 let args :: [CmmExpr]
3670 args = map hintlessCmm argsAndHints
3673 -- work out the arguments, and assign them to integer regs
3674 argcode_and_vregs <- mapM arg_to_int_vregs args
3675 let (argcodes, vregss) = unzip argcode_and_vregs
3676 let vregs = concat vregss
3678 let n_argRegs = length allArgRegs
3679 let n_argRegs_used = min (length vregs) n_argRegs
3682 -- deal with static vs dynamic call targets
3683 callinsns <- case target of
3684 CmmCallee (CmmLit (CmmLabel lbl)) conv ->
3685 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3688 -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3689 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3692 -> do res <- outOfLineFloatOp mop
3693 lblOrMopExpr <- case res of
3695 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3698 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3699 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3703 let argcode = concatOL argcodes
3705 let (move_sp_down, move_sp_up)
3706 = let diff = length vregs - n_argRegs
3707 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3710 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3713 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3717 move_sp_down `appOL`
3718 transfer_code `appOL`
3722 assign_code dest_regs
3725 -- | Generate code to calculate an argument, and move it into one
3726 -- or two integer vregs.
-- (SPARC only.)  All C-call arguments are marshalled through integer
-- virtual registers first; floats are bounced through a reserved stack
-- slot at [%sp+16] to move their bits into integer regs (see the long
-- comment on the SPARC calling convention earlier in this section).
-- NOTE(review): listing line numbers show gaps; several source lines
-- (guards, 'let' heads, case alternatives) are elided from this excerpt.
3727 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3728 arg_to_int_vregs arg
3730 -- If the expr produces a 64 bit int, then we can just use iselExpr64
3731 | isWord64 (cmmExprType arg)
3732 = do (ChildCode64 code r_lo) <- iselExpr64 arg
3733 let r_hi = getHiVRegFromLo r_lo
3734 return (code, [r_hi, r_lo])
3737 = do (src, code) <- getSomeReg arg
3738 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3739 let pk = cmmExprType arg
3741 case cmmTypeSize pk of
3743 -- Load a 64 bit float return value into two integer regs.
3745 v1 <- getNewRegNat II32
3746 v2 <- getNewRegNat II32
-- f0/f0_high are the two halves of the double register pair.
3748 let Just f0_high = fPair f0
3752 FMOV FF64 src f0 `snocOL`
3753 ST FF32 f0 (spRel 16) `snocOL`
3754 LD II32 (spRel 16) v1 `snocOL`
3755 ST FF32 f0_high (spRel 16) `snocOL`
3756 LD II32 (spRel 16) v2
3758 return (code2, [v1,v2])
3760 -- Load a 32 bit float return value into an integer reg
3762 v1 <- getNewRegNat II32
3766 ST FF32 src (spRel 16) `snocOL`
3767 LD II32 (spRel 16) v1
3769 return (code2, [v1])
3771 -- Move an integer return value into its destination reg.
3773 v1 <- getNewRegNat II32
-- OR with %g0 (always zero) is the SPARC idiom for a register move.
3777 OR False g0 (RIReg src) v1
3779 return (code2, [v1])
3782 -- | Move args from the integer vregs into which they have been
3783 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
-- Each stack-spilled word advances the offset by 1 (word units, based
-- on the spRel addressing used here) -- TODO confirm unit against spRel.
-- NOTE(review): the base case for exhausted args (between listing lines
-- 3785 and 3788) is elided from this excerpt.
3785 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3788 move_final [] _ offset
3791 -- out of aregs; move to stack
3792 move_final (v:vs) [] offset
3793 = ST II32 v (spRel offset)
3794 : move_final vs [] (offset+1)
3796 -- move into an arg (%o[0..5]) reg
3797 move_final (v:vs) (a:az) offset
-- OR with %g0 is a plain register-to-register move on SPARC.
3798 = OR False g0 (RIReg v) a
3799 : move_final vs az offset
3802 -- | Assign results returned from the call into their
-- destination local registers (SPARC).  Float results arrive in %f0
-- (and its pair for doubles), integer results in %o0/%o1.
-- NOTE(review): guard lines between the listing numbers below (e.g.
-- 3812-3815) are elided from this excerpt, so each equation's full
-- guard condition is not visible here.
3805 assign_code :: [CmmHinted LocalReg] -> OrdList Instr
3806 assign_code [] = nilOL
3808 assign_code [CmmHinted dest _hint]
3809 = let rep = localRegType dest
3810 width = typeWidth rep
3811 r_dest = getRegisterReg (CmmLocal dest)
-- 32-bit float result from %f0.
3816 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
-- 64-bit float result from the %f0 pair.
3820 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
-- Integer result from %o0.
3822 | not $ isFloatType rep
3824 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
-- 64-bit integer result split across %o0 (high) and %o1 (low).
3826 | not $ isFloatType rep
3828 , r_dest_hi <- getHiVRegFromLo r_dest
3829 = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
3830 , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
3834 -- | Generate a call to implement an out-of-line floating point operation
-- (SPARC).  Returns either a static label for the C routine, or -- when
-- compiling position-independently -- a dynamic expression computing its
-- address, for the caller to turn into a direct or indirect call.
-- NOTE(review): the first line of the type signature (listing line 3836)
-- is elided from this excerpt; only the result type is visible below.
3837 -> NatM (Either CLabel CmmExpr)
3839 outOfLineFloatOp mop
3840 = do let functionName
3841 = outOfLineFloatOp_table mop
3843 dflags <- getDynFlagsNat
3844 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
3845 $ mkForeignLabel functionName Nothing True IsFunction
-- A plain label reference collapses to Left; anything else stays Right.
3849 CmmLit (CmmLabel lbl) -> Left lbl
3852 return mopLabelOrExpr
3855 -- | Decide what C function to use to implement a CallishMachOp
-- (SPARC).  Pure lookup table from machop to libm symbol name; panics
-- on any op that has no out-of-line implementation.
-- NOTE(review): the signature's argument/result lines (listing numbers
-- 3858-3860) are elided from this excerpt.
3857 outOfLineFloatOp_table
3861 outOfLineFloatOp_table mop
3863 MO_F32_Exp -> fsLit "expf"
3864 MO_F32_Log -> fsLit "logf"
3865 MO_F32_Sqrt -> fsLit "sqrtf"
3866 MO_F32_Pwr -> fsLit "powf"
3868 MO_F32_Sin -> fsLit "sinf"
3869 MO_F32_Cos -> fsLit "cosf"
3870 MO_F32_Tan -> fsLit "tanf"
3872 MO_F32_Asin -> fsLit "asinf"
3873 MO_F32_Acos -> fsLit "acosf"
3874 MO_F32_Atan -> fsLit "atanf"
3876 MO_F32_Sinh -> fsLit "sinhf"
3877 MO_F32_Cosh -> fsLit "coshf"
3878 MO_F32_Tanh -> fsLit "tanhf"
3880 MO_F64_Exp -> fsLit "exp"
3881 MO_F64_Log -> fsLit "log"
3882 MO_F64_Sqrt -> fsLit "sqrt"
3883 MO_F64_Pwr -> fsLit "pow"
3885 MO_F64_Sin -> fsLit "sin"
3886 MO_F64_Cos -> fsLit "cos"
3887 MO_F64_Tan -> fsLit "tan"
3889 MO_F64_Asin -> fsLit "asin"
3890 MO_F64_Acos -> fsLit "acos"
3891 MO_F64_Atan -> fsLit "atan"
3893 MO_F64_Sinh -> fsLit "sinh"
3894 MO_F64_Cosh -> fsLit "cosh"
3895 MO_F64_Tanh -> fsLit "tanh"
3897 other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
3898 (pprCallishMachOp mop)
3901 #endif /* sparc_TARGET_ARCH */
3903 #if powerpc_TARGET_ARCH
3905 #if darwin_TARGET_OS || linux_TARGET_OS
3907 The PowerPC calling convention for Darwin/Mac OS X
3908 is described in Apple's document
3909 "Inside Mac OS X - Mach-O Runtime Architecture".
3911 PowerPC Linux uses the System V Release 4 Calling Convention
3912 for PowerPC. It is described in the
3913 "System V Application Binary Interface PowerPC Processor Supplement".
3915 Both conventions are similar:
3916 Parameters may be passed in general-purpose registers starting at r3, in
3917 floating point registers starting at f1, or on the stack.
3919 But there are substantial differences:
3920 * The number of registers used for parameter passing and the exact set of
3921 nonvolatile registers differs (see MachRegs.lhs).
3922 * On Darwin, stack space is always reserved for parameters, even if they are
3923 passed in registers. The called routine may choose to save parameters from
3924 registers to the corresponding space on the stack.
3925 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3926 parameter is passed in an FPR.
3927 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3928 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3929 Darwin just treats an I64 like two separate II32s (high word first).
3930 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3931 4-byte aligned like everything else on Darwin.
3932 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3933 PowerPC Linux does not agree, so neither do we.
3935 According to both conventions, The parameter area should be part of the
3936 caller's stack frame, allocated in the caller's prologue code (large enough
3937 to hold the parameter lists for all called routines). The NCG already
3938 uses the stack for register spilling, leaving 64 bytes free at the top.
3939 If we need a larger parameter area than that, we just allocate a new stack
3940 frame just before ccalling.
3944 genCCall (CmmPrim MO_WriteBarrier) _ _
3945 = return $ unitOL LWSYNC
3947 genCCall target dest_regs argsAndHints
3948 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3949 -- we rely on argument promotion in the codeGen
3951 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3953 allArgRegs allFPArgRegs
3957 (labelOrExpr, reduceToFF32) <- case target of
3958 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3959 CmmCallee expr conv -> return (Right expr, False)
3960 CmmPrim mop -> outOfLineFloatOp mop
3962 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3963 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3968 `snocOL` BL lbl usedRegs
3971 (dynReg, dynCode) <- getSomeReg dyn
3973 `snocOL` MTCTR dynReg
3975 `snocOL` BCTRL usedRegs
3978 #if darwin_TARGET_OS
3979 initialStackOffset = 24
3980 -- size of linkage area + size of arguments, in bytes
3981 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3982 map (widthInBytes . typeWidth) argReps
3983 #elif linux_TARGET_OS
3984 initialStackOffset = 8
3985 stackDelta finalStack = roundTo 16 finalStack
3987 args = map hintlessCmm argsAndHints
3988 argReps = map cmmExprType args
3990 roundTo a x | x `mod` a == 0 = x
3991 | otherwise = x + a - (x `mod` a)
3993 move_sp_down finalStack
3995 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3998 where delta = stackDelta finalStack
3999 move_sp_up finalStack
4001 toOL [ADD sp sp (RIImm (ImmInt delta)),
4004 where delta = stackDelta finalStack
4007 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
4008 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
4009 accumCode accumUsed | isWord64 arg_ty =
4011 ChildCode64 code vr_lo <- iselExpr64 arg
4012 let vr_hi = getHiVRegFromLo vr_lo
4014 #if darwin_TARGET_OS
4019 (accumCode `appOL` code
4020 `snocOL` storeWord vr_hi gprs stackOffset
4021 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
4022 ((take 2 gprs) ++ accumUsed)
4024 storeWord vr (gpr:_) offset = MR gpr vr
4025 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
4027 #elif linux_TARGET_OS
4028 let stackOffset' = roundTo 8 stackOffset
4029 stackCode = accumCode `appOL` code
4030 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
4031 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
4032 regCode hireg loreg =
4033 accumCode `appOL` code
4034 `snocOL` MR hireg vr_hi
4035 `snocOL` MR loreg vr_lo
4038 hireg : loreg : regs | even (length gprs) ->
4039 passArguments args regs fprs stackOffset
4040 (regCode hireg loreg) (hireg : loreg : accumUsed)
4041 _skipped : hireg : loreg : regs ->
4042 passArguments args regs fprs stackOffset
4043 (regCode hireg loreg) (hireg : loreg : accumUsed)
4044 _ -> -- only one or no regs left
4045 passArguments args [] fprs (stackOffset'+8)
4049 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
4050 | reg : _ <- regs = do
4051 register <- getRegister arg
4052 let code = case register of
4053 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
4054 Any _ acode -> acode reg
4058 #if darwin_TARGET_OS
4059 -- The Darwin ABI requires that we reserve stack slots for register parameters
4060 (stackOffset + stackBytes)
4061 #elif linux_TARGET_OS
4062 -- ... the SysV ABI doesn't.
4065 (accumCode `appOL` code)
4068 (vr, code) <- getSomeReg arg
4072 (stackOffset' + stackBytes)
4073 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
4076 #if darwin_TARGET_OS
4077 -- stackOffset is at least 4-byte aligned
4078 -- The Darwin ABI is happy with that.
4079 stackOffset' = stackOffset
4081 -- ... the SysV ABI requires 8-byte alignment for doubles.
4082 stackOffset' | isFloatType rep && typeWidth rep == W64 =
4083 roundTo 8 stackOffset
4084 | otherwise = stackOffset
4086 stackSlot = AddrRegImm sp (ImmInt stackOffset')
4087 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
4088 II32 -> (1, 0, 4, gprs)
4089 #if darwin_TARGET_OS
4090 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
4092 FF32 -> (1, 1, 4, fprs)
4093 FF64 -> (2, 1, 8, fprs)
4094 #elif linux_TARGET_OS
4095 -- ... the SysV ABI doesn't.
4096 FF32 -> (0, 1, 4, fprs)
4097 FF64 -> (0, 1, 8, fprs)
4100 moveResult reduceToFF32 =
4103 [CmmHinted dest _hint]
4104 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
4105 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
4106 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
4108 | otherwise -> unitOL (MR r_dest r3)
4109 where rep = cmmRegType (CmmLocal dest)
4110 r_dest = getRegisterReg (CmmLocal dest)
4112 outOfLineFloatOp mop =
4114 dflags <- getDynFlagsNat
4115 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
4116 mkForeignLabel functionName Nothing True IsFunction
4117 let mopLabelOrExpr = case mopExpr of
4118 CmmLit (CmmLabel lbl) -> Left lbl
4120 return (mopLabelOrExpr, reduce)
4122 (functionName, reduce) = case mop of
4123 MO_F32_Exp -> (fsLit "exp", True)
4124 MO_F32_Log -> (fsLit "log", True)
4125 MO_F32_Sqrt -> (fsLit "sqrt", True)
4127 MO_F32_Sin -> (fsLit "sin", True)
4128 MO_F32_Cos -> (fsLit "cos", True)
4129 MO_F32_Tan -> (fsLit "tan", True)
4131 MO_F32_Asin -> (fsLit "asin", True)
4132 MO_F32_Acos -> (fsLit "acos", True)
4133 MO_F32_Atan -> (fsLit "atan", True)
4135 MO_F32_Sinh -> (fsLit "sinh", True)
4136 MO_F32_Cosh -> (fsLit "cosh", True)
4137 MO_F32_Tanh -> (fsLit "tanh", True)
4138 MO_F32_Pwr -> (fsLit "pow", True)
4140 MO_F64_Exp -> (fsLit "exp", False)
4141 MO_F64_Log -> (fsLit "log", False)
4142 MO_F64_Sqrt -> (fsLit "sqrt", False)
4144 MO_F64_Sin -> (fsLit "sin", False)
4145 MO_F64_Cos -> (fsLit "cos", False)
4146 MO_F64_Tan -> (fsLit "tan", False)
4148 MO_F64_Asin -> (fsLit "asin", False)
4149 MO_F64_Acos -> (fsLit "acos", False)
4150 MO_F64_Atan -> (fsLit "atan", False)
4152 MO_F64_Sinh -> (fsLit "sinh", False)
4153 MO_F64_Cosh -> (fsLit "cosh", False)
4154 MO_F64_Tanh -> (fsLit "tanh", False)
4155 MO_F64_Pwr -> (fsLit "pow", False)
4156 other -> pprPanic "genCCall(ppc): unknown callish op"
4157 (pprCallishMachOp other)
4159 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
4161 #endif /* powerpc_TARGET_ARCH */
4164 -- -----------------------------------------------------------------------------
4165 -- Generating a table-branch
-- | Generate a table-based jump for a Cmm switch: 'expr' indexes into a
-- jump table of block labels; a Nothing entry becomes a zero word (an
-- unreachable/default slot).  Per-arch via CPP; within each arch there
-- are PIC and non-PIC variants (PIC tables hold label *differences*
-- resolved against a dynamically obtained table base).
-- NOTE(review): the listing line numbers below show gaps; equation
-- heads, '| opt_PIC' guards, 'let'/'return' lines and '#else'/'#endif'
-- markers are elided from this excerpt, so what follows is not the
-- complete definition.
4167 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
4169 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- PIC / x86_64 path: table of label-minus-table-base offsets.
4173 (reg,e_code) <- getSomeReg expr
4174 lbl <- getNewLabelNat
4175 dflags <- getDynFlagsNat
4176 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4177 (tableReg,t_code) <- getSomeReg $ dynRef
4179 jumpTable = map jumpTableEntryRel ids
4181 jumpTableEntryRel Nothing
4182 = CmmStaticLit (CmmInt 0 wordWidth)
4183 jumpTableEntryRel (Just (BlockId id))
4184 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4185 where blockLabel = mkAsmTempLabel id
4187 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
4188 (EAIndex reg wORD_SIZE) (ImmInt 0))
4190 #if x86_64_TARGET_ARCH
4191 #if darwin_TARGET_OS
4192 -- on Mac OS X/x86_64, put the jump table in the text section
4193 -- to work around a limitation of the linker.
4194 -- ld64 is unable to handle the relocations for
4196 -- if L0 is not preceded by a non-anonymous label in its section.
4198 code = e_code `appOL` t_code `appOL` toOL [
4199 ADD (intSize wordWidth) op (OpReg tableReg),
4200 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
4201 LDATA Text (CmmDataLabel lbl : jumpTable)
4204 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
4205 -- relocations, hence we only get 32-bit offsets in the jump
4206 -- table. As these offsets are always negative we need to properly
4207 -- sign extend them to 64-bit. This hack should be removed in
4208 -- conjunction with the hack in PprMach.hs/pprDataItem once
4209 -- binutils 2.17 is standard.
4210 code = e_code `appOL` t_code `appOL` toOL [
4211 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4213 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
4214 (EAIndex reg wORD_SIZE) (ImmInt 0)))
4216 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
4217 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- i386 PIC variant: add the table-relative offset to the table base.
4221 code = e_code `appOL` t_code `appOL` toOL [
4222 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4223 ADD (intSize wordWidth) op (OpReg tableReg),
4224 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC x86: table holds absolute labels; indexed indirect jump.
4230 (reg,e_code) <- getSomeReg expr
4231 lbl <- getNewLabelNat
4233 jumpTable = map jumpTableEntry ids
4234 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
4235 code = e_code `appOL` toOL [
4236 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4237 JMP_TBL op [ id | Just id <- ids ]
4241 #elif powerpc_TARGET_ARCH
-- PowerPC PIC variant: relative table, scaled load, add base, BCTR.
4245 (reg,e_code) <- getSomeReg expr
4246 tmp <- getNewRegNat II32
4247 lbl <- getNewLabelNat
4248 dflags <- getDynFlagsNat
4249 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4250 (tableReg,t_code) <- getSomeReg $ dynRef
4252 jumpTable = map jumpTableEntryRel ids
4254 jumpTableEntryRel Nothing
4255 = CmmStaticLit (CmmInt 0 wordWidth)
4256 jumpTableEntryRel (Just (BlockId id))
4257 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4258 where blockLabel = mkAsmTempLabel id
4260 code = e_code `appOL` t_code `appOL` toOL [
4261 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4262 SLW tmp reg (RIImm (ImmInt 2)),
4263 LD II32 tmp (AddrRegReg tableReg tmp),
4264 ADD tmp tmp (RIReg tableReg),
4266 BCTR [ id | Just id <- ids ]
-- PowerPC non-PIC variant: absolute table addressed via HA/LO halves.
4271 (reg,e_code) <- getSomeReg expr
4272 tmp <- getNewRegNat II32
4273 lbl <- getNewLabelNat
4275 jumpTable = map jumpTableEntry ids
4277 code = e_code `appOL` toOL [
4278 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4279 SLW tmp reg (RIImm (ImmInt 2)),
4280 ADDIS tmp tmp (HA (ImmCLbl lbl)),
4281 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4283 BCTR [ id | Just id <- ids ]
4286 #elif sparc_TARGET_ARCH
-- SPARC: PIC not implemented; non-PIC builds an absolute table.
4289 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4292 = do (e_reg, e_code) <- getSomeReg expr
4294 base_reg <- getNewRegNat II32
4295 offset_reg <- getNewRegNat II32
4296 dst <- getNewRegNat II32
4298 label <- getNewLabelNat
4299 let jumpTable = map jumpTableEntry ids
4301 return $ e_code `appOL`
4304 [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
4306 -- load base of jump table
4307 , SETHI (HI (ImmCLbl label)) base_reg
4308 , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
4310 -- the addrs in the table are 32 bits wide..
4311 , SLL e_reg (RIImm $ ImmInt 2) offset_reg
4313 -- load and jump to the destination
4314 , LD II32 (AddrRegReg base_reg offset_reg) dst
4315 , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
4319 #error "ToDo: genSwitch"
4323 -- | Convert a BlockId to some CmmStatic data
-- Nothing (no target for this table slot) becomes a zero word;
-- Just a block becomes a static reference to that block's asm label.
-- Shared by the non-PIC genSwitch variants above.
4324 jumpTableEntry :: Maybe BlockId -> CmmStatic
4325 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
4326 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4327 where blockLabel = mkAsmTempLabel id
4329 -- -----------------------------------------------------------------------------
4331 -- -----------------------------------------------------------------------------
4334 -- -----------------------------------------------------------------------------
4335 -- 'condIntReg' and 'condFltReg': condition codes into registers
4337 -- Turn those condition codes into integers now (when they appear on
4338 -- the right hand side of an assignment).
4340 -- (If applicable) Do not fill the delay slots here; you will confuse the
4341 -- register allocator.
-- Shared signature for both condition-into-register selectors; the
-- per-arch implementations follow in the CPP sections below.
4343 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4345 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4347 #if alpha_TARGET_ARCH
-- Alpha backend never implemented these; any call is a compiler bug.
4348 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4349 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4350 #endif /* alpha_TARGET_ARCH */
4352 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4354 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86-64: materialise an integer comparison result as 0/1 in a
-- register via SETcc on a byte temp, zero-extended into the destination.
-- NOTE(review): the 'let' line (listing 4359) and list close (4363-4364)
-- are elided from this excerpt.
4356 condIntReg cond x y = do
4357 CondCode _ cond cond_code <- condIntCode cond x y
4358 tmp <- getNewRegNat II8
4360 code dst = cond_code `appOL` toOL [
4361 SETCC cond (OpReg tmp),
4362 MOVZxL II8 (OpReg tmp) (OpReg dst)
4365 return (Any II32 code)
4369 #if i386_TARGET_ARCH
-- i386: float comparison into a 0/1 register, same SETcc + MOVZxL
-- scheme as condIntReg (x87 comparisons land in the flags via
-- condFltCode).  Elided lines as in the block above.
4371 condFltReg cond x y = do
4372 CondCode _ cond cond_code <- condFltCode cond x y
4373 tmp <- getNewRegNat II8
4375 code dst = cond_code `appOL` toOL [
4376 SETCC cond (OpReg tmp),
4377 MOVZxL II8 (OpReg tmp) (OpReg dst)
4380 return (Any II32 code)
4384 #if x86_64_TARGET_ARCH
-- x86-64 (SSE2): float comparison into a 0/1 register, with explicit
-- handling of unordered (NaN) operands via the parity flag -- see the
-- original commentary retained below.
-- NOTE(review): the 'let code dst = ...' head and the case scrutinee
-- lines (listing 4404-4407 etc.) are elided from this excerpt.
4386 condFltReg cond x y = do
4387 CondCode _ cond cond_code <- condFltCode cond x y
4388 tmp1 <- getNewRegNat wordSize
4389 tmp2 <- getNewRegNat wordSize
4391 -- We have to worry about unordered operands (eg. comparisons
4392 -- against NaN). If the operands are unordered, the comparison
4393 -- sets the parity flag, carry flag and zero flag.
4394 -- All comparisons are supposed to return false for unordered
4395 -- operands except for !=, which returns true.
4397 -- Optimisation: we don't have to test the parity flag if we
4398 -- know the test has already excluded the unordered case: eg >
4399 -- and >= test for a zero carry flag, which can only occur for
4400 -- ordered operands.
4402 -- ToDo: by reversing comparisons we could avoid testing the
4403 -- parity flag in more cases.
4408 NE -> or_unordered dst
4409 GU -> plain_test dst
4410 GEU -> plain_test dst
4411 _ -> and_ordered dst)
4413 plain_test dst = toOL [
4414 SETCC cond (OpReg tmp1),
4415 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- NE must also be true when unordered: OR in the parity bit.
4417 or_unordered dst = toOL [
4418 SETCC cond (OpReg tmp1),
4419 SETCC PARITY (OpReg tmp2),
4420 OR II8 (OpReg tmp1) (OpReg tmp2),
4421 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- Other tests must be false when unordered: AND with NOT-parity.
4423 and_ordered dst = toOL [
4424 SETCC cond (OpReg tmp1),
4425 SETCC NOTPARITY (OpReg tmp2),
4426 AND II8 (OpReg tmp1) (OpReg tmp2),
4427 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4430 return (Any II32 code)
4434 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4436 #if sparc_TARGET_ARCH
-- SPARC comparison-as-value.  EQ/NE get branch-free special cases
-- built from SUB-with-condition-codes against %g0 (carry/subtract-with-
-- carry tricks -- TODO confirm exact semantics against the SPARC V8
-- manual); the general cases branch over loading 0 or 1.
-- NOTE(review): interior "let" lines and some list closers are missing
-- from this extraction; visible lines kept verbatim.
4438 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4439 (src, code) <- getSomeReg x
4440 tmp <- getNewRegNat II32
4442 code__2 dst = code `appOL` toOL [
4443 SUB False True g0 (RIReg src) g0,
4444 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4445 return (Any II32 code__2)
-- General EQ: XOR the operands first so the zero test applies.
4447 condIntReg EQQ x y = do
4448 (src1, code1) <- getSomeReg x
4449 (src2, code2) <- getSomeReg y
4450 tmp1 <- getNewRegNat II32
4451 tmp2 <- getNewRegNat II32
4453 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4454 XOR False src1 (RIReg src2) dst,
4455 SUB False True g0 (RIReg dst) g0,
4456 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4457 return (Any II32 code__2)
-- NE against zero: same flag-setting SUB, then add-with-carry of 0.
4459 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4460 (src, code) <- getSomeReg x
4461 tmp <- getNewRegNat II32
4463 code__2 dst = code `appOL` toOL [
4464 SUB False True g0 (RIReg src) g0,
4465 ADD True False g0 (RIImm (ImmInt 0)) dst]
4466 return (Any II32 code__2)
4468 condIntReg NE x y = do
4469 (src1, code1) <- getSomeReg x
4470 (src2, code2) <- getSomeReg y
4471 tmp1 <- getNewRegNat II32
4472 tmp2 <- getNewRegNat II32
4474 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4475 XOR False src1 (RIReg src2) dst,
4476 SUB False True g0 (RIReg dst) g0,
4477 ADD True False g0 (RIImm (ImmInt 0)) dst]
4478 return (Any II32 code__2)
-- General case: conditional branch (with NOP delay slot) selecting
-- between OR'ing 0 or 1 into dst.  Label/NEWBLOCK lines appear elided.
4480 condIntReg cond x y = do
4481 bid1@(BlockId lbl1) <- getBlockIdNat
4482 bid2@(BlockId lbl2) <- getBlockIdNat
4483 CondCode _ cond cond_code <- condIntCode cond x y
4485 code__2 dst = cond_code `appOL` toOL [
4486 BI cond False bid1, NOP,
4487 OR False g0 (RIImm (ImmInt 0)) dst,
4488 BI ALWAYS False bid2, NOP,
4490 OR False g0 (RIImm (ImmInt 1)) dst,
4492 return (Any II32 code__2)
-- Float version: identical branchy shape, but BF (branch on FP
-- condition codes) instead of BI.
4494 condFltReg cond x y = do
4495 bid1@(BlockId lbl1) <- getBlockIdNat
4496 bid2@(BlockId lbl2) <- getBlockIdNat
4497 CondCode _ cond cond_code <- condFltCode cond x y
4499 code__2 dst = cond_code `appOL` toOL [
4501 BF cond False bid1, NOP,
4502 OR False g0 (RIImm (ImmInt 0)) dst,
4503 BI ALWAYS False bid2, NOP,
4505 OR False g0 (RIImm (ImmInt 1)) dst,
4507 return (Any II32 code__2)
4509 #endif /* sparc_TARGET_ARCH */
4511 #if powerpc_TARGET_ARCH
-- PowerPC: shared helper 'condReg' turns any condition computation
-- into a 0/1 value.  The visible tail suggests it reads a CR bit via
-- MFCR + RLWINM (rotate-and-mask to isolate one bit), negating the bit
-- with CRNOR first when the condition is the complement of an
-- available CR bit -- TODO confirm; most of the body (including the
-- MFCR line and the full (bit, do_negate) case table) is missing from
-- this extraction.  Visible lines kept verbatim.
4512 condReg getCond = do
4513 lbl1 <- getBlockIdNat
4514 lbl2 <- getBlockIdNat
4515 CondCode _ cond cond_code <- getCond
4517 {- code dst = cond_code `appOL` toOL [
4526 code dst = cond_code
4530 RLWINM dst dst (bit + 1) 31 31
4533 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4536 (bit, do_negate) = case cond of
4550 return (Any II32 code)
-- Integer and float versions both funnel through 'condReg'.
4552 condIntReg cond x y = condReg (condIntCode cond x y)
4553 condFltReg cond x y = condReg (condFltCode cond x y)
4554 #endif /* powerpc_TARGET_ARCH */
4557 -- -----------------------------------------------------------------------------
4558 -- 'trivial*Code': deal with trivial instructions
4560 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4561 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4562 -- Only look for constants on the right hand side, because that's
4563 -- where the generic optimizer will have put them.
4565 -- Similarly, for unary instructions, we don't have to worry about
4566 -- matching an StInt as the argument, because genericOpt will already
4567 -- have handled the constant-folding.
-- Type signatures for the trivial*Code family.  Each instruction-
-- constructor argument type is selected per architecture by the
-- IF_ARCH_* CPP macros (from nativeGen/NCG.h).
-- NOTE(review): the signature head lines ("trivialCode", "trivialFCode",
-- etc.) and the closing "-> NatM Register" lines are missing from this
-- extraction; only the middle of each signature is visible, kept verbatim.
4570 :: Width -- Int only
4571 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4572 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4573 -> Maybe (Operand -> Operand -> Instr)
4574 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4575 -> Maybe (Operand -> Operand -> Instr)
4576 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4577 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4579 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialFCode is declared separately on PowerPC (hence the #ifndef).
4582 #ifndef powerpc_TARGET_ARCH
4584 :: Width -- Floating point only
4585 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4586 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4587 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4588 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4590 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialUCode: unary integer instruction.
4596 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4597 ,IF_ARCH_i386 ((Operand -> Instr)
4598 ,IF_ARCH_x86_64 ((Operand -> Instr)
4599 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4600 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4602 -> CmmExpr -- the one argument
-- trivialUFCode: unary floating-point instruction (not on PowerPC).
4605 #ifndef powerpc_TARGET_ARCH
4608 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4609 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4610 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4611 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4613 -> CmmExpr -- the one argument
4617 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4619 #if alpha_TARGET_ARCH
-- Alpha trivial*Code, in the old explicit 'thenNat' monadic style and
-- the pre-Cmm 'StInt'/'registerCode'/'registerName' API.
-- NOTE(review): the "let"/"in" scaffolding lines are missing from this
-- extraction; visible lines kept verbatim.
-- Dyadic op with an immediate right operand.
4621 trivialCode instr x (StInt y)
4623 = getRegister x `thenNat` \ register ->
4624 getNewRegNat IntRep `thenNat` \ tmp ->
4626 code = registerCode register tmp
4627 src1 = registerName register tmp
4628 src2 = ImmInt (fromInteger y)
4629 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4631 return (Any IntRep code__2)
-- Dyadic op, both operands in registers.
4633 trivialCode instr x y
4634 = getRegister x `thenNat` \ register1 ->
4635 getRegister y `thenNat` \ register2 ->
4636 getNewRegNat IntRep `thenNat` \ tmp1 ->
4637 getNewRegNat IntRep `thenNat` \ tmp2 ->
4639 code1 = registerCode register1 tmp1 []
4640 src1 = registerName register1 tmp1
4641 code2 = registerCode register2 tmp2 []
4642 src2 = registerName register2 tmp2
4643 code__2 dst = asmSeqThen [code1, code2] .
4644 mkSeqInstr (instr src1 (RIReg src2) dst)
4646 return (Any IntRep code__2)
-- Unary integer op.
4649 trivialUCode instr x
4650 = getRegister x `thenNat` \ register ->
4651 getNewRegNat IntRep `thenNat` \ tmp ->
4653 code = registerCode register tmp
4654 src = registerName register tmp
4655 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4657 return (Any IntRep code__2)
-- Dyadic floating-point op; width argument ignored, always FF64.
4660 trivialFCode _ instr x y
4661 = getRegister x `thenNat` \ register1 ->
4662 getRegister y `thenNat` \ register2 ->
4663 getNewRegNat FF64 `thenNat` \ tmp1 ->
4664 getNewRegNat FF64 `thenNat` \ tmp2 ->
4666 code1 = registerCode register1 tmp1
4667 src1 = registerName register1 tmp1
4669 code2 = registerCode register2 tmp2
4670 src2 = registerName register2 tmp2
4672 code__2 dst = asmSeqThen [code1 [], code2 []] .
4673 mkSeqInstr (instr src1 src2 dst)
4675 return (Any FF64 code__2)
-- Unary floating-point op.
4677 trivialUFCode _ instr x
4678 = getRegister x `thenNat` \ register ->
4679 getNewRegNat FF64 `thenNat` \ tmp ->
4681 code = registerCode register tmp
4682 src = registerName register tmp
4683 code__2 dst = code . mkSeqInstr (instr src dst)
4685 return (Any FF64 code__2)
4687 #endif /* alpha_TARGET_ARCH */
4689 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4691 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- The block-comment braces around "The Rules" appear to be elided in
-- this extraction; the prose itself is kept verbatim below.
4694 The Rules of the Game are:
4696 * You cannot assume anything about the destination register dst;
4697 it may be anything, including a fixed reg.
4699 * You may compute an operand into a fixed reg, but you may not
4700 subsequently change the contents of that fixed reg. If you
4701 want to do so, first copy the value either to a temporary
4702 or into dst. You are free to modify dst even if it happens
4703 to be a fixed reg -- that's not your problem.
4705 * You cannot assume that a fixed reg will stay live over an
4706 arbitrary computation. The same applies to the dst reg.
4708 * Temporary regs obtained from getNewRegNat are distinct from
4709 each other and from all other regs, and stay live over
4710 arbitrary computations.
4712 --------------------
4714 SDM's version of The Rules:
4716 * If getRegister returns Any, that means it can generate correct
4717 code which places the result in any register, period. Even if that
4718 register happens to be read during the computation.
4720 Corollary #1: this means that if you are generating code for an
4721 operation with two arbitrary operands, you cannot assign the result
4722 of the first operand into the destination register before computing
4723 the second operand. The second operand might require the old value
4724 of the destination register.
4726 Corollary #2: A function might be able to generate more efficient
4727 code if it knows the destination register is a new temporary (and
4728 therefore not read by any of the sub-computations).
4730 * If getRegister returns Any, then the code it generates may modify only:
4731 (a) fresh temporaries
4732 (b) the destination register
4733 (c) known registers (eg. %ecx is used by shifts)
4734 In particular, it may *not* modify global registers, unless the global
4735 register happens to be the destination register.
-- x86 trivialCode: when the LEFT operand is a small literal and a
-- reversed-operand form of the instruction exists, emit
-- "dst := b; dst := lit `revop` dst" directly.
-- NOTE(review): the "where"/"let" line for 'code' is elided.
4738 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4739 | is32BitLit lit_a = do
4740 b_code <- getAnyReg b
4743 = b_code dst `snocOL`
4744 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4746 return (Any (intSize width) code)
-- General case: defer to genTrivialCode.
4748 trivialCode width instr maybe_revinstr a b
4749 = genTrivialCode (intSize width) instr a b
4751 -- This is re-used for floating pt instructions too.
-- Core two-operand generator: compute b to a non-clobbered operand,
-- compute a straight into dst, then "dst := dst `op` b" -- with a
-- spill of b to a temporary when b's register clashes with dst.
4752 genTrivialCode rep instr a b = do
4753 (b_op, b_code) <- getNonClobberedOperand b
4754 a_code <- getAnyReg a
4755 tmp <- getNewRegNat rep
4757 -- We want the value of b to stay alive across the computation of a.
4758 -- But, we want to calculate a straight into the destination register,
4759 -- because the instruction only has two operands (dst := dst `op` src).
4760 -- The troublesome case is when the result of b is in the same register
4761 -- as the destination reg. In this case, we have to save b in a
4762 -- new temporary across the computation of a.
-- NOTE(review): the "code dst" binding head and the non-clash branch
-- header are elided between here and the guard below.
4764 | dst `regClashesWithOp` b_op =
4766 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4768 instr (OpReg tmp) (OpReg dst)
4772 instr b_op (OpReg dst)
4774 return (Any rep code)
-- Does this register appear in the operand (directly or inside an
-- addressing mode)?
4776 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4777 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4778 reg `regClashesWithOp` _ = False
-- Unary op: compute x into dst, then apply instr in place.
-- NOTE(review): the "code dst = ..." line is elided.
4782 trivialUCode rep instr x = do
4783 x_code <- getAnyReg x
4788 return (Any rep code)
4792 #if i386_TARGET_ARCH
-- i386 (x87) float dyadic op: x must survive the computation of y
-- (hence getNonClobberedReg); three-address instr writes dst directly.
-- NOTE(review): the "let"/"code dst =" scaffolding is elided here.
4794 trivialFCode width instr x y = do
4795 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4796 (y_reg, y_code) <- getSomeReg y
4798 size = floatSize width
4802 instr size x_reg y_reg dst
4803 return (Any size code)
4807 #if x86_64_TARGET_ARCH
-- x86-64 (SSE): float ops are two-operand like integer ones, so the
-- shared genTrivialCode does the work.
4808 trivialFCode pk instr x y
4809 = genTrivialCode size (instr size) x y
4810 where size = floatSize pk
-- Unary float op; body lines between the bind and the return are elided.
4815 trivialUFCode size instr x = do
4816 (x_reg, x_code) <- getSomeReg x
4822 return (Any size code)
4824 #endif /* i386_TARGET_ARCH */
4826 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4828 #if sparc_TARGET_ARCH
-- SPARC trivialCode: immediate right operand (the guard checking the
-- literal fits in 13 bits appears elided) vs. register-register.
-- NOTE(review): "let" lines are missing throughout; kept verbatim.
4830 trivialCode pk instr x (CmmLit (CmmInt y d))
4833 (src1, code) <- getSomeReg x
4834 tmp <- getNewRegNat II32
4836 src2 = ImmInt (fromInteger y)
4837 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4838 return (Any II32 code__2)
4840 trivialCode pk instr x y = do
4841 (src1, code1) <- getSomeReg x
4842 (src2, code2) <- getSomeReg y
4843 tmp1 <- getNewRegNat II32
4844 tmp2 <- getNewRegNat II32
4846 code__2 dst = code1 `appOL` code2 `snocOL`
4847 instr src1 (RIReg src2) dst
4848 return (Any II32 code__2)
-- Float dyadic op.  SPARC treats float and double regs as separate
-- kinds: if the operand widths differ, the narrower (W32) operand is
-- promoted to FF64 via FxTOy through 'tmp' before the operation.
4851 trivialFCode pk instr x y = do
4852 (src1, code1) <- getSomeReg x
4853 (src2, code2) <- getSomeReg y
4854 tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4855 tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4856 tmp <- getNewRegNat FF64
4858 promote x = FxTOy FF32 FF64 x tmp
-- NOTE(review): the pk1/pk2 bindings and "code__2 dst =" head are
-- elided before this conditional.
4864 if pk1 `cmmEqType` pk2 then
4865 code1 `appOL` code2 `snocOL`
4866 instr (floatSize pk) src1 src2 dst
4867 else if typeWidth pk1 == W32 then
4868 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4869 instr FF64 tmp src2 dst
4871 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4872 instr FF64 src1 tmp dst
4873 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- Unary integer op.
4877 trivialUCode size instr x = do
4878 (src, code) <- getSomeReg x
4879 tmp <- getNewRegNat size
4881 code__2 dst = code `snocOL` instr (RIReg src) dst
4882 return (Any size code__2)
-- Unary float op.
4885 trivialUFCode pk instr x = do
4886 (src, code) <- getSomeReg x
4887 tmp <- getNewRegNat pk
4889 code__2 dst = code `snocOL` instr src dst
4890 return (Any pk code__2)
4892 #endif /* sparc_TARGET_ARCH */
4894 #if powerpc_TARGET_ARCH
-- The block-comment braces around Wolfgang's Rules appear elided;
-- the prose is kept verbatim.
4897 Wolfgang's PowerPC version of The Rules:
4899 A slightly modified version of The Rules to take advantage of the fact
4900 that PowerPC instructions work on all registers and don't implicitly
4901 clobber any fixed registers.
4903 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4905 * If getRegister returns Any, then the code it generates may modify only:
4906 (a) fresh temporaries
4907 (b) the destination register
4908 It may *not* modify global registers, unless the global
4909 register happens to be the destination register.
4910 It may not clobber any other registers. In fact, only ccalls clobber any
4912 Also, it may not modify the counter register (used by genCCall).
4914 Corollary: If a getRegister for a subexpression returns Fixed, you need
4915 not move it to a fresh temporary before evaluating the next subexpression.
4916 The Fixed register won't be modified.
4917 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4919 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4920 the value of the destination register.
-- PPC trivialCode: use an immediate form when the literal fits
-- ('makeImmediate' checks width/signedness), else register-register.
4923 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4924 | Just imm <- makeImmediate rep signed y
4926 (src1, code1) <- getSomeReg x
4927 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4928 return (Any (intSize rep) code)
4930 trivialCode rep signed instr x y = do
4931 (src1, code1) <- getSomeReg x
4932 (src2, code2) <- getSomeReg y
4933 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4934 return (Any (intSize rep) code)
-- Register-register-only dyadic op (no immediate form available).
4936 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
4937 -> CmmExpr -> CmmExpr -> NatM Register
4938 trivialCodeNoImm' size instr x y = do
4939 (src1, code1) <- getSomeReg x
4940 (src2, code2) <- getSomeReg y
4941 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4942 return (Any size code)
-- Variant for instruction constructors that also take the Size.
4944 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
4945 -> CmmExpr -> CmmExpr -> NatM Register
4946 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
-- Unary op.
4948 trivialUCode rep instr x = do
4949 (src, code) <- getSomeReg x
4950 let code' dst = code `snocOL` instr dst src
4951 return (Any rep code')
4953 -- There is no "remainder" instruction on the PPC, so we have to do
4955 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- Computes x `rem` y as x - (x `div` y) * y.  NOTE(review): the
-- dividing instruction and the final subtract line are elided between
-- the toOL bracket and MULLW; only MULLW is visible here.
4957 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4958 -> CmmExpr -> CmmExpr -> NatM Register
4959 remainderCode rep div x y = do
4960 (src1, code1) <- getSomeReg x
4961 (src2, code2) <- getSomeReg y
4962 let code dst = code1 `appOL` code2 `appOL` toOL [
4964 MULLW dst dst (RIReg src2),
4967 return (Any (intSize rep) code)
4969 #endif /* powerpc_TARGET_ARCH */
4972 -- -----------------------------------------------------------------------------
4973 -- Coercing to/from integer/floating-point...
4975 -- When going to integer, we truncate (round towards 0).
4977 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4978 -- conversions. We have to store temporaries in memory to move
4979 -- between the integer and the floating point register sets.
4981 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4982 -- pretend, on sparc at least, that double and float regs are seperate
4983 -- kinds, so the value has to be computed into one kind before being
4984 -- explicitly "converted" to live in the other kind.
-- Signatures: (source width) -> (target width) -> expression.
4986 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4987 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-- SPARC-only single<->double coercions.  NOTE(review): the matching
-- #endif for this #if appears to be elided in this extraction.
4989 #if sparc_TARGET_ARCH
4990 coerceDbl2Flt :: CmmExpr -> NatM Register
4991 coerceFlt2Dbl :: CmmExpr -> NatM Register
4994 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4996 #if alpha_TARGET_ARCH
-- Alpha int<->FP coercions, moved through memory at the top of the
-- stack (spRel 0) because the register files are separate.
-- NOTE(review): the function-head lines (presumably coerceInt2FP and
-- coerceFP2Int), "let" scaffolding, the stores/converts paired with the
-- visible LD/ST, and the list closers are all missing from this
-- extraction; visible lines kept verbatim.
4999 = getRegister x `thenNat` \ register ->
5000 getNewRegNat IntRep `thenNat` \ reg ->
5002 code = registerCode register reg
5003 src = registerName register reg
5005 code__2 dst = code . mkSeqInstrs [
5007 LD TF dst (spRel 0),
5010 return (Any FF64 code__2)
-- Second coercion (FP -> int, judging by the IntRep result).
5014 = getRegister x `thenNat` \ register ->
5015 getNewRegNat FF64 `thenNat` \ tmp ->
5017 code = registerCode register tmp
5018 src = registerName register tmp
5020 code__2 dst = code . mkSeqInstrs [
5022 ST TF tmp (spRel 0),
5025 return (Any IntRep code__2)
5027 #endif /* alpha_TARGET_ARCH */
5029 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5031 #if i386_TARGET_ARCH
-- i386 (x87): single instruction GITOF/GITOD converts int -> float/double.
-- NOTE(review): the "let" lines are elided; visible lines kept verbatim.
5033 coerceInt2FP from to x = do
5034 (x_reg, x_code) <- getSomeReg x
5036 opc = case to of W32 -> GITOF; W64 -> GITOD
5037 code dst = x_code `snocOL` opc x_reg dst
5038 -- ToDo: works for non-II32 reps?
5039 return (Any (floatSize to) code)
-- Float/double -> int via GFTOI/GDTOI (truncating, per the section
-- comment above).
5043 coerceFP2Int from to x = do
5044 (x_reg, x_code) <- getSomeReg x
5046 opc = case from of W32 -> GFTOI; W64 -> GDTOI
5047 code dst = x_code `snocOL` opc x_reg dst
5048 -- ToDo: works for non-II32 reps?
5050 return (Any (intSize to) code)
5052 #endif /* i386_TARGET_ARCH */
5054 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5056 #if x86_64_TARGET_ARCH
-- x86-64 (SSE2) conversions via dedicated cvt* instructions; no trip
-- through memory needed.  NOTE(review): "let" lines elided throughout.
-- FP -> int: CVTTSS2SIQ/CVTTSD2SIQ are the truncating ("T") forms.
5058 coerceFP2Int from to x = do
5059 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
5061 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
5062 code dst = x_code `snocOL` opc x_op dst
5064 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- Int -> FP via CVTSI2SS/CVTSI2SD.
5066 coerceInt2FP from to x = do
5067 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
5069 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
5070 code dst = x_code `snocOL` opc x_op dst
5072 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- float <-> double, choosing the direction from the target width.
5074 coerceFP2FP :: Width -> CmmExpr -> NatM Register
5075 coerceFP2FP to x = do
5076 (x_reg, x_code) <- getSomeReg x
5078 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
5079 code dst = x_code `snocOL` opc x_reg dst
5081 return (Any (floatSize to) code)
5084 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5086 #if sparc_TARGET_ARCH
-- SPARC int -> FP: bounce the value through a stack slot (spRel (-2))
-- to cross from the int to the FP register file, then convert in-place
-- with FxTOy.  NOTE(review): "let" lines elided; kept verbatim.
5088 coerceInt2FP width1 width2 x = do
5089 (src, code) <- getSomeReg x
5091 code__2 dst = code `appOL` toOL [
5092 ST (intSize width1) src (spRel (-2)),
5093 LD (intSize width1) (spRel (-2)) dst,
5094 FxTOy (intSize width1) (floatSize width2) dst dst]
5095 return (Any (floatSize $ width2) code__2)
5098 -- | Coerce a floating point value to integer
5100 -- NOTE: On sparc v9 there are no instructions to move a value from an
5101 -- FP register directly to an int register, so we have to use a load/store.
5103 coerceFP2Int width1 width2 x
5104 = do let fsize1 = floatSize width1
5105 fsize2 = floatSize width2
5107 isize2 = intSize width2
5109 (fsrc, code) <- getSomeReg x
5110 fdst <- getNewRegNat fsize2
-- NOTE(review): the "code2 dst = code `appOL` toOL" head line is
-- elided before the instruction list below.
5115 -- convert float to int format, leaving it in a float reg.
5116 [ FxTOy fsize1 isize2 fsrc fdst
5118 -- store the int into mem, then load it back to move
5119 -- it into an actual int reg.
5120 , ST fsize2 fdst (spRel (-2))
5121 , LD isize2 (spRel (-2)) dst]
5123 return (Any isize2 code2)
-- double -> float and float -> double: a single FxTOy each, since the
-- two FP kinds are treated as separate register classes.
5126 coerceDbl2Flt x = do
5127 (src, code) <- getSomeReg x
5128 return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
5131 coerceFlt2Dbl x = do
5132 (src, code) <- getSomeReg x
5133 return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
5135 #endif /* sparc_TARGET_ARCH */
5137 #if powerpc_TARGET_ARCH
-- PPC int -> FP: the classic "magic number" trick.  Build the double
-- 0x4330000080000000 as static data, flip the sign bit of the int with
-- XORIS 0x8000, splice it under the 0x4330 exponent on the stack, load
-- as FF64, and FSUB the magic constant to recover the value -- TODO
-- confirm details; several lines (the static-data label emission, the
-- LD of the constant, and some "let" scaffolding) are elided here.
5138 coerceInt2FP fromRep toRep x = do
5139 (src, code) <- getSomeReg x
5140 lbl <- getNewLabelNat
5141 itmp <- getNewRegNat II32
5142 ftmp <- getNewRegNat FF64
5143 dflags <- getDynFlagsNat
5144 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
5145 Amode addr addr_code <- getAmode dynRef
5147 code' dst = code `appOL` maybe_exts `appOL` toOL [
5150 CmmStaticLit (CmmInt 0x43300000 W32),
5151 CmmStaticLit (CmmInt 0x80000000 W32)],
5152 XORIS itmp src (ImmInt 0x8000),
5153 ST II32 itmp (spRel 3),
5154 LIS itmp (ImmInt 0x4330),
5155 ST II32 itmp (spRel 2),
5156 LD FF64 ftmp (spRel 2)
5157 ] `appOL` addr_code `appOL` toOL [
5159 FSUB FF64 dst ftmp dst
5160 ] `appOL` maybe_frsp dst
-- Sign-extend sub-word sources first; round to single if the target
-- is W32.  (The catch-all case arms appear elided.)
5162 maybe_exts = case fromRep of
5163 W8 -> unitOL $ EXTS II8 src src
5164 W16 -> unitOL $ EXTS II16 src src
5166 maybe_frsp dst = case toRep of
5167 W32 -> unitOL $ FRSP dst dst
5169 return (Any (floatSize toRep) code')
-- FP -> int: convert in an FP reg (the converting instruction,
-- presumably FCTIWZ, is elided at line "5177"), store the 64-bit FP
-- value to the stack, and load back the low word as the integer.
5171 coerceFP2Int fromRep toRep x = do
5172 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
5173 (src, code) <- getSomeReg x
5174 tmp <- getNewRegNat FF64
5176 code' dst = code `appOL` toOL [
5177 -- convert to int in FP reg
5179 -- store value (64bit) from FP to stack
5180 ST FF64 tmp (spRel 2),
5181 -- read low word of value (high word is undefined)
5182 LD II32 dst (spRel 3)]
5183 return (Any (intSize toRep) code')
5184 #endif /* powerpc_TARGET_ARCH */
5187 -- -----------------------------------------------------------------------------
5188 -- eXTRA_STK_ARGS_HERE
5190 -- We (allegedly) put the first six C-call arguments in registers;
5191 -- where do we start putting the rest of them?
5193 -- Moved from Instrs (SDM):
-- Stack offset (in words, presumably -- TODO confirm) of the first
-- spilled C-call argument: 0 on Alpha, 23 on SPARC.  The definition's
-- name line and the closing #endif are outside / missing from this
-- extraction; visible lines kept verbatim.
5195 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
5196 eXTRA_STK_ARGS_HERE :: Int
5198 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))