2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr )
34 -- Our intermediate code:
36 import PprCmm ( pprExpr )
40 import ClosureInfo ( C_SRT(..) )
43 import StaticFlags ( opt_PIC )
44 import ForeignCall ( CCallConv(..) )
49 import FastBool ( isFastTrue )
50 import Constants ( wORD_SIZE )
52 import Debug.Trace ( trace )
54 import Control.Monad ( mapAndUnzipM )
55 import Data.Maybe ( fromJust )
60 -- -----------------------------------------------------------------------------
61 -- Top-level of the instruction selector
63 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
64 -- They are really trees of insns to facilitate fast appending, where a
65 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- order; the tree is flattened only when blocks are reconstituted.
68 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm item: a procedure is translated
-- block by block; a data section is passed through unchanged.
70 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
71 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
72 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
73 picBaseMb <- getPicBaseMaybeNat
-- each source block may expand into several native blocks plus some
-- floated-out data; concatenate both
74 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
75 tops = proc : concat statics
-- if a PIC base register is in use, add the code that initializes it
77 Just picBase -> initializePicBase picBase tops
78 Nothing -> return tops
80 cmmTopCodeGen (CmmData sec dat) = do
81 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for a single Cmm basic block.  Returns the
-- resulting native block(s) plus any data floated out of the
-- instruction stream (as LDATA pseudo-instructions).
83 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
84 basicBlockCodeGen (BasicBlock id stmts) = do
85 instrs <- stmtsToInstrs stmts
86 -- code generation may introduce new basic block boundaries, which
87 -- are indicated by the NEWBLOCK instruction. We must split up the
88 -- instruction stream into basic blocks again. Also, we extract
91 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- folding step: NEWBLOCK closes the current run of instructions into a
-- fresh block; LDATA is pulled out into the statics list; everything
-- else is prepended to the current run (foldrOL keeps the order right)
93 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
94 = ([], BasicBlock id instrs : blocks, statics)
95 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
96 = (instrs, blocks, CmmData sec dat:statics)
97 mkBlocks instr (instrs,blocks,statics)
98 = (instr:instrs, blocks, statics)
100 return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements and concatenate the resulting
-- instruction sequences.
102 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
104 = do instrss <- mapM stmtToInstrs stmts
105 return (concatOL instrss)
-- | Translate a single Cmm statement into native instructions.
107 stmtToInstrs :: CmmStmt -> NatM InstrBlock
108 stmtToInstrs stmt = case stmt of
109 CmmNop -> return nilOL
110 CmmComment s -> return (unitOL (COMMENT s))
-- register assignment: dispatch on the rep of the target register
113 | isFloatingRep kind -> assignReg_FltCode kind reg src
114 #if WORD_SIZE_IN_BITS==32
-- on 32-bit targets, 64-bit assignments take the special I64 path
115 | kind == I64 -> assignReg_I64Code reg src
117 | otherwise -> assignReg_IntCode kind reg src
118 where kind = cmmRegRep reg
-- memory store: dispatch on the rep of the value being stored
121 | isFloatingRep kind -> assignMem_FltCode kind addr src
122 #if WORD_SIZE_IN_BITS==32
123 | kind == I64 -> assignMem_I64Code addr src
125 | otherwise -> assignMem_IntCode kind addr src
126 where kind = cmmExprRep src
128 CmmCall target result_regs args _ _
129 -> genCCall target result_regs args
131 CmmBranch id -> genBranch id
132 CmmCondBranch arg id -> genCondJump id arg
133 CmmSwitch arg ids -> genSwitch arg ids
134 CmmJump arg params -> genJump arg
-- CmmReturn is not handled: the CPS pass removes returns earlier
136 panic "stmtToInstrs: return statement should have been cps'd away"
138 -- -----------------------------------------------------------------------------
139 -- General things for putting together code sequences
141 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
142 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' node into the equivalent explicit addition,
-- so the rest of the instruction selector only has to deal with the
-- 'CmmMachOp' form.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let rep = cmmRegRep reg
    in  CmmMachOp (MO_Add rep)
                  [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
148 -- -----------------------------------------------------------------------------
149 -- Code gen for 64-bit arithmetic on 32-bit platforms
152 Simple support for generating 64-bit code (ie, 64 bit values and 64
153 bit assignments) on 32-bit platforms. Unlike the main code generator
154 we merely shoot for generating working code as simply as possible, and
155 pay little attention to code quality. Specifically, there is no
156 attempt to deal cleverly with the fixed-vs-floating register
157 distinction; all values are generated into (pairs of) floating
158 registers, even if this would mean some redundant reg-reg moves as a
159 result. Only one of the VRegUniques is returned, since it will be
160 of the VRegUniqueLo form, and the upper-half VReg can be determined
161 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit platform: the
-- generated code plus the virtual register holding the low word.
164 data ChildCode64 -- a.k.a "Register64"
167 Reg -- the lower 32-bit temporary which contains the
168 -- result; use getHiVRegFromLo to find the other
169 -- VRegUnique. Rules of this simplified insn
170 -- selection game are therefore that the returned
171 -- Reg may be modified
173 #if WORD_SIZE_IN_BITS==32
174 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
175 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
178 #ifndef x86_64_TARGET_ARCH
179 iselExpr64 :: CmmExpr -> NatM ChildCode64
182 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | Store a 64-bit value on 32-bit x86: two 32-bit MOVs, low word at
-- the address, high word 4 bytes above it.
186 assignMem_I64Code addrTree valueTree = do
187 Amode addr addr_code <- getAmode addrTree
188 ChildCode64 vcode rlo <- iselExpr64 valueTree
190 rhi = getHiVRegFromLo rlo
192 -- Little-endian store
193 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
194 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
196 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | Assign a 64-bit value to a local register pair with two reg-reg
-- MOVs; only local registers are valid 64-bit destinations here.
199 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
200 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
202 r_dst_lo = mkVReg u_dst I32
203 r_dst_hi = getHiVRegFromLo r_dst_lo
204 r_src_hi = getHiVRegFromLo r_src_lo
205 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
206 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
209 vcode `snocOL` mov_lo `snocOL` mov_hi
212 assignReg_I64Code lvalue valueTree
213 = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: move each 32-bit half into its register with an
-- immediate MOV.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        -- BUG FIX: shift the full-width Integer *before* truncating to
        -- Word32.  The previous code truncated first, so shifting by 32
        -- always produced 0 and the high word of every 64-bit literal
        -- was lost.
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, low word from the address, high word
-- 4 bytes above it
229 iselExpr64 (CmmLoad addrTree I64) = do
230 Amode addr addr_code <- getAmode addrTree
231 (rlo,rhi) <- getNewRegPairNat I32
233 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
234 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
237 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- a 64-bit local register needs no code: just hand back its low vreg
241 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
242 = return (ChildCode64 nilOL (mkVReg vu I32))
244 -- we handle addition, but rather badly
-- Addition of a 64-bit value and a literal: add the 32-bit halves with
-- an ADD/ADC pair so the carry propagates from the low into the high
-- word.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        -- BUG FIX: shift the full-width Integer *before* truncating to
        -- Word32; the previous code truncated first, so the high word
        -- of the literal addend was always 0.
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code = code1 `appOL`
               toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                      ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
                      MOV I32 (OpReg r1hi) (OpReg rhi),
                      ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)
-- general 64-bit addition of two subexpressions
260 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
261 ChildCode64 code1 r1lo <- iselExpr64 e1
262 ChildCode64 code2 r2lo <- iselExpr64 e2
263 (rlo,rhi) <- getNewRegPairNat I32
265 r1hi = getHiVRegFromLo r1lo
266 r2hi = getHiVRegFromLo r2lo
-- low halves added with ADD, high halves with ADC so the carry from
-- the low word propagates into the high word
269 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
270 ADD I32 (OpReg r2lo) (OpReg rlo),
271 MOV I32 (OpReg r1hi) (OpReg rhi),
272 ADC I32 (OpReg r2hi) (OpReg rhi) ]
274 return (ChildCode64 code rlo)
-- unsigned widening to 64 bits: low word is the value itself, high
-- word is an immediate zero
276 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
278 r_dst_lo <- getNewRegNat I32
279 let r_dst_hi = getHiVRegFromLo r_dst_lo
282 ChildCode64 (code `snocOL`
283 MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
-- catch-all: any other 64-bit expression is unsupported on i386
288 = pprPanic "iselExpr64(i386)" (ppr expr)
290 #endif /* i386_TARGET_ARCH */
292 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
294 #if sparc_TARGET_ARCH
-- | Store a 64-bit value on SPARC: high word at offset 0, low word at
-- offset 4 (SPARC is big-endian).
296 assignMem_I64Code addrTree valueTree = do
297 Amode addr addr_code <- getAmode addrTree
298 ChildCode64 vcode rlo <- iselExpr64 valueTree
-- the address is also computed into a plain register so the two
-- stores can use reg+imm addressing
299 (src, code) <- getSomeReg addrTree
301 rhi = getHiVRegFromLo rlo
303 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
304 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
305 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- | Assign a 64-bit value to a local register pair; the moves are
-- synthesised as OR-with-%g0, the SPARC register-move idiom.
307 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
308 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
310 r_dst_lo = mkVReg u_dst pk
311 r_dst_hi = getHiVRegFromLo r_dst_lo
312 r_src_hi = getHiVRegFromLo r_src_lo
313 mov_lo = mkMOV r_src_lo r_dst_lo
314 mov_hi = mkMOV r_src_hi r_dst_hi
315 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
316 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
317 assignReg_I64Code lvalue valueTree
318 = panic "assignReg_I64Code(sparc): invalid lvalue"
321 -- Don't delete this -- it's very handy for debugging.
323 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
324 -- = panic "iselExpr64(???)"
-- 64-bit load: two 32-bit loads, high word first (big-endian layout)
-- NOTE(review): this clause matches only AddrRegReg amodes and then
-- ignores r2, loading via r1 with immediate offsets -- that looks
-- wrong unless r2 is known to be zero here; confirm.
326 iselExpr64 (CmmLoad addrTree I64) = do
327 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
328 rlo <- getNewRegNat I32
329 let rhi = getHiVRegFromLo rlo
330 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
331 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
333 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- reading a 64-bit local register: copy both halves into a fresh pair
-- using the OR-with-%g0 move idiom
337 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64 _))) = do
338 r_dst_lo <- getNewRegNat I32
339 let r_dst_hi = getHiVRegFromLo r_dst_lo
340 r_src_lo = mkVReg uq I32
341 r_src_hi = getHiVRegFromLo r_src_lo
342 mov_lo = mkMOV r_src_lo r_dst_lo
343 mov_hi = mkMOV r_src_hi r_dst_hi
344 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
346 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
-- catch-all: any other 64-bit expression is unsupported on sparc
350 = pprPanic "iselExpr64(sparc)" (ppr expr)
352 #endif /* sparc_TARGET_ARCH */
354 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
356 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for both halves of a 64-bit memory
-- operand: the amode for the word at the address and one 4 bytes
-- above.  Falls back to loading the address into a register when the
-- +4 offset cannot be folded into the amode.
358 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
359 getI64Amodes addrTree = do
360 Amode hi_addr addr_code <- getAmode addrTree
361 case addrOffset hi_addr 4 of
362 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
363 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
364 return (AddrRegImm hi_ptr (ImmInt 0),
365 AddrRegImm hi_ptr (ImmInt 4),
-- | Store a 64-bit value on PowerPC.
368 assignMem_I64Code addrTree valueTree = do
369 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
370 ChildCode64 vcode rlo <- iselExpr64 valueTree
372 rhi = getHiVRegFromLo rlo
-- high word goes at the lower address (big-endian layout)
375 mov_hi = ST I32 rhi hi_addr
376 mov_lo = ST I32 rlo lo_addr
378 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | Assign a 64-bit value to a local register pair; MR is the
-- PowerPC register-to-register move.
380 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
381 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
383 r_dst_lo = mkVReg u_dst I32
384 r_dst_hi = getHiVRegFromLo r_dst_lo
385 r_src_hi = getHiVRegFromLo r_src_lo
386 mov_lo = MR r_dst_lo r_src_lo
387 mov_hi = MR r_dst_hi r_src_hi
390 vcode `snocOL` mov_lo `snocOL` mov_hi
393 assignReg_I64Code lvalue valueTree
394 = panic "assignReg_I64Code(powerpc): invalid lvalue"
397 -- Don't delete this -- it's very handy for debugging.
399 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
400 -- = panic "iselExpr64(???)"
-- 64-bit load: one 32-bit load per half, using the amode pair from
-- getI64Amodes
402 iselExpr64 (CmmLoad addrTree I64) = do
403 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
404 (rlo, rhi) <- getNewRegPairNat I32
405 let mov_hi = LD I32 rhi hi_addr
406 mov_lo = LD I32 rlo lo_addr
407 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- a 64-bit local register needs no code: just hand back its low vreg
410 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
411 = return (ChildCode64 nilOL (mkVReg vu I32))
-- 64-bit literal: build each 32-bit half with the standard PowerPC
-- lis/ori pair (LIS sets the upper 16 bits, OR-immediate the lower).
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- BUG FIX: shift the full-width Integer *before* truncating to
        -- Word16; the previous code truncated first, so half1..half3
        -- were always 0.
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: this OR must target rhi; it previously wrote
                -- to rlo, clobbering the low word and leaving the lower
                -- 16 bits of rhi unset.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- 64-bit addition: ADDC adds the low halves setting the carry, which
-- the (elided) following instruction folds into the high halves
430 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
431 ChildCode64 code1 r1lo <- iselExpr64 e1
432 ChildCode64 code2 r2lo <- iselExpr64 e2
433 (rlo,rhi) <- getNewRegPairNat I32
435 r1hi = getHiVRegFromLo r1lo
436 r2hi = getHiVRegFromLo r2lo
439 toOL [ ADDC rlo r1lo r2lo,
442 return (ChildCode64 code rlo)
-- unsigned 32->64 widening: low word is the value, high word is an
-- immediate zero
444 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
445 (expr_reg,expr_code) <- getSomeReg expr
446 (rlo, rhi) <- getNewRegPairNat I32
447 let mov_hi = LI rhi (ImmInt 0)
448 mov_lo = MR rlo expr_reg
449 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
-- catch-all: any other 64-bit expression is unsupported on powerpc
452 = pprPanic "iselExpr64(powerpc)" (ppr expr)
454 #endif /* powerpc_TARGET_ARCH */
457 -- -----------------------------------------------------------------------------
458 -- The 'Register' type
460 -- 'Register's passed up the tree. If the stix code forces the register
461 -- to live in a pre-decided machine register, it comes out as @Fixed@;
462 -- otherwise, it comes out as @Any@, and the parent can decide which
463 -- register to put it in.
-- Fixed: code plus the specific register holding the result.
-- Any: a function producing the code once the parent picks a register.
466 = Fixed MachRep Reg InstrBlock
467 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register', leaving the
-- register / code-generation part untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep = case register of
    Fixed _ reg code -> Fixed rep reg code
    Any   _ codefn   -> Any   rep codefn
474 -- -----------------------------------------------------------------------------
475 -- Utils based on getRegister, below
477 -- The dual to getAnyReg: compute an expression into a register, but
478 -- we don't mind which one it is.
-- A Fixed result is returned as-is; an Any result is materialised
-- into a fresh virtual register.
479 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
481 r <- getRegister expr
484 tmp <- getNewRegNat rep
485 return (tmp, code tmp)
489 -- -----------------------------------------------------------------------------
490 -- Grab the Reg for a CmmReg
-- | Map a 'CmmReg' to the native 'Reg' that holds it.  Panics for
-- global registers that live in memory rather than a machine register.
492 getRegisterReg :: CmmReg -> Reg
494 getRegisterReg (CmmLocal (LocalReg u pk _))
497 getRegisterReg (CmmGlobal mid)
498 = case get_GlobalReg_reg_or_addr mid of
499 Left (RealReg rrno) -> RealReg rrno
500 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
501 -- By this stage, the only MagicIds remaining should be the
502 -- ones which map to a real machine register on this
503 -- platform. Hence ...
506 -- -----------------------------------------------------------------------------
507 -- Generate code to get a subtree into a Register
509 -- Don't delete this -- it's very handy for debugging.
511 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
512 -- = panic "getRegister(???)"
-- | Generate code to compute a Cmm expression into some register.
514 getRegister :: CmmExpr -> NatM Register
516 #if !x86_64_TARGET_ARCH
517 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
518 -- register, it can only be used for rip-relative addressing.
519 getRegister (CmmReg (CmmGlobal PicBaseReg))
521 reg <- getPicBaseNat wordRep
522 return (Fixed wordRep reg nilOL)
-- a plain register read needs no code at all
525 getRegister (CmmReg reg)
526 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
-- reg+offset is rewritten into an explicit add and retried
528 getRegister tree@(CmmRegOff _ _)
529 = getRegister (mangleIndexTree tree)
532 #if WORD_SIZE_IN_BITS==32
533 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
534 -- TO_W_(x), TO_W_(x >> 32)
-- TO_W_(x >> 32) is just the high vreg of the 64-bit pair
536 getRegister (CmmMachOp (MO_U_Conv I64 I32)
537 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
538 ChildCode64 code rlo <- iselExpr64 x
539 return $ Fixed I32 (getHiVRegFromLo rlo) code
541 getRegister (CmmMachOp (MO_S_Conv I64 I32)
542 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
543 ChildCode64 code rlo <- iselExpr64 x
544 return $ Fixed I32 (getHiVRegFromLo rlo) code
-- TO_W_(x) is just the low vreg
546 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
547 ChildCode64 code rlo <- iselExpr64 x
548 return $ Fixed I32 rlo code
550 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
551 ChildCode64 code rlo <- iselExpr64 x
552 return $ Fixed I32 rlo code
556 -- end of machine-"independent" bit; here we go on the rest...
558 #if alpha_TARGET_ARCH
560 getRegister (StDouble d)
561 = getBlockIdNat `thenNat` \ lbl ->
562 getNewRegNat PtrRep `thenNat` \ tmp ->
563 let code dst = mkSeqInstrs [
564 LDATA RoDataSegment lbl [
565 DATA TF [ImmLab (rational d)]
567 LDA tmp (AddrImm (ImmCLbl lbl)),
568 LD TF dst (AddrReg tmp)]
570 return (Any F64 code)
572 getRegister (StPrim primop [x]) -- unary PrimOps
574 IntNegOp -> trivialUCode (NEG Q False) x
576 NotOp -> trivialUCode NOT x
578 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
579 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
581 OrdOp -> coerceIntCode IntRep x
584 Float2IntOp -> coerceFP2Int x
585 Int2FloatOp -> coerceInt2FP pr x
586 Double2IntOp -> coerceFP2Int x
587 Int2DoubleOp -> coerceInt2FP pr x
589 Double2FloatOp -> coerceFltCode x
590 Float2DoubleOp -> coerceFltCode x
592 other_op -> getRegister (StCall fn CCallConv F64 [x])
594 fn = case other_op of
595 FloatExpOp -> fsLit "exp"
596 FloatLogOp -> fsLit "log"
597 FloatSqrtOp -> fsLit "sqrt"
598 FloatSinOp -> fsLit "sin"
599 FloatCosOp -> fsLit "cos"
600 FloatTanOp -> fsLit "tan"
601 FloatAsinOp -> fsLit "asin"
602 FloatAcosOp -> fsLit "acos"
603 FloatAtanOp -> fsLit "atan"
604 FloatSinhOp -> fsLit "sinh"
605 FloatCoshOp -> fsLit "cosh"
606 FloatTanhOp -> fsLit "tanh"
607 DoubleExpOp -> fsLit "exp"
608 DoubleLogOp -> fsLit "log"
609 DoubleSqrtOp -> fsLit "sqrt"
610 DoubleSinOp -> fsLit "sin"
611 DoubleCosOp -> fsLit "cos"
612 DoubleTanOp -> fsLit "tan"
613 DoubleAsinOp -> fsLit "asin"
614 DoubleAcosOp -> fsLit "acos"
615 DoubleAtanOp -> fsLit "atan"
616 DoubleSinhOp -> fsLit "sinh"
617 DoubleCoshOp -> fsLit "cosh"
618 DoubleTanhOp -> fsLit "tanh"
620 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Alpha: dyadic primops.  Comparisons compile to CMP with the operand
-- order chosen so only LT/LE/EQ condition forms are needed
-- (a > b  ==>  CMP LT b a, etc.).
622 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
624 CharGtOp -> trivialCode (CMP LTT) y x
625 CharGeOp -> trivialCode (CMP LE) y x
626 CharEqOp -> trivialCode (CMP EQQ) x y
627 CharNeOp -> int_NE_code x y
628 CharLtOp -> trivialCode (CMP LTT) x y
629 CharLeOp -> trivialCode (CMP LE) x y
631 IntGtOp -> trivialCode (CMP LTT) y x
632 IntGeOp -> trivialCode (CMP LE) y x
633 IntEqOp -> trivialCode (CMP EQQ) x y
634 IntNeOp -> int_NE_code x y
635 IntLtOp -> trivialCode (CMP LTT) x y
636 IntLeOp -> trivialCode (CMP LE) x y
638 WordGtOp -> trivialCode (CMP ULT) y x
-- NOTE(review): WordGeOp uses operand order (x y) while IntGeOp and
-- AddrGeOp use (y x); for x >= y the CMP ULE operands should be
-- swapped, so this looks like a bug -- confirm and fix.
639 WordGeOp -> trivialCode (CMP ULE) x y
640 WordEqOp -> trivialCode (CMP EQQ) x y
641 WordNeOp -> int_NE_code x y
642 WordLtOp -> trivialCode (CMP ULT) x y
643 WordLeOp -> trivialCode (CMP ULE) x y
645 AddrGtOp -> trivialCode (CMP ULT) y x
646 AddrGeOp -> trivialCode (CMP ULE) y x
647 AddrEqOp -> trivialCode (CMP EQQ) x y
648 AddrNeOp -> int_NE_code x y
649 AddrLtOp -> trivialCode (CMP ULT) x y
650 AddrLeOp -> trivialCode (CMP ULE) x y
652 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
653 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
654 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
655 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
656 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
657 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
659 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
660 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
661 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
662 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
663 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
664 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
666 IntAddOp -> trivialCode (ADD Q False) x y
667 IntSubOp -> trivialCode (SUB Q False) x y
668 IntMulOp -> trivialCode (MUL Q False) x y
669 IntQuotOp -> trivialCode (DIV Q False) x y
670 IntRemOp -> trivialCode (REM Q False) x y
672 WordAddOp -> trivialCode (ADD Q False) x y
673 WordSubOp -> trivialCode (SUB Q False) x y
674 WordMulOp -> trivialCode (MUL Q False) x y
675 WordQuotOp -> trivialCode (DIV Q True) x y
676 WordRemOp -> trivialCode (REM Q True) x y
678 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
679 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
680 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
681 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
683 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
684 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
685 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
686 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
688 AddrAddOp -> trivialCode (ADD Q False) x y
689 AddrSubOp -> trivialCode (SUB Q False) x y
690 AddrRemOp -> trivialCode (REM Q True) x y
692 AndOp -> trivialCode AND x y
693 OrOp -> trivialCode OR x y
694 XorOp -> trivialCode XOR x y
695 SllOp -> trivialCode SLL x y
696 SrlOp -> trivialCode SRL x y
698 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
699 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
700 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
702 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
703 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
705 {- ------------------------------------------------------------
706 Some bizarre special code for getting condition codes into
707 registers. Integer non-equality is a test for equality
708 followed by an XOR with 1. (Integer comparisons always set
709 the result register to 0 or 1.) Floating point comparisons of
710 any kind leave the result in a floating point register, so we
711 need to wrangle an integer register out of things.
713 int_NE_code :: StixTree -> StixTree -> NatM Register
716 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
717 getNewRegNat IntRep `thenNat` \ tmp ->
719 code = registerCode register tmp
720 src = registerName register tmp
721 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
723 return (Any IntRep code__2)
725 {- ------------------------------------------------------------
726 Comments for int_NE_code also apply to cmpF_code
729 :: (Reg -> Reg -> Reg -> Instr)
731 -> StixTree -> StixTree
734 cmpF_code instr cond x y
735 = trivialFCode pr instr x y `thenNat` \ register ->
736 getNewRegNat F64 `thenNat` \ tmp ->
737 getBlockIdNat `thenNat` \ lbl ->
739 code = registerCode register tmp
740 result = registerName register tmp
742 code__2 dst = code . mkSeqInstrs [
743 OR zeroh (RIImm (ImmInt 1)) dst,
744 BF cond result (ImmCLbl lbl),
745 OR zeroh (RIReg zeroh) dst,
748 return (Any IntRep code__2)
750 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
751 ------------------------------------------------------------
753 getRegister (CmmLoad pk mem)
754 = getAmode mem `thenNat` \ amode ->
756 code = amodeCode amode
757 src = amodeAddr amode
758 size = primRepToSize pk
759 code__2 dst = code . mkSeqInstr (LD size dst src)
761 return (Any pk code__2)
763 getRegister (StInt i)
766 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
768 return (Any IntRep code)
771 code dst = mkSeqInstr (LDI Q dst src)
773 return (Any IntRep code)
775 src = ImmInt (fromInteger i)
780 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
782 return (Any PtrRep code)
785 imm__2 = case imm of Just x -> x
787 #endif /* alpha_TARGET_ARCH */
789 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
793 getRegister (CmmLit (CmmFloat f F32)) = do
794 lbl <- getNewLabelNat
795 dflags <- getDynFlagsNat
796 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
797 Amode addr addr_code <- getAmode dynRef
801 CmmStaticLit (CmmFloat f F32)]
802 `consOL` (addr_code `snocOL`
805 return (Any F32 code)
808 getRegister (CmmLit (CmmFloat d F64))
810 = let code dst = unitOL (GLDZ dst)
811 in return (Any F64 code)
814 = let code dst = unitOL (GLD1 dst)
815 in return (Any F64 code)
818 lbl <- getNewLabelNat
819 dflags <- getDynFlagsNat
820 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
821 Amode addr addr_code <- getAmode dynRef
825 CmmStaticLit (CmmFloat d F64)]
826 `consOL` (addr_code `snocOL`
829 return (Any F64 code)
831 #endif /* i386_TARGET_ARCH */
833 #if x86_64_TARGET_ARCH
835 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
836 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
837 -- I don't know why there are xorpd, xorps, and pxor instructions.
838 -- They all appear to do the same thing --SDM
839 return (Any rep code)
841 getRegister (CmmLit (CmmFloat f rep)) = do
842 lbl <- getNewLabelNat
843 let code dst = toOL [
846 CmmStaticLit (CmmFloat f rep)],
847 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
850 return (Any rep code)
852 #endif /* x86_64_TARGET_ARCH */
854 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
856 -- catch simple cases of zero- or sign-extended load
857 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
858 code <- intLoadCode (MOVZxL I8) addr
859 return (Any I32 code)
861 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
862 code <- intLoadCode (MOVSxL I8) addr
863 return (Any I32 code)
865 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
866 code <- intLoadCode (MOVZxL I16) addr
867 return (Any I32 code)
869 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
870 code <- intLoadCode (MOVSxL I16) addr
871 return (Any I32 code)
875 #if x86_64_TARGET_ARCH
877 -- catch simple cases of zero- or sign-extended load
878 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
879 code <- intLoadCode (MOVZxL I8) addr
880 return (Any I64 code)
882 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
883 code <- intLoadCode (MOVSxL I8) addr
884 return (Any I64 code)
886 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
887 code <- intLoadCode (MOVZxL I16) addr
888 return (Any I64 code)
890 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
891 code <- intLoadCode (MOVSxL I16) addr
892 return (Any I64 code)
894 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
895 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
896 return (Any I64 code)
898 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
899 code <- intLoadCode (MOVSxL I32) addr
900 return (Any I64 code)
904 #if x86_64_TARGET_ARCH
905 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
906 CmmLit displacement])
907 = return $ Any I64 (\dst -> unitOL $
908 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
911 #if x86_64_TARGET_ARCH
912 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
913 x_code <- getAnyReg x
914 lbl <- getNewLabelNat
916 code dst = x_code dst `appOL` toOL [
917 -- This is how gcc does it, so it can't be that bad:
918 LDATA ReadOnlyData16 [
921 CmmStaticLit (CmmInt 0x80000000 I32),
922 CmmStaticLit (CmmInt 0 I32),
923 CmmStaticLit (CmmInt 0 I32),
924 CmmStaticLit (CmmInt 0 I32)
926 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
927 -- xorps, so we need the 128-bit constant
928 -- ToDo: rip-relative
931 return (Any F32 code)
933 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
934 x_code <- getAnyReg x
935 lbl <- getNewLabelNat
937 -- This is how gcc does it, so it can't be that bad:
938 code dst = x_code dst `appOL` toOL [
939 LDATA ReadOnlyData16 [
942 CmmStaticLit (CmmInt 0x8000000000000000 I64),
943 CmmStaticLit (CmmInt 0 I64)
945 -- gcc puts an unpck here. Wonder if we need it.
946 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
947 -- xorpd, so we need the 128-bit constant
950 return (Any F64 code)
953 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
955 getRegister (CmmMachOp mop [x]) -- unary MachOps
958 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
959 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
962 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
963 MO_Not rep -> trivialUCode rep (NOT rep) x
966 MO_U_Conv I32 I8 -> toI8Reg I32 x
967 MO_S_Conv I32 I8 -> toI8Reg I32 x
968 MO_U_Conv I16 I8 -> toI8Reg I16 x
969 MO_S_Conv I16 I8 -> toI8Reg I16 x
970 MO_U_Conv I32 I16 -> toI16Reg I32 x
971 MO_S_Conv I32 I16 -> toI16Reg I32 x
972 #if x86_64_TARGET_ARCH
973 MO_U_Conv I64 I32 -> conversionNop I64 x
974 MO_S_Conv I64 I32 -> conversionNop I64 x
975 MO_U_Conv I64 I16 -> toI16Reg I64 x
976 MO_S_Conv I64 I16 -> toI16Reg I64 x
977 MO_U_Conv I64 I8 -> toI8Reg I64 x
978 MO_S_Conv I64 I8 -> toI8Reg I64 x
981 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
982 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
985 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
986 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
987 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
989 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
990 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
991 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
993 #if x86_64_TARGET_ARCH
994 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
995 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
996 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
997 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
998 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
999 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
1000 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1001 -- However, we don't want the register allocator to throw it
1002 -- away as an unnecessary reg-to-reg move, so we keep it in
1003 -- the form of a movzl and print it as a movl later.
1006 #if i386_TARGET_ARCH
1007 MO_S_Conv F32 F64 -> conversionNop F64 x
1008 MO_S_Conv F64 F32 -> conversionNop F32 x
1010 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1011 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1015 | isFloatingRep from -> coerceFP2Int from to x
1016 | isFloatingRep to -> coerceInt2FP from to x
1018 other -> pprPanic "getRegister" (pprMachOp mop)
1020 -- signed or unsigned extension.
1021 integerExtend from to instr expr = do
1022 (reg,e_code) <- if from == I8 then getByteReg expr
1023 else getSomeReg expr
1027 instr from (OpReg reg) (OpReg dst)
1028 return (Any to code)
1030 toI8Reg new_rep expr
1031 = do codefn <- getAnyReg expr
1032 return (Any new_rep codefn)
1033 -- HACK: use getAnyReg to get a byte-addressable register.
1034 -- If the source was a Fixed register, this will add the
1035 -- mov instruction to put it into the desired destination.
1036 -- We're assuming that the destination won't be a fixed
1037 -- non-byte-addressable register; it won't be, because all
1038 -- fixed registers are word-sized.
1040 toI16Reg = toI8Reg -- for now
1042 conversionNop new_rep expr
1043 = do e_code <- getRegister expr
1044 return (swizzleRegisterRep e_code new_rep)
1047 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1049 MO_Eq F32 -> condFltReg EQQ x y
1050 MO_Ne F32 -> condFltReg NE x y
1051 MO_S_Gt F32 -> condFltReg GTT x y
1052 MO_S_Ge F32 -> condFltReg GE x y
1053 MO_S_Lt F32 -> condFltReg LTT x y
1054 MO_S_Le F32 -> condFltReg LE x y
1056 MO_Eq F64 -> condFltReg EQQ x y
1057 MO_Ne F64 -> condFltReg NE x y
1058 MO_S_Gt F64 -> condFltReg GTT x y
1059 MO_S_Ge F64 -> condFltReg GE x y
1060 MO_S_Lt F64 -> condFltReg LTT x y
1061 MO_S_Le F64 -> condFltReg LE x y
1063 MO_Eq rep -> condIntReg EQQ x y
1064 MO_Ne rep -> condIntReg NE x y
1066 MO_S_Gt rep -> condIntReg GTT x y
1067 MO_S_Ge rep -> condIntReg GE x y
1068 MO_S_Lt rep -> condIntReg LTT x y
1069 MO_S_Le rep -> condIntReg LE x y
1071 MO_U_Gt rep -> condIntReg GU x y
1072 MO_U_Ge rep -> condIntReg GEU x y
1073 MO_U_Lt rep -> condIntReg LU x y
1074 MO_U_Le rep -> condIntReg LEU x y
1076 #if i386_TARGET_ARCH
1077 MO_Add F32 -> trivialFCode F32 GADD x y
1078 MO_Sub F32 -> trivialFCode F32 GSUB x y
1080 MO_Add F64 -> trivialFCode F64 GADD x y
1081 MO_Sub F64 -> trivialFCode F64 GSUB x y
1083 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1084 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1087 #if x86_64_TARGET_ARCH
1088 MO_Add F32 -> trivialFCode F32 ADD x y
1089 MO_Sub F32 -> trivialFCode F32 SUB x y
1091 MO_Add F64 -> trivialFCode F64 ADD x y
1092 MO_Sub F64 -> trivialFCode F64 SUB x y
1094 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1095 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1098 MO_Add rep -> add_code rep x y
1099 MO_Sub rep -> sub_code rep x y
1101 MO_S_Quot rep -> div_code rep True True x y
1102 MO_S_Rem rep -> div_code rep True False x y
1103 MO_U_Quot rep -> div_code rep False True x y
1104 MO_U_Rem rep -> div_code rep False False x y
1106 #if i386_TARGET_ARCH
1107 MO_Mul F32 -> trivialFCode F32 GMUL x y
1108 MO_Mul F64 -> trivialFCode F64 GMUL x y
1111 #if x86_64_TARGET_ARCH
1112 MO_Mul F32 -> trivialFCode F32 MUL x y
1113 MO_Mul F64 -> trivialFCode F64 MUL x y
1116 MO_Mul rep -> let op = IMUL rep in
1117 trivialCode rep op (Just op) x y
1119 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1121 MO_And rep -> let op = AND rep in
1122 trivialCode rep op (Just op) x y
1123 MO_Or rep -> let op = OR rep in
1124 trivialCode rep op (Just op) x y
1125 MO_Xor rep -> let op = XOR rep in
1126 trivialCode rep op (Just op) x y
1128 {- Shift ops on x86s have constraints on their source, it
1129 either has to be Imm, CL or 1
1130 => trivialCode is not restrictive enough (sigh.)
1132 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1133 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1134 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1136 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1138 --------------------
1139 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1140 imulMayOflo rep a b = do
1141 (a_reg, a_code) <- getNonClobberedReg a
1142 b_code <- getAnyReg b
1144 shift_amt = case rep of
1147 _ -> panic "shift_amt"
1149 code = a_code `appOL` b_code eax `appOL`
1151 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1152 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1153 -- sign extend lower part
1154 SUB rep (OpReg edx) (OpReg eax)
1155 -- compare against upper
1156 -- eax==0 if high part == sign extended low part
1159 return (Fixed rep eax code)
1161 --------------------
1162 shift_code :: MachRep
1163 -> (Operand -> Operand -> Instr)
1168 {- Case1: shift length as immediate -}
1169 shift_code rep instr x y@(CmmLit lit) = do
1170 x_code <- getAnyReg x
1173 = x_code dst `snocOL`
1174 instr (OpImm (litToImm lit)) (OpReg dst)
1176 return (Any rep code)
1178 {- Case2: shift length is complex (non-immediate)
1179 * y must go in %ecx.
1180 * we cannot do y first *and* put its result in %ecx, because
1181 %ecx might be clobbered by x.
1182 * if we do y second, then x cannot be
1183 in a clobbered reg. Also, we cannot clobber x's reg
1184 with the instruction itself.
1186 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1187 - do y second and put its result into %ecx. x gets placed in a fresh
   tmp. This is likely to be better, because the reg alloc can
1189 eliminate this reg->reg move here (it won't eliminate the other one,
1190 because the move is into the fixed %ecx).
1192 shift_code rep instr x y{-amount-} = do
1193 x_code <- getAnyReg x
1194 tmp <- getNewRegNat rep
1195 y_code <- getAnyReg y
1197 code = x_code tmp `appOL`
1199 instr (OpReg ecx) (OpReg tmp)
1201 return (Fixed rep tmp code)
1203 --------------------
1204 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1205 add_code rep x (CmmLit (CmmInt y _))
1206 | is32BitInteger y = add_int rep x y
1207 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1209 --------------------
1210 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1211 sub_code rep x (CmmLit (CmmInt y _))
1212 | is32BitInteger (-y) = add_int rep x (-y)
1213 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1215 -- our three-operand add instruction:
1216 add_int rep x y = do
1217 (x_reg, x_code) <- getSomeReg x
1219 imm = ImmInt (fromInteger y)
1223 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1226 return (Any rep code)
1228 ----------------------
1229 div_code rep signed quotient x y = do
1230 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1231 x_code <- getAnyReg x
1233 widen | signed = CLTD rep
1234 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1236 instr | signed = IDIV
1239 code = y_code `appOL`
1241 toOL [widen, instr rep y_op]
1243 result | quotient = eax
1247 return (Fixed rep result code)
1250 getRegister (CmmLoad mem pk)
1253 Amode src mem_code <- getAmode mem
1255 code dst = mem_code `snocOL`
1256 IF_ARCH_i386(GLD pk src dst,
1257 MOV pk (OpAddr src) (OpReg dst))
1259 return (Any pk code)
1261 #if i386_TARGET_ARCH
1262 getRegister (CmmLoad mem pk)
1265 code <- intLoadCode (instr pk) mem
1266 return (Any pk code)
1268 instr I8 = MOVZxL pk
1271 -- we always zero-extend 8-bit loads, if we
1272 -- can't think of anything better. This is because
1273 -- we can't guarantee access to an 8-bit variant of every register
1274 -- (esi and edi don't have 8-bit variants), so to make things
1275 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1278 #if x86_64_TARGET_ARCH
1279 -- Simpler memory load code on x86_64
1280 getRegister (CmmLoad mem pk)
1282 code <- intLoadCode (MOV pk) mem
1283 return (Any pk code)
1286 getRegister (CmmLit (CmmInt 0 rep))
1288 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1289 adj_rep = case rep of I64 -> I32; _ -> rep
1290 rep1 = IF_ARCH_i386( rep, adj_rep )
1292 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1294 return (Any rep code)
1296 #if x86_64_TARGET_ARCH
1297 -- optimisation for loading small literals on x86_64: take advantage
1298 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1299 -- instruction forms are shorter.
1300 getRegister (CmmLit lit)
1301 | I64 <- cmmLitRep lit, not (isBigLit lit)
1304 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1306 return (Any I64 code)
1308 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1310 -- note1: not the same as (not.is32BitLit), because that checks for
1311 -- signed literals that fit in 32 bits, but we want unsigned
1313 -- note2: all labels are small, because we're assuming the
1314 -- small memory model (see gcc docs, -mcmodel=small).
1317 getRegister (CmmLit lit)
1321 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1323 return (Any rep code)
1325 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build code for an integer load: compute the addressing mode for
-- @mem@, then load from it into whatever destination register the
-- caller later chooses.  The caller supplies the concrete load
-- instruction, so the same helper serves plain and extending moves.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
	-> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode addr addr_code <- getAmode mem
  let load dst = addr_code `snocOL` instr (OpAddr addr) (OpReg dst)
  return load
1334 -- Compute an expression into *any* register, adding the appropriate
1335 -- move instruction if necessary.
1336 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1338 r <- getRegister expr
-- | Turn a 'Register' into a function that produces code leaving the
-- value in a caller-chosen register.  An 'Any' register already is
-- such a function; a 'Fixed' register needs a trailing reg-to-reg
-- move appended to its code.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)        = return code
anyReg (Fixed sz src code) = return $ \dst -> code `snocOL` reg2reg sz src dst
1345 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1346 -- Fixed registers might not be byte-addressable, so we make sure we've
1347 -- got a temporary, inserting an extra reg copy if necessary.
1348 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1349 #if x86_64_TARGET_ARCH
1350 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1352 getByteReg expr = do
1353 r <- getRegister expr
1356 tmp <- getNewRegNat rep
1357 return (tmp, code tmp)
1359 | isVirtualReg reg -> return (reg,code)
1361 tmp <- getNewRegNat rep
1362 return (tmp, code `snocOL` reg2reg rep reg tmp)
1363 -- ToDo: could optimise slightly by checking for byte-addressable
1364 -- real registers, but that will happen very rarely if at all.
1367 -- Another variant: this time we want the result in a register that cannot
1368 -- be modified by code to evaluate an arbitrary expression.
1369 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1370 getNonClobberedReg expr = do
1371 r <- getRegister expr
1374 tmp <- getNewRegNat rep
1375 return (tmp, code tmp)
1377 -- only free regs can be clobbered
1378 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1379 tmp <- getNewRegNat rep
1380 return (tmp, code `snocOL` reg2reg rep reg tmp)
1384 reg2reg :: MachRep -> Reg -> Reg -> Instr
1386 #if i386_TARGET_ARCH
1387 | isFloatingRep rep = GMOV src dst
1389 | otherwise = MOV rep (OpReg src) (OpReg dst)
1391 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1393 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1395 #if sparc_TARGET_ARCH
1397 getRegister (CmmLit (CmmFloat f F32)) = do
1398 lbl <- getNewLabelNat
1399 let code dst = toOL [
1402 CmmStaticLit (CmmFloat f F32)],
1403 SETHI (HI (ImmCLbl lbl)) dst,
1404 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1405 return (Any F32 code)
1407 getRegister (CmmLit (CmmFloat d F64)) = do
1408 lbl <- getNewLabelNat
1409 let code dst = toOL [
1412 CmmStaticLit (CmmFloat d F64)],
1413 SETHI (HI (ImmCLbl lbl)) dst,
1414 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1415 return (Any F64 code)
1417 getRegister (CmmMachOp mop [x]) -- unary MachOps
1419 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1420 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1422 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1423 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1425 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1427 MO_U_Conv F64 F32-> coerceDbl2Flt x
1428 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1430 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1431 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1432 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1433 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1435 -- Conversions which are a nop on sparc
1437 | from == to -> conversionNop to x
1438 MO_U_Conv I32 to -> conversionNop to x
1439 MO_S_Conv I32 to -> conversionNop to x
1442 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1443 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1444 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1445 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1447 other_op -> panic "Unknown unary mach op"
1450 integerExtend signed from to expr = do
1451 (reg, e_code) <- getSomeReg expr
1455 ((if signed then SRA else SRL)
1456 reg (RIImm (ImmInt 0)) dst)
1457 return (Any to code)
1458 conversionNop new_rep expr
1459 = do e_code <- getRegister expr
1460 return (swizzleRegisterRep e_code new_rep)
1462 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1464 MO_Eq F32 -> condFltReg EQQ x y
1465 MO_Ne F32 -> condFltReg NE x y
1467 MO_S_Gt F32 -> condFltReg GTT x y
1468 MO_S_Ge F32 -> condFltReg GE x y
1469 MO_S_Lt F32 -> condFltReg LTT x y
1470 MO_S_Le F32 -> condFltReg LE x y
1472 MO_Eq F64 -> condFltReg EQQ x y
1473 MO_Ne F64 -> condFltReg NE x y
1475 MO_S_Gt F64 -> condFltReg GTT x y
1476 MO_S_Ge F64 -> condFltReg GE x y
1477 MO_S_Lt F64 -> condFltReg LTT x y
1478 MO_S_Le F64 -> condFltReg LE x y
1480 MO_Eq rep -> condIntReg EQQ x y
1481 MO_Ne rep -> condIntReg NE x y
1483 MO_S_Gt rep -> condIntReg GTT x y
1484 MO_S_Ge rep -> condIntReg GE x y
1485 MO_S_Lt rep -> condIntReg LTT x y
1486 MO_S_Le rep -> condIntReg LE x y
1488 MO_U_Gt I32 -> condIntReg GTT x y
1489 MO_U_Ge I32 -> condIntReg GE x y
1490 MO_U_Lt I32 -> condIntReg LTT x y
1491 MO_U_Le I32 -> condIntReg LE x y
1493 MO_U_Gt I16 -> condIntReg GU x y
1494 MO_U_Ge I16 -> condIntReg GEU x y
1495 MO_U_Lt I16 -> condIntReg LU x y
1496 MO_U_Le I16 -> condIntReg LEU x y
1498 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1499 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1501 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1503 -- ToDo: teach about V8+ SPARC div instructions
1504 MO_S_Quot I32 -> idiv (fsLit ".div") x y
1505 MO_S_Rem I32 -> idiv (fsLit ".rem") x y
1506 MO_U_Quot I32 -> idiv (fsLit ".udiv") x y
1507 MO_U_Rem I32 -> idiv (fsLit ".urem") x y
1509 MO_Add F32 -> trivialFCode F32 FADD x y
1510 MO_Sub F32 -> trivialFCode F32 FSUB x y
1511 MO_Mul F32 -> trivialFCode F32 FMUL x y
1512 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1514 MO_Add F64 -> trivialFCode F64 FADD x y
1515 MO_Sub F64 -> trivialFCode F64 FSUB x y
1516 MO_Mul F64 -> trivialFCode F64 FMUL x y
1517 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1519 MO_And rep -> trivialCode rep (AND False) x y
1520 MO_Or rep -> trivialCode rep (OR False) x y
1521 MO_Xor rep -> trivialCode rep (XOR False) x y
1523 MO_Mul rep -> trivialCode rep (SMUL False) x y
1525 MO_Shl rep -> trivialCode rep SLL x y
1526 MO_U_Shr rep -> trivialCode rep SRL x y
1527 MO_S_Shr rep -> trivialCode rep SRA x y
1530 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1531 [promote x, promote y])
1532 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1533 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1536 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1538 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1540 --------------------
1541 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1542 imulMayOflo rep a b = do
1543 (a_reg, a_code) <- getSomeReg a
1544 (b_reg, b_code) <- getSomeReg b
1545 res_lo <- getNewRegNat I32
1546 res_hi <- getNewRegNat I32
1548 shift_amt = case rep of
1551 _ -> panic "shift_amt"
1552 code dst = a_code `appOL` b_code `appOL`
1554 SMUL False a_reg (RIReg b_reg) res_lo,
1556 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1557 SUB False False res_lo (RIReg res_hi) dst
1559 return (Any I32 code)
1561 getRegister (CmmLoad mem pk) = do
1562 Amode src code <- getAmode mem
1564 code__2 dst = code `snocOL` LD pk src dst
1565 return (Any pk code__2)
1567 getRegister (CmmLit (CmmInt i _))
1570 src = ImmInt (fromInteger i)
1571 code dst = unitOL (OR False g0 (RIImm src) dst)
1573 return (Any I32 code)
1575 getRegister (CmmLit lit)
1576 = let rep = cmmLitRep lit
1580 OR False dst (RIImm (LO imm)) dst]
1581 in return (Any I32 code)
1583 #endif /* sparc_TARGET_ARCH */
1585 #if powerpc_TARGET_ARCH
1586 getRegister (CmmLoad mem pk)
1589 Amode addr addr_code <- getAmode mem
1590 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1591 addr_code `snocOL` LD pk dst addr
1592 return (Any pk code)
1594 -- catch simple cases of zero- or sign-extended load
1595 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1596 Amode addr addr_code <- getAmode mem
1597 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1599 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1601 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1602 Amode addr addr_code <- getAmode mem
1603 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1605 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1606 Amode addr addr_code <- getAmode mem
1607 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1609 getRegister (CmmMachOp mop [x]) -- unary MachOps
1611 MO_Not rep -> trivialUCode rep NOT x
1613 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1614 MO_S_Conv F32 F64 -> conversionNop F64 x
1617 | from == to -> conversionNop to x
1618 | isFloatingRep from -> coerceFP2Int from to x
1619 | isFloatingRep to -> coerceInt2FP from to x
1621 -- narrowing is a nop: we treat the high bits as undefined
1622 MO_S_Conv I32 to -> conversionNop to x
1623 MO_S_Conv I16 I8 -> conversionNop I8 x
1624 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1625 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1628 | from == to -> conversionNop to x
1629 -- narrowing is a nop: we treat the high bits as undefined
1630 MO_U_Conv I32 to -> conversionNop to x
1631 MO_U_Conv I16 I8 -> conversionNop I8 x
1632 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1633 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1635 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1636 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1637 MO_S_Neg rep -> trivialUCode rep NEG x
1640 conversionNop new_rep expr
1641 = do e_code <- getRegister expr
1642 return (swizzleRegisterRep e_code new_rep)
1644 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1646 MO_Eq F32 -> condFltReg EQQ x y
1647 MO_Ne F32 -> condFltReg NE x y
1649 MO_S_Gt F32 -> condFltReg GTT x y
1650 MO_S_Ge F32 -> condFltReg GE x y
1651 MO_S_Lt F32 -> condFltReg LTT x y
1652 MO_S_Le F32 -> condFltReg LE x y
1654 MO_Eq F64 -> condFltReg EQQ x y
1655 MO_Ne F64 -> condFltReg NE x y
1657 MO_S_Gt F64 -> condFltReg GTT x y
1658 MO_S_Ge F64 -> condFltReg GE x y
1659 MO_S_Lt F64 -> condFltReg LTT x y
1660 MO_S_Le F64 -> condFltReg LE x y
1662 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1663 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1665 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1666 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1667 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1668 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1670 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1671 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1672 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1673 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1675 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1676 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1677 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1678 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1680 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1681 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1682 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1683 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1685 -- optimize addition with 32-bit immediate
1689 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1690 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1693 (src, srcCode) <- getSomeReg x
1694 let imm = litToImm lit
1695 code dst = srcCode `appOL` toOL [
1696 ADDIS dst src (HA imm),
1697 ADD dst dst (RIImm (LO imm))
1699 return (Any I32 code)
1700 _ -> trivialCode I32 True ADD x y
1702 MO_Add rep -> trivialCode rep True ADD x y
1704 case y of -- subfi ('substract from' with immediate) doesn't exist
1705 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1706 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1707 _ -> trivialCodeNoImm rep SUBF y x
1709 MO_Mul rep -> trivialCode rep True MULLW x y
1711 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1713 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1714 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1716 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1717 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1719 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1720 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1722 MO_And rep -> trivialCode rep False AND x y
1723 MO_Or rep -> trivialCode rep False OR x y
1724 MO_Xor rep -> trivialCode rep False XOR x y
1726 MO_Shl rep -> trivialCode rep False SLW x y
1727 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1728 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1730 getRegister (CmmLit (CmmInt i rep))
1731 | Just imm <- makeImmediate rep True i
1733 code dst = unitOL (LI dst imm)
1735 return (Any rep code)
1737 getRegister (CmmLit (CmmFloat f frep)) = do
1738 lbl <- getNewLabelNat
1739 dflags <- getDynFlagsNat
1740 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1741 Amode addr addr_code <- getAmode dynRef
1743 LDATA ReadOnlyData [CmmDataLabel lbl,
1744 CmmStaticLit (CmmFloat f frep)]
1745 `consOL` (addr_code `snocOL` LD frep dst addr)
1746 return (Any frep code)
1748 getRegister (CmmLit lit)
1749 = let rep = cmmLitRep lit
1753 ADD dst dst (RIImm (LO imm))
1755 in return (Any rep code)
1757 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extendSExpr / extendUExpr: wrap an integer expression of type rep in
-- a sign- or zero-extending conversion to I32.  Sub-word operands are
-- widened before the 32-bit comparison/divide/shift instructions used
-- elsewhere in this (powerpc) section; an operand that is already I32
-- is passed through unchanged.
extendSExpr I32 x = x
extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
extendUExpr I32 x = x
extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1766 #endif /* powerpc_TARGET_ARCH */
1769 -- -----------------------------------------------------------------------------
1770 -- The 'Amode' type: Memory addressing modes passed up the tree.
1772 data Amode = Amode AddrMode InstrBlock
1775 Now, given a tree (the argument to an CmmLoad) that references memory,
1776 produce a suitable addressing mode.
1778 A Rule of the Game (tm) for Amodes: use of the addr bit must
1779 immediately follow use of the code part, since the code part puts
1780 values in registers which the addr then refers to. So you can't put
1781 anything in between, lest it overwrite some of those registers. If
1782 you need to do some other computation between the code part and use of
1783 the addr bit, first store the effective address from the amode in a
1784 temporary, then do the other computation, and then use the temporary:
1788 ... other computation ...
1792 getAmode :: CmmExpr -> NatM Amode
1793 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1795 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1797 #if alpha_TARGET_ARCH
1799 getAmode (StPrim IntSubOp [x, StInt i])
1800 = getNewRegNat PtrRep `thenNat` \ tmp ->
1801 getRegister x `thenNat` \ register ->
1803 code = registerCode register tmp
1804 reg = registerName register tmp
1805 off = ImmInt (-(fromInteger i))
1807 return (Amode (AddrRegImm reg off) code)
1809 getAmode (StPrim IntAddOp [x, StInt i])
1810 = getNewRegNat PtrRep `thenNat` \ tmp ->
1811 getRegister x `thenNat` \ register ->
1813 code = registerCode register tmp
1814 reg = registerName register tmp
1815 off = ImmInt (fromInteger i)
1817 return (Amode (AddrRegImm reg off) code)
1821 = return (Amode (AddrImm imm__2) id)
1824 imm__2 = case imm of Just x -> x
1827 = getNewRegNat PtrRep `thenNat` \ tmp ->
1828 getRegister other `thenNat` \ register ->
1830 code = registerCode register tmp
1831 reg = registerName register tmp
1833 return (Amode (AddrReg reg) code)
1835 #endif /* alpha_TARGET_ARCH */
1837 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1839 #if x86_64_TARGET_ARCH
1841 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1842 CmmLit displacement])
1843 = return $ Amode (ripRel (litToImm displacement)) nilOL
1847 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1849 -- This is all just ridiculous, since it carefully undoes
1850 -- what mangleIndexTree has just done.
1851 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1853 -- ASSERT(rep == I32)???
1854 = do (x_reg, x_code) <- getSomeReg x
1855 let off = ImmInt (-(fromInteger i))
1856 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1858 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1860 -- ASSERT(rep == I32)???
1861 = do (x_reg, x_code) <- getSomeReg x
1862 let off = ImmInt (fromInteger i)
1863 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1865 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1866 -- recognised by the next rule.
1867 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1869 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1871 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1872 [y, CmmLit (CmmInt shift _)]])
1873 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1874 = x86_complex_amode x y shift 0
1876 getAmode (CmmMachOp (MO_Add rep)
1877 [x, CmmMachOp (MO_Add _)
1878 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1879 CmmLit (CmmInt offset _)]])
1880 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1881 && is32BitInteger offset
1882 = x86_complex_amode x y shift offset
1884 getAmode (CmmMachOp (MO_Add rep) [x,y])
1885 = x86_complex_amode x y 0 0
1887 getAmode (CmmLit lit) | is32BitLit lit
1888 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1891 (reg,code) <- getSomeReg expr
1892 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1895 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1896 x86_complex_amode base index shift offset
1897 = do (x_reg, x_code) <- getNonClobberedReg base
1898 -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
1900 (y_reg, y_code) <- getSomeReg index
1902 code = x_code `appOL` y_code
1903 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1904 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1907 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1909 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1911 #if sparc_TARGET_ARCH
1913 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1916 (reg, code) <- getSomeReg x
1918 off = ImmInt (-(fromInteger i))
1919 return (Amode (AddrRegImm reg off) code)
1922 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1925 (reg, code) <- getSomeReg x
1927 off = ImmInt (fromInteger i)
1928 return (Amode (AddrRegImm reg off) code)
1930 getAmode (CmmMachOp (MO_Add rep) [x, y])
1932 (regX, codeX) <- getSomeReg x
1933 (regY, codeY) <- getSomeReg y
1935 code = codeX `appOL` codeY
1936 return (Amode (AddrRegReg regX regY) code)
1938 -- XXX Is this same as "leaf" in Stix?
1939 getAmode (CmmLit lit)
1941 tmp <- getNewRegNat I32
1943 code = unitOL (SETHI (HI imm__2) tmp)
1944 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1946 imm__2 = litToImm lit
1950 (reg, code) <- getSomeReg other
1953 return (Amode (AddrRegImm reg off) code)
1955 #endif /* sparc_TARGET_ARCH */
1957 #ifdef powerpc_TARGET_ARCH
1958 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1959 | Just off <- makeImmediate I32 True (-i)
1961 (reg, code) <- getSomeReg x
1962 return (Amode (AddrRegImm reg off) code)
1965 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1966 | Just off <- makeImmediate I32 True i
1968 (reg, code) <- getSomeReg x
1969 return (Amode (AddrRegImm reg off) code)
1971 -- optimize addition with 32-bit immediate
1973 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1975 tmp <- getNewRegNat I32
1976 (src, srcCode) <- getSomeReg x
1977 let imm = litToImm lit
1978 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1979 return (Amode (AddrRegImm tmp (LO imm)) code)
1981 getAmode (CmmLit lit)
1983 tmp <- getNewRegNat I32
1984 let imm = litToImm lit
1985 code = unitOL (LIS tmp (HA imm))
1986 return (Amode (AddrRegImm tmp (LO imm)) code)
1988 getAmode (CmmMachOp (MO_Add I32) [x, y])
1990 (regX, codeX) <- getSomeReg x
1991 (regY, codeY) <- getSomeReg y
1992 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1996 (reg, code) <- getSomeReg other
1999 return (Amode (AddrRegImm reg off) code)
2000 #endif /* powerpc_TARGET_ARCH */
2002 -- -----------------------------------------------------------------------------
2003 -- getOperand: sometimes any operand will do.
2005 -- getNonClobberedOperand: the value of the operand will remain valid across
2006 -- the computation of an arbitrary expression, unless the expression
2007 -- is computed directly into a register which the operand refers to
2008 -- (see trivialCode where this function is used for an example).
2010 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2012 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2013 #if x86_64_TARGET_ARCH
2014 getNonClobberedOperand (CmmLit lit)
2015 | isSuitableFloatingPointLit lit = do
2016 lbl <- getNewLabelNat
2017 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2019 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2021 getNonClobberedOperand (CmmLit lit)
2022 | is32BitLit lit && not (isFloatingRep (cmmLitRep lit)) =
2023 return (OpImm (litToImm lit), nilOL)
2024 getNonClobberedOperand (CmmLoad mem pk)
2025 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2026 Amode src mem_code <- getAmode mem
2028 if (amodeCouldBeClobbered src)
2030 tmp <- getNewRegNat wordRep
2031 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2032 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2035 return (OpAddr src', save_code `appOL` mem_code)
2036 getNonClobberedOperand e = do
2037 (reg, code) <- getNonClobberedReg e
2038 return (OpReg reg, code)
2040 amodeCouldBeClobbered :: AddrMode -> Bool
2041 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2043 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2044 regClobbered _ = False
2046 -- getOperand: the operand is not required to remain valid across the
2047 -- computation of an arbitrary expression.
2048 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2049 #if x86_64_TARGET_ARCH
2050 getOperand (CmmLit lit)
2051 | isSuitableFloatingPointLit lit = do
2052 lbl <- getNewLabelNat
2053 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2055 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2057 getOperand (CmmLit lit)
2058 | is32BitLit lit && not (isFloatingRep (cmmLitRep lit)) = do
2059 return (OpImm (litToImm lit), nilOL)
2060 getOperand (CmmLoad mem pk)
2061 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2062 Amode src mem_code <- getAmode mem
2063 return (OpAddr src, mem_code)
2065 (reg, code) <- getSomeReg e
2066 return (OpReg reg, code)
2068 isOperand :: CmmExpr -> Bool
2069 isOperand (CmmLoad _ _) = True
2070 isOperand (CmmLit lit) = is32BitLit lit
2071 || isSuitableFloatingPointLit lit
2074 -- if we want a floating-point literal as an operand, we can
2075 -- use it directly from memory. However, if the literal is
2076 -- zero, we're better off generating it into a register using
-- | May this literal be used directly from memory as a floating-point
-- operand?  Only non-zero float literals qualify: a zero literal is
-- better generated straight into a register.
isSuitableFloatingPointLit lit
  = case lit of
      CmmFloat f _ -> f /= 0.0
      _            -> False
2081 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2082 getRegOrMem (CmmLoad mem pk)
2083 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2084 Amode src mem_code <- getAmode mem
2085 return (OpAddr src, mem_code)
2087 (reg, code) <- getNonClobberedReg e
2088 return (OpReg reg, code)
2090 #if x86_64_TARGET_ARCH
2091 is32BitLit (CmmInt i I64) = is32BitInteger i
2092 -- assume that labels are in the range 0-2^31-1: this assumes the
2093 -- small memory model (see gcc docs, -mcmodel=small).
-- | Does this 'Integer', viewed as a 64-bit two's-complement value,
-- fit in the signed 32-bit range?
--
-- The argument is truncated to 'Int64' first: a CmmInt is intended to
-- be truncated to the appropriate number of bits, so e.g. -1 might
-- arrive here as either -1 or 18446744073709551615, and both must be
-- treated alike.
is32BitInteger :: Integer -> Bool
is32BitInteger i = -0x80000000 <= i64 && i64 <= 0x7fffffff
  where
    i64 = fromIntegral i :: Int64
2106 -- -----------------------------------------------------------------------------
2107 -- The 'CondCode' type: Condition codes passed up the tree.
2109 data CondCode = CondCode Bool Cond InstrBlock
2111 -- Set up a condition code for a conditional branch.
2113 getCondCode :: CmmExpr -> NatM CondCode
2115 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2117 #if alpha_TARGET_ARCH
2118 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2119 #endif /* alpha_TARGET_ARCH */
2121 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2123 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2124 -- yes, they really do seem to want exactly the same!
-- Dispatch a two-operand comparison MachOp: F32/F64 cases go to
-- condFltCode, integer reps to condIntCode, each with the matching
-- Cond value.
-- NOTE(review): the clause body header ('= case mop of', original
-- lines 2127-2128) is missing from this listing.
2126 getCondCode (CmmMachOp mop [x, y])
2129 MO_Eq F32 -> condFltCode EQQ x y
2130 MO_Ne F32 -> condFltCode NE x y
2132 MO_S_Gt F32 -> condFltCode GTT x y
2133 MO_S_Ge F32 -> condFltCode GE x y
2134 MO_S_Lt F32 -> condFltCode LTT x y
2135 MO_S_Le F32 -> condFltCode LE x y
2137 MO_Eq F64 -> condFltCode EQQ x y
2138 MO_Ne F64 -> condFltCode NE x y
2140 MO_S_Gt F64 -> condFltCode GTT x y
2141 MO_S_Ge F64 -> condFltCode GE x y
2142 MO_S_Lt F64 -> condFltCode LTT x y
2143 MO_S_Le F64 -> condFltCode LE x y
2145 MO_Eq rep -> condIntCode EQQ x y
2146 MO_Ne rep -> condIntCode NE x y
2148 MO_S_Gt rep -> condIntCode GTT x y
2149 MO_S_Ge rep -> condIntCode GE x y
2150 MO_S_Lt rep -> condIntCode LTT x y
2151 MO_S_Le rep -> condIntCode LE x y
2153 MO_U_Gt rep -> condIntCode GU x y
2154 MO_U_Ge rep -> condIntCode GEU x y
2155 MO_U_Lt rep -> condIntCode LU x y
2156 MO_U_Le rep -> condIntCode LEU x y
2158 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
-- Non-binary-MachOp expressions cannot be turned into condition codes.
2160 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2162 #elif powerpc_TARGET_ARCH
2164 -- almost the same as everywhere else - but we need to
2165 -- extend small integers to 32 bit first
-- PowerPC variant of the dispatch above: integer operands are widened
-- before comparing — extendSExpr for signed comparisons, extendUExpr
-- for unsigned comparisons and equality.
-- NOTE(review): the '= case mop of' header line (original 2168) is
-- missing from this listing.
2167 getCondCode (CmmMachOp mop [x, y])
2169 MO_Eq F32 -> condFltCode EQQ x y
2170 MO_Ne F32 -> condFltCode NE x y
2172 MO_S_Gt F32 -> condFltCode GTT x y
2173 MO_S_Ge F32 -> condFltCode GE x y
2174 MO_S_Lt F32 -> condFltCode LTT x y
2175 MO_S_Le F32 -> condFltCode LE x y
2177 MO_Eq F64 -> condFltCode EQQ x y
2178 MO_Ne F64 -> condFltCode NE x y
2180 MO_S_Gt F64 -> condFltCode GTT x y
2181 MO_S_Ge F64 -> condFltCode GE x y
2182 MO_S_Lt F64 -> condFltCode LTT x y
2183 MO_S_Le F64 -> condFltCode LE x y
2185 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2186 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2188 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2189 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2190 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2191 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2193 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2194 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2195 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2196 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2198 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2200 getCondCode other = panic "getCondCode(2)(powerpc)"
2206 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2207 -- passed back up the tree.
-- Both take the desired Cond and the two comparison operands; the
-- per-arch implementations follow.
2209 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2211 #if alpha_TARGET_ARCH
2212 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2213 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2214 #endif /* alpha_TARGET_ARCH */
2216 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2217 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2219 -- memory vs immediate
-- CMP directly against the loaded address, avoiding a register; only
-- legal when the literal fits an instruction immediate (is32BitLit).
2220 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2221 Amode x_addr x_code <- getAmode x
-- NOTE(review): the 'let'/'imm' binding lines (original 2222-2223)
-- are missing from this listing.
2224 code = x_code `snocOL`
2225 CMP pk (OpImm imm) (OpAddr x_addr)
2227 return (CondCode False cond code)
2229 -- anything vs zero, using a mask
2230 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) compared against 0 collapses to a single TEST insn.
2231 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2232 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2234 (x_reg, x_code) <- getSomeReg x
2236 code = x_code `snocOL`
2237 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2239 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets the flags with no immediate.
2242 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2243 (x_reg, x_code) <- getSomeReg x
2245 code = x_code `snocOL`
2246 TEST pk (OpReg x_reg) (OpReg x_reg)
2248 return (CondCode False cond code)
2250 -- anything vs operand
2251 condIntCode cond x y | isOperand y = do
2252 (x_reg, x_code) <- getNonClobberedReg x
2253 (y_op, y_code) <- getOperand y
2255 code = x_code `appOL` y_code `snocOL`
2256 CMP (cmmExprRep x) y_op (OpReg x_reg)
2258 return (CondCode False cond code)
2260 -- anything vs anything
-- General fallback: y in a non-clobbered register, x as reg-or-mem.
2261 condIntCode cond x y = do
2262 (y_reg, y_code) <- getNonClobberedReg y
2263 (x_op, x_code) <- getRegOrMem x
2265 code = y_code `appOL`
2267 CMP (cmmExprRep x) (OpReg y_reg) x_op
2269 return (CondCode False cond code)
2272 #if i386_TARGET_ARCH
-- i386 x87 float compare via the GCMP pseudo-instruction; the desired
-- condition is folded into the compare itself (see comment below), so
-- the returned CondCode always tests EQQ.
2273 condFltCode cond x y
2274 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2275 (x_reg, x_code) <- getNonClobberedReg x
2276 (y_reg, y_code) <- getSomeReg y
2278 code = x_code `appOL` y_code `snocOL`
2279 GCMP cond x_reg y_reg
2280 -- The GCMP insn does the test and sets the zero flag if comparable
2281 -- and true. Hence we always supply EQQ as the condition to test.
2282 return (CondCode True EQQ code)
2283 #endif /* i386_TARGET_ARCH */
2285 #if x86_64_TARGET_ARCH
2286 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2287 -- an operand, but the right must be a reg. We can probably do better
2288 -- than this general case...
2289 condFltCode cond x y = do
2290 (x_reg, x_code) <- getNonClobberedReg x
2291 (y_op, y_code) <- getOperand y
2293 code = x_code `appOL`
2295 CMP (cmmExprRep x) y_op (OpReg x_reg)
2296 -- NB(1): we need to use the unsigned comparison operators on the
2297 -- result of this comparison.
2299 return (CondCode True (condToUnsigned cond) code)
2302 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2304 #if sparc_TARGET_ARCH
-- Integer compare: subtract into g0 with the condition-code-setting
-- form of SUB, discarding the result.
2306 condIntCode cond x (CmmLit (CmmInt y rep))
2309 (src1, code) <- getSomeReg x
2311 src2 = ImmInt (fromInteger y)
2312 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2313 return (CondCode False cond code')
2315 condIntCode cond x y = do
2316 (src1, code1) <- getSomeReg x
2317 (src2, code2) <- getSomeReg y
2319 code__2 = code1 `appOL` code2 `snocOL`
2320 SUB False True src1 (RIReg src2) g0
2321 return (CondCode False cond code__2)
-- Float compare: when operand widths differ, the F32 side is promoted
-- to F64 via FxTOy into tmp before the FCMP.
2324 condFltCode cond x y = do
2325 (src1, code1) <- getSomeReg x
2326 (src2, code2) <- getSomeReg y
2327 tmp <- getNewRegNat F64
2329 promote x = FxTOy F32 F64 x tmp
-- NOTE(review): the pk1/pk2 bindings and the 'if' header (original
-- lines 2330-2335) are missing from this listing.
2336 code1 `appOL` code2 `snocOL`
2337 FCMP True pk1 src1 src2
2338 else if pk1 == F32 then
2339 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2340 FCMP True F64 tmp src2
2342 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2343 FCMP True F64 src1 tmp
2344 return (CondCode True cond code__2)
2346 #endif /* sparc_TARGET_ARCH */
2348 #if powerpc_TARGET_ARCH
2349 -- ###FIXME: I16 and I8!
-- CMPL for unsigned comparisons, CMP for signed; the immediate form
-- is only used when makeImmediate says the literal fits.
2350 condIntCode cond x (CmmLit (CmmInt y rep))
2351 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2353 (src1, code) <- getSomeReg x
2355 code' = code `snocOL`
2356 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2357 return (CondCode False cond code')
2359 condIntCode cond x y = do
2360 (src1, code1) <- getSomeReg x
2361 (src2, code2) <- getSomeReg y
2363 code' = code1 `appOL` code2 `snocOL`
2364 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2365 return (CondCode False cond code')
-- Float compare; GE/LE get a CRNOR to fold the unordered CR bit in.
2367 condFltCode cond x y = do
2368 (src1, code1) <- getSomeReg x
2369 (src2, code2) <- getSomeReg y
2371 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2372 code'' = case cond of -- twiddle CR to handle unordered case
2373 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2374 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2377 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2378 return (CondCode True cond code'')
2380 #endif /* powerpc_TARGET_ARCH */
2382 -- -----------------------------------------------------------------------------
2383 -- Generating assignments
2385 -- Assignments are really at the heart of the whole code generation
2386 -- business. Almost all top-level nodes of any real importance are
2387 -- assignments, which correspond to loads, stores, or register
2388 -- transfers. If we're really lucky, some of the register transfers
2389 -- will go away, because we can use the destination register to
2390 -- complete the code generation for the right hand side. This only
2391 -- fails when the right hand side is forced into a fixed register
2392 -- (e.g. the result of a call).
-- Mem variants store an expression to a computed address; Reg
-- variants write into a CmmReg. Per-arch implementations follow.
2394 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2395 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2397 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2398 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2400 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2402 #if alpha_TARGET_ARCH
-- Legacy Alpha code, written in the old pre-do thenNat style; several
-- interior lines (e.g. the 'let'/'in' scaffolding) are missing from
-- this listing.
2404 assignIntCode pk (CmmLoad dst _) src
2405 = getNewRegNat IntRep `thenNat` \ tmp ->
2406 getAmode dst `thenNat` \ amode ->
2407 getRegister src `thenNat` \ register ->
2409 code1 = amodeCode amode []
2410 dst__2 = amodeAddr amode
2411 code2 = registerCode register tmp []
2412 src__2 = registerName register tmp
2413 sz = primRepToSize pk
2414 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2418 assignIntCode pk dst src
2419 = getRegister dst `thenNat` \ register1 ->
2420 getRegister src `thenNat` \ register2 ->
2422 dst__2 = registerName register1 zeroh
2423 code = registerCode register2 dst__2
2424 src__2 = registerName register2 dst__2
2425 code__2 = if isFixed register2
2426 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2431 #endif /* alpha_TARGET_ARCH */
2433 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2435 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2437 -- integer assignment to memory
2439 -- specific case of adding/subtracting an integer to a particular address.
2440 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Fused read-modify-write: [addr] := [addr] op imm becomes a single
-- ADD/SUB with a memory destination, provided the same address is
-- read and written and the immediate fits (32-bit check for I64).
2442 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2443 CmmLit (CmmInt i _)])
2444 | addr == addr2, pk /= I64 || is32BitInteger i,
2445 Just instr <- check op
2446 = do Amode amode code_addr <- getAmode addr
2447 let code = code_addr `snocOL`
2448 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2451 check (MO_Add _) = Just ADD
2452 check (MO_Sub _) = Just SUB
-- General case: MOV a register-or-immediate source to the address.
2457 assignMem_IntCode pk addr src = do
2458 Amode addr code_addr <- getAmode addr
2459 (code_src, op_src) <- get_op_RI src
2461 code = code_src `appOL`
2463 MOV pk op_src (OpAddr addr)
2464 -- NOTE: op_src is stable, so it will still be valid
2465 -- after code_addr. This may involve the introduction
2466 -- of an extra MOV to a temporary register, but we hope
2467 -- the register allocator will get rid of it.
2471 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
-- Small literals become immediates; everything else goes through a
-- non-clobbered register.
2472 get_op_RI (CmmLit lit) | is32BitLit lit
2473 = return (nilOL, OpImm (litToImm lit))
2475 = do (reg,code) <- getNonClobberedReg op
2476 return (code, OpReg reg)
2479 -- Assign; dst is a reg, rhs is mem
2480 assignReg_IntCode pk reg (CmmLoad src _) = do
2481 load_code <- intLoadCode (MOV pk) src
2482 return (load_code (getRegisterReg reg))
2484 -- dst is a reg, but src could be anything
2485 assignReg_IntCode pk reg src = do
2486 code <- getAnyReg src
2487 return (code (getRegisterReg reg))
2489 #endif /* i386_TARGET_ARCH */
2491 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2493 #if sparc_TARGET_ARCH
-- Store the evaluated source register at the computed address.
2495 assignMem_IntCode pk addr src = do
2496 (srcReg, code) <- getSomeReg src
2497 Amode dstAddr addr_code <- getAmode addr
2498 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Any-result: generate straight into dst; Fixed-result: copy via OR
-- with g0 (SPARC register move idiom).
2500 assignReg_IntCode pk reg src = do
2501 r <- getRegister src
2503 Any _ code -> code dst
2504 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2506 dst = getRegisterReg reg
2509 #endif /* sparc_TARGET_ARCH */
2511 #if powerpc_TARGET_ARCH
2513 assignMem_IntCode pk addr src = do
2514 (srcReg, code) <- getSomeReg src
2515 Amode dstAddr addr_code <- getAmode addr
2516 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2518 -- dst is a reg, but src could be anything
-- Fixed results are copied into dst with the MR (move register) insn.
2519 assignReg_IntCode pk reg src
2521 r <- getRegister src
2523 Any _ code -> code dst
2524 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2526 dst = getRegisterReg reg
2528 #endif /* powerpc_TARGET_ARCH */
2531 -- -----------------------------------------------------------------------------
2532 -- Floating-point assignments
2534 #if alpha_TARGET_ARCH
-- Legacy Alpha code (pre-do thenNat style); interior scaffolding
-- lines are missing from this listing.
2536 assignFltCode pk (CmmLoad dst _) src
2537 = getNewRegNat pk `thenNat` \ tmp ->
2538 getAmode dst `thenNat` \ amode ->
2539 getRegister src `thenNat` \ register ->
2541 code1 = amodeCode amode []
2542 dst__2 = amodeAddr amode
2543 code2 = registerCode register tmp []
2544 src__2 = registerName register tmp
2545 sz = primRepToSize pk
2546 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2550 assignFltCode pk dst src
2551 = getRegister dst `thenNat` \ register1 ->
2552 getRegister src `thenNat` \ register2 ->
2554 dst__2 = registerName register1 zeroh
2555 code = registerCode register2 dst__2
2556 src__2 = registerName register2 dst__2
2557 code__2 = if isFixed register2
2558 then code . mkSeqInstr (FMOV src__2 dst__2)
2563 #endif /* alpha_TARGET_ARCH */
2565 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2567 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2569 -- Floating point assignment to memory
-- i386 uses the x87 store pseudo-insn (GST); x86_64 uses a plain MOV
-- from the source register.
2570 assignMem_FltCode pk addr src = do
2571 (src_reg, src_code) <- getNonClobberedReg src
2572 Amode addr addr_code <- getAmode addr
2574 code = src_code `appOL`
2576 IF_ARCH_i386(GST pk src_reg addr,
2577 MOV pk (OpReg src_reg) (OpAddr addr))
2580 -- Floating point assignment to a register/temporary
2581 assignReg_FltCode pk reg src = do
2582 src_code <- getAnyReg src
2583 return (src_code (getRegisterReg reg))
2585 #endif /* i386_TARGET_ARCH */
2587 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2589 #if sparc_TARGET_ARCH
2591 -- Floating point assignment to memory
-- If the source's rep differs from the store rep, convert through a
-- temporary with FxTOy before storing.
2592 assignMem_FltCode pk addr src = do
2593 Amode dst__2 code1 <- getAmode addr
2594 (src__2, code2) <- getSomeReg src
2595 tmp1 <- getNewRegNat pk
2597 pk__2 = cmmExprRep src
2598 code__2 = code1 `appOL` code2 `appOL`
2600 then unitOL (ST pk src__2 dst__2)
2601 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2604 -- Floating point assignment to a register/temporary
2605 -- ToDo: Verify correctness
2606 assignReg_FltCode pk reg src = do
2607 r <- getRegister src
2608 v1 <- getNewRegNat pk
2610 Any _ code -> code dst
2611 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2613 dst = getRegisterReg reg
2615 #endif /* sparc_TARGET_ARCH */
2617 #if powerpc_TARGET_ARCH
-- On PowerPC float assignment is identical to integer assignment.
2620 assignMem_FltCode = assignMem_IntCode
2621 assignReg_FltCode = assignReg_IntCode
2623 #endif /* powerpc_TARGET_ARCH */
2626 -- -----------------------------------------------------------------------------
2627 -- Generating an non-local jump
2629 -- (If applicable) Do not fill the delay slots here; you will confuse the
2630 -- register allocator.
-- Emit code for an unconditional non-local jump to the given target.
2632 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2634 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2636 #if alpha_TARGET_ARCH
-- Legacy Alpha code; the dynamic-target clause header (original line
-- ~2644) is missing from this listing.
2638 genJump (CmmLabel lbl)
2639 | isAsmTemp lbl = returnInstr (BR target)
2640 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2642 target = ImmCLbl lbl
2645 = getRegister tree `thenNat` \ register ->
2646 getNewRegNat PtrRep `thenNat` \ tmp ->
2648 dst = registerName register pv
2649 code = registerCode register pv
2650 target = registerName register pv
2652 if isFixed register then
2653 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2655 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2657 #endif /* alpha_TARGET_ARCH */
2659 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2661 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: JMP to a memory operand, an immediate label, or a register,
-- depending on the target expression's shape.
2663 genJump (CmmLoad mem pk) = do
2664 Amode target code <- getAmode mem
2665 return (code `snocOL` JMP (OpAddr target))
2667 genJump (CmmLit lit) = do
2668 return (unitOL (JMP (OpImm (litToImm lit))))
2671 (reg,code) <- getSomeReg expr
2672 return (code `snocOL` JMP (OpReg reg))
2674 #endif /* i386_TARGET_ARCH */
2676 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2678 #if sparc_TARGET_ARCH
-- SPARC: NOPs fill the branch delay slots (see the commentary above).
2680 genJump (CmmLit (CmmLabel lbl))
2681 = return (toOL [CALL (Left target) 0 True, NOP])
2683 target = ImmCLbl lbl
2687 (target, code) <- getSomeReg tree
2688 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2690 #endif /* sparc_TARGET_ARCH */
2692 #if powerpc_TARGET_ARCH
-- PowerPC: computed jumps go via the count register (MTCTR/BCTR).
2693 genJump (CmmLit (CmmLabel lbl))
2694 = return (unitOL $ JMP lbl)
2698 (target,code) <- getSomeReg tree
2699 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2700 #endif /* powerpc_TARGET_ARCH */
2703 -- -----------------------------------------------------------------------------
2704 -- Unconditional branches
-- Delegates to the register allocator's mkBranchInstr for this target
-- block, wrapping the resulting instruction list as an OrdList.
2706 genBranch :: BlockId -> NatM InstrBlock
2708 genBranch = return . toOL . mkBranchInstr
2710 -- -----------------------------------------------------------------------------
2711 -- Conditional jumps
2714 Conditional jumps are always to local labels, so we can use branch
2715 instructions. We peek at the arguments to decide what kind of
2718 ALPHA: For comparisons with 0, we're laughing, because we can just do
2719 the desired conditional branch.
2721 I386: First, we have to ensure that the condition
2722 codes are set according to the supplied comparison operation.
2724 SPARC: First, we have to ensure that the condition codes are set
2725 according to the supplied comparison operation. We generate slightly
2726 different code for floating point comparisons, because a floating
2727 point operation cannot directly precede a @BF@. We assume the worst
2728 and fill that slot with a @NOP@.
2730 SPARC: Do not fill the delay slots here; you will confuse the register
2736 :: BlockId -- the branch target
2737 -> CmmExpr -- the condition on which to branch
NOTE(review): the genCondJump name line (original ~2735) and the
closing of the comment block above are missing from this listing.
2742 #if alpha_TARGET_ARCH
-- Legacy Alpha genCondJump, operating on the old Stix IR (StPrim /
-- StInt / StDouble) and the thenNat style; many scaffolding lines
-- are missing from this listing.
2744 genCondJump id (StPrim op [x, StInt 0])
2745 = getRegister x `thenNat` \ register ->
2746 getNewRegNat (registerRep register)
2749 code = registerCode register tmp
2750 value = registerName register tmp
2751 pk = registerRep register
2752 target = ImmCLbl lbl
2754 returnSeq code [BI (cmpOp op) value target]
2756 cmpOp CharGtOp = GTT
2758 cmpOp CharEqOp = EQQ
2760 cmpOp CharLtOp = LTT
2769 cmpOp WordGeOp = ALWAYS
2770 cmpOp WordEqOp = EQQ
2772 cmpOp WordLtOp = NEVER
2773 cmpOp WordLeOp = EQQ
2775 cmpOp AddrGeOp = ALWAYS
2776 cmpOp AddrEqOp = EQQ
2778 cmpOp AddrLtOp = NEVER
2779 cmpOp AddrLeOp = EQQ
-- Float compare against 0.0: branch on the float condition (BF).
2781 genCondJump lbl (StPrim op [x, StDouble 0.0])
2782 = getRegister x `thenNat` \ register ->
2783 getNewRegNat (registerRep register)
2786 code = registerCode register tmp
2787 value = registerName register tmp
2788 pk = registerRep register
2789 target = ImmCLbl lbl
2791 return (code . mkSeqInstr (BF (cmpOp op) value target))
2793 cmpOp FloatGtOp = GTT
2794 cmpOp FloatGeOp = GE
2795 cmpOp FloatEqOp = EQQ
2796 cmpOp FloatNeOp = NE
2797 cmpOp FloatLtOp = LTT
2798 cmpOp FloatLeOp = LE
2799 cmpOp DoubleGtOp = GTT
2800 cmpOp DoubleGeOp = GE
2801 cmpOp DoubleEqOp = EQQ
2802 cmpOp DoubleNeOp = NE
2803 cmpOp DoubleLtOp = LTT
2804 cmpOp DoubleLeOp = LE
-- General float comparison: compute the comparison result with
-- trivialFCode, then branch on it with the inverted condition table
-- below (e.g. Gt is implemented as FCMP LE + branch-if-EQQ-false).
2806 genCondJump lbl (StPrim op [x, y])
2808 = trivialFCode pr instr x y `thenNat` \ register ->
2809 getNewRegNat F64 `thenNat` \ tmp ->
2811 code = registerCode register tmp
2812 result = registerName register tmp
2813 target = ImmCLbl lbl
2815 return (code . mkSeqInstr (BF cond result target))
2817 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2819 fltCmpOp op = case op of
2833 (instr, cond) = case op of
2834 FloatGtOp -> (FCMP TF LE, EQQ)
2835 FloatGeOp -> (FCMP TF LTT, EQQ)
2836 FloatEqOp -> (FCMP TF EQQ, NE)
2837 FloatNeOp -> (FCMP TF EQQ, EQQ)
2838 FloatLtOp -> (FCMP TF LTT, NE)
2839 FloatLeOp -> (FCMP TF LE, NE)
2840 DoubleGtOp -> (FCMP TF LE, EQQ)
2841 DoubleGeOp -> (FCMP TF LTT, EQQ)
2842 DoubleEqOp -> (FCMP TF EQQ, NE)
2843 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2844 DoubleLtOp -> (FCMP TF LTT, NE)
2845 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: same compute-then-branch scheme with
-- the integer comparison table (U-prefixed conds for Word/Addr).
2847 genCondJump lbl (StPrim op [x, y])
2848 = trivialCode instr x y `thenNat` \ register ->
2849 getNewRegNat IntRep `thenNat` \ tmp ->
2851 code = registerCode register tmp
2852 result = registerName register tmp
2853 target = ImmCLbl lbl
2855 return (code . mkSeqInstr (BI cond result target))
2857 (instr, cond) = case op of
2858 CharGtOp -> (CMP LE, EQQ)
2859 CharGeOp -> (CMP LTT, EQQ)
2860 CharEqOp -> (CMP EQQ, NE)
2861 CharNeOp -> (CMP EQQ, EQQ)
2862 CharLtOp -> (CMP LTT, NE)
2863 CharLeOp -> (CMP LE, NE)
2864 IntGtOp -> (CMP LE, EQQ)
2865 IntGeOp -> (CMP LTT, EQQ)
2866 IntEqOp -> (CMP EQQ, NE)
2867 IntNeOp -> (CMP EQQ, EQQ)
2868 IntLtOp -> (CMP LTT, NE)
2869 IntLeOp -> (CMP LE, NE)
2870 WordGtOp -> (CMP ULE, EQQ)
2871 WordGeOp -> (CMP ULT, EQQ)
2872 WordEqOp -> (CMP EQQ, NE)
2873 WordNeOp -> (CMP EQQ, EQQ)
2874 WordLtOp -> (CMP ULT, NE)
2875 WordLeOp -> (CMP ULE, NE)
2876 AddrGtOp -> (CMP ULE, EQQ)
2877 AddrGeOp -> (CMP ULT, EQQ)
2878 AddrEqOp -> (CMP EQQ, NE)
2879 AddrNeOp -> (CMP EQQ, EQQ)
2880 AddrLtOp -> (CMP ULT, NE)
2881 AddrLeOp -> (CMP ULE, NE)
2883 #endif /* alpha_TARGET_ARCH */
2885 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2887 #if i386_TARGET_ARCH
-- i386: condition code from getCondCode, then a conditional jump.
2889 genCondJump id bool = do
2890 CondCode _ cond code <- getCondCode bool
2891 return (code `snocOL` JXX cond id)
2895 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2897 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions are a single JXX; float conditions need
-- extra handling (see comment with condFltReg). The plain_test /
-- or_unordered / and_ordered bodies are largely missing from this
-- listing.
2899 genCondJump id bool = do
2900 CondCode is_float cond cond_code <- getCondCode bool
2903 return (cond_code `snocOL` JXX cond id)
2905 lbl <- getBlockIdNat
2907 -- see comment with condFltReg
2908 let code = case cond of
2914 plain_test = unitOL (
2917 or_unordered = toOL [
2921 and_ordered = toOL [
2927 return (cond_code `appOL` code)
2931 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2933 #if sparc_TARGET_ARCH
-- SPARC: BF for float conditions, BI for integer; NOPs fill the
-- branch delay slots.
2935 genCondJump (BlockId id) bool = do
2936 CondCode is_float cond code <- getCondCode bool
2941 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2942 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2946 #endif /* sparc_TARGET_ARCH */
2949 #if powerpc_TARGET_ARCH
-- PowerPC: a single conditional branch (BCC) suffices.
2951 genCondJump id bool = do
2952 CondCode is_float cond code <- getCondCode bool
2953 return (code `snocOL` BCC cond id)
2955 #endif /* powerpc_TARGET_ARCH */
2958 -- -----------------------------------------------------------------------------
2959 -- Generating C calls
2961 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2962 -- @get_arg@, which moves the arguments to the correct registers/stack
2963 -- locations. Apart from that, the code is easy.
2965 -- (If applicable) Do not fill the delay slots here; you will confuse the
2966 -- register allocator.
-- NOTE(review): the 'genCCall' name line of this signature (original
-- ~2968) and its result type line are missing from this listing.
2969 :: CmmCallTarget -- function to call
2970 -> CmmFormals -- where to put the result
2971 -> CmmActuals -- arguments (of mixed type)
2974 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2976 #if alpha_TARGET_ARCH
-- Legacy Alpha genCCall: fold get_arg over the arguments to place
-- them, then LDA/JSR/LDGP to perform the call.
2980 genCCall fn cconv result_regs args
2981 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2982 `thenNat` \ ((unused,_), argCode) ->
2984 nRegs = length allArgRegs - length unused
2985 code = asmSeqThen (map ($ []) argCode)
2988 LDA pv (AddrImm (ImmLab (ptext fn))),
2989 JSR ra (AddrReg pv) nRegs,
2990 LDGP gp (AddrReg ra)]
2992 ------------------------
2993 {- Try to get a value into a specific register (or registers) for
2994 a call. The first 6 arguments go into the appropriate
2995 argument register (separate registers for integer and floating
2996 point arguments, but used in lock-step), and the remaining
2997 arguments are dumped to the stack, beginning at 0(sp). Our
2998 first argument is a pair of the list of remaining argument
2999 registers to be assigned for this call and the next stack
3000 offset to use for overflowing arguments. This way,
3001 @get_Arg@ can be applied to all of a call's arguments using
3005 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3006 -> StixTree -- Current argument
3007 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3009 -- We have to use up all of our argument registers first...
3011 get_arg ((iDst,fDst):dsts, offset) arg
3012 = getRegister arg `thenNat` \ register ->
3014 reg = if isFloatingRep pk then fDst else iDst
3015 code = registerCode register reg
3016 src = registerName register reg
3017 pk = registerRep register
3020 if isFloatingRep pk then
3021 ((dsts, offset), if isFixed register then
3022 code . mkSeqInstr (FMOV src fDst)
3025 ((dsts, offset), if isFixed register then
3026 code . mkSeqInstr (OR src (RIReg src) iDst)
3029 -- Once we have run out of argument registers, we move to the
-- ...stack: each remaining argument is stored at the next sp offset.
3032 get_arg ([], offset) arg
3033 = getRegister arg `thenNat` \ register ->
3034 getNewRegNat (registerRep register)
3037 code = registerCode register tmp
3038 src = registerName register tmp
3039 pk = registerRep register
3040 sz = primRepToSize pk
3042 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3044 #endif /* alpha_TARGET_ARCH */
3046 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3048 #if i386_TARGET_ARCH
3050 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3051 -- write barrier compiles to no code on x86/x86-64;
3052 -- we keep it this long in order to prevent earlier optimisations.
3054 -- we only cope with a single result for foreign calls
-- Callish float ops: sqrt/sin/cos/tan are inlined as x87 pseudo-insns
-- (GSQRT/GSIN/GCOS/GTAN); everything else is a real out-of-line call.
3055 genCCall (CmmPrim op) [CmmKinded r _] args = do
3056 l1 <- getNewLabelNat
3057 l2 <- getNewLabelNat
3059 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3060 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3062 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32 l1 l2) args
3063 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args
3065 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32 l1 l2) args
3066 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args
3068 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32 l1 l2) args
3069 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args
3071 other_op -> outOfLineFloatOp op r args
3073 actuallyInlineFloatOp rep instr [CmmKinded x _]
3074 = do res <- trivialUFCode rep instr x
3076 return (any (getRegisterReg (CmmLocal r)))
-- General i386 C call. Steps: compute arg sizes; (Darwin only) pad
-- the stack so the total pushed is 16-byte aligned; push args right
-- to left; emit static (CALL imm) or dynamic (CALL reg) call; pop
-- args afterwards for ccall (stdcall callee pops); move results out
-- of eax/edx/fake0. DELTA pseudo-insns track the stack adjustment
-- for the register allocator / stack unwinding.
3078 genCCall target dest_regs args = do
3080 sizes = map (arg_size . cmmExprRep . kindlessCmm) (reverse args)
3081 #if !darwin_TARGET_OS
3082 tot_arg_size = sum sizes
3084 raw_arg_size = sum sizes
3085 tot_arg_size = roundTo 16 raw_arg_size
3086 arg_pad_size = tot_arg_size - raw_arg_size
3087 delta0 <- getDeltaNat
3088 setDeltaNat (delta0 - arg_pad_size)
3091 push_codes <- mapM push_arg (reverse args)
3092 delta <- getDeltaNat
3095 -- deal with static vs dynamic call targets
3096 (callinsns,cconv) <-
3099 CmmCallee (CmmLit (CmmLabel lbl)) conv
3100 -> -- ToDo: stdcall arg sizes
3101 return (unitOL (CALL (Left fn_imm) []), conv)
3102 where fn_imm = ImmCLbl lbl
3104 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3105 ASSERT(dyn_rep == I32)
3106 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3109 #if darwin_TARGET_OS
3111 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3112 DELTA (delta0 - arg_pad_size)]
3113 `appOL` concatOL push_codes
3116 = concatOL push_codes
3117 call = callinsns `appOL`
3119 -- Deallocate parameters after call for ccall;
3120 -- but not for stdcall (callee does it)
3121 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3122 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3124 [DELTA (delta + tot_arg_size)]
3127 setDeltaNat (delta + tot_arg_size)
3130 -- assign the results, if necessary
-- I64 results arrive in eax:edx; float results in x87 fake0;
-- everything else in eax.
3131 assign_code [] = nilOL
3132 assign_code [CmmKinded dest _hint] =
3134 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3135 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3136 F32 -> unitOL (GMOV fake0 r_dest)
3137 F64 -> unitOL (GMOV fake0 r_dest)
3138 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3140 r_dest_hi = getHiVRegFromLo r_dest
3141 rep = localRegRep dest
3142 r_dest = getRegisterReg (CmmLocal dest)
3143 assign_code many = panic "genCCall.assign_code many"
3145 return (push_code `appOL`
3147 assign_code dest_regs)
-- Round x up to the next multiple of a.
3155 roundTo a x | x `mod` a == 0 = x
3156 | otherwise = x + a - (x `mod` a)
3159 push_arg :: (CmmKinded CmmExpr){-current argument-}
3160 -> NatM InstrBlock -- code
-- I64 args are pushed as two I32 halves (high first, so the low word
-- ends up at the lower address); floats are stored via GST after an
-- explicit esp adjustment; everything else is a plain PUSH.
3162 push_arg (CmmKinded arg _hint) -- we don't need the hints on x86
3163 | arg_rep == I64 = do
3164 ChildCode64 code r_lo <- iselExpr64 arg
3165 delta <- getDeltaNat
3166 setDeltaNat (delta - 8)
3168 r_hi = getHiVRegFromLo r_lo
3170 return ( code `appOL`
3171 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3172 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3177 (code, reg, sz) <- get_op arg
3178 delta <- getDeltaNat
3179 let size = arg_size sz
3180 setDeltaNat (delta-size)
3181 if (case sz of F64 -> True; F32 -> True; _ -> False)
3182 then return (code `appOL`
3183 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3185 GST sz reg (AddrBaseIndex (EABaseReg esp)
3189 else return (code `snocOL`
3190 PUSH I32 (OpReg reg) `snocOL`
3194 arg_rep = cmmExprRep arg
3197 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3199 (reg,code) <- getSomeReg op
3200 return (code, reg, cmmExprRep op)
3202 #endif /* i386_TARGET_ARCH */
3204 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Implement a callish float MachOp by an unsafe foreign call to the
-- corresponding C math routine (table below). F64 results are called
-- for directly; otherwise the call goes through an F64 temporary
-- which is then assigned to the real destination.
-- NOTE(review): several scaffolding lines of this definition (e.g.
-- the 'else' branch header and the 'uq' binding, originals
-- 3215/3218-3219) are missing from this listing.
3206 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3208 outOfLineFloatOp mop res args
3210 dflags <- getDynFlagsNat
3211 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3212 let target = CmmCallee targetExpr CCallConv
3214 if localRegRep res == F64
3216 stmtToInstrs (CmmCall target [CmmKinded res FloatHint] args CmmUnsafe CmmMayReturn)
3220 tmp = LocalReg uq F64 GCKindNonPtr
3222 code1 <- stmtToInstrs (CmmCall target [CmmKinded tmp FloatHint] args CmmUnsafe CmmMayReturn)
3223 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3224 return (code1 `appOL` code2)
3226 lbl = mkForeignLabel fn Nothing False
-- Map each MachOp to the libm symbol: F32 ops use the f-suffixed
-- single-precision variants, F64 ops the plain double versions.
3229 MO_F32_Sqrt -> fsLit "sqrtf"
3230 MO_F32_Sin -> fsLit "sinf"
3231 MO_F32_Cos -> fsLit "cosf"
3232 MO_F32_Tan -> fsLit "tanf"
3233 MO_F32_Exp -> fsLit "expf"
3234 MO_F32_Log -> fsLit "logf"
3236 MO_F32_Asin -> fsLit "asinf"
3237 MO_F32_Acos -> fsLit "acosf"
3238 MO_F32_Atan -> fsLit "atanf"
3240 MO_F32_Sinh -> fsLit "sinhf"
3241 MO_F32_Cosh -> fsLit "coshf"
3242 MO_F32_Tanh -> fsLit "tanhf"
3243 MO_F32_Pwr -> fsLit "powf"
3245 MO_F64_Sqrt -> fsLit "sqrt"
3246 MO_F64_Sin -> fsLit "sin"
3247 MO_F64_Cos -> fsLit "cos"
3248 MO_F64_Tan -> fsLit "tan"
3249 MO_F64_Exp -> fsLit "exp"
3250 MO_F64_Log -> fsLit "log"
3252 MO_F64_Asin -> fsLit "asin"
3253 MO_F64_Acos -> fsLit "acos"
3254 MO_F64_Atan -> fsLit "atan"
3256 MO_F64_Sinh -> fsLit "sinh"
3257 MO_F64_Cosh -> fsLit "cosh"
3258 MO_F64_Tanh -> fsLit "tanh"
3259 MO_F64_Pwr -> fsLit "pow"
3261 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3263 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3265 #if x86_64_TARGET_ARCH
3267 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3268 -- write barrier compiles to no code on x86/x86-64;
3269 -- we keep it this long in order to prevent earlier optimisations.
3272 genCCall (CmmPrim op) [CmmKinded r _] args =
3273 outOfLineFloatOp op r args
3275 genCCall target dest_regs args = do
3277 -- load up the register arguments
3278 (stack_args, aregs, fregs, load_args_code)
3279 <- load_args args allArgRegs allFPArgRegs nilOL
3282 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3283 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3284 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3285 -- for annotating the call instruction with
3287 sse_regs = length fp_regs_used
3289 tot_arg_size = arg_size * length stack_args
3291 -- On entry to the called function, %rsp should be aligned
3292 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3293 -- the return address is 16-byte aligned). In STG land
3294 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3295 -- need to make sure we push a multiple of 16-bytes of args,
3296 -- plus the return address, to get the correct alignment.
3297 -- Urg, this is hard. We need to feed the delta back into
3298 -- the arg pushing code.
3299 (real_size, adjust_rsp) <-
3300 if tot_arg_size `rem` 16 == 0
3301 then return (tot_arg_size, nilOL)
3302 else do -- we need to adjust...
3303 delta <- getDeltaNat
3304 setDeltaNat (delta-8)
3305 return (tot_arg_size+8, toOL [
3306 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3310 -- push the stack args, right to left
3311 push_code <- push_args (reverse stack_args) nilOL
3312 delta <- getDeltaNat
3314 -- deal with static vs dynamic call targets
3315 (callinsns,cconv) <-
3318 CmmCallee (CmmLit (CmmLabel lbl)) conv
3319 -> -- ToDo: stdcall arg sizes
3320 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3321 where fn_imm = ImmCLbl lbl
3323 -> do (dyn_r, dyn_c) <- getSomeReg expr
3324 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3327 -- The x86_64 ABI requires us to set %al to the number of SSE
3328 -- registers that contain arguments, if the called routine
3329 -- is a varargs function. We don't know whether it's a
3330 -- varargs function or not, so we have to assume it is.
3332 -- It's not safe to omit this assignment, even if the number
3333 -- of SSE regs in use is zero. If %al is larger than 8
3334 -- on entry to a varargs function, seg faults ensue.
3335 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3337 let call = callinsns `appOL`
3339 -- Deallocate parameters after call for ccall;
3340 -- but not for stdcall (callee does it)
3341 (if cconv == StdCallConv || real_size==0 then [] else
3342 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3344 [DELTA (delta + real_size)]
3347 setDeltaNat (delta + real_size)
3350 -- assign the results, if necessary
3351 assign_code [] = nilOL
3352 assign_code [CmmKinded dest _hint] =
3354 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3356 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3358 rep = localRegRep dest
3359 r_dest = getRegisterReg (CmmLocal dest)
3360 assign_code many = panic "genCCall.assign_code many"
3362 return (load_args_code `appOL`
3365 assign_eax sse_regs `appOL`
3367 assign_code dest_regs)
3370 arg_size = 8 -- always, at the mo
3372 load_args :: [CmmKinded CmmExpr]
3373 -> [Reg] -- int regs avail for args
3374 -> [Reg] -- FP regs avail for args
3376 -> NatM ([CmmKinded CmmExpr],[Reg],[Reg],InstrBlock)
3377 load_args args [] [] code = return (args, [], [], code)
3378 -- no more regs to use
3379 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3380 -- no more args to push
3381 load_args ((CmmKinded arg hint) : rest) aregs fregs code
3382 | isFloatingRep arg_rep =
3386 arg_code <- getAnyReg arg
3387 load_args rest aregs rs (code `appOL` arg_code r)
3392 arg_code <- getAnyReg arg
3393 load_args rest rs fregs (code `appOL` arg_code r)
3395 arg_rep = cmmExprRep arg
3398 (args',ars,frs,code') <- load_args rest aregs fregs code
3399 return ((CmmKinded arg hint):args', ars, frs, code')
3401 push_args [] code = return code
3402 push_args ((CmmKinded arg hint):rest) code
3403 | isFloatingRep arg_rep = do
3404 (arg_reg, arg_code) <- getSomeReg arg
3405 delta <- getDeltaNat
3406 setDeltaNat (delta-arg_size)
3407 let code' = code `appOL` arg_code `appOL` toOL [
3408 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3409 DELTA (delta-arg_size),
3410 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3411 push_args rest code'
3414 -- we only ever generate word-sized function arguments. Promotion
3415 -- has already happened: our Int8# type is kept sign-extended
3416 -- in an Int#, for example.
3417 ASSERT(arg_rep == I64) return ()
3418 (arg_op, arg_code) <- getOperand arg
3419 delta <- getDeltaNat
3420 setDeltaNat (delta-arg_size)
3421 let code' = code `appOL` toOL [PUSH I64 arg_op,
3422 DELTA (delta-arg_size)]
3423 push_args rest code'
3425 arg_rep = cmmExprRep arg
3428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3430 #if sparc_TARGET_ARCH
3432 The SPARC calling convention is an absolute
3433 nightmare. The first 6x32 bits of arguments are mapped into
3434 %o0 through %o5, and the remaining arguments are dumped to the
3435 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3437 If we have to put args on the stack, move %o6==%sp down by
3438 the number of words to go on the stack, to ensure there's enough space.
3440 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3441 16 words above the stack pointer is a word for the address of
3442 a structure return value. I use this as a temporary location
3443 for moving values from float to int regs. Certainly it isn't
3444 safe to put anything in the 16 words starting at %sp, since
3445 this area can get trashed at any time due to window overflows
3446 caused by signal handlers.
3448 A final complication (if the above isn't enough) is that
3449 we can't blithely calculate the arguments one by one into
3450 %o0 .. %o5. Consider the following nested calls:
3454 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3455 the inner call will itself use %o0, which trashes the value put there
3456 in preparation for the outer call. Upshot: we need to calculate the
3457 args into temporary regs, and move those to arg regs or onto the
3458 stack only immediately prior to the call proper. Sigh.
3461 genCCall target dest_regs argsAndHints = do
3463 args = map kindlessCmm argsAndHints
3464 argcode_and_vregs <- mapM arg_to_int_vregs args
3466 (argcodes, vregss) = unzip argcode_and_vregs
3467 n_argRegs = length allArgRegs
3468 n_argRegs_used = min (length vregs) n_argRegs
3469 vregs = concat vregss
3470 -- deal with static vs dynamic call targets
3471 callinsns <- (case target of
3472 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3473 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3474 CmmCallee expr conv -> do
3475 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3476 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3478 (res, reduce) <- outOfLineFloatOp mop
3479 lblOrMopExpr <- case res of
3481 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3483 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3484 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3485 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3489 argcode = concatOL argcodes
3490 (move_sp_down, move_sp_up)
3491 = let diff = length vregs - n_argRegs
3492 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3495 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3497 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3498 return (argcode `appOL`
3499 move_sp_down `appOL`
3500 transfer_code `appOL`
3505 -- move args from the integer vregs into which they have been
3506 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3507 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3509 move_final [] _ offset -- all args done
3512 move_final (v:vs) [] offset -- out of aregs; move to stack
3513 = ST I32 v (spRel offset)
3514 : move_final vs [] (offset+1)
3516 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3517 = OR False g0 (RIReg v) a
3518 : move_final vs az offset
3520 -- generate code to calculate an argument, and move it into one
3521 -- or two integer vregs.
3522 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3523 arg_to_int_vregs arg
3524 | (cmmExprRep arg) == I64
3526 (ChildCode64 code r_lo) <- iselExpr64 arg
3528 r_hi = getHiVRegFromLo r_lo
3529 return (code, [r_hi, r_lo])
3532 (src, code) <- getSomeReg arg
3533 tmp <- getNewRegNat (cmmExprRep arg)
3538 v1 <- getNewRegNat I32
3539 v2 <- getNewRegNat I32
3542 FMOV F64 src f0 `snocOL`
3543 ST F32 f0 (spRel 16) `snocOL`
3544 LD I32 (spRel 16) v1 `snocOL`
3545 ST F32 (fPair f0) (spRel 16) `snocOL`
3546 LD I32 (spRel 16) v2
3551 v1 <- getNewRegNat I32
3554 ST F32 src (spRel 16) `snocOL`
3555 LD I32 (spRel 16) v1
3560 v1 <- getNewRegNat I32
3562 code `snocOL` OR False g0 (RIReg src) v1
3566 outOfLineFloatOp mop =
3568 dflags <- getDynFlagsNat
3569 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3570 mkForeignLabel functionName Nothing True
3571 let mopLabelOrExpr = case mopExpr of
3572 CmmLit (CmmLabel lbl) -> Left lbl
3574 return (mopLabelOrExpr, reduce)
3576 (reduce, functionName) = case mop of
3577 MO_F32_Exp -> (True, fsLit "exp")
3578 MO_F32_Log -> (True, fsLit "log")
3579 MO_F32_Sqrt -> (True, fsLit "sqrt")
3581 MO_F32_Sin -> (True, fsLit "sin")
3582 MO_F32_Cos -> (True, fsLit "cos")
3583 MO_F32_Tan -> (True, fsLit "tan")
3585 MO_F32_Asin -> (True, fsLit "asin")
3586 MO_F32_Acos -> (True, fsLit "acos")
3587 MO_F32_Atan -> (True, fsLit "atan")
3589 MO_F32_Sinh -> (True, fsLit "sinh")
3590 MO_F32_Cosh -> (True, fsLit "cosh")
3591 MO_F32_Tanh -> (True, fsLit "tanh")
3593 MO_F64_Exp -> (False, fsLit "exp")
3594 MO_F64_Log -> (False, fsLit "log")
3595 MO_F64_Sqrt -> (False, fsLit "sqrt")
3597 MO_F64_Sin -> (False, fsLit "sin")
3598 MO_F64_Cos -> (False, fsLit "cos")
3599 MO_F64_Tan -> (False, fsLit "tan")
3601 MO_F64_Asin -> (False, fsLit "asin")
3602 MO_F64_Acos -> (False, fsLit "acos")
3603 MO_F64_Atan -> (False, fsLit "atan")
3605 MO_F64_Sinh -> (False, fsLit "sinh")
3606 MO_F64_Cosh -> (False, fsLit "cosh")
3607 MO_F64_Tanh -> (False, fsLit "tanh")
3609 other -> pprPanic "outOfLineFloatOp(sparc) "
3610 (pprCallishMachOp mop)
3612 #endif /* sparc_TARGET_ARCH */
3614 #if powerpc_TARGET_ARCH
3616 #if darwin_TARGET_OS || linux_TARGET_OS
3618 The PowerPC calling convention for Darwin/Mac OS X
3619 is described in Apple's document
3620 "Inside Mac OS X - Mach-O Runtime Architecture".
3622 PowerPC Linux uses the System V Release 4 Calling Convention
3623 for PowerPC. It is described in the
3624 "System V Application Binary Interface PowerPC Processor Supplement".
3626 Both conventions are similar:
3627 Parameters may be passed in general-purpose registers starting at r3, in
3628 floating point registers starting at f1, or on the stack.
3630 But there are substantial differences:
3631 * The number of registers used for parameter passing and the exact set of
3632 nonvolatile registers differs (see MachRegs.lhs).
3633 * On Darwin, stack space is always reserved for parameters, even if they are
3634 passed in registers. The called routine may choose to save parameters from
3635 registers to the corresponding space on the stack.
3636 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3637 parameter is passed in an FPR.
3638 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3639 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3640 Darwin just treats an I64 like two separate I32s (high word first).
3641 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3642 4-byte aligned like everything else on Darwin.
3643 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3644 PowerPC Linux does not agree, so neither do we.
3646 According to both conventions, The parameter area should be part of the
3647 caller's stack frame, allocated in the caller's prologue code (large enough
3648 to hold the parameter lists for all called routines). The NCG already
3649 uses the stack for register spilling, leaving 64 bytes free at the top.
3650 If we need a larger parameter area than that, we just allocate a new stack
3651 frame just before ccalling.
3655 genCCall (CmmPrim MO_WriteBarrier) _ _
3656 = return $ unitOL LWSYNC
3658 genCCall target dest_regs argsAndHints
3659 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3660 -- we rely on argument promotion in the codeGen
3662 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3664 allArgRegs allFPArgRegs
3668 (labelOrExpr, reduceToF32) <- case target of
3669 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3670 CmmCallee expr conv -> return (Right expr, False)
3671 CmmPrim mop -> outOfLineFloatOp mop
3673 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3674 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3679 `snocOL` BL lbl usedRegs
3682 (dynReg, dynCode) <- getSomeReg dyn
3684 `snocOL` MTCTR dynReg
3686 `snocOL` BCTRL usedRegs
3689 #if darwin_TARGET_OS
3690 initialStackOffset = 24
3691 -- size of linkage area + size of arguments, in bytes
3692 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3693 map machRepByteWidth argReps
3694 #elif linux_TARGET_OS
3695 initialStackOffset = 8
3696 stackDelta finalStack = roundTo 16 finalStack
3698 args = map kindlessCmm argsAndHints
3699 argReps = map cmmExprRep args
3701 roundTo a x | x `mod` a == 0 = x
3702 | otherwise = x + a - (x `mod` a)
3704 move_sp_down finalStack
3706 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3709 where delta = stackDelta finalStack
3710 move_sp_up finalStack
3712 toOL [ADD sp sp (RIImm (ImmInt delta)),
3715 where delta = stackDelta finalStack
3718 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3719 passArguments ((arg,I64):args) gprs fprs stackOffset
3720 accumCode accumUsed =
3722 ChildCode64 code vr_lo <- iselExpr64 arg
3723 let vr_hi = getHiVRegFromLo vr_lo
3725 #if darwin_TARGET_OS
3730 (accumCode `appOL` code
3731 `snocOL` storeWord vr_hi gprs stackOffset
3732 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3733 ((take 2 gprs) ++ accumUsed)
3735 storeWord vr (gpr:_) offset = MR gpr vr
3736 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3738 #elif linux_TARGET_OS
3739 let stackOffset' = roundTo 8 stackOffset
3740 stackCode = accumCode `appOL` code
3741 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3742 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3743 regCode hireg loreg =
3744 accumCode `appOL` code
3745 `snocOL` MR hireg vr_hi
3746 `snocOL` MR loreg vr_lo
3749 hireg : loreg : regs | even (length gprs) ->
3750 passArguments args regs fprs stackOffset
3751 (regCode hireg loreg) (hireg : loreg : accumUsed)
3752 _skipped : hireg : loreg : regs ->
3753 passArguments args regs fprs stackOffset
3754 (regCode hireg loreg) (hireg : loreg : accumUsed)
3755 _ -> -- only one or no regs left
3756 passArguments args [] fprs (stackOffset'+8)
3760 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3761 | reg : _ <- regs = do
3762 register <- getRegister arg
3763 let code = case register of
3764 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3765 Any _ acode -> acode reg
3769 #if darwin_TARGET_OS
3770 -- The Darwin ABI requires that we reserve stack slots for register parameters
3771 (stackOffset + stackBytes)
3772 #elif linux_TARGET_OS
3773 -- ... the SysV ABI doesn't.
3776 (accumCode `appOL` code)
3779 (vr, code) <- getSomeReg arg
3783 (stackOffset' + stackBytes)
3784 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3787 #if darwin_TARGET_OS
3788 -- stackOffset is at least 4-byte aligned
3789 -- The Darwin ABI is happy with that.
3790 stackOffset' = stackOffset
3792 -- ... the SysV ABI requires 8-byte alignment for doubles.
3793 stackOffset' | rep == F64 = roundTo 8 stackOffset
3794 | otherwise = stackOffset
3796 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3797 (nGprs, nFprs, stackBytes, regs) = case rep of
3798 I32 -> (1, 0, 4, gprs)
3799 #if darwin_TARGET_OS
3800 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3802 F32 -> (1, 1, 4, fprs)
3803 F64 -> (2, 1, 8, fprs)
3804 #elif linux_TARGET_OS
3805 -- ... the SysV ABI doesn't.
3806 F32 -> (0, 1, 4, fprs)
3807 F64 -> (0, 1, 8, fprs)
3810 moveResult reduceToF32 =
3813 [CmmKinded dest _hint]
3814 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3815 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3816 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3818 | otherwise -> unitOL (MR r_dest r3)
3819 where rep = cmmRegRep (CmmLocal dest)
3820 r_dest = getRegisterReg (CmmLocal dest)
3822 outOfLineFloatOp mop =
3824 dflags <- getDynFlagsNat
3825 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3826 mkForeignLabel functionName Nothing True
3827 let mopLabelOrExpr = case mopExpr of
3828 CmmLit (CmmLabel lbl) -> Left lbl
3830 return (mopLabelOrExpr, reduce)
3832 (functionName, reduce) = case mop of
3833 MO_F32_Exp -> (fsLit "exp", True)
3834 MO_F32_Log -> (fsLit "log", True)
3835 MO_F32_Sqrt -> (fsLit "sqrt", True)
3837 MO_F32_Sin -> (fsLit "sin", True)
3838 MO_F32_Cos -> (fsLit "cos", True)
3839 MO_F32_Tan -> (fsLit "tan", True)
3841 MO_F32_Asin -> (fsLit "asin", True)
3842 MO_F32_Acos -> (fsLit "acos", True)
3843 MO_F32_Atan -> (fsLit "atan", True)
3845 MO_F32_Sinh -> (fsLit "sinh", True)
3846 MO_F32_Cosh -> (fsLit "cosh", True)
3847 MO_F32_Tanh -> (fsLit "tanh", True)
3848 MO_F32_Pwr -> (fsLit "pow", True)
3850 MO_F64_Exp -> (fsLit "exp", False)
3851 MO_F64_Log -> (fsLit "log", False)
3852 MO_F64_Sqrt -> (fsLit "sqrt", False)
3854 MO_F64_Sin -> (fsLit "sin", False)
3855 MO_F64_Cos -> (fsLit "cos", False)
3856 MO_F64_Tan -> (fsLit "tan", False)
3858 MO_F64_Asin -> (fsLit "asin", False)
3859 MO_F64_Acos -> (fsLit "acos", False)
3860 MO_F64_Atan -> (fsLit "atan", False)
3862 MO_F64_Sinh -> (fsLit "sinh", False)
3863 MO_F64_Cosh -> (fsLit "cosh", False)
3864 MO_F64_Tanh -> (fsLit "tanh", False)
3865 MO_F64_Pwr -> (fsLit "pow", False)
3866 other -> pprPanic "genCCall(ppc): unknown callish op"
3867 (pprCallishMachOp other)
3869 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3871 #endif /* powerpc_TARGET_ARCH */
3874 -- -----------------------------------------------------------------------------
3875 -- Generating a table-branch
3877 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3879 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3883 (reg,e_code) <- getSomeReg expr
3884 lbl <- getNewLabelNat
3885 dflags <- getDynFlagsNat
3886 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3887 (tableReg,t_code) <- getSomeReg $ dynRef
3889 jumpTable = map jumpTableEntryRel ids
3891 jumpTableEntryRel Nothing
3892 = CmmStaticLit (CmmInt 0 wordRep)
3893 jumpTableEntryRel (Just (BlockId id))
3894 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3895 where blockLabel = mkAsmTempLabel id
3897 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3898 (EAIndex reg wORD_SIZE) (ImmInt 0))
3900 #if x86_64_TARGET_ARCH
3901 #if darwin_TARGET_OS
3902 -- on Mac OS X/x86_64, put the jump table in the text section
3903 -- to work around a limitation of the linker.
3904 -- ld64 is unable to handle the relocations for
3906 -- if L0 is not preceded by a non-anonymous label in its section.
3908 code = e_code `appOL` t_code `appOL` toOL [
3909 ADD wordRep op (OpReg tableReg),
3910 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3911 LDATA Text (CmmDataLabel lbl : jumpTable)
3914 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3915 -- relocations, hence we only get 32-bit offsets in the jump
3916 -- table. As these offsets are always negative we need to properly
3917 -- sign extend them to 64-bit. This hack should be removed in
3918 -- conjunction with the hack in PprMach.hs/pprDataItem once
3919 -- binutils 2.17 is standard.
3920 code = e_code `appOL` t_code `appOL` toOL [
3921 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3923 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3924 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3926 ADD wordRep (OpReg reg) (OpReg tableReg),
3927 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3931 code = e_code `appOL` t_code `appOL` toOL [
3932 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3933 ADD wordRep op (OpReg tableReg),
3934 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3940 (reg,e_code) <- getSomeReg expr
3941 lbl <- getNewLabelNat
3943 jumpTable = map jumpTableEntry ids
3944 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3945 code = e_code `appOL` toOL [
3946 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3947 JMP_TBL op [ id | Just id <- ids ]
3951 #elif powerpc_TARGET_ARCH
3955 (reg,e_code) <- getSomeReg expr
3956 tmp <- getNewRegNat I32
3957 lbl <- getNewLabelNat
3958 dflags <- getDynFlagsNat
3959 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3960 (tableReg,t_code) <- getSomeReg $ dynRef
3962 jumpTable = map jumpTableEntryRel ids
3964 jumpTableEntryRel Nothing
3965 = CmmStaticLit (CmmInt 0 wordRep)
3966 jumpTableEntryRel (Just (BlockId id))
3967 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3968 where blockLabel = mkAsmTempLabel id
3970 code = e_code `appOL` t_code `appOL` toOL [
3971 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3972 SLW tmp reg (RIImm (ImmInt 2)),
3973 LD I32 tmp (AddrRegReg tableReg tmp),
3974 ADD tmp tmp (RIReg tableReg),
3976 BCTR [ id | Just id <- ids ]
3981 (reg,e_code) <- getSomeReg expr
3982 tmp <- getNewRegNat I32
3983 lbl <- getNewLabelNat
3985 jumpTable = map jumpTableEntry ids
3987 code = e_code `appOL` toOL [
3988 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3989 SLW tmp reg (RIImm (ImmInt 2)),
3990 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3991 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3993 BCTR [ id | Just id <- ids ]
3997 #error "ToDo: genSwitch"
4000 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
4001 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4002 where blockLabel = mkAsmTempLabel id
4004 -- -----------------------------------------------------------------------------
4006 -- -----------------------------------------------------------------------------
4009 -- -----------------------------------------------------------------------------
4010 -- 'condIntReg' and 'condFltReg': condition codes into registers
4012 -- Turn those condition codes into integers now (when they appear on
4013 -- the right hand side of an assignment).
4015 -- (If applicable) Do not fill the delay slots here; you will confuse the
4016 -- register allocator.
4018 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4020 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4022 #if alpha_TARGET_ARCH
4023 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4024 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4025 #endif /* alpha_TARGET_ARCH */
4027 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4029 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4031 condIntReg cond x y = do
4032 CondCode _ cond cond_code <- condIntCode cond x y
4033 tmp <- getNewRegNat I8
4035 code dst = cond_code `appOL` toOL [
4036 SETCC cond (OpReg tmp),
4037 MOVZxL I8 (OpReg tmp) (OpReg dst)
4040 return (Any I32 code)
4044 #if i386_TARGET_ARCH
4046 condFltReg cond x y = do
4047 CondCode _ cond cond_code <- condFltCode cond x y
4048 tmp <- getNewRegNat I8
4050 code dst = cond_code `appOL` toOL [
4051 SETCC cond (OpReg tmp),
4052 MOVZxL I8 (OpReg tmp) (OpReg dst)
4055 return (Any I32 code)
4059 #if x86_64_TARGET_ARCH
4061 condFltReg cond x y = do
4062 CondCode _ cond cond_code <- condFltCode cond x y
4063 tmp1 <- getNewRegNat wordRep
4064 tmp2 <- getNewRegNat wordRep
4066 -- We have to worry about unordered operands (eg. comparisons
4067 -- against NaN). If the operands are unordered, the comparison
4068 -- sets the parity flag, carry flag and zero flag.
4069 -- All comparisons are supposed to return false for unordered
4070 -- operands except for !=, which returns true.
4072 -- Optimisation: we don't have to test the parity flag if we
4073 -- know the test has already excluded the unordered case: eg >
4074 -- and >= test for a zero carry flag, which can only occur for
4075 -- ordered operands.
4077 -- ToDo: by reversing comparisons we could avoid testing the
4078 -- parity flag in more cases.
4083 NE -> or_unordered dst
4084 GU -> plain_test dst
4085 GEU -> plain_test dst
4086 _ -> and_ordered dst)
4088 plain_test dst = toOL [
4089 SETCC cond (OpReg tmp1),
4090 MOVZxL I8 (OpReg tmp1) (OpReg dst)
4092 or_unordered dst = toOL [
4093 SETCC cond (OpReg tmp1),
4094 SETCC PARITY (OpReg tmp2),
4095 OR I8 (OpReg tmp1) (OpReg tmp2),
4096 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4098 and_ordered dst = toOL [
4099 SETCC cond (OpReg tmp1),
4100 SETCC NOTPARITY (OpReg tmp2),
4101 AND I8 (OpReg tmp1) (OpReg tmp2),
4102 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4105 return (Any I32 code)
4109 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4111 #if sparc_TARGET_ARCH
4113 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4114 (src, code) <- getSomeReg x
4115 tmp <- getNewRegNat I32
4117 code__2 dst = code `appOL` toOL [
4118 SUB False True g0 (RIReg src) g0,
4119 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4120 return (Any I32 code__2)
4122 condIntReg EQQ x y = do
4123 (src1, code1) <- getSomeReg x
4124 (src2, code2) <- getSomeReg y
4125 tmp1 <- getNewRegNat I32
4126 tmp2 <- getNewRegNat I32
4128 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4129 XOR False src1 (RIReg src2) dst,
4130 SUB False True g0 (RIReg dst) g0,
4131 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4132 return (Any I32 code__2)
4134 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4135 (src, code) <- getSomeReg x
4136 tmp <- getNewRegNat I32
4138 code__2 dst = code `appOL` toOL [
4139 SUB False True g0 (RIReg src) g0,
4140 ADD True False g0 (RIImm (ImmInt 0)) dst]
4141 return (Any I32 code__2)
4143 condIntReg NE x y = do
4144 (src1, code1) <- getSomeReg x
4145 (src2, code2) <- getSomeReg y
4146 tmp1 <- getNewRegNat I32
4147 tmp2 <- getNewRegNat I32
4149 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4150 XOR False src1 (RIReg src2) dst,
4151 SUB False True g0 (RIReg dst) g0,
4152 ADD True False g0 (RIImm (ImmInt 0)) dst]
4153 return (Any I32 code__2)
4155 condIntReg cond x y = do
4156 BlockId lbl1 <- getBlockIdNat
4157 BlockId lbl2 <- getBlockIdNat
4158 CondCode _ cond cond_code <- condIntCode cond x y
4160 code__2 dst = cond_code `appOL` toOL [
4161 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4162 OR False g0 (RIImm (ImmInt 0)) dst,
4163 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4164 NEWBLOCK (BlockId lbl1),
4165 OR False g0 (RIImm (ImmInt 1)) dst,
4166 NEWBLOCK (BlockId lbl2)]
4167 return (Any I32 code__2)
4169 condFltReg cond x y = do
4170 BlockId lbl1 <- getBlockIdNat
4171 BlockId lbl2 <- getBlockIdNat
4172 CondCode _ cond cond_code <- condFltCode cond x y
4174 code__2 dst = cond_code `appOL` toOL [
4176 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4177 OR False g0 (RIImm (ImmInt 0)) dst,
4178 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4179 NEWBLOCK (BlockId lbl1),
4180 OR False g0 (RIImm (ImmInt 1)) dst,
4181 NEWBLOCK (BlockId lbl2)]
4182 return (Any I32 code__2)
4184 #endif /* sparc_TARGET_ARCH */
4186 #if powerpc_TARGET_ARCH
4187 condReg getCond = do
4188 lbl1 <- getBlockIdNat
4189 lbl2 <- getBlockIdNat
4190 CondCode _ cond cond_code <- getCond
4192 {- code dst = cond_code `appOL` toOL [
4201 code dst = cond_code
4205 RLWINM dst dst (bit + 1) 31 31
4208 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4211 (bit, do_negate) = case cond of
4225 return (Any I32 code)
4227 condIntReg cond x y = condReg (condIntCode cond x y)
4228 condFltReg cond x y = condReg (condFltCode cond x y)
4229 #endif /* powerpc_TARGET_ARCH */
4232 -- -----------------------------------------------------------------------------
4233 -- 'trivial*Code': deal with trivial instructions
4235 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4236 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4237 -- Only look for constants on the right hand side, because that's
4238 -- where the generic optimizer will have put them.
4240 -- Similarly, for unary instructions, we don't have to worry about
4241 -- matching an StInt as the argument, because genericOpt will already
4242 -- have handled the constant-folding.
4246 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4247 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4248 -> Maybe (Operand -> Operand -> Instr)
4249 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4250 -> Maybe (Operand -> Operand -> Instr)
4251 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4252 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4254 -> CmmExpr -> CmmExpr -- the two arguments
4257 #ifndef powerpc_TARGET_ARCH
4260 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4261 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4262 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4263 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4265 -> CmmExpr -> CmmExpr -- the two arguments
4271 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4272 ,IF_ARCH_i386 ((Operand -> Instr)
4273 ,IF_ARCH_x86_64 ((Operand -> Instr)
4274 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4275 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4277 -> CmmExpr -- the one argument
4280 #ifndef powerpc_TARGET_ARCH
4283 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4284 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4285 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4286 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4288 -> CmmExpr -- the one argument
4292 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4294 #if alpha_TARGET_ARCH
4296 trivialCode instr x (StInt y)
4298 = getRegister x `thenNat` \ register ->
4299 getNewRegNat IntRep `thenNat` \ tmp ->
4301 code = registerCode register tmp
4302 src1 = registerName register tmp
4303 src2 = ImmInt (fromInteger y)
4304 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4306 return (Any IntRep code__2)
4308 trivialCode instr x y
4309 = getRegister x `thenNat` \ register1 ->
4310 getRegister y `thenNat` \ register2 ->
4311 getNewRegNat IntRep `thenNat` \ tmp1 ->
4312 getNewRegNat IntRep `thenNat` \ tmp2 ->
4314 code1 = registerCode register1 tmp1 []
4315 src1 = registerName register1 tmp1
4316 code2 = registerCode register2 tmp2 []
4317 src2 = registerName register2 tmp2
4318 code__2 dst = asmSeqThen [code1, code2] .
4319 mkSeqInstr (instr src1 (RIReg src2) dst)
4321 return (Any IntRep code__2)
4324 trivialUCode instr x
4325 = getRegister x `thenNat` \ register ->
4326 getNewRegNat IntRep `thenNat` \ tmp ->
4328 code = registerCode register tmp
4329 src = registerName register tmp
4330 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4332 return (Any IntRep code__2)
4335 trivialFCode _ instr x y
4336 = getRegister x `thenNat` \ register1 ->
4337 getRegister y `thenNat` \ register2 ->
4338 getNewRegNat F64 `thenNat` \ tmp1 ->
4339 getNewRegNat F64 `thenNat` \ tmp2 ->
4341 code1 = registerCode register1 tmp1
4342 src1 = registerName register1 tmp1
4344 code2 = registerCode register2 tmp2
4345 src2 = registerName register2 tmp2
4347 code__2 dst = asmSeqThen [code1 [], code2 []] .
4348 mkSeqInstr (instr src1 src2 dst)
4350 return (Any F64 code__2)
4352 trivialUFCode _ instr x
4353 = getRegister x `thenNat` \ register ->
4354 getNewRegNat F64 `thenNat` \ tmp ->
4356 code = registerCode register tmp
4357 src = registerName register tmp
4358 code__2 dst = code . mkSeqInstr (instr src dst)
4360 return (Any F64 code__2)
4362 #endif /* alpha_TARGET_ARCH */
4364 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4366 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4369 The Rules of the Game are:
4371 * You cannot assume anything about the destination register dst;
4372 it may be anything, including a fixed reg.
4374 * You may compute an operand into a fixed reg, but you may not
4375 subsequently change the contents of that fixed reg. If you
4376 want to do so, first copy the value either to a temporary
4377 or into dst. You are free to modify dst even if it happens
4378 to be a fixed reg -- that's not your problem.
4380 * You cannot assume that a fixed reg will stay live over an
4381 arbitrary computation. The same applies to the dst reg.
4383 * Temporary regs obtained from getNewRegNat are distinct from
4384 each other and from all other regs, and stay live over
4385 arbitrary computations.
4387 --------------------
4389 SDM's version of The Rules:
4391 * If getRegister returns Any, that means it can generate correct
4392 code which places the result in any register, period. Even if that
4393 register happens to be read during the computation.
4395 Corollary #1: this means that if you are generating code for an
4396 operation with two arbitrary operands, you cannot assign the result
4397 of the first operand into the destination register before computing
4398 the second operand. The second operand might require the old value
4399 of the destination register.
4401 Corollary #2: A function might be able to generate more efficient
4402 code if it knows the destination register is a new temporary (and
4403 therefore not read by any of the sub-computations).
4405 * If getRegister returns Any, then the code it generates may modify only:
4406 (a) fresh temporaries
4407 (b) the destination register
4408 (c) known registers (eg. %ecx is used by shifts)
4409 In particular, it may *not* modify global registers, unless the global
4410 register happens to be the destination register.
-- x86/x86_64: special case for a commutable operator whose FIRST
-- operand is a literal that fits in 32 bits.  We compute the second
-- operand into the destination and apply the reversed-operand
-- instruction with the literal as an immediate.
-- NOTE(review): the binding for `code` appears to be missing from this
-- copy of the file (lines between `b_code <- ...` and `= b_code dst`).
4413 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4414 | is32BitLit lit_a = do
4415 b_code <- getAnyReg b
4418 = b_code dst `snocOL`
4419 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4421 return (Any rep code)
-- General case: delegate to the two-operand code generator below.
4423 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4425 -- This is re-used for floating pt instructions too.
-- Generate code for a two-operand (dst := dst `op` src) instruction.
-- The tricky part, explained in the comment below, is keeping b's
-- value alive while a is computed straight into the destination.
-- NOTE(review): the `let code` binder and the `otherwise` guard lines
-- appear to be missing in this copy of the file.
4426 genTrivialCode rep instr a b = do
4427 (b_op, b_code) <- getNonClobberedOperand b
4428 a_code <- getAnyReg a
4429 tmp <- getNewRegNat rep
4431 -- We want the value of b to stay alive across the computation of a.
4432 -- But, we want to calculate a straight into the destination register,
4433 -- because the instruction only has two operands (dst := dst `op` src).
4434 -- The troublesome case is when the result of b is in the same register
4435 -- as the destination reg. In this case, we have to save b in a
4436 -- new temporary across the computation of a.
4438 | dst `regClashesWithOp` b_op =
4440 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4442 instr (OpReg tmp) (OpReg dst)
4446 instr b_op (OpReg dst)
4448 return (Any rep code)
-- | Does the given register occur in the operand, either as the
-- operand register itself or as one of the registers of an
-- addressing mode?
reg `regClashesWithOp` op = case op of
    OpReg reg2   -> reg == reg2
    OpAddr amode -> reg `elem` addrModeRegs amode
    _            -> False
-- x86/x86_64: unary operation computed directly into the destination.
-- NOTE(review): the lines binding `code` (between the getAnyReg and the
-- return) appear to be missing in this copy of the file.
4456 trivialUCode rep instr x = do
4457 x_code <- getAnyReg x
4463 return (Any rep code)
4467 #if i386_TARGET_ARCH
-- i386 (x87-style GRegs): binary FP operation; x must not be clobbered
-- by the evaluation of y, hence getNonClobberedReg for the first arg.
-- NOTE(review): the `let code dst =` lines appear to be missing in this
-- copy of the file.
4469 trivialFCode pk instr x y = do
4470 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4471 (y_reg, y_code) <- getSomeReg y
4476 instr pk x_reg y_reg dst
4478 return (Any pk code)
4482 #if x86_64_TARGET_ARCH
-- x86_64 (SSE2): FP instructions use the ordinary two-operand form, so
-- the generic integer path applies directly.
4484 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- x86_64: unary FP operation.
-- NOTE(review): the lines binding `code` appear to be missing in this
-- copy of the file.
4490 trivialUFCode rep instr x = do
4491 (x_reg, x_code) <- getSomeReg x
4497 return (Any rep code)
4499 #endif /* i386_TARGET_ARCH */
4501 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4503 #if sparc_TARGET_ARCH
-- SPARC: binary integer op with a literal second operand; the literal
-- is emitted as an immediate (RIImm).
-- NOTE(review): the guard/`= do` and `let` lines of both clauses appear
-- to be missing in this copy of the file.
4505 trivialCode pk instr x (CmmLit (CmmInt y d))
4508 (src1, code) <- getSomeReg x
4509 tmp <- getNewRegNat I32
4511 src2 = ImmInt (fromInteger y)
4512 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4513 return (Any I32 code__2)
-- General register/register case.
4515 trivialCode pk instr x y = do
4516 (src1, code1) <- getSomeReg x
4517 (src2, code2) <- getSomeReg y
4518 tmp1 <- getNewRegNat I32
4519 tmp2 <- getNewRegNat I32
4521 code__2 dst = code1 `appOL` code2 `snocOL`
4522 instr src1 (RIReg src2) dst
4523 return (Any I32 code__2)
-- SPARC: binary FP operation.  If the operands have different
-- precisions, the F32 one is first promoted to F64 (via FxTOy into a
-- fresh F64 temp) so the instruction operates at a single precision;
-- the result rep is F64 in the mixed case.
-- NOTE(review): the `let`/`if` scaffolding (including the bindings for
-- pk1/pk2 and `code__2 dst`) appears to be missing in this copy.
4526 trivialFCode pk instr x y = do
4527 (src1, code1) <- getSomeReg x
4528 (src2, code2) <- getSomeReg y
4529 tmp1 <- getNewRegNat (cmmExprRep x)
4530 tmp2 <- getNewRegNat (cmmExprRep y)
4531 tmp <- getNewRegNat F64
4533 promote x = FxTOy F32 F64 x tmp
4540 code1 `appOL` code2 `snocOL`
4541 instr pk src1 src2 dst
4542 else if pk1 == F32 then
4543 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4544 instr F64 tmp src2 dst
4546 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4547 instr F64 src1 tmp dst
4548 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- SPARC: unary integer operation (operand passed as RIReg).
-- NOTE(review): the `let` lines introducing `code__2` appear to be
-- missing in this copy of the file.
4551 trivialUCode pk instr x = do
4552 (src, code) <- getSomeReg x
4553 tmp <- getNewRegNat pk
4555 code__2 dst = code `snocOL` instr (RIReg src) dst
4556 return (Any pk code__2)
-- SPARC: unary FP operation (operand passed as a bare register).
4559 trivialUFCode pk instr x = do
4560 (src, code) <- getSomeReg x
4561 tmp <- getNewRegNat pk
4563 code__2 dst = code `snocOL` instr src dst
4564 return (Any pk code__2)
4566 #endif /* sparc_TARGET_ARCH */
4568 #if powerpc_TARGET_ARCH
4571 Wolfgang's PowerPC version of The Rules:
4573 A slightly modified version of The Rules to take advantage of the fact
4574 that PowerPC instructions work on all registers and don't implicitly
4575 clobber any fixed registers.
4577 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4579 * If getRegister returns Any, then the code it generates may modify only:
4580 (a) fresh temporaries
4581 (b) the destination register
4582 It may *not* modify global registers, unless the global
4583 register happens to be the destination register.
4584 It may not clobber any other registers. In fact, only ccalls clobber any
4586 Also, it may not modify the counter register (used by genCCall).
4588 Corollary: If a getRegister for a subexpression returns Fixed, you need
4589 not move it to a fresh temporary before evaluating the next subexpression.
4590 The Fixed register won't be modified.
4591 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4593 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4594 the value of the destination register.
-- PowerPC: binary op whose second operand is an integer literal that
-- fits the instruction's immediate field (checked by makeImmediate,
-- which respects the rep and signedness).
-- NOTE(review): the `= do` line following the guard appears to be
-- missing in this copy of the file.
4597 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4598 | Just imm <- makeImmediate rep signed y
4600 (src1, code1) <- getSomeReg x
4601 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4602 return (Any rep code)
-- | PowerPC: general case of a binary operation — both arguments are
-- evaluated into registers and combined with the register/register
-- form of the instruction.  (The immediate-operand special case is
-- handled by the clause above; `signed` is only consulted there.)
trivialCode rep signed instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = xCode `appOL` yCode `snocOL` instr dst xReg (RIReg yReg)
    return (Any rep assemble)
-- | PowerPC: binary operation for instructions that have no immediate
-- form, so both operands must live in registers.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = xCode `appOL` yCode `snocOL` instr dst xReg yReg
    return (Any rep assemble)
-- | PowerPC: unary operation — evaluate the operand into a register
-- and apply the two-register form of the instruction.
trivialUCode rep instr x = do
    (srcReg, srcCode) <- getSomeReg x
    let withResult dst = srcCode `snocOL` instr dst srcReg
    return (Any rep withResult)
4623 -- There is no "remainder" instruction on the PPC, so we have to do
-- it by hand: divide, multiply back, and subtract from the dividend.
4625 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- NOTE(review): the divide and subtract instruction lines inside the
-- toOL list appear to be missing in this copy of the file.
4627 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4628 -> CmmExpr -> CmmExpr -> NatM Register
4629 remainderCode rep div x y = do
4630 (src1, code1) <- getSomeReg x
4631 (src2, code2) <- getSomeReg y
4632 let code dst = code1 `appOL` code2 `appOL` toOL [
4634 MULLW dst dst (RIReg src2),
4637 return (Any rep code)
4639 #endif /* powerpc_TARGET_ARCH */
4642 -- -----------------------------------------------------------------------------
4643 -- Coercing to/from integer/floating-point...
4645 -- When going to integer, we truncate (round towards 0).
4647 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4648 -- conversions. We have to store temporaries in memory to move
4649 -- between the integer and the floating point register sets.
4651 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4652 -- pretend, on sparc at least, that double and float regs are separate
4653 -- kinds, so the value has to be computed into one kind before being
4654 -- explicitly "converted" to live in the other kind.
-- Signatures for the int<->float coercions; each target-architecture
-- #if block below supplies its own implementation.
4656 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4657 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4659 #if sparc_TARGET_ARCH
-- SPARC only: float<->double conversions, since (as noted above) the
-- two precisions are treated as separate register kinds on this target.
4660 coerceDbl2Flt :: CmmExpr -> NatM Register
4661 coerceFlt2Dbl :: CmmExpr -> NatM Register
4664 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4666 #if alpha_TARGET_ARCH
-- Alpha int->FP coercion: moves the value through memory (spRel 0)
-- to cross from the integer to the FP register set.
-- NOTE(review): the clause head and several instruction/let lines
-- appear to be missing in this copy of the file; restore from the
-- original before editing.
4669 = getRegister x `thenNat` \ register ->
4670 getNewRegNat IntRep `thenNat` \ reg ->
4672 code = registerCode register reg
4673 src = registerName register reg
4675 code__2 dst = code . mkSeqInstrs [
4677 LD TF dst (spRel 0),
4680 return (Any F64 code__2)
-- Alpha FP->int coercion: the mirror image — store from the FP side,
-- reload on the integer side.
-- NOTE(review): clause head missing here as well.
4684 = getRegister x `thenNat` \ register ->
4685 getNewRegNat F64 `thenNat` \ tmp ->
4687 code = registerCode register tmp
4688 src = registerName register tmp
4690 code__2 dst = code . mkSeqInstrs [
4692 ST TF tmp (spRel 0),
4695 return (Any IntRep code__2)
4697 #endif /* alpha_TARGET_ARCH */
4699 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4701 #if i386_TARGET_ARCH
-- i386: int->FP via the synthetic GITOF/GITOD (x87 "GReg") ops.
-- NOTE(review): the `let` lines introducing opc/code appear to be
-- missing in this copy of the file.
4703 coerceInt2FP from to x = do
4704 (x_reg, x_code) <- getSomeReg x
4706 opc = case to of F32 -> GITOF; F64 -> GITOD
4707 code dst = x_code `snocOL` opc x_reg dst
4708 -- ToDo: works for non-I32 reps?
4710 return (Any to code)
-- i386: FP->int (truncating) via GFTOI/GDTOI.
4714 coerceFP2Int from to x = do
4715 (x_reg, x_code) <- getSomeReg x
4717 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4718 code dst = x_code `snocOL` opc x_reg dst
4719 -- ToDo: works for non-I32 reps?
4721 return (Any to code)
4723 #endif /* i386_TARGET_ARCH */
4725 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4727 #if x86_64_TARGET_ARCH
-- x86_64: FP->int via the SSE truncating convert instructions.
-- NOTE(review): the `let` lines introducing opc/code appear to be
-- missing in this copy of the file.
4729 coerceFP2Int from to x = do
4730 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4732 opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4733 code dst = x_code `snocOL` opc x_op dst
4735 return (Any to code) -- works even if the destination rep is <I32
-- x86_64: int->FP via the SSE convert instructions.
4737 coerceInt2FP from to x = do
4738 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4740 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4741 code dst = x_code `snocOL` opc x_op dst
4743 return (Any to code) -- works even if the destination rep is <I32
-- x86_64: float<->double precision change; the target rep selects the
-- direction of conversion.
4745 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4746 coerceFP2FP to x = do
4747 (x_reg, x_code) <- getSomeReg x
4749 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4750 code dst = x_code `snocOL` opc x_reg dst
4752 return (Any to code)
4756 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4758 #if sparc_TARGET_ARCH
-- SPARC int->FP: bounce the value through the stack (spRel (-2)) to
-- cross register sets, then convert in the FP regs with FxTOy.
-- NOTE(review): the `let` line before code__2 appears to be missing in
-- this copy of the file.
4760 coerceInt2FP pk1 pk2 x = do
4761 (src, code) <- getSomeReg x
4763 code__2 dst = code `appOL` toOL [
4764 ST pk1 src (spRel (-2)),
4765 LD pk1 (spRel (-2)) dst,
4766 FxTOy pk1 pk2 dst dst]
4767 return (Any pk2 code__2)
-- SPARC FP->int: convert in FP regs, then bounce through the stack
-- back to the integer side.
4770 coerceFP2Int pk fprep x = do
4771 (src, code) <- getSomeReg x
4772 reg <- getNewRegNat fprep
4773 tmp <- getNewRegNat pk
4775 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4777 FxTOy fprep pk src tmp,
4778 ST pk tmp (spRel (-2)),
4779 LD pk (spRel (-2)) dst]
4780 return (Any pk code__2)
-- | SPARC: narrow a double-precision value to single precision with
-- the generic FxTOy conversion instruction (F64 -> F32).
coerceDbl2Flt x = do
  (srcReg, srcCode) <- getSomeReg x
  let narrow dst = srcCode `snocOL` FxTOy F64 F32 srcReg dst
  return (Any F32 narrow)
-- | SPARC: widen a single-precision value to double precision with
-- the generic FxTOy conversion instruction (F32 -> F64).
coerceFlt2Dbl x = do
  (srcReg, srcCode) <- getSomeReg x
  let widen dst = srcCode `snocOL` FxTOy F32 F64 srcReg dst
  return (Any F64 widen)
4792 #endif /* sparc_TARGET_ARCH */
4794 #if powerpc_TARGET_ARCH
-- PowerPC int->FP: PPC has no direct int->FP move, so this uses the
-- classic magic-number trick — build the double 0x43300000_80000000
-- as a static constant, assemble (0x4330, x XOR 0x8000_0000) on the
-- stack, load it as an F64, and subtract the constant, leaving the
-- converted value.  Sub-word inputs are sign-extended first; an F32
-- target gets a final FRSP rounding.
-- NOTE(review): several lines (the `let`, the LDATA for the constant,
-- the LD of the constant via addr, and the catch-all cases of
-- maybe_exts/maybe_frsp) appear to be missing in this copy.
4795 coerceInt2FP fromRep toRep x = do
4796 (src, code) <- getSomeReg x
4797 lbl <- getNewLabelNat
4798 itmp <- getNewRegNat I32
4799 ftmp <- getNewRegNat F64
4800 dflags <- getDynFlagsNat
4801 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4802 Amode addr addr_code <- getAmode dynRef
4804 code' dst = code `appOL` maybe_exts `appOL` toOL [
4807 CmmStaticLit (CmmInt 0x43300000 I32),
4808 CmmStaticLit (CmmInt 0x80000000 I32)],
4809 XORIS itmp src (ImmInt 0x8000),
4810 ST I32 itmp (spRel 3),
4811 LIS itmp (ImmInt 0x4330),
4812 ST I32 itmp (spRel 2),
4813 LD F64 ftmp (spRel 2)
4814 ] `appOL` addr_code `appOL` toOL [
4816 FSUB F64 dst ftmp dst
4817 ] `appOL` maybe_frsp dst
4819 maybe_exts = case fromRep of
4820 I8 -> unitOL $ EXTS I8 src src
4821 I16 -> unitOL $ EXTS I16 src src
4823 maybe_frsp dst = case toRep of
4824 F32 -> unitOL $ FRSP dst dst
4826 return (Any toRep code')
-- PowerPC FP->int: convert to an integer inside an FP register, spill
-- the 64-bit result to the stack, and reload just the low word.
-- NOTE(review): the `let` line and the convert instruction (before the
-- "-- convert to int" comment's instruction) appear to be missing in
-- this copy of the file.
4828 coerceFP2Int fromRep toRep x = do
4829 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4830 (src, code) <- getSomeReg x
4831 tmp <- getNewRegNat F64
4833 code' dst = code `appOL` toOL [
4834 -- convert to int in FP reg
4836 -- store value (64bit) from FP to stack
4837 ST F64 tmp (spRel 2),
4838 -- read low word of value (high word is undefined)
4839 LD I32 dst (spRel 3)]
4840 return (Any toRep code')
4841 #endif /* powerpc_TARGET_ARCH */
4844 -- -----------------------------------------------------------------------------
4845 -- eXTRA_STK_ARGS_HERE
4847 -- We (allegedly) put the first six C-call arguments in registers;
4848 -- where do we start putting the rest of them?
4850 -- Moved from MachInstrs (SDM):
4852 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4853 eXTRA_STK_ARGS_HERE :: Int
4855 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))