2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr )
34 -- Our intermediate code:
35 import PprCmm ( pprExpr )
39 import ClosureInfo ( C_SRT(..) )
42 import StaticFlags ( opt_PIC )
43 import ForeignCall ( CCallConv(..) )
48 import FastBool ( isFastTrue )
49 import Constants ( wORD_SIZE )
51 import Debug.Trace ( trace )
53 import Control.Monad ( mapAndUnzipM )
54 import Data.Maybe ( fromJust )
59 -- -----------------------------------------------------------------------------
60 -- Top-level of the instruction selector
62 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
63 -- They are really trees of insns to facilitate fast appending, where a
64 -- left-to-right traversal (pre-order?) yields the insns in the correct
67 type InstrBlock = OrdList Instr
-- | Instruction-select one top-level Cmm chunk.
-- A 'CmmProc' is translated block by block; any data sections emitted
-- during block code generation ('statics') are floated out next to the
-- proc.  NOTE(review): the @case@ scrutinee line (presumably
-- @case picBaseMb of@) is missing from this excerpt; when a PIC base
-- register was allocated, PIC initialisation code is prepended.
69 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
70 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
71 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
72 picBaseMb <- getPicBaseMaybeNat
73 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
74 tops = proc : concat statics
76 Just picBase -> initializePicBase picBase tops
77 Nothing -> return tops
-- Data sections need no instruction selection; CmmStatic passes through.
79 cmmTopCodeGen (CmmData sec dat) = do
80 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for one basic block.
-- Instruction selection may introduce new block boundaries (NEWBLOCK)
-- and data (LDATA) pseudo-instructions; a right fold over the
-- instruction stream splits these back out into separate basic blocks
-- and top-level data sections.
82 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
83 basicBlockCodeGen (BasicBlock id stmts) = do
84 instrs <- stmtsToInstrs stmts
85 -- code generation may introduce new basic block boundaries, which
86 -- are indicated by the NEWBLOCK instruction. We must split up the
87 -- instruction stream into basic blocks again. Also, we extract
-- NOTE(review): the intervening "let" line is missing from this view.
90 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- NEWBLOCK ends the current accumulated block and starts a fresh one.
92 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
93 = ([], BasicBlock id instrs : blocks, statics)
-- LDATA is lifted out of the instruction stream into a data section.
94 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
95 = (instrs, blocks, CmmData sec dat:statics)
-- Any ordinary instruction is prepended to the current block.
96 mkBlocks instr (instrs,blocks,statics)
97 = (instr:instrs, blocks, statics)
99 return (BasicBlock id top : other_blocks, statics)
-- | Concatenate the instruction blocks for a list of statements.
-- NOTE(review): the equation's left-hand-side line
-- (@stmtsToInstrs stmts@) is missing from this excerpt.
101 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
103 = do instrss <- mapM stmtToInstrs stmts
104 return (concatOL instrss)
-- | Dispatch a single Cmm statement to the appropriate code generator.
-- Assignments are split by representation: float, 64-bit-on-32-bit
-- (under the CPP guard), or plain integer.  NOTE(review): the
-- CmmAssign/CmmStore pattern lines and the matching #endif lines are
-- missing from this excerpt.
106 stmtToInstrs :: CmmStmt -> NatM InstrBlock
107 stmtToInstrs stmt = case stmt of
108 CmmNop -> return nilOL
109 CmmComment s -> return (unitOL (COMMENT s))
112 | isFloatingRep kind -> assignReg_FltCode kind reg src
113 #if WORD_SIZE_IN_BITS==32
114 | kind == I64 -> assignReg_I64Code reg src
116 | otherwise -> assignReg_IntCode kind reg src
117 where kind = cmmRegRep reg
120 | isFloatingRep kind -> assignMem_FltCode kind addr src
121 #if WORD_SIZE_IN_BITS==32
122 | kind == I64 -> assignMem_I64Code addr src
124 | otherwise -> assignMem_IntCode kind addr src
125 where kind = cmmExprRep src
127 CmmCall target result_regs args _ _
128 -> genCCall target result_regs args
130 CmmBranch id -> genBranch id
131 CmmCondBranch arg id -> genCondJump id arg
132 CmmSwitch arg ids -> genSwitch arg ids
133 CmmJump arg params -> genJump arg
-- CmmReturn is rejected: the CPS pass should have removed all returns.
135 panic "stmtToInstrs: return statement should have been cps'd away"
137 -- -----------------------------------------------------------------------------
138 -- General things for putting together code sequences
140 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
141 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' into the equivalent explicit addition, so the
-- rest of the instruction selector only has to handle 'CmmMachOp'.
-- The literal offset is widened at the register's own representation.
142 mangleIndexTree :: CmmExpr -> CmmExpr
143 mangleIndexTree (CmmRegOff reg off)
144 = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
145 where rep = cmmRegRep reg
147 -- -----------------------------------------------------------------------------
148 -- Code gen for 64-bit arithmetic on 32-bit platforms
151 Simple support for generating 64-bit code (ie, 64 bit values and 64
152 bit assignments) on 32-bit platforms. Unlike the main code generator
153 we merely shoot for generating working code as simply as possible, and
154 pay little attention to code quality. Specifically, there is no
155 attempt to deal cleverly with the fixed-vs-floating register
156 distinction; all values are generated into (pairs of) floating
157 registers, even if this would mean some redundant reg-reg moves as a
158 result. Only one of the VRegUniques is returned, since it will be
159 of the VRegUniqueLo form, and the upper-half VReg can be determined
160 by applying getHiVRegFromLo to it.
163 data ChildCode64 -- a.k.a "Register64"
166 Reg -- the lower 32-bit temporary which contains the
167 -- result; use getHiVRegFromLo to find the other
168 -- VRegUnique. Rules of this simplified insn
169 -- selection game are therefore that the returned
170 -- Reg may be modified
172 #if WORD_SIZE_IN_BITS==32
173 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
174 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
177 #ifndef x86_64_TARGET_ARCH
178 iselExpr64 :: CmmExpr -> NatM ChildCode64
181 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value to memory as two little-endian 32-bit
-- halves (low word at the address, high word at address+4).
185 assignMem_I64Code addrTree valueTree = do
186 Amode addr addr_code <- getAmode addrTree
187 ChildCode64 vcode rlo <- iselExpr64 valueTree
189 rhi = getHiVRegFromLo rlo
191 -- Little-endian store
192 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
-- NOTE(review): fromJust panics if the amode cannot absorb a +4
-- displacement -- presumably getAmode never yields such a mode here;
-- confirm.
193 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
195 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair; the high half
-- of both source and destination is found via getHiVRegFromLo.
-- Only local registers are valid 64-bit lvalues here.
198 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
199 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
201 r_dst_lo = mkVReg u_dst I32
202 r_dst_hi = getHiVRegFromLo r_dst_lo
203 r_src_hi = getHiVRegFromLo r_src_lo
204 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
205 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
208 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Any non-local lvalue is a compiler bug at this stage.
211 assignReg_I64Code lvalue valueTree
212 = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386 64-bit instruction selection.  Each case returns a ChildCode64
-- whose low register's partner (via getHiVRegFromLo) holds the high
-- word.  NOTE(review): several "let"/bracket lines are missing from
-- this excerpt.
--
-- Literal: split into low/high 32-bit immediates and MOV each half.
216 iselExpr64 (CmmLit (CmmInt i _)) = do
217 (rlo,rhi) <- getNewRegPairNat I32
219 r = fromIntegral (fromIntegral i :: Word32)
220 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
222 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
223 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
226 return (ChildCode64 code rlo)
-- Load: two 32-bit loads, low word first (little-endian layout).
228 iselExpr64 (CmmLoad addrTree I64) = do
229 Amode addr addr_code <- getAmode addrTree
230 (rlo,rhi) <- getNewRegPairNat I32
232 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
233 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
236 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local register needs no code: just name its low vreg.
240 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
241 = return (ChildCode64 nilOL (mkVReg vu I32))
243 -- we handle addition, but rather badly
-- Add with literal: ADD on the low word, ADC (carry) on the high word.
244 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
245 ChildCode64 code1 r1lo <- iselExpr64 e1
246 (rlo,rhi) <- getNewRegPairNat I32
248 r = fromIntegral (fromIntegral i :: Word32)
249 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
250 r1hi = getHiVRegFromLo r1lo
252 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
253 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
254 MOV I32 (OpReg r1hi) (OpReg rhi),
255 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
257 return (ChildCode64 code rlo)
-- General add: same ADD/ADC pattern with both operands in registers.
259 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
260 ChildCode64 code1 r1lo <- iselExpr64 e1
261 ChildCode64 code2 r2lo <- iselExpr64 e2
262 (rlo,rhi) <- getNewRegPairNat I32
264 r1hi = getHiVRegFromLo r1lo
265 r2hi = getHiVRegFromLo r2lo
268 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
269 ADD I32 (OpReg r2lo) (OpReg rlo),
270 MOV I32 (OpReg r1hi) (OpReg rhi),
271 ADC I32 (OpReg r2hi) (OpReg rhi) ]
273 return (ChildCode64 code rlo)
-- Zero-extension: compute the 32-bit value, zero the high half.
275 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
277 r_dst_lo <- getNewRegNat I32
278 let r_dst_hi = getHiVRegFromLo r_dst_lo
281 ChildCode64 (code `snocOL`
282 MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
-- Catch-all: anything else is unsupported on i386.
287 = pprPanic "iselExpr64(i386)" (ppr expr)
289 #endif /* i386_TARGET_ARCH */
291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
293 #if sparc_TARGET_ARCH
-- SPARC: store a 64-bit value as two 32-bit halves, big-endian
-- (high word at offset 0, low word at offset 4).
295 assignMem_I64Code addrTree valueTree = do
296 Amode addr addr_code <- getAmode addrTree
297 ChildCode64 vcode rlo <- iselExpr64 valueTree
-- NOTE(review): the address is recomputed into a plain register here,
-- so the Amode from getAmode above appears unused except for its code.
298 (src, code) <- getSomeReg addrTree
300 rhi = getHiVRegFromLo rlo
302 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
303 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
304 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- SPARC: register-pair assignment; moves are encoded as OR with %g0
-- (the architectural zero register), the standard SPARC move idiom.
306 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
307 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
309 r_dst_lo = mkVReg u_dst pk
310 r_dst_hi = getHiVRegFromLo r_dst_lo
311 r_src_hi = getHiVRegFromLo r_src_lo
312 mov_lo = mkMOV r_src_lo r_dst_lo
313 mov_hi = mkMOV r_src_hi r_dst_hi
314 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
315 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
-- Only local registers are valid 64-bit lvalues at this stage.
316 assignReg_I64Code lvalue valueTree
317 = panic "assignReg_I64Code(sparc): invalid lvalue"
320 -- Don't delete this -- it's very handy for debugging.
322 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
323 -- = panic "iselExpr64(???)"
-- SPARC 64-bit instruction selection (big-endian: high word first).
--
-- NOTE(review): this pattern match is partial -- it assumes getAmode
-- yields an AddrRegReg form and then addresses relative to r1 only
-- (r2 is ignored).  Confirm against getAmode's possible results.
325 iselExpr64 (CmmLoad addrTree I64) = do
326 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
327 rlo <- getNewRegNat I32
328 let rhi = getHiVRegFromLo rlo
329 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
330 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
332 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- Local 64-bit register: copy both halves into a fresh pair using the
-- OR-with-%g0 move idiom.
336 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64 _))) = do
337 r_dst_lo <- getNewRegNat I32
338 let r_dst_hi = getHiVRegFromLo r_dst_lo
339 r_src_lo = mkVReg uq I32
340 r_src_hi = getHiVRegFromLo r_src_lo
341 mov_lo = mkMOV r_src_lo r_dst_lo
342 mov_hi = mkMOV r_src_hi r_dst_hi
343 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
345 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
-- Catch-all: anything else is unsupported on SPARC.
349 = pprPanic "iselExpr64(sparc)" (ppr expr)
351 #endif /* sparc_TARGET_ARCH */
353 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
355 #if powerpc_TARGET_ARCH
-- | PowerPC: produce addressing modes for both halves of a 64-bit
-- value (high word at the base address, low word at +4; big-endian).
-- If the amode cannot absorb a +4 displacement, fall back to loading
-- the address into a register and using reg+imm modes.
-- NOTE(review): the final lines of the fallback branch (returning the
-- combined code) are missing from this excerpt.
357 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
358 getI64Amodes addrTree = do
359 Amode hi_addr addr_code <- getAmode addrTree
360 case addrOffset hi_addr 4 of
361 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
362 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
363 return (AddrRegImm hi_ptr (ImmInt 0),
364 AddrRegImm hi_ptr (ImmInt 4),
-- PowerPC: store a 64-bit value through the two amodes produced by
-- getI64Amodes (high word first in memory; big-endian).
367 assignMem_I64Code addrTree valueTree = do
368 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
369 ChildCode64 vcode rlo <- iselExpr64 valueTree
371 rhi = getHiVRegFromLo rlo
374 mov_hi = ST I32 rhi hi_addr
375 mov_lo = ST I32 rlo lo_addr
377 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- PowerPC: register-pair assignment using MR (move register) for each
-- half.  Only local registers are valid 64-bit lvalues here.
379 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
380 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
382 r_dst_lo = mkVReg u_dst I32
383 r_dst_hi = getHiVRegFromLo r_dst_lo
384 r_src_hi = getHiVRegFromLo r_src_lo
385 mov_lo = MR r_dst_lo r_src_lo
386 mov_hi = MR r_dst_hi r_src_hi
389 vcode `snocOL` mov_lo `snocOL` mov_hi
392 assignReg_I64Code lvalue valueTree
393 = panic "assignReg_I64Code(powerpc): invalid lvalue"
396 -- Don't delete this -- it's very handy for debugging.
398 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
399 -- = panic "iselExpr64(???)"
-- PowerPC 64-bit instruction selection: load and local-register cases.
-- Loads use the paired amodes from getI64Amodes (high word first).
401 iselExpr64 (CmmLoad addrTree I64) = do
402 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
403 (rlo, rhi) <- getNewRegPairNat I32
404 let mov_hi = LD I32 rhi hi_addr
405 mov_lo = LD I32 rlo lo_addr
406 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local register needs no code: just name its low vreg.
409 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
410 = return (ChildCode64 nilOL (mkVReg vu I32))
-- | PowerPC: materialise a 64-bit integer literal into a register pair.
-- Each 32-bit half is built from two 16-bit halfwords with the standard
-- lis/ori idiom: LIS sets the upper halfword, OR merges the lower.
--   half0/half1 = low word (goes in rlo), half2/half3 = high word (rhi).
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: half2 belongs to the HIGH word.  The original
                -- code OR'ed it into rlo, corrupting the low word and
                -- leaving the bottom halfword of rhi unset.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- PowerPC: 64-bit add uses ADDC (set carry) on the low words; the
-- carry-consuming high-word instruction line is missing from this
-- excerpt (presumably ADDE rhi r1hi r2hi).
429 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
430 ChildCode64 code1 r1lo <- iselExpr64 e1
431 ChildCode64 code2 r2lo <- iselExpr64 e2
432 (rlo,rhi) <- getNewRegPairNat I32
434 r1hi = getHiVRegFromLo r1lo
435 r2hi = getHiVRegFromLo r2lo
438 toOL [ ADDC rlo r1lo r2lo,
441 return (ChildCode64 code rlo)
-- Zero-extension from 32 bits: move the value into rlo and clear rhi.
443 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
444 (expr_reg,expr_code) <- getSomeReg expr
445 (rlo, rhi) <- getNewRegPairNat I32
446 let mov_hi = LI rhi (ImmInt 0)
447 mov_lo = MR rlo expr_reg
448 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Catch-all: anything else is unsupported on PowerPC.
451 = pprPanic "iselExpr64(powerpc)" (ppr expr)
453 #endif /* powerpc_TARGET_ARCH */
456 -- -----------------------------------------------------------------------------
457 -- The 'Register' type
459 -- 'Register's passed up the tree. If the stix code forces the register
460 -- to live in a pre-decided machine register, it comes out as @Fixed@;
461 -- otherwise, it comes out as @Any@, and the parent can decide which
462 -- register to put it in.
465 = Fixed MachRep Reg InstrBlock
466 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register', leaving the chosen
-- register (or register-consuming code function) untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register newRep =
  case register of
    Fixed _ reg code -> Fixed newRep reg code
    Any _ mkCode     -> Any newRep mkCode
473 -- -----------------------------------------------------------------------------
474 -- Utils based on getRegister, below
476 -- The dual to getAnyReg: compute an expression into a register, but
477 -- we don't mind which one it is.
-- | Compute an expression into some register, caller doesn't care
-- which.  For an Any result a fresh register is allocated; NOTE(review):
-- the equation LHS and the case-alternative lines (matching on
-- Fixed/Any) are missing from this excerpt.
478 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
480 r <- getRegister expr
483 tmp <- getNewRegNat rep
484 return (tmp, code tmp)
488 -- -----------------------------------------------------------------------------
489 -- Grab the Reg for a CmmReg
-- | Map a CmmReg to the concrete/virtual machine register holding it.
-- NOTE(review): the body of the local-register equation is missing
-- from this excerpt (presumably a mkVReg call).
491 getRegisterReg :: CmmReg -> Reg
493 getRegisterReg (CmmLocal (LocalReg u pk _))
-- Globals must map to a real machine register on this platform;
-- memory-resident globals are a panic at this stage.
496 getRegisterReg (CmmGlobal mid)
497 = case get_GlobalReg_reg_or_addr mid of
498 Left (RealReg rrno) -> RealReg rrno
499 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
500 -- By this stage, the only MagicIds remaining should be the
501 -- ones which map to a real machine register on this
502 -- platform. Hence ...
505 -- -----------------------------------------------------------------------------
506 -- Generate code to get a subtree into a Register
508 -- Don't delete this -- it's very handy for debugging.
510 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
511 -- = panic "getRegister(???)"
513 getRegister :: CmmExpr -> NatM Register
515 #if !x86_64_TARGET_ARCH
516 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
517 -- register, it can only be used for rip-relative addressing.
518 getRegister (CmmReg (CmmGlobal PicBaseReg))
520 reg <- getPicBaseNat wordRep
521 return (Fixed wordRep reg nilOL)
524 getRegister (CmmReg reg)
525 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
527 getRegister tree@(CmmRegOff _ _)
528 = getRegister (mangleIndexTree tree)
531 #if WORD_SIZE_IN_BITS==32
532 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
533 -- TO_W_(x), TO_W_(x >> 32)
535 getRegister (CmmMachOp (MO_U_Conv I64 I32)
536 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
537 ChildCode64 code rlo <- iselExpr64 x
538 return $ Fixed I32 (getHiVRegFromLo rlo) code
540 getRegister (CmmMachOp (MO_S_Conv I64 I32)
541 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
542 ChildCode64 code rlo <- iselExpr64 x
543 return $ Fixed I32 (getHiVRegFromLo rlo) code
545 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
546 ChildCode64 code rlo <- iselExpr64 x
547 return $ Fixed I32 rlo code
549 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
550 ChildCode64 code rlo <- iselExpr64 x
551 return $ Fixed I32 rlo code
555 -- end of machine-"independent" bit; here we go on the rest...
557 #if alpha_TARGET_ARCH
559 getRegister (StDouble d)
560 = getBlockIdNat `thenNat` \ lbl ->
561 getNewRegNat PtrRep `thenNat` \ tmp ->
562 let code dst = mkSeqInstrs [
563 LDATA RoDataSegment lbl [
564 DATA TF [ImmLab (rational d)]
566 LDA tmp (AddrImm (ImmCLbl lbl)),
567 LD TF dst (AddrReg tmp)]
569 return (Any F64 code)
571 getRegister (StPrim primop [x]) -- unary PrimOps
573 IntNegOp -> trivialUCode (NEG Q False) x
575 NotOp -> trivialUCode NOT x
577 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
578 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
580 OrdOp -> coerceIntCode IntRep x
583 Float2IntOp -> coerceFP2Int x
584 Int2FloatOp -> coerceInt2FP pr x
585 Double2IntOp -> coerceFP2Int x
586 Int2DoubleOp -> coerceInt2FP pr x
588 Double2FloatOp -> coerceFltCode x
589 Float2DoubleOp -> coerceFltCode x
591 other_op -> getRegister (StCall fn CCallConv F64 [x])
593 fn = case other_op of
594 FloatExpOp -> fsLit "exp"
595 FloatLogOp -> fsLit "log"
596 FloatSqrtOp -> fsLit "sqrt"
597 FloatSinOp -> fsLit "sin"
598 FloatCosOp -> fsLit "cos"
599 FloatTanOp -> fsLit "tan"
600 FloatAsinOp -> fsLit "asin"
601 FloatAcosOp -> fsLit "acos"
602 FloatAtanOp -> fsLit "atan"
603 FloatSinhOp -> fsLit "sinh"
604 FloatCoshOp -> fsLit "cosh"
605 FloatTanhOp -> fsLit "tanh"
606 DoubleExpOp -> fsLit "exp"
607 DoubleLogOp -> fsLit "log"
608 DoubleSqrtOp -> fsLit "sqrt"
609 DoubleSinOp -> fsLit "sin"
610 DoubleCosOp -> fsLit "cos"
611 DoubleTanOp -> fsLit "tan"
612 DoubleAsinOp -> fsLit "asin"
613 DoubleAcosOp -> fsLit "acos"
614 DoubleAtanOp -> fsLit "atan"
615 DoubleSinhOp -> fsLit "sinh"
616 DoubleCoshOp -> fsLit "cosh"
617 DoubleTanhOp -> fsLit "tanh"
619 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
621 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
623 CharGtOp -> trivialCode (CMP LTT) y x
624 CharGeOp -> trivialCode (CMP LE) y x
625 CharEqOp -> trivialCode (CMP EQQ) x y
626 CharNeOp -> int_NE_code x y
627 CharLtOp -> trivialCode (CMP LTT) x y
628 CharLeOp -> trivialCode (CMP LE) x y
630 IntGtOp -> trivialCode (CMP LTT) y x
631 IntGeOp -> trivialCode (CMP LE) y x
632 IntEqOp -> trivialCode (CMP EQQ) x y
633 IntNeOp -> int_NE_code x y
634 IntLtOp -> trivialCode (CMP LTT) x y
635 IntLeOp -> trivialCode (CMP LE) x y
637 WordGtOp -> trivialCode (CMP ULT) y x
638 WordGeOp -> trivialCode (CMP ULE) x y
639 WordEqOp -> trivialCode (CMP EQQ) x y
640 WordNeOp -> int_NE_code x y
641 WordLtOp -> trivialCode (CMP ULT) x y
642 WordLeOp -> trivialCode (CMP ULE) x y
644 AddrGtOp -> trivialCode (CMP ULT) y x
645 AddrGeOp -> trivialCode (CMP ULE) y x
646 AddrEqOp -> trivialCode (CMP EQQ) x y
647 AddrNeOp -> int_NE_code x y
648 AddrLtOp -> trivialCode (CMP ULT) x y
649 AddrLeOp -> trivialCode (CMP ULE) x y
651 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
652 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
653 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
654 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
655 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
656 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
658 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
659 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
660 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
661 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
662 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
663 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
665 IntAddOp -> trivialCode (ADD Q False) x y
666 IntSubOp -> trivialCode (SUB Q False) x y
667 IntMulOp -> trivialCode (MUL Q False) x y
668 IntQuotOp -> trivialCode (DIV Q False) x y
669 IntRemOp -> trivialCode (REM Q False) x y
671 WordAddOp -> trivialCode (ADD Q False) x y
672 WordSubOp -> trivialCode (SUB Q False) x y
673 WordMulOp -> trivialCode (MUL Q False) x y
674 WordQuotOp -> trivialCode (DIV Q True) x y
675 WordRemOp -> trivialCode (REM Q True) x y
677 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
678 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
679 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
680 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
682 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
683 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
684 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
685 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
687 AddrAddOp -> trivialCode (ADD Q False) x y
688 AddrSubOp -> trivialCode (SUB Q False) x y
689 AddrRemOp -> trivialCode (REM Q True) x y
691 AndOp -> trivialCode AND x y
692 OrOp -> trivialCode OR x y
693 XorOp -> trivialCode XOR x y
694 SllOp -> trivialCode SLL x y
695 SrlOp -> trivialCode SRL x y
697 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
698 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
699 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
701 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
702 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
704 {- ------------------------------------------------------------
705 Some bizarre special code for getting condition codes into
706 registers. Integer non-equality is a test for equality
707 followed by an XOR with 1. (Integer comparisons always set
708 the result register to 0 or 1.) Floating point comparisons of
709 any kind leave the result in a floating point register, so we
710 need to wrangle an integer register out of things.
712 int_NE_code :: StixTree -> StixTree -> NatM Register
715 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
716 getNewRegNat IntRep `thenNat` \ tmp ->
718 code = registerCode register tmp
719 src = registerName register tmp
720 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
722 return (Any IntRep code__2)
724 {- ------------------------------------------------------------
725 Comments for int_NE_code also apply to cmpF_code
728 :: (Reg -> Reg -> Reg -> Instr)
730 -> StixTree -> StixTree
733 cmpF_code instr cond x y
734 = trivialFCode pr instr x y `thenNat` \ register ->
735 getNewRegNat F64 `thenNat` \ tmp ->
736 getBlockIdNat `thenNat` \ lbl ->
738 code = registerCode register tmp
739 result = registerName register tmp
741 code__2 dst = code . mkSeqInstrs [
742 OR zeroh (RIImm (ImmInt 1)) dst,
743 BF cond result (ImmCLbl lbl),
744 OR zeroh (RIReg zeroh) dst,
747 return (Any IntRep code__2)
749 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
750 ------------------------------------------------------------
752 getRegister (CmmLoad pk mem)
753 = getAmode mem `thenNat` \ amode ->
755 code = amodeCode amode
756 src = amodeAddr amode
757 size = primRepToSize pk
758 code__2 dst = code . mkSeqInstr (LD size dst src)
760 return (Any pk code__2)
762 getRegister (StInt i)
765 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
767 return (Any IntRep code)
770 code dst = mkSeqInstr (LDI Q dst src)
772 return (Any IntRep code)
774 src = ImmInt (fromInteger i)
779 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
781 return (Any PtrRep code)
784 imm__2 = case imm of Just x -> x
786 #endif /* alpha_TARGET_ARCH */
788 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
792 getRegister (CmmLit (CmmFloat f F32)) = do
793 lbl <- getNewLabelNat
794 dflags <- getDynFlagsNat
795 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
796 Amode addr addr_code <- getAmode dynRef
800 CmmStaticLit (CmmFloat f F32)]
801 `consOL` (addr_code `snocOL`
804 return (Any F32 code)
807 getRegister (CmmLit (CmmFloat d F64))
809 = let code dst = unitOL (GLDZ dst)
810 in return (Any F64 code)
813 = let code dst = unitOL (GLD1 dst)
814 in return (Any F64 code)
817 lbl <- getNewLabelNat
818 dflags <- getDynFlagsNat
819 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
820 Amode addr addr_code <- getAmode dynRef
824 CmmStaticLit (CmmFloat d F64)]
825 `consOL` (addr_code `snocOL`
828 return (Any F64 code)
830 #endif /* i386_TARGET_ARCH */
832 #if x86_64_TARGET_ARCH
834 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
835 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
836 -- I don't know why there are xorpd, xorps, and pxor instructions.
837 -- They all appear to do the same thing --SDM
838 return (Any rep code)
840 getRegister (CmmLit (CmmFloat f rep)) = do
841 lbl <- getNewLabelNat
842 let code dst = toOL [
845 CmmStaticLit (CmmFloat f rep)],
846 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
849 return (Any rep code)
851 #endif /* x86_64_TARGET_ARCH */
853 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
855 -- catch simple cases of zero- or sign-extended load
856 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
857 code <- intLoadCode (MOVZxL I8) addr
858 return (Any I32 code)
860 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
861 code <- intLoadCode (MOVSxL I8) addr
862 return (Any I32 code)
864 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
865 code <- intLoadCode (MOVZxL I16) addr
866 return (Any I32 code)
868 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
869 code <- intLoadCode (MOVSxL I16) addr
870 return (Any I32 code)
874 #if x86_64_TARGET_ARCH
876 -- catch simple cases of zero- or sign-extended load
877 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
878 code <- intLoadCode (MOVZxL I8) addr
879 return (Any I64 code)
881 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
882 code <- intLoadCode (MOVSxL I8) addr
883 return (Any I64 code)
885 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
886 code <- intLoadCode (MOVZxL I16) addr
887 return (Any I64 code)
889 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
890 code <- intLoadCode (MOVSxL I16) addr
891 return (Any I64 code)
893 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
894 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
895 return (Any I64 code)
897 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
898 code <- intLoadCode (MOVSxL I32) addr
899 return (Any I64 code)
903 #if x86_64_TARGET_ARCH
904 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
905 CmmLit displacement])
906 = return $ Any I64 (\dst -> unitOL $
907 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
910 #if x86_64_TARGET_ARCH
911 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
912 x_code <- getAnyReg x
913 lbl <- getNewLabelNat
915 code dst = x_code dst `appOL` toOL [
916 -- This is how gcc does it, so it can't be that bad:
917 LDATA ReadOnlyData16 [
920 CmmStaticLit (CmmInt 0x80000000 I32),
921 CmmStaticLit (CmmInt 0 I32),
922 CmmStaticLit (CmmInt 0 I32),
923 CmmStaticLit (CmmInt 0 I32)
925 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
926 -- xorps, so we need the 128-bit constant
927 -- ToDo: rip-relative
930 return (Any F32 code)
932 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
933 x_code <- getAnyReg x
934 lbl <- getNewLabelNat
936 -- This is how gcc does it, so it can't be that bad:
937 code dst = x_code dst `appOL` toOL [
938 LDATA ReadOnlyData16 [
941 CmmStaticLit (CmmInt 0x8000000000000000 I64),
942 CmmStaticLit (CmmInt 0 I64)
944 -- gcc puts an unpck here. Wonder if we need it.
945 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
946 -- xorpd, so we need the 128-bit constant
949 return (Any F64 code)
952 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
954 getRegister (CmmMachOp mop [x]) -- unary MachOps
957 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
958 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
961 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
962 MO_Not rep -> trivialUCode rep (NOT rep) x
965 MO_U_Conv I32 I8 -> toI8Reg I32 x
966 MO_S_Conv I32 I8 -> toI8Reg I32 x
967 MO_U_Conv I16 I8 -> toI8Reg I16 x
968 MO_S_Conv I16 I8 -> toI8Reg I16 x
969 MO_U_Conv I32 I16 -> toI16Reg I32 x
970 MO_S_Conv I32 I16 -> toI16Reg I32 x
971 #if x86_64_TARGET_ARCH
972 MO_U_Conv I64 I32 -> conversionNop I64 x
973 MO_S_Conv I64 I32 -> conversionNop I64 x
974 MO_U_Conv I64 I16 -> toI16Reg I64 x
975 MO_S_Conv I64 I16 -> toI16Reg I64 x
976 MO_U_Conv I64 I8 -> toI8Reg I64 x
977 MO_S_Conv I64 I8 -> toI8Reg I64 x
980 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
981 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
984 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
985 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
986 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
988 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
989 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
990 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
992 #if x86_64_TARGET_ARCH
993 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
994 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
995 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
996 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
997 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
998 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
999 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1000 -- However, we don't want the register allocator to throw it
1001 -- away as an unnecessary reg-to-reg move, so we keep it in
1002 -- the form of a movzl and print it as a movl later.
1005 #if i386_TARGET_ARCH
1006 MO_S_Conv F32 F64 -> conversionNop F64 x
1007 MO_S_Conv F64 F32 -> conversionNop F32 x
1009 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1010 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1014 | isFloatingRep from -> coerceFP2Int from to x
1015 | isFloatingRep to -> coerceInt2FP from to x
1017 other -> pprPanic "getRegister" (pprMachOp mop)
1019 -- signed or unsigned extension.
1020 integerExtend from to instr expr = do
1021 (reg,e_code) <- if from == I8 then getByteReg expr
1022 else getSomeReg expr
1026 instr from (OpReg reg) (OpReg dst)
1027 return (Any to code)
1029 toI8Reg new_rep expr
1030 = do codefn <- getAnyReg expr
1031 return (Any new_rep codefn)
1032 -- HACK: use getAnyReg to get a byte-addressable register.
1033 -- If the source was a Fixed register, this will add the
1034 -- mov instruction to put it into the desired destination.
1035 -- We're assuming that the destination won't be a fixed
1036 -- non-byte-addressable register; it won't be, because all
1037 -- fixed registers are word-sized.
1039 toI16Reg = toI8Reg -- for now
1041 conversionNop new_rep expr
1042 = do e_code <- getRegister expr
1043 return (swizzleRegisterRep e_code new_rep)
-- getRegister for dyadic MachOps on x86/x86_64, together with the helpers
-- from its where-clause (imulMayOflo, shift_code, add_code, sub_code,
-- add_int, div_code).  Float comparisons come first, then integer
-- comparisons, then arch-specific FP arithmetic under #if, then shared
-- integer arithmetic.
-- NOTE(review): the embedded original line numbers jump, so the 'case mop
-- of' scrutinee line and some #else/#endif lines of the arch-specific
-- sections are elided in this extract.
1046 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1048 MO_Eq F32 -> condFltReg EQQ x y
1049 MO_Ne F32 -> condFltReg NE x y
1050 MO_S_Gt F32 -> condFltReg GTT x y
1051 MO_S_Ge F32 -> condFltReg GE x y
1052 MO_S_Lt F32 -> condFltReg LTT x y
1053 MO_S_Le F32 -> condFltReg LE x y
1055 MO_Eq F64 -> condFltReg EQQ x y
1056 MO_Ne F64 -> condFltReg NE x y
1057 MO_S_Gt F64 -> condFltReg GTT x y
1058 MO_S_Ge F64 -> condFltReg GE x y
1059 MO_S_Lt F64 -> condFltReg LTT x y
1060 MO_S_Le F64 -> condFltReg LE x y
1062 MO_Eq rep -> condIntReg EQQ x y
1063 MO_Ne rep -> condIntReg NE x y
1065 MO_S_Gt rep -> condIntReg GTT x y
1066 MO_S_Ge rep -> condIntReg GE x y
1067 MO_S_Lt rep -> condIntReg LTT x y
1068 MO_S_Le rep -> condIntReg LE x y
1070 MO_U_Gt rep -> condIntReg GU x y
1071 MO_U_Ge rep -> condIntReg GEU x y
1072 MO_U_Lt rep -> condIntReg LU x y
1073 MO_U_Le rep -> condIntReg LEU x y
-- i386 uses the x87-style G* float instructions; x86_64 uses SSE forms.
1075 #if i386_TARGET_ARCH
1076 MO_Add F32 -> trivialFCode F32 GADD x y
1077 MO_Sub F32 -> trivialFCode F32 GSUB x y
1079 MO_Add F64 -> trivialFCode F64 GADD x y
1080 MO_Sub F64 -> trivialFCode F64 GSUB x y
1082 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1083 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1086 #if x86_64_TARGET_ARCH
1087 MO_Add F32 -> trivialFCode F32 ADD x y
1088 MO_Sub F32 -> trivialFCode F32 SUB x y
1090 MO_Add F64 -> trivialFCode F64 ADD x y
1091 MO_Sub F64 -> trivialFCode F64 SUB x y
1093 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1094 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1097 MO_Add rep -> add_code rep x y
1098 MO_Sub rep -> sub_code rep x y
1100 MO_S_Quot rep -> div_code rep True True x y
1101 MO_S_Rem rep -> div_code rep True False x y
1102 MO_U_Quot rep -> div_code rep False True x y
1103 MO_U_Rem rep -> div_code rep False False x y
1105 #if i386_TARGET_ARCH
1106 MO_Mul F32 -> trivialFCode F32 GMUL x y
1107 MO_Mul F64 -> trivialFCode F64 GMUL x y
1110 #if x86_64_TARGET_ARCH
1111 MO_Mul F32 -> trivialFCode F32 MUL x y
1112 MO_Mul F64 -> trivialFCode F64 MUL x y
1115 MO_Mul rep -> let op = IMUL rep in
1116 trivialCode rep op (Just op) x y
1118 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1120 MO_And rep -> let op = AND rep in
1121 trivialCode rep op (Just op) x y
1122 MO_Or rep -> let op = OR rep in
1123 trivialCode rep op (Just op) x y
1124 MO_Xor rep -> let op = XOR rep in
1125 trivialCode rep op (Just op) x y
1127 {- Shift ops on x86s have constraints on their source, it
1128 either has to be Imm, CL or 1
1129 => trivialCode is not restrictive enough (sigh.)
1131 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1132 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1133 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1135 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1137 --------------------
-- Signed multiply that leaves a value in %eax which is zero iff the
-- multiplication did not overflow (high part == sign-extended low part).
1138 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1139 imulMayOflo rep a b = do
1140 (a_reg, a_code) <- getNonClobberedReg a
1141 b_code <- getAnyReg b
1143 shift_amt = case rep of
1146 _ -> panic "shift_amt"
1148 code = a_code `appOL` b_code eax `appOL`
1150 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1151 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1152 -- sign extend lower part
1153 SUB rep (OpReg edx) (OpReg eax)
1154 -- compare against upper
1155 -- eax==0 if high part == sign extended low part
1158 return (Fixed rep eax code)
1160 --------------------
-- Shift instructions: the shift amount must be an immediate or live in
-- %cl, hence the two cases below.
1161 shift_code :: MachRep
1162 -> (Operand -> Operand -> Instr)
1167 {- Case1: shift length as immediate -}
1168 shift_code rep instr x y@(CmmLit lit) = do
1169 x_code <- getAnyReg x
1172 = x_code dst `snocOL`
1173 instr (OpImm (litToImm lit)) (OpReg dst)
1175 return (Any rep code)
1177 {- Case2: shift length is complex (non-immediate)
1178 * y must go in %ecx.
1179 * we cannot do y first *and* put its result in %ecx, because
1180 %ecx might be clobbered by x.
1181 * if we do y second, then x cannot be
1182 in a clobbered reg. Also, we cannot clobber x's reg
1183 with the instruction itself.
1185 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1186 - do y second and put its result into %ecx. x gets placed in a fresh
1187 tmp. This is likely to be better, because the reg alloc can
1188 eliminate this reg->reg move here (it won't eliminate the other one,
1189 because the move is into the fixed %ecx).
1191 shift_code rep instr x y{-amount-} = do
1192 x_code <- getAnyReg x
1193 tmp <- getNewRegNat rep
1194 y_code <- getAnyReg y
1196 code = x_code tmp `appOL`
1198 instr (OpReg ecx) (OpReg tmp)
1200 return (Fixed rep tmp code)
1202 --------------------
-- Addition with a literal that fits in 32 bits goes through the
-- three-operand LEA form (add_int); otherwise the generic two-operand ADD.
1203 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1204 add_code rep x (CmmLit (CmmInt y _))
1205 | not (is64BitInteger y) = add_int rep x y
1206 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1208 --------------------
-- Subtraction of a small literal is turned into addition of its negation
-- so add_int/LEA can be used; SUB is not commutative, hence 'Nothing'.
1209 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1210 sub_code rep x (CmmLit (CmmInt y _))
1211 | not (is64BitInteger (-y)) = add_int rep x (-y)
1212 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1214 -- our three-operand add instruction:
1215 add_int rep x y = do
1216 (x_reg, x_code) <- getSomeReg x
1218 imm = ImmInt (fromInteger y)
1222 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1225 return (Any rep code)
1227 ----------------------
-- Division/remainder via IDIV/DIV: dividend is widened into %edx:%eax
-- (CLTD when signed, zeroing %edx otherwise); quotient lands in %eax,
-- remainder in %edx; 'quotient' selects which one is the result.
1228 div_code rep signed quotient x y = do
1229 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1230 x_code <- getAnyReg x
1232 widen | signed = CLTD rep
1233 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1235 instr | signed = IDIV
1238 code = y_code `appOL`
1240 toOL [widen, instr rep y_op]
1242 result | quotient = eax
1246 return (Fixed rep result code)
-- getRegister equations for memory loads and literals on x86/x86_64.
-- NOTE(review): guard lines between some equation heads and bodies are
-- elided in this extract (the embedded original line numbers jump).
1249 getRegister (CmmLoad mem pk)
1252 Amode src mem_code <- getAmode mem
1254 code dst = mem_code `snocOL`
1255 IF_ARCH_i386(GLD pk src dst,
1256 MOV pk (OpAddr src) (OpReg dst))
1258 return (Any pk code)
1260 #if i386_TARGET_ARCH
1261 getRegister (CmmLoad mem pk)
1264 code <- intLoadCode (instr pk) mem
1265 return (Any pk code)
1267 instr I8 = MOVZxL pk
1270 -- we always zero-extend 8-bit loads, if we
1271 -- can't think of anything better. This is because
1272 -- we can't guarantee access to an 8-bit variant of every register
1273 -- (esi and edi don't have 8-bit variants), so to make things
1274 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1277 #if x86_64_TARGET_ARCH
1278 -- Simpler memory load code on x86_64
1279 getRegister (CmmLoad mem pk)
1281 code <- intLoadCode (MOV pk) mem
1282 return (Any pk code)
-- Zero literal: XOR the destination with itself (shorter than MOV 0).
1285 getRegister (CmmLit (CmmInt 0 rep))
1287 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1288 adj_rep = case rep of I64 -> I32; _ -> rep
1289 rep1 = IF_ARCH_i386( rep, adj_rep )
1291 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1293 return (Any rep code)
1295 #if x86_64_TARGET_ARCH
1296 -- optimisation for loading small literals on x86_64: take advantage
1297 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1298 -- instruction forms are shorter.
1299 getRegister (CmmLit lit)
1300 | I64 <- cmmLitRep lit, not (isBigLit lit)
1303 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1305 return (Any I64 code)
1307 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1309 -- note1: not the same as is64BitLit, because that checks for
1310 -- signed literals that fit in 32 bits, but we want unsigned
1312 -- note2: all labels are small, because we're assuming the
1313 -- small memory model (see gcc docs, -mcmodel=small).
-- General literal: plain MOV of the immediate.
1316 getRegister (CmmLit lit)
1320 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1322 return (Any rep code)
1324 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- Small register-selection helpers shared by the x86/x86_64 code above.
-- NOTE(review): some interior lines are elided in this extract.
-- Build the code for an integer load with a caller-supplied instruction.
1327 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1328 -> NatM (Reg -> InstrBlock)
1329 intLoadCode instr mem = do
1330 Amode src mem_code <- getAmode mem
1331 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1333 -- Compute an expression into *any* register, adding the appropriate
1334 -- move instruction if necessary.
1335 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1337 r <- getRegister expr
-- For a Fixed register, append a reg2reg move into the requested dst.
1340 anyReg :: Register -> NatM (Reg -> InstrBlock)
1341 anyReg (Any _ code) = return code
1342 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1344 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1345 -- Fixed registers might not be byte-addressable, so we make sure we've
1346 -- got a temporary, inserting an extra reg copy if necessary.
1347 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1348 #if x86_64_TARGET_ARCH
1349 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1351 getByteReg expr = do
1352 r <- getRegister expr
1355 tmp <- getNewRegNat rep
1356 return (tmp, code tmp)
1358 | isVirtualReg reg -> return (reg,code)
1360 tmp <- getNewRegNat rep
1361 return (tmp, code `snocOL` reg2reg rep reg tmp)
1362 -- ToDo: could optimise slightly by checking for byte-addressable
1363 -- real registers, but that will happen very rarely if at all.
1366 -- Another variant: this time we want the result in a register that cannot
1367 -- be modified by code to evaluate an arbitrary expression.
1368 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1369 getNonClobberedReg expr = do
1370 r <- getRegister expr
1373 tmp <- getNewRegNat rep
1374 return (tmp, code tmp)
1376 -- only free regs can be clobbered
1377 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1378 tmp <- getNewRegNat rep
1379 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- Register-to-register move; i386 needs GMOV for x87 float registers.
1383 reg2reg :: MachRep -> Reg -> Reg -> Instr
1385 #if i386_TARGET_ARCH
1386 | isFloatingRep rep = GMOV src dst
1388 | otherwise = MOV rep (OpReg src) (OpReg dst)
1390 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1392 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1394 #if sparc_TARGET_ARCH
-- Float literals on sparc: emit the constant into a data section and
-- load it via SETHI/LD using the label's high and low halves.
1396 getRegister (CmmLit (CmmFloat f F32)) = do
1397 lbl <- getNewLabelNat
1398 let code dst = toOL [
1401 CmmStaticLit (CmmFloat f F32)],
1402 SETHI (HI (ImmCLbl lbl)) dst,
1403 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1404 return (Any F32 code)
1406 getRegister (CmmLit (CmmFloat d F64)) = do
1407 lbl <- getNewLabelNat
1408 let code dst = toOL [
1411 CmmStaticLit (CmmFloat d F64)],
1412 SETHI (HI (ImmCLbl lbl)) dst,
1413 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1414 return (Any F64 code)
-- Unary MachOps on sparc.  Integer negation is (g0 - x); bitwise NOT is
-- XNOR with the zero register g0.
1416 getRegister (CmmMachOp mop [x]) -- unary MachOps
1418 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1419 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1421 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1422 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1424 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1426 MO_U_Conv F64 F32-> coerceDbl2Flt x
1427 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1429 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1430 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1431 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1432 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1434 -- Conversions which are a nop on sparc
1436 | from == to -> conversionNop to x
1437 MO_U_Conv I32 to -> conversionNop to x
1438 MO_S_Conv I32 to -> conversionNop to x
1441 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1442 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1443 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1444 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1446 other_op -> panic "Unknown unary mach op"
-- Extend via shift-left then arithmetic (signed) or logical (unsigned)
-- shift-right.  NOTE(review): the shift-left line is elided here.
1449 integerExtend signed from to expr = do
1450 (reg, e_code) <- getSomeReg expr
1454 ((if signed then SRA else SRL)
1455 reg (RIImm (ImmInt 0)) dst)
1456 return (Any to code)
1457 conversionNop new_rep expr
1458 = do e_code <- getRegister expr
1459 return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps, loads and integer/general literals on sparc.
-- NOTE(review): the embedded original line numbers jump, so guard lines
-- and some 'let'/'case' headers are elided in this extract.
1461 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1463 MO_Eq F32 -> condFltReg EQQ x y
1464 MO_Ne F32 -> condFltReg NE x y
1466 MO_S_Gt F32 -> condFltReg GTT x y
1467 MO_S_Ge F32 -> condFltReg GE x y
1468 MO_S_Lt F32 -> condFltReg LTT x y
1469 MO_S_Le F32 -> condFltReg LE x y
1471 MO_Eq F64 -> condFltReg EQQ x y
1472 MO_Ne F64 -> condFltReg NE x y
1474 MO_S_Gt F64 -> condFltReg GTT x y
1475 MO_S_Ge F64 -> condFltReg GE x y
1476 MO_S_Lt F64 -> condFltReg LTT x y
1477 MO_S_Le F64 -> condFltReg LE x y
1479 MO_Eq rep -> condIntReg EQQ x y
1480 MO_Ne rep -> condIntReg NE x y
1482 MO_S_Gt rep -> condIntReg GTT x y
1483 MO_S_Ge rep -> condIntReg GE x y
1484 MO_S_Lt rep -> condIntReg LTT x y
1485 MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): the I32 unsigned comparisons below use the *signed*
-- conditions (GTT/GE/LTT/LE) while the I16 cases use the unsigned ones
-- (GU/GEU/LU/LEU).  This looks inconsistent and possibly wrong for I32,
-- but surrounding lines are elided here -- confirm against the full file
-- before changing.
1487 MO_U_Gt I32 -> condIntReg GTT x y
1488 MO_U_Ge I32 -> condIntReg GE x y
1489 MO_U_Lt I32 -> condIntReg LTT x y
1490 MO_U_Le I32 -> condIntReg LE x y
1492 MO_U_Gt I16 -> condIntReg GU x y
1493 MO_U_Ge I16 -> condIntReg GEU x y
1494 MO_U_Lt I16 -> condIntReg LU x y
1495 MO_U_Le I16 -> condIntReg LEU x y
1497 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1498 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1500 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1502 -- ToDo: teach about V8+ SPARC div instructions
1503 MO_S_Quot I32 -> idiv (fsLit ".div") x y
1504 MO_S_Rem I32 -> idiv (fsLit ".rem") x y
1505 MO_U_Quot I32 -> idiv (fsLit ".udiv") x y
1506 MO_U_Rem I32 -> idiv (fsLit ".urem") x y
1508 MO_Add F32 -> trivialFCode F32 FADD x y
1509 MO_Sub F32 -> trivialFCode F32 FSUB x y
1510 MO_Mul F32 -> trivialFCode F32 FMUL x y
1511 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1513 MO_Add F64 -> trivialFCode F64 FADD x y
1514 MO_Sub F64 -> trivialFCode F64 FSUB x y
1515 MO_Mul F64 -> trivialFCode F64 FMUL x y
1516 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1518 MO_And rep -> trivialCode rep (AND False) x y
1519 MO_Or rep -> trivialCode rep (OR False) x y
1520 MO_Xor rep -> trivialCode rep (XOR False) x y
1522 MO_Mul rep -> trivialCode rep (SMUL False) x y
1524 MO_Shl rep -> trivialCode rep SLL x y
1525 MO_U_Shr rep -> trivialCode rep SRL x y
1526 MO_S_Shr rep -> trivialCode rep SRA x y
1529 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1530 [promote x, promote y])
1531 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1532 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1535 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1537 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1539 --------------------
-- Overflow check for signed multiply: compute the full product with
-- SMUL, sign-extend the low word, and subtract the high word; the result
-- is zero iff no overflow occurred.  NOTE(review): the RDY/lines reading
-- the high result appear to be elided in this extract.
1540 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1541 imulMayOflo rep a b = do
1542 (a_reg, a_code) <- getSomeReg a
1543 (b_reg, b_code) <- getSomeReg b
1544 res_lo <- getNewRegNat I32
1545 res_hi <- getNewRegNat I32
1547 shift_amt = case rep of
1550 _ -> panic "shift_amt"
1551 code dst = a_code `appOL` b_code `appOL`
1553 SMUL False a_reg (RIReg b_reg) res_lo,
1555 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1556 SUB False False res_lo (RIReg res_hi) dst
1558 return (Any I32 code)
1560 getRegister (CmmLoad mem pk) = do
1561 Amode src code <- getAmode mem
1563 code__2 dst = code `snocOL` LD pk src dst
1564 return (Any pk code__2)
-- Small integer literal: OR the immediate with g0 into the destination.
1566 getRegister (CmmLit (CmmInt i _))
1569 src = ImmInt (fromInteger i)
1570 code dst = unitOL (OR False g0 (RIImm src) dst)
1572 return (Any I32 code)
-- General literal: SETHI the high bits then OR in the low bits.
1574 getRegister (CmmLit lit)
1575 = let rep = cmmLitRep lit
1579 OR False dst (RIImm (LO imm)) dst]
1580 in return (Any I32 code)
1582 #endif /* sparc_TARGET_ARCH */
1584 #if powerpc_TARGET_ARCH
-- Loads, extension-fused loads and unary MachOps on powerpc.
-- NOTE(review): some guard lines are elided in this extract.
1585 getRegister (CmmLoad mem pk)
1588 Amode addr addr_code <- getAmode mem
1589 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1590 addr_code `snocOL` LD pk dst addr
1591 return (Any pk code)
1593 -- catch simple cases of zero- or sign-extended load
1594 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1595 Amode addr addr_code <- getAmode mem
1596 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1598 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1600 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1601 Amode addr addr_code <- getAmode mem
1602 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- LA = load halfword algebraic (sign-extending load).
1604 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1605 Amode addr addr_code <- getAmode mem
1606 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1608 getRegister (CmmMachOp mop [x]) -- unary MachOps
1610 MO_Not rep -> trivialUCode rep NOT x
1612 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1613 MO_S_Conv F32 F64 -> conversionNop F64 x
1616 | from == to -> conversionNop to x
1617 | isFloatingRep from -> coerceFP2Int from to x
1618 | isFloatingRep to -> coerceInt2FP from to x
1620 -- narrowing is a nop: we treat the high bits as undefined
1621 MO_S_Conv I32 to -> conversionNop to x
1622 MO_S_Conv I16 I8 -> conversionNop I8 x
1623 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1624 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1627 | from == to -> conversionNop to x
1628 -- narrowing is a nop: we treat the high bits as undefined
1629 MO_U_Conv I32 to -> conversionNop to x
1630 MO_U_Conv I16 I8 -> conversionNop I8 x
1631 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1632 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1634 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1635 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1636 MO_S_Neg rep -> trivialUCode rep NEG x
-- Re-tag the register's MachRep without emitting any instructions.
1639 conversionNop new_rep expr
1640 = do e_code <- getRegister expr
1641 return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps and literals on powerpc.  Sub-word integer operands are
-- first widened to I32 with extendSExpr/extendUExpr where the operation
-- is width-sensitive.
-- NOTE(review): some lines (e.g. the MO_Add I32 case head around
-- original lines 1685-1687) are elided in this extract.
1643 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1645 MO_Eq F32 -> condFltReg EQQ x y
1646 MO_Ne F32 -> condFltReg NE x y
1648 MO_S_Gt F32 -> condFltReg GTT x y
1649 MO_S_Ge F32 -> condFltReg GE x y
1650 MO_S_Lt F32 -> condFltReg LTT x y
1651 MO_S_Le F32 -> condFltReg LE x y
1653 MO_Eq F64 -> condFltReg EQQ x y
1654 MO_Ne F64 -> condFltReg NE x y
1656 MO_S_Gt F64 -> condFltReg GTT x y
1657 MO_S_Ge F64 -> condFltReg GE x y
1658 MO_S_Lt F64 -> condFltReg LTT x y
1659 MO_S_Le F64 -> condFltReg LE x y
1661 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1662 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1664 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1665 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1666 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1667 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1669 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1670 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1671 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1672 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1674 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1675 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1676 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1677 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1679 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1680 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1681 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1682 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1684 -- optimize addition with 32-bit immediate
1688 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1689 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
-- Large immediate: build it with ADDIS (high half) + ADD (low half).
1692 (src, srcCode) <- getSomeReg x
1693 let imm = litToImm lit
1694 code dst = srcCode `appOL` toOL [
1695 ADDIS dst src (HA imm),
1696 ADD dst dst (RIImm (LO imm))
1698 return (Any I32 code)
1699 _ -> trivialCode I32 True ADD x y
1701 MO_Add rep -> trivialCode rep True ADD x y
1703 case y of -- subfi ('subtract from' with immediate) doesn't exist
1704 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1705 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1706 _ -> trivialCodeNoImm rep SUBF y x
1708 MO_Mul rep -> trivialCode rep True MULLW x y
1710 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1712 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1713 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1715 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1716 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1718 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1719 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1721 MO_And rep -> trivialCode rep False AND x y
1722 MO_Or rep -> trivialCode rep False OR x y
1723 MO_Xor rep -> trivialCode rep False XOR x y
1725 MO_Shl rep -> trivialCode rep False SLW x y
1726 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1727 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- Integer literal that fits a PPC immediate: single LI.
1729 getRegister (CmmLit (CmmInt i rep))
1730 | Just imm <- makeImmediate rep True i
1732 code dst = unitOL (LI dst imm)
1734 return (Any rep code)
-- Float literal: emit into a data section and load PIC-safely.
1736 getRegister (CmmLit (CmmFloat f frep)) = do
1737 lbl <- getNewLabelNat
1738 dflags <- getDynFlagsNat
1739 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1740 Amode addr addr_code <- getAmode dynRef
1742 LDATA ReadOnlyData [CmmDataLabel lbl,
1743 CmmStaticLit (CmmFloat f frep)]
1744 `consOL` (addr_code `snocOL` LD frep dst addr)
1745 return (Any frep code)
-- Other literals: build the immediate in two halves.
1747 getRegister (CmmLit lit)
1748 = let rep = cmmLitRep lit
1752 ADD dst dst (RIImm (LO imm))
1754 in return (Any rep code)
1756 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1758 -- extend?Rep: wrap integer expression of type rep
1759 -- in a conversion to I32
1760 extendSExpr I32 x = x
1761 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1762 extendUExpr I32 x = x
1763 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1765 #endif /* powerpc_TARGET_ARCH */
1768 -- -----------------------------------------------------------------------------
1769 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An addressing mode together with the code that computes the registers
-- it refers to (see the usage rule in the comment below).
1771 data Amode = Amode AddrMode InstrBlock
1774 Now, given a tree (the argument to a CmmLoad) that references memory,
1775 produce a suitable addressing mode.
1777 A Rule of the Game (tm) for Amodes: use of the addr bit must
1778 immediately follow use of the code part, since the code part puts
1779 values in registers which the addr then refers to. So you can't put
1780 anything in between, lest it overwrite some of those registers. If
1781 you need to do some other computation between the code part and use of
1782 the addr bit, first store the effective address from the amode in a
1783 temporary, then do the other computation, and then use the temporary:
1787 ... other computation ...
-- Turn a memory-referencing expression into an addressing mode.
-- CmmRegOff is first normalised by mangleIndexTree, then re-dispatched.
1791 getAmode :: CmmExpr -> NatM Amode
1792 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1794 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1796 #if alpha_TARGET_ARCH
-- Alpha addressing modes (legacy Stix-style code, pre-dating NatM's do
-- notation -- hence the thenNat chains).
-- NOTE(review): some lines are elided in this extract.
1798 getAmode (StPrim IntSubOp [x, StInt i])
1799 = getNewRegNat PtrRep `thenNat` \ tmp ->
1800 getRegister x `thenNat` \ register ->
1802 code = registerCode register tmp
1803 reg = registerName register tmp
1804 off = ImmInt (-(fromInteger i))
1806 return (Amode (AddrRegImm reg off) code)
1808 getAmode (StPrim IntAddOp [x, StInt i])
1809 = getNewRegNat PtrRep `thenNat` \ tmp ->
1810 getRegister x `thenNat` \ register ->
1812 code = registerCode register tmp
1813 reg = registerName register tmp
1814 off = ImmInt (fromInteger i)
1816 return (Amode (AddrRegImm reg off) code)
1820 = return (Amode (AddrImm imm__2) id)
1823 imm__2 = case imm of Just x -> x
1826 = getNewRegNat PtrRep `thenNat` \ tmp ->
1827 getRegister other `thenNat` \ register ->
1829 code = registerCode register tmp
1830 reg = registerName register tmp
1832 return (Amode (AddrReg reg) code)
1834 #endif /* alpha_TARGET_ARCH */
1836 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1838 #if x86_64_TARGET_ARCH
-- x86_64 only: PIC-base-relative addresses become RIP-relative.
1840 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1841 CmmLit displacement])
1842 = return $ Amode (ripRel (litToImm displacement)) nilOL
1846 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1848 -- This is all just ridiculous, since it carefully undoes
1849 -- what mangleIndexTree has just done.
1850 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1851 | not (is64BitLit lit)
1852 -- ASSERT(rep == I32)???
1853 = do (x_reg, x_code) <- getSomeReg x
1854 let off = ImmInt (-(fromInteger i))
1855 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1857 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1858 | not (is64BitLit lit)
1859 -- ASSERT(rep == I32)???
1860 = do (x_reg, x_code) <- getSomeReg x
1861 let off = ImmInt (fromInteger i)
1862 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1864 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1865 -- recognised by the next rule.
1866 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1868 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- (x + (y << shift)) with shift in 0..3 becomes a scaled-index mode.
1870 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1871 [y, CmmLit (CmmInt shift _)]])
1872 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1873 = x86_complex_amode x y shift 0
-- Match (x + ((y << shift) + offset)), shift in 0..3, and turn it into a
-- scaled-index-plus-displacement addressing mode via x86_complex_amode.
--
-- Fix: the original guard read
--     shift == 0 || shift == 1 || shift == 2 || shift == 3
--                                            && not (is64BitInteger offset)
-- Since (&&) binds more tightly than (||), the offset-fits-in-32-bits
-- check only applied when shift == 3; for shift 0, 1 or 2 a 64-bit
-- offset could be accepted and silently truncated by the 'fromIntegral'
-- inside x86_complex_amode.  Parenthesising the shift alternatives makes
-- the offset check apply to every shift value; rejected expressions fall
-- through to the following, fully general getAmode equations.
getAmode (CmmMachOp (MO_Add rep)
                    [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && not (is64BitInteger offset)
  = x86_complex_amode x y shift offset
-- Fallback x86 amodes: plain register+register, small literal as an
-- absolute address, and finally "compute into a register" for anything
-- else.  NOTE(review): the head of the final catch-all equation (around
-- original lines 1888-1889) is elided in this extract.
1883 getAmode (CmmMachOp (MO_Add rep) [x,y])
1884 = x86_complex_amode x y 0 0
1886 getAmode (CmmLit lit) | not (is64BitLit lit)
1887 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1890 (reg,code) <- getSomeReg expr
1891 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- Build base + index*2^shift + offset.  The base must survive the
-- evaluation of the index, hence getNonClobberedReg.
1894 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1895 x86_complex_amode base index shift offset
1896 = do (x_reg, x_code) <- getNonClobberedReg base
1897 -- x must be in a temp, because it has to stay live over y_code
1898 -- we could compare x_reg and y_reg and do something better here...
1899 (y_reg, y_code) <- getSomeReg index
1901 code = x_code `appOL` y_code
-- NOTE(review): this local 'base' (the scale factor 2^shift) shadows the
-- 'base' parameter above -- confusing but apparently intentional.
1902 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1903 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1906 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1908 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1910 #if sparc_TARGET_ARCH
-- sparc addressing modes: reg+imm13 where the literal fits, reg+reg,
-- SETHI/LO pairs for full-width constants, and a register fallback.
-- NOTE(review): the guard lines checking immediate fit are elided here.
1912 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1915 (reg, code) <- getSomeReg x
1917 off = ImmInt (-(fromInteger i))
1918 return (Amode (AddrRegImm reg off) code)
1921 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1924 (reg, code) <- getSomeReg x
1926 off = ImmInt (fromInteger i)
1927 return (Amode (AddrRegImm reg off) code)
1929 getAmode (CmmMachOp (MO_Add rep) [x, y])
1931 (regX, codeX) <- getSomeReg x
1932 (regY, codeY) <- getSomeReg y
1934 code = codeX `appOL` codeY
1935 return (Amode (AddrRegReg regX regY) code)
1937 -- XXX Is this same as "leaf" in Stix?
1938 getAmode (CmmLit lit)
1940 tmp <- getNewRegNat I32
1942 code = unitOL (SETHI (HI imm__2) tmp)
1943 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1945 imm__2 = litToImm lit
1949 (reg, code) <- getSomeReg other
1952 return (Amode (AddrRegImm reg off) code)
1954 #endif /* sparc_TARGET_ARCH */
1956 #ifdef powerpc_TARGET_ARCH
-- powerpc addressing modes: reg+imm16 where makeImmediate succeeds,
-- ADDIS/LO split for wider offsets, LIS/LO for bare literals, reg+reg,
-- and a register fallback.
1957 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1958 | Just off <- makeImmediate I32 True (-i)
1960 (reg, code) <- getSomeReg x
1961 return (Amode (AddrRegImm reg off) code)
1964 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1965 | Just off <- makeImmediate I32 True i
1967 (reg, code) <- getSomeReg x
1968 return (Amode (AddrRegImm reg off) code)
1970 -- optimize addition with 32-bit immediate
1972 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1974 tmp <- getNewRegNat I32
1975 (src, srcCode) <- getSomeReg x
1976 let imm = litToImm lit
1977 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1978 return (Amode (AddrRegImm tmp (LO imm)) code)
1980 getAmode (CmmLit lit)
1982 tmp <- getNewRegNat I32
1983 let imm = litToImm lit
1984 code = unitOL (LIS tmp (HA imm))
1985 return (Amode (AddrRegImm tmp (LO imm)) code)
1987 getAmode (CmmMachOp (MO_Add I32) [x, y])
1989 (regX, codeX) <- getSomeReg x
1990 (regY, codeY) <- getSomeReg y
1991 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1995 (reg, code) <- getSomeReg other
1998 return (Amode (AddrRegImm reg off) code)
1999 #endif /* powerpc_TARGET_ARCH */
2001 -- -----------------------------------------------------------------------------
2002 -- getOperand: sometimes any operand will do.
2004 -- getNonClobberedOperand: the value of the operand will remain valid across
2005 -- the computation of an arbitrary expression, unless the expression
2006 -- is computed directly into a register which the operand refers to
2007 -- (see trivialCode where this function is used for an example).
2009 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Produce an Operand whose value stays valid across the evaluation of an
-- arbitrary later expression (see the header comment above this block).
2011 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2012 #if x86_64_TARGET_ARCH
-- x86_64: non-zero float literals are loaded RIP-relative from a data
-- section.
2013 getNonClobberedOperand (CmmLit lit)
2014 | isSuitableFloatingPointLit lit = do
2015 lbl <- getNewLabelNat
2016 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2018 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2020 getNonClobberedOperand (CmmLit lit)
2021 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2022 return (OpImm (litToImm lit), nilOL)
2023 getNonClobberedOperand (CmmLoad mem pk)
2024 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2025 Amode src mem_code <- getAmode mem
-- If the amode uses clobberable registers, snapshot the effective
-- address into a fresh temp with LEA first.
2027 if (amodeCouldBeClobbered src)
2029 tmp <- getNewRegNat wordRep
2030 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2031 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2034 return (OpAddr src', save_code `appOL` mem_code)
2035 getNonClobberedOperand e = do
2036 (reg, code) <- getNonClobberedReg e
2037 return (OpReg reg, code)
-- True if the amode mentions any register that later code may clobber
-- (only free real registers can be clobbered).
2039 amodeCouldBeClobbered :: AddrMode -> Bool
2040 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2042 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2043 regClobbered _ = False
2045 -- getOperand: the operand is not required to remain valid across the
2046 -- computation of an arbitrary expression.
2047 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2048 #if x86_64_TARGET_ARCH
2049 getOperand (CmmLit lit)
2050 | isSuitableFloatingPointLit lit = do
2051 lbl <- getNewLabelNat
2052 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2054 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2056 getOperand (CmmLit lit)
2057 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2058 return (OpImm (litToImm lit), nilOL)
2059 getOperand (CmmLoad mem pk)
2060 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2061 Amode src mem_code <- getAmode mem
2062 return (OpAddr src, mem_code)
2064 (reg, code) <- getSomeReg e
2065 return (OpReg reg, code)
-- Predicate mirroring what getOperand can handle directly.
2067 isOperand :: CmmExpr -> Bool
2068 isOperand (CmmLoad _ _) = True
2069 isOperand (CmmLit lit) = not (is64BitLit lit)
2070 || isSuitableFloatingPointLit lit
2073 -- if we want a floating-point literal as an operand, we can
2074 -- use it directly from memory. However, if the literal is
2075 -- zero, we're better off generating it into a register using
2077 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2078 isSuitableFloatingPointLit _ = False
-- Like getOperand, but the operand must not be clobbered by later code:
-- loads stay as memory operands, everything else goes through
-- getNonClobberedReg.
2080 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2081 getRegOrMem (CmmLoad mem pk)
2082 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2083 Amode src mem_code <- getAmode mem
2084 return (OpAddr src, mem_code)
2086 (reg, code) <- getNonClobberedReg e
2087 return (OpReg reg, code)
2089 #if x86_64_TARGET_ARCH
-- Does this literal need a full 64-bit instruction form?
2090 is64BitLit (CmmInt i I64) = is64BitInteger i
2091 -- assume that labels are in the range 0-2^31-1: this assumes the
2092 -- small memory model (see gcc docs, -mcmodel=small).
2094 is64BitLit x = False
-- True if the value does not fit in a signed 32-bit immediate.
2097 is64BitInteger :: Integer -> Bool
2098 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2099 where i64 = fromIntegral i :: Int64
2100 -- a CmmInt is intended to be truncated to the appropriate
2101 -- number of bits, so here we truncate it to Int64. This is
2102 -- important because e.g. -1 as a CmmInt might be either
2103 -- -1 or 18446744073709551615.
2105 -- -----------------------------------------------------------------------------
2106 -- The 'CondCode' type: Condition codes passed up the tree.
-- The Bool records whether this is a float comparison; the Cond is the
-- condition to branch on after running the InstrBlock.
2108 data CondCode = CondCode Bool Cond InstrBlock
2110 -- Set up a condition code for a conditional branch.
2112 getCondCode :: CmmExpr -> NatM CondCode
2114 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2116 #if alpha_TARGET_ARCH
2117 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2118 #endif /* alpha_TARGET_ARCH */
2120 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2122 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2123 -- yes, they really do seem to want exactly the same!
-- Comparison MachOps map directly to condFltCode/condIntCode; anything
-- else is a panic, since only comparisons produce condition codes.
2125 getCondCode (CmmMachOp mop [x, y])
2128 MO_Eq F32 -> condFltCode EQQ x y
2129 MO_Ne F32 -> condFltCode NE x y
2131 MO_S_Gt F32 -> condFltCode GTT x y
2132 MO_S_Ge F32 -> condFltCode GE x y
2133 MO_S_Lt F32 -> condFltCode LTT x y
2134 MO_S_Le F32 -> condFltCode LE x y
2136 MO_Eq F64 -> condFltCode EQQ x y
2137 MO_Ne F64 -> condFltCode NE x y
2139 MO_S_Gt F64 -> condFltCode GTT x y
2140 MO_S_Ge F64 -> condFltCode GE x y
2141 MO_S_Lt F64 -> condFltCode LTT x y
2142 MO_S_Le F64 -> condFltCode LE x y
2144 MO_Eq rep -> condIntCode EQQ x y
2145 MO_Ne rep -> condIntCode NE x y
2147 MO_S_Gt rep -> condIntCode GTT x y
2148 MO_S_Ge rep -> condIntCode GE x y
2149 MO_S_Lt rep -> condIntCode LTT x y
2150 MO_S_Le rep -> condIntCode LE x y
2152 MO_U_Gt rep -> condIntCode GU x y
2153 MO_U_Ge rep -> condIntCode GEU x y
2154 MO_U_Lt rep -> condIntCode LU x y
2155 MO_U_Le rep -> condIntCode LEU x y
2157 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2159 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2161 #elif powerpc_TARGET_ARCH
2163 -- almost the same as everywhere else - but we need to
2164 -- extend small integers to 32 bit first
-- PowerPC compares whole registers, so sub-word integers are widened
-- first: extendSExpr for signed, extendUExpr for unsigned/equality.
-- NOTE(review): the `case mop of` scrutinee line is elided here too.
2166 getCondCode (CmmMachOp mop [x, y])
2168 MO_Eq F32 -> condFltCode EQQ x y
2169 MO_Ne F32 -> condFltCode NE x y
2171 MO_S_Gt F32 -> condFltCode GTT x y
2172 MO_S_Ge F32 -> condFltCode GE x y
2173 MO_S_Lt F32 -> condFltCode LTT x y
2174 MO_S_Le F32 -> condFltCode LE x y
2176 MO_Eq F64 -> condFltCode EQQ x y
2177 MO_Ne F64 -> condFltCode NE x y
2179 MO_S_Gt F64 -> condFltCode GTT x y
2180 MO_S_Ge F64 -> condFltCode GE x y
2181 MO_S_Lt F64 -> condFltCode LTT x y
2182 MO_S_Le F64 -> condFltCode LE x y
2184 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2185 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2187 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2188 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2189 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2190 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2192 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2193 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2194 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2195 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2197 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2199 getCondCode other = panic "getCondCode(2)(powerpc)"
2205 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2206 -- passed back up the tree.
-- | Emit code that sets the machine flags for an integer (condIntCode)
-- or floating-point (condFltCode) comparison of the two expressions.
2208 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2210 #if alpha_TARGET_ARCH
-- Alpha backend is dead code: both entry points panic.
2211 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2212 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2213 #endif /* alpha_TARGET_ARCH */
2215 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2216 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86-64 integer comparisons, from most to least specialised:
-- several intermediate `let` lines are elided in this extract.
2218 -- memory vs immediate
-- CMP imm, [mem] -- the literal must fit an immediate (not 64-bit).
2219 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2220 Amode x_addr x_code <- getAmode x
2223 code = x_code `snocOL`
2224 CMP pk (OpImm imm) (OpAddr x_addr)
2226 return (CondCode False cond code)
2228 -- anything vs zero, using a mask
2229 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) compared with 0 becomes TEST mask, reg.
2230 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2231 | (CmmLit (CmmInt mask pk2)) <- o2
2233 (x_reg, x_code) <- getSomeReg x
2235 code = x_code `snocOL`
2236 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2238 return (CondCode False cond code)
-- anything vs zero: TEST reg, reg sets the flags without an immediate.
2241 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2242 (x_reg, x_code) <- getSomeReg x
2244 code = x_code `snocOL`
2245 TEST pk (OpReg x_reg) (OpReg x_reg)
2247 return (CondCode False cond code)
2249 -- anything vs operand
-- x must survive the computation of y, hence getNonClobberedReg.
2250 condIntCode cond x y | isOperand y = do
2251 (x_reg, x_code) <- getNonClobberedReg x
2252 (y_op, y_code) <- getOperand y
2254 code = x_code `appOL` y_code `snocOL`
2255 CMP (cmmExprRep x) y_op (OpReg x_reg)
2257 return (CondCode False cond code)
2259 -- anything vs anything
2260 condIntCode cond x y = do
2261 (y_reg, y_code) <- getNonClobberedReg y
2262 (x_op, x_code) <- getRegOrMem x
2264 code = y_code `appOL`
2266 CMP (cmmExprRep x) (OpReg y_reg) x_op
2268 return (CondCode False cond code)
2271 #if i386_TARGET_ARCH
-- i386 x87 float comparison via the pseudo-insn GCMP; only the listed
-- conditions are supported (see the ASSERT).
2272 condFltCode cond x y
2273 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2274 (x_reg, x_code) <- getNonClobberedReg x
2275 (y_reg, y_code) <- getSomeReg y
2277 code = x_code `appOL` y_code `snocOL`
2278 GCMP cond x_reg y_reg
2279 -- The GCMP insn does the test and sets the zero flag if comparable
2280 -- and true. Hence we always supply EQQ as the condition to test.
2281 return (CondCode True EQQ code)
2282 #endif /* i386_TARGET_ARCH */
2284 #if x86_64_TARGET_ARCH
2285 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2286 -- an operand, but the right must be a reg. We can probably do better
2287 -- than this general case...
2288 condFltCode cond x y = do
2289 (x_reg, x_code) <- getNonClobberedReg x
2290 (y_op, y_code) <- getOperand y
2292 code = x_code `appOL`
2294 CMP (cmmExprRep x) y_op (OpReg x_reg)
2295 -- NB(1): we need to use the unsigned comparison operators on the
2296 -- result of this comparison.
2298 return (CondCode True (condToUnsigned cond) code)
2301 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2303 #if sparc_TARGET_ARCH
-- SPARC sets the condition codes with a subtract-and-set-cc into %g0
-- (SUB ... g0 discards the result, keeping only the flags).
-- Guard and `let` lines are elided in this extract.
2305 condIntCode cond x (CmmLit (CmmInt y rep))
2308 (src1, code) <- getSomeReg x
2310 src2 = ImmInt (fromInteger y)
2311 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2312 return (CondCode False cond code')
2314 condIntCode cond x y = do
2315 (src1, code1) <- getSomeReg x
2316 (src2, code2) <- getSomeReg y
2318 code__2 = code1 `appOL` code2 `snocOL`
2319 SUB False True src1 (RIReg src2) g0
2320 return (CondCode False cond code__2)
-- Float comparison: if the operand widths differ, the F32 side is
-- promoted to F64 (FxTOy into tmp) before the FCMP.
2323 condFltCode cond x y = do
2324 (src1, code1) <- getSomeReg x
2325 (src2, code2) <- getSomeReg y
2326 tmp <- getNewRegNat F64
2328 promote x = FxTOy F32 F64 x tmp
2335 code1 `appOL` code2 `snocOL`
2336 FCMP True pk1 src1 src2
2337 else if pk1 == F32 then
2338 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2339 FCMP True F64 tmp src2
2341 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2342 FCMP True F64 src1 tmp
2343 return (CondCode True cond code__2)
2345 #endif /* sparc_TARGET_ARCH */
2347 #if powerpc_TARGET_ARCH
2348 -- ###FIXME: I16 and I8!
-- PowerPC: CMPL for unsigned, CMP for signed comparisons, always at I32.
-- The immediate clause only fires when the literal fits (makeImmediate).
2349 condIntCode cond x (CmmLit (CmmInt y rep))
2350 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2352 (src1, code) <- getSomeReg x
2354 code' = code `snocOL`
2355 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2356 return (CondCode False cond code')
2358 condIntCode cond x y = do
2359 (src1, code1) <- getSomeReg x
2360 (src2, code2) <- getSomeReg y
2362 code' = code1 `appOL` code2 `snocOL`
2363 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2364 return (CondCode False cond code')
-- Float comparison: after FCMP, GE/LE are fixed up with CRNOR so that
-- an unordered result does not satisfy them.
2366 condFltCode cond x y = do
2367 (src1, code1) <- getSomeReg x
2368 (src2, code2) <- getSomeReg y
2370 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2371 code'' = case cond of -- twiddle CR to handle unordered case
2372 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2373 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2376 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2377 return (CondCode True cond code'')
2379 #endif /* powerpc_TARGET_ARCH */
2381 -- -----------------------------------------------------------------------------
2382 -- Generating assignments
2384 -- Assignments are really at the heart of the whole code generation
2385 -- business. Almost all top-level nodes of any real importance are
2386 -- assignments, which correspond to loads, stores, or register
2387 -- transfers. If we're really lucky, some of the register transfers
2388 -- will go away, because we can use the destination register to
2389 -- complete the code generation for the right hand side. This only
2390 -- fails when the right hand side is forced into a fixed register
2391 -- (e.g. the result of a call).
-- Four entry points: {Mem,Reg} destination x {Int,Flt} payload.
2393 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2394 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2396 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2397 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2399 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2401 #if alpha_TARGET_ARCH
-- Legacy Alpha code in the pre-monadic `thenNat` style; several `let`/
-- `in` lines are elided in this extract. Kept under CPP only.
2403 assignIntCode pk (CmmLoad dst _) src
2404 = getNewRegNat IntRep `thenNat` \ tmp ->
2405 getAmode dst `thenNat` \ amode ->
2406 getRegister src `thenNat` \ register ->
2408 code1 = amodeCode amode []
2409 dst__2 = amodeAddr amode
2410 code2 = registerCode register tmp []
2411 src__2 = registerName register tmp
2412 sz = primRepToSize pk
2413 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2417 assignIntCode pk dst src
2418 = getRegister dst `thenNat` \ register1 ->
2419 getRegister src `thenNat` \ register2 ->
2421 dst__2 = registerName register1 zeroh
2422 code = registerCode register2 dst__2
2423 src__2 = registerName register2 dst__2
-- A Fixed source needs an explicit move (OR src,src -> dst).
2424 code__2 = if isFixed register2
2425 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2430 #endif /* alpha_TARGET_ARCH */
2432 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2434 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2436 -- integer assignment to memory
2438 -- specific case of adding/subtracting an integer to a particular address.
2439 -- ToDo: catch other cases where we can use an operation directly on a memory
-- [addr] := [addr] +/- imm becomes a single read-modify-write ADD/SUB
-- on the memory operand, provided the immediate fits (x86-64 check).
2441 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2442 CmmLit (CmmInt i _)])
2443 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2444 Just instr <- check op
2445 = do Amode amode code_addr <- getAmode addr
2446 let code = code_addr `snocOL`
2447 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2450 check (MO_Add _) = Just ADD
2451 check (MO_Sub _) = Just SUB
-- General case: MOV the source (immediate or register) to memory.
2456 assignMem_IntCode pk addr src = do
2457 Amode addr code_addr <- getAmode addr
2458 (code_src, op_src) <- get_op_RI src
2460 code = code_src `appOL`
2462 MOV pk op_src (OpAddr addr)
2463 -- NOTE: op_src is stable, so it will still be valid
2464 -- after code_addr. This may involve the introduction
2465 -- of an extra MOV to a temporary register, but we hope
2466 -- the register allocator will get rid of it.
-- Small literals become immediates; everything else a stable register.
2470 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2471 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2472 = return (nilOL, OpImm (litToImm lit))
2474 = do (reg,code) <- getNonClobberedReg op
2475 return (code, OpReg reg)
2478 -- Assign; dst is a reg, rhs is mem
2479 assignReg_IntCode pk reg (CmmLoad src _) = do
2480 load_code <- intLoadCode (MOV pk) src
2481 return (load_code (getRegisterReg reg))
2483 -- dst is a reg, but src could be anything
2484 assignReg_IntCode pk reg src = do
2485 code <- getAnyReg src
2486 return (code (getRegisterReg reg))
2488 #endif /* i386_TARGET_ARCH */
2490 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2492 #if sparc_TARGET_ARCH
-- | Store an integer expression to memory: compute the source into a
-- register, compute the address, then ST.
2494 assignMem_IntCode pk addr src = do
2495 (srcReg, code) <- getSomeReg src
2496 Amode dstAddr addr_code <- getAmode addr
2497 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- | Assign an expression to a register. An Any-result writes straight
-- into dst; a Fixed result needs an explicit move from freg INTO dst.
-- BUG FIX(review): the Fixed case previously read
--   OR False g0 (RIReg dst) freg
-- which copies dst into freg -- the wrong direction. The destination
-- must receive the fixed register's value (compare the PowerPC
-- counterpart below, `MR dst freg`), i.e. OR g0, freg -> dst.
2499 assignReg_IntCode pk reg src = do
2500 r <- getRegister src
2502 Any _ code -> code dst
2503 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2505 dst = getRegisterReg reg
2508 #endif /* sparc_TARGET_ARCH */
2510 #if powerpc_TARGET_ARCH
-- Store an integer expression to memory: source into a register,
-- address via getAmode, then ST.
2512 assignMem_IntCode pk addr src = do
2513 (srcReg, code) <- getSomeReg src
2514 Amode dstAddr addr_code <- getAmode addr
2515 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2517 -- dst is a reg, but src could be anything
-- An Any-result writes straight into dst; a Fixed result is moved
-- into dst with MR (register-to-register move).
2518 assignReg_IntCode pk reg src
2520 r <- getRegister src
2522 Any _ code -> code dst
2523 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2525 dst = getRegisterReg reg
2527 #endif /* powerpc_TARGET_ARCH */
2530 -- -----------------------------------------------------------------------------
2531 -- Floating-point assignments
2533 #if alpha_TARGET_ARCH
-- Legacy Alpha code (pre-monadic `thenNat` style); `let`/`in` lines are
-- elided in this extract. Mirrors assignIntCode but uses FMOV for the
-- Fixed-register move.
2535 assignFltCode pk (CmmLoad dst _) src
2536 = getNewRegNat pk `thenNat` \ tmp ->
2537 getAmode dst `thenNat` \ amode ->
2538 getRegister src `thenNat` \ register ->
2540 code1 = amodeCode amode []
2541 dst__2 = amodeAddr amode
2542 code2 = registerCode register tmp []
2543 src__2 = registerName register tmp
2544 sz = primRepToSize pk
2545 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2549 assignFltCode pk dst src
2550 = getRegister dst `thenNat` \ register1 ->
2551 getRegister src `thenNat` \ register2 ->
2553 dst__2 = registerName register1 zeroh
2554 code = registerCode register2 dst__2
2555 src__2 = registerName register2 dst__2
2556 code__2 = if isFixed register2
2557 then code . mkSeqInstr (FMOV src__2 dst__2)
2562 #endif /* alpha_TARGET_ARCH */
2564 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2566 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2568 -- Floating point assignment to memory
-- i386 stores via the x87 pseudo-insn GST; x86-64 uses a plain MOV
-- (SSE registers). The source must not be clobbered by the address code.
2569 assignMem_FltCode pk addr src = do
2570 (src_reg, src_code) <- getNonClobberedReg src
2571 Amode addr addr_code <- getAmode addr
2573 code = src_code `appOL`
2575 IF_ARCH_i386(GST pk src_reg addr,
2576 MOV pk (OpReg src_reg) (OpAddr addr))
2579 -- Floating point assignment to a register/temporary
2580 assignReg_FltCode pk reg src = do
2581 src_code <- getAnyReg src
2582 return (src_code (getRegisterReg reg))
2584 #endif /* i386_TARGET_ARCH */
2586 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2588 #if sparc_TARGET_ARCH
2590 -- Floating point assignment to memory
-- If the source's rep differs from the target rep, convert through a
-- fresh register (FxTOy) before the store. Some lines are elided here.
2591 assignMem_FltCode pk addr src = do
2592 Amode dst__2 code1 <- getAmode addr
2593 (src__2, code2) <- getSomeReg src
2594 tmp1 <- getNewRegNat pk
2596 pk__2 = cmmExprRep src
2597 code__2 = code1 `appOL` code2 `appOL`
2599 then unitOL (ST pk src__2 dst__2)
2600 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2603 -- Floating point assignment to a register/temporary
2604 -- ToDo: Verify correctness
-- NOTE(review): the Fixed case moves freg into v1 (a fresh register),
-- not into dst -- the original ToDo above already questions this.
2605 assignReg_FltCode pk reg src = do
2606 r <- getRegister src
2607 v1 <- getNewRegNat pk
2609 Any _ code -> code dst
2610 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2612 dst = getRegisterReg reg
2614 #endif /* sparc_TARGET_ARCH */
2616 #if powerpc_TARGET_ARCH
-- PowerPC reuses the integer assignment paths for floats.
2619 assignMem_FltCode = assignMem_IntCode
2620 assignReg_FltCode = assignReg_IntCode
2622 #endif /* powerpc_TARGET_ARCH */
2625 -- -----------------------------------------------------------------------------
2626 -- Generating a non-local jump
2628 -- (If applicable) Do not fill the delay slots here; you will confuse the
2629 -- register allocator.
-- | Emit an unconditional jump to a computed (non-local) target.
2631 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2633 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2635 #if alpha_TARGET_ARCH
-- Legacy Alpha code: direct BR for local labels, otherwise load the
-- target into pv and JMP through it.
2637 genJump (CmmLabel lbl)
2638 | isAsmTemp lbl = returnInstr (BR target)
2639 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2641 target = ImmCLbl lbl
2644 = getRegister tree `thenNat` \ register ->
2645 getNewRegNat PtrRep `thenNat` \ tmp ->
2647 dst = registerName register pv
2648 code = registerCode register pv
2649 target = registerName register pv
2651 if isFixed register then
2652 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2654 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2656 #endif /* alpha_TARGET_ARCH */
2658 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2660 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump to a memory-held target via JMP [addr]; to a literal via an
-- immediate; otherwise through a register. The header of the final
-- clause (orig. line 2669) is elided in this extract.
2662 genJump (CmmLoad mem pk) = do
2663 Amode target code <- getAmode mem
2664 return (code `snocOL` JMP (OpAddr target))
2666 genJump (CmmLit lit) = do
2667 return (unitOL (JMP (OpImm (litToImm lit))))
2670 (reg,code) <- getSomeReg expr
2671 return (code `snocOL` JMP (OpReg reg))
2673 #endif /* i386_TARGET_ARCH */
2675 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2677 #if sparc_TARGET_ARCH
-- Label targets use CALL; computed targets JMP through a register.
-- Each is followed by a NOP for the (unfilled) delay slot.
2679 genJump (CmmLit (CmmLabel lbl))
2680 = return (toOL [CALL (Left target) 0 True, NOP])
2682 target = ImmCLbl lbl
2686 (target, code) <- getSomeReg tree
2687 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2689 #endif /* sparc_TARGET_ARCH */
2691 #if powerpc_TARGET_ARCH
-- Label targets jump directly; computed targets go via the count
-- register (MTCTR then BCTR).
2692 genJump (CmmLit (CmmLabel lbl))
2693 = return (unitOL $ JMP lbl)
2697 (target,code) <- getSomeReg tree
2698 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2699 #endif /* powerpc_TARGET_ARCH */
-- -----------------------------------------------------------------------------
-- Unconditional branches

-- | Emit an unconditional jump to a local basic block.
genBranch :: BlockId -> NatM InstrBlock
genBranch target = return (toOL (mkBranchInstr target))
2709 -- -----------------------------------------------------------------------------
2710 -- Conditional jumps
2713 Conditional jumps are always to local labels, so we can use branch
2714 instructions. We peek at the arguments to decide what kind of
2717 ALPHA: Comparisons with 0 are the easy case, because we can simply
2718 emit the desired conditional branch directly.
2720 I386: First, we have to ensure that the condition
2721 codes are set according to the supplied comparison operation.
2723 SPARC: First, we have to ensure that the condition codes are set
2724 according to the supplied comparison operation. We generate slightly
2725 different code for floating point comparisons, because a floating
2726 point operation cannot directly precede a @BF@. We assume the worst
2727 and fill that slot with a @NOP@.
2729 SPARC: Do not fill the delay slots here; you will confuse the register
-- Signature of genCondJump (its name line, orig. 2734, is elided here).
2735 :: BlockId -- the branch target
2736 -> CmmExpr -- the condition on which to branch
2739 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2741 #if alpha_TARGET_ARCH
-- Legacy Alpha code, still written against the old Stix IR (StPrim,
-- StInt, StDouble) and the pre-monadic `thenNat` style. Three clauses:
-- compare-with-integer-zero (branch BI), compare-with-float-zero
-- (branch BF), and general comparisons via trivial(F)Code. Many
-- `where`/`let` lines are elided in this extract.
2743 genCondJump id (StPrim op [x, StInt 0])
2744 = getRegister x `thenNat` \ register ->
2745 getNewRegNat (registerRep register)
2748 code = registerCode register tmp
2749 value = registerName register tmp
2750 pk = registerRep register
2751 target = ImmCLbl lbl
2753 returnSeq code [BI (cmpOp op) value target]
2755 cmpOp CharGtOp = GTT
2757 cmpOp CharEqOp = EQQ
2759 cmpOp CharLtOp = LTT
-- NOTE(review): for unsigned (Word/Addr) comparisons against zero the
-- table degenerates to ALWAYS/NEVER/EQQ, since unsigned x >= 0 is
-- always true and x < 0 never is.
2768 cmpOp WordGeOp = ALWAYS
2769 cmpOp WordEqOp = EQQ
2771 cmpOp WordLtOp = NEVER
2772 cmpOp WordLeOp = EQQ
2774 cmpOp AddrGeOp = ALWAYS
2775 cmpOp AddrEqOp = EQQ
2777 cmpOp AddrLtOp = NEVER
2778 cmpOp AddrLeOp = EQQ
2780 genCondJump lbl (StPrim op [x, StDouble 0.0])
2781 = getRegister x `thenNat` \ register ->
2782 getNewRegNat (registerRep register)
2785 code = registerCode register tmp
2786 value = registerName register tmp
2787 pk = registerRep register
2788 target = ImmCLbl lbl
2790 return (code . mkSeqInstr (BF (cmpOp op) value target))
2792 cmpOp FloatGtOp = GTT
2793 cmpOp FloatGeOp = GE
2794 cmpOp FloatEqOp = EQQ
2795 cmpOp FloatNeOp = NE
2796 cmpOp FloatLtOp = LTT
2797 cmpOp FloatLeOp = LE
2798 cmpOp DoubleGtOp = GTT
2799 cmpOp DoubleGeOp = GE
2800 cmpOp DoubleEqOp = EQQ
2801 cmpOp DoubleNeOp = NE
2802 cmpOp DoubleLtOp = LTT
2803 cmpOp DoubleLeOp = LE
-- General float comparison: the FCMP instruction tests the *inverted*
-- condition where needed, and the paired branch condition compensates.
2805 genCondJump lbl (StPrim op [x, y])
2807 = trivialFCode pr instr x y `thenNat` \ register ->
2808 getNewRegNat F64 `thenNat` \ tmp ->
2810 code = registerCode register tmp
2811 result = registerName register tmp
2812 target = ImmCLbl lbl
2814 return (code . mkSeqInstr (BF cond result target))
2816 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2818 fltCmpOp op = case op of
2832 (instr, cond) = case op of
2833 FloatGtOp -> (FCMP TF LE, EQQ)
2834 FloatGeOp -> (FCMP TF LTT, EQQ)
2835 FloatEqOp -> (FCMP TF EQQ, NE)
2836 FloatNeOp -> (FCMP TF EQQ, EQQ)
2837 FloatLtOp -> (FCMP TF LTT, NE)
2838 FloatLeOp -> (FCMP TF LE, NE)
2839 DoubleGtOp -> (FCMP TF LE, EQQ)
2840 DoubleGeOp -> (FCMP TF LTT, EQQ)
2841 DoubleEqOp -> (FCMP TF EQQ, NE)
2842 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2843 DoubleLtOp -> (FCMP TF LTT, NE)
2844 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same inverted-condition scheme with CMP.
2846 genCondJump lbl (StPrim op [x, y])
2847 = trivialCode instr x y `thenNat` \ register ->
2848 getNewRegNat IntRep `thenNat` \ tmp ->
2850 code = registerCode register tmp
2851 result = registerName register tmp
2852 target = ImmCLbl lbl
2854 return (code . mkSeqInstr (BI cond result target))
2856 (instr, cond) = case op of
2857 CharGtOp -> (CMP LE, EQQ)
2858 CharGeOp -> (CMP LTT, EQQ)
2859 CharEqOp -> (CMP EQQ, NE)
2860 CharNeOp -> (CMP EQQ, EQQ)
2861 CharLtOp -> (CMP LTT, NE)
2862 CharLeOp -> (CMP LE, NE)
2863 IntGtOp -> (CMP LE, EQQ)
2864 IntGeOp -> (CMP LTT, EQQ)
2865 IntEqOp -> (CMP EQQ, NE)
2866 IntNeOp -> (CMP EQQ, EQQ)
2867 IntLtOp -> (CMP LTT, NE)
2868 IntLeOp -> (CMP LE, NE)
2869 WordGtOp -> (CMP ULE, EQQ)
2870 WordGeOp -> (CMP ULT, EQQ)
2871 WordEqOp -> (CMP EQQ, NE)
2872 WordNeOp -> (CMP EQQ, EQQ)
2873 WordLtOp -> (CMP ULT, NE)
2874 WordLeOp -> (CMP ULE, NE)
2875 AddrGtOp -> (CMP ULE, EQQ)
2876 AddrGeOp -> (CMP ULT, EQQ)
2877 AddrEqOp -> (CMP EQQ, NE)
2878 AddrNeOp -> (CMP EQQ, EQQ)
2879 AddrLtOp -> (CMP ULT, NE)
2880 AddrLeOp -> (CMP ULE, NE)
2882 #endif /* alpha_TARGET_ARCH */
2884 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2886 #if i386_TARGET_ARCH
-- Compute the condition codes for `bool`, then a single JXX to the
-- target block.
2888 genCondJump id bool = do
2889 CondCode _ cond code <- getCondCode bool
2890 return (code `snocOL` JXX cond id)
2894 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2896 #if x86_64_TARGET_ARCH
-- Integer conditions branch directly with JXX; floating-point
-- conditions need the parity flag consulted as well (unordered
-- results), via the plain_test / or_unordered / and_ordered sequences.
-- Most of those sequences are elided in this extract.
2898 genCondJump id bool = do
2899 CondCode is_float cond cond_code <- getCondCode bool
2902 return (cond_code `snocOL` JXX cond id)
2904 lbl <- getBlockIdNat
2906 -- see comment with condFltReg
2907 let code = case cond of
2913 plain_test = unitOL (
2916 or_unordered = toOL [
2920 and_ordered = toOL [
2926 return (cond_code `appOL` code)
2930 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2932 #if sparc_TARGET_ARCH
-- Float conditions get a NOP *before* BF, since a floating-point
-- operation may not directly precede a BF (see the comment block
-- above); both forms end with a NOP for the unfilled delay slot.
2934 genCondJump (BlockId id) bool = do
2935 CondCode is_float cond code <- getCondCode bool
2940 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2941 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2945 #endif /* sparc_TARGET_ARCH */
2948 #if powerpc_TARGET_ARCH
-- A single conditional branch (BCC) after the condition-code setup.
2950 genCondJump id bool = do
2951 CondCode is_float cond code <- getCondCode bool
2952 return (code `snocOL` BCC cond id)
2954 #endif /* powerpc_TARGET_ARCH */
2957 -- -----------------------------------------------------------------------------
2958 -- Generating C calls
2960 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2961 -- @get_arg@, which moves the arguments to the correct registers/stack
2962 -- locations. Apart from that, the code is easy.
2964 -- (If applicable) Do not fill the delay slots here; you will confuse the
2965 -- register allocator.
-- Signature of genCCall (its name line, orig. 2967, is elided here).
2968 :: CmmCallTarget -- function to call
2969 -> CmmFormals -- where to put the result
2970 -> CmmActuals -- arguments (of mixed type)
2973 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2975 #if alpha_TARGET_ARCH
-- Legacy Alpha calling convention: first six args in paired int/float
-- registers (used in lock-step), the rest on the stack; call via pv,
-- then re-establish gp with LDGP. Several lines are elided here.
2979 genCCall fn cconv result_regs args
2980 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2981 `thenNat` \ ((unused,_), argCode) ->
2983 nRegs = length allArgRegs - length unused
2984 code = asmSeqThen (map ($ []) argCode)
2987 LDA pv (AddrImm (ImmLab (ptext fn))),
2988 JSR ra (AddrReg pv) nRegs,
2989 LDGP gp (AddrReg ra)]
2991 ------------------------
2992 {- Try to get a value into a specific register (or registers) for
2993 a call. The first 6 arguments go into the appropriate
2994 argument register (separate registers for integer and floating
2995 point arguments, but used in lock-step), and the remaining
2996 arguments are dumped to the stack, beginning at 0(sp). Our
2997 first argument is a pair of the list of remaining argument
2998 registers to be assigned for this call and the next stack
2999 offset to use for overflowing arguments. This way,
3000 @get_Arg@ can be applied to all of a call's arguments using
3004 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3005 -> StixTree -- Current argument
3006 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3008 -- We have to use up all of our argument registers first...
3010 get_arg ((iDst,fDst):dsts, offset) arg
3011 = getRegister arg `thenNat` \ register ->
3013 reg = if isFloatingRep pk then fDst else iDst
3014 code = registerCode register reg
3015 src = registerName register reg
3016 pk = registerRep register
3019 if isFloatingRep pk then
3020 ((dsts, offset), if isFixed register then
3021 code . mkSeqInstr (FMOV src fDst)
3024 ((dsts, offset), if isFixed register then
3025 code . mkSeqInstr (OR src (RIReg src) iDst)
3028 -- Once we have run out of argument registers, we move to the
-- Stack overflow args: store at the running offset, bump by one slot.
3031 get_arg ([], offset) arg
3032 = getRegister arg `thenNat` \ register ->
3033 getNewRegNat (registerRep register)
3036 code = registerCode register tmp
3037 src = registerName register tmp
3038 pk = registerRep register
3039 sz = primRepToSize pk
3041 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3043 #endif /* alpha_TARGET_ARCH */
3045 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3047 #if i386_TARGET_ARCH
-- i386 C calls. Three clauses: write barrier (no code), inlinable x87
-- float primitives (sqrt/sin/cos/tan, else out-of-line), and the
-- general cdecl/stdcall path: push args right-to-left, CALL, caller
-- pops (cdecl only), then move results out of eax/edx/fake0.
-- Several `let`/`case` scaffolding lines are elided in this extract.
3049 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3050 -- write barrier compiles to no code on x86/x86-64;
3051 -- we keep it this long in order to prevent earlier optimisations.
3053 -- we only cope with a single result for foreign calls
3054 genCCall (CmmPrim op) [CmmKinded r _] args = do
3055 l1 <- getNewLabelNat
3056 l2 <- getNewLabelNat
3058 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3059 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3061 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32 l1 l2) args
3062 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args
3064 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32 l1 l2) args
3065 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args
3067 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32 l1 l2) args
3068 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args
3070 other_op -> outOfLineFloatOp op r args
3072 actuallyInlineFloatOp rep instr [CmmKinded x _]
3073 = do res <- trivialUFCode rep instr x
3075 return (any (getRegisterReg (CmmLocal r)))
3077 genCCall target dest_regs args = do
3079 sizes = map (arg_size . cmmExprRep . kindlessCmm) (reverse args)
3080 #if !darwin_TARGET_OS
3081 tot_arg_size = sum sizes
-- Darwin requires 16-byte stack alignment at the call, hence the
-- rounding and explicit padding below.
3083 raw_arg_size = sum sizes
3084 tot_arg_size = roundTo 16 raw_arg_size
3085 arg_pad_size = tot_arg_size - raw_arg_size
3086 delta0 <- getDeltaNat
3087 setDeltaNat (delta0 - arg_pad_size)
3090 push_codes <- mapM push_arg (reverse args)
3091 delta <- getDeltaNat
3094 -- deal with static vs dynamic call targets
3095 (callinsns,cconv) <-
3098 CmmCallee (CmmLit (CmmLabel lbl)) conv
3099 -> -- ToDo: stdcall arg sizes
3100 return (unitOL (CALL (Left fn_imm) []), conv)
3101 where fn_imm = ImmCLbl lbl
3103 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3104 ASSERT(dyn_rep == I32)
3105 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3108 #if darwin_TARGET_OS
3110 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3111 DELTA (delta0 - arg_pad_size)]
3112 `appOL` concatOL push_codes
3115 = concatOL push_codes
3116 call = callinsns `appOL`
3118 -- Deallocate parameters after call for ccall;
3119 -- but not for stdcall (callee does it)
3120 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3121 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3123 [DELTA (delta + tot_arg_size)]
3126 setDeltaNat (delta + tot_arg_size)
3129 -- assign the results, if necessary
-- I64 results arrive in eax:edx; float results in fake0 (presumably
-- modelling x87 %st(0) -- TODO confirm); everything else in eax.
3130 assign_code [] = nilOL
3131 assign_code [CmmKinded dest _hint] =
3133 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3134 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3135 F32 -> unitOL (GMOV fake0 r_dest)
3136 F64 -> unitOL (GMOV fake0 r_dest)
3137 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3139 r_dest_hi = getHiVRegFromLo r_dest
3140 rep = localRegRep dest
3141 r_dest = getRegisterReg (CmmLocal dest)
3142 assign_code many = panic "genCCall.assign_code many"
3144 return (push_code `appOL`
3146 assign_code dest_regs)
-- Round x up to the next multiple of a.
3154 roundTo a x | x `mod` a == 0 = x
3155 | otherwise = x + a - (x `mod` a)
3158 push_arg :: (CmmKinded CmmExpr){-current argument-}
3159 -> NatM InstrBlock -- code
3161 push_arg (CmmKinded arg _hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit words, high half first.
3162 | arg_rep == I64 = do
3163 ChildCode64 code r_lo <- iselExpr64 arg
3164 delta <- getDeltaNat
3165 setDeltaNat (delta - 8)
3167 r_hi = getHiVRegFromLo r_lo
3169 return ( code `appOL`
3170 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3171 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Floats cannot be PUSHed: make room on the stack, then GST into it.
3176 (code, reg, sz) <- get_op arg
3177 delta <- getDeltaNat
3178 let size = arg_size sz
3179 setDeltaNat (delta-size)
3180 if (case sz of F64 -> True; F32 -> True; _ -> False)
3181 then return (code `appOL`
3182 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3184 GST sz reg (AddrBaseIndex (EABaseReg esp)
3188 else return (code `snocOL`
3189 PUSH I32 (OpReg reg) `snocOL`
3193 arg_rep = cmmExprRep arg
3196 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3198 (reg,code) <- getSomeReg op
3199 return (code, reg, cmmExprRep op)
3201 #endif /* i386_TARGET_ARCH */
3203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Compile a floating-point CallishMachOp as an unsafe C call to the
-- corresponding libm routine (sqrt/sin/.../pow, with the f-suffixed
-- variants for F32). An F64 result is called directly; other result
-- reps go through a fresh F64 temporary and are then assigned across
-- (the C routines return double). Several lines are elided here.
3205 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3207 outOfLineFloatOp mop res args
3209 dflags <- getDynFlagsNat
3210 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3211 let target = CmmCallee targetExpr CCallConv
3213 if localRegRep res == F64
3215 stmtToInstrs (CmmCall target [CmmKinded res FloatHint] args CmmUnsafe CmmMayReturn)
3219 tmp = LocalReg uq F64 GCKindNonPtr
3221 code1 <- stmtToInstrs (CmmCall target [CmmKinded tmp FloatHint] args CmmUnsafe CmmMayReturn)
3222 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3223 return (code1 `appOL` code2)
3225 lbl = mkForeignLabel fn Nothing False
3228 MO_F32_Sqrt -> fsLit "sqrtf"
3229 MO_F32_Sin -> fsLit "sinf"
3230 MO_F32_Cos -> fsLit "cosf"
3231 MO_F32_Tan -> fsLit "tanf"
3232 MO_F32_Exp -> fsLit "expf"
3233 MO_F32_Log -> fsLit "logf"
3235 MO_F32_Asin -> fsLit "asinf"
3236 MO_F32_Acos -> fsLit "acosf"
3237 MO_F32_Atan -> fsLit "atanf"
3239 MO_F32_Sinh -> fsLit "sinhf"
3240 MO_F32_Cosh -> fsLit "coshf"
3241 MO_F32_Tanh -> fsLit "tanhf"
3242 MO_F32_Pwr -> fsLit "powf"
3244 MO_F64_Sqrt -> fsLit "sqrt"
3245 MO_F64_Sin -> fsLit "sin"
3246 MO_F64_Cos -> fsLit "cos"
3247 MO_F64_Tan -> fsLit "tan"
3248 MO_F64_Exp -> fsLit "exp"
3249 MO_F64_Log -> fsLit "log"
3251 MO_F64_Asin -> fsLit "asin"
3252 MO_F64_Acos -> fsLit "acos"
3253 MO_F64_Atan -> fsLit "atan"
3255 MO_F64_Sinh -> fsLit "sinh"
3256 MO_F64_Cosh -> fsLit "cosh"
3257 MO_F64_Tanh -> fsLit "tanh"
3258 MO_F64_Pwr -> fsLit "pow"
3260 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3264 #if x86_64_TARGET_ARCH
-- genCCall, x86_64: select instructions for a C call.
-- NOTE(review): some original source lines are elided in this excerpt
-- (gaps in the embedded line numbers); code below is kept byte-identical.
3266 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3267 -- write barrier compiles to no code on x86/x86-64;
3268 -- we keep it this long in order to prevent earlier optimisations.
-- Callish MachOps with exactly one result register become out-of-line
-- calls to C math routines (see outOfLineFloatOp below in this file).
3271 genCCall (CmmPrim op) [CmmKinded r _] args =
3272 outOfLineFloatOp op r args
-- General case: marshal args into registers/stack slots, adjust %rsp
-- for alignment, emit the call, then move any results back.
3274 genCCall target dest_regs args = do
3276 -- load up the register arguments
3277 (stack_args, aregs, fregs, load_args_code)
3278 <- load_args args allArgRegs allFPArgRegs nilOL
-- aregs/fregs are the *leftover* register pools, so the registers
-- actually used are the pool prefixes of the complementary length.
3281 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3282 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3283 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3284 -- for annotating the call instruction with
3286 sse_regs = length fp_regs_used
3288 tot_arg_size = arg_size * length stack_args
3290 -- On entry to the called function, %rsp should be aligned
3291 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3292 -- the return address is 16-byte aligned). In STG land
3293 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3294 -- need to make sure we push a multiple of 16-bytes of args,
3295 -- plus the return address, to get the correct alignment.
3296 -- Urg, this is hard. We need to feed the delta back into
3297 -- the arg pushing code.
3298 (real_size, adjust_rsp) <-
3299 if tot_arg_size `rem` 16 == 0
3300 then return (tot_arg_size, nilOL)
3301 else do -- we need to adjust...
3302 delta <- getDeltaNat
3303 setDeltaNat (delta-8)
3304 return (tot_arg_size+8, toOL [
3305 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3309 -- push the stack args, right to left
3310 push_code <- push_args (reverse stack_args) nilOL
3311 delta <- getDeltaNat
3313 -- deal with static vs dynamic call targets
3314 (callinsns,cconv) <-
3317 CmmCallee (CmmLit (CmmLabel lbl)) conv
3318 -> -- ToDo: stdcall arg sizes
3319 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3320 where fn_imm = ImmCLbl lbl
3322 -> do (dyn_r, dyn_c) <- getSomeReg expr
3323 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3326 -- The x86_64 ABI requires us to set %al to the number of SSE
3327 -- registers that contain arguments, if the called routine
3328 -- is a varargs function. We don't know whether it's a
3329 -- varargs function or not, so we have to assume it is.
3331 -- It's not safe to omit this assignment, even if the number
3332 -- of SSE regs in use is zero. If %al is larger than 8
3333 -- on entry to a varargs function, seg faults ensue.
3334 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3336 let call = callinsns `appOL`
3338 -- Deallocate parameters after call for ccall;
3339 -- but not for stdcall (callee does it)
3340 (if cconv == StdCallConv || real_size==0 then [] else
3341 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3343 [DELTA (delta + real_size)]
3346 setDeltaNat (delta + real_size)
3349 -- assign the results, if necessary
3350 assign_code [] = nilOL
3351 assign_code [CmmKinded dest _hint] =
-- Floating results come back in %xmm0; everything else in %rax.
3353 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3354 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3357 rep = localRegRep dest
3358 r_dest = getRegisterReg (CmmLocal dest)
3359 assign_code many = panic "genCCall.assign_code many"
3361 return (load_args_code `appOL`
3364 assign_eax sse_regs `appOL`
3366 assign_code dest_regs)
-- Stack argument slots are always word-sized (8 bytes) here.
3369 arg_size = 8 -- always, at the mo
-- load_args: assign as many arguments as possible to the available
-- int/FP argument registers; returns (args left for the stack,
-- unused int regs, unused FP regs, loading code).
3371 load_args :: [CmmKinded CmmExpr]
3372 -> [Reg] -- int regs avail for args
3373 -> [Reg] -- FP regs avail for args
3375 -> NatM ([CmmKinded CmmExpr],[Reg],[Reg],InstrBlock)
3376 load_args args [] [] code = return (args, [], [], code)
3377 -- no more regs to use
3378 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3379 -- no more args to push
3380 load_args ((CmmKinded arg hint) : rest) aregs fregs code
3381 | isFloatingRep arg_rep =
3385 arg_code <- getAnyReg arg
3386 load_args rest aregs rs (code `appOL` arg_code r)
3391 arg_code <- getAnyReg arg
3392 load_args rest rs fregs (code `appOL` arg_code r)
3394 arg_rep = cmmExprRep arg
3397 (args',ars,frs,code') <- load_args rest aregs fregs code
3398 return ((CmmKinded arg hint):args', ars, frs, code')
-- push_args: emit the pushes for stack-resident arguments,
-- keeping the frame delta in sync with every %rsp change.
3400 push_args [] code = return code
3401 push_args ((CmmKinded arg hint):rest) code
3402 | isFloatingRep arg_rep = do
3403 (arg_reg, arg_code) <- getSomeReg arg
3404 delta <- getDeltaNat
3405 setDeltaNat (delta-arg_size)
-- FP values can't be PUSHed; make room with SUB, then MOV to (%rsp).
3406 let code' = code `appOL` arg_code `appOL` toOL [
3407 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3408 DELTA (delta-arg_size),
3409 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3410 push_args rest code'
3413 -- we only ever generate word-sized function arguments. Promotion
3414 -- has already happened: our Int8# type is kept sign-extended
3415 -- in an Int#, for example.
3416 ASSERT(arg_rep == I64) return ()
3417 (arg_op, arg_code) <- getOperand arg
3418 delta <- getDeltaNat
3419 setDeltaNat (delta-arg_size)
3420 let code' = code `appOL` toOL [PUSH I64 arg_op,
3421 DELTA (delta-arg_size)]
3422 push_args rest code'
3424 arg_rep = cmmExprRep arg
3427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3429 #if sparc_TARGET_ARCH
3431 The SPARC calling convention is an absolute
3432 nightmare. The first 6x32 bits of arguments are mapped into
3433 %o0 through %o5, and the remaining arguments are dumped to the
3434 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3436 If we have to put args on the stack, move %o6==%sp down by
3437 the number of words to go on the stack, to ensure there's enough space.
3439 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3440 16 words above the stack pointer is a word for the address of
3441 a structure return value. I use this as a temporary location
3442 for moving values from float to int regs. Certainly it isn't
3443 safe to put anything in the 16 words starting at %sp, since
3444 this area can get trashed at any time due to window overflows
3445 caused by signal handlers.
3447 A final complication (if the above isn't enough) is that
3448 we can't blithely calculate the arguments one by one into
3449 %o0 .. %o5. Consider the following nested calls:
3453 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3454 the inner call will itself use %o0, which trashes the value put there
3455 in preparation for the outer call. Upshot: we need to calculate the
3456 args into temporary regs, and move those to arg regs or onto the
3457 stack only immediately prior to the call proper. Sigh.
-- genCCall, SPARC: marshal all args into fresh integer vregs first,
-- then move them into %o0..%o5 / the stack immediately before the call
-- (see the calling-convention commentary above).
-- NOTE(review): some original source lines are elided in this excerpt.
3460 genCCall target dest_regs argsAndHints = do
3462 args = map kindlessCmm argsAndHints
3463 argcode_and_vregs <- mapM arg_to_int_vregs args
3465 (argcodes, vregss) = unzip argcode_and_vregs
3466 n_argRegs = length allArgRegs
3467 n_argRegs_used = min (length vregs) n_argRegs
3468 vregs = concat vregss
3469 -- deal with static vs dynamic call targets
3470 callinsns <- (case target of
3471 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3472 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3473 CmmCallee expr conv -> do
3474 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3475 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3477 (res, reduce) <- outOfLineFloatOp mop
3478 lblOrMopExpr <- case res of
3480 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3482 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3483 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3484 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3488 argcode = concatOL argcodes
-- If more args than fit in %o0..%o5, grow the stack (rounded up to
-- keep 8-byte alignment) before the call and shrink it after.
3489 (move_sp_down, move_sp_up)
3490 = let diff = length vregs - n_argRegs
3491 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3494 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3496 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3497 return (argcode `appOL`
3498 move_sp_down `appOL`
3499 transfer_code `appOL`
3504 -- move args from the integer vregs into which they have been
3505 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3506 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3508 move_final [] _ offset -- all args done
3511 move_final (v:vs) [] offset -- out of aregs; move to stack
3512 = ST I32 v (spRel offset)
3513 : move_final vs [] (offset+1)
3515 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3516 = OR False g0 (RIReg v) a
3517 : move_final vs az offset
3519 -- generate code to calculate an argument, and move it into one
3520 -- or two integer vregs.
3521 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-- 64-bit values occupy two I32 vregs (high word first).
3522 arg_to_int_vregs arg
3523 | (cmmExprRep arg) == I64
3525 (ChildCode64 code r_lo) <- iselExpr64 arg
3527 r_hi = getHiVRegFromLo r_lo
3528 return (code, [r_hi, r_lo])
3531 (src, code) <- getSomeReg arg
3532 tmp <- getNewRegNat (cmmExprRep arg)
-- Floats are bounced through the scratch slot at [%sp+16]
-- (see the commentary above) to move them into int regs.
3537 v1 <- getNewRegNat I32
3538 v2 <- getNewRegNat I32
3541 FMOV F64 src f0 `snocOL`
3542 ST F32 f0 (spRel 16) `snocOL`
3543 LD I32 (spRel 16) v1 `snocOL`
3544 ST F32 (fPair f0) (spRel 16) `snocOL`
3545 LD I32 (spRel 16) v2
3550 v1 <- getNewRegNat I32
3553 ST F32 src (spRel 16) `snocOL`
3554 LD I32 (spRel 16) v1
3559 v1 <- getNewRegNat I32
3561 code `snocOL` OR False g0 (RIReg src) v1
-- outOfLineFloatOp, SPARC: map a callish MachOp to the name of the C
-- math routine that implements it, as a label or (under PIC) a dynamic
-- reference.  The Bool is the "reduce" flag: True for the F32 ops,
-- which reuse the double-precision routine -- presumably the caller is
-- expected to narrow the result back to F32 (TODO confirm; the sparc
-- genCCall above panics when reduce is True).
3565 outOfLineFloatOp mop =
3567 dflags <- getDynFlagsNat
3568 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3569 mkForeignLabel functionName Nothing True
3570 let mopLabelOrExpr = case mopExpr of
3571 CmmLit (CmmLabel lbl) -> Left lbl
3573 return (mopLabelOrExpr, reduce)
3575 (reduce, functionName) = case mop of
3576 MO_F32_Exp -> (True, fsLit "exp")
3577 MO_F32_Log -> (True, fsLit "log")
3578 MO_F32_Sqrt -> (True, fsLit "sqrt")
3580 MO_F32_Sin -> (True, fsLit "sin")
3581 MO_F32_Cos -> (True, fsLit "cos")
3582 MO_F32_Tan -> (True, fsLit "tan")
3584 MO_F32_Asin -> (True, fsLit "asin")
3585 MO_F32_Acos -> (True, fsLit "acos")
3586 MO_F32_Atan -> (True, fsLit "atan")
3588 MO_F32_Sinh -> (True, fsLit "sinh")
3589 MO_F32_Cosh -> (True, fsLit "cosh")
3590 MO_F32_Tanh -> (True, fsLit "tanh")
3592 MO_F64_Exp -> (False, fsLit "exp")
3593 MO_F64_Log -> (False, fsLit "log")
3594 MO_F64_Sqrt -> (False, fsLit "sqrt")
3596 MO_F64_Sin -> (False, fsLit "sin")
3597 MO_F64_Cos -> (False, fsLit "cos")
3598 MO_F64_Tan -> (False, fsLit "tan")
3600 MO_F64_Asin -> (False, fsLit "asin")
3601 MO_F64_Acos -> (False, fsLit "acos")
3602 MO_F64_Atan -> (False, fsLit "atan")
3604 MO_F64_Sinh -> (False, fsLit "sinh")
3605 MO_F64_Cosh -> (False, fsLit "cosh")
3606 MO_F64_Tanh -> (False, fsLit "tanh")
3608 other -> pprPanic "outOfLineFloatOp(sparc) "
3609 (pprCallishMachOp mop)
3611 #endif /* sparc_TARGET_ARCH */
3613 #if powerpc_TARGET_ARCH
3615 #if darwin_TARGET_OS || linux_TARGET_OS
3617 The PowerPC calling convention for Darwin/Mac OS X
3618 is described in Apple's document
3619 "Inside Mac OS X - Mach-O Runtime Architecture".
3621 PowerPC Linux uses the System V Release 4 Calling Convention
3622 for PowerPC. It is described in the
3623 "System V Application Binary Interface PowerPC Processor Supplement".
3625 Both conventions are similar:
3626 Parameters may be passed in general-purpose registers starting at r3, in
3627 floating point registers starting at f1, or on the stack.
3629 But there are substantial differences:
3630 * The number of registers used for parameter passing and the exact set of
3631 nonvolatile registers differs (see MachRegs.lhs).
3632 * On Darwin, stack space is always reserved for parameters, even if they are
3633 passed in registers. The called routine may choose to save parameters from
3634 registers to the corresponding space on the stack.
3635 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3636 parameter is passed in an FPR.
3637 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3638 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3639 Darwin just treats an I64 like two separate I32s (high word first).
3640 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3641 4-byte aligned like everything else on Darwin.
3642 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3643 PowerPC Linux does not agree, so neither do we.
3645 According to both conventions, the parameter area should be part of the
3646 caller's stack frame, allocated in the caller's prologue code (large enough
3647 to hold the parameter lists for all called routines). The NCG already
3648 uses the stack for register spilling, leaving 64 bytes free at the top.
3649 If we need a larger parameter area than that, we just allocate a new stack
3650 frame just before ccalling.
-- genCCall, PowerPC (Darwin and Linux/SysV; see commentary above).
-- NOTE(review): some original source lines are elided in this excerpt.
-- A write barrier is a single lwsync on PPC.
3654 genCCall (CmmPrim MO_WriteBarrier) _ _
3655 = return $ unitOL LWSYNC
3657 genCCall target dest_regs argsAndHints
3658 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3659 -- we rely on argument promotion in the codeGen
3661 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3663 allArgRegs allFPArgRegs
3667 (labelOrExpr, reduceToF32) <- case target of
3668 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3669 CmmCallee expr conv -> return (Right expr, False)
3670 CmmPrim mop -> outOfLineFloatOp mop
3672 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3673 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- Static target: direct BL.  Dynamic target: load into CTR and BCTRL.
3678 `snocOL` BL lbl usedRegs
3681 (dynReg, dynCode) <- getSomeReg dyn
3683 `snocOL` MTCTR dynReg
3685 `snocOL` BCTRL usedRegs
3688 #if darwin_TARGET_OS
3689 initialStackOffset = 24
3690 -- size of linkage area + size of arguments, in bytes
3691 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3692 map machRepByteWidth argReps
3693 #elif linux_TARGET_OS
3694 initialStackOffset = 8
3695 stackDelta finalStack = roundTo 16 finalStack
3697 args = map kindlessCmm argsAndHints
3698 argReps = map cmmExprRep args
-- round x up to the next multiple of a
3700 roundTo a x | x `mod` a == 0 = x
3701 | otherwise = x + a - (x `mod` a)
-- Allocate/deallocate a fresh stack frame around the call when the
-- parameter area doesn't fit in the existing frame.
3703 move_sp_down finalStack
3705 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3708 where delta = stackDelta finalStack
3709 move_sp_up finalStack
3711 toOL [ADD sp sp (RIImm (ImmInt delta)),
3714 where delta = stackDelta finalStack
-- passArguments: walk the args, consuming GPRs/FPRs and stack space
-- per the platform ABI; returns (final stack offset, code, used regs).
3717 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 args: Darwin treats them as two I32s (high word first); SysV
-- wants an odd-even GPR pair or 8-byte-aligned stack (see commentary).
3718 passArguments ((arg,I64):args) gprs fprs stackOffset
3719 accumCode accumUsed =
3721 ChildCode64 code vr_lo <- iselExpr64 arg
3722 let vr_hi = getHiVRegFromLo vr_lo
3724 #if darwin_TARGET_OS
3729 (accumCode `appOL` code
3730 `snocOL` storeWord vr_hi gprs stackOffset
3731 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3732 ((take 2 gprs) ++ accumUsed)
3734 storeWord vr (gpr:_) offset = MR gpr vr
3735 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3737 #elif linux_TARGET_OS
3738 let stackOffset' = roundTo 8 stackOffset
3739 stackCode = accumCode `appOL` code
3740 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3741 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3742 regCode hireg loreg =
3743 accumCode `appOL` code
3744 `snocOL` MR hireg vr_hi
3745 `snocOL` MR loreg vr_lo
-- "even (length gprs)" means the next free GPR is odd-numbered,
-- as SysV requires for the high word; otherwise skip one GPR.
3748 hireg : loreg : regs | even (length gprs) ->
3749 passArguments args regs fprs stackOffset
3750 (regCode hireg loreg) (hireg : loreg : accumUsed)
3751 _skipped : hireg : loreg : regs ->
3752 passArguments args regs fprs stackOffset
3753 (regCode hireg loreg) (hireg : loreg : accumUsed)
3754 _ -> -- only one or no regs left
3755 passArguments args [] fprs (stackOffset'+8)
-- Non-I64 args: use a register if one is free, else a stack slot.
3759 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3760 | reg : _ <- regs = do
3761 register <- getRegister arg
3762 let code = case register of
3763 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3764 Any _ acode -> acode reg
3768 #if darwin_TARGET_OS
3769 -- The Darwin ABI requires that we reserve stack slots for register parameters
3770 (stackOffset + stackBytes)
3771 #elif linux_TARGET_OS
3772 -- ... the SysV ABI doesn't.
3775 (accumCode `appOL` code)
3778 (vr, code) <- getSomeReg arg
3782 (stackOffset' + stackBytes)
3783 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3786 #if darwin_TARGET_OS
3787 -- stackOffset is at least 4-byte aligned
3788 -- The Darwin ABI is happy with that.
3789 stackOffset' = stackOffset
3791 -- ... the SysV ABI requires 8-byte alignment for doubles.
3792 stackOffset' | rep == F64 = roundTo 8 stackOffset
3793 | otherwise = stackOffset
3795 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- how many GPRs/FPRs/stack bytes this rep consumes, and which pool.
3796 (nGprs, nFprs, stackBytes, regs) = case rep of
3797 I32 -> (1, 0, 4, gprs)
3798 #if darwin_TARGET_OS
3799 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3801 F32 -> (1, 1, 4, fprs)
3802 F64 -> (2, 1, 8, fprs)
3803 #elif linux_TARGET_OS
3804 -- ... the SysV ABI doesn't.
3805 F32 -> (0, 1, 4, fprs)
3806 F64 -> (0, 1, 8, fprs)
-- moveResult: fetch the return value from r3/r4 (ints) or f1 (floats);
-- FRSP narrows a double result back to F32 when the out-of-line
-- routine computed at double precision.
3809 moveResult reduceToF32 =
3812 [CmmKinded dest _hint]
3813 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3814 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3815 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3817 | otherwise -> unitOL (MR r_dest r3)
3818 where rep = cmmRegRep (CmmLocal dest)
3819 r_dest = getRegisterReg (CmmLocal dest)
-- outOfLineFloatOp, PowerPC: map a callish MachOp to the C math routine
-- implementing it (label, or dynamic reference under PIC).  The Bool is
-- the reduceToF32 flag consumed by moveResult above: F32 ops call the
-- double-precision routine and the result is narrowed with FRSP.
3821 outOfLineFloatOp mop =
3823 dflags <- getDynFlagsNat
3824 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3825 mkForeignLabel functionName Nothing True
3826 let mopLabelOrExpr = case mopExpr of
3827 CmmLit (CmmLabel lbl) -> Left lbl
3829 return (mopLabelOrExpr, reduce)
3831 (functionName, reduce) = case mop of
3832 MO_F32_Exp -> (fsLit "exp", True)
3833 MO_F32_Log -> (fsLit "log", True)
3834 MO_F32_Sqrt -> (fsLit "sqrt", True)
3836 MO_F32_Sin -> (fsLit "sin", True)
3837 MO_F32_Cos -> (fsLit "cos", True)
3838 MO_F32_Tan -> (fsLit "tan", True)
3840 MO_F32_Asin -> (fsLit "asin", True)
3841 MO_F32_Acos -> (fsLit "acos", True)
3842 MO_F32_Atan -> (fsLit "atan", True)
3844 MO_F32_Sinh -> (fsLit "sinh", True)
3845 MO_F32_Cosh -> (fsLit "cosh", True)
3846 MO_F32_Tanh -> (fsLit "tanh", True)
3847 MO_F32_Pwr -> (fsLit "pow", True)
3849 MO_F64_Exp -> (fsLit "exp", False)
3850 MO_F64_Log -> (fsLit "log", False)
3851 MO_F64_Sqrt -> (fsLit "sqrt", False)
3853 MO_F64_Sin -> (fsLit "sin", False)
3854 MO_F64_Cos -> (fsLit "cos", False)
3855 MO_F64_Tan -> (fsLit "tan", False)
3857 MO_F64_Asin -> (fsLit "asin", False)
3858 MO_F64_Acos -> (fsLit "acos", False)
3859 MO_F64_Atan -> (fsLit "atan", False)
3861 MO_F64_Sinh -> (fsLit "sinh", False)
3862 MO_F64_Cosh -> (fsLit "cosh", False)
3863 MO_F64_Tanh -> (fsLit "tanh", False)
3864 MO_F64_Pwr -> (fsLit "pow", False)
3865 other -> pprPanic "genCCall(ppc): unknown callish op"
3866 (pprCallishMachOp other)
3868 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3870 #endif /* powerpc_TARGET_ARCH */
3873 -- -----------------------------------------------------------------------------
3874 -- Generating a table-branch
-- genSwitch: compile a table-branch (Cmm switch) into a jump table plus
-- an indexed indirect jump.  Nothing entries in the id list are holes
-- and become zero words in the table.
-- NOTE(review): some original source lines are elided in this excerpt.
3876 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3878 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- PIC / x86_64 case: table holds label *differences* relative to the
-- table label, added to the table base at runtime.
3882 (reg,e_code) <- getSomeReg expr
3883 lbl <- getNewLabelNat
3884 dflags <- getDynFlagsNat
3885 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3886 (tableReg,t_code) <- getSomeReg $ dynRef
3888 jumpTable = map jumpTableEntryRel ids
3890 jumpTableEntryRel Nothing
3891 = CmmStaticLit (CmmInt 0 wordRep)
3892 jumpTableEntryRel (Just (BlockId id))
3893 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3894 where blockLabel = mkAsmTempLabel id
3896 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3897 (EAIndex reg wORD_SIZE) (ImmInt 0))
3899 #if x86_64_TARGET_ARCH
3900 #if darwin_TARGET_OS
3901 -- on Mac OS X/x86_64, put the jump table in the text section
3902 -- to work around a limitation of the linker.
3903 -- ld64 is unable to handle the relocations for
3905 -- if L0 is not preceded by a non-anonymous label in its section.
3907 code = e_code `appOL` t_code `appOL` toOL [
3908 ADD wordRep op (OpReg tableReg),
3909 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3910 LDATA Text (CmmDataLabel lbl : jumpTable)
3913 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3914 -- relocations, hence we only get 32-bit offsets in the jump
3915 -- table. As these offsets are always negative we need to properly
3916 -- sign extend them to 64-bit. This hack should be removed in
3917 -- conjunction with the hack in PprMach.hs/pprDataItem once
3918 -- binutils 2.17 is standard.
3919 code = e_code `appOL` t_code `appOL` toOL [
3920 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3922 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3923 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3925 ADD wordRep (OpReg reg) (OpReg tableReg),
3926 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3930 code = e_code `appOL` t_code `appOL` toOL [
3931 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3932 ADD wordRep op (OpReg tableReg),
3933 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC i386 case: absolute labels in the table, single indexed jump.
3939 (reg,e_code) <- getSomeReg expr
3940 lbl <- getNewLabelNat
3942 jumpTable = map jumpTableEntry ids
3943 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3944 code = e_code `appOL` toOL [
3945 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946 JMP_TBL op [ id | Just id <- ids ]
3950 #elif powerpc_TARGET_ARCH
-- PowerPC PIC case: scale the index by 4, load the relative entry,
-- add the table base, and branch via CTR.
3954 (reg,e_code) <- getSomeReg expr
3955 tmp <- getNewRegNat I32
3956 lbl <- getNewLabelNat
3957 dflags <- getDynFlagsNat
3958 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3959 (tableReg,t_code) <- getSomeReg $ dynRef
3961 jumpTable = map jumpTableEntryRel ids
3963 jumpTableEntryRel Nothing
3964 = CmmStaticLit (CmmInt 0 wordRep)
3965 jumpTableEntryRel (Just (BlockId id))
3966 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3967 where blockLabel = mkAsmTempLabel id
3969 code = e_code `appOL` t_code `appOL` toOL [
3970 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3971 SLW tmp reg (RIImm (ImmInt 2)),
3972 LD I32 tmp (AddrRegReg tableReg tmp),
3973 ADD tmp tmp (RIReg tableReg),
3975 BCTR [ id | Just id <- ids ]
-- PowerPC non-PIC case: absolute table addressed via HA/LO of its label.
3980 (reg,e_code) <- getSomeReg expr
3981 tmp <- getNewRegNat I32
3982 lbl <- getNewLabelNat
3984 jumpTable = map jumpTableEntry ids
3986 code = e_code `appOL` toOL [
3987 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3988 SLW tmp reg (RIImm (ImmInt 2)),
3989 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3990 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3992 BCTR [ id | Just id <- ids ]
3996 #error "ToDo: genSwitch"
-- | Build one (absolute) jump-table slot: the block's assembly temp
-- label for a live branch target, or a zero word for a hole.
jumpTableEntry (Just (BlockId id)) =
    CmmStaticLit (CmmLabel (mkAsmTempLabel id))
jumpTableEntry Nothing =
    CmmStaticLit (CmmInt 0 wordRep)
4003 -- -----------------------------------------------------------------------------
4005 -- -----------------------------------------------------------------------------
4008 -- -----------------------------------------------------------------------------
4009 -- 'condIntReg' and 'condFltReg': condition codes into registers
4011 -- Turn those condition codes into integers now (when they appear on
4012 -- the right hand side of an assignment).
4014 -- (If applicable) Do not fill the delay slots here; you will confuse the
4015 -- register allocator.
-- Materialise a comparison result as 0/1 in a register (see commentary
-- above); one pair of definitions per target architecture below.
4017 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4019 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4021 #if alpha_TARGET_ARCH
-- Alpha is not supported by this code generator.
4022 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4023 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4024 #endif /* alpha_TARGET_ARCH */
4026 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4028 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: SETcc writes one byte, so set a byte temp and zero-extend
-- it into the 32-bit destination.
-- NOTE(review): a couple of original lines (let/bracket) are elided here.
4030 condIntReg cond x y = do
4031 CondCode _ cond cond_code <- condIntCode cond x y
4032 tmp <- getNewRegNat I8
4034 code dst = cond_code `appOL` toOL [
4035 SETCC cond (OpReg tmp),
4036 MOVZxL I8 (OpReg tmp) (OpReg dst)
4039 return (Any I32 code)
4043 #if i386_TARGET_ARCH
-- i386: float comparison to 0/1, same SETcc + zero-extend scheme as
-- condIntReg above (x87 comparisons land in the integer flags first).
4045 condFltReg cond x y = do
4046 CondCode _ cond cond_code <- condFltCode cond x y
4047 tmp <- getNewRegNat I8
4049 code dst = cond_code `appOL` toOL [
4050 SETCC cond (OpReg tmp),
4051 MOVZxL I8 (OpReg tmp) (OpReg dst)
4054 return (Any I32 code)
4058 #if x86_64_TARGET_ARCH
-- x86_64: SSE float comparison to 0/1; must fold in the parity flag to
-- get IEEE-correct answers for unordered (NaN) operands.
4060 condFltReg cond x y = do
4061 CondCode _ cond cond_code <- condFltCode cond x y
4062 tmp1 <- getNewRegNat wordRep
4063 tmp2 <- getNewRegNat wordRep
4065 -- We have to worry about unordered operands (eg. comparisons
4066 -- against NaN). If the operands are unordered, the comparison
4067 -- sets the parity flag, carry flag and zero flag.
4068 -- All comparisons are supposed to return false for unordered
4069 -- operands except for !=, which returns true.
4071 -- Optimisation: we don't have to test the parity flag if we
4072 -- know the test has already excluded the unordered case: eg >
4073 -- and >= test for a zero carry flag, which can only occur for
4074 -- ordered operands.
4076 -- ToDo: by reversing comparisons we could avoid testing the
4077 -- parity flag in more cases.
-- Dispatch on the condition: NE must be true for unordered (OR with
-- parity); GU/GEU are safe without a parity test; everything else must
-- be false for unordered (AND with not-parity).
4082 NE -> or_unordered dst
4083 GU -> plain_test dst
4084 GEU -> plain_test dst
4085 _ -> and_ordered dst)
4087 plain_test dst = toOL [
4088 SETCC cond (OpReg tmp1),
4089 MOVZxL I8 (OpReg tmp1) (OpReg dst)
4091 or_unordered dst = toOL [
4092 SETCC cond (OpReg tmp1),
4093 SETCC PARITY (OpReg tmp2),
4094 OR I8 (OpReg tmp1) (OpReg tmp2),
4095 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4097 and_ordered dst = toOL [
4098 SETCC cond (OpReg tmp1),
4099 SETCC NOTPARITY (OpReg tmp2),
4100 AND I8 (OpReg tmp1) (OpReg tmp2),
4101 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4104 return (Any I32 code)
4108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4110 #if sparc_TARGET_ARCH
-- SPARC: materialise comparisons as 0/1.  EQ/NE (and their
-- compare-to-zero special cases) use branch-free carry tricks with
-- SUBcc/ADDX; all other conditions fall back to a small branch diamond.
-- NOTE(review): some original lines (let headers) are elided here.
4112 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4113 (src, code) <- getSomeReg x
4114 tmp <- getNewRegNat I32
4116 code__2 dst = code `appOL` toOL [
4117 SUB False True g0 (RIReg src) g0,
4118 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4119 return (Any I32 code__2)
-- general EQ: XOR the operands first, then test the result against zero
4121 condIntReg EQQ x y = do
4122 (src1, code1) <- getSomeReg x
4123 (src2, code2) <- getSomeReg y
4124 tmp1 <- getNewRegNat I32
4125 tmp2 <- getNewRegNat I32
4127 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4128 XOR False src1 (RIReg src2) dst,
4129 SUB False True g0 (RIReg dst) g0,
4130 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4131 return (Any I32 code__2)
4133 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4134 (src, code) <- getSomeReg x
4135 tmp <- getNewRegNat I32
4137 code__2 dst = code `appOL` toOL [
4138 SUB False True g0 (RIReg src) g0,
4139 ADD True False g0 (RIImm (ImmInt 0)) dst]
4140 return (Any I32 code__2)
4142 condIntReg NE x y = do
4143 (src1, code1) <- getSomeReg x
4144 (src2, code2) <- getSomeReg y
4145 tmp1 <- getNewRegNat I32
4146 tmp2 <- getNewRegNat I32
4148 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4149 XOR False src1 (RIReg src2) dst,
4150 SUB False True g0 (RIReg dst) g0,
4151 ADD True False g0 (RIImm (ImmInt 0)) dst]
4152 return (Any I32 code__2)
-- generic case: dst := 0, branch on cond over "dst := 1"
4154 condIntReg cond x y = do
4155 BlockId lbl1 <- getBlockIdNat
4156 BlockId lbl2 <- getBlockIdNat
4157 CondCode _ cond cond_code <- condIntCode cond x y
4159 code__2 dst = cond_code `appOL` toOL [
4160 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4161 OR False g0 (RIImm (ImmInt 0)) dst,
4162 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4163 NEWBLOCK (BlockId lbl1),
4164 OR False g0 (RIImm (ImmInt 1)) dst,
4165 NEWBLOCK (BlockId lbl2)]
4166 return (Any I32 code__2)
-- floats always use the branch-diamond scheme (BF = branch on fcc)
4168 condFltReg cond x y = do
4169 BlockId lbl1 <- getBlockIdNat
4170 BlockId lbl2 <- getBlockIdNat
4171 CondCode _ cond cond_code <- condFltCode cond x y
4173 code__2 dst = cond_code `appOL` toOL [
4175 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4176 OR False g0 (RIImm (ImmInt 0)) dst,
4177 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4178 NEWBLOCK (BlockId lbl1),
4179 OR False g0 (RIImm (ImmInt 1)) dst,
4180 NEWBLOCK (BlockId lbl2)]
4181 return (Any I32 code__2)
4183 #endif /* sparc_TARGET_ARCH */
4185 #if powerpc_TARGET_ARCH
-- PowerPC: shared worker for condIntReg/condFltReg.  Apparently
-- extracts the relevant CR bit with RLWINM (optionally negated via
-- CRNOR) rather than branching -- TODO confirm; most of this
-- definition's lines are elided in this excerpt.
4186 condReg getCond = do
4187 lbl1 <- getBlockIdNat
4188 lbl2 <- getBlockIdNat
4189 CondCode _ cond cond_code <- getCond
4191 {- code dst = cond_code `appOL` toOL [
4200 code dst = cond_code
4204 RLWINM dst dst (bit + 1) 31 31
4207 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4210 (bit, do_negate) = case cond of
4224 return (Any I32 code)
4226 condIntReg cond x y = condReg (condIntCode cond x y)
4227 condFltReg cond x y = condReg (condFltCode cond x y)
4228 #endif /* powerpc_TARGET_ARCH */
4231 -- -----------------------------------------------------------------------------
4232 -- 'trivial*Code': deal with trivial instructions
4234 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4235 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4236 -- Only look for constants on the right hand side, because that's
4237 -- where the generic optimizer will have put them.
4239 -- Similarly, for unary instructions, we don't have to worry about
4240 -- matching an StInt as the argument, because genericOpt will already
4241 -- have handled the constant-folding.
-- Per-architecture type signatures for trivialCode / trivialFCode /
-- trivialUCode / trivialUFCode, selected with the IF_ARCH_* CPP macros
-- from NCG.h.  NOTE(review): the leading "trivialCode ::" lines are
-- elided in this excerpt; only the architecture alternatives remain.
4245 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4246 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4247 -> Maybe (Operand -> Operand -> Instr)
4248 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4249 -> Maybe (Operand -> Operand -> Instr)
4250 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4251 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4253 -> CmmExpr -> CmmExpr -- the two arguments
4256 #ifndef powerpc_TARGET_ARCH
4259 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4260 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4261 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4262 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4264 -> CmmExpr -> CmmExpr -- the two arguments
4270 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4271 ,IF_ARCH_i386 ((Operand -> Instr)
4272 ,IF_ARCH_x86_64 ((Operand -> Instr)
4273 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4274 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4276 -> CmmExpr -- the one argument
4279 #ifndef powerpc_TARGET_ARCH
4282 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4283 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4284 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4285 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4287 -> CmmExpr -- the one argument
4291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4293 #if alpha_TARGET_ARCH
-- Alpha versions of the trivial*Code helpers, written in the old
-- thenNat monad style.  NOTE(review): some lines are elided here.
-- trivialCode: dyadic int op; special-case an immediate second operand.
4295 trivialCode instr x (StInt y)
4297 = getRegister x `thenNat` \ register ->
4298 getNewRegNat IntRep `thenNat` \ tmp ->
4300 code = registerCode register tmp
4301 src1 = registerName register tmp
4302 src2 = ImmInt (fromInteger y)
4303 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4305 return (Any IntRep code__2)
-- general case: both operands computed into registers first
4307 trivialCode instr x y
4308 = getRegister x `thenNat` \ register1 ->
4309 getRegister y `thenNat` \ register2 ->
4310 getNewRegNat IntRep `thenNat` \ tmp1 ->
4311 getNewRegNat IntRep `thenNat` \ tmp2 ->
4313 code1 = registerCode register1 tmp1 []
4314 src1 = registerName register1 tmp1
4315 code2 = registerCode register2 tmp2 []
4316 src2 = registerName register2 tmp2
4317 code__2 dst = asmSeqThen [code1, code2] .
4318 mkSeqInstr (instr src1 (RIReg src2) dst)
4320 return (Any IntRep code__2)
-- trivialUCode: unary int op
4323 trivialUCode instr x
4324 = getRegister x `thenNat` \ register ->
4325 getNewRegNat IntRep `thenNat` \ tmp ->
4327 code = registerCode register tmp
4328 src = registerName register tmp
4329 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4331 return (Any IntRep code__2)
-- trivialFCode: dyadic float op (first arg, the MachRep, is ignored)
4334 trivialFCode _ instr x y
4335 = getRegister x `thenNat` \ register1 ->
4336 getRegister y `thenNat` \ register2 ->
4337 getNewRegNat F64 `thenNat` \ tmp1 ->
4338 getNewRegNat F64 `thenNat` \ tmp2 ->
4340 code1 = registerCode register1 tmp1
4341 src1 = registerName register1 tmp1
4343 code2 = registerCode register2 tmp2
4344 src2 = registerName register2 tmp2
4346 code__2 dst = asmSeqThen [code1 [], code2 []] .
4347 mkSeqInstr (instr src1 src2 dst)
4349 return (Any F64 code__2)
-- trivialUFCode: unary float op
4351 trivialUFCode _ instr x
4352 = getRegister x `thenNat` \ register ->
4353 getNewRegNat F64 `thenNat` \ tmp ->
4355 code = registerCode register tmp
4356 src = registerName register tmp
4357 code__2 dst = code . mkSeqInstr (instr src dst)
4359 return (Any F64 code__2)
4361 #endif /* alpha_TARGET_ARCH */
4363 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4365 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4368 The Rules of the Game are:
4370 * You cannot assume anything about the destination register dst;
4371 it may be anything, including a fixed reg.
4373 * You may compute an operand into a fixed reg, but you may not
4374 subsequently change the contents of that fixed reg. If you
4375 want to do so, first copy the value either to a temporary
4376 or into dst. You are free to modify dst even if it happens
4377 to be a fixed reg -- that's not your problem.
4379 * You cannot assume that a fixed reg will stay live over an
4380 arbitrary computation. The same applies to the dst reg.
4382 * Temporary regs obtained from getNewRegNat are distinct from
4383 each other and from all other regs, and stay live over
4384 arbitrary computations.
4386 --------------------
4388 SDM's version of The Rules:
4390 * If getRegister returns Any, that means it can generate correct
4391 code which places the result in any register, period. Even if that
4392 register happens to be read during the computation.
4394 Corollary #1: this means that if you are generating code for an
4395 operation with two arbitrary operands, you cannot assign the result
4396 of the first operand into the destination register before computing
4397 the second operand. The second operand might require the old value
4398 of the destination register.
4400 Corollary #2: A function might be able to generate more efficient
4401 code if it knows the destination register is a new temporary (and
4402 therefore not read by any of the sub-computations).
4404 * If getRegister returns Any, then the code it generates may modify only:
4405 (a) fresh temporaries
4406 (b) the destination register
4407 (c) known registers (eg. %ecx is used by shifts)
4408 In particular, it may *not* modify global registers, unless the global
4409 register happens to be the destination register.
-- trivialCode (x86/x86_64): binary integer operation.
-- Special case: when the FIRST operand is a small literal and a
-- reversed-operand instruction is available, emit the literal as an
-- immediate against the destination — saves a register and a move.
-- NOTE(review): the 'code dst' binding head (orig. lines 4415-4416)
-- is missing from this dump; line 4417 is its continuation.
4412 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4413 | not (is64BitLit lit_a) = do
4414 b_code <- getAnyReg b
4417 = b_code dst `snocOL`
4418 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4420 return (Any rep code)
-- General case: defer to the shared two-operand code generator.
4422 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
-- genTrivialCode: shared two-operand code generator
-- (dst := dst `op` src).  Re-used for floating point instructions too
-- (on x86_64, via trivialFCode).
4424 -- This is re-used for floating pt instructions too.
4425 genTrivialCode rep instr a b = do
4426 (b_op, b_code) <- getNonClobberedOperand b
4427 a_code <- getAnyReg a
4428 tmp <- getNewRegNat rep
4430 -- We want the value of b to stay alive across the computation of a.
4431 -- But, we want to calculate a straight into the destination register,
4432 -- because the instruction only has two operands (dst := dst `op` src).
4433 -- The troublesome case is when the result of b is in the same register
4434 -- as the destination reg. In this case, we have to save b in a
4435 -- new temporary across the computation of a.
-- NOTE(review): the 'code dst' binding head and the interleaving of
-- b_code/a_code (orig. lines 4436, 4438, 4440, 4442-4444, 4446) are
-- missing from this dump; the two guarded alternatives below are the
-- clash / no-clash cases described in the comment above.
4437 | dst `regClashesWithOp` b_op =
4439 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4441 instr (OpReg tmp) (OpReg dst)
4445 instr b_op (OpReg dst)
4447 return (Any rep code)
-- regClashesWithOp: does this operand read the given register, either
-- directly (a register operand) or through an addressing mode?
4449 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4450 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
-- Immediates can never clash.
4451 reg `regClashesWithOp` _ = False
-- trivialUCode (x86/x86_64): unary integer operation computed straight
-- into the destination register.
-- NOTE(review): the 'code dst' body (orig. lines 4457-4461) is missing
-- from this dump.
4455 trivialUCode rep instr x = do
4456 x_code <- getAnyReg x
4462 return (Any rep code)
4466 #if i386_TARGET_ARCH
-- trivialFCode (i386, x87 stack FPU): binary floating-point op.  The
-- first operand must survive the second's computation, hence
-- getNonClobberedReg; per its comment these helpers work for float
-- regs too.
-- NOTE(review): the 'code dst' binding head (orig. lines 4471-4474)
-- is missing from this dump.
4468 trivialFCode pk instr x y = do
4469 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4470 (y_reg, y_code) <- getSomeReg y
4475 instr pk x_reg y_reg dst
4477 return (Any pk code)
4481 #if x86_64_TARGET_ARCH
-- trivialFCode (x86_64, SSE2): floating-point ops are ordinary
-- two-operand instructions, so reuse the shared generator.
4483 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- trivialUFCode: unary floating-point operation.
-- NOTE(review): the 'code dst' body (orig. lines 4491-4495) is missing
-- from this dump.
4489 trivialUFCode rep instr x = do
4490 (x_reg, x_code) <- getSomeReg x
4496 return (Any rep code)
4498 #endif /* i386_TARGET_ARCH */
4500 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4502 #if sparc_TARGET_ARCH
-- trivialCode (SPARC): binary integer operation.
-- First equation: the right operand is an integer literal; fold it
-- into the instruction as an immediate (RIImm).
-- NOTE(review): the guard between orig. lines 4504 and 4507 (almost
-- certainly a 13-bit-immediate range check) and the 'let' line are
-- missing from this dump.
4504 trivialCode pk instr x (CmmLit (CmmInt y d))
4507 (src1, code) <- getSomeReg x
4508 tmp <- getNewRegNat I32
4510 src2 = ImmInt (fromInteger y)
4511 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4512 return (Any I32 code__2)
-- General case: both operands in registers.
-- NOTE(review): tmp1/tmp2 are allocated but unused in the visible code.
4514 trivialCode pk instr x y = do
4515 (src1, code1) <- getSomeReg x
4516 (src2, code2) <- getSomeReg y
4517 tmp1 <- getNewRegNat I32
4518 tmp2 <- getNewRegNat I32
4520 code__2 dst = code1 `appOL` code2 `snocOL`
4521 instr src1 (RIReg src2) dst
4522 return (Any I32 code__2)
-- trivialFCode (SPARC): binary floating-point op.  SPARC treats F32
-- and F64 as distinct kinds, so if the operands' reps differ the F32
-- one is first widened with FxTOy into the F64 temporary 'tmp'.
-- NOTE(review): the bindings for pk1/pk2 (operand reps) and the
-- 'if pk1 == pk2' head (orig. lines 4531, 4533-4538) are missing from
-- this dump; the three branches below are same-rep / promote-left /
-- promote-right.
4525 trivialFCode pk instr x y = do
4526 (src1, code1) <- getSomeReg x
4527 (src2, code2) <- getSomeReg y
4528 tmp1 <- getNewRegNat (cmmExprRep x)
4529 tmp2 <- getNewRegNat (cmmExprRep y)
4530 tmp <- getNewRegNat F64
4532 promote x = FxTOy F32 F64 x tmp
4539 code1 `appOL` code2 `snocOL`
4540 instr pk src1 src2 dst
4541 else if pk1 == F32 then
4542 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4543 instr F64 tmp src2 dst
4545 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4546 instr F64 src1 tmp dst
-- Result rep is the common rep, or F64 after promotion.
4547 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- trivialUCode (SPARC): unary integer operation; the operand is passed
-- as a register-or-immediate (RIReg).
-- NOTE(review): tmp is allocated but unused in the visible code.
4550 trivialUCode pk instr x = do
4551 (src, code) <- getSomeReg x
4552 tmp <- getNewRegNat pk
4554 code__2 dst = code `snocOL` instr (RIReg src) dst
4555 return (Any pk code__2)
-- trivialUFCode (SPARC): unary floating-point operation.
-- NOTE(review): tmp is allocated but unused in the visible code.
4558 trivialUFCode pk instr x = do
4559 (src, code) <- getSomeReg x
4560 tmp <- getNewRegNat pk
4562 code__2 dst = code `snocOL` instr src dst
4563 return (Any pk code__2)
4565 #endif /* sparc_TARGET_ARCH */
4567 #if powerpc_TARGET_ARCH
4570 Wolfgang's PowerPC version of The Rules:
4572 A slightly modified version of The Rules to take advantage of the fact
4573 that PowerPC instructions work on all registers and don't implicitly
4574 clobber any fixed registers.
4576 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4578 * If getRegister returns Any, then the code it generates may modify only:
4579 (a) fresh temporaries
4580 (b) the destination register
4581 It may *not* modify global registers, unless the global
4582 register happens to be the destination register.
4583 It may not clobber any other registers. In fact, only ccalls clobber any
4585 Also, it may not modify the counter register (used by genCCall).
4587 Corollary: If a getRegister for a subexpression returns Fixed, you need
4588 not move it to a fresh temporary before evaluating the next subexpression.
4589 The Fixed register won't be modified.
4590 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4592 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4593 the value of the destination register.
-- trivialCode (PowerPC): binary integer operation.  PPC instructions
-- are three-operand (instr dst srcA srcB), so no clash handling is
-- needed (see Wolfgang's Rules above this point in the file).
-- First equation: right operand is a literal that fits the
-- instruction's immediate field ('signed' selects the encoding check).
-- NOTE(review): the '= do' line between orig. 4597 and 4599 is missing
-- from this dump.
4596 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4597 | Just imm <- makeImmediate rep signed y
4599 (src1, code1) <- getSomeReg x
4600 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4601 return (Any rep code)
-- General case: both operands in registers.
4603 trivialCode rep signed instr x y = do
4604 (src1, code1) <- getSomeReg x
4605 (src2, code2) <- getSomeReg y
4606 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4607 return (Any rep code)
-- trivialCodeNoImm (PowerPC): like trivialCode, but for instructions
-- with no immediate form — both operands always go in registers.
4609 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4610 -> CmmExpr -> CmmExpr -> NatM Register
4611 trivialCodeNoImm rep instr x y = do
4612 (src1, code1) <- getSomeReg x
4613 (src2, code2) <- getSomeReg y
4614 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4615 return (Any rep code)
-- trivialUCode (PowerPC): unary operation, dst <- instr src.
4617 trivialUCode rep instr x = do
4618 (src, code) <- getSomeReg x
4619 let code' dst = code `snocOL` instr dst src
4620 return (Any rep code')
4622 -- There is no "remainder" instruction on the PPC, so we have to do
-- it by hand: r = x - (x `div` y) * y.
4624 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4626 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4627 -> CmmExpr -> CmmExpr -> NatM Register
4628 remainderCode rep div x y = do
4629 (src1, code1) <- getSomeReg x
4630 (src2, code2) <- getSomeReg y
-- NOTE(review): the divide and final subtract instructions of this
-- sequence (orig. lines 4632, 4634-4635) are missing from this dump;
-- only the multiply-back step is visible.
4631 let code dst = code1 `appOL` code2 `appOL` toOL [
4633 MULLW dst dst (RIReg src2),
4636 return (Any rep code)
4638 #endif /* powerpc_TARGET_ARCH */
4641 -- -----------------------------------------------------------------------------
4642 -- Coercing to/from integer/floating-point...
4644 -- When going to integer, we truncate (round towards 0).
4646 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4647 -- conversions. We have to store temporaries in memory to move
4648 -- between the integer and the floating point register sets.
4650 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4651 -- pretend, on sparc at least, that double and float regs are separate
4652 -- kinds, so the value has to be computed into one kind before being
4653 -- explicitly "converted" to live in the other kind.
4655 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4656 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4658 #if sparc_TARGET_ARCH
4659 coerceDbl2Flt :: CmmExpr -> NatM Register
4660 coerceFlt2Dbl :: CmmExpr -> NatM Register
4663 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4665 #if alpha_TARGET_ARCH
-- (Alpha coerceInt2FP, body only — the defining equation's head lines,
-- orig. 4666-4667, are missing from this dump.)
-- Moves an integer into an FP register by bouncing it through the
-- stack: store at spRel 0, reload with LD TF.  The store and convert
-- instructions (orig. 4675, 4677-4678) are also missing here.
4668 = getRegister x `thenNat` \ register ->
4669 getNewRegNat IntRep `thenNat` \ reg ->
4671 code = registerCode register reg
4672 src = registerName register reg
4674 code__2 dst = code . mkSeqInstrs [
4676 LD TF dst (spRel 0),
4679 return (Any F64 code__2)
-- (Alpha coerceFP2Int, body only — the defining equation's head lines
-- are missing from this dump.)
-- Converse of the above: the FP value is stored to the stack with
-- ST TF and reloaded into an integer register.  The convert and reload
-- instructions (orig. 4690, 4692-4693) are also missing here.
4683 = getRegister x `thenNat` \ register ->
4684 getNewRegNat F64 `thenNat` \ tmp ->
4686 code = registerCode register tmp
4687 src = registerName register tmp
4689 code__2 dst = code . mkSeqInstrs [
4691 ST TF tmp (spRel 0),
4694 return (Any IntRep code__2)
4696 #endif /* alpha_TARGET_ARCH */
4698 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4700 #if i386_TARGET_ARCH
-- coerceInt2FP (i386, x87): int -> float via the GITOF/GITOD
-- pseudo-instructions, selected by the target rep.
-- NOTE(review): 'from' is unused in the visible code; the existing
-- ToDo questions non-I32 source reps.
4702 coerceInt2FP from to x = do
4703 (x_reg, x_code) <- getSomeReg x
4705 opc = case to of F32 -> GITOF; F64 -> GITOD
4706 code dst = x_code `snocOL` opc x_reg dst
4707 -- ToDo: works for non-I32 reps?
4709 return (Any to code)
-- coerceFP2Int (i386, x87): float -> int (truncating, per the section
-- comment above) via GFTOI/GDTOI, selected by the source rep.
4713 coerceFP2Int from to x = do
4714 (x_reg, x_code) <- getSomeReg x
4716 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4717 code dst = x_code `snocOL` opc x_reg dst
4718 -- ToDo: works for non-I32 reps?
4720 return (Any to code)
4722 #endif /* i386_TARGET_ARCH */
4724 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4726 #if x86_64_TARGET_ARCH
-- coerceFP2Int (x86_64, SSE2): truncating convert with
-- CVTTSS2SIQ/CVTTSD2SIQ, selected by the source rep.  The operand may
-- come straight from memory via getOperand.
4728 coerceFP2Int from to x = do
4729 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4731 opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4732 code dst = x_code `snocOL` opc x_op dst
4734 return (Any to code) -- works even if the destination rep is <I32
-- coerceInt2FP (x86_64, SSE2): int -> float with CVTSI2SS/CVTSI2SD,
-- selected by the target rep.
4736 coerceInt2FP from to x = do
4737 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4739 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4740 code dst = x_code `snocOL` opc x_op dst
4742 return (Any to code) -- works even if the destination rep is <I32
-- coerceFP2FP (x86_64 only): float <-> double with CVTSD2SS/CVTSS2SD;
-- the direction is implied by the TARGET rep alone.
4744 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4745 coerceFP2FP to x = do
4746 (x_reg, x_code) <- getSomeReg x
4748 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4749 code dst = x_code `snocOL` opc x_reg dst
4751 return (Any to code)
4755 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4757 #if sparc_TARGET_ARCH
-- coerceInt2FP (SPARC): per the section comment, the value must move
-- between the integer and FP register sets through memory — store
-- below the stack pointer (spRel (-2)), reload into an FP reg, then
-- convert in place with FxTOy.
4759 coerceInt2FP pk1 pk2 x = do
4760 (src, code) <- getSomeReg x
4762 code__2 dst = code `appOL` toOL [
4763 ST pk1 src (spRel (-2)),
4764 LD pk1 (spRel (-2)) dst,
4765 FxTOy pk1 pk2 dst dst]
4766 return (Any pk2 code__2)
-- coerceFP2Int (SPARC): convert in the FP regs (FxTOy into an fprep
-- temporary), then bounce through memory at spRel (-2) to land in an
-- integer register.
-- NOTE(review): 'reg' is allocated but unused in the visible code.
4769 coerceFP2Int pk fprep x = do
4770 (src, code) <- getSomeReg x
4771 reg <- getNewRegNat fprep
4772 tmp <- getNewRegNat pk
4774 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4776 FxTOy fprep pk src tmp,
4777 ST pk tmp (spRel (-2)),
4778 LD pk (spRel (-2)) dst]
4779 return (Any pk code__2)
-- coerceDbl2Flt (SPARC): double -> float is a single FxTOy; no memory
-- round-trip needed since both kinds live in the FP register set.
4782 coerceDbl2Flt x = do
4783 (src, code) <- getSomeReg x
4784 return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
-- coerceFlt2Dbl (SPARC): the widening counterpart of coerceDbl2Flt.
4787 coerceFlt2Dbl x = do
4788 (src, code) <- getSomeReg x
4789 return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4791 #endif /* sparc_TARGET_ARCH */
4793 #if powerpc_TARGET_ARCH
-- coerceInt2FP (PowerPC): there is no int->FP instruction, so this
-- builds a double on the stack — high word 0x4330 (magic exponent),
-- low word the sign-flipped integer (XORIS 0x8000) — loads it, and
-- subtracts a reference constant loaded via a data-section literal.
-- NOTE(review): the LDATA header and the load of the reference double
-- (orig. lines 4802, 4804-4805, 4814) are missing from this dump, as
-- are the fall-through branches of maybe_exts/maybe_frsp.
4794 coerceInt2FP fromRep toRep x = do
4795 (src, code) <- getSomeReg x
4796 lbl <- getNewLabelNat
4797 itmp <- getNewRegNat I32
4798 ftmp <- getNewRegNat F64
4799 dflags <- getDynFlagsNat
4800 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4801 Amode addr addr_code <- getAmode dynRef
4803 code' dst = code `appOL` maybe_exts `appOL` toOL [
4806 CmmStaticLit (CmmInt 0x43300000 I32),
4807 CmmStaticLit (CmmInt 0x80000000 I32)],
4808 XORIS itmp src (ImmInt 0x8000),
4809 ST I32 itmp (spRel 3),
4810 LIS itmp (ImmInt 0x4330),
4811 ST I32 itmp (spRel 2),
4812 LD F64 ftmp (spRel 2)
4813 ] `appOL` addr_code `appOL` toOL [
4815 FSUB F64 dst ftmp dst
4816 ] `appOL` maybe_frsp dst
-- Sub-word sources are sign-extended first.
4818 maybe_exts = case fromRep of
4819 I8 -> unitOL $ EXTS I8 src src
4820 I16 -> unitOL $ EXTS I16 src src
-- An F32 target needs a final round-to-single (FRSP).
4822 maybe_frsp dst = case toRep of
4823 F32 -> unitOL $ FRSP dst dst
4825 return (Any toRep code')
-- coerceFP2Int (PowerPC): convert in an FP reg, store the 64-bit
-- result to the stack, and read back only the low word (big-endian:
-- spRel 3); the high word is undefined.
-- NOTE(review): the convert instruction itself (orig. line 4834,
-- between the two comments) is missing from this dump.
4827 coerceFP2Int fromRep toRep x = do
4828 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4829 (src, code) <- getSomeReg x
4830 tmp <- getNewRegNat F64
4832 code' dst = code `appOL` toOL [
4833 -- convert to int in FP reg
4835 -- store value (64bit) from FP to stack
4836 ST F64 tmp (spRel 2),
4837 -- read low word of value (high word is undefined)
4838 LD I32 dst (spRel 3)]
4839 return (Any toRep code')
4840 #endif /* powerpc_TARGET_ARCH */
4843 -- -----------------------------------------------------------------------------
4844 -- eXTRA_STK_ARGS_HERE
4846 -- We (allegedly) put the first six C-call arguments in registers;
4847 -- where do we start putting the rest of them?
4849 -- Moved from MachInstrs (SDM):
4851 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- eXTRA_STK_ARGS_HERE: stack offset (in words) at which C-call
-- arguments beyond the register-passed ones begin.  The IF_ARCH_*
-- macros select 0 on Alpha, 23 on SPARC; '???' deliberately fails to
-- compile on any other arch that reaches this definition.
4852 eXTRA_STK_ARGS_HERE :: Int
4854 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))