1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
32 import ClosureInfo ( C_SRT(..) )
35 import StaticFlags ( opt_PIC )
36 import ForeignCall ( CCallConv(..) )
41 import FastTypes ( isFastTrue )
42 import Constants ( wORD_SIZE )
45 import Debug.Trace ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Data.Maybe ( fromJust )
54 -- -----------------------------------------------------------------------------
55 -- Top-level of the instruction selector
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.
type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm item: a procedure is translated
-- block-by-block (possibly followed by PIC initialisation); a data
-- section passes through unchanged.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): the scrutinee line (presumably "case picBaseMb of")
  -- is missing from this copy of the file -- confirm against upstream.
      Just picBase -> initializePicBase picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Code-generate a single Cmm basic block.  Instruction selection may
-- introduce new basic-block boundaries (marked by the NEWBLOCK
-- pseudo-instruction) and out-of-line data (LDATA), so the generated
-- instruction stream is re-partitioned here into proper blocks plus
-- floated-out data sections.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- LDATAs here too.
  -- NOTE(review): the "let" keyword introducing the bindings below
  -- appears to be missing from this copy -- confirm against upstream.
      (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

      -- NEWBLOCK terminates the current block and starts a fresh one.
      mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
        = ([], BasicBlock id instrs : blocks, statics)
      -- Out-of-line data is floated to the top level.
      mkBlocks (LDATA sec dat) (instrs,blocks,statics)
        = (instrs, blocks, CmmData sec dat:statics)
      -- Ordinary instructions accumulate into the current block.
      mkBlocks instr (instrs,blocks,statics)
        = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Flatten a list of statements into one appendable instruction sequence.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the equation head (presumably "stmtsToInstrs stmts")
-- is missing from this copy of the file.
  = do instrss <- mapM stmtToInstrs stmts
       return (concatOL instrss)
-- | Select instructions for a single Cmm statement, dispatching on the
-- statement form; assignments additionally dispatch on the value's
-- MachRep (float / 64-bit-on-32-bit / plain integer).
-- NOTE(review): the "CmmAssign reg src" and "CmmStore addr src"
-- alternative heads, and the matching #endif lines for the two #if
-- blocks, are missing from this copy -- confirm against upstream.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop       -> return nilOL
    CmmComment s -> return (unitOL (COMMENT s))

    -- (CmmAssign reg src, head missing)
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignReg_I64Code      reg src
      | otherwise          -> assignReg_IntCode kind reg src
        where kind = cmmRegRep reg

    -- (CmmStore addr src, head missing)
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignMem_I64Code      addr src
      | otherwise          -> assignMem_IntCode kind addr src
        where kind = cmmExprRep src

    CmmCall target result_regs args _
       -> genCCall target result_regs args

    CmmBranch id         -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids    -> genSwitch arg ids
    CmmJump arg params   -> genJump arg
130 -- -----------------------------------------------------------------------------
131 -- General things for putting together code sequences
-- Rewrite a CmmRegOff node as the equivalent explicit addition,
-- reg + literal-offset, at the register's own MachRep.
-- ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let rep = cmmRegRep reg
    in  CmmMachOp (MO_Add rep)
                  [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
140 -- -----------------------------------------------------------------------------
141 -- Code gen for 64-bit arithmetic on 32-bit platforms
144 Simple support for generating 64-bit code (ie, 64 bit values and 64
145 bit assignments) on 32-bit platforms. Unlike the main code generator
146 we merely shoot for generating working code as simply as possible, and
147 pay little attention to code quality. Specifically, there is no
148 attempt to deal cleverly with the fixed-vs-floating register
149 distinction; all values are generated into (pairs of) floating
150 registers, even if this would mean some redundant reg-reg moves as a
151 result. Only one of the VRegUniques is returned, since it will be
152 of the VRegUniqueLo form, and the upper-half VReg can be determined
153 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit platform: the
-- generated code together with the virtual register holding the low
-- 32-bit half (the high half is derived via getHiVRegFromLo).
data ChildCode64        -- a.k.a "Register64"
-- NOTE(review): the constructor line (presumably
-- "= ChildCode64 InstrBlock") is missing from this copy of the file.
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified
#if WORD_SIZE_IN_BITS==32

-- | Store a 64-bit value (second argument) at the given address.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- | Assign a 64-bit value to a (local) register.
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
-- | Select instructions computing a 64-bit value into a register pair.
iselExpr64 :: CmmExpr -> NatM ChildCode64
174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: 64-bit store -- little-endian, so low word at addr, high word
-- at addr+4.
-- NOTE(review): the "let" lines introducing the bindings below, and
-- the "return (...)" wrapper in assignReg_I64Code, are missing from
-- this copy of the file -- confirm against upstream.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- i386: 64-bit register-to-register assignment as two 32-bit moves.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
        r_dst_lo = mkVReg u_dst I32
        r_dst_hi = getHiVRegFromLo r_dst_lo
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
        mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers may be 64-bit on a 32-bit platform.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386 64-bit instruction selection.
-- NOTE(review): several scaffolding lines ("let", "code = toOL [",
-- "return $", closing brackets, the fallback equation head) are
-- missing from this copy of the file -- confirm against upstream.

-- A 64-bit literal: move each 32-bit half into its register.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
        MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- A 64-bit load: two 32-bit loads (low word at addr, high at addr+4).
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Reading a 64-bit local: no code needed, just name the low vreg;
-- the high half is recovered via getHiVRegFromLo.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
   = return (ChildCode64 nilOL (mkVReg vu I32))

-- we handle addition, but rather badly
-- Add a literal: ADD the low words, ADC (add-with-carry) the high.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General 64-bit addition: same ADD/ADC pattern over both halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpReg r2lo) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Zero-extension to 64 bits: compute into the low register and zero
-- the high one.
iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
     r_dst_lo <- getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
          ChildCode64 (code `snocOL`
                       MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))

-- Anything else is unsupported here.
   = pprPanic "iselExpr64(i386)" (ppr expr)
282 #endif /* i386_TARGET_ARCH */
284 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
286 #if sparc_TARGET_ARCH
-- sparc (big-endian): high word stored at offset 0, low word at 4.
-- NOTE(review): the "let" lines introducing the bindings below are
-- missing from this copy of the file -- confirm against upstream.
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     (src, code) <- getSomeReg addrTree
         rhi = getHiVRegFromLo rlo
         mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)

-- 64-bit register assignment as two register moves.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
         r_dst_lo = mkVReg u_dst pk
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- "or %g0, src, dst" is the canonical sparc register move.
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
--   = panic "iselExpr64(???)"

-- sparc 64-bit instruction selection.
-- NOTE(review): "let"/"return $" scaffolding lines and the fallback
-- equation head are missing from this copy -- confirm against upstream.
iselExpr64 (CmmLoad addrTree I64) = do
     -- NOTE(review): this match on AddrRegReg is partial, and r2 is
     -- then ignored -- the loads below address only via r1 with
     -- immediate offsets.  Looks wrong for genuine reg+reg amodes;
     -- confirm against upstream.
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
     rlo <- getNewRegNat I32
     let rhi = getHiVRegFromLo rlo
         -- Big-endian: high word at offset 0, low word at offset 4.
         mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
         ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)

-- Reading a 64-bit local: copy both halves into a fresh pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
     r_dst_lo <- getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq I32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- "or %g0, src, dst" is the canonical sparc register move.
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo

   = pprPanic "iselExpr64(sparc)" (ppr expr)
344 #endif /* sparc_TARGET_ARCH */
346 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
348 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for the high and low words of a 64-bit
-- memory operand (powerpc is big-endian: high word first, low word at
-- +4).  If the low-word amode cannot be formed by simple offsetting,
-- materialise the address in a register and use reg+0 / reg+4.
-- NOTE(review): the closing line of the Nothing-branch tuple
-- (presumably "code)") is missing from this copy of the file.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                          return (AddrRegImm hi_ptr (ImmInt 0),
                                  AddrRegImm hi_ptr (ImmInt 4),
-- powerpc: 64-bit store via the hi/lo amodes from getI64Amodes.
-- NOTE(review): the "let" lines introducing the bindings below, and
-- the "return (...)" wrapper in assignReg_I64Code, are missing from
-- this copy of the file -- confirm against upstream.
assignMem_I64Code addrTree valueTree = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    ChildCode64 vcode rlo <- iselExpr64 valueTree
        rhi = getHiVRegFromLo rlo

        -- Big-endian store: high word at the lower address.
        mov_hi = ST I32 rhi hi_addr
        mov_lo = ST I32 rlo lo_addr

    return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- 64-bit register assignment as two MR (move-register) instructions.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
        r_dst_lo = mkVReg u_dst I32
        r_dst_hi = getHiVRegFromLo r_dst_lo
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = MR r_dst_lo r_src_lo
        mov_hi = MR r_dst_hi r_src_hi
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers may be 64-bit on a 32-bit platform.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

-- powerpc: 64-bit load as two 32-bit loads via the hi/lo amodes.
-- NOTE(review): the final "rlo" argument of the ChildCode64 below is
-- missing from this copy of the file -- confirm against upstream.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Reading a 64-bit local: no code, just name the low vreg.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- Load a 64-bit integer literal into a register pair, 16 bits at a
-- time: LIS puts a halfword into the upper 16 bits of a word, and the
-- following OR fills in the lower 16 bits.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: half2 belongs to the *high* word, so it must
                -- be OR'd into rhi.  The original OR'd it into rlo,
                -- corrupting the low word and leaving the bottom 16
                -- bits of the high word zero.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- 64-bit addition: ADDC sets the carry from the low words; the
-- high-word ADDE (missing from this copy, see note) consumes it.
-- NOTE(review): the "let"/"code =" scaffolding, the "ADDE rhi r1hi
-- r2hi ]" line, and the fallback equation head are missing from this
-- copy of the file -- confirm against upstream.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ ADDC rlo r1lo r2lo,
   return (ChildCode64 code rlo)

-- Zero-extend 32 -> 64: move into the low register, zero the high one.
iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
    (expr_reg,expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)

   = pprPanic "iselExpr64(powerpc)" (ppr expr)
446 #endif /* powerpc_TARGET_ARCH */
449 -- -----------------------------------------------------------------------------
450 -- The 'Register' type
-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
-- NOTE(review): the "data Register" header line is missing from this
-- copy of the file -- confirm against upstream.
  = Fixed MachRep Reg InstrBlock
  | Any MachRep (Reg -> InstrBlock)
-- | Re-tag a 'Register' with a different 'MachRep', leaving the
-- generated code and the target register untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep = case register of
    Fixed _ reg code -> Fixed rep reg code
    Any _ codefn     -> Any rep codefn
466 -- -----------------------------------------------------------------------------
467 -- Utils based on getRegister, below
-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
-- NOTE(review): the equation head and the "case"/"Fixed" lines
-- scrutinising the Register result are missing from this copy of the
-- file -- confirm against upstream.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
  r <- getRegister expr
     -- Any: allocate a fresh register of the right rep and fill it.
     tmp <- getNewRegNat rep
     return (tmp, code tmp)
481 -- -----------------------------------------------------------------------------
482 -- Grab the Reg for a CmmReg
-- | The Reg a CmmReg lives in: a virtual register for locals, the real
-- machine register for globals that map to one (anything else panics).
getRegisterReg :: CmmReg -> Reg

-- NOTE(review): the right-hand side of this equation (presumably
-- "= mkVReg u pk") is missing from this copy of the file.
getRegisterReg (CmmLocal (LocalReg u pk _))

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
498 -- -----------------------------------------------------------------------------
499 -- Generate code to get a subtree into a Register
-- Don't delete this -- it's very handy for debugging.
--getRegister expr
--   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "getRegister(???)"

-- | Generate code to compute a CmmExpr into some register.
getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
-- NOTE(review): the "= do" line of this clause and the closing #endif
-- of the #if above are missing from this copy of the file.
getRegister (CmmReg (CmmGlobal PicBaseReg))
      reg <- getPicBaseNat wordRep
      return (Fixed wordRep reg nilOL)

getRegister (CmmReg reg)
  = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)

getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- High word of a 64-bit value: select the pair, return the high vreg.
getRegister (CmmMachOp (MO_U_Conv I64 I32)
             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_S_Conv I64 I32)
             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 (getHiVRegFromLo rlo) code

-- Low word of a 64-bit value: just the low vreg.
getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 rlo code

getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 rlo code
548 -- end of machine-"independent" bit; here we go on the rest...
550 #if alpha_TARGET_ARCH
-- DEAD CODE: DEC Alpha backend, still written against the old Stix
-- intermediate language (StDouble/StPrim/StCall), so it does not match
-- the Cmm-based interface above.  Kept verbatim.
-- NOTE(review): layout lines ("],", "in", "= case primop of", "where",
-- the ChrOp alternative) are missing from this copy of the file.
getRegister (StDouble d)
  = getBlockIdNat                   `thenNat` \ lbl ->
    getNewRegNat PtrRep             `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
            LDATA RoDataSegment lbl [
                  DATA TF [ImmLab (rational d)]
            LDA tmp (AddrImm (ImmCLbl lbl)),
            LD TF dst (AddrReg tmp)]
    return (Any F64 code)

getRegister (StPrim primop [x]) -- unary PrimOps
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp    -> trivialUCode NOT x

      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode F64 (FNEG TF) x

      OrdOp -> coerceIntCode IntRep x

      Float2IntOp  -> coerceFP2Int    x
      Int2FloatOp  -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int    x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

      -- All remaining unary float ops become out-of-line C calls.
      other_op -> getRegister (StCall fn CCallConv F64 [x])
        fn = case other_op of
               FloatExpOp   -> FSLIT("exp")
               FloatLogOp   -> FSLIT("log")
               FloatSqrtOp  -> FSLIT("sqrt")
               FloatSinOp   -> FSLIT("sin")
               FloatCosOp   -> FSLIT("cos")
               FloatTanOp   -> FSLIT("tan")
               FloatAsinOp  -> FSLIT("asin")
               FloatAcosOp  -> FSLIT("acos")
               FloatAtanOp  -> FSLIT("atan")
               FloatSinhOp  -> FSLIT("sinh")
               FloatCoshOp  -> FSLIT("cosh")
               FloatTanhOp  -> FSLIT("tanh")
               DoubleExpOp  -> FSLIT("exp")
               DoubleLogOp  -> FSLIT("log")
               DoubleSqrtOp -> FSLIT("sqrt")
               DoubleSinOp  -> FSLIT("sin")
               DoubleCosOp  -> FSLIT("cos")
               DoubleTanOp  -> FSLIT("tan")
               DoubleAsinOp -> FSLIT("asin")
               DoubleAcosOp -> FSLIT("acos")
               DoubleAtanOp -> FSLIT("atan")
               DoubleSinhOp -> FSLIT("sinh")
               DoubleCoshOp -> FSLIT("cosh")
               DoubleTanhOp -> FSLIT("tanh")

    pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- DEAD CODE (Alpha, Stix-era): dyadic PrimOps.  Comparisons with no
-- native form are expressed by swapping operands (Gt = swapped Lt,
-- Ge = swapped Le).
-- NOTE(review): the "= case primop of" line and several #if/blank
-- lines are missing from this copy of the file.
getRegister (StPrim primop [x, y]) -- dyadic PrimOps
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp  -> trivialCode (CMP LTT) y x
      IntGeOp  -> trivialCode (CMP LE) y x
      IntEqOp  -> trivialCode (CMP EQQ) x y
      IntNeOp  -> int_NE_code x y
      IntLtOp  -> trivialCode (CMP LTT) x y
      IntLeOp  -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      -- NOTE(review): operand order here differs from IntGeOp/AddrGeOp
      -- (which use "y x"); looks like a swapped-argument bug -- confirm.
      WordGeOp -> trivialCode (CMP ULE) x y
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp  -> trivialCode (ADD Q False) x y
      IntSubOp  -> trivialCode (SUB Q False) x y
      IntMulOp  -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp  -> trivialCode (REM Q False) x y

      WordAddOp  -> trivialCode (ADD Q False) x y
      WordSubOp  -> trivialCode (SUB Q False) x y
      WordMulOp  -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp  -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
      FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
      FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
      FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y

      DoubleAddOp -> trivialFCode F64 (FADD TF) x y
      DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode F64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp  -> trivialCode OR x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
      DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
-- DEAD CODE (Alpha, Stix-era) continued.
-- NOTE(review): the closing "-}" of the block comments below, the
-- "let"/"in" layout lines, and several equation heads are missing
-- from this copy of the file.
{- ------------------------------------------------------------
        Some bizarre special code for getting condition codes into
        registers.  Integer non-equality is a test for equality
        followed by an XOR with 1.  (Integer comparisons always set
        the result register to 0 or 1.)  Floating point comparisons of
        any kind leave the result in a floating point register, so we
        need to wrangle an integer register out of things.

    int_NE_code :: StixTree -> StixTree -> NatM Register
    = trivialCode (CMP EQQ) x y `thenNat` \ register ->
      getNewRegNat IntRep `thenNat` \ tmp ->
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
      return (Any IntRep code__2)

{- ------------------------------------------------------------
        Comments for int_NE_code also apply to cmpF_code
    :: (Reg -> Reg -> Reg -> Instr)
    -> StixTree -> StixTree
    cmpF_code instr cond x y
      = trivialFCode pr instr x y `thenNat` \ register ->
        getNewRegNat F64 `thenNat` \ tmp ->
        getBlockIdNat `thenNat` \ lbl ->
          code = registerCode register tmp
          result = registerName register tmp
          code__2 dst = code . mkSeqInstrs [
              OR zeroh (RIImm (ImmInt 1)) dst,
              BF cond result (ImmCLbl lbl),
              OR zeroh (RIReg zeroh) dst,
        return (Any IntRep code__2)
        pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
  ------------------------------------------------------------

getRegister (CmmLoad pk mem)
  = getAmode mem `thenNat` \ amode ->
      code = amodeCode amode
      src = amodeAddr amode
      size = primRepToSize pk
      code__2 dst = code . mkSeqInstr (LD size dst src)
    return (Any pk code__2)

getRegister (StInt i)
    code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    return (Any IntRep code)
    code dst = mkSeqInstr (LDI Q dst src)
    return (Any IntRep code)
    src = ImmInt (fromInteger i)
    code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    return (Any PtrRep code)
    imm__2 = case imm of Just x -> x
779 #endif /* alpha_TARGET_ARCH */
781 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: float literals are placed in a literal pool and loaded from
-- memory; 0.0 and 1.0 use the x87 dedicated constant-load insns
-- (GLDZ/GLD1).
-- NOTE(review): the guard lines ("| d == 0.0", "| d == 1.0",
-- "| otherwise = do"), LDATA headers and closing brackets are missing
-- from this copy of the file -- confirm against upstream.
getRegister (CmmLit (CmmFloat f F32)) = do
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
            CmmStaticLit (CmmFloat f F32)]
    `consOL` (addr_code `snocOL`
    return (Any F32 code)

getRegister (CmmLit (CmmFloat d F64))
  = let code dst = unitOL (GLDZ dst)
    in  return (Any F64 code)

  = let code dst = unitOL (GLD1 dst)
    in  return (Any F64 code)

    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
            CmmStaticLit (CmmFloat d F64)]
    `consOL` (addr_code `snocOL`
    return (Any F64 code)
821 #endif /* i386_TARGET_ARCH */
823 #if x86_64_TARGET_ARCH
-- Zeroing an SSE register with a self-XOR is smaller and faster than
-- loading a 0.0 constant from memory; the same encoding serves both
-- F32 and F64 here.
-- I don't know why there are xorpd, xorps, and pxor instructions.
-- They all appear to do the same thing --SDM
getRegister (CmmLit (CmmFloat 0.0 rep)) = do
   let zero dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
   return (Any rep zero)
-- Other float literals: emit the constant into a read-only section and
-- load it with a %rip-relative MOV.
-- NOTE(review): the LDATA header line of the literal-pool entry and a
-- closing bracket are missing from this copy of the file.
getRegister (CmmLit (CmmFloat f rep)) = do
  lbl <- getNewLabelNat
  let code dst = toOL [
            CmmStaticLit (CmmFloat f rep)],
            MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
  return (Any rep code)
842 #endif /* x86_64_TARGET_ARCH */
844 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Catch simple cases where a zero- or sign-extending conversion wraps
-- a load, and fuse the widening into the load itself (movzbl/movsbl/
-- movzwl/movswl).
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVZxL I8) addr
  return (Any I32 load)

getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVSxL I8) addr
  return (Any I32 load)

getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVZxL I16) addr
  return (Any I32 load)

getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVSxL I16) addr
  return (Any I32 load)
865 #if x86_64_TARGET_ARCH
-- Catch simple cases of zero- or sign-extended loads to 64 bits and
-- fuse the widening conversion into the load instruction.
getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVZxL I8) addr
  return (Any I64 load)

getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVSxL I8) addr
  return (Any I64 load)

getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVZxL I16) addr
  return (Any I64 load)

getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVSxL I16) addr
  return (Any I64 load)

getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
  -- An ordinary 32-bit MOV zero-extends to 64 bits on amd64.
  load <- intLoadCode (MOV I32) addr
  return (Any I64 load)

getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
  load <- intLoadCode (MOVSxL I32) addr
  return (Any I64 load)
894 #if x86_64_TARGET_ARCH
-- On x86_64, PicBaseReg plus a literal is exactly a %rip-relative
-- address, which a single LEA materialises.
getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement]) = do
    let lea dst =
          unitOL (LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
    return (Any I64 lea)
901 #if x86_64_TARGET_ARCH
-- x86_64 float negation: flip the sign bit by XOR-ing against a
-- 128-bit sign-mask constant placed in a read-only data section.
-- NOTE(review): the "let"/"code =" scaffolding, the LDATA label lines
-- and some closing brackets are missing from this copy of the file.
getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  code dst = x_code dst `appOL` toOL [
          -- This is how gcc does it, so it can't be that bad:
          LDATA ReadOnlyData16 [
                  CmmStaticLit (CmmInt 0x80000000 I32),
                  CmmStaticLit (CmmInt 0 I32),
                  CmmStaticLit (CmmInt 0 I32),
                  CmmStaticLit (CmmInt 0 I32)
          XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
          -- xorps, so we need the 128-bit constant
          -- ToDo: rip-relative
  return (Any F32 code)

getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  -- This is how gcc does it, so it can't be that bad:
  code dst = x_code dst `appOL` toOL [
          LDATA ReadOnlyData16 [
                  CmmStaticLit (CmmInt 0x8000000000000000 I64),
                  CmmStaticLit (CmmInt 0 I64)
          -- gcc puts an unpck here.  Wonder if we need it.
          XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
          -- xorpd, so we need the 128-bit constant
  return (Any F64 code)
943 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: unary MachOps -- negation, complement and all the
-- integer/float width conversions.
-- NOTE(review): the "= case mop of" line, several #if/#else/#endif
-- lines (the duplicated MO_S_Conv F32/F64 pairs below are the i386
-- x87 no-op vs. the x86_64 SSE-convert variants), and the "where"
-- introducing the local helpers are missing from this copy of the
-- file -- confirm against upstream.
getRegister (CmmMachOp mop [x]) -- unary MachOps
      MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
      MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x

      MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
      MO_Not rep   -> trivialUCode rep (NOT rep) x

      -- Narrowing conversions.
      MO_U_Conv I32 I8  -> toI8Reg I32 x
      MO_S_Conv I32 I8  -> toI8Reg I32 x
      MO_U_Conv I16 I8  -> toI8Reg I16 x
      MO_S_Conv I16 I8  -> toI8Reg I16 x
      MO_U_Conv I32 I16 -> toI16Reg I32 x
      MO_S_Conv I32 I16 -> toI16Reg I32 x
#if x86_64_TARGET_ARCH
      MO_U_Conv I64 I32 -> conversionNop I64 x
      MO_S_Conv I64 I32 -> conversionNop I64 x
      MO_U_Conv I64 I16 -> toI16Reg I64 x
      MO_S_Conv I64 I16 -> toI16Reg I64 x
      MO_U_Conv I64 I8  -> toI8Reg I64 x
      MO_S_Conv I64 I8  -> toI8Reg I64 x

      -- Conversions to the same width are no-ops.
      MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
      MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x

      -- Widening conversions via movz/movs.
      MO_U_Conv I8 I32  -> integerExtend I8 I32 MOVZxL x
      MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
      MO_U_Conv I8 I16  -> integerExtend I8 I16 MOVZxL x

      MO_S_Conv I8 I32  -> integerExtend I8 I32 MOVSxL x
      MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
      MO_S_Conv I8 I16  -> integerExtend I8 I16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_U_Conv I8 I64  -> integerExtend I8 I64 MOVZxL x
      MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
      MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
      MO_S_Conv I8 I64  -> integerExtend I8 I64 MOVSxL x
      MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
      MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_S_Conv F32 F64 -> conversionNop F64 x
      MO_S_Conv F64 F32 -> conversionNop F32 x

      MO_S_Conv F32 F64 -> coerceFP2FP F64 x
      MO_S_Conv F64 F32 -> coerceFP2FP F32 x

        | isFloatingRep from -> coerceFP2Int from to x
        | isFloatingRep to   -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)

    -- signed or unsigned extension.
    integerExtend from to instr expr = do
        (reg,e_code) <- if from == I8 then getByteReg expr
                                      else getSomeReg expr
            instr from (OpReg reg) (OpReg dst)
        return (Any to code)

    toI8Reg new_rep expr
        = do codefn <- getAnyReg expr
             return (Any new_rep codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

    toI16Reg = toI8Reg -- for now

    -- Re-tag only; generates no instructions.
    conversionNop new_rep expr
        = do e_code <- getRegister expr
             return (swizzleRegisterRep e_code new_rep)
-- x86/x86_64: dyadic MachOps -- comparisons, arithmetic, logic and
-- shifts.  I8 operands are ruled out by the ASSERT (byte operations
-- are widened earlier).
-- NOTE(review): the "= case mop of" line and the #endif lines closing
-- the i386/x86_64 sections are missing from this copy of the file,
-- as is the closing "-}" of the shift-ops comment below -- confirm
-- against upstream.
getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
  = ASSERT2(cmmExprRep x /= I8, pprExpr e)
      MO_Eq F32   -> condFltReg EQQ x y
      MO_Ne F32   -> condFltReg NE x y
      MO_S_Gt F32 -> condFltReg GTT x y
      MO_S_Ge F32 -> condFltReg GE x y
      MO_S_Lt F32 -> condFltReg LTT x y
      MO_S_Le F32 -> condFltReg LE x y

      MO_Eq F64   -> condFltReg EQQ x y
      MO_Ne F64   -> condFltReg NE x y
      MO_S_Gt F64 -> condFltReg GTT x y
      MO_S_Ge F64 -> condFltReg GE x y
      MO_S_Lt F64 -> condFltReg LTT x y
      MO_S_Le F64 -> condFltReg LE x y

      MO_Eq rep   -> condIntReg EQQ x y
      MO_Ne rep   -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      MO_U_Gt rep -> condIntReg GU x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU x y
      MO_U_Le rep -> condIntReg LEU x y

#if i386_TARGET_ARCH
      -- i386: x87 float arithmetic (G-prefixed pseudo-insns).
      MO_Add F32 -> trivialFCode F32 GADD x y
      MO_Sub F32 -> trivialFCode F32 GSUB x y

      MO_Add F64 -> trivialFCode F64 GADD x y
      MO_Sub F64 -> trivialFCode F64 GSUB x y

      MO_S_Quot F32 -> trivialFCode F32 GDIV x y
      MO_S_Quot F64 -> trivialFCode F64 GDIV x y

#if x86_64_TARGET_ARCH
      -- x86_64: SSE float arithmetic.
      MO_Add F32 -> trivialFCode F32 ADD x y
      MO_Sub F32 -> trivialFCode F32 SUB x y

      MO_Add F64 -> trivialFCode F64 ADD x y
      MO_Sub F64 -> trivialFCode F64 SUB x y

      MO_S_Quot F32 -> trivialFCode F32 FDIV x y
      MO_S_Quot F64 -> trivialFCode F64 FDIV x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True True x y
      MO_S_Rem rep  -> div_code rep True False x y
      MO_U_Quot rep -> div_code rep False True x y
      MO_U_Rem rep  -> div_code rep False False x y

#if i386_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 GMUL x y
      MO_Mul F64 -> trivialFCode F64 GMUL x y

#if x86_64_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 MUL x y
      MO_Mul F64 -> trivialFCode F64 MUL x y

      MO_Mul rep -> let op = IMUL rep in
                    trivialCode rep op (Just op) x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_And rep -> let op = AND rep in
                    trivialCode rep op (Just op) x y
      MO_Or rep  -> let op = OR rep in
                    trivialCode rep op (Just op) x y
      MO_Xor rep -> let op = XOR rep in
                    trivialCode rep op (Just op) x y

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
      MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
      MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
      MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1129 --------------------
1130 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1131 imulMayOflo rep a b = do
1132 (a_reg, a_code) <- getNonClobberedReg a
1133 b_code <- getAnyReg b
1135 shift_amt = case rep of
1138 _ -> panic "shift_amt"
1140 code = a_code `appOL` b_code eax `appOL`
1142 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1143 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1144 -- sign extend lower part
1145 SUB rep (OpReg edx) (OpReg eax)
1146 -- compare against upper
1147 -- eax==0 if high part == sign extended low part
1150 return (Fixed rep eax code)
1152 --------------------
1153 shift_code :: MachRep
1154 -> (Operand -> Operand -> Instr)
1159 {- Case1: shift length as immediate -}
1160 shift_code rep instr x y@(CmmLit lit) = do
1161 x_code <- getAnyReg x
1164 = x_code dst `snocOL`
1165 instr (OpImm (litToImm lit)) (OpReg dst)
1167 return (Any rep code)
1169 {- Case2: shift length is complex (non-immediate)
1170 * y must go in %ecx.
1171 * we cannot do y first *and* put its result in %ecx, because
1172 %ecx might be clobbered by x.
1173 * if we do y second, then x cannot be
1174 in a clobbered reg. Also, we cannot clobber x's reg
1175 with the instruction itself.
1177 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1178 - do y second and put its result into %ecx. x gets placed in a fresh
1179 tmp. This is likely to be better, becuase the reg alloc can
1180 eliminate this reg->reg move here (it won't eliminate the other one,
1181 because the move is into the fixed %ecx).
1183 shift_code rep instr x y{-amount-} = do
1184 x_code <- getAnyReg x
1185 tmp <- getNewRegNat rep
1186 y_code <- getAnyReg y
1188 code = x_code tmp `appOL`
1190 instr (OpReg ecx) (OpReg tmp)
1192 return (Fixed rep tmp code)
1194 --------------------
1195 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1196 add_code rep x (CmmLit (CmmInt y _))
1197 | not (is64BitInteger y) = add_int rep x y
1198 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1200 --------------------
1201 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1202 sub_code rep x (CmmLit (CmmInt y _))
1203 | not (is64BitInteger (-y)) = add_int rep x (-y)
1204 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1206 -- our three-operand add instruction:
1207 add_int rep x y = do
1208 (x_reg, x_code) <- getSomeReg x
1210 imm = ImmInt (fromInteger y)
1214 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1217 return (Any rep code)
1219 ----------------------
1220 div_code rep signed quotient x y = do
1221 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1222 x_code <- getAnyReg x
1224 widen | signed = CLTD rep
1225 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1227 instr | signed = IDIV
1230 code = y_code `appOL`
1232 toOL [widen, instr rep y_op]
1234 result | quotient = eax
1238 return (Fixed rep result code)
1241 getRegister (CmmLoad mem pk)
1244 Amode src mem_code <- getAmode mem
1246 code dst = mem_code `snocOL`
1247 IF_ARCH_i386(GLD pk src dst,
1248 MOV pk (OpAddr src) (OpReg dst))
1250 return (Any pk code)
1252 #if i386_TARGET_ARCH
1253 getRegister (CmmLoad mem pk)
1256 code <- intLoadCode (instr pk) mem
1257 return (Any pk code)
1259 instr I8 = MOVZxL pk
1262 -- we always zero-extend 8-bit loads, if we
1263 -- can't think of anything better. This is because
1264 -- we can't guarantee access to an 8-bit variant of every register
1265 -- (esi and edi don't have 8-bit variants), so to make things
1266 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1269 #if x86_64_TARGET_ARCH
1270 -- Simpler memory load code on x86_64
1271 getRegister (CmmLoad mem pk)
1273 code <- intLoadCode (MOV pk) mem
1274 return (Any pk code)
1277 getRegister (CmmLit (CmmInt 0 rep))
1279 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1280 adj_rep = case rep of I64 -> I32; _ -> rep
1281 rep1 = IF_ARCH_i386( rep, adj_rep )
1283 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1285 return (Any rep code)
1287 #if x86_64_TARGET_ARCH
1288 -- optimisation for loading small literals on x86_64: take advantage
1289 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1290 -- instruction forms are shorter.
1291 getRegister (CmmLit lit)
1292 | I64 <- cmmLitRep lit, not (isBigLit lit)
1295 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1297 return (Any I64 code)
1299 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1301 -- note1: not the same as is64BitLit, because that checks for
1302 -- signed literals that fit in 32 bits, but we want unsigned
1304 -- note2: all labels are small, because we're assuming the
1305 -- small memory model (see gcc docs, -mcmodel=small).
1308 getRegister (CmmLit lit)
1312 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1314 return (Any rep code)
1316 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build the code for an integer load: compute the address of @addrExpr@,
-- then apply the given load-instruction constructor (e.g. @MOV pk@ or
-- @MOVZxL pk@) from that address into a register chosen by the caller.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mkLoad addrExpr = do
    Amode src mem_code <- getAmode addrExpr
    let loadInto dst = mkLoad (OpAddr src) (OpReg dst)
    return (\dst -> mem_code `snocOL` loadInto dst)
1325 -- Compute an expression into *any* register, adding the appropriate
1326 -- move instruction if necessary.
1327 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1329 r <- getRegister expr
1332 anyReg :: Register -> NatM (Reg -> InstrBlock)
1333 anyReg (Any _ code) = return code
1334 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1336 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1337 -- Fixed registers might not be byte-addressable, so we make sure we've
1338 -- got a temporary, inserting an extra reg copy if necessary.
1339 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1340 #if x86_64_TARGET_ARCH
1341 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1343 getByteReg expr = do
1344 r <- getRegister expr
1347 tmp <- getNewRegNat rep
1348 return (tmp, code tmp)
1350 | isVirtualReg reg -> return (reg,code)
1352 tmp <- getNewRegNat rep
1353 return (tmp, code `snocOL` reg2reg rep reg tmp)
1354 -- ToDo: could optimise slightly by checking for byte-addressable
1355 -- real registers, but that will happen very rarely if at all.
1358 -- Another variant: this time we want the result in a register that cannot
1359 -- be modified by code to evaluate an arbitrary expression.
1360 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1361 getNonClobberedReg expr = do
1362 r <- getRegister expr
1365 tmp <- getNewRegNat rep
1366 return (tmp, code tmp)
1368 -- only free regs can be clobbered
1369 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1370 tmp <- getNewRegNat rep
1371 return (tmp, code `snocOL` reg2reg rep reg tmp)
1375 reg2reg :: MachRep -> Reg -> Reg -> Instr
1377 #if i386_TARGET_ARCH
1378 | isFloatingRep rep = GMOV src dst
1380 | otherwise = MOV rep (OpReg src) (OpReg dst)
1382 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1384 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1386 #if sparc_TARGET_ARCH
1388 getRegister (CmmLit (CmmFloat f F32)) = do
1389 lbl <- getNewLabelNat
1390 let code dst = toOL [
1393 CmmStaticLit (CmmFloat f F32)],
1394 SETHI (HI (ImmCLbl lbl)) dst,
1395 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1396 return (Any F32 code)
1398 getRegister (CmmLit (CmmFloat d F64)) = do
1399 lbl <- getNewLabelNat
1400 let code dst = toOL [
1403 CmmStaticLit (CmmFloat d F64)],
1404 SETHI (HI (ImmCLbl lbl)) dst,
1405 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1406 return (Any F64 code)
1408 getRegister (CmmMachOp mop [x]) -- unary MachOps
1410 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1411 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1413 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1414 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1416 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1418 MO_U_Conv F64 F32-> coerceDbl2Flt x
1419 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1421 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1422 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1423 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1424 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1426 -- Conversions which are a nop on sparc
1428 | from == to -> conversionNop to x
1429 MO_U_Conv I32 to -> conversionNop to x
1430 MO_S_Conv I32 to -> conversionNop to x
1433 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1434 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1435 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1436 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1438 other_op -> panic "Unknown unary mach op"
1441 integerExtend signed from to expr = do
1442 (reg, e_code) <- getSomeReg expr
1446 ((if signed then SRA else SRL)
1447 reg (RIImm (ImmInt 0)) dst)
1448 return (Any to code)
1449 conversionNop new_rep expr
1450 = do e_code <- getRegister expr
1451 return (swizzleRegisterRep e_code new_rep)
1453 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1455 MO_Eq F32 -> condFltReg EQQ x y
1456 MO_Ne F32 -> condFltReg NE x y
1458 MO_S_Gt F32 -> condFltReg GTT x y
1459 MO_S_Ge F32 -> condFltReg GE x y
1460 MO_S_Lt F32 -> condFltReg LTT x y
1461 MO_S_Le F32 -> condFltReg LE x y
1463 MO_Eq F64 -> condFltReg EQQ x y
1464 MO_Ne F64 -> condFltReg NE x y
1466 MO_S_Gt F64 -> condFltReg GTT x y
1467 MO_S_Ge F64 -> condFltReg GE x y
1468 MO_S_Lt F64 -> condFltReg LTT x y
1469 MO_S_Le F64 -> condFltReg LE x y
1471 MO_Eq rep -> condIntReg EQQ x y
1472 MO_Ne rep -> condIntReg NE x y
1474 MO_S_Gt rep -> condIntReg GTT x y
1475 MO_S_Ge rep -> condIntReg GE x y
1476 MO_S_Lt rep -> condIntReg LTT x y
1477 MO_S_Le rep -> condIntReg LE x y
1479 MO_U_Gt I32 -> condIntReg GTT x y
1480 MO_U_Ge I32 -> condIntReg GE x y
1481 MO_U_Lt I32 -> condIntReg LTT x y
1482 MO_U_Le I32 -> condIntReg LE x y
1484 MO_U_Gt I16 -> condIntReg GU x y
1485 MO_U_Ge I16 -> condIntReg GEU x y
1486 MO_U_Lt I16 -> condIntReg LU x y
1487 MO_U_Le I16 -> condIntReg LEU x y
1489 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1490 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1492 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1494 -- ToDo: teach about V8+ SPARC div instructions
1495 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1496 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1497 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1498 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1500 MO_Add F32 -> trivialFCode F32 FADD x y
1501 MO_Sub F32 -> trivialFCode F32 FSUB x y
1502 MO_Mul F32 -> trivialFCode F32 FMUL x y
1503 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1505 MO_Add F64 -> trivialFCode F64 FADD x y
1506 MO_Sub F64 -> trivialFCode F64 FSUB x y
1507 MO_Mul F64 -> trivialFCode F64 FMUL x y
1508 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1510 MO_And rep -> trivialCode rep (AND False) x y
1511 MO_Or rep -> trivialCode rep (OR False) x y
1512 MO_Xor rep -> trivialCode rep (XOR False) x y
1514 MO_Mul rep -> trivialCode rep (SMUL False) x y
1516 MO_Shl rep -> trivialCode rep SLL x y
1517 MO_U_Shr rep -> trivialCode rep SRL x y
1518 MO_S_Shr rep -> trivialCode rep SRA x y
1521 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1522 [promote x, promote y])
1523 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1524 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1527 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1529 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1531 --------------------
1532 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1533 imulMayOflo rep a b = do
1534 (a_reg, a_code) <- getSomeReg a
1535 (b_reg, b_code) <- getSomeReg b
1536 res_lo <- getNewRegNat I32
1537 res_hi <- getNewRegNat I32
1539 shift_amt = case rep of
1542 _ -> panic "shift_amt"
1543 code dst = a_code `appOL` b_code `appOL`
1545 SMUL False a_reg (RIReg b_reg) res_lo,
1547 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1548 SUB False False res_lo (RIReg res_hi) dst
1550 return (Any I32 code)
1552 getRegister (CmmLoad mem pk) = do
1553 Amode src code <- getAmode mem
1555 code__2 dst = code `snocOL` LD pk src dst
1556 return (Any pk code__2)
1558 getRegister (CmmLit (CmmInt i _))
1561 src = ImmInt (fromInteger i)
1562 code dst = unitOL (OR False g0 (RIImm src) dst)
1564 return (Any I32 code)
1566 getRegister (CmmLit lit)
1567 = let rep = cmmLitRep lit
1571 OR False dst (RIImm (LO imm)) dst]
1572 in return (Any I32 code)
1574 #endif /* sparc_TARGET_ARCH */
1576 #if powerpc_TARGET_ARCH
1577 getRegister (CmmLoad mem pk)
1580 Amode addr addr_code <- getAmode mem
1581 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1582 addr_code `snocOL` LD pk dst addr
1583 return (Any pk code)
1585 -- catch simple cases of zero- or sign-extended load
1586 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1587 Amode addr addr_code <- getAmode mem
1588 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1590 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1592 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1593 Amode addr addr_code <- getAmode mem
1594 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1596 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1597 Amode addr addr_code <- getAmode mem
1598 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1600 getRegister (CmmMachOp mop [x]) -- unary MachOps
1602 MO_Not rep -> trivialUCode rep NOT x
1604 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1605 MO_S_Conv F32 F64 -> conversionNop F64 x
1608 | from == to -> conversionNop to x
1609 | isFloatingRep from -> coerceFP2Int from to x
1610 | isFloatingRep to -> coerceInt2FP from to x
1612 -- narrowing is a nop: we treat the high bits as undefined
1613 MO_S_Conv I32 to -> conversionNop to x
1614 MO_S_Conv I16 I8 -> conversionNop I8 x
1615 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1616 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1619 | from == to -> conversionNop to x
1620 -- narrowing is a nop: we treat the high bits as undefined
1621 MO_U_Conv I32 to -> conversionNop to x
1622 MO_U_Conv I16 I8 -> conversionNop I8 x
1623 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1624 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1626 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1627 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1628 MO_S_Neg rep -> trivialUCode rep NEG x
1631 conversionNop new_rep expr
1632 = do e_code <- getRegister expr
1633 return (swizzleRegisterRep e_code new_rep)
1635 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1637 MO_Eq F32 -> condFltReg EQQ x y
1638 MO_Ne F32 -> condFltReg NE x y
1640 MO_S_Gt F32 -> condFltReg GTT x y
1641 MO_S_Ge F32 -> condFltReg GE x y
1642 MO_S_Lt F32 -> condFltReg LTT x y
1643 MO_S_Le F32 -> condFltReg LE x y
1645 MO_Eq F64 -> condFltReg EQQ x y
1646 MO_Ne F64 -> condFltReg NE x y
1648 MO_S_Gt F64 -> condFltReg GTT x y
1649 MO_S_Ge F64 -> condFltReg GE x y
1650 MO_S_Lt F64 -> condFltReg LTT x y
1651 MO_S_Le F64 -> condFltReg LE x y
1653 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1654 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1656 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1657 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1658 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1659 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1661 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1662 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1663 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1664 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1667 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1668 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1669 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1671 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1672 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1673 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1674 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1676 -- optimize addition with 32-bit immediate
1680 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1681 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1684 (src, srcCode) <- getSomeReg x
1685 let imm = litToImm lit
1686 code dst = srcCode `appOL` toOL [
1687 ADDIS dst src (HA imm),
1688 ADD dst dst (RIImm (LO imm))
1690 return (Any I32 code)
1691 _ -> trivialCode I32 True ADD x y
1693 MO_Add rep -> trivialCode rep True ADD x y
1695 case y of -- subfi ('substract from' with immediate) doesn't exist
1696 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1697 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1698 _ -> trivialCodeNoImm rep SUBF y x
1700 MO_Mul rep -> trivialCode rep True MULLW x y
1702 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1704 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1705 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1707 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1708 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1710 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1711 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1713 MO_And rep -> trivialCode rep False AND x y
1714 MO_Or rep -> trivialCode rep False OR x y
1715 MO_Xor rep -> trivialCode rep False XOR x y
1717 MO_Shl rep -> trivialCode rep False SLW x y
1718 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1719 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1721 getRegister (CmmLit (CmmInt i rep))
1722 | Just imm <- makeImmediate rep True i
1724 code dst = unitOL (LI dst imm)
1726 return (Any rep code)
1728 getRegister (CmmLit (CmmFloat f frep)) = do
1729 lbl <- getNewLabelNat
1730 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1731 Amode addr addr_code <- getAmode dynRef
1733 LDATA ReadOnlyData [CmmDataLabel lbl,
1734 CmmStaticLit (CmmFloat f frep)]
1735 `consOL` (addr_code `snocOL` LD frep dst addr)
1736 return (Any frep code)
1738 getRegister (CmmLit lit)
1739 = let rep = cmmLitRep lit
1743 OR dst dst (RIImm (LO imm))
1745 in return (Any rep code)
1747 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1749 -- extend?Rep: wrap integer expression of type rep
1750 -- in a conversion to I32
1751 extendSExpr I32 x = x
1752 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1753 extendUExpr I32 x = x
1754 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1756 #endif /* powerpc_TARGET_ARCH */
1759 -- -----------------------------------------------------------------------------
1760 -- The 'Amode' type: Memory addressing modes passed up the tree.
1762 data Amode = Amode AddrMode InstrBlock
1765 Now, given a tree (the argument to an CmmLoad) that references memory,
1766 produce a suitable addressing mode.
1768 A Rule of the Game (tm) for Amodes: use of the addr bit must
1769 immediately follow use of the code part, since the code part puts
1770 values in registers which the addr then refers to. So you can't put
1771 anything in between, lest it overwrite some of those registers. If
1772 you need to do some other computation between the code part and use of
1773 the addr bit, first store the effective address from the amode in a
1774 temporary, then do the other computation, and then use the temporary:
1778 ... other computation ...
1782 getAmode :: CmmExpr -> NatM Amode
1783 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1785 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1787 #if alpha_TARGET_ARCH
1789 getAmode (StPrim IntSubOp [x, StInt i])
1790 = getNewRegNat PtrRep `thenNat` \ tmp ->
1791 getRegister x `thenNat` \ register ->
1793 code = registerCode register tmp
1794 reg = registerName register tmp
1795 off = ImmInt (-(fromInteger i))
1797 return (Amode (AddrRegImm reg off) code)
1799 getAmode (StPrim IntAddOp [x, StInt i])
1800 = getNewRegNat PtrRep `thenNat` \ tmp ->
1801 getRegister x `thenNat` \ register ->
1803 code = registerCode register tmp
1804 reg = registerName register tmp
1805 off = ImmInt (fromInteger i)
1807 return (Amode (AddrRegImm reg off) code)
1811 = return (Amode (AddrImm imm__2) id)
1814 imm__2 = case imm of Just x -> x
1817 = getNewRegNat PtrRep `thenNat` \ tmp ->
1818 getRegister other `thenNat` \ register ->
1820 code = registerCode register tmp
1821 reg = registerName register tmp
1823 return (Amode (AddrReg reg) code)
1825 #endif /* alpha_TARGET_ARCH */
1827 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1829 #if x86_64_TARGET_ARCH
1831 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1832 CmmLit displacement])
1833 = return $ Amode (ripRel (litToImm displacement)) nilOL
1837 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1839 -- This is all just ridiculous, since it carefully undoes
1840 -- what mangleIndexTree has just done.
1841 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1842 | not (is64BitLit lit)
1843 -- ASSERT(rep == I32)???
1844 = do (x_reg, x_code) <- getSomeReg x
1845 let off = ImmInt (-(fromInteger i))
1846 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1848 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1849 | not (is64BitLit lit)
1850 -- ASSERT(rep == I32)???
1851 = do (x_reg, x_code) <- getSomeReg x
1852 let off = ImmInt (fromInteger i)
1853 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1855 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1856 -- recognised by the next rule.
1857 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1859 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1861 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1862 [y, CmmLit (CmmInt shift _)]])
1863 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1864 = x86_complex_amode x y shift 0
1866 getAmode (CmmMachOp (MO_Add rep)
1867 [x, CmmMachOp (MO_Add _)
1868 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1869 CmmLit (CmmInt offset _)]])
1870 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1871 && not (is64BitInteger offset)
1872 = x86_complex_amode x y shift offset
1874 getAmode (CmmMachOp (MO_Add rep) [x,y])
1875 = x86_complex_amode x y 0 0
1877 getAmode (CmmLit lit) | not (is64BitLit lit)
1878 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1881 (reg,code) <- getSomeReg expr
1882 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1885 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1886 x86_complex_amode base index shift offset
1887 = do (x_reg, x_code) <- getNonClobberedReg base
1888 -- x must be in a temp, because it has to stay live over y_code
1889 -- we could compre x_reg and y_reg and do something better here...
1890 (y_reg, y_code) <- getSomeReg index
1892 code = x_code `appOL` y_code
1893 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1894 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1897 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1899 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1901 #if sparc_TARGET_ARCH
1903 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1906 (reg, code) <- getSomeReg x
1908 off = ImmInt (-(fromInteger i))
1909 return (Amode (AddrRegImm reg off) code)
1912 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1915 (reg, code) <- getSomeReg x
1917 off = ImmInt (fromInteger i)
1918 return (Amode (AddrRegImm reg off) code)
1920 getAmode (CmmMachOp (MO_Add rep) [x, y])
1922 (regX, codeX) <- getSomeReg x
1923 (regY, codeY) <- getSomeReg y
1925 code = codeX `appOL` codeY
1926 return (Amode (AddrRegReg regX regY) code)
1928 -- XXX Is this same as "leaf" in Stix?
1929 getAmode (CmmLit lit)
1931 tmp <- getNewRegNat I32
1933 code = unitOL (SETHI (HI imm__2) tmp)
1934 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1936 imm__2 = litToImm lit
1940 (reg, code) <- getSomeReg other
1943 return (Amode (AddrRegImm reg off) code)
1945 #endif /* sparc_TARGET_ARCH */
1947 #ifdef powerpc_TARGET_ARCH
1948 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1949 | Just off <- makeImmediate I32 True (-i)
1951 (reg, code) <- getSomeReg x
1952 return (Amode (AddrRegImm reg off) code)
1955 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1956 | Just off <- makeImmediate I32 True i
1958 (reg, code) <- getSomeReg x
1959 return (Amode (AddrRegImm reg off) code)
1961 -- optimize addition with 32-bit immediate
1963 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1965 tmp <- getNewRegNat I32
1966 (src, srcCode) <- getSomeReg x
1967 let imm = litToImm lit
1968 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1969 return (Amode (AddrRegImm tmp (LO imm)) code)
1971 getAmode (CmmLit lit)
1973 tmp <- getNewRegNat I32
1974 let imm = litToImm lit
1975 code = unitOL (LIS tmp (HA imm))
1976 return (Amode (AddrRegImm tmp (LO imm)) code)
1978 getAmode (CmmMachOp (MO_Add I32) [x, y])
1980 (regX, codeX) <- getSomeReg x
1981 (regY, codeY) <- getSomeReg y
1982 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1986 (reg, code) <- getSomeReg other
1989 return (Amode (AddrRegImm reg off) code)
1990 #endif /* powerpc_TARGET_ARCH */
1992 -- -----------------------------------------------------------------------------
1993 -- getOperand: sometimes any operand will do.
1995 -- getNonClobberedOperand: the value of the operand will remain valid across
1996 -- the computation of an arbitrary expression, unless the expression
1997 -- is computed directly into a register which the operand refers to
1998 -- (see trivialCode where this function is used for an example).
2000 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2002 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2003 #if x86_64_TARGET_ARCH
2004 getNonClobberedOperand (CmmLit lit)
2005 | isSuitableFloatingPointLit lit = do
2006 lbl <- getNewLabelNat
2007 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2009 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2011 getNonClobberedOperand (CmmLit lit)
2012 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2013 return (OpImm (litToImm lit), nilOL)
2014 getNonClobberedOperand (CmmLoad mem pk)
2015 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2016 Amode src mem_code <- getAmode mem
2018 if (amodeCouldBeClobbered src)
2020 tmp <- getNewRegNat wordRep
2021 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2022 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2025 return (OpAddr src', save_code `appOL` mem_code)
2026 getNonClobberedOperand e = do
2027 (reg, code) <- getNonClobberedReg e
2028 return (OpReg reg, code)
2030 amodeCouldBeClobbered :: AddrMode -> Bool
2031 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2033 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2034 regClobbered _ = False
2036 -- getOperand: the operand is not required to remain valid across the
2037 -- computation of an arbitrary expression.
2038 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2039 #if x86_64_TARGET_ARCH
2040 getOperand (CmmLit lit)
2041 | isSuitableFloatingPointLit lit = do
2042 lbl <- getNewLabelNat
2043 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2045 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2047 getOperand (CmmLit lit)
2048 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2049 return (OpImm (litToImm lit), nilOL)
2050 getOperand (CmmLoad mem pk)
2051 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2052 Amode src mem_code <- getAmode mem
2053 return (OpAddr src, mem_code)
2055 (reg, code) <- getSomeReg e
2056 return (OpReg reg, code)
2058 isOperand :: CmmExpr -> Bool
2059 isOperand (CmmLoad _ _) = True
2060 isOperand (CmmLit lit) = not (is64BitLit lit)
2061 || isSuitableFloatingPointLit lit
2064 -- if we want a floating-point literal as an operand, we can
2065 -- use it directly from memory. However, if the literal is
2066 -- zero, we're better off generating it into a register using
2068 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2069 isSuitableFloatingPointLit _ = False
2071 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2072 getRegOrMem (CmmLoad mem pk)
2073 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2074 Amode src mem_code <- getAmode mem
2075 return (OpAddr src, mem_code)
2077 (reg, code) <- getNonClobberedReg e
2078 return (OpReg reg, code)
2080 #if x86_64_TARGET_ARCH
2081 is64BitLit (CmmInt i I64) = is64BitInteger i
2082 -- assume that labels are in the range 0-2^31-1: this assumes the
2083 -- small memory model (see gcc docs, -mcmodel=small).
2085 is64BitLit x = False
-- | True when the value, truncated to 64 bits, cannot be represented as
-- a signed 32-bit immediate.
--
-- A CmmInt is intended to be truncated to the appropriate number of
-- bits, so we truncate to Int64 first.  This is important because e.g.
-- -1 as a CmmInt might be either -1 or 18446744073709551615.
is64BitInteger :: Integer -> Bool
is64BitInteger i = not (fitsIn32 (fromIntegral i :: Int64))
  where
    fitsIn32 :: Int64 -> Bool
    fitsIn32 v = v >= -0x80000000 && v <= 0x7fffffff
2096 -- -----------------------------------------------------------------------------
2097 -- The 'CondCode' type: Condition codes passed up the tree.
2099 data CondCode = CondCode Bool Cond InstrBlock
2101 -- Set up a condition code for a conditional branch.
2103 getCondCode :: CmmExpr -> NatM CondCode
2105 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2107 #if alpha_TARGET_ARCH
2108 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2109 #endif /* alpha_TARGET_ARCH */
2111 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2113 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2114 -- yes, they really do seem to want exactly the same!
-- Dispatch on the comparison operator: F32/F64 comparisons go to
-- condFltCode, integer comparisons to condIntCode.  Signedness picks
-- the condition (GTT/GE/LTT/LE signed vs GU/GEU/LU/LEU unsigned).
-- NOTE(review): the scrutinee line ("= case mop of") is elided in this
-- view -- confirm against the full source.
2116 getCondCode (CmmMachOp mop [x, y])
2119 MO_Eq F32 -> condFltCode EQQ x y
2120 MO_Ne F32 -> condFltCode NE x y
2122 MO_S_Gt F32 -> condFltCode GTT x y
2123 MO_S_Ge F32 -> condFltCode GE x y
2124 MO_S_Lt F32 -> condFltCode LTT x y
2125 MO_S_Le F32 -> condFltCode LE x y
2127 MO_Eq F64 -> condFltCode EQQ x y
2128 MO_Ne F64 -> condFltCode NE x y
2130 MO_S_Gt F64 -> condFltCode GTT x y
2131 MO_S_Ge F64 -> condFltCode GE x y
2132 MO_S_Lt F64 -> condFltCode LTT x y
2133 MO_S_Le F64 -> condFltCode LE x y
2135 MO_Eq rep -> condIntCode EQQ x y
2136 MO_Ne rep -> condIntCode NE x y
2138 MO_S_Gt rep -> condIntCode GTT x y
2139 MO_S_Ge rep -> condIntCode GE x y
2140 MO_S_Lt rep -> condIntCode LTT x y
2141 MO_S_Le rep -> condIntCode LE x y
2143 MO_U_Gt rep -> condIntCode GU x y
2144 MO_U_Ge rep -> condIntCode GEU x y
2145 MO_U_Lt rep -> condIntCode LU x y
2146 MO_U_Le rep -> condIntCode LEU x y
-- Non-comparison MachOps should never reach getCondCode.
2148 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2150 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2152 #elif powerpc_TARGET_ARCH
2154 -- almost the same as everywhere else - but we need to
2155 -- extend small integers to 32 bit first
-- PowerPC compares whole registers, so sub-word operands are widened:
-- signed comparisons sign-extend (extendSExpr), unsigned comparisons
-- and equality zero-extend (extendUExpr).
2157 getCondCode (CmmMachOp mop [x, y])
2159 MO_Eq F32 -> condFltCode EQQ x y
2160 MO_Ne F32 -> condFltCode NE x y
2162 MO_S_Gt F32 -> condFltCode GTT x y
2163 MO_S_Ge F32 -> condFltCode GE x y
2164 MO_S_Lt F32 -> condFltCode LTT x y
2165 MO_S_Le F32 -> condFltCode LE x y
2167 MO_Eq F64 -> condFltCode EQQ x y
2168 MO_Ne F64 -> condFltCode NE x y
2170 MO_S_Gt F64 -> condFltCode GTT x y
2171 MO_S_Ge F64 -> condFltCode GE x y
2172 MO_S_Lt F64 -> condFltCode LTT x y
2173 MO_S_Le F64 -> condFltCode LE x y
2175 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2176 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2178 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2179 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2180 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2181 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2183 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2184 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2185 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2186 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2188 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2190 getCondCode other = panic "getCondCode(2)(powerpc)"
2196 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2197 -- passed back up the tree.
-- Both take the condition to test, left operand, right operand.
2199 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2201 #if alpha_TARGET_ARCH
2202 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2203 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2204 #endif /* alpha_TARGET_ARCH */
2206 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2207 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2209 -- memory vs immediate
-- Compare an immediate directly against memory: CMP imm, [addr].
-- Guarded so we never feed a >32-bit immediate to CMP on x86-64.
2210 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2211 Amode x_addr x_code <- getAmode x
2214 code = x_code `snocOL`
2215 CMP pk (OpImm imm) (OpAddr x_addr)
2217 return (CondCode False cond code)
2219 -- anything vs zero, using a mask
2220 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) compared with 0: use TEST mask, reg instead of AND+CMP.
2221 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2222 | (CmmLit (CmmInt mask pk2)) <- o2
2224 (x_reg, x_code) <- getSomeReg x
2226 code = x_code `snocOL`
2227 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2229 return (CondCode False cond code)
-- anything vs zero: TEST reg, reg sets flags without an immediate.
2232 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2233 (x_reg, x_code) <- getSomeReg x
2235 code = x_code `snocOL`
2236 TEST pk (OpReg x_reg) (OpReg x_reg)
2238 return (CondCode False cond code)
2240 -- anything vs operand
-- x must be in a register that y's code cannot clobber, since y's
-- code runs after x's.
2241 condIntCode cond x y | isOperand y = do
2242 (x_reg, x_code) <- getNonClobberedReg x
2243 (y_op, y_code) <- getOperand y
2245 code = x_code `appOL` y_code `snocOL`
2246 CMP (cmmExprRep x) y_op (OpReg x_reg)
2248 return (CondCode False cond code)
2250 -- anything vs anything
2251 condIntCode cond x y = do
2252 (y_reg, y_code) <- getNonClobberedReg y
2253 (x_op, x_code) <- getRegOrMem x
2255 code = y_code `appOL`
2257 CMP (cmmExprRep x) (OpReg y_reg) x_op
2259 return (CondCode False cond code)
2262 #if i386_TARGET_ARCH
-- x87 float comparison.  GCMP folds the requested condition into the
-- comparison itself, so only the listed conditions are supported.
2263 condFltCode cond x y
2264 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2265 (x_reg, x_code) <- getNonClobberedReg x
2266 (y_reg, y_code) <- getSomeReg y
2268 code = x_code `appOL` y_code `snocOL`
2269 GCMP cond x_reg y_reg
2270 -- The GCMP insn does the test and sets the zero flag if comparable
2271 -- and true. Hence we always supply EQQ as the condition to test.
2272 return (CondCode True EQQ code)
2273 #endif /* i386_TARGET_ARCH */
2275 #if x86_64_TARGET_ARCH
2276 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2277 -- an operand, but the right must be a reg. We can probably do better
2278 -- than this general case...
2279 condFltCode cond x y = do
2280 (x_reg, x_code) <- getNonClobberedReg x
2281 (y_op, y_code) <- getOperand y
2283 code = x_code `appOL`
2285 CMP (cmmExprRep x) y_op (OpReg x_reg)
2286 -- NB(1): we need to use the unsigned comparison operators on the
2287 -- result of this comparison.
-- ucomiss/ucomisd set CF/ZF/PF like an unsigned compare, hence
-- condToUnsigned here.
2289 return (CondCode True (condToUnsigned cond) code)
2292 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2294 #if sparc_TARGET_ARCH
-- Integer compare via "subcc src1, src2, %g0": a subtract that sets
-- the condition codes and discards the result into %g0.
2296 condIntCode cond x (CmmLit (CmmInt y rep))
2299 (src1, code) <- getSomeReg x
2301 src2 = ImmInt (fromInteger y)
2302 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2303 return (CondCode False cond code')
2305 condIntCode cond x y = do
2306 (src1, code1) <- getSomeReg x
2307 (src2, code2) <- getSomeReg y
2309 code__2 = code1 `appOL` code2 `snocOL`
2310 SUB False True src1 (RIReg src2) g0
2311 return (CondCode False cond code__2)
-- Float compare: SPARC's FCMP wants both operands at the same width,
-- so a single F32 operand is promoted to F64 (via FxTOy into tmp)
-- before comparing.
2314 condFltCode cond x y = do
2315 (src1, code1) <- getSomeReg x
2316 (src2, code2) <- getSomeReg y
2317 tmp <- getNewRegNat F64
2319 promote x = FxTOy F32 F64 x tmp
2326 code1 `appOL` code2 `snocOL`
2327 FCMP True pk1 src1 src2
2328 else if pk1 == F32 then
2329 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2330 FCMP True F64 tmp src2
2332 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2333 FCMP True F64 src1 tmp
2334 return (CondCode True cond code__2)
2336 #endif /* sparc_TARGET_ARCH */
2338 #if powerpc_TARGET_ARCH
2339 -- ###FIXME: I16 and I8!
-- CMPL is the unsigned ("logical") compare, CMP the signed one.
-- The immediate form only fires when the literal fits the instruction's
-- immediate field (makeImmediate succeeds).
2340 condIntCode cond x (CmmLit (CmmInt y rep))
2341 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2343 (src1, code) <- getSomeReg x
2345 code' = code `snocOL`
2346 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2347 return (CondCode False cond code')
2349 condIntCode cond x y = do
2350 (src1, code1) <- getSomeReg x
2351 (src2, code2) <- getSomeReg y
2353 code' = code1 `appOL` code2 `snocOL`
2354 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2355 return (CondCode False cond code')
2357 condFltCode cond x y = do
2358 (src1, code1) <- getSomeReg x
2359 (src2, code2) <- getSomeReg y
2361 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2362 code'' = case cond of -- twiddle CR to handle unordered case
2363 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2364 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2367 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2368 return (CondCode True cond code'')
2370 #endif /* powerpc_TARGET_ARCH */
2372 -- -----------------------------------------------------------------------------
2373 -- Generating assignments
2375 -- Assignments are really at the heart of the whole code generation
2376 -- business. Almost all top-level nodes of any real importance are
2377 -- assignments, which correspond to loads, stores, or register
2378 -- transfers. If we're really lucky, some of the register transfers
2379 -- will go away, because we can use the destination register to
2380 -- complete the code generation for the right hand side. This only
2381 -- fails when the right hand side is forced into a fixed register
2382 -- (e.g. the result of a call).
-- Mem variants: rep, destination address expression, rhs.
-- Reg variants: rep, destination register, rhs.
2384 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2385 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2387 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2388 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2390 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2392 #if alpha_TARGET_ARCH
-- Legacy Alpha backend, pre-Cmm (thenNat/Register style); kept only
-- for reference.  NOTE(review): several continuation lines are elided
-- in this view.
2394 assignIntCode pk (CmmLoad dst _) src
2395 = getNewRegNat IntRep `thenNat` \ tmp ->
2396 getAmode dst `thenNat` \ amode ->
2397 getRegister src `thenNat` \ register ->
2399 code1 = amodeCode amode []
2400 dst__2 = amodeAddr amode
2401 code2 = registerCode register tmp []
2402 src__2 = registerName register tmp
2403 sz = primRepToSize pk
2404 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2408 assignIntCode pk dst src
2409 = getRegister dst `thenNat` \ register1 ->
2410 getRegister src `thenNat` \ register2 ->
2412 dst__2 = registerName register1 zeroh
2413 code = registerCode register2 dst__2
2414 src__2 = registerName register2 dst__2
2415 code__2 = if isFixed register2
2416 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2421 #endif /* alpha_TARGET_ARCH */
2423 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2425 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2427 -- integer assignment to memory
2429 -- specific case of adding/subtracting an integer to a particular address.
2430 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Read-modify-write: "mem[addr] := mem[addr] +/- imm" becomes a single
-- ADD/SUB on the memory operand, provided the load address is
-- syntactically identical and the immediate fits 32 bits.
2432 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2433 CmmLit (CmmInt i _)])
2434 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2435 Just instr <- check op
2436 = do Amode amode code_addr <- getAmode addr
2437 let code = code_addr `snocOL`
2438 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2441 check (MO_Add _) = Just ADD
2442 check (MO_Sub _) = Just SUB
-- General case: compute src into a register or immediate, then MOV.
2447 assignMem_IntCode pk addr src = do
2448 Amode addr code_addr <- getAmode addr
2449 (code_src, op_src) <- get_op_RI src
2451 code = code_src `appOL`
2453 MOV pk op_src (OpAddr addr)
2454 -- NOTE: op_src is stable, so it will still be valid
2455 -- after code_addr. This may involve the introduction
2456 -- of an extra MOV to a temporary register, but we hope
2457 -- the register allocator will get rid of it.
2461 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2462 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2463 = return (nilOL, OpImm (litToImm lit))
2465 = do (reg,code) <- getNonClobberedReg op
2466 return (code, OpReg reg)
2469 -- Assign; dst is a reg, rhs is mem
2470 assignReg_IntCode pk reg (CmmLoad src _) = do
2471 load_code <- intLoadCode (MOV pk) src
2472 return (load_code (getRegisterReg reg))
2474 -- dst is a reg, but src could be anything
2475 assignReg_IntCode pk reg src = do
2476 code <- getAnyReg src
2477 return (code (getRegisterReg reg))
2479 #endif /* i386_TARGET_ARCH */
2481 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2483 #if sparc_TARGET_ARCH
-- Store the source register at the computed address.
2485 assignMem_IntCode pk addr src = do
2486 (srcReg, code) <- getSomeReg src
2487 Amode dstAddr addr_code <- getAmode addr
2488 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Register assignment: if the rhs lands in a fixed register, copy it
-- across with "or %g0, freg, dst" (a register move on SPARC).
2490 assignReg_IntCode pk reg src = do
2491 r <- getRegister src
2493 Any _ code -> code dst
2494 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2496 dst = getRegisterReg reg
2499 #endif /* sparc_TARGET_ARCH */
2501 #if powerpc_TARGET_ARCH
-- Store the source register at the computed address.
2503 assignMem_IntCode pk addr src = do
2504 (srcReg, code) <- getSomeReg src
2505 Amode dstAddr addr_code <- getAmode addr
2506 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2508 -- dst is a reg, but src could be anything
-- If the rhs ends up in a fixed register, move it with MR.
2509 assignReg_IntCode pk reg src
2511 r <- getRegister src
2513 Any _ code -> code dst
2514 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2516 dst = getRegisterReg reg
2518 #endif /* powerpc_TARGET_ARCH */
2521 -- -----------------------------------------------------------------------------
2522 -- Floating-point assignments
2524 #if alpha_TARGET_ARCH
-- Legacy Alpha backend, pre-Cmm style; mirrors assignIntCode above but
-- uses FMOV for the register-to-register case.  NOTE(review): several
-- continuation lines are elided in this view.
2526 assignFltCode pk (CmmLoad dst _) src
2527 = getNewRegNat pk `thenNat` \ tmp ->
2528 getAmode dst `thenNat` \ amode ->
2529 getRegister src `thenNat` \ register ->
2531 code1 = amodeCode amode []
2532 dst__2 = amodeAddr amode
2533 code2 = registerCode register tmp []
2534 src__2 = registerName register tmp
2535 sz = primRepToSize pk
2536 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2540 assignFltCode pk dst src
2541 = getRegister dst `thenNat` \ register1 ->
2542 getRegister src `thenNat` \ register2 ->
2544 dst__2 = registerName register1 zeroh
2545 code = registerCode register2 dst__2
2546 src__2 = registerName register2 dst__2
2547 code__2 = if isFixed register2
2548 then code . mkSeqInstr (FMOV src__2 dst__2)
2553 #endif /* alpha_TARGET_ARCH */
2555 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2557 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2559 -- Floating point assignment to memory
-- i386 stores via the x87 GST instruction; x86-64 floats live in
-- ordinary (SSE) registers, so a plain MOV suffices.
2560 assignMem_FltCode pk addr src = do
2561 (src_reg, src_code) <- getNonClobberedReg src
2562 Amode addr addr_code <- getAmode addr
2564 code = src_code `appOL`
2566 IF_ARCH_i386(GST pk src_reg addr,
2567 MOV pk (OpReg src_reg) (OpAddr addr))
2570 -- Floating point assignment to a register/temporary
2571 assignReg_FltCode pk reg src = do
2572 src_code <- getAnyReg src
2573 return (src_code (getRegisterReg reg))
2575 #endif /* i386_TARGET_ARCH */
2577 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2579 #if sparc_TARGET_ARCH
2581 -- Floating point assignment to memory
-- If the source's rep differs from the destination rep pk, convert
-- through tmp1 with FxTOy before storing.
2582 assignMem_FltCode pk addr src = do
2583 Amode dst__2 code1 <- getAmode addr
2584 (src__2, code2) <- getSomeReg src
2585 tmp1 <- getNewRegNat pk
2587 pk__2 = cmmExprRep src
2588 code__2 = code1 `appOL` code2 `appOL`
2590 then unitOL (ST pk src__2 dst__2)
2591 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2594 -- Floating point assignment to a register/temporary
2595 -- ToDo: Verify correctness
2596 assignReg_FltCode pk reg src = do
2597 r <- getRegister src
2598 v1 <- getNewRegNat pk
2600 Any _ code -> code dst
2601 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2603 dst = getRegisterReg reg
2605 #endif /* sparc_TARGET_ARCH */
2607 #if powerpc_TARGET_ARCH
-- On PowerPC float assignments need no special handling: loads/stores
-- and register moves are uniform, so reuse the integer versions.
2610 assignMem_FltCode = assignMem_IntCode
2611 assignReg_FltCode = assignReg_IntCode
2613 #endif /* powerpc_TARGET_ARCH */
2616 -- -----------------------------------------------------------------------------
2617 -- Generating an non-local jump
2619 -- (If applicable) Do not fill the delay slots here; you will confuse the
2620 -- register allocator.
2622 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2624 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2626 #if alpha_TARGET_ARCH
-- Legacy Alpha backend: direct BR for local asm temps, otherwise load
-- the address into pv and JMP through it.
2628 genJump (CmmLabel lbl)
2629 | isAsmTemp lbl = returnInstr (BR target)
2630 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2632 target = ImmCLbl lbl
2635 = getRegister tree `thenNat` \ register ->
2636 getNewRegNat PtrRep `thenNat` \ tmp ->
2638 dst = registerName register pv
2639 code = registerCode register pv
2640 target = registerName register pv
2642 if isFixed register then
2643 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2645 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2647 #endif /* alpha_TARGET_ARCH */
2649 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2651 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump through memory, to an immediate label, or via a register,
-- in decreasing order of specificity.
2653 genJump (CmmLoad mem pk) = do
2654 Amode target code <- getAmode mem
2655 return (code `snocOL` JMP (OpAddr target))
2657 genJump (CmmLit lit) = do
2658 return (unitOL (JMP (OpImm (litToImm lit))))
2661 (reg,code) <- getSomeReg expr
2662 return (code `snocOL` JMP (OpReg reg))
2664 #endif /* i386_TARGET_ARCH */
2666 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2668 #if sparc_TARGET_ARCH
-- The trailing NOPs fill the SPARC branch delay slot.
2670 genJump (CmmLit (CmmLabel lbl))
2671 = return (toOL [CALL (Left target) 0 True, NOP])
2673 target = ImmCLbl lbl
2677 (target, code) <- getSomeReg tree
2678 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2680 #endif /* sparc_TARGET_ARCH */
2682 #if powerpc_TARGET_ARCH
-- Direct jump for labels; otherwise move the target into CTR and
-- branch through it (MTCTR/BCTR).
2683 genJump (CmmLit (CmmLabel lbl))
2684 = return (unitOL $ JMP lbl)
2688 (target,code) <- getSomeReg tree
2689 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2690 #endif /* powerpc_TARGET_ARCH */
2693 -- -----------------------------------------------------------------------------
2694 -- Unconditional branches
-- | Emit an unconditional branch to a local basic block, using the
-- target-specific branch instruction from RegAllocInfo.
genBranch :: BlockId -> NatM InstrBlock
genBranch id = return (toOL (mkBranchInstr id))
2700 -- -----------------------------------------------------------------------------
2701 -- Conditional jumps
2704 Conditional jumps are always to local labels, so we can use branch
2705 instructions. We peek at the arguments to decide what kind of
2708 ALPHA: For comparisons with 0, we're laughing, because we can just do
2709 the desired conditional branch.
2711 I386: First, we have to ensure that the condition
2712 codes are set according to the supplied comparison operation.
2714 SPARC: First, we have to ensure that the condition codes are set
2715 according to the supplied comparison operation. We generate slightly
2716 different code for floating point comparisons, because a floating
2717 point operation cannot directly precede a @BF@. We assume the worst
2718 and fill that slot with a @NOP@.
2720 SPARC: Do not fill the delay slots here; you will confuse the register
-- NOTE(review): this is the type signature of genCondJump; the name
-- line and result-type line are elided in this view -- confirm against
-- the full source.
2726 :: BlockId -- the branch target
2727 -> CmmExpr -- the condition on which to branch
2730 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2732 #if alpha_TARGET_ARCH
-- Legacy Alpha backend (pre-Cmm StPrim/Stix style).  Three clause
-- groups: compare-with-0, float compare-with-0.0, and the general
-- two-operand cases, each with its own operator-to-condition table.
-- NOTE(review): many lines are elided in this view; the cmpOp tables
-- below look partially garbled (e.g. WordLtOp = NEVER) -- confirm
-- against the full source before trusting them.
2734 genCondJump id (StPrim op [x, StInt 0])
2735 = getRegister x `thenNat` \ register ->
2736 getNewRegNat (registerRep register)
2739 code = registerCode register tmp
2740 value = registerName register tmp
2741 pk = registerRep register
2742 target = ImmCLbl lbl
2744 returnSeq code [BI (cmpOp op) value target]
2746 cmpOp CharGtOp = GTT
2748 cmpOp CharEqOp = EQQ
2750 cmpOp CharLtOp = LTT
2759 cmpOp WordGeOp = ALWAYS
2760 cmpOp WordEqOp = EQQ
2762 cmpOp WordLtOp = NEVER
2763 cmpOp WordLeOp = EQQ
2765 cmpOp AddrGeOp = ALWAYS
2766 cmpOp AddrEqOp = EQQ
2768 cmpOp AddrLtOp = NEVER
2769 cmpOp AddrLeOp = EQQ
2771 genCondJump lbl (StPrim op [x, StDouble 0.0])
2772 = getRegister x `thenNat` \ register ->
2773 getNewRegNat (registerRep register)
2776 code = registerCode register tmp
2777 value = registerName register tmp
2778 pk = registerRep register
2779 target = ImmCLbl lbl
2781 return (code . mkSeqInstr (BF (cmpOp op) value target))
2783 cmpOp FloatGtOp = GTT
2784 cmpOp FloatGeOp = GE
2785 cmpOp FloatEqOp = EQQ
2786 cmpOp FloatNeOp = NE
2787 cmpOp FloatLtOp = LTT
2788 cmpOp FloatLeOp = LE
2789 cmpOp DoubleGtOp = GTT
2790 cmpOp DoubleGeOp = GE
2791 cmpOp DoubleEqOp = EQQ
2792 cmpOp DoubleNeOp = NE
2793 cmpOp DoubleLtOp = LTT
2794 cmpOp DoubleLeOp = LE
-- General float compare: FCMP computes a value, then branch on it.
-- Note the inverted (instr, cond) pairs: e.g. Gt is implemented as
-- "compare LE, branch if EQ-to-zero".
2796 genCondJump lbl (StPrim op [x, y])
2798 = trivialFCode pr instr x y `thenNat` \ register ->
2799 getNewRegNat F64 `thenNat` \ tmp ->
2801 code = registerCode register tmp
2802 result = registerName register tmp
2803 target = ImmCLbl lbl
2805 return (code . mkSeqInstr (BF cond result target))
2807 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2809 fltCmpOp op = case op of
2823 (instr, cond) = case op of
2824 FloatGtOp -> (FCMP TF LE, EQQ)
2825 FloatGeOp -> (FCMP TF LTT, EQQ)
2826 FloatEqOp -> (FCMP TF EQQ, NE)
2827 FloatNeOp -> (FCMP TF EQQ, EQQ)
2828 FloatLtOp -> (FCMP TF LTT, NE)
2829 FloatLeOp -> (FCMP TF LE, NE)
2830 DoubleGtOp -> (FCMP TF LE, EQQ)
2831 DoubleGeOp -> (FCMP TF LTT, EQQ)
2832 DoubleEqOp -> (FCMP TF EQQ, NE)
2833 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2834 DoubleLtOp -> (FCMP TF LTT, NE)
2835 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer compare, same inverted-pair encoding as above.
2837 genCondJump lbl (StPrim op [x, y])
2838 = trivialCode instr x y `thenNat` \ register ->
2839 getNewRegNat IntRep `thenNat` \ tmp ->
2841 code = registerCode register tmp
2842 result = registerName register tmp
2843 target = ImmCLbl lbl
2845 return (code . mkSeqInstr (BI cond result target))
2847 (instr, cond) = case op of
2848 CharGtOp -> (CMP LE, EQQ)
2849 CharGeOp -> (CMP LTT, EQQ)
2850 CharEqOp -> (CMP EQQ, NE)
2851 CharNeOp -> (CMP EQQ, EQQ)
2852 CharLtOp -> (CMP LTT, NE)
2853 CharLeOp -> (CMP LE, NE)
2854 IntGtOp -> (CMP LE, EQQ)
2855 IntGeOp -> (CMP LTT, EQQ)
2856 IntEqOp -> (CMP EQQ, NE)
2857 IntNeOp -> (CMP EQQ, EQQ)
2858 IntLtOp -> (CMP LTT, NE)
2859 IntLeOp -> (CMP LE, NE)
2860 WordGtOp -> (CMP ULE, EQQ)
2861 WordGeOp -> (CMP ULT, EQQ)
2862 WordEqOp -> (CMP EQQ, NE)
2863 WordNeOp -> (CMP EQQ, EQQ)
2864 WordLtOp -> (CMP ULT, NE)
2865 WordLeOp -> (CMP ULE, NE)
2866 AddrGtOp -> (CMP ULE, EQQ)
2867 AddrGeOp -> (CMP ULT, EQQ)
2868 AddrEqOp -> (CMP EQQ, NE)
2869 AddrNeOp -> (CMP EQQ, EQQ)
2870 AddrLtOp -> (CMP ULT, NE)
2871 AddrLeOp -> (CMP ULE, NE)
2873 #endif /* alpha_TARGET_ARCH */
2875 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2877 #if i386_TARGET_ARCH
-- | i386 conditional jump: emit the flag-setting code for the boolean
-- expression, then branch on the resulting condition with JXX.
genCondJump blockid expr = do
  CondCode _ cc setup <- getCondCode expr
  return (setup `snocOL` JXX cc blockid)
2885 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2887 #if x86_64_TARGET_ARCH
-- x86-64: integer conditions branch directly; float conditions need
-- extra handling for the unordered (NaN) case, choosing between a
-- plain test, an or-with-parity test, and an and-ordered test around
-- an intermediate label.  NOTE(review): the bodies of plain_test /
-- or_unordered / and_ordered are elided in this view.
2889 genCondJump id bool = do
2890 CondCode is_float cond cond_code <- getCondCode bool
2893 return (cond_code `snocOL` JXX cond id)
2895 lbl <- getBlockIdNat
2897 -- see comment with condFltReg
2898 let code = case cond of
2904 plain_test = unitOL (
2907 or_unordered = toOL [
2911 and_ordered = toOL [
2917 return (cond_code `appOL` code)
2921 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2923 #if sparc_TARGET_ARCH
-- SPARC: float branches (BF) cannot directly follow a float compare,
-- so a NOP is inserted first; the trailing NOP fills the delay slot.
-- NOTE(review): the line combining code with these instruction lists
-- is elided in this view.
2925 genCondJump (BlockId id) bool = do
2926 CondCode is_float cond code <- getCondCode bool
2931 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2932 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2936 #endif /* sparc_TARGET_ARCH */
2939 #if powerpc_TARGET_ARCH
-- PowerPC: a single conditional-branch instruction (BCC) suffices for
-- both integer and float conditions.
2941 genCondJump id bool = do
2942 CondCode is_float cond code <- getCondCode bool
2943 return (code `snocOL` BCC cond id)
2945 #endif /* powerpc_TARGET_ARCH */
2948 -- -----------------------------------------------------------------------------
2949 -- Generating C calls
2951 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2952 -- @get_arg@, which moves the arguments to the correct registers/stack
2953 -- locations. Apart from that, the code is easy.
2955 -- (If applicable) Do not fill the delay slots here; you will confuse the
2956 -- register allocator.
-- NOTE(review): this is the type signature of genCCall; the name line
-- and result-type line are elided in this view -- confirm against the
-- full source.
2959 :: CmmCallTarget -- function to call
2960 -> CmmHintFormals -- where to put the result
2961 -> CmmActuals -- arguments (of mixed type)
2964 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2966 #if alpha_TARGET_ARCH
-- Legacy Alpha backend C call: marshal arguments with get_arg, then
-- load the callee address into pv, JSR through it, and restore gp.
-- NOTE(review): several lines are elided in this view.
2970 genCCall fn cconv result_regs args
2971 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2972 `thenNat` \ ((unused,_), argCode) ->
2974 nRegs = length allArgRegs - length unused
2975 code = asmSeqThen (map ($ []) argCode)
2978 LDA pv (AddrImm (ImmLab (ptext fn))),
2979 JSR ra (AddrReg pv) nRegs,
2980 LDGP gp (AddrReg ra)]
2982 ------------------------
2983 {- Try to get a value into a specific register (or registers) for
2984 a call. The first 6 arguments go into the appropriate
2985 argument register (separate registers for integer and floating
2986 point arguments, but used in lock-step), and the remaining
2987 arguments are dumped to the stack, beginning at 0(sp). Our
2988 first argument is a pair of the list of remaining argument
2989 registers to be assigned for this call and the next stack
2990 offset to use for overflowing arguments. This way,
2991 @get_Arg@ can be applied to all of a call's arguments using
2995 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2996 -> StixTree -- Current argument
2997 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2999 -- We have to use up all of our argument registers first...
3001 get_arg ((iDst,fDst):dsts, offset) arg
3002 = getRegister arg `thenNat` \ register ->
3004 reg = if isFloatingRep pk then fDst else iDst
3005 code = registerCode register reg
3006 src = registerName register reg
3007 pk = registerRep register
3010 if isFloatingRep pk then
3011 ((dsts, offset), if isFixed register then
3012 code . mkSeqInstr (FMOV src fDst)
3015 ((dsts, offset), if isFixed register then
3016 code . mkSeqInstr (OR src (RIReg src) iDst)
3019 -- Once we have run out of argument registers, we move to the
3022 get_arg ([], offset) arg
3023 = getRegister arg `thenNat` \ register ->
3024 getNewRegNat (registerRep register)
3027 code = registerCode register tmp
3028 src = registerName register tmp
3029 pk = registerRep register
3030 sz = primRepToSize pk
3032 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3034 #endif /* alpha_TARGET_ARCH */
3036 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3038 #if i386_TARGET_ARCH
3040 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3041 -- write barrier compiles to no code on x86/x86-64;
3042 -- we keep it this long in order to prevent earlier optimisations.
3044 -- we only cope with a single result for foreign calls
-- Inline sqrt/sin/cos/tan via x87 instructions; every other CallishMachOp
-- is turned into an out-of-line libm call.
3045 genCCall (CmmPrim op) [(r,_)] args = do
3047 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3048 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3050 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3051 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3053 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3054 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3056 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3057 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3059 other_op -> outOfLineFloatOp op r args
3061 actuallyInlineFloatOp rep instr [(x,_)]
3062 = do res <- trivialUFCode rep instr x
3064 return (any (getRegisterReg (CmmLocal r)))
-- General i386 C call: push args right-to-left (cdecl), call, then the
-- caller pops the args (unless stdcall).  DELTA pseudo-instructions
-- track the stack-pointer offset for the register allocator.
3066 genCCall target dest_regs args = do
3068 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
3069 #if !darwin_TARGET_OS
3070 tot_arg_size = sum sizes
-- Darwin requires 16-byte stack alignment at the call, hence the
-- rounding and explicit padding below.
3072 raw_arg_size = sum sizes
3073 tot_arg_size = roundTo 16 raw_arg_size
3074 arg_pad_size = tot_arg_size - raw_arg_size
3075 delta0 <- getDeltaNat
3076 setDeltaNat (delta0 - arg_pad_size)
3079 push_codes <- mapM push_arg (reverse args)
3080 delta <- getDeltaNat
3083 -- deal with static vs dynamic call targets
3084 (callinsns,cconv) <-
3087 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3088 -> -- ToDo: stdcall arg sizes
3089 return (unitOL (CALL (Left fn_imm) []), conv)
3090 where fn_imm = ImmCLbl lbl
3091 CmmForeignCall expr conv
3092 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3093 ASSERT(dyn_rep == I32)
3094 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3097 #if darwin_TARGET_OS
3099 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3100 DELTA (delta0 - arg_pad_size)]
3101 `appOL` concatOL push_codes
3104 = concatOL push_codes
3105 call = callinsns `appOL`
3107 -- Deallocate parameters after call for ccall;
3108 -- but not for stdcall (callee does it)
3109 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3110 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3112 [DELTA (delta + tot_arg_size)]
3115 setDeltaNat (delta + tot_arg_size)
3118 -- assign the results, if necessary
-- Results come back in eax (plus edx for I64) or on the x87 stack
-- (fake0) for floats.
3119 assign_code [] = nilOL
3120 assign_code [(dest,_hint)] =
3122 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3123 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3124 F32 -> unitOL (GMOV fake0 r_dest)
3125 F64 -> unitOL (GMOV fake0 r_dest)
3126 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3128 r_dest_hi = getHiVRegFromLo r_dest
3129 rep = localRegRep dest
3130 r_dest = getRegisterReg (CmmLocal dest)
3131 assign_code many = panic "genCCall.assign_code many"
3133 return (push_code `appOL`
3135 assign_code dest_regs)
-- Round x up to the next multiple of a.
3143 roundTo a x | x `mod` a == 0 = x
3144 | otherwise = x + a - (x `mod` a)
3147 push_arg :: (CmmExpr,MachHint){-current argument-}
3148 -> NatM InstrBlock -- code
3150 push_arg (arg,_hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit words, high word first.
3151 | arg_rep == I64 = do
3152 ChildCode64 code r_lo <- iselExpr64 arg
3153 delta <- getDeltaNat
3154 setDeltaNat (delta - 8)
3156 r_hi = getHiVRegFromLo r_lo
3158 return ( code `appOL`
3159 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3160 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Floats cannot be PUSHed: make room on the stack and GST into it.
3165 (code, reg, sz) <- get_op arg
3166 delta <- getDeltaNat
3167 let size = arg_size sz
3168 setDeltaNat (delta-size)
3169 if (case sz of F64 -> True; F32 -> True; _ -> False)
3170 then return (code `appOL`
3171 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3173 GST sz reg (AddrBaseIndex (EABaseReg esp)
3177 else return (code `snocOL`
3178 PUSH I32 (OpReg reg) `snocOL`
3182 arg_rep = cmmExprRep arg
3185 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3187 (reg,code) <- getSomeReg op
3188 return (code, reg, cmmExprRep op)
3190 #endif /* i386_TARGET_ARCH */
3192 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Implement a CallishMachOp as a C call to the matching libm routine.
-- An F64 result goes straight into res; otherwise the call is made
-- through an F64 temporary and then assigned (narrowed) to res.
3194 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
3196 outOfLineFloatOp mop res args
3198 targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3199 let target = CmmForeignCall targetExpr CCallConv
3201 if localRegRep res == F64
3203 stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe)
3207 tmp = LocalReg uq F64 KindNonPtr
3209 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe)
3210 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3211 return (code1 `appOL` code2)
3213 lbl = mkForeignLabel fn Nothing False
-- Map each MachOp to its C math-library function name.
3216 MO_F32_Sqrt -> FSLIT("sqrtf")
3217 MO_F32_Sin -> FSLIT("sinf")
3218 MO_F32_Cos -> FSLIT("cosf")
3219 MO_F32_Tan -> FSLIT("tanf")
3220 MO_F32_Exp -> FSLIT("expf")
3221 MO_F32_Log -> FSLIT("logf")
3223 MO_F32_Asin -> FSLIT("asinf")
3224 MO_F32_Acos -> FSLIT("acosf")
3225 MO_F32_Atan -> FSLIT("atanf")
3227 MO_F32_Sinh -> FSLIT("sinhf")
3228 MO_F32_Cosh -> FSLIT("coshf")
3229 MO_F32_Tanh -> FSLIT("tanhf")
3230 MO_F32_Pwr -> FSLIT("powf")
3232 MO_F64_Sqrt -> FSLIT("sqrt")
3233 MO_F64_Sin -> FSLIT("sin")
3234 MO_F64_Cos -> FSLIT("cos")
3235 MO_F64_Tan -> FSLIT("tan")
3236 MO_F64_Exp -> FSLIT("exp")
3237 MO_F64_Log -> FSLIT("log")
3239 MO_F64_Asin -> FSLIT("asin")
3240 MO_F64_Acos -> FSLIT("acos")
3241 MO_F64_Atan -> FSLIT("atan")
3243 MO_F64_Sinh -> FSLIT("sinh")
3244 MO_F64_Cosh -> FSLIT("cosh")
3245 MO_F64_Tanh -> FSLIT("tanh")
3246 MO_F64_Pwr -> FSLIT("pow")
3248 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3250 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3252 #if x86_64_TARGET_ARCH
3254 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3255 -- write barrier compiles to no code on x86/x86-64;
3256 -- we keep it this long in order to prevent earlier optimisations.
-- On x86-64 all CallishMachOps go out of line (no x87 inlining here,
-- unlike the i386 version above).
3258 genCCall (CmmPrim op) [(r,_)] args =
3259 outOfLineFloatOp op r args
3261 genCCall target dest_regs args = do
3263 -- load up the register arguments
3264 (stack_args, aregs, fregs, load_args_code)
3265 <- load_args args allArgRegs allFPArgRegs nilOL
3268 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3269 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3270 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3271 -- for annotating the call instruction with
3273 sse_regs = length fp_regs_used
3275 tot_arg_size = arg_size * length stack_args
3277 -- On entry to the called function, %rsp should be aligned
3278 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3279 -- the return address is 16-byte aligned). In STG land
3280 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3281 -- need to make sure we push a multiple of 16-bytes of args,
3282 -- plus the return address, to get the correct alignment.
3283 -- Urg, this is hard. We need to feed the delta back into
3284 -- the arg pushing code.
3285 (real_size, adjust_rsp) <-
3286 if tot_arg_size `rem` 16 == 0
3287 then return (tot_arg_size, nilOL)
3288 else do -- we need to adjust...
3289 delta <- getDeltaNat
3290 setDeltaNat (delta-8)
3291 return (tot_arg_size+8, toOL [
3292 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3296 -- push the stack args, right to left
3297 push_code <- push_args (reverse stack_args) nilOL
3298 delta <- getDeltaNat
3300 -- deal with static vs dynamic call targets
3301 (callinsns,cconv) <-
3304 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3305 -> -- ToDo: stdcall arg sizes
3306 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3307 where fn_imm = ImmCLbl lbl
3308 CmmForeignCall expr conv
3309 -> do (dyn_r, dyn_c) <- getSomeReg expr
3310 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3313 -- The x86_64 ABI requires us to set %al to the number of SSE
3314 -- registers that contain arguments, if the called routine
3315 -- is a varargs function. We don't know whether it's a
3316 -- varargs function or not, so we have to assume it is.
3318 -- It's not safe to omit this assignment, even if the number
3319 -- of SSE regs in use is zero. If %al is larger than 8
3320 -- on entry to a varargs function, seg faults ensue.
3321 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3323 let call = callinsns `appOL`
3325 -- Deallocate parameters after call for ccall;
3326 -- but not for stdcall (callee does it)
3327 (if cconv == StdCallConv || real_size==0 then [] else
3328 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3330 [DELTA (delta + real_size)]
3333 setDeltaNat (delta + real_size)
3336 -- assign the results, if necessary
3337 assign_code [] = nilOL
3338 assign_code [(dest,_hint)] =
3340 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3341 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3342 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3344 rep = localRegRep dest
3345 r_dest = getRegisterReg (CmmLocal dest)
3346 assign_code many = panic "genCCall.assign_code many"
3348 return (load_args_code `appOL`
3351 assign_eax sse_regs `appOL`
3353 assign_code dest_regs)
3356 arg_size = 8 -- always, at the mo
3358 load_args :: [(CmmExpr,MachHint)]
3359 -> [Reg] -- int regs avail for args
3360 -> [Reg] -- FP regs avail for args
3362 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3363 load_args args [] [] code = return (args, [], [], code)
3364 -- no more regs to use
3365 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3366 -- no more args to push
3367 load_args ((arg,hint) : rest) aregs fregs code
3368 | isFloatingRep arg_rep =
3372 arg_code <- getAnyReg arg
3373 load_args rest aregs rs (code `appOL` arg_code r)
3378 arg_code <- getAnyReg arg
3379 load_args rest rs fregs (code `appOL` arg_code r)
3381 arg_rep = cmmExprRep arg
3384 (args',ars,frs,code') <- load_args rest aregs fregs code
3385 return ((arg,hint):args', ars, frs, code')
3387 push_args [] code = return code
3388 push_args ((arg,hint):rest) code
3389 | isFloatingRep arg_rep = do
3390 (arg_reg, arg_code) <- getSomeReg arg
3391 delta <- getDeltaNat
3392 setDeltaNat (delta-arg_size)
3393 let code' = code `appOL` arg_code `appOL` toOL [
3394 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3395 DELTA (delta-arg_size),
3396 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3397 push_args rest code'
3400 -- we only ever generate word-sized function arguments. Promotion
3401 -- has already happened: our Int8# type is kept sign-extended
3402 -- in an Int#, for example.
3403 ASSERT(arg_rep == I64) return ()
3404 (arg_op, arg_code) <- getOperand arg
3405 delta <- getDeltaNat
3406 setDeltaNat (delta-arg_size)
3407 let code' = code `appOL` toOL [PUSH I64 arg_op,
3408 DELTA (delta-arg_size)]
3409 push_args rest code'
3411 arg_rep = cmmExprRep arg
3414 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3416 #if sparc_TARGET_ARCH
3418 The SPARC calling convention is an absolute
3419 nightmare. The first 6x32 bits of arguments are mapped into
3420 %o0 through %o5, and the remaining arguments are dumped to the
3421 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3423 If we have to put args on the stack, move %o6==%sp down by
3424 the number of words to go on the stack, to ensure there's enough space.
3426 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3427 16 words above the stack pointer is a word for the address of
3428 a structure return value. I use this as a temporary location
3429 for moving values from float to int regs. Certainly it isn't
3430 safe to put anything in the 16 words starting at %sp, since
3431 this area can get trashed at any time due to window overflows
3432 caused by signal handlers.
3434 A final complication (if the above isn't enough) is that
3435 we can't blithely calculate the arguments one by one into
3436 %o0 .. %o5. Consider the following nested calls:
3440 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3441 the inner call will itself use %o0, which trashes the value put there
3442 in preparation for the outer call. Upshot: we need to calculate the
3443 args into temporary regs, and move those to arg regs or onto the
3444 stack only immediately prior to the call proper. Sigh.
-- | SPARC C-call selection (see the nightmare-description comment
-- above).  Arguments are first computed into fresh integer vregs, and
-- only moved into %o0..%o5 / the stack immediately before the call,
-- so an inner call cannot trash an outer call's argument registers.
3447 genCCall target dest_regs argsAndHints = do
3449 args = map fst argsAndHints
3450 argcode_and_vregs <- mapM arg_to_int_vregs args
3452 (argcodes, vregss) = unzip argcode_and_vregs
3453 n_argRegs = length allArgRegs
3454 n_argRegs_used = min (length vregs) n_argRegs
3455 vregs = concat vregss
3456 -- deal with static vs dynamic call targets
3457 callinsns <- (case target of
3458 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3459 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3460 CmmForeignCall expr conv -> do
3461 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3462 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- Callish primitive: resolved via outOfLineFloatOp to either a
-- static label or an address expression.
3464 (res, reduce) <- outOfLineFloatOp mop
3465 lblOrMopExpr <- case res of
3467 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3469 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3470 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3471 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3475 argcode = concatOL argcodes
-- If some args spill to the stack, move %sp down (rounded up to keep
-- 8-byte alignment) before the call and back up after.
3476 (move_sp_down, move_sp_up)
3477 = let diff = length vregs - n_argRegs
3478 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3481 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3483 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3484 return (argcode `appOL`
3485 move_sp_down `appOL`
3486 transfer_code `appOL`
3491 -- move args from the integer vregs into which they have been
3492 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3493 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3495 move_final [] _ offset -- all args done
3498 move_final (v:vs) [] offset -- out of aregs; move to stack
3499 = ST I32 v (spRel offset)
3500 : move_final vs [] (offset+1)
3502 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3503 = OR False g0 (RIReg v) a
3504 : move_final vs az offset
3506 -- generate code to calculate an argument, and move it into one
3507 -- or two integer vregs.
3508 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-- 64-bit values occupy a hi/lo vreg pair.
3509 arg_to_int_vregs arg
3510 | (cmmExprRep arg) == I64
3512 (ChildCode64 code r_lo) <- iselExpr64 arg
3514 r_hi = getHiVRegFromLo r_lo
3515 return (code, [r_hi, r_lo])
3518 (src, code) <- getSomeReg arg
3519 tmp <- getNewRegNat (cmmExprRep arg)
-- Doubles: bounce both 32-bit halves through the scratch stack slot
-- at [%sp+16] to move them from float regs to two int vregs.
3524 v1 <- getNewRegNat I32
3525 v2 <- getNewRegNat I32
3528 FMOV F64 src f0 `snocOL`
3529 ST F32 f0 (spRel 16) `snocOL`
3530 LD I32 (spRel 16) v1 `snocOL`
3531 ST F32 (fPair f0) (spRel 16) `snocOL`
3532 LD I32 (spRel 16) v2
-- Single floats: one bounce through the same scratch slot.
3537 v1 <- getNewRegNat I32
3540 ST F32 src (spRel 16) `snocOL`
3541 LD I32 (spRel 16) v1
-- Integer case: a plain register-to-vreg move (OR with %g0).
3546 v1 <- getNewRegNat I32
3548 code `snocOL` OR False g0 (RIReg src) v1
-- | Map a callish float MachOp to the name of the C math routine that
-- implements it, plus a Bool saying whether the (double-precision)
-- library result must be reduced back to single precision.
3552 outOfLineFloatOp mop =
3554 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3555 mkForeignLabel functionName Nothing True
3556 let mopLabelOrExpr = case mopExpr of
3557 CmmLit (CmmLabel lbl) -> Left lbl
3559 return (mopLabelOrExpr, reduce)
-- F32 ops call the double-precision libm routine and then reduce;
-- F64 ops use the result directly.
3561 (reduce, functionName) = case mop of
3562 MO_F32_Exp -> (True, FSLIT("exp"))
3563 MO_F32_Log -> (True, FSLIT("log"))
3564 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3566 MO_F32_Sin -> (True, FSLIT("sin"))
3567 MO_F32_Cos -> (True, FSLIT("cos"))
3568 MO_F32_Tan -> (True, FSLIT("tan"))
3570 MO_F32_Asin -> (True, FSLIT("asin"))
3571 MO_F32_Acos -> (True, FSLIT("acos"))
3572 MO_F32_Atan -> (True, FSLIT("atan"))
3574 MO_F32_Sinh -> (True, FSLIT("sinh"))
3575 MO_F32_Cosh -> (True, FSLIT("cosh"))
3576 MO_F32_Tanh -> (True, FSLIT("tanh"))
3578 MO_F64_Exp -> (False, FSLIT("exp"))
3579 MO_F64_Log -> (False, FSLIT("log"))
3580 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3582 MO_F64_Sin -> (False, FSLIT("sin"))
3583 MO_F64_Cos -> (False, FSLIT("cos"))
3584 MO_F64_Tan -> (False, FSLIT("tan"))
3586 MO_F64_Asin -> (False, FSLIT("asin"))
3587 MO_F64_Acos -> (False, FSLIT("acos"))
3588 MO_F64_Atan -> (False, FSLIT("atan"))
3590 MO_F64_Sinh -> (False, FSLIT("sinh"))
3591 MO_F64_Cosh -> (False, FSLIT("cosh"))
3592 MO_F64_Tanh -> (False, FSLIT("tanh"))
3594 other -> pprPanic "outOfLineFloatOp(sparc) "
3595 (pprCallishMachOp mop)
3597 #endif /* sparc_TARGET_ARCH */
3599 #if powerpc_TARGET_ARCH
3601 #if darwin_TARGET_OS || linux_TARGET_OS
3603 The PowerPC calling convention for Darwin/Mac OS X
3604 is described in Apple's document
3605 "Inside Mac OS X - Mach-O Runtime Architecture".
3607 PowerPC Linux uses the System V Release 4 Calling Convention
3608 for PowerPC. It is described in the
3609 "System V Application Binary Interface PowerPC Processor Supplement".
3611 Both conventions are similar:
3612 Parameters may be passed in general-purpose registers starting at r3, in
3613 floating point registers starting at f1, or on the stack.
3615 But there are substantial differences:
3616 * The number of registers used for parameter passing and the exact set of
3617 nonvolatile registers differs (see MachRegs.lhs).
3618 * On Darwin, stack space is always reserved for parameters, even if they are
3619 passed in registers. The called routine may choose to save parameters from
3620 registers to the corresponding space on the stack.
3621 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3622 parameter is passed in an FPR.
3623 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3624 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3625 Darwin just treats an I64 like two separate I32s (high word first).
3626 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3627 4-byte aligned like everything else on Darwin.
3628 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3629 PowerPC Linux does not agree, so neither do we.
3631 According to both conventions, The parameter area should be part of the
3632 caller's stack frame, allocated in the caller's prologue code (large enough
3633 to hold the parameter lists for all called routines). The NCG already
3634 uses the stack for register spilling, leaving 64 bytes free at the top.
3635 If we need a larger parameter area than that, we just allocate a new stack
3636 frame just before ccalling.
-- | PowerPC C-call selection (Darwin and SysV/Linux ABIs; see the
-- convention comparison comment above).  A write barrier is a single
-- lwsync instruction on PPC.
3640 genCCall (CmmPrim MO_WriteBarrier) _ _
3641 = return $ unitOL LWSYNC
-- General case: pass arguments per the selected ABI, adjust %sp if a
-- larger parameter area is needed, call, then move results back.
3643 genCCall target dest_regs argsAndHints
3644 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3645 -- we rely on argument promotion in the codeGen
3647 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3649 allArgRegs allFPArgRegs
3653 (labelOrExpr, reduceToF32) <- case target of
3654 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3655 CmmForeignCall expr conv -> return (Right expr, False)
3656 CmmPrim mop -> outOfLineFloatOp mop
3658 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3659 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- Static target: direct BL; dynamic target: move the address into
-- the CTR register and BCTRL through it.
3664 `snocOL` BL lbl usedRegs
3667 (dynReg, dynCode) <- getSomeReg dyn
3669 `snocOL` MTCTR dynReg
3671 `snocOL` BCTRL usedRegs
3674 #if darwin_TARGET_OS
3675 initialStackOffset = 24
3676 -- size of linkage area + size of arguments, in bytes
3677 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3678 map machRepByteWidth argReps
3679 #elif linux_TARGET_OS
3680 initialStackOffset = 8
3681 stackDelta finalStack = roundTo 16 finalStack
3683 args = map fst argsAndHints
3684 argReps = map cmmExprRep args
-- Round x up to the next multiple of a.
3686 roundTo a x | x `mod` a == 0 = x
3687 | otherwise = x + a - (x `mod` a)
-- Allocate / deallocate a fresh stack frame around the call when the
-- parameter area does not fit in the caller's frame.
3689 move_sp_down finalStack
3691 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3694 where delta = stackDelta finalStack
3695 move_sp_up finalStack
3697 toOL [ADD sp sp (RIImm (ImmInt delta)),
3700 where delta = stackDelta finalStack
-- passArguments walks the argument list, consuming GPRs/FPRs and
-- stack space per the ABI, and returns the final stack offset, the
-- marshalling code, and the set of registers used (for the call
-- instruction's annotation).
3703 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 arguments: a hi/lo vreg pair.
3704 passArguments ((arg,I64):args) gprs fprs stackOffset
3705 accumCode accumUsed =
3707 ChildCode64 code vr_lo <- iselExpr64 arg
3708 let vr_hi = getHiVRegFromLo vr_lo
3710 #if darwin_TARGET_OS
-- Darwin: an I64 is simply two I32s, high word first.
3715 (accumCode `appOL` code
3716 `snocOL` storeWord vr_hi gprs stackOffset
3717 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3718 ((take 2 gprs) ++ accumUsed)
3720 storeWord vr (gpr:_) offset = MR gpr vr
3721 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3723 #elif linux_TARGET_OS
-- SysV: an I64 goes in an odd-numbered GPR pair (skipping one GPR if
-- necessary) or entirely on the 8-byte-aligned stack.
3724 let stackOffset' = roundTo 8 stackOffset
3725 stackCode = accumCode `appOL` code
3726 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3727 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3728 regCode hireg loreg =
3729 accumCode `appOL` code
3730 `snocOL` MR hireg vr_hi
3731 `snocOL` MR loreg vr_lo
3734 hireg : loreg : regs | even (length gprs) ->
3735 passArguments args regs fprs stackOffset
3736 (regCode hireg loreg) (hireg : loreg : accumUsed)
3737 _skipped : hireg : loreg : regs ->
3738 passArguments args regs fprs stackOffset
3739 (regCode hireg loreg) (hireg : loreg : accumUsed)
3740 _ -> -- only one or no regs left
3741 passArguments args [] fprs (stackOffset'+8)
-- Non-I64 arguments: use a register when one is available ...
3745 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3746 | reg : _ <- regs = do
3747 register <- getRegister arg
3748 let code = case register of
3749 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3750 Any _ acode -> acode reg
3754 #if darwin_TARGET_OS
3755 -- The Darwin ABI requires that we reserve stack slots for register parameters
3756 (stackOffset + stackBytes)
3757 #elif linux_TARGET_OS
3758 -- ... the SysV ABI doesn't.
3761 (accumCode `appOL` code)
-- ... otherwise store it to the parameter area on the stack.
3764 (vr, code) <- getSomeReg arg
3768 (stackOffset' + stackBytes)
3769 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3772 #if darwin_TARGET_OS
3773 -- stackOffset is at least 4-byte aligned
3774 -- The Darwin ABI is happy with that.
3775 stackOffset' = stackOffset
3777 -- ... the SysV ABI requires 8-byte alignment for doubles.
3778 stackOffset' | rep == F64 = roundTo 8 stackOffset
3779 | otherwise = stackOffset
3781 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep resource usage: (GPRs consumed, FPRs consumed, stack
-- bytes, register pool this rep draws from).
3782 (nGprs, nFprs, stackBytes, regs) = case rep of
3783 I32 -> (1, 0, 4, gprs)
3784 #if darwin_TARGET_OS
3785 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3787 F32 -> (1, 1, 4, fprs)
3788 F64 -> (2, 1, 8, fprs)
3789 #elif linux_TARGET_OS
3790 -- ... the SysV ABI doesn't.
3791 F32 -> (0, 1, 4, fprs)
3792 F64 -> (0, 1, 8, fprs)
-- Move the call's result out of the ABI return registers (f1 for
-- floats, r3 for ints, r3/r4 for I64), reducing to F32 if requested.
3795 moveResult reduceToF32 =
3799 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3800 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3801 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3803 | otherwise -> unitOL (MR r_dest r3)
3804 where rep = cmmRegRep (CmmLocal dest)
3805 r_dest = getRegisterReg (CmmLocal dest)
-- Same role as the SPARC version: map a callish float op to its
-- libm routine plus a reduce-to-F32 flag.
3807 outOfLineFloatOp mop =
3809 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3810 mkForeignLabel functionName Nothing True
3811 let mopLabelOrExpr = case mopExpr of
3812 CmmLit (CmmLabel lbl) -> Left lbl
3814 return (mopLabelOrExpr, reduce)
3816 (functionName, reduce) = case mop of
3817 MO_F32_Exp -> (FSLIT("exp"), True)
3818 MO_F32_Log -> (FSLIT("log"), True)
3819 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3821 MO_F32_Sin -> (FSLIT("sin"), True)
3822 MO_F32_Cos -> (FSLIT("cos"), True)
3823 MO_F32_Tan -> (FSLIT("tan"), True)
3825 MO_F32_Asin -> (FSLIT("asin"), True)
3826 MO_F32_Acos -> (FSLIT("acos"), True)
3827 MO_F32_Atan -> (FSLIT("atan"), True)
3829 MO_F32_Sinh -> (FSLIT("sinh"), True)
3830 MO_F32_Cosh -> (FSLIT("cosh"), True)
3831 MO_F32_Tanh -> (FSLIT("tanh"), True)
3832 MO_F32_Pwr -> (FSLIT("pow"), True)
3834 MO_F64_Exp -> (FSLIT("exp"), False)
3835 MO_F64_Log -> (FSLIT("log"), False)
3836 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3838 MO_F64_Sin -> (FSLIT("sin"), False)
3839 MO_F64_Cos -> (FSLIT("cos"), False)
3840 MO_F64_Tan -> (FSLIT("tan"), False)
3842 MO_F64_Asin -> (FSLIT("asin"), False)
3843 MO_F64_Acos -> (FSLIT("acos"), False)
3844 MO_F64_Atan -> (FSLIT("atan"), False)
3846 MO_F64_Sinh -> (FSLIT("sinh"), False)
3847 MO_F64_Cosh -> (FSLIT("cosh"), False)
3848 MO_F64_Tanh -> (FSLIT("tanh"), False)
3849 MO_F64_Pwr -> (FSLIT("pow"), False)
3850 other -> pprPanic "genCCall(ppc): unknown callish op"
3851 (pprCallishMachOp other)
3853 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3855 #endif /* powerpc_TARGET_ARCH */
3858 -- -----------------------------------------------------------------------------
3859 -- Generating a table-branch
-- | Compile a Cmm switch into a jump through a jump table.  The table
-- is emitted as static data; PIC builds use label-difference entries
-- relative to the table's own label, non-PIC builds use absolute
-- block labels (see 'jumpTableEntry' below).
3861 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3863 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- PIC case: load the table address via a dynamic reference, fetch
-- the relative entry, add it to the table base, and jump.
3867 (reg,e_code) <- getSomeReg expr
3868 lbl <- getNewLabelNat
3869 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3870 (tableReg,t_code) <- getSomeReg $ dynRef
3872 jumpTable = map jumpTableEntryRel ids
3874 jumpTableEntryRel Nothing
3875 = CmmStaticLit (CmmInt 0 wordRep)
3876 jumpTableEntryRel (Just (BlockId id))
3877 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3878 where blockLabel = mkAsmTempLabel id
-- Scaled indexed operand: table[reg * word size].
3880 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3881 (EAIndex reg wORD_SIZE) (ImmInt 0))
3883 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3884 -- on Mac OS X/x86_64, put the jump table in the text section
3885 -- to work around a limitation of the linker.
3886 -- ld64 is unable to handle the relocations for
3888 -- if L0 is not preceded by a non-anonymous label in its section.
3890 code = e_code `appOL` t_code `appOL` toOL [
3891 ADD wordRep op (OpReg tableReg),
3892 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3893 LDATA Text (CmmDataLabel lbl : jumpTable)
3896 code = e_code `appOL` t_code `appOL` toOL [
3897 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3898 ADD wordRep op (OpReg tableReg),
3899 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC x86 case: jump indirectly through an absolute-address table.
3905 (reg,e_code) <- getSomeReg expr
3906 lbl <- getNewLabelNat
3908 jumpTable = map jumpTableEntry ids
3909 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3910 code = e_code `appOL` toOL [
3911 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3912 JMP_TBL op [ id | Just id <- ids ]
3916 #elif powerpc_TARGET_ARCH
-- PPC PIC case: scale the index by 4 (SLW by 2), load the relative
-- entry, add the table base, and branch via CTR.
3920 (reg,e_code) <- getSomeReg expr
3921 tmp <- getNewRegNat I32
3922 lbl <- getNewLabelNat
3923 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3924 (tableReg,t_code) <- getSomeReg $ dynRef
3926 jumpTable = map jumpTableEntryRel ids
3928 jumpTableEntryRel Nothing
3929 = CmmStaticLit (CmmInt 0 wordRep)
3930 jumpTableEntryRel (Just (BlockId id))
3931 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3932 where blockLabel = mkAsmTempLabel id
3934 code = e_code `appOL` t_code `appOL` toOL [
3935 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3936 SLW tmp reg (RIImm (ImmInt 2)),
3937 LD I32 tmp (AddrRegReg tableReg tmp),
3938 ADD tmp tmp (RIReg tableReg),
3940 BCTR [ id | Just id <- ids ]
-- PPC non-PIC case: address the table with high/low label halves
-- (ADDIS/HA + LO) and branch via CTR.
3945 (reg,e_code) <- getSomeReg expr
3946 tmp <- getNewRegNat I32
3947 lbl <- getNewLabelNat
3949 jumpTable = map jumpTableEntry ids
3951 code = e_code `appOL` toOL [
3952 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3953 SLW tmp reg (RIImm (ImmInt 2)),
3954 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3955 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3957 BCTR [ id | Just id <- ids ]
-- Other architectures: not implemented.
3961 genSwitch expr ids = panic "ToDo: genSwitch"
-- | One static entry of an absolute-address jump table: an unused
-- ('Nothing') slot becomes a zero word, a live slot becomes the
-- address of the target basic block's temporary assembler label.
jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
jumpTableEntry (Just (BlockId id))
    = CmmStaticLit (CmmLabel (mkAsmTempLabel id))
3968 -- -----------------------------------------------------------------------------
3970 -- -----------------------------------------------------------------------------
3973 -- -----------------------------------------------------------------------------
3974 -- 'condIntReg' and 'condFltReg': condition codes into registers
3976 -- Turn those condition codes into integers now (when they appear on
3977 -- the right hand side of an assignment).
3979 -- (If applicable) Do not fill the delay slots here; you will confuse the
3980 -- register allocator.
-- | Materialise a comparison result as a value in a register (used
-- when a condition appears on the right-hand side of an assignment).
3982 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3984 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3986 #if alpha_TARGET_ARCH
-- Not implemented for Alpha.
3987 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3988 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3989 #endif /* alpha_TARGET_ARCH */
3991 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3993 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | x86: materialise an integer comparison with SETcc into a byte
-- register, then zero-extend the byte to a full word.
3995 condIntReg cond x y = do
3996 CondCode _ cond cond_code <- condIntCode cond x y
3997 tmp <- getNewRegNat I8
3999 code dst = cond_code `appOL` toOL [
4000 SETCC cond (OpReg tmp),
4001 MOVZxL I8 (OpReg tmp) (OpReg dst)
4004 return (Any I32 code)
4008 #if i386_TARGET_ARCH
-- | i386: same SETcc + zero-extend scheme for float comparisons.
4010 condFltReg cond x y = do
4011 CondCode _ cond cond_code <- condFltCode cond x y
4012 tmp <- getNewRegNat I8
4014 code dst = cond_code `appOL` toOL [
4015 SETCC cond (OpReg tmp),
4016 MOVZxL I8 (OpReg tmp) (OpReg dst)
4019 return (Any I32 code)
4023 #if x86_64_TARGET_ARCH
-- | x86-64: SSE float comparisons must handle unordered operands
-- (NaN) via the parity flag — see the detailed comment below.
4025 condFltReg cond x y = do
4026 CondCode _ cond cond_code <- condFltCode cond x y
4027 tmp1 <- getNewRegNat wordRep
4028 tmp2 <- getNewRegNat wordRep
4030 -- We have to worry about unordered operands (eg. comparisons
4031 -- against NaN). If the operands are unordered, the comparison
4032 -- sets the parity flag, carry flag and zero flag.
4033 -- All comparisons are supposed to return false for unordered
4034 -- operands except for !=, which returns true.
4036 -- Optimisation: we don't have to test the parity flag if we
4037 -- know the test has already excluded the unordered case: eg >
4038 -- and >= test for a zero carry flag, which can only occur for
4039 -- ordered operands.
4041 -- ToDo: by reversing comparisons we could avoid testing the
4042 -- parity flag in more cases.
-- Choose the fixup variant per condition: NE must also succeed on
-- unordered; GU/GEU already exclude unordered; the rest must fail
-- on unordered.
4047 NE -> or_unordered dst
4048 GU -> plain_test dst
4049 GEU -> plain_test dst
4050 _ -> and_ordered dst)
4052 plain_test dst = toOL [
4053 SETCC cond (OpReg tmp1),
4054 MOVZxL I8 (OpReg tmp1) (OpReg dst)
4056 or_unordered dst = toOL [
4057 SETCC cond (OpReg tmp1),
4058 SETCC PARITY (OpReg tmp2),
4059 OR I8 (OpReg tmp1) (OpReg tmp2),
4060 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4062 and_ordered dst = toOL [
4063 SETCC cond (OpReg tmp1),
4064 SETCC NOTPARITY (OpReg tmp2),
4065 AND I8 (OpReg tmp1) (OpReg tmp2),
4066 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4069 return (Any I32 code)
4073 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4075 #if sparc_TARGET_ARCH
-- | SPARC: special-case comparisons against zero using subcc carry
-- tricks; the general case branches around 0/1 moves.
4077 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4078 (src, code) <- getSomeReg x
4079 tmp <- getNewRegNat I32
4081 code__2 dst = code `appOL` toOL [
4082 SUB False True g0 (RIReg src) g0,
4083 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4084 return (Any I32 code__2)
-- General equality: XOR the operands first, then the same zero test.
4086 condIntReg EQQ x y = do
4087 (src1, code1) <- getSomeReg x
4088 (src2, code2) <- getSomeReg y
4089 tmp1 <- getNewRegNat I32
4090 tmp2 <- getNewRegNat I32
4092 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4093 XOR False src1 (RIReg src2) dst,
4094 SUB False True g0 (RIReg dst) g0,
4095 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4096 return (Any I32 code__2)
4098 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4099 (src, code) <- getSomeReg x
4100 tmp <- getNewRegNat I32
4102 code__2 dst = code `appOL` toOL [
4103 SUB False True g0 (RIReg src) g0,
4104 ADD True False g0 (RIImm (ImmInt 0)) dst]
4105 return (Any I32 code__2)
4107 condIntReg NE x y = do
4108 (src1, code1) <- getSomeReg x
4109 (src2, code2) <- getSomeReg y
4110 tmp1 <- getNewRegNat I32
4111 tmp2 <- getNewRegNat I32
4113 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4114 XOR False src1 (RIReg src2) dst,
4115 SUB False True g0 (RIReg dst) g0,
4116 ADD True False g0 (RIImm (ImmInt 0)) dst]
4117 return (Any I32 code__2)
-- Fallback: conditional branch over "dst := 0" / "dst := 1" blocks.
4119 condIntReg cond x y = do
4120 BlockId lbl1 <- getBlockIdNat
4121 BlockId lbl2 <- getBlockIdNat
4122 CondCode _ cond cond_code <- condIntCode cond x y
4124 code__2 dst = cond_code `appOL` toOL [
4125 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4126 OR False g0 (RIImm (ImmInt 0)) dst,
4127 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4128 NEWBLOCK (BlockId lbl1),
4129 OR False g0 (RIImm (ImmInt 1)) dst,
4130 NEWBLOCK (BlockId lbl2)]
4131 return (Any I32 code__2)
-- Float comparisons use the same branching scheme with BF.
4133 condFltReg cond x y = do
4134 BlockId lbl1 <- getBlockIdNat
4135 BlockId lbl2 <- getBlockIdNat
4136 CondCode _ cond cond_code <- condFltCode cond x y
4138 code__2 dst = cond_code `appOL` toOL [
4140 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4141 OR False g0 (RIImm (ImmInt 0)) dst,
4142 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4143 NEWBLOCK (BlockId lbl1),
4144 OR False g0 (RIImm (ImmInt 1)) dst,
4145 NEWBLOCK (BlockId lbl2)]
4146 return (Any I32 code__2)
4148 #endif /* sparc_TARGET_ARCH */
4150 #if powerpc_TARGET_ARCH
-- | PPC: extract a condition-register bit into a GPR.  (NOTE(review):
-- several lines of this definition are elided in this listing — the
-- MFCR/negation details are not fully visible; comments are hedged.)
4151 condReg getCond = do
4152 lbl1 <- getBlockIdNat
4153 lbl2 <- getBlockIdNat
4154 CondCode _ cond cond_code <- getCond
4156 {- code dst = cond_code `appOL` toOL [
4165 code dst = cond_code
-- Rotate the selected CR bit into the low bit of dst.
4169 RLWINM dst dst (bit + 1) 31 31
-- Some conditions are encoded as the negation of a CR bit; CRNOR
-- flips that bit first.
4172 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4175 (bit, do_negate) = case cond of
4189 return (Any I32 code)
-- Both integer and float variants share the bit-extraction scheme.
4191 condIntReg cond x y = condReg (condIntCode cond x y)
4192 condFltReg cond x y = condReg (condFltCode cond x y)
4193 #endif /* powerpc_TARGET_ARCH */
4196 -- -----------------------------------------------------------------------------
4197 -- 'trivial*Code': deal with trivial instructions
4199 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4200 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4201 -- Only look for constants on the right hand side, because that's
4202 -- where the generic optimizer will have put them.
4204 -- Similarly, for unary instructions, we don't have to worry about
4205 -- matching an StInt as the argument, because genericOpt will already
4206 -- have handled the constant-folding.
-- Type signatures for the trivial*Code helpers.  The IF_ARCH_* CPP
-- macros select the per-architecture instruction-constructor argument
-- type (the leading "trivialCode ::" lines are elided in this listing).
4210 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4211 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4212 -> Maybe (Operand -> Operand -> Instr)
4213 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4214 -> Maybe (Operand -> Operand -> Instr)
4215 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4216 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4218 -> CmmExpr -> CmmExpr -- the two arguments
4221 #ifndef powerpc_TARGET_ARCH
-- trivialFCode: dyadic floating-point instructions.
4224 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4225 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4226 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4227 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4229 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialUCode: unary instructions.
4235 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4236 ,IF_ARCH_i386 ((Operand -> Instr)
4237 ,IF_ARCH_x86_64 ((Operand -> Instr)
4238 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4239 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4241 -> CmmExpr -- the one argument
4244 #ifndef powerpc_TARGET_ARCH
-- trivialUFCode: unary floating-point instructions.
4247 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4248 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4249 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4250 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4252 -> CmmExpr -- the one argument
4256 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4258 #if alpha_TARGET_ARCH
-- | Alpha: dyadic instruction with an immediate second operand.
-- (Written in pre-do-notation `thenNat` style.)
4260 trivialCode instr x (StInt y)
4262 = getRegister x `thenNat` \ register ->
4263 getNewRegNat IntRep `thenNat` \ tmp ->
4265 code = registerCode register tmp
4266 src1 = registerName register tmp
4267 src2 = ImmInt (fromInteger y)
4268 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4270 return (Any IntRep code__2)
-- General register/register case.
4272 trivialCode instr x y
4273 = getRegister x `thenNat` \ register1 ->
4274 getRegister y `thenNat` \ register2 ->
4275 getNewRegNat IntRep `thenNat` \ tmp1 ->
4276 getNewRegNat IntRep `thenNat` \ tmp2 ->
4278 code1 = registerCode register1 tmp1 []
4279 src1 = registerName register1 tmp1
4280 code2 = registerCode register2 tmp2 []
4281 src2 = registerName register2 tmp2
4282 code__2 dst = asmSeqThen [code1, code2] .
4283 mkSeqInstr (instr src1 (RIReg src2) dst)
4285 return (Any IntRep code__2)
-- Unary integer instruction.
4288 trivialUCode instr x
4289 = getRegister x `thenNat` \ register ->
4290 getNewRegNat IntRep `thenNat` \ tmp ->
4292 code = registerCode register tmp
4293 src = registerName register tmp
4294 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4296 return (Any IntRep code__2)
-- Dyadic floating-point instruction (always F64 on Alpha).
4299 trivialFCode _ instr x y
4300 = getRegister x `thenNat` \ register1 ->
4301 getRegister y `thenNat` \ register2 ->
4302 getNewRegNat F64 `thenNat` \ tmp1 ->
4303 getNewRegNat F64 `thenNat` \ tmp2 ->
4305 code1 = registerCode register1 tmp1
4306 src1 = registerName register1 tmp1
4308 code2 = registerCode register2 tmp2
4309 src2 = registerName register2 tmp2
4311 code__2 dst = asmSeqThen [code1 [], code2 []] .
4312 mkSeqInstr (instr src1 src2 dst)
4314 return (Any F64 code__2)
-- Unary floating-point instruction.
4316 trivialUFCode _ instr x
4317 = getRegister x `thenNat` \ register ->
4318 getNewRegNat F64 `thenNat` \ tmp ->
4320 code = registerCode register tmp
4321 src = registerName register tmp
4322 code__2 dst = code . mkSeqInstr (instr src dst)
4324 return (Any F64 code__2)
4326 #endif /* alpha_TARGET_ARCH */
4328 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4330 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4333 The Rules of the Game are:
4335 * You cannot assume anything about the destination register dst;
4336 it may be anything, including a fixed reg.
4338 * You may compute an operand into a fixed reg, but you may not
4339 subsequently change the contents of that fixed reg. If you
4340 want to do so, first copy the value either to a temporary
4341 or into dst. You are free to modify dst even if it happens
4342 to be a fixed reg -- that's not your problem.
4344 * You cannot assume that a fixed reg will stay live over an
4345 arbitrary computation. The same applies to the dst reg.
4347 * Temporary regs obtained from getNewRegNat are distinct from
4348 each other and from all other regs, and stay live over
4349 arbitrary computations.
4351 --------------------
4353 SDM's version of The Rules:
4355 * If getRegister returns Any, that means it can generate correct
4356 code which places the result in any register, period. Even if that
4357 register happens to be read during the computation.
4359 Corollary #1: this means that if you are generating code for an
4360 operation with two arbitrary operands, you cannot assign the result
4361 of the first operand into the destination register before computing
4362 the second operand. The second operand might require the old value
4363 of the destination register.
4365 Corollary #2: A function might be able to generate more efficient
4366 code if it knows the destination register is a new temporary (and
4367 therefore not read by any of the sub-computations).
4369 * If getRegister returns Any, then the code it generates may modify only:
4370 (a) fresh temporaries
4371 (b) the destination register
4372 (c) known registers (eg. %ecx is used by shifts)
4373 In particular, it may *not* modify global registers, unless the global
4374 register happens to be the destination register.
4377 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4378 | not (is64BitLit lit_a) = do
4379 b_code <- getAnyReg b
4382 = b_code dst `snocOL`
4383 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4385 return (Any rep code)
4387 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
-- This is re-used for floating pt instructions too.
-- | Generic two-operand code generator (dst := dst `op` src):
-- evaluate b to an operand that survives the computation of a,
-- compute a straight into the destination, then apply 'instr'.
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep
  -- We want the value of b to stay alive across the computation of a.
  -- But, we want to calculate a straight into the destination register,
  -- because the instruction only has two operands (dst := dst `op` src).
  -- The troublesome case is when the result of b is in the same register
  -- as the destination reg.  In this case, we have to save b in a
  -- new temporary across the computation of a.
     | dst `regClashesWithOp` b_op =
         -- b would be clobbered by computing a into dst: park b in 'tmp'.
         unitOL (MOV rep b_op (OpReg tmp)) `appOL`
         instr (OpReg tmp) (OpReg dst)
         instr b_op (OpReg dst)
  return (Any rep code)
-- | Does the operand mention the given register?  True when the
-- operand names the register directly, or when an address-mode
-- operand uses it as one of its component registers.
reg `regClashesWithOp` OpReg reg2   = reg == reg2
reg `regClashesWithOp` OpAddr amode = reg `elem` addrModeRegs amode
reg `regClashesWithOp` _            = False
-- | Unary integer operation: compute the operand straight into the
-- destination register and apply 'instr'.
trivialUCode rep instr x = do
    x_code <- getAnyReg x
    return (Any rep code)
4431 #if i386_TARGET_ARCH
-- | Two-operand floating-point operation (i386): x is evaluated to a
-- register that survives the computation of y; 'instr' combines both
-- into the destination.
trivialFCode pk instr x y = do
  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
  (y_reg, y_code) <- getSomeReg y
      instr pk x_reg y_reg dst
  return (Any pk code)
4446 #if x86_64_TARGET_ARCH
-- | On x86_64, floating-point operations use the same two-operand
-- form as the integer ones, so defer to 'genTrivialCode'.
trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- | Unary floating-point operation: operand into a register, result
-- into the destination.
trivialUFCode rep instr x = do
  (x_reg, x_code) <- getSomeReg x
  return (Any rep code)
4463 #endif /* i386_TARGET_ARCH */
4465 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4467 #if sparc_TARGET_ARCH
-- | SPARC two-operand integer operation.  The first equation handles
-- an integer-literal second operand by emitting it as an immediate
-- (RIImm); the second evaluates both operands into registers.
trivialCode pk instr x (CmmLit (CmmInt y d))
    (src1, code) <- getSomeReg x
    tmp <- getNewRegNat I32
        src2 = ImmInt (fromInteger y)
        code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
    return (Any I32 code__2)

trivialCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat I32
    tmp2 <- getNewRegNat I32
    code__2 dst = code1 `appOL` code2 `snocOL`
                  instr src1 (RIReg src2) dst
    return (Any I32 code__2)
-- | SPARC two-operand floating-point operation.  If the operand reps
-- differ, the F32 operand is first widened to F64 ('promote', via
-- FxTOy into 'tmp') so the instruction sees same-sized operands;
-- the result rep is then F64.
trivialFCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat (cmmExprRep x)
    tmp2 <- getNewRegNat (cmmExprRep y)
    tmp <- getNewRegNat F64
    promote x = FxTOy F32 F64 x tmp
          -- same rep on both sides: operate directly
          code1 `appOL` code2 `snocOL`
          instr pk src1 src2 dst
      else if pk1 == F32 then
          -- left operand is single precision: widen it first
          code1 `snocOL` promote src1 `appOL` code2 `snocOL`
          instr F64 tmp src2 dst
          -- right operand is single precision: widen it first
          code1 `appOL` code2 `snocOL` promote src2 `snocOL`
          instr F64 src1 tmp dst
    return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- | SPARC unary integer operation ('instr' takes a reg-or-imm source).
trivialUCode pk instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat pk
    code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any pk code__2)
-- | SPARC unary floating-point operation.
trivialUFCode pk instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat pk
    code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)
4530 #endif /* sparc_TARGET_ARCH */
4532 #if powerpc_TARGET_ARCH
4535 Wolfgang's PowerPC version of The Rules:
4537 A slightly modified version of The Rules to take advantage of the fact
4538 that PowerPC instructions work on all registers and don't implicitly
4539 clobber any fixed registers.
4541 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4543 * If getRegister returns Any, then the code it generates may modify only:
4544 (a) fresh temporaries
4545 (b) the destination register
4546 It may *not* modify global registers, unless the global
4547 register happens to be the destination register.
4548 It may not clobber any other registers. In fact, only ccalls clobber any
4550 Also, it may not modify the counter register (used by genCCall).
4552 Corollary: If a getRegister for a subexpression returns Fixed, you need
4553 not move it to a fresh temporary before evaluating the next subexpression.
4554 The Fixed register won't be modified.
4555 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4557 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4558 the value of the destination register.
-- | PowerPC two-operand integer operation.  The first equation fires
-- when the literal second operand fits the instruction's immediate
-- field (per 'makeImmediate', which honours 'rep' and signedness);
-- otherwise both operands are evaluated into registers.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
    (src1, code1) <- getSomeReg x
    let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
    return (Any rep code)

trivialCode rep signed instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
    return (Any rep code)
-- | Three-register PowerPC operation with no immediate form: both
-- operands are forced into registers and combined straight into the
-- destination with 'instr'.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (x_reg, x_code) <- getSomeReg x
    (y_reg, y_code) <- getSomeReg y
    let assemble dst = x_code `appOL` y_code `snocOL` instr dst x_reg y_reg
    return (Any rep assemble)
-- | Unary operation: evaluate the operand into a register and apply
-- 'instr' to place the result in the destination.
trivialUCode rep instr x = do
    (src_reg, src_code) <- getSomeReg x
    return (Any rep (\dst -> src_code `snocOL` instr dst src_reg))
-- There is no "remainder" instruction on the PPC, so we have to do
-- it by hand: divide, multiply the quotient back, and subtract.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
            MULLW dst dst (RIReg src2),
    return (Any rep code)
4603 #endif /* powerpc_TARGET_ARCH */
4606 -- -----------------------------------------------------------------------------
4607 -- Coercing to/from integer/floating-point...
4609 -- When going to integer, we truncate (round towards 0).
4611 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4612 -- conversions. We have to store temporaries in memory to move
4613 -- between the integer and the floating point register sets.
4615 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4617 -- kinds, so the value has to be computed into one kind before being
4618 -- explicitly "converted" to live in the other kind.
-- Signatures for the per-architecture conversion routines defined
-- below: source rep, target rep, and the expression to convert.
coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4623 #if sparc_TARGET_ARCH
4624 coerceDbl2Flt :: CmmExpr -> NatM Register
4625 coerceFlt2Dbl :: CmmExpr -> NatM Register
4628 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4630 #if alpha_TARGET_ARCH
4633 = getRegister x `thenNat` \ register ->
4634 getNewRegNat IntRep `thenNat` \ reg ->
4636 code = registerCode register reg
4637 src = registerName register reg
4639 code__2 dst = code . mkSeqInstrs [
4641 LD TF dst (spRel 0),
4644 return (Any F64 code__2)
4648 = getRegister x `thenNat` \ register ->
4649 getNewRegNat F64 `thenNat` \ tmp ->
4651 code = registerCode register tmp
4652 src = registerName register tmp
4654 code__2 dst = code . mkSeqInstrs [
4656 ST TF tmp (spRel 0),
4659 return (Any IntRep code__2)
4661 #endif /* alpha_TARGET_ARCH */
4663 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4665 #if i386_TARGET_ARCH
-- | i386: integer to floating-point.  The conversion instruction is
-- chosen by the target rep (GITOF for F32, GITOD for F64).
coerceInt2FP from to x = do
  (x_reg, x_code) <- getSomeReg x
      opc  = case to of F32 -> GITOF; F64 -> GITOD
      code dst = x_code `snocOL` opc x_reg dst
      -- ToDo: works for non-I32 reps?
  return (Any to code)
-- | i386: floating-point to integer.  Instruction chosen by the
-- source rep (GFTOI for F32, GDTOI for F64).
coerceFP2Int from to x = do
  (x_reg, x_code) <- getSomeReg x
      opc  = case from of F32 -> GFTOI; F64 -> GDTOI
      code dst = x_code `snocOL` opc x_reg dst
      -- ToDo: works for non-I32 reps?
  return (Any to code)
4687 #endif /* i386_TARGET_ARCH */
4689 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4691 #if x86_64_TARGET_ARCH
-- | x86_64: floating-point to integer, using the truncating CVTT*
-- conversions (the module's convention is to round towards zero).
coerceFP2Int from to x = do
  (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
      opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
      code dst = x_code `snocOL` opc x_op dst
  return (Any to code) -- works even if the destination rep is <I32
-- | x86_64: integer to floating-point (CVTSI2SS or CVTSI2SD,
-- chosen by the target rep).
coerceInt2FP from to x = do
  (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
      opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
      code dst = x_code `snocOL` opc x_op dst
  return (Any to code) -- works even if the destination rep is <I32
-- | Convert between the two floating-point widths; the direction is
-- determined by the target rep.
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
  (x_reg, x_code) <- getSomeReg x
      opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
      code dst = x_code `snocOL` opc x_reg dst
  return (Any to code)
4720 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4722 #if sparc_TARGET_ARCH
-- | SPARC: integer to floating-point.  The value is bounced through
-- a stack slot (ST then LD) to cross between the integer and FP
-- register banks, then converted in place with FxTOy.
coerceInt2FP pk1 pk2 x = do
    (src, code) <- getSomeReg x
        code__2 dst = code `appOL` toOL [
            ST pk1 src (spRel (-2)),
            LD pk1 (spRel (-2)) dst,
            FxTOy pk1 pk2 dst dst]
    return (Any pk2 code__2)
-- | SPARC: floating-point to integer.  Convert within the FP bank
-- (FxTOy), then bounce the result through a stack slot to reach the
-- integer register bank.
coerceFP2Int pk fprep x = do
    (src, code) <- getSomeReg x
    reg <- getNewRegNat fprep
    tmp <- getNewRegNat pk
    code__2 dst = ASSERT(fprep == F64 || fprep == F32)
        FxTOy fprep pk src tmp,
        ST pk tmp (spRel (-2)),
        LD pk (spRel (-2)) dst]
    return (Any pk code__2)
-- | Narrow a double-precision value to single precision (F64 -> F32).
coerceDbl2Flt x = do
    (src_reg, src_code) <- getSomeReg x
    let conv dst = src_code `snocOL` FxTOy F64 F32 src_reg dst
    return (Any F32 conv)
-- | Widen a single-precision value to double precision (F32 -> F64).
coerceFlt2Dbl x = do
    (src_reg, src_code) <- getSomeReg x
    let conv dst = src_code `snocOL` FxTOy F32 F64 src_reg dst
    return (Any F64 conv)
4756 #endif /* sparc_TARGET_ARCH */
4758 #if powerpc_TARGET_ARCH
-- | PowerPC: integer to floating-point.  Builds a biased double on
-- the stack from the (bit-flipped) integer plus magic high word, then
-- subtracts the bias constant loaded from a static data label.
-- NOTE(review): the 0x43300000/0x80000000 constants look like the
-- classic 2^52-bias int->double trick -- confirm against the
-- surrounding (elided) code.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat I32
    ftmp <- getNewRegNat F64
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    code' dst = code `appOL` maybe_exts `appOL` toOL [
            CmmStaticLit (CmmInt 0x43300000 I32),
            CmmStaticLit (CmmInt 0x80000000 I32)],
        XORIS itmp src (ImmInt 0x8000),
        ST I32 itmp (spRel 3),
        LIS itmp (ImmInt 0x4330),
        ST I32 itmp (spRel 2),
        LD F64 ftmp (spRel 2)
        ] `appOL` addr_code `appOL` toOL [
        FSUB F64 dst ftmp dst
        ] `appOL` maybe_frsp dst
    -- Narrow source reps must be sign-extended first.
    maybe_exts = case fromRep of
        I8  -> unitOL $ EXTS I8 src src
        I16 -> unitOL $ EXTS I16 src src
    -- Round the F64 result to single precision when F32 is requested.
    maybe_frsp dst = case toRep of
        F32 -> unitOL $ FRSP dst dst
    return (Any toRep code')
-- | PowerPC: floating-point to integer.  Convert in an FP register,
-- spill the 64-bit result to the stack, and reload only the low word.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
    code' dst = code `appOL` toOL [
        -- convert to int in FP reg
        -- store value (64bit) from FP to stack
        ST F64 tmp (spRel 2),
        -- read low word of value (high word is undefined)
        LD I32 dst (spRel 3)]
    return (Any toRep code')
4804 #endif /* powerpc_TARGET_ARCH */
4807 -- -----------------------------------------------------------------------------
4808 -- eXTRA_STK_ARGS_HERE
4810 -- We (allegedly) put the first six C-call arguments in registers;
4811 -- where do we start putting the rest of them?
4813 -- Moved from MachInstrs (SDM):
4815 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4816 eXTRA_STK_ARGS_HERE :: Int
4818 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))