1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
32 import ClosureInfo ( C_SRT(..) )
35 import StaticFlags ( opt_PIC )
36 import ForeignCall ( CCallConv(..) )
41 import FastTypes ( isFastTrue )
42 import Constants ( wORD_SIZE )
45 import Outputable ( assertPanic )
46 import Debug.Trace ( trace )
48 import Debug.Trace ( trace )
50 import Control.Monad ( mapAndUnzipM )
51 import Data.Maybe ( fromJust )
56 -- -----------------------------------------------------------------------------
57 -- Top-level of the instruction selector
59 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
60 -- They are really trees of insns to facilitate fast appending, where a
61 -- left-to-right traversal (pre-order?) yields the insns in the correct
64 type InstrBlock = OrdList Instr
66 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
67 cmmTopCodeGen (CmmProc info lab params blocks) = do
68 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
69 picBaseMb <- getPicBaseMaybeNat
70 let proc = CmmProc info lab params (concat nat_blocks)
71 tops = proc : concat statics
73 Just picBase -> initializePicBase picBase tops
74 Nothing -> return tops
76 cmmTopCodeGen (CmmData sec dat) = do
77 return [CmmData sec dat] -- no translation, we just use CmmStatic
79 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
80 basicBlockCodeGen (BasicBlock id stmts) = do
81 instrs <- stmtsToInstrs stmts
82 -- code generation may introduce new basic block boundaries, which
83 -- are indicated by the NEWBLOCK instruction. We must split up the
84 -- instruction stream into basic blocks again. Also, we extract
87 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
89 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
90 = ([], BasicBlock id instrs : blocks, statics)
91 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
92 = (instrs, blocks, CmmData sec dat:statics)
93 mkBlocks instr (instrs,blocks,statics)
94 = (instr:instrs, blocks, statics)
96 return (BasicBlock id top : other_blocks, statics)
98 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
100 = do instrss <- mapM stmtToInstrs stmts
101 return (concatOL instrss)
103 stmtToInstrs :: CmmStmt -> NatM InstrBlock
104 stmtToInstrs stmt = case stmt of
105 CmmNop -> return nilOL
106 CmmComment s -> return (unitOL (COMMENT s))
109 | isFloatingRep kind -> assignReg_FltCode kind reg src
110 #if WORD_SIZE_IN_BITS==32
111 | kind == I64 -> assignReg_I64Code reg src
113 | otherwise -> assignReg_IntCode kind reg src
114 where kind = cmmRegRep reg
117 | isFloatingRep kind -> assignMem_FltCode kind addr src
118 #if WORD_SIZE_IN_BITS==32
119 | kind == I64 -> assignMem_I64Code addr src
121 | otherwise -> assignMem_IntCode kind addr src
122 where kind = cmmExprRep src
124 CmmCall target result_regs args _ _
125 -> genCCall target result_regs args
127 CmmBranch id -> genBranch id
128 CmmCondBranch arg id -> genCondJump id arg
129 CmmSwitch arg ids -> genSwitch arg ids
130 CmmJump arg params -> genJump arg
132 -- -----------------------------------------------------------------------------
133 -- General things for putting together code sequences
135 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
136 -- CmmExprs into CmmRegOff?
137 mangleIndexTree :: CmmExpr -> CmmExpr
138 mangleIndexTree (CmmRegOff reg off)
139 = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
140 where rep = cmmRegRep reg
142 -- -----------------------------------------------------------------------------
143 -- Code gen for 64-bit arithmetic on 32-bit platforms
146 Simple support for generating 64-bit code (ie, 64 bit values and 64
147 bit assignments) on 32-bit platforms. Unlike the main code generator
148 we merely shoot for generating working code as simply as possible, and
149 pay little attention to code quality. Specifically, there is no
150 attempt to deal cleverly with the fixed-vs-floating register
151 distinction; all values are generated into (pairs of) floating
152 registers, even if this would mean some redundant reg-reg moves as a
153 result. Only one of the VRegUniques is returned, since it will be
154 of the VRegUniqueLo form, and the upper-half VReg can be determined
155 by applying getHiVRegFromLo to it.
158 data ChildCode64 -- a.k.a "Register64"
161 Reg -- the lower 32-bit temporary which contains the
162 -- result; use getHiVRegFromLo to find the other
163 -- VRegUnique. Rules of this simplified insn
164 -- selection game are therefore that the returned
165 -- Reg may be modified
167 #if WORD_SIZE_IN_BITS==32
168 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
169 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
172 #ifndef x86_64_TARGET_ARCH
173 iselExpr64 :: CmmExpr -> NatM ChildCode64
176 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
180 assignMem_I64Code addrTree valueTree = do
181 Amode addr addr_code <- getAmode addrTree
182 ChildCode64 vcode rlo <- iselExpr64 valueTree
184 rhi = getHiVRegFromLo rlo
186 -- Little-endian store
187 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
188 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
190 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
193 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
194 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
196 r_dst_lo = mkVReg u_dst I32
197 r_dst_hi = getHiVRegFromLo r_dst_lo
198 r_src_hi = getHiVRegFromLo r_src_lo
199 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
200 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
203 vcode `snocOL` mov_lo `snocOL` mov_hi
206 assignReg_I64Code lvalue valueTree
207 = panic "assignReg_I64Code(i386): invalid lvalue"
211 iselExpr64 (CmmLit (CmmInt i _)) = do
212 (rlo,rhi) <- getNewRegPairNat I32
214 r = fromIntegral (fromIntegral i :: Word32)
215 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
217 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
218 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
221 return (ChildCode64 code rlo)
223 iselExpr64 (CmmLoad addrTree I64) = do
224 Amode addr addr_code <- getAmode addrTree
225 (rlo,rhi) <- getNewRegPairNat I32
227 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
228 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
231 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
235 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
236 = return (ChildCode64 nilOL (mkVReg vu I32))
238 -- we handle addition, but rather badly
239 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
240 ChildCode64 code1 r1lo <- iselExpr64 e1
241 (rlo,rhi) <- getNewRegPairNat I32
243 r = fromIntegral (fromIntegral i :: Word32)
244 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
245 r1hi = getHiVRegFromLo r1lo
247 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
248 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
249 MOV I32 (OpReg r1hi) (OpReg rhi),
250 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
252 return (ChildCode64 code rlo)
254 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
255 ChildCode64 code1 r1lo <- iselExpr64 e1
256 ChildCode64 code2 r2lo <- iselExpr64 e2
257 (rlo,rhi) <- getNewRegPairNat I32
259 r1hi = getHiVRegFromLo r1lo
260 r2hi = getHiVRegFromLo r2lo
263 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
264 ADD I32 (OpReg r2lo) (OpReg rlo),
265 MOV I32 (OpReg r1hi) (OpReg rhi),
266 ADC I32 (OpReg r2hi) (OpReg rhi) ]
268 return (ChildCode64 code rlo)
270 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
272 r_dst_lo <- getNewRegNat I32
273 let r_dst_hi = getHiVRegFromLo r_dst_lo
276 ChildCode64 (code `snocOL`
277 MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
282 = pprPanic "iselExpr64(i386)" (ppr expr)
284 #endif /* i386_TARGET_ARCH */
286 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
288 #if sparc_TARGET_ARCH
290 assignMem_I64Code addrTree valueTree = do
291 Amode addr addr_code <- getAmode addrTree
292 ChildCode64 vcode rlo <- iselExpr64 valueTree
293 (src, code) <- getSomeReg addrTree
295 rhi = getHiVRegFromLo rlo
297 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
298 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
299 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
301 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
302 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
304 r_dst_lo = mkVReg u_dst pk
305 r_dst_hi = getHiVRegFromLo r_dst_lo
306 r_src_hi = getHiVRegFromLo r_src_lo
307 mov_lo = mkMOV r_src_lo r_dst_lo
308 mov_hi = mkMOV r_src_hi r_dst_hi
309 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
310 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
311 assignReg_I64Code lvalue valueTree
312 = panic "assignReg_I64Code(sparc): invalid lvalue"
315 -- Don't delete this -- it's very handy for debugging.
317 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
318 -- = panic "iselExpr64(???)"
320 iselExpr64 (CmmLoad addrTree I64) = do
321 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
322 rlo <- getNewRegNat I32
323 let rhi = getHiVRegFromLo rlo
324 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
325 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
327 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
331 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
332 r_dst_lo <- getNewRegNat I32
333 let r_dst_hi = getHiVRegFromLo r_dst_lo
334 r_src_lo = mkVReg uq I32
335 r_src_hi = getHiVRegFromLo r_src_lo
336 mov_lo = mkMOV r_src_lo r_dst_lo
337 mov_hi = mkMOV r_src_hi r_dst_hi
338 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
340 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
344 = pprPanic "iselExpr64(sparc)" (ppr expr)
346 #endif /* sparc_TARGET_ARCH */
348 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
350 #if powerpc_TARGET_ARCH
352 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
353 getI64Amodes addrTree = do
354 Amode hi_addr addr_code <- getAmode addrTree
355 case addrOffset hi_addr 4 of
356 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
357 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
358 return (AddrRegImm hi_ptr (ImmInt 0),
359 AddrRegImm hi_ptr (ImmInt 4),
362 assignMem_I64Code addrTree valueTree = do
363 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
364 ChildCode64 vcode rlo <- iselExpr64 valueTree
366 rhi = getHiVRegFromLo rlo
369 mov_hi = ST I32 rhi hi_addr
370 mov_lo = ST I32 rlo lo_addr
372 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
374 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
375 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
377 r_dst_lo = mkVReg u_dst I32
378 r_dst_hi = getHiVRegFromLo r_dst_lo
379 r_src_hi = getHiVRegFromLo r_src_lo
380 mov_lo = MR r_dst_lo r_src_lo
381 mov_hi = MR r_dst_hi r_src_hi
384 vcode `snocOL` mov_lo `snocOL` mov_hi
387 assignReg_I64Code lvalue valueTree
388 = panic "assignReg_I64Code(powerpc): invalid lvalue"
391 -- Don't delete this -- it's very handy for debugging.
393 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
394 -- = panic "iselExpr64(???)"
396 iselExpr64 (CmmLoad addrTree I64) = do
397 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
398 (rlo, rhi) <- getNewRegPairNat I32
399 let mov_hi = LD I32 rhi hi_addr
400 mov_lo = LD I32 rlo lo_addr
401 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
404 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
405 = return (ChildCode64 nilOL (mkVReg vu I32))
407 iselExpr64 (CmmLit (CmmInt i _)) = do
408 (rlo,rhi) <- getNewRegPairNat I32
410 half0 = fromIntegral (fromIntegral i :: Word16)
411 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
412 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
413 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
416 LIS rlo (ImmInt half1),
417 OR rlo rlo (RIImm $ ImmInt half0),
418 LIS rhi (ImmInt half3),
419 OR rlo rlo (RIImm $ ImmInt half2)
422 return (ChildCode64 code rlo)
424 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
425 ChildCode64 code1 r1lo <- iselExpr64 e1
426 ChildCode64 code2 r2lo <- iselExpr64 e2
427 (rlo,rhi) <- getNewRegPairNat I32
429 r1hi = getHiVRegFromLo r1lo
430 r2hi = getHiVRegFromLo r2lo
433 toOL [ ADDC rlo r1lo r2lo,
436 return (ChildCode64 code rlo)
438 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
439 (expr_reg,expr_code) <- getSomeReg expr
440 (rlo, rhi) <- getNewRegPairNat I32
441 let mov_hi = LI rhi (ImmInt 0)
442 mov_lo = MR rlo expr_reg
443 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
446 = pprPanic "iselExpr64(powerpc)" (ppr expr)
448 #endif /* powerpc_TARGET_ARCH */
451 -- -----------------------------------------------------------------------------
452 -- The 'Register' type
454 -- 'Register's passed up the tree. If the stix code forces the register
455 -- to live in a pre-decided machine register, it comes out as @Fixed@;
456 -- otherwise, it comes out as @Any@, and the parent can decide which
457 -- register to put it in.
460 = Fixed MachRep Reg InstrBlock
461 | Any MachRep (Reg -> InstrBlock)
463 swizzleRegisterRep :: Register -> MachRep -> Register
464 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
465 swizzleRegisterRep (Any _ codefn) rep = Any rep codefn
468 -- -----------------------------------------------------------------------------
469 -- Utils based on getRegister, below
471 -- The dual to getAnyReg: compute an expression into a register, but
472 -- we don't mind which one it is.
473 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
475 r <- getRegister expr
478 tmp <- getNewRegNat rep
479 return (tmp, code tmp)
483 -- -----------------------------------------------------------------------------
484 -- Grab the Reg for a CmmReg
486 getRegisterReg :: CmmReg -> Reg
488 getRegisterReg (CmmLocal (LocalReg u pk _))
491 getRegisterReg (CmmGlobal mid)
492 = case get_GlobalReg_reg_or_addr mid of
493 Left (RealReg rrno) -> RealReg rrno
494 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
495 -- By this stage, the only MagicIds remaining should be the
496 -- ones which map to a real machine register on this
497 -- platform. Hence ...
500 -- -----------------------------------------------------------------------------
501 -- Generate code to get a subtree into a Register
503 -- Don't delete this -- it's very handy for debugging.
505 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
506 -- = panic "getRegister(???)"
508 getRegister :: CmmExpr -> NatM Register
510 #if !x86_64_TARGET_ARCH
511 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
512 -- register, it can only be used for rip-relative addressing.
513 getRegister (CmmReg (CmmGlobal PicBaseReg))
515 reg <- getPicBaseNat wordRep
516 return (Fixed wordRep reg nilOL)
519 getRegister (CmmReg reg)
520 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
522 getRegister tree@(CmmRegOff _ _)
523 = getRegister (mangleIndexTree tree)
526 #if WORD_SIZE_IN_BITS==32
527 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
528 -- TO_W_(x), TO_W_(x >> 32)
530 getRegister (CmmMachOp (MO_U_Conv I64 I32)
531 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
532 ChildCode64 code rlo <- iselExpr64 x
533 return $ Fixed I32 (getHiVRegFromLo rlo) code
535 getRegister (CmmMachOp (MO_S_Conv I64 I32)
536 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
537 ChildCode64 code rlo <- iselExpr64 x
538 return $ Fixed I32 (getHiVRegFromLo rlo) code
540 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
541 ChildCode64 code rlo <- iselExpr64 x
542 return $ Fixed I32 rlo code
544 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
545 ChildCode64 code rlo <- iselExpr64 x
546 return $ Fixed I32 rlo code
550 -- end of machine-"independent" bit; here we go on the rest...
552 #if alpha_TARGET_ARCH
554 getRegister (StDouble d)
555 = getBlockIdNat `thenNat` \ lbl ->
556 getNewRegNat PtrRep `thenNat` \ tmp ->
557 let code dst = mkSeqInstrs [
558 LDATA RoDataSegment lbl [
559 DATA TF [ImmLab (rational d)]
561 LDA tmp (AddrImm (ImmCLbl lbl)),
562 LD TF dst (AddrReg tmp)]
564 return (Any F64 code)
566 getRegister (StPrim primop [x]) -- unary PrimOps
568 IntNegOp -> trivialUCode (NEG Q False) x
570 NotOp -> trivialUCode NOT x
572 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
573 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
575 OrdOp -> coerceIntCode IntRep x
578 Float2IntOp -> coerceFP2Int x
579 Int2FloatOp -> coerceInt2FP pr x
580 Double2IntOp -> coerceFP2Int x
581 Int2DoubleOp -> coerceInt2FP pr x
583 Double2FloatOp -> coerceFltCode x
584 Float2DoubleOp -> coerceFltCode x
586 other_op -> getRegister (StCall fn CCallConv F64 [x])
588 fn = case other_op of
589 FloatExpOp -> FSLIT("exp")
590 FloatLogOp -> FSLIT("log")
591 FloatSqrtOp -> FSLIT("sqrt")
592 FloatSinOp -> FSLIT("sin")
593 FloatCosOp -> FSLIT("cos")
594 FloatTanOp -> FSLIT("tan")
595 FloatAsinOp -> FSLIT("asin")
596 FloatAcosOp -> FSLIT("acos")
597 FloatAtanOp -> FSLIT("atan")
598 FloatSinhOp -> FSLIT("sinh")
599 FloatCoshOp -> FSLIT("cosh")
600 FloatTanhOp -> FSLIT("tanh")
601 DoubleExpOp -> FSLIT("exp")
602 DoubleLogOp -> FSLIT("log")
603 DoubleSqrtOp -> FSLIT("sqrt")
604 DoubleSinOp -> FSLIT("sin")
605 DoubleCosOp -> FSLIT("cos")
606 DoubleTanOp -> FSLIT("tan")
607 DoubleAsinOp -> FSLIT("asin")
608 DoubleAcosOp -> FSLIT("acos")
609 DoubleAtanOp -> FSLIT("atan")
610 DoubleSinhOp -> FSLIT("sinh")
611 DoubleCoshOp -> FSLIT("cosh")
612 DoubleTanhOp -> FSLIT("tanh")
614 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
616 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
618 CharGtOp -> trivialCode (CMP LTT) y x
619 CharGeOp -> trivialCode (CMP LE) y x
620 CharEqOp -> trivialCode (CMP EQQ) x y
621 CharNeOp -> int_NE_code x y
622 CharLtOp -> trivialCode (CMP LTT) x y
623 CharLeOp -> trivialCode (CMP LE) x y
625 IntGtOp -> trivialCode (CMP LTT) y x
626 IntGeOp -> trivialCode (CMP LE) y x
627 IntEqOp -> trivialCode (CMP EQQ) x y
628 IntNeOp -> int_NE_code x y
629 IntLtOp -> trivialCode (CMP LTT) x y
630 IntLeOp -> trivialCode (CMP LE) x y
632 WordGtOp -> trivialCode (CMP ULT) y x
633 WordGeOp -> trivialCode (CMP ULE) x y
634 WordEqOp -> trivialCode (CMP EQQ) x y
635 WordNeOp -> int_NE_code x y
636 WordLtOp -> trivialCode (CMP ULT) x y
637 WordLeOp -> trivialCode (CMP ULE) x y
639 AddrGtOp -> trivialCode (CMP ULT) y x
640 AddrGeOp -> trivialCode (CMP ULE) y x
641 AddrEqOp -> trivialCode (CMP EQQ) x y
642 AddrNeOp -> int_NE_code x y
643 AddrLtOp -> trivialCode (CMP ULT) x y
644 AddrLeOp -> trivialCode (CMP ULE) x y
646 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
647 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
648 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
649 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
650 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
651 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
653 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
654 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
655 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
656 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
657 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
658 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
660 IntAddOp -> trivialCode (ADD Q False) x y
661 IntSubOp -> trivialCode (SUB Q False) x y
662 IntMulOp -> trivialCode (MUL Q False) x y
663 IntQuotOp -> trivialCode (DIV Q False) x y
664 IntRemOp -> trivialCode (REM Q False) x y
666 WordAddOp -> trivialCode (ADD Q False) x y
667 WordSubOp -> trivialCode (SUB Q False) x y
668 WordMulOp -> trivialCode (MUL Q False) x y
669 WordQuotOp -> trivialCode (DIV Q True) x y
670 WordRemOp -> trivialCode (REM Q True) x y
672 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
673 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
674 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
675 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
677 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
678 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
679 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
680 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
682 AddrAddOp -> trivialCode (ADD Q False) x y
683 AddrSubOp -> trivialCode (SUB Q False) x y
684 AddrRemOp -> trivialCode (REM Q True) x y
686 AndOp -> trivialCode AND x y
687 OrOp -> trivialCode OR x y
688 XorOp -> trivialCode XOR x y
689 SllOp -> trivialCode SLL x y
690 SrlOp -> trivialCode SRL x y
692 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
693 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
694 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
696 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
697 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
699 {- ------------------------------------------------------------
700 Some bizarre special code for getting condition codes into
701 registers. Integer non-equality is a test for equality
702 followed by an XOR with 1. (Integer comparisons always set
703 the result register to 0 or 1.) Floating point comparisons of
704 any kind leave the result in a floating point register, so we
705 need to wrangle an integer register out of things.
707 int_NE_code :: StixTree -> StixTree -> NatM Register
710 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
711 getNewRegNat IntRep `thenNat` \ tmp ->
713 code = registerCode register tmp
714 src = registerName register tmp
715 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
717 return (Any IntRep code__2)
719 {- ------------------------------------------------------------
720 Comments for int_NE_code also apply to cmpF_code
723 :: (Reg -> Reg -> Reg -> Instr)
725 -> StixTree -> StixTree
728 cmpF_code instr cond x y
729 = trivialFCode pr instr x y `thenNat` \ register ->
730 getNewRegNat F64 `thenNat` \ tmp ->
731 getBlockIdNat `thenNat` \ lbl ->
733 code = registerCode register tmp
734 result = registerName register tmp
736 code__2 dst = code . mkSeqInstrs [
737 OR zeroh (RIImm (ImmInt 1)) dst,
738 BF cond result (ImmCLbl lbl),
739 OR zeroh (RIReg zeroh) dst,
742 return (Any IntRep code__2)
744 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
745 ------------------------------------------------------------
747 getRegister (CmmLoad pk mem)
748 = getAmode mem `thenNat` \ amode ->
750 code = amodeCode amode
751 src = amodeAddr amode
752 size = primRepToSize pk
753 code__2 dst = code . mkSeqInstr (LD size dst src)
755 return (Any pk code__2)
757 getRegister (StInt i)
760 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
762 return (Any IntRep code)
765 code dst = mkSeqInstr (LDI Q dst src)
767 return (Any IntRep code)
769 src = ImmInt (fromInteger i)
774 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
776 return (Any PtrRep code)
779 imm__2 = case imm of Just x -> x
781 #endif /* alpha_TARGET_ARCH */
783 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
787 getRegister (CmmLit (CmmFloat f F32)) = do
788 lbl <- getNewLabelNat
789 dflags <- getDynFlagsNat
790 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
791 Amode addr addr_code <- getAmode dynRef
795 CmmStaticLit (CmmFloat f F32)]
796 `consOL` (addr_code `snocOL`
799 return (Any F32 code)
802 getRegister (CmmLit (CmmFloat d F64))
804 = let code dst = unitOL (GLDZ dst)
805 in return (Any F64 code)
808 = let code dst = unitOL (GLD1 dst)
809 in return (Any F64 code)
812 lbl <- getNewLabelNat
813 dflags <- getDynFlagsNat
814 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
815 Amode addr addr_code <- getAmode dynRef
819 CmmStaticLit (CmmFloat d F64)]
820 `consOL` (addr_code `snocOL`
823 return (Any F64 code)
825 #endif /* i386_TARGET_ARCH */
827 #if x86_64_TARGET_ARCH
829 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
830 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
831 -- I don't know why there are xorpd, xorps, and pxor instructions.
832 -- They all appear to do the same thing --SDM
833 return (Any rep code)
835 getRegister (CmmLit (CmmFloat f rep)) = do
836 lbl <- getNewLabelNat
837 let code dst = toOL [
840 CmmStaticLit (CmmFloat f rep)],
841 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
844 return (Any rep code)
846 #endif /* x86_64_TARGET_ARCH */
848 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
850 -- catch simple cases of zero- or sign-extended load
851 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
852 code <- intLoadCode (MOVZxL I8) addr
853 return (Any I32 code)
855 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
856 code <- intLoadCode (MOVSxL I8) addr
857 return (Any I32 code)
859 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
860 code <- intLoadCode (MOVZxL I16) addr
861 return (Any I32 code)
863 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
864 code <- intLoadCode (MOVSxL I16) addr
865 return (Any I32 code)
869 #if x86_64_TARGET_ARCH
871 -- catch simple cases of zero- or sign-extended load
872 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
873 code <- intLoadCode (MOVZxL I8) addr
874 return (Any I64 code)
876 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
877 code <- intLoadCode (MOVSxL I8) addr
878 return (Any I64 code)
880 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
881 code <- intLoadCode (MOVZxL I16) addr
882 return (Any I64 code)
884 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
885 code <- intLoadCode (MOVSxL I16) addr
886 return (Any I64 code)
888 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
889 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
890 return (Any I64 code)
892 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
893 code <- intLoadCode (MOVSxL I32) addr
894 return (Any I64 code)
898 #if x86_64_TARGET_ARCH
899 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
900 CmmLit displacement])
901 = return $ Any I64 (\dst -> unitOL $
902 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
905 #if x86_64_TARGET_ARCH
906 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
907 x_code <- getAnyReg x
908 lbl <- getNewLabelNat
910 code dst = x_code dst `appOL` toOL [
911 -- This is how gcc does it, so it can't be that bad:
912 LDATA ReadOnlyData16 [
915 CmmStaticLit (CmmInt 0x80000000 I32),
916 CmmStaticLit (CmmInt 0 I32),
917 CmmStaticLit (CmmInt 0 I32),
918 CmmStaticLit (CmmInt 0 I32)
920 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
921 -- xorps, so we need the 128-bit constant
922 -- ToDo: rip-relative
925 return (Any F32 code)
927 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
928 x_code <- getAnyReg x
929 lbl <- getNewLabelNat
931 -- This is how gcc does it, so it can't be that bad:
932 code dst = x_code dst `appOL` toOL [
933 LDATA ReadOnlyData16 [
936 CmmStaticLit (CmmInt 0x8000000000000000 I64),
937 CmmStaticLit (CmmInt 0 I64)
939 -- gcc puts an unpck here. Wonder if we need it.
940 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
941 -- xorpd, so we need the 128-bit constant
944 return (Any F64 code)
947 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
949 getRegister (CmmMachOp mop [x]) -- unary MachOps
952 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
953 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
956 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
957 MO_Not rep -> trivialUCode rep (NOT rep) x
960 MO_U_Conv I32 I8 -> toI8Reg I32 x
961 MO_S_Conv I32 I8 -> toI8Reg I32 x
962 MO_U_Conv I16 I8 -> toI8Reg I16 x
963 MO_S_Conv I16 I8 -> toI8Reg I16 x
964 MO_U_Conv I32 I16 -> toI16Reg I32 x
965 MO_S_Conv I32 I16 -> toI16Reg I32 x
966 #if x86_64_TARGET_ARCH
967 MO_U_Conv I64 I32 -> conversionNop I64 x
968 MO_S_Conv I64 I32 -> conversionNop I64 x
969 MO_U_Conv I64 I16 -> toI16Reg I64 x
970 MO_S_Conv I64 I16 -> toI16Reg I64 x
971 MO_U_Conv I64 I8 -> toI8Reg I64 x
972 MO_S_Conv I64 I8 -> toI8Reg I64 x
975 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
976 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
979 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
980 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
981 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
983 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
984 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
985 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
987 #if x86_64_TARGET_ARCH
988 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
989 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
990 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
991 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
992 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
993 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
994 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
995 -- However, we don't want the register allocator to throw it
996 -- away as an unnecessary reg-to-reg move, so we keep it in
997 -- the form of a movzl and print it as a movl later.
1000 #if i386_TARGET_ARCH
1001 MO_S_Conv F32 F64 -> conversionNop F64 x
1002 MO_S_Conv F64 F32 -> conversionNop F32 x
1004 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1005 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1009 | isFloatingRep from -> coerceFP2Int from to x
1010 | isFloatingRep to -> coerceInt2FP from to x
1012 other -> pprPanic "getRegister" (pprMachOp mop)
1014 -- signed or unsigned extension.
1015 integerExtend from to instr expr = do
1016 (reg,e_code) <- if from == I8 then getByteReg expr
1017 else getSomeReg expr
1021 instr from (OpReg reg) (OpReg dst)
1022 return (Any to code)
1024 toI8Reg new_rep expr
1025 = do codefn <- getAnyReg expr
1026 return (Any new_rep codefn)
1027 -- HACK: use getAnyReg to get a byte-addressable register.
1028 -- If the source was a Fixed register, this will add the
1029 -- mov instruction to put it into the desired destination.
1030 -- We're assuming that the destination won't be a fixed
1031 -- non-byte-addressable register; it won't be, because all
1032 -- fixed registers are word-sized.
1034 toI16Reg = toI8Reg -- for now
1036 conversionNop new_rep expr
1037 = do e_code <- getRegister expr
1038 return (swizzleRegisterRep e_code new_rep)
-- getRegister for dyadic MachOps on x86/x86_64.  Comparisons produce
-- condition-code-based Registers; arithmetic dispatches to the
-- trivial*Code / *_code helpers below.  The i386 branches use the
-- x87 'G*' pseudo-instructions, x86_64 uses SSE.
-- NOTE(review): the 'case mop of' header line is missing from this
-- excerpt; alternatives below belong to that case.
1041 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1042 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1044 MO_Eq F32 -> condFltReg EQQ x y
1045 MO_Ne F32 -> condFltReg NE x y
1046 MO_S_Gt F32 -> condFltReg GTT x y
1047 MO_S_Ge F32 -> condFltReg GE x y
1048 MO_S_Lt F32 -> condFltReg LTT x y
1049 MO_S_Le F32 -> condFltReg LE x y
1051 MO_Eq F64 -> condFltReg EQQ x y
1052 MO_Ne F64 -> condFltReg NE x y
1053 MO_S_Gt F64 -> condFltReg GTT x y
1054 MO_S_Ge F64 -> condFltReg GE x y
1055 MO_S_Lt F64 -> condFltReg LTT x y
1056 MO_S_Le F64 -> condFltReg LE x y
1058 MO_Eq rep -> condIntReg EQQ x y
1059 MO_Ne rep -> condIntReg NE x y
1061 MO_S_Gt rep -> condIntReg GTT x y
1062 MO_S_Ge rep -> condIntReg GE x y
1063 MO_S_Lt rep -> condIntReg LTT x y
1064 MO_S_Le rep -> condIntReg LE x y
1066 MO_U_Gt rep -> condIntReg GU x y
1067 MO_U_Ge rep -> condIntReg GEU x y
1068 MO_U_Lt rep -> condIntReg LU x y
1069 MO_U_Le rep -> condIntReg LEU x y
1071 #if i386_TARGET_ARCH
1072 MO_Add F32 -> trivialFCode F32 GADD x y
1073 MO_Sub F32 -> trivialFCode F32 GSUB x y
1075 MO_Add F64 -> trivialFCode F64 GADD x y
1076 MO_Sub F64 -> trivialFCode F64 GSUB x y
1078 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1079 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1082 #if x86_64_TARGET_ARCH
1083 MO_Add F32 -> trivialFCode F32 ADD x y
1084 MO_Sub F32 -> trivialFCode F32 SUB x y
1086 MO_Add F64 -> trivialFCode F64 ADD x y
1087 MO_Sub F64 -> trivialFCode F64 SUB x y
1089 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1090 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1093 MO_Add rep -> add_code rep x y
1094 MO_Sub rep -> sub_code rep x y
1096 MO_S_Quot rep -> div_code rep True True x y
1097 MO_S_Rem rep -> div_code rep True False x y
1098 MO_U_Quot rep -> div_code rep False True x y
1099 MO_U_Rem rep -> div_code rep False False x y
1101 #if i386_TARGET_ARCH
1102 MO_Mul F32 -> trivialFCode F32 GMUL x y
1103 MO_Mul F64 -> trivialFCode F64 GMUL x y
1106 #if x86_64_TARGET_ARCH
1107 MO_Mul F32 -> trivialFCode F32 MUL x y
1108 MO_Mul F64 -> trivialFCode F64 MUL x y
-- Integer multiply: the second operand of IMUL may also be an
-- operand form, hence (Just op) as the reversed instruction.
1111 MO_Mul rep -> let op = IMUL rep in
1112 trivialCode rep op (Just op) x y
1114 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1116 MO_And rep -> let op = AND rep in
1117 trivialCode rep op (Just op) x y
1118 MO_Or rep -> let op = OR rep in
1119 trivialCode rep op (Just op) x y
1120 MO_Xor rep -> let op = XOR rep in
1121 trivialCode rep op (Just op) x y
1123 {- Shift ops on x86s have constraints on their source, it
1124 either has to be Imm, CL or 1
1125 => trivialCode is not restrictive enough (sigh.)
1127 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1128 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1129 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1131 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1133 --------------------
-- imulMayOflo: signed multiply, returning non-zero in the result
-- register iff the product overflowed the given rep.  Uses the
-- widening IMUL2 (result in %edx:%eax) and compares the high part
-- against the sign-extension of the low part.
1134 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1135 imulMayOflo rep a b = do
1136 (a_reg, a_code) <- getNonClobberedReg a
1137 b_code <- getAnyReg b
1139 shift_amt = case rep of
1142 _ -> panic "shift_amt"
1144 code = a_code `appOL` b_code eax `appOL`
1146 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1147 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1148 -- sign extend lower part
1149 SUB rep (OpReg edx) (OpReg eax)
1150 -- compare against upper
1151 -- eax==0 if high part == sign extended low part
-- Result is pinned to %eax (Fixed), not Any.
1154 return (Fixed rep eax code)
1156 --------------------
-- shift_code: shifts need special treatment because the variable
-- shift amount must live in %cl (see comment below).
1157 shift_code :: MachRep
1158 -> (Operand -> Operand -> Instr)
1163 {- Case1: shift length as immediate -}
1164 shift_code rep instr x y@(CmmLit lit) = do
1165 x_code <- getAnyReg x
1168 = x_code dst `snocOL`
1169 instr (OpImm (litToImm lit)) (OpReg dst)
1171 return (Any rep code)
1173 {- Case2: shift length is complex (non-immediate)
1174 * y must go in %ecx.
1175 * we cannot do y first *and* put its result in %ecx, because
1176 %ecx might be clobbered by x.
1177 * if we do y second, then x cannot be
1178 in a clobbered reg. Also, we cannot clobber x's reg
1179 with the instruction itself.
1181 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1182 - do y second and put its result into %ecx. x gets placed in a fresh
1183 tmp. This is likely to be better, because the reg alloc can
1184 eliminate this reg->reg move here (it won't eliminate the other one,
1185 because the move is into the fixed %ecx).
1187 shift_code rep instr x y{-amount-} = do
1188 x_code <- getAnyReg x
1189 tmp <- getNewRegNat rep
1190 y_code <- getAnyReg y
1192 code = x_code tmp `appOL`
1194 instr (OpReg ecx) (OpReg tmp)
1196 return (Fixed rep tmp code)
1198 --------------------
-- add_code/sub_code: use the three-operand LEA form when the second
-- argument is a literal that fits in 32 bits; otherwise fall back to
-- two-operand ADD/SUB via trivialCode.  Subtraction is not
-- commutative, hence 'Nothing' for the reversed instruction.
1199 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1200 add_code rep x (CmmLit (CmmInt y _))
1201 | not (is64BitInteger y) = add_int rep x y
1202 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1204 --------------------
1205 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1206 sub_code rep x (CmmLit (CmmInt y _))
1207 | not (is64BitInteger (-y)) = add_int rep x (-y)
1208 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1210 -- our three-operand add instruction:
1211 add_int rep x y = do
1212 (x_reg, x_code) <- getSomeReg x
1214 imm = ImmInt (fromInteger y)
1218 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1221 return (Any rep code)
1223 ----------------------
-- div_code: x86 division.  Dividend is widened into %edx:%eax
-- (CLTD for signed, zeroing %edx for unsigned); quotient lands in
-- %eax, remainder in %edx, selected by the 'quotient' flag.
1224 div_code rep signed quotient x y = do
1225 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1226 x_code <- getAnyReg x
1228 widen | signed = CLTD rep
1229 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1231 instr | signed = IDIV
1234 code = y_code `appOL`
1236 toOL [widen, instr rep y_op]
1238 result | quotient = eax
1242 return (Fixed rep result code)
-- getRegister equations for memory loads and literals on x86/x86_64.
1245 getRegister (CmmLoad mem pk)
1248 Amode src mem_code <- getAmode mem
1250 code dst = mem_code `snocOL`
1251 IF_ARCH_i386(GLD pk src dst,
1252 MOV pk (OpAddr src) (OpReg dst))
1254 return (Any pk code)
1256 #if i386_TARGET_ARCH
1257 getRegister (CmmLoad mem pk)
1260 code <- intLoadCode (instr pk) mem
1261 return (Any pk code)
1263 instr I8 = MOVZxL pk
1266 -- we always zero-extend 8-bit loads, if we
1267 -- can't think of anything better. This is because
1268 -- we can't guarantee access to an 8-bit variant of every register
1269 -- (esi and edi don't have 8-bit variants), so to make things
1270 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1273 #if x86_64_TARGET_ARCH
1274 -- Simpler memory load code on x86_64
1275 getRegister (CmmLoad mem pk)
1277 code <- intLoadCode (MOV pk) mem
1278 return (Any pk code)
-- Loading the literal zero: XOR the destination with itself, which
-- is shorter than a MOV of an immediate.
1281 getRegister (CmmLit (CmmInt 0 rep))
1283 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1284 adj_rep = case rep of I64 -> I32; _ -> rep
1285 rep1 = IF_ARCH_i386( rep, adj_rep )
1287 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1289 return (Any rep code)
1291 #if x86_64_TARGET_ARCH
1292 -- optimisation for loading small literals on x86_64: take advantage
1293 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1294 -- instruction forms are shorter.
1295 getRegister (CmmLit lit)
1296 | I64 <- cmmLitRep lit, not (isBigLit lit)
1299 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1301 return (Any I64 code)
1303 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1305 -- note1: not the same as is64BitLit, because that checks for
1306 -- signed literals that fit in 32 bits, but we want unsigned
1308 -- note2: all labels are small, because we're assuming the
1309 -- small memory model (see gcc docs, -mcmodel=small).
-- General literal: a plain MOV of the immediate.
1312 getRegister (CmmLit lit)
1316 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1318 return (Any rep code)
1320 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build the instruction sequence for an integer load: compute the
-- addressing mode for @mem@, then apply the supplied load instruction
-- from that address into whichever destination register is chosen later.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
  return load
1329 -- Compute an expression into *any* register, adding the appropriate
1330 -- move instruction if necessary.
-- Delegates to getRegister, then to anyReg (below) to turn the result
-- into a destination-parameterised instruction block.
1331 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1333 r <- getRegister expr
-- | Turn a Register into a function that deposits the computed value
-- into any requested destination.  An 'Any' register already carries
-- such a function; for a 'Fixed' register we append a reg-to-reg move
-- from the fixed register into the destination.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register =
  case register of
    Any _ code            -> return code
    Fixed rep reg fcode   ->
      return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1340 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1341 -- Fixed registers might not be byte-addressable, so we make sure we've
1342 -- got a temporary, inserting an extra reg copy if necessary.
1343 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1344 #if x86_64_TARGET_ARCH
1345 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1347 getByteReg expr = do
1348 r <- getRegister expr
1351 tmp <- getNewRegNat rep
1352 return (tmp, code tmp)
-- Virtual registers are always safe; real (fixed) registers get
-- copied into a fresh temporary to guarantee byte-addressability.
1354 | isVirtualReg reg -> return (reg,code)
1356 tmp <- getNewRegNat rep
1357 return (tmp, code `snocOL` reg2reg rep reg tmp)
1358 -- ToDo: could optimise slightly by checking for byte-addressable
1359 -- real registers, but that will happen very rarely if at all.
1362 -- Another variant: this time we want the result in a register that cannot
1363 -- be modified by code to evaluate an arbitrary expression.
1364 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1365 getNonClobberedReg expr = do
1366 r <- getRegister expr
1369 tmp <- getNewRegNat rep
1370 return (tmp, code tmp)
1372 -- only free regs can be clobbered
1373 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1374 tmp <- getNewRegNat rep
1375 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- reg2reg: a single register-to-register move, using the x87 GMOV for
-- floating-point values on i386 and plain MOV otherwise.
1379 reg2reg :: MachRep -> Reg -> Reg -> Instr
1381 #if i386_TARGET_ARCH
1382 | isFloatingRep rep = GMOV src dst
1384 | otherwise = MOV rep (OpReg src) (OpReg dst)
1386 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1388 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1390 #if sparc_TARGET_ARCH
-- SPARC: float literals are placed in a read-only data section and
-- loaded via SETHI/LD with the hi/lo halves of the label address.
1392 getRegister (CmmLit (CmmFloat f F32)) = do
1393 lbl <- getNewLabelNat
1394 let code dst = toOL [
1397 CmmStaticLit (CmmFloat f F32)],
1398 SETHI (HI (ImmCLbl lbl)) dst,
1399 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1400 return (Any F32 code)
1402 getRegister (CmmLit (CmmFloat d F64)) = do
1403 lbl <- getNewLabelNat
1404 let code dst = toOL [
1407 CmmStaticLit (CmmFloat d F64)],
1408 SETHI (HI (ImmCLbl lbl)) dst,
1409 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1410 return (Any F64 code)
-- Unary MachOps on SPARC: negation, complement, and the various
-- integer/float conversions.
1412 getRegister (CmmMachOp mop [x]) -- unary MachOps
1414 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1415 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1417 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1418 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1420 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1422 MO_U_Conv F64 F32-> coerceDbl2Flt x
1423 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1425 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1426 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1427 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1428 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1430 -- Conversions which are a nop on sparc
1432 | from == to -> conversionNop to x
1433 MO_U_Conv I32 to -> conversionNop to x
1434 MO_S_Conv I32 to -> conversionNop to x
1437 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1438 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1439 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1440 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1442 other_op -> panic "Unknown unary mach op"
-- integerExtend: shift left then (arithmetic/logical) shift right to
-- sign- or zero-extend a narrow value.
1445 integerExtend signed from to expr = do
1446 (reg, e_code) <- getSomeReg expr
1450 ((if signed then SRA else SRL)
1451 reg (RIImm (ImmInt 0)) dst)
1452 return (Any to code)
1453 conversionNop new_rep expr
1454 = do e_code <- getRegister expr
1455 return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps on SPARC.  Division goes out-of-line to libc-style
-- helper routines (.div/.rem/.udiv/.urem); see the ToDo about V8+.
1457 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1459 MO_Eq F32 -> condFltReg EQQ x y
1460 MO_Ne F32 -> condFltReg NE x y
1462 MO_S_Gt F32 -> condFltReg GTT x y
1463 MO_S_Ge F32 -> condFltReg GE x y
1464 MO_S_Lt F32 -> condFltReg LTT x y
1465 MO_S_Le F32 -> condFltReg LE x y
1467 MO_Eq F64 -> condFltReg EQQ x y
1468 MO_Ne F64 -> condFltReg NE x y
1470 MO_S_Gt F64 -> condFltReg GTT x y
1471 MO_S_Ge F64 -> condFltReg GE x y
1472 MO_S_Lt F64 -> condFltReg LTT x y
1473 MO_S_Le F64 -> condFltReg LE x y
1475 MO_Eq rep -> condIntReg EQQ x y
1476 MO_Ne rep -> condIntReg NE x y
1478 MO_S_Gt rep -> condIntReg GTT x y
1479 MO_S_Ge rep -> condIntReg GE x y
1480 MO_S_Lt rep -> condIntReg LTT x y
1481 MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): the I32 unsigned comparisons below use the *signed*
-- condition codes (GTT/GE/LTT/LE) while the I16 ones use unsigned
-- (GU/GEU/LU/LEU) -- looks suspicious; verify against condIntReg.
1483 MO_U_Gt I32 -> condIntReg GTT x y
1484 MO_U_Ge I32 -> condIntReg GE x y
1485 MO_U_Lt I32 -> condIntReg LTT x y
1486 MO_U_Le I32 -> condIntReg LE x y
1488 MO_U_Gt I16 -> condIntReg GU x y
1489 MO_U_Ge I16 -> condIntReg GEU x y
1490 MO_U_Lt I16 -> condIntReg LU x y
1491 MO_U_Le I16 -> condIntReg LEU x y
1493 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1494 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1496 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1498 -- ToDo: teach about V8+ SPARC div instructions
1499 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1500 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1501 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1502 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1504 MO_Add F32 -> trivialFCode F32 FADD x y
1505 MO_Sub F32 -> trivialFCode F32 FSUB x y
1506 MO_Mul F32 -> trivialFCode F32 FMUL x y
1507 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1509 MO_Add F64 -> trivialFCode F64 FADD x y
1510 MO_Sub F64 -> trivialFCode F64 FSUB x y
1511 MO_Mul F64 -> trivialFCode F64 FMUL x y
1512 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1514 MO_And rep -> trivialCode rep (AND False) x y
1515 MO_Or rep -> trivialCode rep (OR False) x y
1516 MO_Xor rep -> trivialCode rep (XOR False) x y
1518 MO_Mul rep -> trivialCode rep (SMUL False) x y
1520 MO_Shl rep -> trivialCode rep SLL x y
1521 MO_U_Shr rep -> trivialCode rep SRL x y
1522 MO_S_Shr rep -> trivialCode rep SRA x y
-- Power goes out to the C pow() function on doubles, promoting
-- F32 arguments first.  (StCall is legacy Stix-era terminology.)
1525 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1526 [promote x, promote y])
1527 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1528 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1531 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1533 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1535 --------------------
-- imulMayOflo (SPARC): widening multiply via SMUL; overflow detected
-- by comparing the high word with the sign-extension of the low word.
1536 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1537 imulMayOflo rep a b = do
1538 (a_reg, a_code) <- getSomeReg a
1539 (b_reg, b_code) <- getSomeReg b
1540 res_lo <- getNewRegNat I32
1541 res_hi <- getNewRegNat I32
1543 shift_amt = case rep of
1546 _ -> panic "shift_amt"
1547 code dst = a_code `appOL` b_code `appOL`
1549 SMUL False a_reg (RIReg b_reg) res_lo,
1551 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1552 SUB False False res_lo (RIReg res_hi) dst
1554 return (Any I32 code)
-- Memory load on SPARC: a single LD from the computed amode.
1556 getRegister (CmmLoad mem pk) = do
1557 Amode src code <- getAmode mem
1559 code__2 dst = code `snocOL` LD pk src dst
1560 return (Any pk code__2)
-- Small int literal: OR it with %g0 (always zero) into the target.
1562 getRegister (CmmLit (CmmInt i _))
1565 src = ImmInt (fromInteger i)
1566 code dst = unitOL (OR False g0 (RIImm src) dst)
1568 return (Any I32 code)
-- General literal: SETHI the high 22 bits, OR in the low 10.
1570 getRegister (CmmLit lit)
1571 = let rep = cmmLitRep lit
1575 OR False dst (RIImm (LO imm)) dst]
1576 in return (Any I32 code)
1578 #endif /* sparc_TARGET_ARCH */
1580 #if powerpc_TARGET_ARCH
-- PowerPC: plain load via amode; the ASSERT checks that the register
-- class (float vs int) agrees with the rep being loaded.
1581 getRegister (CmmLoad mem pk)
1584 Amode addr addr_code <- getAmode mem
1585 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1586 addr_code `snocOL` LD pk dst addr
1587 return (Any pk code)
1589 -- catch simple cases of zero- or sign-extended load
1590 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1591 Amode addr addr_code <- getAmode mem
1592 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1594 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1596 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1597 Amode addr addr_code <- getAmode mem
1598 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- LA (load algebraic) performs the sign-extending halfword load.
1600 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1601 Amode addr addr_code <- getAmode mem
1602 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- Unary MachOps on PowerPC: narrowing conversions are nops because
-- the high bits are treated as undefined; widening uses EXTS or AND
-- masking.
1604 getRegister (CmmMachOp mop [x]) -- unary MachOps
1606 MO_Not rep -> trivialUCode rep NOT x
1608 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1609 MO_S_Conv F32 F64 -> conversionNop F64 x
1612 | from == to -> conversionNop to x
1613 | isFloatingRep from -> coerceFP2Int from to x
1614 | isFloatingRep to -> coerceInt2FP from to x
1616 -- narrowing is a nop: we treat the high bits as undefined
1617 MO_S_Conv I32 to -> conversionNop to x
1618 MO_S_Conv I16 I8 -> conversionNop I8 x
1619 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1620 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1623 | from == to -> conversionNop to x
1624 -- narrowing is a nop: we treat the high bits as undefined
1625 MO_U_Conv I32 to -> conversionNop to x
1626 MO_U_Conv I16 I8 -> conversionNop I8 x
1627 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1628 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1630 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1631 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1632 MO_S_Neg rep -> trivialUCode rep NEG x
1635 conversionNop new_rep expr
1636 = do e_code <- getRegister expr
1637 return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps on PowerPC.  Sub-word operands are first widened to
-- I32 with extendSExpr/extendUExpr (defined below) because the PPC
-- integer unit works on full words.
1639 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1641 MO_Eq F32 -> condFltReg EQQ x y
1642 MO_Ne F32 -> condFltReg NE x y
1644 MO_S_Gt F32 -> condFltReg GTT x y
1645 MO_S_Ge F32 -> condFltReg GE x y
1646 MO_S_Lt F32 -> condFltReg LTT x y
1647 MO_S_Le F32 -> condFltReg LE x y
1649 MO_Eq F64 -> condFltReg EQQ x y
1650 MO_Ne F64 -> condFltReg NE x y
1652 MO_S_Gt F64 -> condFltReg GTT x y
1653 MO_S_Ge F64 -> condFltReg GE x y
1654 MO_S_Lt F64 -> condFltReg LTT x y
1655 MO_S_Le F64 -> condFltReg LE x y
1657 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1658 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1660 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1661 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1662 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1663 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1665 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1667 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1668 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1670 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1671 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1672 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1673 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1675 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1676 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1677 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1678 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1680 -- optimize addition with 32-bit immediate
-- (via addis/add with the ha/lo halves of the constant)
1684 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1685 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1688 (src, srcCode) <- getSomeReg x
1689 let imm = litToImm lit
1690 code dst = srcCode `appOL` toOL [
1691 ADDIS dst src (HA imm),
1692 ADD dst dst (RIImm (LO imm))
1694 return (Any I32 code)
1695 _ -> trivialCode I32 True ADD x y
1697 MO_Add rep -> trivialCode rep True ADD x y
1699 case y of -- subfi ('substract from' with immediate) doesn't exist
1700 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1701 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1702 _ -> trivialCodeNoImm rep SUBF y x
1704 MO_Mul rep -> trivialCode rep True MULLW x y
1706 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1708 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1709 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1711 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1712 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1714 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1715 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1717 MO_And rep -> trivialCode rep False AND x y
1718 MO_Or rep -> trivialCode rep False OR x y
1719 MO_Xor rep -> trivialCode rep False XOR x y
1721 MO_Shl rep -> trivialCode rep False SLW x y
1722 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1723 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- Literal that fits in a 16-bit signed immediate: a single LI.
1725 getRegister (CmmLit (CmmInt i rep))
1726 | Just imm <- makeImmediate rep True i
1728 code dst = unitOL (LI dst imm)
1730 return (Any rep code)
-- Float literal: emit into a data section, address it (possibly
-- PIC-relative via cmmMakeDynamicReference) and load.
1732 getRegister (CmmLit (CmmFloat f frep)) = do
1733 lbl <- getNewLabelNat
1734 dflags <- getDynFlagsNat
1735 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1736 Amode addr addr_code <- getAmode dynRef
1738 LDATA ReadOnlyData [CmmDataLabel lbl,
1739 CmmStaticLit (CmmFloat f frep)]
1740 `consOL` (addr_code `snocOL` LD frep dst addr)
1741 return (Any frep code)
-- Any other literal: LIS high half, OR in the low half.
1743 getRegister (CmmLit lit)
1744 = let rep = cmmLitRep lit
1748 OR dst dst (RIImm (LO imm))
1750 in return (Any rep code)
1752 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- | extendSExpr / extendUExpr: wrap an integer expression of the given
-- rep in a signed / unsigned conversion to I32.  An expression that is
-- already I32 is returned unchanged.
extendSExpr rep x = case rep of
  I32 -> x
  _   -> CmmMachOp (MO_S_Conv rep I32) [x]

extendUExpr rep x = case rep of
  I32 -> x
  _   -> CmmMachOp (MO_U_Conv rep I32) [x]
1761 #endif /* powerpc_TARGET_ARCH */
1764 -- -----------------------------------------------------------------------------
1765 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An Amode pairs the addressing mode itself with the instructions
-- needed to put the right values in the registers it mentions.
1767 data Amode = Amode AddrMode InstrBlock
1770 Now, given a tree (the argument to an CmmLoad) that references memory,
1771 produce a suitable addressing mode.
1773 A Rule of the Game (tm) for Amodes: use of the addr bit must
1774 immediately follow use of the code part, since the code part puts
1775 values in registers which the addr then refers to. So you can't put
1776 anything in between, lest it overwrite some of those registers. If
1777 you need to do some other computation between the code part and use of
1778 the addr bit, first store the effective address from the amode in a
1779 temporary, then do the other computation, and then use the temporary:
1783 ... other computation ...
-- First equation: normalise CmmRegOff into the MachOp form that the
-- remaining (arch-specific) equations recognise.
1787 getAmode :: CmmExpr -> NatM Amode
1788 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1792 #if alpha_TARGET_ARCH
-- Alpha (legacy Stix-era code, pre-monadic style using thenNat):
-- reg +/- immediate becomes AddrRegImm.
1794 getAmode (StPrim IntSubOp [x, StInt i])
1795 = getNewRegNat PtrRep `thenNat` \ tmp ->
1796 getRegister x `thenNat` \ register ->
1798 code = registerCode register tmp
1799 reg = registerName register tmp
1800 off = ImmInt (-(fromInteger i))
1802 return (Amode (AddrRegImm reg off) code)
1804 getAmode (StPrim IntAddOp [x, StInt i])
1805 = getNewRegNat PtrRep `thenNat` \ tmp ->
1806 getRegister x `thenNat` \ register ->
1808 code = registerCode register tmp
1809 reg = registerName register tmp
1810 off = ImmInt (fromInteger i)
1812 return (Amode (AddrRegImm reg off) code)
1816 = return (Amode (AddrImm imm__2) id)
1819 imm__2 = case imm of Just x -> x
1822 = getNewRegNat PtrRep `thenNat` \ tmp ->
1823 getRegister other `thenNat` \ register ->
1825 code = registerCode register tmp
1826 reg = registerName register tmp
1828 return (Amode (AddrReg reg) code)
1830 #endif /* alpha_TARGET_ARCH */
1832 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1834 #if x86_64_TARGET_ARCH
-- x86_64: PIC-base-relative literal becomes a RIP-relative address.
1836 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1837 CmmLit displacement])
1838 = return $ Amode (ripRel (litToImm displacement)) nilOL
1842 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1844 -- This is all just ridiculous, since it carefully undoes
1845 -- what mangleIndexTree has just done.
1846 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1847 | not (is64BitLit lit)
1848 -- ASSERT(rep == I32)???
1849 = do (x_reg, x_code) <- getSomeReg x
1850 let off = ImmInt (-(fromInteger i))
1851 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1853 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1854 | not (is64BitLit lit)
1855 -- ASSERT(rep == I32)???
1856 = do (x_reg, x_code) <- getSomeReg x
1857 let off = ImmInt (fromInteger i)
1858 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1860 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1861 -- recognised by the next rule.
1862 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1864 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- (x + (y << shift)) with shift in 0..3 maps directly onto an x86
-- scaled-index addressing mode.
1866 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1867 [y, CmmLit (CmmInt shift _)]])
1868 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1869 = x86_complex_amode x y shift 0
-- Match (x + ((y << shift) + offset)) and turn it into a scaled-index
-- addressing mode with displacement, provided the scale is one of
-- 1/2/4/8 (shift 0..3) and the displacement fits in 32 bits.
--
-- BUG FIX: (&&) binds more tightly than (||), so the original guard
--     shift == 0 || shift == 1 || shift == 2 || shift == 3
--                && not (is64BitInteger offset)
-- parsed as (... || (shift == 3 && not big)), i.e. the 32-bit
-- displacement restriction was only enforced for shift == 3.  A
-- 64-bit offset with shift 0, 1 or 2 would slip through and produce
-- an unencodable displacement.  Parenthesise the shift alternatives
-- so the offset check applies to every allowed shift.
getAmode (CmmMachOp (MO_Add rep)
                    [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && not (is64BitInteger offset)
  = x86_complex_amode x y shift offset
-- Fallback addition: plain base+index with no scale or displacement.
1879 getAmode (CmmMachOp (MO_Add rep) [x,y])
1880 = x86_complex_amode x y 0 0
1882 getAmode (CmmLit lit) | not (is64BitLit lit)
1883 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Catch-all: compute the expression into a register and address
-- through it with zero displacement.
1886 (reg,code) <- getSomeReg expr
1887 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- x86_complex_amode: assemble base + index*scale + displacement.
-- The base expression must survive evaluation of the index, hence
-- getNonClobberedReg for it.
1890 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1891 x86_complex_amode base index shift offset
1892 = do (x_reg, x_code) <- getNonClobberedReg base
1893 -- x must be in a temp, because it has to stay live over y_code
1894 -- we could compare x_reg and y_reg and do something better here...
1895 (y_reg, y_code) <- getSomeReg index
1897 code = x_code `appOL` y_code
1898 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1899 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1902 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1904 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1906 #if sparc_TARGET_ARCH
-- SPARC amodes: reg +/- 13-bit immediate, reg+reg, or SETHI/LO pairs
-- for full-width constants.
1908 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1911 (reg, code) <- getSomeReg x
1913 off = ImmInt (-(fromInteger i))
1914 return (Amode (AddrRegImm reg off) code)
1917 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1920 (reg, code) <- getSomeReg x
1922 off = ImmInt (fromInteger i)
1923 return (Amode (AddrRegImm reg off) code)
1925 getAmode (CmmMachOp (MO_Add rep) [x, y])
1927 (regX, codeX) <- getSomeReg x
1928 (regY, codeY) <- getSomeReg y
1930 code = codeX `appOL` codeY
1931 return (Amode (AddrRegReg regX regY) code)
1933 -- XXX Is this same as "leaf" in Stix?
1934 getAmode (CmmLit lit)
1936 tmp <- getNewRegNat I32
1938 code = unitOL (SETHI (HI imm__2) tmp)
1939 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1941 imm__2 = litToImm lit
1945 (reg, code) <- getSomeReg other
1948 return (Amode (AddrRegImm reg off) code)
1950 #endif /* sparc_TARGET_ARCH */
1952 #ifdef powerpc_TARGET_ARCH
-- PowerPC amodes: reg + 16-bit signed immediate (checked via
-- makeImmediate), reg+reg, or ADDIS/LIS high-half tricks for larger
-- constants.
1953 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1954 | Just off <- makeImmediate I32 True (-i)
1956 (reg, code) <- getSomeReg x
1957 return (Amode (AddrRegImm reg off) code)
1960 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1961 | Just off <- makeImmediate I32 True i
1963 (reg, code) <- getSomeReg x
1964 return (Amode (AddrRegImm reg off) code)
1966 -- optimize addition with 32-bit immediate
-- (ADDIS adds the high-adjusted half; LO goes in the displacement)
1968 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1970 tmp <- getNewRegNat I32
1971 (src, srcCode) <- getSomeReg x
1972 let imm = litToImm lit
1973 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1974 return (Amode (AddrRegImm tmp (LO imm)) code)
1976 getAmode (CmmLit lit)
1978 tmp <- getNewRegNat I32
1979 let imm = litToImm lit
1980 code = unitOL (LIS tmp (HA imm))
1981 return (Amode (AddrRegImm tmp (LO imm)) code)
1983 getAmode (CmmMachOp (MO_Add I32) [x, y])
1985 (regX, codeX) <- getSomeReg x
1986 (regY, codeY) <- getSomeReg y
1987 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Catch-all: evaluate into a register, zero displacement.
1991 (reg, code) <- getSomeReg other
1994 return (Amode (AddrRegImm reg off) code)
1995 #endif /* powerpc_TARGET_ARCH */
1997 -- -----------------------------------------------------------------------------
1998 -- getOperand: sometimes any operand will do.
2000 -- getNonClobberedOperand: the value of the operand will remain valid across
2001 -- the computation of an arbitrary expression, unless the expression
2002 -- is computed directly into a register which the operand refers to
2003 -- (see trivialCode where this function is used for an example).
2005 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2007 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2008 #if x86_64_TARGET_ARCH
-- On x86_64, non-zero float literals are referenced straight from a
-- read-only data section via a RIP-relative address.
2009 getNonClobberedOperand (CmmLit lit)
2010 | isSuitableFloatingPointLit lit = do
2011 lbl <- getNewLabelNat
2012 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2014 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2016 getNonClobberedOperand (CmmLit lit)
2017 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2018 return (OpImm (litToImm lit), nilOL)
2019 getNonClobberedOperand (CmmLoad mem pk)
2020 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2021 Amode src mem_code <- getAmode mem
-- If the amode uses clobberable (real, free) registers, capture the
-- effective address into a fresh temp with LEA first.
2023 if (amodeCouldBeClobbered src)
2025 tmp <- getNewRegNat wordRep
2026 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2027 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2030 return (OpAddr src', save_code `appOL` mem_code)
2031 getNonClobberedOperand e = do
2032 (reg, code) <- getNonClobberedReg e
2033 return (OpReg reg, code)
2035 amodeCouldBeClobbered :: AddrMode -> Bool
2036 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2038 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2039 regClobbered _ = False
2041 -- getOperand: the operand is not required to remain valid across the
2042 -- computation of an arbitrary expression.
2043 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2044 #if x86_64_TARGET_ARCH
2045 getOperand (CmmLit lit)
2046 | isSuitableFloatingPointLit lit = do
2047 lbl <- getNewLabelNat
2048 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2050 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2052 getOperand (CmmLit lit)
2053 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2054 return (OpImm (litToImm lit), nilOL)
2055 getOperand (CmmLoad mem pk)
2056 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2057 Amode src mem_code <- getAmode mem
2058 return (OpAddr src, mem_code)
2060 (reg, code) <- getSomeReg e
2061 return (OpReg reg, code)
-- isOperand: can this expression be used directly as an instruction
-- operand (memory or immediate) without first going via a register?
2063 isOperand :: CmmExpr -> Bool
2064 isOperand (CmmLoad _ _) = True
2065 isOperand (CmmLit lit) = not (is64BitLit lit)
2066 || isSuitableFloatingPointLit lit
-- If we want a floating-point literal as an operand, we can use it
-- directly from memory.  However, if the literal is zero, we're
-- better off generating it into a register instead.
isSuitableFloatingPointLit lit =
  case lit of
    CmmFloat f _ -> f /= 0.0
    _            -> False
-- getRegOrMem: like getOperand, but the result must also survive the
-- evaluation of an arbitrary expression (used e.g. for division).
2076 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2077 getRegOrMem (CmmLoad mem pk)
2078 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2079 Amode src mem_code <- getAmode mem
2080 return (OpAddr src, mem_code)
2082 (reg, code) <- getNonClobberedReg e
2083 return (OpReg reg, code)
2085 #if x86_64_TARGET_ARCH
-- is64BitLit: does this literal need a full 64-bit immediate?
-- Only I64 integers outside the signed 32-bit range qualify.
2086 is64BitLit (CmmInt i I64) = is64BitInteger i
2087 -- assume that labels are in the range 0-2^31-1: this assumes the
2088 -- small memory model (see gcc docs, -mcmodel=small).
2090 is64BitLit x = False
-- | True iff the value, truncated to 64 bits, does not fit in a
-- signed 32-bit immediate.  A CmmInt is intended to be truncated to
-- the appropriate number of bits, so we truncate to Int64 first; this
-- matters because e.g. -1 as a CmmInt might be either -1 or
-- 18446744073709551615.
is64BitInteger :: Integer -> Bool
is64BitInteger i = not (-0x80000000 <= i64 && i64 <= 0x7fffffff)
  where
    i64 = fromIntegral i :: Int64
2101 -- -----------------------------------------------------------------------------
2102 -- The 'CondCode' type: Condition codes passed up the tree.
-- CondCode: is-float flag, the condition itself, and the code that
-- sets the machine's condition flags accordingly.
2104 data CondCode = CondCode Bool Cond InstrBlock
2106 -- Set up a condition code for a conditional branch.
2108 getCondCode :: CmmExpr -> NatM CondCode
2110 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2112 #if alpha_TARGET_ARCH
2113 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2114 #endif /* alpha_TARGET_ARCH */
2116 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2118 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2119 -- yes, they really do seem to want exactly the same!
2121 getCondCode (CmmMachOp mop [x, y])
2124 MO_Eq F32 -> condFltCode EQQ x y
2125 MO_Ne F32 -> condFltCode NE x y
2127 MO_S_Gt F32 -> condFltCode GTT x y
2128 MO_S_Ge F32 -> condFltCode GE x y
2129 MO_S_Lt F32 -> condFltCode LTT x y
2130 MO_S_Le F32 -> condFltCode LE x y
2132 MO_Eq F64 -> condFltCode EQQ x y
2133 MO_Ne F64 -> condFltCode NE x y
2135 MO_S_Gt F64 -> condFltCode GTT x y
2136 MO_S_Ge F64 -> condFltCode GE x y
2137 MO_S_Lt F64 -> condFltCode LTT x y
2138 MO_S_Le F64 -> condFltCode LE x y
2140 MO_Eq rep -> condIntCode EQQ x y
2141 MO_Ne rep -> condIntCode NE x y
2143 MO_S_Gt rep -> condIntCode GTT x y
2144 MO_S_Ge rep -> condIntCode GE x y
2145 MO_S_Lt rep -> condIntCode LTT x y
2146 MO_S_Le rep -> condIntCode LE x y
2148 MO_U_Gt rep -> condIntCode GU x y
2149 MO_U_Ge rep -> condIntCode GEU x y
2150 MO_U_Lt rep -> condIntCode LU x y
2151 MO_U_Le rep -> condIntCode LEU x y
2153 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2155 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2157 #elif powerpc_TARGET_ARCH
2159 -- almost the same as everywhere else - but we need to
2160 -- extend small integers to 32 bit first
2162 getCondCode (CmmMachOp mop [x, y])
2164 MO_Eq F32 -> condFltCode EQQ x y
2165 MO_Ne F32 -> condFltCode NE x y
2167 MO_S_Gt F32 -> condFltCode GTT x y
2168 MO_S_Ge F32 -> condFltCode GE x y
2169 MO_S_Lt F32 -> condFltCode LTT x y
2170 MO_S_Le F32 -> condFltCode LE x y
2172 MO_Eq F64 -> condFltCode EQQ x y
2173 MO_Ne F64 -> condFltCode NE x y
2175 MO_S_Gt F64 -> condFltCode GTT x y
2176 MO_S_Ge F64 -> condFltCode GE x y
2177 MO_S_Lt F64 -> condFltCode LTT x y
2178 MO_S_Le F64 -> condFltCode LE x y
2180 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2181 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2183 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2184 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2185 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2186 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2188 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2189 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2190 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2191 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2193 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2195 getCondCode other = panic "getCondCode(2)(powerpc)"
2201 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2202 -- passed back up the tree.
-- NOTE(review): many interior lines of the equations below (guard
-- lines, "let"/"where" keywords, "else" branches) are missing from
-- this extract; the visible lines are kept byte-identical.
2204 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2206 #if alpha_TARGET_ARCH
2207 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2208 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2209 #endif /* alpha_TARGET_ARCH */
2211 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2212 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- The x86 equations are ordered from most to least specific: the
-- special cases below avoid loading operands into registers when a
-- CMP/TEST can use memory or immediate operands directly.
2214 -- memory vs immediate
2215 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2216 Amode x_addr x_code <- getAmode x
2219 code = x_code `snocOL`
2220 CMP pk (OpImm imm) (OpAddr x_addr)
2222 return (CondCode False cond code)
2224 -- anything vs zero, using a mask
2225 -- TODO: Add some sanity checking!!!!
2226 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2227 | (CmmLit (CmmInt mask pk2)) <- o2
2229 (x_reg, x_code) <- getSomeReg x
2231 code = x_code `snocOL`
2232 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2234 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets flags without an immediate
2237 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2238 (x_reg, x_code) <- getSomeReg x
2240 code = x_code `snocOL`
2241 TEST pk (OpReg x_reg) (OpReg x_reg)
2243 return (CondCode False cond code)
2245 -- anything vs operand
2246 condIntCode cond x y | isOperand y = do
2247 (x_reg, x_code) <- getNonClobberedReg x
2248 (y_op, y_code) <- getOperand y
2250 code = x_code `appOL` y_code `snocOL`
2251 CMP (cmmExprRep x) y_op (OpReg x_reg)
2253 return (CondCode False cond code)
2255 -- anything vs anything
2256 condIntCode cond x y = do
2257 (y_reg, y_code) <- getNonClobberedReg y
2258 (x_op, x_code) <- getRegOrMem x
2260 code = y_code `appOL`
2262 CMP (cmmExprRep x) (OpReg y_reg) x_op
2264 return (CondCode False cond code)
2267 #if i386_TARGET_ARCH
-- i386 float comparison goes through the x87-style GCMP pseudo-insn.
2268 condFltCode cond x y
2269 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2270 (x_reg, x_code) <- getNonClobberedReg x
2271 (y_reg, y_code) <- getSomeReg y
2273 code = x_code `appOL` y_code `snocOL`
2274 GCMP cond x_reg y_reg
2275 -- The GCMP insn does the test and sets the zero flag if comparable
2276 -- and true. Hence we always supply EQQ as the condition to test.
2277 return (CondCode True EQQ code)
2278 #endif /* i386_TARGET_ARCH */
2280 #if x86_64_TARGET_ARCH
2281 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2282 -- an operand, but the right must be a reg. We can probably do better
2283 -- than this general case...
2284 condFltCode cond x y = do
2285 (x_reg, x_code) <- getNonClobberedReg x
2286 (y_op, y_code) <- getOperand y
2288 code = x_code `appOL`
2290 CMP (cmmExprRep x) y_op (OpReg x_reg)
2291 -- NB(1): we need to use the unsigned comparison operators on the
2292 -- result of this comparison.
2294 return (CondCode True (condToUnsigned cond) code)
2297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2299 #if sparc_TARGET_ARCH
-- SPARC compares by subtracting into g0 (discarding the result but
-- setting the condition codes).
2301 condIntCode cond x (CmmLit (CmmInt y rep))
2304 (src1, code) <- getSomeReg x
2306 src2 = ImmInt (fromInteger y)
2307 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2308 return (CondCode False cond code')
2310 condIntCode cond x y = do
2311 (src1, code1) <- getSomeReg x
2312 (src2, code2) <- getSomeReg y
2314 code__2 = code1 `appOL` code2 `snocOL`
2315 SUB False True src1 (RIReg src2) g0
2316 return (CondCode False cond code__2)
-- Float compare: if the two operands have different widths, the F32
-- one is promoted to F64 (via FxTOy into tmp) before the FCMP.
-- NOTE(review): the width-inspection lines (original 2325-2330,
-- presumably binding pk1/pk2 and starting the "if") are missing here.
2319 condFltCode cond x y = do
2320 (src1, code1) <- getSomeReg x
2321 (src2, code2) <- getSomeReg y
2322 tmp <- getNewRegNat F64
2324 promote x = FxTOy F32 F64 x tmp
2331 code1 `appOL` code2 `snocOL`
2332 FCMP True pk1 src1 src2
2333 else if pk1 == F32 then
2334 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2335 FCMP True F64 tmp src2
2337 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2338 FCMP True F64 src1 tmp
2339 return (CondCode True cond code__2)
2341 #endif /* sparc_TARGET_ARCH */
2343 #if powerpc_TARGET_ARCH
2344 -- ###FIXME: I16 and I8!
-- PPC: use CMPL for unsigned conditions, CMP for signed, always at
-- I32 width (callers extend sub-word operands, see getCondCode).
2345 condIntCode cond x (CmmLit (CmmInt y rep))
2346 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2348 (src1, code) <- getSomeReg x
2350 code' = code `snocOL`
2351 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2352 return (CondCode False cond code')
2354 condIntCode cond x y = do
2355 (src1, code1) <- getSomeReg x
2356 (src2, code2) <- getSomeReg y
2358 code' = code1 `appOL` code2 `snocOL`
2359 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2360 return (CondCode False cond code')
2362 condFltCode cond x y = do
2363 (src1, code1) <- getSomeReg x
2364 (src2, code2) <- getSomeReg y
2366 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2367 code'' = case cond of -- twiddle CR to handle unordered case
2368 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2369 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2372 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2373 return (CondCode True cond code'')
2375 #endif /* powerpc_TARGET_ARCH */
2377 -- -----------------------------------------------------------------------------
2378 -- Generating assignments
2380 -- Assignments are really at the heart of the whole code generation
2381 -- business. Almost all top-level nodes of any real importance are
2382 -- assignments, which correspond to loads, stores, or register
2383 -- transfers. If we're really lucky, some of the register transfers
2384 -- will go away, because we can use the destination register to
2385 -- complete the code generation for the right hand side. This only
2386 -- fails when the right hand side is forced into a fixed register
2387 -- (e.g. the result of a call).
-- | Assignment code generators, arranged (width, destination, source).
-- The Mem variants store to the address denoted by a CmmExpr; the Reg
-- variants assign to a CmmReg.  Int and Flt variants are selected per
-- target below (on PPC the Flt ones are aliases for the Int ones).
2389 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2390 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2392 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2393 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2397 #if alpha_TARGET_ARCH
-- Legacy Alpha code in the old "thenNat" continuation style.
-- NOTE(review): several interior lines (the "in"/"else" branches,
-- final returns) are missing from this extract.
2399 assignIntCode pk (CmmLoad dst _) src
2400 = getNewRegNat IntRep `thenNat` \ tmp ->
2401 getAmode dst `thenNat` \ amode ->
2402 getRegister src `thenNat` \ register ->
2404 code1 = amodeCode amode []
2405 dst__2 = amodeAddr amode
2406 code2 = registerCode register tmp []
2407 src__2 = registerName register tmp
2408 sz = primRepToSize pk
2409 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register-to-register assignment: if the source is in a fixed
-- register, copy it with OR (the Alpha idiom for a register move).
2413 assignIntCode pk dst src
2414 = getRegister dst `thenNat` \ register1 ->
2415 getRegister src `thenNat` \ register2 ->
2417 dst__2 = registerName register1 zeroh
2418 code = registerCode register2 dst__2
2419 src__2 = registerName register2 dst__2
2420 code__2 = if isFixed register2
2421 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2426 #endif /* alpha_TARGET_ARCH */
2428 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2430 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2432 -- integer assignment to memory
2434 -- specific case of adding/subtracting an integer to a particular address.
2435 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Fuses "mem := mem +/- imm" into a single read-modify-write ADD/SUB
-- on the memory operand, provided both sides name the same address
-- and the immediate fits (no 64-bit immediates on I64).
2437 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2438 CmmLit (CmmInt i _)])
2439 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2440 Just instr <- check op
2441 = do Amode amode code_addr <- getAmode addr
2442 let code = code_addr `snocOL`
2443 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2446 check (MO_Add _) = Just ADD
2447 check (MO_Sub _) = Just SUB
-- General store: evaluate the source to a register or immediate,
-- then MOV it to the destination address.
2452 assignMem_IntCode pk addr src = do
2453 Amode addr code_addr <- getAmode addr
2454 (code_src, op_src) <- get_op_RI src
2456 code = code_src `appOL`
2458 MOV pk op_src (OpAddr addr)
2459 -- NOTE: op_src is stable, so it will still be valid
2460 -- after code_addr. This may involve the introduction
2461 -- of an extra MOV to a temporary register, but we hope
2462 -- the register allocator will get rid of it.
-- | Evaluate an expression to something usable as a MOV source:
-- an immediate when the literal fits, otherwise a non-clobbered reg.
-- NOTE(review): the second equation's head (original line 2469,
-- presumably "get_op_RI op") is missing from this extract.
2466 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2467 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2468 = return (nilOL, OpImm (litToImm lit))
2470 = do (reg,code) <- getNonClobberedReg op
2471 return (code, OpReg reg)
2474 -- Assign; dst is a reg, rhs is mem
2475 assignReg_IntCode pk reg (CmmLoad src _) = do
2476 load_code <- intLoadCode (MOV pk) src
2477 return (load_code (getRegisterReg reg))
2479 -- dst is a reg, but src could be anything
2480 assignReg_IntCode pk reg src = do
2481 code <- getAnyReg src
2482 return (code (getRegisterReg reg))
2484 #endif /* i386_TARGET_ARCH */
2486 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2488 #if sparc_TARGET_ARCH
-- SPARC: evaluate source to a register, then a single ST.
2490 assignMem_IntCode pk addr src = do
2491 (srcReg, code) <- getSomeReg src
2492 Amode dstAddr addr_code <- getAmode addr
2493 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- An Any-register result is generated straight into the destination;
-- a Fixed one needs an extra move (OR with g0 is the SPARC reg move).
2495 assignReg_IntCode pk reg src = do
2496 r <- getRegister src
2498 Any _ code -> code dst
2499 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2501 dst = getRegisterReg reg
2504 #endif /* sparc_TARGET_ARCH */
2506 #if powerpc_TARGET_ARCH
2508 assignMem_IntCode pk addr src = do
2509 (srcReg, code) <- getSomeReg src
2510 Amode dstAddr addr_code <- getAmode addr
2511 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2513 -- dst is a reg, but src could be anything
-- Same Any/Fixed split as SPARC; MR is the PPC register move.
2514 assignReg_IntCode pk reg src
2516 r <- getRegister src
2518 Any _ code -> code dst
2519 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2521 dst = getRegisterReg reg
2523 #endif /* powerpc_TARGET_ARCH */
2526 -- -----------------------------------------------------------------------------
2527 -- Floating-point assignments
2529 #if alpha_TARGET_ARCH
-- Legacy Alpha float assignment, mirroring assignIntCode above but
-- using FMOV for the fixed-register copy.  NOTE(review): interior
-- lines (the "in"/"else" branches, final returns) are missing from
-- this extract.
2531 assignFltCode pk (CmmLoad dst _) src
2532 = getNewRegNat pk `thenNat` \ tmp ->
2533 getAmode dst `thenNat` \ amode ->
2534 getRegister src `thenNat` \ register ->
2536 code1 = amodeCode amode []
2537 dst__2 = amodeAddr amode
2538 code2 = registerCode register tmp []
2539 src__2 = registerName register tmp
2540 sz = primRepToSize pk
2541 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2545 assignFltCode pk dst src
2546 = getRegister dst `thenNat` \ register1 ->
2547 getRegister src `thenNat` \ register2 ->
2549 dst__2 = registerName register1 zeroh
2550 code = registerCode register2 dst__2
2551 src__2 = registerName register2 dst__2
2552 code__2 = if isFixed register2
2553 then code . mkSeqInstr (FMOV src__2 dst__2)
2558 #endif /* alpha_TARGET_ARCH */
2560 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2562 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2564 -- Floating point assignment to memory
-- On i386 a float store is the GST pseudo-insn (x87-style); on x86-64
-- it is an ordinary MOV of the SSE register.
2565 assignMem_FltCode pk addr src = do
2566 (src_reg, src_code) <- getNonClobberedReg src
2567 Amode addr addr_code <- getAmode addr
2569 code = src_code `appOL`
2571 IF_ARCH_i386(GST pk src_reg addr,
2572 MOV pk (OpReg src_reg) (OpAddr addr))
2575 -- Floating point assignment to a register/temporary
2576 assignReg_FltCode pk reg src = do
2577 src_code <- getAnyReg src
2578 return (src_code (getRegisterReg reg))
2580 #endif /* i386_TARGET_ARCH */
2582 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2584 #if sparc_TARGET_ARCH
2586 -- Floating point assignment to memory
-- If the source width differs from the store width, convert via
-- FxTOy into a fresh register first, then ST.  NOTE(review): the
-- comparison line guarding that choice (original 2594) is missing
-- from this extract.
2587 assignMem_FltCode pk addr src = do
2588 Amode dst__2 code1 <- getAmode addr
2589 (src__2, code2) <- getSomeReg src
2590 tmp1 <- getNewRegNat pk
2592 pk__2 = cmmExprRep src
2593 code__2 = code1 `appOL` code2 `appOL`
2595 then unitOL (ST pk src__2 dst__2)
2596 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2599 -- Floating point assignment to a register/temporary
2600 -- ToDo: Verify correctness
2601 assignReg_FltCode pk reg src = do
2602 r <- getRegister src
2603 v1 <- getNewRegNat pk
2605 Any _ code -> code dst
2606 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2608 dst = getRegisterReg reg
2610 #endif /* sparc_TARGET_ARCH */
2612 #if powerpc_TARGET_ARCH
-- PPC: float assignment needs nothing special; reuse the integer
-- generators (ST/MR handle float regs there).
2615 assignMem_FltCode = assignMem_IntCode
2616 assignReg_FltCode = assignReg_IntCode
2618 #endif /* powerpc_TARGET_ARCH */
2621 -- -----------------------------------------------------------------------------
2622 -- Generating an non-local jump
2624 -- (If applicable) Do not fill the delay slots here; you will confuse the
2625 -- register allocator.
-- | Generate an unconditional non-local jump to the address denoted
-- by the expression.  NOTE(review): several equation heads (e.g. the
-- generic "genJump tree/expr = do" lines) are missing from this
-- extract; the orphaned bodies below belong to them.
2627 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2629 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2631 #if alpha_TARGET_ARCH
2633 genJump (CmmLabel lbl)
2634 | isAsmTemp lbl = returnInstr (BR target)
2635 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2637 target = ImmCLbl lbl
2640 = getRegister tree `thenNat` \ register ->
2641 getNewRegNat PtrRep `thenNat` \ tmp ->
2643 dst = registerName register pv
2644 code = registerCode register pv
2645 target = registerName register pv
2647 if isFixed register then
2648 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2650 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2652 #endif /* alpha_TARGET_ARCH */
2654 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2656 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: JMP can take a memory, immediate, or register operand directly.
2658 genJump (CmmLoad mem pk) = do
2659 Amode target code <- getAmode mem
2660 return (code `snocOL` JMP (OpAddr target))
2662 genJump (CmmLit lit) = do
2663 return (unitOL (JMP (OpImm (litToImm lit))))
2666 (reg,code) <- getSomeReg expr
2667 return (code `snocOL` JMP (OpReg reg))
2669 #endif /* i386_TARGET_ARCH */
2671 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2673 #if sparc_TARGET_ARCH
-- SPARC: branches have a delay slot; a NOP fills it (see note above).
2675 genJump (CmmLit (CmmLabel lbl))
2676 = return (toOL [CALL (Left target) 0 True, NOP])
2678 target = ImmCLbl lbl
2682 (target, code) <- getSomeReg tree
2683 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2685 #endif /* sparc_TARGET_ARCH */
2687 #if powerpc_TARGET_ARCH
2688 genJump (CmmLit (CmmLabel lbl))
2689 = return (unitOL $ JMP lbl)
-- PPC computed jump: move the target into CTR, then branch via BCTR.
2693 (target,code) <- getSomeReg tree
2694 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2695 #endif /* powerpc_TARGET_ARCH */
2698 -- -----------------------------------------------------------------------------
2699 -- Unconditional branches
genBranch :: BlockId -> NatM InstrBlock
-- | Emit an unconditional branch to a local basic block; the actual
-- instruction sequence is target-specific, via 'mkBranchInstr'.
genBranch blockid = return (toOL (mkBranchInstr blockid))
2705 -- -----------------------------------------------------------------------------
2706 -- Conditional jumps
2709 Conditional jumps are always to local labels, so we can use branch
2710 instructions. We peek at the arguments to decide what kind of
2713 ALPHA: For comparisons with 0, we're laughing, because we can just do
2714 the desired conditional branch.
2716 I386: First, we have to ensure that the condition
2717 codes are set according to the supplied comparison operation.
2719 SPARC: First, we have to ensure that the condition codes are set
2720 according to the supplied comparison operation. We generate slightly
2721 different code for floating point comparisons, because a floating
2722 point operation cannot directly precede a @BF@. We assume the worst
2723 and fill that slot with a @NOP@.
2725 SPARC: Do not fill the delay slots here; you will confuse the register
-- NOTE(review): the function-name line of this type signature
-- (original 2730, presumably "genCondJump") and the closing "-}" of
-- the commentary above fall in lines missing from this extract.
2731 :: BlockId -- the branch target
2732 -> CmmExpr -- the condition on which to branch
2735 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2737 #if alpha_TARGET_ARCH
-- Legacy Alpha conditional jumps, still in Stix terms (StPrim/StInt).
-- Each cmpOp table maps a primitive comparison to the branch
-- condition (or, for the compare-then-branch pairs below, to the
-- compare instruction plus the condition on its result).
-- NOTE(review): many interior lines of these equations are missing
-- from this extract.
2739 genCondJump id (StPrim op [x, StInt 0])
2740 = getRegister x `thenNat` \ register ->
2741 getNewRegNat (registerRep register)
2744 code = registerCode register tmp
2745 value = registerName register tmp
2746 pk = registerRep register
2747 target = ImmCLbl lbl
2749 returnSeq code [BI (cmpOp op) value target]
2751 cmpOp CharGtOp = GTT
2753 cmpOp CharEqOp = EQQ
2755 cmpOp CharLtOp = LTT
2764 cmpOp WordGeOp = ALWAYS
2765 cmpOp WordEqOp = EQQ
2767 cmpOp WordLtOp = NEVER
2768 cmpOp WordLeOp = EQQ
2770 cmpOp AddrGeOp = ALWAYS
2771 cmpOp AddrEqOp = EQQ
2773 cmpOp AddrLtOp = NEVER
2774 cmpOp AddrLeOp = EQQ
-- Comparison against a floating-point zero: branch on the value
-- itself with BF.
2776 genCondJump lbl (StPrim op [x, StDouble 0.0])
2777 = getRegister x `thenNat` \ register ->
2778 getNewRegNat (registerRep register)
2781 code = registerCode register tmp
2782 value = registerName register tmp
2783 pk = registerRep register
2784 target = ImmCLbl lbl
2786 return (code . mkSeqInstr (BF (cmpOp op) value target))
2788 cmpOp FloatGtOp = GTT
2789 cmpOp FloatGeOp = GE
2790 cmpOp FloatEqOp = EQQ
2791 cmpOp FloatNeOp = NE
2792 cmpOp FloatLtOp = LTT
2793 cmpOp FloatLeOp = LE
2794 cmpOp DoubleGtOp = GTT
2795 cmpOp DoubleGeOp = GE
2796 cmpOp DoubleEqOp = EQQ
2797 cmpOp DoubleNeOp = NE
2798 cmpOp DoubleLtOp = LTT
2799 cmpOp DoubleLeOp = LE
-- General float comparison: FCMP into a temp, then BF on the result.
-- The table encodes each op as (compare insn, inverted condition).
2801 genCondJump lbl (StPrim op [x, y])
2803 = trivialFCode pr instr x y `thenNat` \ register ->
2804 getNewRegNat F64 `thenNat` \ tmp ->
2806 code = registerCode register tmp
2807 result = registerName register tmp
2808 target = ImmCLbl lbl
2810 return (code . mkSeqInstr (BF cond result target))
2812 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2814 fltCmpOp op = case op of
2828 (instr, cond) = case op of
2829 FloatGtOp -> (FCMP TF LE, EQQ)
2830 FloatGeOp -> (FCMP TF LTT, EQQ)
2831 FloatEqOp -> (FCMP TF EQQ, NE)
2832 FloatNeOp -> (FCMP TF EQQ, EQQ)
2833 FloatLtOp -> (FCMP TF LTT, NE)
2834 FloatLeOp -> (FCMP TF LE, NE)
2835 DoubleGtOp -> (FCMP TF LE, EQQ)
2836 DoubleGeOp -> (FCMP TF LTT, EQQ)
2837 DoubleEqOp -> (FCMP TF EQQ, NE)
2838 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2839 DoubleLtOp -> (FCMP TF LTT, NE)
2840 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: CMP into a temp, then BI on the result,
-- using the same (compare insn, inverted condition) encoding.
2842 genCondJump lbl (StPrim op [x, y])
2843 = trivialCode instr x y `thenNat` \ register ->
2844 getNewRegNat IntRep `thenNat` \ tmp ->
2846 code = registerCode register tmp
2847 result = registerName register tmp
2848 target = ImmCLbl lbl
2850 return (code . mkSeqInstr (BI cond result target))
2852 (instr, cond) = case op of
2853 CharGtOp -> (CMP LE, EQQ)
2854 CharGeOp -> (CMP LTT, EQQ)
2855 CharEqOp -> (CMP EQQ, NE)
2856 CharNeOp -> (CMP EQQ, EQQ)
2857 CharLtOp -> (CMP LTT, NE)
2858 CharLeOp -> (CMP LE, NE)
2859 IntGtOp -> (CMP LE, EQQ)
2860 IntGeOp -> (CMP LTT, EQQ)
2861 IntEqOp -> (CMP EQQ, NE)
2862 IntNeOp -> (CMP EQQ, EQQ)
2863 IntLtOp -> (CMP LTT, NE)
2864 IntLeOp -> (CMP LE, NE)
2865 WordGtOp -> (CMP ULE, EQQ)
2866 WordGeOp -> (CMP ULT, EQQ)
2867 WordEqOp -> (CMP EQQ, NE)
2868 WordNeOp -> (CMP EQQ, EQQ)
2869 WordLtOp -> (CMP ULT, NE)
2870 WordLeOp -> (CMP ULE, NE)
2871 AddrGtOp -> (CMP ULE, EQQ)
2872 AddrGeOp -> (CMP ULT, EQQ)
2873 AddrEqOp -> (CMP EQQ, NE)
2874 AddrNeOp -> (CMP EQQ, EQQ)
2875 AddrLtOp -> (CMP ULT, NE)
2876 AddrLeOp -> (CMP ULE, NE)
2878 #endif /* alpha_TARGET_ARCH */
2880 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2882 #if i386_TARGET_ARCH
-- i386: evaluate the condition, then a single conditional jump (JXX).
2884 genCondJump id bool = do
2885 CondCode _ cond code <- getCondCode bool
2886 return (code `snocOL` JXX cond id)
2890 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2892 #if x86_64_TARGET_ARCH
-- x86-64: non-float conditions emit a single JXX; float conditions
-- need extra handling (SSE comparisons leave the "unordered" case in
-- the parity flag -- see the comment with condFltReg referenced
-- below).  NOTE(review): the branch between the two cases and most of
-- the plain_test / or_unordered / and_ordered instruction lists fall
-- in lines missing from this extract.
2894 genCondJump id bool = do
2895 CondCode is_float cond cond_code <- getCondCode bool
2898 return (cond_code `snocOL` JXX cond id)
2900 lbl <- getBlockIdNat
2902 -- see comment with condFltReg
2903 let code = case cond of
2909 plain_test = unitOL (
2912 or_unordered = toOL [
2916 and_ordered = toOL [
2922 return (cond_code `appOL` code)
2926 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2928 #if sparc_TARGET_ARCH
-- SPARC: BF for float conditions, BI for integer, each followed by a
-- NOP for the branch delay slot (see the commentary above); the float
-- case also puts a NOP before the branch, since an FP operation may
-- not directly precede a BF.
2930 genCondJump (BlockId id) bool = do
2931 CondCode is_float cond code <- getCondCode bool
2936 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2937 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2941 #endif /* sparc_TARGET_ARCH */
2944 #if powerpc_TARGET_ARCH
-- PPC: a single conditional branch (BCC) suffices for both integer
-- and float conditions (condFltCode already fixed up the CR bits for
-- the unordered case).
2946 genCondJump id bool = do
2947 CondCode is_float cond code <- getCondCode bool
2948 return (code `snocOL` BCC cond id)
2950 #endif /* powerpc_TARGET_ARCH */
2953 -- -----------------------------------------------------------------------------
2954 -- Generating C calls
2956 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2957 -- @get_arg@, which moves the arguments to the correct registers/stack
2958 -- locations. Apart from that, the code is easy.
2960 -- (If applicable) Do not fill the delay slots here; you will confuse the
2961 -- register allocator.
-- NOTE(review): the function-name line of this type signature
-- (presumably "genCCall", original ~2963) is missing from this
-- extract.
2964 :: CmmCallTarget -- function to call
2965 -> CmmHintFormals -- where to put the result
2966 -> CmmActuals -- arguments (of mixed type)
2969 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2971 #if alpha_TARGET_ARCH
-- Legacy Alpha C-call code.  NOTE(review): several interior lines are
-- missing from this extract.
2975 genCCall fn cconv result_regs args
2976 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2977 `thenNat` \ ((unused,_), argCode) ->
2979 nRegs = length allArgRegs - length unused
2980 code = asmSeqThen (map ($ []) argCode)
2983 LDA pv (AddrImm (ImmLab (ptext fn))),
2984 JSR ra (AddrReg pv) nRegs,
2985 LDGP gp (AddrReg ra)]
2987 ------------------------
2988 {- Try to get a value into a specific register (or registers) for
2989 a call. The first 6 arguments go into the appropriate
2990 argument register (separate registers for integer and floating
2991 point arguments, but used in lock-step), and the remaining
2992 arguments are dumped to the stack, beginning at 0(sp). Our
2993 first argument is a pair of the list of remaining argument
2994 registers to be assigned for this call and the next stack
2995 offset to use for overflowing arguments. This way,
2996 @get_Arg@ can be applied to all of a call's arguments using
3000 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3001 -> StixTree -- Current argument
3002 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3004 -- We have to use up all of our argument registers first...
3006 get_arg ((iDst,fDst):dsts, offset) arg
3007 = getRegister arg `thenNat` \ register ->
3009 reg = if isFloatingRep pk then fDst else iDst
3010 code = registerCode register reg
3011 src = registerName register reg
3012 pk = registerRep register
3015 if isFloatingRep pk then
3016 ((dsts, offset), if isFixed register then
3017 code . mkSeqInstr (FMOV src fDst)
3020 ((dsts, offset), if isFixed register then
3021 code . mkSeqInstr (OR src (RIReg src) iDst)
3024 -- Once we have run out of argument registers, we move to the
3027 get_arg ([], offset) arg
3028 = getRegister arg `thenNat` \ register ->
3029 getNewRegNat (registerRep register)
3032 code = registerCode register tmp
3033 src = registerName register tmp
3034 pk = registerRep register
3035 sz = primRepToSize pk
3037 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3039 #endif /* alpha_TARGET_ARCH */
3041 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3043 #if i386_TARGET_ARCH
3045 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3046 -- write barrier compiles to no code on x86/x86-64;
3047 -- we keep it this long in order to prevent earlier optimisations.
3049 -- we only cope with a single result for foreign calls
-- Selected float primitives are inlined as x87 instructions
-- (GSQRT/GSIN/GCOS/GTAN); everything else goes out of line via
-- outOfLineFloatOp.  NOTE(review): the "case op of" dispatch line is
-- missing from this extract.
3050 genCCall (CmmPrim op) [(r,_)] args = do
3052 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3053 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3055 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3056 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3058 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3059 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3061 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3062 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3064 other_op -> outOfLineFloatOp op r args
3066 actuallyInlineFloatOp rep instr [(x,_)]
3067 = do res <- trivialUFCode rep instr x
3069 return (any (getRegisterReg (CmmLocal r)))
-- General i386 C call: push args right-to-left, tracking the stack
-- delta; on Darwin the arg area is first padded so the stack stays
-- 16-byte aligned; the caller pops the args afterwards except for
-- stdcall, where the callee does.  NOTE(review): many "let"/"where"
-- lines and several branches are missing from this extract.
3071 genCCall target dest_regs args = do
3073 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
3074 #if !darwin_TARGET_OS
3075 tot_arg_size = sum sizes
3077 raw_arg_size = sum sizes
3078 tot_arg_size = roundTo 16 raw_arg_size
3079 arg_pad_size = tot_arg_size - raw_arg_size
3080 delta0 <- getDeltaNat
3081 setDeltaNat (delta0 - arg_pad_size)
3084 push_codes <- mapM push_arg (reverse args)
3085 delta <- getDeltaNat
3088 -- deal with static vs dynamic call targets
3089 (callinsns,cconv) <-
3092 CmmCallee (CmmLit (CmmLabel lbl)) conv
3093 -> -- ToDo: stdcall arg sizes
3094 return (unitOL (CALL (Left fn_imm) []), conv)
3095 where fn_imm = ImmCLbl lbl
3097 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3098 ASSERT(dyn_rep == I32)
3099 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3102 #if darwin_TARGET_OS
3104 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3105 DELTA (delta0 - arg_pad_size)]
3106 `appOL` concatOL push_codes
3109 = concatOL push_codes
3110 call = callinsns `appOL`
3112 -- Deallocate parameters after call for ccall;
3113 -- but not for stdcall (callee does it)
3114 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3115 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3117 [DELTA (delta + tot_arg_size)]
3120 setDeltaNat (delta + tot_arg_size)
3123 -- assign the results, if necessary
-- Results come back in eax (ints; edx:eax for I64) or fake0 (floats,
-- via the x87 stack, hence GMOV).
3124 assign_code [] = nilOL
3125 assign_code [(dest,_hint)] =
3127 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3128 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3129 F32 -> unitOL (GMOV fake0 r_dest)
3130 F64 -> unitOL (GMOV fake0 r_dest)
3131 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3133 r_dest_hi = getHiVRegFromLo r_dest
3134 rep = localRegRep dest
3135 r_dest = getRegisterReg (CmmLocal dest)
3136 assign_code many = panic "genCCall.assign_code many"
3138 return (push_code `appOL`
3140 assign_code dest_regs)
-- Round x up to the next multiple of a.
3148 roundTo a x | x `mod` a == 0 = x
3149 | otherwise = x + a - (x `mod` a)
3152 push_arg :: (CmmExpr,MachHint){-current argument-}
3153 -> NatM InstrBlock -- code
-- I64 args are pushed as two I32 words, high word first; float args
-- are stored via GST after making room on the stack; everything else
-- is a plain PUSH.  Each push updates the recorded stack delta.
3155 push_arg (arg,_hint) -- we don't need the hints on x86
3156 | arg_rep == I64 = do
3157 ChildCode64 code r_lo <- iselExpr64 arg
3158 delta <- getDeltaNat
3159 setDeltaNat (delta - 8)
3161 r_hi = getHiVRegFromLo r_lo
3163 return ( code `appOL`
3164 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3165 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3170 (code, reg, sz) <- get_op arg
3171 delta <- getDeltaNat
3172 let size = arg_size sz
3173 setDeltaNat (delta-size)
3174 if (case sz of F64 -> True; F32 -> True; _ -> False)
3175 then return (code `appOL`
3176 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3178 GST sz reg (AddrBaseIndex (EABaseReg esp)
3182 else return (code `snocOL`
3183 PUSH I32 (OpReg reg) `snocOL`
3187 arg_rep = cmmExprRep arg
3190 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3192 (reg,code) <- getSomeReg op
3193 return (code, reg, cmmExprRep op)
3195 #endif /* i386_TARGET_ARCH */
3197 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3199 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
-- | Compile a float primitive as a real C call to the corresponding
-- libm routine (sqrt/sin/... or the "f"-suffixed F32 variants).  The
-- C routines work at F64; when the result register is F32, call into
-- a fresh F64 temp and assign it across (the assignment performs the
-- narrowing).  NOTE(review): a few interior lines (the "else do"
-- branch head, uniq generation) are missing from this extract.
3201 outOfLineFloatOp mop res args
3203 dflags <- getDynFlagsNat
3204 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3205 let target = CmmCallee targetExpr CCallConv
3207 if localRegRep res == F64
3209 stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe CmmMayReturn)
3213 tmp = LocalReg uq F64 KindNonPtr
3215 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe CmmMayReturn)
3216 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3217 return (code1 `appOL` code2)
3219 lbl = mkForeignLabel fn Nothing False
3222 MO_F32_Sqrt -> FSLIT("sqrtf")
3223 MO_F32_Sin -> FSLIT("sinf")
3224 MO_F32_Cos -> FSLIT("cosf")
3225 MO_F32_Tan -> FSLIT("tanf")
3226 MO_F32_Exp -> FSLIT("expf")
3227 MO_F32_Log -> FSLIT("logf")
3229 MO_F32_Asin -> FSLIT("asinf")
3230 MO_F32_Acos -> FSLIT("acosf")
3231 MO_F32_Atan -> FSLIT("atanf")
3233 MO_F32_Sinh -> FSLIT("sinhf")
3234 MO_F32_Cosh -> FSLIT("coshf")
3235 MO_F32_Tanh -> FSLIT("tanhf")
3236 MO_F32_Pwr -> FSLIT("powf")
3238 MO_F64_Sqrt -> FSLIT("sqrt")
3239 MO_F64_Sin -> FSLIT("sin")
3240 MO_F64_Cos -> FSLIT("cos")
3241 MO_F64_Tan -> FSLIT("tan")
3242 MO_F64_Exp -> FSLIT("exp")
3243 MO_F64_Log -> FSLIT("log")
3245 MO_F64_Asin -> FSLIT("asin")
3246 MO_F64_Acos -> FSLIT("acos")
3247 MO_F64_Atan -> FSLIT("atan")
3249 MO_F64_Sinh -> FSLIT("sinh")
3250 MO_F64_Cosh -> FSLIT("cosh")
3251 MO_F64_Tanh -> FSLIT("tanh")
3252 MO_F64_Pwr -> FSLIT("pow")
3254 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3256 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3258 #if x86_64_TARGET_ARCH
3260 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3261 -- write barrier compiles to no code on x86/x86-64;
3262 -- we keep it this long in order to prevent earlier optimisations.
-- On x86-64 there is no x87-style inlining: all float primitives go
-- out of line to the corresponding libm routine.
3264 genCCall (CmmPrim op) [(r,_)] args =
3265 outOfLineFloatOp op r args
3267 genCCall target dest_regs args = do
3269 -- load up the register arguments
3270 (stack_args, aregs, fregs, load_args_code)
3271 <- load_args args allArgRegs allFPArgRegs nilOL
3274 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3275 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3276 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3277 -- for annotating the call instruction with
3279 sse_regs = length fp_regs_used
3281 tot_arg_size = arg_size * length stack_args
3283 -- On entry to the called function, %rsp should be aligned
3284 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3285 -- the return address is 16-byte aligned). In STG land
3286 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3287 -- need to make sure we push a multiple of 16-bytes of args,
3288 -- plus the return address, to get the correct alignment.
3289 -- Urg, this is hard. We need to feed the delta back into
3290 -- the arg pushing code.
3291 (real_size, adjust_rsp) <-
3292 if tot_arg_size `rem` 16 == 0
3293 then return (tot_arg_size, nilOL)
3294 else do -- we need to adjust...
3295 delta <- getDeltaNat
3296 setDeltaNat (delta-8)
3297 return (tot_arg_size+8, toOL [
3298 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3302 -- push the stack args, right to left
3303 push_code <- push_args (reverse stack_args) nilOL
3304 delta <- getDeltaNat
3306 -- deal with static vs dynamic call targets
3307 (callinsns,cconv) <-
3310 CmmCallee (CmmLit (CmmLabel lbl)) conv
3311 -> -- ToDo: stdcall arg sizes
3312 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3313 where fn_imm = ImmCLbl lbl
3315 -> do (dyn_r, dyn_c) <- getSomeReg expr
3316 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3319 -- The x86_64 ABI requires us to set %al to the number of SSE
3320 -- registers that contain arguments, if the called routine
3321 -- is a varargs function. We don't know whether it's a
3322 -- varargs function or not, so we have to assume it is.
3324 -- It's not safe to omit this assignment, even if the number
3325 -- of SSE regs in use is zero. If %al is larger than 8
3326 -- on entry to a varargs function, seg faults ensue.
3327 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3329 let call = callinsns `appOL`
3331 -- Deallocate parameters after call for ccall;
3332 -- but not for stdcall (callee does it)
3333 (if cconv == StdCallConv || real_size==0 then [] else
3334 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3336 [DELTA (delta + real_size)]
3339 setDeltaNat (delta + real_size)
3342 -- assign the results, if necessary
3343 assign_code [] = nilOL
3344 assign_code [(dest,_hint)] =
3346 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3347 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3348 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3350 rep = localRegRep dest
3351 r_dest = getRegisterReg (CmmLocal dest)
3352 assign_code many = panic "genCCall.assign_code many"
3354 return (load_args_code `appOL`
3357 assign_eax sse_regs `appOL`
3359 assign_code dest_regs)
3362 arg_size = 8 -- always, at the mo
3364 load_args :: [(CmmExpr,MachHint)]
3365 -> [Reg] -- int regs avail for args
3366 -> [Reg] -- FP regs avail for args
3368 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3369 load_args args [] [] code = return (args, [], [], code)
3370 -- no more regs to use
3371 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3372 -- no more args to push
3373 load_args ((arg,hint) : rest) aregs fregs code
3374 | isFloatingRep arg_rep =
3378 arg_code <- getAnyReg arg
3379 load_args rest aregs rs (code `appOL` arg_code r)
3384 arg_code <- getAnyReg arg
3385 load_args rest rs fregs (code `appOL` arg_code r)
3387 arg_rep = cmmExprRep arg
3390 (args',ars,frs,code') <- load_args rest aregs fregs code
3391 return ((arg,hint):args', ars, frs, code')
3393 push_args [] code = return code
3394 push_args ((arg,hint):rest) code
3395 | isFloatingRep arg_rep = do
3396 (arg_reg, arg_code) <- getSomeReg arg
3397 delta <- getDeltaNat
3398 setDeltaNat (delta-arg_size)
3399 let code' = code `appOL` arg_code `appOL` toOL [
3400 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3401 DELTA (delta-arg_size),
3402 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3403 push_args rest code'
3406 -- we only ever generate word-sized function arguments. Promotion
3407 -- has already happened: our Int8# type is kept sign-extended
3408 -- in an Int#, for example.
3409 ASSERT(arg_rep == I64) return ()
3410 (arg_op, arg_code) <- getOperand arg
3411 delta <- getDeltaNat
3412 setDeltaNat (delta-arg_size)
3413 let code' = code `appOL` toOL [PUSH I64 arg_op,
3414 DELTA (delta-arg_size)]
3415 push_args rest code'
3417 arg_rep = cmmExprRep arg
3420 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3422 #if sparc_TARGET_ARCH
3424 The SPARC calling convention is an absolute
3425 nightmare. The first 6x32 bits of arguments are mapped into
3426 %o0 through %o5, and the remaining arguments are dumped to the
3427 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3429 If we have to put args on the stack, move %o6==%sp down by
3430 the number of words to go on the stack, to ensure there's enough space.
3432 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3433 16 words above the stack pointer is a word for the address of
3434 a structure return value. I use this as a temporary location
3435 for moving values from float to int regs. Certainly it isn't
3436 safe to put anything in the 16 words starting at %sp, since
3437 this area can get trashed at any time due to window overflows
3438 caused by signal handlers.
3440 A final complication (if the above isn't enough) is that
3441 we can't blithely calculate the arguments one by one into
3442 %o0 .. %o5. Consider the following nested calls:
3446 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3447 the inner call will itself use %o0, which trashes the value put there
3448 in preparation for the outer call. Upshot: we need to calculate the
3449 args into temporary regs, and move those to arg regs or onto the
3450 stack only immediately prior to the call proper. Sigh.
-- genCCall, SPARC: per the note above, every argument is first
-- computed into fresh integer vregs (arg_to_int_vregs), and only
-- immediately before the call are those vregs shuffled into
-- %o0..%o5 / stack slots by move_final.
3453 genCCall target dest_regs argsAndHints = do
3455 args = map fst argsAndHints
3456 argcode_and_vregs <- mapM arg_to_int_vregs args
3458 (argcodes, vregss) = unzip argcode_and_vregs
3459 n_argRegs = length allArgRegs
3460 n_argRegs_used = min (length vregs) n_argRegs
3461 vregs = concat vregss
3462 -- deal with static vs dynamic call targets
3463 callinsns <- (case target of
3464 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3465 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3466 CmmCallee expr conv -> do
3467 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3468 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- callish mach-op: resolve to a libm routine via outOfLineFloatOp
3470 (res, reduce) <- outOfLineFloatOp mop
3471 lblOrMopExpr <- case res of
3473 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3475 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3476 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3477 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3481 argcode = concatOL argcodes
-- If there are more vregs than arg registers, the excess goes on the
-- stack: move %sp down before the call and back up afterwards.
3482 (move_sp_down, move_sp_up)
3483 = let diff = length vregs - n_argRegs
3484 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3487 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3489 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3490 return (argcode `appOL`
3491 move_sp_down `appOL`
3492 transfer_code `appOL`
3497 -- move args from the integer vregs into which they have been
3498 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3499 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3501 move_final [] _ offset -- all args done
3504 move_final (v:vs) [] offset -- out of aregs; move to stack
3505 = ST I32 v (spRel offset)
3506 : move_final vs [] (offset+1)
3508 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3509 = OR False g0 (RIReg v) a
3510 : move_final vs az offset
3512 -- generate code to calculate an argument, and move it into one
3513 -- or two integer vregs.
3514 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-- I64 args already live in a hi/lo vreg pair courtesy of iselExpr64.
3515 arg_to_int_vregs arg
3516 | (cmmExprRep arg) == I64
3518 (ChildCode64 code r_lo) <- iselExpr64 arg
3520 r_hi = getHiVRegFromLo r_lo
3521 return (code, [r_hi, r_lo])
3524 (src, code) <- getSomeReg arg
3525 tmp <- getNewRegNat (cmmExprRep arg)
-- NOTE(review): the F64 case below bounces each half of the double
-- through the scratch word at [%sp+16] (the structure-return slot
-- described in the comment at the top) to split it into two int
-- vregs -- confirm against the SPARC ABI before changing.
3530 v1 <- getNewRegNat I32
3531 v2 <- getNewRegNat I32
3534 FMOV F64 src f0 `snocOL`
3535 ST F32 f0 (spRel 16) `snocOL`
3536 LD I32 (spRel 16) v1 `snocOL`
3537 ST F32 (fPair f0) (spRel 16) `snocOL`
3538 LD I32 (spRel 16) v2
3543 v1 <- getNewRegNat I32
3546 ST F32 src (spRel 16) `snocOL`
3547 LD I32 (spRel 16) v1
3552 v1 <- getNewRegNat I32
3554 code `snocOL` OR False g0 (RIReg src) v1
-- outOfLineFloatOp: map a callish mach-op to (label-or-expr, reduce)
-- for the matching libm routine.  reduce is True for the F32
-- variants; NOTE(review): presumably it means "must be reduced to the
-- F64 routine", which genCCall above refuses to do ("can not reduce")
-- -- confirm before relying on it.
3558 outOfLineFloatOp mop =
3560 dflags <- getDynFlagsNat
3561 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3562 mkForeignLabel functionName Nothing True
3563 let mopLabelOrExpr = case mopExpr of
3564 CmmLit (CmmLabel lbl) -> Left lbl
3566 return (mopLabelOrExpr, reduce)
3568 (reduce, functionName) = case mop of
3569 MO_F32_Exp -> (True, FSLIT("exp"))
3570 MO_F32_Log -> (True, FSLIT("log"))
3571 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3573 MO_F32_Sin -> (True, FSLIT("sin"))
3574 MO_F32_Cos -> (True, FSLIT("cos"))
3575 MO_F32_Tan -> (True, FSLIT("tan"))
3577 MO_F32_Asin -> (True, FSLIT("asin"))
3578 MO_F32_Acos -> (True, FSLIT("acos"))
3579 MO_F32_Atan -> (True, FSLIT("atan"))
3581 MO_F32_Sinh -> (True, FSLIT("sinh"))
3582 MO_F32_Cosh -> (True, FSLIT("cosh"))
3583 MO_F32_Tanh -> (True, FSLIT("tanh"))
3585 MO_F64_Exp -> (False, FSLIT("exp"))
3586 MO_F64_Log -> (False, FSLIT("log"))
3587 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3589 MO_F64_Sin -> (False, FSLIT("sin"))
3590 MO_F64_Cos -> (False, FSLIT("cos"))
3591 MO_F64_Tan -> (False, FSLIT("tan"))
3593 MO_F64_Asin -> (False, FSLIT("asin"))
3594 MO_F64_Acos -> (False, FSLIT("acos"))
3595 MO_F64_Atan -> (False, FSLIT("atan"))
3597 MO_F64_Sinh -> (False, FSLIT("sinh"))
3598 MO_F64_Cosh -> (False, FSLIT("cosh"))
3599 MO_F64_Tanh -> (False, FSLIT("tanh"))
3601 other -> pprPanic "outOfLineFloatOp(sparc) "
3602 (pprCallishMachOp mop)
3604 #endif /* sparc_TARGET_ARCH */
3606 #if powerpc_TARGET_ARCH
3608 #if darwin_TARGET_OS || linux_TARGET_OS
3610 The PowerPC calling convention for Darwin/Mac OS X
3611 is described in Apple's document
3612 "Inside Mac OS X - Mach-O Runtime Architecture".
3614 PowerPC Linux uses the System V Release 4 Calling Convention
3615 for PowerPC. It is described in the
3616 "System V Application Binary Interface PowerPC Processor Supplement".
3618 Both conventions are similar:
3619 Parameters may be passed in general-purpose registers starting at r3, in
3620 floating point registers starting at f1, or on the stack.
3622 But there are substantial differences:
3623 * The number of registers used for parameter passing and the exact set of
3624 nonvolatile registers differs (see MachRegs.lhs).
3625 * On Darwin, stack space is always reserved for parameters, even if they are
3626 passed in registers. The called routine may choose to save parameters from
3627 registers to the corresponding space on the stack.
3628 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3629 parameter is passed in an FPR.
3630 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3631 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3632 Darwin just treats an I64 like two separate I32s (high word first).
3633 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3634 4-byte aligned like everything else on Darwin.
3635 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3636 PowerPC Linux does not agree, so neither do we.
3638 According to both conventions, The parameter area should be part of the
3639 caller's stack frame, allocated in the caller's prologue code (large enough
3640 to hold the parameter lists for all called routines). The NCG already
3641 uses the stack for register spilling, leaving 64 bytes free at the top.
3642 If we need a larger parameter area than that, we just allocate a new stack
3643 frame just before ccalling.
-- The write barrier is a single lwsync on PowerPC.
3647 genCCall (CmmPrim MO_WriteBarrier) _ _
3648 = return $ unitOL LWSYNC
-- genCCall, PowerPC: pass arguments per the Darwin / SysV convention
-- selected by CPP below, bracket the call with move_sp_down/move_sp_up
-- when the parameter area outgrows the spill area, and fetch the
-- result with moveResult.
3650 genCCall target dest_regs argsAndHints
3651 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3652 -- we rely on argument promotion in the codeGen
3654 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3656 allArgRegs allFPArgRegs
3660 (labelOrExpr, reduceToF32) <- case target of
3661 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3662 CmmCallee expr conv -> return (Right expr, False)
3663 CmmPrim mop -> outOfLineFloatOp mop
3665 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3666 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- direct call: BL; indirect: load the target into CTR and BCTRL
3671 `snocOL` BL lbl usedRegs
3674 (dynReg, dynCode) <- getSomeReg dyn
3676 `snocOL` MTCTR dynReg
3678 `snocOL` BCTRL usedRegs
3681 #if darwin_TARGET_OS
3682 initialStackOffset = 24
3683 -- size of linkage area + size of arguments, in bytes
3684 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3685 map machRepByteWidth argReps
3686 #elif linux_TARGET_OS
3687 initialStackOffset = 8
3688 stackDelta finalStack = roundTo 16 finalStack
3690 args = map fst argsAndHints
3691 argReps = map cmmExprRep args
-- round x up to the next multiple of a
3693 roundTo a x | x `mod` a == 0 = x
3694 | otherwise = x + a - (x `mod` a)
3696 move_sp_down finalStack
3698 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3701 where delta = stackDelta finalStack
3702 move_sp_up finalStack
3704 toOL [ADD sp sp (RIImm (ImmInt delta)),
3707 where delta = stackDelta finalStack
-- passArguments: walk the argument list assigning each argument to
-- GPRs/FPRs or a stack slot; returns the final stack offset, the
-- accumulated code, and the registers used (for the call annotation).
3710 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 arguments: split into hi/lo words; Darwin uses two consecutive
-- GPRs (or the stack), SysV requires an even-aligned GPR pair or an
-- 8-byte-aligned stack slot (see the convention notes above).
3711 passArguments ((arg,I64):args) gprs fprs stackOffset
3712 accumCode accumUsed =
3714 ChildCode64 code vr_lo <- iselExpr64 arg
3715 let vr_hi = getHiVRegFromLo vr_lo
3717 #if darwin_TARGET_OS
3722 (accumCode `appOL` code
3723 `snocOL` storeWord vr_hi gprs stackOffset
3724 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3725 ((take 2 gprs) ++ accumUsed)
3727 storeWord vr (gpr:_) offset = MR gpr vr
3728 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3730 #elif linux_TARGET_OS
3731 let stackOffset' = roundTo 8 stackOffset
3732 stackCode = accumCode `appOL` code
3733 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3734 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3735 regCode hireg loreg =
3736 accumCode `appOL` code
3737 `snocOL` MR hireg vr_hi
3738 `snocOL` MR loreg vr_lo
3741 hireg : loreg : regs | even (length gprs) ->
3742 passArguments args regs fprs stackOffset
3743 (regCode hireg loreg) (hireg : loreg : accumUsed)
3744 _skipped : hireg : loreg : regs ->
3745 passArguments args regs fprs stackOffset
3746 (regCode hireg loreg) (hireg : loreg : accumUsed)
3747 _ -> -- only one or no regs left
3748 passArguments args [] fprs (stackOffset'+8)
-- Non-I64 arguments: take the first available register of the right
-- class if there is one, otherwise store to the next stack slot.
3752 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3753 | reg : _ <- regs = do
3754 register <- getRegister arg
3755 let code = case register of
3756 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3757 Any _ acode -> acode reg
3761 #if darwin_TARGET_OS
3762 -- The Darwin ABI requires that we reserve stack slots for register parameters
3763 (stackOffset + stackBytes)
3764 #elif linux_TARGET_OS
3765 -- ... the SysV ABI doesn't.
3768 (accumCode `appOL` code)
3771 (vr, code) <- getSomeReg arg
3775 (stackOffset' + stackBytes)
3776 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3779 #if darwin_TARGET_OS
3780 -- stackOffset is at least 4-byte aligned
3781 -- The Darwin ABI is happy with that.
3782 stackOffset' = stackOffset
3784 -- ... the SysV ABI requires 8-byte alignment for doubles.
3785 stackOffset' | rep == F64 = roundTo 8 stackOffset
3786 | otherwise = stackOffset
3788 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- per-rep consumption: GPRs used, FPRs used, stack bytes, and which
-- register list this rep draws from
3789 (nGprs, nFprs, stackBytes, regs) = case rep of
3790 I32 -> (1, 0, 4, gprs)
3791 #if darwin_TARGET_OS
3792 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3794 F32 -> (1, 1, 4, fprs)
3795 F64 -> (2, 1, 8, fprs)
3796 #elif linux_TARGET_OS
3797 -- ... the SysV ABI doesn't.
3798 F32 -> (0, 1, 4, fprs)
3799 F64 -> (0, 1, 8, fprs)
-- moveResult: copy the C result back into the Cmm local -- f1 for
-- floats (narrowed with FRSP when a reduced F32 op returned an F64),
-- r3 (and, for I64, the following GPR -- that line is not visible in
-- this extract) for integers.
3802 moveResult reduceToF32 =
3806 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3807 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3808 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3810 | otherwise -> unitOL (MR r_dest r3)
3811 where rep = cmmRegRep (CmmLocal dest)
3812 r_dest = getRegisterReg (CmmLocal dest)
-- outOfLineFloatOp: map a callish mach-op to its libm routine.
-- reduce is True for F32 ops; NOTE(review): presumably meaning the
-- operation is performed by the double-precision routine and the
-- result narrowed back via FRSP in moveResult above -- confirm.
3814 outOfLineFloatOp mop =
3816 dflags <- getDynFlagsNat
3817 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3818 mkForeignLabel functionName Nothing True
3819 let mopLabelOrExpr = case mopExpr of
3820 CmmLit (CmmLabel lbl) -> Left lbl
3822 return (mopLabelOrExpr, reduce)
3824 (functionName, reduce) = case mop of
3825 MO_F32_Exp -> (FSLIT("exp"), True)
3826 MO_F32_Log -> (FSLIT("log"), True)
3827 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3829 MO_F32_Sin -> (FSLIT("sin"), True)
3830 MO_F32_Cos -> (FSLIT("cos"), True)
3831 MO_F32_Tan -> (FSLIT("tan"), True)
3833 MO_F32_Asin -> (FSLIT("asin"), True)
3834 MO_F32_Acos -> (FSLIT("acos"), True)
3835 MO_F32_Atan -> (FSLIT("atan"), True)
3837 MO_F32_Sinh -> (FSLIT("sinh"), True)
3838 MO_F32_Cosh -> (FSLIT("cosh"), True)
3839 MO_F32_Tanh -> (FSLIT("tanh"), True)
3840 MO_F32_Pwr -> (FSLIT("pow"), True)
3842 MO_F64_Exp -> (FSLIT("exp"), False)
3843 MO_F64_Log -> (FSLIT("log"), False)
3844 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3846 MO_F64_Sin -> (FSLIT("sin"), False)
3847 MO_F64_Cos -> (FSLIT("cos"), False)
3848 MO_F64_Tan -> (FSLIT("tan"), False)
3850 MO_F64_Asin -> (FSLIT("asin"), False)
3851 MO_F64_Acos -> (FSLIT("acos"), False)
3852 MO_F64_Atan -> (FSLIT("atan"), False)
3854 MO_F64_Sinh -> (FSLIT("sinh"), False)
3855 MO_F64_Cosh -> (FSLIT("cosh"), False)
3856 MO_F64_Tanh -> (FSLIT("tanh"), False)
3857 MO_F64_Pwr -> (FSLIT("pow"), False)
3858 other -> pprPanic "genCCall(ppc): unknown callish op"
3859 (pprCallishMachOp other)
3861 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3863 #endif /* powerpc_TARGET_ARCH */
3866 -- -----------------------------------------------------------------------------
3867 -- Generating a table-branch
-- genSwitch: compile a switch on 'expr' through a jump table whose
-- entries correspond to the Maybe BlockId list (Nothing = hole, zero
-- word).  The two variants per architecture below appear to be the
-- PIC (label-relative entries, added to the table base at runtime)
-- and non-PIC (absolute labels, via jumpTableEntry) cases;
-- NOTE(review): the guard lines selecting between them are not
-- visible in this extract -- presumably they test opt_PIC.
3869 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3871 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3875 (reg,e_code) <- getSomeReg expr
3876 lbl <- getNewLabelNat
3877 dflags <- getDynFlagsNat
3878 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3879 (tableReg,t_code) <- getSomeReg $ dynRef
3881 jumpTable = map jumpTableEntryRel ids
-- PIC: each entry is the offset of the target block from the table
-- label, so the table itself is position-independent.
3883 jumpTableEntryRel Nothing
3884 = CmmStaticLit (CmmInt 0 wordRep)
3885 jumpTableEntryRel (Just (BlockId id))
3886 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3887 where blockLabel = mkAsmTempLabel id
-- index the table: [tableReg + reg * wORD_SIZE]
3889 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3890 (EAIndex reg wORD_SIZE) (ImmInt 0))
3892 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3893 -- on Mac OS X/x86_64, put the jump table in the text section
3894 -- to work around a limitation of the linker.
3895 -- ld64 is unable to handle the relocations for
3897 -- if L0 is not preceded by a non-anonymous label in its section.
3899 code = e_code `appOL` t_code `appOL` toOL [
3900 ADD wordRep op (OpReg tableReg),
3901 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3902 LDATA Text (CmmDataLabel lbl : jumpTable)
3905 code = e_code `appOL` t_code `appOL` toOL [
3906 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3907 ADD wordRep op (OpReg tableReg),
3908 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- non-PIC x86: jump indirectly through the absolute-address table
3914 (reg,e_code) <- getSomeReg expr
3915 lbl <- getNewLabelNat
3917 jumpTable = map jumpTableEntry ids
3918 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3919 code = e_code `appOL` toOL [
3920 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3921 JMP_TBL op [ id | Just id <- ids ]
3925 #elif powerpc_TARGET_ARCH
-- PowerPC PIC variant: load a label-relative entry, add the table
-- base, and branch through CTR.
3929 (reg,e_code) <- getSomeReg expr
3930 tmp <- getNewRegNat I32
3931 lbl <- getNewLabelNat
3932 dflags <- getDynFlagsNat
3933 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3934 (tableReg,t_code) <- getSomeReg $ dynRef
3936 jumpTable = map jumpTableEntryRel ids
3938 jumpTableEntryRel Nothing
3939 = CmmStaticLit (CmmInt 0 wordRep)
3940 jumpTableEntryRel (Just (BlockId id))
3941 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3942 where blockLabel = mkAsmTempLabel id
3944 code = e_code `appOL` t_code `appOL` toOL [
3945 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946 SLW tmp reg (RIImm (ImmInt 2)),
3947 LD I32 tmp (AddrRegReg tableReg tmp),
3948 ADD tmp tmp (RIReg tableReg),
3950 BCTR [ id | Just id <- ids ]
-- PowerPC non-PIC variant: address the table via hi/lo halves of the
-- table label (ADDIS/LD with HA/LO relocations).
3955 (reg,e_code) <- getSomeReg expr
3956 tmp <- getNewRegNat I32
3957 lbl <- getNewLabelNat
3959 jumpTable = map jumpTableEntry ids
3961 code = e_code `appOL` toOL [
3962 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3963 SLW tmp reg (RIImm (ImmInt 2)),
3964 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3965 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3967 BCTR [ id | Just id <- ids ]
-- remaining architectures: not implemented
3971 genSwitch expr ids = panic "ToDo: genSwitch"
-- | One word-sized jump-table entry: a zero literal for a hole in the
-- table (no branch target), otherwise the local assembler label of
-- the target basic block.
jumpTableEntry mbBlockId =
    case mbBlockId of
        Nothing            -> CmmStaticLit (CmmInt 0 wordRep)
        Just (BlockId bid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel bid))
3978 -- -----------------------------------------------------------------------------
3980 -- -----------------------------------------------------------------------------
3983 -- -----------------------------------------------------------------------------
3984 -- 'condIntReg' and 'condFltReg': condition codes into registers
3986 -- Turn those condition codes into integers now (when they appear on
3987 -- the right hand side of an assignment).
3989 -- (If applicable) Do not fill the delay slots here; you will confuse the
3990 -- register allocator.
-- Turn a comparison into a 0/1 value in a register; the per-arch
-- implementations follow in the architecture-specific sections below.
3992 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3994 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3996 #if alpha_TARGET_ARCH
-- Alpha: never implemented in this code generator.
3997 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3998 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3999 #endif /* alpha_TARGET_ARCH */
4001 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4003 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: SETcc writes the low byte of a fresh I8 temp from the
-- condition flags, then MOVZxL zero-extends that byte into dst,
-- yielding 0 or 1 as an I32.
4005 condIntReg cond x y = do
4006 CondCode _ cond cond_code <- condIntCode cond x y
4007 tmp <- getNewRegNat I8
4009 code dst = cond_code `appOL` toOL [
4010 SETCC cond (OpReg tmp),
4011 MOVZxL I8 (OpReg tmp) (OpReg dst)
4014 return (Any I32 code)
4018 #if i386_TARGET_ARCH
-- i386 float comparison: same SETcc/MOVZxL scheme as condIntReg.
4020 condFltReg cond x y = do
4021 CondCode _ cond cond_code <- condFltCode cond x y
4022 tmp <- getNewRegNat I8
4024 code dst = cond_code `appOL` toOL [
4025 SETCC cond (OpReg tmp),
4026 MOVZxL I8 (OpReg tmp) (OpReg dst)
4029 return (Any I32 code)
4033 #if x86_64_TARGET_ARCH
4035 condFltReg cond x y = do
4036 CondCode _ cond cond_code <- condFltCode cond x y
4037 tmp1 <- getNewRegNat wordRep
4038 tmp2 <- getNewRegNat wordRep
4040 -- We have to worry about unordered operands (eg. comparisons
4041 -- against NaN). If the operands are unordered, the comparison
4042 -- sets the parity flag, carry flag and zero flag.
4043 -- All comparisons are supposed to return false for unordered
4044 -- operands except for !=, which returns true.
4046 -- Optimisation: we don't have to test the parity flag if we
4047 -- know the test has already excluded the unordered case: eg >
4048 -- and >= test for a zero carry flag, which can only occur for
4049 -- ordered operands.
4051 -- ToDo: by reversing comparisons we could avoid testing the
4052 -- parity flag in more cases.
4057 NE -> or_unordered dst
4058 GU -> plain_test dst
4059 GEU -> plain_test dst
4060 _ -> and_ordered dst)
-- plain_test: unordered case already excluded, no parity check needed
4062 plain_test dst = toOL [
4063 SETCC cond (OpReg tmp1),
4064 MOVZxL I8 (OpReg tmp1) (OpReg dst)
-- or_unordered: NE must also come out True for unordered operands
4066 or_unordered dst = toOL [
4067 SETCC cond (OpReg tmp1),
4068 SETCC PARITY (OpReg tmp2),
4069 OR I8 (OpReg tmp1) (OpReg tmp2),
4070 MOVZxL I8 (OpReg tmp2) (OpReg dst)
-- and_ordered: mask the result to False for unordered operands
4072 and_ordered dst = toOL [
4073 SETCC cond (OpReg tmp1),
4074 SETCC NOTPARITY (OpReg tmp2),
4075 AND I8 (OpReg tmp1) (OpReg tmp2),
4076 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4079 return (Any I32 code)
4083 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4085 #if sparc_TARGET_ARCH
-- SPARC EQ/NE get branch-free 0/1 materialisation via the
-- carry-propagating SUB/ADD forms against %g0; the general case
-- further down branches through two fresh blocks instead.
-- NOTE(review): verify the carry sequences against the SPARC V8
-- manual before modifying -- they are easy to break.
4087 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4088 (src, code) <- getSomeReg x
4089 tmp <- getNewRegNat I32
4091 code__2 dst = code `appOL` toOL [
4092 SUB False True g0 (RIReg src) g0,
4093 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4094 return (Any I32 code__2)
-- general EQ: XOR the operands first, then the same zero test
4096 condIntReg EQQ x y = do
4097 (src1, code1) <- getSomeReg x
4098 (src2, code2) <- getSomeReg y
4099 tmp1 <- getNewRegNat I32
4100 tmp2 <- getNewRegNat I32
4102 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4103 XOR False src1 (RIReg src2) dst,
4104 SUB False True g0 (RIReg dst) g0,
4105 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4106 return (Any I32 code__2)
4108 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4109 (src, code) <- getSomeReg x
4110 tmp <- getNewRegNat I32
4112 code__2 dst = code `appOL` toOL [
4113 SUB False True g0 (RIReg src) g0,
4114 ADD True False g0 (RIImm (ImmInt 0)) dst]
4115 return (Any I32 code__2)
4117 condIntReg NE x y = do
4118 (src1, code1) <- getSomeReg x
4119 (src2, code2) <- getSomeReg y
4120 tmp1 <- getNewRegNat I32
4121 tmp2 <- getNewRegNat I32
4123 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4124 XOR False src1 (RIReg src2) dst,
4125 SUB False True g0 (RIReg dst) g0,
4126 ADD True False g0 (RIImm (ImmInt 0)) dst]
4127 return (Any I32 code__2)
-- general case: branch over explicit 0/1 moves (with delay-slot NOPs)
-- through two fresh basic blocks
4129 condIntReg cond x y = do
4130 BlockId lbl1 <- getBlockIdNat
4131 BlockId lbl2 <- getBlockIdNat
4132 CondCode _ cond cond_code <- condIntCode cond x y
4134 code__2 dst = cond_code `appOL` toOL [
4135 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4136 OR False g0 (RIImm (ImmInt 0)) dst,
4137 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4138 NEWBLOCK (BlockId lbl1),
4139 OR False g0 (RIImm (ImmInt 1)) dst,
4140 NEWBLOCK (BlockId lbl2)]
4141 return (Any I32 code__2)
-- float version: identical shape but branches on the FP condition (BF)
4143 condFltReg cond x y = do
4144 BlockId lbl1 <- getBlockIdNat
4145 BlockId lbl2 <- getBlockIdNat
4146 CondCode _ cond cond_code <- condFltCode cond x y
4148 code__2 dst = cond_code `appOL` toOL [
4150 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4151 OR False g0 (RIImm (ImmInt 0)) dst,
4152 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4153 NEWBLOCK (BlockId lbl1),
4154 OR False g0 (RIImm (ImmInt 1)) dst,
4155 NEWBLOCK (BlockId lbl2)]
4156 return (Any I32 code__2)
4158 #endif /* sparc_TARGET_ARCH */
4160 #if powerpc_TARGET_ARCH
-- condReg: shared implementation for condIntReg/condFltReg --
-- extract the relevant condition-register bit into dst with RLWINM,
-- inverting it first with CRNOR for conditions that have no direct
-- CR bit.  NOTE(review): the (bit, do_negate) table and a
-- commented-out alternative implementation are largely not visible
-- in this extract; comment-only edit.
4161 condReg getCond = do
4162 lbl1 <- getBlockIdNat
4163 lbl2 <- getBlockIdNat
4164 CondCode _ cond cond_code <- getCond
4166 {- code dst = cond_code `appOL` toOL [
4175 code dst = cond_code
4179 RLWINM dst dst (bit + 1) 31 31
4182 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4185 (bit, do_negate) = case cond of
4199 return (Any I32 code)
4201 condIntReg cond x y = condReg (condIntCode cond x y)
4202 condFltReg cond x y = condReg (condFltCode cond x y)
4203 #endif /* powerpc_TARGET_ARCH */
4206 -- -----------------------------------------------------------------------------
4207 -- 'trivial*Code': deal with trivial instructions
4209 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4210 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4211 -- Only look for constants on the right hand side, because that's
4212 -- where the generic optimizer will have put them.
4214 -- Similarly, for unary instructions, we don't have to worry about
4215 -- matching an StInt as the argument, because genericOpt will already
4216 -- have handled the constant-folding.
4220 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4221 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4222 -> Maybe (Operand -> Operand -> Instr)
4223 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4224 -> Maybe (Operand -> Operand -> Instr)
4225 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4226 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4228 -> CmmExpr -> CmmExpr -- the two arguments
4231 #ifndef powerpc_TARGET_ARCH
4234 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4235 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4236 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4237 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4239 -> CmmExpr -> CmmExpr -- the two arguments
4245 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4246 ,IF_ARCH_i386 ((Operand -> Instr)
4247 ,IF_ARCH_x86_64 ((Operand -> Instr)
4248 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4249 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4251 -> CmmExpr -- the one argument
4254 #ifndef powerpc_TARGET_ARCH
4257 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4258 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4259 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4260 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4262 -> CmmExpr -- the one argument
4266 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4268 #if alpha_TARGET_ARCH
-- Alpha versions, in the old pre-do-notation thenNat/mkSeqInstr
-- style.  A small immediate on the right becomes the RIImm operand
-- of the instruction directly.
4270 trivialCode instr x (StInt y)
4272 = getRegister x `thenNat` \ register ->
4273 getNewRegNat IntRep `thenNat` \ tmp ->
4275 code = registerCode register tmp
4276 src1 = registerName register tmp
4277 src2 = ImmInt (fromInteger y)
4278 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4280 return (Any IntRep code__2)
-- general dyadic case: both operands computed into registers first
4282 trivialCode instr x y
4283 = getRegister x `thenNat` \ register1 ->
4284 getRegister y `thenNat` \ register2 ->
4285 getNewRegNat IntRep `thenNat` \ tmp1 ->
4286 getNewRegNat IntRep `thenNat` \ tmp2 ->
4288 code1 = registerCode register1 tmp1 []
4289 src1 = registerName register1 tmp1
4290 code2 = registerCode register2 tmp2 []
4291 src2 = registerName register2 tmp2
4292 code__2 dst = asmSeqThen [code1, code2] .
4293 mkSeqInstr (instr src1 (RIReg src2) dst)
4295 return (Any IntRep code__2)
-- unary integer op
4298 trivialUCode instr x
4299 = getRegister x `thenNat` \ register ->
4300 getNewRegNat IntRep `thenNat` \ tmp ->
4302 code = registerCode register tmp
4303 src = registerName register tmp
4304 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4306 return (Any IntRep code__2)
-- dyadic floating-point op (first argument, the rep, is ignored here)
4309 trivialFCode _ instr x y
4310 = getRegister x `thenNat` \ register1 ->
4311 getRegister y `thenNat` \ register2 ->
4312 getNewRegNat F64 `thenNat` \ tmp1 ->
4313 getNewRegNat F64 `thenNat` \ tmp2 ->
4315 code1 = registerCode register1 tmp1
4316 src1 = registerName register1 tmp1
4318 code2 = registerCode register2 tmp2
4319 src2 = registerName register2 tmp2
4321 code__2 dst = asmSeqThen [code1 [], code2 []] .
4322 mkSeqInstr (instr src1 src2 dst)
4324 return (Any F64 code__2)
-- unary floating-point op
4326 trivialUFCode _ instr x
4327 = getRegister x `thenNat` \ register ->
4328 getNewRegNat F64 `thenNat` \ tmp ->
4330 code = registerCode register tmp
4331 src = registerName register tmp
4332 code__2 dst = code . mkSeqInstr (instr src dst)
4334 return (Any F64 code__2)
4336 #endif /* alpha_TARGET_ARCH */
4338 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4340 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4343 The Rules of the Game are:
4345 * You cannot assume anything about the destination register dst;
4346 it may be anything, including a fixed reg.
4348 * You may compute an operand into a fixed reg, but you may not
4349 subsequently change the contents of that fixed reg. If you
4350 want to do so, first copy the value either to a temporary
4351 or into dst. You are free to modify dst even if it happens
4352 to be a fixed reg -- that's not your problem.
4354 * You cannot assume that a fixed reg will stay live over an
4355 arbitrary computation. The same applies to the dst reg.
4357 * Temporary regs obtained from getNewRegNat are distinct from
4358 each other and from all other regs, and stay live over
4359 arbitrary computations.
4361 --------------------
4363 SDM's version of The Rules:
4365 * If getRegister returns Any, that means it can generate correct
4366 code which places the result in any register, period. Even if that
4367 register happens to be read during the computation.
4369 Corollary #1: this means that if you are generating code for an
4370 operation with two arbitrary operands, you cannot assign the result
4371 of the first operand into the destination register before computing
4372 the second operand. The second operand might require the old value
4373 of the destination register.
4375 Corollary #2: A function might be able to generate more efficient
4376 code if it knows the destination register is a new temporary (and
4377 therefore not read by any of the sub-computations).
4379 * If getRegister returns Any, then the code it generates may modify only:
4380 (a) fresh temporaries
4381 (b) the destination register
4382 (c) known registers (eg. %ecx is used by shifts)
4383 In particular, it may *not* modify global registers, unless the global
4384 register happens to be the destination register.
-- trivialCode (i386/x86_64): two-operand ALU operation, dst := dst `op` src.
-- First clause: when the first operand is a literal that fits (not 64-bit)
-- and a reversed-operand form of the instruction is supplied, compute b
-- into dst and apply revinstr with the literal as an immediate operand,
-- saving a register for the literal.
-- Second clause: everything else goes through genTrivialCode.
-- NOTE(review): this listing elides original lines 4390-4391 (the head of
-- the `code dst` binding), so the first clause is incomplete as shown.
4387 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4388 | not (is64BitLit lit_a) = do
4389 b_code <- getAnyReg b
4392 = b_code dst `snocOL`
4393 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4395 return (Any rep code)
4397 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4399 -- This is re-used for floating pt instructions too.
-- genTrivialCode: generic two-operand code generator.  b is evaluated to
-- a non-clobbered operand, a is computed straight into the destination,
-- and then the instruction is applied as dst := dst `op` b_op.  If dst
-- would clash with b_op (same register, or an address mode that mentions
-- dst), b is first copied into a fresh temporary so it survives the
-- computation of a.
-- NOTE(review): several original lines are elided in this listing (e.g.
-- 4411 the `code dst` binding head, and 4413/4415/4417-4419 around the
-- two branches), so the visible body is incomplete.
4400 genTrivialCode rep instr a b = do
4401 (b_op, b_code) <- getNonClobberedOperand b
4402 a_code <- getAnyReg a
4403 tmp <- getNewRegNat rep
4405 -- We want the value of b to stay alive across the computation of a.
4406 -- But, we want to calculate a straight into the destination register,
4407 -- because the instruction only has two operands (dst := dst `op` src).
4408 -- The troublesome case is when the result of b is in the same register
4409 -- as the destination reg. In this case, we have to save b in a
4410 -- new temporary across the computation of a.
4412 | dst `regClashesWithOp` b_op =
4414 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4416 instr (OpReg tmp) (OpReg dst)
4420 instr b_op (OpReg dst)
4422 return (Any rep code)
-- regClashesWithOp: True iff the register is the operand register itself,
-- or appears in the operand's address mode.
4424 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4425 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4426 reg `regClashesWithOp` _ = False
-- trivialUCode (i386/x86_64): unary operation; the operand is computed
-- directly into the destination register and the instruction applied
-- in place.
-- NOTE(review): original lines 4432-4436 (the `code dst` binding) are
-- elided in this listing.
4430 trivialUCode rep instr x = do
4431 x_code <- getAnyReg x
4437 return (Any rep code)
4441 #if i386_TARGET_ARCH
-- trivialFCode (i386, x87): two-operand floating-point operation.  x is
-- placed in a non-clobbered register so it survives the computation of y.
-- NOTE(review): original lines 4446-4449 and 4451 (the `code dst`
-- binding head) are elided in this listing.
4443 trivialFCode pk instr x y = do
4444 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4445 (y_reg, y_code) <- getSomeReg y
4450 instr pk x_reg y_reg dst
4452 return (Any pk code)
4456 #if x86_64_TARGET_ARCH
-- trivialFCode (x86_64, SSE2): floating-point ops use the same
-- two-operand scheme as integer ops, so reuse genTrivialCode with the
-- rep baked into the instruction constructor.
4458 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- trivialUFCode: unary floating-point operation (register source to
-- register destination).
-- NOTE(review): original lines 4466-4470 (the `code dst` binding) are
-- elided in this listing.
4464 trivialUFCode rep instr x = do
4465 (x_reg, x_code) <- getSomeReg x
4471 return (Any rep code)
4473 #endif /* i386_TARGET_ARCH */
4475 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4477 #if sparc_TARGET_ARCH
-- trivialCode (sparc): three-address ALU operation.
-- First clause: an integer-literal second operand is emitted as an
-- immediate (RIImm); the guard restricting the literal's range is elided
-- in this listing (original lines 4480-4481) — presumably a 13-bit
-- immediate check, confirm against the full source.
-- Second clause: both operands in registers (RIReg).
-- NOTE(review): tmp/tmp1/tmp2 are requested but never used in the
-- visible lines — apparently dead code; confirm against the full source.
4479 trivialCode pk instr x (CmmLit (CmmInt y d))
4482 (src1, code) <- getSomeReg x
4483 tmp <- getNewRegNat I32
4485 src2 = ImmInt (fromInteger y)
4486 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4487 return (Any I32 code__2)
4489 trivialCode pk instr x y = do
4490 (src1, code1) <- getSomeReg x
4491 (src2, code2) <- getSomeReg y
4492 tmp1 <- getNewRegNat I32
4493 tmp2 <- getNewRegNat I32
4495 code__2 dst = code1 `appOL` code2 `snocOL`
4496 instr src1 (RIReg src2) dst
4497 return (Any I32 code__2)
-- trivialFCode (sparc): floating-point operation whose operand reps may
-- differ (F32 vs F64).  When they differ, the F32 operand is promoted to
-- F64 via FxTOy into a fresh F64 temporary before applying the
-- instruction, and the result rep is F64.
-- NOTE(review): the bindings of pk1/pk2 and the `if pk1 == pk2` head are
-- elided in this listing (original lines 4506 and 4508-4513); tmp1/tmp2
-- appear unused in the visible lines.
4500 trivialFCode pk instr x y = do
4501 (src1, code1) <- getSomeReg x
4502 (src2, code2) <- getSomeReg y
4503 tmp1 <- getNewRegNat (cmmExprRep x)
4504 tmp2 <- getNewRegNat (cmmExprRep y)
4505 tmp <- getNewRegNat F64
4507 promote x = FxTOy F32 F64 x tmp
4514 code1 `appOL` code2 `snocOL`
4515 instr pk src1 src2 dst
4516 else if pk1 == F32 then
4517 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4518 instr F64 tmp src2 dst
4520 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4521 instr F64 src1 tmp dst
4522 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- trivialUCode (sparc): unary ALU operation, source passed as
-- register-or-immediate (here always RIReg).
-- NOTE(review): tmp is requested but unused in the visible lines.
4525 trivialUCode pk instr x = do
4526 (src, code) <- getSomeReg x
4527 tmp <- getNewRegNat pk
4529 code__2 dst = code `snocOL` instr (RIReg src) dst
4530 return (Any pk code__2)
-- trivialUFCode (sparc): unary floating-point operation (plain register
-- source).
-- NOTE(review): tmp is requested but unused in the visible lines.
4533 trivialUFCode pk instr x = do
4534 (src, code) <- getSomeReg x
4535 tmp <- getNewRegNat pk
4537 code__2 dst = code `snocOL` instr src dst
4538 return (Any pk code__2)
4540 #endif /* sparc_TARGET_ARCH */
4542 #if powerpc_TARGET_ARCH
4545 Wolfgang's PowerPC version of The Rules:
4547 A slightly modified version of The Rules to take advantage of the fact
4548 that PowerPC instructions work on all registers and don't implicitly
4549 clobber any fixed registers.
4551 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4553 * If getRegister returns Any, then the code it generates may modify only:
4554 (a) fresh temporaries
4555 (b) the destination register
4556 It may *not* modify global registers, unless the global
4557 register happens to be the destination register.
4558 It may not clobber any other registers. In fact, only ccalls clobber any
4560 Also, it may not modify the counter register (used by genCCall).
4562 Corollary: If a getRegister for a subexpression returns Fixed, you need
4563 not move it to a fresh temporary before evaluating the next subexpression.
4564 The Fixed register won't be modified.
4565 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4567 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4568 the value of the destination register.
-- trivialCode (powerpc): three-address ALU operation, dst := src1 `op` src2.
-- First clause: if the literal second operand fits in an immediate for
-- this rep and signedness (makeImmediate succeeds), use the immediate
-- (RIImm) form.  Second clause: both operands in registers.
-- NOTE(review): original line 4573 (presumably `= do`) is elided in this
-- listing.
4571 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4572 | Just imm <- makeImmediate rep signed y
4574 (src1, code1) <- getSomeReg x
4575 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4576 return (Any rep code)
4578 trivialCode rep signed instr x y = do
4579 (src1, code1) <- getSomeReg x
4580 (src2, code2) <- getSomeReg y
4581 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4582 return (Any rep code)
-- trivialCodeNoImm: like trivialCode, but for instructions that have no
-- immediate form — both operands always go in registers.
4584 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4585 -> CmmExpr -> CmmExpr -> NatM Register
4586 trivialCodeNoImm rep instr x y = do
4587 (src1, code1) <- getSomeReg x
4588 (src2, code2) <- getSomeReg y
4589 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4590 return (Any rep code)
-- trivialUCode (powerpc): unary operation, dst := op src.
4592 trivialUCode rep instr x = do
4593 (src, code) <- getSomeReg x
4594 let code' dst = code `snocOL` instr dst src
4595 return (Any rep code')
4597 -- There is no "remainder" instruction on the PPC, so we have to do
4599 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- remainderCode: computes the remainder as x - (x `div` y) * y using the
-- supplied division instruction, a multiply (MULLW) and a subtract.
-- NOTE(review): the division and subtraction entries of the instruction
-- list (original lines 4607 and 4609-4610) are elided in this listing.
4601 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4602 -> CmmExpr -> CmmExpr -> NatM Register
4603 remainderCode rep div x y = do
4604 (src1, code1) <- getSomeReg x
4605 (src2, code2) <- getSomeReg y
4606 let code dst = code1 `appOL` code2 `appOL` toOL [
4608 MULLW dst dst (RIReg src2),
4611 return (Any rep code)
4613 #endif /* powerpc_TARGET_ARCH */
4616 -- -----------------------------------------------------------------------------
4617 -- Coercing to/from integer/floating-point...
4619 -- When going to integer, we truncate (round towards 0).
4621 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4622 -- conversions. We have to store temporaries in memory to move
4623 -- between the integer and the floating point register sets.
4625 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4626 -- pretend, on sparc at least, that double and float regs are separate
4627 -- kinds, so the value has to be computed into one kind before being
4628 -- explicitly "converted" to live in the other kind.
-- Type signatures for the per-architecture coercion code generators
-- below: from-rep and to-rep are passed explicitly.  The Dbl<->Flt pair
-- exists only on sparc, where float and double registers are treated as
-- separate kinds.
4630 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4631 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4633 #if sparc_TARGET_ARCH
4634 coerceDbl2Flt :: CmmExpr -> NatM Register
4635 coerceFlt2Dbl :: CmmExpr -> NatM Register
4638 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4640 #if alpha_TARGET_ARCH
-- (alpha) coerceInt2FP / coerceFP2Int: move values between the integer
-- and floating-point register sets by storing to and reloading from the
-- stack at spRel 0.
-- NOTE(review): the defining equations' head lines (original 4641-4642,
-- 4656-4657) and the paired ST/LD and CVT instructions (4650, 4652-4653,
-- 4665, 4667-4668) are elided in this listing; only the skeleton is
-- visible.  The first equation returns F64, the second IntRep.
4643 = getRegister x `thenNat` \ register ->
4644 getNewRegNat IntRep `thenNat` \ reg ->
4646 code = registerCode register reg
4647 src = registerName register reg
4649 code__2 dst = code . mkSeqInstrs [
4651 LD TF dst (spRel 0),
4654 return (Any F64 code__2)
4658 = getRegister x `thenNat` \ register ->
4659 getNewRegNat F64 `thenNat` \ tmp ->
4661 code = registerCode register tmp
4662 src = registerName register tmp
4664 code__2 dst = code . mkSeqInstrs [
4666 ST TF tmp (spRel 0),
4669 return (Any IntRep code__2)
4671 #endif /* alpha_TARGET_ARCH */
4673 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4675 #if i386_TARGET_ARCH
-- (i386, x87) coerceInt2FP / coerceFP2Int: one pseudo-instruction each,
-- selected by the target (resp. source) float rep: GITOF/GITOD for
-- int->float, GFTOI/GDTOI for float->int.
-- NOTE(review): original lines 4679 and 4690 (the `let` heads) are
-- elided in this listing.
4677 coerceInt2FP from to x = do
4678 (x_reg, x_code) <- getSomeReg x
4680 opc = case to of F32 -> GITOF; F64 -> GITOD
4681 code dst = x_code `snocOL` opc x_reg dst
4682 -- ToDo: works for non-I32 reps?
4684 return (Any to code)
4688 coerceFP2Int from to x = do
4689 (x_reg, x_code) <- getSomeReg x
4691 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4692 code dst = x_code `snocOL` opc x_reg dst
4693 -- ToDo: works for non-I32 reps?
4695 return (Any to code)
4697 #endif /* i386_TARGET_ARCH */
4699 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4701 #if x86_64_TARGET_ARCH
-- (x86_64, SSE2) coercions: CVTTSS2SIQ/CVTTSD2SIQ truncate toward zero
-- on the way from float to integer; CVTSI2SS/CVTSI2SD go the other way;
-- coerceFP2FP converts between the two float widths with CVTSD2SS /
-- CVTSS2SD.
-- NOTE(review): the `let` heads (original 4705, 4713, 4722) are elided
-- in this listing.
4703 coerceFP2Int from to x = do
4704 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4706 opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4707 code dst = x_code `snocOL` opc x_op dst
4709 return (Any to code) -- works even if the destination rep is <I32
4711 coerceInt2FP from to x = do
4712 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4714 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4715 code dst = x_code `snocOL` opc x_op dst
4717 return (Any to code) -- works even if the destination rep is <I32
-- coerceFP2FP: convert a float value to the other float rep (x86_64 only).
4719 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4720 coerceFP2FP to x = do
4721 (x_reg, x_code) <- getSomeReg x
4723 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4724 code dst = x_code `snocOL` opc x_reg dst
4726 return (Any to code)
4730 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4732 #if sparc_TARGET_ARCH
-- (sparc) coerceInt2FP: bounce the integer value through the stack
-- (store/reload at spRel (-2)) to move it into the FP register set, then
-- convert in place with FxTOy.
4734 coerceInt2FP pk1 pk2 x = do
4735 (src, code) <- getSomeReg x
4737 code__2 dst = code `appOL` toOL [
4738 ST pk1 src (spRel (-2)),
4739 LD pk1 (spRel (-2)) dst,
4740 FxTOy pk1 pk2 dst dst]
4741 return (Any pk2 code__2)
-- (sparc) coerceFP2Int: convert in an FP temporary, then bounce the
-- result through the stack (spRel (-2)) to land in an integer register.
-- The ASSERT restricts fprep to the two float reps.
-- NOTE(review): `reg` is requested but unused in the visible lines, and
-- original line 4750 (head of the instruction list) is elided here.
4744 coerceFP2Int pk fprep x = do
4745 (src, code) <- getSomeReg x
4746 reg <- getNewRegNat fprep
4747 tmp <- getNewRegNat pk
4749 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4751 FxTOy fprep pk src tmp,
4752 ST pk tmp (spRel (-2)),
4753 LD pk (spRel (-2)) dst]
4754 return (Any pk code__2)
-- (sparc) coerceDbl2Flt: a single FxTOy F64->F32 register conversion.
4757 coerceDbl2Flt x = do
4758 (src, code) <- getSomeReg x
4759 return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
-- (sparc) coerceFlt2Dbl: a single FxTOy F32->F64 register conversion.
4762 coerceFlt2Dbl x = do
4763 (src, code) <- getSomeReg x
4764 return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4766 #endif /* sparc_TARGET_ARCH */
4768 #if powerpc_TARGET_ARCH
-- (powerpc) coerceInt2FP: there is no direct int->FP move, so use the
-- classic bit-pattern trick: flip the integer's sign bit (XORIS 0x8000),
-- store it as the low word beneath the high word 0x4330 on the stack,
-- load the pair as an F64, then subtract the magic constant
-- 0x4330000080000000 (materialised as static data at lbl and fetched via
-- the dynamic reference).  Sub-word sources are sign-extended first
-- (maybe_exts); an F32 target gets a final FRSP rounding (maybe_frsp).
-- NOTE(review): several lines are elided in this listing — e.g. the
-- LDATA head around original 4779-4780, the load of the constant near
-- 4789, and the catch-all cases of maybe_exts/maybe_frsp (4796, 4799).
4769 coerceInt2FP fromRep toRep x = do
4770 (src, code) <- getSomeReg x
4771 lbl <- getNewLabelNat
4772 itmp <- getNewRegNat I32
4773 ftmp <- getNewRegNat F64
4774 dflags <- getDynFlagsNat
4775 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4776 Amode addr addr_code <- getAmode dynRef
4778 code' dst = code `appOL` maybe_exts `appOL` toOL [
4781 CmmStaticLit (CmmInt 0x43300000 I32),
4782 CmmStaticLit (CmmInt 0x80000000 I32)],
4783 XORIS itmp src (ImmInt 0x8000),
4784 ST I32 itmp (spRel 3),
4785 LIS itmp (ImmInt 0x4330),
4786 ST I32 itmp (spRel 2),
4787 LD F64 ftmp (spRel 2)
4788 ] `appOL` addr_code `appOL` toOL [
4790 FSUB F64 dst ftmp dst
4791 ] `appOL` maybe_frsp dst
4793 maybe_exts = case fromRep of
4794 I8 -> unitOL $ EXTS I8 src src
4795 I16 -> unitOL $ EXTS I16 src src
4797 maybe_frsp dst = case toRep of
4798 F32 -> unitOL $ FRSP dst dst
4800 return (Any toRep code')
-- (powerpc) coerceFP2Int: convert to an integer inside an FP register
-- (the conversion instruction at original line 4809 is elided in this
-- listing — presumably FCTIWZ, confirm against the full source), store
-- the 64-bit FP register to the stack, and read back only the low word;
-- the high word is undefined.
4802 coerceFP2Int fromRep toRep x = do
4803 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4804 (src, code) <- getSomeReg x
4805 tmp <- getNewRegNat F64
4807 code' dst = code `appOL` toOL [
4808 -- convert to int in FP reg
4810 -- store value (64bit) from FP to stack
4811 ST F64 tmp (spRel 2),
4812 -- read low word of value (high word is undefined)
4813 LD I32 dst (spRel 3)]
4814 return (Any toRep code')
4815 #endif /* powerpc_TARGET_ARCH */
4818 -- -----------------------------------------------------------------------------
4819 -- eXTRA_STK_ARGS_HERE
4821 -- We (allegedly) put the first six C-call arguments in registers;
4822 -- where do we start putting the rest of them?
4824 -- Moved from MachInstrs (SDM):
4826 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4827 eXTRA_STK_ARGS_HERE :: Int
4829 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))