-- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr )
34 -- Our intermediate code:
35 import PprCmm ( pprExpr )
39 import ClosureInfo ( C_SRT(..) )
42 import StaticFlags ( opt_PIC )
43 import ForeignCall ( CCallConv(..) )
48 import FastBool ( isFastTrue )
49 import Constants ( wORD_SIZE )
51 import Debug.Trace ( trace )
53 import Control.Monad ( mapAndUnzipM )
54 import Data.Maybe ( fromJust )
59 -- -----------------------------------------------------------------------------
60 -- Top-level of the instruction selector
62 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
63 -- They are really trees of insns to facilitate fast appending, where a
64 -- left-to-right traversal (pre-order?) yields the insns in the correct
67 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm entity.  A procedure is lowered
-- block by block; a data section passes through untranslated.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  -- Each Cmm block may expand into several native blocks plus
  -- out-of-line static data (see basicBlockCodeGen).
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
  -- If a PIC base register was demanded during codegen, prepend the
  -- code that initializes it.
        Just picBase -> initializePicBase picBase tops
        Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Generate native code for a single Cmm basic block, returning the
-- resulting native block(s) plus any static data lifted out of it.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
        -- Right fold so instructions stay in source order within each
        -- accumulated block; NEWBLOCK starts a fresh block, LDATA is
        -- hoisted out into the statics list.
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Concatenate the instruction sequences selected for each statement.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
 = do instrss <- mapM stmtToInstrs stmts
      return (concatOL instrss)
-- | Select instructions for a single Cmm statement.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

      -- Register assignment: dispatch on the representation of the
      -- target register; I64 takes the special 32-bit-platform path.
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      | kind == I64 -> assignReg_I64Code reg src
      | otherwise -> assignReg_IntCode kind reg src
        where kind = cmmRegRep reg

      -- Memory store: same dispatch, keyed on the rep of the value.
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64 -> assignMem_I64Code addr src
      | otherwise -> assignMem_IntCode kind addr src
        where kind = cmmExprRep src

    CmmCall target result_regs args _ _
       -> genCCall target result_regs args

    CmmBranch id -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids -> genSwitch arg ids
    CmmJump arg params -> genJump arg
135 -- -----------------------------------------------------------------------------
136 -- General things for putting together code sequences
138 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
139 -- CmmExprs into CmmRegOff?
-- | Rewrite a 'CmmRegOff' node into the equivalent explicit addition
-- @reg + off@, performed at the register's own representation, so the
-- rest of the instruction selector only has to handle 'CmmMachOp'.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let rep    = cmmRegRep reg
        offLit = CmmLit (CmmInt (fromIntegral off) rep)
    in  CmmMachOp (MO_Add rep) [CmmReg reg, offLit]
145 -- -----------------------------------------------------------------------------
146 -- Code gen for 64-bit arithmetic on 32-bit platforms
149 Simple support for generating 64-bit code (ie, 64 bit values and 64
150 bit assignments) on 32-bit platforms. Unlike the main code generator
151 we merely shoot for generating working code as simply as possible, and
152 pay little attention to code quality. Specifically, there is no
153 attempt to deal cleverly with the fixed-vs-floating register
154 distinction; all values are generated into (pairs of) floating
155 registers, even if this would mean some redundant reg-reg moves as a
156 result. Only one of the VRegUniques is returned, since it will be
157 of the VRegUniqueLo form, and the upper-half VReg can be determined
158 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit platform: the
-- generated code together with the virtual register holding the LOW
-- 32 bits.  The high half is always the paired register obtained via
-- getHiVRegFromLo, so only one Reg needs to be carried around.
data ChildCode64        -- a.k.a "Register64"
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified
#if WORD_SIZE_IN_BITS==32

-- | Store a 64-bit value to memory (32-bit targets only).
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- | Assign a 64-bit value to a register (32-bit targets only).
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
-- | Compute a 64-bit value into a register pair; see 'ChildCode64'.
iselExpr64 :: CmmExpr -> NatM ChildCode64

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-- Compute the value into a register pair, then emit two 32-bit stores:
-- low word at the address, high word 4 bytes above it.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        -- NOTE(review): 'fromJust' assumes the amode always admits a +4
        -- offset -- confirm getAmode never returns one that does not.
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Register-to-register 64-bit assignment: one 32-bit MOV per half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
         r_dst_lo = mkVReg u_dst I32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers may hold 64-bit values at this stage.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- Loading a 64-bit literal: move each 32-bit half into its register
-- with an immediate MOV.
--
-- Bug fix: the high word 'q' was previously computed by truncating i
-- to Word32 BEFORE shifting right by 32 (the type annotation forced
-- 'shiftR' to run at Word32), which always yields 0.  The shift must
-- be performed on the full Integer first, then truncated.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
-- Loading a 64-bit value: two 32-bit loads into a fresh register pair,
-- low word from the base address, high word from +4.
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Reading a local I64 register costs no code: its low half IS the
-- vreg, and the high half is recoverable via getHiVRegFromLo.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
   = return (ChildCode64 nilOL (mkVReg vu I32))

-- we handle addition, but rather badly
-- 64-bit add of a computed value and a literal: ADD the low words
-- (which sets the x86 carry flag), then ADC the high words so the
-- carry propagates.
--
-- Bug fix: the literal's high word 'q' was previously computed by
-- truncating i to Word32 BEFORE shifting right by 32, which always
-- yields 0.  The shift must be performed on the full Integer first.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                       ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV I32 (OpReg r1hi) (OpReg rhi),
                       ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)
-- 64-bit add of two computed values: ADD the low words (sets the x86
-- carry flag), then ADC the high words so the carry propagates.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpReg r2lo) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Zero-extension to 64 bits: compute the value into the low register
-- and clear the paired high register with an immediate 0.
iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
     r_dst_lo <- getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
            ChildCode64 (code `snocOL`
                         MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))

-- Any other 64-bit expression reaching here is a bug in the caller.
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */
289 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

-- Store a 64-bit value.  The high word is stored at offset 0 and the
-- low word at offset 4 (SPARC byte order).
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     -- NOTE(review): the address is computed twice (getAmode above and
     -- getSomeReg below); only the getSomeReg result feeds the STs.
     (src, code) <- getSomeReg addrTree
         rhi = getHiVRegFromLo rlo
         mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)

-- Register-to-register 64-bit assignment: one move per half, built
-- from OR with %g0 (the SPARC register-move idiom).
assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
         r_dst_lo = mkVReg u_dst pk
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)

-- Only local registers may hold 64-bit values at this stage.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"


-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
-- = panic "iselExpr64(???)"

-- Loading a 64-bit value: two 32-bit loads, high word from offset 0.
-- NOTE(review): the pattern matches AddrRegReg r1 r2 but only r1 is
-- used in the loads below -- verify r2 is known to be %g0 here.
iselExpr64 (CmmLoad addrTree I64) = do
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
     rlo <- getNewRegNat I32
     let rhi = getHiVRegFromLo rlo
         mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
         ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)

-- Reading a local I64 register: copy both halves into a fresh pair
-- via the OR-with-%g0 move idiom.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64 _))) = do
     r_dst_lo <- getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq I32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo

-- Any other 64-bit expression reaching here is a bug in the caller.
   = pprPanic "iselExpr64(sparc)" (ppr expr)

#endif /* sparc_TARGET_ARCH */
351 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if powerpc_TARGET_ARCH

-- | Amodes for the high and low words of an I64 in memory, plus the
-- address set-up code.  Falls back to materialising the address in a
-- register when a +4 offset cannot be formed on the original amode.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                           return (AddrRegImm hi_ptr (ImmInt 0),
                                   AddrRegImm hi_ptr (ImmInt 4),

-- Store a 64-bit value: high word at the lower address (PowerPC byte
-- order), one 32-bit ST per half.
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
              rhi = getHiVRegFromLo rlo

              mov_hi = ST I32 rhi hi_addr
              mov_lo = ST I32 rlo lo_addr

        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Register-to-register 64-bit assignment: one MR (move register)
-- per half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
       r_dst_lo = mkVReg u_dst I32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo = MR r_dst_lo r_src_lo
       mov_hi = MR r_dst_hi r_src_hi
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers may hold 64-bit values at this stage.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"


-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "iselExpr64(???)"

-- Loading a 64-bit value: one 32-bit LD per half via the hi/lo amodes.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Reading a local I64 register costs no code; the vreg pair is the
-- value.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- Loading a 64-bit literal: build each 32-bit half with the standard
-- PowerPC lis/ori pair (LIS sets the upper 16 bits of a register, OR
-- with an immediate fills in the lower 16).
--
-- Fixes two bugs in the previous version:
--   * halves were taken by truncating i to Word16 BEFORE shifting
--     (the type annotation forced 'shiftR' to run at Word16), so every
--     half except half0 came out 0; shift the full Integer first;
--   * the final OR wrote half2 into rlo instead of rhi, corrupting
--     the low word and leaving the high word's bottom half unset.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- 64-bit add of two computed values using carrying adds: ADDC adds
-- the low words and records the carry for the high-word add.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ ADDC rlo r1lo r2lo,
   return (ChildCode64 code rlo)

-- Zero-extending a 32-bit value: MR it into the low register and load
-- immediate 0 into the high register.
iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
    (expr_reg,expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Any other 64-bit expression reaching here is a bug in the caller.
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */
454 -- -----------------------------------------------------------------------------
455 -- The 'Register' type
457 -- 'Register's passed up the tree. If the stix code forces the register
458 -- to live in a pre-decided machine register, it comes out as @Fixed@;
459 -- otherwise, it comes out as @Any@, and the parent can decide which
460 -- register to put it in.
463 = Fixed MachRep Reg InstrBlock
464 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register', leaving the actual
-- register assignment (or deferred code generator) untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register newRep =
    case register of
      Fixed _ reg code -> Fixed newRep reg code
      Any   _ codefn   -> Any   newRep codefn
471 -- -----------------------------------------------------------------------------
472 -- Utils based on getRegister, below
-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
    r <- getRegister expr
        -- An 'Any' result is materialised into a fresh virtual
        -- register chosen here.
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
486 -- -----------------------------------------------------------------------------
487 -- Grab the Reg for a CmmReg
-- | The native 'Reg' backing a 'CmmReg'.  Locals become virtual
-- registers; globals must map to a real machine register by now.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk _))

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "getRegister(???)"

getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg))
      reg <- getPicBaseNat wordRep
      return (Fixed wordRep reg nilOL)

-- A plain register reference needs no code at all.
getRegister (CmmReg reg)
  = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)

getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- Truncating the shifted-down high word: just hand back the paired
-- high register of the 64-bit result; no extra code needed.
getRegister (CmmMachOp (MO_U_Conv I64 I32)
             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_S_Conv I64 I32)
             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 (getHiVRegFromLo rlo) code

-- Plain 64 -> 32 truncation: the low register already holds the
-- answer.
getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 rlo code

getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 rlo code
553 -- end of machine-"independent" bit; here we go on the rest...
555 #if alpha_TARGET_ARCH
557 getRegister (StDouble d)
558 = getBlockIdNat `thenNat` \ lbl ->
559 getNewRegNat PtrRep `thenNat` \ tmp ->
560 let code dst = mkSeqInstrs [
561 LDATA RoDataSegment lbl [
562 DATA TF [ImmLab (rational d)]
564 LDA tmp (AddrImm (ImmCLbl lbl)),
565 LD TF dst (AddrReg tmp)]
567 return (Any F64 code)
569 getRegister (StPrim primop [x]) -- unary PrimOps
571 IntNegOp -> trivialUCode (NEG Q False) x
573 NotOp -> trivialUCode NOT x
575 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
576 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
578 OrdOp -> coerceIntCode IntRep x
581 Float2IntOp -> coerceFP2Int x
582 Int2FloatOp -> coerceInt2FP pr x
583 Double2IntOp -> coerceFP2Int x
584 Int2DoubleOp -> coerceInt2FP pr x
586 Double2FloatOp -> coerceFltCode x
587 Float2DoubleOp -> coerceFltCode x
589 other_op -> getRegister (StCall fn CCallConv F64 [x])
591 fn = case other_op of
592 FloatExpOp -> fsLit "exp"
593 FloatLogOp -> fsLit "log"
594 FloatSqrtOp -> fsLit "sqrt"
595 FloatSinOp -> fsLit "sin"
596 FloatCosOp -> fsLit "cos"
597 FloatTanOp -> fsLit "tan"
598 FloatAsinOp -> fsLit "asin"
599 FloatAcosOp -> fsLit "acos"
600 FloatAtanOp -> fsLit "atan"
601 FloatSinhOp -> fsLit "sinh"
602 FloatCoshOp -> fsLit "cosh"
603 FloatTanhOp -> fsLit "tanh"
604 DoubleExpOp -> fsLit "exp"
605 DoubleLogOp -> fsLit "log"
606 DoubleSqrtOp -> fsLit "sqrt"
607 DoubleSinOp -> fsLit "sin"
608 DoubleCosOp -> fsLit "cos"
609 DoubleTanOp -> fsLit "tan"
610 DoubleAsinOp -> fsLit "asin"
611 DoubleAcosOp -> fsLit "acos"
612 DoubleAtanOp -> fsLit "atan"
613 DoubleSinhOp -> fsLit "sinh"
614 DoubleCoshOp -> fsLit "cosh"
615 DoubleTanhOp -> fsLit "tanh"
617 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
619 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
621 CharGtOp -> trivialCode (CMP LTT) y x
622 CharGeOp -> trivialCode (CMP LE) y x
623 CharEqOp -> trivialCode (CMP EQQ) x y
624 CharNeOp -> int_NE_code x y
625 CharLtOp -> trivialCode (CMP LTT) x y
626 CharLeOp -> trivialCode (CMP LE) x y
628 IntGtOp -> trivialCode (CMP LTT) y x
629 IntGeOp -> trivialCode (CMP LE) y x
630 IntEqOp -> trivialCode (CMP EQQ) x y
631 IntNeOp -> int_NE_code x y
632 IntLtOp -> trivialCode (CMP LTT) x y
633 IntLeOp -> trivialCode (CMP LE) x y
635 WordGtOp -> trivialCode (CMP ULT) y x
636 WordGeOp -> trivialCode (CMP ULE) x y
637 WordEqOp -> trivialCode (CMP EQQ) x y
638 WordNeOp -> int_NE_code x y
639 WordLtOp -> trivialCode (CMP ULT) x y
640 WordLeOp -> trivialCode (CMP ULE) x y
642 AddrGtOp -> trivialCode (CMP ULT) y x
643 AddrGeOp -> trivialCode (CMP ULE) y x
644 AddrEqOp -> trivialCode (CMP EQQ) x y
645 AddrNeOp -> int_NE_code x y
646 AddrLtOp -> trivialCode (CMP ULT) x y
647 AddrLeOp -> trivialCode (CMP ULE) x y
649 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
650 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
651 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
652 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
653 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
654 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
656 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
657 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
658 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
659 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
660 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
661 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
663 IntAddOp -> trivialCode (ADD Q False) x y
664 IntSubOp -> trivialCode (SUB Q False) x y
665 IntMulOp -> trivialCode (MUL Q False) x y
666 IntQuotOp -> trivialCode (DIV Q False) x y
667 IntRemOp -> trivialCode (REM Q False) x y
669 WordAddOp -> trivialCode (ADD Q False) x y
670 WordSubOp -> trivialCode (SUB Q False) x y
671 WordMulOp -> trivialCode (MUL Q False) x y
672 WordQuotOp -> trivialCode (DIV Q True) x y
673 WordRemOp -> trivialCode (REM Q True) x y
675 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
676 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
677 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
678 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
680 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
681 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
682 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
683 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
685 AddrAddOp -> trivialCode (ADD Q False) x y
686 AddrSubOp -> trivialCode (SUB Q False) x y
687 AddrRemOp -> trivialCode (REM Q True) x y
689 AndOp -> trivialCode AND x y
690 OrOp -> trivialCode OR x y
691 XorOp -> trivialCode XOR x y
692 SllOp -> trivialCode SLL x y
693 SrlOp -> trivialCode SRL x y
695 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
696 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
697 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
699 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
700 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y])
702 {- ------------------------------------------------------------
703 Some bizarre special code for getting condition codes into
704 registers. Integer non-equality is a test for equality
705 followed by an XOR with 1. (Integer comparisons always set
706 the result register to 0 or 1.) Floating point comparisons of
707 any kind leave the result in a floating point register, so we
708 need to wrangle an integer register out of things.
710 int_NE_code :: StixTree -> StixTree -> NatM Register
713 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
714 getNewRegNat IntRep `thenNat` \ tmp ->
716 code = registerCode register tmp
717 src = registerName register tmp
718 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
720 return (Any IntRep code__2)
722 {- ------------------------------------------------------------
723 Comments for int_NE_code also apply to cmpF_code
726 :: (Reg -> Reg -> Reg -> Instr)
728 -> StixTree -> StixTree
731 cmpF_code instr cond x y
732 = trivialFCode pr instr x y `thenNat` \ register ->
733 getNewRegNat F64 `thenNat` \ tmp ->
734 getBlockIdNat `thenNat` \ lbl ->
736 code = registerCode register tmp
737 result = registerName register tmp
739 code__2 dst = code . mkSeqInstrs [
740 OR zeroh (RIImm (ImmInt 1)) dst,
741 BF cond result (ImmCLbl lbl),
742 OR zeroh (RIReg zeroh) dst,
745 return (Any IntRep code__2)
747 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
748 ------------------------------------------------------------
750 getRegister (CmmLoad pk mem)
751 = getAmode mem `thenNat` \ amode ->
753 code = amodeCode amode
754 src = amodeAddr amode
755 size = primRepToSize pk
756 code__2 dst = code . mkSeqInstr (LD size dst src)
758 return (Any pk code__2)
760 getRegister (StInt i)
763 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
765 return (Any IntRep code)
768 code dst = mkSeqInstr (LDI Q dst src)
770 return (Any IntRep code)
772 src = ImmInt (fromInteger i)
777 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
779 return (Any PtrRep code)
782 imm__2 = case imm of Just x -> x
784 #endif /* alpha_TARGET_ARCH */
786 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
790 getRegister (CmmLit (CmmFloat f F32)) = do
791 lbl <- getNewLabelNat
792 dflags <- getDynFlagsNat
793 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
794 Amode addr addr_code <- getAmode dynRef
798 CmmStaticLit (CmmFloat f F32)]
799 `consOL` (addr_code `snocOL`
802 return (Any F32 code)
805 getRegister (CmmLit (CmmFloat d F64))
807 = let code dst = unitOL (GLDZ dst)
808 in return (Any F64 code)
811 = let code dst = unitOL (GLD1 dst)
812 in return (Any F64 code)
815 lbl <- getNewLabelNat
816 dflags <- getDynFlagsNat
817 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
818 Amode addr addr_code <- getAmode dynRef
822 CmmStaticLit (CmmFloat d F64)]
823 `consOL` (addr_code `snocOL`
826 return (Any F64 code)
828 #endif /* i386_TARGET_ARCH */
830 #if x86_64_TARGET_ARCH
832 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
833 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
834 -- I don't know why there are xorpd, xorps, and pxor instructions.
835 -- They all appear to do the same thing --SDM
836 return (Any rep code)
838 getRegister (CmmLit (CmmFloat f rep)) = do
839 lbl <- getNewLabelNat
840 let code dst = toOL [
843 CmmStaticLit (CmmFloat f rep)],
844 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
847 return (Any rep code)
849 #endif /* x86_64_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load:
-- fuse the load and the extension into a single movzx/movsx.
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I32 code)
#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load, widening to 64
-- bits: again fused into a single extending load.
getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I32) addr
  return (Any I64 code)
#if x86_64_TARGET_ARCH
-- PIC base plus a constant displacement collapses into a single
-- RIP-relative LEA.
getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
    = return $ Any I64 (\dst -> unitOL $
        LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
908 #if x86_64_TARGET_ARCH
909 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
910 x_code <- getAnyReg x
911 lbl <- getNewLabelNat
913 code dst = x_code dst `appOL` toOL [
914 -- This is how gcc does it, so it can't be that bad:
915 LDATA ReadOnlyData16 [
918 CmmStaticLit (CmmInt 0x80000000 I32),
919 CmmStaticLit (CmmInt 0 I32),
920 CmmStaticLit (CmmInt 0 I32),
921 CmmStaticLit (CmmInt 0 I32)
923 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
924 -- xorps, so we need the 128-bit constant
925 -- ToDo: rip-relative
928 return (Any F32 code)
930 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
931 x_code <- getAnyReg x
932 lbl <- getNewLabelNat
934 -- This is how gcc does it, so it can't be that bad:
935 code dst = x_code dst `appOL` toOL [
936 LDATA ReadOnlyData16 [
939 CmmStaticLit (CmmInt 0x8000000000000000 I64),
940 CmmStaticLit (CmmInt 0 I64)
942 -- gcc puts an unpck here. Wonder if we need it.
943 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
944 -- xorpd, so we need the 128-bit constant
947 return (Any F64 code)
950 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
952 getRegister (CmmMachOp mop [x]) -- unary MachOps
955 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
956 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
959 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
960 MO_Not rep -> trivialUCode rep (NOT rep) x
963 MO_U_Conv I32 I8 -> toI8Reg I32 x
964 MO_S_Conv I32 I8 -> toI8Reg I32 x
965 MO_U_Conv I16 I8 -> toI8Reg I16 x
966 MO_S_Conv I16 I8 -> toI8Reg I16 x
967 MO_U_Conv I32 I16 -> toI16Reg I32 x
968 MO_S_Conv I32 I16 -> toI16Reg I32 x
969 #if x86_64_TARGET_ARCH
970 MO_U_Conv I64 I32 -> conversionNop I64 x
971 MO_S_Conv I64 I32 -> conversionNop I64 x
972 MO_U_Conv I64 I16 -> toI16Reg I64 x
973 MO_S_Conv I64 I16 -> toI16Reg I64 x
974 MO_U_Conv I64 I8 -> toI8Reg I64 x
975 MO_S_Conv I64 I8 -> toI8Reg I64 x
978 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
979 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
982 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
983 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
984 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
986 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
987 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
988 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
990 #if x86_64_TARGET_ARCH
991 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
992 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
993 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
994 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
995 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
996 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
997 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
998 -- However, we don't want the register allocator to throw it
999 -- away as an unnecessary reg-to-reg move, so we keep it in
1000 -- the form of a movzl and print it as a movl later.
1003 #if i386_TARGET_ARCH
1004 MO_S_Conv F32 F64 -> conversionNop F64 x
1005 MO_S_Conv F64 F32 -> conversionNop F32 x
1007 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1008 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1012 | isFloatingRep from -> coerceFP2Int from to x
1013 | isFloatingRep to -> coerceInt2FP from to x
1015 other -> pprPanic "getRegister" (pprMachOp mop)
1017 -- signed or unsigned extension.
1018 integerExtend from to instr expr = do
1019 (reg,e_code) <- if from == I8 then getByteReg expr
1020 else getSomeReg expr
1024 instr from (OpReg reg) (OpReg dst)
1025 return (Any to code)
1027 toI8Reg new_rep expr
1028 = do codefn <- getAnyReg expr
1029 return (Any new_rep codefn)
1030 -- HACK: use getAnyReg to get a byte-addressable register.
1031 -- If the source was a Fixed register, this will add the
1032 -- mov instruction to put it into the desired destination.
1033 -- We're assuming that the destination won't be a fixed
1034 -- non-byte-addressable register; it won't be, because all
1035 -- fixed registers are word-sized.
1037 toI16Reg = toI8Reg -- for now
1039 conversionNop new_rep expr
1040 = do e_code <- getRegister expr
1041 return (swizzleRegisterRep e_code new_rep)
1044 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1046 MO_Eq F32 -> condFltReg EQQ x y
1047 MO_Ne F32 -> condFltReg NE x y
1048 MO_S_Gt F32 -> condFltReg GTT x y
1049 MO_S_Ge F32 -> condFltReg GE x y
1050 MO_S_Lt F32 -> condFltReg LTT x y
1051 MO_S_Le F32 -> condFltReg LE x y
1053 MO_Eq F64 -> condFltReg EQQ x y
1054 MO_Ne F64 -> condFltReg NE x y
1055 MO_S_Gt F64 -> condFltReg GTT x y
1056 MO_S_Ge F64 -> condFltReg GE x y
1057 MO_S_Lt F64 -> condFltReg LTT x y
1058 MO_S_Le F64 -> condFltReg LE x y
1060 MO_Eq rep -> condIntReg EQQ x y
1061 MO_Ne rep -> condIntReg NE x y
1063 MO_S_Gt rep -> condIntReg GTT x y
1064 MO_S_Ge rep -> condIntReg GE x y
1065 MO_S_Lt rep -> condIntReg LTT x y
1066 MO_S_Le rep -> condIntReg LE x y
1068 MO_U_Gt rep -> condIntReg GU x y
1069 MO_U_Ge rep -> condIntReg GEU x y
1070 MO_U_Lt rep -> condIntReg LU x y
1071 MO_U_Le rep -> condIntReg LEU x y
1073 #if i386_TARGET_ARCH
1074 MO_Add F32 -> trivialFCode F32 GADD x y
1075 MO_Sub F32 -> trivialFCode F32 GSUB x y
1077 MO_Add F64 -> trivialFCode F64 GADD x y
1078 MO_Sub F64 -> trivialFCode F64 GSUB x y
1080 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1081 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1084 #if x86_64_TARGET_ARCH
1085 MO_Add F32 -> trivialFCode F32 ADD x y
1086 MO_Sub F32 -> trivialFCode F32 SUB x y
1088 MO_Add F64 -> trivialFCode F64 ADD x y
1089 MO_Sub F64 -> trivialFCode F64 SUB x y
1091 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1092 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1095 MO_Add rep -> add_code rep x y
1096 MO_Sub rep -> sub_code rep x y
1098 MO_S_Quot rep -> div_code rep True True x y
1099 MO_S_Rem rep -> div_code rep True False x y
1100 MO_U_Quot rep -> div_code rep False True x y
1101 MO_U_Rem rep -> div_code rep False False x y
1103 #if i386_TARGET_ARCH
1104 MO_Mul F32 -> trivialFCode F32 GMUL x y
1105 MO_Mul F64 -> trivialFCode F64 GMUL x y
1108 #if x86_64_TARGET_ARCH
1109 MO_Mul F32 -> trivialFCode F32 MUL x y
1110 MO_Mul F64 -> trivialFCode F64 MUL x y
1113 MO_Mul rep -> let op = IMUL rep in
1114 trivialCode rep op (Just op) x y
1116 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1118 MO_And rep -> let op = AND rep in
1119 trivialCode rep op (Just op) x y
1120 MO_Or rep -> let op = OR rep in
1121 trivialCode rep op (Just op) x y
1122 MO_Xor rep -> let op = XOR rep in
1123 trivialCode rep op (Just op) x y
1125 {- Shift ops on x86s have constraints on their source, it
1126 either has to be Imm, CL or 1
1127 => trivialCode is not restrictive enough (sigh.)
1129 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1130 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1131 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1133 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1135 --------------------
1136 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1137 imulMayOflo rep a b = do
1138 (a_reg, a_code) <- getNonClobberedReg a
1139 b_code <- getAnyReg b
1141 shift_amt = case rep of
1144 _ -> panic "shift_amt"
1146 code = a_code `appOL` b_code eax `appOL`
1148 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1149 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1150 -- sign extend lower part
1151 SUB rep (OpReg edx) (OpReg eax)
1152 -- compare against upper
1153 -- eax==0 if high part == sign extended low part
1156 return (Fixed rep eax code)
1158 --------------------
1159 shift_code :: MachRep
1160 -> (Operand -> Operand -> Instr)
1165 {- Case1: shift length as immediate -}
1166 shift_code rep instr x y@(CmmLit lit) = do
1167 x_code <- getAnyReg x
1170 = x_code dst `snocOL`
1171 instr (OpImm (litToImm lit)) (OpReg dst)
1173 return (Any rep code)
1175 {- Case2: shift length is complex (non-immediate)
1176 * y must go in %ecx.
1177 * we cannot do y first *and* put its result in %ecx, because
1178 %ecx might be clobbered by x.
1179 * if we do y second, then x cannot be
1180 in a clobbered reg. Also, we cannot clobber x's reg
1181 with the instruction itself.
1183 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1184 - do y second and put its result into %ecx. x gets placed in a fresh
1185 tmp. This is likely to be better, because the reg alloc can
1186 eliminate this reg->reg move here (it won't eliminate the other one,
1187 because the move is into the fixed %ecx).
1189 shift_code rep instr x y{-amount-} = do
1190 x_code <- getAnyReg x
1191 tmp <- getNewRegNat rep
1192 y_code <- getAnyReg y
1194 code = x_code tmp `appOL`
1196 instr (OpReg ecx) (OpReg tmp)
1198 return (Fixed rep tmp code)
1200 --------------------
-- Integer addition.  When the right operand is a literal that fits in
-- a 32-bit immediate, use the three-operand (LEA-based) add_int form;
-- otherwise fall back to the generic two-operand ADD via trivialCode.
add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
add_code rep x (CmmLit (CmmInt y _)) | not (is64BitInteger y)
  = add_int rep x y
add_code rep x y
  = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1206 --------------------
-- Integer subtraction.  Subtracting a literal is rewritten as adding
-- its negation (so add_int / LEA can be used), provided the negated
-- value still fits in a 32-bit immediate.  Note: no commuting variant
-- is offered to trivialCode (Nothing), since SUB is not commutative.
sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
sub_code rep x (CmmLit (CmmInt y _)) | not (is64BitInteger (-y))
  = add_int rep x (-y)
sub_code rep x y
  = trivialCode rep (SUB rep) Nothing x y
1212 -- our three-operand add instruction:
1213 add_int rep x y = do
1214 (x_reg, x_code) <- getSomeReg x
1216 imm = ImmInt (fromInteger y)
1220 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1223 return (Any rep code)
1225 ----------------------
1226 div_code rep signed quotient x y = do
1227 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1228 x_code <- getAnyReg x
1230 widen | signed = CLTD rep
1231 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1233 instr | signed = IDIV
1236 code = y_code `appOL`
1238 toOL [widen, instr rep y_op]
1240 result | quotient = eax
1244 return (Fixed rep result code)
1247 getRegister (CmmLoad mem pk)
1250 Amode src mem_code <- getAmode mem
1252 code dst = mem_code `snocOL`
1253 IF_ARCH_i386(GLD pk src dst,
1254 MOV pk (OpAddr src) (OpReg dst))
1256 return (Any pk code)
1258 #if i386_TARGET_ARCH
1259 getRegister (CmmLoad mem pk)
1262 code <- intLoadCode (instr pk) mem
1263 return (Any pk code)
1265 instr I8 = MOVZxL pk
1268 -- we always zero-extend 8-bit loads, if we
1269 -- can't think of anything better. This is because
1270 -- we can't guarantee access to an 8-bit variant of every register
1271 -- (esi and edi don't have 8-bit variants), so to make things
1272 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1275 #if x86_64_TARGET_ARCH
1276 -- Simpler memory load code on x86_64
1277 getRegister (CmmLoad mem pk)
1279 code <- intLoadCode (MOV pk) mem
1280 return (Any pk code)
1283 getRegister (CmmLit (CmmInt 0 rep))
1285 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1286 adj_rep = case rep of I64 -> I32; _ -> rep
1287 rep1 = IF_ARCH_i386( rep, adj_rep )
1289 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1291 return (Any rep code)
1293 #if x86_64_TARGET_ARCH
1294 -- optimisation for loading small literals on x86_64: take advantage
1295 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1296 -- instruction forms are shorter.
1297 getRegister (CmmLit lit)
1298 | I64 <- cmmLitRep lit, not (isBigLit lit)
1301 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1303 return (Any I64 code)
1305 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1307 -- note1: not the same as is64BitLit, because that checks for
1308 -- signed literals that fit in 32 bits, but we want unsigned
1310 -- note2: all labels are small, because we're assuming the
1311 -- small memory model (see gcc docs, -mcmodel=small).
1314 getRegister (CmmLit lit)
1318 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1320 return (Any rep code)
1322 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- Generate code for an integer load from memory, given the load
-- instruction to use.  Returns a function from destination register
-- to code, so the caller chooses where the value lands.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mkLoad addrExpr = do
    Amode addr addrCode <- getAmode addrExpr
    let load dst = addrCode `snocOL` mkLoad (OpAddr addr) (OpReg dst)
    return load
1331 -- Compute an expression into *any* register, adding the appropriate
1332 -- move instruction if necessary.
1333 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1335 r <- getRegister expr
-- Turn a Register result into a function from destination register to
-- code.  An Any result already is such a function; a Fixed result gets
-- a reg-to-reg move appended so the value ends up where the caller
-- asked for it.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = return $ case r of
    Any _ code          -> code
    Fixed rep src fcode -> \dst -> fcode `snocOL` reg2reg rep src dst
1342 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1343 -- Fixed registers might not be byte-addressable, so we make sure we've
1344 -- got a temporary, inserting an extra reg copy if necessary.
1345 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1346 #if x86_64_TARGET_ARCH
1347 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1349 getByteReg expr = do
1350 r <- getRegister expr
1353 tmp <- getNewRegNat rep
1354 return (tmp, code tmp)
1356 | isVirtualReg reg -> return (reg,code)
1358 tmp <- getNewRegNat rep
1359 return (tmp, code `snocOL` reg2reg rep reg tmp)
1360 -- ToDo: could optimise slightly by checking for byte-addressable
1361 -- real registers, but that will happen very rarely if at all.
1364 -- Another variant: this time we want the result in a register that cannot
1365 -- be modified by code to evaluate an arbitrary expression.
1366 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1367 getNonClobberedReg expr = do
1368 r <- getRegister expr
1371 tmp <- getNewRegNat rep
1372 return (tmp, code tmp)
1374 -- only free regs can be clobbered
1375 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1376 tmp <- getNewRegNat rep
1377 return (tmp, code `snocOL` reg2reg rep reg tmp)
1381 reg2reg :: MachRep -> Reg -> Reg -> Instr
1383 #if i386_TARGET_ARCH
1384 | isFloatingRep rep = GMOV src dst
1386 | otherwise = MOV rep (OpReg src) (OpReg dst)
1388 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1390 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1392 #if sparc_TARGET_ARCH
1394 getRegister (CmmLit (CmmFloat f F32)) = do
1395 lbl <- getNewLabelNat
1396 let code dst = toOL [
1399 CmmStaticLit (CmmFloat f F32)],
1400 SETHI (HI (ImmCLbl lbl)) dst,
1401 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1402 return (Any F32 code)
1404 getRegister (CmmLit (CmmFloat d F64)) = do
1405 lbl <- getNewLabelNat
1406 let code dst = toOL [
1409 CmmStaticLit (CmmFloat d F64)],
1410 SETHI (HI (ImmCLbl lbl)) dst,
1411 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1412 return (Any F64 code)
1414 getRegister (CmmMachOp mop [x]) -- unary MachOps
1416 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1417 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1419 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1420 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1422 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1424 MO_U_Conv F64 F32-> coerceDbl2Flt x
1425 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1427 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1428 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1429 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1430 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1432 -- Conversions which are a nop on sparc
1434 | from == to -> conversionNop to x
1435 MO_U_Conv I32 to -> conversionNop to x
1436 MO_S_Conv I32 to -> conversionNop to x
1439 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1440 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1441 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1442 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1444 other_op -> panic "Unknown unary mach op"
1447 integerExtend signed from to expr = do
1448 (reg, e_code) <- getSomeReg expr
1452 ((if signed then SRA else SRL)
1453 reg (RIImm (ImmInt 0)) dst)
1454 return (Any to code)
1455 conversionNop new_rep expr
1456 = do e_code <- getRegister expr
1457 return (swizzleRegisterRep e_code new_rep)
1459 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1461 MO_Eq F32 -> condFltReg EQQ x y
1462 MO_Ne F32 -> condFltReg NE x y
1464 MO_S_Gt F32 -> condFltReg GTT x y
1465 MO_S_Ge F32 -> condFltReg GE x y
1466 MO_S_Lt F32 -> condFltReg LTT x y
1467 MO_S_Le F32 -> condFltReg LE x y
1469 MO_Eq F64 -> condFltReg EQQ x y
1470 MO_Ne F64 -> condFltReg NE x y
1472 MO_S_Gt F64 -> condFltReg GTT x y
1473 MO_S_Ge F64 -> condFltReg GE x y
1474 MO_S_Lt F64 -> condFltReg LTT x y
1475 MO_S_Le F64 -> condFltReg LE x y
1477 MO_Eq rep -> condIntReg EQQ x y
1478 MO_Ne rep -> condIntReg NE x y
1480 MO_S_Gt rep -> condIntReg GTT x y
1481 MO_S_Ge rep -> condIntReg GE x y
1482 MO_S_Lt rep -> condIntReg LTT x y
1483 MO_S_Le rep -> condIntReg LE x y
1485 MO_U_Gt I32 -> condIntReg GTT x y
1486 MO_U_Ge I32 -> condIntReg GE x y
1487 MO_U_Lt I32 -> condIntReg LTT x y
1488 MO_U_Le I32 -> condIntReg LE x y
1490 MO_U_Gt I16 -> condIntReg GU x y
1491 MO_U_Ge I16 -> condIntReg GEU x y
1492 MO_U_Lt I16 -> condIntReg LU x y
1493 MO_U_Le I16 -> condIntReg LEU x y
1495 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1496 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1498 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1500 -- ToDo: teach about V8+ SPARC div instructions
1501 MO_S_Quot I32 -> idiv (fsLit ".div") x y
1502 MO_S_Rem I32 -> idiv (fsLit ".rem") x y
1503 MO_U_Quot I32 -> idiv (fsLit ".udiv") x y
1504 MO_U_Rem I32 -> idiv (fsLit ".urem") x y
1506 MO_Add F32 -> trivialFCode F32 FADD x y
1507 MO_Sub F32 -> trivialFCode F32 FSUB x y
1508 MO_Mul F32 -> trivialFCode F32 FMUL x y
1509 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1511 MO_Add F64 -> trivialFCode F64 FADD x y
1512 MO_Sub F64 -> trivialFCode F64 FSUB x y
1513 MO_Mul F64 -> trivialFCode F64 FMUL x y
1514 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1516 MO_And rep -> trivialCode rep (AND False) x y
1517 MO_Or rep -> trivialCode rep (OR False) x y
1518 MO_Xor rep -> trivialCode rep (XOR False) x y
1520 MO_Mul rep -> trivialCode rep (SMUL False) x y
1522 MO_Shl rep -> trivialCode rep SLL x y
1523 MO_U_Shr rep -> trivialCode rep SRL x y
1524 MO_S_Shr rep -> trivialCode rep SRA x y
1527 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1528 [promote x, promote y])
1529 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1530 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64
1533 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1535 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1537 --------------------
1538 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1539 imulMayOflo rep a b = do
1540 (a_reg, a_code) <- getSomeReg a
1541 (b_reg, b_code) <- getSomeReg b
1542 res_lo <- getNewRegNat I32
1543 res_hi <- getNewRegNat I32
1545 shift_amt = case rep of
1548 _ -> panic "shift_amt"
1549 code dst = a_code `appOL` b_code `appOL`
1551 SMUL False a_reg (RIReg b_reg) res_lo,
1553 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1554 SUB False False res_lo (RIReg res_hi) dst
1556 return (Any I32 code)
1558 getRegister (CmmLoad mem pk) = do
1559 Amode src code <- getAmode mem
1561 code__2 dst = code `snocOL` LD pk src dst
1562 return (Any pk code__2)
1564 getRegister (CmmLit (CmmInt i _))
1567 src = ImmInt (fromInteger i)
1568 code dst = unitOL (OR False g0 (RIImm src) dst)
1570 return (Any I32 code)
1572 getRegister (CmmLit lit)
1573 = let rep = cmmLitRep lit
1577 OR False dst (RIImm (LO imm)) dst]
1578 in return (Any I32 code)
1580 #endif /* sparc_TARGET_ARCH */
1582 #if powerpc_TARGET_ARCH
1583 getRegister (CmmLoad mem pk)
1586 Amode addr addr_code <- getAmode mem
1587 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1588 addr_code `snocOL` LD pk dst addr
1589 return (Any pk code)
1591 -- catch simple cases of zero- or sign-extended load
1592 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1593 Amode addr addr_code <- getAmode mem
1594 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1596 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1598 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1599 Amode addr addr_code <- getAmode mem
1600 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1602 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1603 Amode addr addr_code <- getAmode mem
1604 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1606 getRegister (CmmMachOp mop [x]) -- unary MachOps
1608 MO_Not rep -> trivialUCode rep NOT x
1610 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1611 MO_S_Conv F32 F64 -> conversionNop F64 x
1614 | from == to -> conversionNop to x
1615 | isFloatingRep from -> coerceFP2Int from to x
1616 | isFloatingRep to -> coerceInt2FP from to x
1618 -- narrowing is a nop: we treat the high bits as undefined
1619 MO_S_Conv I32 to -> conversionNop to x
1620 MO_S_Conv I16 I8 -> conversionNop I8 x
1621 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1622 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1625 | from == to -> conversionNop to x
1626 -- narrowing is a nop: we treat the high bits as undefined
1627 MO_U_Conv I32 to -> conversionNop to x
1628 MO_U_Conv I16 I8 -> conversionNop I8 x
1629 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1630 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1632 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1633 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1634 MO_S_Neg rep -> trivialUCode rep NEG x
1637 conversionNop new_rep expr
1638 = do e_code <- getRegister expr
1639 return (swizzleRegisterRep e_code new_rep)
1641 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1643 MO_Eq F32 -> condFltReg EQQ x y
1644 MO_Ne F32 -> condFltReg NE x y
1646 MO_S_Gt F32 -> condFltReg GTT x y
1647 MO_S_Ge F32 -> condFltReg GE x y
1648 MO_S_Lt F32 -> condFltReg LTT x y
1649 MO_S_Le F32 -> condFltReg LE x y
1651 MO_Eq F64 -> condFltReg EQQ x y
1652 MO_Ne F64 -> condFltReg NE x y
1654 MO_S_Gt F64 -> condFltReg GTT x y
1655 MO_S_Ge F64 -> condFltReg GE x y
1656 MO_S_Lt F64 -> condFltReg LTT x y
1657 MO_S_Le F64 -> condFltReg LE x y
1659 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1660 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1662 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1663 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1664 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1665 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1667 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1668 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1669 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1670 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1672 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1673 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1674 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1675 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1677 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1678 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1679 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1680 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1682 -- optimize addition with 32-bit immediate
1686 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1687 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1690 (src, srcCode) <- getSomeReg x
1691 let imm = litToImm lit
1692 code dst = srcCode `appOL` toOL [
1693 ADDIS dst src (HA imm),
1694 ADD dst dst (RIImm (LO imm))
1696 return (Any I32 code)
1697 _ -> trivialCode I32 True ADD x y
1699 MO_Add rep -> trivialCode rep True ADD x y
1701 case y of -- subfi ('substract from' with immediate) doesn't exist
1702 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1703 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1704 _ -> trivialCodeNoImm rep SUBF y x
1706 MO_Mul rep -> trivialCode rep True MULLW x y
1708 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1710 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1711 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1713 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1714 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1716 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1717 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1719 MO_And rep -> trivialCode rep False AND x y
1720 MO_Or rep -> trivialCode rep False OR x y
1721 MO_Xor rep -> trivialCode rep False XOR x y
1723 MO_Shl rep -> trivialCode rep False SLW x y
1724 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1725 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1727 getRegister (CmmLit (CmmInt i rep))
1728 | Just imm <- makeImmediate rep True i
1730 code dst = unitOL (LI dst imm)
1732 return (Any rep code)
1734 getRegister (CmmLit (CmmFloat f frep)) = do
1735 lbl <- getNewLabelNat
1736 dflags <- getDynFlagsNat
1737 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1738 Amode addr addr_code <- getAmode dynRef
1740 LDATA ReadOnlyData [CmmDataLabel lbl,
1741 CmmStaticLit (CmmFloat f frep)]
1742 `consOL` (addr_code `snocOL` LD frep dst addr)
1743 return (Any frep code)
1745 getRegister (CmmLit lit)
1746 = let rep = cmmLitRep lit
1750 ADD dst dst (RIImm (LO imm))
1752 in return (Any rep code)
1754 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extend{S,U}Expr: wrap an integer expression of width rep in a
-- sign- or zero-extending conversion to I32; a no-op when the
-- expression is already I32-wide.
extendSExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]

extendUExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1763 #endif /* powerpc_TARGET_ARCH */
1766 -- -----------------------------------------------------------------------------
1767 -- The 'Amode' type: Memory addressing modes passed up the tree.
1769 data Amode = Amode AddrMode InstrBlock
1772 Now, given a tree (the argument to an CmmLoad) that references memory,
1773 produce a suitable addressing mode.
1775 A Rule of the Game (tm) for Amodes: use of the addr bit must
1776 immediately follow use of the code part, since the code part puts
1777 values in registers which the addr then refers to. So you can't put
1778 anything in between, lest it overwrite some of those registers. If
1779 you need to do some other computation between the code part and use of
1780 the addr bit, first store the effective address from the amode in a
1781 temporary, then do the other computation, and then use the temporary:
1785 ... other computation ...
1789 getAmode :: CmmExpr -> NatM Amode
1790 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1792 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1794 #if alpha_TARGET_ARCH
1796 getAmode (StPrim IntSubOp [x, StInt i])
1797 = getNewRegNat PtrRep `thenNat` \ tmp ->
1798 getRegister x `thenNat` \ register ->
1800 code = registerCode register tmp
1801 reg = registerName register tmp
1802 off = ImmInt (-(fromInteger i))
1804 return (Amode (AddrRegImm reg off) code)
1806 getAmode (StPrim IntAddOp [x, StInt i])
1807 = getNewRegNat PtrRep `thenNat` \ tmp ->
1808 getRegister x `thenNat` \ register ->
1810 code = registerCode register tmp
1811 reg = registerName register tmp
1812 off = ImmInt (fromInteger i)
1814 return (Amode (AddrRegImm reg off) code)
1818 = return (Amode (AddrImm imm__2) id)
1821 imm__2 = case imm of Just x -> x
1824 = getNewRegNat PtrRep `thenNat` \ tmp ->
1825 getRegister other `thenNat` \ register ->
1827 code = registerCode register tmp
1828 reg = registerName register tmp
1830 return (Amode (AddrReg reg) code)
1832 #endif /* alpha_TARGET_ARCH */
1834 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1836 #if x86_64_TARGET_ARCH
1838 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1839 CmmLit displacement])
1840 = return $ Amode (ripRel (litToImm displacement)) nilOL
1844 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1846 -- This is all just ridiculous, since it carefully undoes
1847 -- what mangleIndexTree has just done.
1848 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1849 | not (is64BitLit lit)
1850 -- ASSERT(rep == I32)???
1851 = do (x_reg, x_code) <- getSomeReg x
1852 let off = ImmInt (-(fromInteger i))
1853 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1855 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1856 | not (is64BitLit lit)
1857 -- ASSERT(rep == I32)???
1858 = do (x_reg, x_code) <- getSomeReg x
1859 let off = ImmInt (fromInteger i)
1860 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1862 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1863 -- recognised by the next rule.
1864 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1866 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1868 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1869 [y, CmmLit (CmmInt shift _)]])
1870 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1871 = x86_complex_amode x y shift 0
1873 getAmode (CmmMachOp (MO_Add rep)
1874 [x, CmmMachOp (MO_Add _)
1875 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1876 CmmLit (CmmInt offset _)]])
1877 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1878 && not (is64BitInteger offset)
1879 = x86_complex_amode x y shift offset
1881 getAmode (CmmMachOp (MO_Add rep) [x,y])
1882 = x86_complex_amode x y 0 0
1884 getAmode (CmmLit lit) | not (is64BitLit lit)
1885 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1888 (reg,code) <- getSomeReg expr
1889 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1892 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1893 x86_complex_amode base index shift offset
1894 = do (x_reg, x_code) <- getNonClobberedReg base
1895 -- x must be in a temp, because it has to stay live over y_code
1896 -- we could compare x_reg and y_reg and do something better here...
1897 (y_reg, y_code) <- getSomeReg index
1899 code = x_code `appOL` y_code
1900 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1901 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1904 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1906 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1908 #if sparc_TARGET_ARCH
1910 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1913 (reg, code) <- getSomeReg x
1915 off = ImmInt (-(fromInteger i))
1916 return (Amode (AddrRegImm reg off) code)
1919 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1922 (reg, code) <- getSomeReg x
1924 off = ImmInt (fromInteger i)
1925 return (Amode (AddrRegImm reg off) code)
1927 getAmode (CmmMachOp (MO_Add rep) [x, y])
1929 (regX, codeX) <- getSomeReg x
1930 (regY, codeY) <- getSomeReg y
1932 code = codeX `appOL` codeY
1933 return (Amode (AddrRegReg regX regY) code)
1935 -- XXX Is this same as "leaf" in Stix?
1936 getAmode (CmmLit lit)
1938 tmp <- getNewRegNat I32
1940 code = unitOL (SETHI (HI imm__2) tmp)
1941 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1943 imm__2 = litToImm lit
1947 (reg, code) <- getSomeReg other
1950 return (Amode (AddrRegImm reg off) code)
1952 #endif /* sparc_TARGET_ARCH */
1954 #ifdef powerpc_TARGET_ARCH
1955 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1956 | Just off <- makeImmediate I32 True (-i)
1958 (reg, code) <- getSomeReg x
1959 return (Amode (AddrRegImm reg off) code)
1962 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1963 | Just off <- makeImmediate I32 True i
1965 (reg, code) <- getSomeReg x
1966 return (Amode (AddrRegImm reg off) code)
1968 -- optimize addition with 32-bit immediate
1970 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1972 tmp <- getNewRegNat I32
1973 (src, srcCode) <- getSomeReg x
1974 let imm = litToImm lit
1975 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1976 return (Amode (AddrRegImm tmp (LO imm)) code)
1978 getAmode (CmmLit lit)
1980 tmp <- getNewRegNat I32
1981 let imm = litToImm lit
1982 code = unitOL (LIS tmp (HA imm))
1983 return (Amode (AddrRegImm tmp (LO imm)) code)
1985 getAmode (CmmMachOp (MO_Add I32) [x, y])
1987 (regX, codeX) <- getSomeReg x
1988 (regY, codeY) <- getSomeReg y
1989 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1993 (reg, code) <- getSomeReg other
1996 return (Amode (AddrRegImm reg off) code)
1997 #endif /* powerpc_TARGET_ARCH */
1999 -- -----------------------------------------------------------------------------
2000 -- getOperand: sometimes any operand will do.
2002 -- getNonClobberedOperand: the value of the operand will remain valid across
2003 -- the computation of an arbitrary expression, unless the expression
2004 -- is computed directly into a register which the operand refers to
2005 -- (see trivialCode where this function is used for an example).
2007 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2009 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2010 #if x86_64_TARGET_ARCH
2011 getNonClobberedOperand (CmmLit lit)
2012 | isSuitableFloatingPointLit lit = do
2013 lbl <- getNewLabelNat
2014 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2016 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2018 getNonClobberedOperand (CmmLit lit)
2019 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2020 return (OpImm (litToImm lit), nilOL)
2021 getNonClobberedOperand (CmmLoad mem pk)
2022 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2023 Amode src mem_code <- getAmode mem
2025 if (amodeCouldBeClobbered src)
2027 tmp <- getNewRegNat wordRep
2028 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2029 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2032 return (OpAddr src', save_code `appOL` mem_code)
2033 getNonClobberedOperand e = do
2034 (reg, code) <- getNonClobberedReg e
2035 return (OpReg reg, code)
-- An addressing mode could be clobbered by arbitrary intervening code
-- if any register it mentions is a free (allocatable) real register.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered = any regClobbered . addrModeRegs

-- Only free real registers are up for grabs; virtual and pinned real
-- registers survive arbitrary code.
regClobbered (RealReg rr) = isFastTrue (freeReg rr)
regClobbered _            = False
2043 -- getOperand: the operand is not required to remain valid across the
2044 -- computation of an arbitrary expression.
2045 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2046 #if x86_64_TARGET_ARCH
2047 getOperand (CmmLit lit)
2048 | isSuitableFloatingPointLit lit = do
2049 lbl <- getNewLabelNat
2050 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2052 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2054 getOperand (CmmLit lit)
2055 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2056 return (OpImm (litToImm lit), nilOL)
2057 getOperand (CmmLoad mem pk)
2058 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2059 Amode src mem_code <- getAmode mem
2060 return (OpAddr src, mem_code)
2062 (reg, code) <- getSomeReg e
2063 return (OpReg reg, code)
2065 isOperand :: CmmExpr -> Bool
2066 isOperand (CmmLoad _ _) = True
2067 isOperand (CmmLit lit) = not (is64BitLit lit)
2068 || isSuitableFloatingPointLit lit
2071 -- if we want a floating-point literal as an operand, we can
2072 -- use it directly from memory. However, if the literal is
2073 -- zero, we're better off generating it into a register using
-- A floating-point literal may be used directly as a memory operand,
-- except zero, which is cheaper to generate straight into a register.
isSuitableFloatingPointLit lit = case lit of
    CmmFloat f _ -> f /= 0.0
    _            -> False
2078 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2079 getRegOrMem (CmmLoad mem pk)
2080 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2081 Amode src mem_code <- getAmode mem
2082 return (OpAddr src, mem_code)
2084 (reg, code) <- getNonClobberedReg e
2085 return (OpReg reg, code)
2087 #if x86_64_TARGET_ARCH
2088 is64BitLit (CmmInt i I64) = is64BitInteger i
2089 -- assume that labels are in the range 0-2^31-1: this assumes the
2090 -- small memory model (see gcc docs, -mcmodel=small).
2092 is64BitLit x = False
-- | True iff the value, truncated to 64 bits, does not fit in a
-- signed 32-bit immediate.  We go through Int64 because a CmmInt is
-- intended to be truncated to the appropriate number of bits; e.g.
-- (-1) as a CmmInt might arrive as either -1 or 18446744073709551615.
is64BitInteger :: Integer -> Bool
is64BitInteger i = not (fitsInInt32 i64)
  where
    i64 :: Int64
    i64 = fromIntegral i
    fitsInInt32 v = v >= -0x80000000 && v <= 0x7fffffff
2103 -- -----------------------------------------------------------------------------
2104 -- The 'CondCode' type: Condition codes passed up the tree.
-- | A condition to branch on: the Bool records whether it arose from a
-- floating-point comparison, plus the 'Cond' and the code that sets it.
2106 data CondCode = CondCode Bool Cond InstrBlock
2108 -- Set up a condition code for a conditional branch.
2110 getCondCode :: CmmExpr -> NatM CondCode
2112 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2114 #if alpha_TARGET_ARCH
2115 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2116 #endif /* alpha_TARGET_ARCH */
2118 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2120 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2121 -- yes, they really do seem to want exactly the same!
-- Dispatch on the comparison MachOp: F32/F64 comparisons go through
-- condFltCode, integer comparisons through condIntCode (signed vs
-- unsigned selects the Cond). The `= case mop of` header line is
-- elided in this listing.
2123 getCondCode (CmmMachOp mop [x, y])
2126 MO_Eq F32 -> condFltCode EQQ x y
2127 MO_Ne F32 -> condFltCode NE x y
2129 MO_S_Gt F32 -> condFltCode GTT x y
2130 MO_S_Ge F32 -> condFltCode GE x y
2131 MO_S_Lt F32 -> condFltCode LTT x y
2132 MO_S_Le F32 -> condFltCode LE x y
2134 MO_Eq F64 -> condFltCode EQQ x y
2135 MO_Ne F64 -> condFltCode NE x y
2137 MO_S_Gt F64 -> condFltCode GTT x y
2138 MO_S_Ge F64 -> condFltCode GE x y
2139 MO_S_Lt F64 -> condFltCode LTT x y
2140 MO_S_Le F64 -> condFltCode LE x y
2142 MO_Eq rep -> condIntCode EQQ x y
2143 MO_Ne rep -> condIntCode NE x y
2145 MO_S_Gt rep -> condIntCode GTT x y
2146 MO_S_Ge rep -> condIntCode GE x y
2147 MO_S_Lt rep -> condIntCode LTT x y
2148 MO_S_Le rep -> condIntCode LE x y
2150 MO_U_Gt rep -> condIntCode GU x y
2151 MO_U_Ge rep -> condIntCode GEU x y
2152 MO_U_Lt rep -> condIntCode LU x y
2153 MO_U_Le rep -> condIntCode LEU x y
2155 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2157 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2159 #elif powerpc_TARGET_ARCH
2161 -- almost the same as everywhere else - but we need to
2162 -- extend small integers to 32 bit first
-- PowerPC variant: identical float cases, but integer operands are
-- widened with extendSExpr (signed compares) or extendUExpr (unsigned
-- and equality compares) before condIntCode.
2164 getCondCode (CmmMachOp mop [x, y])
2166 MO_Eq F32 -> condFltCode EQQ x y
2167 MO_Ne F32 -> condFltCode NE x y
2169 MO_S_Gt F32 -> condFltCode GTT x y
2170 MO_S_Ge F32 -> condFltCode GE x y
2171 MO_S_Lt F32 -> condFltCode LTT x y
2172 MO_S_Le F32 -> condFltCode LE x y
2174 MO_Eq F64 -> condFltCode EQQ x y
2175 MO_Ne F64 -> condFltCode NE x y
2177 MO_S_Gt F64 -> condFltCode GTT x y
2178 MO_S_Ge F64 -> condFltCode GE x y
2179 MO_S_Lt F64 -> condFltCode LTT x y
2180 MO_S_Le F64 -> condFltCode LE x y
2182 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2183 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2185 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2186 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2187 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2188 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2190 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2191 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2192 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2193 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2195 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2197 getCondCode other = panic "getCondCode(2)(powerpc)"
2203 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2204 -- passed back up the tree.
2206 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2208 #if alpha_TARGET_ARCH
2209 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2210 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2211 #endif /* alpha_TARGET_ARCH */
2213 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2214 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2216 -- memory vs immediate
-- Compare a loaded value against a literal directly with CMP mem,imm
-- (only when the literal fits; is64BitLit guards the x86-64 case).
2217 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2218 Amode x_addr x_code <- getAmode x
2221 code = x_code `snocOL`
2222 CMP pk (OpImm imm) (OpAddr x_addr)
2224 return (CondCode False cond code)
2226 -- anything vs zero, using a mask
2227 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) `cmp` 0 is compiled to a single TEST instruction.
2228 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2229 | (CmmLit (CmmInt mask pk2)) <- o2
2231 (x_reg, x_code) <- getSomeReg x
2233 code = x_code `snocOL`
2234 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2236 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets flags without an immediate.
2239 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2240 (x_reg, x_code) <- getSomeReg x
2242 code = x_code `snocOL`
2243 TEST pk (OpReg x_reg) (OpReg x_reg)
2245 return (CondCode False cond code)
2247 -- anything vs operand
2248 condIntCode cond x y | isOperand y = do
2249 (x_reg, x_code) <- getNonClobberedReg x
2250 (y_op, y_code) <- getOperand y
2252 code = x_code `appOL` y_code `snocOL`
2253 CMP (cmmExprRep x) y_op (OpReg x_reg)
2255 return (CondCode False cond code)
2257 -- anything vs anything
2258 condIntCode cond x y = do
2259 (y_reg, y_code) <- getNonClobberedReg y
2260 (x_op, x_code) <- getRegOrMem x
2262 code = y_code `appOL`
2264 CMP (cmmExprRep x) (OpReg y_reg) x_op
2266 return (CondCode False cond code)
2269 #if i386_TARGET_ARCH
-- i386 (x87) float compare: GCMP performs the test; the result is
-- always consumed via EQQ (see the comment inside).
2270 condFltCode cond x y
2271 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2272 (x_reg, x_code) <- getNonClobberedReg x
2273 (y_reg, y_code) <- getSomeReg y
2275 code = x_code `appOL` y_code `snocOL`
2276 GCMP cond x_reg y_reg
2277 -- The GCMP insn does the test and sets the zero flag if comparable
2278 -- and true. Hence we always supply EQQ as the condition to test.
2279 return (CondCode True EQQ code)
2280 #endif /* i386_TARGET_ARCH */
2282 #if x86_64_TARGET_ARCH
2283 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2284 -- an operand, but the right must be a reg. We can probably do better
2285 -- than this general case...
2286 condFltCode cond x y = do
2287 (x_reg, x_code) <- getNonClobberedReg x
2288 (y_op, y_code) <- getOperand y
2290 code = x_code `appOL`
2292 CMP (cmmExprRep x) y_op (OpReg x_reg)
2293 -- NB(1): we need to use the unsigned comparison operators on the
2294 -- result of this comparison.
2296 return (CondCode True (condToUnsigned cond) code)
2299 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2301 #if sparc_TARGET_ARCH
-- SPARC: comparisons are done with SUBcc (SUB with cc=True) into g0,
-- discarding the result and keeping only the condition codes.
2303 condIntCode cond x (CmmLit (CmmInt y rep))
2306 (src1, code) <- getSomeReg x
2308 src2 = ImmInt (fromInteger y)
2309 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2310 return (CondCode False cond code')
2312 condIntCode cond x y = do
2313 (src1, code1) <- getSomeReg x
2314 (src2, code2) <- getSomeReg y
2316 code__2 = code1 `appOL` code2 `snocOL`
2317 SUB False True src1 (RIReg src2) g0
2318 return (CondCode False cond code__2)
-- Float compare: if operand widths differ, the F32 side is promoted to
-- F64 via FxTOy into a fresh temp before the FCMP.
2321 condFltCode cond x y = do
2322 (src1, code1) <- getSomeReg x
2323 (src2, code2) <- getSomeReg y
2324 tmp <- getNewRegNat F64
2326 promote x = FxTOy F32 F64 x tmp
2333 code1 `appOL` code2 `snocOL`
2334 FCMP True pk1 src1 src2
2335 else if pk1 == F32 then
2336 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2337 FCMP True F64 tmp src2
2339 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2340 FCMP True F64 src1 tmp
2341 return (CondCode True cond code__2)
2343 #endif /* sparc_TARGET_ARCH */
2345 #if powerpc_TARGET_ARCH
2346 -- ###FIXME: I16 and I8!
-- PowerPC: CMPL for unsigned conditions, CMP for signed; immediate
-- form used only when makeImmediate can encode the literal.
2347 condIntCode cond x (CmmLit (CmmInt y rep))
2348 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2350 (src1, code) <- getSomeReg x
2352 code' = code `snocOL`
2353 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2354 return (CondCode False cond code')
2356 condIntCode cond x y = do
2357 (src1, code1) <- getSomeReg x
2358 (src2, code2) <- getSomeReg y
2360 code' = code1 `appOL` code2 `snocOL`
2361 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2362 return (CondCode False cond code')
2364 condFltCode cond x y = do
2365 (src1, code1) <- getSomeReg x
2366 (src2, code2) <- getSomeReg y
2368 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2369 code'' = case cond of -- twiddle CR to handle unordered case
2370 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2371 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2374 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2375 return (CondCode True cond code'')
2377 #endif /* powerpc_TARGET_ARCH */
2379 -- -----------------------------------------------------------------------------
2380 -- Generating assignments
2382 -- Assignments are really at the heart of the whole code generation
2383 -- business. Almost all top-level nodes of any real importance are
2384 -- assignments, which correspond to loads, stores, or register
2385 -- transfers. If we're really lucky, some of the register transfers
2386 -- will go away, because we can use the destination register to
2387 -- complete the code generation for the right hand side. This only
2388 -- fails when the right hand side is forced into a fixed register
2389 -- (e.g. the result of a call).
-- Four entry points: {Mem,Reg} destination x {Int,Flt} payload.
2391 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2392 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2394 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2395 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2397 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2399 #if alpha_TARGET_ARCH
-- Alpha code below uses the older thenNat-style monad plumbing.
2401 assignIntCode pk (CmmLoad dst _) src
2402 = getNewRegNat IntRep `thenNat` \ tmp ->
2403 getAmode dst `thenNat` \ amode ->
2404 getRegister src `thenNat` \ register ->
2406 code1 = amodeCode amode []
2407 dst__2 = amodeAddr amode
2408 code2 = registerCode register tmp []
2409 src__2 = registerName register tmp
2410 sz = primRepToSize pk
2411 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2415 assignIntCode pk dst src
2416 = getRegister dst `thenNat` \ register1 ->
2417 getRegister src `thenNat` \ register2 ->
2419 dst__2 = registerName register1 zeroh
2420 code = registerCode register2 dst__2
2421 src__2 = registerName register2 dst__2
2422 code__2 = if isFixed register2
2423 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2428 #endif /* alpha_TARGET_ARCH */
2430 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2432 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2434 -- integer assignment to memory
2436 -- specific case of adding/subtracting an integer to a particular address.
2437 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Peephole: mem := mem `op` imm (same address) becomes a single ADD/SUB
-- on memory, guarded so an I64 store never takes a too-wide immediate.
2439 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2440 CmmLit (CmmInt i _)])
2441 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2442 Just instr <- check op
2443 = do Amode amode code_addr <- getAmode addr
2444 let code = code_addr `snocOL`
2445 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2448 check (MO_Add _) = Just ADD
2449 check (MO_Sub _) = Just SUB
-- General case: evaluate src to a register-or-immediate, then MOV.
2454 assignMem_IntCode pk addr src = do
2455 Amode addr code_addr <- getAmode addr
2456 (code_src, op_src) <- get_op_RI src
2458 code = code_src `appOL`
2460 MOV pk op_src (OpAddr addr)
2461 -- NOTE: op_src is stable, so it will still be valid
2462 -- after code_addr. This may involve the introduction
2463 -- of an extra MOV to a temporary register, but we hope
2464 -- the register allocator will get rid of it.
2468 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2469 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2470 = return (nilOL, OpImm (litToImm lit))
2472 = do (reg,code) <- getNonClobberedReg op
2473 return (code, OpReg reg)
2476 -- Assign; dst is a reg, rhs is mem
2477 assignReg_IntCode pk reg (CmmLoad src _) = do
2478 load_code <- intLoadCode (MOV pk) src
2479 return (load_code (getRegisterReg reg))
2481 -- dst is a reg, but src could be anything
2482 assignReg_IntCode pk reg src = do
2483 code <- getAnyReg src
2484 return (code (getRegisterReg reg))
2486 #endif /* i386_TARGET_ARCH */
2488 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2490 #if sparc_TARGET_ARCH
2492 assignMem_IntCode pk addr src = do
2493 (srcReg, code) <- getSomeReg src
2494 Amode dstAddr addr_code <- getAmode addr
2495 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- SPARC register move is OR with g0 when the source is Fixed.
2497 assignReg_IntCode pk reg src = do
2498 r <- getRegister src
2500 Any _ code -> code dst
2501 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2503 dst = getRegisterReg reg
2506 #endif /* sparc_TARGET_ARCH */
2508 #if powerpc_TARGET_ARCH
2510 assignMem_IntCode pk addr src = do
2511 (srcReg, code) <- getSomeReg src
2512 Amode dstAddr addr_code <- getAmode addr
2513 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2515 -- dst is a reg, but src could be anything
2516 assignReg_IntCode pk reg src
2518 r <- getRegister src
2520 Any _ code -> code dst
2521 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2523 dst = getRegisterReg reg
2525 #endif /* powerpc_TARGET_ARCH */
2528 -- -----------------------------------------------------------------------------
2529 -- Floating-point assignments
2531 #if alpha_TARGET_ARCH
2533 assignFltCode pk (CmmLoad dst _) src
2534 = getNewRegNat pk `thenNat` \ tmp ->
2535 getAmode dst `thenNat` \ amode ->
2536 getRegister src `thenNat` \ register ->
2538 code1 = amodeCode amode []
2539 dst__2 = amodeAddr amode
2540 code2 = registerCode register tmp []
2541 src__2 = registerName register tmp
2542 sz = primRepToSize pk
2543 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2547 assignFltCode pk dst src
2548 = getRegister dst `thenNat` \ register1 ->
2549 getRegister src `thenNat` \ register2 ->
2551 dst__2 = registerName register1 zeroh
2552 code = registerCode register2 dst__2
2553 src__2 = registerName register2 dst__2
2554 code__2 = if isFixed register2
2555 then code . mkSeqInstr (FMOV src__2 dst__2)
2560 #endif /* alpha_TARGET_ARCH */
2562 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2564 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2566 -- Floating point assignment to memory
-- i386 stores via the x87 GST instruction; x86-64 uses a plain MOV
-- (SSE registers move like integer operands here).
2567 assignMem_FltCode pk addr src = do
2568 (src_reg, src_code) <- getNonClobberedReg src
2569 Amode addr addr_code <- getAmode addr
2571 code = src_code `appOL`
2573 IF_ARCH_i386(GST pk src_reg addr,
2574 MOV pk (OpReg src_reg) (OpAddr addr))
2577 -- Floating point assignment to a register/temporary
2578 assignReg_FltCode pk reg src = do
2579 src_code <- getAnyReg src
2580 return (src_code (getRegisterReg reg))
2582 #endif /* i386_TARGET_ARCH */
2584 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2586 #if sparc_TARGET_ARCH
2588 -- Floating point assignment to memory
-- If the source rep differs from the store rep, convert through a
-- temp with FxTOy before the ST.
2589 assignMem_FltCode pk addr src = do
2590 Amode dst__2 code1 <- getAmode addr
2591 (src__2, code2) <- getSomeReg src
2592 tmp1 <- getNewRegNat pk
2594 pk__2 = cmmExprRep src
2595 code__2 = code1 `appOL` code2 `appOL`
2597 then unitOL (ST pk src__2 dst__2)
2598 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2601 -- Floating point assignment to a register/temporary
2602 -- ToDo: Verify correctness
2603 assignReg_FltCode pk reg src = do
2604 r <- getRegister src
2605 v1 <- getNewRegNat pk
2607 Any _ code -> code dst
2608 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2610 dst = getRegisterReg reg
2612 #endif /* sparc_TARGET_ARCH */
2614 #if powerpc_TARGET_ARCH
-- PowerPC float assignment is identical to the integer path.
2617 assignMem_FltCode = assignMem_IntCode
2618 assignReg_FltCode = assignReg_IntCode
2620 #endif /* powerpc_TARGET_ARCH */
2623 -- -----------------------------------------------------------------------------
2624 -- Generating an non-local jump
2626 -- (If applicable) Do not fill the delay slots here; you will confuse the
2627 -- register allocator.
2629 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2631 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2633 #if alpha_TARGET_ARCH
2635 genJump (CmmLabel lbl)
2636 | isAsmTemp lbl = returnInstr (BR target)
2637 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2639 target = ImmCLbl lbl
2642 = getRegister tree `thenNat` \ register ->
2643 getNewRegNat PtrRep `thenNat` \ tmp ->
2645 dst = registerName register pv
2646 code = registerCode register pv
2647 target = registerName register pv
2649 if isFixed register then
2650 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2652 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2654 #endif /* alpha_TARGET_ARCH */
2656 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2658 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump via memory operand, immediate label, or register.
2660 genJump (CmmLoad mem pk) = do
2661 Amode target code <- getAmode mem
2662 return (code `snocOL` JMP (OpAddr target))
2664 genJump (CmmLit lit) = do
2665 return (unitOL (JMP (OpImm (litToImm lit))))
2668 (reg,code) <- getSomeReg expr
2669 return (code `snocOL` JMP (OpReg reg))
2671 #endif /* i386_TARGET_ARCH */
2673 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2675 #if sparc_TARGET_ARCH
-- SPARC: NOP after the jump fills the branch delay slot (see note above).
2677 genJump (CmmLit (CmmLabel lbl))
2678 = return (toOL [CALL (Left target) 0 True, NOP])
2680 target = ImmCLbl lbl
2684 (target, code) <- getSomeReg tree
2685 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2687 #endif /* sparc_TARGET_ARCH */
2689 #if powerpc_TARGET_ARCH
2690 genJump (CmmLit (CmmLabel lbl))
2691 = return (unitOL $ JMP lbl)
-- Computed jump goes through the count register (MTCTR + BCTR).
2695 (target,code) <- getSomeReg tree
2696 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2697 #endif /* powerpc_TARGET_ARCH */
2700 -- -----------------------------------------------------------------------------
2701 -- Unconditional branches
-- | Unconditional branch to a local block: delegate entirely to the
-- per-arch 'mkBranchInstr' (imported from RegAllocInfo).
2703 genBranch :: BlockId -> NatM InstrBlock
2705 genBranch = return . toOL . mkBranchInstr
2707 -- -----------------------------------------------------------------------------
2708 -- Conditional jumps
2711 Conditional jumps are always to local labels, so we can use branch
2712 instructions. We peek at the arguments to decide what kind of
2715 ALPHA: For comparisons with 0, we're laughing, because we can just do
2716 the desired conditional branch.
2718 I386: First, we have to ensure that the condition
2719 codes are set according to the supplied comparison operation.
2721 SPARC: First, we have to ensure that the condition codes are set
2722 according to the supplied comparison operation. We generate slightly
2723 different code for floating point comparisons, because a floating
2724 point operation cannot directly precede a @BF@. We assume the worst
2725 and fill that slot with a @NOP@.
2727 SPARC: Do not fill the delay slots here; you will confuse the register
-- (Signature continues below; the `genCondJump ::` line itself is
-- elided in this listing.)
2733 :: BlockId -- the branch target
2734 -> CmmExpr -- the condition on which to branch
2739 #if alpha_TARGET_ARCH
2741 genCondJump id (StPrim op [x, StInt 0])
2742 = getRegister x `thenNat` \ register ->
2743 getNewRegNat (registerRep register)
2746 code = registerCode register tmp
2747 value = registerName register tmp
2748 pk = registerRep register
2749 target = ImmCLbl lbl
2751 returnSeq code [BI (cmpOp op) value target]
2753 cmpOp CharGtOp = GTT
2755 cmpOp CharEqOp = EQQ
2757 cmpOp CharLtOp = LTT
2766 cmpOp WordGeOp = ALWAYS
2767 cmpOp WordEqOp = EQQ
2769 cmpOp WordLtOp = NEVER
2770 cmpOp WordLeOp = EQQ
2772 cmpOp AddrGeOp = ALWAYS
2773 cmpOp AddrEqOp = EQQ
2775 cmpOp AddrLtOp = NEVER
2776 cmpOp AddrLeOp = EQQ
2778 genCondJump lbl (StPrim op [x, StDouble 0.0])
2779 = getRegister x `thenNat` \ register ->
2780 getNewRegNat (registerRep register)
2783 code = registerCode register tmp
2784 value = registerName register tmp
2785 pk = registerRep register
2786 target = ImmCLbl lbl
2788 return (code . mkSeqInstr (BF (cmpOp op) value target))
2790 cmpOp FloatGtOp = GTT
2791 cmpOp FloatGeOp = GE
2792 cmpOp FloatEqOp = EQQ
2793 cmpOp FloatNeOp = NE
2794 cmpOp FloatLtOp = LTT
2795 cmpOp FloatLeOp = LE
2796 cmpOp DoubleGtOp = GTT
2797 cmpOp DoubleGeOp = GE
2798 cmpOp DoubleEqOp = EQQ
2799 cmpOp DoubleNeOp = NE
2800 cmpOp DoubleLtOp = LTT
2801 cmpOp DoubleLeOp = LE
2803 genCondJump lbl (StPrim op [x, y])
2805 = trivialFCode pr instr x y `thenNat` \ register ->
2806 getNewRegNat F64 `thenNat` \ tmp ->
2808 code = registerCode register tmp
2809 result = registerName register tmp
2810 target = ImmCLbl lbl
2812 return (code . mkSeqInstr (BF cond result target))
2814 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2816 fltCmpOp op = case op of
2830 (instr, cond) = case op of
2831 FloatGtOp -> (FCMP TF LE, EQQ)
2832 FloatGeOp -> (FCMP TF LTT, EQQ)
2833 FloatEqOp -> (FCMP TF EQQ, NE)
2834 FloatNeOp -> (FCMP TF EQQ, EQQ)
2835 FloatLtOp -> (FCMP TF LTT, NE)
2836 FloatLeOp -> (FCMP TF LE, NE)
2837 DoubleGtOp -> (FCMP TF LE, EQQ)
2838 DoubleGeOp -> (FCMP TF LTT, EQQ)
2839 DoubleEqOp -> (FCMP TF EQQ, NE)
2840 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2841 DoubleLtOp -> (FCMP TF LTT, NE)
2842 DoubleLeOp -> (FCMP TF LE, NE)
2844 genCondJump lbl (StPrim op [x, y])
2845 = trivialCode instr x y `thenNat` \ register ->
2846 getNewRegNat IntRep `thenNat` \ tmp ->
2848 code = registerCode register tmp
2849 result = registerName register tmp
2850 target = ImmCLbl lbl
2852 return (code . mkSeqInstr (BI cond result target))
2854 (instr, cond) = case op of
2855 CharGtOp -> (CMP LE, EQQ)
2856 CharGeOp -> (CMP LTT, EQQ)
2857 CharEqOp -> (CMP EQQ, NE)
2858 CharNeOp -> (CMP EQQ, EQQ)
2859 CharLtOp -> (CMP LTT, NE)
2860 CharLeOp -> (CMP LE, NE)
2861 IntGtOp -> (CMP LE, EQQ)
2862 IntGeOp -> (CMP LTT, EQQ)
2863 IntEqOp -> (CMP EQQ, NE)
2864 IntNeOp -> (CMP EQQ, EQQ)
2865 IntLtOp -> (CMP LTT, NE)
2866 IntLeOp -> (CMP LE, NE)
2867 WordGtOp -> (CMP ULE, EQQ)
2868 WordGeOp -> (CMP ULT, EQQ)
2869 WordEqOp -> (CMP EQQ, NE)
2870 WordNeOp -> (CMP EQQ, EQQ)
2871 WordLtOp -> (CMP ULT, NE)
2872 WordLeOp -> (CMP ULE, NE)
2873 AddrGtOp -> (CMP ULE, EQQ)
2874 AddrGeOp -> (CMP ULT, EQQ)
2875 AddrEqOp -> (CMP EQQ, NE)
2876 AddrNeOp -> (CMP EQQ, EQQ)
2877 AddrLtOp -> (CMP ULT, NE)
2878 AddrLeOp -> (CMP ULE, NE)
2880 #endif /* alpha_TARGET_ARCH */
2882 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2884 #if i386_TARGET_ARCH
-- i386: set condition codes, then a single JXX to the target block.
2886 genCondJump id bool = do
2887 CondCode _ cond code <- getCondCode bool
2888 return (code `snocOL` JXX cond id)
2892 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2894 #if x86_64_TARGET_ARCH
-- x86-64: float comparisons need extra care for the unordered (NaN)
-- case; depending on the condition either a plain test, an
-- "or unordered", or an "and ordered" sequence is emitted (bodies of
-- those sequences are elided in this listing).
2896 genCondJump id bool = do
2897 CondCode is_float cond cond_code <- getCondCode bool
2900 return (cond_code `snocOL` JXX cond id)
2902 lbl <- getBlockIdNat
2904 -- see comment with condFltReg
2905 let code = case cond of
2911 plain_test = unitOL (
2914 or_unordered = toOL [
2918 and_ordered = toOL [
2924 return (cond_code `appOL` code)
2928 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2930 #if sparc_TARGET_ARCH
-- SPARC: BF for float conditions (preceded by a NOP because an FP op
-- cannot directly precede a BF), BI otherwise; trailing NOP fills the
-- delay slot.
2932 genCondJump (BlockId id) bool = do
2933 CondCode is_float cond code <- getCondCode bool
2938 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2939 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2943 #endif /* sparc_TARGET_ARCH */
2946 #if powerpc_TARGET_ARCH
2948 genCondJump id bool = do
2949 CondCode is_float cond code <- getCondCode bool
2950 return (code `snocOL` BCC cond id)
2952 #endif /* powerpc_TARGET_ARCH */
2955 -- -----------------------------------------------------------------------------
2956 -- Generating C calls
2958 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2959 -- @get_arg@, which moves the arguments to the correct registers/stack
2960 -- locations. Apart from that, the code is easy.
2962 -- (If applicable) Do not fill the delay slots here; you will confuse the
2963 -- register allocator.
-- (The `genCCall ::` signature head line is elided in this listing.)
2966 :: CmmCallTarget -- function to call
2967 -> CmmFormals -- where to put the result
2968 -> CmmActuals -- arguments (of mixed type)
2971 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2973 #if alpha_TARGET_ARCH
-- Alpha calling sequence: marshal args via get_arg, then LDA/JSR/LDGP.
2977 genCCall fn cconv result_regs args
2978 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2979 `thenNat` \ ((unused,_), argCode) ->
2981 nRegs = length allArgRegs - length unused
2982 code = asmSeqThen (map ($ []) argCode)
2985 LDA pv (AddrImm (ImmLab (ptext fn))),
2986 JSR ra (AddrReg pv) nRegs,
2987 LDGP gp (AddrReg ra)]
2989 ------------------------
2990 {- Try to get a value into a specific register (or registers) for
2991 a call. The first 6 arguments go into the appropriate
2992 argument register (separate registers for integer and floating
2993 point arguments, but used in lock-step), and the remaining
2994 arguments are dumped to the stack, beginning at 0(sp). Our
2995 first argument is a pair of the list of remaining argument
2996 registers to be assigned for this call and the next stack
2997 offset to use for overflowing arguments. This way,
2998 @get_Arg@ can be applied to all of a call's arguments using
3002 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3003 -> StixTree -- Current argument
3004 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3006 -- We have to use up all of our argument registers first...
3008 get_arg ((iDst,fDst):dsts, offset) arg
3009 = getRegister arg `thenNat` \ register ->
3011 reg = if isFloatingRep pk then fDst else iDst
3012 code = registerCode register reg
3013 src = registerName register reg
3014 pk = registerRep register
3017 if isFloatingRep pk then
3018 ((dsts, offset), if isFixed register then
3019 code . mkSeqInstr (FMOV src fDst)
3022 ((dsts, offset), if isFixed register then
3023 code . mkSeqInstr (OR src (RIReg src) iDst)
3026 -- Once we have run out of argument registers, we move to the
3029 get_arg ([], offset) arg
3030 = getRegister arg `thenNat` \ register ->
3031 getNewRegNat (registerRep register)
3034 code = registerCode register tmp
3035 src = registerName register tmp
3036 pk = registerRep register
3037 sz = primRepToSize pk
3039 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3041 #endif /* alpha_TARGET_ARCH */
3043 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3045 #if i386_TARGET_ARCH
3047 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3048 -- write barrier compiles to no code on x86/x86-64;
3049 -- we keep it this long in order to prevent earlier optimisations.
3051 -- we only cope with a single result for foreign calls
-- Inline the x87 transcendental/sqrt ops when possible; everything
-- else goes out of line to libm via outOfLineFloatOp.
3052 genCCall (CmmPrim op) [CmmKinded r _] args = do
3054 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3055 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3057 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3058 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3060 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3061 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3063 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3064 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3066 other_op -> outOfLineFloatOp op r args
3068 actuallyInlineFloatOp rep instr [CmmKinded x _]
3069 = do res <- trivialUFCode rep instr x
3071 return (any (getRegisterReg (CmmLocal r)))
-- General C call on i386: push args right-to-left, adjust the stack
-- delta, call, then pop caller-pushed args (unless stdcall).
3073 genCCall target dest_regs args = do
3075 sizes = map (arg_size . cmmExprRep . kindlessCmm) (reverse args)
3076 #if !darwin_TARGET_OS
3077 tot_arg_size = sum sizes
3079 raw_arg_size = sum sizes
-- Darwin requires 16-byte stack alignment at the call; pad first.
3080 tot_arg_size = roundTo 16 raw_arg_size
3081 arg_pad_size = tot_arg_size - raw_arg_size
3082 delta0 <- getDeltaNat
3083 setDeltaNat (delta0 - arg_pad_size)
3086 push_codes <- mapM push_arg (reverse args)
3087 delta <- getDeltaNat
3090 -- deal with static vs dynamic call targets
3091 (callinsns,cconv) <-
3094 CmmCallee (CmmLit (CmmLabel lbl)) conv
3095 -> -- ToDo: stdcall arg sizes
3096 return (unitOL (CALL (Left fn_imm) []), conv)
3097 where fn_imm = ImmCLbl lbl
3099 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3100 ASSERT(dyn_rep == I32)
3101 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3104 #if darwin_TARGET_OS
3106 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3107 DELTA (delta0 - arg_pad_size)]
3108 `appOL` concatOL push_codes
3111 = concatOL push_codes
3112 call = callinsns `appOL`
3114 -- Deallocate parameters after call for ccall;
3115 -- but not for stdcall (callee does it)
3116 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3117 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3119 [DELTA (delta + tot_arg_size)]
3122 setDeltaNat (delta + tot_arg_size)
3125 -- assign the results, if necessary
-- I64 results arrive in eax:edx; floats in x87 fake0; ints in eax.
3126 assign_code [] = nilOL
3127 assign_code [CmmKinded dest _hint] =
3129 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3130 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3131 F32 -> unitOL (GMOV fake0 r_dest)
3132 F64 -> unitOL (GMOV fake0 r_dest)
3133 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3135 r_dest_hi = getHiVRegFromLo r_dest
3136 rep = localRegRep dest
3137 r_dest = getRegisterReg (CmmLocal dest)
3138 assign_code many = panic "genCCall.assign_code many"
3140 return (push_code `appOL`
3142 assign_code dest_regs)
3150 roundTo a x | x `mod` a == 0 = x
3151 | otherwise = x + a - (x `mod` a)
3154 push_arg :: (CmmKinded CmmExpr){-current argument-}
3155 -> NatM InstrBlock -- code
3157 push_arg (CmmKinded arg _hint) -- we don't need the hints on x86
-- I64 args are pushed as two I32 halves, high word first.
3158 | arg_rep == I64 = do
3159 ChildCode64 code r_lo <- iselExpr64 arg
3160 delta <- getDeltaNat
3161 setDeltaNat (delta - 8)
3163 r_hi = getHiVRegFromLo r_lo
3165 return ( code `appOL`
3166 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3167 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3172 (code, reg, sz) <- get_op arg
3173 delta <- getDeltaNat
3174 let size = arg_size sz
3175 setDeltaNat (delta-size)
-- Floats cannot be PUSHed: make room with SUB esp, then GST to the slot.
3176 if (case sz of F64 -> True; F32 -> True; _ -> False)
3177 then return (code `appOL`
3178 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3180 GST sz reg (AddrBaseIndex (EABaseReg esp)
3184 else return (code `snocOL`
3185 PUSH I32 (OpReg reg) `snocOL`
3189 arg_rep = cmmExprRep arg
3192 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3194 (reg,code) <- getSomeReg op
3195 return (code, reg, cmmExprRep op)
3197 #endif /* i386_TARGET_ARCH */
3199 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3201 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
-- | Compile a floating-point CallishMachOp as a real C call to the
-- matching libm routine (name table below). F64 results are assigned
-- directly; otherwise the call goes through a fresh F64 temp and is
-- then converted by a second assignment.
3203 outOfLineFloatOp mop res args
3205 dflags <- getDynFlagsNat
3206 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3207 let target = CmmCallee targetExpr CCallConv
3209 if localRegRep res == F64
3211 stmtToInstrs (CmmCall target [CmmKinded res FloatHint] args CmmUnsafe CmmMayReturn)
3215 tmp = LocalReg uq F64 GCKindNonPtr
3217 code1 <- stmtToInstrs (CmmCall target [CmmKinded tmp FloatHint] args CmmUnsafe CmmMayReturn)
3218 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3219 return (code1 `appOL` code2)
3221 lbl = mkForeignLabel fn Nothing False
-- Map each MachOp to the corresponding C math-library symbol
-- (the `fn = case mop of` header line is elided in this listing).
3224 MO_F32_Sqrt -> fsLit "sqrtf"
3225 MO_F32_Sin -> fsLit "sinf"
3226 MO_F32_Cos -> fsLit "cosf"
3227 MO_F32_Tan -> fsLit "tanf"
3228 MO_F32_Exp -> fsLit "expf"
3229 MO_F32_Log -> fsLit "logf"
3231 MO_F32_Asin -> fsLit "asinf"
3232 MO_F32_Acos -> fsLit "acosf"
3233 MO_F32_Atan -> fsLit "atanf"
3235 MO_F32_Sinh -> fsLit "sinhf"
3236 MO_F32_Cosh -> fsLit "coshf"
3237 MO_F32_Tanh -> fsLit "tanhf"
3238 MO_F32_Pwr -> fsLit "powf"
3240 MO_F64_Sqrt -> fsLit "sqrt"
3241 MO_F64_Sin -> fsLit "sin"
3242 MO_F64_Cos -> fsLit "cos"
3243 MO_F64_Tan -> fsLit "tan"
3244 MO_F64_Exp -> fsLit "exp"
3245 MO_F64_Log -> fsLit "log"
3247 MO_F64_Asin -> fsLit "asin"
3248 MO_F64_Acos -> fsLit "acos"
3249 MO_F64_Atan -> fsLit "atan"
3251 MO_F64_Sinh -> fsLit "sinh"
3252 MO_F64_Cosh -> fsLit "cosh"
3253 MO_F64_Tanh -> fsLit "tanh"
3254 MO_F64_Pwr -> fsLit "pow"
3256 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3258 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3260 #if x86_64_TARGET_ARCH
3262 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3263 -- write barrier compiles to no code on x86/x86-64;
3264 -- we keep it this long in order to prevent earlier optimisations.
3267 genCCall (CmmPrim op) [CmmKinded r _] args =
3268 outOfLineFloatOp op r args
3270 genCCall target dest_regs args = do
3272 -- load up the register arguments
3273 (stack_args, aregs, fregs, load_args_code)
3274 <- load_args args allArgRegs allFPArgRegs nilOL
3277 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3278 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3279 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3280 -- for annotating the call instruction with
3282 sse_regs = length fp_regs_used
3284 tot_arg_size = arg_size * length stack_args
3286 -- On entry to the called function, %rsp should be aligned
3287 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3288 -- the return address is 16-byte aligned). In STG land
3289 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3290 -- need to make sure we push a multiple of 16-bytes of args,
3291 -- plus the return address, to get the correct alignment.
3292 -- Urg, this is hard. We need to feed the delta back into
3293 -- the arg pushing code.
3294 (real_size, adjust_rsp) <-
3295 if tot_arg_size `rem` 16 == 0
3296 then return (tot_arg_size, nilOL)
3297 else do -- we need to adjust...
3298 delta <- getDeltaNat
3299 setDeltaNat (delta-8)
3300 return (tot_arg_size+8, toOL [
3301 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3305 -- push the stack args, right to left
3306 push_code <- push_args (reverse stack_args) nilOL
3307 delta <- getDeltaNat
3309 -- deal with static vs dynamic call targets
3310 (callinsns,cconv) <-
3313 CmmCallee (CmmLit (CmmLabel lbl)) conv
3314 -> -- ToDo: stdcall arg sizes
3315 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3316 where fn_imm = ImmCLbl lbl
3318 -> do (dyn_r, dyn_c) <- getSomeReg expr
3319 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3322 -- The x86_64 ABI requires us to set %al to the number of SSE
3323 -- registers that contain arguments, if the called routine
3324 -- is a varargs function. We don't know whether it's a
3325 -- varargs function or not, so we have to assume it is.
3327 -- It's not safe to omit this assignment, even if the number
3328 -- of SSE regs in use is zero. If %al is larger than 8
3329 -- on entry to a varargs function, seg faults ensue.
3330 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3332 let call = callinsns `appOL`
3334 -- Deallocate parameters after call for ccall;
3335 -- but not for stdcall (callee does it)
3336 (if cconv == StdCallConv || real_size==0 then [] else
3337 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3339 [DELTA (delta + real_size)]
3342 setDeltaNat (delta + real_size)
3345 -- assign the results, if necessary
3346 assign_code [] = nilOL
3347 assign_code [CmmKinded dest _hint] =
3349 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3350 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3351 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3353 rep = localRegRep dest
3354 r_dest = getRegisterReg (CmmLocal dest)
3355 assign_code many = panic "genCCall.assign_code many"
3357 return (load_args_code `appOL`
3360 assign_eax sse_regs `appOL`
3362 assign_code dest_regs)
3365 arg_size = 8 -- always, at the mo
3367 load_args :: [CmmKinded CmmExpr]
3368 -> [Reg] -- int regs avail for args
3369 -> [Reg] -- FP regs avail for args
3371 -> NatM ([CmmKinded CmmExpr],[Reg],[Reg],InstrBlock)
3372 load_args args [] [] code = return (args, [], [], code)
3373 -- no more regs to use
3374 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3375 -- no more args to push
3376 load_args ((CmmKinded arg hint) : rest) aregs fregs code
3377 | isFloatingRep arg_rep =
3381 arg_code <- getAnyReg arg
3382 load_args rest aregs rs (code `appOL` arg_code r)
3387 arg_code <- getAnyReg arg
3388 load_args rest rs fregs (code `appOL` arg_code r)
3390 arg_rep = cmmExprRep arg
3393 (args',ars,frs,code') <- load_args rest aregs fregs code
3394 return ((CmmKinded arg hint):args', ars, frs, code')
3396 push_args [] code = return code
3397 push_args ((CmmKinded arg hint):rest) code
3398 | isFloatingRep arg_rep = do
3399 (arg_reg, arg_code) <- getSomeReg arg
3400 delta <- getDeltaNat
3401 setDeltaNat (delta-arg_size)
3402 let code' = code `appOL` arg_code `appOL` toOL [
3403 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3404 DELTA (delta-arg_size),
3405 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3406 push_args rest code'
3409 -- we only ever generate word-sized function arguments. Promotion
3410 -- has already happened: our Int8# type is kept sign-extended
3411 -- in an Int#, for example.
3412 ASSERT(arg_rep == I64) return ()
3413 (arg_op, arg_code) <- getOperand arg
3414 delta <- getDeltaNat
3415 setDeltaNat (delta-arg_size)
3416 let code' = code `appOL` toOL [PUSH I64 arg_op,
3417 DELTA (delta-arg_size)]
3418 push_args rest code'
3420 arg_rep = cmmExprRep arg
3423 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3425 #if sparc_TARGET_ARCH
3427 The SPARC calling convention is an absolute
3428 nightmare. The first 6x32 bits of arguments are mapped into
3429 %o0 through %o5, and the remaining arguments are dumped to the
3430 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3432 If we have to put args on the stack, move %o6==%sp down by
3433 the number of words to go on the stack, to ensure there's enough space.
3435 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3436 16 words above the stack pointer is a word for the address of
3437 a structure return value. I use this as a temporary location
3438 for moving values from float to int regs. Certainly it isn't
3439 safe to put anything in the 16 words starting at %sp, since
3440 this area can get trashed at any time due to window overflows
3441 caused by signal handlers.
3443 A final complication (if the above isn't enough) is that
3444 we can't blithely calculate the arguments one by one into
3445 %o0 .. %o5. Consider the following nested calls:
3449 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3450 the inner call will itself use %o0, which trashes the value put there
3451 in preparation for the outer call. Upshot: we need to calculate the
3452 args into temporary regs, and move those to arg regs or onto the
3453 stack only immediately prior to the call proper. Sigh.
3456 genCCall target dest_regs argsAndHints = do
3458 args = map kindlessCmm argsAndHints
3459 argcode_and_vregs <- mapM arg_to_int_vregs args
3461 (argcodes, vregss) = unzip argcode_and_vregs
3462 n_argRegs = length allArgRegs
3463 n_argRegs_used = min (length vregs) n_argRegs
3464 vregs = concat vregss
3465 -- deal with static vs dynamic call targets
3466 callinsns <- (case target of
3467 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3468 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3469 CmmCallee expr conv -> do
3470 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3471 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3473 (res, reduce) <- outOfLineFloatOp mop
3474 lblOrMopExpr <- case res of
3476 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3478 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3479 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3480 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3484 argcode = concatOL argcodes
3485 (move_sp_down, move_sp_up)
3486 = let diff = length vregs - n_argRegs
3487 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3490 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3492 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3493 return (argcode `appOL`
3494 move_sp_down `appOL`
3495 transfer_code `appOL`
3500 -- move args from the integer vregs into which they have been
3501 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3502 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3504 move_final [] _ offset -- all args done
3507 move_final (v:vs) [] offset -- out of aregs; move to stack
3508 = ST I32 v (spRel offset)
3509 : move_final vs [] (offset+1)
3511 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3512 = OR False g0 (RIReg v) a
3513 : move_final vs az offset
3515 -- generate code to calculate an argument, and move it into one
3516 -- or two integer vregs.
3517 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3518 arg_to_int_vregs arg
3519 | (cmmExprRep arg) == I64
3521 (ChildCode64 code r_lo) <- iselExpr64 arg
3523 r_hi = getHiVRegFromLo r_lo
3524 return (code, [r_hi, r_lo])
3527 (src, code) <- getSomeReg arg
3528 tmp <- getNewRegNat (cmmExprRep arg)
3533 v1 <- getNewRegNat I32
3534 v2 <- getNewRegNat I32
3537 FMOV F64 src f0 `snocOL`
3538 ST F32 f0 (spRel 16) `snocOL`
3539 LD I32 (spRel 16) v1 `snocOL`
3540 ST F32 (fPair f0) (spRel 16) `snocOL`
3541 LD I32 (spRel 16) v2
3546 v1 <- getNewRegNat I32
3549 ST F32 src (spRel 16) `snocOL`
3550 LD I32 (spRel 16) v1
3555 v1 <- getNewRegNat I32
3557 code `snocOL` OR False g0 (RIReg src) v1
3561 outOfLineFloatOp mop =
3563 dflags <- getDynFlagsNat
3564 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3565 mkForeignLabel functionName Nothing True
3566 let mopLabelOrExpr = case mopExpr of
3567 CmmLit (CmmLabel lbl) -> Left lbl
3569 return (mopLabelOrExpr, reduce)
3571 (reduce, functionName) = case mop of
3572 MO_F32_Exp -> (True, fsLit "exp")
3573 MO_F32_Log -> (True, fsLit "log")
3574 MO_F32_Sqrt -> (True, fsLit "sqrt")
3576 MO_F32_Sin -> (True, fsLit "sin")
3577 MO_F32_Cos -> (True, fsLit "cos")
3578 MO_F32_Tan -> (True, fsLit "tan")
3580 MO_F32_Asin -> (True, fsLit "asin")
3581 MO_F32_Acos -> (True, fsLit "acos")
3582 MO_F32_Atan -> (True, fsLit "atan")
3584 MO_F32_Sinh -> (True, fsLit "sinh")
3585 MO_F32_Cosh -> (True, fsLit "cosh")
3586 MO_F32_Tanh -> (True, fsLit "tanh")
3588 MO_F64_Exp -> (False, fsLit "exp")
3589 MO_F64_Log -> (False, fsLit "log")
3590 MO_F64_Sqrt -> (False, fsLit "sqrt")
3592 MO_F64_Sin -> (False, fsLit "sin")
3593 MO_F64_Cos -> (False, fsLit "cos")
3594 MO_F64_Tan -> (False, fsLit "tan")
3596 MO_F64_Asin -> (False, fsLit "asin")
3597 MO_F64_Acos -> (False, fsLit "acos")
3598 MO_F64_Atan -> (False, fsLit "atan")
3600 MO_F64_Sinh -> (False, fsLit "sinh")
3601 MO_F64_Cosh -> (False, fsLit "cosh")
3602 MO_F64_Tanh -> (False, fsLit "tanh")
3604 other -> pprPanic "outOfLineFloatOp(sparc) "
3605 (pprCallishMachOp mop)
3607 #endif /* sparc_TARGET_ARCH */
3609 #if powerpc_TARGET_ARCH
3611 #if darwin_TARGET_OS || linux_TARGET_OS
3613 The PowerPC calling convention for Darwin/Mac OS X
3614 is described in Apple's document
3615 "Inside Mac OS X - Mach-O Runtime Architecture".
3617 PowerPC Linux uses the System V Release 4 Calling Convention
3618 for PowerPC. It is described in the
3619 "System V Application Binary Interface PowerPC Processor Supplement".
3621 Both conventions are similar:
3622 Parameters may be passed in general-purpose registers starting at r3, in
3623 floating point registers starting at f1, or on the stack.
3625 But there are substantial differences:
3626 * The number of registers used for parameter passing and the exact set of
3627 nonvolatile registers differs (see MachRegs.lhs).
3628 * On Darwin, stack space is always reserved for parameters, even if they are
3629 passed in registers. The called routine may choose to save parameters from
3630 registers to the corresponding space on the stack.
3631 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3632 parameter is passed in an FPR.
3633 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3634 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3635 Darwin just treats an I64 like two separate I32s (high word first).
3636 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3637 4-byte aligned like everything else on Darwin.
3638 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3639 PowerPC Linux does not agree, so neither do we.
3641 According to both conventions, the parameter area should be part of the
3642 caller's stack frame, allocated in the caller's prologue code (large enough
3643 to hold the parameter lists for all called routines). The NCG already
3644 uses the stack for register spilling, leaving 64 bytes free at the top.
3645 If we need a larger parameter area than that, we just allocate a new stack
3646 frame just before ccalling.
3650 genCCall (CmmPrim MO_WriteBarrier) _ _
3651 = return $ unitOL LWSYNC
3653 genCCall target dest_regs argsAndHints
3654 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3655 -- we rely on argument promotion in the codeGen
3657 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3659 allArgRegs allFPArgRegs
3663 (labelOrExpr, reduceToF32) <- case target of
3664 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3665 CmmCallee expr conv -> return (Right expr, False)
3666 CmmPrim mop -> outOfLineFloatOp mop
3668 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3669 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3674 `snocOL` BL lbl usedRegs
3677 (dynReg, dynCode) <- getSomeReg dyn
3679 `snocOL` MTCTR dynReg
3681 `snocOL` BCTRL usedRegs
3684 #if darwin_TARGET_OS
3685 initialStackOffset = 24
3686 -- size of linkage area + size of arguments, in bytes
3687 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3688 map machRepByteWidth argReps
3689 #elif linux_TARGET_OS
3690 initialStackOffset = 8
3691 stackDelta finalStack = roundTo 16 finalStack
3693 args = map kindlessCmm argsAndHints
3694 argReps = map cmmExprRep args
3696 roundTo a x | x `mod` a == 0 = x
3697 | otherwise = x + a - (x `mod` a)
3699 move_sp_down finalStack
3701 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3704 where delta = stackDelta finalStack
3705 move_sp_up finalStack
3707 toOL [ADD sp sp (RIImm (ImmInt delta)),
3710 where delta = stackDelta finalStack
3713 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3714 passArguments ((arg,I64):args) gprs fprs stackOffset
3715 accumCode accumUsed =
3717 ChildCode64 code vr_lo <- iselExpr64 arg
3718 let vr_hi = getHiVRegFromLo vr_lo
3720 #if darwin_TARGET_OS
3725 (accumCode `appOL` code
3726 `snocOL` storeWord vr_hi gprs stackOffset
3727 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3728 ((take 2 gprs) ++ accumUsed)
3730 storeWord vr (gpr:_) offset = MR gpr vr
3731 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3733 #elif linux_TARGET_OS
3734 let stackOffset' = roundTo 8 stackOffset
3735 stackCode = accumCode `appOL` code
3736 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3737 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3738 regCode hireg loreg =
3739 accumCode `appOL` code
3740 `snocOL` MR hireg vr_hi
3741 `snocOL` MR loreg vr_lo
3744 hireg : loreg : regs | even (length gprs) ->
3745 passArguments args regs fprs stackOffset
3746 (regCode hireg loreg) (hireg : loreg : accumUsed)
3747 _skipped : hireg : loreg : regs ->
3748 passArguments args regs fprs stackOffset
3749 (regCode hireg loreg) (hireg : loreg : accumUsed)
3750 _ -> -- only one or no regs left
3751 passArguments args [] fprs (stackOffset'+8)
3755 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3756 | reg : _ <- regs = do
3757 register <- getRegister arg
3758 let code = case register of
3759 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3760 Any _ acode -> acode reg
3764 #if darwin_TARGET_OS
3765 -- The Darwin ABI requires that we reserve stack slots for register parameters
3766 (stackOffset + stackBytes)
3767 #elif linux_TARGET_OS
3768 -- ... the SysV ABI doesn't.
3771 (accumCode `appOL` code)
3774 (vr, code) <- getSomeReg arg
3778 (stackOffset' + stackBytes)
3779 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3782 #if darwin_TARGET_OS
3783 -- stackOffset is at least 4-byte aligned
3784 -- The Darwin ABI is happy with that.
3785 stackOffset' = stackOffset
3787 -- ... the SysV ABI requires 8-byte alignment for doubles.
3788 stackOffset' | rep == F64 = roundTo 8 stackOffset
3789 | otherwise = stackOffset
3791 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3792 (nGprs, nFprs, stackBytes, regs) = case rep of
3793 I32 -> (1, 0, 4, gprs)
3794 #if darwin_TARGET_OS
3795 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3797 F32 -> (1, 1, 4, fprs)
3798 F64 -> (2, 1, 8, fprs)
3799 #elif linux_TARGET_OS
3800 -- ... the SysV ABI doesn't.
3801 F32 -> (0, 1, 4, fprs)
3802 F64 -> (0, 1, 8, fprs)
3805 moveResult reduceToF32 =
3808 [CmmKinded dest _hint]
3809 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3810 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3811 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3813 | otherwise -> unitOL (MR r_dest r3)
3814 where rep = cmmRegRep (CmmLocal dest)
3815 r_dest = getRegisterReg (CmmLocal dest)
3817 outOfLineFloatOp mop =
3819 dflags <- getDynFlagsNat
3820 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3821 mkForeignLabel functionName Nothing True
3822 let mopLabelOrExpr = case mopExpr of
3823 CmmLit (CmmLabel lbl) -> Left lbl
3825 return (mopLabelOrExpr, reduce)
3827 (functionName, reduce) = case mop of
3828 MO_F32_Exp -> (fsLit "exp", True)
3829 MO_F32_Log -> (fsLit "log", True)
3830 MO_F32_Sqrt -> (fsLit "sqrt", True)
3832 MO_F32_Sin -> (fsLit "sin", True)
3833 MO_F32_Cos -> (fsLit "cos", True)
3834 MO_F32_Tan -> (fsLit "tan", True)
3836 MO_F32_Asin -> (fsLit "asin", True)
3837 MO_F32_Acos -> (fsLit "acos", True)
3838 MO_F32_Atan -> (fsLit "atan", True)
3840 MO_F32_Sinh -> (fsLit "sinh", True)
3841 MO_F32_Cosh -> (fsLit "cosh", True)
3842 MO_F32_Tanh -> (fsLit "tanh", True)
3843 MO_F32_Pwr -> (fsLit "pow", True)
3845 MO_F64_Exp -> (fsLit "exp", False)
3846 MO_F64_Log -> (fsLit "log", False)
3847 MO_F64_Sqrt -> (fsLit "sqrt", False)
3849 MO_F64_Sin -> (fsLit "sin", False)
3850 MO_F64_Cos -> (fsLit "cos", False)
3851 MO_F64_Tan -> (fsLit "tan", False)
3853 MO_F64_Asin -> (fsLit "asin", False)
3854 MO_F64_Acos -> (fsLit "acos", False)
3855 MO_F64_Atan -> (fsLit "atan", False)
3857 MO_F64_Sinh -> (fsLit "sinh", False)
3858 MO_F64_Cosh -> (fsLit "cosh", False)
3859 MO_F64_Tanh -> (fsLit "tanh", False)
3860 MO_F64_Pwr -> (fsLit "pow", False)
3861 other -> pprPanic "genCCall(ppc): unknown callish op"
3862 (pprCallishMachOp other)
3864 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3866 #endif /* powerpc_TARGET_ARCH */
3869 -- -----------------------------------------------------------------------------
3870 -- Generating a table-branch
3872 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3874 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3878 (reg,e_code) <- getSomeReg expr
3879 lbl <- getNewLabelNat
3880 dflags <- getDynFlagsNat
3881 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3882 (tableReg,t_code) <- getSomeReg $ dynRef
3884 jumpTable = map jumpTableEntryRel ids
3886 jumpTableEntryRel Nothing
3887 = CmmStaticLit (CmmInt 0 wordRep)
3888 jumpTableEntryRel (Just (BlockId id))
3889 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3890 where blockLabel = mkAsmTempLabel id
3892 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3893 (EAIndex reg wORD_SIZE) (ImmInt 0))
3895 #if x86_64_TARGET_ARCH
3896 #if darwin_TARGET_OS
3897 -- on Mac OS X/x86_64, put the jump table in the text section
3898 -- to work around a limitation of the linker.
3899 -- ld64 is unable to handle the relocations for
3901 -- if L0 is not preceded by a non-anonymous label in its section.
3903 code = e_code `appOL` t_code `appOL` toOL [
3904 ADD wordRep op (OpReg tableReg),
3905 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3906 LDATA Text (CmmDataLabel lbl : jumpTable)
3909 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3910 -- relocations, hence we only get 32-bit offsets in the jump
3911 -- table. As these offsets are always negative we need to properly
3912 -- sign extend them to 64-bit. This hack should be removed in
3913 -- conjunction with the hack in PprMach.hs/pprDataItem once
3914 -- binutils 2.17 is standard.
3915 code = e_code `appOL` t_code `appOL` toOL [
3916 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3918 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3919 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3921 ADD wordRep (OpReg reg) (OpReg tableReg),
3922 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3926 code = e_code `appOL` t_code `appOL` toOL [
3927 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3928 ADD wordRep op (OpReg tableReg),
3929 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3935 (reg,e_code) <- getSomeReg expr
3936 lbl <- getNewLabelNat
3938 jumpTable = map jumpTableEntry ids
3939 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3940 code = e_code `appOL` toOL [
3941 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3942 JMP_TBL op [ id | Just id <- ids ]
3946 #elif powerpc_TARGET_ARCH
3950 (reg,e_code) <- getSomeReg expr
3951 tmp <- getNewRegNat I32
3952 lbl <- getNewLabelNat
3953 dflags <- getDynFlagsNat
3954 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3955 (tableReg,t_code) <- getSomeReg $ dynRef
3957 jumpTable = map jumpTableEntryRel ids
3959 jumpTableEntryRel Nothing
3960 = CmmStaticLit (CmmInt 0 wordRep)
3961 jumpTableEntryRel (Just (BlockId id))
3962 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3963 where blockLabel = mkAsmTempLabel id
3965 code = e_code `appOL` t_code `appOL` toOL [
3966 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3967 SLW tmp reg (RIImm (ImmInt 2)),
3968 LD I32 tmp (AddrRegReg tableReg tmp),
3969 ADD tmp tmp (RIReg tableReg),
3971 BCTR [ id | Just id <- ids ]
3976 (reg,e_code) <- getSomeReg expr
3977 tmp <- getNewRegNat I32
3978 lbl <- getNewLabelNat
3980 jumpTable = map jumpTableEntry ids
3982 code = e_code `appOL` toOL [
3983 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3984 SLW tmp reg (RIImm (ImmInt 2)),
3985 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3986 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3988 BCTR [ id | Just id <- ids ]
3992 #error "ToDo: genSwitch"
-- | Static data for one absolute jump-table slot: a pointer to the
-- block's assembler temp label, or a zero word when the slot has no
-- target ('Nothing').  Used by the non-PIC 'genSwitch' paths above.
3995 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3996 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3997 where blockLabel = mkAsmTempLabel id
3999 -- -----------------------------------------------------------------------------
4001 -- -----------------------------------------------------------------------------
4004 -- -----------------------------------------------------------------------------
4005 -- 'condIntReg' and 'condFltReg': condition codes into registers
4007 -- Turn those condition codes into integers now (when they appear on
4008 -- the right hand side of an assignment).
4010 -- (If applicable) Do not fill the delay slots here; you will confuse the
4011 -- register allocator.
4013 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4015 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4017 #if alpha_TARGET_ARCH
-- Alpha: turning condition codes into register values is not
-- implemented; these stubs abort code generation if ever reached.
4018 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4019 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4020 #endif /* alpha_TARGET_ARCH */
4022 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4024 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Materialise an integer comparison result in a register (x86/x86_64):
-- SETcc writes the condition into a byte-sized temp, then MOVZxL
-- zero-extends it to a full I32 (0 or 1) in the destination.
-- NOTE(review): the fused original line numbers show interior lines
-- (4029, 4033-4034 -- presumably the `let` keyword and list/let
-- terminators) were lost in extraction; this span is not compilable
-- as shown.
4026 condIntReg cond x y = do
4027 CondCode _ cond cond_code <- condIntCode cond x y
4028 tmp <- getNewRegNat I8
4030 code dst = cond_code `appOL` toOL [
4031 SETCC cond (OpReg tmp),
4032 MOVZxL I8 (OpReg tmp) (OpReg dst)
4035 return (Any I32 code)
4039 #if i386_TARGET_ARCH
-- | i386: materialise a floating-point comparison result in a register,
-- same SETcc + MOVZxL scheme as the integer case.  Unordered (NaN)
-- operands are presumably handled upstream in 'condFltCode' on this
-- arch -- TODO(review) confirm; the x86_64 version below handles
-- parity explicitly.
-- NOTE(review): interior lines (4044, 4048-4049 by the fused original
-- numbering) are missing from this extraction.
4041 condFltReg cond x y = do
4042 CondCode _ cond cond_code <- condFltCode cond x y
4043 tmp <- getNewRegNat I8
4045 code dst = cond_code `appOL` toOL [
4046 SETCC cond (OpReg tmp),
4047 MOVZxL I8 (OpReg tmp) (OpReg dst)
4050 return (Any I32 code)
4054 #if x86_64_TARGET_ARCH
-- | x86_64: materialise a floating-point comparison result in a
-- register.  Unlike the plain SETcc path, SSE comparisons against NaN
-- set the parity flag, so depending on the condition we either OR in
-- the PARITY bit (NE must be true for unordered) or AND with
-- NOTPARITY (all other conditions must be false for unordered);
-- conditions that already exclude the unordered case get the plain
-- SETcc + zero-extend.
-- NOTE(review): several interior lines (4060, 4066, 4071, 4074-4077,
-- 4082, 4086, 4092, 4098-4099 by the fused original numbering --
-- including the `let`, the dispatching `case`, and list terminators)
-- are missing from this extraction; this span is not compilable
-- as shown.
4056 condFltReg cond x y = do
4057 CondCode _ cond cond_code <- condFltCode cond x y
4058 tmp1 <- getNewRegNat wordRep
4059 tmp2 <- getNewRegNat wordRep
4061 -- We have to worry about unordered operands (eg. comparisons
4062 -- against NaN). If the operands are unordered, the comparison
4063 -- sets the parity flag, carry flag and zero flag.
4064 -- All comparisons are supposed to return false for unordered
4065 -- operands except for !=, which returns true.
4067 -- Optimisation: we don't have to test the parity flag if we
4068 -- know the test has already excluded the unordered case: eg >
4069 -- and >= test for a zero carry flag, which can only occur for
4070 -- ordered operands.
4072 -- ToDo: by reversing comparisons we could avoid testing the
4073 -- parity flag in more cases.
4078 NE -> or_unordered dst
4079 GU -> plain_test dst
4080 GEU -> plain_test dst
4081 _ -> and_ordered dst)
4083 plain_test dst = toOL [
4084 SETCC cond (OpReg tmp1),
4085 MOVZxL I8 (OpReg tmp1) (OpReg dst)
4087 or_unordered dst = toOL [
4088 SETCC cond (OpReg tmp1),
4089 SETCC PARITY (OpReg tmp2),
4090 OR I8 (OpReg tmp1) (OpReg tmp2),
4091 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4093 and_ordered dst = toOL [
4094 SETCC cond (OpReg tmp1),
4095 SETCC NOTPARITY (OpReg tmp2),
4096 AND I8 (OpReg tmp1) (OpReg tmp2),
4097 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4100 return (Any I32 code)
4104 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4106 #if sparc_TARGET_ARCH
4108 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4109 (src, code) <- getSomeReg x
4110 tmp <- getNewRegNat I32
4112 code__2 dst = code `appOL` toOL [
4113 SUB False True g0 (RIReg src) g0,
4114 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4115 return (Any I32 code__2)
4117 condIntReg EQQ x y = do
4118 (src1, code1) <- getSomeReg x
4119 (src2, code2) <- getSomeReg y
4120 tmp1 <- getNewRegNat I32
4121 tmp2 <- getNewRegNat I32
4123 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4124 XOR False src1 (RIReg src2) dst,
4125 SUB False True g0 (RIReg dst) g0,
4126 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4127 return (Any I32 code__2)
4129 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4130 (src, code) <- getSomeReg x
4131 tmp <- getNewRegNat I32
4133 code__2 dst = code `appOL` toOL [
4134 SUB False True g0 (RIReg src) g0,
4135 ADD True False g0 (RIImm (ImmInt 0)) dst]
4136 return (Any I32 code__2)
4138 condIntReg NE x y = do
4139 (src1, code1) <- getSomeReg x
4140 (src2, code2) <- getSomeReg y
4141 tmp1 <- getNewRegNat I32
4142 tmp2 <- getNewRegNat I32
4144 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4145 XOR False src1 (RIReg src2) dst,
4146 SUB False True g0 (RIReg dst) g0,
4147 ADD True False g0 (RIImm (ImmInt 0)) dst]
4148 return (Any I32 code__2)
4150 condIntReg cond x y = do
4151 BlockId lbl1 <- getBlockIdNat
4152 BlockId lbl2 <- getBlockIdNat
4153 CondCode _ cond cond_code <- condIntCode cond x y
4155 code__2 dst = cond_code `appOL` toOL [
4156 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4157 OR False g0 (RIImm (ImmInt 0)) dst,
4158 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4159 NEWBLOCK (BlockId lbl1),
4160 OR False g0 (RIImm (ImmInt 1)) dst,
4161 NEWBLOCK (BlockId lbl2)]
4162 return (Any I32 code__2)
4164 condFltReg cond x y = do
4165 BlockId lbl1 <- getBlockIdNat
4166 BlockId lbl2 <- getBlockIdNat
4167 CondCode _ cond cond_code <- condFltCode cond x y
4169 code__2 dst = cond_code `appOL` toOL [
4171 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4172 OR False g0 (RIImm (ImmInt 0)) dst,
4173 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4174 NEWBLOCK (BlockId lbl1),
4175 OR False g0 (RIImm (ImmInt 1)) dst,
4176 NEWBLOCK (BlockId lbl2)]
4177 return (Any I32 code__2)
4179 #endif /* sparc_TARGET_ARCH */
4181 #if powerpc_TARGET_ARCH
4182 condReg getCond = do
4183 lbl1 <- getBlockIdNat
4184 lbl2 <- getBlockIdNat
4185 CondCode _ cond cond_code <- getCond
4187 {- code dst = cond_code `appOL` toOL [
4196 code dst = cond_code
4200 RLWINM dst dst (bit + 1) 31 31
4203 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4206 (bit, do_negate) = case cond of
4220 return (Any I32 code)
4222 condIntReg cond x y = condReg (condIntCode cond x y)
4223 condFltReg cond x y = condReg (condFltCode cond x y)
4224 #endif /* powerpc_TARGET_ARCH */
4227 -- -----------------------------------------------------------------------------
4228 -- 'trivial*Code': deal with trivial instructions
4230 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4231 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4232 -- Only look for constants on the right hand side, because that's
4233 -- where the generic optimizer will have put them.
4235 -- Similarly, for unary instructions, we don't have to worry about
4236 -- matching an StInt as the argument, because genericOpt will already
4237 -- have handled the constant-folding.
4241 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4242 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4243 -> Maybe (Operand -> Operand -> Instr)
4244 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4245 -> Maybe (Operand -> Operand -> Instr)
4246 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4247 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4249 -> CmmExpr -> CmmExpr -- the two arguments
4252 #ifndef powerpc_TARGET_ARCH
4255 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4256 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4257 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4258 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4260 -> CmmExpr -> CmmExpr -- the two arguments
4266 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4267 ,IF_ARCH_i386 ((Operand -> Instr)
4268 ,IF_ARCH_x86_64 ((Operand -> Instr)
4269 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4270 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4272 -> CmmExpr -- the one argument
4275 #ifndef powerpc_TARGET_ARCH
4278 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4279 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4280 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4281 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4283 -> CmmExpr -- the one argument
4287 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4289 #if alpha_TARGET_ARCH
4291 trivialCode instr x (StInt y)
4293 = getRegister x `thenNat` \ register ->
4294 getNewRegNat IntRep `thenNat` \ tmp ->
4296 code = registerCode register tmp
4297 src1 = registerName register tmp
4298 src2 = ImmInt (fromInteger y)
4299 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4301 return (Any IntRep code__2)
4303 trivialCode instr x y
4304 = getRegister x `thenNat` \ register1 ->
4305 getRegister y `thenNat` \ register2 ->
4306 getNewRegNat IntRep `thenNat` \ tmp1 ->
4307 getNewRegNat IntRep `thenNat` \ tmp2 ->
4309 code1 = registerCode register1 tmp1 []
4310 src1 = registerName register1 tmp1
4311 code2 = registerCode register2 tmp2 []
4312 src2 = registerName register2 tmp2
4313 code__2 dst = asmSeqThen [code1, code2] .
4314 mkSeqInstr (instr src1 (RIReg src2) dst)
4316 return (Any IntRep code__2)
4319 trivialUCode instr x
4320 = getRegister x `thenNat` \ register ->
4321 getNewRegNat IntRep `thenNat` \ tmp ->
4323 code = registerCode register tmp
4324 src = registerName register tmp
4325 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4327 return (Any IntRep code__2)
4330 trivialFCode _ instr x y
4331 = getRegister x `thenNat` \ register1 ->
4332 getRegister y `thenNat` \ register2 ->
4333 getNewRegNat F64 `thenNat` \ tmp1 ->
4334 getNewRegNat F64 `thenNat` \ tmp2 ->
4336 code1 = registerCode register1 tmp1
4337 src1 = registerName register1 tmp1
4339 code2 = registerCode register2 tmp2
4340 src2 = registerName register2 tmp2
4342 code__2 dst = asmSeqThen [code1 [], code2 []] .
4343 mkSeqInstr (instr src1 src2 dst)
4345 return (Any F64 code__2)
4347 trivialUFCode _ instr x
4348 = getRegister x `thenNat` \ register ->
4349 getNewRegNat F64 `thenNat` \ tmp ->
4351 code = registerCode register tmp
4352 src = registerName register tmp
4353 code__2 dst = code . mkSeqInstr (instr src dst)
4355 return (Any F64 code__2)
4357 #endif /* alpha_TARGET_ARCH */
4359 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4361 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4364 The Rules of the Game are:
4366 * You cannot assume anything about the destination register dst;
4367 it may be anything, including a fixed reg.
4369 * You may compute an operand into a fixed reg, but you may not
4370 subsequently change the contents of that fixed reg. If you
4371 want to do so, first copy the value either to a temporary
4372 or into dst. You are free to modify dst even if it happens
4373 to be a fixed reg -- that's not your problem.
4375 * You cannot assume that a fixed reg will stay live over an
4376 arbitrary computation. The same applies to the dst reg.
4378 * Temporary regs obtained from getNewRegNat are distinct from
4379 each other and from all other regs, and stay live over
4380 arbitrary computations.
4382 --------------------
4384 SDM's version of The Rules:
4386 * If getRegister returns Any, that means it can generate correct
4387 code which places the result in any register, period. Even if that
4388 register happens to be read during the computation.
4390 Corollary #1: this means that if you are generating code for an
4391 operation with two arbitrary operands, you cannot assign the result
4392 of the first operand into the destination register before computing
4393 the second operand. The second operand might require the old value
4394 of the destination register.
4396 Corollary #2: A function might be able to generate more efficient
4397 code if it knows the destination register is a new temporary (and
4398 therefore not read by any of the sub-computations).
4400 * If getRegister returns Any, then the code it generates may modify only:
4401 (a) fresh temporaries
4402 (b) the destination register
4403 (c) known registers (eg. %ecx is used by shifts)
4404 In particular, it may *not* modify global registers, unless the global
4405 register happens to be the destination register.
4408 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4409 | not (is64BitLit lit_a) = do
4410 b_code <- getAnyReg b
4413 = b_code dst `snocOL`
4414 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4416 return (Any rep code)
-- Fall-through case: no commutable/reversed form applies, so defer to the
-- generic two-operand code generator (the reversed-instruction argument is
-- deliberately ignored here).
trivialCode rep instr _maybe_revinstr a b = genTrivialCode rep instr a b
4420 -- This is re-used for floating pt instructions too.
4421 genTrivialCode rep instr a b = do
4422 (b_op, b_code) <- getNonClobberedOperand b
4423 a_code <- getAnyReg a
4424 tmp <- getNewRegNat rep
4426 -- We want the value of b to stay alive across the computation of a.
4427 -- But, we want to calculate a straight into the destination register,
4428 -- because the instruction only has two operands (dst := dst `op` src).
4429 -- The troublesome case is when the result of b is in the same register
4430 -- as the destination reg. In this case, we have to save b in a
4431 -- new temporary across the computation of a.
4433 | dst `regClashesWithOp` b_op =
4435 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4437 instr (OpReg tmp) (OpReg dst)
4441 instr b_op (OpReg dst)
4443 return (Any rep code)
-- | Does the given register appear anywhere in the operand?  Used to decide
-- whether operand b must be saved in a temporary before computing a.
reg `regClashesWithOp` op
  = case op of
      OpReg reg2   -> reg == reg2
      OpAddr amode -> reg `elem` addrModeRegs amode
      _            -> False
4451 trivialUCode rep instr x = do
4452 x_code <- getAnyReg x
4458 return (Any rep code)
4462 #if i386_TARGET_ARCH
4464 trivialFCode pk instr x y = do
4465 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4466 (y_reg, y_code) <- getSomeReg y
4471 instr pk x_reg y_reg dst
4473 return (Any pk code)
4477 #if x86_64_TARGET_ARCH
-- On x86_64, SSE2 float ops are ordinary two-operand instructions, so the
-- generic integer code generator works for them too (eta-reduced).
trivialFCode pk instr = genTrivialCode pk (instr pk)
4485 trivialUFCode rep instr x = do
4486 (x_reg, x_code) <- getSomeReg x
4492 return (Any rep code)
4494 #endif /* i386_TARGET_ARCH */
4496 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4498 #if sparc_TARGET_ARCH
4500 trivialCode pk instr x (CmmLit (CmmInt y d))
4503 (src1, code) <- getSomeReg x
4504 tmp <- getNewRegNat I32
4506 src2 = ImmInt (fromInteger y)
4507 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4508 return (Any I32 code__2)
4510 trivialCode pk instr x y = do
4511 (src1, code1) <- getSomeReg x
4512 (src2, code2) <- getSomeReg y
4513 tmp1 <- getNewRegNat I32
4514 tmp2 <- getNewRegNat I32
4516 code__2 dst = code1 `appOL` code2 `snocOL`
4517 instr src1 (RIReg src2) dst
4518 return (Any I32 code__2)
4521 trivialFCode pk instr x y = do
4522 (src1, code1) <- getSomeReg x
4523 (src2, code2) <- getSomeReg y
4524 tmp1 <- getNewRegNat (cmmExprRep x)
4525 tmp2 <- getNewRegNat (cmmExprRep y)
4526 tmp <- getNewRegNat F64
4528 promote x = FxTOy F32 F64 x tmp
4535 code1 `appOL` code2 `snocOL`
4536 instr pk src1 src2 dst
4537 else if pk1 == F32 then
4538 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4539 instr F64 tmp src2 dst
4541 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4542 instr F64 src1 tmp dst
4543 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4546 trivialUCode pk instr x = do
4547 (src, code) <- getSomeReg x
4548 tmp <- getNewRegNat pk
4550 code__2 dst = code `snocOL` instr (RIReg src) dst
4551 return (Any pk code__2)
4554 trivialUFCode pk instr x = do
4555 (src, code) <- getSomeReg x
4556 tmp <- getNewRegNat pk
4558 code__2 dst = code `snocOL` instr src dst
4559 return (Any pk code__2)
4561 #endif /* sparc_TARGET_ARCH */
4563 #if powerpc_TARGET_ARCH
4566 Wolfgang's PowerPC version of The Rules:
4568 A slightly modified version of The Rules to take advantage of the fact
4569 that PowerPC instructions work on all registers and don't implicitly
4570 clobber any fixed registers.
4572 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4574 * If getRegister returns Any, then the code it generates may modify only:
4575 (a) fresh temporaries
4576 (b) the destination register
4577 It may *not* modify global registers, unless the global
4578 register happens to be the destination register.
4579 It may not clobber any other registers. In fact, only ccalls clobber any
4581 Also, it may not modify the counter register (used by genCCall).
4583 Corollary: If a getRegister for a subexpression returns Fixed, you need
4584 not move it to a fresh temporary before evaluating the next subexpression.
4585 The Fixed register won't be modified.
4586 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4588 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4589 the value of the destination register.
4592 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4593 | Just imm <- makeImmediate rep signed y
4595 (src1, code1) <- getSomeReg x
4596 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4597 return (Any rep code)
-- General case: evaluate both operands into registers and emit the
-- three-register form of the instruction.  (The 'signed' flag only matters
-- for the immediate case above; it is unused here.)
trivialCode rep _signed instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = xCode `appOL` yCode `snocOL` instr dst xReg (RIReg yReg)
    return (Any rep assemble)
-- | Like 'trivialCode', but for instructions that have no immediate form:
-- both operands are always forced into registers.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = xCode `appOL` yCode `snocOL` instr dst xReg yReg
    return (Any rep assemble)
-- Unary operation: compute the operand into a register, then apply the
-- (dst, src) form of the instruction.
trivialUCode rep instr x = do
    (srcReg, srcCode) <- getSomeReg x
    let assemble dst = srcCode `snocOL` instr dst srcReg
    return (Any rep assemble)
4618 -- There is no "remainder" instruction on the PPC, so we have to do
4620 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4622 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4623 -> CmmExpr -> CmmExpr -> NatM Register
4624 remainderCode rep div x y = do
4625 (src1, code1) <- getSomeReg x
4626 (src2, code2) <- getSomeReg y
4627 let code dst = code1 `appOL` code2 `appOL` toOL [
4629 MULLW dst dst (RIReg src2),
4632 return (Any rep code)
4634 #endif /* powerpc_TARGET_ARCH */
4637 -- -----------------------------------------------------------------------------
4638 -- Coercing to/from integer/floating-point...
4640 -- When going to integer, we truncate (round towards 0).
4642 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4643 -- conversions. We have to store temporaries in memory to move
4644 -- between the integer and the floating point register sets.
4646 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4647 -- pretend, on sparc at least, that double and float regs are separate
4648 -- kinds, so the value has to be computed into one kind before being
4649 -- explicitly "converted" to live in the other kind.
4651 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4652 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4654 #if sparc_TARGET_ARCH
4655 coerceDbl2Flt :: CmmExpr -> NatM Register
4656 coerceFlt2Dbl :: CmmExpr -> NatM Register
4659 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4661 #if alpha_TARGET_ARCH
4664 = getRegister x `thenNat` \ register ->
4665 getNewRegNat IntRep `thenNat` \ reg ->
4667 code = registerCode register reg
4668 src = registerName register reg
4670 code__2 dst = code . mkSeqInstrs [
4672 LD TF dst (spRel 0),
4675 return (Any F64 code__2)
4679 = getRegister x `thenNat` \ register ->
4680 getNewRegNat F64 `thenNat` \ tmp ->
4682 code = registerCode register tmp
4683 src = registerName register tmp
4685 code__2 dst = code . mkSeqInstrs [
4687 ST TF tmp (spRel 0),
4690 return (Any IntRep code__2)
4692 #endif /* alpha_TARGET_ARCH */
4694 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4696 #if i386_TARGET_ARCH
4698 coerceInt2FP from to x = do
4699 (x_reg, x_code) <- getSomeReg x
4701 opc = case to of F32 -> GITOF; F64 -> GITOD
4702 code dst = x_code `snocOL` opc x_reg dst
4703 -- ToDo: works for non-I32 reps?
4705 return (Any to code)
4709 coerceFP2Int from to x = do
4710 (x_reg, x_code) <- getSomeReg x
4712 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4713 code dst = x_code `snocOL` opc x_reg dst
4714 -- ToDo: works for non-I32 reps?
4716 return (Any to code)
4718 #endif /* i386_TARGET_ARCH */
4720 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4722 #if x86_64_TARGET_ARCH
4724 coerceFP2Int from to x = do
4725 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4727 opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4728 code dst = x_code `snocOL` opc x_op dst
4730 return (Any to code) -- works even if the destination rep is <I32
4732 coerceInt2FP from to x = do
4733 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4735 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4736 code dst = x_code `snocOL` opc x_op dst
4738 return (Any to code) -- works even if the destination rep is <I32
4740 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4741 coerceFP2FP to x = do
4742 (x_reg, x_code) <- getSomeReg x
4744 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4745 code dst = x_code `snocOL` opc x_reg dst
4747 return (Any to code)
4751 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4753 #if sparc_TARGET_ARCH
4755 coerceInt2FP pk1 pk2 x = do
4756 (src, code) <- getSomeReg x
4758 code__2 dst = code `appOL` toOL [
4759 ST pk1 src (spRel (-2)),
4760 LD pk1 (spRel (-2)) dst,
4761 FxTOy pk1 pk2 dst dst]
4762 return (Any pk2 code__2)
4765 coerceFP2Int pk fprep x = do
4766 (src, code) <- getSomeReg x
4767 reg <- getNewRegNat fprep
4768 tmp <- getNewRegNat pk
4770 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4772 FxTOy fprep pk src tmp,
4773 ST pk tmp (spRel (-2)),
4774 LD pk (spRel (-2)) dst]
4775 return (Any pk code__2)
-- Narrow a double to a single-precision float via the FxTOy conversion
-- instruction (sparc treats the two as separate register kinds).
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    let conv dst = code `snocOL` FxTOy F64 F32 src dst
    return (Any F32 conv)
-- Widen a single-precision float to a double via the FxTOy conversion
-- instruction (the inverse of 'coerceDbl2Flt').
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    let conv dst = code `snocOL` FxTOy F32 F64 src dst
    return (Any F64 conv)
4787 #endif /* sparc_TARGET_ARCH */
4789 #if powerpc_TARGET_ARCH
4790 coerceInt2FP fromRep toRep x = do
4791 (src, code) <- getSomeReg x
4792 lbl <- getNewLabelNat
4793 itmp <- getNewRegNat I32
4794 ftmp <- getNewRegNat F64
4795 dflags <- getDynFlagsNat
4796 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4797 Amode addr addr_code <- getAmode dynRef
4799 code' dst = code `appOL` maybe_exts `appOL` toOL [
4802 CmmStaticLit (CmmInt 0x43300000 I32),
4803 CmmStaticLit (CmmInt 0x80000000 I32)],
4804 XORIS itmp src (ImmInt 0x8000),
4805 ST I32 itmp (spRel 3),
4806 LIS itmp (ImmInt 0x4330),
4807 ST I32 itmp (spRel 2),
4808 LD F64 ftmp (spRel 2)
4809 ] `appOL` addr_code `appOL` toOL [
4811 FSUB F64 dst ftmp dst
4812 ] `appOL` maybe_frsp dst
4814 maybe_exts = case fromRep of
4815 I8 -> unitOL $ EXTS I8 src src
4816 I16 -> unitOL $ EXTS I16 src src
4818 maybe_frsp dst = case toRep of
4819 F32 -> unitOL $ FRSP dst dst
4821 return (Any toRep code')
4823 coerceFP2Int fromRep toRep x = do
4824 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4825 (src, code) <- getSomeReg x
4826 tmp <- getNewRegNat F64
4828 code' dst = code `appOL` toOL [
4829 -- convert to int in FP reg
4831 -- store value (64bit) from FP to stack
4832 ST F64 tmp (spRel 2),
4833 -- read low word of value (high word is undefined)
4834 LD I32 dst (spRel 3)]
4835 return (Any toRep code')
4836 #endif /* powerpc_TARGET_ARCH */
4839 -- -----------------------------------------------------------------------------
4840 -- eXTRA_STK_ARGS_HERE
4842 -- We (allegedly) put the first six C-call arguments in registers;
4843 -- where do we start putting the rest of them?
4845 -- Moved from MachInstrs (SDM):
4847 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4848 eXTRA_STK_ARGS_HERE :: Int
4850 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))