2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr )
34 -- Our intermediate code:
36 import PprCmm ( pprExpr )
39 import ClosureInfo ( C_SRT(..) )
42 import StaticFlags ( opt_PIC )
43 import ForeignCall ( CCallConv(..) )
46 import qualified Outputable as O
49 import FastBool ( isFastTrue )
50 import Constants ( wORD_SIZE )
52 import Debug.Trace ( trace )
54 import Control.Monad ( mapAndUnzipM )
55 import Data.Maybe ( fromJust )
60 -- -----------------------------------------------------------------------------
61 -- Top-level of the instruction selector
63 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
64 -- They are really trees of insns to facilitate fast appending, where a
65 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.  OrdList gives cheap append/snoc/cons; the tree is flattened
-- only when the final instruction list is needed.
68 type InstrBlock = OrdList Instr
-- | Entry point of the instruction selector: translate one top-level
-- Cmm entity (a procedure or a data section) into native code.
-- A CmmProc may expand into extra top-level data sections (e.g. jump
-- tables), which are appended after the proc itself.
70 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
71 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
72 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
73 picBaseMb <- getPicBaseMaybeNat
74 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
75 tops = proc : concat statics
-- The two alternatives below scrutinise picBaseMb: when a PIC base
-- register was requested, the proc must be prefixed with code that
-- initialises it.
77 Just picBase -> initializePicBase picBase tops
78 Nothing -> return tops
80 cmmTopCodeGen (CmmData sec dat) = do
81 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for a single Cmm basic block.  Returns the
-- (possibly several) native basic blocks it expands to, plus any
-- top-level data (LDATA) the selector emitted along the way.
83 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
84 basicBlockCodeGen (BasicBlock id stmts) = do
85 instrs <- stmtsToInstrs stmts
86 -- code generation may introduce new basic block boundaries, which
87 -- are indicated by the NEWBLOCK instruction. We must split up the
88 -- instruction stream into basic blocks again. Also, we extract
-- LDATA pseudo-instructions into separate top-level data sections.
-- foldrOL runs right-to-left, so 'instrs' accumulates the current
-- (leftmost-unfinished) block and 'blocks' the completed ones.
91 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
93 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
94 = ([], BasicBlock id instrs : blocks, statics)
95 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
96 = (instrs, blocks, CmmData sec dat:statics)
97 mkBlocks instr (instrs,blocks,statics)
98 = (instr:instrs, blocks, statics)
100 return (BasicBlock id top : other_blocks, statics)
-- | Select instructions for a list of statements and concatenate the
-- per-statement instruction blocks in order.
102 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
104 = do instrss <- mapM stmtToInstrs stmts
105 return (concatOL instrss)
-- | Select instructions for a single Cmm statement.  Assignments
-- dispatch on the type of the assigned value: float, 64-bit-on-32-bit
-- (CPP-guarded), or plain integer/pointer.
107 stmtToInstrs :: CmmStmt -> NatM InstrBlock
108 stmtToInstrs stmt = case stmt of
109 CmmNop -> return nilOL
110 CmmComment s -> return (unitOL (COMMENT s))
-- Register assignment: pick the code generator by the register's type.
113 | isFloatType ty -> assignReg_FltCode size reg src
114 #if WORD_SIZE_IN_BITS==32
115 | isWord64 ty -> assignReg_I64Code reg src
117 | otherwise -> assignReg_IntCode size reg src
118 where ty = cmmRegType reg
119 size = cmmTypeSize ty
-- Memory store: same dispatch, but keyed on the type of the stored value.
122 | isFloatType ty -> assignMem_FltCode size addr src
123 #if WORD_SIZE_IN_BITS==32
124 | isWord64 ty -> assignMem_I64Code addr src
126 | otherwise -> assignMem_IntCode size addr src
127 where ty = cmmExprType src
128 size = cmmTypeSize ty
130 CmmCall target result_regs args _ _
131 -> genCCall target result_regs args
133 CmmBranch id -> genBranch id
134 CmmCondBranch arg id -> genCondJump id arg
135 CmmSwitch arg ids -> genSwitch arg ids
136 CmmJump arg params -> genJump arg
-- NOTE(review): this panic presumably belongs to a CmmReturn
-- alternative — the CPS pass is expected to have removed all returns.
138 panic "stmtToInstrs: return statement should have been cps'd away"
140 -- -----------------------------------------------------------------------------
141 -- General things for putting together code sequences
143 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
144 -- CmmExprs into CmmRegOff?
-- Rewrite a CmmRegOff node into an explicit MO_Add of the register and
-- the (sign-extended) offset literal, both at the register's natural
-- width, so the ordinary expression selectors can handle it.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let w = typeWidth (cmmRegType reg)
    in  CmmMachOp (MO_Add w)
                  [CmmReg reg, CmmLit (CmmInt (fromIntegral off) w)]
150 -- -----------------------------------------------------------------------------
151 -- Code gen for 64-bit arithmetic on 32-bit platforms
154 Simple support for generating 64-bit code (i.e., 64 bit values and 64
155 bit assignments) on 32-bit platforms. Unlike the main code generator
156 we merely shoot for generating working code as simply as possible, and
157 pay little attention to code quality. Specifically, there is no
158 attempt to deal cleverly with the fixed-vs-floating register
159 distinction; all values are generated into (pairs of) floating
160 registers, even if this would mean some redundant reg-reg moves as a
161 result. Only one of the VRegUniques is returned, since it will be
162 of the VRegUniqueLo form, and the upper-half VReg can be determined
163 by applying getHiVRegFromLo to it.
-- | Instructions plus the virtual register holding the LOW 32 bits of
-- a 64-bit value; the high-half register is derived via getHiVRegFromLo.
166 data ChildCode64 -- a.k.a "Register64"
169 Reg -- the lower 32-bit temporary which contains the
170 -- result; use getHiVRegFromLo to find the other
171 -- VRegUnique. Rules of this simplified insn
172 -- selection game are therefore that the returned
173 -- Reg may be modified
175 #if WORD_SIZE_IN_BITS==32
-- Store a 64-bit value to memory: address expression, value expression.
176 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- Assign a 64-bit value to a (local) register.
177 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
180 #ifndef x86_64_TARGET_ARCH
-- Select instructions computing a 64-bit expression into a register pair.
181 iselExpr64 :: CmmExpr -> NatM ChildCode64
184 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value as two 32-bit MOVs, low word first
-- (little-endian: low half at addr, high half at addr+4).
188 assignMem_I64Code addrTree valueTree = do
189 Amode addr addr_code <- getAmode addrTree
190 ChildCode64 vcode rlo <- iselExpr64 valueTree
192 rhi = getHiVRegFromLo rlo
194 -- Little-endian store
195 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
196 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
198 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: register-to-register assignment of a 64-bit value, moving both
-- halves of the virtual register pair.
201 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
202 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
204 r_dst_lo = mkVReg u_dst II32
205 r_dst_hi = getHiVRegFromLo r_dst_lo
206 r_src_hi = getHiVRegFromLo r_src_lo
207 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
208 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
211 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only CmmLocal destinations are supported; anything else is a bug upstream.
214 assignReg_I64Code lvalue valueTree
215 = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386 iselExpr64: 64-bit literal is split into two 32-bit immediates;
-- r = low word, q = high word.
219 iselExpr64 (CmmLit (CmmInt i _)) = do
220 (rlo,rhi) <- getNewRegPairNat II32
222 r = fromIntegral (fromIntegral i :: Word32)
223 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
225 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
226 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
229 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, low from addr, high from addr+4.
231 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
232 Amode addr addr_code <- getAmode addrTree
233 (rlo,rhi) <- getNewRegPairNat II32
235 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
236 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
239 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local already lives in a register pair: no code needed.
243 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
244 = return (ChildCode64 nilOL (mkVReg vu II32))
246 -- we handle addition, but rather badly
-- Add a 64-bit literal: ADD low immediate, then ADC the high immediate
-- so the carry propagates.
247 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
248 ChildCode64 code1 r1lo <- iselExpr64 e1
249 (rlo,rhi) <- getNewRegPairNat II32
251 r = fromIntegral (fromIntegral i :: Word32)
252 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
253 r1hi = getHiVRegFromLo r1lo
255 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
256 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
257 MOV II32 (OpReg r1hi) (OpReg rhi),
258 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
260 return (ChildCode64 code rlo)
-- General 64-bit addition: ADD lows, ADC highs.
262 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
263 ChildCode64 code1 r1lo <- iselExpr64 e1
264 ChildCode64 code2 r2lo <- iselExpr64 e2
265 (rlo,rhi) <- getNewRegPairNat II32
267 r1hi = getHiVRegFromLo r1lo
268 r2hi = getHiVRegFromLo r2lo
271 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
272 ADD II32 (OpReg r2lo) (OpReg rlo),
273 MOV II32 (OpReg r1hi) (OpReg rhi),
274 ADC II32 (OpReg r2hi) (OpReg rhi) ]
276 return (ChildCode64 code rlo)
-- Zero-extension to 64 bits: compute the operand into the low half and
-- clear the high half.
278 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
280 r_dst_lo <- getNewRegNat II32
281 let r_dst_hi = getHiVRegFromLo r_dst_lo
284 ChildCode64 (code `snocOL`
285 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
-- Catch-all: unsupported 64-bit expression shape.
290 = pprPanic "iselExpr64(i386)" (ppr expr)
292 #endif /* i386_TARGET_ARCH */
294 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
296 #if sparc_TARGET_ARCH
-- sparc (big-endian): store high half at offset 0, low half at offset 4.
298 assignMem_I64Code addrTree valueTree = do
299 Amode addr addr_code <- getAmode addrTree
300 ChildCode64 vcode rlo <- iselExpr64 valueTree
301 (src, code) <- getSomeReg addrTree
303 rhi = getHiVRegFromLo rlo
305 mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
306 mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
307 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: register assignment of a 64-bit value; mkMOV synthesises a
-- register move as "or %g0, src, dst".
309 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
310 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
312 r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
313 r_dst_hi = getHiVRegFromLo r_dst_lo
314 r_src_hi = getHiVRegFromLo r_src_lo
315 mov_lo = mkMOV r_src_lo r_dst_lo
316 mov_hi = mkMOV r_src_hi r_dst_hi
317 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
318 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
319 assignReg_I64Code lvalue valueTree
320 = panic "assignReg_I64Code(sparc): invalid lvalue"
323 -- Don't delete this -- it's very handy for debugging.
325 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
326 -- = panic "iselExpr64(???)"
-- NOTE(review): the pattern requires an AddrRegReg amode but then
-- addresses via r1 only, discarding r2 — looks suspicious; verify
-- against how getAmode builds amodes on sparc before relying on this.
328 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
329 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
330 rlo <- getNewRegNat II32
331 let rhi = getHiVRegFromLo rlo
332 mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
333 mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
335 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- 64-bit local: copy both halves into a fresh register pair.
339 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
340 r_dst_lo <- getNewRegNat II32
341 let r_dst_hi = getHiVRegFromLo r_dst_lo
342 r_src_lo = mkVReg uq II32
343 r_src_hi = getHiVRegFromLo r_src_lo
344 mov_lo = mkMOV r_src_lo r_dst_lo
345 mov_hi = mkMOV r_src_hi r_dst_hi
346 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
348 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
-- Catch-all: unsupported 64-bit expression shape.
352 = pprPanic "iselExpr64(sparc)" (ppr expr)
354 #endif /* sparc_TARGET_ARCH */
356 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
358 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for both halves of a 64-bit memory
-- operand: (high-word amode, low-word amode = high+4, setup code).
-- If the amode cannot take a +4 offset, fall back to materialising the
-- address in a register and using reg+0 / reg+4.
360 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
361 getI64Amodes addrTree = do
362 Amode hi_addr addr_code <- getAmode addrTree
363 case addrOffset hi_addr 4 of
364 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
365 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
366 return (AddrRegImm hi_ptr (ImmInt 0),
367 AddrRegImm hi_ptr (ImmInt 4),
-- powerpc (big-endian): high half at the base amode, low half at +4.
370 assignMem_I64Code addrTree valueTree = do
371 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
372 ChildCode64 vcode rlo <- iselExpr64 valueTree
374 rhi = getHiVRegFromLo rlo
377 mov_hi = ST II32 rhi hi_addr
378 mov_lo = ST II32 rlo lo_addr
380 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- powerpc: register assignment of a 64-bit value using MR (move register)
-- for each half.
382 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
383 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
385 r_dst_lo = mkVReg u_dst II32
386 r_dst_hi = getHiVRegFromLo r_dst_lo
387 r_src_hi = getHiVRegFromLo r_src_lo
388 mov_lo = MR r_dst_lo r_src_lo
389 mov_hi = MR r_dst_hi r_src_hi
392 vcode `snocOL` mov_lo `snocOL` mov_hi
395 assignReg_I64Code lvalue valueTree
396 = panic "assignReg_I64Code(powerpc): invalid lvalue"
399 -- Don't delete this -- it's very handy for debugging.
401 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
402 -- = panic "iselExpr64(???)"
-- powerpc: 64-bit load as two 32-bit LDs (high at base, low at +4).
404 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
405 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
406 (rlo, rhi) <- getNewRegPairNat II32
407 let mov_hi = LD II32 rhi hi_addr
408 mov_lo = LD II32 rlo lo_addr
409 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local already lives in a register pair: no code needed.
412 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
413 = return (ChildCode64 nilOL (mkVReg vu II32))
-- powerpc: build a 64-bit literal from four 16-bit halves.
-- Each 32-bit register is loaded with LIS (load immediate shifted,
-- i.e. the upper 16 bits) followed by OR-ing in the lower 16 bits:
--   rlo = (half1 << 16) | half0,  rhi = (half3 << 16) | half2.
415 iselExpr64 (CmmLit (CmmInt i _)) = do
416 (rlo,rhi) <- getNewRegPairNat II32
418 half0 = fromIntegral (fromIntegral i :: Word16)
419 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
420 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
421 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
424 LIS rlo (ImmInt half1),
425 OR rlo rlo (RIImm $ ImmInt half0),
426 LIS rhi (ImmInt half3),
-- BUG FIX: this OR must target rhi, not rlo.  With "OR rlo rlo" the
-- low word was corrupted with half2 and the high word never received
-- its lower 16 bits.
427 OR rhi rhi (RIImm $ ImmInt half2)
430 return (ChildCode64 code rlo)
-- powerpc 64-bit addition: ADDC on the low halves sets the carry,
-- which the (elided) high-half add consumes.
432 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
433 ChildCode64 code1 r1lo <- iselExpr64 e1
434 ChildCode64 code2 r2lo <- iselExpr64 e2
435 (rlo,rhi) <- getNewRegPairNat II32
437 r1hi = getHiVRegFromLo r1lo
438 r2hi = getHiVRegFromLo r2lo
441 toOL [ ADDC rlo r1lo r2lo,
444 return (ChildCode64 code rlo)
-- Zero-extend a 32-bit value: move into the low half, zero the high half.
446 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
447 (expr_reg,expr_code) <- getSomeReg expr
448 (rlo, rhi) <- getNewRegPairNat II32
449 let mov_hi = LI rhi (ImmInt 0)
450 mov_lo = MR rlo expr_reg
451 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Catch-all: unsupported 64-bit expression shape.
454 = pprPanic "iselExpr64(powerpc)" (ppr expr)
456 #endif /* powerpc_TARGET_ARCH */
459 -- -----------------------------------------------------------------------------
460 -- The 'Register' type
462 -- 'Register's passed up the tree. If the stix code forces the register
463 -- to live in a pre-decided machine register, it comes out as @Fixed@;
464 -- otherwise, it comes out as @Any@, and the parent can decide which
465 -- register to put it in.
-- Fixed: result is pinned in the given Reg; code is already complete.
-- Any: caller supplies the destination Reg, and the function builds code.
468 = Fixed Size Reg InstrBlock
469 | Any Size (Reg -> InstrBlock)
-- Replace the Size carried by a Register without touching its code or
-- register choice.  Purely a re-labelling: no instructions are added.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep register new_size =
    case register of
        Fixed _ reg code -> Fixed new_size reg code
        Any _ codefn     -> Any new_size codefn
477 -- -----------------------------------------------------------------------------
478 -- Utils based on getRegister, below
480 -- The dual to getAnyReg: compute an expression into a register, but
481 -- we don't mind which one it is.
-- For an Any result we allocate a fresh temporary and instantiate the
-- code with it; the Fixed branch is elided here but returns the pinned
-- register and its code unchanged.
482 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
484 r <- getRegister expr
487 tmp <- getNewRegNat rep
488 return (tmp, code tmp)
492 -- -----------------------------------------------------------------------------
493 -- Grab the Reg for a CmmReg
-- Locals map to virtual registers keyed by their unique; globals must
-- already have been narrowed to ones backed by a real machine register
-- on this platform (anything else is a panic).
495 getRegisterReg :: CmmReg -> Reg
497 getRegisterReg (CmmLocal (LocalReg u pk))
498 = mkVReg u (cmmTypeSize pk)
500 getRegisterReg (CmmGlobal mid)
501 = case get_GlobalReg_reg_or_addr mid of
502 Left (RealReg rrno) -> RealReg rrno
503 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
504 -- By this stage, the only MagicIds remaining should be the
505 -- ones which map to a real machine register on this
506 -- platform. Hence ...
509 -- -----------------------------------------------------------------------------
510 -- Generate code to get a subtree into a Register
512 -- Don't delete this -- it's very handy for debugging.
514 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
515 -- = panic "getRegister(???)"
517 getRegister :: CmmExpr -> NatM Register
519 #if !x86_64_TARGET_ARCH
520 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
521 -- register, it can only be used for rip-relative addressing.
522 getRegister (CmmReg (CmmGlobal PicBaseReg))
524 reg <- getPicBaseNat wordSize
525 return (Fixed wordSize reg nilOL)
-- A plain register reference needs no code at all.
528 getRegister (CmmReg reg)
529 = return (Fixed (cmmTypeSize (cmmRegType reg))
530 (getRegisterReg reg) nilOL)
-- Normalise reg+offset into an explicit add, then retry.
532 getRegister tree@(CmmRegOff _ _)
533 = getRegister (mangleIndexTree tree)
536 #if WORD_SIZE_IN_BITS==32
537 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
538 -- TO_W_(x), TO_W_(x >> 32)
-- Truncating (x >> 32) to 32 bits just selects the high-half register.
540 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
541 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
542 ChildCode64 code rlo <- iselExpr64 x
543 return $ Fixed II32 (getHiVRegFromLo rlo) code
545 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
546 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
547 ChildCode64 code rlo <- iselExpr64 x
548 return $ Fixed II32 (getHiVRegFromLo rlo) code
-- Truncating to 32 bits without a shift selects the low-half register.
550 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
551 ChildCode64 code rlo <- iselExpr64 x
552 return $ Fixed II32 rlo code
554 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
555 ChildCode64 code rlo <- iselExpr64 x
556 return $ Fixed II32 rlo code
560 -- end of machine-"independent" bit; here we go on the rest...
560 -- end of machine-"independent" bit; here we go on the rest...
562 #if alpha_TARGET_ARCH
-- Alpha backend (CPP-guarded).  Float literal: emit it into a
-- read-only data section and load it via a temporary address register.
564 getRegister (StDouble d)
565 = getBlockIdNat `thenNat` \ lbl ->
566 getNewRegNat PtrRep `thenNat` \ tmp ->
567 let code dst = mkSeqInstrs [
568 LDATA RoDataSegment lbl [
569 DATA TF [ImmLab (rational d)]
571 LDA tmp (AddrImm (ImmCLbl lbl)),
572 LD TF dst (AddrReg tmp)]
574 return (Any FF64 code)
-- Unary primops: simple ALU/FPU ops inline; transcendental float ops
-- are lowered to C calls (the 'fn' table below picks the libm symbol).
576 getRegister (StPrim primop [x]) -- unary PrimOps
578 IntNegOp -> trivialUCode (NEG Q False) x
580 NotOp -> trivialUCode NOT x
582 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
583 DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
585 OrdOp -> coerceIntCode IntRep x
588 Float2IntOp -> coerceFP2Int x
589 Int2FloatOp -> coerceInt2FP pr x
590 Double2IntOp -> coerceFP2Int x
591 Int2DoubleOp -> coerceInt2FP pr x
593 Double2FloatOp -> coerceFltCode x
594 Float2DoubleOp -> coerceFltCode x
596 other_op -> getRegister (StCall fn CCallConv FF64 [x])
598 fn = case other_op of
599 FloatExpOp -> fsLit "exp"
600 FloatLogOp -> fsLit "log"
601 FloatSqrtOp -> fsLit "sqrt"
602 FloatSinOp -> fsLit "sin"
603 FloatCosOp -> fsLit "cos"
604 FloatTanOp -> fsLit "tan"
605 FloatAsinOp -> fsLit "asin"
606 FloatAcosOp -> fsLit "acos"
607 FloatAtanOp -> fsLit "atan"
608 FloatSinhOp -> fsLit "sinh"
609 FloatCoshOp -> fsLit "cosh"
610 FloatTanhOp -> fsLit "tanh"
611 DoubleExpOp -> fsLit "exp"
612 DoubleLogOp -> fsLit "log"
613 DoubleSqrtOp -> fsLit "sqrt"
614 DoubleSinOp -> fsLit "sin"
615 DoubleCosOp -> fsLit "cos"
616 DoubleTanOp -> fsLit "tan"
617 DoubleAsinOp -> fsLit "asin"
618 DoubleAcosOp -> fsLit "acos"
619 DoubleAtanOp -> fsLit "atan"
620 DoubleSinhOp -> fsLit "sinh"
621 DoubleCoshOp -> fsLit "cosh"
622 DoubleTanhOp -> fsLit "tanh"
624 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Dyadic primops.  Gt/Ge are implemented by swapping operands of
-- Lt/Le (x > y  <=>  y < x), since Alpha only has CMP LT/LE/EQ forms.
626 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
628 CharGtOp -> trivialCode (CMP LTT) y x
629 CharGeOp -> trivialCode (CMP LE) y x
630 CharEqOp -> trivialCode (CMP EQQ) x y
631 CharNeOp -> int_NE_code x y
632 CharLtOp -> trivialCode (CMP LTT) x y
633 CharLeOp -> trivialCode (CMP LE) x y
635 IntGtOp -> trivialCode (CMP LTT) y x
636 IntGeOp -> trivialCode (CMP LE) y x
637 IntEqOp -> trivialCode (CMP EQQ) x y
638 IntNeOp -> int_NE_code x y
639 IntLtOp -> trivialCode (CMP LTT) x y
640 IntLeOp -> trivialCode (CMP LE) x y
-- Unsigned word comparisons.  Gt/Ge swap the operands of ULT/ULE
-- (x >= y  <=>  y <= x), exactly as the Int* cases above and the
-- Addr* cases below do.
642 WordGtOp -> trivialCode (CMP ULT) y x
-- BUG FIX: WordGeOp previously used "(CMP ULE) x y", which computes
-- x <= y, i.e. WordLeOp.  Swapped to "y x" for x >= y, matching
-- IntGeOp/AddrGeOp.
643 WordGeOp -> trivialCode (CMP ULE) y x
644 WordEqOp -> trivialCode (CMP EQQ) x y
645 WordNeOp -> int_NE_code x y
646 WordLtOp -> trivialCode (CMP ULT) x y
647 WordLeOp -> trivialCode (CMP ULE) x y
649 AddrGtOp -> trivialCode (CMP ULT) y x
650 AddrGeOp -> trivialCode (CMP ULE) y x
651 AddrEqOp -> trivialCode (CMP EQQ) x y
652 AddrNeOp -> int_NE_code x y
653 AddrLtOp -> trivialCode (CMP ULT) x y
654 AddrLeOp -> trivialCode (CMP ULE) x y
-- Float comparisons go through cmpF_code (see below): the FCMP
-- condition plus the branch sense used to convert the FP flag into 0/1.
656 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
657 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
658 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
659 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
660 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
661 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
663 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
664 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
665 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
666 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
667 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
668 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
670 IntAddOp -> trivialCode (ADD Q False) x y
671 IntSubOp -> trivialCode (SUB Q False) x y
672 IntMulOp -> trivialCode (MUL Q False) x y
673 IntQuotOp -> trivialCode (DIV Q False) x y
674 IntRemOp -> trivialCode (REM Q False) x y
676 WordAddOp -> trivialCode (ADD Q False) x y
677 WordSubOp -> trivialCode (SUB Q False) x y
678 WordMulOp -> trivialCode (MUL Q False) x y
679 WordQuotOp -> trivialCode (DIV Q True) x y
680 WordRemOp -> trivialCode (REM Q True) x y
682 FloatAddOp -> trivialFCode W32 (FADD TF) x y
683 FloatSubOp -> trivialFCode W32 (FSUB TF) x y
684 FloatMulOp -> trivialFCode W32 (FMUL TF) x y
685 FloatDivOp -> trivialFCode W32 (FDIV TF) x y
687 DoubleAddOp -> trivialFCode W64 (FADD TF) x y
688 DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
689 DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
690 DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
692 AddrAddOp -> trivialCode (ADD Q False) x y
693 AddrSubOp -> trivialCode (SUB Q False) x y
694 AddrRemOp -> trivialCode (REM Q True) x y
696 AndOp -> trivialCode AND x y
697 OrOp -> trivialCode OR x y
698 XorOp -> trivialCode XOR x y
699 SllOp -> trivialCode SLL x y
700 SrlOp -> trivialCode SRL x y
702 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
703 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
704 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
706 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
707 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
709 {- ------------------------------------------------------------
710 Some bizarre special code for getting condition codes into
711 registers. Integer non-equality is a test for equality
712 followed by an XOR with 1. (Integer comparisons always set
713 the result register to 0 or 1.) Floating point comparisons of
714 any kind leave the result in a floating point register, so we
715 need to wrangle an integer register out of things.
717 int_NE_code :: StixTree -> StixTree -> NatM Register
-- x /= y  is computed as  (x == y) XOR 1.
720 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
721 getNewRegNat IntRep `thenNat` \ tmp ->
723 code = registerCode register tmp
724 src = registerName register tmp
725 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
727 return (Any IntRep code__2)
729 {- ------------------------------------------------------------
730 Comments for int_NE_code also apply to cmpF_code
733 :: (Reg -> Reg -> Reg -> Instr)
735 -> StixTree -> StixTree
-- Materialise an FP comparison into an integer 0/1: set dst to 1,
-- branch past the reset when the FP condition holds, otherwise zero dst.
738 cmpF_code instr cond x y
739 = trivialFCode pr instr x y `thenNat` \ register ->
740 getNewRegNat FF64 `thenNat` \ tmp ->
741 getBlockIdNat `thenNat` \ lbl ->
743 code = registerCode register tmp
744 result = registerName register tmp
746 code__2 dst = code . mkSeqInstrs [
747 OR zeroh (RIImm (ImmInt 1)) dst,
748 BF cond result (ImmCLbl lbl),
749 OR zeroh (RIReg zeroh) dst,
752 return (Any IntRep code__2)
754 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
755 ------------------------------------------------------------
-- Plain load: amode + a single LD of the right size.
757 getRegister (CmmLoad pk mem)
758 = getAmode mem `thenNat` \ amode ->
760 code = amodeCode amode
761 src = amodeAddr amode
762 size = primRepToSize pk
763 code__2 dst = code . mkSeqInstr (LD size dst src)
765 return (Any pk code__2)
-- Integer literal: small immediates via OR with zero, larger via LDI
-- (the selecting guards are elided here).
767 getRegister (StInt i)
770 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
772 return (Any IntRep code)
775 code dst = mkSeqInstr (LDI Q dst src)
777 return (Any IntRep code)
779 src = ImmInt (fromInteger i)
-- Address literal: LDA from an immediate.
784 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
786 return (Any PtrRep code)
789 imm__2 = case imm of Just x -> x
791 #endif /* alpha_TARGET_ARCH */
793 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: a float literal is emitted into a data section and loaded
-- through a (possibly PIC-indirected) dynamic reference.
797 getRegister (CmmLit (CmmFloat f W32)) = do
798 lbl <- getNewLabelNat
799 dflags <- getDynFlagsNat
800 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
801 Amode addr addr_code <- getAmode dynRef
805 CmmStaticLit (CmmFloat f W32)]
806 `consOL` (addr_code `snocOL`
809 return (Any FF32 code)
-- Double literals: GLDZ/GLD1 presumably load the x87 constants 0.0 and
-- 1.0 directly (the selecting guards are elided) — verify against the
-- Instr definitions.  NOTE(review): a literal pattern like 0.0 also
-- matches -0.0 (they compare equal), which would lose the sign bit.
812 getRegister (CmmLit (CmmFloat d W64))
814 = let code dst = unitOL (GLDZ dst)
815 in return (Any FF64 code)
818 = let code dst = unitOL (GLD1 dst)
819 in return (Any FF64 code)
-- General double literal: same data-section-plus-load scheme as floats.
822 lbl <- getNewLabelNat
823 dflags <- getDynFlagsNat
824 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
825 Amode addr addr_code <- getAmode dynRef
829 CmmStaticLit (CmmFloat d W64)]
830 `consOL` (addr_code `snocOL`
833 return (Any FF64 code)
835 #endif /* i386_TARGET_ARCH */
837 #if x86_64_TARGET_ARCH
-- x86_64: 0.0 is materialised by XOR-ing the destination with itself.
-- NOTE(review): the literal pattern 0.0 also matches -0.0 (Haskell
-- literal patterns compare with ==), so a negative zero would lose its
-- sign bit here — confirm whether upstream ever produces -0.0 literals.
839 getRegister (CmmLit (CmmFloat 0.0 w)) = do
840 let size = floatSize w
841 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
842 -- I don't know why there are xorpd, xorps, and pxor instructions.
843 -- They all appear to do the same thing --SDM
844 return (Any size code)
-- Other float literals: emit into a data section and load rip-relative.
846 getRegister (CmmLit (CmmFloat f w)) = do
847 lbl <- getNewLabelNat
848 let code dst = toOL [
851 CmmStaticLit (CmmFloat f w)],
852 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
855 return (Any size code)
856 where size = floatSize w
858 #endif /* x86_64_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- Fuse a widening conversion of a loaded value into one extending load
-- (zero-extending for MO_UU_Conv, sign-extending for MO_SS_Conv),
-- instead of a plain load followed by a separate extend instruction.
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVZxL II8) addr
    return (Any II32 load_code)

getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVSxL II8) addr
    return (Any II32 load_code)

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVZxL II16) addr
    return (Any II32 load_code)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVSxL II16) addr
    return (Any II32 load_code)
#if x86_64_TARGET_ARCH

-- Same load-plus-extend fusion as above, but widening to 64 bits.
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVZxL II8) addr
    return (Any II64 load_code)

getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVSxL II8) addr
    return (Any II64 load_code)

getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVZxL II16) addr
    return (Any II64 load_code)

getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVSxL II16) addr
    return (Any II64 load_code)

getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
    return (Any II64 load_code)

getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
    load_code <- intLoadCode (MOVSxL II32) addr
    return (Any II64 load_code)
910 #if x86_64_TARGET_ARCH
-- x86_64: PicBaseReg + literal collapses to a single rip-relative LEA,
-- since %rip IS the PIC base on this platform.
911 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
912 CmmLit displacement])
913 = return $ Any II64 (\dst -> unitOL $
914 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
917 #if x86_64_TARGET_ARCH
-- x86_64 float negation: XOR the value with a sign-bit mask held in a
-- 16-byte-aligned read-only data section (SSE xorps needs 128 bits).
918 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
919 x_code <- getAnyReg x
920 lbl <- getNewLabelNat
922 code dst = x_code dst `appOL` toOL [
923 -- This is how gcc does it, so it can't be that bad:
924 LDATA ReadOnlyData16 [
927 CmmStaticLit (CmmInt 0x80000000 W32),
928 CmmStaticLit (CmmInt 0 W32),
929 CmmStaticLit (CmmInt 0 W32),
930 CmmStaticLit (CmmInt 0 W32)
932 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
933 -- xorps, so we need the 128-bit constant
934 -- ToDo: rip-relative
937 return (Any FF32 code)
-- Same trick for doubles, with the W64 sign bit.
939 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
940 x_code <- getAnyReg x
941 lbl <- getNewLabelNat
943 -- This is how gcc does it, so it can't be that bad:
944 code dst = x_code dst `appOL` toOL [
945 LDATA ReadOnlyData16 [
948 CmmStaticLit (CmmInt 0x8000000000000000 W64),
949 CmmStaticLit (CmmInt 0 W64)
951 -- gcc puts an unpck here. Wonder if we need it.
952 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
953 -- xorpd, so we need the 128-bit constant
956 return (Any FF64 code)
959 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Unary MachOps: negation/not, plus the full matrix of integer
-- narrowing/widening and float conversions.  Helpers live in the
-- where-clause at the bottom.
961 getRegister (CmmMachOp mop [x]) -- unary MachOps
964 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
965 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
968 MO_S_Neg w -> triv_ucode NEGI (intSize w)
969 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
970 MO_Not w -> triv_ucode NOT (intSize w)
-- Narrowing: reuse the value in a (byte-addressable) register; no
-- actual truncation instruction is needed.
973 MO_UU_Conv W32 W8 -> toI8Reg W32 x
974 MO_SS_Conv W32 W8 -> toI8Reg W32 x
975 MO_UU_Conv W16 W8 -> toI8Reg W16 x
976 MO_SS_Conv W16 W8 -> toI8Reg W16 x
977 MO_UU_Conv W32 W16 -> toI16Reg W32 x
978 MO_SS_Conv W32 W16 -> toI16Reg W32 x
980 #if x86_64_TARGET_ARCH
981 MO_UU_Conv W64 W32 -> conversionNop II64 x
982 MO_SS_Conv W64 W32 -> conversionNop II64 x
983 MO_UU_Conv W64 W16 -> toI16Reg W64 x
984 MO_SS_Conv W64 W16 -> toI16Reg W64 x
985 MO_UU_Conv W64 W8 -> toI8Reg W64 x
986 MO_SS_Conv W64 W8 -> toI8Reg W64 x
-- Same-width "conversions" are no-ops.
989 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
990 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
-- Widening: zero-extend (MOVZxL) or sign-extend (MOVSxL).
993 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
994 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
995 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
997 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
998 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
999 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
1001 #if x86_64_TARGET_ARCH
1002 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
1003 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1004 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1005 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
1006 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1007 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1008 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1009 -- However, we don't want the register allocator to throw it
1010 -- away as an unnecessary reg-to-reg move, so we keep it in
1011 -- the form of a movzl and print it as a movl later.
1014 #if i386_TARGET_ARCH
1015 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1016 MO_FF_Conv W64 W32 -> conversionNop FF32 x
1018 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1019 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1022 MO_FS_Conv from to -> coerceFP2Int from to x
1023 MO_SF_Conv from to -> coerceInt2FP from to x
1025 other -> pprPanic "getRegister" (pprMachOp mop)
-- Build a simple one-operand instruction at the given size.
1027 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1028 triv_ucode instr size = trivialUCode size (instr size) x
1030 -- signed or unsigned extension.
1031 integerExtend :: Width -> Width
1032 -> (Size -> Operand -> Operand -> Instr)
1033 -> CmmExpr -> NatM Register
1034 integerExtend from to instr expr = do
-- W8 sources must come from a byte-addressable register.
1035 (reg,e_code) <- if from == W8 then getByteReg expr
1036 else getSomeReg expr
1040 instr (intSize from) (OpReg reg) (OpReg dst)
1041 return (Any (intSize to) code)
1043 toI8Reg :: Width -> CmmExpr -> NatM Register
1044 toI8Reg new_rep expr
1045 = do codefn <- getAnyReg expr
1046 return (Any (intSize new_rep) codefn)
1047 -- HACK: use getAnyReg to get a byte-addressable register.
1048 -- If the source was a Fixed register, this will add the
1049 -- mov instruction to put it into the desired destination.
1050 -- We're assuming that the destination won't be a fixed
1051 -- non-byte-addressable register; it won't be, because all
1052 -- fixed registers are word-sized.
1054 toI16Reg = toI8Reg -- for now
-- Re-label the result's size without emitting any code.
1056 conversionNop :: Size -> CmmExpr -> NatM Register
1057 conversionNop new_size expr
1058 = do e_code <- getRegister expr
1059 return (swizzleRegisterRep e_code new_size)
-- | x86/x86_64: select code for a dyadic MachOp.  Comparisons go through
-- condIntReg/condFltReg (materialise a condition into a register); arithmetic
-- goes through the trivialCode family or the local helpers below.
-- NOTE(review): this chunk is an elided extract — several interior lines
-- (where/let heads, #endif markers) are missing from view.
1062 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1064 MO_F_Eq w -> condFltReg EQQ x y
1065 MO_F_Ne w -> condFltReg NE x y
1066 MO_F_Gt w -> condFltReg GTT x y
1067 MO_F_Ge w -> condFltReg GE x y
1068 MO_F_Lt w -> condFltReg LTT x y
1069 MO_F_Le w -> condFltReg LE x y
1071 MO_Eq rep -> condIntReg EQQ x y
1072 MO_Ne rep -> condIntReg NE x y
1074 MO_S_Gt rep -> condIntReg GTT x y
1075 MO_S_Ge rep -> condIntReg GE x y
1076 MO_S_Lt rep -> condIntReg LTT x y
1077 MO_S_Le rep -> condIntReg LE x y
-- Unsigned comparisons use the unsigned condition codes (GU/GEU/LU/LEU).
1079 MO_U_Gt rep -> condIntReg GU x y
1080 MO_U_Ge rep -> condIntReg GEU x y
1081 MO_U_Lt rep -> condIntReg LU x y
1082 MO_U_Le rep -> condIntReg LEU x y
-- i386 uses the x87-style G* float instructions; x86_64 (below) uses
-- the SSE-style ADD/SUB/FDIV/MUL forms instead.
1084 #if i386_TARGET_ARCH
1085 MO_F_Add w -> trivialFCode w GADD x y
1086 MO_F_Sub w -> trivialFCode w GSUB x y
1087 MO_F_Quot w -> trivialFCode w GDIV x y
1088 MO_F_Mul w -> trivialFCode w GMUL x y
1091 #if x86_64_TARGET_ARCH
1092 MO_F_Add w -> trivialFCode w ADD x y
1093 MO_F_Sub w -> trivialFCode w SUB x y
1094 MO_F_Quot w -> trivialFCode w FDIV x y
1095 MO_F_Mul w -> trivialFCode w MUL x y
1098 MO_Add rep -> add_code rep x y
1099 MO_Sub rep -> sub_code rep x y
-- div_code flags: first Bool = signed?, second Bool = want quotient
-- (True) or remainder (False).
1101 MO_S_Quot rep -> div_code rep True True x y
1102 MO_S_Rem rep -> div_code rep True False x y
1103 MO_U_Quot rep -> div_code rep False True x y
1104 MO_U_Rem rep -> div_code rep False False x y
1106 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1108 MO_Mul rep -> triv_op rep IMUL
1109 MO_And rep -> triv_op rep AND
1110 MO_Or rep -> triv_op rep OR
1111 MO_Xor rep -> triv_op rep XOR
1113 {- Shift ops on x86s have constraints on their source, it
1114 either has to be Imm, CL or 1
1115 => trivialCode is not restrictive enough (sigh.)
1117 MO_Shl rep -> shift_code rep SHL x y {-False-}
1118 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1119 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1121 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1123 --------------------
-- Apply a commutative two-operand instruction via trivialCode; passing
-- (Just op) tells trivialCode the op may be used with operands swapped.
1124 triv_op width instr = trivialCode width op (Just op) x y
1125 where op = instr (intSize width)
-- Signed multiply with overflow detection: one-operand IMUL leaves the
-- double-length product in %edx:%eax; we then compare the high half
-- against the sign-extension of the low half.
1127 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1128 imulMayOflo rep a b = do
1129 (a_reg, a_code) <- getNonClobberedReg a
1130 b_code <- getAnyReg b
1132 shift_amt = case rep of
1135 _ -> panic "shift_amt"
1138 code = a_code `appOL` b_code eax `appOL`
1140 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
1141 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1142 -- sign extend lower part
1143 SUB size (OpReg edx) (OpReg eax)
1144 -- compare against upper
1145 -- eax==0 if high part == sign extended low part
-- Result is Fixed in %eax: zero means "no overflow possible".
1148 return (Fixed size eax code)
1150 --------------------
1152 -> (Size -> Operand -> Operand -> Instr)
1157 {- Case1: shift length as immediate -}
1158 shift_code width instr x y@(CmmLit lit) = do
1159 x_code <- getAnyReg x
1161 size = intSize width
1163 = x_code dst `snocOL`
1164 instr size (OpImm (litToImm lit)) (OpReg dst)
1166 return (Any size code)
1168 {- Case2: shift length is complex (non-immediate)
1169 * y must go in %ecx.
1170 * we cannot do y first *and* put its result in %ecx, because
1171 %ecx might be clobbered by x.
1172 * if we do y second, then x cannot be
1173 in a clobbered reg. Also, we cannot clobber x's reg
1174 with the instruction itself.
1176 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1177 - do y second and put its result into %ecx. x gets placed in a fresh
1178 tmp. This is likely to be better, because the reg alloc can
1179 eliminate this reg->reg move here (it won't eliminate the other one,
1180 because the move is into the fixed %ecx).
1182 shift_code width instr x y{-amount-} = do
1183 x_code <- getAnyReg x
1184 let size = intSize width
1185 tmp <- getNewRegNat size
1186 y_code <- getAnyReg y
1188 code = x_code tmp `appOL`
1190 instr size (OpReg ecx) (OpReg tmp)
1192 return (Fixed size tmp code)
1194 --------------------
-- Addition: a small (32-bit) immediate goes through the LEA-based
-- add_int; otherwise fall back to the generic two-operand ADD.
1195 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1196 add_code rep x (CmmLit (CmmInt y _))
1197 | is32BitInteger y = add_int rep x y
1198 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1199 where size = intSize rep
1201 --------------------
-- Subtraction of a small immediate is rewritten as addition of its
-- negation (reuses add_int); SUB is not commutative, hence Nothing.
1202 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1203 sub_code rep x (CmmLit (CmmInt y _))
1204 | is32BitInteger (-y) = add_int rep x (-y)
1205 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1207 -- our three-operand add instruction:
-- (implemented as an addressing-mode computation: base reg + immediate)
1208 add_int width x y = do
1209 (x_reg, x_code) <- getSomeReg x
1211 size = intSize width
1212 imm = ImmInt (fromInteger y)
1216 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1219 return (Any size code)
1221 ----------------------
-- x86 division: dividend in %edx:%eax; widen the dividend with CLTD
-- (signed sign-extension) or by zeroing %edx (unsigned), then IDIV/DIV.
-- Quotient lands in %eax, remainder in %edx.
1222 div_code width signed quotient x y = do
1223 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1224 x_code <- getAnyReg x
1226 size = intSize width
1227 widen | signed = CLTD size
1228 | otherwise = XOR size (OpReg edx) (OpReg edx)
1230 instr | signed = IDIV
1233 code = y_code `appOL`
1235 toOL [widen, instr size y_op]
1237 result | quotient = eax
1241 return (Fixed size result code)
-- | Load from memory; this equation appears to handle the float case
-- (guard line elided from view).  i386 uses the x87 GLD, x86_64 a plain MOV.
1244 getRegister (CmmLoad mem pk)
1247 Amode src mem_code <- getAmode mem
1249 size = cmmTypeSize pk
1250 code dst = mem_code `snocOL`
1251 IF_ARCH_i386(GLD size src dst,
1252 MOV size (OpAddr src) (OpReg dst))
1253 return (Any size code)
1255 #if i386_TARGET_ARCH
-- | i386 integer load.  The load instruction is chosen by width (the
-- case alternatives for specific widths are elided from view).
1256 getRegister (CmmLoad mem pk)
1259 code <- intLoadCode instr mem
1260 return (Any size code)
1262 width = typeWidth pk
1263 size = intSize width
1264 instr = case width of
1267 -- We always zero-extend 8-bit loads, if we
1268 -- can't think of anything better. This is because
1269 -- we can't guarantee access to an 8-bit variant of every register
1270 -- (esi and edi don't have 8-bit variants), so to make things
1271 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1274 #if x86_64_TARGET_ARCH
1275 -- Simpler memory load code on x86_64
-- (all registers have narrow variants there, so a single MOV of the
-- right size suffices; no special-casing by width as on i386)
1276 getRegister (CmmLoad mem pk)
1278 code <- intLoadCode (MOV size) mem
1279 return (Any size code)
1280 where size = intSize $ typeWidth pk
-- | The literal zero: materialise with XOR reg,reg instead of MOV $0.
1283 getRegister (CmmLit (CmmInt 0 width))
1285 size = intSize width
1287 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1288 adj_size = case size of II64 -> II32; _ -> size
1289 size1 = IF_ARCH_i386( size, adj_size )
1291 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1293 return (Any size code)
1295 #if x86_64_TARGET_ARCH
1296 -- optimisation for loading small literals on x86_64: take advantage
1297 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1298 -- instruction forms are shorter.
1299 getRegister (CmmLit lit)
1300 | isWord64 (cmmLitType lit), not (isBigLit lit)
1303 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
-- The result is still a 64-bit Register; MOV II32 zero-extends.
1305 return (Any II64 code)
-- "big" = does not fit in an unsigned 32-bit immediate.
1307 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1309 -- note1: not the same as (not.is32BitLit), because that checks for
1310 -- signed literals that fit in 32 bits, but we want unsigned
1312 -- note2: all labels are small, because we're assuming the
1313 -- small memory model (see gcc docs, -mcmodel=small).
-- | Generic literal: move the immediate into any register.
-- (The guard line for this equation is elided from view.)
1316 getRegister (CmmLit lit)
1318 size = cmmTypeSize (cmmLitType lit)
1320 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1322 return (Any size code)
-- Catch-all: anything not matched above is a bug in the selector.
1324 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build the code for an integer load: compute the address, then apply
-- the supplied load instruction from that address into the destination.
1327 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1328 -> NatM (Reg -> InstrBlock)
1329 intLoadCode instr mem = do
1330 Amode src mem_code <- getAmode mem
1331 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1333 -- Compute an expression into *any* register, adding the appropriate
1334 -- move instruction if necessary.
1335 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-- (body partially elided; presumably passes the Register to anyReg below)
1337 r <- getRegister expr
-- anyReg: an Any register already takes a destination; a Fixed register
-- gets an explicit reg-to-reg move appended to reach the destination.
1340 anyReg :: Register -> NatM (Reg -> InstrBlock)
1341 anyReg (Any _ code) = return code
1342 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1344 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1345 -- Fixed registers might not be byte-addressable, so we make sure we've
1346 -- got a temporary, inserting an extra reg copy if necessary.
1347 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1348 #if x86_64_TARGET_ARCH
1349 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 path: only some real registers have 8-bit variants, so copy into
-- a fresh virtual register when needed (case alternatives partly elided).
1351 getByteReg expr = do
1352 r <- getRegister expr
1355 tmp <- getNewRegNat rep
1356 return (tmp, code tmp)
-- Virtual registers are fine as-is: the allocator will pick a
-- byte-addressable real register for them.
1358 | isVirtualReg reg -> return (reg,code)
1360 tmp <- getNewRegNat rep
1361 return (tmp, code `snocOL` reg2reg rep reg tmp)
1362 -- ToDo: could optimise slightly by checking for byte-addressable
1363 -- real registers, but that will happen very rarely if at all.
1366 -- Another variant: this time we want the result in a register that cannot
1367 -- be modified by code to evaluate an arbitrary expression.
1368 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1369 getNonClobberedReg expr = do
1370 r <- getRegister expr
-- Any result: give it a fresh temporary as its destination.
1373 tmp <- getNewRegNat rep
1374 return (tmp, code tmp)
1376 -- only free regs can be clobbered
-- A Fixed result in a caller-clobberable real register must be copied
-- out into a fresh temporary; otherwise the register is safe to return.
1377 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1378 tmp <- getNewRegNat rep
1379 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Register-to-register move of the given size.  Floats on i386 live in
-- the x87 stack and need GMOV; everything else is an ordinary MOV.
1383 reg2reg :: Size -> Reg -> Reg -> Instr
1384 reg2reg size src dst
1385 #if i386_TARGET_ARCH
1386 | isFloatSize size = GMOV src dst
1388 | otherwise = MOV size (OpReg src) (OpReg dst)
1390 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1392 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1394 #if sparc_TARGET_ARCH
1396 -- getRegister :: CmmExpr -> NatM Register
1398 -- Load a literal float into a float register.
1399 -- The actual literal is stored in a new data area, and we load it
-- at runtime, addressing it via a SETHI/LO pair on a fresh label.
1401 getRegister (CmmLit (CmmFloat f W32)) = do
1403 -- a label for the new data area
1404 lbl <- getNewLabelNat
1405 tmp <- getNewRegNat II32
1407 let code dst = toOL [
1411 CmmStaticLit (CmmFloat f W32)],
-- SETHI sets the high 22 bits of the label address; LD completes it
-- with the low-part immediate offset.
1414 SETHI (HI (ImmCLbl lbl)) tmp,
1415 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1417 return (Any FF32 code)
-- Same scheme for a 64-bit float literal.
1419 getRegister (CmmLit (CmmFloat d W64)) = do
1420 lbl <- getNewLabelNat
1421 tmp <- getNewRegNat II32
1422 let code dst = toOL [
1425 CmmStaticLit (CmmFloat d W64)],
1426 SETHI (HI (ImmCLbl lbl)) tmp,
1427 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1428 return (Any FF64 code)
-- | SPARC: unary MachOps.  Negation/complement are synthesised from
-- SUB/XNOR with the hardwired zero register g0.
1430 getRegister (CmmMachOp mop [x]) -- unary MachOps
1432 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
1433 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
1435 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
1436 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
1438 MO_FF_Conv W64 W32-> coerceDbl2Flt x
1439 MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1441 MO_FS_Conv from to -> coerceFP2Int from to x
1442 MO_SF_Conv from to -> coerceInt2FP from to x
1444 -- Conversions which are a nop on sparc
1446 | from == to -> conversionNop (intSize to) x
-- Narrowing to 8 bits must mask explicitly; other narrowings are nops
-- because the high bits are treated as undefined.
1447 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1448 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1449 MO_SS_Conv W32 to -> conversionNop (intSize to) x
-- Widening conversions; see integerExtend below.
1452 MO_UU_Conv W8 W32 -> integerExtend False W8 W32 x
1453 MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
1454 MO_UU_Conv W8 W16 -> integerExtend False W8 W16 x
1455 MO_SS_Conv W16 W32 -> integerExtend True W16 W32 x
1457 other_op -> panic "Unknown unary mach op"
-- Extend by shifting: SRA for signed (arithmetic), SRL for unsigned
-- (logical).  The visible shift amount here is 0; the shift-left that
-- precedes it appears to be on a line elided from view.
1460 integerExtend signed from to expr = do
1461 (reg, e_code) <- getSomeReg expr
1465 ((if signed then SRA else SRL)
1466 reg (RIImm (ImmInt 0)) dst)
1467 return (Any (intSize to) code)
-- A conversion that needs no instructions: just relabel the size.
1468 conversionNop new_rep expr
1469 = do e_code <- getRegister expr
1470 return (swizzleRegisterRep e_code new_rep)
-- | SPARC: dyadic MachOps.  Division has no V8 hardware instruction
-- here, so it calls out to the .div/.rem/.udiv/.urem millicode routines.
1472 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1474 MO_Eq rep -> condIntReg EQQ x y
1475 MO_Ne rep -> condIntReg NE x y
1477 MO_S_Gt rep -> condIntReg GTT x y
1478 MO_S_Ge rep -> condIntReg GE x y
1479 MO_S_Lt rep -> condIntReg LTT x y
1480 MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): W32 unsigned comparisons map to the *signed* condition
-- codes here while W16 uses the unsigned ones — looks suspicious, but
-- the surrounding (elided) context may justify it; verify upstream.
1482 MO_U_Gt W32 -> condIntReg GTT x y
1483 MO_U_Ge W32 -> condIntReg GE x y
1484 MO_U_Lt W32 -> condIntReg LTT x y
1485 MO_U_Le W32 -> condIntReg LE x y
1487 MO_U_Gt W16 -> condIntReg GU x y
1488 MO_U_Ge W16 -> condIntReg GEU x y
1489 MO_U_Lt W16 -> condIntReg LU x y
1490 MO_U_Le W16 -> condIntReg LEU x y
1492 MO_Add W32 -> trivialCode W32 (ADD False False) x y
1493 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1495 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1497 -- ToDo: teach about V8+ SPARC div instructions
1498 MO_S_Quot W32 -> idiv FSLIT(".div") x y
1499 MO_S_Rem W32 -> idiv FSLIT(".rem") x y
1500 MO_U_Quot W32 -> idiv FSLIT(".udiv") x y
1501 MO_U_Rem W32 -> idiv FSLIT(".urem") x y
1504 MO_F_Eq w -> condFltReg EQQ x y
1505 MO_F_Ne w -> condFltReg NE x y
1507 MO_F_Gt w -> condFltReg GTT x y
1508 MO_F_Ge w -> condFltReg GE x y
1509 MO_F_Lt w -> condFltReg LTT x y
1510 MO_F_Le w -> condFltReg LE x y
1512 MO_F_Add w -> trivialFCode w FADD x y
1513 MO_F_Sub w -> trivialFCode w FSUB x y
1514 MO_F_Mul w -> trivialFCode w FMUL x y
1515 MO_F_Quot w -> trivialFCode w FDIV x y
1517 MO_And rep -> trivialCode rep (AND False) x y
1518 MO_Or rep -> trivialCode rep (OR False) x y
1519 MO_Xor rep -> trivialCode rep (XOR False) x y
1521 MO_Mul rep -> trivialCode rep (SMUL False) x y
1523 MO_Shl rep -> trivialCode rep SLL x y
1524 MO_U_Shr rep -> trivialCode rep SRL x y
1525 MO_S_Shr rep -> trivialCode rep SRA x y
-- Power is done in double precision via the C "pow" routine,
-- promoting W32 operands first.
1528 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1529 [promote x, promote y])
1530 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1531 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1534 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1536 --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1538 --------------------
-- Signed multiply with overflow check: SMUL produces the low word; the
-- high word (read into res_hi on an elided line) is compared against
-- the sign-extension (SRA by shift_amt) of the low word.
1539 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1540 imulMayOflo rep a b = do
1541 (a_reg, a_code) <- getSomeReg a
1542 (b_reg, b_code) <- getSomeReg b
1543 res_lo <- getNewRegNat II32
1544 res_hi <- getNewRegNat II32
1546 shift_amt = case rep of
1549 _ -> panic "shift_amt"
1550 code dst = a_code `appOL` b_code `appOL`
1552 SMUL False a_reg (RIReg b_reg) res_lo,
1554 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1555 SUB False False res_lo (RIReg res_hi) dst
-- dst == 0 iff the product fits in the target width.
1557 return (Any II32 code)
-- | SPARC load: compute the amode, then a single LD of the right size.
1559 getRegister (CmmLoad mem pk) = do
1560 Amode src code <- getAmode mem
1562 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1563 return (Any (cmmTypeSize pk) code__2)
-- Small integer literal: synthesised as OR g0, imm (g0 is always zero).
-- (The guard restricting the literal's range is elided from view.)
1565 getRegister (CmmLit (CmmInt i _))
1568 src = ImmInt (fromInteger i)
1569 code dst = unitOL (OR False g0 (RIImm src) dst)
1571 return (Any II32 code)
-- General literal: SETHI high part (elided line) then OR in the low part.
1573 getRegister (CmmLit lit)
1574 = let rep = cmmLitType lit
1578 OR False dst (RIImm (LO imm)) dst]
1579 in return (Any II32 code)
1581 #endif /* sparc_TARGET_ARCH */
1583 #if powerpc_TARGET_ARCH
-- | PowerPC load; asserts that float results go to double-class registers.
1584 getRegister (CmmLoad mem pk)
1587 Amode addr addr_code <- getAmode mem
1588 let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1589 addr_code `snocOL` LD size dst addr
1590 return (Any size code)
1591 where size = cmmTypeSize pk
1593 -- catch simple cases of zero- or sign-extended load
-- lbz zero-extends by itself, so a UU conversion around a byte load
-- is just the narrow load.
1594 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1595 Amode addr addr_code <- getAmode mem
1596 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1598 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1600 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1601 Amode addr addr_code <- getAmode mem
1602 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-- LA = load halfword algebraic (sign-extending) for the signed case.
1604 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1605 Amode addr addr_code <- getAmode mem
1606 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-- | PowerPC: unary MachOps.
1608 getRegister (CmmMachOp mop [x]) -- unary MachOps
1610 MO_Not rep -> triv_ucode_int rep NOT
1612 MO_F_Neg w -> triv_ucode_float w FNEG
1613 MO_S_Neg w -> triv_ucode_int w NEG
-- FRSP rounds double to single; widening float is representation-free.
1615 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
1616 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1618 MO_FS_Conv from to -> coerceFP2Int from to x
1619 MO_SF_Conv from to -> coerceInt2FP from to x
1622 | from == to -> conversionNop (intSize to) x
1624 -- narrowing is a nop: we treat the high bits as undefined
1625 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1626 MO_SS_Conv W16 W8 -> conversionNop II8 x
-- Signed widening uses the EXTS (extend-sign) instruction family.
1627 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
1628 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1631 | from == to -> conversionNop (intSize to) x
1632 -- narrowing is a nop: we treat the high bits as undefined
1633 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1634 MO_UU_Conv W16 W8 -> conversionNop II8 x
-- Unsigned widening masks off the high bits with AND.
1635 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1636 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
-- Local shorthands for unary int/float instruction selection.
1639 triv_ucode_int width instr = trivialUCode (intSize width) instr x
1640 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1642 conversionNop new_size expr
1643 = do e_code <- getRegister expr
1644 return (swizzleRegisterRep e_code new_size)
-- | PowerPC: dyadic MachOps.  Sub-word integer operands are first
-- widened to 32 bits via extendSExpr/extendUExpr so the 32-bit
-- machine instructions give correct results.
1646 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1648 MO_F_Eq w -> condFltReg EQQ x y
1649 MO_F_Ne w -> condFltReg NE x y
1650 MO_F_Gt w -> condFltReg GTT x y
1651 MO_F_Ge w -> condFltReg GE x y
1652 MO_F_Lt w -> condFltReg LTT x y
1653 MO_F_Le w -> condFltReg LE x y
1655 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1656 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1658 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1659 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1660 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1661 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1663 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1664 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1665 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1668 MO_F_Add w -> triv_float w FADD
1669 MO_F_Sub w -> triv_float w FSUB
1670 MO_F_Mul w -> triv_float w FMUL
1671 MO_F_Quot w -> triv_float w FDIV
1673 -- optimize addition with 32-bit immediate
-- (loaded in two instructions: addis with the high-adjusted part,
-- then add of the low part)
1677 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1678 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1681 (src, srcCode) <- getSomeReg x
1682 let imm = litToImm lit
1683 code dst = srcCode `appOL` toOL [
1684 ADDIS dst src (HA imm),
1685 ADD dst dst (RIImm (LO imm))
1687 return (Any II32 code)
1688 _ -> trivialCode W32 True ADD x y
1690 MO_Add rep -> trivialCode rep True ADD x y
1692 case y of -- subfi ('subtract from' with immediate) doesn't exist
1693 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1694 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
-- SUBF computes (second operand - first operand), hence y before x.
1695 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1697 MO_Mul rep -> trivialCode rep True MULLW x y
1699 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1701 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1702 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1704 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1705 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
-- Remainder is synthesised from DIVW/DIVWU (see remainderCode).
1707 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1708 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1710 MO_And rep -> trivialCode rep False AND x y
1711 MO_Or rep -> trivialCode rep False OR x y
1712 MO_Xor rep -> trivialCode rep False XOR x y
1714 MO_Shl rep -> trivialCode rep False SLW x y
1715 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1716 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- Local shorthand for binary float instruction selection.
1718 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1719 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
-- | PowerPC: integer literal that fits in a (signed) immediate: one LI.
1721 getRegister (CmmLit (CmmInt i rep))
1722 | Just imm <- makeImmediate rep True i
1724 code dst = unitOL (LI dst imm)
1726 return (Any (intSize rep) code)
-- Float literal: emit it into a read-only data area and load it via a
-- (possibly PIC-indirect) dynamic reference to the fresh label.
1728 getRegister (CmmLit (CmmFloat f frep)) = do
1729 lbl <- getNewLabelNat
1730 dflags <- getDynFlagsNat
1731 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1732 Amode addr addr_code <- getAmode dynRef
1733 let size = floatSize frep
1735 LDATA ReadOnlyData [CmmDataLabel lbl,
1736 CmmStaticLit (CmmFloat f frep)]
1737 `consOL` (addr_code `snocOL` LD size dst addr)
1738 return (Any size code)
-- General literal: high part (elided LIS line) then ADD of the low part.
1740 getRegister (CmmLit lit)
1741 = let rep = cmmLitType lit
1745 ADD dst dst (RIImm (LO imm))
1747 in return (Any (cmmTypeSize rep) code)
1749 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1751 -- extend?Rep: wrap integer expression of type rep
1752 -- in a conversion to II32
-- W32 is already full width, so no wrapping is needed.
1753 extendSExpr W32 x = x
1754 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
1755 extendUExpr W32 x = x
1756 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1758 #endif /* powerpc_TARGET_ARCH */
1761 -- -----------------------------------------------------------------------------
1762 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- | An addressing mode together with the code that computes the
-- registers it refers to.
1764 data Amode = Amode AddrMode InstrBlock
1767 Now, given a tree (the argument to an CmmLoad) that references memory,
1768 produce a suitable addressing mode.
1770 A Rule of the Game (tm) for Amodes: use of the addr bit must
1771 immediately follow use of the code part, since the code part puts
1772 values in registers which the addr then refers to. So you can't put
1773 anything in between, lest it overwrite some of those registers. If
1774 you need to do some other computation between the code part and use of
1775 the addr bit, first store the effective address from the amode in a
1776 temporary, then do the other computation, and then use the temporary:
1780 ... other computation ...
-- | Turn an address expression into an addressing mode plus setup code.
-- CmmRegOff is first normalised into a MachOp tree by mangleIndexTree.
1784 getAmode :: CmmExpr -> NatM Amode
1785 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1787 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1789 #if alpha_TARGET_ARCH
-- Alpha (legacy, Stix-style monadic code): reg + immediate amodes for
-- add/sub of a constant, plain register amode otherwise.
1791 getAmode (StPrim IntSubOp [x, StInt i])
1792 = getNewRegNat PtrRep `thenNat` \ tmp ->
1793 getRegister x `thenNat` \ register ->
1795 code = registerCode register tmp
1796 reg = registerName register tmp
1797 off = ImmInt (-(fromInteger i))
1799 return (Amode (AddrRegImm reg off) code)
1801 getAmode (StPrim IntAddOp [x, StInt i])
1802 = getNewRegNat PtrRep `thenNat` \ tmp ->
1803 getRegister x `thenNat` \ register ->
1805 code = registerCode register tmp
1806 reg = registerName register tmp
1807 off = ImmInt (fromInteger i)
1809 return (Amode (AddrRegImm reg off) code)
-- Absolute-immediate amode (guard and Nothing-case elided from view).
1813 = return (Amode (AddrImm imm__2) id)
1816 imm__2 = case imm of Just x -> x
-- Fallback: evaluate the address into a register, use it directly.
1819 = getNewRegNat PtrRep `thenNat` \ tmp ->
1820 getRegister other `thenNat` \ register ->
1822 code = registerCode register tmp
1823 reg = registerName register tmp
1825 return (Amode (AddrReg reg) code)
1827 #endif /* alpha_TARGET_ARCH */
1829 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1831 #if x86_64_TARGET_ARCH
-- %rip-relative addressing for PIC-base + displacement (x86_64 only).
1833 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1834 CmmLit displacement])
1835 = return $ Amode (ripRel (litToImm displacement)) nilOL
1839 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1841 -- This is all just ridiculous, since it carefully undoes
1842 -- what mangleIndexTree has just done.
1843 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1845 -- ASSERT(rep == II32)???
1846 = do (x_reg, x_code) <- getSomeReg x
1847 let off = ImmInt (-(fromInteger i))
1848 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1850 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1852 -- ASSERT(rep == II32)???
1853 = do (x_reg, x_code) <- getSomeReg x
1854 let off = ImmInt (fromInteger i)
1855 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1857 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1858 -- recognised by the next rule.
1859 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1861 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << shift): maps directly onto x86 scaled-index
-- addressing; only scales 1/2/4/8 (shift 0..3) exist in hardware.
1863 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1864 [y, CmmLit (CmmInt shift _)]])
1865 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1866 = x86_complex_amode x y shift 0
1868 getAmode (CmmMachOp (MO_Add rep)
1869 [x, CmmMachOp (MO_Add _)
1870 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1871 CmmLit (CmmInt offset _)]])
1872 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1873 && is32BitInteger offset
1874 = x86_complex_amode x y shift offset
1876 getAmode (CmmMachOp (MO_Add rep) [x,y])
1877 = x86_complex_amode x y 0 0
1879 getAmode (CmmLit lit) | is32BitLit lit
1880 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Fallback (equation head elided): evaluate into a register, address
-- through it with zero offset.
1883 (reg,code) <- getSomeReg expr
1884 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base+index*scale+offset amode; shift 0..3 becomes scale 1/2/4/8.
1887 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1888 x86_complex_amode base index shift offset
1889 = do (x_reg, x_code) <- getNonClobberedReg base
1890 -- x must be in a temp, because it has to stay live over y_code
1891 -- we could compare x_reg and y_reg and do something better here...
1892 (y_reg, y_code) <- getSomeReg index
1894 code = x_code `appOL` y_code
1895 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1896 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1899 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1901 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1903 #if sparc_TARGET_ARCH
-- SPARC amodes: reg+imm for add/sub of a (range-checked, guard elided)
-- constant, reg+reg for a general addition.
1905 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1908 (reg, code) <- getSomeReg x
1910 off = ImmInt (-(fromInteger i))
1911 return (Amode (AddrRegImm reg off) code)
1914 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1917 (reg, code) <- getSomeReg x
1919 off = ImmInt (fromInteger i)
1920 return (Amode (AddrRegImm reg off) code)
1922 getAmode (CmmMachOp (MO_Add rep) [x, y])
1924 (regX, codeX) <- getSomeReg x
1925 (regY, codeY) <- getSomeReg y
1927 code = codeX `appOL` codeY
1928 return (Amode (AddrRegReg regX regY) code)
1930 -- XXX Is this same as "leaf" in Stix?
-- Literal address: SETHI the high 22 bits into a temp, use LO as offset.
1931 getAmode (CmmLit lit)
1933 tmp <- getNewRegNat II32
1935 code = unitOL (SETHI (HI imm__2) tmp)
1936 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1938 imm__2 = litToImm lit
-- Fallback (equation head elided): evaluate into a register.
1942 (reg, code) <- getSomeReg other
1945 return (Amode (AddrRegImm reg off) code)
1947 #endif /* sparc_TARGET_ARCH */
1949 #ifdef powerpc_TARGET_ARCH
-- PowerPC amodes: reg+imm when the constant fits a signed 16-bit
-- immediate (makeImmediate), otherwise split via ADDIS/HA + LO.
1950 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
1951 | Just off <- makeImmediate W32 True (-i)
1953 (reg, code) <- getSomeReg x
1954 return (Amode (AddrRegImm reg off) code)
1957 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
1958 | Just off <- makeImmediate W32 True i
1960 (reg, code) <- getSomeReg x
1961 return (Amode (AddrRegImm reg off) code)
1963 -- optimize addition with 32-bit immediate
-- (add the high-adjusted part now with ADDIS; the low part becomes the
-- displacement of the resulting amode)
1965 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
1967 tmp <- getNewRegNat II32
1968 (src, srcCode) <- getSomeReg x
1969 let imm = litToImm lit
1970 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1971 return (Amode (AddrRegImm tmp (LO imm)) code)
-- Bare literal address: LIS the high part, LO as displacement.
1973 getAmode (CmmLit lit)
1975 tmp <- getNewRegNat II32
1976 let imm = litToImm lit
1977 code = unitOL (LIS tmp (HA imm))
1978 return (Amode (AddrRegImm tmp (LO imm)) code)
1980 getAmode (CmmMachOp (MO_Add W32) [x, y])
1982 (regX, codeX) <- getSomeReg x
1983 (regY, codeY) <- getSomeReg y
1984 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Fallback (equation head elided): evaluate into a register.
1988 (reg, code) <- getSomeReg other
1991 return (Amode (AddrRegImm reg off) code)
1992 #endif /* powerpc_TARGET_ARCH */
1994 -- -----------------------------------------------------------------------------
1995 -- getOperand: sometimes any operand will do.
1997 -- getNonClobberedOperand: the value of the operand will remain valid across
1998 -- the computation of an arbitrary expression, unless the expression
1999 -- is computed directly into a register which the operand refers to
2000 -- (see trivialCode where this function is used for an example).
2002 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Like getOperand, but the returned operand stays valid across the
-- computation of an arbitrary expression (see trivialCode).
2004 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2005 #if x86_64_TARGET_ARCH
-- x86_64: a usable float literal is placed in read-only data and
-- addressed %rip-relative.
2006 getNonClobberedOperand (CmmLit lit)
2007 | isSuitableFloatingPointLit lit = do
2008 lbl <- getNewLabelNat
2009 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2011 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals can be immediates directly.
2013 getNonClobberedOperand (CmmLit lit)
2014 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2015 return (OpImm (litToImm lit), nilOL)
2016 getNonClobberedOperand (CmmLoad mem pk)
2017 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2018 Amode src mem_code <- getAmode mem
-- If the amode mentions clobberable real registers, snapshot the
-- effective address into a fresh register with LEA first.
2020 if (amodeCouldBeClobbered src)
2022 tmp <- getNewRegNat wordSize
2023 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2024 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2027 return (OpAddr src', save_code `appOL` mem_code)
-- Anything else: force into a non-clobberable register.
2028 getNonClobberedOperand e = do
2029 (reg, code) <- getNonClobberedReg e
2030 return (OpReg reg, code)
-- | True if any register in the amode is a free (caller-clobberable)
-- real register; virtual registers are never considered clobbered.
2032 amodeCouldBeClobbered :: AddrMode -> Bool
2033 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2035 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2036 regClobbered _ = False
2038 -- getOperand: the operand is not required to remain valid across the
2039 -- computation of an arbitrary expression.
2040 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2041 #if x86_64_TARGET_ARCH
-- x86_64 float literal: read-only data addressed %rip-relative
-- (same scheme as getNonClobberedOperand above).
2042 getOperand (CmmLit lit)
2043 | isSuitableFloatingPointLit lit = do
2044 lbl <- getNewLabelNat
2045 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2047 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2049 getOperand (CmmLit lit)
2050 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2051 return (OpImm (litToImm lit), nilOL)
2052 getOperand (CmmLoad mem pk)
2053 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2054 Amode src mem_code <- getAmode mem
2055 return (OpAddr src, mem_code)
-- Fallback (equation head elided): any register will do.
2057 (reg, code) <- getSomeReg e
2058 return (OpReg reg, code)
-- | Can this expression be used directly as an instruction operand
-- (memory load, small literal, or usable float literal)?
2060 isOperand :: CmmExpr -> Bool
2061 isOperand (CmmLoad _ _) = True
2062 isOperand (CmmLit lit) = is32BitLit lit
2063 || isSuitableFloatingPointLit lit
2066 -- if we want a floating-point literal as an operand, we can
2067 -- use it directly from memory. However, if the literal is
2068 -- zero, we're better off generating it into a register using
-- xor (hence zero is excluded here).
2070 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2071 isSuitableFloatingPointLit _ = False
-- | Prefer a memory operand for loads; otherwise a non-clobberable
-- register (callers such as div_code need the operand to survive).
2073 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2074 getRegOrMem (CmmLoad mem pk)
2075 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2076 Amode src mem_code <- getAmode mem
2077 return (OpAddr src, mem_code)
-- Fallback (equation head elided).
2079 (reg, code) <- getNonClobberedReg e
2080 return (OpReg reg, code)
2082 #if x86_64_TARGET_ARCH
-- On x86_64 a W64 int literal is only "32-bit" if it fits signed 32 bits.
2083 is32BitLit (CmmInt i W64) = is32BitInteger i
2084 -- assume that labels are in the range 0-2^31-1: this assumes the
2085 -- small memory model (see gcc docs, -mcmodel=small).
-- | Does the value fit in a signed 32-bit immediate?
2090 is32BitInteger :: Integer -> Bool
2091 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
2092 where i64 = fromIntegral i :: Int64
2093 -- a CmmInt is intended to be truncated to the appropriate
2094 -- number of bits, so here we truncate it to Int64. This is
2095 -- important because e.g. -1 as a CmmInt might be either
2096 -- -1 or 18446744073709551615.
2098 -- -----------------------------------------------------------------------------
2099 -- The 'CondCode' type: Condition codes passed up the tree.
-- | A condition code plus the code that sets it; the Bool appears to
-- flag float vs. int comparisons — TODO confirm against uses elsewhere.
2101 data CondCode = CondCode Bool Cond InstrBlock
2103 -- Set up a condition code for a conditional branch.
2105 getCondCode :: CmmExpr -> NatM CondCode
2107 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2109 #if alpha_TARGET_ARCH
-- Alpha code generation is unsupported here.
2110 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2111 #endif /* alpha_TARGET_ARCH */
2113 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2115 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2116 -- yes, they really do seem to want exactly the same!
2118 getCondCode (CmmMachOp mop [x, y])
2121 MO_F_Eq W32 -> condFltCode EQQ x y
2122 MO_F_Ne W32 -> condFltCode NE x y
2123 MO_F_Gt W32 -> condFltCode GTT x y
2124 MO_F_Ge W32 -> condFltCode GE x y
2125 MO_F_Lt W32 -> condFltCode LTT x y
2126 MO_F_Le W32 -> condFltCode LE x y
2128 MO_F_Eq W64 -> condFltCode EQQ x y
2129 MO_F_Ne W64 -> condFltCode NE x y
2130 MO_F_Gt W64 -> condFltCode GTT x y
2131 MO_F_Ge W64 -> condFltCode GE x y
2132 MO_F_Lt W64 -> condFltCode LTT x y
2133 MO_F_Le W64 -> condFltCode LE x y
2135 MO_Eq rep -> condIntCode EQQ x y
2136 MO_Ne rep -> condIntCode NE x y
2138 MO_S_Gt rep -> condIntCode GTT x y
2139 MO_S_Ge rep -> condIntCode GE x y
2140 MO_S_Lt rep -> condIntCode LTT x y
2141 MO_S_Le rep -> condIntCode LE x y
2143 MO_U_Gt rep -> condIntCode GU x y
2144 MO_U_Ge rep -> condIntCode GEU x y
2145 MO_U_Lt rep -> condIntCode LU x y
2146 MO_U_Le rep -> condIntCode LEU x y
2148 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2150 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2152 #elif powerpc_TARGET_ARCH
2154 -- almost the same as everywhere else - but we need to
2155 -- extend small integers to 32 bit first
2157 getCondCode (CmmMachOp mop [x, y])
2159 MO_F_Eq W32 -> condFltCode EQQ x y
2160 MO_F_Ne W32 -> condFltCode NE x y
2161 MO_F_Gt W32 -> condFltCode GTT x y
2162 MO_F_Ge W32 -> condFltCode GE x y
2163 MO_F_Lt W32 -> condFltCode LTT x y
2164 MO_F_Le W32 -> condFltCode LE x y
2166 MO_F_Eq W64 -> condFltCode EQQ x y
2167 MO_F_Ne W64 -> condFltCode NE x y
2168 MO_F_Gt W64 -> condFltCode GTT x y
2169 MO_F_Ge W64 -> condFltCode GE x y
2170 MO_F_Lt W64 -> condFltCode LTT x y
2171 MO_F_Le W64 -> condFltCode LE x y
2173 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2174 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2176 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2177 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2178 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2179 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2181 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2182 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2183 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2184 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2186 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2188 getCondCode other = panic "getCondCode(2)(powerpc)"
2194 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2195 -- passed back up the tree.
2197 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2199 #if alpha_TARGET_ARCH
2200 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2201 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2202 #endif /* alpha_TARGET_ARCH */
2204 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2205 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- The equations below special-case operand shapes so the comparison can
-- often be a single CMP/TEST instruction, falling through to the fully
-- general register-vs-register/memory case at the end.
2207 -- memory vs immediate
2208 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2209 Amode x_addr x_code <- getAmode x
2212 code = x_code `snocOL`
2213 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2215 return (CondCode False cond code)
2217 -- anything vs zero, using a mask
2218 -- TODO: Add some sanity checking!!!!
2219 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2220 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2222 (x_reg, x_code) <- getSomeReg x
2224 code = x_code `snocOL`
2225 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2227 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets the flags without an immediate
2230 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2231 (x_reg, x_code) <- getSomeReg x
2233 code = x_code `snocOL`
2234 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2236 return (CondCode False cond code)
2238 -- anything vs operand
2239 condIntCode cond x y | isOperand y = do
2240 (x_reg, x_code) <- getNonClobberedReg x
2241 (y_op, y_code) <- getOperand y
2243 code = x_code `appOL` y_code `snocOL`
2244 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2246 return (CondCode False cond code)
2248 -- anything vs anything
2249 condIntCode cond x y = do
2250 (y_reg, y_code) <- getNonClobberedReg y
2251 (x_op, x_code) <- getRegOrMem x
2253 code = y_code `appOL`
2255 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2257 return (CondCode False cond code)
2260 #if i386_TARGET_ARCH
-- i386 uses the x87 GCMP pseudo-instruction for float comparisons.
2261 condFltCode cond x y
2262 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2263 (x_reg, x_code) <- getNonClobberedReg x
2264 (y_reg, y_code) <- getSomeReg y
2266 code = x_code `appOL` y_code `snocOL`
2267 GCMP cond x_reg y_reg
2268 -- The GCMP insn does the test and sets the zero flag if comparable
2269 -- and true. Hence we always supply EQQ as the condition to test.
2270 return (CondCode True EQQ code)
2271 #endif /* i386_TARGET_ARCH */
2273 #if x86_64_TARGET_ARCH
2274 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2275 -- an operand, but the right must be a reg. We can probably do better
2276 -- than this general case...
2277 condFltCode cond x y = do
2278 (x_reg, x_code) <- getNonClobberedReg x
2279 (y_op, y_code) <- getOperand y
2281 code = x_code `appOL`
2283 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2284 -- NB(1): we need to use the unsigned comparison operators on the
2285 -- result of this comparison.
2287 return (CondCode True (condToUnsigned cond) code)
2290 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2292 #if sparc_TARGET_ARCH
-- SPARC has no CMP instruction: a subtract into g0 (the hard-wired
-- zero register) with the condition-code-setting bit on is used instead.
2294 condIntCode cond x (CmmLit (CmmInt y rep))
2297 (src1, code) <- getSomeReg x
2299 src2 = ImmInt (fromInteger y)
2300 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2301 return (CondCode False cond code')
2303 condIntCode cond x y = do
2304 (src1, code1) <- getSomeReg x
2305 (src2, code2) <- getSomeReg y
2307 code__2 = code1 `appOL` code2 `snocOL`
2308 SUB False True src1 (RIReg src2) g0
2309 return (CondCode False cond code__2)
-- Mixed-width float comparison: promote the 32-bit side to FF64 with
-- FxTOy into a fresh temporary before comparing.
2312 condFltCode cond x y = do
2313 (src1, code1) <- getSomeReg x
2314 (src2, code2) <- getSomeReg y
2315 tmp <- getNewRegNat FF64
2317 promote x = FxTOy FF32 FF64 x tmp
2323 if pk1 `cmmEqType` pk2 then
2324 code1 `appOL` code2 `snocOL`
2325 FCMP True (cmmTypeSize pk1) src1 src2
2326 else if typeWidth pk1 == W32 then
2327 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2328 FCMP True FF64 tmp src2
2330 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2331 FCMP True FF64 src1 tmp
2332 return (CondCode True cond code__2)
2334 #endif /* sparc_TARGET_ARCH */
2336 #if powerpc_TARGET_ARCH
2337 -- ###FIXME: I16 and I8!
2338 condIntCode cond x (CmmLit (CmmInt y rep))
2339 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2341 (src1, code) <- getSomeReg x
2343 code' = code `snocOL`
2344 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2345 return (CondCode False cond code')
2347 condIntCode cond x y = do
2348 (src1, code1) <- getSomeReg x
2349 (src2, code2) <- getSomeReg y
2351 code' = code1 `appOL` code2 `snocOL`
2352 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2353 return (CondCode False cond code')
2355 condFltCode cond x y = do
2356 (src1, code1) <- getSomeReg x
2357 (src2, code2) <- getSomeReg y
2359 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2360 code'' = case cond of -- twiddle CR to handle unordered case
2361 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2362 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR field bit positions used by CRNOR above (PowerPC CR0 layout).
2365 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2366 return (CondCode True cond code'')
2368 #endif /* powerpc_TARGET_ARCH */
2370 -- -----------------------------------------------------------------------------
2371 -- Generating assignments
2373 -- Assignments are really at the heart of the whole code generation
2374 -- business. Almost all top-level nodes of any real importance are
2375 -- assignments, which correspond to loads, stores, or register
2376 -- transfers. If we're really lucky, some of the register transfers
2377 -- will go away, because we can use the destination register to
2378 -- complete the code generation for the right hand side. This only
2379 -- fails when the right hand side is forced into a fixed register
2380 -- (e.g. the result of a call).
2382 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2383 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2385 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2386 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2388 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2390 #if alpha_TARGET_ARCH
-- NOTE(review): the Alpha code below predates the Cmm conversion and is
-- effectively dead; kept verbatim under its #if.
2392 assignIntCode pk (CmmLoad dst _) src
2393 = getNewRegNat IntRep `thenNat` \ tmp ->
2394 getAmode dst `thenNat` \ amode ->
2395 getRegister src `thenNat` \ register ->
2397 code1 = amodeCode amode []
2398 dst__2 = amodeAddr amode
2399 code2 = registerCode register tmp []
2400 src__2 = registerName register tmp
2401 sz = primRepToSize pk
2402 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2406 assignIntCode pk dst src
2407 = getRegister dst `thenNat` \ register1 ->
2408 getRegister src `thenNat` \ register2 ->
2410 dst__2 = registerName register1 zeroh
2411 code = registerCode register2 dst__2
2412 src__2 = registerName register2 dst__2
2413 code__2 = if isFixed register2
2414 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2419 #endif /* alpha_TARGET_ARCH */
2421 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2423 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2425 -- integer assignment to memory
2427 -- specific case of adding/subtracting an integer to a particular address.
2428 -- ToDo: catch other cases where we can use an operation directly on a memory
-- When the RHS is "load addr2; +/- literal" and addr2 is the very
-- address being stored to, emit a single read-modify-write ADD/SUB on
-- memory instead of a load/op/store sequence.
2430 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2431 CmmLit (CmmInt i _)])
2432 | addr == addr2, pk /= II64 || is32BitInteger i,
2433 Just instr <- check op
2434 = do Amode amode code_addr <- getAmode addr
2435 let code = code_addr `snocOL`
2436 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2439 check (MO_Add _) = Just ADD
2440 check (MO_Sub _) = Just SUB
2445 assignMem_IntCode pk addr src = do
2446 Amode addr code_addr <- getAmode addr
2447 (code_src, op_src) <- get_op_RI src
2449 code = code_src `appOL`
2451 MOV pk op_src (OpAddr addr)
2452 -- NOTE: op_src is stable, so it will still be valid
2453 -- after code_addr. This may involve the introduction
2454 -- of an extra MOV to a temporary register, but we hope
2455 -- the register allocator will get rid of it.
2459 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2460 get_op_RI (CmmLit lit) | is32BitLit lit
2461 = return (nilOL, OpImm (litToImm lit))
2463 = do (reg,code) <- getNonClobberedReg op
2464 return (code, OpReg reg)
2467 -- Assign; dst is a reg, rhs is mem
2468 assignReg_IntCode pk reg (CmmLoad src _) = do
2469 load_code <- intLoadCode (MOV pk) src
2470 return (load_code (getRegisterReg reg))
2472 -- dst is a reg, but src could be anything
2473 assignReg_IntCode pk reg src = do
2474 code <- getAnyReg src
2475 return (code (getRegisterReg reg))
2477 #endif /* i386_TARGET_ARCH */
2479 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2481 #if sparc_TARGET_ARCH
-- Store the (register-resident) source at the computed address.
2483 assignMem_IntCode pk addr src = do
2484 (srcReg, code) <- getSomeReg src
2485 Amode dstAddr addr_code <- getAmode addr
2486 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- An Any result can be generated straight into dst; a Fixed result
-- needs an explicit register-to-register move (OR with g0 on SPARC).
2488 assignReg_IntCode pk reg src = do
2489 r <- getRegister src
2491 Any _ code -> code dst
2492 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2494 dst = getRegisterReg reg
2497 #endif /* sparc_TARGET_ARCH */
2499 #if powerpc_TARGET_ARCH
2501 assignMem_IntCode pk addr src = do
2502 (srcReg, code) <- getSomeReg src
2503 Amode dstAddr addr_code <- getAmode addr
2504 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2506 -- dst is a reg, but src could be anything
2507 assignReg_IntCode pk reg src
2509 r <- getRegister src
2511 Any _ code -> code dst
-- MR is the PowerPC register-move instruction.
2512 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2514 dst = getRegisterReg reg
2516 #endif /* powerpc_TARGET_ARCH */
2519 -- -----------------------------------------------------------------------------
2520 -- Floating-point assignments
2522 #if alpha_TARGET_ARCH
-- NOTE(review): dead pre-Cmm Alpha code, kept verbatim under its #if.
2524 assignFltCode pk (CmmLoad dst _) src
2525 = getNewRegNat pk `thenNat` \ tmp ->
2526 getAmode dst `thenNat` \ amode ->
2527 getRegister src `thenNat` \ register ->
2529 code1 = amodeCode amode []
2530 dst__2 = amodeAddr amode
2531 code2 = registerCode register tmp []
2532 src__2 = registerName register tmp
2533 sz = primRepToSize pk
2534 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2538 assignFltCode pk dst src
2539 = getRegister dst `thenNat` \ register1 ->
2540 getRegister src `thenNat` \ register2 ->
2542 dst__2 = registerName register1 zeroh
2543 code = registerCode register2 dst__2
2544 src__2 = registerName register2 dst__2
2545 code__2 = if isFixed register2
2546 then code . mkSeqInstr (FMOV src__2 dst__2)
2551 #endif /* alpha_TARGET_ARCH */
2553 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2555 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2557 -- Floating point assignment to memory
-- i386 stores via the x87 GST; x86_64 uses a plain (SSE) MOV to memory.
2558 assignMem_FltCode pk addr src = do
2559 (src_reg, src_code) <- getNonClobberedReg src
2560 Amode addr addr_code <- getAmode addr
2562 code = src_code `appOL`
2564 IF_ARCH_i386(GST pk src_reg addr,
2565 MOV pk (OpReg src_reg) (OpAddr addr))
2568 -- Floating point assignment to a register/temporary
2569 assignReg_FltCode pk reg src = do
2570 src_code <- getAnyReg src
2571 return (src_code (getRegisterReg reg))
2573 #endif /* i386_TARGET_ARCH */
2575 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2577 #if sparc_TARGET_ARCH
2579 -- Floating point assignment to memory
-- If the source's width differs from the store width, convert via
-- FxTOy into a temporary first, then store the temporary.
2580 assignMem_FltCode pk addr src = do
2581 Amode dst__2 code1 <- getAmode addr
2582 (src__2, code2) <- getSomeReg src
2583 tmp1 <- getNewRegNat pk
2585 pk__2 = cmmExprType src
2586 code__2 = code1 `appOL` code2 `appOL`
2587 if sizeToWidth pk == typeWidth pk__2
2588 then unitOL (ST pk src__2 dst__2)
2589 else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2590 , ST pk tmp1 dst__2]
2593 -- Floating point assignment to a register/temporary
2594 -- ToDo: Verify correctness
2595 assignReg_FltCode pk reg src = do
2596 r <- getRegister src
2597 v1 <- getNewRegNat pk
2599 Any _ code -> code dst
2600 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2602 dst = getRegisterReg reg
2604 #endif /* sparc_TARGET_ARCH */
2606 #if powerpc_TARGET_ARCH
-- On PowerPC the integer assignment code already handles FP registers,
-- so the float entry points are simple aliases.
2609 assignMem_FltCode = assignMem_IntCode
2610 assignReg_FltCode = assignReg_IntCode
2612 #endif /* powerpc_TARGET_ARCH */
2615 -- -----------------------------------------------------------------------------
2616 -- Generating an non-local jump
2618 -- (If applicable) Do not fill the delay slots here; you will confuse the
2619 -- register allocator.
2621 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2623 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2625 #if alpha_TARGET_ARCH
2627 genJump (CmmLabel lbl)
2628 | isAsmTemp lbl = returnInstr (BR target)
2629 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2631 target = ImmCLbl lbl
2634 = getRegister tree `thenNat` \ register ->
2635 getNewRegNat PtrRep `thenNat` \ tmp ->
2637 dst = registerName register pv
2638 code = registerCode register pv
2639 target = registerName register pv
2641 if isFixed register then
2642 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2644 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2646 #endif /* alpha_TARGET_ARCH */
2648 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2650 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86 JMP takes a memory operand, an immediate, or a register directly.
2652 genJump (CmmLoad mem pk) = do
2653 Amode target code <- getAmode mem
2654 return (code `snocOL` JMP (OpAddr target))
2656 genJump (CmmLit lit) = do
2657 return (unitOL (JMP (OpImm (litToImm lit))))
2660 (reg,code) <- getSomeReg expr
2661 return (code `snocOL` JMP (OpReg reg))
2663 #endif /* i386_TARGET_ARCH */
2665 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2667 #if sparc_TARGET_ARCH
-- The trailing NOPs fill SPARC branch delay slots.
2669 genJump (CmmLit (CmmLabel lbl))
2670 = return (toOL [CALL (Left target) 0 True, NOP])
2672 target = ImmCLbl lbl
2676 (target, code) <- getSomeReg tree
2677 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2679 #endif /* sparc_TARGET_ARCH */
2681 #if powerpc_TARGET_ARCH
2682 genJump (CmmLit (CmmLabel lbl))
2683 = return (unitOL $ JMP lbl)
-- Computed jump: move the target into the count register, then BCTR.
2687 (target,code) <- getSomeReg tree
2688 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2689 #endif /* powerpc_TARGET_ARCH */
-- -----------------------------------------------------------------------------
-- Unconditional branches

-- | Emit an unconditional branch to the given basic block; the
-- architecture-specific instruction(s) come from 'mkBranchInstr'.
genBranch :: BlockId -> NatM InstrBlock
genBranch block_id = return (toOL (mkBranchInstr block_id))
2699 -- -----------------------------------------------------------------------------
2700 -- Conditional jumps
2703 Conditional jumps are always to local labels, so we can use branch
2704 instructions. We peek at the arguments to decide what kind of
2707 ALPHA: For comparisons with 0, we're laughing, because we can just do
2708 the desired conditional branch.
2710 I386: First, we have to ensure that the condition
2711 codes are set according to the supplied comparison operation.
2713 SPARC: First, we have to ensure that the condition codes are set
2714 according to the supplied comparison operation. We generate slightly
2715 different code for floating point comparisons, because a floating
2716 point operation cannot directly precede a @BF@. We assume the worst
2717 and fill that slot with a @NOP@.
2719 SPARC: Do not fill the delay slots here; you will confuse the register
-- genCondJump: branch to the target block when the condition holds.
2725 :: BlockId -- the branch target
2726 -> CmmExpr -- the condition on which to branch
2729 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2731 #if alpha_TARGET_ARCH
-- NOTE(review): dead pre-Cmm Alpha code (StPrim/StixTree era), kept
-- verbatim under its #if; retained only under -fwarn suppression.
2733 genCondJump id (StPrim op [x, StInt 0])
2734 = getRegister x `thenNat` \ register ->
2735 getNewRegNat (registerRep register)
2738 code = registerCode register tmp
2739 value = registerName register tmp
2740 pk = registerRep register
2741 target = ImmCLbl lbl
2743 returnSeq code [BI (cmpOp op) value target]
2745 cmpOp CharGtOp = GTT
2747 cmpOp CharEqOp = EQQ
2749 cmpOp CharLtOp = LTT
2758 cmpOp WordGeOp = ALWAYS
2759 cmpOp WordEqOp = EQQ
2761 cmpOp WordLtOp = NEVER
2762 cmpOp WordLeOp = EQQ
2764 cmpOp AddrGeOp = ALWAYS
2765 cmpOp AddrEqOp = EQQ
2767 cmpOp AddrLtOp = NEVER
2768 cmpOp AddrLeOp = EQQ
-- Comparison against a floating-point zero: branch on the value itself.
2770 genCondJump lbl (StPrim op [x, StDouble 0.0])
2771 = getRegister x `thenNat` \ register ->
2772 getNewRegNat (registerRep register)
2775 code = registerCode register tmp
2776 value = registerName register tmp
2777 pk = registerRep register
2778 target = ImmCLbl lbl
2780 return (code . mkSeqInstr (BF (cmpOp op) value target))
2782 cmpOp FloatGtOp = GTT
2783 cmpOp FloatGeOp = GE
2784 cmpOp FloatEqOp = EQQ
2785 cmpOp FloatNeOp = NE
2786 cmpOp FloatLtOp = LTT
2787 cmpOp FloatLeOp = LE
2788 cmpOp DoubleGtOp = GTT
2789 cmpOp DoubleGeOp = GE
2790 cmpOp DoubleEqOp = EQQ
2791 cmpOp DoubleNeOp = NE
2792 cmpOp DoubleLtOp = LTT
2793 cmpOp DoubleLeOp = LE
-- General float comparison: compute the comparison into a register,
-- then branch on it; note the (instr, cond) pairs invert the sense.
2795 genCondJump lbl (StPrim op [x, y])
2797 = trivialFCode pr instr x y `thenNat` \ register ->
2798 getNewRegNat FF64 `thenNat` \ tmp ->
2800 code = registerCode register tmp
2801 result = registerName register tmp
2802 target = ImmCLbl lbl
2804 return (code . mkSeqInstr (BF cond result target))
2806 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2808 fltCmpOp op = case op of
2822 (instr, cond) = case op of
2823 FloatGtOp -> (FCMP TF LE, EQQ)
2824 FloatGeOp -> (FCMP TF LTT, EQQ)
2825 FloatEqOp -> (FCMP TF EQQ, NE)
2826 FloatNeOp -> (FCMP TF EQQ, EQQ)
2827 FloatLtOp -> (FCMP TF LTT, NE)
2828 FloatLeOp -> (FCMP TF LE, NE)
2829 DoubleGtOp -> (FCMP TF LE, EQQ)
2830 DoubleGeOp -> (FCMP TF LTT, EQQ)
2831 DoubleEqOp -> (FCMP TF EQQ, NE)
2832 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2833 DoubleLtOp -> (FCMP TF LTT, NE)
2834 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same invert-and-branch scheme.
2836 genCondJump lbl (StPrim op [x, y])
2837 = trivialCode instr x y `thenNat` \ register ->
2838 getNewRegNat IntRep `thenNat` \ tmp ->
2840 code = registerCode register tmp
2841 result = registerName register tmp
2842 target = ImmCLbl lbl
2844 return (code . mkSeqInstr (BI cond result target))
2846 (instr, cond) = case op of
2847 CharGtOp -> (CMP LE, EQQ)
2848 CharGeOp -> (CMP LTT, EQQ)
2849 CharEqOp -> (CMP EQQ, NE)
2850 CharNeOp -> (CMP EQQ, EQQ)
2851 CharLtOp -> (CMP LTT, NE)
2852 CharLeOp -> (CMP LE, NE)
2853 IntGtOp -> (CMP LE, EQQ)
2854 IntGeOp -> (CMP LTT, EQQ)
2855 IntEqOp -> (CMP EQQ, NE)
2856 IntNeOp -> (CMP EQQ, EQQ)
2857 IntLtOp -> (CMP LTT, NE)
2858 IntLeOp -> (CMP LE, NE)
2859 WordGtOp -> (CMP ULE, EQQ)
2860 WordGeOp -> (CMP ULT, EQQ)
2861 WordEqOp -> (CMP EQQ, NE)
2862 WordNeOp -> (CMP EQQ, EQQ)
2863 WordLtOp -> (CMP ULT, NE)
2864 WordLeOp -> (CMP ULE, NE)
2865 AddrGtOp -> (CMP ULE, EQQ)
2866 AddrGeOp -> (CMP ULT, EQQ)
2867 AddrEqOp -> (CMP EQQ, NE)
2868 AddrNeOp -> (CMP EQQ, EQQ)
2869 AddrLtOp -> (CMP ULT, NE)
2870 AddrLeOp -> (CMP ULE, NE)
2872 #endif /* alpha_TARGET_ARCH */
2874 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2876 #if i386_TARGET_ARCH
-- | Emit a conditional jump to @id@: compute the condition code for
-- the boolean expression, then append the conditional-jump instruction.
genCondJump id bool = do
    CondCode _ cond cond_code <- getCondCode bool
    return $ cond_code `snocOL` JXX cond id
2884 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2886 #if x86_64_TARGET_ARCH
-- Integer conditions are a single JXX; float conditions need extra
-- work for the unordered (NaN) case -- see comment with condFltReg.
2888 genCondJump id bool = do
2889 CondCode is_float cond cond_code <- getCondCode bool
2892 return (cond_code `snocOL` JXX cond id)
2894 lbl <- getBlockIdNat
2896 -- see comment with condFltReg
2897 let code = case cond of
2903 plain_test = unitOL (
2906 or_unordered = toOL [
2910 and_ordered = toOL [
2916 return (cond_code `appOL` code)
2920 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2922 #if sparc_TARGET_ARCH
-- BF for float conditions, BI for integer; trailing NOPs fill the
-- SPARC branch delay slots (see the commentary above).
2924 genCondJump (BlockId id) bool = do
2925 CondCode is_float cond code <- getCondCode bool
2930 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2931 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2935 #endif /* sparc_TARGET_ARCH */
2938 #if powerpc_TARGET_ARCH
2940 genCondJump id bool = do
2941 CondCode is_float cond code <- getCondCode bool
2942 return (code `snocOL` BCC cond id)
2944 #endif /* powerpc_TARGET_ARCH */
2947 -- -----------------------------------------------------------------------------
2948 -- Generating C calls
2950 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2951 -- @get_arg@, which moves the arguments to the correct registers/stack
2952 -- locations. Apart from that, the code is easy.
2954 -- (If applicable) Do not fill the delay slots here; you will confuse the
2955 -- register allocator.
-- genCCall: compile a foreign call (target, result locations, args).
2958 :: CmmCallTarget -- function to call
2959 -> HintedCmmFormals -- where to put the result
2960 -> HintedCmmActuals -- arguments (of mixed type)
2963 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2965 #if alpha_TARGET_ARCH
-- NOTE(review): dead pre-Cmm Alpha code, kept verbatim under its #if.
2969 genCCall fn cconv result_regs args
2970 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2971 `thenNat` \ ((unused,_), argCode) ->
2973 nRegs = length allArgRegs - length unused
2974 code = asmSeqThen (map ($ []) argCode)
2977 LDA pv (AddrImm (ImmLab (ptext fn))),
2978 JSR ra (AddrReg pv) nRegs,
2979 LDGP gp (AddrReg ra)]
2981 ------------------------
2982 {- Try to get a value into a specific register (or registers) for
2983 a call. The first 6 arguments go into the appropriate
2984 argument register (separate registers for integer and floating
2985 point arguments, but used in lock-step), and the remaining
2986 arguments are dumped to the stack, beginning at 0(sp). Our
2987 first argument is a pair of the list of remaining argument
2988 registers to be assigned for this call and the next stack
2989 offset to use for overflowing arguments. This way,
2990 @get_Arg@ can be applied to all of a call's arguments using
2994 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2995 -> StixTree -- Current argument
2996 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2998 -- We have to use up all of our argument registers first...
3000 get_arg ((iDst,fDst):dsts, offset) arg
3001 = getRegister arg `thenNat` \ register ->
3003 reg = if isFloatType pk then fDst else iDst
3004 code = registerCode register reg
3005 src = registerName register reg
3006 pk = registerRep register
3009 if isFloatType pk then
3010 ((dsts, offset), if isFixed register then
3011 code . mkSeqInstr (FMOV src fDst)
3014 ((dsts, offset), if isFixed register then
3015 code . mkSeqInstr (OR src (RIReg src) iDst)
3018 -- Once we have run out of argument registers, we move to the
3021 get_arg ([], offset) arg
3022 = getRegister arg `thenNat` \ register ->
3023 getNewRegNat (registerRep register)
3026 code = registerCode register tmp
3027 src = registerName register tmp
3028 pk = registerRep register
3029 sz = primRepToSize pk
3031 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3033 #endif /* alpha_TARGET_ARCH */
3035 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3037 #if i386_TARGET_ARCH
3039 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3040 -- write barrier compiles to no code on x86/x86-64;
3041 -- we keep it this long in order to prevent earlier optimisations.
3043 -- we only cope with a single result for foreign calls
-- FP primitives that the x87 can do inline (sqrt/sin/cos/tan) are
-- expanded here; everything else goes out of line via outOfLineFloatOp.
3044 genCCall (CmmPrim op) [CmmHinted r _] args = do
3045 l1 <- getNewLabelNat
3046 l2 <- getNewLabelNat
3048 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3049 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3051 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3052 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3054 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3055 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3057 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3058 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3060 other_op -> outOfLineFloatOp op r args
3062 actuallyInlineFloatOp instr size [CmmHinted x _]
3063 = do res <- trivialUFCode size (instr size) x
3065 return (any (getRegisterReg (CmmLocal r)))
-- The general case: push args right-to-left (C convention), call,
-- pop the stack, and move the result out of eax/edx/fake0.
3067 genCCall target dest_regs args = do
3069 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3070 #if !darwin_TARGET_OS
3071 tot_arg_size = sum sizes
-- Darwin requires 16-byte stack alignment at the call, hence the pad.
3073 raw_arg_size = sum sizes
3074 tot_arg_size = roundTo 16 raw_arg_size
3075 arg_pad_size = tot_arg_size - raw_arg_size
3076 delta0 <- getDeltaNat
3077 setDeltaNat (delta0 - arg_pad_size)
3080 push_codes <- mapM push_arg (reverse args)
3081 delta <- getDeltaNat
3084 -- deal with static vs dynamic call targets
3085 (callinsns,cconv) <-
3088 CmmCallee (CmmLit (CmmLabel lbl)) conv
3089 -> -- ToDo: stdcall arg sizes
3090 return (unitOL (CALL (Left fn_imm) []), conv)
3091 where fn_imm = ImmCLbl lbl
3093 -> do { (dyn_c, dyn_r) <- get_op expr
3094 ; ASSERT( isWord32 (cmmExprType expr) )
3095 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3098 #if darwin_TARGET_OS
3100 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3101 DELTA (delta0 - arg_pad_size)]
3102 `appOL` concatOL push_codes
3105 = concatOL push_codes
3106 call = callinsns `appOL`
3108 -- Deallocate parameters after call for ccall;
3109 -- but not for stdcall (callee does it)
3110 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3111 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3113 [DELTA (delta + tot_arg_size)]
3116 setDeltaNat (delta + tot_arg_size)
3119 -- assign the results, if necessary
-- FP results come back in x87 fake0; 64-bit results in eax:edx.
3120 assign_code [] = nilOL
3121 assign_code [CmmHinted dest _hint]
3122 | isFloatType ty = unitOL (GMOV fake0 r_dest)
3123 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3124 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3125 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3127 ty = localRegType dest
3129 r_dest_hi = getHiVRegFromLo r_dest
3130 r_dest = getRegisterReg (CmmLocal dest)
3131 assign_code many = panic "genCCall.assign_code many"
3133 return (push_code `appOL`
3135 assign_code dest_regs)
3138 arg_size :: CmmType -> Int -- Width in bytes
3139 arg_size ty = widthInBytes (typeWidth ty)
-- Round x up to the next multiple of a (x is returned unchanged when
-- it is already a multiple of a).
roundTo a x
  | excess == 0 = x
  | otherwise   = x + (a - excess)
  where excess = x `mod` a
-- Push one C-call argument onto the stack, keeping the DELTA
-- annotations (stack-pointer bookkeeping for the register allocator)
-- in step with every adjustment of esp.
3145 push_arg :: HintedCmmActual {-current argument-}
3146 -> NatM InstrBlock -- code
3148 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
-- 64-bit arguments are pushed as two 32-bit words, high word first.
3149 | isWord64 arg_ty = do
3150 ChildCode64 code r_lo <- iselExpr64 arg
3151 delta <- getDeltaNat
3152 setDeltaNat (delta - 8)
3154 r_hi = getHiVRegFromLo r_lo
3156 return ( code `appOL`
3157 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3158 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3163 (code, reg) <- get_op arg
3164 delta <- getDeltaNat
3165 let size = arg_size arg_ty -- Byte size
3166 setDeltaNat (delta-size)
-- Floats cannot be PUSHed: make room on the stack with SUB, then
-- store via the x87 GST instruction.
3167 if (isFloatType arg_ty)
3168 then return (code `appOL`
3169 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3171 GST (floatSize (typeWidth arg_ty))
3172 reg (AddrBaseIndex (EABaseReg esp)
3176 else return (code `snocOL`
3177 PUSH II32 (OpReg reg) `snocOL`
3181 arg_ty = cmmExprType arg
3184 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3186 (reg,code) <- getSomeReg op
3189 #endif /* i386_TARGET_ARCH */
3191 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Compile a floating-point CallishMachOp as an unsafe C call to the
-- corresponding libm routine (chosen by name below).
3193 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3195 outOfLineFloatOp mop res args
3197 dflags <- getDynFlagsNat
3198 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3199 let target = CmmCallee targetExpr CCallConv
-- A Float64 result can be received directly; otherwise the call goes
-- via a fresh F64 temporary which is then assigned (narrowed) to res.
3201 if isFloat64 (localRegType res)
3203 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3207 tmp = LocalReg uq f64
3209 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3210 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3211 return (code1 `appOL` code2)
3213 lbl = mkForeignLabel fn Nothing False
-- Map each MachOp to the C math-library function it calls.
3216 MO_F32_Sqrt -> fsLit "sqrtf"
3217 MO_F32_Sin -> fsLit "sinf"
3218 MO_F32_Cos -> fsLit "cosf"
3219 MO_F32_Tan -> fsLit "tanf"
3220 MO_F32_Exp -> fsLit "expf"
3221 MO_F32_Log -> fsLit "logf"
3223 MO_F32_Asin -> fsLit "asinf"
3224 MO_F32_Acos -> fsLit "acosf"
3225 MO_F32_Atan -> fsLit "atanf"
3227 MO_F32_Sinh -> fsLit "sinhf"
3228 MO_F32_Cosh -> fsLit "coshf"
3229 MO_F32_Tanh -> fsLit "tanhf"
3230 MO_F32_Pwr -> fsLit "powf"
3232 MO_F64_Sqrt -> fsLit "sqrt"
3233 MO_F64_Sin -> fsLit "sin"
3234 MO_F64_Cos -> fsLit "cos"
3235 MO_F64_Tan -> fsLit "tan"
3236 MO_F64_Exp -> fsLit "exp"
3237 MO_F64_Log -> fsLit "log"
3239 MO_F64_Asin -> fsLit "asin"
3240 MO_F64_Acos -> fsLit "acos"
3241 MO_F64_Atan -> fsLit "atan"
3243 MO_F64_Sinh -> fsLit "sinh"
3244 MO_F64_Cosh -> fsLit "cosh"
3245 MO_F64_Tanh -> fsLit "tanh"
3246 MO_F64_Pwr -> fsLit "pow"
3248 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3250 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3252 #if x86_64_TARGET_ARCH
3254 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3255 -- write barrier compiles to no code on x86/x86-64;
3256 -- we keep it this long in order to prevent earlier optimisations.
-- On x86_64 all FP primitives go out of line (no inline x87 expansion
-- as in the i386 case above).
3259 genCCall (CmmPrim op) [CmmHinted r _] args =
3260 outOfLineFloatOp op r args
-- | C-call generation, x86_64, general case.
-- Integer arguments are passed in 'allArgRegs' and FP arguments in
-- 'allFPArgRegs'; overflow arguments are pushed on the stack right to
-- left.  %rsp is kept 16-byte aligned at the call, and %al is loaded
-- with the number of SSE registers used, as the varargs convention
-- requires (see comments below).
3262 genCCall target dest_regs args = do
3264 -- load up the register arguments
3265 (stack_args, aregs, fregs, load_args_code)
3266 <- load_args args allArgRegs allFPArgRegs nilOL
-- Registers actually consumed are those dropped from the free lists.
3269 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3270 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3271 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3272 -- for annotating the call instruction with
3274 sse_regs = length fp_regs_used
3276 tot_arg_size = arg_size * length stack_args
3278 -- On entry to the called function, %rsp should be aligned
3279 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3280 -- the return address is 16-byte aligned). In STG land
3281 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3282 -- need to make sure we push a multiple of 16-bytes of args,
3283 -- plus the return address, to get the correct alignment.
3284 -- Urg, this is hard. We need to feed the delta back into
3285 -- the arg pushing code.
3286 (real_size, adjust_rsp) <-
3287 if tot_arg_size `rem` 16 == 0
3288 then return (tot_arg_size, nilOL)
3289 else do -- we need to adjust...
3290 delta <- getDeltaNat
3291 setDeltaNat (delta-8)
3292 return (tot_arg_size+8, toOL [
3293 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3297 -- push the stack args, right to left
3298 push_code <- push_args (reverse stack_args) nilOL
3299 delta <- getDeltaNat
3301 -- deal with static vs dynamic call targets
3302 (callinsns,cconv) <-
3305 CmmCallee (CmmLit (CmmLabel lbl)) conv
3306 -> -- ToDo: stdcall arg sizes
3307 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3308 where fn_imm = ImmCLbl lbl
3310 -> do (dyn_r, dyn_c) <- getSomeReg expr
3311 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3314 -- The x86_64 ABI requires us to set %al to the number of SSE
3315 -- registers that contain arguments, if the called routine
3316 -- is a varargs function. We don't know whether it's a
3317 -- varargs function or not, so we have to assume it is.
3319 -- It's not safe to omit this assignment, even if the number
3320 -- of SSE regs in use is zero. If %al is larger than 8
3321 -- on entry to a varargs function, seg faults ensue.
3322 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3324 let call = callinsns `appOL`
3326 -- Deallocate parameters after call for ccall;
3327 -- but not for stdcall (callee does it)
3328 (if cconv == StdCallConv || real_size==0 then [] else
-- NOTE(review): this uses esp where the push code uses rsp --
-- presumably aliases for the same register here; confirm in MachRegs.
3329 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3331 [DELTA (delta + real_size)]
3334 setDeltaNat (delta + real_size)
3337 -- assign the results, if necessary
-- Float results come back in %xmm0, everything else in %rax.
-- BUGFIX: the W64 (double) case used floatSize W32, i.e. a 32-bit
-- float move, which would truncate a double result; it must use
-- floatSize W64, symmetric with the W32 case.
3338 assign_code [] = nilOL
3339 assign_code [CmmHinted dest _hint] =
3340 case typeWidth rep of
3341 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3342 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
3343 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3345 rep = localRegType dest
3346 r_dest = getRegisterReg (CmmLocal dest)
3347 assign_code many = panic "genCCall.assign_code many"
3349 return (load_args_code `appOL`
3352 assign_eax sse_regs `appOL`
3354 assign_code dest_regs)
3357 arg_size = 8 -- always, at the mo
-- Distribute arguments over the available int/FP registers; whatever
-- does not fit is returned for the stack-pushing pass.
3359 load_args :: [CmmHinted CmmExpr]
3360 -> [Reg] -- int regs avail for args
3361 -> [Reg] -- FP regs avail for args
3363 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3364 load_args args [] [] code = return (args, [], [], code)
3365 -- no more regs to use
3366 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3367 -- no more args to push
3368 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3369 | isFloatType arg_rep =
3373 arg_code <- getAnyReg arg
3374 load_args rest aregs rs (code `appOL` arg_code r)
3379 arg_code <- getAnyReg arg
3380 load_args rest rs fregs (code `appOL` arg_code r)
3382 arg_rep = cmmExprType arg
3385 (args',ars,frs,code') <- load_args rest aregs fregs code
3386 return ((CmmHinted arg hint):args', ars, frs, code')
-- Push the remaining args on the stack, tracking the stack delta.
3388 push_args [] code = return code
3389 push_args ((CmmHinted arg hint):rest) code
3390 | isFloatType arg_rep = do
3391 (arg_reg, arg_code) <- getSomeReg arg
3392 delta <- getDeltaNat
3393 setDeltaNat (delta-arg_size)
3394 let code' = code `appOL` arg_code `appOL` toOL [
3395 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3396 DELTA (delta-arg_size),
3397 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3398 push_args rest code'
3401 -- we only ever generate word-sized function arguments. Promotion
3402 -- has already happened: our Int8# type is kept sign-extended
3403 -- in an Int#, for example.
3404 ASSERT(width == W64) return ()
3405 (arg_op, arg_code) <- getOperand arg
3406 delta <- getDeltaNat
3407 setDeltaNat (delta-arg_size)
3408 let code' = code `appOL` toOL [PUSH II64 arg_op,
3409 DELTA (delta-arg_size)]
3410 push_args rest code'
3412 arg_rep = cmmExprType arg
3413 width = typeWidth arg_rep
3416 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3418 #if sparc_TARGET_ARCH
3420 The SPARC calling convention is an absolute
3421 nightmare. The first 6x32 bits of arguments are mapped into
3422 %o0 through %o5, and the remaining arguments are dumped to the
3423 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3425 If we have to put args on the stack, move %o6==%sp down by
3426 the number of words to go on the stack, to ensure there's enough space.
3428 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3429 16 words above the stack pointer is a word for the address of
3430 a structure return value. I use this as a temporary location
3431 for moving values from float to int regs. Certainly it isn't
3432 safe to put anything in the 16 words starting at %sp, since
3433 this area can get trashed at any time due to window overflows
3434 caused by signal handlers.
3436 A final complication (if the above isn't enough) is that
3437 we can't blithely calculate the arguments one by one into
3438 %o0 .. %o5. Consider the following nested calls:
3442 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3443 the inner call will itself use %o0, which trashes the value put there
3444 in preparation for the outer call. Upshot: we need to calculate the
3445 args into temporary regs, and move those to arg regs or onto the
3446 stack only immediately prior to the call proper. Sigh.
-- | C-call generation, SPARC.  All arguments are first computed into
-- fresh integer vregs (so nested calls cannot trash %o0..%o5 -- see
-- the long comment above), then moved into the %o registers / stack
-- slots by 'move_final' immediately before the call.
3449 genCCall target dest_regs argsAndHints = do
3451 args = map hintlessCmm argsAndHints
3452 argcode_and_vregs <- mapM arg_to_int_vregs args
3454 (argcodes, vregss) = unzip argcode_and_vregs
3455 n_argRegs = length allArgRegs
3456 n_argRegs_used = min (length vregs) n_argRegs
3457 vregs = concat vregss
3458 -- deal with static vs dynamic call targets
3459 callinsns <- (case target of
3460 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3461 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3462 CmmCallee expr conv -> do
3463 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3464 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- Callish mach-ops become out-of-line calls; a 'reduce'd (F32) op is
-- not supported on this path and panics below.
3466 (res, reduce) <- outOfLineFloatOp mop
3467 lblOrMopExpr <- case res of
3469 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3471 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3472 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3473 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3477 argcode = concatOL argcodes
3478 (move_sp_down, move_sp_up)
3479 = let diff = length vregs - n_argRegs
3480 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3483 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3485 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3486 return (argcode `appOL`
3487 move_sp_down `appOL`
3488 transfer_code `appOL`
3493 -- move args from the integer vregs into which they have been
3494 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3495 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3497 move_final [] _ offset -- all args done
3500 move_final (v:vs) [] offset -- out of aregs; move to stack
3501 = ST II32 v (spRel offset)
3502 : move_final vs [] (offset+1)
3504 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3505 = OR False g0 (RIReg v) a
3506 : move_final vs az offset
3508 -- generate code to calculate an argument, and move it into one
3509 -- or two integer vregs.
3510 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-- 64-bit values occupy two vregs (hi word first).
3511 arg_to_int_vregs arg
3512 | isWord64 (cmmExprType arg)
3514 (ChildCode64 code r_lo) <- iselExpr64 arg
3516 r_hi = getHiVRegFromLo r_lo
3517 return (code, [r_hi, r_lo])
3520 (src, code) <- getSomeReg arg
3521 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3523 pk = cmmExprType arg
3524 case cmmTypeSize pk of
-- Doubles: bounce both halves through the scratch slot at [%sp+16]
-- (see the comment above on why that slot is safe to use).
3526 v1 <- getNewRegNat II32
3527 v2 <- getNewRegNat II32
3530 FMOV FF64 src f0 `snocOL`
3531 ST FF32 f0 (spRel 16) `snocOL`
3532 LD II32 (spRel 16) v1 `snocOL`
3533 ST FF32 (fPair f0) (spRel 16) `snocOL`
3534 LD II32 (spRel 16) v2
-- Singles: one bounce through the scratch slot.
3539 v1 <- getNewRegNat II32
3542 ST FF32 src (spRel 16) `snocOL`
3543 LD II32 (spRel 16) v1
-- Integers: plain register move (OR with %g0).
3548 v1 <- getNewRegNat II32
3550 code `snocOL` OR False g0 (RIReg src) v1
-- | Map a callish mach-op to the C library function implementing it
-- (SPARC).  Returns either the function's label or, under PIC, a
-- dynamic-reference expression, plus a 'reduce' flag.  'reduce' is
-- True for the F32 ops (which name the double-precision C routines);
-- the visible caller in genCCall panics when reduce is set, so the
-- F32 path is evidently unsupported here.
3554 outOfLineFloatOp mop =
3556 dflags <- getDynFlagsNat
3557 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3558 mkForeignLabel functionName Nothing True
3559 let mopLabelOrExpr = case mopExpr of
3560 CmmLit (CmmLabel lbl) -> Left lbl
3562 return (mopLabelOrExpr, reduce)
-- Table: mach-op -> (reduce?, C function name).
3564 (reduce, functionName) = case mop of
3565 MO_F32_Exp -> (True, fsLit "exp")
3566 MO_F32_Log -> (True, fsLit "log")
3567 MO_F32_Sqrt -> (True, fsLit "sqrt")
3569 MO_F32_Sin -> (True, fsLit "sin")
3570 MO_F32_Cos -> (True, fsLit "cos")
3571 MO_F32_Tan -> (True, fsLit "tan")
3573 MO_F32_Asin -> (True, fsLit "asin")
3574 MO_F32_Acos -> (True, fsLit "acos")
3575 MO_F32_Atan -> (True, fsLit "atan")
3577 MO_F32_Sinh -> (True, fsLit "sinh")
3578 MO_F32_Cosh -> (True, fsLit "cosh")
3579 MO_F32_Tanh -> (True, fsLit "tanh")
3581 MO_F64_Exp -> (False, fsLit "exp")
3582 MO_F64_Log -> (False, fsLit "log")
3583 MO_F64_Sqrt -> (False, fsLit "sqrt")
3585 MO_F64_Sin -> (False, fsLit "sin")
3586 MO_F64_Cos -> (False, fsLit "cos")
3587 MO_F64_Tan -> (False, fsLit "tan")
3589 MO_F64_Asin -> (False, fsLit "asin")
3590 MO_F64_Acos -> (False, fsLit "acos")
3591 MO_F64_Atan -> (False, fsLit "atan")
3593 MO_F64_Sinh -> (False, fsLit "sinh")
3594 MO_F64_Cosh -> (False, fsLit "cosh")
3595 MO_F64_Tanh -> (False, fsLit "tanh")
3597 other -> pprPanic "outOfLineFloatOp(sparc) "
3598 (pprCallishMachOp mop)
3600 #endif /* sparc_TARGET_ARCH */
3602 #if powerpc_TARGET_ARCH
3604 #if darwin_TARGET_OS || linux_TARGET_OS
3606 The PowerPC calling convention for Darwin/Mac OS X
3607 is described in Apple's document
3608 "Inside Mac OS X - Mach-O Runtime Architecture".
3610 PowerPC Linux uses the System V Release 4 Calling Convention
3611 for PowerPC. It is described in the
3612 "System V Application Binary Interface PowerPC Processor Supplement".
3614 Both conventions are similar:
3615 Parameters may be passed in general-purpose registers starting at r3, in
3616 floating point registers starting at f1, or on the stack.
3618 But there are substantial differences:
3619 * The number of registers used for parameter passing and the exact set of
3620 nonvolatile registers differs (see MachRegs.lhs).
3621 * On Darwin, stack space is always reserved for parameters, even if they are
3622 passed in registers. The called routine may choose to save parameters from
3623 registers to the corresponding space on the stack.
3624 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3625 parameter is passed in an FPR.
3626 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3627 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3628 Darwin just treats an I64 like two separate II32s (high word first).
3629 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3630 4-byte aligned like everything else on Darwin.
3631 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3632 PowerPC Linux does not agree, so neither do we.
3634 According to both conventions, the parameter area should be part of the
3635 caller's stack frame, allocated in the caller's prologue code (large enough
3636 to hold the parameter lists for all called routines). The NCG already
3637 uses the stack for register spilling, leaving 64 bytes free at the top.
3638 If we need a larger parameter area than that, we just allocate a new stack
3639 frame just before ccalling.
-- | MO_WriteBarrier on PowerPC: a single lightweight-sync instruction.
3643 genCCall (CmmPrim MO_WriteBarrier) _ _
3644 = return $ unitOL LWSYNC
-- | C-call generation, PowerPC (Darwin and SysV/Linux variants; see
-- the ABI comparison in the comment block above).  Sub-word args are
-- assumed already promoted to word size (the ASSERT below).
3646 genCCall target dest_regs argsAndHints
3647 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3648 -- we rely on argument promotion in the codeGen
3650 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3652 allArgRegs allFPArgRegs
-- Resolve the call target; callish mach-ops go out of line and may
-- set reduceToFF32 (narrow the double result back to single).
3656 (labelOrExpr, reduceToFF32) <- case target of
3657 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3658 CmmCallee expr conv -> return (Right expr, False)
3659 CmmPrim mop -> outOfLineFloatOp mop
3661 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3662 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3667 `snocOL` BL lbl usedRegs
-- Dynamic target: load it into CTR and branch-and-link via CTR.
3670 (dynReg, dynCode) <- getSomeReg dyn
3672 `snocOL` MTCTR dynReg
3674 `snocOL` BCTRL usedRegs
3677 #if darwin_TARGET_OS
3678 initialStackOffset = 24
3679 -- size of linkage area + size of arguments, in bytes
3680 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3681 map (widthInBytes . typeWidth) argReps
3682 #elif linux_TARGET_OS
3683 initialStackOffset = 8
3684 stackDelta finalStack = roundTo 16 finalStack
3686 args = map hintlessCmm argsAndHints
3687 argReps = map cmmExprType args
3689 roundTo a x | x `mod` a == 0 = x
3690 | otherwise = x + a - (x `mod` a)
-- Allocate / free the temporary frame for the parameter area.
3692 move_sp_down finalStack
3694 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3697 where delta = stackDelta finalStack
3698 move_sp_up finalStack
3700 toOL [ADD sp sp (RIImm (ImmInt delta)),
3703 where delta = stackDelta finalStack
-- Walk the arguments, handing out GPRs/FPRs and stack slots per ABI.
3706 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3707 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3708 accumCode accumUsed | isWord64 arg_ty =
3710 ChildCode64 code vr_lo <- iselExpr64 arg
3711 let vr_hi = getHiVRegFromLo vr_lo
3713 #if darwin_TARGET_OS
-- Darwin: an I64 is just two I32s, high word first.
3718 (accumCode `appOL` code
3719 `snocOL` storeWord vr_hi gprs stackOffset
3720 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3721 ((take 2 gprs) ++ accumUsed)
3723 storeWord vr (gpr:_) offset = MR gpr vr
3724 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3726 #elif linux_TARGET_OS
-- SysV: an I64 goes in an odd-even GPR pair (skipping a GPR if
-- needed) or entirely on the 8-byte-aligned stack.
3727 let stackOffset' = roundTo 8 stackOffset
3728 stackCode = accumCode `appOL` code
3729 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3730 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3731 regCode hireg loreg =
3732 accumCode `appOL` code
3733 `snocOL` MR hireg vr_hi
3734 `snocOL` MR loreg vr_lo
3737 hireg : loreg : regs | even (length gprs) ->
3738 passArguments args regs fprs stackOffset
3739 (regCode hireg loreg) (hireg : loreg : accumUsed)
3740 _skipped : hireg : loreg : regs ->
3741 passArguments args regs fprs stackOffset
3742 (regCode hireg loreg) (hireg : loreg : accumUsed)
3743 _ -> -- only one or no regs left
3744 passArguments args [] fprs (stackOffset'+8)
-- Non-I64 argument with a register still available.
3748 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3749 | reg : _ <- regs = do
3750 register <- getRegister arg
3751 let code = case register of
3752 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3753 Any _ acode -> acode reg
3757 #if darwin_TARGET_OS
3758 -- The Darwin ABI requires that we reserve stack slots for register parameters
3759 (stackOffset + stackBytes)
3760 #elif linux_TARGET_OS
3761 -- ... the SysV ABI doesn't.
3764 (accumCode `appOL` code)
-- Out of registers: store the argument to its stack slot.
3767 (vr, code) <- getSomeReg arg
3771 (stackOffset' + stackBytes)
3772 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
3775 #if darwin_TARGET_OS
3776 -- stackOffset is at least 4-byte aligned
3777 -- The Darwin ABI is happy with that.
3778 stackOffset' = stackOffset
3780 -- ... the SysV ABI requires 8-byte alignment for doubles.
3781 stackOffset' | isFloatType rep && typeWidth rep == W64 =
3782 roundTo 8 stackOffset
3783 | otherwise = stackOffset
3785 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-type costs: (GPRs consumed, FPRs consumed, stack bytes, reg pool).
3786 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
3787 II32 -> (1, 0, 4, gprs)
3788 #if darwin_TARGET_OS
3789 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3791 FF32 -> (1, 1, 4, fprs)
3792 FF64 -> (2, 1, 8, fprs)
3793 #elif linux_TARGET_OS
3794 -- ... the SysV ABI doesn't.
3795 FF32 -> (0, 1, 4, fprs)
3796 FF64 -> (0, 1, 8, fprs)
-- Fetch the result from f1 (floats) or r3/r4 (ints, I64 pairs);
-- FRSP narrows the double result of a reduced F32 op.
3799 moveResult reduceToFF32 =
3802 [CmmHinted dest _hint]
3803 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
3804 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
3805 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
3807 | otherwise -> unitOL (MR r_dest r3)
3808 where rep = cmmRegType (CmmLocal dest)
3809 r_dest = getRegisterReg (CmmLocal dest)
-- | Map a callish mach-op to the C library function implementing it
-- (PowerPC).  The Bool is the reduceToFF32 flag: True for the F32 ops,
-- whose double-precision result moveResult narrows back with FRSP.
3811 outOfLineFloatOp mop =
3813 dflags <- getDynFlagsNat
3814 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3815 mkForeignLabel functionName Nothing True
3816 let mopLabelOrExpr = case mopExpr of
3817 CmmLit (CmmLabel lbl) -> Left lbl
3819 return (mopLabelOrExpr, reduce)
-- Table: mach-op -> (C function name, reduce-to-F32?).
3821 (functionName, reduce) = case mop of
3822 MO_F32_Exp -> (fsLit "exp", True)
3823 MO_F32_Log -> (fsLit "log", True)
3824 MO_F32_Sqrt -> (fsLit "sqrt", True)
3826 MO_F32_Sin -> (fsLit "sin", True)
3827 MO_F32_Cos -> (fsLit "cos", True)
3828 MO_F32_Tan -> (fsLit "tan", True)
3830 MO_F32_Asin -> (fsLit "asin", True)
3831 MO_F32_Acos -> (fsLit "acos", True)
3832 MO_F32_Atan -> (fsLit "atan", True)
3834 MO_F32_Sinh -> (fsLit "sinh", True)
3835 MO_F32_Cosh -> (fsLit "cosh", True)
3836 MO_F32_Tanh -> (fsLit "tanh", True)
3837 MO_F32_Pwr -> (fsLit "pow", True)
3839 MO_F64_Exp -> (fsLit "exp", False)
3840 MO_F64_Log -> (fsLit "log", False)
3841 MO_F64_Sqrt -> (fsLit "sqrt", False)
3843 MO_F64_Sin -> (fsLit "sin", False)
3844 MO_F64_Cos -> (fsLit "cos", False)
3845 MO_F64_Tan -> (fsLit "tan", False)
3847 MO_F64_Asin -> (fsLit "asin", False)
3848 MO_F64_Acos -> (fsLit "acos", False)
3849 MO_F64_Atan -> (fsLit "atan", False)
3851 MO_F64_Sinh -> (fsLit "sinh", False)
3852 MO_F64_Cosh -> (fsLit "cosh", False)
3853 MO_F64_Tanh -> (fsLit "tanh", False)
3854 MO_F64_Pwr -> (fsLit "pow", False)
3855 other -> pprPanic "genCCall(ppc): unknown callish op"
3856 (pprCallishMachOp other)
3858 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3860 #endif /* powerpc_TARGET_ARCH */
3863 -- -----------------------------------------------------------------------------
3864 -- Generating a table-branch
-- | Compile a Cmm switch into an indexed jump through a static jump
-- table.  Under PIC the table holds label-relative offsets; otherwise
-- it holds absolute block labels (see jumpTableEntry below).
3866 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3868 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- PIC case: table entries are offsets from the table label; add the
-- table base back in before jumping.
3872 (reg,e_code) <- getSomeReg expr
3873 lbl <- getNewLabelNat
3874 dflags <- getDynFlagsNat
3875 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3876 (tableReg,t_code) <- getSomeReg $ dynRef
3878 jumpTable = map jumpTableEntryRel ids
3880 jumpTableEntryRel Nothing
3881 = CmmStaticLit (CmmInt 0 wordWidth)
3882 jumpTableEntryRel (Just (BlockId id))
3883 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3884 where blockLabel = mkAsmTempLabel id
3886 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3887 (EAIndex reg wORD_SIZE) (ImmInt 0))
3889 #if x86_64_TARGET_ARCH
3890 #if darwin_TARGET_OS
3891 -- on Mac OS X/x86_64, put the jump table in the text section
3892 -- to work around a limitation of the linker.
3893 -- ld64 is unable to handle the relocations for
3895 -- if L0 is not preceded by a non-anonymous label in its section.
3897 code = e_code `appOL` t_code `appOL` toOL [
3898 ADD (intSize wordWidth) op (OpReg tableReg),
3899 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3900 LDATA Text (CmmDataLabel lbl : jumpTable)
3903 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3904 -- relocations, hence we only get 32-bit offsets in the jump
3905 -- table. As these offsets are always negative we need to properly
3906 -- sign extend them to 64-bit. This hack should be removed in
3907 -- conjunction with the hack in PprMach.hs/pprDataItem once
3908 -- binutils 2.17 is standard.
3909 code = e_code `appOL` t_code `appOL` toOL [
3910 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3912 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3913 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3915 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
3916 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3920 code = e_code `appOL` t_code `appOL` toOL [
3921 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3922 ADD (intSize wordWidth) op (OpReg tableReg),
3923 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC case: absolute addresses in the table; jump indirect
-- through a label-based addressing mode.
3929 (reg,e_code) <- getSomeReg expr
3930 lbl <- getNewLabelNat
3932 jumpTable = map jumpTableEntry ids
3933 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3934 code = e_code `appOL` toOL [
3935 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3936 JMP_TBL op [ id | Just id <- ids ]
3940 #elif powerpc_TARGET_ARCH
-- PPC PIC case: scale the index by 4, load the relative entry, add
-- the table base, and branch via CTR.
3944 (reg,e_code) <- getSomeReg expr
3945 tmp <- getNewRegNat II32
3946 lbl <- getNewLabelNat
3947 dflags <- getDynFlagsNat
3948 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3949 (tableReg,t_code) <- getSomeReg $ dynRef
3951 jumpTable = map jumpTableEntryRel ids
3953 jumpTableEntryRel Nothing
3954 = CmmStaticLit (CmmInt 0 wordWidth)
3955 jumpTableEntryRel (Just (BlockId id))
3956 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3957 where blockLabel = mkAsmTempLabel id
3959 code = e_code `appOL` t_code `appOL` toOL [
3960 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3961 SLW tmp reg (RIImm (ImmInt 2)),
3962 LD II32 tmp (AddrRegReg tableReg tmp),
3963 ADD tmp tmp (RIReg tableReg),
3965 BCTR [ id | Just id <- ids ]
-- PPC non-PIC case: address the table with HA/LO halves of its label.
3970 (reg,e_code) <- getSomeReg expr
3971 tmp <- getNewRegNat II32
3972 lbl <- getNewLabelNat
3974 jumpTable = map jumpTableEntry ids
3976 code = e_code `appOL` toOL [
3977 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3978 SLW tmp reg (RIImm (ImmInt 2)),
3979 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3980 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3982 BCTR [ id | Just id <- ids ]
3985 #elif sparc_TARGET_ARCH
3988 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
3991 = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
3993 #error "ToDo: genSwitch"
-- | One static word of a (non-PIC) jump table: the label of the
-- target basic block, or a literal zero for a slot with no target.
jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel (mkAsmTempLabel id))
4000 -- -----------------------------------------------------------------------------
4002 -- -----------------------------------------------------------------------------
4005 -- -----------------------------------------------------------------------------
4006 -- 'condIntReg' and 'condFltReg': condition codes into registers
4008 -- Turn those condition codes into integers now (when they appear on
4009 -- the right hand side of an assignment).
4011 -- (If applicable) Do not fill the delay slots here; you will confuse the
4012 -- register allocator.
4014 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4016 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4018 #if alpha_TARGET_ARCH
-- Not implemented for Alpha.
4019 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4020 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4021 #endif /* alpha_TARGET_ARCH */
4023 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4025 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: SETcc produces a byte in a temporary; zero-extend it
-- into the 32-bit destination to get a clean 0/1 result.
4027 condIntReg cond x y = do
4028 CondCode _ cond cond_code <- condIntCode cond x y
4029 tmp <- getNewRegNat II8
4031 code dst = cond_code `appOL` toOL [
4032 SETCC cond (OpReg tmp),
4033 MOVZxL II8 (OpReg tmp) (OpReg dst)
4036 return (Any II32 code)
4040 #if i386_TARGET_ARCH
-- i386: same SETcc + zero-extend pattern as condIntReg, driven by the
-- flags set by condFltCode.
4042 condFltReg cond x y = do
4043 CondCode _ cond cond_code <- condFltCode cond x y
4044 tmp <- getNewRegNat II8
4046 code dst = cond_code `appOL` toOL [
4047 SETCC cond (OpReg tmp),
4048 MOVZxL II8 (OpReg tmp) (OpReg dst)
4051 return (Any II32 code)
4055 #if x86_64_TARGET_ARCH
-- x86_64: SSE comparisons need special handling for unordered (NaN)
-- operands via the parity flag -- see the long comment below.
4057 condFltReg cond x y = do
4058 CondCode _ cond cond_code <- condFltCode cond x y
4059 tmp1 <- getNewRegNat wordSize
4060 tmp2 <- getNewRegNat wordSize
4062 -- We have to worry about unordered operands (eg. comparisons
4063 -- against NaN). If the operands are unordered, the comparison
4064 -- sets the parity flag, carry flag and zero flag.
4065 -- All comparisons are supposed to return false for unordered
4066 -- operands except for !=, which returns true.
4068 -- Optimisation: we don't have to test the parity flag if we
4069 -- know the test has already excluded the unordered case: eg >
4070 -- and >= test for a zero carry flag, which can only occur for
4071 -- ordered operands.
4073 -- ToDo: by reversing comparisons we could avoid testing the
4074 -- parity flag in more cases.
4079 NE -> or_unordered dst
4080 GU -> plain_test dst
4081 GEU -> plain_test dst
4082 _ -> and_ordered dst)
4084 plain_test dst = toOL [
4085 SETCC cond (OpReg tmp1),
4086 MOVZxL II8 (OpReg tmp1) (OpReg dst)
4088 or_unordered dst = toOL [
4089 SETCC cond (OpReg tmp1),
4090 SETCC PARITY (OpReg tmp2),
4091 OR II8 (OpReg tmp1) (OpReg tmp2),
4092 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4094 and_ordered dst = toOL [
4095 SETCC cond (OpReg tmp1),
4096 SETCC NOTPARITY (OpReg tmp2),
4097 AND II8 (OpReg tmp1) (OpReg tmp2),
4098 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4101 return (Any II32 code)
4105 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4107 #if sparc_TARGET_ARCH
-- SPARC.  EQ/NE (and comparisons against literal zero) are computed
-- branch-free via the carry flag; the general cases build a small
-- diamond of basic blocks that writes 0 or 1 into dst.
4109 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4110 (src, code) <- getSomeReg x
4111 tmp <- getNewRegNat II32
4113 code__2 dst = code `appOL` toOL [
4114 SUB False True g0 (RIReg src) g0,
4115 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4116 return (Any II32 code__2)
-- General equality: XOR the operands first, then the zero trick.
4118 condIntReg EQQ x y = do
4119 (src1, code1) <- getSomeReg x
4120 (src2, code2) <- getSomeReg y
4121 tmp1 <- getNewRegNat II32
4122 tmp2 <- getNewRegNat II32
4124 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4125 XOR False src1 (RIReg src2) dst,
4126 SUB False True g0 (RIReg dst) g0,
4127 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4128 return (Any II32 code__2)
4130 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4131 (src, code) <- getSomeReg x
4132 tmp <- getNewRegNat II32
4134 code__2 dst = code `appOL` toOL [
4135 SUB False True g0 (RIReg src) g0,
4136 ADD True False g0 (RIImm (ImmInt 0)) dst]
4137 return (Any II32 code__2)
4139 condIntReg NE x y = do
4140 (src1, code1) <- getSomeReg x
4141 (src2, code2) <- getSomeReg y
4142 tmp1 <- getNewRegNat II32
4143 tmp2 <- getNewRegNat II32
4145 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4146 XOR False src1 (RIReg src2) dst,
4147 SUB False True g0 (RIReg dst) g0,
4148 ADD True False g0 (RIImm (ImmInt 0)) dst]
4149 return (Any II32 code__2)
-- General case: conditional branch over "dst := 0" / "dst := 1"
-- blocks (note the branch delay-slot NOPs).
4151 condIntReg cond x y = do
4152 BlockId lbl1 <- getBlockIdNat
4153 BlockId lbl2 <- getBlockIdNat
4154 CondCode _ cond cond_code <- condIntCode cond x y
4156 code__2 dst = cond_code `appOL` toOL [
4157 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4158 OR False g0 (RIImm (ImmInt 0)) dst,
4159 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4160 NEWBLOCK (BlockId lbl1),
4161 OR False g0 (RIImm (ImmInt 1)) dst,
4162 NEWBLOCK (BlockId lbl2)]
4163 return (Any II32 code__2)
-- Float version: identical diamond, but branching on the FP condition.
4165 condFltReg cond x y = do
4166 BlockId lbl1 <- getBlockIdNat
4167 BlockId lbl2 <- getBlockIdNat
4168 CondCode _ cond cond_code <- condFltCode cond x y
4170 code__2 dst = cond_code `appOL` toOL [
4172 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4173 OR False g0 (RIImm (ImmInt 0)) dst,
4174 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4175 NEWBLOCK (BlockId lbl1),
4176 OR False g0 (RIImm (ImmInt 1)) dst,
4177 NEWBLOCK (BlockId lbl2)]
4178 return (Any II32 code__2)
4180 #endif /* sparc_TARGET_ARCH */
4182 #if powerpc_TARGET_ARCH
-- | PPC: build a 0/1 register from a condition by extracting the
-- relevant CR bit with RLWINM, negating it first via CRNOR where the
-- condition has no direct bit.  Parts of this function (the commented
-- block opened below and the bit table) fall outside this excerpt.
4183 condReg getCond = do
4184 lbl1 <- getBlockIdNat
4185 lbl2 <- getBlockIdNat
4186 CondCode _ cond cond_code <- getCond
4188 {- code dst = cond_code `appOL` toOL [
4197 code dst = cond_code
4201 RLWINM dst dst (bit + 1) 31 31
4204 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4207 (bit, do_negate) = case cond of
4221 return (Any II32 code)
-- Both int and float comparisons share condReg; only the flag-setting
-- code differs.
4223 condIntReg cond x y = condReg (condIntCode cond x y)
4224 condFltReg cond x y = condReg (condFltCode cond x y)
4225 #endif /* powerpc_TARGET_ARCH */
4228 -- -----------------------------------------------------------------------------
4229 -- 'trivial*Code': deal with trivial instructions
4231 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4232 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4233 -- Only look for constants on the right hand side, because that's
4234 -- where the generic optimizer will have put them.
4236 -- Similarly, for unary instructions, we don't have to worry about
4237 -- matching an StInt as the argument, because genericOpt will already
4238 -- have handled the constant-folding.
4241 :: Width -- Int only
4242 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4243 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4244 -> Maybe (Operand -> Operand -> Instr)
4245 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4246 -> Maybe (Operand -> Operand -> Instr)
4247 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4248 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4250 -> CmmExpr -> CmmExpr -- the two arguments
4253 #ifndef powerpc_TARGET_ARCH
4255 :: Width -- Floating point only
4256 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4257 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4258 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4259 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4261 -> CmmExpr -> CmmExpr -- the two arguments
4267 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4268 ,IF_ARCH_i386 ((Operand -> Instr)
4269 ,IF_ARCH_x86_64 ((Operand -> Instr)
4270 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4271 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4273 -> CmmExpr -- the one argument
4276 #ifndef powerpc_TARGET_ARCH
4279 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4280 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4281 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4282 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4284 -> CmmExpr -- the one argument
4288 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4290 #if alpha_TARGET_ARCH
4292 trivialCode instr x (StInt y)
4294 = getRegister x `thenNat` \ register ->
4295 getNewRegNat IntRep `thenNat` \ tmp ->
4297 code = registerCode register tmp
4298 src1 = registerName register tmp
4299 src2 = ImmInt (fromInteger y)
4300 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4302 return (Any IntRep code__2)
4304 trivialCode instr x y
4305 = getRegister x `thenNat` \ register1 ->
4306 getRegister y `thenNat` \ register2 ->
4307 getNewRegNat IntRep `thenNat` \ tmp1 ->
4308 getNewRegNat IntRep `thenNat` \ tmp2 ->
4310 code1 = registerCode register1 tmp1 []
4311 src1 = registerName register1 tmp1
4312 code2 = registerCode register2 tmp2 []
4313 src2 = registerName register2 tmp2
4314 code__2 dst = asmSeqThen [code1, code2] .
4315 mkSeqInstr (instr src1 (RIReg src2) dst)
4317 return (Any IntRep code__2)
4320 trivialUCode instr x
4321 = getRegister x `thenNat` \ register ->
4322 getNewRegNat IntRep `thenNat` \ tmp ->
4324 code = registerCode register tmp
4325 src = registerName register tmp
4326 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4328 return (Any IntRep code__2)
4331 trivialFCode _ instr x y
4332 = getRegister x `thenNat` \ register1 ->
4333 getRegister y `thenNat` \ register2 ->
4334 getNewRegNat FF64 `thenNat` \ tmp1 ->
4335 getNewRegNat FF64 `thenNat` \ tmp2 ->
4337 code1 = registerCode register1 tmp1
4338 src1 = registerName register1 tmp1
4340 code2 = registerCode register2 tmp2
4341 src2 = registerName register2 tmp2
4343 code__2 dst = asmSeqThen [code1 [], code2 []] .
4344 mkSeqInstr (instr src1 src2 dst)
4346 return (Any FF64 code__2)
-- trivialUFCode (alpha): unary floating-point op into an FF64 result.
4348 trivialUFCode _ instr x
4349 = getRegister x `thenNat` \ register ->
4350 getNewRegNat FF64 `thenNat` \ tmp ->
4352 code = registerCode register tmp
4353 src = registerName register tmp
4354 code__2 dst = code . mkSeqInstr (instr src dst)
4356 return (Any FF64 code__2)
4358 #endif /* alpha_TARGET_ARCH */
4360 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4362 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4365 The Rules of the Game are:
4367 * You cannot assume anything about the destination register dst;
4368 it may be anything, including a fixed reg.
4370 * You may compute an operand into a fixed reg, but you may not
4371 subsequently change the contents of that fixed reg. If you
4372 want to do so, first copy the value either to a temporary
4373 or into dst. You are free to modify dst even if it happens
4374 to be a fixed reg -- that's not your problem.
4376 * You cannot assume that a fixed reg will stay live over an
4377 arbitrary computation. The same applies to the dst reg.
4379 * Temporary regs obtained from getNewRegNat are distinct from
4380 each other and from all other regs, and stay live over
4381 arbitrary computations.
4383 --------------------
4385 SDM's version of The Rules:
4387 * If getRegister returns Any, that means it can generate correct
4388 code which places the result in any register, period. Even if that
4389 register happens to be read during the computation.
4391 Corollary #1: this means that if you are generating code for an
4392 operation with two arbitrary operands, you cannot assign the result
4393 of the first operand into the destination register before computing
4394 the second operand. The second operand might require the old value
4395 of the destination register.
4397 Corollary #2: A function might be able to generate more efficient
4398 code if it knows the destination register is a new temporary (and
4399 therefore not read by any of the sub-computations).
4401 * If getRegister returns Any, then the code it generates may modify only:
4402 (a) fresh temporaries
4403 (b) the destination register
4404 (c) known registers (eg. %ecx is used by shifts)
4405 In particular, it may *not* modify global registers, unless the global
4406 register happens to be the destination register.
-- trivialCode (x86/x86_64): when the FIRST operand is a 32-bit literal
-- and a reversed-operand instruction is available, compute only the
-- second operand and apply the reversed instr with the literal as an
-- immediate; otherwise fall through to the generic two-operand path.
-- NOTE(review): the `let code dst` header line is missing from this
-- extract (line 4414 starts mid-binding); code left byte-identical.
4409 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4410 | is32BitLit lit_a = do
4411 b_code <- getAnyReg b
4414 = b_code dst `snocOL`
4415 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4417 return (Any (intSize width) code)
-- Fallback: no usable reversed form / non-literal first operand.
4419 trivialCode width instr maybe_revinstr a b
4420 = genTrivialCode (intSize width) instr a b
-- genTrivialCode: generic two-operand (dst := dst `op` src) code
-- generator, shared by integer and (on x86_64) floating-point ops.
-- b must survive the computation of a, so if b's register clashes with
-- the destination it is first saved into a fresh temporary.
-- NOTE(review): the `code dst` / `where` scaffolding lines are missing
-- from this extract; code left byte-identical.
4422 -- This is re-used for floating pt instructions too.
4423 genTrivialCode rep instr a b = do
4424 (b_op, b_code) <- getNonClobberedOperand b
4425 a_code <- getAnyReg a
4426 tmp <- getNewRegNat rep
4428 -- We want the value of b to stay alive across the computation of a.
4429 -- But, we want to calculate a straight into the destination register,
4430 -- because the instruction only has two operands (dst := dst `op` src).
4431 -- The troublesome case is when the result of b is in the same register
4432 -- as the destination reg. In this case, we have to save b in a
4433 -- new temporary across the computation of a.
4435 | dst `regClashesWithOp` b_op =
4437 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4439 instr (OpReg tmp) (OpReg dst)
4443 instr b_op (OpReg dst)
4445 return (Any rep code)
-- True iff the given register is mentioned by the operand (directly,
-- or as part of an addressing mode).
4447 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4448 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4449 reg `regClashesWithOp` _ = False
-- trivialUCode (x86): unary op computed straight into the destination.
-- NOTE(review): the `code dst = ...` lines are missing from this
-- extract; code left byte-identical.
4453 trivialUCode rep instr x = do
4454 x_code <- getAnyReg x
4459 return (Any rep code)
4463 #if i386_TARGET_ARCH
-- trivialFCode (i386, x87): binary FP op; x must not be clobbered by
-- the computation of y, hence getNonClobberedReg for the first operand.
-- NOTE(review): `let`/`code dst` scaffolding lines are missing here.
4465 trivialFCode width instr x y = do
4466 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4467 (y_reg, y_code) <- getSomeReg y
4469 size = floatSize width
4473 instr size x_reg y_reg dst
4474 return (Any size code)
4478 #if x86_64_TARGET_ARCH
-- trivialFCode (x86_64, SSE): the scalar SSE ops are two-operand just
-- like the integer ops, so delegate to the generic two-operand
-- generator at the float size implied by pk.
trivialFCode pk instr x y =
    let size = floatSize pk
    in  genTrivialCode size (instr size) x y
-- trivialUFCode: unary floating-point op.
-- NOTE(review): the `let code dst = ...` lines are missing here.
4486 trivialUFCode size instr x = do
4487 (x_reg, x_code) <- getSomeReg x
4493 return (Any size code)
4495 #endif /* i386_TARGET_ARCH */
4497 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4499 #if sparc_TARGET_ARCH
-- trivialCode (sparc): binary integer op.  First equation handles a
-- literal second operand as an immediate (RIImm); the guard line that
-- presumably checks the literal fits (line 4502/4503) is missing from
-- this extract.  NOTE(review): tmp/tmp1/tmp2 are requested but not
-- referenced in the visible lines -- possibly dead, verify upstream.
4501 trivialCode pk instr x (CmmLit (CmmInt y d))
4504 (src1, code) <- getSomeReg x
4505 tmp <- getNewRegNat II32
4507 src2 = ImmInt (fromInteger y)
4508 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4509 return (Any II32 code__2)
-- General case: both operands in registers.
4511 trivialCode pk instr x y = do
4512 (src1, code1) <- getSomeReg x
4513 (src2, code2) <- getSomeReg y
4514 tmp1 <- getNewRegNat II32
4515 tmp2 <- getNewRegNat II32
4517 code__2 dst = code1 `appOL` code2 `snocOL`
4518 instr src1 (RIReg src2) dst
4519 return (Any II32 code__2)
-- trivialFCode (sparc): binary FP op.  If the two operands have
-- different widths, the 32-bit one is promoted to FF64 (via FxTOy into
-- a fresh tmp) so the instruction sees matching operand kinds.
-- NOTE(review): several scaffolding lines (let, the pk1/pk2 bindings,
-- the final `else`) are missing from this extract.
4522 trivialFCode pk instr x y = do
4523 (src1, code1) <- getSomeReg x
4524 (src2, code2) <- getSomeReg y
4525 tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4526 tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4527 tmp <- getNewRegNat FF64
4529 promote x = FxTOy FF32 FF64 x tmp
4535 if pk1 `cmmEqType` pk2 then
4536 code1 `appOL` code2 `snocOL`
4537 instr (floatSize pk) src1 src2 dst
4538 else if typeWidth pk1 == W32 then
4539 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4540 instr FF64 tmp src2 dst
4542 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4543 instr FF64 src1 tmp dst
4544 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- trivialUCode (sparc): unary integer op; operand passed as RIReg.
-- NOTE(review): tmp is requested but unused in the visible lines.
4548 trivialUCode size instr x = do
4549 (src, code) <- getSomeReg x
4550 tmp <- getNewRegNat size
4552 code__2 dst = code `snocOL` instr (RIReg src) dst
4553 return (Any size code__2)
-- trivialUFCode (sparc): unary FP op.
-- NOTE(review): tmp is requested but unused in the visible lines.
4556 trivialUFCode pk instr x = do
4557 (src, code) <- getSomeReg x
4558 tmp <- getNewRegNat pk
4560 code__2 dst = code `snocOL` instr src dst
4561 return (Any pk code__2)
4563 #endif /* sparc_TARGET_ARCH */
4565 #if powerpc_TARGET_ARCH
4568 Wolfgang's PowerPC version of The Rules:
4570 A slightly modified version of The Rules to take advantage of the fact
4571 that PowerPC instructions work on all registers and don't implicitly
4572 clobber any fixed registers.
4574 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4576 * If getRegister returns Any, then the code it generates may modify only:
4577 (a) fresh temporaries
4578 (b) the destination register
4579 It may *not* modify global registers, unless the global
4580 register happens to be the destination register.
4581 It may not clobber any other registers. In fact, only ccalls clobber any
4583 Also, it may not modify the counter register (used by genCCall).
4585 Corollary: If a getRegister for a subexpression returns Fixed, you need
4586 not move it to a fresh temporary before evaluating the next subexpression.
4587 The Fixed register won't be modified.
4588 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4590 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4591 the value of the destination register.
-- trivialCode (ppc): literal second operand that fits the (signed or
-- unsigned, per `signed`) immediate field is folded into RIImm.
-- NOTE(review): the `= do` line (4596) is missing from this extract.
4594 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4595 | Just imm <- makeImmediate rep signed y
4597 (src1, code1) <- getSomeReg x
4598 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4599 return (Any (intSize rep) code)
-- trivialCode (ppc), general case: evaluate both operands into
-- registers and emit one reg/reg instruction.  The destination may be
-- chosen freely (Any) -- per the PPC Rules above, subexpressions may
-- depend on the destination register's old value.
trivialCode rep signed instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = xCode `appOL` yCode `snocOL` instr dst xReg (RIReg yReg)
    return (Any (intSize rep) assemble)
-- | Two-operand generator for instructions with no immediate form:
-- compute both subexpressions into registers, then apply the
-- three-register instruction to a freely chosen destination.
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' size instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = xCode `appOL` yCode `snocOL` instr dst xReg yReg
    return (Any size assemble)
-- | As 'trivialCodeNoImm'', but for instructions additionally
-- parameterised by the operand size.
trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm size instr x y =
    trivialCodeNoImm' size (instr size) x y
-- | Unary op (ppc): one source register, destination chosen freely.
trivialUCode rep instr x = do
    (srcReg, srcCode) <- getSomeReg x
    let assemble dst = srcCode `snocOL` instr dst srcReg
    return (Any rep assemble)
-- remainderCode: synthesise integer remainder on PPC (which has no
-- remainder instruction) from divide + multiply + subtract.
-- NOTE(review): the divide line (using the `div` parameter) and the
-- final SUBF line are missing from this extract (4634, 4636-4637);
-- the visible MULLW is the middle step of rem = x - (x/y)*y.
4624 -- There is no "remainder" instruction on the PPC, so we have to do
4626 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4628 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4629 -> CmmExpr -> CmmExpr -> NatM Register
4630 remainderCode rep div x y = do
4631 (src1, code1) <- getSomeReg x
4632 (src2, code2) <- getSomeReg y
4633 let code dst = code1 `appOL` code2 `appOL` toOL [
4635 MULLW dst dst (RIReg src2),
4638 return (Any (intSize rep) code)
4640 #endif /* powerpc_TARGET_ARCH */
4643 -- -----------------------------------------------------------------------------
4644 -- Coercing to/from integer/floating-point...
4646 -- When going to integer, we truncate (round towards 0).
4648 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4649 -- conversions. We have to store temporaries in memory to move
4650 -- between the integer and the floating point register sets.
4652 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4653 -- pretend, on sparc at least, that double and float regs are separate
4654 -- kinds, so the value has to be computed into one kind before being
4655 -- explicitly "converted" to live in the other kind.
-- Shared signatures for the per-architecture coercions below.
-- First Width is the source width, second is the target width.
4657 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4658 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-- sparc keeps float and double as separate register kinds, so it also
-- needs explicit double<->float conversions.
4660 #if sparc_TARGET_ARCH
4661 coerceDbl2Flt :: CmmExpr -> NatM Register
4662 coerceFlt2Dbl :: CmmExpr -> NatM Register
4665 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4667 #if alpha_TARGET_ARCH
-- Alpha int->FP coercion body (the binding's header line, presumably
-- `coerceInt2FP ...`, is missing from this extract).  Moves the value
-- through memory (spRel 0) to cross register files; result is FF64.
4670 = getRegister x `thenNat` \ register ->
4671 getNewRegNat IntRep `thenNat` \ reg ->
4673 code = registerCode register reg
4674 src = registerName register reg
4676 code__2 dst = code . mkSeqInstrs [
4678 LD TF dst (spRel 0),
4681 return (Any FF64 code__2)
-- Alpha FP->int coercion body (header line missing from this extract).
-- Stores the FP value to the stack and reloads it into an integer
-- register; result is IntRep.
4685 = getRegister x `thenNat` \ register ->
4686 getNewRegNat FF64 `thenNat` \ tmp ->
4688 code = registerCode register tmp
4689 src = registerName register tmp
4691 code__2 dst = code . mkSeqInstrs [
4693 ST TF tmp (spRel 0),
4696 return (Any IntRep code__2)
4698 #endif /* alpha_TARGET_ARCH */
4700 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4702 #if i386_TARGET_ARCH
-- coerceInt2FP (i386, x87): pick GITOF/GITOD by the TARGET width.
-- NOTE(review): the `let` line is missing from this extract.
4704 coerceInt2FP from to x = do
4705 (x_reg, x_code) <- getSomeReg x
4707 opc = case to of W32 -> GITOF; W64 -> GITOD
4708 code dst = x_code `snocOL` opc x_reg dst
4709 -- ToDo: works for non-II32 reps?
4710 return (Any (floatSize to) code)
-- coerceFP2Int (i386, x87): pick GFTOI/GDTOI by the SOURCE width;
-- truncates (rounds towards zero), per the section comment above.
4714 coerceFP2Int from to x = do
4715 (x_reg, x_code) <- getSomeReg x
4717 opc = case from of W32 -> GFTOI; W64 -> GDTOI
4718 code dst = x_code `snocOL` opc x_reg dst
4719 -- ToDo: works for non-II32 reps?
4721 return (Any (intSize to) code)
4723 #endif /* i386_TARGET_ARCH */
4725 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4727 #if x86_64_TARGET_ARCH
-- coerceFP2Int (x86_64, SSE): truncating convert via CVTTSS2SIQ /
-- CVTTSD2SIQ chosen by the SOURCE width.
4729 coerceFP2Int from to x = do
4730 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4732 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
4733 code dst = x_code `snocOL` opc x_op dst
4735 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- coerceInt2FP (x86_64, SSE): CVTSI2SS / CVTSI2SD chosen by the
-- TARGET width.
4737 coerceInt2FP from to x = do
4738 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4740 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
4741 code dst = x_code `snocOL` opc x_op dst
4743 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- coerceFP2FP (x86_64): float<->double.  Target W32 means we are
-- narrowing (CVTSD2SS); target W64 means widening (CVTSS2SD).
4745 coerceFP2FP :: Width -> CmmExpr -> NatM Register
4746 coerceFP2FP to x = do
4747 (x_reg, x_code) <- getSomeReg x
4749 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
4750 code dst = x_code `snocOL` opc x_reg dst
4752 return (Any (floatSize to) code)
4755 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4757 #if sparc_TARGET_ARCH
-- coerceInt2FP (sparc): move the int through the stack into an FP
-- register, then convert in place with FxTOy.
-- NOTE(review): the FxTOy target is derived from width1 (the source)
-- while the advertised result size uses width2 (the target) -- confirm
-- these agree for all callers.
4759 coerceInt2FP width1 width2 x = do
4760 (src, code) <- getSomeReg x
4762 code__2 dst = code `appOL` toOL [
4763 ST (intSize width1) src (spRel (-2)),
4764 LD (intSize width1) (spRel (-2)) dst,
4765 FxTOy (intSize width1) (floatSize width1) dst dst]
4766 return (Any (floatSize $ width2) code__2)
-- coerceFP2Int (sparc): convert in an FP register, then move the
-- result through the stack into an integer register.
-- NOTE(review): pk is built from width1 and fprep from width2; for an
-- FP->int coercion (width1 = source float width, width2 = target int
-- width) the roles look swapped -- verify against the call sites.
4769 coerceFP2Int width1 width2 x = do
4770 let pk = intSize width1
4771 fprep = floatSize width2
4773 (src, code) <- getSomeReg x
4774 reg <- getNewRegNat fprep
4775 tmp <- getNewRegNat pk
4777 code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
4779 FxTOy fprep pk src tmp,
4780 ST pk tmp (spRel (-2)),
4781 LD pk (spRel (-2)) dst]
4782 return (Any pk code__2)
-- | Narrow a double to a float with a single FxTOy conversion.
coerceDbl2Flt x = do
    (srcReg, srcCode) <- getSomeReg x
    let assemble dst = srcCode `snocOL` FxTOy FF64 FF32 srcReg dst
    return (Any FF32 assemble)
-- | Widen a float to a double with a single FxTOy conversion.
coerceFlt2Dbl x = do
    (srcReg, srcCode) <- getSomeReg x
    let assemble dst = srcCode `snocOL` FxTOy FF32 FF64 srcReg dst
    return (Any FF64 assemble)
4794 #endif /* sparc_TARGET_ARCH */
4796 #if powerpc_TARGET_ARCH
-- coerceInt2FP (ppc): the classic magic-number trick -- build the
-- double 0x43300000_80000000 (2^52 + 2^31) as a static literal, form
-- 2^52 + (int ^ 0x80000000) on the stack, load it as a double and
-- subtract the magic constant; sub-word ints are sign-extended first
-- and a W32 target is rounded with FRSP.
-- NOTE(review): some lines of the instruction list (e.g. the LDATA
-- header and the load of the magic constant) are missing from this
-- extract; code left byte-identical.
4797 coerceInt2FP fromRep toRep x = do
4798 (src, code) <- getSomeReg x
4799 lbl <- getNewLabelNat
4800 itmp <- getNewRegNat II32
4801 ftmp <- getNewRegNat FF64
4802 dflags <- getDynFlagsNat
4803 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4804 Amode addr addr_code <- getAmode dynRef
4806 code' dst = code `appOL` maybe_exts `appOL` toOL [
4809 CmmStaticLit (CmmInt 0x43300000 W32),
4810 CmmStaticLit (CmmInt 0x80000000 W32)],
4811 XORIS itmp src (ImmInt 0x8000),
4812 ST II32 itmp (spRel 3),
4813 LIS itmp (ImmInt 0x4330),
4814 ST II32 itmp (spRel 2),
4815 LD FF64 ftmp (spRel 2)
4816 ] `appOL` addr_code `appOL` toOL [
4818 FSUB FF64 dst ftmp dst
4819 ] `appOL` maybe_frsp dst
-- Sign-extend sub-word sources so the XORIS trick sees a full word.
4821 maybe_exts = case fromRep of
4822 W8 -> unitOL $ EXTS II8 src src
4823 W16 -> unitOL $ EXTS II16 src src
-- Round to single precision only when the target is W32.
4825 maybe_frsp dst = case toRep of
4826 W32 -> unitOL $ FRSP dst dst
4828 return (Any (floatSize toRep) code')
-- coerceFP2Int (ppc): convert in an FP register, spill the 64-bit
-- result to the stack, and read back the low word.
-- NOTE(review): the actual convert instruction line (presumably
-- FCTIWZ, between the two comments) is missing from this extract.
4830 coerceFP2Int fromRep toRep x = do
4831 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
4832 (src, code) <- getSomeReg x
4833 tmp <- getNewRegNat FF64
4835 code' dst = code `appOL` toOL [
4836 -- convert to int in FP reg
4838 -- store value (64bit) from FP to stack
4839 ST FF64 tmp (spRel 2),
4840 -- read low word of value (high word is undefined)
4841 LD II32 dst (spRel 3)]
4842 return (Any (intSize toRep) code')
4843 #endif /* powerpc_TARGET_ARCH */
4846 -- -----------------------------------------------------------------------------
4847 -- eXTRA_STK_ARGS_HERE
4849 -- We (allegedly) put the first six C-call arguments in registers;
4850 -- where do we start putting the rest of them?
4852 -- Moved from MachInstrs (SDM):
4854 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- Stack offset (in words) at which C-call arguments beyond the
-- register-passed ones start; only defined for alpha and sparc.
-- NOTE(review): the binding's name line (4856) is missing from this
-- extract; definition may continue past the end of this chunk.
4855 eXTRA_STK_ARGS_HERE :: Int
4857 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))