2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
36 -- Our intermediate code:
38 import PprCmm ( pprExpr )
41 import ClosureInfo ( C_SRT(..) )
44 import StaticFlags ( opt_PIC )
45 import ForeignCall ( CCallConv(..) )
48 import qualified Outputable as O
51 import FastBool ( isFastTrue )
52 import Constants ( wORD_SIZE )
54 import Debug.Trace ( trace )
56 import Control.Monad ( mapAndUnzipM )
57 import Data.Maybe ( fromJust )
63 -- -----------------------------------------------------------------------------
64 -- Top-level of the instruction selector
66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
67 -- They are really trees of insns to facilitate fast appending, where a
68 -- left-to-right traversal (pre-order?) yields the insns in the correct
71 type InstrBlock = OrdList Instr
-- | Entry point of the instruction selector: translate one top-level
-- Cmm declaration into native code.  A procedure is translated block by
-- block; a data section passes through untouched.
73 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
74 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
-- Select instructions for every basic block; each block may also emit
-- extra top-level data (statics), collected separately.
75 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
76 picBaseMb <- getPicBaseMaybeNat
77 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
78 tops = proc : concat statics
-- When a PIC base register is in use, the generated code must be
-- patched to initialise it first.
80 Just picBase -> initializePicBase picBase tops
81 Nothing -> return tops
83 cmmTopCodeGen (CmmData sec dat) = do
84 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for a single Cmm basic block.  Returns the
-- resulting native block(s) plus any top-level data emitted along the
-- way (via LDATA pseudo-instructions).
86 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
87 basicBlockCodeGen (BasicBlock id stmts) = do
88 instrs <- stmtsToInstrs stmts
89 -- code generation may introduce new basic block boundaries, which
90 -- are indicated by the NEWBLOCK instruction. We must split up the
91 -- instruction stream into basic blocks again. Also, we extract
-- Fold right-to-left over the OrdList, splitting at NEWBLOCK markers
-- and pulling LDATA out into the statics list.
94 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- NEWBLOCK starts a fresh block: everything accumulated so far belongs
-- to the block with the given id.
96 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
97 = ([], BasicBlock id instrs : blocks, statics)
-- LDATA is not a real instruction; hoist it out as top-level data.
98 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
99 = (instrs, blocks, CmmData sec dat:statics)
-- Ordinary instructions are simply accumulated.
100 mkBlocks instr (instrs,blocks,statics)
101 = (instr:instrs, blocks, statics)
103 return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements into one appended instruction
-- stream (an 'OrdList' of instructions).
105 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
107 = do instrss <- mapM stmtToInstrs stmts
108 return (concatOL instrss)
-- | Translate a single Cmm statement, dispatching on the statement
-- constructor.  Assignments are split by type: float, 64-bit-on-32-bit
-- (guarded by WORD_SIZE_IN_BITS), and plain integer.
110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
111 stmtToInstrs stmt = case stmt of
112 CmmNop -> return nilOL
113 CmmComment s -> return (unitOL (COMMENT s))
-- Register assignment: pick the code generator by the register's type.
116 | isFloatType ty -> assignReg_FltCode size reg src
117 #if WORD_SIZE_IN_BITS==32
118 | isWord64 ty -> assignReg_I64Code reg src
120 | otherwise -> assignReg_IntCode size reg src
121 where ty = cmmRegType reg
122 size = cmmTypeSize ty
-- Memory store: same three-way split, keyed on the source expression's type.
125 | isFloatType ty -> assignMem_FltCode size addr src
126 #if WORD_SIZE_IN_BITS==32
127 | isWord64 ty -> assignMem_I64Code addr src
129 | otherwise -> assignMem_IntCode size addr src
130 where ty = cmmExprType src
131 size = cmmTypeSize ty
133 CmmCall target result_regs args _ _
134 -> genCCall target result_regs args
136 CmmBranch id -> genBranch id
137 CmmCondBranch arg id -> genCondJump id arg
138 CmmSwitch arg ids -> genSwitch arg ids
139 CmmJump arg params -> genJump arg
-- CmmReturn should no longer exist after CPS conversion.
141 panic "stmtToInstrs: return statement should have been cps'd away"
143 -- -----------------------------------------------------------------------------
144 -- General things for putting together code sequences
146 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
147 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' node into an explicit addition of the register
-- and the offset as a literal of the register's width.  Only the
-- CmmRegOff shape is handled here (matching the original definition).
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let width  = typeWidth (cmmRegType reg)
        offLit = CmmLit (CmmInt (fromIntegral off) width)
    in  CmmMachOp (MO_Add width) [CmmReg reg, offLit]
153 -- -----------------------------------------------------------------------------
154 -- Code gen for 64-bit arithmetic on 32-bit platforms
157 Simple support for generating 64-bit code (ie, 64 bit values and 64
158 bit assignments) on 32-bit platforms. Unlike the main code generator
159 we merely shoot for generating working code as simply as possible, and
160 pay little attention to code quality. Specifically, there is no
161 attempt to deal cleverly with the fixed-vs-floating register
162 distinction; all values are generated into (pairs of) floating
163 registers, even if this would mean some redundant reg-reg moves as a
164 result. Only one of the VRegUniques is returned, since it will be
165 of the VRegUniqueLo form, and the upper-half VReg can be determined
166 by applying getHiVRegFromLo to it.
-- Result of 64-bit instruction selection on a 32-bit platform: the
-- generated code plus the low-half virtual register (the high half is
-- recovered with getHiVRegFromLo).
169 data ChildCode64 -- a.k.a "Register64"
172 Reg -- the lower 32-bit temporary which contains the
173 -- result; use getHiVRegFromLo to find the other
174 -- VRegUnique. Rules of this simplified insn
175 -- selection game are therefore that the returned
176 -- Reg may be modified
178 #if WORD_SIZE_IN_BITS==32
179 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
180 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
183 #ifndef x86_64_TARGET_ARCH
184 iselExpr64 :: CmmExpr -> NatM ChildCode64
187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- Store a 64-bit value to memory on i386: two 32-bit MOVs, low word at
-- the address, high word at address+4 (little-endian layout).
191 assignMem_I64Code addrTree valueTree = do
192 Amode addr addr_code <- getAmode addrTree
193 ChildCode64 vcode rlo <- iselExpr64 valueTree
195 rhi = getHiVRegFromLo rlo
197 -- Little-endian store
198 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
-- NOTE(review): fromJust is partial here — it relies on addrOffset
-- always succeeding for a +4 displacement on this addressing mode.
199 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
201 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Assign a 64-bit value to a local register pair on i386: move the
-- source low/high virtual registers into the destination pair.
204 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
205 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
207 r_dst_lo = mkVReg u_dst II32
208 r_dst_hi = getHiVRegFromLo r_dst_lo
209 r_src_hi = getHiVRegFromLo r_src_lo
210 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
211 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
214 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers are valid 64-bit lvalues here.
217 assignReg_I64Code lvalue valueTree
218 = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit expression selection for i386.  Each equation returns a
-- ChildCode64: the code plus the low-half register.
--
-- Literal: split into low word r and high word q, load each half.
222 iselExpr64 (CmmLit (CmmInt i _)) = do
223 (rlo,rhi) <- getNewRegPairNat II32
225 r = fromIntegral (fromIntegral i :: Word32)
226 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
228 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
229 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
232 return (ChildCode64 code rlo)
-- Load: low word from addr, high word from addr+4 (little-endian).
234 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
235 Amode addr addr_code <- getAmode addrTree
236 (rlo,rhi) <- getNewRegPairNat II32
238 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
239 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
242 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Local 64-bit register: no code needed, just name the low half.
246 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
247 = return (ChildCode64 nilOL (mkVReg vu II32))
249 -- we handle addition, but rather badly
-- Add a literal: 32-bit ADD on the low halves, then ADC (add with
-- carry) on the high halves.
250 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
251 ChildCode64 code1 r1lo <- iselExpr64 e1
252 (rlo,rhi) <- getNewRegPairNat II32
254 r = fromIntegral (fromIntegral i :: Word32)
255 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
256 r1hi = getHiVRegFromLo r1lo
258 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
259 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
260 MOV II32 (OpReg r1hi) (OpReg rhi),
261 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
263 return (ChildCode64 code rlo)
-- General addition: same ADD/ADC pattern with a register operand.
265 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
266 ChildCode64 code1 r1lo <- iselExpr64 e1
267 ChildCode64 code2 r2lo <- iselExpr64 e2
268 (rlo,rhi) <- getNewRegPairNat II32
270 r1hi = getHiVRegFromLo r1lo
271 r2hi = getHiVRegFromLo r2lo
274 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
275 ADD II32 (OpReg r2lo) (OpReg rlo),
276 MOV II32 (OpReg r1hi) (OpReg rhi),
277 ADC II32 (OpReg r2hi) (OpReg rhi) ]
279 return (ChildCode64 code rlo)
-- Zero-extension to 64 bits: compute the value, clear the high half.
281 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
283 r_dst_lo <- getNewRegNat II32
284 let r_dst_hi = getHiVRegFromLo r_dst_lo
287 ChildCode64 (code `snocOL`
288 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
-- Anything else is unsupported on i386.
293 = pprPanic "iselExpr64(i386)" (ppr expr)
295 #endif /* i386_TARGET_ARCH */
297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
299 #if sparc_TARGET_ARCH
-- Store a 64-bit value on SPARC: big-endian, so the high word goes at
-- offset 0 and the low word at offset 4.
301 assignMem_I64Code addrTree valueTree = do
302 Amode addr addr_code <- getAmode addrTree
303 ChildCode64 vcode rlo <- iselExpr64 valueTree
304 (src, code) <- getSomeReg addrTree
306 rhi = getHiVRegFromLo rlo
308 mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
309 mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
310 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- Register-pair assignment on SPARC: moves are encoded as OR with %g0.
312 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
313 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
315 r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
316 r_dst_hi = getHiVRegFromLo r_dst_lo
317 r_src_hi = getHiVRegFromLo r_src_lo
318 mov_lo = mkMOV r_src_lo r_dst_lo
319 mov_hi = mkMOV r_src_hi r_dst_hi
-- A reg-reg move is "OR %g0, src, dst" on SPARC.
320 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
321 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
322 assignReg_I64Code lvalue valueTree
323 = panic "assignReg_I64Code(sparc): invalid lvalue"
326 -- Load a 64 bit word
327 iselExpr64 (CmmLoad addrTree ty)
329 = do Amode amode addr_code <- getAmode addrTree
332 | AddrRegReg r1 r2 <- amode
333 = do rlo <- getNewRegNat II32
334 tmp <- getNewRegNat II32
335 let rhi = getHiVRegFromLo rlo
340 [ ADD False False r1 (RIReg r2) tmp
341 , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
342 , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
345 | AddrRegImm r1 (ImmInt i) <- amode
346 = do rlo <- getNewRegNat II32
347 let rhi = getHiVRegFromLo rlo
352 [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
353 , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
359 -- Add a literal to a 64 bit integer
360 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
361 = do ChildCode64 code1 r1_lo <- iselExpr64 e1
362 let r1_hi = getHiVRegFromLo r1_lo
364 r_dst_lo <- getNewRegNat II32
365 let r_dst_hi = getHiVRegFromLo r_dst_lo
369 [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
370 , ADD True False r1_hi (RIReg g0) r_dst_hi ])
374 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
375 r_dst_lo <- getNewRegNat II32
376 let r_dst_hi = getHiVRegFromLo r_dst_lo
377 r_src_lo = mkVReg uq II32
378 r_src_hi = getHiVRegFromLo r_src_lo
379 mov_lo = mkMOV r_src_lo r_dst_lo
380 mov_hi = mkMOV r_src_hi r_dst_hi
381 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
383 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
387 = pprPanic "iselExpr64(sparc)" (ppr expr)
389 #endif /* sparc_TARGET_ARCH */
391 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
393 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for the high and low words of a 64-bit
-- memory operand on PowerPC (big-endian: high at +0, low at +4).  When
-- the base mode cannot absorb a +4 displacement, force the address
-- into a register and use reg+imm modes.
395 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
396 getI64Amodes addrTree = do
397 Amode hi_addr addr_code <- getAmode addrTree
398 case addrOffset hi_addr 4 of
399 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
400 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
401 return (AddrRegImm hi_ptr (ImmInt 0),
402 AddrRegImm hi_ptr (ImmInt 4),
-- Store a 64-bit value on PowerPC: big-endian store of each half via
-- the two addressing modes from getI64Amodes.
405 assignMem_I64Code addrTree valueTree = do
406 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
407 ChildCode64 vcode rlo <- iselExpr64 valueTree
409 rhi = getHiVRegFromLo rlo
412 mov_hi = ST II32 rhi hi_addr
413 mov_lo = ST II32 rlo lo_addr
415 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Register-pair assignment on PowerPC: MR (move register) per half.
417 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
418 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
420 r_dst_lo = mkVReg u_dst II32
421 r_dst_hi = getHiVRegFromLo r_dst_lo
422 r_src_hi = getHiVRegFromLo r_src_lo
423 mov_lo = MR r_dst_lo r_src_lo
424 mov_hi = MR r_dst_hi r_src_hi
427 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers are valid 64-bit lvalues here.
430 assignReg_I64Code lvalue valueTree
431 = panic "assignReg_I64Code(powerpc): invalid lvalue"
434 -- Don't delete this -- it's very handy for debugging.
436 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
437 -- = panic "iselExpr64(???)"
-- Load a 64-bit word on PowerPC: big-endian, via the paired addressing
-- modes from getI64Amodes.
439 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
440 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
441 (rlo, rhi) <- getNewRegPairNat II32
442 let mov_hi = LD II32 rhi hi_addr
443 mov_lo = LD II32 rlo lo_addr
444 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Local 64-bit register: no code needed, just name the low half.
447 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
448 = return (ChildCode64 nilOL (mkVReg vu II32))
-- Materialise a 64-bit literal on PowerPC.  The literal is split into
-- four 16-bit halves (half0 = least significant).  Each 32-bit word is
-- built with LIS (load-immediate-shifted, sets the upper halfword) and
-- an OR-immediate for the lower halfword.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                -- low word: half1 in the upper 16 bits, half0 below
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                -- high word: half3 in the upper 16 bits, half2 below.
                -- BUG FIX: half2 must be OR'd into rhi, not rlo; the
                -- previous code clobbered the low word and left the
                -- low halfword of the high register unset.
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- 64-bit addition on PowerPC: ADDC on the low halves (sets carry),
-- then the carry is consumed for the high halves.
467 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
468 ChildCode64 code1 r1lo <- iselExpr64 e1
469 ChildCode64 code2 r2lo <- iselExpr64 e2
470 (rlo,rhi) <- getNewRegPairNat II32
472 r1hi = getHiVRegFromLo r1lo
473 r2hi = getHiVRegFromLo r2lo
476 toOL [ ADDC rlo r1lo r2lo,
479 return (ChildCode64 code rlo)
-- Zero-extend a 32-bit value: move into the low half, clear the high.
481 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
482 (expr_reg,expr_code) <- getSomeReg expr
483 (rlo, rhi) <- getNewRegPairNat II32
484 let mov_hi = LI rhi (ImmInt 0)
485 mov_lo = MR rlo expr_reg
486 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Anything else is unsupported on PowerPC.
489 = pprPanic "iselExpr64(powerpc)" (ppr expr)
491 #endif /* powerpc_TARGET_ARCH */
494 -- -----------------------------------------------------------------------------
495 -- The 'Register' type
497 -- 'Register's passed up the tree. If the stix code forces the register
498 -- to live in a pre-decided machine register, it comes out as @Fixed@;
499 -- otherwise, it comes out as @Any@, and the parent can decide which
500 -- register to put it in.
503 = Fixed Size Reg InstrBlock
504 | Any Size (Reg -> InstrBlock)
-- | Relabel a 'Register' with a different 'Size'.  Purely a width
-- change on the annotation: the code and register are untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep register size =
    case register of
        Fixed _ reg code -> Fixed size reg code
        Any _ codefn     -> Any size codefn
512 -- -----------------------------------------------------------------------------
513 -- Utils based on getRegister, below
515 -- The dual to getAnyReg: compute an expression into a register, but
516 -- we don't mind which one it is.
-- | Compute an expression into some register — we don't care which one.
-- A Fixed result is used as-is; an Any result is given a fresh virtual
-- register of the right size.
517 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
519 r <- getRegister expr
522 tmp <- getNewRegNat rep
523 return (tmp, code tmp)
527 -- -----------------------------------------------------------------------------
528 -- Grab the Reg for a CmmReg
-- | Fetch the 'Reg' behind a 'CmmReg'.  Local registers map to virtual
-- registers; a global (magic) register must map to a real machine
-- register on this platform — anything else is a panic, because by
-- this stage the only MagicIds remaining should be the ones which are
-- pinned to real registers here.
getRegisterReg :: CmmReg -> Reg
getRegisterReg (CmmLocal (LocalReg u pk)) = mkVReg u (cmmTypeSize pk)
getRegisterReg (CmmGlobal mid) =
    case get_GlobalReg_reg_or_addr mid of
        Left (RealReg rrno) -> RealReg rrno
        _other              -> pprPanic "getRegisterReg-memory"
                                        (ppr $ CmmGlobal mid)
544 -- -----------------------------------------------------------------------------
545 -- Generate code to get a subtree into a Register
547 -- Don't delete this -- it's very handy for debugging.
549 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
550 -- = panic "getRegister(???)"
552 getRegister :: CmmExpr -> NatM Register
554 #if !x86_64_TARGET_ARCH
555 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
556 -- register, it can only be used for rip-relative addressing.
557 getRegister (CmmReg (CmmGlobal PicBaseReg))
559 reg <- getPicBaseNat wordSize
560 return (Fixed wordSize reg nilOL)
563 getRegister (CmmReg reg)
564 = return (Fixed (cmmTypeSize (cmmRegType reg))
565 (getRegisterReg reg) nilOL)
567 getRegister tree@(CmmRegOff _ _)
568 = getRegister (mangleIndexTree tree)
571 #if WORD_SIZE_IN_BITS==32
572 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
573 -- TO_W_(x), TO_W_(x >> 32)
575 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
576 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
577 ChildCode64 code rlo <- iselExpr64 x
578 return $ Fixed II32 (getHiVRegFromLo rlo) code
580 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
581 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
582 ChildCode64 code rlo <- iselExpr64 x
583 return $ Fixed II32 (getHiVRegFromLo rlo) code
585 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
586 ChildCode64 code rlo <- iselExpr64 x
587 return $ Fixed II32 rlo code
589 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
590 ChildCode64 code rlo <- iselExpr64 x
591 return $ Fixed II32 rlo code
595 -- end of machine-"independent" bit; here we go on the rest...
597 #if alpha_TARGET_ARCH
599 getRegister (StDouble d)
600 = getBlockIdNat `thenNat` \ lbl ->
601 getNewRegNat PtrRep `thenNat` \ tmp ->
602 let code dst = mkSeqInstrs [
603 LDATA RoDataSegment lbl [
604 DATA TF [ImmLab (rational d)]
606 LDA tmp (AddrImm (ImmCLbl lbl)),
607 LD TF dst (AddrReg tmp)]
609 return (Any FF64 code)
611 getRegister (StPrim primop [x]) -- unary PrimOps
613 IntNegOp -> trivialUCode (NEG Q False) x
615 NotOp -> trivialUCode NOT x
617 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
618 DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
620 OrdOp -> coerceIntCode IntRep x
623 Float2IntOp -> coerceFP2Int x
624 Int2FloatOp -> coerceInt2FP pr x
625 Double2IntOp -> coerceFP2Int x
626 Int2DoubleOp -> coerceInt2FP pr x
628 Double2FloatOp -> coerceFltCode x
629 Float2DoubleOp -> coerceFltCode x
631 other_op -> getRegister (StCall fn CCallConv FF64 [x])
633 fn = case other_op of
634 FloatExpOp -> fsLit "exp"
635 FloatLogOp -> fsLit "log"
636 FloatSqrtOp -> fsLit "sqrt"
637 FloatSinOp -> fsLit "sin"
638 FloatCosOp -> fsLit "cos"
639 FloatTanOp -> fsLit "tan"
640 FloatAsinOp -> fsLit "asin"
641 FloatAcosOp -> fsLit "acos"
642 FloatAtanOp -> fsLit "atan"
643 FloatSinhOp -> fsLit "sinh"
644 FloatCoshOp -> fsLit "cosh"
645 FloatTanhOp -> fsLit "tanh"
646 DoubleExpOp -> fsLit "exp"
647 DoubleLogOp -> fsLit "log"
648 DoubleSqrtOp -> fsLit "sqrt"
649 DoubleSinOp -> fsLit "sin"
650 DoubleCosOp -> fsLit "cos"
651 DoubleTanOp -> fsLit "tan"
652 DoubleAsinOp -> fsLit "asin"
653 DoubleAcosOp -> fsLit "acos"
654 DoubleAtanOp -> fsLit "atan"
655 DoubleSinhOp -> fsLit "sinh"
656 DoubleCoshOp -> fsLit "cosh"
657 DoubleTanhOp -> fsLit "tanh"
659 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
661 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
663 CharGtOp -> trivialCode (CMP LTT) y x
664 CharGeOp -> trivialCode (CMP LE) y x
665 CharEqOp -> trivialCode (CMP EQQ) x y
666 CharNeOp -> int_NE_code x y
667 CharLtOp -> trivialCode (CMP LTT) x y
668 CharLeOp -> trivialCode (CMP LE) x y
670 IntGtOp -> trivialCode (CMP LTT) y x
671 IntGeOp -> trivialCode (CMP LE) y x
672 IntEqOp -> trivialCode (CMP EQQ) x y
673 IntNeOp -> int_NE_code x y
674 IntLtOp -> trivialCode (CMP LTT) x y
675 IntLeOp -> trivialCode (CMP LE) x y
677 WordGtOp -> trivialCode (CMP ULT) y x
678 WordGeOp -> trivialCode (CMP ULE) x y
679 WordEqOp -> trivialCode (CMP EQQ) x y
680 WordNeOp -> int_NE_code x y
681 WordLtOp -> trivialCode (CMP ULT) x y
682 WordLeOp -> trivialCode (CMP ULE) x y
684 AddrGtOp -> trivialCode (CMP ULT) y x
685 AddrGeOp -> trivialCode (CMP ULE) y x
686 AddrEqOp -> trivialCode (CMP EQQ) x y
687 AddrNeOp -> int_NE_code x y
688 AddrLtOp -> trivialCode (CMP ULT) x y
689 AddrLeOp -> trivialCode (CMP ULE) x y
691 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
692 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
693 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
694 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
695 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
696 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
698 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
699 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
700 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
701 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
702 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
703 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
705 IntAddOp -> trivialCode (ADD Q False) x y
706 IntSubOp -> trivialCode (SUB Q False) x y
707 IntMulOp -> trivialCode (MUL Q False) x y
708 IntQuotOp -> trivialCode (DIV Q False) x y
709 IntRemOp -> trivialCode (REM Q False) x y
711 WordAddOp -> trivialCode (ADD Q False) x y
712 WordSubOp -> trivialCode (SUB Q False) x y
713 WordMulOp -> trivialCode (MUL Q False) x y
714 WordQuotOp -> trivialCode (DIV Q True) x y
715 WordRemOp -> trivialCode (REM Q True) x y
717 FloatAddOp -> trivialFCode W32 (FADD TF) x y
718 FloatSubOp -> trivialFCode W32 (FSUB TF) x y
719 FloatMulOp -> trivialFCode W32 (FMUL TF) x y
720 FloatDivOp -> trivialFCode W32 (FDIV TF) x y
722 DoubleAddOp -> trivialFCode W64 (FADD TF) x y
723 DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
724 DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
725 DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
727 AddrAddOp -> trivialCode (ADD Q False) x y
728 AddrSubOp -> trivialCode (SUB Q False) x y
729 AddrRemOp -> trivialCode (REM Q True) x y
731 AndOp -> trivialCode AND x y
732 OrOp -> trivialCode OR x y
733 XorOp -> trivialCode XOR x y
734 SllOp -> trivialCode SLL x y
735 SrlOp -> trivialCode SRL x y
737 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
738 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
739 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
741 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
742 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
744 {- ------------------------------------------------------------
745 Some bizarre special code for getting condition codes into
746 registers. Integer non-equality is a test for equality
747 followed by an XOR with 1. (Integer comparisons always set
748 the result register to 0 or 1.) Floating point comparisons of
749 any kind leave the result in a floating point register, so we
750 need to wrangle an integer register out of things.
752 int_NE_code :: StixTree -> StixTree -> NatM Register
755 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
756 getNewRegNat IntRep `thenNat` \ tmp ->
758 code = registerCode register tmp
759 src = registerName register tmp
760 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
762 return (Any IntRep code__2)
764 {- ------------------------------------------------------------
765 Comments for int_NE_code also apply to cmpF_code
768 :: (Reg -> Reg -> Reg -> Instr)
770 -> StixTree -> StixTree
773 cmpF_code instr cond x y
774 = trivialFCode pr instr x y `thenNat` \ register ->
775 getNewRegNat FF64 `thenNat` \ tmp ->
776 getBlockIdNat `thenNat` \ lbl ->
778 code = registerCode register tmp
779 result = registerName register tmp
781 code__2 dst = code . mkSeqInstrs [
782 OR zeroh (RIImm (ImmInt 1)) dst,
783 BF cond result (ImmCLbl lbl),
784 OR zeroh (RIReg zeroh) dst,
787 return (Any IntRep code__2)
789 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
790 ------------------------------------------------------------
792 getRegister (CmmLoad pk mem)
793 = getAmode mem `thenNat` \ amode ->
795 code = amodeCode amode
796 src = amodeAddr amode
797 size = primRepToSize pk
798 code__2 dst = code . mkSeqInstr (LD size dst src)
800 return (Any pk code__2)
802 getRegister (StInt i)
805 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
807 return (Any IntRep code)
810 code dst = mkSeqInstr (LDI Q dst src)
812 return (Any IntRep code)
814 src = ImmInt (fromInteger i)
819 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
821 return (Any PtrRep code)
824 imm__2 = case imm of Just x -> x
826 #endif /* alpha_TARGET_ARCH */
828 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
832 getRegister (CmmLit (CmmFloat f W32)) = do
833 lbl <- getNewLabelNat
834 dflags <- getDynFlagsNat
835 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
836 Amode addr addr_code <- getAmode dynRef
840 CmmStaticLit (CmmFloat f W32)]
841 `consOL` (addr_code `snocOL`
844 return (Any FF32 code)
847 getRegister (CmmLit (CmmFloat d W64))
849 = let code dst = unitOL (GLDZ dst)
850 in return (Any FF64 code)
853 = let code dst = unitOL (GLD1 dst)
854 in return (Any FF64 code)
857 lbl <- getNewLabelNat
858 dflags <- getDynFlagsNat
859 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
860 Amode addr addr_code <- getAmode dynRef
864 CmmStaticLit (CmmFloat d W64)]
865 `consOL` (addr_code `snocOL`
868 return (Any FF64 code)
870 #endif /* i386_TARGET_ARCH */
872 #if x86_64_TARGET_ARCH
874 getRegister (CmmLit (CmmFloat 0.0 w)) = do
875 let size = floatSize w
876 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
877 -- I don't know why there are xorpd, xorps, and pxor instructions.
878 -- They all appear to do the same thing --SDM
879 return (Any size code)
881 getRegister (CmmLit (CmmFloat f w)) = do
882 lbl <- getNewLabelNat
883 let code dst = toOL [
886 CmmStaticLit (CmmFloat f w)],
887 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
890 return (Any size code)
891 where size = floatSize w
893 #endif /* x86_64_TARGET_ARCH */
895 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
897 -- catch simple cases of zero- or sign-extended load
898 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
899 code <- intLoadCode (MOVZxL II8) addr
900 return (Any II32 code)
902 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
903 code <- intLoadCode (MOVSxL II8) addr
904 return (Any II32 code)
906 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
907 code <- intLoadCode (MOVZxL II16) addr
908 return (Any II32 code)
910 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
911 code <- intLoadCode (MOVSxL II16) addr
912 return (Any II32 code)
916 #if x86_64_TARGET_ARCH
918 -- catch simple cases of zero- or sign-extended load
919 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
920 code <- intLoadCode (MOVZxL II8) addr
921 return (Any II64 code)
923 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
924 code <- intLoadCode (MOVSxL II8) addr
925 return (Any II64 code)
927 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
928 code <- intLoadCode (MOVZxL II16) addr
929 return (Any II64 code)
931 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
932 code <- intLoadCode (MOVSxL II16) addr
933 return (Any II64 code)
935 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
936 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
937 return (Any II64 code)
939 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
940 code <- intLoadCode (MOVSxL II32) addr
941 return (Any II64 code)
945 #if x86_64_TARGET_ARCH
946 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
947 CmmLit displacement])
948 = return $ Any II64 (\dst -> unitOL $
949 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
952 #if x86_64_TARGET_ARCH
953 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
954 x_code <- getAnyReg x
955 lbl <- getNewLabelNat
957 code dst = x_code dst `appOL` toOL [
958 -- This is how gcc does it, so it can't be that bad:
959 LDATA ReadOnlyData16 [
962 CmmStaticLit (CmmInt 0x80000000 W32),
963 CmmStaticLit (CmmInt 0 W32),
964 CmmStaticLit (CmmInt 0 W32),
965 CmmStaticLit (CmmInt 0 W32)
967 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
968 -- xorps, so we need the 128-bit constant
969 -- ToDo: rip-relative
972 return (Any FF32 code)
974 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
975 x_code <- getAnyReg x
976 lbl <- getNewLabelNat
978 -- This is how gcc does it, so it can't be that bad:
979 code dst = x_code dst `appOL` toOL [
980 LDATA ReadOnlyData16 [
983 CmmStaticLit (CmmInt 0x8000000000000000 W64),
984 CmmStaticLit (CmmInt 0 W64)
986 -- gcc puts an unpck here. Wonder if we need it.
987 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
988 -- xorpd, so we need the 128-bit constant
991 return (Any FF64 code)
994 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
996 getRegister (CmmMachOp mop [x]) -- unary MachOps
999 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
1000 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
1003 MO_S_Neg w -> triv_ucode NEGI (intSize w)
1004 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
1005 MO_Not w -> triv_ucode NOT (intSize w)
1008 MO_UU_Conv W32 W8 -> toI8Reg W32 x
1009 MO_SS_Conv W32 W8 -> toI8Reg W32 x
1010 MO_UU_Conv W16 W8 -> toI8Reg W16 x
1011 MO_SS_Conv W16 W8 -> toI8Reg W16 x
1012 MO_UU_Conv W32 W16 -> toI16Reg W32 x
1013 MO_SS_Conv W32 W16 -> toI16Reg W32 x
1015 #if x86_64_TARGET_ARCH
1016 MO_UU_Conv W64 W32 -> conversionNop II64 x
1017 MO_SS_Conv W64 W32 -> conversionNop II64 x
1018 MO_UU_Conv W64 W16 -> toI16Reg W64 x
1019 MO_SS_Conv W64 W16 -> toI16Reg W64 x
1020 MO_UU_Conv W64 W8 -> toI8Reg W64 x
1021 MO_SS_Conv W64 W8 -> toI8Reg W64 x
1024 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1025 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
1028 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
1029 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
1030 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
1032 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
1033 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
1034 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
1036 #if x86_64_TARGET_ARCH
1037 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
1038 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1039 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1040 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
1041 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1042 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1043 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1044 -- However, we don't want the register allocator to throw it
1045 -- away as an unnecessary reg-to-reg move, so we keep it in
1046 -- the form of a movzl and print it as a movl later.
1049 #if i386_TARGET_ARCH
1050 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1051 MO_FF_Conv W64 W32 -> conversionNop FF32 x
1053 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1054 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1057 MO_FS_Conv from to -> coerceFP2Int from to x
1058 MO_SF_Conv from to -> coerceInt2FP from to x
1060 other -> pprPanic "getRegister" (pprMachOp mop)
1062 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1063 triv_ucode instr size = trivialUCode size (instr size) x
1065 -- signed or unsigned extension.
1066 integerExtend :: Width -> Width
1067 -> (Size -> Operand -> Operand -> Instr)
1068 -> CmmExpr -> NatM Register
1069 integerExtend from to instr expr = do
1070 (reg,e_code) <- if from == W8 then getByteReg expr
1071 else getSomeReg expr
1075 instr (intSize from) (OpReg reg) (OpReg dst)
1076 return (Any (intSize to) code)
1078 toI8Reg :: Width -> CmmExpr -> NatM Register
1079 toI8Reg new_rep expr
1080 = do codefn <- getAnyReg expr
1081 return (Any (intSize new_rep) codefn)
1082 -- HACK: use getAnyReg to get a byte-addressable register.
1083 -- If the source was a Fixed register, this will add the
1084 -- mov instruction to put it into the desired destination.
1085 -- We're assuming that the destination won't be a fixed
1086 -- non-byte-addressable register; it won't be, because all
1087 -- fixed registers are word-sized.
1089 toI16Reg = toI8Reg -- for now
-- A width/size conversion that needs no machine instructions:
-- generate code for the source expression as-is, then just relabel
-- the resulting Register with the new size.
1091 conversionNop :: Size -> CmmExpr -> NatM Register
1092 conversionNop new_size expr
1093 = do e_code <- getRegister expr
1094 return (swizzleRegisterRep e_code new_size)
1097 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1099 MO_F_Eq w -> condFltReg EQQ x y
1100 MO_F_Ne w -> condFltReg NE x y
1101 MO_F_Gt w -> condFltReg GTT x y
1102 MO_F_Ge w -> condFltReg GE x y
1103 MO_F_Lt w -> condFltReg LTT x y
1104 MO_F_Le w -> condFltReg LE x y
1106 MO_Eq rep -> condIntReg EQQ x y
1107 MO_Ne rep -> condIntReg NE x y
1109 MO_S_Gt rep -> condIntReg GTT x y
1110 MO_S_Ge rep -> condIntReg GE x y
1111 MO_S_Lt rep -> condIntReg LTT x y
1112 MO_S_Le rep -> condIntReg LE x y
1114 MO_U_Gt rep -> condIntReg GU x y
1115 MO_U_Ge rep -> condIntReg GEU x y
1116 MO_U_Lt rep -> condIntReg LU x y
1117 MO_U_Le rep -> condIntReg LEU x y
1119 #if i386_TARGET_ARCH
1120 MO_F_Add w -> trivialFCode w GADD x y
1121 MO_F_Sub w -> trivialFCode w GSUB x y
1122 MO_F_Quot w -> trivialFCode w GDIV x y
1123 MO_F_Mul w -> trivialFCode w GMUL x y
1126 #if x86_64_TARGET_ARCH
1127 MO_F_Add w -> trivialFCode w ADD x y
1128 MO_F_Sub w -> trivialFCode w SUB x y
1129 MO_F_Quot w -> trivialFCode w FDIV x y
1130 MO_F_Mul w -> trivialFCode w MUL x y
1133 MO_Add rep -> add_code rep x y
1134 MO_Sub rep -> sub_code rep x y
1136 MO_S_Quot rep -> div_code rep True True x y
1137 MO_S_Rem rep -> div_code rep True False x y
1138 MO_U_Quot rep -> div_code rep False True x y
1139 MO_U_Rem rep -> div_code rep False False x y
1141 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1143 MO_Mul rep -> triv_op rep IMUL
1144 MO_And rep -> triv_op rep AND
1145 MO_Or rep -> triv_op rep OR
1146 MO_Xor rep -> triv_op rep XOR
1148 {- Shift ops on x86s have constraints on their source, it
1149 either has to be Imm, CL or 1
1150 => trivialCode is not restrictive enough (sigh.)
1152 MO_Shl rep -> shift_code rep SHL x y {-False-}
1153 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1154 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1156 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1158 --------------------
1159 triv_op width instr = trivialCode width op (Just op) x y
1160 where op = instr (intSize width)
1162 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1163 imulMayOflo rep a b = do
1164 (a_reg, a_code) <- getNonClobberedReg a
1165 b_code <- getAnyReg b
1167 shift_amt = case rep of
1170 _ -> panic "shift_amt"
1173 code = a_code `appOL` b_code eax `appOL`
1175 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
1176 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1177 -- sign extend lower part
1178 SUB size (OpReg edx) (OpReg eax)
1179 -- compare against upper
1180 -- eax==0 if high part == sign extended low part
1183 return (Fixed size eax code)
1185 --------------------
1187 -> (Size -> Operand -> Operand -> Instr)
1192 {- Case1: shift length as immediate -}
1193 shift_code width instr x y@(CmmLit lit) = do
1194 x_code <- getAnyReg x
1196 size = intSize width
1198 = x_code dst `snocOL`
1199 instr size (OpImm (litToImm lit)) (OpReg dst)
1201 return (Any size code)
1203 {- Case2: shift length is complex (non-immediate)
1204 * y must go in %ecx.
1205 * we cannot do y first *and* put its result in %ecx, because
1206 %ecx might be clobbered by x.
1207 * if we do y second, then x cannot be
1208 in a clobbered reg. Also, we cannot clobber x's reg
1209 with the instruction itself.
1211 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1212 - do y second and put its result into %ecx. x gets placed in a fresh
1213 tmp. This is likely to be better, because the reg alloc can
1214 eliminate this reg->reg move here (it won't eliminate the other one,
1215 because the move is into the fixed %ecx).
1217 shift_code width instr x y{-amount-} = do
1218 x_code <- getAnyReg x
1219 let size = intSize width
1220 tmp <- getNewRegNat size
1221 y_code <- getAnyReg y
1223 code = x_code tmp `appOL`
1225 instr size (OpReg ecx) (OpReg tmp)
1227 return (Fixed size tmp code)
1229 --------------------
1230 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1231 add_code rep x (CmmLit (CmmInt y _))
1232 | is32BitInteger y = add_int rep x y
1233 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1234 where size = intSize rep
1236 --------------------
1237 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1238 sub_code rep x (CmmLit (CmmInt y _))
1239 | is32BitInteger (-y) = add_int rep x (-y)
1240 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1242 -- our three-operand add instruction:
1243 add_int width x y = do
1244 (x_reg, x_code) <- getSomeReg x
1246 size = intSize width
1247 imm = ImmInt (fromInteger y)
1251 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1254 return (Any size code)
1256 ----------------------
1257 div_code width signed quotient x y = do
1258 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1259 x_code <- getAnyReg x
1261 size = intSize width
1262 widen | signed = CLTD size
1263 | otherwise = XOR size (OpReg edx) (OpReg edx)
1265 instr | signed = IDIV
1268 code = y_code `appOL`
1270 toOL [widen, instr size y_op]
1272 result | quotient = eax
1276 return (Fixed size result code)
1279 getRegister (CmmLoad mem pk)
1282 Amode src mem_code <- getAmode mem
1284 size = cmmTypeSize pk
1285 code dst = mem_code `snocOL`
1286 IF_ARCH_i386(GLD size src dst,
1287 MOV size (OpAddr src) (OpReg dst))
1288 return (Any size code)
1290 #if i386_TARGET_ARCH
1291 getRegister (CmmLoad mem pk)
1294 code <- intLoadCode instr mem
1295 return (Any size code)
1297 width = typeWidth pk
1298 size = intSize width
1299 instr = case width of
1302 -- We always zero-extend 8-bit loads, if we
1303 -- can't think of anything better. This is because
1304 -- we can't guarantee access to an 8-bit variant of every register
1305 -- (esi and edi don't have 8-bit variants), so to make things
1306 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1309 #if x86_64_TARGET_ARCH
1310 -- Simpler memory load code on x86_64
1311 getRegister (CmmLoad mem pk)
1313 code <- intLoadCode (MOV size) mem
1314 return (Any size code)
1315 where size = intSize $ typeWidth pk
1318 getRegister (CmmLit (CmmInt 0 width))
1320 size = intSize width
1322 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1323 adj_size = case size of II64 -> II32; _ -> size
1324 size1 = IF_ARCH_i386( size, adj_size )
1326 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1328 return (Any size code)
1330 #if x86_64_TARGET_ARCH
1331 -- optimisation for loading small literals on x86_64: take advantage
1332 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1333 -- instruction forms are shorter.
1334 getRegister (CmmLit lit)
1335 | isWord64 (cmmLitType lit), not (isBigLit lit)
1338 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1340 return (Any II64 code)
1342 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1344 -- note1: not the same as (not.is32BitLit), because that checks for
1345 -- signed literals that fit in 32 bits, but we want unsigned
1347 -- note2: all labels are small, because we're assuming the
1348 -- small memory model (see gcc docs, -mcmodel=small).
1351 getRegister (CmmLit lit)
1353 size = cmmTypeSize (cmmLitType lit)
1355 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1357 return (Any size code)
1359 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Code to load an integer value from memory: compute an addressing
-- mode for 'mem', then append the given load instruction from that
-- address into the eventual destination register (supplied later as
-- 'dst' by the caller).
1362 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1363 -> NatM (Reg -> InstrBlock)
1364 intLoadCode instr mem = do
1365 Amode src mem_code <- getAmode mem
1366 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1368 -- Compute an expression into *any* register, adding the appropriate
1369 -- move instruction if necessary.
1370 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1372 r <- getRegister expr
-- | Turn a 'Register' into a function from destination register to code.
-- An 'Any' register already carries such a function; for a 'Fixed'
-- register we append a reg-to-reg move from the fixed register into the
-- requested destination.
1375 anyReg :: Register -> NatM (Reg -> InstrBlock)
1376 anyReg (Any _ code) = return code
1377 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1379 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1380 -- Fixed registers might not be byte-addressable, so we make sure we've
1381 -- got a temporary, inserting an extra reg copy if necessary.
1382 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1383 #if x86_64_TARGET_ARCH
1384 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1386 getByteReg expr = do
1387 r <- getRegister expr
1390 tmp <- getNewRegNat rep
1391 return (tmp, code tmp)
1393 | isVirtualReg reg -> return (reg,code)
1395 tmp <- getNewRegNat rep
1396 return (tmp, code `snocOL` reg2reg rep reg tmp)
1397 -- ToDo: could optimise slightly by checking for byte-addressable
1398 -- real registers, but that will happen very rarely if at all.
1401 -- Another variant: this time we want the result in a register that cannot
1402 -- be modified by code to evaluate an arbitrary expression.
1403 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1404 getNonClobberedReg expr = do
1405 r <- getRegister expr
1408 tmp <- getNewRegNat rep
1409 return (tmp, code tmp)
1411 -- only free regs can be clobbered
1412 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1413 tmp <- getNewRegNat rep
1414 return (tmp, code `snocOL` reg2reg rep reg tmp)
1418 reg2reg :: Size -> Reg -> Reg -> Instr
1419 reg2reg size src dst
1420 #if i386_TARGET_ARCH
1421 | isFloatSize size = GMOV src dst
1423 | otherwise = MOV size (OpReg src) (OpReg dst)
1425 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1429 #if sparc_TARGET_ARCH
1431 -- getRegister :: CmmExpr -> NatM Register
1433 -- Load a literal float into a float register.
1434 -- The actual literal is stored in a new data area, and we load it
1436 getRegister (CmmLit (CmmFloat f W32)) = do
1438 -- a label for the new data area
1439 lbl <- getNewLabelNat
1440 tmp <- getNewRegNat II32
1442 let code dst = toOL [
1446 CmmStaticLit (CmmFloat f W32)],
1449 SETHI (HI (ImmCLbl lbl)) tmp,
1450 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1452 return (Any FF32 code)
1454 getRegister (CmmLit (CmmFloat d W64)) = do
1455 lbl <- getNewLabelNat
1456 tmp <- getNewRegNat II32
1457 let code dst = toOL [
1460 CmmStaticLit (CmmFloat d W64)],
1461 SETHI (HI (ImmCLbl lbl)) tmp,
1462 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1463 return (Any FF64 code)
1465 getRegister (CmmMachOp mop [x]) -- unary MachOps
1467 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
1468 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
1470 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
1471 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
1473 MO_FF_Conv W64 W32-> coerceDbl2Flt x
1474 MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1476 MO_FS_Conv from to -> coerceFP2Int from to x
1477 MO_SF_Conv from to -> coerceInt2FP from to x
1479 -- Conversions which are a nop on sparc
1481 | from == to -> conversionNop (intSize to) x
1482 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1483 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1484 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1486 MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
1487 MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
1488 MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
1491 MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
1492 MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
1493 MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
1495 other_op -> panic ("Unknown unary mach op: " ++ show mop)
1498 -- | sign extend and widen
1500 :: Width -- ^ width of source expression
1501 -> Width -- ^ width of result
1502 -> CmmExpr -- ^ source expression
1505 integerExtend from to expr
1506 = do -- load the expr into some register
1507 (reg, e_code) <- getSomeReg expr
1508 tmp <- getNewRegNat II32
1510 = case (from, to) of
1517 -- local shift word left to load the sign bit
1518 `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
1520 -- arithmetic shift right to sign extend
1521 `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
1523 return (Any (intSize to) code)
-- No-op conversion (sparc): reuse the source expression's code,
-- retagged with the new representation; no instructions emitted.
1526 conversionNop new_rep expr
1527 = do e_code <- getRegister expr
1528 return (swizzleRegisterRep e_code new_rep)
1530 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1532 MO_Eq rep -> condIntReg EQQ x y
1533 MO_Ne rep -> condIntReg NE x y
1535 MO_S_Gt rep -> condIntReg GTT x y
1536 MO_S_Ge rep -> condIntReg GE x y
1537 MO_S_Lt rep -> condIntReg LTT x y
1538 MO_S_Le rep -> condIntReg LE x y
1540 MO_U_Gt W32 -> condIntReg GTT x y
1541 MO_U_Ge W32 -> condIntReg GE x y
1542 MO_U_Lt W32 -> condIntReg LTT x y
1543 MO_U_Le W32 -> condIntReg LE x y
1545 MO_U_Gt W16 -> condIntReg GU x y
1546 MO_U_Ge W16 -> condIntReg GEU x y
1547 MO_U_Lt W16 -> condIntReg LU x y
1548 MO_U_Le W16 -> condIntReg LEU x y
1550 MO_Add W32 -> trivialCode W32 (ADD False False) x y
1551 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1553 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1555 MO_S_Quot W32 -> idiv True False x y
1556 MO_U_Quot W32 -> idiv False False x y
1558 MO_S_Rem W32 -> irem True x y
1559 MO_U_Rem W32 -> irem False x y
1561 MO_F_Eq w -> condFltReg EQQ x y
1562 MO_F_Ne w -> condFltReg NE x y
1564 MO_F_Gt w -> condFltReg GTT x y
1565 MO_F_Ge w -> condFltReg GE x y
1566 MO_F_Lt w -> condFltReg LTT x y
1567 MO_F_Le w -> condFltReg LE x y
1569 MO_F_Add w -> trivialFCode w FADD x y
1570 MO_F_Sub w -> trivialFCode w FSUB x y
1571 MO_F_Mul w -> trivialFCode w FMUL x y
1572 MO_F_Quot w -> trivialFCode w FDIV x y
1574 MO_And rep -> trivialCode rep (AND False) x y
1575 MO_Or rep -> trivialCode rep (OR False) x y
1576 MO_Xor rep -> trivialCode rep (XOR False) x y
1578 MO_Mul rep -> trivialCode rep (SMUL False) x y
1580 MO_Shl rep -> trivialCode rep SLL x y
1581 MO_U_Shr rep -> trivialCode rep SRL x y
1582 MO_S_Shr rep -> trivialCode rep SRA x y
1585 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1586 [promote x, promote y])
1587 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1588 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1591 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1593 -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1596 -- | Generate an integer division instruction.
1597 idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
1599 -- For unsigned division with a 32 bit numerator,
1600 -- we can just clear the Y register.
1601 idiv False cc x y = do
1602 (a_reg, a_code) <- getSomeReg x
1603 (b_reg, b_code) <- getSomeReg y
1610 , UDIV cc a_reg (RIReg b_reg) dst]
1612 return (Any II32 code)
1615 -- For _signed_ division with a 32 bit numerator,
1616 -- we have to sign extend the numerator into the Y register.
1617 idiv True cc x y = do
1618 (a_reg, a_code) <- getSomeReg x
1619 (b_reg, b_code) <- getSomeReg y
1621 tmp <- getNewRegNat II32
1627 [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
1628 , SRA tmp (RIImm (ImmInt 16)) tmp
1631 , SDIV cc a_reg (RIReg b_reg) dst]
1633 return (Any II32 code)
1636 -- | Do an integer remainder.
1638 -- NOTE: The SPARC v8 architecture manual says that integer division
1639 -- instructions _may_ generate a remainder, depending on the implementation.
1640 -- If so it is _recommended_ that the remainder is placed in the Y register.
1642 -- The UltraSparc 2007 manual says Y is _undefined_ after division.
1644 -- The SPARC T2 doesn't store the remainder, not sure about the others.
1645 -- It's probably best not to worry about it, and just generate our own
1648 irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
1650 -- For unsigned operands:
1651 -- Division is between a 64 bit numerator and a 32 bit denominator,
1652 -- so we still have to clear the Y register.
1654 (a_reg, a_code) <- getSomeReg x
1655 (b_reg, b_code) <- getSomeReg y
1657 tmp_reg <- getNewRegNat II32
1664 , UDIV False a_reg (RIReg b_reg) tmp_reg
1665 , UMUL False tmp_reg (RIReg b_reg) tmp_reg
1666 , SUB False False a_reg (RIReg tmp_reg) dst]
1668 return (Any II32 code)
1671 -- For signed operands:
1672 -- Make sure to sign extend into the Y register, or the remainder
1673 -- will have the wrong sign when the numerator is negative.
1675 -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
1676 -- not the full 32. Not sure why this is, something to do with overflow?
1677 -- If anyone cares enough about the speed of signed remainder they
1678 -- can work it out themselves (then tell me). -- BL 2009/01/20
1681 (a_reg, a_code) <- getSomeReg x
1682 (b_reg, b_code) <- getSomeReg y
1684 tmp1_reg <- getNewRegNat II32
1685 tmp2_reg <- getNewRegNat II32
1691 [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1692 , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1695 , SDIV False a_reg (RIReg b_reg) tmp2_reg
1696 , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
1697 , SUB False False a_reg (RIReg tmp2_reg) dst]
1699 return (Any II32 code)
1702 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1703 imulMayOflo rep a b = do
1704 (a_reg, a_code) <- getSomeReg a
1705 (b_reg, b_code) <- getSomeReg b
1706 res_lo <- getNewRegNat II32
1707 res_hi <- getNewRegNat II32
1709 shift_amt = case rep of
1712 _ -> panic "shift_amt"
1713 code dst = a_code `appOL` b_code `appOL`
1715 SMUL False a_reg (RIReg b_reg) res_lo,
1717 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1718 SUB False False res_lo (RIReg res_hi) dst
1720 return (Any II32 code)
1722 getRegister (CmmLoad mem pk) = do
1723 Amode src code <- getAmode mem
1725 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1726 return (Any (cmmTypeSize pk) code__2)
1728 getRegister (CmmLit (CmmInt i _))
1731 src = ImmInt (fromInteger i)
1732 code dst = unitOL (OR False g0 (RIImm src) dst)
1734 return (Any II32 code)
1736 getRegister (CmmLit lit)
1737 = let rep = cmmLitType lit
1741 OR False dst (RIImm (LO imm)) dst]
1742 in return (Any II32 code)
1744 #endif /* sparc_TARGET_ARCH */
1746 #if powerpc_TARGET_ARCH
1747 getRegister (CmmLoad mem pk)
1750 Amode addr addr_code <- getAmode mem
1751 let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1752 addr_code `snocOL` LD size dst addr
1753 return (Any size code)
1754 where size = cmmTypeSize pk
1756 -- catch simple cases of zero- or sign-extended load
-- Fuse an 8-bit load followed by zero-extension to 32 bits into a
-- single byte load (a byte load zero-fills the upper bits here; the
-- note below explains why there is no signed byte counterpart).
1757 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1758 Amode addr addr_code <- getAmode mem
1759 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1761 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1763 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1764 Amode addr addr_code <- getAmode mem
1765 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-- Fuse a 16-bit load followed by sign-extension to 32 bits into a
-- single sign-extending halfword load. Uses LA rather than LD
-- (presumably PowerPC's "load halfword algebraic" — TODO confirm).
1767 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1768 Amode addr addr_code <- getAmode mem
1769 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1771 getRegister (CmmMachOp mop [x]) -- unary MachOps
1773 MO_Not rep -> triv_ucode_int rep NOT
1775 MO_F_Neg w -> triv_ucode_float w FNEG
1776 MO_S_Neg w -> triv_ucode_int w NEG
1778 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
1779 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1781 MO_FS_Conv from to -> coerceFP2Int from to x
1782 MO_SF_Conv from to -> coerceInt2FP from to x
1785 | from == to -> conversionNop (intSize to) x
1787 -- narrowing is a nop: we treat the high bits as undefined
1788 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1789 MO_SS_Conv W16 W8 -> conversionNop II8 x
1790 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
1791 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1794 | from == to -> conversionNop (intSize to) x
1795 -- narrowing is a nop: we treat the high bits as undefined
1796 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1797 MO_UU_Conv W16 W8 -> conversionNop II8 x
1798 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1799 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1802 triv_ucode_int width instr = trivialUCode (intSize width) instr x
1803 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
-- No-op conversion (ppc): generate code for the source expression and
-- simply relabel the result with the new size; emits no instructions.
1805 conversionNop new_size expr
1806 = do e_code <- getRegister expr
1807 return (swizzleRegisterRep e_code new_size)
1809 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1811 MO_F_Eq w -> condFltReg EQQ x y
1812 MO_F_Ne w -> condFltReg NE x y
1813 MO_F_Gt w -> condFltReg GTT x y
1814 MO_F_Ge w -> condFltReg GE x y
1815 MO_F_Lt w -> condFltReg LTT x y
1816 MO_F_Le w -> condFltReg LE x y
1818 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1819 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1821 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1822 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1823 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1824 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1826 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1827 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1828 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1829 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1831 MO_F_Add w -> triv_float w FADD
1832 MO_F_Sub w -> triv_float w FSUB
1833 MO_F_Mul w -> triv_float w FMUL
1834 MO_F_Quot w -> triv_float w FDIV
1836 -- optimize addition with 32-bit immediate
1840 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1841 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1844 (src, srcCode) <- getSomeReg x
1845 let imm = litToImm lit
1846 code dst = srcCode `appOL` toOL [
1847 ADDIS dst src (HA imm),
1848 ADD dst dst (RIImm (LO imm))
1850 return (Any II32 code)
1851 _ -> trivialCode W32 True ADD x y
1853 MO_Add rep -> trivialCode rep True ADD x y
1855 case y of -- subfi ('subtract from' with immediate) doesn't exist
1856 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1857 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1858 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1860 MO_Mul rep -> trivialCode rep True MULLW x y
1862 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1864 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1865 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1867 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1868 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1870 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1871 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1873 MO_And rep -> trivialCode rep False AND x y
1874 MO_Or rep -> trivialCode rep False OR x y
1875 MO_Xor rep -> trivialCode rep False XOR x y
1877 MO_Shl rep -> trivialCode rep False SLW x y
1878 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1879 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1881 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1882 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1884 getRegister (CmmLit (CmmInt i rep))
1885 | Just imm <- makeImmediate rep True i
1887 code dst = unitOL (LI dst imm)
1889 return (Any (intSize rep) code)
1891 getRegister (CmmLit (CmmFloat f frep)) = do
1892 lbl <- getNewLabelNat
1893 dflags <- getDynFlagsNat
1894 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1895 Amode addr addr_code <- getAmode dynRef
1896 let size = floatSize frep
1898 LDATA ReadOnlyData [CmmDataLabel lbl,
1899 CmmStaticLit (CmmFloat f frep)]
1900 `consOL` (addr_code `snocOL` LD size dst addr)
1901 return (Any size code)
1903 getRegister (CmmLit lit)
1904 = let rep = cmmLitType lit
1908 ADD dst dst (RIImm (LO imm))
1910 in return (Any (cmmTypeSize rep) code)
1912 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1914 -- extend?Rep: wrap integer expression of type rep
1915 -- in a conversion to II32
-- W32 is already the native width handled here, so it passes through
-- unchanged; narrower widths get an explicit signed (MO_SS_Conv) or
-- unsigned (MO_UU_Conv) widening MachOp wrapped around the expression.
1916 extendSExpr W32 x = x
1917 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
1918 extendUExpr W32 x = x
1919 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1921 #endif /* powerpc_TARGET_ARCH */
1924 -- -----------------------------------------------------------------------------
1925 -- The 'Amode' type: Memory addressing modes passed up the tree.
1927 data Amode = Amode AddrMode InstrBlock
1930 Now, given a tree (the argument to an CmmLoad) that references memory,
1931 produce a suitable addressing mode.
1933 A Rule of the Game (tm) for Amodes: use of the addr bit must
1934 immediately follow use of the code part, since the code part puts
1935 values in registers which the addr then refers to. So you can't put
1936 anything in between, lest it overwrite some of those registers. If
1937 you need to do some other computation between the code part and use of
1938 the addr bit, first store the effective address from the amode in a
1939 temporary, then do the other computation, and then use the temporary:
1943 ... other computation ...
-- | Produce an addressing mode for a memory-referencing expression.
-- A 'CmmRegOff' is first rewritten by 'mangleIndexTree' and then
-- re-dispatched through 'getAmode'.
1947 getAmode :: CmmExpr -> NatM Amode
1948 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1950 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1952 #if alpha_TARGET_ARCH
1954 getAmode (StPrim IntSubOp [x, StInt i])
1955 = getNewRegNat PtrRep `thenNat` \ tmp ->
1956 getRegister x `thenNat` \ register ->
1958 code = registerCode register tmp
1959 reg = registerName register tmp
1960 off = ImmInt (-(fromInteger i))
1962 return (Amode (AddrRegImm reg off) code)
1964 getAmode (StPrim IntAddOp [x, StInt i])
1965 = getNewRegNat PtrRep `thenNat` \ tmp ->
1966 getRegister x `thenNat` \ register ->
1968 code = registerCode register tmp
1969 reg = registerName register tmp
1970 off = ImmInt (fromInteger i)
1972 return (Amode (AddrRegImm reg off) code)
1976 = return (Amode (AddrImm imm__2) id)
1979 imm__2 = case imm of Just x -> x
1982 = getNewRegNat PtrRep `thenNat` \ tmp ->
1983 getRegister other `thenNat` \ register ->
1985 code = registerCode register tmp
1986 reg = registerName register tmp
1988 return (Amode (AddrReg reg) code)
1990 #endif /* alpha_TARGET_ARCH */
1992 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1994 #if x86_64_TARGET_ARCH
1996 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1997 CmmLit displacement])
1998 = return $ Amode (ripRel (litToImm displacement)) nilOL
2002 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2004 -- This is all just ridiculous, since it carefully undoes
2005 -- what mangleIndexTree has just done.
2006 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
2008 -- ASSERT(rep == II32)???
2009 = do (x_reg, x_code) <- getSomeReg x
2010 let off = ImmInt (-(fromInteger i))
2011 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2013 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
2015 -- ASSERT(rep == II32)???
2016 = do (x_reg, x_code) <- getSomeReg x
2017 let off = ImmInt (fromInteger i)
2018 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
2020 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
2021 -- recognised by the next rule.
2022 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
2024 = getAmode (CmmMachOp (MO_Add rep) [b,a])
2026 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
2027 [y, CmmLit (CmmInt shift _)]])
2028 | shift == 0 || shift == 1 || shift == 2 || shift == 3
2029 = x86_complex_amode x y shift 0
2031 getAmode (CmmMachOp (MO_Add rep)
2032 [x, CmmMachOp (MO_Add _)
2033 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
2034 CmmLit (CmmInt offset _)]])
2035 | shift == 0 || shift == 1 || shift == 2 || shift == 3
2036 && is32BitInteger offset
2037 = x86_complex_amode x y shift offset
2039 getAmode (CmmMachOp (MO_Add rep) [x,y])
2040 = x86_complex_amode x y 0 0
2042 getAmode (CmmLit lit) | is32BitLit lit
2043 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
2046 (reg,code) <- getSomeReg expr
2047 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
2050 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
2051 x86_complex_amode base index shift offset
2052 = do (x_reg, x_code) <- getNonClobberedReg base
2053 -- x must be in a temp, because it has to stay live over y_code
2054 -- we could compare x_reg and y_reg and do something better here...
2055 (y_reg, y_code) <- getSomeReg index
2057 code = x_code `appOL` y_code
2058 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
2059 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
2062 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2064 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2066 #if sparc_TARGET_ARCH
2068 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
2071 (reg, code) <- getSomeReg x
2073 off = ImmInt (-(fromInteger i))
2074 return (Amode (AddrRegImm reg off) code)
2077 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
2080 (reg, code) <- getSomeReg x
2082 off = ImmInt (fromInteger i)
2083 return (Amode (AddrRegImm reg off) code)
2085 getAmode (CmmMachOp (MO_Add rep) [x, y])
2087 (regX, codeX) <- getSomeReg x
2088 (regY, codeY) <- getSomeReg y
2090 code = codeX `appOL` codeY
2091 return (Amode (AddrRegReg regX regY) code)
2093 getAmode (CmmLit lit)
2095 let imm__2 = litToImm lit
2096 tmp1 <- getNewRegNat II32
2097 tmp2 <- getNewRegNat II32
2099 let code = toOL [ SETHI (HI imm__2) tmp1
2100 , OR False tmp1 (RIImm (LO imm__2)) tmp2]
2102 return (Amode (AddrRegReg tmp2 g0) code)
2106 (reg, code) <- getSomeReg other
2109 return (Amode (AddrRegImm reg off) code)
2111 #endif /* sparc_TARGET_ARCH */
2113 #ifdef powerpc_TARGET_ARCH
2114 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
2115 | Just off <- makeImmediate W32 True (-i)
2117 (reg, code) <- getSomeReg x
2118 return (Amode (AddrRegImm reg off) code)
2121 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
2122 | Just off <- makeImmediate W32 True i
2124 (reg, code) <- getSomeReg x
2125 return (Amode (AddrRegImm reg off) code)
2127 -- optimize addition with 32-bit immediate
2129 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
2131 tmp <- getNewRegNat II32
2132 (src, srcCode) <- getSomeReg x
2133 let imm = litToImm lit
2134 code = srcCode `snocOL` ADDIS tmp src (HA imm)
2135 return (Amode (AddrRegImm tmp (LO imm)) code)
2137 getAmode (CmmLit lit)
2139 tmp <- getNewRegNat II32
2140 let imm = litToImm lit
2141 code = unitOL (LIS tmp (HA imm))
2142 return (Amode (AddrRegImm tmp (LO imm)) code)
2144 getAmode (CmmMachOp (MO_Add W32) [x, y])
2146 (regX, codeX) <- getSomeReg x
2147 (regY, codeY) <- getSomeReg y
2148 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
2152 (reg, code) <- getSomeReg other
2155 return (Amode (AddrRegImm reg off) code)
2156 #endif /* powerpc_TARGET_ARCH */
2158 -- -----------------------------------------------------------------------------
2159 -- getOperand: sometimes any operand will do.
2161 -- getNonClobberedOperand: the value of the operand will remain valid across
2162 -- the computation of an arbitrary expression, unless the expression
2163 -- is computed directly into a register which the operand refers to
2164 -- (see trivialCode where this function is used for an example).
2166 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Produce an operand whose value stays valid across the computation of
-- an arbitrary expression (unless that expression is computed directly
-- into a register the operand refers to — see trivialCode).
2168 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2169 #if x86_64_TARGET_ARCH
-- x86_64: a non-zero floating literal is materialised in a read-only
-- data section and addressed RIP-relative.
2170 getNonClobberedOperand (CmmLit lit)
2171 | isSuitableFloatingPointLit lit = do
2172 lbl <- getNewLabelNat
2173 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2175 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- A non-float literal that fits 32 bits can be an immediate operand.
2177 getNonClobberedOperand (CmmLit lit)
2178 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2179 return (OpImm (litToImm lit), nilOL)
-- A load can be a memory operand, but if the address uses registers that
-- could be clobbered (caller-saved real regs), LEA the address into a
-- fresh temporary first so the operand survives.
2180 getNonClobberedOperand (CmmLoad mem pk)
2181 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2182 Amode src mem_code <- getAmode mem
2184 if (amodeCouldBeClobbered src)
2186 tmp <- getNewRegNat wordSize
2187 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2188 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2191 return (OpAddr src', save_code `appOL` mem_code)
-- Fallback: compute into a non-clobberable register.
2192 getNonClobberedOperand e = do
2193 (reg, code) <- getNonClobberedReg e
2194 return (OpReg reg, code)
-- | True if the addressing mode mentions any register that might be
-- clobbered by intervening code: any real (machine) register that is
-- allocatable (freeReg) counts as clobberable; virtual registers do not.
2196 amodeCouldBeClobbered :: AddrMode -> Bool
2197 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-- Helper predicate (its 'where' line is elided in this listing).
2199 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2200 regClobbered _ = False
2202 -- getOperand: the operand is not required to remain valid across the
2203 -- computation of an arbitrary expression.
2204 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2205 #if x86_64_TARGET_ARCH
-- Same literal handling as getNonClobberedOperand: float literals from a
-- RIP-relative read-only data label, small integer literals as
-- immediates.
2206 getOperand (CmmLit lit)
2207 | isSuitableFloatingPointLit lit = do
2208 lbl <- getNewLabelNat
2209 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2211 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2213 getOperand (CmmLit lit)
2214 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2215 return (OpImm (litToImm lit), nilOL)
-- Loads become memory operands directly — no clobber protection needed
-- here, unlike getNonClobberedOperand.
2216 getOperand (CmmLoad mem pk)
2217 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2218 Amode src mem_code <- getAmode mem
2219 return (OpAddr src, mem_code)
-- Fallback (its head line is elided): any register will do.
2221 (reg, code) <- getSomeReg e
2222 return (OpReg reg, code)
-- | Can this expression be used directly as an x86 operand (i.e. without
-- first being computed into a register)?  Loads and suitable literals
-- qualify; the catch-all False case is elided in this listing.
2224 isOperand :: CmmExpr -> Bool
2225 isOperand (CmmLoad _ _) = True
2226 isOperand (CmmLit lit) = is32BitLit lit
2227 || isSuitableFloatingPointLit lit
2230 -- if we want a floating-point literal as an operand, we can
2231 -- use it directly from memory. However, if the literal is
2232 -- zero, we're better off generating it into a register using
-- (the rest of the note — presumably "xor r,r" — is elided)
2234 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2235 isSuitableFloatingPointLit _ = False
-- | Get either a memory operand (for a load) or a non-clobberable
-- register operand (everything else).
2237 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2238 getRegOrMem (CmmLoad mem pk)
2239 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2240 Amode src mem_code <- getAmode mem
2241 return (OpAddr src, mem_code)
-- Fallback equation head is elided; the result must survive arbitrary
-- intervening code, hence getNonClobberedReg.
2243 (reg, code) <- getNonClobberedReg e
2244 return (OpReg reg, code)
-- | On x86_64, only W64 integers need a range check for use as 32-bit
-- immediates; the remaining cases (labels, other widths) are elided in
-- this listing.
2246 #if x86_64_TARGET_ARCH
2247 is32BitLit (CmmInt i W64) = is32BitInteger i
2248 -- assume that labels are in the range 0-2^31-1: this assumes the
2249 -- small memory model (see gcc docs, -mcmodel=small).
-- | True iff the integer, after truncation to 64 bits, lies in the
-- signed 32-bit range.
--
-- A CmmInt is intended to be truncated to the appropriate number of
-- bits, so we truncate to Int64 first.  This matters because e.g. -1 as
-- a CmmInt might be written as either -1 or 18446744073709551615.
is32BitInteger :: Integer -> Bool
is32BitInteger i = lo <= i64 && i64 <= hi
  where
    i64 = fromIntegral i :: Int64
    lo  = -0x80000000
    hi  = 0x7fffffff
2262 -- -----------------------------------------------------------------------------
2263 -- The 'CondCode' type: Condition codes passed up the tree.
-- | Condition codes passed up the tree: the Bool records whether the
-- comparison was floating-point (so callers can adjust for unordered
-- results), plus the condition to branch on and the code that sets it.
2265 data CondCode = CondCode Bool Cond InstrBlock
2267 -- Set up a condition code for a conditional branch.
2269 getCondCode :: CmmExpr -> NatM CondCode
2271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2273 #if alpha_TARGET_ARCH
-- Alpha has no separate condition-code pass in this code generator.
2274 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2275 #endif /* alpha_TARGET_ARCH */
2277 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2279 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2280 -- yes, they really do seem to want exactly the same!
-- Dispatch on the comparison MachOp: float comparisons (by width) go to
-- condFltCode, integer comparisons to condIntCode with signed (GTT/GE/
-- LTT/LE) vs unsigned (GU/GEU/LU/LEU) condition variants.
2282 getCondCode (CmmMachOp mop [x, y])
2285 MO_F_Eq W32 -> condFltCode EQQ x y
2286 MO_F_Ne W32 -> condFltCode NE x y
2287 MO_F_Gt W32 -> condFltCode GTT x y
2288 MO_F_Ge W32 -> condFltCode GE x y
2289 MO_F_Lt W32 -> condFltCode LTT x y
2290 MO_F_Le W32 -> condFltCode LE x y
2292 MO_F_Eq W64 -> condFltCode EQQ x y
2293 MO_F_Ne W64 -> condFltCode NE x y
2294 MO_F_Gt W64 -> condFltCode GTT x y
2295 MO_F_Ge W64 -> condFltCode GE x y
2296 MO_F_Lt W64 -> condFltCode LTT x y
2297 MO_F_Le W64 -> condFltCode LE x y
2299 MO_Eq rep -> condIntCode EQQ x y
2300 MO_Ne rep -> condIntCode NE x y
2302 MO_S_Gt rep -> condIntCode GTT x y
2303 MO_S_Ge rep -> condIntCode GE x y
2304 MO_S_Lt rep -> condIntCode LTT x y
2305 MO_S_Le rep -> condIntCode LE x y
2307 MO_U_Gt rep -> condIntCode GU x y
2308 MO_U_Ge rep -> condIntCode GEU x y
2309 MO_U_Lt rep -> condIntCode LU x y
2310 MO_U_Le rep -> condIntCode LEU x y
2312 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
-- Anything that is not a binary comparison is a compiler bug here.
2314 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2316 #elif powerpc_TARGET_ARCH
2318 -- almost the same as everywhere else - but we need to
2319 -- extend small integers to 32 bit first
-- PowerPC compares full 32-bit registers, so sub-word operands are
-- widened first: sign-extended (extendSExpr) for signed comparisons,
-- zero-extended (extendUExpr) for unsigned and equality.
2321 getCondCode (CmmMachOp mop [x, y])
2323 MO_F_Eq W32 -> condFltCode EQQ x y
2324 MO_F_Ne W32 -> condFltCode NE x y
2325 MO_F_Gt W32 -> condFltCode GTT x y
2326 MO_F_Ge W32 -> condFltCode GE x y
2327 MO_F_Lt W32 -> condFltCode LTT x y
2328 MO_F_Le W32 -> condFltCode LE x y
2330 MO_F_Eq W64 -> condFltCode EQQ x y
2331 MO_F_Ne W64 -> condFltCode NE x y
2332 MO_F_Gt W64 -> condFltCode GTT x y
2333 MO_F_Ge W64 -> condFltCode GE x y
2334 MO_F_Lt W64 -> condFltCode LTT x y
2335 MO_F_Le W64 -> condFltCode LE x y
2337 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2338 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2340 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2341 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2342 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2343 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2345 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2346 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2347 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2348 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2350 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2352 getCondCode other = panic "getCondCode(2)(powerpc)"
2358 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2359 -- passed back up the tree.
-- | Turn a boolean expression (a comparison of two operands) into a
-- condition code to be passed back up the tree.
2361 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2363 #if alpha_TARGET_ARCH
2364 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2365 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2366 #endif /* alpha_TARGET_ARCH */
2368 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2369 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2371 -- memory vs immediate
-- Compare a memory operand directly against a small immediate — no need
-- to load the memory value into a register first.
2372 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2373 Amode x_addr x_code <- getAmode x
2376 code = x_code `snocOL`
2377 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2379 return (CondCode False cond code)
2381 -- anything vs zero, using a mask
2382 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) `cmp` 0 becomes a single TEST instruction.
2383 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2384 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2386 (x_reg, x_code) <- getSomeReg x
2388 code = x_code `snocOL`
2389 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2391 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets flags without a CMP.
2394 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2395 (x_reg, x_code) <- getSomeReg x
2397 code = x_code `snocOL`
2398 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2400 return (CondCode False cond code)
2402 -- anything vs operand
-- x must survive the computation of y, hence getNonClobberedReg.
2403 condIntCode cond x y | isOperand y = do
2404 (x_reg, x_code) <- getNonClobberedReg x
2405 (y_op, y_code) <- getOperand y
2407 code = x_code `appOL` y_code `snocOL`
2408 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2410 return (CondCode False cond code)
2412 -- anything vs anything
-- General case: y into a stable register, x as register-or-memory.
2413 condIntCode cond x y = do
2414 (y_reg, y_code) <- getNonClobberedReg y
2415 (x_op, x_code) <- getRegOrMem x
2417 code = y_code `appOL`
2419 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2421 return (CondCode False cond code)
2424 #if i386_TARGET_ARCH
-- x87 float comparison: only the six conditions the GCMP pseudo-insn
-- supports are allowed (no unsigned variants).
2425 condFltCode cond x y
2426 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2427 (x_reg, x_code) <- getNonClobberedReg x
2428 (y_reg, y_code) <- getSomeReg y
2430 code = x_code `appOL` y_code `snocOL`
2431 GCMP cond x_reg y_reg
2432 -- The GCMP insn does the test and sets the zero flag if comparable
2433 -- and true. Hence we always supply EQQ as the condition to test.
2434 return (CondCode True EQQ code)
2435 #endif /* i386_TARGET_ARCH */
2437 #if x86_64_TARGET_ARCH
2438 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2439 -- an operand, but the right must be a reg. We can probably do better
2440 -- than this general case...
2441 condFltCode cond x y = do
2442 (x_reg, x_code) <- getNonClobberedReg x
2443 (y_op, y_code) <- getOperand y
2445 code = x_code `appOL`
2447 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2448 -- NB(1): we need to use the unsigned comparison operators on the
2449 -- result of this comparison.
-- SSE2 UCOMISS/UCOMISD set CF/ZF/PF (like an unsigned compare), so the
-- condition is mapped to its unsigned counterpart here.
2451 return (CondCode True (condToUnsigned cond) code)
2454 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2456 #if sparc_TARGET_ARCH
-- SPARC sets condition codes via a flag-setting SUBcc (SUB with the
-- "set condition codes" flag True) whose result is discarded into g0.
2458 condIntCode cond x (CmmLit (CmmInt y rep))
2461 (src1, code) <- getSomeReg x
2463 src2 = ImmInt (fromInteger y)
2464 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2465 return (CondCode False cond code')
2467 condIntCode cond x y = do
2468 (src1, code1) <- getSomeReg x
2469 (src2, code2) <- getSomeReg y
2471 code__2 = code1 `appOL` code2 `snocOL`
2472 SUB False True src1 (RIReg src2) g0
2473 return (CondCode False cond code__2)
-- Float comparison: if the two operands differ in width, the 32-bit one
-- is promoted to double (FxTOy FF32 FF64) before the FCMP.
2476 condFltCode cond x y = do
2477 (src1, code1) <- getSomeReg x
2478 (src2, code2) <- getSomeReg y
2479 tmp <- getNewRegNat FF64
2481 promote x = FxTOy FF32 FF64 x tmp
2487 if pk1 `cmmEqType` pk2 then
2488 code1 `appOL` code2 `snocOL`
2489 FCMP True (cmmTypeSize pk1) src1 src2
2490 else if typeWidth pk1 == W32 then
2491 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2492 FCMP True FF64 tmp src2
2494 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2495 FCMP True FF64 src1 tmp
2496 return (CondCode True cond code__2)
2498 #endif /* sparc_TARGET_ARCH */
2500 #if powerpc_TARGET_ARCH
2501 -- ###FIXME: I16 and I8!
-- CMPL is the unsigned compare, CMP the signed one; the immediate form
-- is used only when makeImmediate says the constant fits.
2502 condIntCode cond x (CmmLit (CmmInt y rep))
2503 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2505 (src1, code) <- getSomeReg x
2507 code' = code `snocOL`
2508 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2509 return (CondCode False cond code')
2511 condIntCode cond x y = do
2512 (src1, code1) <- getSomeReg x
2513 (src2, code2) <- getSomeReg y
2515 code' = code1 `appOL` code2 `snocOL`
2516 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2517 return (CondCode False cond code')
2519 condFltCode cond x y = do
2520 (src1, code1) <- getSomeReg x
2521 (src2, code2) <- getSomeReg y
2523 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2524 code'' = case cond of -- twiddle CR to handle unordered case
-- For GE/LE, CRNOR folds the "unordered" CR outcome into the bit that
-- will be tested, so NaN comparisons branch the right way.
2525 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2526 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR field bit positions on PowerPC: LT=0, GT=1, EQ=2.
2529 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2530 return (CondCode True cond code'')
2532 #endif /* powerpc_TARGET_ARCH */
2534 -- -----------------------------------------------------------------------------
2535 -- Generating assignments
2537 -- Assignments are really at the heart of the whole code generation
2538 -- business. Almost all top-level nodes of any real importance are
2539 -- assignments, which correspond to loads, stores, or register
2540 -- transfers. If we're really lucky, some of the register transfers
2541 -- will go away, because we can use the destination register to
2542 -- complete the code generation for the right hand side. This only
2543 -- fails when the right hand side is forced into a fixed register
2544 -- (e.g. the result of a call).
-- Assignment code generators: *_IntCode for integer stores/register
-- transfers, *_FltCode for floating point.  assignMem_* writes to a
-- memory address expression; assignReg_* writes to a CmmReg.
2546 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2547 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2549 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2550 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2552 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2554 #if alpha_TARGET_ARCH
-- Alpha integer assignment (pre-monadic thenNat style; this whole
-- section predates the rest of the module and is likely bit-rotted).
2556 assignIntCode pk (CmmLoad dst _) src
2557 = getNewRegNat IntRep `thenNat` \ tmp ->
2558 getAmode dst `thenNat` \ amode ->
2559 getRegister src `thenNat` \ register ->
2561 code1 = amodeCode amode []
2562 dst__2 = amodeAddr amode
2563 code2 = registerCode register tmp []
2564 src__2 = registerName register tmp
2565 sz = primRepToSize pk
2566 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register-to-register: if the source is fixed, emit an OR-based move
-- (OR src,src -> dst is the Alpha idiom for a register copy).
2570 assignIntCode pk dst src
2571 = getRegister dst `thenNat` \ register1 ->
2572 getRegister src `thenNat` \ register2 ->
2574 dst__2 = registerName register1 zeroh
2575 code = registerCode register2 dst__2
2576 src__2 = registerName register2 dst__2
2577 code__2 = if isFixed register2
2578 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2583 #endif /* alpha_TARGET_ARCH */
2585 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2587 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2589 -- integer assignment to memory
2591 -- specific case of adding/subtracting an integer to a particular address.
2592 -- ToDo: catch other cases where we can use an operation directly on a memory
-- mem[addr] := mem[addr] `op` imm becomes a single read-modify-write
-- ADD/SUB on the memory operand, provided the immediate fits (or the
-- store is narrower than 64 bits).
2594 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2595 CmmLit (CmmInt i _)])
2596 | addr == addr2, pk /= II64 || is32BitInteger i,
2597 Just instr <- check op
2598 = do Amode amode code_addr <- getAmode addr
2599 let code = code_addr `snocOL`
2600 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2603 check (MO_Add _) = Just ADD
2604 check (MO_Sub _) = Just SUB
-- General memory store: evaluate the source to a register-or-immediate
-- operand, then MOV it to the address.
2609 assignMem_IntCode pk addr src = do
2610 Amode addr code_addr <- getAmode addr
2611 (code_src, op_src) <- get_op_RI src
2613 code = code_src `appOL`
2615 MOV pk op_src (OpAddr addr)
2616 -- NOTE: op_src is stable, so it will still be valid
2617 -- after code_addr. This may involve the introduction
2618 -- of an extra MOV to a temporary register, but we hope
2619 -- the register allocator will get rid of it.
-- Source as register-or-immediate: small literals stay immediates,
-- everything else goes through a non-clobberable register.
2623 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2624 get_op_RI (CmmLit lit) | is32BitLit lit
2625 = return (nilOL, OpImm (litToImm lit))
2627 = do (reg,code) <- getNonClobberedReg op
2628 return (code, OpReg reg)
2631 -- Assign; dst is a reg, rhs is mem
2632 assignReg_IntCode pk reg (CmmLoad src _) = do
2633 load_code <- intLoadCode (MOV pk) src
2634 return (load_code (getRegisterReg reg))
2636 -- dst is a reg, but src could be anything
2637 assignReg_IntCode pk reg src = do
2638 code <- getAnyReg src
2639 return (code (getRegisterReg reg))
2641 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2643 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2645 #if sparc_TARGET_ARCH
-- Store: evaluate source into a register, then ST to the address.
2647 assignMem_IntCode pk addr src = do
2648 (srcReg, code) <- getSomeReg src
2649 Amode dstAddr addr_code <- getAmode addr
2650 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Register assignment: Any results are generated straight into dst;
-- Fixed results are copied with OR g0,src -> dst (SPARC register move).
2652 assignReg_IntCode pk reg src = do
2653 r <- getRegister src
2655 Any _ code -> code dst
2656 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2658 dst = getRegisterReg reg
2661 #endif /* sparc_TARGET_ARCH */
2663 #if powerpc_TARGET_ARCH
-- Store: evaluate source into a register, then ST to the address.
2665 assignMem_IntCode pk addr src = do
2666 (srcReg, code) <- getSomeReg src
2667 Amode dstAddr addr_code <- getAmode addr
2668 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2670 -- dst is a reg, but src could be anything
-- Any results are generated straight into dst; Fixed results are copied
-- with MR (PowerPC move-register).
2671 assignReg_IntCode pk reg src
2673 r <- getRegister src
2675 Any _ code -> code dst
2676 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2678 dst = getRegisterReg reg
2680 #endif /* powerpc_TARGET_ARCH */
2683 -- -----------------------------------------------------------------------------
2684 -- Floating-point assignments
2686 #if alpha_TARGET_ARCH
-- Alpha float assignment (pre-monadic thenNat style; mirrors
-- assignIntCode above but uses FMOV for the register-copy case).
2688 assignFltCode pk (CmmLoad dst _) src
2689 = getNewRegNat pk `thenNat` \ tmp ->
2690 getAmode dst `thenNat` \ amode ->
2691 getRegister src `thenNat` \ register ->
2693 code1 = amodeCode amode []
2694 dst__2 = amodeAddr amode
2695 code2 = registerCode register tmp []
2696 src__2 = registerName register tmp
2697 sz = primRepToSize pk
2698 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2702 assignFltCode pk dst src
2703 = getRegister dst `thenNat` \ register1 ->
2704 getRegister src `thenNat` \ register2 ->
2706 dst__2 = registerName register1 zeroh
2707 code = registerCode register2 dst__2
2708 src__2 = registerName register2 dst__2
2709 code__2 = if isFixed register2
2710 then code . mkSeqInstr (FMOV src__2 dst__2)
2715 #endif /* alpha_TARGET_ARCH */
2717 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2719 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2721 -- Floating point assignment to memory
-- i386 uses the x87 store pseudo-insn GST; x86_64 stores the SSE
-- register with a plain MOV.
2722 assignMem_FltCode pk addr src = do
2723 (src_reg, src_code) <- getNonClobberedReg src
2724 Amode addr addr_code <- getAmode addr
2726 code = src_code `appOL`
2728 IF_ARCH_i386(GST pk src_reg addr,
2729 MOV pk (OpReg src_reg) (OpAddr addr))
2732 -- Floating point assignment to a register/temporary
2733 assignReg_FltCode pk reg src = do
2734 src_code <- getAnyReg src
2735 return (src_code (getRegisterReg reg))
2737 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2739 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2741 #if sparc_TARGET_ARCH
2743 -- Floating point assignment to memory
-- If the store width differs from the source's width, convert through a
-- temporary with FxTOy before the ST.
2744 assignMem_FltCode pk addr src = do
2745 Amode dst__2 code1 <- getAmode addr
2746 (src__2, code2) <- getSomeReg src
2747 tmp1 <- getNewRegNat pk
2749 pk__2 = cmmExprType src
2750 code__2 = code1 `appOL` code2 `appOL`
2751 if sizeToWidth pk == typeWidth pk__2
2752 then unitOL (ST pk src__2 dst__2)
2753 else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2754 , ST pk tmp1 dst__2]
2757 -- Floating point assignment to a register/temporary
-- Any results are generated straight into the destination; Fixed
-- results are copied with FMOV.
2758 assignReg_FltCode pk dstCmmReg srcCmmExpr = do
2759 srcRegister <- getRegister srcCmmExpr
2760 let dstReg = getRegisterReg dstCmmReg
2762 return $ case srcRegister of
2763 Any _ code -> code dstReg
2764 Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
2766 #endif /* sparc_TARGET_ARCH */
2768 #if powerpc_TARGET_ARCH
-- On PowerPC the integer assignment paths handle floats too (ST/MR work
-- uniformly via the Size argument), so the float versions are aliases.
2771 assignMem_FltCode = assignMem_IntCode
2772 assignReg_FltCode = assignReg_IntCode
2774 #endif /* powerpc_TARGET_ARCH */
2777 -- -----------------------------------------------------------------------------
2778 -- Generating a non-local jump
2780 -- (If applicable) Do not fill the delay slots here; you will confuse the
2781 -- register allocator.
2783 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2785 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2787 #if alpha_TARGET_ARCH
-- Alpha non-local jump: local (asm-temp) labels can use a direct BR;
-- otherwise load the target into pv and jump through it.
2789 genJump (CmmLabel lbl)
2790 | isAsmTemp lbl = returnInstr (BR target)
2791 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2793 target = ImmCLbl lbl
-- Computed jump: evaluate the target into pv, copying first if the
-- register is fixed elsewhere.
2796 = getRegister tree `thenNat` \ register ->
2797 getNewRegNat PtrRep `thenNat` \ tmp ->
2799 dst = registerName register pv
2800 code = registerCode register pv
2801 target = registerName register pv
2803 if isFixed register then
2804 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2806 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2808 #endif /* alpha_TARGET_ARCH */
2810 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2812 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86 can jump through memory, to an immediate label, or through a
-- register, so all three CmmExpr shapes map to one JMP each.
2814 genJump (CmmLoad mem pk) = do
2815 Amode target code <- getAmode mem
2816 return (code `snocOL` JMP (OpAddr target))
2818 genJump (CmmLit lit) = do
2819 return (unitOL (JMP (OpImm (litToImm lit))))
-- General case (head line elided): evaluate the target into a register.
2822 (reg,code) <- getSomeReg expr
2823 return (code `snocOL` JMP (OpReg reg))
2825 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2827 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2829 #if sparc_TARGET_ARCH
-- SPARC: tail-call via CALL for a known label, JMP through a register
-- otherwise.  Each is followed by a NOP to keep the delay slot empty —
-- filling it here would confuse the register allocator.
2831 genJump (CmmLit (CmmLabel lbl))
2832 = return (toOL [CALL (Left target) 0 True, NOP])
2834 target = ImmCLbl lbl
2838 (target, code) <- getSomeReg tree
2839 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2841 #endif /* sparc_TARGET_ARCH */
2843 #if powerpc_TARGET_ARCH
-- PowerPC: direct JMP for a known label; otherwise move the target into
-- the count register (MTCTR) and branch through it (BCTR).
2844 genJump (CmmLit (CmmLabel lbl))
2845 = return (unitOL $ JMP lbl)
2849 (target,code) <- getSomeReg tree
2850 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2851 #endif /* powerpc_TARGET_ARCH */
2854 -- -----------------------------------------------------------------------------
2855 -- Unconditional branches
-- | Emit an unconditional branch to the given basic block, using the
-- target-specific branch instruction(s) from RegAllocInfo.
genBranch :: BlockId -> NatM InstrBlock
genBranch bid = return (toOL (mkBranchInstr bid))
2861 -- -----------------------------------------------------------------------------
2862 -- Conditional jumps
2865 Conditional jumps are always to local labels, so we can use branch
2866 instructions. We peek at the arguments to decide what kind of
2869 ALPHA: For comparisons with 0, we're laughing, because we can just do
2870 the desired conditional branch.
2872 I386: First, we have to ensure that the condition
2873 codes are set according to the supplied comparison operation.
2875 SPARC: First, we have to ensure that the condition codes are set
2876 according to the supplied comparison operation. We generate slightly
2877 different code for floating point comparisons, because a floating
2878 point operation cannot directly precede a @BF@. We assume the worst
2879 and fill that slot with a @NOP@.
2881 SPARC: Do not fill the delay slots here; you will confuse the register
2887 :: BlockId -- the branch target
2888 -> CmmExpr -- the condition on which to branch
2891 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2893 #if alpha_TARGET_ARCH
-- Alpha conditional jumps (StixTree era — presumably dead code kept
-- under #if; the rest of the module uses Cmm).  Comparisons against
-- zero use the BI/BF branch-on-register forms directly; general
-- comparisons materialise a 0/1 result with CMP/FCMP and branch on it
-- with an inverted condition where the hardware lacks the direct one.
2895 genCondJump id (StPrim op [x, StInt 0])
2896 = getRegister x `thenNat` \ register ->
2897 getNewRegNat (registerRep register)
2900 code = registerCode register tmp
2901 value = registerName register tmp
2902 pk = registerRep register
2903 target = ImmCLbl lbl
2905 returnSeq code [BI (cmpOp op) value target]
2907 cmpOp CharGtOp = GTT
2909 cmpOp CharEqOp = EQQ
2911 cmpOp CharLtOp = LTT
-- NOTE(review): the Word/Addr mappings below (ALWAYS/NEVER/EQQ) encode
-- the result of comparing an unsigned value against zero, e.g.
-- unsigned >= 0 is ALWAYS and unsigned < 0 is NEVER.
2920 cmpOp WordGeOp = ALWAYS
2921 cmpOp WordEqOp = EQQ
2923 cmpOp WordLtOp = NEVER
2924 cmpOp WordLeOp = EQQ
2926 cmpOp AddrGeOp = ALWAYS
2927 cmpOp AddrEqOp = EQQ
2929 cmpOp AddrLtOp = NEVER
2930 cmpOp AddrLeOp = EQQ
-- Float compared against literal 0.0: branch-on-float-register BF.
2932 genCondJump lbl (StPrim op [x, StDouble 0.0])
2933 = getRegister x `thenNat` \ register ->
2934 getNewRegNat (registerRep register)
2937 code = registerCode register tmp
2938 value = registerName register tmp
2939 pk = registerRep register
2940 target = ImmCLbl lbl
2942 return (code . mkSeqInstr (BF (cmpOp op) value target))
2944 cmpOp FloatGtOp = GTT
2945 cmpOp FloatGeOp = GE
2946 cmpOp FloatEqOp = EQQ
2947 cmpOp FloatNeOp = NE
2948 cmpOp FloatLtOp = LTT
2949 cmpOp FloatLeOp = LE
2950 cmpOp DoubleGtOp = GTT
2951 cmpOp DoubleGeOp = GE
2952 cmpOp DoubleEqOp = EQQ
2953 cmpOp DoubleNeOp = NE
2954 cmpOp DoubleLtOp = LTT
2955 cmpOp DoubleLeOp = LE
-- General float comparison: FCMP computes the *negated* predicate where
-- needed (e.g. Gt uses FCMP LE) and the branch condition is inverted to
-- compensate.
2957 genCondJump lbl (StPrim op [x, y])
2959 = trivialFCode pr instr x y `thenNat` \ register ->
2960 getNewRegNat FF64 `thenNat` \ tmp ->
2962 code = registerCode register tmp
2963 result = registerName register tmp
2964 target = ImmCLbl lbl
2966 return (code . mkSeqInstr (BF cond result target))
2968 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2970 fltCmpOp op = case op of
2984 (instr, cond) = case op of
2985 FloatGtOp -> (FCMP TF LE, EQQ)
2986 FloatGeOp -> (FCMP TF LTT, EQQ)
2987 FloatEqOp -> (FCMP TF EQQ, NE)
2988 FloatNeOp -> (FCMP TF EQQ, EQQ)
2989 FloatLtOp -> (FCMP TF LTT, NE)
2990 FloatLeOp -> (FCMP TF LE, NE)
2991 DoubleGtOp -> (FCMP TF LE, EQQ)
2992 DoubleGeOp -> (FCMP TF LTT, EQQ)
2993 DoubleEqOp -> (FCMP TF EQQ, NE)
2994 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2995 DoubleLtOp -> (FCMP TF LTT, NE)
2996 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: same negated-predicate trick with CMP,
-- unsigned variants ULE/ULT for Word and Addr.
2998 genCondJump lbl (StPrim op [x, y])
2999 = trivialCode instr x y `thenNat` \ register ->
3000 getNewRegNat IntRep `thenNat` \ tmp ->
3002 code = registerCode register tmp
3003 result = registerName register tmp
3004 target = ImmCLbl lbl
3006 return (code . mkSeqInstr (BI cond result target))
3008 (instr, cond) = case op of
3009 CharGtOp -> (CMP LE, EQQ)
3010 CharGeOp -> (CMP LTT, EQQ)
3011 CharEqOp -> (CMP EQQ, NE)
3012 CharNeOp -> (CMP EQQ, EQQ)
3013 CharLtOp -> (CMP LTT, NE)
3014 CharLeOp -> (CMP LE, NE)
3015 IntGtOp -> (CMP LE, EQQ)
3016 IntGeOp -> (CMP LTT, EQQ)
3017 IntEqOp -> (CMP EQQ, NE)
3018 IntNeOp -> (CMP EQQ, EQQ)
3019 IntLtOp -> (CMP LTT, NE)
3020 IntLeOp -> (CMP LE, NE)
3021 WordGtOp -> (CMP ULE, EQQ)
3022 WordGeOp -> (CMP ULT, EQQ)
3023 WordEqOp -> (CMP EQQ, NE)
3024 WordNeOp -> (CMP EQQ, EQQ)
3025 WordLtOp -> (CMP ULT, NE)
3026 WordLeOp -> (CMP ULE, NE)
3027 AddrGtOp -> (CMP ULE, EQQ)
3028 AddrGeOp -> (CMP ULT, EQQ)
3029 AddrEqOp -> (CMP EQQ, NE)
3030 AddrNeOp -> (CMP EQQ, EQQ)
3031 AddrLtOp -> (CMP ULT, NE)
3032 AddrLeOp -> (CMP ULE, NE)
3034 #endif /* alpha_TARGET_ARCH */
3036 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3038 #if i386_TARGET_ARCH
-- i386: the condition-code generator has already arranged flags; all
-- that remains is a conditional jump (JXX) on them.
3040 genCondJump id bool = do
3041 CondCode _ cond code <- getCondCode bool
3042 return (code `snocOL` JXX cond id)
3046 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3048 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions jump directly; floating conditions need
-- extra handling (the unordered/NaN case — see the comment with
-- condFltReg), built from the plain test plus "or unordered" / "and
-- ordered" jump sequences.  Several lines are elided in this listing.
3050 genCondJump id bool = do
3051 CondCode is_float cond cond_code <- getCondCode bool
3054 return (cond_code `snocOL` JXX cond id)
3056 lbl <- getBlockIdNat
3058 -- see comment with condFltReg
3059 let code = case cond of
3065 plain_test = unitOL (
3068 or_unordered = toOL [
3072 and_ordered = toOL [
3078 return (cond_code `appOL` code)
3082 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3084 #if sparc_TARGET_ARCH
-- SPARC: float branches (BF) cannot directly follow a floating-point
-- operation, so a NOP is inserted before them; every branch also gets a
-- trailing NOP to keep its delay slot empty.
3086 genCondJump bid bool = do
3087 CondCode is_float cond code <- getCondCode bool
3092 then [NOP, BF cond False bid, NOP]
3093 else [BI cond False bid, NOP]
3097 #endif /* sparc_TARGET_ARCH */
3100 #if powerpc_TARGET_ARCH
-- PowerPC: a single conditional branch (BCC) on the computed condition.
3102 genCondJump id bool = do
3103 CondCode is_float cond code <- getCondCode bool
3104 return (code `snocOL` BCC cond id)
3106 #endif /* powerpc_TARGET_ARCH */
3109 -- -----------------------------------------------------------------------------
3110 -- Generating C calls
3112 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
3113 -- @get_arg@, which moves the arguments to the correct registers/stack
3114 -- locations. Apart from that, the code is easy.
3116 -- (If applicable) Do not fill the delay slots here; you will confuse the
3117 -- register allocator.
3120 :: CmmCallTarget -- function to call
3121 -> HintedCmmFormals -- where to put the result
3122 -> HintedCmmActuals -- arguments (of mixed type)
3125 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3127 #if alpha_TARGET_ARCH
-- Alpha C calls (StixTree era).  Arguments go into the first six
-- paired int/float argument registers, the rest spill to the stack;
-- the call is LDA/JSR/LDGP through pv.
3131 genCCall fn cconv result_regs args
3132 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3133 `thenNat` \ ((unused,_), argCode) ->
3135 nRegs = length allArgRegs - length unused
3136 code = asmSeqThen (map ($ []) argCode)
3139 LDA pv (AddrImm (ImmLab (ptext fn))),
3140 JSR ra (AddrReg pv) nRegs,
3141 LDGP gp (AddrReg ra)]
3143 ------------------------
3144 {- Try to get a value into a specific register (or registers) for
3145 a call. The first 6 arguments go into the appropriate
3146 argument register (separate registers for integer and floating
3147 point arguments, but used in lock-step), and the remaining
3148 arguments are dumped to the stack, beginning at 0(sp). Our
3149 first argument is a pair of the list of remaining argument
3150 registers to be assigned for this call and the next stack
3151 offset to use for overflowing arguments. This way,
3152 @get_Arg@ can be applied to all of a call's arguments using
3156 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3157 -> StixTree -- Current argument
3158 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3160 -- We have to use up all of our argument registers first...
-- Register case: pick the float or int register of the current pair
-- depending on the argument's rep; fixed registers need an explicit
-- copy (FMOV / OR-move).
3162 get_arg ((iDst,fDst):dsts, offset) arg
3163 = getRegister arg `thenNat` \ register ->
3165 reg = if isFloatType pk then fDst else iDst
3166 code = registerCode register reg
3167 src = registerName register reg
3168 pk = registerRep register
3171 if isFloatType pk then
3172 ((dsts, offset), if isFixed register then
3173 code . mkSeqInstr (FMOV src fDst)
3176 ((dsts, offset), if isFixed register then
3177 code . mkSeqInstr (OR src (RIReg src) iDst)
3180 -- Once we have run out of argument registers, we move to the
-- Stack case: store the argument at the current stack offset and bump
-- the offset by one slot.
3183 get_arg ([], offset) arg
3184 = getRegister arg `thenNat` \ register ->
3185 getNewRegNat (registerRep register)
3188 code = registerCode register tmp
3189 src = registerName register tmp
3190 pk = registerRep register
3191 sz = primRepToSize pk
3193 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3195 #endif /* alpha_TARGET_ARCH */
3197 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3199 #if i386_TARGET_ARCH
-- x86 doesn't need any code for a write barrier (TSO memory model); the
-- primitive survives this far only to block earlier reorderings.
3201 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3202 -- write barrier compiles to no code on x86/x86-64;
3203 -- we keep it this long in order to prevent earlier optimisations.
3205 -- we only cope with a single result for foreign calls
-- Certain float primitives (sqrt/sin/cos/tan) are inlined as x87
-- instructions; everything else goes out of line.  l1/l2 are labels
-- needed by the GSIN/GCOS/GTAN pseudo-instructions.
3206 genCCall (CmmPrim op) [CmmHinted r _] args = do
3207 l1 <- getNewLabelNat
3208 l2 <- getNewLabelNat
3210 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3211 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3213 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3214 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3216 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3217 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3219 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3220 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3222 other_op -> outOfLineFloatOp op r args
3224 actuallyInlineFloatOp instr size [CmmHinted x _]
3225 = do res <- trivialUFCode size (instr size) x
3227 return (any (getRegisterReg (CmmLocal r)))
-- General C call: push args right-to-left, call, pop args (caller pops
-- for ccall; callee pops for stdcall), then move results out of
-- eax/edx/fake0.  Darwin pads the frame to 16-byte stack alignment.
3229 genCCall target dest_regs args = do
3231 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3232 #if !darwin_TARGET_OS
3233 tot_arg_size = sum sizes
3235 raw_arg_size = sum sizes
3236 tot_arg_size = roundTo 16 raw_arg_size
3237 arg_pad_size = tot_arg_size - raw_arg_size
3238 delta0 <- getDeltaNat
3239 setDeltaNat (delta0 - arg_pad_size)
3242 push_codes <- mapM push_arg (reverse args)
3243 delta <- getDeltaNat
3246 -- deal with static vs dynamic call targets
3247 (callinsns,cconv) <-
3250 CmmCallee (CmmLit (CmmLabel lbl)) conv
3251 -> -- ToDo: stdcall arg sizes
3252 return (unitOL (CALL (Left fn_imm) []), conv)
3253 where fn_imm = ImmCLbl lbl
3255 -> do { (dyn_c, dyn_r) <- get_op expr
3256 ; ASSERT( isWord32 (cmmExprType expr) )
3257 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3260 #if darwin_TARGET_OS
3262 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3263 DELTA (delta0 - arg_pad_size)]
3264 `appOL` concatOL push_codes
3267 = concatOL push_codes
3268 call = callinsns `appOL`
3270 -- Deallocate parameters after call for ccall;
3271 -- but not for stdcall (callee does it)
3272 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3273 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3275 [DELTA (delta + tot_arg_size)]
3278 setDeltaNat (delta + tot_arg_size)
3281 -- assign the results, if necessary
-- Results: floats come back in the x87 top-of-stack (fake0), 64-bit
-- ints in the eax/edx pair, everything else in eax.
3282 assign_code [] = nilOL
3283 assign_code [CmmHinted dest _hint]
3284 | isFloatType ty = unitOL (GMOV fake0 r_dest)
3285 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3286 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3287 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3289 ty = localRegType dest
3291 r_dest_hi = getHiVRegFromLo r_dest
3292 r_dest = getRegisterReg (CmmLocal dest)
3293 assign_code many = panic "genCCall.assign_code many"
3295 return (push_code `appOL`
3297 assign_code dest_regs)
3300 arg_size :: CmmType -> Int -- Width in bytes
3301 arg_size ty = widthInBytes (typeWidth ty)
-- Round x up to the next multiple of a.
3303 roundTo a x | x `mod` a == 0 = x
3304 | otherwise = x + a - (x `mod` a)
-- Push one argument and update the stack delta: 64-bit values push the
-- high word first; floats are stored with GST after adjusting esp;
-- everything else is a plain PUSH.
3307 push_arg :: HintedCmmActual {-current argument-}
3308 -> NatM InstrBlock -- code
3310 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3311 | isWord64 arg_ty = do
3312 ChildCode64 code r_lo <- iselExpr64 arg
3313 delta <- getDeltaNat
3314 setDeltaNat (delta - 8)
3316 r_hi = getHiVRegFromLo r_lo
3318 return ( code `appOL`
3319 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3320 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3325 (code, reg) <- get_op arg
3326 delta <- getDeltaNat
3327 let size = arg_size arg_ty -- Byte size
3328 setDeltaNat (delta-size)
3329 if (isFloatType arg_ty)
3330 then return (code `appOL`
3331 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3333 GST (floatSize (typeWidth arg_ty))
3334 reg (AddrBaseIndex (EABaseReg esp)
3338 else return (code `snocOL`
3339 PUSH II32 (OpReg reg) `snocOL`
3343 arg_ty = cmmExprType arg
3346 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3348 (reg,code) <- getSomeReg op
3351 #endif /* i386_TARGET_ARCH */
3353 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3355 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3357 outOfLineFloatOp mop res args
3359 dflags <- getDynFlagsNat
3360 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3361 let target = CmmCallee targetExpr CCallConv
3363 if isFloat64 (localRegType res)
3365 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3369 tmp = LocalReg uq f64
3371 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3372 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3373 return (code1 `appOL` code2)
3375 lbl = mkForeignLabel fn Nothing False
3378 MO_F32_Sqrt -> fsLit "sqrtf"
3379 MO_F32_Sin -> fsLit "sinf"
3380 MO_F32_Cos -> fsLit "cosf"
3381 MO_F32_Tan -> fsLit "tanf"
3382 MO_F32_Exp -> fsLit "expf"
3383 MO_F32_Log -> fsLit "logf"
3385 MO_F32_Asin -> fsLit "asinf"
3386 MO_F32_Acos -> fsLit "acosf"
3387 MO_F32_Atan -> fsLit "atanf"
3389 MO_F32_Sinh -> fsLit "sinhf"
3390 MO_F32_Cosh -> fsLit "coshf"
3391 MO_F32_Tanh -> fsLit "tanhf"
3392 MO_F32_Pwr -> fsLit "powf"
3394 MO_F64_Sqrt -> fsLit "sqrt"
3395 MO_F64_Sin -> fsLit "sin"
3396 MO_F64_Cos -> fsLit "cos"
3397 MO_F64_Tan -> fsLit "tan"
3398 MO_F64_Exp -> fsLit "exp"
3399 MO_F64_Log -> fsLit "log"
3401 MO_F64_Asin -> fsLit "asin"
3402 MO_F64_Acos -> fsLit "acos"
3403 MO_F64_Atan -> fsLit "atan"
3405 MO_F64_Sinh -> fsLit "sinh"
3406 MO_F64_Cosh -> fsLit "cosh"
3407 MO_F64_Tanh -> fsLit "tanh"
3408 MO_F64_Pwr -> fsLit "pow"
3410 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3412 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3414 #if x86_64_TARGET_ARCH
3416 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3417 -- write barrier compiles to no code on x86/x86-64;
3418 -- we keep it this long in order to prevent earlier optimisations.
3421 genCCall (CmmPrim op) [CmmHinted r _] args =
3422 outOfLineFloatOp op r args
3424 genCCall target dest_regs args = do
3426 -- load up the register arguments
3427 (stack_args, aregs, fregs, load_args_code)
3428 <- load_args args allArgRegs allFPArgRegs nilOL
3431 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3432 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3433 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3434 -- for annotating the call instruction with
3436 sse_regs = length fp_regs_used
3438 tot_arg_size = arg_size * length stack_args
3440 -- On entry to the called function, %rsp should be aligned
3441 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3442 -- the return address is 16-byte aligned). In STG land
3443 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3444 -- need to make sure we push a multiple of 16-bytes of args,
3445 -- plus the return address, to get the correct alignment.
3446 -- Urg, this is hard. We need to feed the delta back into
3447 -- the arg pushing code.
3448 (real_size, adjust_rsp) <-
3449 if tot_arg_size `rem` 16 == 0
3450 then return (tot_arg_size, nilOL)
3451 else do -- we need to adjust...
3452 delta <- getDeltaNat
3453 setDeltaNat (delta-8)
3454 return (tot_arg_size+8, toOL [
3455 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3459 -- push the stack args, right to left
3460 push_code <- push_args (reverse stack_args) nilOL
3461 delta <- getDeltaNat
3463 -- deal with static vs dynamic call targets
3464 (callinsns,cconv) <-
3467 CmmCallee (CmmLit (CmmLabel lbl)) conv
3468 -> -- ToDo: stdcall arg sizes
3469 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3470 where fn_imm = ImmCLbl lbl
3472 -> do (dyn_r, dyn_c) <- getSomeReg expr
3473 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3476 -- The x86_64 ABI requires us to set %al to the number of SSE
3477 -- registers that contain arguments, if the called routine
3478 -- is a varargs function. We don't know whether it's a
3479 -- varargs function or not, so we have to assume it is.
3481 -- It's not safe to omit this assignment, even if the number
3482 -- of SSE regs in use is zero. If %al is larger than 8
3483 -- on entry to a varargs function, seg faults ensue.
3484 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3486 let call = callinsns `appOL`
3488 -- Deallocate parameters after call for ccall;
3489 -- but not for stdcall (callee does it)
3490 (if cconv == StdCallConv || real_size==0 then [] else
3491 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3493 [DELTA (delta + real_size)]
3496 setDeltaNat (delta + real_size)
3499 -- assign the results, if necessary
-- Per the x86_64 SysV convention visible here: float results are taken
-- from %xmm0, everything else from %rax, and moved into the destination
-- local register.
3500 assign_code [] = nilOL
3501 assign_code [CmmHinted dest _hint] =
3502 case typeWidth rep of
3503 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
-- NOTE(review): the W64 float branch below moves with 'floatSize W32'
-- even though the scrutinee matched W64 -- this looks like a copy-paste
-- of the W32 line above; presumably it should be 'floatSize W64'
-- (movsd rather than movss). Confirm against the ABI and fix.
3504 W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3505 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3507 rep = localRegType dest
3508 r_dest = getRegisterReg (CmmLocal dest)
-- Multiple result registers are not supported for C calls on this target.
3509 assign_code many = panic "genCCall.assign_code many"
3511 return (load_args_code `appOL`
3514 assign_eax sse_regs `appOL`
3516 assign_code dest_regs)
3519 arg_size = 8 -- always, at the mo
3521 load_args :: [CmmHinted CmmExpr]
3522 -> [Reg] -- int regs avail for args
3523 -> [Reg] -- FP regs avail for args
3525 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3526 load_args args [] [] code = return (args, [], [], code)
3527 -- no more regs to use
3528 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3529 -- no more args to push
3530 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3531 | isFloatType arg_rep =
3535 arg_code <- getAnyReg arg
3536 load_args rest aregs rs (code `appOL` arg_code r)
3541 arg_code <- getAnyReg arg
3542 load_args rest rs fregs (code `appOL` arg_code r)
3544 arg_rep = cmmExprType arg
3547 (args',ars,frs,code') <- load_args rest aregs fregs code
3548 return ((CmmHinted arg hint):args', ars, frs, code')
3550 push_args [] code = return code
3551 push_args ((CmmHinted arg hint):rest) code
3552 | isFloatType arg_rep = do
3553 (arg_reg, arg_code) <- getSomeReg arg
3554 delta <- getDeltaNat
3555 setDeltaNat (delta-arg_size)
3556 let code' = code `appOL` arg_code `appOL` toOL [
3557 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3558 DELTA (delta-arg_size),
3559 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3560 push_args rest code'
3563 -- we only ever generate word-sized function arguments. Promotion
3564 -- has already happened: our Int8# type is kept sign-extended
3565 -- in an Int#, for example.
3566 ASSERT(width == W64) return ()
3567 (arg_op, arg_code) <- getOperand arg
3568 delta <- getDeltaNat
3569 setDeltaNat (delta-arg_size)
3570 let code' = code `appOL` arg_code `appOL` toOL [
3572 DELTA (delta-arg_size)]
3573 push_args rest code'
3575 arg_rep = cmmExprType arg
3576 width = typeWidth arg_rep
3579 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3581 #if sparc_TARGET_ARCH
3583 The SPARC calling convention is an absolute
3584 nightmare. The first 6x32 bits of arguments are mapped into
3585 %o0 through %o5, and the remaining arguments are dumped to the
3586 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3588 If we have to put args on the stack, move %o6==%sp down by
3589 the number of words to go on the stack, to ensure there's enough space.
3591 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3592 16 words above the stack pointer is a word for the address of
3593 a structure return value. I use this as a temporary location
3594 for moving values from float to int regs. Certainly it isn't
3595 safe to put anything in the 16 words starting at %sp, since
3596 this area can get trashed at any time due to window overflows
3597 caused by signal handlers.
3599 A final complication (if the above isn't enough) is that
3600 we can't blithely calculate the arguments one by one into
3601 %o0 .. %o5. Consider the following nested calls:
3605 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3606 the inner call will itself use %o0, which trashes the value put there
3607 in preparation for the outer call. Upshot: we need to calculate the
3608 args into temporary regs, and move those to arg regs or onto the
3609 stack only immediately prior to the call proper. Sigh.
3612 :: CmmCallTarget -- function to call
3613 -> HintedCmmFormals -- where to put the result
3614 -> HintedCmmActuals -- arguments (of mixed type)
3619 genCCall target dest_regs argsAndHints
3621 -- strip hints from the arg regs
3622 let args :: [CmmExpr]
3623 args = map hintlessCmm argsAndHints
3626 -- work out the arguments, and assign them to integer regs
3627 argcode_and_vregs <- mapM arg_to_int_vregs args
3628 let (argcodes, vregss) = unzip argcode_and_vregs
3629 let vregs = concat vregss
3631 let n_argRegs = length allArgRegs
3632 let n_argRegs_used = min (length vregs) n_argRegs
3635 -- deal with static vs dynamic call targets
3636 callinsns <- case target of
3637 CmmCallee (CmmLit (CmmLabel lbl)) conv ->
3638 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3641 -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3642 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3645 -> do res <- outOfLineFloatOp mop
3646 lblOrMopExpr <- case res of
3648 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3651 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3652 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3656 let argcode = concatOL argcodes
3658 let (move_sp_down, move_sp_up)
3659 = let diff = length vregs - n_argRegs
3660 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3663 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3666 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3670 move_sp_down `appOL`
3671 transfer_code `appOL`
3675 assign_code dest_regs
3678 -- | Generate code to calculate an argument, and move it into one
3679 -- or two integer vregs.
3680 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3681 arg_to_int_vregs arg
3683 -- If the expr produces a 64 bit int, then we can just use iselExpr64
3684 | isWord64 (cmmExprType arg)
3685 = do (ChildCode64 code r_lo) <- iselExpr64 arg
3686 let r_hi = getHiVRegFromLo r_lo
3687 return (code, [r_hi, r_lo])
3690 = do (src, code) <- getSomeReg arg
3691 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3692 let pk = cmmExprType arg
3694 case cmmTypeSize pk of
3696 -- Load a 64 bit float return value into two integer regs.
3698 v1 <- getNewRegNat II32
3699 v2 <- getNewRegNat II32
3701 let Just f0_high = fPair f0
3705 FMOV FF64 src f0 `snocOL`
3706 ST FF32 f0 (spRel 16) `snocOL`
3707 LD II32 (spRel 16) v1 `snocOL`
3708 ST FF32 f0_high (spRel 16) `snocOL`
3709 LD II32 (spRel 16) v2
3711 return (code2, [v1,v2])
3713 -- Load a 32 bit float return value into an integer reg
3715 v1 <- getNewRegNat II32
3719 ST FF32 src (spRel 16) `snocOL`
3720 LD II32 (spRel 16) v1
3722 return (code2, [v1])
3724 -- Move an integer return value into its destination reg.
3726 v1 <- getNewRegNat II32
3730 OR False g0 (RIReg src) v1
3732 return (code2, [v1])
3735 -- | Move args from the integer vregs into which they have been
3736 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3738 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3741 move_final [] _ offset
3744 -- out of aregs; move to stack
3745 move_final (v:vs) [] offset
3746 = ST II32 v (spRel offset)
3747 : move_final vs [] (offset+1)
3749 -- move into an arg (%o[0..5]) reg
3750 move_final (v:vs) (a:az) offset
3751 = OR False g0 (RIReg v) a
3752 : move_final vs az offset
3755 -- | Assign results returned from the call into their
3758 assign_code :: [CmmHinted LocalReg] -> OrdList Instr
3759 assign_code [] = nilOL
3761 assign_code [CmmHinted dest _hint]
3762 = let rep = localRegType dest
3763 width = typeWidth rep
3764 r_dest = getRegisterReg (CmmLocal dest)
3769 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3773 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3775 | not $ isFloatType rep
3777 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3779 | not $ isFloatType rep
3781 , r_dest_hi <- getHiVRegFromLo r_dest
3782 = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
3783 , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
3787 -- | Generate a call to implement an out-of-line floating point operation
3790 -> NatM (Either CLabel CmmExpr)
3792 outOfLineFloatOp mop
3793 = do let functionName
3794 = outOfLineFloatOp_table mop
3796 dflags <- getDynFlagsNat
3797 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
3798 $ mkForeignLabel functionName Nothing True
3802 CmmLit (CmmLabel lbl) -> Left lbl
3805 return mopLabelOrExpr
3808 -- | Decide what C function to use to implement a CallishMachOp
3810 outOfLineFloatOp_table
3814 outOfLineFloatOp_table mop
3816 MO_F32_Exp -> fsLit "expf"
3817 MO_F32_Log -> fsLit "logf"
3818 MO_F32_Sqrt -> fsLit "sqrtf"
3820 MO_F32_Sin -> fsLit "sinf"
3821 MO_F32_Cos -> fsLit "cosf"
3822 MO_F32_Tan -> fsLit "tanf"
3824 MO_F32_Asin -> fsLit "asinf"
3825 MO_F32_Acos -> fsLit "acosf"
3826 MO_F32_Atan -> fsLit "atanf"
3828 MO_F32_Sinh -> fsLit "sinhf"
3829 MO_F32_Cosh -> fsLit "coshf"
3830 MO_F32_Tanh -> fsLit "tanhf"
3832 MO_F64_Exp -> fsLit "exp"
3833 MO_F64_Log -> fsLit "log"
3834 MO_F64_Sqrt -> fsLit "sqrt"
3836 MO_F64_Sin -> fsLit "sin"
3837 MO_F64_Cos -> fsLit "cos"
3838 MO_F64_Tan -> fsLit "tan"
3840 MO_F64_Asin -> fsLit "asin"
3841 MO_F64_Acos -> fsLit "acos"
3842 MO_F64_Atan -> fsLit "atan"
3844 MO_F64_Sinh -> fsLit "sinh"
3845 MO_F64_Cosh -> fsLit "cosh"
3846 MO_F64_Tanh -> fsLit "tanh"
3848 other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
3849 (pprCallishMachOp mop)
3852 #endif /* sparc_TARGET_ARCH */
3854 #if powerpc_TARGET_ARCH
3856 #if darwin_TARGET_OS || linux_TARGET_OS
3858 The PowerPC calling convention for Darwin/Mac OS X
3859 is described in Apple's document
3860 "Inside Mac OS X - Mach-O Runtime Architecture".
3862 PowerPC Linux uses the System V Release 4 Calling Convention
3863 for PowerPC. It is described in the
3864 "System V Application Binary Interface PowerPC Processor Supplement".
3866 Both conventions are similar:
3867 Parameters may be passed in general-purpose registers starting at r3, in
3868 floating point registers starting at f1, or on the stack.
3870 But there are substantial differences:
3871 * The number of registers used for parameter passing and the exact set of
3872 nonvolatile registers differs (see MachRegs.lhs).
3873 * On Darwin, stack space is always reserved for parameters, even if they are
3874 passed in registers. The called routine may choose to save parameters from
3875 registers to the corresponding space on the stack.
3876 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3877 parameter is passed in an FPR.
3878 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3879 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3880 Darwin just treats an I64 like two separate II32s (high word first).
3881 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3882 4-byte aligned like everything else on Darwin.
3883 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3884 PowerPC Linux does not agree, so neither do we.
3886 According to both conventions, the parameter area should be part of the
3887 caller's stack frame, allocated in the caller's prologue code (large enough
3888 to hold the parameter lists for all called routines). The NCG already
3889 uses the stack for register spilling, leaving 64 bytes free at the top.
3890 If we need a larger parameter area than that, we just allocate a new stack
3891 frame just before ccalling.
3895 genCCall (CmmPrim MO_WriteBarrier) _ _
3896 = return $ unitOL LWSYNC
3898 genCCall target dest_regs argsAndHints
3899 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3900 -- we rely on argument promotion in the codeGen
3902 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3904 allArgRegs allFPArgRegs
3908 (labelOrExpr, reduceToFF32) <- case target of
3909 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3910 CmmCallee expr conv -> return (Right expr, False)
3911 CmmPrim mop -> outOfLineFloatOp mop
3913 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3914 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3919 `snocOL` BL lbl usedRegs
3922 (dynReg, dynCode) <- getSomeReg dyn
3924 `snocOL` MTCTR dynReg
3926 `snocOL` BCTRL usedRegs
3929 #if darwin_TARGET_OS
3930 initialStackOffset = 24
3931 -- size of linkage area + size of arguments, in bytes
3932 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3933 map (widthInBytes . typeWidth) argReps
3934 #elif linux_TARGET_OS
3935 initialStackOffset = 8
3936 stackDelta finalStack = roundTo 16 finalStack
3938 args = map hintlessCmm argsAndHints
3939 argReps = map cmmExprType args
3941 roundTo a x | x `mod` a == 0 = x
3942 | otherwise = x + a - (x `mod` a)
3944 move_sp_down finalStack
3946 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3949 where delta = stackDelta finalStack
3950 move_sp_up finalStack
3952 toOL [ADD sp sp (RIImm (ImmInt delta)),
3955 where delta = stackDelta finalStack
3958 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3959 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3960 accumCode accumUsed | isWord64 arg_ty =
3962 ChildCode64 code vr_lo <- iselExpr64 arg
3963 let vr_hi = getHiVRegFromLo vr_lo
3965 #if darwin_TARGET_OS
3970 (accumCode `appOL` code
3971 `snocOL` storeWord vr_hi gprs stackOffset
3972 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3973 ((take 2 gprs) ++ accumUsed)
3975 storeWord vr (gpr:_) offset = MR gpr vr
3976 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3978 #elif linux_TARGET_OS
3979 let stackOffset' = roundTo 8 stackOffset
3980 stackCode = accumCode `appOL` code
3981 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3982 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3983 regCode hireg loreg =
3984 accumCode `appOL` code
3985 `snocOL` MR hireg vr_hi
3986 `snocOL` MR loreg vr_lo
3989 hireg : loreg : regs | even (length gprs) ->
3990 passArguments args regs fprs stackOffset
3991 (regCode hireg loreg) (hireg : loreg : accumUsed)
3992 _skipped : hireg : loreg : regs ->
3993 passArguments args regs fprs stackOffset
3994 (regCode hireg loreg) (hireg : loreg : accumUsed)
3995 _ -> -- only one or no regs left
3996 passArguments args [] fprs (stackOffset'+8)
4000 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
4001 | reg : _ <- regs = do
4002 register <- getRegister arg
4003 let code = case register of
4004 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
4005 Any _ acode -> acode reg
4009 #if darwin_TARGET_OS
4010 -- The Darwin ABI requires that we reserve stack slots for register parameters
4011 (stackOffset + stackBytes)
4012 #elif linux_TARGET_OS
4013 -- ... the SysV ABI doesn't.
4016 (accumCode `appOL` code)
4019 (vr, code) <- getSomeReg arg
4023 (stackOffset' + stackBytes)
4024 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
4027 #if darwin_TARGET_OS
4028 -- stackOffset is at least 4-byte aligned
4029 -- The Darwin ABI is happy with that.
4030 stackOffset' = stackOffset
4032 -- ... the SysV ABI requires 8-byte alignment for doubles.
4033 stackOffset' | isFloatType rep && typeWidth rep == W64 =
4034 roundTo 8 stackOffset
4035 | otherwise = stackOffset
4037 stackSlot = AddrRegImm sp (ImmInt stackOffset')
4038 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
4039 II32 -> (1, 0, 4, gprs)
4040 #if darwin_TARGET_OS
4041 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
4043 FF32 -> (1, 1, 4, fprs)
4044 FF64 -> (2, 1, 8, fprs)
4045 #elif linux_TARGET_OS
4046 -- ... the SysV ABI doesn't.
4047 FF32 -> (0, 1, 4, fprs)
4048 FF64 -> (0, 1, 8, fprs)
4051 moveResult reduceToFF32 =
4054 [CmmHinted dest _hint]
4055 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
4056 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
4057 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
4059 | otherwise -> unitOL (MR r_dest r3)
4060 where rep = cmmRegType (CmmLocal dest)
4061 r_dest = getRegisterReg (CmmLocal dest)
4063 outOfLineFloatOp mop =
4065 dflags <- getDynFlagsNat
4066 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
4067 mkForeignLabel functionName Nothing True
4068 let mopLabelOrExpr = case mopExpr of
4069 CmmLit (CmmLabel lbl) -> Left lbl
4071 return (mopLabelOrExpr, reduce)
4073 (functionName, reduce) = case mop of
4074 MO_F32_Exp -> (fsLit "exp", True)
4075 MO_F32_Log -> (fsLit "log", True)
4076 MO_F32_Sqrt -> (fsLit "sqrt", True)
4078 MO_F32_Sin -> (fsLit "sin", True)
4079 MO_F32_Cos -> (fsLit "cos", True)
4080 MO_F32_Tan -> (fsLit "tan", True)
4082 MO_F32_Asin -> (fsLit "asin", True)
4083 MO_F32_Acos -> (fsLit "acos", True)
4084 MO_F32_Atan -> (fsLit "atan", True)
4086 MO_F32_Sinh -> (fsLit "sinh", True)
4087 MO_F32_Cosh -> (fsLit "cosh", True)
4088 MO_F32_Tanh -> (fsLit "tanh", True)
4089 MO_F32_Pwr -> (fsLit "pow", True)
4091 MO_F64_Exp -> (fsLit "exp", False)
4092 MO_F64_Log -> (fsLit "log", False)
4093 MO_F64_Sqrt -> (fsLit "sqrt", False)
4095 MO_F64_Sin -> (fsLit "sin", False)
4096 MO_F64_Cos -> (fsLit "cos", False)
4097 MO_F64_Tan -> (fsLit "tan", False)
4099 MO_F64_Asin -> (fsLit "asin", False)
4100 MO_F64_Acos -> (fsLit "acos", False)
4101 MO_F64_Atan -> (fsLit "atan", False)
4103 MO_F64_Sinh -> (fsLit "sinh", False)
4104 MO_F64_Cosh -> (fsLit "cosh", False)
4105 MO_F64_Tanh -> (fsLit "tanh", False)
4106 MO_F64_Pwr -> (fsLit "pow", False)
4107 other -> pprPanic "genCCall(ppc): unknown callish op"
4108 (pprCallishMachOp other)
4110 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
4112 #endif /* powerpc_TARGET_ARCH */
4115 -- -----------------------------------------------------------------------------
4116 -- Generating a table-branch
4118 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
4120 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4124 (reg,e_code) <- getSomeReg expr
4125 lbl <- getNewLabelNat
4126 dflags <- getDynFlagsNat
4127 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4128 (tableReg,t_code) <- getSomeReg $ dynRef
4130 jumpTable = map jumpTableEntryRel ids
4132 jumpTableEntryRel Nothing
4133 = CmmStaticLit (CmmInt 0 wordWidth)
4134 jumpTableEntryRel (Just (BlockId id))
4135 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4136 where blockLabel = mkAsmTempLabel id
4138 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
4139 (EAIndex reg wORD_SIZE) (ImmInt 0))
4141 #if x86_64_TARGET_ARCH
4142 #if darwin_TARGET_OS
4143 -- on Mac OS X/x86_64, put the jump table in the text section
4144 -- to work around a limitation of the linker.
4145 -- ld64 is unable to handle the relocations for
4147 -- if L0 is not preceded by a non-anonymous label in its section.
4149 code = e_code `appOL` t_code `appOL` toOL [
4150 ADD (intSize wordWidth) op (OpReg tableReg),
4151 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
4152 LDATA Text (CmmDataLabel lbl : jumpTable)
4155 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
4156 -- relocations, hence we only get 32-bit offsets in the jump
4157 -- table. As these offsets are always negative we need to properly
4158 -- sign extend them to 64-bit. This hack should be removed in
4159 -- conjunction with the hack in PprMach.hs/pprDataItem once
4160 -- binutils 2.17 is standard.
4161 code = e_code `appOL` t_code `appOL` toOL [
4162 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4164 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
4165 (EAIndex reg wORD_SIZE) (ImmInt 0)))
4167 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
4168 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4172 code = e_code `appOL` t_code `appOL` toOL [
4173 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4174 ADD (intSize wordWidth) op (OpReg tableReg),
4175 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4181 (reg,e_code) <- getSomeReg expr
4182 lbl <- getNewLabelNat
4184 jumpTable = map jumpTableEntry ids
4185 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
4186 code = e_code `appOL` toOL [
4187 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4188 JMP_TBL op [ id | Just id <- ids ]
4192 #elif powerpc_TARGET_ARCH
4196 (reg,e_code) <- getSomeReg expr
4197 tmp <- getNewRegNat II32
4198 lbl <- getNewLabelNat
4199 dflags <- getDynFlagsNat
4200 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4201 (tableReg,t_code) <- getSomeReg $ dynRef
4203 jumpTable = map jumpTableEntryRel ids
4205 jumpTableEntryRel Nothing
4206 = CmmStaticLit (CmmInt 0 wordWidth)
4207 jumpTableEntryRel (Just (BlockId id))
4208 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4209 where blockLabel = mkAsmTempLabel id
4211 code = e_code `appOL` t_code `appOL` toOL [
4212 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4213 SLW tmp reg (RIImm (ImmInt 2)),
4214 LD II32 tmp (AddrRegReg tableReg tmp),
4215 ADD tmp tmp (RIReg tableReg),
4217 BCTR [ id | Just id <- ids ]
4222 (reg,e_code) <- getSomeReg expr
4223 tmp <- getNewRegNat II32
4224 lbl <- getNewLabelNat
4226 jumpTable = map jumpTableEntry ids
4228 code = e_code `appOL` toOL [
4229 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4230 SLW tmp reg (RIImm (ImmInt 2)),
4231 ADDIS tmp tmp (HA (ImmCLbl lbl)),
4232 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4234 BCTR [ id | Just id <- ids ]
4237 #elif sparc_TARGET_ARCH
4240 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4243 = do (e_reg, e_code) <- getSomeReg expr
4245 base_reg <- getNewRegNat II32
4246 offset_reg <- getNewRegNat II32
4247 dst <- getNewRegNat II32
4249 label <- getNewLabelNat
4250 let jumpTable = map jumpTableEntry ids
4252 return $ e_code `appOL`
4255 [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
4257 -- load base of jump table
4258 , SETHI (HI (ImmCLbl label)) base_reg
4259 , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
4261 -- the addrs in the table are 32 bits wide..
4262 , SLL e_reg (RIImm $ ImmInt 2) offset_reg
4264 -- load and jump to the destination
4265 , LD II32 (AddrRegReg base_reg offset_reg) dst
4266 , JMP (AddrRegImm dst (ImmInt 0))
4270 #error "ToDo: genSwitch"
4274 -- | Convert a BlockId to some CmmStatic data
-- A 'Nothing' (an unfilled slot in the switch table) becomes a
-- word-sized 0 placeholder; a 'Just' block becomes the address of that
-- block's assembler-temporary label.
4275 jumpTableEntry :: Maybe BlockId -> CmmStatic
4276 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
4277 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4278 where blockLabel = mkAsmTempLabel id
4280 -- -----------------------------------------------------------------------------
4282 -- -----------------------------------------------------------------------------
4285 -- -----------------------------------------------------------------------------
4286 -- 'condIntReg' and 'condFltReg': condition codes into registers
4288 -- Turn those condition codes into integers now (when they appear on
4289 -- the right hand side of an assignment).
4291 -- (If applicable) Do not fill the delay slots here; you will confuse the
4292 -- register allocator.
4294 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4296 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4298 #if alpha_TARGET_ARCH
-- The Alpha backend does not implement condition-code-into-register
-- generation: both entry points are panic stubs on this target.
4299 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4300 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4301 #endif /* alpha_TARGET_ARCH */
4303 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4305 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4307 condIntReg cond x y = do
4308 CondCode _ cond cond_code <- condIntCode cond x y
4309 tmp <- getNewRegNat II8
4311 code dst = cond_code `appOL` toOL [
4312 SETCC cond (OpReg tmp),
4313 MOVZxL II8 (OpReg tmp) (OpReg dst)
4316 return (Any II32 code)
4320 #if i386_TARGET_ARCH
4322 condFltReg cond x y = do
4323 CondCode _ cond cond_code <- condFltCode cond x y
4324 tmp <- getNewRegNat II8
4326 code dst = cond_code `appOL` toOL [
4327 SETCC cond (OpReg tmp),
4328 MOVZxL II8 (OpReg tmp) (OpReg dst)
4331 return (Any II32 code)
4335 #if x86_64_TARGET_ARCH
4337 condFltReg cond x y = do
4338 CondCode _ cond cond_code <- condFltCode cond x y
4339 tmp1 <- getNewRegNat wordSize
4340 tmp2 <- getNewRegNat wordSize
4342 -- We have to worry about unordered operands (eg. comparisons
4343 -- against NaN). If the operands are unordered, the comparison
4344 -- sets the parity flag, carry flag and zero flag.
4345 -- All comparisons are supposed to return false for unordered
4346 -- operands except for !=, which returns true.
4348 -- Optimisation: we don't have to test the parity flag if we
4349 -- know the test has already excluded the unordered case: eg >
4350 -- and >= test for a zero carry flag, which can only occur for
4351 -- ordered operands.
4353 -- ToDo: by reversing comparisons we could avoid testing the
4354 -- parity flag in more cases.
4359 NE -> or_unordered dst
4360 GU -> plain_test dst
4361 GEU -> plain_test dst
4362 _ -> and_ordered dst)
4364 plain_test dst = toOL [
4365 SETCC cond (OpReg tmp1),
4366 MOVZxL II8 (OpReg tmp1) (OpReg dst)
4368 or_unordered dst = toOL [
4369 SETCC cond (OpReg tmp1),
4370 SETCC PARITY (OpReg tmp2),
4371 OR II8 (OpReg tmp1) (OpReg tmp2),
4372 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4374 and_ordered dst = toOL [
4375 SETCC cond (OpReg tmp1),
4376 SETCC NOTPARITY (OpReg tmp2),
4377 AND II8 (OpReg tmp1) (OpReg tmp2),
4378 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4381 return (Any II32 code)
4385 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4387 #if sparc_TARGET_ARCH
-- SPARC: materialise a comparison result as 0/1 in an integer register.
-- NOTE(review): this listing has lost lines in extraction (the `let`
-- keywords and, in the branchy clauses, the NEWBLOCK list entries and
-- closing brackets are missing); annotated as-is, not reconstructed.
--
-- Special case: x == 0.  Uses SUBcc to set the condition codes from
-- src, then a subtract-with-carry trick to turn the carry into 0/1
-- without branching.
4389 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4390 (src, code) <- getSomeReg x
4391 tmp <- getNewRegNat II32
4393 code__2 dst = code `appOL` toOL [
4394 SUB False True g0 (RIReg src) g0,
4395 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4396 return (Any II32 code__2)
-- General equality: XOR the operands into dst (zero iff equal), then
-- the same carry trick as above.
4398 condIntReg EQQ x y = do
4399 (src1, code1) <- getSomeReg x
4400 (src2, code2) <- getSomeReg y
4401 tmp1 <- getNewRegNat II32
4402 tmp2 <- getNewRegNat II32
4404 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4405 XOR False src1 (RIReg src2) dst,
4406 SUB False True g0 (RIReg dst) g0,
4407 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4408 return (Any II32 code__2)
-- Special case: x /= 0.  ADD-with-carry of 0 into dst yields the carry
-- bit produced by the SUBcc.
4410 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4411 (src, code) <- getSomeReg x
4412 tmp <- getNewRegNat II32
4414 code__2 dst = code `appOL` toOL [
4415 SUB False True g0 (RIReg src) g0,
4416 ADD True False g0 (RIImm (ImmInt 0)) dst]
4417 return (Any II32 code__2)
-- General inequality: XOR then the same carry-in trick.
4419 condIntReg NE x y = do
4420 (src1, code1) <- getSomeReg x
4421 (src2, code2) <- getSomeReg y
4422 tmp1 <- getNewRegNat II32
4423 tmp2 <- getNewRegNat II32
4425 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4426 XOR False src1 (RIReg src2) dst,
4427 SUB False True g0 (RIReg dst) g0,
4428 ADD True False g0 (RIImm (ImmInt 0)) dst]
4429 return (Any II32 code__2)
-- Fallback for all other integer conditions: branch over a pair of
-- OR-immediate moves (0 then 1) using two fresh basic-block labels.
-- NOTE(review): the NEWBLOCK entries for bid1/bid2 appear to be among
-- the lines lost from this listing.
4431 condIntReg cond x y = do
4432 bid1@(BlockId lbl1) <- getBlockIdNat
4433 bid2@(BlockId lbl2) <- getBlockIdNat
4434 CondCode _ cond cond_code <- condIntCode cond x y
4436 code__2 dst = cond_code `appOL` toOL [
4437 BI cond False bid1, NOP,
4438 OR False g0 (RIImm (ImmInt 0)) dst,
4439 BI ALWAYS False bid2, NOP,
4441 OR False g0 (RIImm (ImmInt 1)) dst,
4443 return (Any II32 code__2)
-- Floating-point version of the branchy scheme above; uses BF (branch
-- on float condition) instead of BI.
4445 condFltReg cond x y = do
4446 bid1@(BlockId lbl1) <- getBlockIdNat
4447 bid2@(BlockId lbl2) <- getBlockIdNat
4448 CondCode _ cond cond_code <- condFltCode cond x y
4450 code__2 dst = cond_code `appOL` toOL [
4452 BF cond False bid1, NOP,
4453 OR False g0 (RIImm (ImmInt 0)) dst,
4454 BI ALWAYS False bid2, NOP,
4456 OR False g0 (RIImm (ImmInt 1)) dst,
4458 return (Any II32 code__2)
4460 #endif /* sparc_TARGET_ARCH */
4462 #if powerpc_TARGET_ARCH
-- PowerPC: shared helper turning a CondCode into a 0/1 register value
-- by extracting the relevant CR bit with RLWINM (and negating it via
-- CRNOR when the condition is the complement of an available bit).
-- NOTE(review): large parts of this definition (the commented-out
-- branchy variant, the MFCR sequence, and most of the bit/do_negate
-- case table) were lost in extraction; annotated as-is.
4463 condReg getCond = do
4464 lbl1 <- getBlockIdNat
4465 lbl2 <- getBlockIdNat
4466 CondCode _ cond cond_code <- getCond
4468 {- code dst = cond_code `appOL` toOL [
4477 code dst = cond_code
-- Rotate the selected condition-register bit into the low bit of dst.
4481 RLWINM dst dst (bit + 1) 31 31
4484 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4487 (bit, do_negate) = case cond of
4501 return (Any II32 code)
-- Int and float comparisons share condReg; only the CondCode producer
-- differs.
4503 condIntReg cond x y = condReg (condIntCode cond x y)
4504 condFltReg cond x y = condReg (condFltCode cond x y)
4505 #endif /* powerpc_TARGET_ARCH */
4508 -- -----------------------------------------------------------------------------
4509 -- 'trivial*Code': deal with trivial instructions
4511 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4512 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4513 -- Only look for constants on the right hand side, because that's
4514 -- where the generic optimizer will have put them.
4516 -- Similarly, for unary instructions, we don't have to worry about
4517 -- matching an StInt as the argument, because genericOpt will already
4518 -- have handled the constant-folding.
-- Per-architecture type signatures for the trivial*Code family, built
-- with the IF_ARCH_* CPP macros from NCG.h so each target sees only
-- its own instruction-constructor shape.
-- NOTE(review): the signature head lines (e.g. `trivialCode ::`) and
-- the closing `-> NatM Register` lines are missing from this listing.
4521 :: Width -- Int only
4522 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4523 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4524 -> Maybe (Operand -> Operand -> Instr)
4525 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4526 -> Maybe (Operand -> Operand -> Instr)
4527 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4528 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4530 -> CmmExpr -> CmmExpr -- the two arguments
-- Floating-point dyadic ops; PowerPC handles these separately, hence
-- the #ifndef guard.
4533 #ifndef powerpc_TARGET_ARCH
4535 :: Width -- Floating point only
4536 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4537 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4538 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4539 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4541 -> CmmExpr -> CmmExpr -- the two arguments
-- Unary integer op (trivialUCode).
4547 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4548 ,IF_ARCH_i386 ((Operand -> Instr)
4549 ,IF_ARCH_x86_64 ((Operand -> Instr)
4550 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4551 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4553 -> CmmExpr -- the one argument
-- Unary floating-point op (trivialUFCode); again PowerPC-excluded.
4556 #ifndef powerpc_TARGET_ARCH
4559 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4560 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4561 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4562 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4564 -> CmmExpr -- the one argument
4568 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4570 #if alpha_TARGET_ARCH
-- Alpha versions, written in the old pre-do `thenNat` CPS style over
-- the legacy Register/StixInt representation (StInt, registerCode,
-- mkSeqInstr).  NOTE(review): the `let`/`in` lines are missing from
-- this listing; annotated as-is.
--
-- Dyadic op with an immediate right operand.
4572 trivialCode instr x (StInt y)
4574 = getRegister x `thenNat` \ register ->
4575 getNewRegNat IntRep `thenNat` \ tmp ->
4577 code = registerCode register tmp
4578 src1 = registerName register tmp
4579 src2 = ImmInt (fromInteger y)
4580 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4582 return (Any IntRep code__2)
-- Dyadic op, both operands in registers; sequences the two operand
-- computations with asmSeqThen before the combining instruction.
4584 trivialCode instr x y
4585 = getRegister x `thenNat` \ register1 ->
4586 getRegister y `thenNat` \ register2 ->
4587 getNewRegNat IntRep `thenNat` \ tmp1 ->
4588 getNewRegNat IntRep `thenNat` \ tmp2 ->
4590 code1 = registerCode register1 tmp1 []
4591 src1 = registerName register1 tmp1
4592 code2 = registerCode register2 tmp2 []
4593 src2 = registerName register2 tmp2
4594 code__2 dst = asmSeqThen [code1, code2] .
4595 mkSeqInstr (instr src1 (RIReg src2) dst)
4597 return (Any IntRep code__2)
-- Unary integer op.
4600 trivialUCode instr x
4601 = getRegister x `thenNat` \ register ->
4602 getNewRegNat IntRep `thenNat` \ tmp ->
4604 code = registerCode register tmp
4605 src = registerName register tmp
4606 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4608 return (Any IntRep code__2)
-- Dyadic floating-point op; first (Width) argument is ignored, result
-- is always FF64 here.
4611 trivialFCode _ instr x y
4612 = getRegister x `thenNat` \ register1 ->
4613 getRegister y `thenNat` \ register2 ->
4614 getNewRegNat FF64 `thenNat` \ tmp1 ->
4615 getNewRegNat FF64 `thenNat` \ tmp2 ->
4617 code1 = registerCode register1 tmp1
4618 src1 = registerName register1 tmp1
4620 code2 = registerCode register2 tmp2
4621 src2 = registerName register2 tmp2
4623 code__2 dst = asmSeqThen [code1 [], code2 []] .
4624 mkSeqInstr (instr src1 src2 dst)
4626 return (Any FF64 code__2)
-- Unary floating-point op.
4628 trivialUFCode _ instr x
4629 = getRegister x `thenNat` \ register ->
4630 getNewRegNat FF64 `thenNat` \ tmp ->
4632 code = registerCode register tmp
4633 src = registerName register tmp
4634 code__2 dst = code . mkSeqInstr (instr src dst)
4636 return (Any FF64 code__2)
4638 #endif /* alpha_TARGET_ARCH */
4640 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4642 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4645 The Rules of the Game are:
4647 * You cannot assume anything about the destination register dst;
4648 it may be anything, including a fixed reg.
4650 * You may compute an operand into a fixed reg, but you may not
4651 subsequently change the contents of that fixed reg. If you
4652 want to do so, first copy the value either to a temporary
4653 or into dst. You are free to modify dst even if it happens
4654 to be a fixed reg -- that's not your problem.
4656 * You cannot assume that a fixed reg will stay live over an
4657 arbitrary computation. The same applies to the dst reg.
4659 * Temporary regs obtained from getNewRegNat are distinct from
4660 each other and from all other regs, and stay live over
4661 arbitrary computations.
4663 --------------------
4665 SDM's version of The Rules:
4667 * If getRegister returns Any, that means it can generate correct
4668 code which places the result in any register, period. Even if that
4669 register happens to be read during the computation.
4671 Corollary #1: this means that if you are generating code for an
4672 operation with two arbitrary operands, you cannot assign the result
4673 of the first operand into the destination register before computing
4674 the second operand. The second operand might require the old value
4675 of the destination register.
4677 Corollary #2: A function might be able to generate more efficient
4678 code if it knows the destination register is a new temporary (and
4679 therefore not read by any of the sub-computations).
4681 * If getRegister returns Any, then the code it generates may modify only:
4682 (a) fresh temporaries
4683 (b) the destination register
4684 (c) known registers (eg. %ecx is used by shifts)
4685 In particular, it may *not* modify global registers, unless the global
4686 register happens to be the destination register.
-- x86/x86_64 dyadic integer ops.  When the *left* operand is a small
-- literal and a reversed-operand instruction is available, compute the
-- right operand into dst and apply the reversed instruction with the
-- literal as an immediate.  NOTE(review): the `let`/`code dst` header
-- lines are missing from this listing.
4689 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4690 | is32BitLit lit_a = do
4691 b_code <- getAnyReg b
4694 = b_code dst `snocOL`
4695 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4697 return (Any (intSize width) code)
-- General case: defer to genTrivialCode at the integer size.
4699 trivialCode width instr maybe_revinstr a b
4700 = genTrivialCode (intSize width) instr a b
4702 -- This is re-used for floating pt instructions too.
-- Two-operand (dst := dst `op` src) code generation; see the inline
-- comment below for the register-clash subtlety.
4703 genTrivialCode rep instr a b = do
4704 (b_op, b_code) <- getNonClobberedOperand b
4705 a_code <- getAnyReg a
4706 tmp <- getNewRegNat rep
4708 -- We want the value of b to stay alive across the computation of a.
4709 -- But, we want to calculate a straight into the destination register,
4710 -- because the instruction only has two operands (dst := dst `op` src).
4711 -- The troublesome case is when the result of b is in the same register
4712 -- as the destination reg. In this case, we have to save b in a
4713 -- new temporary across the computation of a.
-- Clash: save b into tmp first, then compute a into dst and combine.
4715 | dst `regClashesWithOp` b_op =
4717 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4719 instr (OpReg tmp) (OpReg dst)
-- No clash: compute a into dst and combine with b's operand directly.
4723 instr b_op (OpReg dst)
4725 return (Any rep code)
-- Does reg appear in the operand, either directly or inside an
-- addressing mode?
4727 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4728 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4729 reg `regClashesWithOp` _ = False
-- x86 unary op: compute the argument straight into dst, then apply the
-- one-operand instruction in place.  NOTE(review): the `let code dst`
-- lines are missing from this listing.
4733 trivialUCode rep instr x = do
4734 x_code <- getAnyReg x
4739 return (Any rep code)
4743 #if i386_TARGET_ARCH
-- i386 (x87) dyadic FP op: left operand must not be clobbered while
-- the right operand is computed.
4745 trivialFCode width instr x y = do
4746 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4747 (y_reg, y_code) <- getSomeReg y
4749 size = floatSize width
4753 instr size x_reg y_reg dst
4754 return (Any size code)
4758 #if x86_64_TARGET_ARCH
-- | x86_64 (SSE) dyadic floating-point op: reuse the generic
-- two-operand scheme, specialising the instruction to the operand size.
trivialFCode pk instr x y =
    let size = floatSize pk
    in  genTrivialCode size (instr size) x y
-- Unary floating-point op.  NOTE(review): the `let code dst` lines of
-- this definition are missing from this listing; annotated as-is.
4766 trivialUFCode size instr x = do
4767 (x_reg, x_code) <- getSomeReg x
4773 return (Any size code)
4775 #endif /* i386_TARGET_ARCH */
4777 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4779 #if sparc_TARGET_ARCH
-- SPARC trivial ops.  NOTE(review): the `let` keywords and the
-- guard/do line of the first clause are missing from this listing.
--
-- Immediate right operand (presumably guarded on fitting in 13 bits —
-- the guard line is lost; TODO confirm against the original source).
4781 trivialCode pk instr x (CmmLit (CmmInt y d))
4784 (src1, code) <- getSomeReg x
4785 tmp <- getNewRegNat II32
4787 src2 = ImmInt (fromInteger y)
4788 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4789 return (Any II32 code__2)
-- Both operands in registers.
4791 trivialCode pk instr x y = do
4792 (src1, code1) <- getSomeReg x
4793 (src2, code2) <- getSomeReg y
4794 tmp1 <- getNewRegNat II32
4795 tmp2 <- getNewRegNat II32
4797 code__2 dst = code1 `appOL` code2 `snocOL`
4798 instr src1 (RIReg src2) dst
4799 return (Any II32 code__2)
-- FP dyadic op: if the operand types differ, promote the FF32 side to
-- FF64 with FxTOy before combining at double precision.
4802 trivialFCode pk instr x y = do
4803 (src1, code1) <- getSomeReg x
4804 (src2, code2) <- getSomeReg y
4805 tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4806 tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4807 tmp <- getNewRegNat FF64
4809 promote x = FxTOy FF32 FF64 x tmp
-- Same-type operands: combine directly at the expression's size.
4815 if pk1 `cmmEqType` pk2 then
4816 code1 `appOL` code2 `snocOL`
4817 instr (floatSize pk) src1 src2 dst
-- Left operand is the narrow one: promote src1.
4818 else if typeWidth pk1 == W32 then
4819 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4820 instr FF64 tmp src2 dst
-- Otherwise the right operand is narrow: promote src2.
4822 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4823 instr FF64 src1 tmp dst
4824 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- Unary integer op.
4828 trivialUCode size instr x = do
4829 (src, code) <- getSomeReg x
4830 tmp <- getNewRegNat size
4832 code__2 dst = code `snocOL` instr (RIReg src) dst
4833 return (Any size code__2)
-- Unary floating-point op.
4836 trivialUFCode pk instr x = do
4837 (src, code) <- getSomeReg x
4838 tmp <- getNewRegNat pk
4840 code__2 dst = code `snocOL` instr src dst
4841 return (Any pk code__2)
4843 #endif /* sparc_TARGET_ARCH */
4845 #if powerpc_TARGET_ARCH
4848 Wolfgang's PowerPC version of The Rules:
4850 A slightly modified version of The Rules to take advantage of the fact
4851 that PowerPC instructions work on all registers and don't implicitly
4852 clobber any fixed registers.
4854 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4856 * If getRegister returns Any, then the code it generates may modify only:
4857 (a) fresh temporaries
4858 (b) the destination register
4859 It may *not* modify global registers, unless the global
4860 register happens to be the destination register.
4861 It may not clobber any other registers. In fact, only ccalls clobber any
4863 Also, it may not modify the counter register (used by genCCall).
4865 Corollary: If a getRegister for a subexpression returns Fixed, you need
4866 not move it to a fresh temporary before evaluating the next subexpression.
4867 The Fixed register won't be modified.
4868 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4870 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4871 the value of the destination register.
-- PowerPC dyadic op.  When the right operand is an integer literal
-- that fits the instruction's (signed/unsigned) immediate field, use
-- the RIImm form; otherwise compute both operands into registers.
-- NOTE(review): the `= do` line of the first clause is missing from
-- this listing.
4874 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4875 | Just imm <- makeImmediate rep signed y
4877 (src1, code1) <- getSomeReg x
4878 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4879 return (Any (intSize rep) code)
-- Register/register fallback.
4881 trivialCode rep signed instr x y = do
4882 (src1, code1) <- getSomeReg x
4883 (src2, code2) <- getSomeReg y
4884 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4885 return (Any (intSize rep) code)
-- | Three-register dyadic instruction with no immediate form: evaluate
-- both operands into registers, then emit @instr dst src1 src2@.
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' size instr x y = do
    (reg_x, code_x) <- getSomeReg x
    (reg_y, code_y) <- getSomeReg y
    let assemble dst =
            (code_x `appOL` code_y) `snocOL` instr dst reg_x reg_y
    return (Any size assemble)
-- | As 'trivialCodeNoImm'', but for instruction constructors that take
-- the operand 'Size' explicitly; we partially apply it here.
trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm size instr = trivialCodeNoImm' size (instr size)
-- | PowerPC unary op: compute the argument into a register and append
-- a single @instr dst src@.
trivialUCode rep instr x = do
    (src_reg, src_code) <- getSomeReg x
    return $ Any rep (\dst -> src_code `snocOL` instr dst src_reg)
4904 -- There is no "remainder" instruction on the PPC, so we have to do it ourselves.
4906 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- Remainder = x - (x `div` y) * y, synthesised from divide + multiply
-- + subtract.  NOTE(review): the divide instruction entry and the
-- final subtract/closing-bracket lines of the toOL list are missing
-- from this listing.
4908 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4909 -> CmmExpr -> CmmExpr -> NatM Register
4910 remainderCode rep div x y = do
4911 (src1, code1) <- getSomeReg x
4912 (src2, code2) <- getSomeReg y
4913 let code dst = code1 `appOL` code2 `appOL` toOL [
4915 MULLW dst dst (RIReg src2),
4918 return (Any (intSize rep) code)
4920 #endif /* powerpc_TARGET_ARCH */
4923 -- -----------------------------------------------------------------------------
4924 -- Coercing to/from integer/floating-point...
4926 -- When going to integer, we truncate (round towards 0).
4928 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4929 -- conversions. We have to store temporaries in memory to move
4930 -- between the integer and the floating point register sets.
4932 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4933 -- pretend, on sparc at least, that double and float regs are separate
4934 -- kinds, so the value has to be computed into one kind before being
4935 -- explicitly "converted" to live in the other kind.
-- Shared type signatures for the coercion routines; the Dbl/Flt pair
-- exists only on SPARC, where float and double registers are treated
-- as separate kinds.
4937 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4938 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4940 #if sparc_TARGET_ARCH
4941 coerceDbl2Flt :: CmmExpr -> NatM Register
4942 coerceFlt2Dbl :: CmmExpr -> NatM Register
4945 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4947 #if alpha_TARGET_ARCH
-- Alpha coercions via a store/load bounce through the stack (spRel 0).
-- NOTE(review): the function-head lines (presumably `coerceInt2FP`
-- and `coerceFP2Int` — confirm against the original) and the ST/CVT
-- halves of the instruction lists are missing from this listing.
4950 = getRegister x `thenNat` \ register ->
4951 getNewRegNat IntRep `thenNat` \ reg ->
4953 code = registerCode register reg
4954 src = registerName register reg
4956 code__2 dst = code . mkSeqInstrs [
4958 LD TF dst (spRel 0),
4961 return (Any FF64 code__2)
-- Reverse direction: FP value stored, then reloaded as an integer.
4965 = getRegister x `thenNat` \ register ->
4966 getNewRegNat FF64 `thenNat` \ tmp ->
4968 code = registerCode register tmp
4969 src = registerName register tmp
4971 code__2 dst = code . mkSeqInstrs [
4973 ST TF tmp (spRel 0),
4976 return (Any IntRep code__2)
4978 #endif /* alpha_TARGET_ARCH */
4980 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4982 #if i386_TARGET_ARCH
-- i386 (x87 pseudo-insn) coercions.  NOTE(review): the `let` lines are
-- missing from this listing.  The case expressions cover only W32/W64;
-- other widths would be a pattern-match failure — presumably callers
-- guarantee this, TODO confirm.
4984 coerceInt2FP from to x = do
4985 (x_reg, x_code) <- getSomeReg x
4987 opc = case to of W32 -> GITOF; W64 -> GITOD
4988 code dst = x_code `snocOL` opc x_reg dst
4989 -- ToDo: works for non-II32 reps?
4990 return (Any (floatSize to) code)
-- Truncating float->int conversion (GFTOI/GDTOI).
4994 coerceFP2Int from to x = do
4995 (x_reg, x_code) <- getSomeReg x
4997 opc = case from of W32 -> GFTOI; W64 -> GDTOI
4998 code dst = x_code `snocOL` opc x_reg dst
4999 -- ToDo: works for non-II32 reps?
5001 return (Any (intSize to) code)
5003 #endif /* i386_TARGET_ARCH */
5005 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5007 #if x86_64_TARGET_ARCH
-- x86_64 SSE coercions.  CVTTSS2SIQ/CVTTSD2SIQ truncate toward zero,
-- matching the module's rounding policy.  NOTE(review): the `let`
-- lines are missing from this listing; the case expressions cover
-- only W32/W64.
5009 coerceFP2Int from to x = do
5010 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
5012 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
5013 code dst = x_code `snocOL` opc x_op dst
5015 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- Integer -> float, choosing single/double convert by target width.
5017 coerceInt2FP from to x = do
5018 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
5020 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
5021 code dst = x_code `snocOL` opc x_op dst
5023 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- Float <-> double; the direction is picked from the *target* width.
5025 coerceFP2FP :: Width -> CmmExpr -> NatM Register
5026 coerceFP2FP to x = do
5027 (x_reg, x_code) <- getSomeReg x
5029 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
5030 code dst = x_code `snocOL` opc x_reg dst
5032 return (Any (floatSize to) code)
5035 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5037 #if sparc_TARGET_ARCH
-- SPARC int -> float: bounce the integer through the stack (spRel (-2))
-- to move it into an FP register, then convert in place with FxTOy.
-- NOTE(review): the `let` lines and part of the FP2Int body are
-- missing from this listing.
5039 coerceInt2FP width1 width2 x = do
5040 (src, code) <- getSomeReg x
5042 code__2 dst = code `appOL` toOL [
5043 ST (intSize width1) src (spRel (-2)),
5044 LD (intSize width1) (spRel (-2)) dst,
5045 FxTOy (intSize width1) (floatSize width2) dst dst]
5046 return (Any (floatSize $ width2) code__2)
5049 -- | Coerce a floating point value to integer
5051 -- NOTE: On sparc v9 there are no instructions to move a value from an
5052 -- FP register directly to an int register, so we have to use a load/store.
5054 coerceFP2Int width1 width2 x
5055 = do let fsize1 = floatSize width1
5056 fsize2 = floatSize width2
5058 isize2 = intSize width2
5060 (fsrc, code) <- getSomeReg x
5061 fdst <- getNewRegNat fsize2
5066 -- convert float to int format, leaving it in a float reg.
5067 [ FxTOy fsize1 isize2 fsrc fdst
5069 -- store the int into mem, then load it back to move
5070 -- it into an actual int reg.
5071 , ST fsize2 fdst (spRel (-2))
5072 , LD isize2 (spRel (-2)) dst]
5074 return (Any isize2 code2)
-- | Narrow a double to a float with a single FxTOy conversion
-- (double and float registers are treated as separate kinds on SPARC).
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    let narrow dst = code `snocOL` FxTOy FF64 FF32 src dst
    return (Any FF32 narrow)
-- | Widen a float to a double with a single FxTOy conversion
-- (the mirror image of 'coerceDbl2Flt').
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    let widen dst = code `snocOL` FxTOy FF32 FF64 src dst
    return (Any FF64 widen)
5086 #endif /* sparc_TARGET_ARCH */
5088 #if powerpc_TARGET_ARCH
-- PowerPC int -> float using the classic 0x43300000_80000000 magic-
-- double trick: build the double 2^52 + (x xor 0x80000000) on the
-- stack, load it, and subtract the magic constant to recover x as a
-- float.  NOTE(review): several lines (the `let`, the LDATA directive
-- around the static literals, the LD of the magic constant, and the
-- wildcard arms of maybe_exts/maybe_frsp) are missing from this
-- listing; annotated as-is.
5089 coerceInt2FP fromRep toRep x = do
5090 (src, code) <- getSomeReg x
5091 lbl <- getNewLabelNat
5092 itmp <- getNewRegNat II32
5093 ftmp <- getNewRegNat FF64
5094 dflags <- getDynFlagsNat
5095 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
5096 Amode addr addr_code <- getAmode dynRef
5098 code' dst = code `appOL` maybe_exts `appOL` toOL [
-- Static data: the magic double 0x4330000080000000.
5101 CmmStaticLit (CmmInt 0x43300000 W32),
5102 CmmStaticLit (CmmInt 0x80000000 W32)],
-- Flip the sign bit of src and assemble the same bit pattern on the
-- stack, then load it as a double.
5103 XORIS itmp src (ImmInt 0x8000),
5104 ST II32 itmp (spRel 3),
5105 LIS itmp (ImmInt 0x4330),
5106 ST II32 itmp (spRel 2),
5107 LD FF64 ftmp (spRel 2)
5108 ] `appOL` addr_code `appOL` toOL [
5110 FSUB FF64 dst ftmp dst
5111 ] `appOL` maybe_frsp dst
-- Sign-extend sub-word sources first.
5113 maybe_exts = case fromRep of
5114 W8 -> unitOL $ EXTS II8 src src
5115 W16 -> unitOL $ EXTS II16 src src
-- Round to single precision only when the target is W32.
5117 maybe_frsp dst = case toRep of
5118 W32 -> unitOL $ FRSP dst dst
5120 return (Any (floatSize toRep) code')
-- Float -> int: convert in an FP register (the FCTIWZ line is missing
-- from this listing), then bounce through the stack to an int reg.
5122 coerceFP2Int fromRep toRep x = do
5123 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
5124 (src, code) <- getSomeReg x
5125 tmp <- getNewRegNat FF64
5127 code' dst = code `appOL` toOL [
5128 -- convert to int in FP reg
5130 -- store value (64bit) from FP to stack
5131 ST FF64 tmp (spRel 2),
5132 -- read low word of value (high word is undefined)
5133 LD II32 dst (spRel 3)]
5134 return (Any (intSize toRep) code')
5135 #endif /* powerpc_TARGET_ARCH */
5138 -- -----------------------------------------------------------------------------
5139 -- eXTRA_STK_ARGS_HERE
5141 -- We (allegedly) put the first six C-call arguments in registers;
5142 -- where do we start putting the rest of them?
5144 -- Moved from MachInstrs (SDM):
5146 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
5147 eXTRA_STK_ARGS_HERE :: Int
5149 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))