2 -- The above warning supression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr )
34 -- Our intermediate code:
35 import PprCmm ( pprExpr )
39 import ClosureInfo ( C_SRT(..) )
42 import StaticFlags ( opt_PIC )
43 import ForeignCall ( CCallConv(..) )
48 import FastBool ( isFastTrue )
49 import Constants ( wORD_SIZE )
52 import Outputable ( assertPanic )
53 import Debug.Trace ( trace )
55 import Debug.Trace ( trace )
57 import Control.Monad ( mapAndUnzipM )
58 import Data.Maybe ( fromJust )
63 -- -----------------------------------------------------------------------------
64 -- Top-level of the instruction selector
66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
67 -- They are really trees of insns to facilitate fast appending, where a
68 -- left-to-right traversal (pre-order?) yields the insns in the correct
71 type InstrBlock = OrdList Instr
73 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
74 cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
75 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
76 picBaseMb <- getPicBaseMaybeNat
77 let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
78 tops = proc : concat statics
80 Just picBase -> initializePicBase picBase tops
81 Nothing -> return tops
83 cmmTopCodeGen (CmmData sec dat) = do
84 return [CmmData sec dat] -- no translation, we just use CmmStatic
86 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
87 basicBlockCodeGen (BasicBlock id stmts) = do
88 instrs <- stmtsToInstrs stmts
89 -- code generation may introduce new basic block boundaries, which
90 -- are indicated by the NEWBLOCK instruction. We must split up the
91 -- instruction stream into basic blocks again. Also, we extract
94 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
96 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
97 = ([], BasicBlock id instrs : blocks, statics)
98 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
99 = (instrs, blocks, CmmData sec dat:statics)
100 mkBlocks instr (instrs,blocks,statics)
101 = (instr:instrs, blocks, statics)
103 return (BasicBlock id top : other_blocks, statics)
105 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
107 = do instrss <- mapM stmtToInstrs stmts
108 return (concatOL instrss)
110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
111 stmtToInstrs stmt = case stmt of
112 CmmNop -> return nilOL
113 CmmComment s -> return (unitOL (COMMENT s))
116 | isFloatingRep kind -> assignReg_FltCode kind reg src
117 #if WORD_SIZE_IN_BITS==32
118 | kind == I64 -> assignReg_I64Code reg src
120 | otherwise -> assignReg_IntCode kind reg src
121 where kind = cmmRegRep reg
124 | isFloatingRep kind -> assignMem_FltCode kind addr src
125 #if WORD_SIZE_IN_BITS==32
126 | kind == I64 -> assignMem_I64Code addr src
128 | otherwise -> assignMem_IntCode kind addr src
129 where kind = cmmExprRep src
131 CmmCall target result_regs args _ _
132 -> genCCall target result_regs args
134 CmmBranch id -> genBranch id
135 CmmCondBranch arg id -> genCondJump id arg
136 CmmSwitch arg ids -> genSwitch arg ids
137 CmmJump arg params -> genJump arg
139 -- -----------------------------------------------------------------------------
140 -- General things for putting together code sequences
142 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
143 -- CmmExprs into CmmRegOff?
144 mangleIndexTree :: CmmExpr -> CmmExpr
145 mangleIndexTree (CmmRegOff reg off)
146 = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
147 where rep = cmmRegRep reg
149 -- -----------------------------------------------------------------------------
150 -- Code gen for 64-bit arithmetic on 32-bit platforms
153 Simple support for generating 64-bit code (ie, 64 bit values and 64
154 bit assignments) on 32-bit platforms. Unlike the main code generator
155 we merely shoot for generating working code as simply as possible, and
156 pay little attention to code quality. Specifically, there is no
157 attempt to deal cleverly with the fixed-vs-floating register
158 distinction; all values are generated into (pairs of) floating
159 registers, even if this would mean some redundant reg-reg moves as a
160 result. Only one of the VRegUniques is returned, since it will be
161 of the VRegUniqueLo form, and the upper-half VReg can be determined
162 by applying getHiVRegFromLo to it.
165 data ChildCode64 -- a.k.a "Register64"
168 Reg -- the lower 32-bit temporary which contains the
169 -- result; use getHiVRegFromLo to find the other
170 -- VRegUnique. Rules of this simplified insn
171 -- selection game are therefore that the returned
172 -- Reg may be modified
174 #if WORD_SIZE_IN_BITS==32
175 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
176 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
179 #ifndef x86_64_TARGET_ARCH
180 iselExpr64 :: CmmExpr -> NatM ChildCode64
183 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
187 assignMem_I64Code addrTree valueTree = do
188 Amode addr addr_code <- getAmode addrTree
189 ChildCode64 vcode rlo <- iselExpr64 valueTree
191 rhi = getHiVRegFromLo rlo
193 -- Little-endian store
194 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
195 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
197 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
200 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
201 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
203 r_dst_lo = mkVReg u_dst I32
204 r_dst_hi = getHiVRegFromLo r_dst_lo
205 r_src_hi = getHiVRegFromLo r_src_lo
206 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
207 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
210 vcode `snocOL` mov_lo `snocOL` mov_hi
213 assignReg_I64Code lvalue valueTree
214 = panic "assignReg_I64Code(i386): invalid lvalue"
218 iselExpr64 (CmmLit (CmmInt i _)) = do
219 (rlo,rhi) <- getNewRegPairNat I32
221 r = fromIntegral (fromIntegral i :: Word32)
222 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
224 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
225 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
228 return (ChildCode64 code rlo)
230 iselExpr64 (CmmLoad addrTree I64) = do
231 Amode addr addr_code <- getAmode addrTree
232 (rlo,rhi) <- getNewRegPairNat I32
234 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
235 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
238 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
242 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
243 = return (ChildCode64 nilOL (mkVReg vu I32))
245 -- we handle addition, but rather badly
246 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
247 ChildCode64 code1 r1lo <- iselExpr64 e1
248 (rlo,rhi) <- getNewRegPairNat I32
250 r = fromIntegral (fromIntegral i :: Word32)
251 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
252 r1hi = getHiVRegFromLo r1lo
254 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
255 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
256 MOV I32 (OpReg r1hi) (OpReg rhi),
257 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
259 return (ChildCode64 code rlo)
261 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
262 ChildCode64 code1 r1lo <- iselExpr64 e1
263 ChildCode64 code2 r2lo <- iselExpr64 e2
264 (rlo,rhi) <- getNewRegPairNat I32
266 r1hi = getHiVRegFromLo r1lo
267 r2hi = getHiVRegFromLo r2lo
270 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
271 ADD I32 (OpReg r2lo) (OpReg rlo),
272 MOV I32 (OpReg r1hi) (OpReg rhi),
273 ADC I32 (OpReg r2hi) (OpReg rhi) ]
275 return (ChildCode64 code rlo)
277 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
279 r_dst_lo <- getNewRegNat I32
280 let r_dst_hi = getHiVRegFromLo r_dst_lo
283 ChildCode64 (code `snocOL`
284 MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
289 = pprPanic "iselExpr64(i386)" (ppr expr)
291 #endif /* i386_TARGET_ARCH */
293 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
295 #if sparc_TARGET_ARCH
297 assignMem_I64Code addrTree valueTree = do
298 Amode addr addr_code <- getAmode addrTree
299 ChildCode64 vcode rlo <- iselExpr64 valueTree
300 (src, code) <- getSomeReg addrTree
302 rhi = getHiVRegFromLo rlo
304 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
305 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
306 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
308 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
309 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
311 r_dst_lo = mkVReg u_dst pk
312 r_dst_hi = getHiVRegFromLo r_dst_lo
313 r_src_hi = getHiVRegFromLo r_src_lo
314 mov_lo = mkMOV r_src_lo r_dst_lo
315 mov_hi = mkMOV r_src_hi r_dst_hi
316 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
317 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
318 assignReg_I64Code lvalue valueTree
319 = panic "assignReg_I64Code(sparc): invalid lvalue"
322 -- Don't delete this -- it's very handy for debugging.
324 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
325 -- = panic "iselExpr64(???)"
327 iselExpr64 (CmmLoad addrTree I64) = do
328 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
329 rlo <- getNewRegNat I32
330 let rhi = getHiVRegFromLo rlo
331 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
332 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
334 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
338 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
339 r_dst_lo <- getNewRegNat I32
340 let r_dst_hi = getHiVRegFromLo r_dst_lo
341 r_src_lo = mkVReg uq I32
342 r_src_hi = getHiVRegFromLo r_src_lo
343 mov_lo = mkMOV r_src_lo r_dst_lo
344 mov_hi = mkMOV r_src_hi r_dst_hi
345 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
347 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
351 = pprPanic "iselExpr64(sparc)" (ppr expr)
353 #endif /* sparc_TARGET_ARCH */
355 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
357 #if powerpc_TARGET_ARCH
359 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
360 getI64Amodes addrTree = do
361 Amode hi_addr addr_code <- getAmode addrTree
362 case addrOffset hi_addr 4 of
363 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
364 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
365 return (AddrRegImm hi_ptr (ImmInt 0),
366 AddrRegImm hi_ptr (ImmInt 4),
369 assignMem_I64Code addrTree valueTree = do
370 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
371 ChildCode64 vcode rlo <- iselExpr64 valueTree
373 rhi = getHiVRegFromLo rlo
376 mov_hi = ST I32 rhi hi_addr
377 mov_lo = ST I32 rlo lo_addr
379 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
381 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
382 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
384 r_dst_lo = mkVReg u_dst I32
385 r_dst_hi = getHiVRegFromLo r_dst_lo
386 r_src_hi = getHiVRegFromLo r_src_lo
387 mov_lo = MR r_dst_lo r_src_lo
388 mov_hi = MR r_dst_hi r_src_hi
391 vcode `snocOL` mov_lo `snocOL` mov_hi
394 assignReg_I64Code lvalue valueTree
395 = panic "assignReg_I64Code(powerpc): invalid lvalue"
398 -- Don't delete this -- it's very handy for debugging.
400 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
401 -- = panic "iselExpr64(???)"
403 iselExpr64 (CmmLoad addrTree I64) = do
404 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
405 (rlo, rhi) <- getNewRegPairNat I32
406 let mov_hi = LD I32 rhi hi_addr
407 mov_lo = LD I32 rlo lo_addr
408 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
411 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
412 = return (ChildCode64 nilOL (mkVReg vu I32))
414 iselExpr64 (CmmLit (CmmInt i _)) = do
415 (rlo,rhi) <- getNewRegPairNat I32
417 half0 = fromIntegral (fromIntegral i :: Word16)
418 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
419 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
420 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
423 LIS rlo (ImmInt half1),
424 OR rlo rlo (RIImm $ ImmInt half0),
425 LIS rhi (ImmInt half3),
426 OR rlo rlo (RIImm $ ImmInt half2)
429 return (ChildCode64 code rlo)
431 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
432 ChildCode64 code1 r1lo <- iselExpr64 e1
433 ChildCode64 code2 r2lo <- iselExpr64 e2
434 (rlo,rhi) <- getNewRegPairNat I32
436 r1hi = getHiVRegFromLo r1lo
437 r2hi = getHiVRegFromLo r2lo
440 toOL [ ADDC rlo r1lo r2lo,
443 return (ChildCode64 code rlo)
445 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
446 (expr_reg,expr_code) <- getSomeReg expr
447 (rlo, rhi) <- getNewRegPairNat I32
448 let mov_hi = LI rhi (ImmInt 0)
449 mov_lo = MR rlo expr_reg
450 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
453 = pprPanic "iselExpr64(powerpc)" (ppr expr)
455 #endif /* powerpc_TARGET_ARCH */
458 -- -----------------------------------------------------------------------------
459 -- The 'Register' type
461 -- 'Register's passed up the tree. If the stix code forces the register
462 -- to live in a pre-decided machine register, it comes out as @Fixed@;
463 -- otherwise, it comes out as @Any@, and the parent can decide which
464 -- register to put it in.
467 = Fixed MachRep Reg InstrBlock
468 | Any MachRep (Reg -> InstrBlock)
470 swizzleRegisterRep :: Register -> MachRep -> Register
471 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
472 swizzleRegisterRep (Any _ codefn) rep = Any rep codefn
475 -- -----------------------------------------------------------------------------
476 -- Utils based on getRegister, below
478 -- The dual to getAnyReg: compute an expression into a register, but
479 -- we don't mind which one it is.
480 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
482 r <- getRegister expr
485 tmp <- getNewRegNat rep
486 return (tmp, code tmp)
490 -- -----------------------------------------------------------------------------
491 -- Grab the Reg for a CmmReg
493 getRegisterReg :: CmmReg -> Reg
495 getRegisterReg (CmmLocal (LocalReg u pk _))
498 getRegisterReg (CmmGlobal mid)
499 = case get_GlobalReg_reg_or_addr mid of
500 Left (RealReg rrno) -> RealReg rrno
501 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
502 -- By this stage, the only MagicIds remaining should be the
503 -- ones which map to a real machine register on this
504 -- platform. Hence ...
507 -- -----------------------------------------------------------------------------
508 -- Generate code to get a subtree into a Register
510 -- Don't delete this -- it's very handy for debugging.
512 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
513 -- = panic "getRegister(???)"
515 getRegister :: CmmExpr -> NatM Register
517 #if !x86_64_TARGET_ARCH
518 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
519 -- register, it can only be used for rip-relative addressing.
520 getRegister (CmmReg (CmmGlobal PicBaseReg))
522 reg <- getPicBaseNat wordRep
523 return (Fixed wordRep reg nilOL)
526 getRegister (CmmReg reg)
527 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
529 getRegister tree@(CmmRegOff _ _)
530 = getRegister (mangleIndexTree tree)
533 #if WORD_SIZE_IN_BITS==32
534 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
535 -- TO_W_(x), TO_W_(x >> 32)
537 getRegister (CmmMachOp (MO_U_Conv I64 I32)
538 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
539 ChildCode64 code rlo <- iselExpr64 x
540 return $ Fixed I32 (getHiVRegFromLo rlo) code
542 getRegister (CmmMachOp (MO_S_Conv I64 I32)
543 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
544 ChildCode64 code rlo <- iselExpr64 x
545 return $ Fixed I32 (getHiVRegFromLo rlo) code
547 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
548 ChildCode64 code rlo <- iselExpr64 x
549 return $ Fixed I32 rlo code
551 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
552 ChildCode64 code rlo <- iselExpr64 x
553 return $ Fixed I32 rlo code
557 -- end of machine-"independent" bit; here we go on the rest...
559 #if alpha_TARGET_ARCH
561 getRegister (StDouble d)
562 = getBlockIdNat `thenNat` \ lbl ->
563 getNewRegNat PtrRep `thenNat` \ tmp ->
564 let code dst = mkSeqInstrs [
565 LDATA RoDataSegment lbl [
566 DATA TF [ImmLab (rational d)]
568 LDA tmp (AddrImm (ImmCLbl lbl)),
569 LD TF dst (AddrReg tmp)]
571 return (Any F64 code)
573 getRegister (StPrim primop [x]) -- unary PrimOps
575 IntNegOp -> trivialUCode (NEG Q False) x
577 NotOp -> trivialUCode NOT x
579 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
580 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
582 OrdOp -> coerceIntCode IntRep x
585 Float2IntOp -> coerceFP2Int x
586 Int2FloatOp -> coerceInt2FP pr x
587 Double2IntOp -> coerceFP2Int x
588 Int2DoubleOp -> coerceInt2FP pr x
590 Double2FloatOp -> coerceFltCode x
591 Float2DoubleOp -> coerceFltCode x
593 other_op -> getRegister (StCall fn CCallConv F64 [x])
595 fn = case other_op of
596 FloatExpOp -> FSLIT("exp")
597 FloatLogOp -> FSLIT("log")
598 FloatSqrtOp -> FSLIT("sqrt")
599 FloatSinOp -> FSLIT("sin")
600 FloatCosOp -> FSLIT("cos")
601 FloatTanOp -> FSLIT("tan")
602 FloatAsinOp -> FSLIT("asin")
603 FloatAcosOp -> FSLIT("acos")
604 FloatAtanOp -> FSLIT("atan")
605 FloatSinhOp -> FSLIT("sinh")
606 FloatCoshOp -> FSLIT("cosh")
607 FloatTanhOp -> FSLIT("tanh")
608 DoubleExpOp -> FSLIT("exp")
609 DoubleLogOp -> FSLIT("log")
610 DoubleSqrtOp -> FSLIT("sqrt")
611 DoubleSinOp -> FSLIT("sin")
612 DoubleCosOp -> FSLIT("cos")
613 DoubleTanOp -> FSLIT("tan")
614 DoubleAsinOp -> FSLIT("asin")
615 DoubleAcosOp -> FSLIT("acos")
616 DoubleAtanOp -> FSLIT("atan")
617 DoubleSinhOp -> FSLIT("sinh")
618 DoubleCoshOp -> FSLIT("cosh")
619 DoubleTanhOp -> FSLIT("tanh")
621 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
623 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
625 CharGtOp -> trivialCode (CMP LTT) y x
626 CharGeOp -> trivialCode (CMP LE) y x
627 CharEqOp -> trivialCode (CMP EQQ) x y
628 CharNeOp -> int_NE_code x y
629 CharLtOp -> trivialCode (CMP LTT) x y
630 CharLeOp -> trivialCode (CMP LE) x y
632 IntGtOp -> trivialCode (CMP LTT) y x
633 IntGeOp -> trivialCode (CMP LE) y x
634 IntEqOp -> trivialCode (CMP EQQ) x y
635 IntNeOp -> int_NE_code x y
636 IntLtOp -> trivialCode (CMP LTT) x y
637 IntLeOp -> trivialCode (CMP LE) x y
639 WordGtOp -> trivialCode (CMP ULT) y x
640 WordGeOp -> trivialCode (CMP ULE) x y
641 WordEqOp -> trivialCode (CMP EQQ) x y
642 WordNeOp -> int_NE_code x y
643 WordLtOp -> trivialCode (CMP ULT) x y
644 WordLeOp -> trivialCode (CMP ULE) x y
646 AddrGtOp -> trivialCode (CMP ULT) y x
647 AddrGeOp -> trivialCode (CMP ULE) y x
648 AddrEqOp -> trivialCode (CMP EQQ) x y
649 AddrNeOp -> int_NE_code x y
650 AddrLtOp -> trivialCode (CMP ULT) x y
651 AddrLeOp -> trivialCode (CMP ULE) x y
653 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
654 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
655 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
656 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
657 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
658 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
660 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
661 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
662 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
663 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
664 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
665 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
667 IntAddOp -> trivialCode (ADD Q False) x y
668 IntSubOp -> trivialCode (SUB Q False) x y
669 IntMulOp -> trivialCode (MUL Q False) x y
670 IntQuotOp -> trivialCode (DIV Q False) x y
671 IntRemOp -> trivialCode (REM Q False) x y
673 WordAddOp -> trivialCode (ADD Q False) x y
674 WordSubOp -> trivialCode (SUB Q False) x y
675 WordMulOp -> trivialCode (MUL Q False) x y
676 WordQuotOp -> trivialCode (DIV Q True) x y
677 WordRemOp -> trivialCode (REM Q True) x y
679 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
680 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
681 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
682 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
684 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
685 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
686 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
687 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
689 AddrAddOp -> trivialCode (ADD Q False) x y
690 AddrSubOp -> trivialCode (SUB Q False) x y
691 AddrRemOp -> trivialCode (REM Q True) x y
693 AndOp -> trivialCode AND x y
694 OrOp -> trivialCode OR x y
695 XorOp -> trivialCode XOR x y
696 SllOp -> trivialCode SLL x y
697 SrlOp -> trivialCode SRL x y
699 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
700 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
701 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
703 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
704 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
706 {- ------------------------------------------------------------
707 Some bizarre special code for getting condition codes into
708 registers. Integer non-equality is a test for equality
709 followed by an XOR with 1. (Integer comparisons always set
710 the result register to 0 or 1.) Floating point comparisons of
711 any kind leave the result in a floating point register, so we
712 need to wrangle an integer register out of things.
714 int_NE_code :: StixTree -> StixTree -> NatM Register
717 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
718 getNewRegNat IntRep `thenNat` \ tmp ->
720 code = registerCode register tmp
721 src = registerName register tmp
722 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
724 return (Any IntRep code__2)
726 {- ------------------------------------------------------------
727 Comments for int_NE_code also apply to cmpF_code
730 :: (Reg -> Reg -> Reg -> Instr)
732 -> StixTree -> StixTree
735 cmpF_code instr cond x y
736 = trivialFCode pr instr x y `thenNat` \ register ->
737 getNewRegNat F64 `thenNat` \ tmp ->
738 getBlockIdNat `thenNat` \ lbl ->
740 code = registerCode register tmp
741 result = registerName register tmp
743 code__2 dst = code . mkSeqInstrs [
744 OR zeroh (RIImm (ImmInt 1)) dst,
745 BF cond result (ImmCLbl lbl),
746 OR zeroh (RIReg zeroh) dst,
749 return (Any IntRep code__2)
751 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
752 ------------------------------------------------------------
754 getRegister (CmmLoad pk mem)
755 = getAmode mem `thenNat` \ amode ->
757 code = amodeCode amode
758 src = amodeAddr amode
759 size = primRepToSize pk
760 code__2 dst = code . mkSeqInstr (LD size dst src)
762 return (Any pk code__2)
764 getRegister (StInt i)
767 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
769 return (Any IntRep code)
772 code dst = mkSeqInstr (LDI Q dst src)
774 return (Any IntRep code)
776 src = ImmInt (fromInteger i)
781 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
783 return (Any PtrRep code)
786 imm__2 = case imm of Just x -> x
788 #endif /* alpha_TARGET_ARCH */
790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
794 getRegister (CmmLit (CmmFloat f F32)) = do
795 lbl <- getNewLabelNat
796 dflags <- getDynFlagsNat
797 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
798 Amode addr addr_code <- getAmode dynRef
802 CmmStaticLit (CmmFloat f F32)]
803 `consOL` (addr_code `snocOL`
806 return (Any F32 code)
809 getRegister (CmmLit (CmmFloat d F64))
811 = let code dst = unitOL (GLDZ dst)
812 in return (Any F64 code)
815 = let code dst = unitOL (GLD1 dst)
816 in return (Any F64 code)
819 lbl <- getNewLabelNat
820 dflags <- getDynFlagsNat
821 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
822 Amode addr addr_code <- getAmode dynRef
826 CmmStaticLit (CmmFloat d F64)]
827 `consOL` (addr_code `snocOL`
830 return (Any F64 code)
832 #endif /* i386_TARGET_ARCH */
834 #if x86_64_TARGET_ARCH
836 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
837 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
838 -- I don't know why there are xorpd, xorps, and pxor instructions.
839 -- They all appear to do the same thing --SDM
840 return (Any rep code)
842 getRegister (CmmLit (CmmFloat f rep)) = do
843 lbl <- getNewLabelNat
844 let code dst = toOL [
847 CmmStaticLit (CmmFloat f rep)],
848 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
851 return (Any rep code)
853 #endif /* x86_64_TARGET_ARCH */
855 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
857 -- catch simple cases of zero- or sign-extended load
858 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
859 code <- intLoadCode (MOVZxL I8) addr
860 return (Any I32 code)
862 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
863 code <- intLoadCode (MOVSxL I8) addr
864 return (Any I32 code)
866 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
867 code <- intLoadCode (MOVZxL I16) addr
868 return (Any I32 code)
870 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
871 code <- intLoadCode (MOVSxL I16) addr
872 return (Any I32 code)
876 #if x86_64_TARGET_ARCH
878 -- catch simple cases of zero- or sign-extended load
879 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
880 code <- intLoadCode (MOVZxL I8) addr
881 return (Any I64 code)
883 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
884 code <- intLoadCode (MOVSxL I8) addr
885 return (Any I64 code)
887 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
888 code <- intLoadCode (MOVZxL I16) addr
889 return (Any I64 code)
891 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
892 code <- intLoadCode (MOVSxL I16) addr
893 return (Any I64 code)
895 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
896 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
897 return (Any I64 code)
899 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
900 code <- intLoadCode (MOVSxL I32) addr
901 return (Any I64 code)
905 #if x86_64_TARGET_ARCH
906 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
907 CmmLit displacement])
908 = return $ Any I64 (\dst -> unitOL $
909 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
912 #if x86_64_TARGET_ARCH
913 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
914 x_code <- getAnyReg x
915 lbl <- getNewLabelNat
917 code dst = x_code dst `appOL` toOL [
918 -- This is how gcc does it, so it can't be that bad:
919 LDATA ReadOnlyData16 [
922 CmmStaticLit (CmmInt 0x80000000 I32),
923 CmmStaticLit (CmmInt 0 I32),
924 CmmStaticLit (CmmInt 0 I32),
925 CmmStaticLit (CmmInt 0 I32)
927 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
928 -- xorps, so we need the 128-bit constant
929 -- ToDo: rip-relative
932 return (Any F32 code)
934 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
935 x_code <- getAnyReg x
936 lbl <- getNewLabelNat
938 -- This is how gcc does it, so it can't be that bad:
939 code dst = x_code dst `appOL` toOL [
940 LDATA ReadOnlyData16 [
943 CmmStaticLit (CmmInt 0x8000000000000000 I64),
944 CmmStaticLit (CmmInt 0 I64)
946 -- gcc puts an unpck here. Wonder if we need it.
947 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
948 -- xorpd, so we need the 128-bit constant
951 return (Any F64 code)
954 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
956 getRegister (CmmMachOp mop [x]) -- unary MachOps
959 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
960 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
963 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
964 MO_Not rep -> trivialUCode rep (NOT rep) x
967 MO_U_Conv I32 I8 -> toI8Reg I32 x
968 MO_S_Conv I32 I8 -> toI8Reg I32 x
969 MO_U_Conv I16 I8 -> toI8Reg I16 x
970 MO_S_Conv I16 I8 -> toI8Reg I16 x
971 MO_U_Conv I32 I16 -> toI16Reg I32 x
972 MO_S_Conv I32 I16 -> toI16Reg I32 x
973 #if x86_64_TARGET_ARCH
974 MO_U_Conv I64 I32 -> conversionNop I64 x
975 MO_S_Conv I64 I32 -> conversionNop I64 x
976 MO_U_Conv I64 I16 -> toI16Reg I64 x
977 MO_S_Conv I64 I16 -> toI16Reg I64 x
978 MO_U_Conv I64 I8 -> toI8Reg I64 x
979 MO_S_Conv I64 I8 -> toI8Reg I64 x
982 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
983 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
986 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
987 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
988 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
990 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
991 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
992 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
994 #if x86_64_TARGET_ARCH
995 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
996 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
997 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
998 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
999 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
1000 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
1001 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1002 -- However, we don't want the register allocator to throw it
1003 -- away as an unnecessary reg-to-reg move, so we keep it in
1004 -- the form of a movzl and print it as a movl later.
1007 #if i386_TARGET_ARCH
1008 MO_S_Conv F32 F64 -> conversionNop F64 x
1009 MO_S_Conv F64 F32 -> conversionNop F32 x
1011 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1012 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1016 | isFloatingRep from -> coerceFP2Int from to x
1017 | isFloatingRep to -> coerceInt2FP from to x
1019 other -> pprPanic "getRegister" (pprMachOp mop)
1021 -- signed or unsigned extension.
1022 integerExtend from to instr expr = do
1023 (reg,e_code) <- if from == I8 then getByteReg expr
1024 else getSomeReg expr
1028 instr from (OpReg reg) (OpReg dst)
1029 return (Any to code)
1031 toI8Reg new_rep expr
1032 = do codefn <- getAnyReg expr
1033 return (Any new_rep codefn)
1034 -- HACK: use getAnyReg to get a byte-addressable register.
1035 -- If the source was a Fixed register, this will add the
1036 -- mov instruction to put it into the desired destination.
1037 -- We're assuming that the destination won't be a fixed
1038 -- non-byte-addressable register; it won't be, because all
1039 -- fixed registers are word-sized.
1041 toI16Reg = toI8Reg -- for now
1043 conversionNop new_rep expr
1044 = do e_code <- getRegister expr
1045 return (swizzleRegisterRep e_code new_rep)
1048 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1050 MO_Eq F32 -> condFltReg EQQ x y
1051 MO_Ne F32 -> condFltReg NE x y
1052 MO_S_Gt F32 -> condFltReg GTT x y
1053 MO_S_Ge F32 -> condFltReg GE x y
1054 MO_S_Lt F32 -> condFltReg LTT x y
1055 MO_S_Le F32 -> condFltReg LE x y
1057 MO_Eq F64 -> condFltReg EQQ x y
1058 MO_Ne F64 -> condFltReg NE x y
1059 MO_S_Gt F64 -> condFltReg GTT x y
1060 MO_S_Ge F64 -> condFltReg GE x y
1061 MO_S_Lt F64 -> condFltReg LTT x y
1062 MO_S_Le F64 -> condFltReg LE x y
1064 MO_Eq rep -> condIntReg EQQ x y
1065 MO_Ne rep -> condIntReg NE x y
1067 MO_S_Gt rep -> condIntReg GTT x y
1068 MO_S_Ge rep -> condIntReg GE x y
1069 MO_S_Lt rep -> condIntReg LTT x y
1070 MO_S_Le rep -> condIntReg LE x y
1072 MO_U_Gt rep -> condIntReg GU x y
1073 MO_U_Ge rep -> condIntReg GEU x y
1074 MO_U_Lt rep -> condIntReg LU x y
1075 MO_U_Le rep -> condIntReg LEU x y
1077 #if i386_TARGET_ARCH
1078 MO_Add F32 -> trivialFCode F32 GADD x y
1079 MO_Sub F32 -> trivialFCode F32 GSUB x y
1081 MO_Add F64 -> trivialFCode F64 GADD x y
1082 MO_Sub F64 -> trivialFCode F64 GSUB x y
1084 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1085 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1088 #if x86_64_TARGET_ARCH
1089 MO_Add F32 -> trivialFCode F32 ADD x y
1090 MO_Sub F32 -> trivialFCode F32 SUB x y
1092 MO_Add F64 -> trivialFCode F64 ADD x y
1093 MO_Sub F64 -> trivialFCode F64 SUB x y
1095 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1096 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1099 MO_Add rep -> add_code rep x y
1100 MO_Sub rep -> sub_code rep x y
1102 MO_S_Quot rep -> div_code rep True True x y
1103 MO_S_Rem rep -> div_code rep True False x y
1104 MO_U_Quot rep -> div_code rep False True x y
1105 MO_U_Rem rep -> div_code rep False False x y
1107 #if i386_TARGET_ARCH
1108 MO_Mul F32 -> trivialFCode F32 GMUL x y
1109 MO_Mul F64 -> trivialFCode F64 GMUL x y
1112 #if x86_64_TARGET_ARCH
1113 MO_Mul F32 -> trivialFCode F32 MUL x y
1114 MO_Mul F64 -> trivialFCode F64 MUL x y
1117 MO_Mul rep -> let op = IMUL rep in
1118 trivialCode rep op (Just op) x y
1120 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1122 MO_And rep -> let op = AND rep in
1123 trivialCode rep op (Just op) x y
1124 MO_Or rep -> let op = OR rep in
1125 trivialCode rep op (Just op) x y
1126 MO_Xor rep -> let op = XOR rep in
1127 trivialCode rep op (Just op) x y
1129 {- Shift ops on x86s have constraints on their source, it
1130 either has to be Imm, CL or 1
1131 => trivialCode is not restrictive enough (sigh.)
1133 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1134 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1135 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1137 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1139 --------------------
1140 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1141 imulMayOflo rep a b = do
1142 (a_reg, a_code) <- getNonClobberedReg a
1143 b_code <- getAnyReg b
1145 shift_amt = case rep of
1148 _ -> panic "shift_amt"
1150 code = a_code `appOL` b_code eax `appOL`
1152 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1153 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1154 -- sign extend lower part
1155 SUB rep (OpReg edx) (OpReg eax)
1156 -- compare against upper
1157 -- eax==0 if high part == sign extended low part
1160 return (Fixed rep eax code)
1162 --------------------
1163 shift_code :: MachRep
1164 -> (Operand -> Operand -> Instr)
1169 {- Case1: shift length as immediate -}
1170 shift_code rep instr x y@(CmmLit lit) = do
1171 x_code <- getAnyReg x
1174 = x_code dst `snocOL`
1175 instr (OpImm (litToImm lit)) (OpReg dst)
1177 return (Any rep code)
1179 {- Case2: shift length is complex (non-immediate)
1180 * y must go in %ecx.
1181 * we cannot do y first *and* put its result in %ecx, because
1182 %ecx might be clobbered by x.
1183 * if we do y second, then x cannot be
1184 in a clobbered reg. Also, we cannot clobber x's reg
1185 with the instruction itself.
1187 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1188 - do y second and put its result into %ecx. x gets placed in a fresh
1189 tmp. This is likely to be better, becuase the reg alloc can
1190 eliminate this reg->reg move here (it won't eliminate the other one,
1191 because the move is into the fixed %ecx).
1193 shift_code rep instr x y{-amount-} = do
1194 x_code <- getAnyReg x
1195 tmp <- getNewRegNat rep
1196 y_code <- getAnyReg y
1198 code = x_code tmp `appOL`
1200 instr (OpReg ecx) (OpReg tmp)
1202 return (Fixed rep tmp code)
1204 --------------------
1205 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1206 add_code rep x (CmmLit (CmmInt y _))
1207 | not (is64BitInteger y) = add_int rep x y
1208 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1210 --------------------
1211 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1212 sub_code rep x (CmmLit (CmmInt y _))
1213 | not (is64BitInteger (-y)) = add_int rep x (-y)
1214 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1216 -- our three-operand add instruction:
1217 add_int rep x y = do
1218 (x_reg, x_code) <- getSomeReg x
1220 imm = ImmInt (fromInteger y)
1224 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1227 return (Any rep code)
1229 ----------------------
1230 div_code rep signed quotient x y = do
1231 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1232 x_code <- getAnyReg x
1234 widen | signed = CLTD rep
1235 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1237 instr | signed = IDIV
1240 code = y_code `appOL`
1242 toOL [widen, instr rep y_op]
1244 result | quotient = eax
1248 return (Fixed rep result code)
1251 getRegister (CmmLoad mem pk)
1254 Amode src mem_code <- getAmode mem
1256 code dst = mem_code `snocOL`
1257 IF_ARCH_i386(GLD pk src dst,
1258 MOV pk (OpAddr src) (OpReg dst))
1260 return (Any pk code)
1262 #if i386_TARGET_ARCH
1263 getRegister (CmmLoad mem pk)
1266 code <- intLoadCode (instr pk) mem
1267 return (Any pk code)
1269 instr I8 = MOVZxL pk
1272 -- we always zero-extend 8-bit loads, if we
1273 -- can't think of anything better. This is because
1274 -- we can't guarantee access to an 8-bit variant of every register
1275 -- (esi and edi don't have 8-bit variants), so to make things
1276 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1279 #if x86_64_TARGET_ARCH
1280 -- Simpler memory load code on x86_64
1281 getRegister (CmmLoad mem pk)
1283 code <- intLoadCode (MOV pk) mem
1284 return (Any pk code)
1287 getRegister (CmmLit (CmmInt 0 rep))
1289 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1290 adj_rep = case rep of I64 -> I32; _ -> rep
1291 rep1 = IF_ARCH_i386( rep, adj_rep )
1293 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1295 return (Any rep code)
1297 #if x86_64_TARGET_ARCH
1298 -- optimisation for loading small literals on x86_64: take advantage
1299 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1300 -- instruction forms are shorter.
1301 getRegister (CmmLit lit)
1302 | I64 <- cmmLitRep lit, not (isBigLit lit)
1305 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1307 return (Any I64 code)
1309 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1311 -- note1: not the same as is64BitLit, because that checks for
1312 -- signed literals that fit in 32 bits, but we want unsigned
1314 -- note2: all labels are small, because we're assuming the
1315 -- small memory model (see gcc docs, -mcmodel=small).
1318 getRegister (CmmLit lit)
1322 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1324 return (Any rep code)
1326 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1329 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1330 -> NatM (Reg -> InstrBlock)
1331 intLoadCode instr mem = do
1332 Amode src mem_code <- getAmode mem
1333 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1335 -- Compute an expression into *any* register, adding the appropriate
1336 -- move instruction if necessary.
1337 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1339 r <- getRegister expr
1342 anyReg :: Register -> NatM (Reg -> InstrBlock)
1343 anyReg (Any _ code) = return code
1344 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1346 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1347 -- Fixed registers might not be byte-addressable, so we make sure we've
1348 -- got a temporary, inserting an extra reg copy if necessary.
1349 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1350 #if x86_64_TARGET_ARCH
1351 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1353 getByteReg expr = do
1354 r <- getRegister expr
1357 tmp <- getNewRegNat rep
1358 return (tmp, code tmp)
1360 | isVirtualReg reg -> return (reg,code)
1362 tmp <- getNewRegNat rep
1363 return (tmp, code `snocOL` reg2reg rep reg tmp)
1364 -- ToDo: could optimise slightly by checking for byte-addressable
1365 -- real registers, but that will happen very rarely if at all.
1368 -- Another variant: this time we want the result in a register that cannot
1369 -- be modified by code to evaluate an arbitrary expression.
1370 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1371 getNonClobberedReg expr = do
1372 r <- getRegister expr
1375 tmp <- getNewRegNat rep
1376 return (tmp, code tmp)
1378 -- only free regs can be clobbered
1379 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1380 tmp <- getNewRegNat rep
1381 return (tmp, code `snocOL` reg2reg rep reg tmp)
1385 reg2reg :: MachRep -> Reg -> Reg -> Instr
1387 #if i386_TARGET_ARCH
1388 | isFloatingRep rep = GMOV src dst
1390 | otherwise = MOV rep (OpReg src) (OpReg dst)
1392 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1394 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1396 #if sparc_TARGET_ARCH
1398 getRegister (CmmLit (CmmFloat f F32)) = do
1399 lbl <- getNewLabelNat
1400 let code dst = toOL [
1403 CmmStaticLit (CmmFloat f F32)],
1404 SETHI (HI (ImmCLbl lbl)) dst,
1405 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1406 return (Any F32 code)
1408 getRegister (CmmLit (CmmFloat d F64)) = do
1409 lbl <- getNewLabelNat
1410 let code dst = toOL [
1413 CmmStaticLit (CmmFloat d F64)],
1414 SETHI (HI (ImmCLbl lbl)) dst,
1415 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1416 return (Any F64 code)
1418 getRegister (CmmMachOp mop [x]) -- unary MachOps
1420 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1421 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1423 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1424 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1426 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1428 MO_U_Conv F64 F32-> coerceDbl2Flt x
1429 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1431 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1432 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1433 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1434 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1436 -- Conversions which are a nop on sparc
1438 | from == to -> conversionNop to x
1439 MO_U_Conv I32 to -> conversionNop to x
1440 MO_S_Conv I32 to -> conversionNop to x
1443 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1444 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1445 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1446 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1448 other_op -> panic "Unknown unary mach op"
1451 integerExtend signed from to expr = do
1452 (reg, e_code) <- getSomeReg expr
1456 ((if signed then SRA else SRL)
1457 reg (RIImm (ImmInt 0)) dst)
1458 return (Any to code)
1459 conversionNop new_rep expr
1460 = do e_code <- getRegister expr
1461 return (swizzleRegisterRep e_code new_rep)
1463 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1465 MO_Eq F32 -> condFltReg EQQ x y
1466 MO_Ne F32 -> condFltReg NE x y
1468 MO_S_Gt F32 -> condFltReg GTT x y
1469 MO_S_Ge F32 -> condFltReg GE x y
1470 MO_S_Lt F32 -> condFltReg LTT x y
1471 MO_S_Le F32 -> condFltReg LE x y
1473 MO_Eq F64 -> condFltReg EQQ x y
1474 MO_Ne F64 -> condFltReg NE x y
1476 MO_S_Gt F64 -> condFltReg GTT x y
1477 MO_S_Ge F64 -> condFltReg GE x y
1478 MO_S_Lt F64 -> condFltReg LTT x y
1479 MO_S_Le F64 -> condFltReg LE x y
1481 MO_Eq rep -> condIntReg EQQ x y
1482 MO_Ne rep -> condIntReg NE x y
1484 MO_S_Gt rep -> condIntReg GTT x y
1485 MO_S_Ge rep -> condIntReg GE x y
1486 MO_S_Lt rep -> condIntReg LTT x y
1487 MO_S_Le rep -> condIntReg LE x y
1489 MO_U_Gt I32 -> condIntReg GTT x y
1490 MO_U_Ge I32 -> condIntReg GE x y
1491 MO_U_Lt I32 -> condIntReg LTT x y
1492 MO_U_Le I32 -> condIntReg LE x y
1494 MO_U_Gt I16 -> condIntReg GU x y
1495 MO_U_Ge I16 -> condIntReg GEU x y
1496 MO_U_Lt I16 -> condIntReg LU x y
1497 MO_U_Le I16 -> condIntReg LEU x y
1499 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1500 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1502 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1504 -- ToDo: teach about V8+ SPARC div instructions
1505 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1506 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1507 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1508 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1510 MO_Add F32 -> trivialFCode F32 FADD x y
1511 MO_Sub F32 -> trivialFCode F32 FSUB x y
1512 MO_Mul F32 -> trivialFCode F32 FMUL x y
1513 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1515 MO_Add F64 -> trivialFCode F64 FADD x y
1516 MO_Sub F64 -> trivialFCode F64 FSUB x y
1517 MO_Mul F64 -> trivialFCode F64 FMUL x y
1518 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1520 MO_And rep -> trivialCode rep (AND False) x y
1521 MO_Or rep -> trivialCode rep (OR False) x y
1522 MO_Xor rep -> trivialCode rep (XOR False) x y
1524 MO_Mul rep -> trivialCode rep (SMUL False) x y
1526 MO_Shl rep -> trivialCode rep SLL x y
1527 MO_U_Shr rep -> trivialCode rep SRL x y
1528 MO_S_Shr rep -> trivialCode rep SRA x y
1531 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1532 [promote x, promote y])
1533 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1534 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1537 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1539 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1541 --------------------
1542 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1543 imulMayOflo rep a b = do
1544 (a_reg, a_code) <- getSomeReg a
1545 (b_reg, b_code) <- getSomeReg b
1546 res_lo <- getNewRegNat I32
1547 res_hi <- getNewRegNat I32
1549 shift_amt = case rep of
1552 _ -> panic "shift_amt"
1553 code dst = a_code `appOL` b_code `appOL`
1555 SMUL False a_reg (RIReg b_reg) res_lo,
1557 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1558 SUB False False res_lo (RIReg res_hi) dst
1560 return (Any I32 code)
1562 getRegister (CmmLoad mem pk) = do
1563 Amode src code <- getAmode mem
1565 code__2 dst = code `snocOL` LD pk src dst
1566 return (Any pk code__2)
1568 getRegister (CmmLit (CmmInt i _))
1571 src = ImmInt (fromInteger i)
1572 code dst = unitOL (OR False g0 (RIImm src) dst)
1574 return (Any I32 code)
1576 getRegister (CmmLit lit)
1577 = let rep = cmmLitRep lit
1581 OR False dst (RIImm (LO imm)) dst]
1582 in return (Any I32 code)
1584 #endif /* sparc_TARGET_ARCH */
1586 #if powerpc_TARGET_ARCH
1587 getRegister (CmmLoad mem pk)
1590 Amode addr addr_code <- getAmode mem
1591 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1592 addr_code `snocOL` LD pk dst addr
1593 return (Any pk code)
1595 -- catch simple cases of zero- or sign-extended load
1596 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1597 Amode addr addr_code <- getAmode mem
1598 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1600 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1602 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1603 Amode addr addr_code <- getAmode mem
1604 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1606 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1607 Amode addr addr_code <- getAmode mem
1608 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1610 getRegister (CmmMachOp mop [x]) -- unary MachOps
1612 MO_Not rep -> trivialUCode rep NOT x
1614 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1615 MO_S_Conv F32 F64 -> conversionNop F64 x
1618 | from == to -> conversionNop to x
1619 | isFloatingRep from -> coerceFP2Int from to x
1620 | isFloatingRep to -> coerceInt2FP from to x
1622 -- narrowing is a nop: we treat the high bits as undefined
1623 MO_S_Conv I32 to -> conversionNop to x
1624 MO_S_Conv I16 I8 -> conversionNop I8 x
1625 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1626 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1629 | from == to -> conversionNop to x
1630 -- narrowing is a nop: we treat the high bits as undefined
1631 MO_U_Conv I32 to -> conversionNop to x
1632 MO_U_Conv I16 I8 -> conversionNop I8 x
1633 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1634 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1636 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1637 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1638 MO_S_Neg rep -> trivialUCode rep NEG x
1641 conversionNop new_rep expr
1642 = do e_code <- getRegister expr
1643 return (swizzleRegisterRep e_code new_rep)
1645 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1647 MO_Eq F32 -> condFltReg EQQ x y
1648 MO_Ne F32 -> condFltReg NE x y
1650 MO_S_Gt F32 -> condFltReg GTT x y
1651 MO_S_Ge F32 -> condFltReg GE x y
1652 MO_S_Lt F32 -> condFltReg LTT x y
1653 MO_S_Le F32 -> condFltReg LE x y
1655 MO_Eq F64 -> condFltReg EQQ x y
1656 MO_Ne F64 -> condFltReg NE x y
1658 MO_S_Gt F64 -> condFltReg GTT x y
1659 MO_S_Ge F64 -> condFltReg GE x y
1660 MO_S_Lt F64 -> condFltReg LTT x y
1661 MO_S_Le F64 -> condFltReg LE x y
1663 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1664 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1666 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1667 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1668 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1669 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1671 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1672 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1673 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1674 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1676 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1677 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1678 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1679 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1681 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1682 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1683 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1684 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1686 -- optimize addition with 32-bit immediate
1690 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1691 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1694 (src, srcCode) <- getSomeReg x
1695 let imm = litToImm lit
1696 code dst = srcCode `appOL` toOL [
1697 ADDIS dst src (HA imm),
1698 ADD dst dst (RIImm (LO imm))
1700 return (Any I32 code)
1701 _ -> trivialCode I32 True ADD x y
1703 MO_Add rep -> trivialCode rep True ADD x y
1705 case y of -- subfi ('substract from' with immediate) doesn't exist
1706 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1707 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1708 _ -> trivialCodeNoImm rep SUBF y x
1710 MO_Mul rep -> trivialCode rep True MULLW x y
1712 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1714 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1715 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1717 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1718 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1720 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1721 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1723 MO_And rep -> trivialCode rep False AND x y
1724 MO_Or rep -> trivialCode rep False OR x y
1725 MO_Xor rep -> trivialCode rep False XOR x y
1727 MO_Shl rep -> trivialCode rep False SLW x y
1728 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1729 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1731 getRegister (CmmLit (CmmInt i rep))
1732 | Just imm <- makeImmediate rep True i
1734 code dst = unitOL (LI dst imm)
1736 return (Any rep code)
1738 getRegister (CmmLit (CmmFloat f frep)) = do
1739 lbl <- getNewLabelNat
1740 dflags <- getDynFlagsNat
1741 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1742 Amode addr addr_code <- getAmode dynRef
1744 LDATA ReadOnlyData [CmmDataLabel lbl,
1745 CmmStaticLit (CmmFloat f frep)]
1746 `consOL` (addr_code `snocOL` LD frep dst addr)
1747 return (Any frep code)
1749 getRegister (CmmLit lit)
1750 = let rep = cmmLitRep lit
1754 ADD dst dst (RIImm (LO imm))
1756 in return (Any rep code)
1758 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1760 -- extend?Rep: wrap integer expression of type rep
1761 -- in a conversion to I32
1762 extendSExpr I32 x = x
1763 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1764 extendUExpr I32 x = x
1765 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1767 #endif /* powerpc_TARGET_ARCH */
1770 -- -----------------------------------------------------------------------------
1771 -- The 'Amode' type: Memory addressing modes passed up the tree.
1773 data Amode = Amode AddrMode InstrBlock
1776 Now, given a tree (the argument to an CmmLoad) that references memory,
1777 produce a suitable addressing mode.
1779 A Rule of the Game (tm) for Amodes: use of the addr bit must
1780 immediately follow use of the code part, since the code part puts
1781 values in registers which the addr then refers to. So you can't put
1782 anything in between, lest it overwrite some of those registers. If
1783 you need to do some other computation between the code part and use of
1784 the addr bit, first store the effective address from the amode in a
1785 temporary, then do the other computation, and then use the temporary:
1789 ... other computation ...
1793 getAmode :: CmmExpr -> NatM Amode
1794 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1798 #if alpha_TARGET_ARCH
1800 getAmode (StPrim IntSubOp [x, StInt i])
1801 = getNewRegNat PtrRep `thenNat` \ tmp ->
1802 getRegister x `thenNat` \ register ->
1804 code = registerCode register tmp
1805 reg = registerName register tmp
1806 off = ImmInt (-(fromInteger i))
1808 return (Amode (AddrRegImm reg off) code)
1810 getAmode (StPrim IntAddOp [x, StInt i])
1811 = getNewRegNat PtrRep `thenNat` \ tmp ->
1812 getRegister x `thenNat` \ register ->
1814 code = registerCode register tmp
1815 reg = registerName register tmp
1816 off = ImmInt (fromInteger i)
1818 return (Amode (AddrRegImm reg off) code)
1822 = return (Amode (AddrImm imm__2) id)
1825 imm__2 = case imm of Just x -> x
1828 = getNewRegNat PtrRep `thenNat` \ tmp ->
1829 getRegister other `thenNat` \ register ->
1831 code = registerCode register tmp
1832 reg = registerName register tmp
1834 return (Amode (AddrReg reg) code)
1836 #endif /* alpha_TARGET_ARCH */
1838 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1840 #if x86_64_TARGET_ARCH
1842 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1843 CmmLit displacement])
1844 = return $ Amode (ripRel (litToImm displacement)) nilOL
1848 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1850 -- This is all just ridiculous, since it carefully undoes
1851 -- what mangleIndexTree has just done.
1852 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1853 | not (is64BitLit lit)
1854 -- ASSERT(rep == I32)???
1855 = do (x_reg, x_code) <- getSomeReg x
1856 let off = ImmInt (-(fromInteger i))
1857 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1859 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1860 | not (is64BitLit lit)
1861 -- ASSERT(rep == I32)???
1862 = do (x_reg, x_code) <- getSomeReg x
1863 let off = ImmInt (fromInteger i)
1864 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1866 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1867 -- recognised by the next rule.
1868 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1870 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1872 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1873 [y, CmmLit (CmmInt shift _)]])
1874 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1875 = x86_complex_amode x y shift 0
1877 getAmode (CmmMachOp (MO_Add rep)
1878 [x, CmmMachOp (MO_Add _)
1879 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1880 CmmLit (CmmInt offset _)]])
1881 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1882 && not (is64BitInteger offset)
1883 = x86_complex_amode x y shift offset
1885 getAmode (CmmMachOp (MO_Add rep) [x,y])
1886 = x86_complex_amode x y 0 0
1888 getAmode (CmmLit lit) | not (is64BitLit lit)
1889 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1892 (reg,code) <- getSomeReg expr
1893 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1896 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1897 x86_complex_amode base index shift offset
1898 = do (x_reg, x_code) <- getNonClobberedReg base
1899 -- x must be in a temp, because it has to stay live over y_code
1900 -- we could compre x_reg and y_reg and do something better here...
1901 (y_reg, y_code) <- getSomeReg index
1903 code = x_code `appOL` y_code
1904 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1905 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1908 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1910 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1912 #if sparc_TARGET_ARCH
1914 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1917 (reg, code) <- getSomeReg x
1919 off = ImmInt (-(fromInteger i))
1920 return (Amode (AddrRegImm reg off) code)
1923 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1926 (reg, code) <- getSomeReg x
1928 off = ImmInt (fromInteger i)
1929 return (Amode (AddrRegImm reg off) code)
1931 getAmode (CmmMachOp (MO_Add rep) [x, y])
1933 (regX, codeX) <- getSomeReg x
1934 (regY, codeY) <- getSomeReg y
1936 code = codeX `appOL` codeY
1937 return (Amode (AddrRegReg regX regY) code)
1939 -- XXX Is this same as "leaf" in Stix?
1940 getAmode (CmmLit lit)
1942 tmp <- getNewRegNat I32
1944 code = unitOL (SETHI (HI imm__2) tmp)
1945 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1947 imm__2 = litToImm lit
1951 (reg, code) <- getSomeReg other
1954 return (Amode (AddrRegImm reg off) code)
1956 #endif /* sparc_TARGET_ARCH */
1958 #ifdef powerpc_TARGET_ARCH
1959 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1960 | Just off <- makeImmediate I32 True (-i)
1962 (reg, code) <- getSomeReg x
1963 return (Amode (AddrRegImm reg off) code)
1966 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1967 | Just off <- makeImmediate I32 True i
1969 (reg, code) <- getSomeReg x
1970 return (Amode (AddrRegImm reg off) code)
1972 -- optimize addition with 32-bit immediate
1974 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1976 tmp <- getNewRegNat I32
1977 (src, srcCode) <- getSomeReg x
1978 let imm = litToImm lit
1979 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1980 return (Amode (AddrRegImm tmp (LO imm)) code)
1982 getAmode (CmmLit lit)
1984 tmp <- getNewRegNat I32
1985 let imm = litToImm lit
1986 code = unitOL (LIS tmp (HA imm))
1987 return (Amode (AddrRegImm tmp (LO imm)) code)
1989 getAmode (CmmMachOp (MO_Add I32) [x, y])
1991 (regX, codeX) <- getSomeReg x
1992 (regY, codeY) <- getSomeReg y
1993 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1997 (reg, code) <- getSomeReg other
2000 return (Amode (AddrRegImm reg off) code)
2001 #endif /* powerpc_TARGET_ARCH */
2003 -- -----------------------------------------------------------------------------
2004 -- getOperand: sometimes any operand will do.
2006 -- getNonClobberedOperand: the value of the operand will remain valid across
2007 -- the computation of an arbitrary expression, unless the expression
2008 -- is computed directly into a register which the operand refers to
2009 -- (see trivialCode where this function is used for an example).
2011 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2013 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2014 #if x86_64_TARGET_ARCH
2015 getNonClobberedOperand (CmmLit lit)
2016 | isSuitableFloatingPointLit lit = do
2017 lbl <- getNewLabelNat
2018 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2020 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2022 getNonClobberedOperand (CmmLit lit)
2023 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2024 return (OpImm (litToImm lit), nilOL)
2025 getNonClobberedOperand (CmmLoad mem pk)
2026 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2027 Amode src mem_code <- getAmode mem
2029 if (amodeCouldBeClobbered src)
2031 tmp <- getNewRegNat wordRep
2032 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2033 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2036 return (OpAddr src', save_code `appOL` mem_code)
2037 getNonClobberedOperand e = do
2038 (reg, code) <- getNonClobberedReg e
2039 return (OpReg reg, code)
2041 amodeCouldBeClobbered :: AddrMode -> Bool
2042 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2044 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2045 regClobbered _ = False
2047 -- getOperand: the operand is not required to remain valid across the
2048 -- computation of an arbitrary expression.
2049 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2050 #if x86_64_TARGET_ARCH
2051 getOperand (CmmLit lit)
2052 | isSuitableFloatingPointLit lit = do
2053 lbl <- getNewLabelNat
2054 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2056 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2058 getOperand (CmmLit lit)
2059 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2060 return (OpImm (litToImm lit), nilOL)
2061 getOperand (CmmLoad mem pk)
2062 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2063 Amode src mem_code <- getAmode mem
2064 return (OpAddr src, mem_code)
2066 (reg, code) <- getSomeReg e
2067 return (OpReg reg, code)
2069 isOperand :: CmmExpr -> Bool
2070 isOperand (CmmLoad _ _) = True
2071 isOperand (CmmLit lit) = not (is64BitLit lit)
2072 || isSuitableFloatingPointLit lit
2075 -- if we want a floating-point literal as an operand, we can
2076 -- use it directly from memory. However, if the literal is
2077 -- zero, we're better off generating it into a register using
2079 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2080 isSuitableFloatingPointLit _ = False
2082 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2083 getRegOrMem (CmmLoad mem pk)
2084 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2085 Amode src mem_code <- getAmode mem
2086 return (OpAddr src, mem_code)
2088 (reg, code) <- getNonClobberedReg e
2089 return (OpReg reg, code)
2091 #if x86_64_TARGET_ARCH
2092 is64BitLit (CmmInt i I64) = is64BitInteger i
2093 -- assume that labels are in the range 0-2^31-1: this assumes the
2094 -- small memory model (see gcc docs, -mcmodel=small).
2096 is64BitLit x = False
2099 is64BitInteger :: Integer -> Bool
2100 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2101 where i64 = fromIntegral i :: Int64
2102 -- a CmmInt is intended to be truncated to the appropriate
2103 -- number of bits, so here we truncate it to Int64. This is
2104 -- important because e.g. -1 as a CmmInt might be either
2105 -- -1 or 18446744073709551615.
2107 -- -----------------------------------------------------------------------------
2108 -- The 'CondCode' type: Condition codes passed up the tree.
2110 data CondCode = CondCode Bool Cond InstrBlock
2112 -- Set up a condition code for a conditional branch.
2114 getCondCode :: CmmExpr -> NatM CondCode
2116 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2118 #if alpha_TARGET_ARCH
2119 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2120 #endif /* alpha_TARGET_ARCH */
2122 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2124 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2125 -- yes, they really do seem to want exactly the same!
2127 getCondCode (CmmMachOp mop [x, y])
2130 MO_Eq F32 -> condFltCode EQQ x y
2131 MO_Ne F32 -> condFltCode NE x y
2133 MO_S_Gt F32 -> condFltCode GTT x y
2134 MO_S_Ge F32 -> condFltCode GE x y
2135 MO_S_Lt F32 -> condFltCode LTT x y
2136 MO_S_Le F32 -> condFltCode LE x y
2138 MO_Eq F64 -> condFltCode EQQ x y
2139 MO_Ne F64 -> condFltCode NE x y
2141 MO_S_Gt F64 -> condFltCode GTT x y
2142 MO_S_Ge F64 -> condFltCode GE x y
2143 MO_S_Lt F64 -> condFltCode LTT x y
2144 MO_S_Le F64 -> condFltCode LE x y
2146 MO_Eq rep -> condIntCode EQQ x y
2147 MO_Ne rep -> condIntCode NE x y
2149 MO_S_Gt rep -> condIntCode GTT x y
2150 MO_S_Ge rep -> condIntCode GE x y
2151 MO_S_Lt rep -> condIntCode LTT x y
2152 MO_S_Le rep -> condIntCode LE x y
2154 MO_U_Gt rep -> condIntCode GU x y
2155 MO_U_Ge rep -> condIntCode GEU x y
2156 MO_U_Lt rep -> condIntCode LU x y
2157 MO_U_Le rep -> condIntCode LEU x y
2159 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2161 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2163 #elif powerpc_TARGET_ARCH
2165 -- almost the same as everywhere else - but we need to
2166 -- extend small integers to 32 bit first
2168 getCondCode (CmmMachOp mop [x, y])
2170 MO_Eq F32 -> condFltCode EQQ x y
2171 MO_Ne F32 -> condFltCode NE x y
2173 MO_S_Gt F32 -> condFltCode GTT x y
2174 MO_S_Ge F32 -> condFltCode GE x y
2175 MO_S_Lt F32 -> condFltCode LTT x y
2176 MO_S_Le F32 -> condFltCode LE x y
2178 MO_Eq F64 -> condFltCode EQQ x y
2179 MO_Ne F64 -> condFltCode NE x y
2181 MO_S_Gt F64 -> condFltCode GTT x y
2182 MO_S_Ge F64 -> condFltCode GE x y
2183 MO_S_Lt F64 -> condFltCode LTT x y
2184 MO_S_Le F64 -> condFltCode LE x y
2186 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2187 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2189 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2190 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2191 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2192 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2194 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2195 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2196 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2197 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2199 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2201 getCondCode other = panic "getCondCode(2)(powerpc)"
2207 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2208 -- passed back up the tree.
2210 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2212 #if alpha_TARGET_ARCH
2213 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2214 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2215 #endif /* alpha_TARGET_ARCH */
2217 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2218 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2220 -- memory vs immediate
2221 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2222 Amode x_addr x_code <- getAmode x
2225 code = x_code `snocOL`
2226 CMP pk (OpImm imm) (OpAddr x_addr)
2228 return (CondCode False cond code)
2230 -- anything vs zero, using a mask
2231 -- TODO: Add some sanity checking!!!!
2232 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2233 | (CmmLit (CmmInt mask pk2)) <- o2
2235 (x_reg, x_code) <- getSomeReg x
2237 code = x_code `snocOL`
2238 TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
2240 return (CondCode False cond code)
2243 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2244 (x_reg, x_code) <- getSomeReg x
2246 code = x_code `snocOL`
2247 TEST pk (OpReg x_reg) (OpReg x_reg)
2249 return (CondCode False cond code)
2251 -- anything vs operand
2252 condIntCode cond x y | isOperand y = do
2253 (x_reg, x_code) <- getNonClobberedReg x
2254 (y_op, y_code) <- getOperand y
2256 code = x_code `appOL` y_code `snocOL`
2257 CMP (cmmExprRep x) y_op (OpReg x_reg)
2259 return (CondCode False cond code)
2261 -- anything vs anything
2262 condIntCode cond x y = do
2263 (y_reg, y_code) <- getNonClobberedReg y
2264 (x_op, x_code) <- getRegOrMem x
2266 code = y_code `appOL`
2268 CMP (cmmExprRep x) (OpReg y_reg) x_op
2270 return (CondCode False cond code)
2273 #if i386_TARGET_ARCH
2274 condFltCode cond x y
2275 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2276 (x_reg, x_code) <- getNonClobberedReg x
2277 (y_reg, y_code) <- getSomeReg y
2279 code = x_code `appOL` y_code `snocOL`
2280 GCMP cond x_reg y_reg
2281 -- The GCMP insn does the test and sets the zero flag if comparable
2282 -- and true. Hence we always supply EQQ as the condition to test.
2283 return (CondCode True EQQ code)
2284 #endif /* i386_TARGET_ARCH */
2286 #if x86_64_TARGET_ARCH
2287 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2288 -- an operand, but the right must be a reg. We can probably do better
2289 -- than this general case...
2290 condFltCode cond x y = do
2291 (x_reg, x_code) <- getNonClobberedReg x
2292 (y_op, y_code) <- getOperand y
2294 code = x_code `appOL`
2296 CMP (cmmExprRep x) y_op (OpReg x_reg)
2297 -- NB(1): we need to use the unsigned comparison operators on the
2298 -- result of this comparison.
2300 return (CondCode True (condToUnsigned cond) code)
2303 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2305 #if sparc_TARGET_ARCH
2307 condIntCode cond x (CmmLit (CmmInt y rep))
2310 (src1, code) <- getSomeReg x
2312 src2 = ImmInt (fromInteger y)
2313 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2314 return (CondCode False cond code')
2316 condIntCode cond x y = do
2317 (src1, code1) <- getSomeReg x
2318 (src2, code2) <- getSomeReg y
2320 code__2 = code1 `appOL` code2 `snocOL`
2321 SUB False True src1 (RIReg src2) g0
2322 return (CondCode False cond code__2)
2325 condFltCode cond x y = do
2326 (src1, code1) <- getSomeReg x
2327 (src2, code2) <- getSomeReg y
2328 tmp <- getNewRegNat F64
2330 promote x = FxTOy F32 F64 x tmp
2337 code1 `appOL` code2 `snocOL`
2338 FCMP True pk1 src1 src2
2339 else if pk1 == F32 then
2340 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2341 FCMP True F64 tmp src2
2343 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2344 FCMP True F64 src1 tmp
2345 return (CondCode True cond code__2)
2347 #endif /* sparc_TARGET_ARCH */
2349 #if powerpc_TARGET_ARCH
2350 -- ###FIXME: I16 and I8!
2351 condIntCode cond x (CmmLit (CmmInt y rep))
2352 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2354 (src1, code) <- getSomeReg x
2356 code' = code `snocOL`
2357 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2358 return (CondCode False cond code')
2360 condIntCode cond x y = do
2361 (src1, code1) <- getSomeReg x
2362 (src2, code2) <- getSomeReg y
2364 code' = code1 `appOL` code2 `snocOL`
2365 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2366 return (CondCode False cond code')
2368 condFltCode cond x y = do
2369 (src1, code1) <- getSomeReg x
2370 (src2, code2) <- getSomeReg y
2372 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2373 code'' = case cond of -- twiddle CR to handle unordered case
2374 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2375 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2378 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2379 return (CondCode True cond code'')
2381 #endif /* powerpc_TARGET_ARCH */
2383 -- -----------------------------------------------------------------------------
2384 -- Generating assignments
2386 -- Assignments are really at the heart of the whole code generation
2387 -- business. Almost all top-level nodes of any real importance are
2388 -- assignments, which correspond to loads, stores, or register
2389 -- transfers. If we're really lucky, some of the register transfers
2390 -- will go away, because we can use the destination register to
2391 -- complete the code generation for the right hand side. This only
2392 -- fails when the right hand side is forced into a fixed register
2393 -- (e.g. the result of a call).
2395 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2396 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2398 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2399 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2401 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2403 #if alpha_TARGET_ARCH
2405 assignIntCode pk (CmmLoad dst _) src
2406 = getNewRegNat IntRep `thenNat` \ tmp ->
2407 getAmode dst `thenNat` \ amode ->
2408 getRegister src `thenNat` \ register ->
2410 code1 = amodeCode amode []
2411 dst__2 = amodeAddr amode
2412 code2 = registerCode register tmp []
2413 src__2 = registerName register tmp
2414 sz = primRepToSize pk
2415 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2419 assignIntCode pk dst src
2420 = getRegister dst `thenNat` \ register1 ->
2421 getRegister src `thenNat` \ register2 ->
2423 dst__2 = registerName register1 zeroh
2424 code = registerCode register2 dst__2
2425 src__2 = registerName register2 dst__2
2426 code__2 = if isFixed register2
2427 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2432 #endif /* alpha_TARGET_ARCH */
2434 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2436 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2438 -- integer assignment to memory
2440 -- specific case of adding/subtracting an integer to a particular address.
2441 -- ToDo: catch other cases where we can use an operation directly on a memory
2443 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2444 CmmLit (CmmInt i _)])
2445 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2446 Just instr <- check op
2447 = do Amode amode code_addr <- getAmode addr
2448 let code = code_addr `snocOL`
2449 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2452 check (MO_Add _) = Just ADD
2453 check (MO_Sub _) = Just SUB
2458 assignMem_IntCode pk addr src = do
2459 Amode addr code_addr <- getAmode addr
2460 (code_src, op_src) <- get_op_RI src
2462 code = code_src `appOL`
2464 MOV pk op_src (OpAddr addr)
2465 -- NOTE: op_src is stable, so it will still be valid
2466 -- after code_addr. This may involve the introduction
2467 -- of an extra MOV to a temporary register, but we hope
2468 -- the register allocator will get rid of it.
2472 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2473 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2474 = return (nilOL, OpImm (litToImm lit))
2476 = do (reg,code) <- getNonClobberedReg op
2477 return (code, OpReg reg)
2480 -- Assign; dst is a reg, rhs is mem
2481 assignReg_IntCode pk reg (CmmLoad src _) = do
2482 load_code <- intLoadCode (MOV pk) src
2483 return (load_code (getRegisterReg reg))
2485 -- dst is a reg, but src could be anything
2486 assignReg_IntCode pk reg src = do
2487 code <- getAnyReg src
2488 return (code (getRegisterReg reg))
2490 #endif /* i386_TARGET_ARCH */
2492 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2494 #if sparc_TARGET_ARCH
2496 assignMem_IntCode pk addr src = do
2497 (srcReg, code) <- getSomeReg src
2498 Amode dstAddr addr_code <- getAmode addr
2499 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2501 assignReg_IntCode pk reg src = do
2502 r <- getRegister src
2504 Any _ code -> code dst
2505 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2507 dst = getRegisterReg reg
2510 #endif /* sparc_TARGET_ARCH */
2512 #if powerpc_TARGET_ARCH
2514 assignMem_IntCode pk addr src = do
2515 (srcReg, code) <- getSomeReg src
2516 Amode dstAddr addr_code <- getAmode addr
2517 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2519 -- dst is a reg, but src could be anything
2520 assignReg_IntCode pk reg src
2522 r <- getRegister src
2524 Any _ code -> code dst
2525 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2527 dst = getRegisterReg reg
2529 #endif /* powerpc_TARGET_ARCH */
2532 -- -----------------------------------------------------------------------------
2533 -- Floating-point assignments
2535 #if alpha_TARGET_ARCH
2537 assignFltCode pk (CmmLoad dst _) src
2538 = getNewRegNat pk `thenNat` \ tmp ->
2539 getAmode dst `thenNat` \ amode ->
2540 getRegister src `thenNat` \ register ->
2542 code1 = amodeCode amode []
2543 dst__2 = amodeAddr amode
2544 code2 = registerCode register tmp []
2545 src__2 = registerName register tmp
2546 sz = primRepToSize pk
2547 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2551 assignFltCode pk dst src
2552 = getRegister dst `thenNat` \ register1 ->
2553 getRegister src `thenNat` \ register2 ->
2555 dst__2 = registerName register1 zeroh
2556 code = registerCode register2 dst__2
2557 src__2 = registerName register2 dst__2
2558 code__2 = if isFixed register2
2559 then code . mkSeqInstr (FMOV src__2 dst__2)
2564 #endif /* alpha_TARGET_ARCH */
2566 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2568 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2570 -- Floating point assignment to memory
2571 assignMem_FltCode pk addr src = do
2572 (src_reg, src_code) <- getNonClobberedReg src
2573 Amode addr addr_code <- getAmode addr
2575 code = src_code `appOL`
2577 IF_ARCH_i386(GST pk src_reg addr,
2578 MOV pk (OpReg src_reg) (OpAddr addr))
2581 -- Floating point assignment to a register/temporary
2582 assignReg_FltCode pk reg src = do
2583 src_code <- getAnyReg src
2584 return (src_code (getRegisterReg reg))
2586 #endif /* i386_TARGET_ARCH */
2588 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2590 #if sparc_TARGET_ARCH
2592 -- Floating point assignment to memory
2593 assignMem_FltCode pk addr src = do
2594 Amode dst__2 code1 <- getAmode addr
2595 (src__2, code2) <- getSomeReg src
2596 tmp1 <- getNewRegNat pk
2598 pk__2 = cmmExprRep src
2599 code__2 = code1 `appOL` code2 `appOL`
2601 then unitOL (ST pk src__2 dst__2)
2602 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2605 -- Floating point assignment to a register/temporary
2606 -- ToDo: Verify correctness
2607 assignReg_FltCode pk reg src = do
2608 r <- getRegister src
2609 v1 <- getNewRegNat pk
2611 Any _ code -> code dst
2612 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2614 dst = getRegisterReg reg
2616 #endif /* sparc_TARGET_ARCH */
2618 #if powerpc_TARGET_ARCH
2621 assignMem_FltCode = assignMem_IntCode
2622 assignReg_FltCode = assignReg_IntCode
2624 #endif /* powerpc_TARGET_ARCH */
2627 -- -----------------------------------------------------------------------------
2628 -- Generating an non-local jump
2630 -- (If applicable) Do not fill the delay slots here; you will confuse the
2631 -- register allocator.
2633 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2635 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2637 #if alpha_TARGET_ARCH
2639 genJump (CmmLabel lbl)
2640 | isAsmTemp lbl = returnInstr (BR target)
2641 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2643 target = ImmCLbl lbl
2646 = getRegister tree `thenNat` \ register ->
2647 getNewRegNat PtrRep `thenNat` \ tmp ->
2649 dst = registerName register pv
2650 code = registerCode register pv
2651 target = registerName register pv
2653 if isFixed register then
2654 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2656 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2658 #endif /* alpha_TARGET_ARCH */
2660 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2662 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2664 genJump (CmmLoad mem pk) = do
2665 Amode target code <- getAmode mem
2666 return (code `snocOL` JMP (OpAddr target))
2668 genJump (CmmLit lit) = do
2669 return (unitOL (JMP (OpImm (litToImm lit))))
2672 (reg,code) <- getSomeReg expr
2673 return (code `snocOL` JMP (OpReg reg))
2675 #endif /* i386_TARGET_ARCH */
2677 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2679 #if sparc_TARGET_ARCH
2681 genJump (CmmLit (CmmLabel lbl))
2682 = return (toOL [CALL (Left target) 0 True, NOP])
2684 target = ImmCLbl lbl
2688 (target, code) <- getSomeReg tree
2689 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2691 #endif /* sparc_TARGET_ARCH */
2693 #if powerpc_TARGET_ARCH
2694 genJump (CmmLit (CmmLabel lbl))
2695 = return (unitOL $ JMP lbl)
2699 (target,code) <- getSomeReg tree
2700 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2701 #endif /* powerpc_TARGET_ARCH */
2704 -- -----------------------------------------------------------------------------
2705 -- Unconditional branches
2707 genBranch :: BlockId -> NatM InstrBlock
2709 genBranch = return . toOL . mkBranchInstr
2711 -- -----------------------------------------------------------------------------
2712 -- Conditional jumps
2715 Conditional jumps are always to local labels, so we can use branch
2716 instructions. We peek at the arguments to decide what kind of
2719 ALPHA: For comparisons with 0, we're laughing, because we can just do
2720 the desired conditional branch.
2722 I386: First, we have to ensure that the condition
2723 codes are set according to the supplied comparison operation.
2725 SPARC: First, we have to ensure that the condition codes are set
2726 according to the supplied comparison operation. We generate slightly
2727 different code for floating point comparisons, because a floating
2728 point operation cannot directly precede a @BF@. We assume the worst
2729 and fill that slot with a @NOP@.
2731 SPARC: Do not fill the delay slots here; you will confuse the register
2737 :: BlockId -- the branch target
2738 -> CmmExpr -- the condition on which to branch
2741 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2743 #if alpha_TARGET_ARCH
2745 genCondJump id (StPrim op [x, StInt 0])
2746 = getRegister x `thenNat` \ register ->
2747 getNewRegNat (registerRep register)
2750 code = registerCode register tmp
2751 value = registerName register tmp
2752 pk = registerRep register
2753 target = ImmCLbl lbl
2755 returnSeq code [BI (cmpOp op) value target]
2757 cmpOp CharGtOp = GTT
2759 cmpOp CharEqOp = EQQ
2761 cmpOp CharLtOp = LTT
2770 cmpOp WordGeOp = ALWAYS
2771 cmpOp WordEqOp = EQQ
2773 cmpOp WordLtOp = NEVER
2774 cmpOp WordLeOp = EQQ
2776 cmpOp AddrGeOp = ALWAYS
2777 cmpOp AddrEqOp = EQQ
2779 cmpOp AddrLtOp = NEVER
2780 cmpOp AddrLeOp = EQQ
2782 genCondJump lbl (StPrim op [x, StDouble 0.0])
2783 = getRegister x `thenNat` \ register ->
2784 getNewRegNat (registerRep register)
2787 code = registerCode register tmp
2788 value = registerName register tmp
2789 pk = registerRep register
2790 target = ImmCLbl lbl
2792 return (code . mkSeqInstr (BF (cmpOp op) value target))
2794 cmpOp FloatGtOp = GTT
2795 cmpOp FloatGeOp = GE
2796 cmpOp FloatEqOp = EQQ
2797 cmpOp FloatNeOp = NE
2798 cmpOp FloatLtOp = LTT
2799 cmpOp FloatLeOp = LE
2800 cmpOp DoubleGtOp = GTT
2801 cmpOp DoubleGeOp = GE
2802 cmpOp DoubleEqOp = EQQ
2803 cmpOp DoubleNeOp = NE
2804 cmpOp DoubleLtOp = LTT
2805 cmpOp DoubleLeOp = LE
2807 genCondJump lbl (StPrim op [x, y])
2809 = trivialFCode pr instr x y `thenNat` \ register ->
2810 getNewRegNat F64 `thenNat` \ tmp ->
2812 code = registerCode register tmp
2813 result = registerName register tmp
2814 target = ImmCLbl lbl
2816 return (code . mkSeqInstr (BF cond result target))
2818 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2820 fltCmpOp op = case op of
2834 (instr, cond) = case op of
2835 FloatGtOp -> (FCMP TF LE, EQQ)
2836 FloatGeOp -> (FCMP TF LTT, EQQ)
2837 FloatEqOp -> (FCMP TF EQQ, NE)
2838 FloatNeOp -> (FCMP TF EQQ, EQQ)
2839 FloatLtOp -> (FCMP TF LTT, NE)
2840 FloatLeOp -> (FCMP TF LE, NE)
2841 DoubleGtOp -> (FCMP TF LE, EQQ)
2842 DoubleGeOp -> (FCMP TF LTT, EQQ)
2843 DoubleEqOp -> (FCMP TF EQQ, NE)
2844 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2845 DoubleLtOp -> (FCMP TF LTT, NE)
2846 DoubleLeOp -> (FCMP TF LE, NE)
2848 genCondJump lbl (StPrim op [x, y])
2849 = trivialCode instr x y `thenNat` \ register ->
2850 getNewRegNat IntRep `thenNat` \ tmp ->
2852 code = registerCode register tmp
2853 result = registerName register tmp
2854 target = ImmCLbl lbl
2856 return (code . mkSeqInstr (BI cond result target))
2858 (instr, cond) = case op of
2859 CharGtOp -> (CMP LE, EQQ)
2860 CharGeOp -> (CMP LTT, EQQ)
2861 CharEqOp -> (CMP EQQ, NE)
2862 CharNeOp -> (CMP EQQ, EQQ)
2863 CharLtOp -> (CMP LTT, NE)
2864 CharLeOp -> (CMP LE, NE)
2865 IntGtOp -> (CMP LE, EQQ)
2866 IntGeOp -> (CMP LTT, EQQ)
2867 IntEqOp -> (CMP EQQ, NE)
2868 IntNeOp -> (CMP EQQ, EQQ)
2869 IntLtOp -> (CMP LTT, NE)
2870 IntLeOp -> (CMP LE, NE)
2871 WordGtOp -> (CMP ULE, EQQ)
2872 WordGeOp -> (CMP ULT, EQQ)
2873 WordEqOp -> (CMP EQQ, NE)
2874 WordNeOp -> (CMP EQQ, EQQ)
2875 WordLtOp -> (CMP ULT, NE)
2876 WordLeOp -> (CMP ULE, NE)
2877 AddrGtOp -> (CMP ULE, EQQ)
2878 AddrGeOp -> (CMP ULT, EQQ)
2879 AddrEqOp -> (CMP EQQ, NE)
2880 AddrNeOp -> (CMP EQQ, EQQ)
2881 AddrLtOp -> (CMP ULT, NE)
2882 AddrLeOp -> (CMP ULE, NE)
2884 #endif /* alpha_TARGET_ARCH */
2886 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2888 #if i386_TARGET_ARCH
2890 genCondJump id bool = do
2891 CondCode _ cond code <- getCondCode bool
2892 return (code `snocOL` JXX cond id)
2896 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2898 #if x86_64_TARGET_ARCH
2900 genCondJump id bool = do
2901 CondCode is_float cond cond_code <- getCondCode bool
2904 return (cond_code `snocOL` JXX cond id)
2906 lbl <- getBlockIdNat
2908 -- see comment with condFltReg
2909 let code = case cond of
2915 plain_test = unitOL (
2918 or_unordered = toOL [
2922 and_ordered = toOL [
2928 return (cond_code `appOL` code)
2932 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2934 #if sparc_TARGET_ARCH
2936 genCondJump (BlockId id) bool = do
2937 CondCode is_float cond code <- getCondCode bool
2942 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2943 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2947 #endif /* sparc_TARGET_ARCH */
2950 #if powerpc_TARGET_ARCH
2952 genCondJump id bool = do
2953 CondCode is_float cond code <- getCondCode bool
2954 return (code `snocOL` BCC cond id)
2956 #endif /* powerpc_TARGET_ARCH */
2959 -- -----------------------------------------------------------------------------
2960 -- Generating C calls
2962 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2963 -- @get_arg@, which moves the arguments to the correct registers/stack
2964 -- locations. Apart from that, the code is easy.
2966 -- (If applicable) Do not fill the delay slots here; you will confuse the
2967 -- register allocator.
2970 :: CmmCallTarget -- function to call
2971 -> CmmFormals -- where to put the result
2972 -> CmmActuals -- arguments (of mixed type)
2975 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2977 #if alpha_TARGET_ARCH
2981 genCCall fn cconv result_regs args
2982 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2983 `thenNat` \ ((unused,_), argCode) ->
2985 nRegs = length allArgRegs - length unused
2986 code = asmSeqThen (map ($ []) argCode)
2989 LDA pv (AddrImm (ImmLab (ptext fn))),
2990 JSR ra (AddrReg pv) nRegs,
2991 LDGP gp (AddrReg ra)]
2993 ------------------------
2994 {- Try to get a value into a specific register (or registers) for
2995 a call. The first 6 arguments go into the appropriate
2996 argument register (separate registers for integer and floating
2997 point arguments, but used in lock-step), and the remaining
2998 arguments are dumped to the stack, beginning at 0(sp). Our
2999 first argument is a pair of the list of remaining argument
3000 registers to be assigned for this call and the next stack
3001 offset to use for overflowing arguments. This way,
3002 @get_Arg@ can be applied to all of a call's arguments using
3006 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3007 -> StixTree -- Current argument
3008 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3010 -- We have to use up all of our argument registers first...
3012 get_arg ((iDst,fDst):dsts, offset) arg
3013 = getRegister arg `thenNat` \ register ->
3015 reg = if isFloatingRep pk then fDst else iDst
3016 code = registerCode register reg
3017 src = registerName register reg
3018 pk = registerRep register
3021 if isFloatingRep pk then
3022 ((dsts, offset), if isFixed register then
3023 code . mkSeqInstr (FMOV src fDst)
3026 ((dsts, offset), if isFixed register then
3027 code . mkSeqInstr (OR src (RIReg src) iDst)
3030 -- Once we have run out of argument registers, we move to the
3033 get_arg ([], offset) arg
3034 = getRegister arg `thenNat` \ register ->
3035 getNewRegNat (registerRep register)
3038 code = registerCode register tmp
3039 src = registerName register tmp
3040 pk = registerRep register
3041 sz = primRepToSize pk
3043 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3045 #endif /* alpha_TARGET_ARCH */
3047 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3049 #if i386_TARGET_ARCH
3051 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3052 -- write barrier compiles to no code on x86/x86-64;
3053 -- we keep it this long in order to prevent earlier optimisations.
3055 -- we only cope with a single result for foreign calls
3056 genCCall (CmmPrim op) [CmmHinted r _] args = do
3058 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3059 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3061 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3062 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3064 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3065 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3067 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3068 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3070 other_op -> outOfLineFloatOp op r args
3072 actuallyInlineFloatOp rep instr [CmmHinted x _]
3073 = do res <- trivialUFCode rep instr x
3075 return (any (getRegisterReg (CmmLocal r)))
3077 genCCall target dest_regs args = do
3079 sizes = map (arg_size . cmmExprRep . hintlessCmm) (reverse args)
3080 #if !darwin_TARGET_OS
3081 tot_arg_size = sum sizes
3083 raw_arg_size = sum sizes
3084 tot_arg_size = roundTo 16 raw_arg_size
3085 arg_pad_size = tot_arg_size - raw_arg_size
3086 delta0 <- getDeltaNat
3087 setDeltaNat (delta0 - arg_pad_size)
3090 push_codes <- mapM push_arg (reverse args)
3091 delta <- getDeltaNat
3094 -- deal with static vs dynamic call targets
3095 (callinsns,cconv) <-
3098 CmmCallee (CmmLit (CmmLabel lbl)) conv
3099 -> -- ToDo: stdcall arg sizes
3100 return (unitOL (CALL (Left fn_imm) []), conv)
3101 where fn_imm = ImmCLbl lbl
3103 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3104 ASSERT(dyn_rep == I32)
3105 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3108 #if darwin_TARGET_OS
3110 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3111 DELTA (delta0 - arg_pad_size)]
3112 `appOL` concatOL push_codes
3115 = concatOL push_codes
3116 call = callinsns `appOL`
3118 -- Deallocate parameters after call for ccall;
3119 -- but not for stdcall (callee does it)
3120 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3121 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3123 [DELTA (delta + tot_arg_size)]
3126 setDeltaNat (delta + tot_arg_size)
3129 -- assign the results, if necessary
3130 assign_code [] = nilOL
3131 assign_code [CmmHinted dest _hint] =
3133 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3134 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3135 F32 -> unitOL (GMOV fake0 r_dest)
3136 F64 -> unitOL (GMOV fake0 r_dest)
3137 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3139 r_dest_hi = getHiVRegFromLo r_dest
3140 rep = localRegRep dest
3141 r_dest = getRegisterReg (CmmLocal dest)
3142 assign_code many = panic "genCCall.assign_code many"
3144 return (push_code `appOL`
3146 assign_code dest_regs)
3154 roundTo a x | x `mod` a == 0 = x
3155 | otherwise = x + a - (x `mod` a)
3158 push_arg :: (CmmHinted CmmExpr){-current argument-}
3159 -> NatM InstrBlock -- code
3161 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3162 | arg_rep == I64 = do
3163 ChildCode64 code r_lo <- iselExpr64 arg
3164 delta <- getDeltaNat
3165 setDeltaNat (delta - 8)
3167 r_hi = getHiVRegFromLo r_lo
3169 return ( code `appOL`
3170 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3171 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3176 (code, reg, sz) <- get_op arg
3177 delta <- getDeltaNat
3178 let size = arg_size sz
3179 setDeltaNat (delta-size)
3180 if (case sz of F64 -> True; F32 -> True; _ -> False)
3181 then return (code `appOL`
3182 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3184 GST sz reg (AddrBaseIndex (EABaseReg esp)
3188 else return (code `snocOL`
3189 PUSH I32 (OpReg reg) `snocOL`
3193 arg_rep = cmmExprRep arg
3196 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3198 (reg,code) <- getSomeReg op
3199 return (code, reg, cmmExprRep op)
3201 #endif /* i386_TARGET_ARCH */
3203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3205 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3207 outOfLineFloatOp mop res args
3209 dflags <- getDynFlagsNat
3210 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3211 let target = CmmCallee targetExpr CCallConv
3213 if localRegRep res == F64
3215 stmtToInstrs (CmmCall target [CmmHinted res FloatHint] args CmmUnsafe CmmMayReturn)
3219 tmp = LocalReg uq F64 GCKindNonPtr
3221 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp FloatHint] args CmmUnsafe CmmMayReturn)
3222 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3223 return (code1 `appOL` code2)
3225 lbl = mkForeignLabel fn Nothing False
3228 MO_F32_Sqrt -> FSLIT("sqrtf")
3229 MO_F32_Sin -> FSLIT("sinf")
3230 MO_F32_Cos -> FSLIT("cosf")
3231 MO_F32_Tan -> FSLIT("tanf")
3232 MO_F32_Exp -> FSLIT("expf")
3233 MO_F32_Log -> FSLIT("logf")
3235 MO_F32_Asin -> FSLIT("asinf")
3236 MO_F32_Acos -> FSLIT("acosf")
3237 MO_F32_Atan -> FSLIT("atanf")
3239 MO_F32_Sinh -> FSLIT("sinhf")
3240 MO_F32_Cosh -> FSLIT("coshf")
3241 MO_F32_Tanh -> FSLIT("tanhf")
3242 MO_F32_Pwr -> FSLIT("powf")
3244 MO_F64_Sqrt -> FSLIT("sqrt")
3245 MO_F64_Sin -> FSLIT("sin")
3246 MO_F64_Cos -> FSLIT("cos")
3247 MO_F64_Tan -> FSLIT("tan")
3248 MO_F64_Exp -> FSLIT("exp")
3249 MO_F64_Log -> FSLIT("log")
3251 MO_F64_Asin -> FSLIT("asin")
3252 MO_F64_Acos -> FSLIT("acos")
3253 MO_F64_Atan -> FSLIT("atan")
3255 MO_F64_Sinh -> FSLIT("sinh")
3256 MO_F64_Cosh -> FSLIT("cosh")
3257 MO_F64_Tanh -> FSLIT("tanh")
3258 MO_F64_Pwr -> FSLIT("pow")
3260 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3264 #if x86_64_TARGET_ARCH
3266 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3267 -- write barrier compiles to no code on x86/x86-64;
3268 -- we keep it this long in order to prevent earlier optimisations.
3271 genCCall (CmmPrim op) [CmmHinted r _] args =
3272 outOfLineFloatOp op r args
3274 genCCall target dest_regs args = do
3276 -- load up the register arguments
3277 (stack_args, aregs, fregs, load_args_code)
3278 <- load_args args allArgRegs allFPArgRegs nilOL
3281 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3282 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3283 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3284 -- for annotating the call instruction with
3286 sse_regs = length fp_regs_used
3288 tot_arg_size = arg_size * length stack_args
3290 -- On entry to the called function, %rsp should be aligned
3291 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3292 -- the return address is 16-byte aligned). In STG land
3293 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3294 -- need to make sure we push a multiple of 16-bytes of args,
3295 -- plus the return address, to get the correct alignment.
3296 -- Urg, this is hard. We need to feed the delta back into
3297 -- the arg pushing code.
3298 (real_size, adjust_rsp) <-
3299 if tot_arg_size `rem` 16 == 0
3300 then return (tot_arg_size, nilOL)
3301 else do -- we need to adjust...
3302 delta <- getDeltaNat
3303 setDeltaNat (delta-8)
3304 return (tot_arg_size+8, toOL [
3305 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3309 -- push the stack args, right to left
3310 push_code <- push_args (reverse stack_args) nilOL
3311 delta <- getDeltaNat
3313 -- deal with static vs dynamic call targets
3314 (callinsns,cconv) <-
3317 CmmCallee (CmmLit (CmmLabel lbl)) conv
3318 -> -- ToDo: stdcall arg sizes
3319 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3320 where fn_imm = ImmCLbl lbl
3322 -> do (dyn_r, dyn_c) <- getSomeReg expr
3323 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3326 -- The x86_64 ABI requires us to set %al to the number of SSE
3327 -- registers that contain arguments, if the called routine
3328 -- is a varargs function. We don't know whether it's a
3329 -- varargs function or not, so we have to assume it is.
3331 -- It's not safe to omit this assignment, even if the number
3332 -- of SSE regs in use is zero. If %al is larger than 8
3333 -- on entry to a varargs function, seg faults ensue.
3334 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3336 let call = callinsns `appOL`
3338 -- Deallocate parameters after call for ccall;
3339 -- but not for stdcall (callee does it)
3340 (if cconv == StdCallConv || real_size==0 then [] else
3341 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3343 [DELTA (delta + real_size)]
3346 setDeltaNat (delta + real_size)
3349 -- assign the results, if necessary
3350 assign_code [] = nilOL
3351 assign_code [CmmHinted dest _hint] =
3353 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3354 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3357 rep = localRegRep dest
3358 r_dest = getRegisterReg (CmmLocal dest)
3359 assign_code many = panic "genCCall.assign_code many"
3361 return (load_args_code `appOL`
3364 assign_eax sse_regs `appOL`
3366 assign_code dest_regs)
3369 arg_size = 8 -- always, at the mo
3371 load_args :: [CmmHinted CmmExpr]
3372 -> [Reg] -- int regs avail for args
3373 -> [Reg] -- FP regs avail for args
3375 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3376 load_args args [] [] code = return (args, [], [], code)
3377 -- no more regs to use
3378 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3379 -- no more args to push
3380 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3381 | isFloatingRep arg_rep =
3385 arg_code <- getAnyReg arg
3386 load_args rest aregs rs (code `appOL` arg_code r)
3391 arg_code <- getAnyReg arg
3392 load_args rest rs fregs (code `appOL` arg_code r)
3394 arg_rep = cmmExprRep arg
3397 (args',ars,frs,code') <- load_args rest aregs fregs code
3398 return ((CmmHinted arg hint):args', ars, frs, code')
3400 push_args [] code = return code
3401 push_args ((CmmHinted arg hint):rest) code
3402 | isFloatingRep arg_rep = do
3403 (arg_reg, arg_code) <- getSomeReg arg
3404 delta <- getDeltaNat
3405 setDeltaNat (delta-arg_size)
3406 let code' = code `appOL` arg_code `appOL` toOL [
3407 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3408 DELTA (delta-arg_size),
3409 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3410 push_args rest code'
3413 -- we only ever generate word-sized function arguments. Promotion
3414 -- has already happened: our Int8# type is kept sign-extended
3415 -- in an Int#, for example.
3416 ASSERT(arg_rep == I64) return ()
3417 (arg_op, arg_code) <- getOperand arg
3418 delta <- getDeltaNat
3419 setDeltaNat (delta-arg_size)
3420 let code' = code `appOL` toOL [PUSH I64 arg_op,
3421 DELTA (delta-arg_size)]
3422 push_args rest code'
3424 arg_rep = cmmExprRep arg
3427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3429 #if sparc_TARGET_ARCH
3431 The SPARC calling convention is an absolute
3432 nightmare. The first 6x32 bits of arguments are mapped into
3433 %o0 through %o5, and the remaining arguments are dumped to the
3434 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3436 If we have to put args on the stack, move %o6==%sp down by
3437 the number of words to go on the stack, to ensure there's enough space.
3439 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3440 16 words above the stack pointer is a word for the address of
3441 a structure return value. I use this as a temporary location
3442 for moving values from float to int regs. Certainly it isn't
3443 safe to put anything in the 16 words starting at %sp, since
3444 this area can get trashed at any time due to window overflows
3445 caused by signal handlers.
3447 A final complication (if the above isn't enough) is that
3448 we can't blithely calculate the arguments one by one into
3449 %o0 .. %o5. Consider the following nested calls:
3453 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3454 the inner call will itself use %o0, which trashes the value put there
3455 in preparation for the outer call. Upshot: we need to calculate the
3456 args into temporary regs, and move those to arg regs or onto the
3457 stack only immediately prior to the call proper. Sigh.
3460 genCCall target dest_regs argsAndHints = do
3462 args = map hintlessCmm argsAndHints
3463 argcode_and_vregs <- mapM arg_to_int_vregs args
3465 (argcodes, vregss) = unzip argcode_and_vregs
3466 n_argRegs = length allArgRegs
3467 n_argRegs_used = min (length vregs) n_argRegs
3468 vregs = concat vregss
3469 -- deal with static vs dynamic call targets
3470 callinsns <- (case target of
3471 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3472 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3473 CmmCallee expr conv -> do
3474 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3475 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3477 (res, reduce) <- outOfLineFloatOp mop
3478 lblOrMopExpr <- case res of
3480 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3482 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3483 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3484 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3488 argcode = concatOL argcodes
3489 (move_sp_down, move_sp_up)
3490 = let diff = length vregs - n_argRegs
3491 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3494 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3496 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3497 return (argcode `appOL`
3498 move_sp_down `appOL`
3499 transfer_code `appOL`
3504 -- move args from the integer vregs into which they have been
3505 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3506 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3508 move_final [] _ offset -- all args done
3511 move_final (v:vs) [] offset -- out of aregs; move to stack
3512 = ST I32 v (spRel offset)
3513 : move_final vs [] (offset+1)
3515 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3516 = OR False g0 (RIReg v) a
3517 : move_final vs az offset
3519 -- generate code to calculate an argument, and move it into one
3520 -- or two integer vregs.
3521 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3522 arg_to_int_vregs arg
3523 | (cmmExprRep arg) == I64
3525 (ChildCode64 code r_lo) <- iselExpr64 arg
3527 r_hi = getHiVRegFromLo r_lo
3528 return (code, [r_hi, r_lo])
3531 (src, code) <- getSomeReg arg
3532 tmp <- getNewRegNat (cmmExprRep arg)
3537 v1 <- getNewRegNat I32
3538 v2 <- getNewRegNat I32
3541 FMOV F64 src f0 `snocOL`
3542 ST F32 f0 (spRel 16) `snocOL`
3543 LD I32 (spRel 16) v1 `snocOL`
3544 ST F32 (fPair f0) (spRel 16) `snocOL`
3545 LD I32 (spRel 16) v2
3550 v1 <- getNewRegNat I32
3553 ST F32 src (spRel 16) `snocOL`
3554 LD I32 (spRel 16) v1
3559 v1 <- getNewRegNat I32
3561 code `snocOL` OR False g0 (RIReg src) v1
3565 outOfLineFloatOp mop =
3567 dflags <- getDynFlagsNat
3568 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3569 mkForeignLabel functionName Nothing True
3570 let mopLabelOrExpr = case mopExpr of
3571 CmmLit (CmmLabel lbl) -> Left lbl
3573 return (mopLabelOrExpr, reduce)
3575 (reduce, functionName) = case mop of
3576 MO_F32_Exp -> (True, FSLIT("exp"))
3577 MO_F32_Log -> (True, FSLIT("log"))
3578 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3580 MO_F32_Sin -> (True, FSLIT("sin"))
3581 MO_F32_Cos -> (True, FSLIT("cos"))
3582 MO_F32_Tan -> (True, FSLIT("tan"))
3584 MO_F32_Asin -> (True, FSLIT("asin"))
3585 MO_F32_Acos -> (True, FSLIT("acos"))
3586 MO_F32_Atan -> (True, FSLIT("atan"))
3588 MO_F32_Sinh -> (True, FSLIT("sinh"))
3589 MO_F32_Cosh -> (True, FSLIT("cosh"))
3590 MO_F32_Tanh -> (True, FSLIT("tanh"))
3592 MO_F64_Exp -> (False, FSLIT("exp"))
3593 MO_F64_Log -> (False, FSLIT("log"))
3594 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3596 MO_F64_Sin -> (False, FSLIT("sin"))
3597 MO_F64_Cos -> (False, FSLIT("cos"))
3598 MO_F64_Tan -> (False, FSLIT("tan"))
3600 MO_F64_Asin -> (False, FSLIT("asin"))
3601 MO_F64_Acos -> (False, FSLIT("acos"))
3602 MO_F64_Atan -> (False, FSLIT("atan"))
3604 MO_F64_Sinh -> (False, FSLIT("sinh"))
3605 MO_F64_Cosh -> (False, FSLIT("cosh"))
3606 MO_F64_Tanh -> (False, FSLIT("tanh"))
3608 other -> pprPanic "outOfLineFloatOp(sparc) "
3609 (pprCallishMachOp mop)
3611 #endif /* sparc_TARGET_ARCH */
3613 #if powerpc_TARGET_ARCH
3615 #if darwin_TARGET_OS || linux_TARGET_OS
3617 The PowerPC calling convention for Darwin/Mac OS X
3618 is described in Apple's document
3619 "Inside Mac OS X - Mach-O Runtime Architecture".
3621 PowerPC Linux uses the System V Release 4 Calling Convention
3622 for PowerPC. It is described in the
3623 "System V Application Binary Interface PowerPC Processor Supplement".
3625 Both conventions are similar:
3626 Parameters may be passed in general-purpose registers starting at r3, in
3627 floating point registers starting at f1, or on the stack.
3629 But there are substantial differences:
3630 * The number of registers used for parameter passing and the exact set of
3631 nonvolatile registers differs (see MachRegs.lhs).
3632 * On Darwin, stack space is always reserved for parameters, even if they are
3633 passed in registers. The called routine may choose to save parameters from
3634 registers to the corresponding space on the stack.
3635 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3636 parameter is passed in an FPR.
3637 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3638 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3639 Darwin just treats an I64 like two separate I32s (high word first).
3640 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3641 4-byte aligned like everything else on Darwin.
3642 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3643 PowerPC Linux does not agree, so neither do we.
3645 According to both conventions, The parameter area should be part of the
3646 caller's stack frame, allocated in the caller's prologue code (large enough
3647 to hold the parameter lists for all called routines). The NCG already
3648 uses the stack for register spilling, leaving 64 bytes free at the top.
3649 If we need a larger parameter area than that, we just allocate a new stack
3650 frame just before ccalling.
3654 genCCall (CmmPrim MO_WriteBarrier) _ _
3655 = return $ unitOL LWSYNC
3657 genCCall target dest_regs argsAndHints
3658 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3659 -- we rely on argument promotion in the codeGen
3661 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3663 allArgRegs allFPArgRegs
3667 (labelOrExpr, reduceToF32) <- case target of
3668 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3669 CmmCallee expr conv -> return (Right expr, False)
3670 CmmPrim mop -> outOfLineFloatOp mop
3672 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3673 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3678 `snocOL` BL lbl usedRegs
3681 (dynReg, dynCode) <- getSomeReg dyn
3683 `snocOL` MTCTR dynReg
3685 `snocOL` BCTRL usedRegs
3688 #if darwin_TARGET_OS
3689 initialStackOffset = 24
3690 -- size of linkage area + size of arguments, in bytes
3691 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3692 map machRepByteWidth argReps
3693 #elif linux_TARGET_OS
3694 initialStackOffset = 8
3695 stackDelta finalStack = roundTo 16 finalStack
3697 args = map hintlessCmm argsAndHints
3698 argReps = map cmmExprRep args
3700 roundTo a x | x `mod` a == 0 = x
3701 | otherwise = x + a - (x `mod` a)
3703 move_sp_down finalStack
3705 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3708 where delta = stackDelta finalStack
3709 move_sp_up finalStack
3711 toOL [ADD sp sp (RIImm (ImmInt delta)),
3714 where delta = stackDelta finalStack
3717 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3718 passArguments ((arg,I64):args) gprs fprs stackOffset
3719 accumCode accumUsed =
3721 ChildCode64 code vr_lo <- iselExpr64 arg
3722 let vr_hi = getHiVRegFromLo vr_lo
3724 #if darwin_TARGET_OS
3729 (accumCode `appOL` code
3730 `snocOL` storeWord vr_hi gprs stackOffset
3731 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3732 ((take 2 gprs) ++ accumUsed)
3734 storeWord vr (gpr:_) offset = MR gpr vr
3735 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3737 #elif linux_TARGET_OS
3738 let stackOffset' = roundTo 8 stackOffset
3739 stackCode = accumCode `appOL` code
3740 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3741 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3742 regCode hireg loreg =
3743 accumCode `appOL` code
3744 `snocOL` MR hireg vr_hi
3745 `snocOL` MR loreg vr_lo
3748 hireg : loreg : regs | even (length gprs) ->
3749 passArguments args regs fprs stackOffset
3750 (regCode hireg loreg) (hireg : loreg : accumUsed)
3751 _skipped : hireg : loreg : regs ->
3752 passArguments args regs fprs stackOffset
3753 (regCode hireg loreg) (hireg : loreg : accumUsed)
3754 _ -> -- only one or no regs left
3755 passArguments args [] fprs (stackOffset'+8)
3759 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3760 | reg : _ <- regs = do
3761 register <- getRegister arg
3762 let code = case register of
3763 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3764 Any _ acode -> acode reg
3768 #if darwin_TARGET_OS
3769 -- The Darwin ABI requires that we reserve stack slots for register parameters
3770 (stackOffset + stackBytes)
3771 #elif linux_TARGET_OS
3772 -- ... the SysV ABI doesn't.
3775 (accumCode `appOL` code)
3778 (vr, code) <- getSomeReg arg
3782 (stackOffset' + stackBytes)
3783 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3786 #if darwin_TARGET_OS
3787 -- stackOffset is at least 4-byte aligned
3788 -- The Darwin ABI is happy with that.
3789 stackOffset' = stackOffset
3791 -- ... the SysV ABI requires 8-byte alignment for doubles.
3792 stackOffset' | rep == F64 = roundTo 8 stackOffset
3793 | otherwise = stackOffset
3795 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3796 (nGprs, nFprs, stackBytes, regs) = case rep of
3797 I32 -> (1, 0, 4, gprs)
3798 #if darwin_TARGET_OS
3799 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3801 F32 -> (1, 1, 4, fprs)
3802 F64 -> (2, 1, 8, fprs)
3803 #elif linux_TARGET_OS
3804 -- ... the SysV ABI doesn't.
3805 F32 -> (0, 1, 4, fprs)
3806 F64 -> (0, 1, 8, fprs)
3809 moveResult reduceToF32 =
3812 [CmmHinted dest _hint]
3813 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3814 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3815 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3817 | otherwise -> unitOL (MR r_dest r3)
3818 where rep = cmmRegRep (CmmLocal dest)
3819 r_dest = getRegisterReg (CmmLocal dest)
3821 outOfLineFloatOp mop =
3823 dflags <- getDynFlagsNat
3824 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3825 mkForeignLabel functionName Nothing True
3826 let mopLabelOrExpr = case mopExpr of
3827 CmmLit (CmmLabel lbl) -> Left lbl
3829 return (mopLabelOrExpr, reduce)
3831 (functionName, reduce) = case mop of
3832 MO_F32_Exp -> (FSLIT("exp"), True)
3833 MO_F32_Log -> (FSLIT("log"), True)
3834 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3836 MO_F32_Sin -> (FSLIT("sin"), True)
3837 MO_F32_Cos -> (FSLIT("cos"), True)
3838 MO_F32_Tan -> (FSLIT("tan"), True)
3840 MO_F32_Asin -> (FSLIT("asin"), True)
3841 MO_F32_Acos -> (FSLIT("acos"), True)
3842 MO_F32_Atan -> (FSLIT("atan"), True)
3844 MO_F32_Sinh -> (FSLIT("sinh"), True)
3845 MO_F32_Cosh -> (FSLIT("cosh"), True)
3846 MO_F32_Tanh -> (FSLIT("tanh"), True)
3847 MO_F32_Pwr -> (FSLIT("pow"), True)
3849 MO_F64_Exp -> (FSLIT("exp"), False)
3850 MO_F64_Log -> (FSLIT("log"), False)
3851 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3853 MO_F64_Sin -> (FSLIT("sin"), False)
3854 MO_F64_Cos -> (FSLIT("cos"), False)
3855 MO_F64_Tan -> (FSLIT("tan"), False)
3857 MO_F64_Asin -> (FSLIT("asin"), False)
3858 MO_F64_Acos -> (FSLIT("acos"), False)
3859 MO_F64_Atan -> (FSLIT("atan"), False)
3861 MO_F64_Sinh -> (FSLIT("sinh"), False)
3862 MO_F64_Cosh -> (FSLIT("cosh"), False)
3863 MO_F64_Tanh -> (FSLIT("tanh"), False)
3864 MO_F64_Pwr -> (FSLIT("pow"), False)
3865 other -> pprPanic "genCCall(ppc): unknown callish op"
3866 (pprCallishMachOp other)
3868 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3870 #endif /* powerpc_TARGET_ARCH */
3873 -- -----------------------------------------------------------------------------
3874 -- Generating a table-branch
3876 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3878 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3882 (reg,e_code) <- getSomeReg expr
3883 lbl <- getNewLabelNat
3884 dflags <- getDynFlagsNat
3885 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3886 (tableReg,t_code) <- getSomeReg $ dynRef
3888 jumpTable = map jumpTableEntryRel ids
3890 jumpTableEntryRel Nothing
3891 = CmmStaticLit (CmmInt 0 wordRep)
3892 jumpTableEntryRel (Just (BlockId id))
3893 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3894 where blockLabel = mkAsmTempLabel id
3896 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3897 (EAIndex reg wORD_SIZE) (ImmInt 0))
3899 #if x86_64_TARGET_ARCH
3900 #if darwin_TARGET_OS
3901 -- on Mac OS X/x86_64, put the jump table in the text section
3902 -- to work around a limitation of the linker.
3903 -- ld64 is unable to handle the relocations for
3905 -- if L0 is not preceded by a non-anonymous label in its section.
3907 code = e_code `appOL` t_code `appOL` toOL [
3908 ADD wordRep op (OpReg tableReg),
3909 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3910 LDATA Text (CmmDataLabel lbl : jumpTable)
3913 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3914 -- relocations, hence we only get 32-bit offsets in the jump
3915 -- table. As these offsets are always negative we need to properly
3916 -- sign extend them to 64-bit. This hack should be removed in
3917 -- conjunction with the hack in PprMach.hs/pprDataItem once
3918 -- binutils 2.17 is standard.
3919 code = e_code `appOL` t_code `appOL` toOL [
3920 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3922 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3923 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3925 ADD wordRep (OpReg reg) (OpReg tableReg),
3926 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3930 code = e_code `appOL` t_code `appOL` toOL [
3931 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3932 ADD wordRep op (OpReg tableReg),
3933 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3939 (reg,e_code) <- getSomeReg expr
3940 lbl <- getNewLabelNat
3942 jumpTable = map jumpTableEntry ids
3943 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3944 code = e_code `appOL` toOL [
3945 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946 JMP_TBL op [ id | Just id <- ids ]
3950 #elif powerpc_TARGET_ARCH
3954 (reg,e_code) <- getSomeReg expr
3955 tmp <- getNewRegNat I32
3956 lbl <- getNewLabelNat
3957 dflags <- getDynFlagsNat
3958 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3959 (tableReg,t_code) <- getSomeReg $ dynRef
3961 jumpTable = map jumpTableEntryRel ids
3963 jumpTableEntryRel Nothing
3964 = CmmStaticLit (CmmInt 0 wordRep)
3965 jumpTableEntryRel (Just (BlockId id))
3966 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3967 where blockLabel = mkAsmTempLabel id
3969 code = e_code `appOL` t_code `appOL` toOL [
3970 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3971 SLW tmp reg (RIImm (ImmInt 2)),
3972 LD I32 tmp (AddrRegReg tableReg tmp),
3973 ADD tmp tmp (RIReg tableReg),
3975 BCTR [ id | Just id <- ids ]
3980 (reg,e_code) <- getSomeReg expr
3981 tmp <- getNewRegNat I32
3982 lbl <- getNewLabelNat
3984 jumpTable = map jumpTableEntry ids
3986 code = e_code `appOL` toOL [
3987 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3988 SLW tmp reg (RIImm (ImmInt 2)),
3989 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3990 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3992 BCTR [ id | Just id <- ids ]
3996 genSwitch expr ids = panic "ToDo: genSwitch"
3999 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
4000 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4001 where blockLabel = mkAsmTempLabel id
4003 -- -----------------------------------------------------------------------------
4005 -- -----------------------------------------------------------------------------
4008 -- -----------------------------------------------------------------------------
4009 -- 'condIntReg' and 'condFltReg': condition codes into registers
4011 -- Turn those condition codes into integers now (when they appear on
4012 -- the right hand side of an assignment).
4014 -- (If applicable) Do not fill the delay slots here; you will confuse the
4015 -- register allocator.
4017 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4019 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4021 #if alpha_TARGET_ARCH
4022 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4023 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4024 #endif /* alpha_TARGET_ARCH */
4026 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4028 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4030 condIntReg cond x y = do
4031 CondCode _ cond cond_code <- condIntCode cond x y
4032 tmp <- getNewRegNat I8
4034 code dst = cond_code `appOL` toOL [
4035 SETCC cond (OpReg tmp),
4036 MOVZxL I8 (OpReg tmp) (OpReg dst)
4039 return (Any I32 code)
4043 #if i386_TARGET_ARCH
4045 condFltReg cond x y = do
4046 CondCode _ cond cond_code <- condFltCode cond x y
4047 tmp <- getNewRegNat I8
4049 code dst = cond_code `appOL` toOL [
4050 SETCC cond (OpReg tmp),
4051 MOVZxL I8 (OpReg tmp) (OpReg dst)
4054 return (Any I32 code)
4058 #if x86_64_TARGET_ARCH
4060 condFltReg cond x y = do
4061 CondCode _ cond cond_code <- condFltCode cond x y
4062 tmp1 <- getNewRegNat wordRep
4063 tmp2 <- getNewRegNat wordRep
4065 -- We have to worry about unordered operands (eg. comparisons
4066 -- against NaN). If the operands are unordered, the comparison
4067 -- sets the parity flag, carry flag and zero flag.
4068 -- All comparisons are supposed to return false for unordered
4069 -- operands except for !=, which returns true.
4071 -- Optimisation: we don't have to test the parity flag if we
4072 -- know the test has already excluded the unordered case: eg >
4073 -- and >= test for a zero carry flag, which can only occur for
4074 -- ordered operands.
4076 -- ToDo: by reversing comparisons we could avoid testing the
4077 -- parity flag in more cases.
4082 NE -> or_unordered dst
4083 GU -> plain_test dst
4084 GEU -> plain_test dst
4085 _ -> and_ordered dst)
4087 plain_test dst = toOL [
4088 SETCC cond (OpReg tmp1),
4089 MOVZxL I8 (OpReg tmp1) (OpReg dst)
4091 or_unordered dst = toOL [
4092 SETCC cond (OpReg tmp1),
4093 SETCC PARITY (OpReg tmp2),
4094 OR I8 (OpReg tmp1) (OpReg tmp2),
4095 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4097 and_ordered dst = toOL [
4098 SETCC cond (OpReg tmp1),
4099 SETCC NOTPARITY (OpReg tmp2),
4100 AND I8 (OpReg tmp1) (OpReg tmp2),
4101 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4104 return (Any I32 code)
4108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4110 #if sparc_TARGET_ARCH
4112 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4113 (src, code) <- getSomeReg x
4114 tmp <- getNewRegNat I32
4116 code__2 dst = code `appOL` toOL [
4117 SUB False True g0 (RIReg src) g0,
4118 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4119 return (Any I32 code__2)
4121 condIntReg EQQ x y = do
4122 (src1, code1) <- getSomeReg x
4123 (src2, code2) <- getSomeReg y
4124 tmp1 <- getNewRegNat I32
4125 tmp2 <- getNewRegNat I32
4127 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4128 XOR False src1 (RIReg src2) dst,
4129 SUB False True g0 (RIReg dst) g0,
4130 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4131 return (Any I32 code__2)
4133 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4134 (src, code) <- getSomeReg x
4135 tmp <- getNewRegNat I32
4137 code__2 dst = code `appOL` toOL [
4138 SUB False True g0 (RIReg src) g0,
4139 ADD True False g0 (RIImm (ImmInt 0)) dst]
4140 return (Any I32 code__2)
4142 condIntReg NE x y = do
4143 (src1, code1) <- getSomeReg x
4144 (src2, code2) <- getSomeReg y
4145 tmp1 <- getNewRegNat I32
4146 tmp2 <- getNewRegNat I32
4148 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4149 XOR False src1 (RIReg src2) dst,
4150 SUB False True g0 (RIReg dst) g0,
4151 ADD True False g0 (RIImm (ImmInt 0)) dst]
4152 return (Any I32 code__2)
4154 condIntReg cond x y = do
4155 BlockId lbl1 <- getBlockIdNat
4156 BlockId lbl2 <- getBlockIdNat
4157 CondCode _ cond cond_code <- condIntCode cond x y
4159 code__2 dst = cond_code `appOL` toOL [
4160 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4161 OR False g0 (RIImm (ImmInt 0)) dst,
4162 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4163 NEWBLOCK (BlockId lbl1),
4164 OR False g0 (RIImm (ImmInt 1)) dst,
4165 NEWBLOCK (BlockId lbl2)]
4166 return (Any I32 code__2)
4168 condFltReg cond x y = do
4169 BlockId lbl1 <- getBlockIdNat
4170 BlockId lbl2 <- getBlockIdNat
4171 CondCode _ cond cond_code <- condFltCode cond x y
4173 code__2 dst = cond_code `appOL` toOL [
4175 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4176 OR False g0 (RIImm (ImmInt 0)) dst,
4177 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4178 NEWBLOCK (BlockId lbl1),
4179 OR False g0 (RIImm (ImmInt 1)) dst,
4180 NEWBLOCK (BlockId lbl2)]
4181 return (Any I32 code__2)
4183 #endif /* sparc_TARGET_ARCH */
4185 #if powerpc_TARGET_ARCH
4186 condReg getCond = do
4187 lbl1 <- getBlockIdNat
4188 lbl2 <- getBlockIdNat
4189 CondCode _ cond cond_code <- getCond
4191 {- code dst = cond_code `appOL` toOL [
4200 code dst = cond_code
4204 RLWINM dst dst (bit + 1) 31 31
4207 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4210 (bit, do_negate) = case cond of
4224 return (Any I32 code)
4226 condIntReg cond x y = condReg (condIntCode cond x y)
4227 condFltReg cond x y = condReg (condFltCode cond x y)
4228 #endif /* powerpc_TARGET_ARCH */
4231 -- -----------------------------------------------------------------------------
4232 -- 'trivial*Code': deal with trivial instructions
4234 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4235 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4236 -- Only look for constants on the right hand side, because that's
4237 -- where the generic optimizer will have put them.
4239 -- Similarly, for unary instructions, we don't have to worry about
4240 -- matching an StInt as the argument, because genericOpt will already
4241 -- have handled the constant-folding.
4245 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4246 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4247 -> Maybe (Operand -> Operand -> Instr)
4248 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4249 -> Maybe (Operand -> Operand -> Instr)
4250 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4251 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4253 -> CmmExpr -> CmmExpr -- the two arguments
4256 #ifndef powerpc_TARGET_ARCH
4259 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4260 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4261 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4262 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4264 -> CmmExpr -> CmmExpr -- the two arguments
4270 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4271 ,IF_ARCH_i386 ((Operand -> Instr)
4272 ,IF_ARCH_x86_64 ((Operand -> Instr)
4273 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4274 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4276 -> CmmExpr -- the one argument
4279 #ifndef powerpc_TARGET_ARCH
4282 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4283 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4284 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4285 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4287 -> CmmExpr -- the one argument
4291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4293 #if alpha_TARGET_ARCH
4295 trivialCode instr x (StInt y)
4297 = getRegister x `thenNat` \ register ->
4298 getNewRegNat IntRep `thenNat` \ tmp ->
4300 code = registerCode register tmp
4301 src1 = registerName register tmp
4302 src2 = ImmInt (fromInteger y)
4303 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4305 return (Any IntRep code__2)
4307 trivialCode instr x y
4308 = getRegister x `thenNat` \ register1 ->
4309 getRegister y `thenNat` \ register2 ->
4310 getNewRegNat IntRep `thenNat` \ tmp1 ->
4311 getNewRegNat IntRep `thenNat` \ tmp2 ->
4313 code1 = registerCode register1 tmp1 []
4314 src1 = registerName register1 tmp1
4315 code2 = registerCode register2 tmp2 []
4316 src2 = registerName register2 tmp2
4317 code__2 dst = asmSeqThen [code1, code2] .
4318 mkSeqInstr (instr src1 (RIReg src2) dst)
4320 return (Any IntRep code__2)
4323 trivialUCode instr x
4324 = getRegister x `thenNat` \ register ->
4325 getNewRegNat IntRep `thenNat` \ tmp ->
4327 code = registerCode register tmp
4328 src = registerName register tmp
4329 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4331 return (Any IntRep code__2)
4334 trivialFCode _ instr x y
4335 = getRegister x `thenNat` \ register1 ->
4336 getRegister y `thenNat` \ register2 ->
4337 getNewRegNat F64 `thenNat` \ tmp1 ->
4338 getNewRegNat F64 `thenNat` \ tmp2 ->
4340 code1 = registerCode register1 tmp1
4341 src1 = registerName register1 tmp1
4343 code2 = registerCode register2 tmp2
4344 src2 = registerName register2 tmp2
4346 code__2 dst = asmSeqThen [code1 [], code2 []] .
4347 mkSeqInstr (instr src1 src2 dst)
4349 return (Any F64 code__2)
4351 trivialUFCode _ instr x
4352 = getRegister x `thenNat` \ register ->
4353 getNewRegNat F64 `thenNat` \ tmp ->
4355 code = registerCode register tmp
4356 src = registerName register tmp
4357 code__2 dst = code . mkSeqInstr (instr src dst)
4359 return (Any F64 code__2)
4361 #endif /* alpha_TARGET_ARCH */
4363 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4365 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4368 The Rules of the Game are:
4370 * You cannot assume anything about the destination register dst;
4371 it may be anything, including a fixed reg.
4373 * You may compute an operand into a fixed reg, but you may not
4374 subsequently change the contents of that fixed reg. If you
4375 want to do so, first copy the value either to a temporary
4376 or into dst. You are free to modify dst even if it happens
4377 to be a fixed reg -- that's not your problem.
4379 * You cannot assume that a fixed reg will stay live over an
4380 arbitrary computation. The same applies to the dst reg.
4382 * Temporary regs obtained from getNewRegNat are distinct from
4383 each other and from all other regs, and stay live over
4384 arbitrary computations.
4386 --------------------
4388 SDM's version of The Rules:
4390 * If getRegister returns Any, that means it can generate correct
4391 code which places the result in any register, period. Even if that
4392 register happens to be read during the computation.
4394 Corollary #1: this means that if you are generating code for an
4395 operation with two arbitrary operands, you cannot assign the result
4396 of the first operand into the destination register before computing
4397 the second operand. The second operand might require the old value
4398 of the destination register.
4400 Corollary #2: A function might be able to generate more efficient
4401 code if it knows the destination register is a new temporary (and
4402 therefore not read by any of the sub-computations).
4404 * If getRegister returns Any, then the code it generates may modify only:
4405 (a) fresh temporaries
4406 (b) the destination register
4407 (c) known registers (eg. %ecx is used by shifts)
4408 In particular, it may *not* modify global registers, unless the global
4409 register happens to be the destination register.
4412 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4413 | not (is64BitLit lit_a) = do
4414 b_code <- getAnyReg b
4417 = b_code dst `snocOL`
4418 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4420 return (Any rep code)
4422 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4424 -- This is re-used for floating pt instructions too.
4425 genTrivialCode rep instr a b = do
4426 (b_op, b_code) <- getNonClobberedOperand b
4427 a_code <- getAnyReg a
4428 tmp <- getNewRegNat rep
4430 -- We want the value of b to stay alive across the computation of a.
4431 -- But, we want to calculate a straight into the destination register,
4432 -- because the instruction only has two operands (dst := dst `op` src).
4433 -- The troublesome case is when the result of b is in the same register
4434 -- as the destination reg. In this case, we have to save b in a
4435 -- new temporary across the computation of a.
4437 | dst `regClashesWithOp` b_op =
4439 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4441 instr (OpReg tmp) (OpReg dst)
4445 instr b_op (OpReg dst)
4447 return (Any rep code)
4449 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4450 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4451 reg `regClashesWithOp` _ = False
4455 trivialUCode rep instr x = do
4456 x_code <- getAnyReg x
4462 return (Any rep code)
4466 #if i386_TARGET_ARCH
4468 trivialFCode pk instr x y = do
4469 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4470 (y_reg, y_code) <- getSomeReg y
4475 instr pk x_reg y_reg dst
4477 return (Any pk code)
4481 #if x86_64_TARGET_ARCH
4483 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4489 trivialUFCode rep instr x = do
4490 (x_reg, x_code) <- getSomeReg x
4496 return (Any rep code)
4498 #endif /* i386_TARGET_ARCH */
4500 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4502 #if sparc_TARGET_ARCH
4504 trivialCode pk instr x (CmmLit (CmmInt y d))
4507 (src1, code) <- getSomeReg x
4508 tmp <- getNewRegNat I32
4510 src2 = ImmInt (fromInteger y)
4511 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4512 return (Any I32 code__2)
4514 trivialCode pk instr x y = do
4515 (src1, code1) <- getSomeReg x
4516 (src2, code2) <- getSomeReg y
4517 tmp1 <- getNewRegNat I32
4518 tmp2 <- getNewRegNat I32
4520 code__2 dst = code1 `appOL` code2 `snocOL`
4521 instr src1 (RIReg src2) dst
4522 return (Any I32 code__2)
4525 trivialFCode pk instr x y = do
4526 (src1, code1) <- getSomeReg x
4527 (src2, code2) <- getSomeReg y
4528 tmp1 <- getNewRegNat (cmmExprRep x)
4529 tmp2 <- getNewRegNat (cmmExprRep y)
4530 tmp <- getNewRegNat F64
4532 promote x = FxTOy F32 F64 x tmp
4539 code1 `appOL` code2 `snocOL`
4540 instr pk src1 src2 dst
4541 else if pk1 == F32 then
4542 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4543 instr F64 tmp src2 dst
4545 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4546 instr F64 src1 tmp dst
4547 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4550 trivialUCode pk instr x = do
4551 (src, code) <- getSomeReg x
4552 tmp <- getNewRegNat pk
4554 code__2 dst = code `snocOL` instr (RIReg src) dst
4555 return (Any pk code__2)
4558 trivialUFCode pk instr x = do
4559 (src, code) <- getSomeReg x
4560 tmp <- getNewRegNat pk
4562 code__2 dst = code `snocOL` instr src dst
4563 return (Any pk code__2)
4565 #endif /* sparc_TARGET_ARCH */
4567 #if powerpc_TARGET_ARCH
4570 Wolfgang's PowerPC version of The Rules:
4572 A slightly modified version of The Rules to take advantage of the fact
4573 that PowerPC instructions work on all registers and don't implicitly
4574 clobber any fixed registers.
4576 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4578 * If getRegister returns Any, then the code it generates may modify only:
4579 (a) fresh temporaries
4580 (b) the destination register
4581 It may *not* modify global registers, unless the global
4582 register happens to be the destination register.
4583 It may not clobber any other registers. In fact, only ccalls clobber any
4585 Also, it may not modify the counter register (used by genCCall).
4587 Corollary: If a getRegister for a subexpression returns Fixed, you need
4588 not move it to a fresh temporary before evaluating the next subexpression.
4589 The Fixed register won't be modified.
4590 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4592 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4593 the value of the destination register.
4596 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4597 | Just imm <- makeImmediate rep signed y
4599 (src1, code1) <- getSomeReg x
4600 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4601 return (Any rep code)
4603 trivialCode rep signed instr x y = do
4604 (src1, code1) <- getSomeReg x
4605 (src2, code2) <- getSomeReg y
4606 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4607 return (Any rep code)
4609 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4610 -> CmmExpr -> CmmExpr -> NatM Register
4611 trivialCodeNoImm rep instr x y = do
4612 (src1, code1) <- getSomeReg x
4613 (src2, code2) <- getSomeReg y
4614 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4615 return (Any rep code)
4617 trivialUCode rep instr x = do
4618 (src, code) <- getSomeReg x
4619 let code' dst = code `snocOL` instr dst src
4620 return (Any rep code')
4622 -- There is no "remainder" instruction on the PPC, so we have to do
4624 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4626 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4627 -> CmmExpr -> CmmExpr -> NatM Register
4628 remainderCode rep div x y = do
4629 (src1, code1) <- getSomeReg x
4630 (src2, code2) <- getSomeReg y
4631 let code dst = code1 `appOL` code2 `appOL` toOL [
4633 MULLW dst dst (RIReg src2),
4636 return (Any rep code)
4638 #endif /* powerpc_TARGET_ARCH */
4641 -- -----------------------------------------------------------------------------
4642 -- Coercing to/from integer/floating-point...
4644 -- When going to integer, we truncate (round towards 0).
4646 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4647 -- conversions. We have to store temporaries in memory to move
4648 -- between the integer and the floating point register sets.
4650 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4651 -- pretend, on sparc at least, that double and float regs are seperate
4652 -- kinds, so the value has to be computed into one kind before being
4653 -- explicitly "converted" to live in the other kind.
4655 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4656 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4658 #if sparc_TARGET_ARCH
4659 coerceDbl2Flt :: CmmExpr -> NatM Register
4660 coerceFlt2Dbl :: CmmExpr -> NatM Register
4663 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4665 #if alpha_TARGET_ARCH
4668 = getRegister x `thenNat` \ register ->
4669 getNewRegNat IntRep `thenNat` \ reg ->
4671 code = registerCode register reg
4672 src = registerName register reg
4674 code__2 dst = code . mkSeqInstrs [
4676 LD TF dst (spRel 0),
4679 return (Any F64 code__2)
4683 = getRegister x `thenNat` \ register ->
4684 getNewRegNat F64 `thenNat` \ tmp ->
4686 code = registerCode register tmp
4687 src = registerName register tmp
4689 code__2 dst = code . mkSeqInstrs [
4691 ST TF tmp (spRel 0),
4694 return (Any IntRep code__2)
4696 #endif /* alpha_TARGET_ARCH */
4698 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4700 #if i386_TARGET_ARCH
4702 coerceInt2FP from to x = do
4703 (x_reg, x_code) <- getSomeReg x
4705 opc = case to of F32 -> GITOF; F64 -> GITOD
4706 code dst = x_code `snocOL` opc x_reg dst
4707 -- ToDo: works for non-I32 reps?
4709 return (Any to code)
4713 coerceFP2Int from to x = do
4714 (x_reg, x_code) <- getSomeReg x
4716 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4717 code dst = x_code `snocOL` opc x_reg dst
4718 -- ToDo: works for non-I32 reps?
4720 return (Any to code)
4722 #endif /* i386_TARGET_ARCH */
4724 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4726 #if x86_64_TARGET_ARCH
4728 coerceFP2Int from to x = do
4729 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4731 opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4732 code dst = x_code `snocOL` opc x_op dst
4734 return (Any to code) -- works even if the destination rep is <I32
4736 coerceInt2FP from to x = do
4737 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4739 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4740 code dst = x_code `snocOL` opc x_op dst
4742 return (Any to code) -- works even if the destination rep is <I32
4744 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4745 coerceFP2FP to x = do
4746 (x_reg, x_code) <- getSomeReg x
4748 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4749 code dst = x_code `snocOL` opc x_reg dst
4751 return (Any to code)
4755 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4757 #if sparc_TARGET_ARCH
4759 coerceInt2FP pk1 pk2 x = do
4760 (src, code) <- getSomeReg x
4762 code__2 dst = code `appOL` toOL [
4763 ST pk1 src (spRel (-2)),
4764 LD pk1 (spRel (-2)) dst,
4765 FxTOy pk1 pk2 dst dst]
4766 return (Any pk2 code__2)
4769 coerceFP2Int pk fprep x = do
4770 (src, code) <- getSomeReg x
4771 reg <- getNewRegNat fprep
4772 tmp <- getNewRegNat pk
4774 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4776 FxTOy fprep pk src tmp,
4777 ST pk tmp (spRel (-2)),
4778 LD pk (spRel (-2)) dst]
4779 return (Any pk code__2)
4782 coerceDbl2Flt x = do
4783 (src, code) <- getSomeReg x
4784 return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4787 coerceFlt2Dbl x = do
4788 (src, code) <- getSomeReg x
4789 return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4791 #endif /* sparc_TARGET_ARCH */
4793 #if powerpc_TARGET_ARCH
4794 coerceInt2FP fromRep toRep x = do
4795 (src, code) <- getSomeReg x
4796 lbl <- getNewLabelNat
4797 itmp <- getNewRegNat I32
4798 ftmp <- getNewRegNat F64
4799 dflags <- getDynFlagsNat
4800 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4801 Amode addr addr_code <- getAmode dynRef
4803 code' dst = code `appOL` maybe_exts `appOL` toOL [
4806 CmmStaticLit (CmmInt 0x43300000 I32),
4807 CmmStaticLit (CmmInt 0x80000000 I32)],
4808 XORIS itmp src (ImmInt 0x8000),
4809 ST I32 itmp (spRel 3),
4810 LIS itmp (ImmInt 0x4330),
4811 ST I32 itmp (spRel 2),
4812 LD F64 ftmp (spRel 2)
4813 ] `appOL` addr_code `appOL` toOL [
4815 FSUB F64 dst ftmp dst
4816 ] `appOL` maybe_frsp dst
4818 maybe_exts = case fromRep of
4819 I8 -> unitOL $ EXTS I8 src src
4820 I16 -> unitOL $ EXTS I16 src src
4822 maybe_frsp dst = case toRep of
4823 F32 -> unitOL $ FRSP dst dst
4825 return (Any toRep code')
4827 coerceFP2Int fromRep toRep x = do
4828 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4829 (src, code) <- getSomeReg x
4830 tmp <- getNewRegNat F64
4832 code' dst = code `appOL` toOL [
4833 -- convert to int in FP reg
4835 -- store value (64bit) from FP to stack
4836 ST F64 tmp (spRel 2),
4837 -- read low word of value (high word is undefined)
4838 LD I32 dst (spRel 3)]
4839 return (Any toRep code')
4840 #endif /* powerpc_TARGET_ARCH */
4843 -- -----------------------------------------------------------------------------
4844 -- eXTRA_STK_ARGS_HERE
4846 -- We (allegedly) put the first six C-call arguments in registers;
4847 -- where do we start putting the rest of them?
4849 -- Moved from MachInstrs (SDM):
4851 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4852 eXTRA_STK_ARGS_HERE :: Int
4854 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))