2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr )
34 -- Our intermediate code:
35 import PprCmm ( pprExpr )
39 import ClosureInfo ( C_SRT(..) )
42 import StaticFlags ( opt_PIC )
43 import ForeignCall ( CCallConv(..) )
48 import FastTypes ( isFastTrue )
49 import Constants ( wORD_SIZE )
52 import Outputable ( assertPanic )
53 import Debug.Trace ( trace )
55 import Debug.Trace ( trace )
57 import Control.Monad ( mapAndUnzipM )
58 import Data.Maybe ( fromJust )
63 -- -----------------------------------------------------------------------------
64 -- Top-level of the instruction selector
66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
67 -- They are really trees of insns to facilitate fast appending, where a
68 -- left-to-right traversal (pre-order?) yields the insns in the correct order.
71 type InstrBlock = OrdList Instr
-- | Run instruction selection over one top-level Cmm entity.
--
-- A procedure has each of its basic blocks translated by
-- 'basicBlockCodeGen'; the resulting native blocks are flattened back
-- into one procedure, followed by any data sections the blocks produced.
-- If 'getPicBaseMaybeNat' reports a PIC base register, the whole list is
-- passed through 'initializePicBase'.  Static data is returned unchanged.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen top = case top of
    -- no translation, we just use CmmStatic
    CmmData section statics ->
        return [CmmData section statics]

    CmmProc info lab params (ListGraph blocks) -> do
        (blockss, staticss) <- mapAndUnzipM basicBlockCodeGen blocks
        mbPicBase           <- getPicBaseMaybeNat
        let natProc = CmmProc info lab params (ListGraph (concat blockss))
            natTops = natProc : concat staticss
        maybe (return natTops)
              (\picBase -> initializePicBase picBase natTops)
              mbPicBase
-- | Select instructions for a single Cmm basic block.
--
-- Code generation may introduce new basic block boundaries, which are
-- indicated by the 'NEWBLOCK' instruction, and static data, indicated by
-- 'LDATA'.  The flat instruction stream is therefore split up again: the
-- result pairs the native blocks (the first one keeps the original block
-- id) with the data sections extracted from the stream.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock blockId stmts) = do
    instrs <- stmtsToInstrs stmts
    let (headInstrs, laterBlocks, dataTops)
            = foldrOL splitAt_ ([], [], []) instrs
    return (BasicBlock blockId headInstrs : laterBlocks, dataTops)
  where
    -- One step of the right-to-left fold.  The accumulator holds the
    -- instructions seen since the last boundary, the blocks already
    -- closed off, and the data sections collected so far.
    splitAt_ instr (pending, done, datas) = case instr of
        NEWBLOCK newId -> ([], BasicBlock newId pending : done, datas)
        LDATA sec dat  -> (pending, done, CmmData sec dat : datas)
        _              -> (instr : pending, done, datas)
-- | Translate a list of statements, concatenating the code generated
-- for each one in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts =
    mapM stmtToInstrs stmts >>= \perStmt -> return (concatOL perStmt)
110 stmtToInstrs :: CmmStmt -> NatM InstrBlock
111 stmtToInstrs stmt = case stmt of
112 CmmNop -> return nilOL
113 CmmComment s -> return (unitOL (COMMENT s))
116 | isFloatingRep kind -> assignReg_FltCode kind reg src
117 #if WORD_SIZE_IN_BITS==32
118 | kind == I64 -> assignReg_I64Code reg src
120 | otherwise -> assignReg_IntCode kind reg src
121 where kind = cmmRegRep reg
124 | isFloatingRep kind -> assignMem_FltCode kind addr src
125 #if WORD_SIZE_IN_BITS==32
126 | kind == I64 -> assignMem_I64Code addr src
128 | otherwise -> assignMem_IntCode kind addr src
129 where kind = cmmExprRep src
131 CmmCall target result_regs args _ _
132 -> genCCall target result_regs args
134 CmmBranch id -> genBranch id
135 CmmCondBranch arg id -> genCondJump id arg
136 CmmSwitch arg ids -> genSwitch arg ids
137 CmmJump arg params -> genJump arg
139 -- -----------------------------------------------------------------------------
140 -- General things for putting together code sequences
-- | Expand a 'CmmRegOff' into the equivalent explicit addition
-- @reg + off@, performed at the register's own 'MachRep'.
--
-- ToDo: should we do it this way around, or convert CmmExprs into
-- CmmRegOff?
--
-- Only 'CmmRegOff' is a meaningful argument.  Any other expression is a
-- caller bug and panics with a message naming this function, rather than
-- failing with an anonymous pattern-match error.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
  where rep = cmmRegRep reg
mangleIndexTree _
  = panic "MachCodeGen.mangleIndexTree: no match"
149 -- -----------------------------------------------------------------------------
150 -- Code gen for 64-bit arithmetic on 32-bit platforms
153 Simple support for generating 64-bit code (ie, 64 bit values and 64
154 bit assignments) on 32-bit platforms. Unlike the main code generator
155 we merely shoot for generating working code as simply as possible, and
156 pay little attention to code quality. Specifically, there is no
157 attempt to deal cleverly with the fixed-vs-floating register
158 distinction; all values are generated into (pairs of) floating
159 registers, even if this would mean some redundant reg-reg moves as a
160 result. Only one of the VRegUniques is returned, since it will be
161 of the VRegUniqueLo form, and the upper-half VReg can be determined
162 by applying getHiVRegFromLo to it.
165 data ChildCode64 -- a.k.a "Register64"
168 Reg -- the lower 32-bit temporary which contains the
169 -- result; use getHiVRegFromLo to find the other
170 -- VRegUnique. Rules of this simplified insn
171 -- selection game are therefore that the returned
172 -- Reg may be modified
174 #if WORD_SIZE_IN_BITS==32
175 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
176 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
179 #ifndef x86_64_TARGET_ARCH
180 iselExpr64 :: CmmExpr -> NatM ChildCode64
183 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
187 assignMem_I64Code addrTree valueTree = do
188 Amode addr addr_code <- getAmode addrTree
189 ChildCode64 vcode rlo <- iselExpr64 valueTree
191 rhi = getHiVRegFromLo rlo
193 -- Little-endian store
194 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
195 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
197 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
200 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
201 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
203 r_dst_lo = mkVReg u_dst I32
204 r_dst_hi = getHiVRegFromLo r_dst_lo
205 r_src_hi = getHiVRegFromLo r_src_lo
206 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
207 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
210 vcode `snocOL` mov_lo `snocOL` mov_hi
213 assignReg_I64Code lvalue valueTree
214 = panic "assignReg_I64Code(i386): invalid lvalue"
-- Load a 64-bit integer literal into a fresh register pair: the low
-- 32 bits are moved into rlo and the high 32 bits into rhi.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        -- Shift the full-width literal first and truncate afterwards.
        -- Truncating to Word32 and then shifting right by 32 would
        -- always give 0 and silently drop the high word.
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
230 iselExpr64 (CmmLoad addrTree I64) = do
231 Amode addr addr_code <- getAmode addrTree
232 (rlo,rhi) <- getNewRegPairNat I32
234 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
235 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
238 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
242 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
243 = return (ChildCode64 nilOL (mkVReg vu I32))
-- we handle addition, but rather badly

-- 64-bit addition of an integer literal: copy the operand into a fresh
-- register pair, ADD the literal's low word to the low half, then ADC
-- its high word into the high half.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        -- Shift the full-width literal first and truncate afterwards.
        -- Truncating to Word32 and then shifting right by 32 would
        -- always give 0, so the literal's high word would never be
        -- added.
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                       ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV I32 (OpReg r1hi) (OpReg rhi),
                       ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)
261 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
262 ChildCode64 code1 r1lo <- iselExpr64 e1
263 ChildCode64 code2 r2lo <- iselExpr64 e2
264 (rlo,rhi) <- getNewRegPairNat I32
266 r1hi = getHiVRegFromLo r1lo
267 r2hi = getHiVRegFromLo r2lo
270 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
271 ADD I32 (OpReg r2lo) (OpReg rlo),
272 MOV I32 (OpReg r1hi) (OpReg rhi),
273 ADC I32 (OpReg r2hi) (OpReg rhi) ]
275 return (ChildCode64 code rlo)
277 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
279 r_dst_lo <- getNewRegNat I32
280 let r_dst_hi = getHiVRegFromLo r_dst_lo
283 ChildCode64 (code `snocOL`
284 MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
289 = pprPanic "iselExpr64(i386)" (ppr expr)
291 #endif /* i386_TARGET_ARCH */
293 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
295 #if sparc_TARGET_ARCH
297 assignMem_I64Code addrTree valueTree = do
298 Amode addr addr_code <- getAmode addrTree
299 ChildCode64 vcode rlo <- iselExpr64 valueTree
300 (src, code) <- getSomeReg addrTree
302 rhi = getHiVRegFromLo rlo
304 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
305 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
306 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
308 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
309 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
311 r_dst_lo = mkVReg u_dst pk
312 r_dst_hi = getHiVRegFromLo r_dst_lo
313 r_src_hi = getHiVRegFromLo r_src_lo
314 mov_lo = mkMOV r_src_lo r_dst_lo
315 mov_hi = mkMOV r_src_hi r_dst_hi
316 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
317 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
318 assignReg_I64Code lvalue valueTree
319 = panic "assignReg_I64Code(sparc): invalid lvalue"
322 -- Don't delete this -- it's very handy for debugging.
324 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
325 -- = panic "iselExpr64(???)"
327 iselExpr64 (CmmLoad addrTree I64) = do
328 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
329 rlo <- getNewRegNat I32
330 let rhi = getHiVRegFromLo rlo
331 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
332 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
334 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
338 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
339 r_dst_lo <- getNewRegNat I32
340 let r_dst_hi = getHiVRegFromLo r_dst_lo
341 r_src_lo = mkVReg uq I32
342 r_src_hi = getHiVRegFromLo r_src_lo
343 mov_lo = mkMOV r_src_lo r_dst_lo
344 mov_hi = mkMOV r_src_hi r_dst_hi
345 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
347 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
351 = pprPanic "iselExpr64(sparc)" (ppr expr)
353 #endif /* sparc_TARGET_ARCH */
355 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
357 #if powerpc_TARGET_ARCH
359 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
360 getI64Amodes addrTree = do
361 Amode hi_addr addr_code <- getAmode addrTree
362 case addrOffset hi_addr 4 of
363 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
364 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
365 return (AddrRegImm hi_ptr (ImmInt 0),
366 AddrRegImm hi_ptr (ImmInt 4),
369 assignMem_I64Code addrTree valueTree = do
370 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
371 ChildCode64 vcode rlo <- iselExpr64 valueTree
373 rhi = getHiVRegFromLo rlo
376 mov_hi = ST I32 rhi hi_addr
377 mov_lo = ST I32 rlo lo_addr
379 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
381 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
382 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
384 r_dst_lo = mkVReg u_dst I32
385 r_dst_hi = getHiVRegFromLo r_dst_lo
386 r_src_hi = getHiVRegFromLo r_src_lo
387 mov_lo = MR r_dst_lo r_src_lo
388 mov_hi = MR r_dst_hi r_src_hi
391 vcode `snocOL` mov_lo `snocOL` mov_hi
394 assignReg_I64Code lvalue valueTree
395 = panic "assignReg_I64Code(powerpc): invalid lvalue"
398 -- Don't delete this -- it's very handy for debugging.
400 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
401 -- = panic "iselExpr64(???)"
403 iselExpr64 (CmmLoad addrTree I64) = do
404 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
405 (rlo, rhi) <- getNewRegPairNat I32
406 let mov_hi = LD I32 rhi hi_addr
407 mov_lo = LD I32 rlo lo_addr
408 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
411 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
412 = return (ChildCode64 nilOL (mkVReg vu I32))
-- Load a 64-bit integer literal into a fresh register pair, sixteen
-- bits at a time: half1:half0 make up the low word (rlo) and
-- half3:half2 make up the high word (rhi).
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- Shift the full-width literal first and truncate afterwards.
        -- Truncating to Word16 and then shifting right by 16 or more
        -- would always give 0.
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- half2 belongs to the high word, so it is OR-ed into
                -- rhi; OR-ing it into rlo would corrupt the low word
                -- and leave rhi without its lower sixteen bits.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
431 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
432 ChildCode64 code1 r1lo <- iselExpr64 e1
433 ChildCode64 code2 r2lo <- iselExpr64 e2
434 (rlo,rhi) <- getNewRegPairNat I32
436 r1hi = getHiVRegFromLo r1lo
437 r2hi = getHiVRegFromLo r2lo
440 toOL [ ADDC rlo r1lo r2lo,
443 return (ChildCode64 code rlo)
445 iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
446 (expr_reg,expr_code) <- getSomeReg expr
447 (rlo, rhi) <- getNewRegPairNat I32
448 let mov_hi = LI rhi (ImmInt 0)
449 mov_lo = MR rlo expr_reg
450 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
453 = pprPanic "iselExpr64(powerpc)" (ppr expr)
455 #endif /* powerpc_TARGET_ARCH */
458 -- -----------------------------------------------------------------------------
459 -- The 'Register' type
461 -- 'Register's passed up the tree. If the stix code forces the register
462 -- to live in a pre-decided machine register, it comes out as @Fixed@;
463 -- otherwise, it comes out as @Any@, and the parent can decide which
464 -- register to put it in.
467 = Fixed MachRep Reg InstrBlock
468 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' recorded in a 'Register', leaving the register
-- (or code-generating function) and the code untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register newRep = case register of
    Fixed _ reg code -> Fixed newRep reg code
    Any   _ codefn   -> Any newRep codefn
475 -- -----------------------------------------------------------------------------
476 -- Utils based on getRegister, below
478 -- The dual to getAnyReg: compute an expression into a register, but
479 -- we don't mind which one it is.
480 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
482 r <- getRegister expr
485 tmp <- getNewRegNat rep
486 return (tmp, code tmp)
490 -- -----------------------------------------------------------------------------
491 -- Grab the Reg for a CmmReg
493 getRegisterReg :: CmmReg -> Reg
495 getRegisterReg (CmmLocal (LocalReg u pk _))
498 getRegisterReg (CmmGlobal mid)
499 = case get_GlobalReg_reg_or_addr mid of
500 Left (RealReg rrno) -> RealReg rrno
501 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
502 -- By this stage, the only MagicIds remaining should be the
503 -- ones which map to a real machine register on this
504 -- platform. Hence ...
507 -- -----------------------------------------------------------------------------
508 -- Generate code to get a subtree into a Register
510 -- Don't delete this -- it's very handy for debugging.
512 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
513 -- = panic "getRegister(???)"
515 getRegister :: CmmExpr -> NatM Register
517 #if !x86_64_TARGET_ARCH
518 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
519 -- register, it can only be used for rip-relative addressing.
520 getRegister (CmmReg (CmmGlobal PicBaseReg))
522 reg <- getPicBaseNat wordRep
523 return (Fixed wordRep reg nilOL)
526 getRegister (CmmReg reg)
527 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
529 getRegister tree@(CmmRegOff _ _)
530 = getRegister (mangleIndexTree tree)
533 #if WORD_SIZE_IN_BITS==32
534 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
535 -- TO_W_(x), TO_W_(x >> 32)
-- TO_W_(x >> 32): the upper half of a 64-bit value is already held in
-- the high register of the pair, so no shift is emitted.  The signed
-- and unsigned narrowing conversions are handled identically.
getRegister (CmmMachOp (MO_U_Conv I64 I32)
                [CmmMachOp (MO_U_Shr I64) [x, CmmLit (CmmInt 32 _)]])
  = do ChildCode64 code rlo <- iselExpr64 x
       let rhi = getHiVRegFromLo rlo
       return (Fixed I32 rhi code)

getRegister (CmmMachOp (MO_S_Conv I64 I32)
                [CmmMachOp (MO_U_Shr I64) [x, CmmLit (CmmInt 32 _)]])
  = do ChildCode64 code rlo <- iselExpr64 x
       let rhi = getHiVRegFromLo rlo
       return (Fixed I32 rhi code)

-- TO_W_(x): narrowing a 64-bit value just selects the low register of
-- the pair.
getRegister (CmmMachOp (MO_U_Conv I64 I32) [x])
  = do ChildCode64 code rlo <- iselExpr64 x
       return (Fixed I32 rlo code)

getRegister (CmmMachOp (MO_S_Conv I64 I32) [x])
  = do ChildCode64 code rlo <- iselExpr64 x
       return (Fixed I32 rlo code)
557 -- end of machine-"independent" bit; here we go on the rest...
559 #if alpha_TARGET_ARCH
561 getRegister (StDouble d)
562 = getBlockIdNat `thenNat` \ lbl ->
563 getNewRegNat PtrRep `thenNat` \ tmp ->
564 let code dst = mkSeqInstrs [
565 LDATA RoDataSegment lbl [
566 DATA TF [ImmLab (rational d)]
568 LDA tmp (AddrImm (ImmCLbl lbl)),
569 LD TF dst (AddrReg tmp)]
571 return (Any F64 code)
573 getRegister (StPrim primop [x]) -- unary PrimOps
575 IntNegOp -> trivialUCode (NEG Q False) x
577 NotOp -> trivialUCode NOT x
579 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
580 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
582 OrdOp -> coerceIntCode IntRep x
585 Float2IntOp -> coerceFP2Int x
586 Int2FloatOp -> coerceInt2FP pr x
587 Double2IntOp -> coerceFP2Int x
588 Int2DoubleOp -> coerceInt2FP pr x
590 Double2FloatOp -> coerceFltCode x
591 Float2DoubleOp -> coerceFltCode x
593 other_op -> getRegister (StCall fn CCallConv F64 [x])
595 fn = case other_op of
596 FloatExpOp -> FSLIT("exp")
597 FloatLogOp -> FSLIT("log")
598 FloatSqrtOp -> FSLIT("sqrt")
599 FloatSinOp -> FSLIT("sin")
600 FloatCosOp -> FSLIT("cos")
601 FloatTanOp -> FSLIT("tan")
602 FloatAsinOp -> FSLIT("asin")
603 FloatAcosOp -> FSLIT("acos")
604 FloatAtanOp -> FSLIT("atan")
605 FloatSinhOp -> FSLIT("sinh")
606 FloatCoshOp -> FSLIT("cosh")
607 FloatTanhOp -> FSLIT("tanh")
608 DoubleExpOp -> FSLIT("exp")
609 DoubleLogOp -> FSLIT("log")
610 DoubleSqrtOp -> FSLIT("sqrt")
611 DoubleSinOp -> FSLIT("sin")
612 DoubleCosOp -> FSLIT("cos")
613 DoubleTanOp -> FSLIT("tan")
614 DoubleAsinOp -> FSLIT("asin")
615 DoubleAcosOp -> FSLIT("acos")
616 DoubleAtanOp -> FSLIT("atan")
617 DoubleSinhOp -> FSLIT("sinh")
618 DoubleCoshOp -> FSLIT("cosh")
619 DoubleTanhOp -> FSLIT("tanh")
621 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Dyadic PrimOps.  Only "less than" and "less than or equal" integer
-- compares are used here, so x > y and x >= y are expressed by swapping
-- the operands: x > y is (y < x) and x >= y is (y <= x).
getRegister (StPrim primop [x, y]) -- dyadic PrimOps
  = case primop of
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp -> trivialCode (CMP LTT) y x
      IntGeOp -> trivialCode (CMP LE) y x
      IntEqOp -> trivialCode (CMP EQQ) x y
      IntNeOp -> int_NE_code x y
      IntLtOp -> trivialCode (CMP LTT) x y
      IntLeOp -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      -- x >= y is y <= x: the operands must be swapped, exactly as for
      -- CharGeOp, IntGeOp and AddrGeOp.  With (x y) this computed x <= y.
      WordGeOp -> trivialCode (CMP ULE) y x
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp -> trivialCode (ADD Q False) x y
      IntSubOp -> trivialCode (SUB Q False) x y
      IntMulOp -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp -> trivialCode (REM Q False) x y

      WordAddOp -> trivialCode (ADD Q False) x y
      WordSubOp -> trivialCode (SUB Q False) x y
      WordMulOp -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
      FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
      FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
      FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y

      DoubleAddOp -> trivialFCode F64 (FADD TF) x y
      DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode F64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp -> trivialCode OR x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
      DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
706 {- ------------------------------------------------------------
707 Some bizarre special code for getting condition codes into
708 registers. Integer non-equality is a test for equality
709 followed by an XOR with 1. (Integer comparisons always set
710 the result register to 0 or 1.) Floating point comparisons of
711 any kind leave the result in a floating point register, so we
712 need to wrangle an integer register out of things.
714 int_NE_code :: StixTree -> StixTree -> NatM Register
717 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
718 getNewRegNat IntRep `thenNat` \ tmp ->
720 code = registerCode register tmp
721 src = registerName register tmp
722 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
724 return (Any IntRep code__2)
726 {- ------------------------------------------------------------
727 Comments for int_NE_code also apply to cmpF_code
730 :: (Reg -> Reg -> Reg -> Instr)
732 -> StixTree -> StixTree
735 cmpF_code instr cond x y
736 = trivialFCode pr instr x y `thenNat` \ register ->
737 getNewRegNat F64 `thenNat` \ tmp ->
738 getBlockIdNat `thenNat` \ lbl ->
740 code = registerCode register tmp
741 result = registerName register tmp
743 code__2 dst = code . mkSeqInstrs [
744 OR zeroh (RIImm (ImmInt 1)) dst,
745 BF cond result (ImmCLbl lbl),
746 OR zeroh (RIReg zeroh) dst,
749 return (Any IntRep code__2)
751 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
752 ------------------------------------------------------------
754 getRegister (CmmLoad pk mem)
755 = getAmode mem `thenNat` \ amode ->
757 code = amodeCode amode
758 src = amodeAddr amode
759 size = primRepToSize pk
760 code__2 dst = code . mkSeqInstr (LD size dst src)
762 return (Any pk code__2)
764 getRegister (StInt i)
767 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
769 return (Any IntRep code)
772 code dst = mkSeqInstr (LDI Q dst src)
774 return (Any IntRep code)
776 src = ImmInt (fromInteger i)
781 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
783 return (Any PtrRep code)
786 imm__2 = case imm of Just x -> x
788 #endif /* alpha_TARGET_ARCH */
790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
794 getRegister (CmmLit (CmmFloat f F32)) = do
795 lbl <- getNewLabelNat
796 dflags <- getDynFlagsNat
797 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
798 Amode addr addr_code <- getAmode dynRef
802 CmmStaticLit (CmmFloat f F32)]
803 `consOL` (addr_code `snocOL`
806 return (Any F32 code)
809 getRegister (CmmLit (CmmFloat d F64))
811 = let code dst = unitOL (GLDZ dst)
812 in return (Any F64 code)
815 = let code dst = unitOL (GLD1 dst)
816 in return (Any F64 code)
819 lbl <- getNewLabelNat
820 dflags <- getDynFlagsNat
821 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
822 Amode addr addr_code <- getAmode dynRef
826 CmmStaticLit (CmmFloat d F64)]
827 `consOL` (addr_code `snocOL`
830 return (Any F64 code)
832 #endif /* i386_TARGET_ARCH */
834 #if x86_64_TARGET_ARCH
836 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
837 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
838 -- I don't know why there are xorpd, xorps, and pxor instructions.
839 -- They all appear to do the same thing --SDM
840 return (Any rep code)
842 getRegister (CmmLit (CmmFloat f rep)) = do
843 lbl <- getNewLabelNat
844 let code dst = toOL [
847 CmmStaticLit (CmmFloat f rep)],
848 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
851 return (Any rep code)
853 #endif /* x86_64_TARGET_ARCH */
855 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
857 -- catch simple cases of zero- or sign-extended load
858 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
859 code <- intLoadCode (MOVZxL I8) addr
860 return (Any I32 code)
862 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
863 code <- intLoadCode (MOVSxL I8) addr
864 return (Any I32 code)
866 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
867 code <- intLoadCode (MOVZxL I16) addr
868 return (Any I32 code)
870 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
871 code <- intLoadCode (MOVSxL I16) addr
872 return (Any I32 code)
876 #if x86_64_TARGET_ARCH
878 -- catch simple cases of zero- or sign-extended load
879 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
880 code <- intLoadCode (MOVZxL I8) addr
881 return (Any I64 code)
883 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
884 code <- intLoadCode (MOVSxL I8) addr
885 return (Any I64 code)
887 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
888 code <- intLoadCode (MOVZxL I16) addr
889 return (Any I64 code)
891 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
892 code <- intLoadCode (MOVSxL I16) addr
893 return (Any I64 code)
895 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
896 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
897 return (Any I64 code)
899 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
900 code <- intLoadCode (MOVSxL I32) addr
901 return (Any I64 code)
905 #if x86_64_TARGET_ARCH
906 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
907 CmmLit displacement])
908 = return $ Any I64 (\dst -> unitOL $
909 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
912 #if x86_64_TARGET_ARCH
913 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
914 x_code <- getAnyReg x
915 lbl <- getNewLabelNat
917 code dst = x_code dst `appOL` toOL [
918 -- This is how gcc does it, so it can't be that bad:
919 LDATA ReadOnlyData16 [
922 CmmStaticLit (CmmInt 0x80000000 I32),
923 CmmStaticLit (CmmInt 0 I32),
924 CmmStaticLit (CmmInt 0 I32),
925 CmmStaticLit (CmmInt 0 I32)
927 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
928 -- xorps, so we need the 128-bit constant
929 -- ToDo: rip-relative
932 return (Any F32 code)
934 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
935 x_code <- getAnyReg x
936 lbl <- getNewLabelNat
938 -- This is how gcc does it, so it can't be that bad:
939 code dst = x_code dst `appOL` toOL [
940 LDATA ReadOnlyData16 [
943 CmmStaticLit (CmmInt 0x8000000000000000 I64),
944 CmmStaticLit (CmmInt 0 I64)
946 -- gcc puts an unpck here. Wonder if we need it.
947 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
948 -- xorpd, so we need the 128-bit constant
951 return (Any F64 code)
954 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
956 getRegister (CmmMachOp mop [x]) -- unary MachOps
959 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
960 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
963 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
964 MO_Not rep -> trivialUCode rep (NOT rep) x
967 MO_U_Conv I32 I8 -> toI8Reg I32 x
968 MO_S_Conv I32 I8 -> toI8Reg I32 x
969 MO_U_Conv I16 I8 -> toI8Reg I16 x
970 MO_S_Conv I16 I8 -> toI8Reg I16 x
971 MO_U_Conv I32 I16 -> toI16Reg I32 x
972 MO_S_Conv I32 I16 -> toI16Reg I32 x
973 #if x86_64_TARGET_ARCH
974 MO_U_Conv I64 I32 -> conversionNop I64 x
975 MO_S_Conv I64 I32 -> conversionNop I64 x
976 MO_U_Conv I64 I16 -> toI16Reg I64 x
977 MO_S_Conv I64 I16 -> toI16Reg I64 x
978 MO_U_Conv I64 I8 -> toI8Reg I64 x
979 MO_S_Conv I64 I8 -> toI8Reg I64 x
982 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
983 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
986 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
987 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
988 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
990 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
991 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
992 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
994 #if x86_64_TARGET_ARCH
995 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
996 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
997 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
998 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
999 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
1000 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
1001 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1002 -- However, we don't want the register allocator to throw it
1003 -- away as an unnecessary reg-to-reg move, so we keep it in
1004 -- the form of a movzl and print it as a movl later.
1007 #if i386_TARGET_ARCH
1008 MO_S_Conv F32 F64 -> conversionNop F64 x
1009 MO_S_Conv F64 F32 -> conversionNop F32 x
1011 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
1012 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
1016 | isFloatingRep from -> coerceFP2Int from to x
1017 | isFloatingRep to -> coerceInt2FP from to x
1019 other -> pprPanic "getRegister" (pprMachOp mop)
1021 -- signed or unsigned extension.
1022 integerExtend from to instr expr = do
1023 (reg,e_code) <- if from == I8 then getByteReg expr
1024 else getSomeReg expr
1028 instr from (OpReg reg) (OpReg dst)
1029 return (Any to code)
1031 toI8Reg new_rep expr
1032 = do codefn <- getAnyReg expr
1033 return (Any new_rep codefn)
1034 -- HACK: use getAnyReg to get a byte-addressable register.
1035 -- If the source was a Fixed register, this will add the
1036 -- mov instruction to put it into the desired destination.
1037 -- We're assuming that the destination won't be a fixed
1038 -- non-byte-addressable register; it won't be, because all
1039 -- fixed registers are word-sized.
1041 toI16Reg = toI8Reg -- for now
1043 conversionNop new_rep expr
1044 = do e_code <- getRegister expr
1045 return (swizzleRegisterRep e_code new_rep)
1048 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1049 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1051 MO_Eq F32 -> condFltReg EQQ x y
1052 MO_Ne F32 -> condFltReg NE x y
1053 MO_S_Gt F32 -> condFltReg GTT x y
1054 MO_S_Ge F32 -> condFltReg GE x y
1055 MO_S_Lt F32 -> condFltReg LTT x y
1056 MO_S_Le F32 -> condFltReg LE x y
1058 MO_Eq F64 -> condFltReg EQQ x y
1059 MO_Ne F64 -> condFltReg NE x y
1060 MO_S_Gt F64 -> condFltReg GTT x y
1061 MO_S_Ge F64 -> condFltReg GE x y
1062 MO_S_Lt F64 -> condFltReg LTT x y
1063 MO_S_Le F64 -> condFltReg LE x y
1065 MO_Eq rep -> condIntReg EQQ x y
1066 MO_Ne rep -> condIntReg NE x y
1068 MO_S_Gt rep -> condIntReg GTT x y
1069 MO_S_Ge rep -> condIntReg GE x y
1070 MO_S_Lt rep -> condIntReg LTT x y
1071 MO_S_Le rep -> condIntReg LE x y
1073 MO_U_Gt rep -> condIntReg GU x y
1074 MO_U_Ge rep -> condIntReg GEU x y
1075 MO_U_Lt rep -> condIntReg LU x y
1076 MO_U_Le rep -> condIntReg LEU x y
1078 #if i386_TARGET_ARCH
1079 MO_Add F32 -> trivialFCode F32 GADD x y
1080 MO_Sub F32 -> trivialFCode F32 GSUB x y
1082 MO_Add F64 -> trivialFCode F64 GADD x y
1083 MO_Sub F64 -> trivialFCode F64 GSUB x y
1085 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1086 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1089 #if x86_64_TARGET_ARCH
1090 MO_Add F32 -> trivialFCode F32 ADD x y
1091 MO_Sub F32 -> trivialFCode F32 SUB x y
1093 MO_Add F64 -> trivialFCode F64 ADD x y
1094 MO_Sub F64 -> trivialFCode F64 SUB x y
1096 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1097 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1100 MO_Add rep -> add_code rep x y
1101 MO_Sub rep -> sub_code rep x y
1103 MO_S_Quot rep -> div_code rep True True x y
1104 MO_S_Rem rep -> div_code rep True False x y
1105 MO_U_Quot rep -> div_code rep False True x y
1106 MO_U_Rem rep -> div_code rep False False x y
1108 #if i386_TARGET_ARCH
1109 MO_Mul F32 -> trivialFCode F32 GMUL x y
1110 MO_Mul F64 -> trivialFCode F64 GMUL x y
1113 #if x86_64_TARGET_ARCH
1114 MO_Mul F32 -> trivialFCode F32 MUL x y
1115 MO_Mul F64 -> trivialFCode F64 MUL x y
1118 MO_Mul rep -> let op = IMUL rep in
1119 trivialCode rep op (Just op) x y
1121 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1123 MO_And rep -> let op = AND rep in
1124 trivialCode rep op (Just op) x y
1125 MO_Or rep -> let op = OR rep in
1126 trivialCode rep op (Just op) x y
1127 MO_Xor rep -> let op = XOR rep in
1128 trivialCode rep op (Just op) x y
1130 {- Shift ops on x86s have constraints on their source, it
1131 either has to be Imm, CL or 1
1132 => trivialCode is not restrictive enough (sigh.)
1134 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1135 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1136 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1138 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1140 --------------------
1141 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1142 imulMayOflo rep a b = do
1143 (a_reg, a_code) <- getNonClobberedReg a
1144 b_code <- getAnyReg b
1146 shift_amt = case rep of
1149 _ -> panic "shift_amt"
1151 code = a_code `appOL` b_code eax `appOL`
1153 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1154 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1155 -- sign extend lower part
1156 SUB rep (OpReg edx) (OpReg eax)
1157 -- compare against upper
1158 -- eax==0 if high part == sign extended low part
1161 return (Fixed rep eax code)
1163 --------------------
1164 shift_code :: MachRep
1165 -> (Operand -> Operand -> Instr)
1170 {- Case1: shift length as immediate -}
1171 shift_code rep instr x y@(CmmLit lit) = do
1172 x_code <- getAnyReg x
1175 = x_code dst `snocOL`
1176 instr (OpImm (litToImm lit)) (OpReg dst)
1178 return (Any rep code)
1180 {- Case2: shift length is complex (non-immediate)
1181 * y must go in %ecx.
1182 * we cannot do y first *and* put its result in %ecx, because
1183 %ecx might be clobbered by x.
1184 * if we do y second, then x cannot be
1185 in a clobbered reg. Also, we cannot clobber x's reg
1186 with the instruction itself.
1188 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1189 - do y second and put its result into %ecx. x gets placed in a fresh
1190 tmp. This is likely to be better, because the reg alloc can
1191 eliminate this reg->reg move here (it won't eliminate the other one,
1192 because the move is into the fixed %ecx).
1194 shift_code rep instr x y{-amount-} = do
1195 x_code <- getAnyReg x
1196 tmp <- getNewRegNat rep
1197 y_code <- getAnyReg y
1199 code = x_code tmp `appOL`
1201 instr (OpReg ecx) (OpReg tmp)
1203 return (Fixed rep tmp code)
1205 --------------------
-- Integer addition.  A literal right operand that passes the
-- is64BitInteger check is handed to add_int; every other shape goes
-- through trivialCode with ADD supplied in both instruction slots.
add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
add_code rep x y = case y of
    CmmLit (CmmInt n _)
      | not (is64BitInteger n) -> add_int rep x n
    _ -> trivialCode rep add_instr (Just add_instr) x y
  where
    add_instr = ADD rep
1211 --------------------
-- Integer subtraction.  Subtracting a literal is rewritten as adding
-- its negation via add_int, provided the negated value passes the
-- is64BitInteger check; otherwise fall back to trivialCode with SUB
-- (no second instruction form is supplied).
sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
sub_code rep x y = case y of
    CmmLit (CmmInt n _)
      | not (is64BitInteger negated) -> add_int rep x negated
      where negated = negate n
    _ -> trivialCode rep (SUB rep) Nothing x y
1217 -- our three-operand add instruction:
1218 add_int rep x y = do
1219 (x_reg, x_code) <- getSomeReg x
1221 imm = ImmInt (fromInteger y)
1225 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1228 return (Any rep code)
1230 ----------------------
1231 div_code rep signed quotient x y = do
1232 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1233 x_code <- getAnyReg x
1235 widen | signed = CLTD rep
1236 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1238 instr | signed = IDIV
1241 code = y_code `appOL`
1243 toOL [widen, instr rep y_op]
1245 result | quotient = eax
1249 return (Fixed rep result code)
1252 getRegister (CmmLoad mem pk)
1255 Amode src mem_code <- getAmode mem
1257 code dst = mem_code `snocOL`
1258 IF_ARCH_i386(GLD pk src dst,
1259 MOV pk (OpAddr src) (OpReg dst))
1261 return (Any pk code)
1263 #if i386_TARGET_ARCH
1264 getRegister (CmmLoad mem pk)
1267 code <- intLoadCode (instr pk) mem
1268 return (Any pk code)
1270 instr I8 = MOVZxL pk
1273 -- we always zero-extend 8-bit loads, if we
1274 -- can't think of anything better. This is because
1275 -- we can't guarantee access to an 8-bit variant of every register
1276 -- (esi and edi don't have 8-bit variants), so to make things
1277 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1280 #if x86_64_TARGET_ARCH
1281 -- Simpler memory load code on x86_64
1282 getRegister (CmmLoad mem pk)
1284 code <- intLoadCode (MOV pk) mem
1285 return (Any pk code)
1288 getRegister (CmmLit (CmmInt 0 rep))
1290 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1291 adj_rep = case rep of I64 -> I32; _ -> rep
1292 rep1 = IF_ARCH_i386( rep, adj_rep )
1294 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1296 return (Any rep code)
1298 #if x86_64_TARGET_ARCH
1299 -- optimisation for loading small literals on x86_64: take advantage
1300 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1301 -- instruction forms are shorter.
1302 getRegister (CmmLit lit)
1303 | I64 <- cmmLitRep lit, not (isBigLit lit)
1306 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1308 return (Any I64 code)
1310 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1312 -- note1: not the same as is64BitLit, because that checks for
1313 -- signed literals that fit in 32 bits, but we want unsigned
1315 -- note2: all labels are small, because we're assuming the
1316 -- small memory model (see gcc docs, -mcmodel=small).
1319 getRegister (CmmLit lit)
1323 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1325 return (Any rep code)
1327 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- Build a load of the memory expression mem: compute its addressing
-- mode, then return a function that, given a destination register,
-- appends a single instr from that address into the register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode addr addr_code <- getAmode mem
    let load dst = addr_code `snocOL` instr (OpAddr addr) (OpReg dst)
    return load
1336 -- Compute an expression into *any* register, adding the appropriate
1337 -- move instruction if necessary.
1338 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1340 r <- getRegister expr
-- Turn a Register into code parameterised over its destination.
-- An 'Any' register already has that shape; a 'Fixed' register gets a
-- reg2reg move appended that copies it into the requested destination.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register = case register of
    Any _ code          -> return code
    Fixed rep reg fcode -> return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1347 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1348 -- Fixed registers might not be byte-addressable, so we make sure we've
1349 -- got a temporary, inserting an extra reg copy if necessary.
1350 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1351 #if x86_64_TARGET_ARCH
1352 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1354 getByteReg expr = do
1355 r <- getRegister expr
1358 tmp <- getNewRegNat rep
1359 return (tmp, code tmp)
1361 | isVirtualReg reg -> return (reg,code)
1363 tmp <- getNewRegNat rep
1364 return (tmp, code `snocOL` reg2reg rep reg tmp)
1365 -- ToDo: could optimise slightly by checking for byte-addressable
1366 -- real registers, but that will happen very rarely if at all.
1369 -- Another variant: this time we want the result in a register that cannot
1370 -- be modified by code to evaluate an arbitrary expression.
1371 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1372 getNonClobberedReg expr = do
1373 r <- getRegister expr
1376 tmp <- getNewRegNat rep
1377 return (tmp, code tmp)
1379 -- only free regs can be clobbered
1380 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1381 tmp <- getNewRegNat rep
1382 return (tmp, code `snocOL` reg2reg rep reg tmp)
1386 reg2reg :: MachRep -> Reg -> Reg -> Instr
1388 #if i386_TARGET_ARCH
1389 | isFloatingRep rep = GMOV src dst
1391 | otherwise = MOV rep (OpReg src) (OpReg dst)
1393 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1397 #if sparc_TARGET_ARCH
1399 getRegister (CmmLit (CmmFloat f F32)) = do
1400 lbl <- getNewLabelNat
1401 let code dst = toOL [
1404 CmmStaticLit (CmmFloat f F32)],
1405 SETHI (HI (ImmCLbl lbl)) dst,
1406 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1407 return (Any F32 code)
1409 getRegister (CmmLit (CmmFloat d F64)) = do
1410 lbl <- getNewLabelNat
1411 let code dst = toOL [
1414 CmmStaticLit (CmmFloat d F64)],
1415 SETHI (HI (ImmCLbl lbl)) dst,
1416 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1417 return (Any F64 code)
1419 getRegister (CmmMachOp mop [x]) -- unary MachOps
1421 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1422 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1424 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1425 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1427 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1429 MO_U_Conv F64 F32-> coerceDbl2Flt x
1430 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1432 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1433 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1434 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1435 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1437 -- Conversions which are a nop on sparc
1439 | from == to -> conversionNop to x
1440 MO_U_Conv I32 to -> conversionNop to x
1441 MO_S_Conv I32 to -> conversionNop to x
1444 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1445 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1446 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1447 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1449 other_op -> panic "Unknown unary mach op"
1452 integerExtend signed from to expr = do
1453 (reg, e_code) <- getSomeReg expr
1457 ((if signed then SRA else SRL)
1458 reg (RIImm (ImmInt 0)) dst)
1459 return (Any to code)
1460 conversionNop new_rep expr
1461 = do e_code <- getRegister expr
1462 return (swizzleRegisterRep e_code new_rep)
1464 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1466 MO_Eq F32 -> condFltReg EQQ x y
1467 MO_Ne F32 -> condFltReg NE x y
1469 MO_S_Gt F32 -> condFltReg GTT x y
1470 MO_S_Ge F32 -> condFltReg GE x y
1471 MO_S_Lt F32 -> condFltReg LTT x y
1472 MO_S_Le F32 -> condFltReg LE x y
1474 MO_Eq F64 -> condFltReg EQQ x y
1475 MO_Ne F64 -> condFltReg NE x y
1477 MO_S_Gt F64 -> condFltReg GTT x y
1478 MO_S_Ge F64 -> condFltReg GE x y
1479 MO_S_Lt F64 -> condFltReg LTT x y
1480 MO_S_Le F64 -> condFltReg LE x y
1482 MO_Eq rep -> condIntReg EQQ x y
1483 MO_Ne rep -> condIntReg NE x y
1485 MO_S_Gt rep -> condIntReg GTT x y
1486 MO_S_Ge rep -> condIntReg GE x y
1487 MO_S_Lt rep -> condIntReg LTT x y
1488 MO_S_Le rep -> condIntReg LE x y
1490 MO_U_Gt I32 -> condIntReg GTT x y
1491 MO_U_Ge I32 -> condIntReg GE x y
1492 MO_U_Lt I32 -> condIntReg LTT x y
1493 MO_U_Le I32 -> condIntReg LE x y
1495 MO_U_Gt I16 -> condIntReg GU x y
1496 MO_U_Ge I16 -> condIntReg GEU x y
1497 MO_U_Lt I16 -> condIntReg LU x y
1498 MO_U_Le I16 -> condIntReg LEU x y
1500 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1501 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1503 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1505 -- ToDo: teach about V8+ SPARC div instructions
1506 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1507 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1508 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1509 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1511 MO_Add F32 -> trivialFCode F32 FADD x y
1512 MO_Sub F32 -> trivialFCode F32 FSUB x y
1513 MO_Mul F32 -> trivialFCode F32 FMUL x y
1514 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1516 MO_Add F64 -> trivialFCode F64 FADD x y
1517 MO_Sub F64 -> trivialFCode F64 FSUB x y
1518 MO_Mul F64 -> trivialFCode F64 FMUL x y
1519 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1521 MO_And rep -> trivialCode rep (AND False) x y
1522 MO_Or rep -> trivialCode rep (OR False) x y
1523 MO_Xor rep -> trivialCode rep (XOR False) x y
1525 MO_Mul rep -> trivialCode rep (SMUL False) x y
1527 MO_Shl rep -> trivialCode rep SLL x y
1528 MO_U_Shr rep -> trivialCode rep SRL x y
1529 MO_S_Shr rep -> trivialCode rep SRA x y
1532 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1533 [promote x, promote y])
1534 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1535 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1538 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1540 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1542 --------------------
1543 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1544 imulMayOflo rep a b = do
1545 (a_reg, a_code) <- getSomeReg a
1546 (b_reg, b_code) <- getSomeReg b
1547 res_lo <- getNewRegNat I32
1548 res_hi <- getNewRegNat I32
1550 shift_amt = case rep of
1553 _ -> panic "shift_amt"
1554 code dst = a_code `appOL` b_code `appOL`
1556 SMUL False a_reg (RIReg b_reg) res_lo,
1558 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1559 SUB False False res_lo (RIReg res_hi) dst
1561 return (Any I32 code)
1563 getRegister (CmmLoad mem pk) = do
1564 Amode src code <- getAmode mem
1566 code__2 dst = code `snocOL` LD pk src dst
1567 return (Any pk code__2)
1569 getRegister (CmmLit (CmmInt i _))
1572 src = ImmInt (fromInteger i)
1573 code dst = unitOL (OR False g0 (RIImm src) dst)
1575 return (Any I32 code)
1577 getRegister (CmmLit lit)
1578 = let rep = cmmLitRep lit
1582 OR False dst (RIImm (LO imm)) dst]
1583 in return (Any I32 code)
1585 #endif /* sparc_TARGET_ARCH */
1587 #if powerpc_TARGET_ARCH
1588 getRegister (CmmLoad mem pk)
1591 Amode addr addr_code <- getAmode mem
1592 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1593 addr_code `snocOL` LD pk dst addr
1594 return (Any pk code)
1596 -- catch simple cases of zero- or sign-extended load
-- I8 -> I32 unsigned conversion applied directly to a load: emit the
-- address code followed by one LD I8 into the destination.
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
    Amode ea ea_code <- getAmode mem
    let load_byte dst = ea_code `snocOL` LD I8 dst ea
    return (Any I32 load_byte)
1601 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
-- I16 -> I32 unsigned conversion applied directly to a load: emit the
-- address code followed by one LD I16 into the destination.
getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
    Amode ea ea_code <- getAmode mem
    let load_half dst = ea_code `snocOL` LD I16 dst ea
    return (Any I32 load_half)
-- I16 -> I32 signed conversion applied directly to a load: emit the
-- address code followed by one LA I16 into the destination.
getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
    Amode ea ea_code <- getAmode mem
    let load_half_signed dst = ea_code `snocOL` LA I16 dst ea
    return (Any I32 load_half_signed)
1611 getRegister (CmmMachOp mop [x]) -- unary MachOps
1613 MO_Not rep -> trivialUCode rep NOT x
1615 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1616 MO_S_Conv F32 F64 -> conversionNop F64 x
1619 | from == to -> conversionNop to x
1620 | isFloatingRep from -> coerceFP2Int from to x
1621 | isFloatingRep to -> coerceInt2FP from to x
1623 -- narrowing is a nop: we treat the high bits as undefined
1624 MO_S_Conv I32 to -> conversionNop to x
1625 MO_S_Conv I16 I8 -> conversionNop I8 x
1626 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1627 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1630 | from == to -> conversionNop to x
1631 -- narrowing is a nop: we treat the high bits as undefined
1632 MO_U_Conv I32 to -> conversionNop to x
1633 MO_U_Conv I16 I8 -> conversionNop I8 x
1634 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1635 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1637 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1638 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1639 MO_S_Neg rep -> trivialUCode rep NEG x
1642 conversionNop new_rep expr
1643 = do e_code <- getRegister expr
1644 return (swizzleRegisterRep e_code new_rep)
1646 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1648 MO_Eq F32 -> condFltReg EQQ x y
1649 MO_Ne F32 -> condFltReg NE x y
1651 MO_S_Gt F32 -> condFltReg GTT x y
1652 MO_S_Ge F32 -> condFltReg GE x y
1653 MO_S_Lt F32 -> condFltReg LTT x y
1654 MO_S_Le F32 -> condFltReg LE x y
1656 MO_Eq F64 -> condFltReg EQQ x y
1657 MO_Ne F64 -> condFltReg NE x y
1659 MO_S_Gt F64 -> condFltReg GTT x y
1660 MO_S_Ge F64 -> condFltReg GE x y
1661 MO_S_Lt F64 -> condFltReg LTT x y
1662 MO_S_Le F64 -> condFltReg LE x y
1664 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1665 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1667 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1668 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1669 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1670 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1672 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1673 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1674 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1675 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1677 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1678 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1679 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1680 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1682 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1683 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1684 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1685 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1687 -- optimize addition with 32-bit immediate
1691 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1692 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1695 (src, srcCode) <- getSomeReg x
1696 let imm = litToImm lit
1697 code dst = srcCode `appOL` toOL [
1698 ADDIS dst src (HA imm),
1699 ADD dst dst (RIImm (LO imm))
1701 return (Any I32 code)
1702 _ -> trivialCode I32 True ADD x y
1704 MO_Add rep -> trivialCode rep True ADD x y
1706 case y of -- subfi ('substract from' with immediate) doesn't exist
1707 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1708 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1709 _ -> trivialCodeNoImm rep SUBF y x
1711 MO_Mul rep -> trivialCode rep True MULLW x y
1713 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1715 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1716 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1718 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1719 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1721 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1722 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1724 MO_And rep -> trivialCode rep False AND x y
1725 MO_Or rep -> trivialCode rep False OR x y
1726 MO_Xor rep -> trivialCode rep False XOR x y
1728 MO_Shl rep -> trivialCode rep False SLW x y
1729 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1730 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1732 getRegister (CmmLit (CmmInt i rep))
1733 | Just imm <- makeImmediate rep True i
1735 code dst = unitOL (LI dst imm)
1737 return (Any rep code)
1739 getRegister (CmmLit (CmmFloat f frep)) = do
1740 lbl <- getNewLabelNat
1741 dflags <- getDynFlagsNat
1742 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1743 Amode addr addr_code <- getAmode dynRef
1745 LDATA ReadOnlyData [CmmDataLabel lbl,
1746 CmmStaticLit (CmmFloat f frep)]
1747 `consOL` (addr_code `snocOL` LD frep dst addr)
1748 return (Any frep code)
1750 getRegister (CmmLit lit)
1751 = let rep = cmmLitRep lit
1755 OR dst dst (RIImm (LO imm))
1757 in return (Any rep code)
1759 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1761 -- extend?Expr: wrap integer expression of type rep
1762 -- in a conversion to I32
-- Wrap an integer expression of representation rep in a signed
-- conversion to I32; an expression already at I32 is returned as is.
extendSExpr :: MachRep -> CmmExpr -> CmmExpr
extendSExpr rep x = case rep of
    I32 -> x
    _   -> CmmMachOp (MO_S_Conv rep I32) [x]
-- Wrap an integer expression of representation rep in an unsigned
-- conversion to I32; an expression already at I32 is returned as is.
extendUExpr :: MachRep -> CmmExpr -> CmmExpr
extendUExpr rep x = case rep of
    I32 -> x
    _   -> CmmMachOp (MO_U_Conv rep I32) [x]
1768 #endif /* powerpc_TARGET_ARCH */
1771 -- -----------------------------------------------------------------------------
1772 -- The 'Amode' type: Memory addressing modes passed up the tree.
1774 data Amode = Amode AddrMode InstrBlock
1777 Now, given a tree (the argument to a CmmLoad) that references memory,
1778 produce a suitable addressing mode.
1780 A Rule of the Game (tm) for Amodes: use of the addr bit must
1781 immediately follow use of the code part, since the code part puts
1782 values in registers which the addr then refers to. So you can't put
1783 anything in between, lest it overwrite some of those registers. If
1784 you need to do some other computation between the code part and use of
1785 the addr bit, first store the effective address from the amode in a
1786 temporary, then do the other computation, and then use the temporary:
1790 ... other computation ...
1794 getAmode :: CmmExpr -> NatM Amode
1795 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1797 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1799 #if alpha_TARGET_ARCH
1801 getAmode (StPrim IntSubOp [x, StInt i])
1802 = getNewRegNat PtrRep `thenNat` \ tmp ->
1803 getRegister x `thenNat` \ register ->
1805 code = registerCode register tmp
1806 reg = registerName register tmp
1807 off = ImmInt (-(fromInteger i))
1809 return (Amode (AddrRegImm reg off) code)
1811 getAmode (StPrim IntAddOp [x, StInt i])
1812 = getNewRegNat PtrRep `thenNat` \ tmp ->
1813 getRegister x `thenNat` \ register ->
1815 code = registerCode register tmp
1816 reg = registerName register tmp
1817 off = ImmInt (fromInteger i)
1819 return (Amode (AddrRegImm reg off) code)
1823 = return (Amode (AddrImm imm__2) id)
1826 imm__2 = case imm of Just x -> x
1829 = getNewRegNat PtrRep `thenNat` \ tmp ->
1830 getRegister other `thenNat` \ register ->
1832 code = registerCode register tmp
1833 reg = registerName register tmp
1835 return (Amode (AddrReg reg) code)
1837 #endif /* alpha_TARGET_ARCH */
1839 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1841 #if x86_64_TARGET_ARCH
-- PicBaseReg + literal displacement: no code is needed, the address is
-- expressed directly through ripRel on the displacement.
getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
                                  CmmLit displacement])
  = return (Amode rip_addr nilOL)
  where
    rip_addr = ripRel (litToImm displacement)
1849 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1851 -- This is all just ridiculous, since it carefully undoes
1852 -- what mangleIndexTree has just done.
-- x - literal: put x in a register and use the negated literal as the
-- displacement of a base-only address.
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
  | not (is64BitLit lit)
  -- ASSERT(rep == I32)???
  = do (base_reg, base_code) <- getSomeReg x
       let displacement = ImmInt (negate (fromInteger i))
           addr = AddrBaseIndex (EABaseReg base_reg) EAIndexNone displacement
       return (Amode addr base_code)
-- x + literal: put x in a register and use the literal as the
-- displacement of a base-only address.
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
  | not (is64BitLit lit)
  -- ASSERT(rep == I32)???
  = getSomeReg x >>= \(base_reg, base_code) ->
      let addr = AddrBaseIndex (EABaseReg base_reg)
                               EAIndexNone
                               (ImmInt (fromInteger i))
      in  return (Amode addr base_code)
1867 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1868 -- recognised by the next rule.
1869 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1871 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- x + (y << shift) with a shift of 0..3: hand both operands to
-- x86_complex_amode with a zero offset.
getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift `elem` [0, 1, 2, 3]
  = x86_complex_amode x y shift 0
-- x + ((y << shift) + offset): fold the whole expression into one
-- addressing mode via x86_complex_amode.
--
-- Both conditions must hold: the shift has to be one of 0..3, and the
-- offset must not be a 64-bit integer according to is64BitInteger.
-- (&&) binds tighter than (||), so the chain of shift tests needs its
-- own parentheses; without them the offset test would only guard the
-- shift == 3 alternative, and an over-wide offset paired with a shift
-- of 0, 1 or 2 would be accepted here.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
  && not (is64BitInteger offset)
  = x86_complex_amode x y shift offset
-- Any other addition: treat the operands as base and index, with
-- shift 0 and offset 0.
getAmode (CmmMachOp (MO_Add _) [base, index])
  = x86_complex_amode base index 0 0
-- A literal that is not a 64-bit literal is used directly as an
-- immediate address; no code is required.
getAmode (CmmLit lit)
  | not (is64BitLit lit)
  = return (Amode abs_addr nilOL)
  where
    abs_addr = ImmAddr (litToImm lit) 0
1893 (reg,code) <- getSomeReg expr
1894 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1897 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1898 x86_complex_amode base index shift offset
1899 = do (x_reg, x_code) <- getNonClobberedReg base
1900 -- x must be in a temp, because it has to stay live over y_code
1901 -- we could compare x_reg and y_reg and do something better here...
1902 (y_reg, y_code) <- getSomeReg index
1904 code = x_code `appOL` y_code
1905 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1906 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1909 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1911 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1913 #if sparc_TARGET_ARCH
1915 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1918 (reg, code) <- getSomeReg x
1920 off = ImmInt (-(fromInteger i))
1921 return (Amode (AddrRegImm reg off) code)
1924 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1927 (reg, code) <- getSomeReg x
1929 off = ImmInt (fromInteger i)
1930 return (Amode (AddrRegImm reg off) code)
1932 getAmode (CmmMachOp (MO_Add rep) [x, y])
1934 (regX, codeX) <- getSomeReg x
1935 (regY, codeY) <- getSomeReg y
1937 code = codeX `appOL` codeY
1938 return (Amode (AddrRegReg regX regY) code)
1940 -- XXX Is this same as "leaf" in Stix?
1941 getAmode (CmmLit lit)
1943 tmp <- getNewRegNat I32
1945 code = unitOL (SETHI (HI imm__2) tmp)
1946 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1948 imm__2 = litToImm lit
1952 (reg, code) <- getSomeReg other
1955 return (Amode (AddrRegImm reg off) code)
1957 #endif /* sparc_TARGET_ARCH */
1959 #ifdef powerpc_TARGET_ARCH
1960 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1961 | Just off <- makeImmediate I32 True (-i)
1963 (reg, code) <- getSomeReg x
1964 return (Amode (AddrRegImm reg off) code)
1967 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1968 | Just off <- makeImmediate I32 True i
1970 (reg, code) <- getSomeReg x
1971 return (Amode (AddrRegImm reg off) code)
1973 -- optimize addition with 32-bit immediate
1975 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1977 tmp <- getNewRegNat I32
1978 (src, srcCode) <- getSomeReg x
1979 let imm = litToImm lit
1980 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1981 return (Amode (AddrRegImm tmp (LO imm)) code)
1983 getAmode (CmmLit lit)
1985 tmp <- getNewRegNat I32
1986 let imm = litToImm lit
1987 code = unitOL (LIS tmp (HA imm))
1988 return (Amode (AddrRegImm tmp (LO imm)) code)
1990 getAmode (CmmMachOp (MO_Add I32) [x, y])
1992 (regX, codeX) <- getSomeReg x
1993 (regY, codeY) <- getSomeReg y
1994 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1998 (reg, code) <- getSomeReg other
2001 return (Amode (AddrRegImm reg off) code)
2002 #endif /* powerpc_TARGET_ARCH */
2004 -- -----------------------------------------------------------------------------
2005 -- getOperand: sometimes any operand will do.
2007 -- getNonClobberedOperand: the value of the operand will remain valid across
2008 -- the computation of an arbitrary expression, unless the expression
2009 -- is computed directly into a register which the operand refers to
2010 -- (see trivialCode where this function is used for an example).
2012 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2014 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2015 #if x86_64_TARGET_ARCH
2016 getNonClobberedOperand (CmmLit lit)
2017 | isSuitableFloatingPointLit lit = do
2018 lbl <- getNewLabelNat
2019 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2021 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2023 getNonClobberedOperand (CmmLit lit)
2024 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2025 return (OpImm (litToImm lit), nilOL)
2026 getNonClobberedOperand (CmmLoad mem pk)
2027 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2028 Amode src mem_code <- getAmode mem
2030 if (amodeCouldBeClobbered src)
2032 tmp <- getNewRegNat wordRep
2033 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2034 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2037 return (OpAddr src', save_code `appOL` mem_code)
-- Fallback: compute the expression with getNonClobberedReg and use
-- that register as the operand.
getNonClobberedOperand e =
    getNonClobberedReg e >>= \(reg, code) -> return (OpReg reg, code)
-- True when at least one register mentioned by the addressing mode
-- satisfies regClobbered.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered = any regClobbered . addrModeRegs
-- A real register counts as clobberable exactly when freeReg reports
-- it as free; every other kind of register does not.
regClobbered :: Reg -> Bool
regClobbered reg = case reg of
    RealReg rr -> isFastTrue (freeReg rr)
    _          -> False
2048 -- getOperand: the operand is not required to remain valid across the
2049 -- computation of an arbitrary expression.
2050 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2051 #if x86_64_TARGET_ARCH
2052 getOperand (CmmLit lit)
2053 | isSuitableFloatingPointLit lit = do
2054 lbl <- getNewLabelNat
2055 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2057 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- A literal that is neither a 64-bit literal nor of floating
-- representation becomes an immediate operand, with no code.
getOperand (CmmLit lit)
  | not (is64BitLit lit || isFloatingRep (cmmLitRep lit))
  = return (OpImm (litToImm lit), nilOL)
-- A load becomes a memory operand built from its addressing mode.
-- On i386 this applies only to non-floating, non-I64 loads; on the
-- other branch of the macro the guard is always True.
getOperand (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True)
  = getAmode mem >>= \(Amode src mem_code) ->
      return (OpAddr src, mem_code)
2067 (reg, code) <- getSomeReg e
2068 return (OpReg reg, code)
2070 isOperand :: CmmExpr -> Bool
2071 isOperand (CmmLoad _ _) = True
2072 isOperand (CmmLit lit) = not (is64BitLit lit)
2073 || isSuitableFloatingPointLit lit
2076 -- if we want a floating-point literal as an operand, we can
2077 -- use it directly from memory. However, if the literal is
2078 -- zero, we're better off generating it into a register using
-- Only non-zero floating-point literals qualify; anything else,
-- including a floating-point zero, is rejected.
isSuitableFloatingPointLit lit = case lit of
    CmmFloat f _ -> f /= 0.0
    _            -> False
2083 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-- A load is returned as a memory operand built from its addressing
-- mode.  On i386 this applies only to non-floating, non-I64 loads; on
-- the other branch of the macro the guard is always True.
getRegOrMem (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
    Amode addr addr_code <- getAmode mem
    let operand = OpAddr addr
    return (operand, addr_code)
2089 (reg, code) <- getNonClobberedReg e
2090 return (OpReg reg, code)
2092 #if x86_64_TARGET_ARCH
2093 is64BitLit (CmmInt i I64) = is64BitInteger i
2094 -- assume that labels are in the range 0-2^31-1: this assumes the
2095 -- small memory model (see gcc docs, -mcmodel=small).
2097 is64BitLit x = False
-- True when the value, once truncated to 64 bits, lies outside the
-- signed 32-bit range.
--
-- A CmmInt is intended to be truncated to the appropriate number of
-- bits, so the value is truncated to Int64 before the range test.
-- This matters because e.g. -1 as a CmmInt might be either -1 or
-- 18446744073709551615.
is64BitInteger :: Integer -> Bool
is64BitInteger i = not (lowest <= truncated && truncated <= highest)
  where
    truncated = fromIntegral i :: Int64
    lowest    = -0x80000000 :: Int64
    highest   = 0x7fffffff  :: Int64
2108 -- -----------------------------------------------------------------------------
2109 -- The 'CondCode' type: Condition codes passed up the tree.
2111 data CondCode = CondCode Bool Cond InstrBlock
2113 -- Set up a condition code for a conditional branch.
2115 getCondCode :: CmmExpr -> NatM CondCode
2117 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2119 #if alpha_TARGET_ARCH
2120 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2121 #endif /* alpha_TARGET_ARCH */
2123 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2125 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2126 -- yes, they really do seem to want exactly the same!
-- Dispatch a two-operand comparison MachOp to condFltCode (F32/F64
-- equality and signed orderings) or condIntCode (every other rep,
-- including the unsigned orderings).  Anything else panics.
-- NOTE(review): the line introducing the case on 'mop' is not visible
-- in this span - confirm against the full file.
2128 getCondCode (CmmMachOp mop [x, y])
2131 MO_Eq F32 -> condFltCode EQQ x y
2132 MO_Ne F32 -> condFltCode NE x y
2134 MO_S_Gt F32 -> condFltCode GTT x y
2135 MO_S_Ge F32 -> condFltCode GE x y
2136 MO_S_Lt F32 -> condFltCode LTT x y
2137 MO_S_Le F32 -> condFltCode LE x y
2139 MO_Eq F64 -> condFltCode EQQ x y
2140 MO_Ne F64 -> condFltCode NE x y
2142 MO_S_Gt F64 -> condFltCode GTT x y
2143 MO_S_Ge F64 -> condFltCode GE x y
2144 MO_S_Lt F64 -> condFltCode LTT x y
2145 MO_S_Le F64 -> condFltCode LE x y
-- The float reps are matched first; these patterns catch what is left.
2147 MO_Eq rep -> condIntCode EQQ x y
2148 MO_Ne rep -> condIntCode NE x y
2150 MO_S_Gt rep -> condIntCode GTT x y
2151 MO_S_Ge rep -> condIntCode GE x y
2152 MO_S_Lt rep -> condIntCode LTT x y
2153 MO_S_Le rep -> condIntCode LE x y
2155 MO_U_Gt rep -> condIntCode GU x y
2156 MO_U_Ge rep -> condIntCode GEU x y
2157 MO_U_Lt rep -> condIntCode LU x y
2158 MO_U_Le rep -> condIntCode LEU x y
2160 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
-- Not a binary MachOp application at all.
2162 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2164 #elif powerpc_TARGET_ARCH
2166 -- almost the same as everywhere else - but we need to
2167 -- extend small integers to 32 bit first
-- PowerPC variant.  Float comparisons go to condFltCode unchanged.
-- Integer comparisons wrap both operands first: extendSExpr for the
-- signed orderings, extendUExpr for equality and the unsigned ones.
-- NOTE(review): the line introducing the case on 'mop' is not visible
-- in this span - confirm against the full file.
2169 getCondCode (CmmMachOp mop [x, y])
2171 MO_Eq F32 -> condFltCode EQQ x y
2172 MO_Ne F32 -> condFltCode NE x y
2174 MO_S_Gt F32 -> condFltCode GTT x y
2175 MO_S_Ge F32 -> condFltCode GE x y
2176 MO_S_Lt F32 -> condFltCode LTT x y
2177 MO_S_Le F32 -> condFltCode LE x y
2179 MO_Eq F64 -> condFltCode EQQ x y
2180 MO_Ne F64 -> condFltCode NE x y
2182 MO_S_Gt F64 -> condFltCode GTT x y
2183 MO_S_Ge F64 -> condFltCode GE x y
2184 MO_S_Lt F64 -> condFltCode LTT x y
2185 MO_S_Le F64 -> condFltCode LE x y
2187 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2188 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2190 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2191 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2192 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2193 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2195 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2196 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2197 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2198 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2200 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
-- Unlike the x86/sparc variant, this panic does not print the expression.
2202 getCondCode other = panic "getCondCode(2)(powerpc)"
2208 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2209 -- passed back up the tree.
2211 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2213 #if alpha_TARGET_ARCH
-- Alpha has no implementation of either helper: any use panics.
2214 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2215 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2216 #endif /* alpha_TARGET_ARCH */
2218 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2219 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Integer comparison for x86/x86-64.  The equations are tried in order
-- and choose an instruction form from the shape of the operands; each
-- one returns a CondCode whose flag is False.

-- memory vs immediate
condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
                  CMP pk (OpImm imm) (OpAddr x_addr)
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- The mask is emitted as an immediate, so it must pass the same
-- is64BitLit check as the immediate in the equation above.  A mask
-- that fails the check falls through to the "anything vs zero"
-- equation below, which evaluates the whole AND into a register.
condIntCode cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | CmmLit lit@(CmmInt mask _) <- o2
    , not (is64BitLit lit)
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
      return (CondCode False cond code)

-- anything vs zero
condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
                  TEST pk (OpReg x_reg) (OpReg x_reg)
    return (CondCode False cond code)

-- anything vs operand
condIntCode cond x y | isOperand y = do
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
                  CMP (cmmExprRep x) y_op (OpReg x_reg)
    return (CondCode False cond code)

-- anything vs anything
-- y is evaluated first, into a non-clobbered register; x may then be
-- used from a register or from memory.
condIntCode cond x y = do
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op, x_code) <- getRegOrMem x
    let
        code = y_code `appOL`
               x_code `snocOL`
                  CMP (cmmExprRep x) (OpReg y_reg) x_op
    return (CondCode False cond code)
2274 #if i386_TARGET_ARCH
-- i386 floating-point comparison.  Both operands are brought into
-- registers and a single GCMP carrying the requested condition is
-- appended.  The assertion restricts 'cond' to the six listed
-- conditions.
2275 condFltCode cond x y
2276 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2277 (x_reg, x_code) <- getNonClobberedReg x
2278 (y_reg, y_code) <- getSomeReg y
2280 code = x_code `appOL` y_code `snocOL`
2281 GCMP cond x_reg y_reg
2282 -- The GCMP insn does the test and sets the zero flag if comparable
2283 -- and true. Hence we always supply EQQ as the condition to test.
2284 return (CondCode True EQQ code)
2285 #endif /* i386_TARGET_ARCH */
2287 #if x86_64_TARGET_ARCH
2288 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2289 -- an operand, but the right must be a reg. We can probably do better
2290 -- than this general case...
-- x86_64 floating-point comparison: x goes into a non-clobbered
-- register, y becomes an operand, and a CMP at the rep of x is
-- emitted.  The returned condition is passed through condToUnsigned.
-- NOTE(review): y_code is bound but does not appear in the visible
-- expression for 'code'; a line seems to be missing between the
-- `appOL` and the CMP - confirm against the full file.
2291 condFltCode cond x y = do
2292 (x_reg, x_code) <- getNonClobberedReg x
2293 (y_op, y_code) <- getOperand y
2295 code = x_code `appOL`
2297 CMP (cmmExprRep x) y_op (OpReg x_reg)
2298 -- NB(1): we need to use the unsigned comparison operators on the
2299 -- result of this comparison.
2301 return (CondCode True (condToUnsigned cond) code)
2304 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2306 #if sparc_TARGET_ARCH
-- SPARC integer comparison.  Both equations emit a SUB whose
-- destination is g0; the first uses the literal as an immediate, the
-- second evaluates both operands into registers.
-- NOTE(review): no range check on 'y' is visible for the immediate
-- form - confirm that a guard exists in the full file.
2308 condIntCode cond x (CmmLit (CmmInt y rep))
2311 (src1, code) <- getSomeReg x
2313 src2 = ImmInt (fromInteger y)
2314 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2315 return (CondCode False cond code')
-- General case: register vs register.
2317 condIntCode cond x y = do
2318 (src1, code1) <- getSomeReg x
2319 (src2, code2) <- getSomeReg y
2321 code__2 = code1 `appOL` code2 `snocOL`
2322 SUB False True src1 (RIReg src2) g0
2323 return (CondCode False cond code__2)
-- SPARC floating-point comparison.  Both operands are evaluated into
-- registers and an F64 temporary is allocated.  The three visible
-- arms compare directly at rep pk1, or first apply 'promote' (an
-- FxTOy F32 F64 into the temporary) to whichever side needs it and
-- compare at F64.
-- NOTE(review): the bindings of pk1 (and whatever it is compared
-- with) and the head of the if-expression are not visible here.
2326 condFltCode cond x y = do
2327 (src1, code1) <- getSomeReg x
2328 (src2, code2) <- getSomeReg y
2329 tmp <- getNewRegNat F64
2331 promote x = FxTOy F32 F64 x tmp
2338 code1 `appOL` code2 `snocOL`
2339 FCMP True pk1 src1 src2
2340 else if pk1 == F32 then
2341 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2342 FCMP True F64 tmp src2
2344 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2345 FCMP True F64 src1 tmp
2346 return (CondCode True cond code__2)
2348 #endif /* sparc_TARGET_ARCH */
2350 #if powerpc_TARGET_ARCH
2351 -- ###FIXME: I16 and I8!
-- PowerPC integer comparison.  When makeImmediate accepts the literal
-- (treated as signed exactly when the condition is not unsigned) the
-- compare takes an immediate; otherwise both sides go into registers.
-- CMPL is chosen for unsigned conditions, CMP otherwise, always at I32.
2352 condIntCode cond x (CmmLit (CmmInt y rep))
2353 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2355 (src1, code) <- getSomeReg x
2357 code' = code `snocOL`
2358 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2359 return (CondCode False cond code')
-- General case: register vs register.
2361 condIntCode cond x y = do
2362 (src1, code1) <- getSomeReg x
2363 (src2, code2) <- getSomeReg y
2365 code' = code1 `appOL` code2 `snocOL`
2366 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2367 return (CondCode False cond code')
-- PowerPC floating-point comparison: evaluate both operands into
-- registers and emit FCMP.  For GE and LE an extra CRNOR over the
-- bit indices named below is appended.
-- NOTE(review): the default arm of the case on 'cond' is not visible
-- in this span.
2369 condFltCode cond x y = do
2370 (src1, code1) <- getSomeReg x
2371 (src2, code2) <- getSomeReg y
2373 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2374 code'' = case cond of -- twiddle CR to handle unordered case
2375 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2376 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2379 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2380 return (CondCode True cond code'')
2382 #endif /* powerpc_TARGET_ARCH */
2384 -- -----------------------------------------------------------------------------
2385 -- Generating assignments
2387 -- Assignments are really at the heart of the whole code generation
2388 -- business. Almost all top-level nodes of any real importance are
2389 -- assignments, which correspond to loads, stores, or register
2390 -- transfers. If we're really lucky, some of the register transfers
2391 -- will go away, because we can use the destination register to
2392 -- complete the code generation for the right hand side. This only
2393 -- fails when the right hand side is forced into a fixed register
2394 -- (e.g. the result of a call).
2396 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2397 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2399 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2400 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2402 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2404 #if alpha_TARGET_ARCH
-- Alpha integer assignment.
-- NOTE(review): this defines 'assignIntCode', whereas the signatures
-- in this section declare assignMem_IntCode / assignReg_IntCode, and
-- it is written with `thenNat` chains and function-composition code
-- builders rather than the do-blocks and OrdList operations used by
-- the other targets.  Presumably it predates them - confirm before
-- enabling.
--
-- First equation: destination is a load, so the value is stored (ST)
-- to the computed address.
2406 assignIntCode pk (CmmLoad dst _) src
2407 = getNewRegNat IntRep `thenNat` \ tmp ->
2408 getAmode dst `thenNat` \ amode ->
2409 getRegister src `thenNat` \ register ->
2411 code1 = amodeCode amode []
2412 dst__2 = amodeAddr amode
2413 code2 = registerCode register tmp []
2414 src__2 = registerName register tmp
2415 sz = primRepToSize pk
2416 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Second equation: any other destination; when the source register is
-- fixed an OR of the source with itself targets the destination.
2420 assignIntCode pk dst src
2421 = getRegister dst `thenNat` \ register1 ->
2422 getRegister src `thenNat` \ register2 ->
2424 dst__2 = registerName register1 zeroh
2425 code = registerCode register2 dst__2
2426 src__2 = registerName register2 dst__2
2427 code__2 = if isFixed register2
2428 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2433 #endif /* alpha_TARGET_ARCH */
2435 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2437 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2439 -- integer assignment to memory
2441 -- specific case of adding/subtracting an integer to a particular address.
2442 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Special case: the right-hand side is "load from the same address,
-- add or subtract an integer literal".  When 'check' recognises the
-- operator and the literal is acceptable (the rep is not I64, or the
-- value passes the is64BitInteger test), a single ADD/SUB with an
-- immediate is applied directly to the memory operand.
2444 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2445 CmmLit (CmmInt i _)])
2446 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2447 Just instr <- check op
2448 = do Amode amode code_addr <- getAmode addr
2449 let code = code_addr `snocOL`
2450 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
-- Only addition and subtraction are visible as recognised operators.
2453 check (MO_Add _) = Just ADD
2454 check (MO_Sub _) = Just SUB
-- General case: compute the address and a source operand (immediate
-- or register, via get_op_RI), then MOV the source to memory.
2459 assignMem_IntCode pk addr src = do
2460 Amode addr code_addr <- getAmode addr
2461 (code_src, op_src) <- get_op_RI src
2463 code = code_src `appOL`
2465 MOV pk op_src (OpAddr addr)
2466 -- NOTE: op_src is stable, so it will still be valid
2467 -- after code_addr. This may involve the introduction
2468 -- of an extra MOV to a temporary register, but we hope
2469 -- the register allocator will get rid of it.
-- Turn an expression into a register-or-immediate operand.  A literal
-- that is not a 64-bit literal becomes an immediate with no code; the
-- remaining case evaluates into a non-clobbered register.
-- NOTE(review): the head of the second equation (which binds 'op') is
-- not visible in this span.
2473 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2474 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2475 = return (nilOL, OpImm (litToImm lit))
2477 = do (reg,code) <- getNonClobberedReg op
2478 return (code, OpReg reg)
-- Register destination, memory source: build the load with a MOV of
-- the assignment's rep and aim it at the destination register.
assignReg_IntCode pk reg (CmmLoad src _) =
    intLoadCode (MOV pk) src >>= \mk_load ->
    return (mk_load (getRegisterReg reg))

-- Register destination, arbitrary source: have the source expression
-- compute straight into the destination register.
assignReg_IntCode _ reg src =
    getAnyReg src >>= \mk_code ->
    return (mk_code (getRegisterReg reg))
2491 #endif /* i386_TARGET_ARCH */
2493 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2495 #if sparc_TARGET_ARCH
-- Integer store (SPARC): evaluate the value into a register, compute
-- the target address, then append one ST of the given rep.
assignMem_IntCode pk addr src = do
    (val_reg, val_code)     <- getSomeReg src
    Amode target addr_setup <- getAmode addr
    let store = ST pk val_reg target
    return ((val_code `appOL` addr_setup) `snocOL` store)
-- Integer assignment to a register (SPARC).
--
-- The source is evaluated with getRegister.  An 'Any' result can be
-- computed directly into the destination.  A 'Fixed' result already
-- sits in a particular register, so its value is copied into the
-- destination afterwards.
assignReg_IntCode pk reg src = do
    r <- getRegister src
    return $ case r of
        Any _ code         -> code dst
        -- The destination is the last operand (compare the SUBs in
        -- this module that target g0), so this is dst := g0 | freg.
        -- The operands used to be the other way round, which
        -- overwrote the fixed source register with the old
        -- destination value.
        Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
    where
      dst = getRegisterReg reg
2511 #endif /* sparc_TARGET_ARCH */
2513 #if powerpc_TARGET_ARCH
-- Integer store (PowerPC): value into a register, address into an
-- addressing mode, then one ST of the requested rep.
assignMem_IntCode pk addr src = do
    (value, value_code)       <- getSomeReg src
    Amode location setup_code <- getAmode addr
    return (value_code `appOL` setup_code `snocOL` ST pk value location)
2520 -- dst is a reg, but src could be anything
-- PowerPC register assignment.  An 'Any' source computes directly
-- into the destination; a 'Fixed' source is followed by an MR whose
-- first operand is the destination register.
-- NOTE(review): the `= do`, the case head on 'r' and the `where`
-- introducing 'dst' are not visible in this span.
2521 assignReg_IntCode pk reg src
2523 r <- getRegister src
2525 Any _ code -> code dst
2526 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2528 dst = getRegisterReg reg
2530 #endif /* powerpc_TARGET_ARCH */
2533 -- -----------------------------------------------------------------------------
2534 -- Floating-point assignments
2536 #if alpha_TARGET_ARCH
-- Alpha floating-point assignment; mirrors the Alpha integer version.
-- NOTE(review): this defines 'assignFltCode', whereas the signatures
-- in this module declare assignMem_FltCode / assignReg_FltCode, and
-- it uses the `thenNat` style found only in the Alpha sections.
-- Presumably it predates them - confirm before enabling.
--
-- First equation: destination is a load, so the value is stored (ST).
2538 assignFltCode pk (CmmLoad dst _) src
2539 = getNewRegNat pk `thenNat` \ tmp ->
2540 getAmode dst `thenNat` \ amode ->
2541 getRegister src `thenNat` \ register ->
2543 code1 = amodeCode amode []
2544 dst__2 = amodeAddr amode
2545 code2 = registerCode register tmp []
2546 src__2 = registerName register tmp
2547 sz = primRepToSize pk
2548 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Second equation: any other destination; a fixed source register is
-- followed by an FMOV into the destination.
2552 assignFltCode pk dst src
2553 = getRegister dst `thenNat` \ register1 ->
2554 getRegister src `thenNat` \ register2 ->
2556 dst__2 = registerName register1 zeroh
2557 code = registerCode register2 dst__2
2558 src__2 = registerName register2 dst__2
2559 code__2 = if isFixed register2
2560 then code . mkSeqInstr (FMOV src__2 dst__2)
2565 #endif /* alpha_TARGET_ARCH */
2567 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2569 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2571 -- Floating point assignment to memory
-- The source is evaluated into a non-clobbered register and the
-- address is computed.  The store is chosen by IF_ARCH_i386 between a
-- GST and a plain MOV from the source register.
-- NOTE(review): addr_code is bound but not used in the visible lines,
-- and no `return` is visible; lines appear to be missing - confirm.
2572 assignMem_FltCode pk addr src = do
2573 (src_reg, src_code) <- getNonClobberedReg src
2574 Amode addr addr_code <- getAmode addr
2576 code = src_code `appOL`
2578 IF_ARCH_i386(GST pk src_reg addr,
2579 MOV pk (OpReg src_reg) (OpAddr addr))
-- Float assignment whose destination is a register or temporary: the
-- source expression is asked to compute directly into that register.
assignReg_FltCode _ reg src =
    getAnyReg src >>= \mk_code ->
    return (mk_code (getRegisterReg reg))
2587 #endif /* i386_TARGET_ARCH */
2589 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2591 #if sparc_TARGET_ARCH
2593 -- Floating point assignment to memory
-- SPARC float store.  The address and the source are evaluated and a
-- temporary of rep 'pk' is allocated.  One arm stores the source
-- register directly; the other first converts from the source
-- expression's rep (pk__2) to 'pk' with FxTOy and stores the temporary.
-- NOTE(review): the condition selecting between the two arms and the
-- final `return` are not visible in this span.
2594 assignMem_FltCode pk addr src = do
2595 Amode dst__2 code1 <- getAmode addr
2596 (src__2, code2) <- getSomeReg src
2597 tmp1 <- getNewRegNat pk
2599 pk__2 = cmmExprRep src
2600 code__2 = code1 `appOL` code2 `appOL`
2602 then unitOL (ST pk src__2 dst__2)
2603 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
-- Floating point assignment to a register/temporary (SPARC).
--
-- An 'Any' source computes directly into the destination.  A 'Fixed'
-- source already sits in a particular register and is copied into the
-- destination with an FMOV.  The copy used to target a freshly
-- allocated temporary that nothing read, so the destination was never
-- written; that temporary is no longer allocated.
assignReg_FltCode pk reg src = do
    r <- getRegister src
    return $ case r of
        Any _ code         -> code dst
        Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg dst
    where
      dst = getRegisterReg reg
2617 #endif /* sparc_TARGET_ARCH */
2619 #if powerpc_TARGET_ARCH
-- On PowerPC the integer assignment routines are reused unchanged for
-- floating-point assignments.
assignMem_FltCode pk addr src = assignMem_IntCode pk addr src
assignReg_FltCode pk reg src  = assignReg_IntCode pk reg src
2625 #endif /* powerpc_TARGET_ARCH */
2628 -- -----------------------------------------------------------------------------
2629 -- Generating an non-local jump
2631 -- (If applicable) Do not fill the delay slots here; you will confuse the
2632 -- register allocator.
2634 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2636 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2638 #if alpha_TARGET_ARCH
-- Alpha non-local jump.  A label target is either branched to with BR
-- (assembler-temporary labels) or loaded into pv with LDA and jumped
-- through; any other target is evaluated into pv and jumped through.
-- NOTE(review): the first equation matches a bare 'CmmLabel', whereas
-- the sparc/powerpc equations in this module match
-- 'CmmLit (CmmLabel lbl)'; the head of the second equation (binding
-- 'tree') is not visible.  Presumably bit-rotted - confirm.
2640 genJump (CmmLabel lbl)
2641 | isAsmTemp lbl = returnInstr (BR target)
2642 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2644 target = ImmCLbl lbl
2647 = getRegister tree `thenNat` \ register ->
2648 getNewRegNat PtrRep `thenNat` \ tmp ->
2650 dst = registerName register pv
2651 code = registerCode register pv
2652 target = registerName register pv
-- A fixed register is first copied into pv (OR of the register with
-- itself) before the JMP.
2654 if isFixed register then
2655 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2657 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2659 #endif /* alpha_TARGET_ARCH */
2661 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2663 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Non-local jump on x86/x86-64, by shape of the target expression.

-- Target held in memory: jump through the computed address.
genJump (CmmLoad mem pk) = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target))

-- Literal target: jump to the immediate, no set-up code needed.
genJump (CmmLit lit) = do
  return (unitOL (JMP (OpImm (litToImm lit))))

-- Anything else: evaluate the target into a register and jump through
-- it.  This equation head binds 'expr' for the body below.
genJump expr = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg))
2676 #endif /* i386_TARGET_ARCH */
2678 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2680 #if sparc_TARGET_ARCH
-- SPARC non-local jump.  A label target becomes a CALL followed by a
-- NOP; any other target is evaluated into a register and reached with
-- a JMP (address = that register plus g0) followed by a NOP.
-- NOTE(review): the `where` introducing 'target' and the head of the
-- second equation (binding 'tree') are not visible in this span.
2682 genJump (CmmLit (CmmLabel lbl))
2683 = return (toOL [CALL (Left target) 0 True, NOP])
2685 target = ImmCLbl lbl
2689 (target, code) <- getSomeReg tree
2690 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2692 #endif /* sparc_TARGET_ARCH */
2694 #if powerpc_TARGET_ARCH
-- PowerPC non-local jump.  A label target is a single JMP; any other
-- target is evaluated into a register, moved with MTCTR, and reached
-- with BCTR.
-- NOTE(review): the head of the second equation (binding 'tree') is
-- not visible in this span.
2695 genJump (CmmLit (CmmLabel lbl))
2696 = return (unitOL $ JMP lbl)
2700 (target,code) <- getSomeReg tree
2701 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2702 #endif /* powerpc_TARGET_ARCH */
2705 -- -----------------------------------------------------------------------------
2706 -- Unconditional branches
-- | Unconditional branch to a block: the instructions produced by
-- 'mkBranchInstr' for the target, wrapped up as an 'InstrBlock'.
genBranch :: BlockId -> NatM InstrBlock
genBranch target = return (toOL (mkBranchInstr target))
2712 -- -----------------------------------------------------------------------------
2713 -- Conditional jumps
2716 Conditional jumps are always to local labels, so we can use branch
2717 instructions. We peek at the arguments to decide what kind of comparison to do.
2720 ALPHA: For comparisons with 0, we're laughing, because we can just do
2721 the desired conditional branch.
2723 I386: First, we have to ensure that the condition
2724 codes are set according to the supplied comparison operation.
2726 SPARC: First, we have to ensure that the condition codes are set
2727 according to the supplied comparison operation. We generate slightly
2728 different code for floating point comparisons, because a floating
2729 point operation cannot directly precede a @BF@. We assume the worst
2730 and fill that slot with a @NOP@.
2732 SPARC: Do not fill the delay slots here; you will confuse the register allocator.
2738 :: BlockId -- the branch target
2739 -> CmmExpr -- the condition on which to branch
2742 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2744 #if alpha_TARGET_ARCH
-- Alpha conditional jump.
-- NOTE(review): these equations match on StPrim / StInt / StDouble and
-- use `thenNat`, unlike the CmmExpr-based code of the other targets in
-- this module.  Presumably they are bit-rotted - confirm before
-- enabling.
--
-- Comparison of an integer-like value against literal 0: branch
-- directly on the register with BI.
-- NOTE(review): the head binds the target as 'id', but the body uses
-- 'lbl' (target = ImmCLbl lbl) - looks like an unbound name.
2746 genCondJump id (StPrim op [x, StInt 0])
2747 = getRegister x `thenNat` \ register ->
2748 getNewRegNat (registerRep register)
2751 code = registerCode register tmp
2752 value = registerName register tmp
2753 pk = registerRep register
2754 target = ImmCLbl lbl
2756 returnSeq code [BI (cmpOp op) value target]
2758 cmpOp CharGtOp = GTT
2760 cmpOp CharEqOp = EQQ
2762 cmpOp CharLtOp = LTT
-- For the Word and Addr operators a comparison with zero maps Ge to
-- ALWAYS, Lt to NEVER and Le to EQQ.
2771 cmpOp WordGeOp = ALWAYS
2772 cmpOp WordEqOp = EQQ
2774 cmpOp WordLtOp = NEVER
2775 cmpOp WordLeOp = EQQ
2777 cmpOp AddrGeOp = ALWAYS
2778 cmpOp AddrEqOp = EQQ
2780 cmpOp AddrLtOp = NEVER
2781 cmpOp AddrLeOp = EQQ
-- Comparison of a floating-point value against literal 0.0: branch
-- directly on the register with BF.
2783 genCondJump lbl (StPrim op [x, StDouble 0.0])
2784 = getRegister x `thenNat` \ register ->
2785 getNewRegNat (registerRep register)
2788 code = registerCode register tmp
2789 value = registerName register tmp
2790 pk = registerRep register
2791 target = ImmCLbl lbl
2793 return (code . mkSeqInstr (BF (cmpOp op) value target))
2795 cmpOp FloatGtOp = GTT
2796 cmpOp FloatGeOp = GE
2797 cmpOp FloatEqOp = EQQ
2798 cmpOp FloatNeOp = NE
2799 cmpOp FloatLtOp = LTT
2800 cmpOp FloatLeOp = LE
2801 cmpOp DoubleGtOp = GTT
2802 cmpOp DoubleGeOp = GE
2803 cmpOp DoubleEqOp = EQQ
2804 cmpOp DoubleNeOp = NE
2805 cmpOp DoubleLtOp = LTT
2806 cmpOp DoubleLeOp = LE
-- General floating-point comparison: compute the comparison result
-- with the FCMP chosen in the table below, then BF on that result
-- using the paired condition.
2808 genCondJump lbl (StPrim op [x, y])
2810 = trivialFCode pr instr x y `thenNat` \ register ->
2811 getNewRegNat F64 `thenNat` \ tmp ->
2813 code = registerCode register tmp
2814 result = registerName register tmp
2815 target = ImmCLbl lbl
2817 return (code . mkSeqInstr (BF cond result target))
2819 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2821 fltCmpOp op = case op of
-- Each entry pairs the compare instruction with the condition that
-- the following branch tests on its result.
2835 (instr, cond) = case op of
2836 FloatGtOp -> (FCMP TF LE, EQQ)
2837 FloatGeOp -> (FCMP TF LTT, EQQ)
2838 FloatEqOp -> (FCMP TF EQQ, NE)
2839 FloatNeOp -> (FCMP TF EQQ, EQQ)
2840 FloatLtOp -> (FCMP TF LTT, NE)
2841 FloatLeOp -> (FCMP TF LE, NE)
2842 DoubleGtOp -> (FCMP TF LE, EQQ)
2843 DoubleGeOp -> (FCMP TF LTT, EQQ)
2844 DoubleEqOp -> (FCMP TF EQQ, NE)
2845 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2846 DoubleLtOp -> (FCMP TF LTT, NE)
2847 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: same scheme with CMP and BI.
2849 genCondJump lbl (StPrim op [x, y])
2850 = trivialCode instr x y `thenNat` \ register ->
2851 getNewRegNat IntRep `thenNat` \ tmp ->
2853 code = registerCode register tmp
2854 result = registerName register tmp
2855 target = ImmCLbl lbl
2857 return (code . mkSeqInstr (BI cond result target))
2859 (instr, cond) = case op of
2860 CharGtOp -> (CMP LE, EQQ)
2861 CharGeOp -> (CMP LTT, EQQ)
2862 CharEqOp -> (CMP EQQ, NE)
2863 CharNeOp -> (CMP EQQ, EQQ)
2864 CharLtOp -> (CMP LTT, NE)
2865 CharLeOp -> (CMP LE, NE)
2866 IntGtOp -> (CMP LE, EQQ)
2867 IntGeOp -> (CMP LTT, EQQ)
2868 IntEqOp -> (CMP EQQ, NE)
2869 IntNeOp -> (CMP EQQ, EQQ)
2870 IntLtOp -> (CMP LTT, NE)
2871 IntLeOp -> (CMP LE, NE)
2872 WordGtOp -> (CMP ULE, EQQ)
2873 WordGeOp -> (CMP ULT, EQQ)
2874 WordEqOp -> (CMP EQQ, NE)
2875 WordNeOp -> (CMP EQQ, EQQ)
2876 WordLtOp -> (CMP ULT, NE)
2877 WordLeOp -> (CMP ULE, NE)
2878 AddrGtOp -> (CMP ULE, EQQ)
2879 AddrGeOp -> (CMP ULT, EQQ)
2880 AddrEqOp -> (CMP EQQ, NE)
2881 AddrNeOp -> (CMP EQQ, EQQ)
2882 AddrLtOp -> (CMP ULT, NE)
2883 AddrLeOp -> (CMP ULE, NE)
2885 #endif /* alpha_TARGET_ARCH */
2887 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2889 #if i386_TARGET_ARCH
-- i386 conditional jump: obtain the condition and its set-up code,
-- then append one JXX on that condition to the target block.  The
-- CondCode flag is ignored here.
genCondJump id bool = do
    CondCode _ test setup <- getCondCode bool
    let branch = JXX test id
    return (setup `snocOL` branch)
2897 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2899 #if x86_64_TARGET_ARCH
-- x86_64 conditional jump.  One visible path appends a single JXX to
-- the condition code; the other allocates a fresh block id and builds
-- the jump sequence by a case on the condition, from the alternatives
-- plain_test / or_unordered / and_ordered.
-- NOTE(review): the test that selects between the two paths, the case
-- arms and the bodies of the three alternatives are not visible in
-- this span; presumably 'is_float' drives the choice - confirm.
2901 genCondJump id bool = do
2902 CondCode is_float cond cond_code <- getCondCode bool
2905 return (cond_code `snocOL` JXX cond id)
2907 lbl <- getBlockIdNat
2909 -- see comment with condFltReg
2910 let code = case cond of
2916 plain_test = unitOL (
2919 or_unordered = toOL [
2923 and_ordered = toOL [
2929 return (cond_code `appOL` code)
2933 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2935 #if sparc_TARGET_ARCH
-- SPARC conditional jump.  One alternative emits NOP, BF, NOP; the
-- other emits BI, NOP.  Both branch to the asm temp label made from
-- the block id.
-- NOTE(review): the condition choosing between the alternatives
-- (presumably 'is_float') and the final `return` are not visible here.
2937 genCondJump (BlockId id) bool = do
2938 CondCode is_float cond code <- getCondCode bool
2943 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2944 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2948 #endif /* sparc_TARGET_ARCH */
2951 #if powerpc_TARGET_ARCH
-- PowerPC conditional jump: set-up code from the condition, followed
-- by one BCC on that condition to the target block.  The CondCode
-- flag is not consulted.
genCondJump id bool = do
    CondCode _ test setup <- getCondCode bool
    let branch = BCC test id
    return (setup `snocOL` branch)
2957 #endif /* powerpc_TARGET_ARCH */
2960 -- -----------------------------------------------------------------------------
2961 -- Generating C calls
2963 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2964 -- @get_arg@, which moves the arguments to the correct registers/stack
2965 -- locations. Apart from that, the code is easy.
2967 -- (If applicable) Do not fill the delay slots here; you will confuse the
2968 -- register allocator.
2971 :: CmmCallTarget -- function to call
2972 -> CmmFormals -- where to put the result
2973 -> CmmActuals -- arguments (of mixed type)
2976 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2978 #if alpha_TARGET_ARCH
-- Alpha C call.  Arguments are placed by folding get_arg over them
-- with an accumulator of (remaining argument registers, next stack
-- offset); the call itself is LDA pv / JSR / LDGP.
-- NOTE(review): this takes four arguments, whereas the i386 equations
-- in this module take three, and it uses `thenNat` and StixTree.
-- Presumably it is bit-rotted - confirm before enabling.
2982 genCCall fn cconv result_regs args
2983 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2984 `thenNat` \ ((unused,_), argCode) ->
2986 nRegs = length allArgRegs - length unused
2987 code = asmSeqThen (map ($ []) argCode)
2990 LDA pv (AddrImm (ImmLab (ptext fn))),
2991 JSR ra (AddrReg pv) nRegs,
2992 LDGP gp (AddrReg ra)]
2994 ------------------------
2995 {- Try to get a value into a specific register (or registers) for
2996 a call. The first 6 arguments go into the appropriate
2997 argument register (separate registers for integer and floating
2998 point arguments, but used in lock-step), and the remaining
2999 arguments are dumped to the stack, beginning at 0(sp). Our
3000 first argument is a pair of the list of remaining argument
3001 registers to be assigned for this call and the next stack
3002 offset to use for overflowing arguments. This way,
3003 @get_Arg@ can be applied to all of a call's arguments using
3007 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3008 -> StixTree -- Current argument
3009 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3011 -- We have to use up all of our argument registers first...
3013 get_arg ((iDst,fDst):dsts, offset) arg
3014 = getRegister arg `thenNat` \ register ->
3016 reg = if isFloatingRep pk then fDst else iDst
3017 code = registerCode register reg
3018 src = registerName register reg
3019 pk = registerRep register
3022 if isFloatingRep pk then
3023 ((dsts, offset), if isFixed register then
3024 code . mkSeqInstr (FMOV src fDst)
3027 ((dsts, offset), if isFixed register then
3028 code . mkSeqInstr (OR src (RIReg src) iDst)
3031 -- Once we have run out of argument registers, we move to the
3034 get_arg ([], offset) arg
3035 = getRegister arg `thenNat` \ register ->
3036 getNewRegNat (registerRep register)
3039 code = registerCode register tmp
3040 src = registerName register tmp
3041 pk = registerRep register
3042 sz = primRepToSize pk
3044 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3046 #endif /* alpha_TARGET_ARCH */
3048 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3050 #if i386_TARGET_ARCH
-- i386 C call.  Three cases: the write barrier (no code), primitive
-- float operations with a single result (some inlined, the rest sent
-- to outOfLineFloatOp), and the general call sequence.
3052 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3053 -- write barrier compiles to no code on x86/x86-64;
3054 -- we keep it this long in order to prevent earlier optimisations.
3056 -- we only cope with a single result for foreign calls
3057 genCCall (CmmPrim op) [(r,_)] args = do
-- sqrt, sin, cos and tan are emitted inline; every other primitive
-- goes out of line.
3059 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3060 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3062 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3063 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3065 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3066 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3068 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3069 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3071 other_op -> outOfLineFloatOp op r args
3073 actuallyInlineFloatOp rep instr [(x,_)]
3074 = do res <- trivialUFCode rep instr x
3076 return (any (getRegisterReg (CmmLocal r)))
-- General case: push the arguments, call, pop the arguments, then
-- move the result into place.
3078 genCCall target dest_regs args = do
3080 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
3081 #if !darwin_TARGET_OS
3082 tot_arg_size = sum sizes
-- Darwin: the argument area is rounded up to a multiple of 16 bytes
-- and the stack delta is moved by the padding.
3084 raw_arg_size = sum sizes
3085 tot_arg_size = roundTo 16 raw_arg_size
3086 arg_pad_size = tot_arg_size - raw_arg_size
3087 delta0 <- getDeltaNat
3088 setDeltaNat (delta0 - arg_pad_size)
3091 push_codes <- mapM push_arg (reverse args)
3092 delta <- getDeltaNat
3095 -- deal with static vs dynamic call targets
3096 (callinsns,cconv) <-
3099 CmmCallee (CmmLit (CmmLabel lbl)) conv
3100 -> -- ToDo: stdcall arg sizes
3101 return (unitOL (CALL (Left fn_imm) []), conv)
3102 where fn_imm = ImmCLbl lbl
3104 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3105 ASSERT(dyn_rep == I32)
3106 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3109 #if darwin_TARGET_OS
3111 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3112 DELTA (delta0 - arg_pad_size)]
3113 `appOL` concatOL push_codes
3116 = concatOL push_codes
3117 call = callinsns `appOL`
3119 -- Deallocate parameters after call for ccall;
3120 -- but not for stdcall (callee does it)
3121 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3122 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3124 [DELTA (delta + tot_arg_size)]
3127 setDeltaNat (delta + tot_arg_size)
3130 -- assign the results, if necessary
-- I64 results are taken from eax (low) and edx (high), float results
-- from fake0, anything else from eax.
3131 assign_code [] = nilOL
3132 assign_code [(dest,_hint)] =
3134 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3135 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3136 F32 -> unitOL (GMOV fake0 r_dest)
3137 F64 -> unitOL (GMOV fake0 r_dest)
3138 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3140 r_dest_hi = getHiVRegFromLo r_dest
3141 rep = localRegRep dest
3142 r_dest = getRegisterReg (CmmLocal dest)
3143 assign_code many = panic "genCCall.assign_code many"
3145 return (push_code `appOL`
3147 assign_code dest_regs)
-- Round x up to the next multiple of a.
3155 roundTo a x | x `mod` a == 0 = x
3156 | otherwise = x + a - (x `mod` a)
3159 push_arg :: (CmmExpr,MachHint){-current argument-}
3160 -> NatM InstrBlock -- code
-- An I64 argument is pushed as two 32-bit halves, high half first,
-- moving the stack delta by 8 in total.
3162 push_arg (arg,_hint) -- we don't need the hints on x86
3163 | arg_rep == I64 = do
3164 ChildCode64 code r_lo <- iselExpr64 arg
3165 delta <- getDeltaNat
3166 setDeltaNat (delta - 8)
3168 r_hi = getHiVRegFromLo r_lo
3170 return ( code `appOL`
3171 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3172 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Other arguments: floats get stack space via SUB on esp followed by
-- a GST; everything else is PUSHed.
3177 (code, reg, sz) <- get_op arg
3178 delta <- getDeltaNat
3179 let size = arg_size sz
3180 setDeltaNat (delta-size)
3181 if (case sz of F64 -> True; F32 -> True; _ -> False)
3182 then return (code `appOL`
3183 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3185 GST sz reg (AddrBaseIndex (EABaseReg esp)
3189 else return (code `snocOL`
3190 PUSH I32 (OpReg reg) `snocOL`
3194 arg_rep = cmmExprRep arg
-- Evaluate an expression into some register, also returning its rep.
3197 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3199 (reg,code) <- getSomeReg op
3200 return (code, reg, cmmExprRep op)
3202 #endif /* i386_TARGET_ARCH */
3204 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Implement a callish float MachOp as a call to a foreign function
-- (name table below), using CCallConv.  When the result register has
-- rep F64 the call writes it directly; otherwise the call writes an
-- F64 temporary, which is then assigned to the result register.
3206 outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals
3208 outOfLineFloatOp mop res args
3210 dflags <- getDynFlagsNat
3211 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3212 let target = CmmCallee targetExpr CCallConv
3214 if localRegRep res == F64
3216 stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe CmmMayReturn)
3220 tmp = LocalReg uq F64 GCKindNonPtr
3222 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe CmmMayReturn)
3223 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3224 return (code1 `appOL` code2)
3226 lbl = mkForeignLabel fn Nothing False
-- Function name for each MachOp: the F32 variants carry an "f"
-- suffix, the F64 variants do not.
3229 MO_F32_Sqrt -> FSLIT("sqrtf")
3230 MO_F32_Sin -> FSLIT("sinf")
3231 MO_F32_Cos -> FSLIT("cosf")
3232 MO_F32_Tan -> FSLIT("tanf")
3233 MO_F32_Exp -> FSLIT("expf")
3234 MO_F32_Log -> FSLIT("logf")
3236 MO_F32_Asin -> FSLIT("asinf")
3237 MO_F32_Acos -> FSLIT("acosf")
3238 MO_F32_Atan -> FSLIT("atanf")
3240 MO_F32_Sinh -> FSLIT("sinhf")
3241 MO_F32_Cosh -> FSLIT("coshf")
3242 MO_F32_Tanh -> FSLIT("tanhf")
3243 MO_F32_Pwr -> FSLIT("powf")
3245 MO_F64_Sqrt -> FSLIT("sqrt")
3246 MO_F64_Sin -> FSLIT("sin")
3247 MO_F64_Cos -> FSLIT("cos")
3248 MO_F64_Tan -> FSLIT("tan")
3249 MO_F64_Exp -> FSLIT("exp")
3250 MO_F64_Log -> FSLIT("log")
3252 MO_F64_Asin -> FSLIT("asin")
3253 MO_F64_Acos -> FSLIT("acos")
3254 MO_F64_Atan -> FSLIT("atan")
3256 MO_F64_Sinh -> FSLIT("sinh")
3257 MO_F64_Cosh -> FSLIT("cosh")
3258 MO_F64_Tanh -> FSLIT("tanh")
3259 MO_F64_Pwr -> FSLIT("pow")
3261 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3263 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3265 #if x86_64_TARGET_ARCH
-- Generate code for a C call on x86_64.  Three equations:
--   * MO_WriteBarrier emits no instructions at all (nilOL);
--   * any other CmmPrim with exactly one result register is handed to
--     outOfLineFloatOp;
--   * every other call goes through the general sequence below: load the
--     register arguments, pad the stack if needed, push the remaining
--     arguments, emit the call, and move the result out of xmm0 / rax.
3267 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3268 -- write barrier compiles to no code on x86/x86-64;
3269 -- we keep it this long in order to prevent earlier optimisations.
3271 genCCall (CmmPrim op) [(r,_)] args =
3272 outOfLineFloatOp op r args
3274 genCCall target dest_regs args = do
3276 -- load up the register arguments
3277 (stack_args, aregs, fregs, load_args_code)
3278 <- load_args args allArgRegs allFPArgRegs nilOL
-- load_args hands back the registers it did not use (aregs / fregs), so
-- the registers actually used are the corresponding list with that many
-- entries dropped from its end.
3281 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3282 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3283 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3284 -- for annotating the call instruction with
3286 sse_regs = length fp_regs_used
-- bytes of stack taken by the arguments that did not get a register
3288 tot_arg_size = arg_size * length stack_args
3290 -- On entry to the called function, %rsp should be aligned
3291 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3292 -- the return address is 16-byte aligned). In STG land
3293 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3294 -- need to make sure we push a multiple of 16-bytes of args,
3295 -- plus the return address, to get the correct alignment.
3296 -- Urg, this is hard. We need to feed the delta back into
3297 -- the arg pushing code.
3298 (real_size, adjust_rsp) <-
3299 if tot_arg_size `rem` 16 == 0
3300 then return (tot_arg_size, nilOL)
3301 else do -- we need to adjust...
3302 delta <- getDeltaNat
3303 setDeltaNat (delta-8)
3304 return (tot_arg_size+8, toOL [
3305 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3309 -- push the stack args, right to left
3310 push_code <- push_args (reverse stack_args) nilOL
3311 delta <- getDeltaNat
3313 -- deal with static vs dynamic call targets
3314 (callinsns,cconv) <-
3317 CmmCallee (CmmLit (CmmLabel lbl)) conv
3318 -> -- ToDo: stdcall arg sizes
3319 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3320 where fn_imm = ImmCLbl lbl
3322 -> do (dyn_r, dyn_c) <- getSomeReg expr
3323 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3326 -- The x86_64 ABI requires us to set %al to the number of SSE
3327 -- registers that contain arguments, if the called routine
3328 -- is a varargs function. We don't know whether it's a
3329 -- varargs function or not, so we have to assume it is.
3331 -- It's not safe to omit this assignment, even if the number
3332 -- of SSE regs in use is zero. If %al is larger than 8
3333 -- on entry to a varargs function, seg faults ensue.
3334 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3336 let call = callinsns `appOL`
3338 -- Deallocate parameters after call for ccall;
3339 -- but not for stdcall (callee does it)
3340 (if cconv == StdCallConv || real_size==0 then [] else
3341 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3343 [DELTA (delta + real_size)]
-- keep the monad's stack delta in step with the DELTA pseudo-op above
3346 setDeltaNat (delta + real_size)
3349 -- assign the results, if necessary
3350 assign_code [] = nilOL
3351 assign_code [(dest,_hint)] =
-- floating-point results are copied from xmm0, everything else from rax
3353 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3354 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3355 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3357 rep = localRegRep dest
3358 r_dest = getRegisterReg (CmmLocal dest)
-- more than one result register is not supported
3359 assign_code many = panic "genCCall.assign_code many"
3361 return (load_args_code `appOL`
3364 assign_eax sse_regs `appOL`
3366 assign_code dest_regs)
-- size in bytes of one stack argument slot
3369 arg_size = 8 -- always, at the moment
-- Distribute arguments over the available argument registers.
-- Walks the argument list left to right: a floating-point argument
-- (isFloatingRep) consumes a register from the FP list, any other argument
-- one from the integer list.  The result is
--   (arguments that did not get a register, in their original order,
--    unused integer registers, unused FP registers, accumulated code).
3371 load_args :: [(CmmExpr,MachHint)]
3372 -> [Reg] -- int regs avail for args
3373 -> [Reg] -- FP regs avail for args
3375 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3376 load_args args [] [] code = return (args, [], [], code)
3377 -- no more regs to use
3378 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3379 -- no more args to push
3380 load_args ((arg,hint) : rest) aregs fregs code
3381 | isFloatingRep arg_rep =
-- FP argument: computed straight into register r; rs (the remaining FP
-- registers) is passed on, the integer list is untouched
3385 arg_code <- getAnyReg arg
3386 load_args rest aregs rs (code `appOL` arg_code r)
-- non-FP argument: same, but drawing from the integer register list
3391 arg_code <- getAnyReg arg
3392 load_args rest rs fregs (code `appOL` arg_code r)
3394 arg_rep = cmmExprRep arg
-- this argument stays a stack argument: process the rest first, then put
-- it back at the front so the original order is preserved
3397 (args',ars,frs,code') <- load_args rest aregs fregs code
3398 return ((arg,hint):args', ars, frs, code')
-- Emit code that pushes the given arguments onto the stack, in list order,
-- appending to the code accumulated so far.  Each argument lowers the
-- tracked stack delta by arg_size and records the new value with a DELTA
-- pseudo-instruction.
3400 push_args [] code = return code
3401 push_args ((arg,hint):rest) code
3402 | isFloatingRep arg_rep = do
-- floating-point argument: make room by hand (SUB on rsp), then store the
-- register at the new top of stack
3403 (arg_reg, arg_code) <- getSomeReg arg
3404 delta <- getDeltaNat
3405 setDeltaNat (delta-arg_size)
3406 let code' = code `appOL` arg_code `appOL` toOL [
3407 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3408 DELTA (delta-arg_size),
3409 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3410 push_args rest code'
3413 -- we only ever generate word-sized function arguments. Promotion
3414 -- has already happened: our Int8# type is kept sign-extended
3415 -- in an Int#, for example.
3416 ASSERT(arg_rep == I64) return ()
-- integer argument: a single PUSH of the operand
-- NOTE(review): arg_code is bound here but is not appended to code' in
-- the lines below -- confirm this is intended.
3417 (arg_op, arg_code) <- getOperand arg
3418 delta <- getDeltaNat
3419 setDeltaNat (delta-arg_size)
3420 let code' = code `appOL` toOL [PUSH I64 arg_op,
3421 DELTA (delta-arg_size)]
3422 push_args rest code'
3424 arg_rep = cmmExprRep arg
3427 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3429 #if sparc_TARGET_ARCH
3431 The SPARC calling convention is an absolute
3432 nightmare. The first 6x32 bits of arguments are mapped into
3433 %o0 through %o5, and the remaining arguments are dumped to the
3434 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3436 If we have to put args on the stack, move %o6==%sp down by
3437 the number of words to go on the stack, to ensure there's enough space.
3439 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3440 16 words above the stack pointer is a word for the address of
3441 a structure return value. I use this as a temporary location
3442 for moving values from float to int regs. Certainly it isn't
3443 safe to put anything in the 16 words starting at %sp, since
3444 this area can get trashed at any time due to window overflows
3445 caused by signal handlers.
3447 A final complication (if the above isn't enough) is that
3448 we can't blithely calculate the arguments one by one into
3449 %o0 .. %o5. Consider the following nested calls:
3453 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3454 the inner call will itself use %o0, which trashes the value put there
3455 in preparation for the outer call. Upshot: we need to calculate the
3456 args into temporary regs, and move those to arg regs or onto the
3457 stack only immediately prior to the call proper. Sigh.
-- Generate code for a C call on SPARC.  Every argument is first computed
-- into one or two integer vregs (arg_to_int_vregs); move_final then
-- copies those vregs into allArgRegs and, once those run out, onto the
-- stack.  move_sp_down is emitted before that transfer code.
-- A CmmPrim target is resolved through outOfLineFloatOp; if that reports
-- that the result would have to be reduced, this panics.
3460 genCCall target dest_regs argsAndHints = do
3462 args = map fst argsAndHints
3463 argcode_and_vregs <- mapM arg_to_int_vregs args
3465 (argcodes, vregss) = unzip argcode_and_vregs
3466 n_argRegs = length allArgRegs
-- number of argument registers this call really occupies; attached to
-- each CALL instruction below
3467 n_argRegs_used = min (length vregs) n_argRegs
3468 vregs = concat vregss
3469 -- deal with static vs dynamic call targets
3470 callinsns <- (case target of
3471 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3472 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3473 CmmCallee expr conv -> do
3474 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3475 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- primitive operation: call the C function chosen by outOfLineFloatOp,
-- either directly by label or through a computed address
3477 (res, reduce) <- outOfLineFloatOp mop
3478 lblOrMopExpr <- case res of
3480 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3482 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3483 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3484 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3488 argcode = concatOL argcodes
3489 (move_sp_down, move_sp_up)
-- diff = number of vregs that do not fit into the argument registers
3490 = let diff = length vregs - n_argRegs
3491 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3494 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3496 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3497 return (argcode `appOL`
3498 move_sp_down `appOL`
3499 transfer_code `appOL`
3504 -- move args from the integer vregs into which they have been
3505 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
-- Arguments: the vregs still to place, the argument registers still free,
-- and the stack offset (as understood by spRel) for the next stacked arg.
3506 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3508 move_final [] _ offset -- all args done
-- each stacked argument is stored as an I32 and advances the offset by one
3511 move_final (v:vs) [] offset -- out of aregs; move to stack
3512 = ST I32 v (spRel offset)
3513 : move_final vs [] (offset+1)
-- a register argument is copied with OR against g0; the stack offset is
-- left unchanged
3515 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3516 = OR False g0 (RIReg v) a
3517 : move_final vs az offset
3519 -- generate code to calculate an argument, and move it into one
3520 -- or two integer vregs.
-- Returns the generated code together with the list of vregs that hold
-- the value.
3521 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3522 arg_to_int_vregs arg
3523 | (cmmExprRep arg) == I64
-- 64-bit integer: iselExpr64 yields the low vreg, the high one is derived
-- from it; the result lists the high half first
3525 (ChildCode64 code r_lo) <- iselExpr64 arg
3527 r_hi = getHiVRegFromLo r_lo
3528 return (code, [r_hi, r_lo])
3531 (src, code) <- getSomeReg arg
3532 tmp <- getNewRegNat (cmmExprRep arg)
-- two-vreg float transfer: the value is moved to f0, then each half of the
-- f0 pair is stored to (spRel 16) and reloaded into an I32 vreg
3537 v1 <- getNewRegNat I32
3538 v2 <- getNewRegNat I32
3541 FMOV F64 src f0 `snocOL`
3542 ST F32 f0 (spRel 16) `snocOL`
3543 LD I32 (spRel 16) v1 `snocOL`
3544 ST F32 (fPair f0) (spRel 16) `snocOL`
3545 LD I32 (spRel 16) v2
-- one-vreg float transfer: a single store / reload through (spRel 16)
3550 v1 <- getNewRegNat I32
3553 ST F32 src (spRel 16) `snocOL`
3554 LD I32 (spRel 16) v1
-- remaining case: plain register copy into a fresh I32 vreg
3559 v1 <- getNewRegNat I32
3561 code `snocOL` OR False g0 (RIReg src) v1
-- Pick the C math function that implements a floating-point
-- CallishMachOp on SPARC.  Returns a pair: the call target (Left lbl when
-- the dynamic reference to the function is a plain label) and the
-- 'reduce' flag taken from the table below.
-- Every MO_F32_* entry names the same function as its MO_F64_*
-- counterpart and sets the flag to True; MO_F64_* entries set it to False.
-- Ops not listed (the table has no Pwr entries, for example) end in
-- pprPanic.
3565 outOfLineFloatOp mop =
3567 dflags <- getDynFlagsNat
3568 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3569 mkForeignLabel functionName Nothing True
3570 let mopLabelOrExpr = case mopExpr of
3571 CmmLit (CmmLabel lbl) -> Left lbl
3573 return (mopLabelOrExpr, reduce)
3575 (reduce, functionName) = case mop of
3576 MO_F32_Exp -> (True, FSLIT("exp"))
3577 MO_F32_Log -> (True, FSLIT("log"))
3578 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3580 MO_F32_Sin -> (True, FSLIT("sin"))
3581 MO_F32_Cos -> (True, FSLIT("cos"))
3582 MO_F32_Tan -> (True, FSLIT("tan"))
3584 MO_F32_Asin -> (True, FSLIT("asin"))
3585 MO_F32_Acos -> (True, FSLIT("acos"))
3586 MO_F32_Atan -> (True, FSLIT("atan"))
3588 MO_F32_Sinh -> (True, FSLIT("sinh"))
3589 MO_F32_Cosh -> (True, FSLIT("cosh"))
3590 MO_F32_Tanh -> (True, FSLIT("tanh"))
3592 MO_F64_Exp -> (False, FSLIT("exp"))
3593 MO_F64_Log -> (False, FSLIT("log"))
3594 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3596 MO_F64_Sin -> (False, FSLIT("sin"))
3597 MO_F64_Cos -> (False, FSLIT("cos"))
3598 MO_F64_Tan -> (False, FSLIT("tan"))
3600 MO_F64_Asin -> (False, FSLIT("asin"))
3601 MO_F64_Acos -> (False, FSLIT("acos"))
3602 MO_F64_Atan -> (False, FSLIT("atan"))
3604 MO_F64_Sinh -> (False, FSLIT("sinh"))
3605 MO_F64_Cosh -> (False, FSLIT("cosh"))
3606 MO_F64_Tanh -> (False, FSLIT("tanh"))
3608 other -> pprPanic "outOfLineFloatOp(sparc) "
3609 (pprCallishMachOp mop)
3611 #endif /* sparc_TARGET_ARCH */
3613 #if powerpc_TARGET_ARCH
3615 #if darwin_TARGET_OS || linux_TARGET_OS
3617 The PowerPC calling convention for Darwin/Mac OS X
3618 is described in Apple's document
3619 "Inside Mac OS X - Mach-O Runtime Architecture".
3621 PowerPC Linux uses the System V Release 4 Calling Convention
3622 for PowerPC. It is described in the
3623 "System V Application Binary Interface PowerPC Processor Supplement".
3625 Both conventions are similar:
3626 Parameters may be passed in general-purpose registers starting at r3, in
3627 floating point registers starting at f1, or on the stack.
3629 But there are substantial differences:
3630 * The number of registers used for parameter passing and the exact set of
3631 nonvolatile registers differ (see MachRegs.lhs).
3632 * On Darwin, stack space is always reserved for parameters, even if they are
3633 passed in registers. The called routine may choose to save parameters from
3634 registers to the corresponding space on the stack.
3635 * On Darwin, a corresponding number of GPRs is skipped when a floating-point
3636 parameter is passed in an FPR.
3637 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3638 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3639 Darwin just treats an I64 like two separate I32s (high word first).
3640 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3641 4-byte aligned like everything else on Darwin.
3642 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3643 PowerPC Linux does not agree, so neither do we.
3645 According to both conventions, the parameter area should be part of the
3646 caller's stack frame, allocated in the caller's prologue code (large enough
3647 to hold the parameter lists for all called routines). The NCG already
3648 uses the stack for register spilling, leaving 64 bytes free at the top.
3649 If we need a larger parameter area than that, we just allocate a new stack
3650 frame just before ccalling.
-- Generate code for a C call on PowerPC (Darwin and Linux).
--   * MO_WriteBarrier becomes a single LWSYNC instruction.
--   * Otherwise: passArguments places the arguments, the target is
--     classified as a label (Left), an expression (Right) or a primitive
--     (resolved by outOfLineFloatOp), and the call is bracketed by
--     codeBefore (move_sp_down + argument code) and codeAfter
--     (move_sp_up + moveResult).
3654 genCCall (CmmPrim MO_WriteBarrier) _ _
3655 = return $ unitOL LWSYNC
3657 genCCall target dest_regs argsAndHints
3658 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3659 -- we rely on argument promotion in the codeGen
3661 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3663 allArgRegs allFPArgRegs
-- only a primitive op can ask for the result to be reduced to F32;
-- ordinary callees always yield False here
3667 (labelOrExpr, reduceToF32) <- case target of
3668 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3669 CmmCallee expr conv -> return (Right expr, False)
3670 CmmPrim mop -> outOfLineFloatOp mop
3672 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3673 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- static target: direct branch-and-link to the label
3678 `snocOL` BL lbl usedRegs
-- dynamic target: the address goes through the count register (MTCTR)
-- and is called with BCTRL
3681 (dynReg, dynCode) <- getSomeReg dyn
3683 `snocOL` MTCTR dynReg
3685 `snocOL` BCTRL usedRegs
3688 #if darwin_TARGET_OS
3689 initialStackOffset = 24
3690 -- size of linkage area + size of arguments, in bytes
-- Darwin ignores the final stack offset: 24 bytes plus the summed byte
-- width of all arguments (but at least 32), rounded up to 16
3691 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3692 map machRepByteWidth argReps
3693 #elif linux_TARGET_OS
3694 initialStackOffset = 8
-- Linux: the final stack offset reported by passArguments, rounded up to 16
3695 stackDelta finalStack = roundTo 16 finalStack
3697 args = map fst argsAndHints
3698 argReps = map cmmExprRep args
-- round x up to the next multiple of a (x itself if already a multiple)
3700 roundTo a x | x `mod` a == 0 = x
3701 | otherwise = x + a - (x `mod` a)
-- move_sp_down / move_sp_up adjust sp by the same stackDelta in opposite
-- directions: a store-with-update at sp-delta going down, an ADD of delta
-- coming back up
3703 move_sp_down finalStack
3705 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3708 where delta = stackDelta finalStack
3709 move_sp_up finalStack
3711 toOL [ADD sp sp (RIImm (ImmInt delta)),
3714 where delta = stackDelta finalStack
-- Place the arguments of a call, one (argument, rep) pair at a time.
-- Parameters: remaining arguments, free GPRs, free FPRs, current stack
-- offset, code accumulated so far, registers used so far.
-- Result: (final stack offset, accumulated code, registers used).
-- The Darwin and Linux (SysV) rules differ and are selected with CPP.
3717 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- 64-bit integer argument: handled as a high / low pair of 32-bit vregs
3718 passArguments ((arg,I64):args) gprs fprs stackOffset
3719 accumCode accumUsed =
3721 ChildCode64 code vr_lo <- iselExpr64 arg
3722 let vr_hi = getHiVRegFromLo vr_lo
3724 #if darwin_TARGET_OS
-- Darwin: each half independently goes into the next GPR if one is left,
-- otherwise into its stack slot (high word first, low word 4 bytes later)
3729 (accumCode `appOL` code
3730 `snocOL` storeWord vr_hi gprs stackOffset
3731 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3732 ((take 2 gprs) ++ accumUsed)
3734 storeWord vr (gpr:_) offset = MR gpr vr
3735 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3737 #elif linux_TARGET_OS
-- Linux: the pair goes either entirely into two GPRs (regCode) or entirely
-- onto the stack at an 8-byte aligned offset (stackCode)
3738 let stackOffset' = roundTo 8 stackOffset
3739 stackCode = accumCode `appOL` code
3740 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3741 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3742 regCode hireg loreg =
3743 accumCode `appOL` code
3744 `snocOL` MR hireg vr_hi
3745 `snocOL` MR loreg vr_lo
-- with an even number of GPRs left the next two are used directly;
-- otherwise one GPR is skipped first
3748 hireg : loreg : regs | even (length gprs) ->
3749 passArguments args regs fprs stackOffset
3750 (regCode hireg loreg) (hireg : loreg : accumUsed)
3751 _skipped : hireg : loreg : regs ->
3752 passArguments args regs fprs stackOffset
3753 (regCode hireg loreg) (hireg : loreg : accumUsed)
3754 _ -> -- only one or no regs left
3755 passArguments args [] fprs (stackOffset'+8)
-- every other representation: a register of the matching class if one is
-- left (regs is chosen by rep below), otherwise a stack slot
3759 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3760 | reg : _ <- regs = do
3761 register <- getRegister arg
-- a Fixed register has to be copied into the argument register; an Any
-- register is computed directly into it
3762 let code = case register of
3763 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3764 Any _ acode -> acode reg
3768 #if darwin_TARGET_OS
3769 -- The Darwin ABI requires that we reserve stack slots for register parameters
3770 (stackOffset + stackBytes)
3771 #elif linux_TARGET_OS
3772 -- ... the SysV ABI doesn't.
3775 (accumCode `appOL` code)
-- no register left: compute the value and store it into its stack slot
3778 (vr, code) <- getSomeReg arg
3782 (stackOffset' + stackBytes)
3783 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3786 #if darwin_TARGET_OS
3787 -- stackOffset is at least 4-byte aligned
3788 -- The Darwin ABI is happy with that.
3789 stackOffset' = stackOffset
3791 -- ... the SysV ABI requires 8-byte alignment for doubles.
3792 stackOffset' | rep == F64 = roundTo 8 stackOffset
3793 | otherwise = stackOffset
3795 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- per representation: GPRs consumed, FPRs consumed, bytes of stack used,
-- and the register list the argument is taken from
3796 (nGprs, nFprs, stackBytes, regs) = case rep of
3797 I32 -> (1, 0, 4, gprs)
3798 #if darwin_TARGET_OS
3799 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3801 F32 -> (1, 1, 4, fprs)
3802 F64 -> (2, 1, 8, fprs)
3803 #elif linux_TARGET_OS
3804 -- ... the SysV ABI doesn't.
3805 F32 -> (0, 1, 4, fprs)
3806 F64 -> (0, 1, 8, fprs)
-- Code that copies the call's result into the destination register,
-- chosen by the destination's representation:
--   * F32 with reduceToF32 set: FRSP from f1;
--   * other F32 / F64: plain move from f1;
--   * I64: a register pair, the high half coming from r3;
--   * anything else: move from r3.
3809 moveResult reduceToF32 =
3813 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3814 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3815 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3817 | otherwise -> unitOL (MR r_dest r3)
3818 where rep = cmmRegRep (CmmLocal dest)
3819 r_dest = getRegisterReg (CmmLocal dest)
-- Pick the C math function that implements a floating-point
-- CallishMachOp on PowerPC.  Returns the call target (Left lbl when the
-- dynamic reference to the function is a plain label) paired with the
-- 'reduce' flag from the table below.
-- MO_F32_* ops name the same function as their MO_F64_* counterparts and
-- set the flag to True, which genCCall passes to moveResult as
-- reduceToF32.  Any op not listed ends in pprPanic.
3821 outOfLineFloatOp mop =
3823 dflags <- getDynFlagsNat
3824 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3825 mkForeignLabel functionName Nothing True
3826 let mopLabelOrExpr = case mopExpr of
3827 CmmLit (CmmLabel lbl) -> Left lbl
3829 return (mopLabelOrExpr, reduce)
3831 (functionName, reduce) = case mop of
3832 MO_F32_Exp -> (FSLIT("exp"), True)
3833 MO_F32_Log -> (FSLIT("log"), True)
3834 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3836 MO_F32_Sin -> (FSLIT("sin"), True)
3837 MO_F32_Cos -> (FSLIT("cos"), True)
3838 MO_F32_Tan -> (FSLIT("tan"), True)
3840 MO_F32_Asin -> (FSLIT("asin"), True)
3841 MO_F32_Acos -> (FSLIT("acos"), True)
3842 MO_F32_Atan -> (FSLIT("atan"), True)
3844 MO_F32_Sinh -> (FSLIT("sinh"), True)
3845 MO_F32_Cosh -> (FSLIT("cosh"), True)
3846 MO_F32_Tanh -> (FSLIT("tanh"), True)
3847 MO_F32_Pwr -> (FSLIT("pow"), True)
3849 MO_F64_Exp -> (FSLIT("exp"), False)
3850 MO_F64_Log -> (FSLIT("log"), False)
3851 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3853 MO_F64_Sin -> (FSLIT("sin"), False)
3854 MO_F64_Cos -> (FSLIT("cos"), False)
3855 MO_F64_Tan -> (FSLIT("tan"), False)
3857 MO_F64_Asin -> (FSLIT("asin"), False)
3858 MO_F64_Acos -> (FSLIT("acos"), False)
3859 MO_F64_Atan -> (FSLIT("atan"), False)
3861 MO_F64_Sinh -> (FSLIT("sinh"), False)
3862 MO_F64_Cosh -> (FSLIT("cosh"), False)
3863 MO_F64_Tanh -> (FSLIT("tanh"), False)
3864 MO_F64_Pwr -> (FSLIT("pow"), False)
3865 other -> pprPanic "genCCall(ppc): unknown callish op"
3866 (pprCallishMachOp other)
3868 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3870 #endif /* powerpc_TARGET_ARCH */
3873 -- -----------------------------------------------------------------------------
3874 -- Generating a table-branch
-- Generate a table branch: the expression selects an entry of a jump
-- table built from the list of optional block ids (Nothing becomes a zero
-- word).  Each architecture has two variants: one that reaches the table
-- through cmmMakeDynamicReference and stores entries as offsets relative
-- to the table label (jumpTableEntryRel), and one that addresses the table
-- label directly and stores absolute labels (jumpTableEntry).
-- Architectures without an implementation panic.
3876 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3878 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3882 (reg,e_code) <- getSomeReg expr
3883 lbl <- getNewLabelNat
3884 dflags <- getDynFlagsNat
3885 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3886 (tableReg,t_code) <- getSomeReg $ dynRef
3888 jumpTable = map jumpTableEntryRel ids
-- relative entry: distance of the block label from the table label lbl
3890 jumpTableEntryRel Nothing
3891 = CmmStaticLit (CmmInt 0 wordRep)
3892 jumpTableEntryRel (Just (BlockId id))
3893 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3894 where blockLabel = mkAsmTempLabel id
-- memory operand for the selected entry: tableReg + reg * wORD_SIZE
3896 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3897 (EAIndex reg wORD_SIZE) (ImmInt 0))
3899 #if x86_64_TARGET_ARCH
3900 #if darwin_TARGET_OS
3901 -- on Mac OS X/x86_64, put the jump table in the text section
3902 -- to work around a limitation of the linker.
3903 -- ld64 is unable to handle the relocations for
3905 -- if L0 is not preceded by a non-anonymous label in its section.
3907 code = e_code `appOL` t_code `appOL` toOL [
3908 ADD wordRep op (OpReg tableReg),
3909 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3910 LDATA Text (CmmDataLabel lbl : jumpTable)
3913 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3914 -- relocations, hence we only get 32-bit offsets in the jump
3915 -- table. As these offsets are always negative we need to properly
3916 -- sign extend them to 64-bit. This hack should be removed in
3917 -- conjunction with the hack in PprMach.hs/pprDataItem once
3918 -- binutils 2.17 is standard.
3919 code = e_code `appOL` t_code `appOL` toOL [
3920 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3922 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3923 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3925 ADD wordRep (OpReg reg) (OpReg tableReg),
3926 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3930 code = e_code `appOL` t_code `appOL` toOL [
3931 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3932 ADD wordRep op (OpReg tableReg),
3933 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- x86 variant with absolute entries: jump through lbl + reg * wORD_SIZE
3939 (reg,e_code) <- getSomeReg expr
3940 lbl <- getNewLabelNat
3942 jumpTable = map jumpTableEntry ids
3943 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3944 code = e_code `appOL` toOL [
3945 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3946 JMP_TBL op [ id | Just id <- ids ]
3950 #elif powerpc_TARGET_ARCH
3954 (reg,e_code) <- getSomeReg expr
3955 tmp <- getNewRegNat I32
3956 lbl <- getNewLabelNat
3957 dflags <- getDynFlagsNat
3958 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3959 (tableReg,t_code) <- getSomeReg $ dynRef
3961 jumpTable = map jumpTableEntryRel ids
3963 jumpTableEntryRel Nothing
3964 = CmmStaticLit (CmmInt 0 wordRep)
3965 jumpTableEntryRel (Just (BlockId id))
3966 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3967 where blockLabel = mkAsmTempLabel id
-- shift the index left by 2, load the relative entry at tableReg + tmp,
-- then add the table address back in
3969 code = e_code `appOL` t_code `appOL` toOL [
3970 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3971 SLW tmp reg (RIImm (ImmInt 2)),
3972 LD I32 tmp (AddrRegReg tableReg tmp),
3973 ADD tmp tmp (RIReg tableReg),
3975 BCTR [ id | Just id <- ids ]
-- PowerPC variant with absolute entries: the table is addressed through
-- the HA / LO parts of its label
3980 (reg,e_code) <- getSomeReg expr
3981 tmp <- getNewRegNat I32
3982 lbl <- getNewLabelNat
3984 jumpTable = map jumpTableEntry ids
3986 code = e_code `appOL` toOL [
3987 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3988 SLW tmp reg (RIImm (ImmInt 2)),
3989 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3990 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3992 BCTR [ id | Just id <- ids ]
-- no table-branch support for any other architecture
3996 genSwitch expr ids = panic "ToDo: genSwitch"
-- Build one absolute jump-table entry: a zero word for a missing target,
-- otherwise the assembler temp label of the target block.
3999 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
4000 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
4001 where blockLabel = mkAsmTempLabel id
4003 -- -----------------------------------------------------------------------------
4005 -- -----------------------------------------------------------------------------
4008 -- -----------------------------------------------------------------------------
4009 -- 'condIntReg' and 'condFltReg': condition codes into registers
4011 -- Turn those condition codes into integers now (when they appear on
4012 -- the right hand side of an assignment).
4014 -- (If applicable) Do not fill the delay slots here; you will confuse the
4015 -- register allocator.
-- Both functions take a condition and the two expressions being compared
-- and produce a Register for the result of that comparison.
4017 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4019 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4021 #if alpha_TARGET_ARCH
-- Not implemented for Alpha: both are bound to a panic.
4022 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4023 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4024 #endif /* alpha_TARGET_ARCH */
4026 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4028 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86 / x86-64: put the result of an integer comparison into a register.
-- After the comparison code, SETCC writes the condition into a fresh I8
-- temporary and MOVZxL zero-extends that byte into the destination.  The
-- result is an Any I32 register.
4030 condIntReg cond x y = do
4031 CondCode _ cond cond_code <- condIntCode cond x y
4032 tmp <- getNewRegNat I8
4034 code dst = cond_code `appOL` toOL [
4035 SETCC cond (OpReg tmp),
4036 MOVZxL I8 (OpReg tmp) (OpReg dst)
4039 return (Any I32 code)
4043 #if i386_TARGET_ARCH
-- i386: put the result of a floating-point comparison into a register,
-- using the same SETCC + MOVZxL sequence as the integer case but with the
-- comparison code coming from condFltCode.
4045 condFltReg cond x y = do
4046 CondCode _ cond cond_code <- condFltCode cond x y
4047 tmp <- getNewRegNat I8
4049 code dst = cond_code `appOL` toOL [
4050 SETCC cond (OpReg tmp),
4051 MOVZxL I8 (OpReg tmp) (OpReg dst)
4054 return (Any I32 code)
4058 #if x86_64_TARGET_ARCH
-- x86_64: put the result of a floating-point comparison into a register.
-- Depending on the condition, one of three sequences is used:
--   plain_test   - the condition flag alone;
--   or_unordered - condition OR parity flag (used for NE);
--   and_ordered  - condition AND not-parity (the default).
4060 condFltReg cond x y = do
4061 CondCode _ cond cond_code <- condFltCode cond x y
4062 tmp1 <- getNewRegNat wordRep
4063 tmp2 <- getNewRegNat wordRep
4065 -- We have to worry about unordered operands (eg. comparisons
4066 -- against NaN). If the operands are unordered, the comparison
4067 -- sets the parity flag, carry flag and zero flag.
4068 -- All comparisons are supposed to return false for unordered
4069 -- operands except for !=, which returns true.
4071 -- Optimisation: we don't have to test the parity flag if we
4072 -- know the test has already excluded the unordered case: eg >
4073 -- and >= test for a zero carry flag, which can only occur for
4074 -- ordered operands.
4076 -- ToDo: by reversing comparisons we could avoid testing the
4077 -- parity flag in more cases.
4082 NE -> or_unordered dst
4083 GU -> plain_test dst
4084 GEU -> plain_test dst
4085 _ -> and_ordered dst)
-- condition only
4087 plain_test dst = toOL [
4088 SETCC cond (OpReg tmp1),
4089 MOVZxL I8 (OpReg tmp1) (OpReg dst)
-- true if the condition holds or the operands were unordered
4091 or_unordered dst = toOL [
4092 SETCC cond (OpReg tmp1),
4093 SETCC PARITY (OpReg tmp2),
4094 OR I8 (OpReg tmp1) (OpReg tmp2),
4095 MOVZxL I8 (OpReg tmp2) (OpReg dst)
-- true only if the condition holds and the operands were ordered
4097 and_ordered dst = toOL [
4098 SETCC cond (OpReg tmp1),
4099 SETCC NOTPARITY (OpReg tmp2),
4100 AND I8 (OpReg tmp1) (OpReg tmp2),
4101 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4104 return (Any I32 code)
4108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4110 #if sparc_TARGET_ARCH
-- SPARC: put the result of an integer comparison into a register.
-- EQQ and NE have branch-free sequences (a special one when the right
-- operand is the literal 0, and a general one that XORs the operands
-- first); every other condition goes through a conditional branch that
-- selects between loading 0 and loading 1.
-- NOTE(review): the tmp / tmp1 / tmp2 registers allocated in the first four
-- equations are not referenced by the instructions shown -- confirm
-- whether they are still needed.
4112 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4113 (src, code) <- getSomeReg x
4114 tmp <- getNewRegNat I32
4116 code__2 dst = code `appOL` toOL [
4117 SUB False True g0 (RIReg src) g0,
4118 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4119 return (Any I32 code__2)
-- x == y: same sequence applied to (x XOR y)
4121 condIntReg EQQ x y = do
4122 (src1, code1) <- getSomeReg x
4123 (src2, code2) <- getSomeReg y
4124 tmp1 <- getNewRegNat I32
4125 tmp2 <- getNewRegNat I32
4127 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4128 XOR False src1 (RIReg src2) dst,
4129 SUB False True g0 (RIReg dst) g0,
4130 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4131 return (Any I32 code__2)
-- x /= 0
4133 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4134 (src, code) <- getSomeReg x
4135 tmp <- getNewRegNat I32
4137 code__2 dst = code `appOL` toOL [
4138 SUB False True g0 (RIReg src) g0,
4139 ADD True False g0 (RIImm (ImmInt 0)) dst]
4140 return (Any I32 code__2)
-- x /= y: same sequence applied to (x XOR y)
4142 condIntReg NE x y = do
4143 (src1, code1) <- getSomeReg x
4144 (src2, code2) <- getSomeReg y
4145 tmp1 <- getNewRegNat I32
4146 tmp2 <- getNewRegNat I32
4148 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4149 XOR False src1 (RIReg src2) dst,
4150 SUB False True g0 (RIReg dst) g0,
4151 ADD True False g0 (RIImm (ImmInt 0)) dst]
4152 return (Any I32 code__2)
-- general case: branch to lbl1 when the condition holds (dst := 1),
-- otherwise fall through (dst := 0) and jump to the join point lbl2
4154 condIntReg cond x y = do
4155 BlockId lbl1 <- getBlockIdNat
4156 BlockId lbl2 <- getBlockIdNat
4157 CondCode _ cond cond_code <- condIntCode cond x y
4159 code__2 dst = cond_code `appOL` toOL [
4160 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4161 OR False g0 (RIImm (ImmInt 0)) dst,
4162 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4163 NEWBLOCK (BlockId lbl1),
4164 OR False g0 (RIImm (ImmInt 1)) dst,
4165 NEWBLOCK (BlockId lbl2)]
4166 return (Any I32 code__2)
-- SPARC: put the result of a floating-point comparison into a register.
-- Same branch-and-select shape as the general integer case, except that
-- the conditional branch is the floating-point one (BF): dst becomes 1 on
-- the taken path (lbl1) and 0 on the fall-through path, joining at lbl2.
4168 condFltReg cond x y = do
4169 BlockId lbl1 <- getBlockIdNat
4170 BlockId lbl2 <- getBlockIdNat
4171 CondCode _ cond cond_code <- condFltCode cond x y
4173 code__2 dst = cond_code `appOL` toOL [
4175 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4176 OR False g0 (RIImm (ImmInt 0)) dst,
4177 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4178 NEWBLOCK (BlockId lbl1),
4179 OR False g0 (RIImm (ImmInt 1)) dst,
4180 NEWBLOCK (BlockId lbl2)]
4181 return (Any I32 code__2)
4183 #endif /* sparc_TARGET_ARCH */
4185 #if powerpc_TARGET_ARCH
-- PowerPC: shared implementation behind condIntReg and condFltReg.
-- getCond is the action that produces the CondCode (condIntCode or
-- condFltCode applied to the operands).  The condition selects a
-- (bit, do_negate) pair; when do_negate is set a CRNOR on that bit is
-- emitted first, and RLWINM then moves the bit into the destination.
-- The result is an Any I32 register.
4186 condReg getCond = do
4187 lbl1 <- getBlockIdNat
4188 lbl2 <- getBlockIdNat
4189 CondCode _ cond cond_code <- getCond
4191 {- code dst = cond_code `appOL` toOL [
4200 code dst = cond_code
4204 RLWINM dst dst (bit + 1) 31 31
4207 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4210 (bit, do_negate) = case cond of
4224 return (Any I32 code)
4226 condIntReg cond x y = condReg (condIntCode cond x y)
4227 condFltReg cond x y = condReg (condFltCode cond x y)
4228 #endif /* powerpc_TARGET_ARCH */
4231 -- -----------------------------------------------------------------------------
4232 -- 'trivial*Code': deal with trivial instructions
4234 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4235 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4236 -- Only look for constants on the right hand side, because that's
4237 -- where the generic optimizer will have put them.
4239 -- Similarly, for unary instructions, we don't have to worry about
4240 -- matching an StInt as the argument, because genericOpt will already
4241 -- have handled the constant-folding.
-- Type signatures for the trivial*Code family.  The type of the
-- instruction-constructor argument differs per target and is selected with
-- the nested IF_ARCH_* macros.  The two groups guarded by
-- "#ifndef powerpc_TARGET_ARCH" have no PowerPC alternative; presumably
-- these are the floating-point variants (trivialFCode / trivialUFCode) --
-- TODO confirm.
4245 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4246 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4247 -> Maybe (Operand -> Operand -> Instr)
4248 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4249 -> Maybe (Operand -> Operand -> Instr)
4250 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4251 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4253 -> CmmExpr -> CmmExpr -- the two arguments
4256 #ifndef powerpc_TARGET_ARCH
4259 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4260 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4261 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4262 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4264 -> CmmExpr -> CmmExpr -- the two arguments
4270 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4271 ,IF_ARCH_i386 ((Operand -> Instr)
4272 ,IF_ARCH_x86_64 ((Operand -> Instr)
4273 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4274 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4276 -> CmmExpr -- the one argument
4279 #ifndef powerpc_TARGET_ARCH
4282 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4283 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4284 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4285 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4287 -> CmmExpr -- the one argument
4291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4293 #if alpha_TARGET_ARCH
-- Alpha: code for a dyadic integer instruction.
-- First equation: the right operand is an integer literal and is passed
-- to the instruction as an immediate (RIImm).
-- Second equation: both operands are computed into registers and the
-- right one is passed as RIReg.
-- Either way the result is an Any IntRep register.
4295 trivialCode instr x (StInt y)
4297 = getRegister x `thenNat` \ register ->
4298 getNewRegNat IntRep `thenNat` \ tmp ->
4300 code = registerCode register tmp
4301 src1 = registerName register tmp
4302 src2 = ImmInt (fromInteger y)
4303 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4305 return (Any IntRep code__2)
4307 trivialCode instr x y
4308 = getRegister x `thenNat` \ register1 ->
4309 getRegister y `thenNat` \ register2 ->
4310 getNewRegNat IntRep `thenNat` \ tmp1 ->
4311 getNewRegNat IntRep `thenNat` \ tmp2 ->
4313 code1 = registerCode register1 tmp1 []
4314 src1 = registerName register1 tmp1
4315 code2 = registerCode register2 tmp2 []
4316 src2 = registerName register2 tmp2
-- operand code for x is sequenced before that for y, then the instruction
4317 code__2 dst = asmSeqThen [code1, code2] .
4318 mkSeqInstr (instr src1 (RIReg src2) dst)
4320 return (Any IntRep code__2)
-- Alpha: code for a unary integer instruction.  The operand is computed
-- into a register, passed to the instruction as RIReg, and the result is
-- an Any IntRep register.
4323 trivialUCode instr x
4324 = getRegister x `thenNat` \ register ->
4325 getNewRegNat IntRep `thenNat` \ tmp ->
4327 code = registerCode register tmp
4328 src = registerName register tmp
4329 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4331 return (Any IntRep code__2)
-- Alpha: code for a dyadic floating-point instruction.  The first
-- argument is ignored; both operands are computed into F64 registers and
-- the result is an Any F64 register.
4334 trivialFCode _ instr x y
4335 = getRegister x `thenNat` \ register1 ->
4336 getRegister y `thenNat` \ register2 ->
4337 getNewRegNat F64 `thenNat` \ tmp1 ->
4338 getNewRegNat F64 `thenNat` \ tmp2 ->
4340 code1 = registerCode register1 tmp1
4341 src1 = registerName register1 tmp1
4343 code2 = registerCode register2 tmp2
4344 src2 = registerName register2 tmp2
-- operand code for x is sequenced before that for y, then the instruction
4346 code__2 dst = asmSeqThen [code1 [], code2 []] .
4347 mkSeqInstr (instr src1 src2 dst)
4349 return (Any F64 code__2)
-- Alpha: code for a unary floating-point instruction.  The first
-- argument is ignored; the operand is computed into an F64 register and
-- the result is an Any F64 register.
4351 trivialUFCode _ instr x
4352 = getRegister x `thenNat` \ register ->
4353 getNewRegNat F64 `thenNat` \ tmp ->
4355 code = registerCode register tmp
4356 src = registerName register tmp
4357 code__2 dst = code . mkSeqInstr (instr src dst)
4359 return (Any F64 code__2)
4361 #endif /* alpha_TARGET_ARCH */
4363 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4365 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4368 The Rules of the Game are:
4370 * You cannot assume anything about the destination register dst;
4371 it may be anything, including a fixed reg.
4373 * You may compute an operand into a fixed reg, but you may not
4374 subsequently change the contents of that fixed reg. If you
4375 want to do so, first copy the value either to a temporary
4376 or into dst. You are free to modify dst even if it happens
4377 to be a fixed reg -- that's not your problem.
4379 * You cannot assume that a fixed reg will stay live over an
4380 arbitrary computation. The same applies to the dst reg.
4382 * Temporary regs obtained from getNewRegNat are distinct from
4383 each other and from all other regs, and stay live over
4384 arbitrary computations.
4386 --------------------
4388 SDM's version of The Rules:
4390 * If getRegister returns Any, that means it can generate correct
4391 code which places the result in any register, period. Even if that
4392 register happens to be read during the computation.
4394 Corollary #1: this means that if you are generating code for an
4395 operation with two arbitrary operands, you cannot assign the result
4396 of the first operand into the destination register before computing
4397 the second operand. The second operand might require the old value
4398 of the destination register.
4400 Corollary #2: A function might be able to generate more efficient
4401 code if it knows the destination register is a new temporary (and
4402 therefore not read by any of the sub-computations).
4404 * If getRegister returns Any, then the code it generates may modify only:
4405 (a) fresh temporaries
4406 (b) the destination register
4407 (c) known registers (e.g. %ecx is used by shifts)
4408 In particular, it may *not* modify global registers, unless the global
4409 register happens to be the destination register.
-- x86/x86_64: two-operand integer operation.
-- Special case: the first operand is a literal that is not a 64-bit literal
-- and the caller supplied a reversed form of the instruction.  b is then
-- computed straight into dst, and revinstr applies the literal to dst as an
-- immediate, so no register is needed for the literal.
4412 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4413 | not (is64BitLit lit_a) = do
4414 b_code <- getAnyReg b
4417 = b_code dst `snocOL`
4418 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4420 return (Any rep code)
-- Every other combination of operands uses the general scheme.
4422 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
-- General two-operand code generation (dst := dst `op` src).
-- b is obtained as an operand via getNonClobberedOperand, a via getAnyReg,
-- and one fresh temporary of representation rep is reserved for the clash
-- case described below.  The result is an 'Any' register of rep.
4424 -- This is re-used for floating pt instructions too.
4425 genTrivialCode rep instr a b = do
4426 (b_op, b_code) <- getNonClobberedOperand b
4427 a_code <- getAnyReg a
4428 tmp <- getNewRegNat rep
4430 -- We want the value of b to stay alive across the computation of a.
4431 -- But, we want to calculate a straight into the destination register,
4432 -- because the instruction only has two operands (dst := dst `op` src).
4433 -- The troublesome case is when the result of b is in the same register
4434 -- as the destination reg. In this case, we have to save b in a
4435 -- new temporary across the computation of a.
-- Clash case: b's operand is first copied into tmp with MOV, and the
-- instruction then reads tmp instead of b_op.
4437 | dst `regClashesWithOp` b_op =
4439 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4441 instr (OpReg tmp) (OpReg dst)
-- No clash: b_op is used directly as the source operand.
4445 instr b_op (OpReg dst)
4447 return (Any rep code)
-- regClashesWithOp: does the register occur in the operand?  True for the
-- same register operand, or for an address-mode operand that mentions the
-- register (per addrModeRegs); any other operand form never clashes.
4449 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4450 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4451 reg `regClashesWithOp` _ = False
-- x86/x86_64: one-operand operation.
-- The operand code comes from getAnyReg, and the result is an 'Any'
-- register of representation rep.
-- NOTE(review): presumably x is computed into dst and instr is then applied
-- to dst in place -- confirm against the full definition.
4455 trivialUCode rep instr x = do
4456 x_code <- getAnyReg x
4462 return (Any rep code)
4466 #if i386_TARGET_ARCH
-- i386: two-operand floating-point operation.
-- x is fetched with getNonClobberedReg and y with getSomeReg; the
-- instruction is three-address, taking the representation pk, both source
-- registers and the destination.  The result is an 'Any' register of pk.
4468 trivialFCode pk instr x y = do
4469 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4470 (y_reg, y_code) <- getSomeReg y
4475 instr pk x_reg y_reg dst
4477 return (Any pk code)
4481 #if x86_64_TARGET_ARCH
-- x86_64: two-operand floating-point operations reuse the general
-- two-operand scheme.  The instruction constructor is specialised to the
-- representation pk once, and the result is handed to genTrivialCode.
trivialFCode pk instr x y = genTrivialCode pk sizedInstr x y
  where
    sizedInstr = instr pk
-- x86: one-operand floating-point operation.
-- The operand is evaluated into some register with getSomeReg and the result
-- is an 'Any' register of representation rep.
-- NOTE(review): presumably the emitted code is x_code followed by
-- `instr x_reg dst` -- confirm against the full definition.
4489 trivialUFCode rep instr x = do
4490 (x_reg, x_code) <- getSomeReg x
4496 return (Any rep code)
4498 #endif /* i386_TARGET_ARCH */
4500 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4502 #if sparc_TARGET_ARCH
-- SPARC: two-operand integer operation.
-- First clause: the second operand is an integer literal, which is used as
-- an immediate (RIImm) so that only x needs a register.
-- NOTE(review): tmp is not referenced by the lines shown here, and the
-- result is reported as I32 regardless of pk -- confirm both are intended.
4504 trivialCode pk instr x (CmmLit (CmmInt y d))
4507 (src1, code) <- getSomeReg x
4508 tmp <- getNewRegNat I32
4510 src2 = ImmInt (fromInteger y)
4511 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4512 return (Any I32 code__2)
-- General clause: both operands are evaluated into registers (x's code
-- first, then y's) and a single register-register instruction writes dst.
-- NOTE(review): tmp1 and tmp2 are not referenced by the lines shown here.
4514 trivialCode pk instr x y = do
4515 (src1, code1) <- getSomeReg x
4516 (src2, code2) <- getSomeReg y
4517 tmp1 <- getNewRegNat I32
4518 tmp2 <- getNewRegNat I32
4520 code__2 dst = code1 `appOL` code2 `snocOL`
4521 instr src1 (RIReg src2) dst
4522 return (Any I32 code__2)
-- SPARC: two-operand floating-point operation with mixed-precision support.
-- Both operands are evaluated into registers.  When the operand
-- representations differ, the F32 one is first promoted to F64 into tmp and
-- the instruction is issued at F64; the reported result representation is
-- then F64 as well.
-- NOTE(review): tmp1 and tmp2 are not referenced by the lines shown here.
4525 trivialFCode pk instr x y = do
4526 (src1, code1) <- getSomeReg x
4527 (src2, code2) <- getSomeReg y
4528 tmp1 <- getNewRegNat (cmmExprRep x)
4529 tmp2 <- getNewRegNat (cmmExprRep y)
4530 tmp <- getNewRegNat F64
-- promote: convert an F32 register to F64, writing the result into tmp.
4532 promote x = FxTOy F32 F64 x tmp
-- Same representation on both sides: issue the instruction directly at pk.
4539 code1 `appOL` code2 `snocOL`
4540 instr pk src1 src2 dst
-- Left operand is F32: promote it right after its own code has run.
4541 else if pk1 == F32 then
4542 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4543 instr F64 tmp src2 dst
-- Otherwise promote the right operand after both operands are computed.
4545 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4546 instr F64 src1 tmp dst
4547 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- SPARC: one-operand integer operation.
-- The operand is evaluated into a register and passed to instr as a
-- register operand (RIReg); the result is an 'Any' register of pk.
-- NOTE(review): tmp is not referenced by the lines shown here.
4550 trivialUCode pk instr x = do
4551 (src, code) <- getSomeReg x
4552 tmp <- getNewRegNat pk
4554 code__2 dst = code `snocOL` instr (RIReg src) dst
4555 return (Any pk code__2)
-- SPARC: one-operand floating-point operation.
-- The operand is evaluated into a register and instr is applied to that
-- register and dst; the result is an 'Any' register of pk.
-- NOTE(review): tmp is not referenced by the lines shown here.
4558 trivialUFCode pk instr x = do
4559 (src, code) <- getSomeReg x
4560 tmp <- getNewRegNat pk
4562 code__2 dst = code `snocOL` instr src dst
4563 return (Any pk code__2)
4565 #endif /* sparc_TARGET_ARCH */
4567 #if powerpc_TARGET_ARCH
4570 Wolfgang's PowerPC version of The Rules:
4572 A slightly modified version of The Rules to take advantage of the fact
4573 that PowerPC instructions work on all registers and don't implicitly
4574 clobber any fixed registers.
4576 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4578 * If getRegister returns Any, then the code it generates may modify only:
4579 (a) fresh temporaries
4580 (b) the destination register
4581 It may *not* modify global registers, unless the global
4582 register happens to be the destination register.
4583 It may not clobber any other registers. In fact, only ccalls clobber any
4585 Also, it may not modify the counter register (used by genCCall).
4587 Corollary: If a getRegister for a subexpression returns Fixed, you need
4588 not move it to a fresh temporary before evaluating the next subexpression.
4589 The Fixed register won't be modified.
4590 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4592 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4593 the value of the destination register.
-- PowerPC: two-operand integer operation with a three-address instruction
-- (destination first, then the two sources).
-- First clause: the second operand is an integer literal that
-- makeImmediate accepts for this rep/signedness, so it is encoded as an
-- immediate (RIImm) and only x needs a register.
4596 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4597 | Just imm <- makeImmediate rep signed y
4599 (src1, code1) <- getSomeReg x
4600 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4601 return (Any rep code)
-- General clause: both operands are evaluated into registers (x's code
-- first, then y's) and the second is passed as a register operand (RIReg).
4603 trivialCode rep signed instr x y = do
4604 (src1, code1) <- getSomeReg x
4605 (src2, code2) <- getSomeReg y
4606 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4607 return (Any rep code)
-- PowerPC: two-operand operation for instructions that take both sources in
-- registers (no immediate form).  Both operands are evaluated into
-- registers, x's code before y's, and one three-address instruction
-- (destination first) is appended.  The result is an 'Any' register of rep.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (lhsReg, lhsCode) <- getSomeReg x
    (rhsReg, rhsCode) <- getSomeReg y
    let operandCode = lhsCode `appOL` rhsCode
        emit dst    = operandCode `snocOL` instr dst lhsReg rhsReg
    return (Any rep emit)
-- PowerPC: one-operand operation.  The operand is evaluated into a register
-- and a single two-address instruction (destination first) is appended.
-- The result is an 'Any' register of rep.
trivialUCode rep instr x = do
    (operandReg, operandCode) <- getSomeReg x
    return $ Any rep (\dst -> operandCode `snocOL` instr dst operandReg)
4622 -- There is no "remainder" instruction on the PPC, so we have to do
4624 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- Both operands are evaluated into registers (x's code first, then y's) and
-- a short instruction sequence leaves the remainder in dst.  The result is
-- an 'Any' register of rep.
4626 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4627 -> CmmExpr -> CmmExpr -> NatM Register
4628 remainderCode rep div x y = do
4629 (src1, code1) <- getSomeReg x
4630 (src2, code2) <- getSomeReg y
4631 let code dst = code1 `appOL` code2 `appOL` toOL [
-- Multiply dst by the divisor in place.
-- NOTE(review): presumably dst holds the quotient at this point and the
-- product is then subtracted from src1 (x - (x / y) * y) -- confirm.
4633 MULLW dst dst (RIReg src2),
4636 return (Any rep code)
4638 #endif /* powerpc_TARGET_ARCH */
4641 -- -----------------------------------------------------------------------------
4642 -- Coercing to/from integer/floating-point...
4644 -- When going to integer, we truncate (round towards 0).
4646 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4647 -- conversions. We have to store temporaries in memory to move
4648 -- between the integer and the floating point register sets.
4650 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4651 -- pretend, on sparc at least, that double and float regs are separate
4652 -- kinds, so the value has to be computed into one kind before being
4653 -- explicitly "converted" to live in the other kind.
4655 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4656 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4658 #if sparc_TARGET_ARCH
4659 coerceDbl2Flt :: CmmExpr -> NatM Register
4660 coerceFlt2Dbl :: CmmExpr -> NatM Register
4663 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4665 #if alpha_TARGET_ARCH
4668 = getRegister x `thenNat` \ register ->
4669 getNewRegNat IntRep `thenNat` \ reg ->
4671 code = registerCode register reg
4672 src = registerName register reg
4674 code__2 dst = code . mkSeqInstrs [
4676 LD TF dst (spRel 0),
4679 return (Any F64 code__2)
4683 = getRegister x `thenNat` \ register ->
4684 getNewRegNat F64 `thenNat` \ tmp ->
4686 code = registerCode register tmp
4687 src = registerName register tmp
4689 code__2 dst = code . mkSeqInstrs [
4691 ST TF tmp (spRel 0),
4694 return (Any IntRep code__2)
4696 #endif /* alpha_TARGET_ARCH */
4698 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4700 #if i386_TARGET_ARCH
-- i386: integer -> floating point.
-- The operand is evaluated into a register and converted with a single
-- instruction chosen by the target representation.  The result is an 'Any'
-- register of `to`; `from` is not consulted.
4702 coerceInt2FP from to x = do
4703 (x_reg, x_code) <- getSomeReg x
-- Only F32 and F64 targets are handled; any other rep has no alternative in
-- this case expression.
4705 opc = case to of F32 -> GITOF; F64 -> GITOD
4706 code dst = x_code `snocOL` opc x_reg dst
4707 -- ToDo: works for non-I32 reps?
4709 return (Any to code)
-- i386: floating point -> integer.
-- The operand is evaluated into a register and converted with a single
-- instruction chosen by the source representation.  The result is an 'Any'
-- register of `to`.
4713 coerceFP2Int from to x = do
4714 (x_reg, x_code) <- getSomeReg x
-- Only F32 and F64 sources are handled; any other rep has no alternative in
-- this case expression.
4716 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4717 code dst = x_code `snocOL` opc x_reg dst
4718 -- ToDo: works for non-I32 reps?
4720 return (Any to code)
4722 #endif /* i386_TARGET_ARCH */
4724 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4726 #if x86_64_TARGET_ARCH
-- x86_64: floating point -> integer.
-- The source is taken as a general operand (getOperand) and converted with
-- the CVTT* instruction matching the source representation.  The result is
-- an 'Any' register of `to`.
4728 coerceFP2Int from to x = do
4729 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
-- Only F32 and F64 sources are handled; any other rep has no alternative in
-- this case expression.
4731 opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4732 code dst = x_code `snocOL` opc x_op dst
4734 return (Any to code) -- works even if the destination rep is <I32
-- x86_64: integer -> floating point.
-- The source is taken as a general operand (getOperand) and converted with
-- the CVTSI2* instruction matching the target representation.  The result
-- is an 'Any' register of `to`; `from` is not consulted.
4736 coerceInt2FP from to x = do
4737 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
-- Only F32 and F64 targets are handled; any other rep has no alternative in
-- this case expression.
4739 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4740 code dst = x_code `snocOL` opc x_op dst
-- NOTE(review): the trailing remark below speaks of a destination rep
-- smaller than I32, but the destination here is a floating-point rep; it
-- looks carried over from coerceFP2Int -- confirm what was meant.
4742 return (Any to code) -- works even if the destination rep is <I32
-- x86_64: conversion between the two floating-point representations.
-- Only the target representation is passed in: F32 selects the
-- double-to-single conversion and F64 the single-to-double one.  The result
-- is an 'Any' register of `to`.
4744 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4745 coerceFP2FP to x = do
4746 (x_reg, x_code) <- getSomeReg x
-- Only F32 and F64 targets are handled; any other rep has no alternative in
-- this case expression.
4748 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4749 code dst = x_code `snocOL` opc x_reg dst
4751 return (Any to code)
4755 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4757 #if sparc_TARGET_ARCH
-- SPARC: integer -> floating point, going through memory.
-- pk1 is the representation used for the store and reload, pk2 the
-- representation of the result.
4759 coerceInt2FP pk1 pk2 x = do
4760 (src, code) <- getSomeReg x
-- Store the source register to the stack slot spRel (-2), reload that slot
-- into dst, then convert dst in place from pk1 to pk2.
4762 code__2 dst = code `appOL` toOL [
4763 ST pk1 src (spRel (-2)),
4764 LD pk1 (spRel (-2)) dst,
4765 FxTOy pk1 pk2 dst dst]
4766 return (Any pk2 code__2)
-- SPARC: floating point -> integer, going through memory.
-- As used here, the first argument (pk) is the representation of the result
-- and the second (fprep) is the floating-point source representation, which
-- is asserted to be F32 or F64.
-- NOTE(review): the i386, x86_64 and PowerPC definitions of coerceFP2Int
-- take (from, to), i.e. the FP rep first; confirm that SPARC callers pass
-- the arguments in the order this definition expects.
-- NOTE(review): reg is not referenced by the lines shown here.
4769 coerceFP2Int pk fprep x = do
4770 (src, code) <- getSomeReg x
4771 reg <- getNewRegNat fprep
4772 tmp <- getNewRegNat pk
4774 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
-- Convert into tmp, store tmp to the stack slot spRel (-2), then reload
-- that slot into dst.
4776 FxTOy fprep pk src tmp,
4777 ST pk tmp (spRel (-2)),
4778 LD pk (spRel (-2)) dst]
4779 return (Any pk code__2)
-- SPARC: narrow a double to a float.  The operand is evaluated into a
-- register and one F64 -> F32 conversion instruction writes the result
-- into the destination.  The result is an F32 'Any' register.
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    let narrow dst = code `snocOL` FxTOy F64 F32 src dst
    return (Any F32 narrow)
-- SPARC: widen a float to a double.  The operand is evaluated into a
-- register and one F32 -> F64 conversion instruction writes the result
-- into the destination.  The result is an F64 'Any' register.
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    let widen dst = code `snocOL` FxTOy F32 F64 src dst
    return (Any F64 widen)
4791 #endif /* sparc_TARGET_ARCH */
4793 #if powerpc_TARGET_ARCH
-- PowerPC: integer -> floating point, going through memory.
-- A 64-bit pattern is assembled on the stack from the integer and reloaded
-- as an F64 into ftmp; a static two-word constant is referenced through a
-- fresh label, and an FSUB of ftmp and dst produces the value.  The result
-- is an 'Any' register of toRep.
4794 coerceInt2FP fromRep toRep x = do
4795 (src, code) <- getSomeReg x
4796 lbl <- getNewLabelNat
4797 itmp <- getNewRegNat I32
4798 ftmp <- getNewRegNat F64
4799 dflags <- getDynFlagsNat
-- The constant is addressed through a dynamic data reference to lbl, so
-- the address computation (addr_code) may itself emit instructions.
4800 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4801 Amode addr addr_code <- getAmode dynRef
4803 code' dst = code `appOL` maybe_exts `appOL` toOL [
-- Static data: the words 0x43300000 and 0x80000000.
4806 CmmStaticLit (CmmInt 0x43300000 I32),
4807 CmmStaticLit (CmmInt 0x80000000 I32)],
-- Word at spRel 3: src XORed with the shifted immediate 0x8000.
4808 XORIS itmp src (ImmInt 0x8000),
4809 ST I32 itmp (spRel 3),
-- Word at spRel 2: the shifted immediate 0x4330, matching the first static
-- word above.
4810 LIS itmp (ImmInt 0x4330),
4811 ST I32 itmp (spRel 2),
-- Reload both words as one F64.
4812 LD F64 ftmp (spRel 2)
4813 ] `appOL` addr_code `appOL` toOL [
-- NOTE(review): presumably dst holds the static constant at this point, so
-- the subtraction cancels the bias and leaves the integer's value -- confirm.
4815 FSUB F64 dst ftmp dst
4816 ] `appOL` maybe_frsp dst
-- Sources narrower than a word get an EXTS on src, in place, before the
-- conversion sequence runs.
4818 maybe_exts = case fromRep of
4819 I8 -> unitOL $ EXTS I8 src src
4820 I16 -> unitOL $ EXTS I16 src src
-- An F32 result gets a trailing FRSP on dst, in place.
4822 maybe_frsp dst = case toRep of
4823 F32 -> unitOL $ FRSP dst dst
4825 return (Any toRep code')
-- PowerPC: floating point -> integer, going through memory.
-- The value is converted inside a floating-point temporary, stored to the
-- stack as 64 bits, and the I32 result is read back from the second word
-- of that slot.  The result is an 'Any' register of toRep.
4827 coerceFP2Int fromRep toRep x = do
4828 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4829 (src, code) <- getSomeReg x
4830 tmp <- getNewRegNat F64
4832 code' dst = code `appOL` toOL [
4833 -- convert to int in FP reg
4835 -- store value (64bit) from FP to stack
4836 ST F64 tmp (spRel 2),
4837 -- read low word of value (high word is undefined)
4838 LD I32 dst (spRel 3)]
4839 return (Any toRep code')
4840 #endif /* powerpc_TARGET_ARCH */
4843 -- -----------------------------------------------------------------------------
4844 -- eXTRA_STK_ARGS_HERE
4846 -- We (allegedly) put the first six C-call arguments in registers;
4847 -- where do we start putting the rest of them?
4849 -- Moved from MachInstrs (SDM):
4851 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4852 eXTRA_STK_ARGS_HERE :: Int
4854 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))