1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
34 import StaticFlags ( opt_PIC )
35 import ForeignCall ( CCallConv(..) )
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import Debug.Trace ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Data.Maybe ( fromJust )
53 -- -----------------------------------------------------------------------------
54 -- Top-level of the instruction selector
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.
type InstrBlock = OrdList Instr
-- | Entry point of the instruction selector: translate one top-level
-- Cmm entity (a procedure or a data section) into native-code tops.
cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
  -- Select instructions for each basic block; code generation may also
  -- emit new static data, collected per-block in 'statics'.
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): the scrutinee line (presumably "case picBaseMb of")
  -- appears to be elided in this excerpt; the two alternatives below
  -- belong to it.  When a PIC base register is in use, prepend the
  -- code that initializes it.
      Just picBase -> initializePicBase picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for one Cmm basic block.  Code generation can
-- split a block (NEWBLOCK) and emit static data (LDATA), so the result
-- is a list of native basic blocks plus any hoisted-out data tops.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- NOTE(review): the continuation of the comment above and the "let"
  -- keyword introducing the bindings below are elided in this excerpt.
      (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
      -- A NEWBLOCK pseudo-insn closes the current block and opens a
      -- fresh one with the given id.
      mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
        = ([], BasicBlock id instrs : blocks, statics)
      -- LDATA pseudo-insns are hoisted out into separate data tops.
      mkBlocks (LDATA sec dat) (instrs,blocks,statics)
        = (instrs, blocks, CmmData sec dat:statics)
      -- Ordinary instructions accumulate onto the current block.
      mkBlocks instr (instrs,blocks,statics)
        = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements, concatenating the resulting
-- instruction sequences in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the equation head (presumably "stmtsToInstrs stmts")
-- is elided in this excerpt.
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)
-- | Translate a single Cmm statement into an instruction sequence,
-- dispatching on the statement constructor.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop -> return nilOL
    CmmComment s -> return (unitOL (COMMENT s))

    -- NOTE(review): the alternative head for register assignment
    -- (presumably "CmmAssign reg src") and the matching #endif for the
    -- #if below are elided in this excerpt.
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      -- 64-bit values need the special register-pair path on 32-bit.
      | kind == I64 -> assignReg_I64Code reg src
      | otherwise -> assignReg_IntCode kind reg src
      where kind = cmmRegRep reg

    -- NOTE(review): the alternative head for stores (presumably
    -- "CmmStore addr src") and the matching #endif are elided here.
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64 -> assignMem_I64Code addr src
      | otherwise -> assignMem_IntCode kind addr src
      where kind = cmmExprRep src

    CmmCall target result_regs args vols
       -> genCCall target result_regs args vols

    CmmBranch id -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids -> genSwitch arg ids
    -- 'params' are not needed for an already-resolved jump target.
    CmmJump arg params -> genJump arg
129 -- -----------------------------------------------------------------------------
130 -- General things for putting together code sequences
132 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
133 -- CmmExprs into CmmRegOff?
-- Rewrite a CmmRegOff node into the equivalent explicit addition:
-- reg+off  ==>  MO_Add [reg, off].  Only defined for CmmRegOff nodes.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    CmmMachOp (MO_Add rep) [CmmReg reg, offsetLit]
  where
    rep       = cmmRegRep reg
    offsetLit = CmmLit (CmmInt (fromIntegral off) rep)
139 -- -----------------------------------------------------------------------------
140 -- Code gen for 64-bit arithmetic on 32-bit platforms
143 Simple support for generating 64-bit code (ie, 64 bit values and 64
144 bit assignments) on 32-bit platforms. Unlike the main code generator
145 we merely shoot for generating working code as simply as possible, and
146 pay little attention to code quality. Specifically, there is no
147 attempt to deal cleverly with the fixed-vs-floating register
148 distinction; all values are generated into (pairs of) floating
149 registers, even if this would mean some redundant reg-reg moves as a
150 result. Only one of the VRegUniques is returned, since it will be
151 of the VRegUniqueLo form, and the upper-half VReg can be determined
152 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit platform: the
-- generated code plus the low-word virtual register (the high word is
-- recovered with getHiVRegFromLo).
data ChildCode64 -- a.k.a "Register64"
-- NOTE(review): the constructor header ("= ChildCode64" and its
-- InstrBlock code field) appears to be elided in this excerpt.
     Reg -- the lower 32-bit temporary which contains the
         -- result; use getHiVRegFromLo to find the other
         -- VRegUnique. Rules of this simplified insn
         -- selection game are therefore that the returned
         -- Reg may be modified

#if WORD_SIZE_IN_BITS==32
-- Store a 64-bit value at the address given by the first expression.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- Assign a 64-bit value to a (local) register.
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
-- Select code computing a 64-bit expression into a register pair.
iselExpr64 :: CmmExpr -> NatM ChildCode64
173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value as two 32-bit MOVs, low word at addr and
-- high word at addr+4 (little-endian layout).
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  -- NOTE(review): the "let" keyword introducing the bindings below is
  -- elided in this excerpt.
      rhi = getHiVRegFromLo rlo

      -- Little-endian store
      mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
      -- NOTE(review): fromJust assumes addr+4 is always an encodable
      -- address mode here -- confirm against addrOffset.
      mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))

  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair with two
-- reg-to-reg moves.  Only local registers may hold I64.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): the "let" keyword and the "return (" wrapper for the
   -- final expression are elided in this excerpt.
       r_dst_lo = mkVReg u_dst I32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
       mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Anything other than a local register is an invalid 64-bit lvalue.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386 iselExpr64: 64-bit values are handled as a pair of 32-bit
-- virtual registers (lo returned, hi found via getHiVRegFromLo).
-- NOTE(review): several "let"/"code = toOL ["/closing-bracket lines are
-- elided throughout this excerpt; code tokens kept verbatim.

-- Literal: move the two 32-bit halves into a fresh register pair.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
      -- r = low word, q = high word of the literal
      r = fromIntegral (fromIntegral i :: Word32)
      q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
          MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
          MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- Load: two 32-bit loads, low word at addr, high word at addr+4.
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
       mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
       -- NOTE(review): fromJust assumes addr+4 stays encodable.
       mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- A local 64-bit register needs no code at all.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))

-- we handle addition, but rather badly

-- Add a literal: 32-bit add with carry into the high word (ADD/ADC).
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
       r = fromIntegral (fromIntegral i :: Word32)
       q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
       r1hi = getHiVRegFromLo r1lo
       toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
              ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
              MOV I32 (OpReg r1hi) (OpReg rhi),
              ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General 64-bit addition, again via ADD/ADC on the halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
       r1hi = getHiVRegFromLo r1lo
       r2hi = getHiVRegFromLo r2lo
       toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
              ADD I32 (OpReg r2lo) (OpReg rlo),
              MOV I32 (OpReg r1hi) (OpReg rhi),
              ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Catch-all: anything else is unsupported on i386.
-- NOTE(review): the equation head ("iselExpr64 expr") is elided here.
   = pprPanic "iselExpr64(i386)" (ppr expr)
270 #endif /* i386_TARGET_ARCH */
272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
274 #if sparc_TARGET_ARCH
-- sparc: store a 64-bit value big-endian -- high word at offset 0,
-- low word at offset 4.
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     -- NOTE(review): the Amode bound above appears unused -- the stores
     -- below recompute the address via getSomeReg instead.  Confirm
     -- whether addr/addr_code are dead here.
     (src, code) <- getSomeReg addrTree
         rhi = getHiVRegFromLo rlo
         mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: assign a 64-bit value to a local register pair.  Moves are
-- synthesized as OR with %g0 (the hardware zero register).
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
     -- NOTE(review): the "let" keyword for the bindings below is elided
     -- in this excerpt.
         r_dst_lo = mkVReg u_dst pk
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- reg-to-reg move: dst := src | %g0
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)

-- Only local registers can hold I64 values.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
-- = panic "iselExpr64(???)"

-- sparc iselExpr64: big-endian register pairs (hi at offset 0).
iselExpr64 (CmmLoad addrTree I64) = do
     -- NOTE(review): this pattern match is partial (only AddrRegReg),
     -- and r2 is never used below -- if the amode carries a genuine
     -- register offset, the loads from r1+0/r1+4 read the wrong
     -- address.  Confirm against getAmode's possible results.
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
     rlo <- getNewRegNat I32
     let rhi = getHiVRegFromLo rlo
         mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
     -- NOTE(review): the "return $" wrapper and the final result
     -- register argument appear partially elided in this excerpt.
         ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)

-- A local 64-bit register: copy both halves into a fresh pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
     r_dst_lo <- getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq I32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- reg-to-reg move via OR with %g0
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo

-- Catch-all: unsupported 64-bit expression on sparc.
-- NOTE(review): the equation head ("iselExpr64 expr") is elided here.
   = pprPanic "iselExpr64(sparc)" (ppr expr)
332 #endif /* sparc_TARGET_ARCH */
334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
336 #if powerpc_TARGET_ARCH
-- | Compute address modes for the two halves of a 64-bit memory
-- operand: high word at offset 0, low word at offset 4 (big-endian).
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
      Just lo_addr -> return (hi_addr, lo_addr, addr_code)
      -- If hi_addr+4 is not an encodable amode, materialize the
      -- pointer in a register and address via reg+0 / reg+4.
      Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                         return (AddrRegImm hi_ptr (ImmInt 0),
                                 AddrRegImm hi_ptr (ImmInt 4),
-- NOTE(review): the final tuple component (the code) and the closing
-- parenthesis are elided in this excerpt.
-- powerpc: store a 64-bit value with two 32-bit ST instructions, using
-- the hi/lo address modes from getI64Amodes.
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        -- NOTE(review): the "let" keyword for the bindings below is
        -- elided in this excerpt.
            rhi = getHiVRegFromLo rlo

            mov_hi = ST I32 rhi hi_addr
            mov_lo = ST I32 rlo lo_addr

        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- powerpc: assign a 64-bit value to a local register pair with two
-- MR (move-register) instructions.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): the "let" keyword and the "return (" wrapper for the
   -- final expression are elided in this excerpt.
       r_dst_lo = mkVReg u_dst I32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo = MR r_dst_lo r_src_lo
       mov_hi = MR r_dst_hi r_src_hi
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers can hold I64 values.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "iselExpr64(???)"

-- powerpc: load a 64-bit value with two 32-bit LDs (hi/lo amodes from
-- getI64Amodes).
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- NOTE(review): the final result-register argument (presumably "rlo")
-- is elided in this excerpt.

-- A local 64-bit register needs no code at all.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- Load a 64-bit integer literal into a fresh register pair, building
-- each 32-bit half with the standard PowerPC lis/ori idiom (LIS sets
-- the upper 16 bits, OR-immediate fills in the lower 16 bits).
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- the four 16-bit halves of the literal, least significant first
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: half2 (bits 32-47) belongs in the HIGH
                -- register.  Previously this OR targeted rlo, which
                -- clobbered the just-assembled low word and left the
                -- bottom half of rhi unset.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- powerpc: 64-bit addition via add-carrying (ADDC) on the low words;
-- the carry propagates into the high-word add.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   -- NOTE(review): the "let" keyword, the high-word add instruction
   -- (presumably an ADDE on rhi), and the closing bracket of the toOL
   -- list are elided in this excerpt.
       r1hi = getHiVRegFromLo r1lo
       r2hi = getHiVRegFromLo r2lo
       toOL [ ADDC rlo r1lo r2lo,
   return (ChildCode64 code rlo)

-- Catch-all: unsupported 64-bit expression on powerpc.
-- NOTE(review): the equation head ("iselExpr64 expr") is elided here.
   = pprPanic "iselExpr64(powerpc)" (ppr expr)
427 #endif /* powerpc_TARGET_ARCH */
430 -- -----------------------------------------------------------------------------
431 -- The 'Register' type
-- 'Register's passed up the tree. If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.

-- NOTE(review): the data declaration header (presumably
-- "data Register") is elided in this excerpt.
  = Fixed MachRep Reg InstrBlock   -- value lives in this exact reg
  | Any MachRep (Reg -> InstrBlock) -- caller picks the destination reg
-- | Re-tag a 'Register' with a different 'MachRep', leaving its
-- register / code-generation payload untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep =
    case register of
      Fixed _ reg code -> Fixed rep reg code
      Any _ codefn     -> Any rep codefn
447 -- -----------------------------------------------------------------------------
448 -- Utils based on getRegister, below
-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head (presumably "getSomeReg expr = do"),
-- the "case r of" scrutinee, and the Fixed alternative are elided in
-- this excerpt; only the Any branch's body survives below.
    r <- getRegister expr
        -- Any: allocate a fresh register and run the code into it.
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
462 -- -----------------------------------------------------------------------------
463 -- Grab the Reg for a CmmReg
-- | Map a 'CmmReg' to the 'Reg' it lives in: locals become virtual
-- regs; globals must correspond to a real machine register on this
-- platform, otherwise we panic.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))
-- NOTE(review): this equation's body (presumably "= mkVReg u pk") is
-- elided in this excerpt.

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform. Hence ...
479 -- -----------------------------------------------------------------------------
480 -- Generate code to get a subtree into a Register
-- Don't delete this -- it's very handy for debugging.
-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "getRegister(???)"

-- | Generate code to compute a CmmExpr into some register; the
-- machine-independent clauses come first, target-specific ones follow.
getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg))
-- NOTE(review): the "= do" line and the matching #endif are elided in
-- this excerpt.
      reg <- getPicBaseNat wordRep
      return (Fixed wordRep reg nilOL)

-- A plain register reference: fixed location, no code.
getRegister (CmmReg reg)
  = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)

-- Normalise reg+off into an explicit MO_Add first.
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- Truncating (x >> 32) to 32 bits is just the high register of the
-- 64-bit pair.
getRegister (CmmMachOp (MO_U_Conv I64 I32)
             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_S_Conv I64 I32)
             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 (getHiVRegFromLo rlo) code

-- Plain truncation to 32 bits is the low register of the pair.
getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 rlo code

getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed I32 rlo code
529 -- end of machine-"independent" bit; here we go on the rest...
#if alpha_TARGET_ARCH

-- NOTE(review): this entire Alpha section is legacy code -- it still
-- matches on pre-Cmm Stix constructors (StDouble, StPrim, StInt,
-- StCall) and old PrimOps, so it almost certainly no longer compiles
-- against the Cmm-based types above.  Many connecting lines
-- ("= case primop of", list brackets, "let"/"in", #endif partners)
-- are elided in this excerpt; code tokens are kept verbatim.

-- Float/double literal: emit it as read-only data and load it.
getRegister (StDouble d)
  = getBlockIdNat `thenNat` \ lbl ->
    getNewRegNat PtrRep `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
            LDATA RoDataSegment lbl [
                DATA TF [ImmLab (rational d)]
            LDA tmp (AddrImm (ImmCLbl lbl)),
            LD TF dst (AddrReg tmp)]
    return (Any F64 code)

getRegister (StPrim primop [x]) -- unary PrimOps
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp -> trivialUCode NOT x

      FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode F64 (FNEG TF) x

      OrdOp -> coerceIntCode IntRep x

      Float2IntOp -> coerceFP2Int x
      Int2FloatOp -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

      -- transcendental ops fall through to a C library call
      other_op -> getRegister (StCall fn CCallConv F64 [x])
          fn = case other_op of
                 FloatExpOp -> FSLIT("exp")
                 FloatLogOp -> FSLIT("log")
                 FloatSqrtOp -> FSLIT("sqrt")
                 FloatSinOp -> FSLIT("sin")
                 FloatCosOp -> FSLIT("cos")
                 FloatTanOp -> FSLIT("tan")
                 FloatAsinOp -> FSLIT("asin")
                 FloatAcosOp -> FSLIT("acos")
                 FloatAtanOp -> FSLIT("atan")
                 FloatSinhOp -> FSLIT("sinh")
                 FloatCoshOp -> FSLIT("cosh")
                 FloatTanhOp -> FSLIT("tanh")
                 DoubleExpOp -> FSLIT("exp")
                 DoubleLogOp -> FSLIT("log")
                 DoubleSqrtOp -> FSLIT("sqrt")
                 DoubleSinOp -> FSLIT("sin")
                 DoubleCosOp -> FSLIT("cos")
                 DoubleTanOp -> FSLIT("tan")
                 DoubleAsinOp -> FSLIT("asin")
                 DoubleAcosOp -> FSLIT("acos")
                 DoubleAtanOp -> FSLIT("atan")
                 DoubleSinhOp -> FSLIT("sinh")
                 DoubleCoshOp -> FSLIT("cosh")
                 DoubleTanhOp -> FSLIT("tanh")

    pr = panic "MachCode.getRegister: no primrep needed for Alpha"

getRegister (StPrim primop [x, y]) -- dyadic PrimOps
      -- note: >/>= are implemented as the flipped </<=
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp -> trivialCode (CMP LTT) y x
      IntGeOp -> trivialCode (CMP LE) y x
      IntEqOp -> trivialCode (CMP EQQ) x y
      IntNeOp -> int_NE_code x y
      IntLtOp -> trivialCode (CMP LTT) x y
      IntLeOp -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      -- NOTE(review): WordGeOp uses (x y) where the other *GeOp rows
      -- use the flipped (y x) -- looks inconsistent; confirm.
      WordGeOp -> trivialCode (CMP ULE) x y
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp -> trivialCode (ADD Q False) x y
      IntSubOp -> trivialCode (SUB Q False) x y
      IntMulOp -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp -> trivialCode (REM Q False) x y

      WordAddOp -> trivialCode (ADD Q False) x y
      WordSubOp -> trivialCode (SUB Q False) x y
      WordMulOp -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
      FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
      FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
      FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y

      DoubleAddOp -> trivialFCode F64 (FADD TF) x y
      DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode F64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp -> trivialCode OR x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
      DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])

{- ------------------------------------------------------------
        Some bizarre special code for getting condition codes into
        registers.  Integer non-equality is a test for equality
        followed by an XOR with 1.  (Integer comparisons always set
        the result register to 0 or 1.)  Floating point comparisons of
        any kind leave the result in a floating point register, so we
        need to wrangle an integer register out of things.

 int_NE_code :: StixTree -> StixTree -> NatM Register

   = trivialCode (CMP EQQ) x y `thenNat` \ register ->
     getNewRegNat IntRep `thenNat` \ tmp ->
         code = registerCode register tmp
         src = registerName register tmp
         code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
     return (Any IntRep code__2)

{- ------------------------------------------------------------
        Comments for int_NE_code also apply to cmpF_code

    :: (Reg -> Reg -> Reg -> Instr)
    -> StixTree -> StixTree

 cmpF_code instr cond x y
   = trivialFCode pr instr x y `thenNat` \ register ->
     getNewRegNat F64 `thenNat` \ tmp ->
     getBlockIdNat `thenNat` \ lbl ->
         code = registerCode register tmp
         result = registerName register tmp
         code__2 dst = code . mkSeqInstrs [
             OR zeroh (RIImm (ImmInt 1)) dst,
             BF cond result (ImmCLbl lbl),
             OR zeroh (RIReg zeroh) dst,
     return (Any IntRep code__2)

     pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
 ------------------------------------------------------------

getRegister (CmmLoad pk mem)
  = getAmode mem `thenNat` \ amode ->
        code = amodeCode amode
        src = amodeAddr amode
        size = primRepToSize pk
        code__2 dst = code . mkSeqInstr (LD size dst src)
    return (Any pk code__2)

-- Integer literal: small immediates via OR-with-zero, others via LDI,
-- label-like values via LDA.  NOTE(review): the guards selecting among
-- the three code forms below are elided in this excerpt.
getRegister (StInt i)
        code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    return (Any IntRep code)
        code dst = mkSeqInstr (LDI Q dst src)
    return (Any IntRep code)
        src = ImmInt (fromInteger i)
        code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    return (Any PtrRep code)
        -- NOTE(review): partial match -- no Nothing case visible.
        imm__2 = case imm of Just x -> x

#endif /* alpha_TARGET_ARCH */
762 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- NOTE(review): the opening "#if i386_TARGET_ARCH" for this section is
-- elided in this excerpt; several "let"/closing-bracket lines are too.

-- i386: float literal -- emit it as static data and load it via a
-- (possibly PIC-indirected) reference.
getRegister (CmmLit (CmmFloat f F32)) = do
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
            CmmStaticLit (CmmFloat f F32)]
            `consOL` (addr_code `snocOL`
    return (Any F32 code)

-- i386: 0.0 and 1.0 have dedicated x87 load instructions (fldz/fld1);
-- NOTE(review): the guard lines selecting these special cases
-- (presumably "| d == 0.0" / "| d == 1.0") are elided in this excerpt.
getRegister (CmmLit (CmmFloat d F64))
  = let code dst = unitOL (GLDZ dst)
    in  return (Any F64 code)

  = let code dst = unitOL (GLD1 dst)
    in  return (Any F64 code)

-- General F64 literal: static data + load, as for F32.
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
            CmmStaticLit (CmmFloat d F64)]
            `consOL` (addr_code `snocOL`
    return (Any F64 code)

#endif /* i386_TARGET_ARCH */

#if x86_64_TARGET_ARCH

-- x86_64 (SSE): 0.0 is just xor dst,dst.
getRegister (CmmLit (CmmFloat 0.0 rep)) = do
   let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
   return (Any rep code)

-- x86_64: other float literals via rip-relative load of static data.
getRegister (CmmLit (CmmFloat f rep)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            CmmStaticLit (CmmFloat f rep)],
            MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
    return (Any rep code)

#endif /* x86_64_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
-- (MOVZxL zero-extends, MOVSxL sign-extends, fused with the load).
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I32 code)
-- NOTE(review): the matching #endif for the #if above is elided here.
#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I32) addr
  return (Any I64 code)
-- NOTE(review): the matching #endif for the #if above is elided here.
#if x86_64_TARGET_ARCH
-- PIC: PicBaseReg + literal becomes a single rip-relative LEA.
getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
    = return $ Any I64 (\dst -> unitOL $
        LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-- NOTE(review): the matching #endif for the #if above is elided here.

#if x86_64_TARGET_ARCH
-- SSE has no FP negate instruction: XOR with a 128-bit sign-bit mask
-- held in read-only data instead.
-- NOTE(review): several "let"/closing-bracket lines are elided in the
-- two clauses below.
getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
    code dst = x_code dst `appOL` toOL [
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
            CmmStaticLit (CmmInt 0x80000000 I32),
            CmmStaticLit (CmmInt 0 I32),
            CmmStaticLit (CmmInt 0 I32),
            CmmStaticLit (CmmInt 0 I32)
        XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorps, so we need the 128-bit constant
        -- ToDo: rip-relative
  return (Any F32 code)

getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
    -- This is how gcc does it, so it can't be that bad:
    code dst = x_code dst `appOL` toOL [
        LDATA ReadOnlyData16 [
            CmmStaticLit (CmmInt 0x8000000000000000 I64),
            CmmStaticLit (CmmInt 0 I64)
        -- gcc puts an unpck here. Wonder if we need it.
        XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorpd, so we need the 128-bit constant
  return (Any F64 code)
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- Unary MachOps on x86/x86_64.
-- NOTE(review): the "= case mop of" line, several #else/#endif
-- partners, and the "where" keyword are elided in this excerpt; the
-- apparently-duplicated F32<->F64 conversion rows below come from
-- different elided #if branches (x87 vs SSE).
getRegister (CmmMachOp mop [x]) -- unary MachOps
      MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
      MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x

      MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
      MO_Not rep -> trivialUCode rep (NOT rep) x

      -- Narrowing: no instruction needed beyond ensuring the value is
      -- in a byte/word-addressable register.
      MO_U_Conv I32 I8 -> toI8Reg I32 x
      MO_S_Conv I32 I8 -> toI8Reg I32 x
      MO_U_Conv I16 I8 -> toI8Reg I16 x
      MO_S_Conv I16 I8 -> toI8Reg I16 x
      MO_U_Conv I32 I16 -> toI16Reg I32 x
      MO_S_Conv I32 I16 -> toI16Reg I32 x
#if x86_64_TARGET_ARCH
      MO_U_Conv I64 I32 -> conversionNop I64 x
      MO_S_Conv I64 I32 -> conversionNop I64 x
      MO_U_Conv I64 I16 -> toI16Reg I64 x
      MO_S_Conv I64 I16 -> toI16Reg I64 x
      MO_U_Conv I64 I8 -> toI8Reg I64 x
      MO_S_Conv I64 I8 -> toI8Reg I64 x

      -- Same-size conversions are no-ops.
      MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
      MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x

      -- widenings
      MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
      MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
      MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x

      MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
      MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
      MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
      MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
      MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
      MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
      MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
      MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      -- x87 keeps everything at 80 bits, so F32<->F64 is a no-op...
      MO_S_Conv F32 F64 -> conversionNop F64 x
      MO_S_Conv F64 F32 -> conversionNop F32 x
      -- ...whereas SSE needs a real conversion (cvtss2sd/cvtsd2ss).
      MO_S_Conv F32 F64 -> coerceFP2FP F64 x
      MO_S_Conv F64 F32 -> coerceFP2FP F32 x

      -- int<->float conversions
      -- NOTE(review): the guard head ("MO_S_Conv from to" etc.) is
      -- elided in this excerpt.
        | isFloatingRep from -> coerceFP2Int from to x
        | isFloatingRep to -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)

   -- signed or unsigned extension.
   integerExtend from to instr expr = do
       (reg,e_code) <- if from == I8 then getByteReg expr
       -- NOTE(review): the "else" branch and the code-assembly lines of
       -- this helper are elided in this excerpt.
           instr from (OpReg reg) (OpReg dst)

   toI8Reg new_rep expr
       = do codefn <- getAnyReg expr
            return (Any new_rep codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

   toI16Reg = toI8Reg -- for now

   conversionNop new_rep expr
       = do e_code <- getRegister expr
            return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps on x86/x86_64.  I8 operands must have been widened
-- by this point (hence the assertion).
-- NOTE(review): the "case mop of" line after the assertion and several
-- #else/#endif partners are elided in this excerpt.
getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
  = ASSERT2(cmmExprRep x /= I8, pprExpr e)
      MO_Eq F32 -> condFltReg EQQ x y
      MO_Ne F32 -> condFltReg NE x y
      MO_S_Gt F32 -> condFltReg GTT x y
      MO_S_Ge F32 -> condFltReg GE x y
      MO_S_Lt F32 -> condFltReg LTT x y
      MO_S_Le F32 -> condFltReg LE x y

      MO_Eq F64 -> condFltReg EQQ x y
      MO_Ne F64 -> condFltReg NE x y
      MO_S_Gt F64 -> condFltReg GTT x y
      MO_S_Ge F64 -> condFltReg GE x y
      MO_S_Lt F64 -> condFltReg LTT x y
      MO_S_Le F64 -> condFltReg LE x y

      MO_Eq rep -> condIntReg EQQ x y
      MO_Ne rep -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      MO_U_Gt rep -> condIntReg GU x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU x y
      MO_U_Le rep -> condIntReg LEU x y

#if i386_TARGET_ARCH
      -- x87 float arithmetic
      MO_Add F32 -> trivialFCode F32 GADD x y
      MO_Sub F32 -> trivialFCode F32 GSUB x y

      MO_Add F64 -> trivialFCode F64 GADD x y
      MO_Sub F64 -> trivialFCode F64 GSUB x y

      MO_S_Quot F32 -> trivialFCode F32 GDIV x y
      MO_S_Quot F64 -> trivialFCode F64 GDIV x y

#if x86_64_TARGET_ARCH
      -- SSE float arithmetic
      MO_Add F32 -> trivialFCode F32 ADD x y
      MO_Sub F32 -> trivialFCode F32 SUB x y

      MO_Add F64 -> trivialFCode F64 ADD x y
      MO_Sub F64 -> trivialFCode F64 SUB x y

      MO_S_Quot F32 -> trivialFCode F32 FDIV x y
      MO_S_Quot F64 -> trivialFCode F64 FDIV x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True True x y
      MO_S_Rem rep -> div_code rep True False x y
      MO_U_Quot rep -> div_code rep False True x y
      MO_U_Rem rep -> div_code rep False False x y

#if i386_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 GMUL x y
      MO_Mul F64 -> trivialFCode F64 GMUL x y

#if x86_64_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 MUL x y
      MO_Mul F64 -> trivialFCode F64 MUL x y

      MO_Mul rep -> let op = IMUL rep in
                    trivialCode rep op (Just op) x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_And rep -> let op = AND rep in
                    trivialCode rep op (Just op) x y
      MO_Or rep -> let op = OR rep in
                   trivialCode rep op (Just op) x y
      MO_Xor rep -> let op = XOR rep in
                    trivialCode rep op (Just op) x y

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
           => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
      MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
      MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1110 --------------------
-- imulMayOflo: test whether a signed multiply overflows.  Strategy visible
-- below: IMUL2 produces the double-width product in %edx:%eax, the low part
-- is arithmetically shifted to recover its sign extension, and SUB compares
-- that against the high part — result is zero in eax iff no overflow.
-- NOTE(review): the `where`/`let` header lines and the shift_amt case arms
-- are elided from this listing (embedded numbering skips).
1111 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1112 imulMayOflo rep a b = do
1113 (a_reg, a_code) <- getNonClobberedReg a
1114 b_code <- getAnyReg b
1116 shift_amt = case rep of
1119 _ -> panic "shift_amt"
1121 code = a_code `appOL` b_code eax `appOL`
1123 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1124 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1125 -- sign extend lower part
1126 SUB rep (OpReg edx) (OpReg eax)
1127 -- compare against upper
1128 -- eax==0 if high part == sign extended low part
1131 return (Fixed rep eax code)
1133 --------------------
-- shift_code: shift instructions on x86 require the amount to be an
-- immediate or to live in %cl, hence the two cases below.
1134 shift_code :: MachRep
1135 -> (Operand -> Operand -> Instr)
1140 {- Case1: shift length as immediate -}
1141 shift_code rep instr x y@(CmmLit lit) = do
1142 x_code <- getAnyReg x
1145 = x_code dst `snocOL`
1146 instr (OpImm (litToImm lit)) (OpReg dst)
1148 return (Any rep code)
1150 {- Case2: shift length is complex (non-immediate) -}
-- x must survive the evaluation of y (which is computed into ecx),
-- so x goes through getNonClobberedReg.
1151 shift_code rep instr x y{-amount-} = do
1152 (x_reg, x_code) <- getNonClobberedReg x
1153 y_code <- getAnyReg y
1155 code = x_code `appOL`
1157 instr (OpReg ecx) (OpReg x_reg)
1159 return (Fixed rep x_reg code)
1161 --------------------
-- add_code / sub_code: use the three-operand LEA form (add_int) when the
-- literal fits in 32 bits; otherwise fall back to plain ADD/SUB.
1162 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1163 add_code rep x (CmmLit (CmmInt y _))
1164 | not (is64BitInteger y) = add_int rep x y
1165 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1167 --------------------
1168 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
-- Subtraction of a literal becomes addition of its negation; SUB is not
-- commutative, hence the Nothing (no reversed form) in the fallback.
1169 sub_code rep x (CmmLit (CmmInt y _))
1170 | not (is64BitInteger (-y)) = add_int rep x (-y)
1171 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1173 -- our three-operand add instruction:
-- (implemented with an address-computation on [x_reg + imm]; the LEA
-- opcode line itself is elided from this listing)
1174 add_int rep x y = do
1175 (x_reg, x_code) <- getSomeReg x
1177 imm = ImmInt (fromInteger y)
1181 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1184 return (Any rep code)
1186 ----------------------
-- div_code: x86 division.  `signed` selects IDIV+CLTD vs DIV+zeroed edx;
-- `quotient` selects eax (quotient) vs edx (remainder) as the result reg.
1187 div_code rep signed quotient x y = do
1188 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1189 x_code <- getAnyReg x
1191 widen | signed = CLTD rep
1192 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1194 instr | signed = IDIV
1197 code = y_code `appOL`
1199 toOL [widen, instr rep y_op]
1201 result | quotient = eax
1205 return (Fixed rep result code)
-- getRegister clauses for memory loads and literals (x86/x86_64).
-- NOTE(review): guard lines and some #else/#endif lines are elided from
-- this listing (the embedded numbering skips).
-- Floating-point load: x87 GLD on i386, plain MOV on x86_64 (SSE regs).
1208 getRegister (CmmLoad mem pk)
1211 Amode src mem_code <- getAmode mem
1213 code dst = mem_code `snocOL`
1214 IF_ARCH_i386(GLD pk src dst,
1215 MOV pk (OpAddr src) (OpReg dst))
1217 return (Any pk code)
1219 #if i386_TARGET_ARCH
1220 getRegister (CmmLoad mem pk)
1223 code <- intLoadCode (instr pk) mem
1224 return (Any pk code)
1226 instr I8 = MOVZxL pk
1229 -- we always zero-extend 8-bit loads, if we
1230 -- can't think of anything better. This is because
1231 -- we can't guarantee access to an 8-bit variant of every register
1232 -- (esi and edi don't have 8-bit variants), so to make things
1233 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1236 #if x86_64_TARGET_ARCH
1237 -- Simpler memory load code on x86_64
1238 getRegister (CmmLoad mem pk)
1240 code <- intLoadCode (MOV pk) mem
1241 return (Any pk code)
-- Loading zero: XOR reg,reg instead of MOV 0.
1244 getRegister (CmmLit (CmmInt 0 rep))
1246 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1247 adj_rep = case rep of I64 -> I32; _ -> rep
1248 rep1 = IF_ARCH_i386( rep, adj_rep )
1250 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1252 return (Any rep code)
1254 #if x86_64_TARGET_ARCH
1255 -- optimisation for loading small literals on x86_64: take advantage
1256 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1257 -- instruction forms are shorter.
1258 getRegister (CmmLit lit)
1259 | I64 <- cmmLitRep lit, not (isBigLit lit)
1262 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1264 return (Any I64 code)
-- "Big" means not representable as an unsigned 32-bit value.
1266 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1268 -- note1: not the same as is64BitLit, because that checks for
1269 -- signed literals that fit in 32 bits, but we want unsigned
1271 -- note2: all labels are small, because we're assuming the
1272 -- small memory model (see gcc docs, -mcmodel=small).
-- General literal: move the immediate directly into the destination.
1275 getRegister (CmmLit lit)
1279 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1281 return (Any rep code)
1283 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- intLoadCode: build the delayed (Reg -> InstrBlock) code for an integer
-- load, given the load instruction to use and the address expression.
1286 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1287 -> NatM (Reg -> InstrBlock)
1288 intLoadCode instr mem = do
1289 Amode src mem_code <- getAmode mem
1290 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1292 -- Compute an expression into *any* register, adding the appropriate
1293 -- move instruction if necessary.
1294 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1296 r <- getRegister expr
-- anyReg: an Any register's code is already parameterised by destination;
-- a Fixed register needs a trailing reg-to-reg move into the destination.
1299 anyReg :: Register -> NatM (Reg -> InstrBlock)
1300 anyReg (Any _ code) = return code
1301 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1303 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1304 -- Fixed registers might not be byte-addressable, so we make sure we've
1305 -- got a temporary, inserting an extra reg copy if necessary.
1306 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1307 #if x86_64_TARGET_ARCH
1308 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 path (the #else line is elided from this listing): virtual regs are
-- assumed byte-addressable; a real (fixed) reg is copied into a fresh temp.
1310 getByteReg expr = do
1311 r <- getRegister expr
1314 tmp <- getNewRegNat rep
1315 return (tmp, code tmp)
1317 | isVirtualReg reg -> return (reg,code)
1319 tmp <- getNewRegNat rep
1320 return (tmp, code `snocOL` reg2reg rep reg tmp)
1321 -- ToDo: could optimise slightly by checking for byte-addressable
1322 -- real registers, but that will happen very rarely if at all.
1325 -- Another variant: this time we want the result in a register that cannot
1326 -- be modified by code to evaluate an arbitrary expression.
1327 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1328 getNonClobberedReg expr = do
1329 r <- getRegister expr
-- Any result: materialise into a fresh temporary (nothing else writes it).
1332 tmp <- getNewRegNat rep
1333 return (tmp, code tmp)
1335 -- only free regs can be clobbered
-- Fixed result in a free real register: copy it out to a temp so later
-- code can't clobber it.  (The fall-through arm for other fixed regs is
-- elided from this listing.)
1336 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1337 tmp <- getNewRegNat rep
1338 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- reg2reg: register-to-register move for the given rep.  On i386 a
-- floating value needs the x87 GMOV; everything else is a plain MOV.
-- NOTE(review): the equation head and the #else line are elided here.
1342 reg2reg :: MachRep -> Reg -> Reg -> Instr
1344 #if i386_TARGET_ARCH
1345 | isFloatingRep rep = GMOV src dst
1347 | otherwise = MOV rep (OpReg src) (OpReg dst)
1349 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1351 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1353 #if sparc_TARGET_ARCH
-- SPARC: float literals are placed in a data section and loaded via the
-- SETHI/LO pair (SPARC immediates are built from a 22-bit high part plus
-- a low offset).
1355 getRegister (CmmLit (CmmFloat f F32)) = do
1356 lbl <- getNewLabelNat
1357 let code dst = toOL [
1360 CmmStaticLit (CmmFloat f F32)],
1361 SETHI (HI (ImmCLbl lbl)) dst,
1362 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1363 return (Any F32 code)
1365 getRegister (CmmLit (CmmFloat d F64)) = do
1366 lbl <- getNewLabelNat
1367 let code dst = toOL [
1370 CmmStaticLit (CmmFloat d F64)],
1371 SETHI (HI (ImmCLbl lbl)) dst,
1372 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1373 return (Any F64 code)
-- Unary MachOps on SPARC.  Negation/complement are synthesised from
-- SUB/XNOR against the hardwired zero register g0.
1375 getRegister (CmmMachOp mop [x]) -- unary MachOps
1377 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1378 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1380 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1381 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1383 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1385 MO_U_Conv F64 F32-> coerceDbl2Flt x
1386 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1388 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1389 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1390 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1391 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1393 -- Conversions which are a nop on sparc
1395 | from == to -> conversionNop to x
1396 MO_U_Conv I32 to -> conversionNop to x
1397 MO_S_Conv I32 to -> conversionNop to x
-- Widening: zero-extend (unsigned) or sign-extend (signed) via shifts.
1400 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1401 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1402 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1403 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1405 other_op -> panic "Unknown unary mach op"
-- integerExtend: the left-shift line is elided from this listing; the
-- visible right shift (SRA = arithmetic/signed, SRL = logical/unsigned)
-- completes the extension.
1408 integerExtend signed from to expr = do
1409 (reg, e_code) <- getSomeReg expr
1413 ((if signed then SRA else SRL)
1414 reg (RIImm (ImmInt 0)) dst)
1415 return (Any to code)
-- conversionNop: reuse the operand's code, just relabel its rep.
1416 conversionNop new_rep expr
1417 = do e_code <- getRegister expr
1418 return (swizzleRegisterRep e_code new_rep)
-- SPARC clause for dyadic MachOps.  Same overall shape as the x86 clause:
-- comparisons via condFltReg/condIntReg, arithmetic via trivial*Code,
-- division via out-of-line millicode calls (idiv).
-- NOTE(review): the `case mop of` scrutinee line is elided from this
-- listing (embedded numbering skips).
1420 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1422 MO_Eq F32 -> condFltReg EQQ x y
1423 MO_Ne F32 -> condFltReg NE x y
1425 MO_S_Gt F32 -> condFltReg GTT x y
1426 MO_S_Ge F32 -> condFltReg GE x y
1427 MO_S_Lt F32 -> condFltReg LTT x y
1428 MO_S_Le F32 -> condFltReg LE x y
1430 MO_Eq F64 -> condFltReg EQQ x y
1431 MO_Ne F64 -> condFltReg NE x y
1433 MO_S_Gt F64 -> condFltReg GTT x y
1434 MO_S_Ge F64 -> condFltReg GE x y
1435 MO_S_Lt F64 -> condFltReg LTT x y
1436 MO_S_Le F64 -> condFltReg LE x y
1438 MO_Eq rep -> condIntReg EQQ x y
1439 MO_Ne rep -> condIntReg NE x y
1441 MO_S_Gt rep -> condIntReg GTT x y
1442 MO_S_Ge rep -> condIntReg GE x y
1443 MO_S_Lt rep -> condIntReg LTT x y
1444 MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): the I32 unsigned comparisons below use the signed
-- condition codes (GTT/GE/LTT/LE) while the I16 ones use the unsigned
-- codes (GU/GEU/LU/LEU) — looks suspicious, but the elided context may
-- explain it; flagging rather than changing.
1446 MO_U_Gt I32 -> condIntReg GTT x y
1447 MO_U_Ge I32 -> condIntReg GE x y
1448 MO_U_Lt I32 -> condIntReg LTT x y
1449 MO_U_Le I32 -> condIntReg LE x y
1451 MO_U_Gt I16 -> condIntReg GU x y
1452 MO_U_Ge I16 -> condIntReg GEU x y
1453 MO_U_Lt I16 -> condIntReg LU x y
1454 MO_U_Le I16 -> condIntReg LEU x y
1456 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1457 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1459 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1461 -- ToDo: teach about V8+ SPARC div instructions
-- Division is done by calling the platform's .div/.rem millicode routines.
1462 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1463 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1464 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1465 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1467 MO_Add F32 -> trivialFCode F32 FADD x y
1468 MO_Sub F32 -> trivialFCode F32 FSUB x y
1469 MO_Mul F32 -> trivialFCode F32 FMUL x y
1470 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1472 MO_Add F64 -> trivialFCode F64 FADD x y
1473 MO_Sub F64 -> trivialFCode F64 FSUB x y
1474 MO_Mul F64 -> trivialFCode F64 FMUL x y
1475 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1477 MO_And rep -> trivialCode rep (AND False) x y
1478 MO_Or rep -> trivialCode rep (OR False) x y
1479 MO_Xor rep -> trivialCode rep (XOR False) x y
1481 MO_Mul rep -> trivialCode rep (SMUL False) x y
1483 MO_Shl rep -> trivialCode rep SLL x y
1484 MO_U_Shr rep -> trivialCode rep SRL x y
1485 MO_S_Shr rep -> trivialCode rep SRA x y
-- pow is called out-of-line; single-precision args are promoted to double.
1488 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1489 [promote x, promote y])
1490 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1491 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1494 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1496 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1498 --------------------
-- SPARC imulMayOflo: same idea as the x86 version — compute the full
-- product (low in res_lo, high presumably via RDY on an elided line),
-- sign-extend the low part with SRA, and subtract to compare against the
-- high part; dst is zero iff the multiply did not overflow.
1499 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1500 imulMayOflo rep a b = do
1501 (a_reg, a_code) <- getSomeReg a
1502 (b_reg, b_code) <- getSomeReg b
1503 res_lo <- getNewRegNat I32
1504 res_hi <- getNewRegNat I32
1506 shift_amt = case rep of
1509 _ -> panic "shift_amt"
1510 code dst = a_code `appOL` b_code `appOL`
1512 SMUL False a_reg (RIReg b_reg) res_lo,
1514 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1515 SUB False False res_lo (RIReg res_hi) dst
1517 return (Any I32 code)
-- Plain load: getAmode computes the address, LD does the load.
1519 getRegister (CmmLoad mem pk) = do
1520 Amode src code <- getAmode mem
1522 code__2 dst = code `snocOL` LD pk src dst
1523 return (Any pk code__2)
-- Small integer literal (guard line elided): synthesised with OR g0,imm.
1525 getRegister (CmmLit (CmmInt i _))
1528 src = ImmInt (fromInteger i)
1529 code dst = unitOL (OR False g0 (RIImm src) dst)
1531 return (Any I32 code)
-- General literal: SETHI for the high 22 bits, OR for the low bits.
1533 getRegister (CmmLit lit)
1534 = let rep = cmmLitRep lit
1538 OR False dst (RIImm (LO imm)) dst]
1539 in return (Any I32 code)
1541 #endif /* sparc_TARGET_ARCH */
1543 #if powerpc_TARGET_ARCH
-- PowerPC getRegister clauses for loads.
1544 getRegister (CmmLoad mem pk)
1547 Amode addr addr_code <- getAmode mem
-- The assertion ties register class to rep: floating loads must land in a
-- double-class register.
1548 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1549 addr_code `snocOL` LD pk dst addr
1550 return (Any pk code)
1552 -- catch simple cases of zero- or sign-extended load
-- lbz zero-extends by itself, so an 8-bit unsigned widening load is free.
1553 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1554 Amode addr addr_code <- getAmode mem
1555 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1557 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1559 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1560 Amode addr addr_code <- getAmode mem
1561 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- Signed 16-bit widening load uses LA (load algebraic, i.e. sign-extending).
1563 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1564 Amode addr addr_code <- getAmode mem
1565 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- PowerPC unary MachOps.  Narrowing conversions are nops (high bits are
-- treated as undefined); signed widening uses EXTS, unsigned widening
-- masks with AND.
-- NOTE(review): the `case mop of` scrutinee and the MO_S_Conv/MO_U_Conv
-- pattern-head lines are elided from this listing.
1567 getRegister (CmmMachOp mop [x]) -- unary MachOps
1569 MO_Not rep -> trivialUCode rep NOT x
1571 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1572 MO_S_Conv F32 F64 -> conversionNop F64 x
1575 | from == to -> conversionNop to x
1576 | isFloatingRep from -> coerceFP2Int from to x
1577 | isFloatingRep to -> coerceInt2FP from to x
1579 -- narrowing is a nop: we treat the high bits as undefined
1580 MO_S_Conv I32 to -> conversionNop to x
1581 MO_S_Conv I16 I8 -> conversionNop I8 x
1582 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1583 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1586 | from == to -> conversionNop to x
1587 -- narrowing is a nop: we treat the high bits as undefined
1588 MO_U_Conv I32 to -> conversionNop to x
1589 MO_U_Conv I16 I8 -> conversionNop I8 x
1590 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1591 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1593 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1594 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1595 MO_S_Neg rep -> trivialUCode rep NEG x
-- conversionNop: reuse the operand's code under a different rep label.
1598 conversionNop new_rep expr
1599 = do e_code <- getRegister expr
1600 return (swizzleRegisterRep e_code new_rep)
-- PowerPC dyadic MachOps.  Unlike x86/sparc, sub-word integer operands
-- are first widened to I32 with extendSExpr/extendUExpr (defined below),
-- because PPC comparisons and divides operate on full words.
1602 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1604 MO_Eq F32 -> condFltReg EQQ x y
1605 MO_Ne F32 -> condFltReg NE x y
1607 MO_S_Gt F32 -> condFltReg GTT x y
1608 MO_S_Ge F32 -> condFltReg GE x y
1609 MO_S_Lt F32 -> condFltReg LTT x y
1610 MO_S_Le F32 -> condFltReg LE x y
1612 MO_Eq F64 -> condFltReg EQQ x y
1613 MO_Ne F64 -> condFltReg NE x y
1615 MO_S_Gt F64 -> condFltReg GTT x y
1616 MO_S_Ge F64 -> condFltReg GE x y
1617 MO_S_Lt F64 -> condFltReg LTT x y
1618 MO_S_Le F64 -> condFltReg LE x y
1620 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1621 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1623 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1624 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1625 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1626 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1628 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1629 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1630 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1631 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1633 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1634 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1635 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1636 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1638 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1639 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1640 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1641 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1643 -- optimize addition with 32-bit immediate
-- NOTE(review): the MO_Add I32 pattern head and several case lines are
-- elided from this listing (embedded numbering skips).
1647 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1648 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
-- Large immediate: split into high/low halves via ADDIS (add immediate
-- shifted) plus a low-part ADD.
1651 (src, srcCode) <- getSomeReg x
1652 let imm = litToImm lit
1653 code dst = srcCode `appOL` toOL [
1654 ADDIS dst src (HA imm),
1655 ADD dst dst (RIImm (LO imm))
1657 return (Any I32 code)
1658 _ -> trivialCode I32 True ADD x y
1660 MO_Add rep -> trivialCode rep True ADD x y
1662 case y of -- subfi ('subtract from' with immediate) doesn't exist
1663 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1664 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1665 _ -> trivialCodeNoImm rep SUBF y x
1667 MO_Mul rep -> trivialCode rep True MULLW x y
1669 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1671 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1672 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
-- Divide/remainder need explicitly widened operands (divw/divwu are
-- full-word instructions).
1674 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1675 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1677 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1678 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1680 MO_And rep -> trivialCode rep False AND x y
1681 MO_Or rep -> trivialCode rep False OR x y
1682 MO_Xor rep -> trivialCode rep False XOR x y
1684 MO_Shl rep -> trivialCode rep False SLW x y
1685 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1686 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- PowerPC literal clauses.
-- Small integer literal: a single LI (load immediate) if it fits.
1688 getRegister (CmmLit (CmmInt i rep))
1689 | Just imm <- makeImmediate rep True i
1691 code dst = unitOL (LI dst imm)
1693 return (Any rep code)
-- Float literal: emitted into a read-only data section and loaded via a
-- (possibly PIC-indirected) dynamic reference.
1695 getRegister (CmmLit (CmmFloat f frep)) = do
1696 lbl <- getNewLabelNat
1697 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1698 Amode addr addr_code <- getAmode dynRef
1700 LDATA ReadOnlyData [CmmDataLabel lbl,
1701 CmmStaticLit (CmmFloat f frep)]
1702 `consOL` (addr_code `snocOL` LD frep dst addr)
1703 return (Any frep code)
-- General literal: built from high half (line elided — presumably LIS)
-- plus OR of the low half.
1705 getRegister (CmmLit lit)
1706 = let rep = cmmLitRep lit
1710 OR dst dst (RIImm (LO imm))
1712 in return (Any rep code)
1714 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1716 -- extend?Rep: wrap integer expression of type rep
1717 -- in a conversion to I32
-- I32 operands pass through unchanged; smaller reps get an explicit
-- sign/zero-extending conversion node.
1718 extendSExpr I32 x = x
1719 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1720 extendUExpr I32 x = x
1721 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1723 #endif /* powerpc_TARGET_ARCH */
1726 -- -----------------------------------------------------------------------------
1727 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An Amode pairs a target addressing mode with the instructions needed to
-- put the right values into the registers it mentions.
1729 data Amode = Amode AddrMode InstrBlock
1732 Now, given a tree (the argument to an CmmLoad) that references memory,
1733 produce a suitable addressing mode.
1735 A Rule of the Game (tm) for Amodes: use of the addr bit must
1736 immediately follow use of the code part, since the code part puts
1737 values in registers which the addr then refers to. So you can't put
1738 anything in between, lest it overwrite some of those registers. If
1739 you need to do some other computation between the code part and use of
1740 the addr bit, first store the effective address from the amode in a
1741 temporary, then do the other computation, and then use the temporary:
1745 ... other computation ...
1749 getAmode :: CmmExpr -> NatM Amode
-- CmmRegOff is normalised into an explicit add first, then retried.
1750 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1752 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1754 #if alpha_TARGET_ARCH
-- Alpha getAmode: legacy Stix-style clauses (StPrim/StInt), written in the
-- old thenNat combinator style rather than do-notation.
-- reg - imm addressing: negate the literal into the displacement.
1756 getAmode (StPrim IntSubOp [x, StInt i])
1757 = getNewRegNat PtrRep `thenNat` \ tmp ->
1758 getRegister x `thenNat` \ register ->
1760 code = registerCode register tmp
1761 reg = registerName register tmp
1762 off = ImmInt (-(fromInteger i))
1764 return (Amode (AddrRegImm reg off) code)
-- reg + imm addressing.
1766 getAmode (StPrim IntAddOp [x, StInt i])
1767 = getNewRegNat PtrRep `thenNat` \ tmp ->
1768 getRegister x `thenNat` \ register ->
1770 code = registerCode register tmp
1771 reg = registerName register tmp
1772 off = ImmInt (fromInteger i)
1774 return (Amode (AddrRegImm reg off) code)
-- Absolute immediate address (the guard/pattern lines are elided here).
1778 = return (Amode (AddrImm imm__2) id)
1781 imm__2 = case imm of Just x -> x
-- Fallback: compute the address into a register and use it directly.
1784 = getNewRegNat PtrRep `thenNat` \ tmp ->
1785 getRegister other `thenNat` \ register ->
1787 code = registerCode register tmp
1788 reg = registerName register tmp
1790 return (Amode (AddrReg reg) code)
1792 #endif /* alpha_TARGET_ARCH */
1794 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1796 #if x86_64_TARGET_ARCH
-- x86_64 only: PIC-base-relative literals become RIP-relative addresses.
1798 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1799 CmmLit displacement])
1800 = return $ Amode (ripRel (litToImm displacement)) nilOL
1804 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1806 -- This is all just ridiculous, since it carefully undoes
1807 -- what mangleIndexTree has just done.
-- base - imm: fold the negated literal into the displacement.
1808 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1809 | not (is64BitLit lit)
1810 -- ASSERT(rep == I32)???
1811 = do (x_reg, x_code) <- getSomeReg x
1812 let off = ImmInt (-(fromInteger i))
1813 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- base + imm.
1815 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1816 | not (is64BitLit lit)
1817 -- ASSERT(rep == I32)???
1818 = do (x_reg, x_code) <- getSomeReg x
1819 let off = ImmInt (fromInteger i)
1820 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1822 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1823 -- recognised by the next rule.
1824 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1826 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << 0..3): maps onto the hardware scaled-index mode.
1828 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1829 [y, CmmLit (CmmInt shift _)]])
1830 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1831 = do (x_reg, x_code) <- getNonClobberedReg x
1832 -- x must be in a temp, because it has to stay live over y_code
1833 -- we could compare x_reg and y_reg and do something better here...
1834 (y_reg, y_code) <- getSomeReg y
1836 code = x_code `appOL` y_code
1837 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1838 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-- Plain literal address (fits in 32 bits): absolute immediate addressing.
1841 getAmode (CmmLit lit) | not (is64BitLit lit)
1842 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Fallback: compute into a register, address as [reg + 0].
1845 (reg,code) <- getSomeReg expr
1846 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1848 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1850 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1852 #if sparc_TARGET_ARCH
-- SPARC getAmode.  Guard lines (presumably fits13 checks on the literal)
-- are elided from this listing.
-- reg - imm.
1854 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1857 (reg, code) <- getSomeReg x
1859 off = ImmInt (-(fromInteger i))
1860 return (Amode (AddrRegImm reg off) code)
-- reg + imm.
1863 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1866 (reg, code) <- getSomeReg x
1868 off = ImmInt (fromInteger i)
1869 return (Amode (AddrRegImm reg off) code)
-- reg + reg.
1871 getAmode (CmmMachOp (MO_Add rep) [x, y])
1873 (regX, codeX) <- getSomeReg x
1874 (regY, codeY) <- getSomeReg y
1876 code = codeX `appOL` codeY
1877 return (Amode (AddrRegReg regX regY) code)
1879 -- XXX Is this same as "leaf" in Stix?
-- Literal address: SETHI loads the high part into a temp; the low part
-- goes in the immediate displacement.
1880 getAmode (CmmLit lit)
1882 tmp <- getNewRegNat I32
1884 code = unitOL (SETHI (HI imm__2) tmp)
1885 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1887 imm__2 = litToImm lit
-- Fallback: compute the address into a register.
1891 (reg, code) <- getSomeReg other
1894 return (Amode (AddrRegImm reg off) code)
1896 #endif /* sparc_TARGET_ARCH */
1898 #ifdef powerpc_TARGET_ARCH
-- PowerPC getAmode: immediates are validated with makeImmediate (signed
-- 16-bit displacement range).
1899 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1900 | Just off <- makeImmediate I32 True (-i)
1902 (reg, code) <- getSomeReg x
1903 return (Amode (AddrRegImm reg off) code)
1906 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1907 | Just off <- makeImmediate I32 True i
1909 (reg, code) <- getSomeReg x
1910 return (Amode (AddrRegImm reg off) code)
1912 -- optimize addition with 32-bit immediate
-- Large offset: ADDIS adds the high-adjusted half into a temp, and the low
-- half rides in the displacement field.
1914 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1916 tmp <- getNewRegNat I32
1917 (src, srcCode) <- getSomeReg x
1918 let imm = litToImm lit
1919 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1920 return (Amode (AddrRegImm tmp (LO imm)) code)
-- Bare literal address: LIS (load immediate shifted) for the high half.
1922 getAmode (CmmLit lit)
1924 tmp <- getNewRegNat I32
1925 let imm = litToImm lit
1926 code = unitOL (LIS tmp (HA imm))
1927 return (Amode (AddrRegImm tmp (LO imm)) code)
-- reg + reg indexed form.
1929 getAmode (CmmMachOp (MO_Add I32) [x, y])
1931 (regX, codeX) <- getSomeReg x
1932 (regY, codeY) <- getSomeReg y
1933 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Fallback: compute the address into a register.
1937 (reg, code) <- getSomeReg other
1940 return (Amode (AddrRegImm reg off) code)
1941 #endif /* powerpc_TARGET_ARCH */
1943 -- -----------------------------------------------------------------------------
1944 -- getOperand: sometimes any operand will do.
1946 -- getNonClobberedOperand: the value of the operand will remain valid across
1947 -- the computation of an arbitrary expression, unless the expression
1948 -- is computed directly into a register which the operand refers to
1949 -- (see trivialCode where this function is used for an example).
1951 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1953 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1954 #if x86_64_TARGET_ARCH
-- x86_64: SSE has no FP immediates, so non-zero float literals are placed
-- in a read-only data section and addressed RIP-relative.
1955 getNonClobberedOperand (CmmLit lit)
1956 | isSuitableFloatingPointLit lit = do
1957 lbl <- getNewLabelNat
1958 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1960 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literal: usable directly as an immediate operand.
1962 getNonClobberedOperand (CmmLit lit)
1963 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1964 return (OpImm (litToImm lit), nilOL)
-- Memory operand: if the amode mentions a clobberable (free real) register,
-- LEA the effective address into a fresh temp first so it survives.
1965 getNonClobberedOperand (CmmLoad mem pk)
1966 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1967 Amode src mem_code <- getAmode mem
1969 if (amodeCouldBeClobbered src)
1971 tmp <- getNewRegNat wordRep
1972 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1973 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1976 return (OpAddr src', save_code `appOL` mem_code)
-- Everything else: force into a non-clobbered register.
1977 getNonClobberedOperand e = do
1978 (reg, code) <- getNonClobberedReg e
1979 return (OpReg reg, code)
1981 amodeCouldBeClobbered :: AddrMode -> Bool
1982 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-- Only free real registers can be clobbered by later code.
1984 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1985 regClobbered _ = False
1987 -- getOperand: the operand is not required to remain valid across the
1988 -- computation of an arbitrary expression.
1989 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1990 #if x86_64_TARGET_ARCH
-- Non-zero float literal: read from a RIP-relative data section (same
-- scheme as getNonClobberedOperand above).
1991 getOperand (CmmLit lit)
1992 | isSuitableFloatingPointLit lit = do
1993 lbl <- getNewLabelNat
1994 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1996 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1998 getOperand (CmmLit lit)
1999 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2000 return (OpImm (litToImm lit), nilOL)
2001 getOperand (CmmLoad mem pk)
2002 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2003 Amode src mem_code <- getAmode mem
2004 return (OpAddr src, mem_code)
-- Fallback (equation head elided): any register will do.
2006 (reg, code) <- getSomeReg e
2007 return (OpReg reg, code)
-- isOperand: can this expression be used directly as an instruction
-- operand (memory or immediate) without first going through a register?
2009 isOperand :: CmmExpr -> Bool
2010 isOperand (CmmLoad _ _) = True
2011 isOperand (CmmLit lit) = not (is64BitLit lit)
2012 || isSuitableFloatingPointLit lit
2015 -- if we want a floating-point literal as an operand, we can
2016 -- use it directly from memory. However, if the literal is
2017 -- zero, we're better off generating it into a register using
2019 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2020 isSuitableFloatingPointLit _ = False
-- getRegOrMem: like getNonClobberedOperand but never yields an immediate;
-- used where the instruction needs a register or memory operand (e.g. the
-- divisor of IDIV/DIV in div_code above).
2022 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2023 getRegOrMem (CmmLoad mem pk)
2024 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2025 Amode src mem_code <- getAmode mem
2026 return (OpAddr src, mem_code)
-- Fallback (equation head elided): compute into a non-clobbered register.
2028 (reg, code) <- getNonClobberedReg e
2029 return (OpReg reg, code)
2031 #if x86_64_TARGET_ARCH
-- is64BitLit: does this literal need a full 64-bit immediate?  Only I64
-- integers outside the signed 32-bit range qualify.
2032 is64BitLit (CmmInt i I64) = is64BitInteger i
2033 -- assume that labels are in the range 0-2^31-1: this assumes the
2034 -- small memory model (see gcc docs, -mcmodel=small).
2036 is64BitLit x = False
-- True iff i does not fit in a signed 32-bit immediate.
2039 is64BitInteger :: Integer -> Bool
2040 is64BitInteger i = i > 0x7fffffff || i < -0x80000000
2042 -- -----------------------------------------------------------------------------
2043 -- The 'CondCode' type: Condition codes passed up the tree.
-- CondCode: Bool flags whether the sense is "float comparison" (per the
-- constructor's use below with condFltCode/condIntCode) -- TODO confirm,
-- the field's meaning is not visible in this excerpt.
2045 data CondCode = CondCode Bool Cond InstrBlock
2047 -- Set up a condition code for a conditional branch.
2049 getCondCode :: CmmExpr -> NatM CondCode
2051 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2053 #if alpha_TARGET_ARCH
2054 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2055 #endif /* alpha_TARGET_ARCH */
2057 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2059 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2060 -- yes, they really do seem to want exactly the same!
-- Dispatch on the comparison MachOp; float comparisons via condFltCode,
-- integer via condIntCode with signed/unsigned condition codes.
2062 getCondCode (CmmMachOp mop [x, y])
2063 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2065 MO_Eq F32 -> condFltCode EQQ x y
2066 MO_Ne F32 -> condFltCode NE x y
2068 MO_S_Gt F32 -> condFltCode GTT x y
2069 MO_S_Ge F32 -> condFltCode GE x y
2070 MO_S_Lt F32 -> condFltCode LTT x y
2071 MO_S_Le F32 -> condFltCode LE x y
2073 MO_Eq F64 -> condFltCode EQQ x y
2074 MO_Ne F64 -> condFltCode NE x y
2076 MO_S_Gt F64 -> condFltCode GTT x y
2077 MO_S_Ge F64 -> condFltCode GE x y
2078 MO_S_Lt F64 -> condFltCode LTT x y
2079 MO_S_Le F64 -> condFltCode LE x y
2081 MO_Eq rep -> condIntCode EQQ x y
2082 MO_Ne rep -> condIntCode NE x y
2084 MO_S_Gt rep -> condIntCode GTT x y
2085 MO_S_Ge rep -> condIntCode GE x y
2086 MO_S_Lt rep -> condIntCode LTT x y
2087 MO_S_Le rep -> condIntCode LE x y
2089 MO_U_Gt rep -> condIntCode GU x y
2090 MO_U_Ge rep -> condIntCode GEU x y
2091 MO_U_Lt rep -> condIntCode LU x y
2092 MO_U_Le rep -> condIntCode LEU x y
2094 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2096 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2098 #elif powerpc_TARGET_ARCH
2100 -- almost the same as everywhere else - but we need to
2101 -- extend small integers to 32 bit first
2103 getCondCode (CmmMachOp mop [x, y])
2105 MO_Eq F32 -> condFltCode EQQ x y
2106 MO_Ne F32 -> condFltCode NE x y
2108 MO_S_Gt F32 -> condFltCode GTT x y
2109 MO_S_Ge F32 -> condFltCode GE x y
2110 MO_S_Lt F32 -> condFltCode LTT x y
2111 MO_S_Le F32 -> condFltCode LE x y
2113 MO_Eq F64 -> condFltCode EQQ x y
2114 MO_Ne F64 -> condFltCode NE x y
2116 MO_S_Gt F64 -> condFltCode GTT x y
2117 MO_S_Ge F64 -> condFltCode GE x y
2118 MO_S_Lt F64 -> condFltCode LTT x y
2119 MO_S_Le F64 -> condFltCode LE x y
-- Integer comparisons widen sub-word operands first: unsigned (zero-
-- extended) for equality, signed/unsigned extension matching the op.
2121 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2122 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2124 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2125 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2126 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2127 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2129 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2130 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2131 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2132 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2134 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2136 getCondCode other = panic "getCondCode(2)(powerpc)"
2142 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2143 -- passed back up the tree.
-- | Turn a comparison into a 'CondCode' (condition + the code that
-- sets the machine's condition flags).  'condIntCode' handles integer
-- comparisons, 'condFltCode' floating-point ones; each target arch
-- supplies its own equations below.
2145 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2147 #if alpha_TARGET_ARCH
-- The Alpha port never produces condition codes this way.
2148 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2149 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2150 #endif /* alpha_TARGET_ARCH */
2152 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2153 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2155 -- memory vs immediate
-- NOTE(review): the four equations below are ordered from most to
-- least specific operand shapes; the extract is missing some 'let'
-- header lines (e.g. the binding of 'imm' from 'lit'), so the code is
-- left byte-identical.
2156 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2157 Amode x_addr x_code <- getAmode x
2160 code = x_code `snocOL`
2161 CMP pk (OpImm imm) (OpAddr x_addr)
2163 return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets the flags without needing the
-- literal as an operand.
2166 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2167 (x_reg, x_code) <- getSomeReg x
2169 code = x_code `snocOL`
2170 TEST pk (OpReg x_reg) (OpReg x_reg)
2172 return (CondCode False cond code)
2174 -- anything vs operand
2175 condIntCode cond x y | isOperand y = do
2176 (x_reg, x_code) <- getNonClobberedReg x
2177 (y_op, y_code) <- getOperand y
2179 code = x_code `appOL` y_code `snocOL`
2180 CMP (cmmExprRep x) y_op (OpReg x_reg)
2182 return (CondCode False cond code)
2184 -- anything vs anything
-- x is forced to reg-or-mem, y to a register that y's code cannot
-- clobber, so the CMP operand order matches the condition 'cond'.
2185 condIntCode cond x y = do
2186 (y_reg, y_code) <- getNonClobberedReg y
2187 (x_op, x_code) <- getRegOrMem x
2189 code = y_code `appOL`
2191 CMP (cmmExprRep x) (OpReg y_reg) x_op
2193 return (CondCode False cond code)
2196 #if i386_TARGET_ARCH
-- x87 float comparison.  Only the six conditions in the assertion are
-- supported; GCMP folds the chosen condition into the compare itself.
2197 condFltCode cond x y
2198 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2199 (x_reg, x_code) <- getNonClobberedReg x
2200 (y_reg, y_code) <- getSomeReg y
2202 code = x_code `appOL` y_code `snocOL`
2203 GCMP cond x_reg y_reg
2204 -- The GCMP insn does the test and sets the zero flag if comparable
2205 -- and true. Hence we always supply EQQ as the condition to test.
2206 return (CondCode True EQQ code)
2207 #endif /* i386_TARGET_ARCH */
2209 #if x86_64_TARGET_ARCH
2210 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2211 -- an operand, but the right must be a reg. We can probably do better
2212 -- than this general case...
-- NOTE(review): a 'let' header and the `appOL` of y_code are missing
-- from this extract; code left byte-identical.
2213 condFltCode cond x y = do
2214 (x_reg, x_code) <- getNonClobberedReg x
2215 (y_op, y_code) <- getOperand y
2217 code = x_code `appOL`
2219 CMP (cmmExprRep x) y_op (OpReg x_reg)
2220 -- NB(1): we need to use the unsigned comparison operators on the
2221 -- result of this comparison.
2223 return (CondCode True (condToUnsigned cond) code)
2226 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2228 #if sparc_TARGET_ARCH
-- Integer compare on SPARC: SUBcc against %g0 sets the integer
-- condition codes without keeping the result.
-- NOTE(review): the guard on the first equation (presumably a
-- fits-in-13-bits immediate check) is missing from this extract.
2230 condIntCode cond x (CmmLit (CmmInt y rep))
2233 (src1, code) <- getSomeReg x
2235 src2 = ImmInt (fromInteger y)
2236 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2237 return (CondCode False cond code')
2239 condIntCode cond x y = do
2240 (src1, code1) <- getSomeReg x
2241 (src2, code2) <- getSomeReg y
2243 code__2 = code1 `appOL` code2 `snocOL`
2244 SUB False True src1 (RIReg src2) g0
2245 return (CondCode False cond code__2)
-- Float compare.  If exactly one operand is F32 it is first promoted
-- to F64 (FxTOy into tmp) so FCMP always compares at a single width.
2248 condFltCode cond x y = do
2249 (src1, code1) <- getSomeReg x
2250 (src2, code2) <- getSomeReg y
2251 tmp <- getNewRegNat F64
2253 promote x = FxTOy F32 F64 x tmp
2260 code1 `appOL` code2 `snocOL`
2261 FCMP True pk1 src1 src2
2262 else if pk1 == F32 then
2263 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2264 FCMP True F64 tmp src2
2266 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2267 FCMP True F64 src1 tmp
2268 return (CondCode True cond code__2)
2270 #endif /* sparc_TARGET_ARCH */
2272 #if powerpc_TARGET_ARCH
2273 -- ###FIXME: I16 and I8!
2274 condIntCode cond x (CmmLit (CmmInt y rep))
-- Immediate form: only when the literal fits the (un)signed 16-bit
-- immediate field, as checked by makeImmediate.
2275 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2277 (src1, code) <- getSomeReg x
2279 code' = code `snocOL`
2280 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2281 return (CondCode False cond code')
2283 condIntCode cond x y = do
2284 (src1, code1) <- getSomeReg x
2285 (src2, code2) <- getSomeReg y
2287 code' = code1 `appOL` code2 `snocOL`
2288 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2289 return (CondCode False cond code')
2291 condFltCode cond x y = do
2292 (src1, code1) <- getSomeReg x
2293 (src2, code2) <- getSomeReg y
2295 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2296 code'' = case cond of -- twiddle CR to handle unordered case
2297 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2298 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR bit positions used by the CRNOR fixups above.
2301 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2302 return (CondCode True cond code'')
2304 #endif /* powerpc_TARGET_ARCH */
2306 -- -----------------------------------------------------------------------------
2307 -- Generating assignments
2309 -- Assignments are really at the heart of the whole code generation
2310 -- business. Almost all top-level nodes of any real importance are
2311 -- assignments, which correspond to loads, stores, or register
2312 -- transfers. If we're really lucky, some of the register transfers
2313 -- will go away, because we can use the destination register to
2314 -- complete the code generation for the right hand side. This only
2315 -- fails when the right hand side is forced into a fixed register
2316 -- (e.g. the result of a call).
-- Signatures for the per-arch assignment generators below:
-- *Mem* stores an expression to a memory address, *Reg* moves it into
-- a (virtual or fixed) register.
2318 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2319 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2321 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2322 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2324 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2326 #if alpha_TARGET_ARCH
-- NOTE(review): legacy Alpha code in the pre-OrdList (thenNat /
-- mkSeqInstr) style; several 'let'/'in' lines are missing from this
-- extract.  Left byte-identical.
2328 assignIntCode pk (CmmLoad dst _) src
2329 = getNewRegNat IntRep `thenNat` \ tmp ->
2330 getAmode dst `thenNat` \ amode ->
2331 getRegister src `thenNat` \ register ->
2333 code1 = amodeCode amode []
2334 dst__2 = amodeAddr amode
2335 code2 = registerCode register tmp []
2336 src__2 = registerName register tmp
2337 sz = primRepToSize pk
2338 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2342 assignIntCode pk dst src
2343 = getRegister dst `thenNat` \ register1 ->
2344 getRegister src `thenNat` \ register2 ->
2346 dst__2 = registerName register1 zeroh
2347 code = registerCode register2 dst__2
2348 src__2 = registerName register2 dst__2
-- If src is pinned to a fixed register, an extra OR (reg move) is
-- needed to land the value in dst.
2349 code__2 = if isFixed register2
2350 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2355 #endif /* alpha_TARGET_ARCH */
2357 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2359 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2361 -- integer assignment to memory
2362 -- integer assignment to memory
-- Uses get_op_RI so that small literals are stored directly as
-- immediates rather than going through a register.
2363 assignMem_IntCode pk addr src = do
2364 Amode addr code_addr <- getAmode addr
2366 (code_src, op_src) <- get_op_RI src
2366 code = code_src `appOL`
2368 MOV pk op_src (OpAddr addr)
2369 -- NOTE: op_src is stable, so it will still be valid
2370 -- after code_addr. This may involve the introduction
2371 -- of an extra MOV to a temporary register, but we hope
2372 -- the register allocator will get rid of it.
-- | Classify an argument as immediate (small literal) or register.
2376 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2377 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2378 = return (nilOL, OpImm (litToImm lit))
2380 = do (reg,code) <- getNonClobberedReg op
2381 return (code, OpReg reg)
2384 -- Assign; dst is a reg, rhs is mem
2385 assignReg_IntCode pk reg (CmmLoad src _) = do
2386 load_code <- intLoadCode (MOV pk) src
2387 return (load_code (getRegisterReg reg))
2389 -- dst is a reg, but src could be anything
2390 assignReg_IntCode pk reg src = do
2391 code <- getAnyReg src
2392 return (code (getRegisterReg reg))
2394 #endif /* i386_TARGET_ARCH */
2396 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2398 #if sparc_TARGET_ARCH
-- Store src at addr: evaluate src first, then the address, then ST.
2400 assignMem_IntCode pk addr src = do
2401 (srcReg, code) <- getSomeReg src
2402 Amode dstAddr addr_code <- getAmode addr
2403 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Move src into reg.  A Fixed result needs an explicit reg-reg move
-- (OR with %g0); an Any result is generated straight into dst.
2405 assignReg_IntCode pk reg src = do
2406 r <- getRegister src
2408 Any _ code -> code dst
2409 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2411 dst = getRegisterReg reg
2414 #endif /* sparc_TARGET_ARCH */
2416 #if powerpc_TARGET_ARCH
-- Same shape as the SPARC version: value, then address, then store.
2418 assignMem_IntCode pk addr src = do
2419 (srcReg, code) <- getSomeReg src
2420 Amode dstAddr addr_code <- getAmode addr
2421 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2423 -- dst is a reg, but src could be anything
-- Fixed results are copied with MR; Any results target dst directly.
2424 assignReg_IntCode pk reg src
2426 r <- getRegister src
2428 Any _ code -> code dst
2429 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2431 dst = getRegisterReg reg
2433 #endif /* powerpc_TARGET_ARCH */
2436 -- -----------------------------------------------------------------------------
2437 -- Floating-point assignments
2439 #if alpha_TARGET_ARCH
-- NOTE(review): legacy Alpha float assignment, mirroring
-- assignIntCode above but with FMOV for the fixed-register case.
-- 'let'/'in' lines missing from this extract; left byte-identical.
2441 assignFltCode pk (CmmLoad dst _) src
2442 = getNewRegNat pk `thenNat` \ tmp ->
2443 getAmode dst `thenNat` \ amode ->
2444 getRegister src `thenNat` \ register ->
2446 code1 = amodeCode amode []
2447 dst__2 = amodeAddr amode
2448 code2 = registerCode register tmp []
2449 src__2 = registerName register tmp
2450 sz = primRepToSize pk
2451 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2455 assignFltCode pk dst src
2456 = getRegister dst `thenNat` \ register1 ->
2457 getRegister src `thenNat` \ register2 ->
2459 dst__2 = registerName register1 zeroh
2460 code = registerCode register2 dst__2
2461 src__2 = registerName register2 dst__2
2462 code__2 = if isFixed register2
2463 then code . mkSeqInstr (FMOV src__2 dst__2)
2468 #endif /* alpha_TARGET_ARCH */
2470 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2472 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2474 -- Floating point assignment to memory
2474 -- Floating point assignment to memory
-- i386 stores via the x87 GST instruction; x86_64 uses a plain SSE
-- MOV (selected at compile time by IF_ARCH_i386).
2475 assignMem_FltCode pk addr src = do
2476 (src_reg, src_code) <- getNonClobberedReg src
2477 Amode addr addr_code <- getAmode addr
2479 code = src_code `appOL`
2481 IF_ARCH_i386(GST pk src_reg addr,
2482 MOV pk (OpReg src_reg) (OpAddr addr))
2485 -- Floating point assignment to a register/temporary
2486 assignReg_FltCode pk reg src = do
2487 src_code <- getAnyReg src
2488 return (src_code (getRegisterReg reg))
2490 #endif /* i386_TARGET_ARCH */
2492 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2494 #if sparc_TARGET_ARCH
2496 -- Floating point assignment to memory
2496 -- Floating point assignment to memory
-- If the source's width differs from the store width pk, convert
-- through tmp1 with FxTOy before the ST.
2497 assignMem_FltCode pk addr src = do
2498 Amode dst__2 code1 <- getAmode addr
2499 (src__2, code2) <- getSomeReg src
2500 tmp1 <- getNewRegNat pk
2502 pk__2 = cmmExprRep src
2503 code__2 = code1 `appOL` code2 `appOL`
2505 then unitOL (ST pk src__2 dst__2)
2506 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2509 -- Floating point assignment to a register/temporary
2510 -- ToDo: Verify correctness
2511 assignReg_FltCode pk reg src = do
2512 r <- getRegister src
2513 v1 <- getNewRegNat pk
2515 Any _ code -> code dst
2516 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2518 dst = getRegisterReg reg
2520 #endif /* sparc_TARGET_ARCH */
2522 #if powerpc_TARGET_ARCH
-- On PowerPC float assignment needs no special casing: the integer
-- paths already handle FPR sources/destinations.
2525 assignMem_FltCode = assignMem_IntCode
2526 assignReg_FltCode = assignReg_IntCode
2528 #endif /* powerpc_TARGET_ARCH */
2531 -- -----------------------------------------------------------------------------
2532 -- Generating an non-local jump
2534 -- (If applicable) Do not fill the delay slots here; you will confuse the
2535 -- register allocator.
2537 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2539 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2541 #if alpha_TARGET_ARCH
-- NOTE(review): legacy Alpha genJump; the second equation's head
-- ('genJump tree', presumably) is missing from this extract.
2543 genJump (CmmLabel lbl)
2544 | isAsmTemp lbl = returnInstr (BR target)
2545 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2547 target = ImmCLbl lbl
2550 = getRegister tree `thenNat` \ register ->
2551 getNewRegNat PtrRep `thenNat` \ tmp ->
2553 dst = registerName register pv
2554 code = registerCode register pv
2555 target = registerName register pv
2557 if isFixed register then
2558 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2560 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2562 #endif /* alpha_TARGET_ARCH */
2564 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2566 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump through memory, to an immediate label, or (fallthrough
-- equation, whose head is missing in this extract) via a register.
2568 genJump (CmmLoad mem pk) = do
2569 Amode target code <- getAmode mem
2570 return (code `snocOL` JMP (OpAddr target))
2572 genJump (CmmLit lit) = do
2573 return (unitOL (JMP (OpImm (litToImm lit))))
2576 (reg,code) <- getSomeReg expr
2577 return (code `snocOL` JMP (OpReg reg))
2579 #endif /* i386_TARGET_ARCH */
2581 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2583 #if sparc_TARGET_ARCH
-- Known label: CALL + NOP (delay slot).  Otherwise (fallthrough
-- equation head missing in this extract) jump via a register.
2585 genJump (CmmLit (CmmLabel lbl))
2586 = return (toOL [CALL (Left target) 0 True, NOP])
2588 target = ImmCLbl lbl
2592 (target, code) <- getSomeReg tree
2593 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2595 #endif /* sparc_TARGET_ARCH */
2597 #if powerpc_TARGET_ARCH
-- Known label: direct JMP.  Computed target: move into CTR and BCTR.
2598 genJump (CmmLit (CmmLabel lbl))
2599 = return (unitOL $ JMP lbl)
2603 (target,code) <- getSomeReg tree
2604 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2605 #endif /* powerpc_TARGET_ARCH */
2608 -- -----------------------------------------------------------------------------
2609 -- Unconditional branches
2611 genBranch :: BlockId -> NatM InstrBlock
-- | Emit an unconditional branch to the given local basic block.
2613 genBranch bid = return (toOL (mkBranchInstr bid))
2615 -- -----------------------------------------------------------------------------
2616 -- Conditional jumps
2619 Conditional jumps are always to local labels, so we can use branch
2620 instructions. We peek at the arguments to decide what kind of
2623 ALPHA: For comparisons with 0, we're laughing, because we can just do
2624 the desired conditional branch.
2626 I386: First, we have to ensure that the condition
2627 codes are set according to the supplied comparison operation.
2629 SPARC: First, we have to ensure that the condition codes are set
2630 according to the supplied comparison operation. We generate slightly
2631 different code for floating point comparisons, because a floating
2632 point operation cannot directly precede a @BF@. We assume the worst
2633 and fill that slot with a @NOP@.
2635 SPARC: Do not fill the delay slots here; you will confuse the register
2641 :: BlockId -- the branch target
2642 -> CmmExpr -- the condition on which to branch
2645 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2647 #if alpha_TARGET_ARCH
-- NOTE(review): legacy Alpha genCondJump, still written against the
-- old Stix AST (StPrim/StInt/StDouble).  Many 'where'/'in' lines are
-- missing from this extract; code left byte-identical.  Each (instr,
-- cond) table pairs the compare instruction with the branch condition
-- applied to its result.
2649 genCondJump id (StPrim op [x, StInt 0])
2650 = getRegister x `thenNat` \ register ->
2651 getNewRegNat (registerRep register)
2654 code = registerCode register tmp
2655 value = registerName register tmp
2656 pk = registerRep register
2657 target = ImmCLbl lbl
2659 returnSeq code [BI (cmpOp op) value target]
2661 cmpOp CharGtOp = GTT
2663 cmpOp CharEqOp = EQQ
2665 cmpOp CharLtOp = LTT
2674 cmpOp WordGeOp = ALWAYS
2675 cmpOp WordEqOp = EQQ
2677 cmpOp WordLtOp = NEVER
2678 cmpOp WordLeOp = EQQ
2680 cmpOp AddrGeOp = ALWAYS
2681 cmpOp AddrEqOp = EQQ
2683 cmpOp AddrLtOp = NEVER
2684 cmpOp AddrLeOp = EQQ
2686 genCondJump lbl (StPrim op [x, StDouble 0.0])
2687 = getRegister x `thenNat` \ register ->
2688 getNewRegNat (registerRep register)
2691 code = registerCode register tmp
2692 value = registerName register tmp
2693 pk = registerRep register
2694 target = ImmCLbl lbl
2696 return (code . mkSeqInstr (BF (cmpOp op) value target))
2698 cmpOp FloatGtOp = GTT
2699 cmpOp FloatGeOp = GE
2700 cmpOp FloatEqOp = EQQ
2701 cmpOp FloatNeOp = NE
2702 cmpOp FloatLtOp = LTT
2703 cmpOp FloatLeOp = LE
2704 cmpOp DoubleGtOp = GTT
2705 cmpOp DoubleGeOp = GE
2706 cmpOp DoubleEqOp = EQQ
2707 cmpOp DoubleNeOp = NE
2708 cmpOp DoubleLtOp = LTT
2709 cmpOp DoubleLeOp = LE
2711 genCondJump lbl (StPrim op [x, y])
2713 = trivialFCode pr instr x y `thenNat` \ register ->
2714 getNewRegNat F64 `thenNat` \ tmp ->
2716 code = registerCode register tmp
2717 result = registerName register tmp
2718 target = ImmCLbl lbl
2720 return (code . mkSeqInstr (BF cond result target))
2722 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2724 fltCmpOp op = case op of
2738 (instr, cond) = case op of
2739 FloatGtOp -> (FCMP TF LE, EQQ)
2740 FloatGeOp -> (FCMP TF LTT, EQQ)
2741 FloatEqOp -> (FCMP TF EQQ, NE)
2742 FloatNeOp -> (FCMP TF EQQ, EQQ)
2743 FloatLtOp -> (FCMP TF LTT, NE)
2744 FloatLeOp -> (FCMP TF LE, NE)
2745 DoubleGtOp -> (FCMP TF LE, EQQ)
2746 DoubleGeOp -> (FCMP TF LTT, EQQ)
2747 DoubleEqOp -> (FCMP TF EQQ, NE)
2748 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2749 DoubleLtOp -> (FCMP TF LTT, NE)
2750 DoubleLeOp -> (FCMP TF LE, NE)
2752 genCondJump lbl (StPrim op [x, y])
2753 = trivialCode instr x y `thenNat` \ register ->
2754 getNewRegNat IntRep `thenNat` \ tmp ->
2756 code = registerCode register tmp
2757 result = registerName register tmp
2758 target = ImmCLbl lbl
2760 return (code . mkSeqInstr (BI cond result target))
2762 (instr, cond) = case op of
2763 CharGtOp -> (CMP LE, EQQ)
2764 CharGeOp -> (CMP LTT, EQQ)
2765 CharEqOp -> (CMP EQQ, NE)
2766 CharNeOp -> (CMP EQQ, EQQ)
2767 CharLtOp -> (CMP LTT, NE)
2768 CharLeOp -> (CMP LE, NE)
2769 IntGtOp -> (CMP LE, EQQ)
2770 IntGeOp -> (CMP LTT, EQQ)
2771 IntEqOp -> (CMP EQQ, NE)
2772 IntNeOp -> (CMP EQQ, EQQ)
2773 IntLtOp -> (CMP LTT, NE)
2774 IntLeOp -> (CMP LE, NE)
2775 WordGtOp -> (CMP ULE, EQQ)
2776 WordGeOp -> (CMP ULT, EQQ)
2777 WordEqOp -> (CMP EQQ, NE)
2778 WordNeOp -> (CMP EQQ, EQQ)
2779 WordLtOp -> (CMP ULT, NE)
2780 WordLeOp -> (CMP ULE, NE)
2781 AddrGtOp -> (CMP ULE, EQQ)
2782 AddrGeOp -> (CMP ULT, EQQ)
2783 AddrEqOp -> (CMP EQQ, NE)
2784 AddrNeOp -> (CMP EQQ, EQQ)
2785 AddrLtOp -> (CMP ULT, NE)
2786 AddrLeOp -> (CMP ULE, NE)
2788 #endif /* alpha_TARGET_ARCH */
2790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2792 #if i386_TARGET_ARCH
-- Set the flags via getCondCode, then conditional-jump on them.
2794 genCondJump id bool = do
2795 CondCode _ cond code <- getCondCode bool
2796 return (code `snocOL` JXX cond id)
2800 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2802 #if x86_64_TARGET_ARCH
-- Integer conditions jump directly; float conditions need extra
-- handling for the unordered (NaN) case.
-- NOTE(review): the bodies of plain_test / or_unordered / and_ordered
-- are missing from this extract; code left byte-identical.
2804 genCondJump id bool = do
2805 CondCode is_float cond cond_code <- getCondCode bool
2808 return (cond_code `snocOL` JXX cond id)
2810 lbl <- getBlockIdNat
2812 -- see comment with condFltReg
2813 let code = case cond of
2819 plain_test = unitOL (
2822 or_unordered = toOL [
2826 and_ordered = toOL [
2832 return (cond_code `appOL` code)
2836 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2838 #if sparc_TARGET_ARCH
-- Float branches (BF) get a NOP before them because a float compare
-- cannot immediately precede a BF; both forms get a trailing delay-
-- slot NOP.
2840 genCondJump (BlockId id) bool = do
2841 CondCode is_float cond code <- getCondCode bool
2846 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2847 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2851 #endif /* sparc_TARGET_ARCH */
2854 #if powerpc_TARGET_ARCH
-- Condition registers are set by getCondCode; BCC branches on them.
2856 genCondJump id bool = do
2857 CondCode is_float cond code <- getCondCode bool
2858 return (code `snocOL` BCC cond id)
2860 #endif /* powerpc_TARGET_ARCH */
2863 -- -----------------------------------------------------------------------------
2864 -- Generating C calls
2866 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2867 -- @get_arg@, which moves the arguments to the correct registers/stack
2868 -- locations. Apart from that, the code is easy.
2870 -- (If applicable) Do not fill the delay slots here; you will confuse the
2871 -- register allocator.
2874 :: CmmCallTarget -- function to call
2875 -> [(CmmReg,MachHint)] -- where to put the result
2876 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2877 -> Maybe [GlobalReg] -- volatile regs to save
2880 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2882 #if alpha_TARGET_ARCH
-- NOTE(review): legacy Alpha genCCall + its get_arg helper; several
-- lines (e.g. the final return, fallthrough equations) are missing
-- from this extract.  Left byte-identical.
2886 genCCall fn cconv result_regs args
2887 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2888 `thenNat` \ ((unused,_), argCode) ->
2890 nRegs = length allArgRegs - length unused
2891 code = asmSeqThen (map ($ []) argCode)
2894 LDA pv (AddrImm (ImmLab (ptext fn))),
2895 JSR ra (AddrReg pv) nRegs,
2896 LDGP gp (AddrReg ra)]
2898 ------------------------
2899 {- Try to get a value into a specific register (or registers) for
2900 a call. The first 6 arguments go into the appropriate
2901 argument register (separate registers for integer and floating
2902 point arguments, but used in lock-step), and the remaining
2903 arguments are dumped to the stack, beginning at 0(sp). Our
2904 first argument is a pair of the list of remaining argument
2905 registers to be assigned for this call and the next stack
2906 offset to use for overflowing arguments. This way,
2907 @get_Arg@ can be applied to all of a call's arguments using
2911 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2912 -> StixTree -- Current argument
2913 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2915 -- We have to use up all of our argument registers first...
2917 get_arg ((iDst,fDst):dsts, offset) arg
2918 = getRegister arg `thenNat` \ register ->
2920 reg = if isFloatingRep pk then fDst else iDst
2921 code = registerCode register reg
2922 src = registerName register reg
2923 pk = registerRep register
2926 if isFloatingRep pk then
2927 ((dsts, offset), if isFixed register then
2928 code . mkSeqInstr (FMOV src fDst)
2931 ((dsts, offset), if isFixed register then
2932 code . mkSeqInstr (OR src (RIReg src) iDst)
2935 -- Once we have run out of argument registers, we move to the
2938 get_arg ([], offset) arg
2939 = getRegister arg `thenNat` \ register ->
2940 getNewRegNat (registerRep register)
2943 code = registerCode register tmp
2944 src = registerName register tmp
2945 pk = registerRep register
2946 sz = primRepToSize pk
2948 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2950 #endif /* alpha_TARGET_ARCH */
2952 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2954 #if i386_TARGET_ARCH
2956 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
2957 -- write barrier compiles to no code on x86/x86-64;
2958 -- we keep it this long in order to prevent earlier optimisations.
2960 -- we only cope with a single result for foreign calls
-- Inline the x87 float primitives (sqrt/sin/cos/tan); everything else
-- goes out of line via outOfLineFloatOp.
2961 genCCall (CmmPrim op) [(r,_)] args vols = do
2963 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2964 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2966 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2967 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2969 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2970 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
2972 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
2973 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
2975 other_op -> outOfLineFloatOp op r args vols
2977 actuallyInlineFloatOp rep instr [(x,_)]
2978 = do res <- trivialUFCode rep instr x
2980 return (any (getRegisterReg r))
-- General i386 C call: push args right-to-left, call, pop args
-- (unless stdcall), fetch results from eax/edx/fake0.
-- NOTE(review): numerous lines (lets, Darwin stack-alignment glue)
-- are missing from this extract; code left byte-identical.
2982 genCCall target dest_regs args vols = do
2984 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
2985 #if !darwin_TARGET_OS
2986 tot_arg_size = sum sizes
2988 raw_arg_size = sum sizes
-- Darwin requires 16-byte stack alignment at the call.
2989 tot_arg_size = roundTo 16 raw_arg_size
2990 arg_pad_size = tot_arg_size - raw_arg_size
2991 delta0 <- getDeltaNat
2992 setDeltaNat (delta0 - arg_pad_size)
2995 push_codes <- mapM push_arg (reverse args)
2996 delta <- getDeltaNat
2999 -- deal with static vs dynamic call targets
3000 (callinsns,cconv) <-
3003 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3004 -> -- ToDo: stdcall arg sizes
3005 return (unitOL (CALL (Left fn_imm) []), conv)
3006 where fn_imm = ImmCLbl lbl
3007 CmmForeignCall expr conv
3008 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3009 ASSERT(dyn_rep == I32)
3010 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3013 #if darwin_TARGET_OS
3015 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3016 DELTA (delta0 - arg_pad_size)]
3017 `appOL` concatOL push_codes
3020 = concatOL push_codes
3021 call = callinsns `appOL`
3023 -- Deallocate parameters after call for ccall;
3024 -- but not for stdcall (callee does it)
3025 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3026 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3028 [DELTA (delta + tot_arg_size)]
3031 setDeltaNat (delta + tot_arg_size)
3034 -- assign the results, if necessary
3035 assign_code [] = nilOL
3036 assign_code [(dest,_hint)] =
3038 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3039 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3040 F32 -> unitOL (GMOV fake0 r_dest)
3041 F64 -> unitOL (GMOV fake0 r_dest)
3042 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3044 r_dest_hi = getHiVRegFromLo r_dest
3045 rep = cmmRegRep dest
3046 r_dest = getRegisterReg dest
3047 assign_code many = panic "genCCall.assign_code many"
3049 return (push_code `appOL`
3051 assign_code dest_regs)
-- Round x up to the next multiple of a.
3059 roundTo a x | x `mod` a == 0 = x
3060 | otherwise = x + a - (x `mod` a)
3063 push_arg :: (CmmExpr,MachHint){-current argument-}
3064 -> NatM InstrBlock -- code
3066 push_arg (arg,_hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit halves, high word first.
3067 | arg_rep == I64 = do
3068 ChildCode64 code r_lo <- iselExpr64 arg
3069 delta <- getDeltaNat
3070 setDeltaNat (delta - 8)
3072 r_hi = getHiVRegFromLo r_lo
3074 return ( code `appOL`
3075 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3076 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3081 (code, reg, sz) <- get_op arg
3082 delta <- getDeltaNat
3083 let size = arg_size sz
3084 setDeltaNat (delta-size)
-- Floats cannot be PUSHed from x87 regs: make room on the stack
-- and GST into it instead.
3085 if (case sz of F64 -> True; F32 -> True; _ -> False)
3086 then return (code `appOL`
3087 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3089 GST sz reg (AddrBaseIndex (EABaseReg esp)
3093 else return (code `snocOL`
3094 PUSH I32 (OpReg reg) `snocOL`
3098 arg_rep = cmmExprRep arg
3101 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3103 (reg,code) <- getSomeReg op
3104 return (code, reg, cmmExprRep op)
3106 #endif /* i386_TARGET_ARCH */
3108 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Compile a floating-point CallishMachOp as a call to the
-- corresponding C library routine (sqrtf/sin/pow/...).  On i386 an
-- F32 result is routed through a fresh F64 temporary first.
-- NOTE(review): several connective lines are missing from this
-- extract; code left byte-identical.
3110 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3111 -> Maybe [GlobalReg] -> NatM InstrBlock
3112 outOfLineFloatOp mop res args vols
3114 targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3115 let target = CmmForeignCall targetExpr CCallConv
3117 if cmmRegRep res == F64
3119 stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3123 tmp = CmmLocal (LocalReg uq F64)
3125 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3126 code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3127 return (code1 `appOL` code2)
-- Map each machop to the libm symbol it calls.
3129 lbl = mkForeignLabel fn Nothing False
3132 MO_F32_Sqrt -> FSLIT("sqrtf")
3133 MO_F32_Sin -> FSLIT("sinf")
3134 MO_F32_Cos -> FSLIT("cosf")
3135 MO_F32_Tan -> FSLIT("tanf")
3136 MO_F32_Exp -> FSLIT("expf")
3137 MO_F32_Log -> FSLIT("logf")
3139 MO_F32_Asin -> FSLIT("asinf")
3140 MO_F32_Acos -> FSLIT("acosf")
3141 MO_F32_Atan -> FSLIT("atanf")
3143 MO_F32_Sinh -> FSLIT("sinhf")
3144 MO_F32_Cosh -> FSLIT("coshf")
3145 MO_F32_Tanh -> FSLIT("tanhf")
3146 MO_F32_Pwr -> FSLIT("powf")
3148 MO_F64_Sqrt -> FSLIT("sqrt")
3149 MO_F64_Sin -> FSLIT("sin")
3150 MO_F64_Cos -> FSLIT("cos")
3151 MO_F64_Tan -> FSLIT("tan")
3152 MO_F64_Exp -> FSLIT("exp")
3153 MO_F64_Log -> FSLIT("log")
3155 MO_F64_Asin -> FSLIT("asin")
3156 MO_F64_Acos -> FSLIT("acos")
3157 MO_F64_Atan -> FSLIT("atan")
3159 MO_F64_Sinh -> FSLIT("sinh")
3160 MO_F64_Cosh -> FSLIT("cosh")
3161 MO_F64_Tanh -> FSLIT("tanh")
3162 MO_F64_Pwr -> FSLIT("pow")
3164 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3166 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3168 #if x86_64_TARGET_ARCH
3170 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3171 -- write barrier compiles to no code on x86/x86-64;
3172 -- we keep it this long in order to prevent earlier optimisations.
-- All float primitives go out of line on x86_64 (SSE has no sin/cos).
3174 genCCall (CmmPrim op) [(r,_)] args vols =
3175 outOfLineFloatOp op r args vols
-- System V AMD64 calling convention: args in registers first, rest on
-- the stack; %al carries the SSE-register count for varargs callees.
-- NOTE(review): many lets and connective lines are missing from this
-- extract; code left byte-identical.
3177 genCCall target dest_regs args vols = do
3179 -- load up the register arguments
3180 (stack_args, aregs, fregs, load_args_code)
3181 <- load_args args allArgRegs allFPArgRegs nilOL
3184 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3185 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3186 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3187 -- for annotating the call instruction with
3189 sse_regs = length fp_regs_used
3191 tot_arg_size = arg_size * length stack_args
3193 -- On entry to the called function, %rsp should be aligned
3194 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3195 -- the return address is 16-byte aligned). In STG land
3196 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3197 -- need to make sure we push a multiple of 16-bytes of args,
3198 -- plus the return address, to get the correct alignment.
3199 -- Urg, this is hard. We need to feed the delta back into
3200 -- the arg pushing code.
3201 (real_size, adjust_rsp) <-
3202 if tot_arg_size `rem` 16 == 0
3203 then return (tot_arg_size, nilOL)
3204 else do -- we need to adjust...
3205 delta <- getDeltaNat
3206 setDeltaNat (delta-8)
3207 return (tot_arg_size+8, toOL [
3208 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3212 -- push the stack args, right to left
3213 push_code <- push_args (reverse stack_args) nilOL
3214 delta <- getDeltaNat
3216 -- deal with static vs dynamic call targets
3217 (callinsns,cconv) <-
3220 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3221 -> -- ToDo: stdcall arg sizes
3222 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3223 where fn_imm = ImmCLbl lbl
3224 CmmForeignCall expr conv
3225 -> do (dyn_r, dyn_c) <- getSomeReg expr
3226 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3229 -- The x86_64 ABI requires us to set %al to the number of SSE
3230 -- registers that contain arguments, if the called routine
3231 -- is a varargs function. We don't know whether it's a
3232 -- varargs function or not, so we have to assume it is.
3234 -- It's not safe to omit this assignment, even if the number
3235 -- of SSE regs in use is zero. If %al is larger than 8
3236 -- on entry to a varargs function, seg faults ensue.
3237 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3239 let call = callinsns `appOL`
3241 -- Deallocate parameters after call for ccall;
3242 -- but not for stdcall (callee does it)
3243 (if cconv == StdCallConv || real_size==0 then [] else
3244 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3246 [DELTA (delta + real_size)]
3249 setDeltaNat (delta + real_size)
3252 -- assign the results, if necessary
-- Float results come back in xmm0, integer results in rax.
3253 assign_code [] = nilOL
3254 assign_code [(dest,_hint)] =
3256 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3257 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3258 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3260 rep = cmmRegRep dest
3261 r_dest = getRegisterReg dest
3262 assign_code many = panic "genCCall.assign_code many"
3264 return (load_args_code `appOL`
3267 assign_eax sse_regs `appOL`
3269 assign_code dest_regs)
3272 arg_size = 8 -- always, at the mo
-- Assign as many args as possible to int/FP registers; the leftovers
-- are returned for pushing onto the stack.
3274 load_args :: [(CmmExpr,MachHint)]
3275 -> [Reg] -- int regs avail for args
3276 -> [Reg] -- FP regs avail for args
3278 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3279 load_args args [] [] code = return (args, [], [], code)
3280 -- no more regs to use
3281 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3282 -- no more args to push
3283 load_args ((arg,hint) : rest) aregs fregs code
3284 | isFloatingRep arg_rep =
3288 arg_code <- getAnyReg arg
3289 load_args rest aregs rs (code `appOL` arg_code r)
3294 arg_code <- getAnyReg arg
3295 load_args rest rs fregs (code `appOL` arg_code r)
3297 arg_rep = cmmExprRep arg
3300 (args',ars,frs,code') <- load_args rest aregs fregs code
3301 return ((arg,hint):args', ars, frs, code')
-- Push remaining args on the stack, keeping the frame delta updated.
3303 push_args [] code = return code
3304 push_args ((arg,hint):rest) code
3305 | isFloatingRep arg_rep = do
3306 (arg_reg, arg_code) <- getSomeReg arg
3307 delta <- getDeltaNat
3308 setDeltaNat (delta-arg_size)
3309 let code' = code `appOL` toOL [
3310 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3311 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3312 DELTA (delta-arg_size)]
3313 push_args rest code'
3316 -- we only ever generate word-sized function arguments. Promotion
3317 -- has already happened: our Int8# type is kept sign-extended
3318 -- in an Int#, for example.
3319 ASSERT(arg_rep == I64) return ()
3320 (arg_op, arg_code) <- getOperand arg
3321 delta <- getDeltaNat
3322 setDeltaNat (delta-arg_size)
3323 let code' = code `appOL` toOL [PUSH I64 arg_op,
3324 DELTA (delta-arg_size)]
3325 push_args rest code'
3327 arg_rep = cmmExprRep arg
3330 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3332 #if sparc_TARGET_ARCH
3334 The SPARC calling convention is an absolute
3335 nightmare. The first 6x32 bits of arguments are mapped into
3336 %o0 through %o5, and the remaining arguments are dumped to the
3337 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3339 If we have to put args on the stack, move %o6==%sp down by
3340 the number of words to go on the stack, to ensure there's enough space.
3342 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3343 16 words above the stack pointer is a word for the address of
3344 a structure return value. I use this as a temporary location
3345 for moving values from float to int regs. Certainly it isn't
3346 safe to put anything in the 16 words starting at %sp, since
3347 this area can get trashed at any time due to window overflows
3348 caused by signal handlers.
3350 A final complication (if the above isn't enough) is that
3351 we can't blithely calculate the arguments one by one into
3352 %o0 .. %o5. Consider the following nested calls:
3356 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3357 the inner call will itself use %o0, which trashes the value put there
3358 in preparation for the outer call. Upshot: we need to calculate the
3359 args into temporary regs, and move those to arg regs or onto the
3360 stack only immediately prior to the call proper. Sigh.
3363 genCCall target dest_regs argsAndHints vols = do
3365 args = map fst argsAndHints
3366 argcode_and_vregs <- mapM arg_to_int_vregs args
3368 (argcodes, vregss) = unzip argcode_and_vregs
3369 n_argRegs = length allArgRegs
3370 n_argRegs_used = min (length vregs) n_argRegs
3371 vregs = concat vregss
3372 -- deal with static vs dynamic call targets
3373 callinsns <- (case target of
3374 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3375 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3376 CmmForeignCall expr conv -> do
3377 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3378 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3380 (res, reduce) <- outOfLineFloatOp mop
3381 lblOrMopExpr <- case res of
3383 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3385 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3386 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3387 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3391 argcode = concatOL argcodes
3392 (move_sp_down, move_sp_up)
3393 = let diff = length vregs - n_argRegs
3394 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3397 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3399 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3400 return (argcode `appOL`
3401 move_sp_down `appOL`
3402 transfer_code `appOL`
3407 -- move args from the integer vregs into which they have been
3408 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3409 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3411 move_final [] _ offset -- all args done
3414 move_final (v:vs) [] offset -- out of aregs; move to stack
3415 = ST I32 v (spRel offset)
3416 : move_final vs [] (offset+1)
3418 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3419 = OR False g0 (RIReg v) a
3420 : move_final vs az offset
3422 -- generate code to calculate an argument, and move it into one
3423 -- or two integer vregs.
3424 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3425 arg_to_int_vregs arg
3426 | (cmmExprRep arg) == I64
3428 (ChildCode64 code r_lo) <- iselExpr64 arg
3430 r_hi = getHiVRegFromLo r_lo
3431 return (code, [r_hi, r_lo])
3434 (src, code) <- getSomeReg arg
3435 tmp <- getNewRegNat (cmmExprRep arg)
3440 v1 <- getNewRegNat I32
3441 v2 <- getNewRegNat I32
3444 FMOV F64 src f0 `snocOL`
3445 ST F32 f0 (spRel 16) `snocOL`
3446 LD I32 (spRel 16) v1 `snocOL`
3447 ST F32 (fPair f0) (spRel 16) `snocOL`
3448 LD I32 (spRel 16) v2
3453 v1 <- getNewRegNat I32
3456 ST F32 src (spRel 16) `snocOL`
3457 LD I32 (spRel 16) v1
3462 v1 <- getNewRegNat I32
3464 code `snocOL` OR False g0 (RIReg src) v1
3468 outOfLineFloatOp mop =
3470 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3471 mkForeignLabel functionName Nothing True
3472 let mopLabelOrExpr = case mopExpr of
3473 CmmLit (CmmLabel lbl) -> Left lbl
3475 return (mopLabelOrExpr, reduce)
3477 (reduce, functionName) = case mop of
3478 MO_F32_Exp -> (True, FSLIT("exp"))
3479 MO_F32_Log -> (True, FSLIT("log"))
3480 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3482 MO_F32_Sin -> (True, FSLIT("sin"))
3483 MO_F32_Cos -> (True, FSLIT("cos"))
3484 MO_F32_Tan -> (True, FSLIT("tan"))
3486 MO_F32_Asin -> (True, FSLIT("asin"))
3487 MO_F32_Acos -> (True, FSLIT("acos"))
3488 MO_F32_Atan -> (True, FSLIT("atan"))
3490 MO_F32_Sinh -> (True, FSLIT("sinh"))
3491 MO_F32_Cosh -> (True, FSLIT("cosh"))
3492 MO_F32_Tanh -> (True, FSLIT("tanh"))
3494 MO_F64_Exp -> (False, FSLIT("exp"))
3495 MO_F64_Log -> (False, FSLIT("log"))
3496 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3498 MO_F64_Sin -> (False, FSLIT("sin"))
3499 MO_F64_Cos -> (False, FSLIT("cos"))
3500 MO_F64_Tan -> (False, FSLIT("tan"))
3502 MO_F64_Asin -> (False, FSLIT("asin"))
3503 MO_F64_Acos -> (False, FSLIT("acos"))
3504 MO_F64_Atan -> (False, FSLIT("atan"))
3506 MO_F64_Sinh -> (False, FSLIT("sinh"))
3507 MO_F64_Cosh -> (False, FSLIT("cosh"))
3508 MO_F64_Tanh -> (False, FSLIT("tanh"))
3510 other -> pprPanic "outOfLineFloatOp(sparc) "
3511 (pprCallishMachOp mop)
3513 #endif /* sparc_TARGET_ARCH */
3515 #if powerpc_TARGET_ARCH
3517 #if darwin_TARGET_OS || linux_TARGET_OS
3519 The PowerPC calling convention for Darwin/Mac OS X
3520 is described in Apple's document
3521 "Inside Mac OS X - Mach-O Runtime Architecture".
3523 PowerPC Linux uses the System V Release 4 Calling Convention
3524 for PowerPC. It is described in the
3525 "System V Application Binary Interface PowerPC Processor Supplement".
3527 Both conventions are similar:
3528 Parameters may be passed in general-purpose registers starting at r3, in
3529 floating point registers starting at f1, or on the stack.
3531 But there are substantial differences:
3532 * The number of registers used for parameter passing and the exact set of
3533 nonvolatile registers differs (see MachRegs.lhs).
3534 * On Darwin, stack space is always reserved for parameters, even if they are
3535 passed in registers. The called routine may choose to save parameters from
3536 registers to the corresponding space on the stack.
3537 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3538 parameter is passed in an FPR.
3539 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3540 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3541 Darwin just treats an I64 like two separate I32s (high word first).
3542 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3543 4-byte aligned like everything else on Darwin.
3544 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3545 PowerPC Linux does not agree, so neither do we.
3547 According to both conventions, the parameter area should be part of the
3548 caller's stack frame, allocated in the caller's prologue code (large enough
3549 to hold the parameter lists for all called routines). The NCG already
3550 uses the stack for register spilling, leaving 64 bytes free at the top.
3551 If we need a larger parameter area than that, we just allocate a new stack
3552 frame just before ccalling.
3556 genCCall (CmmPrim MO_WriteBarrier) _ _ _
3557 = return $ unitOL LWSYNC
3559 genCCall target dest_regs argsAndHints vols
3560 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3561 -- we rely on argument promotion in the codeGen
3563 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3565 allArgRegs allFPArgRegs
3569 (labelOrExpr, reduceToF32) <- case target of
3570 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3571 CmmForeignCall expr conv -> return (Right expr, False)
3572 CmmPrim mop -> outOfLineFloatOp mop
3574 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3575 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3580 `snocOL` BL lbl usedRegs
3583 (dynReg, dynCode) <- getSomeReg dyn
3585 `snocOL` MTCTR dynReg
3587 `snocOL` BCTRL usedRegs
3590 #if darwin_TARGET_OS
3591 initialStackOffset = 24
3592 -- size of linkage area + size of arguments, in bytes
3593 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3594 map machRepByteWidth argReps
3595 #elif linux_TARGET_OS
3596 initialStackOffset = 8
3597 stackDelta finalStack = roundTo 16 finalStack
3599 args = map fst argsAndHints
3600 argReps = map cmmExprRep args
3602 roundTo a x | x `mod` a == 0 = x
3603 | otherwise = x + a - (x `mod` a)
3605 move_sp_down finalStack
3607 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3610 where delta = stackDelta finalStack
3611 move_sp_up finalStack
3613 toOL [ADD sp sp (RIImm (ImmInt delta)),
3616 where delta = stackDelta finalStack
3619 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3620 passArguments ((arg,I64):args) gprs fprs stackOffset
3621 accumCode accumUsed =
3623 ChildCode64 code vr_lo <- iselExpr64 arg
3624 let vr_hi = getHiVRegFromLo vr_lo
3626 #if darwin_TARGET_OS
3631 (accumCode `appOL` code
3632 `snocOL` storeWord vr_hi gprs stackOffset
3633 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3634 ((take 2 gprs) ++ accumUsed)
3636 storeWord vr (gpr:_) offset = MR gpr vr
3637 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3639 #elif linux_TARGET_OS
3640 let stackOffset' = roundTo 8 stackOffset
3641 stackCode = accumCode `appOL` code
3642 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3643 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3644 regCode hireg loreg =
3645 accumCode `appOL` code
3646 `snocOL` MR hireg vr_hi
3647 `snocOL` MR loreg vr_lo
3650 hireg : loreg : regs | even (length gprs) ->
3651 passArguments args regs fprs stackOffset
3652 (regCode hireg loreg) (hireg : loreg : accumUsed)
3653 _skipped : hireg : loreg : regs ->
3654 passArguments args regs fprs stackOffset
3655 (regCode hireg loreg) (hireg : loreg : accumUsed)
3656 _ -> -- only one or no regs left
3657 passArguments args [] fprs (stackOffset'+8)
3661 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3662 | reg : _ <- regs = do
3663 register <- getRegister arg
3664 let code = case register of
3665 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3666 Any _ acode -> acode reg
3670 #if darwin_TARGET_OS
3671 -- The Darwin ABI requires that we reserve stack slots for register parameters
3672 (stackOffset + stackBytes)
3673 #elif linux_TARGET_OS
3674 -- ... the SysV ABI doesn't.
3677 (accumCode `appOL` code)
3680 (vr, code) <- getSomeReg arg
3684 (stackOffset' + stackBytes)
3685 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3688 #if darwin_TARGET_OS
3689 -- stackOffset is at least 4-byte aligned
3690 -- The Darwin ABI is happy with that.
3691 stackOffset' = stackOffset
3693 -- ... the SysV ABI requires 8-byte alignment for doubles.
3694 stackOffset' | rep == F64 = roundTo 8 stackOffset
3695 | otherwise = stackOffset
3697 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3698 (nGprs, nFprs, stackBytes, regs) = case rep of
3699 I32 -> (1, 0, 4, gprs)
3700 #if darwin_TARGET_OS
3701 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3703 F32 -> (1, 1, 4, fprs)
3704 F64 -> (2, 1, 8, fprs)
3705 #elif linux_TARGET_OS
3706 -- ... the SysV ABI doesn't.
3707 F32 -> (0, 1, 4, fprs)
3708 F64 -> (0, 1, 8, fprs)
3711 moveResult reduceToF32 =
3715 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3716 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3717 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3719 | otherwise -> unitOL (MR r_dest r3)
3720 where rep = cmmRegRep dest
3721 r_dest = getRegisterReg dest
3723 outOfLineFloatOp mop =
3725 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3726 mkForeignLabel functionName Nothing True
3727 let mopLabelOrExpr = case mopExpr of
3728 CmmLit (CmmLabel lbl) -> Left lbl
3730 return (mopLabelOrExpr, reduce)
3732 (functionName, reduce) = case mop of
3733 MO_F32_Exp -> (FSLIT("exp"), True)
3734 MO_F32_Log -> (FSLIT("log"), True)
3735 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3737 MO_F32_Sin -> (FSLIT("sin"), True)
3738 MO_F32_Cos -> (FSLIT("cos"), True)
3739 MO_F32_Tan -> (FSLIT("tan"), True)
3741 MO_F32_Asin -> (FSLIT("asin"), True)
3742 MO_F32_Acos -> (FSLIT("acos"), True)
3743 MO_F32_Atan -> (FSLIT("atan"), True)
3745 MO_F32_Sinh -> (FSLIT("sinh"), True)
3746 MO_F32_Cosh -> (FSLIT("cosh"), True)
3747 MO_F32_Tanh -> (FSLIT("tanh"), True)
3748 MO_F32_Pwr -> (FSLIT("pow"), True)
3750 MO_F64_Exp -> (FSLIT("exp"), False)
3751 MO_F64_Log -> (FSLIT("log"), False)
3752 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3754 MO_F64_Sin -> (FSLIT("sin"), False)
3755 MO_F64_Cos -> (FSLIT("cos"), False)
3756 MO_F64_Tan -> (FSLIT("tan"), False)
3758 MO_F64_Asin -> (FSLIT("asin"), False)
3759 MO_F64_Acos -> (FSLIT("acos"), False)
3760 MO_F64_Atan -> (FSLIT("atan"), False)
3762 MO_F64_Sinh -> (FSLIT("sinh"), False)
3763 MO_F64_Cosh -> (FSLIT("cosh"), False)
3764 MO_F64_Tanh -> (FSLIT("tanh"), False)
3765 MO_F64_Pwr -> (FSLIT("pow"), False)
3766 other -> pprPanic "genCCall(ppc): unknown callish op"
3767 (pprCallishMachOp other)
3769 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3771 #endif /* powerpc_TARGET_ARCH */
3774 -- -----------------------------------------------------------------------------
3775 -- Generating a table-branch
3777 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3779 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3783 (reg,e_code) <- getSomeReg expr
3784 lbl <- getNewLabelNat
3785 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3786 (tableReg,t_code) <- getSomeReg $ dynRef
3788 jumpTable = map jumpTableEntryRel ids
3790 jumpTableEntryRel Nothing
3791 = CmmStaticLit (CmmInt 0 wordRep)
3792 jumpTableEntryRel (Just (BlockId id))
3793 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3794 where blockLabel = mkAsmTempLabel id
3796 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3797 (EAIndex reg wORD_SIZE) (ImmInt 0))
3799 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3800 -- on Mac OS X/x86_64, put the jump table in the text section
3801 -- to work around a limitation of the linker.
3802 -- ld64 is unable to handle the relocations for
3804 -- if L0 is not preceded by a non-anonymous label in its section.
3806 code = e_code `appOL` t_code `appOL` toOL [
3807 ADD wordRep op (OpReg tableReg),
3808 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3809 LDATA Text (CmmDataLabel lbl : jumpTable)
3812 code = e_code `appOL` t_code `appOL` toOL [
3813 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3814 ADD wordRep op (OpReg tableReg),
3815 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3821 (reg,e_code) <- getSomeReg expr
3822 lbl <- getNewLabelNat
3824 jumpTable = map jumpTableEntry ids
3825 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3826 code = e_code `appOL` toOL [
3827 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3828 JMP_TBL op [ id | Just id <- ids ]
3832 #elif powerpc_TARGET_ARCH
3836 (reg,e_code) <- getSomeReg expr
3837 tmp <- getNewRegNat I32
3838 lbl <- getNewLabelNat
3839 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3840 (tableReg,t_code) <- getSomeReg $ dynRef
3842 jumpTable = map jumpTableEntryRel ids
3844 jumpTableEntryRel Nothing
3845 = CmmStaticLit (CmmInt 0 wordRep)
3846 jumpTableEntryRel (Just (BlockId id))
3847 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3848 where blockLabel = mkAsmTempLabel id
3850 code = e_code `appOL` t_code `appOL` toOL [
3851 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3852 SLW tmp reg (RIImm (ImmInt 2)),
3853 LD I32 tmp (AddrRegReg tableReg tmp),
3854 ADD tmp tmp (RIReg tableReg),
3856 BCTR [ id | Just id <- ids ]
3861 (reg,e_code) <- getSomeReg expr
3862 tmp <- getNewRegNat I32
3863 lbl <- getNewLabelNat
3865 jumpTable = map jumpTableEntry ids
3867 code = e_code `appOL` toOL [
3868 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3869 SLW tmp reg (RIImm (ImmInt 2)),
3870 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3871 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3873 BCTR [ id | Just id <- ids ]
-- Fallback clause: on architectures with no jump-table implementation
-- above, table branches are not yet supported.
3877 genSwitch expr ids = panic "ToDo: genSwitch"
-- | One word of a jump table: a zero word for a hole (no branch target
-- for that index), otherwise the address of the target block's
-- assembler-local label.
jumpTableEntry mbBlockId =
    case mbBlockId of
        Nothing            -> CmmStaticLit (CmmInt 0 wordRep)
        Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
3884 -- -----------------------------------------------------------------------------
3886 -- -----------------------------------------------------------------------------
3889 -- -----------------------------------------------------------------------------
3890 -- 'condIntReg' and 'condFltReg': condition codes into registers
3892 -- Turn those condition codes into integers now (when they appear on
3893 -- the right hand side of an assignment).
3895 -- (If applicable) Do not fill the delay slots here; you will confuse the
3896 -- register allocator.
3898 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3900 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3902 #if alpha_TARGET_ARCH
-- Alpha: turning condition codes into registers is not implemented;
-- these equations exist only to keep the module total under
-- alpha_TARGET_ARCH and panic if ever reached.
3903 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3904 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3905 #endif /* alpha_TARGET_ARCH */
3907 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3909 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3911 condIntReg cond x y = do
3912 CondCode _ cond cond_code <- condIntCode cond x y
3913 tmp <- getNewRegNat I8
3915 code dst = cond_code `appOL` toOL [
3916 SETCC cond (OpReg tmp),
3917 MOVZxL I8 (OpReg tmp) (OpReg dst)
3920 return (Any I32 code)
3924 #if i386_TARGET_ARCH
3926 condFltReg cond x y = do
3927 CondCode _ cond cond_code <- condFltCode cond x y
3928 tmp <- getNewRegNat I8
3930 code dst = cond_code `appOL` toOL [
3931 SETCC cond (OpReg tmp),
3932 MOVZxL I8 (OpReg tmp) (OpReg dst)
3935 return (Any I32 code)
3939 #if x86_64_TARGET_ARCH
3941 condFltReg cond x y = do
3942 CondCode _ cond cond_code <- condFltCode cond x y
3943 tmp1 <- getNewRegNat wordRep
3944 tmp2 <- getNewRegNat wordRep
3946 -- We have to worry about unordered operands (eg. comparisons
3947 -- against NaN). If the operands are unordered, the comparison
3948 -- sets the parity flag, carry flag and zero flag.
3949 -- All comparisons are supposed to return false for unordered
3950 -- operands except for !=, which returns true.
3952 -- Optimisation: we don't have to test the parity flag if we
3953 -- know the test has already excluded the unordered case: eg >
3954 -- and >= test for a zero carry flag, which can only occur for
3955 -- ordered operands.
3957 -- ToDo: by reversing comparisons we could avoid testing the
3958 -- parity flag in more cases.
3963 NE -> or_unordered dst
3964 GU -> plain_test dst
3965 GEU -> plain_test dst
3966 _ -> and_ordered dst)
3968 plain_test dst = toOL [
3969 SETCC cond (OpReg tmp1),
3970 MOVZxL I8 (OpReg tmp1) (OpReg dst)
3972 or_unordered dst = toOL [
3973 SETCC cond (OpReg tmp1),
3974 SETCC PARITY (OpReg tmp2),
3975 OR I8 (OpReg tmp1) (OpReg tmp2),
3976 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3978 and_ordered dst = toOL [
3979 SETCC cond (OpReg tmp1),
3980 SETCC NOTPARITY (OpReg tmp2),
3981 AND I8 (OpReg tmp1) (OpReg tmp2),
3982 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3985 return (Any I32 code)
3989 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3991 #if sparc_TARGET_ARCH
3993 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3994 (src, code) <- getSomeReg x
3995 tmp <- getNewRegNat I32
3997 code__2 dst = code `appOL` toOL [
3998 SUB False True g0 (RIReg src) g0,
3999 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4000 return (Any I32 code__2)
4002 condIntReg EQQ x y = do
4003 (src1, code1) <- getSomeReg x
4004 (src2, code2) <- getSomeReg y
4005 tmp1 <- getNewRegNat I32
4006 tmp2 <- getNewRegNat I32
4008 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4009 XOR False src1 (RIReg src2) dst,
4010 SUB False True g0 (RIReg dst) g0,
4011 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4012 return (Any I32 code__2)
4014 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4015 (src, code) <- getSomeReg x
4016 tmp <- getNewRegNat I32
4018 code__2 dst = code `appOL` toOL [
4019 SUB False True g0 (RIReg src) g0,
4020 ADD True False g0 (RIImm (ImmInt 0)) dst]
4021 return (Any I32 code__2)
4023 condIntReg NE x y = do
4024 (src1, code1) <- getSomeReg x
4025 (src2, code2) <- getSomeReg y
4026 tmp1 <- getNewRegNat I32
4027 tmp2 <- getNewRegNat I32
4029 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4030 XOR False src1 (RIReg src2) dst,
4031 SUB False True g0 (RIReg dst) g0,
4032 ADD True False g0 (RIImm (ImmInt 0)) dst]
4033 return (Any I32 code__2)
-- General case (SPARC): materialise an integer condition as 0/1 using
-- a small branch diamond.  The fall-through path writes 0 into dst and
-- jumps to the join block (lbl2); the taken path (lbl1) writes 1.
-- The NOPs occupy the branch delay slots — per the note above, delay
-- slots must not be filled here or the register allocator is confused.
4035 condIntReg cond x y = do
4036 BlockId lbl1 <- getBlockIdNat
4037 BlockId lbl2 <- getBlockIdNat
4038 CondCode _ cond cond_code <- condIntCode cond x y
4040 code__2 dst = cond_code `appOL` toOL [
4041 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4042 OR False g0 (RIImm (ImmInt 0)) dst,
4043 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4044 NEWBLOCK (BlockId lbl1),
4045 OR False g0 (RIImm (ImmInt 1)) dst,
4046 NEWBLOCK (BlockId lbl2)]
4047 return (Any I32 code__2)
-- Floating-point variant of the branch diamond above: identical 0/1
-- materialisation, but the conditional branch is BF (branch on
-- floating-point condition codes) instead of BI.  NOPs again fill the
-- delay slots.
4049 condFltReg cond x y = do
4050 BlockId lbl1 <- getBlockIdNat
4051 BlockId lbl2 <- getBlockIdNat
4052 CondCode _ cond cond_code <- condFltCode cond x y
4054 code__2 dst = cond_code `appOL` toOL [
4056 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4057 OR False g0 (RIImm (ImmInt 0)) dst,
4058 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4059 NEWBLOCK (BlockId lbl1),
4060 OR False g0 (RIImm (ImmInt 1)) dst,
4061 NEWBLOCK (BlockId lbl2)]
4062 return (Any I32 code__2)
4064 #endif /* sparc_TARGET_ARCH */
4066 #if powerpc_TARGET_ARCH
4067 condReg getCond = do
4068 lbl1 <- getBlockIdNat
4069 lbl2 <- getBlockIdNat
4070 CondCode _ cond cond_code <- getCond
4072 {- code dst = cond_code `appOL` toOL [
4081 code dst = cond_code
4085 RLWINM dst dst (bit + 1) 31 31
4088 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4091 (bit, do_negate) = case cond of
4105 return (Any I32 code)
-- PowerPC: integer and floating-point conditions share the 'condReg'
-- helper above; only the condition-code generator differs.
4107 condIntReg cond x y = condReg (condIntCode cond x y)
4108 condFltReg cond x y = condReg (condFltCode cond x y)
4109 #endif /* powerpc_TARGET_ARCH */
4112 -- -----------------------------------------------------------------------------
4113 -- 'trivial*Code': deal with trivial instructions
4115 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4116 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4117 -- Only look for constants on the right hand side, because that's
4118 -- where the generic optimizer will have put them.
4120 -- Similarly, for unary instructions, we don't have to worry about
4121 -- matching an StInt as the argument, because genericOpt will already
4122 -- have handled the constant-folding.
4126 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4127 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4128 -> Maybe (Operand -> Operand -> Instr)
4129 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4130 -> Maybe (Operand -> Operand -> Instr)
4131 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4132 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4134 -> CmmExpr -> CmmExpr -- the two arguments
4137 #ifndef powerpc_TARGET_ARCH
4140 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4141 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4142 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4143 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4145 -> CmmExpr -> CmmExpr -- the two arguments
4151 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4152 ,IF_ARCH_i386 ((Operand -> Instr)
4153 ,IF_ARCH_x86_64 ((Operand -> Instr)
4154 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4155 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4157 -> CmmExpr -- the one argument
4160 #ifndef powerpc_TARGET_ARCH
4163 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4164 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4165 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4166 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4168 -> CmmExpr -- the one argument
4172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4174 #if alpha_TARGET_ARCH
4176 trivialCode instr x (StInt y)
4178 = getRegister x `thenNat` \ register ->
4179 getNewRegNat IntRep `thenNat` \ tmp ->
4181 code = registerCode register tmp
4182 src1 = registerName register tmp
4183 src2 = ImmInt (fromInteger y)
4184 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4186 return (Any IntRep code__2)
4188 trivialCode instr x y
4189 = getRegister x `thenNat` \ register1 ->
4190 getRegister y `thenNat` \ register2 ->
4191 getNewRegNat IntRep `thenNat` \ tmp1 ->
4192 getNewRegNat IntRep `thenNat` \ tmp2 ->
4194 code1 = registerCode register1 tmp1 []
4195 src1 = registerName register1 tmp1
4196 code2 = registerCode register2 tmp2 []
4197 src2 = registerName register2 tmp2
4198 code__2 dst = asmSeqThen [code1, code2] .
4199 mkSeqInstr (instr src1 (RIReg src2) dst)
4201 return (Any IntRep code__2)
4204 trivialUCode instr x
4205 = getRegister x `thenNat` \ register ->
4206 getNewRegNat IntRep `thenNat` \ tmp ->
4208 code = registerCode register tmp
4209 src = registerName register tmp
4210 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4212 return (Any IntRep code__2)
4215 trivialFCode _ instr x y
4216 = getRegister x `thenNat` \ register1 ->
4217 getRegister y `thenNat` \ register2 ->
4218 getNewRegNat F64 `thenNat` \ tmp1 ->
4219 getNewRegNat F64 `thenNat` \ tmp2 ->
4221 code1 = registerCode register1 tmp1
4222 src1 = registerName register1 tmp1
4224 code2 = registerCode register2 tmp2
4225 src2 = registerName register2 tmp2
4227 code__2 dst = asmSeqThen [code1 [], code2 []] .
4228 mkSeqInstr (instr src1 src2 dst)
4230 return (Any F64 code__2)
4232 trivialUFCode _ instr x
4233 = getRegister x `thenNat` \ register ->
4234 getNewRegNat F64 `thenNat` \ tmp ->
4236 code = registerCode register tmp
4237 src = registerName register tmp
4238 code__2 dst = code . mkSeqInstr (instr src dst)
4240 return (Any F64 code__2)
4242 #endif /* alpha_TARGET_ARCH */
4244 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4246 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4249 The Rules of the Game are:
4251 * You cannot assume anything about the destination register dst;
4252 it may be anything, including a fixed reg.
4254 * You may compute an operand into a fixed reg, but you may not
4255 subsequently change the contents of that fixed reg. If you
4256 want to do so, first copy the value either to a temporary
4257 or into dst. You are free to modify dst even if it happens
4258 to be a fixed reg -- that's not your problem.
4260 * You cannot assume that a fixed reg will stay live over an
4261 arbitrary computation. The same applies to the dst reg.
4263 * Temporary regs obtained from getNewRegNat are distinct from
4264 each other and from all other regs, and stay live over
4265 arbitrary computations.
4267 --------------------
4269 SDM's version of The Rules:
4271 * If getRegister returns Any, that means it can generate correct
4272 code which places the result in any register, period. Even if that
4273 register happens to be read during the computation.
4275 Corollary #1: this means that if you are generating code for an
4276 operation with two arbitrary operands, you cannot assign the result
4277 of the first operand into the destination register before computing
4278 the second operand. The second operand might require the old value
4279 of the destination register.
4281 Corollary #2: A function might be able to generate more efficient
4282 code if it knows the destination register is a new temporary (and
4283 therefore not read by any of the sub-computations).
4285 * If getRegister returns Any, then the code it generates may modify only:
4286 (a) fresh temporaries
4287 (b) the destination register
4288 (c) known registers (eg. %ecx is used by shifts)
4289 In particular, it may *not* modify global registers, unless the global
4290 register happens to be the destination register.
4293 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4294 | not (is64BitLit lit_a) = do
4295 b_code <- getAnyReg b
4298 = b_code dst `snocOL`
4299 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4301 return (Any rep code)
4303 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4305 -- This is re-used for floating pt instructions too.
4306 genTrivialCode rep instr a b = do
4307 (b_op, b_code) <- getNonClobberedOperand b
4308 a_code <- getAnyReg a
4309 tmp <- getNewRegNat rep
4311 -- We want the value of b to stay alive across the computation of a.
4312 -- But, we want to calculate a straight into the destination register,
4313 -- because the instruction only has two operands (dst := dst `op` src).
4314 -- The troublesome case is when the result of b is in the same register
4315 -- as the destination reg. In this case, we have to save b in a
4316 -- new temporary across the computation of a.
4318 | dst `regClashesWithOp` b_op =
4320 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4322 instr (OpReg tmp) (OpReg dst)
4326 instr b_op (OpReg dst)
4328 return (Any rep code)
4330 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4331 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4332 reg `regClashesWithOp` _ = False
4336 trivialUCode rep instr x = do
4337 x_code <- getAnyReg x
4343 return (Any rep code)
4347 #if i386_TARGET_ARCH
4349 trivialFCode pk instr x y = do
4350 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4351 (y_reg, y_code) <- getSomeReg y
4356 instr pk x_reg y_reg dst
4358 return (Any pk code)
4362 #if x86_64_TARGET_ARCH
-- x86_64: SSE float instructions are two-operand (dst := dst `op` src),
-- the same shape as the integer ops, so we can reuse genTrivialCode,
-- specialising the instruction constructor to the given MachRep.
4364 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4370 trivialUFCode rep instr x = do
4371 (x_reg, x_code) <- getSomeReg x
4377 return (Any rep code)
4379 #endif /* i386_TARGET_ARCH */
4381 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4383 #if sparc_TARGET_ARCH
4385 trivialCode pk instr x (CmmLit (CmmInt y d))
4388 (src1, code) <- getSomeReg x
4389 tmp <- getNewRegNat I32
4391 src2 = ImmInt (fromInteger y)
4392 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4393 return (Any I32 code__2)
4395 trivialCode pk instr x y = do
4396 (src1, code1) <- getSomeReg x
4397 (src2, code2) <- getSomeReg y
4398 tmp1 <- getNewRegNat I32
4399 tmp2 <- getNewRegNat I32
4401 code__2 dst = code1 `appOL` code2 `snocOL`
4402 instr src1 (RIReg src2) dst
4403 return (Any I32 code__2)
4406 trivialFCode pk instr x y = do
4407 (src1, code1) <- getSomeReg x
4408 (src2, code2) <- getSomeReg y
4409 tmp1 <- getNewRegNat (cmmExprRep x)
4410 tmp2 <- getNewRegNat (cmmExprRep y)
4411 tmp <- getNewRegNat F64
4413 promote x = FxTOy F32 F64 x tmp
4420 code1 `appOL` code2 `snocOL`
4421 instr pk src1 src2 dst
4422 else if pk1 == F32 then
4423 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4424 instr F64 tmp src2 dst
4426 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4427 instr F64 src1 tmp dst
4428 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- | SPARC unary integer op: source register wrapped as RIReg operand.
-- NOTE(review): 'tmp' is unused in the visible lines; line 4434 (the
-- 'let' header) is elided in this dump.
4431 trivialUCode pk instr x = do
4432 (src, code) <- getSomeReg x
4433 tmp <- getNewRegNat pk
4435 code__2 dst = code `snocOL` instr (RIReg src) dst
4436 return (Any pk code__2)
-- | SPARC unary floating-point op: plain register source, register
-- destination.
-- NOTE(review): 'tmp' is unused in the visible lines; line 4442 (the
-- 'let' header) is elided in this dump.
4439 trivialUFCode pk instr x = do
4440 (src, code) <- getSomeReg x
4441 tmp <- getNewRegNat pk
4443 code__2 dst = code `snocOL` instr src dst
4444 return (Any pk code__2)
4446 #endif /* sparc_TARGET_ARCH */
4448 #if powerpc_TARGET_ARCH
4451 Wolfgang's PowerPC version of The Rules:
4453 A slightly modified version of The Rules to take advantage of the fact
4454 that PowerPC instructions work on all registers and don't implicitly
4455 clobber any fixed registers.
4457 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4459 * If getRegister returns Any, then the code it generates may modify only:
4460 (a) fresh temporaries
4461 (b) the destination register
4462 It may *not* modify global registers, unless the global
4463 register happens to be the destination register.
4464 It may not clobber any other registers. In fact, only ccalls clobber any
4466 Also, it may not modify the counter register (used by genCCall).
4468 Corollary: If a getRegister for a subexpression returns Fixed, you need
4469 not move it to a fresh temporary before evaluating the next subexpression.
4470 The Fixed register won't be modified.
4471 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4473 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4474 the value of the destination register.
-- | PowerPC two-operand integer op.  First clause: a literal operand
-- that makeImmediate can encode (given rep and signedness) is emitted
-- as RIImm; otherwise the second clause materialises both operands in
-- registers.  PPC instructions take (dst, src1, src2) order.
-- NOTE(review): line 4479 (presumably '= do') of the first clause is
-- elided in this dump.
4477 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4478 | Just imm <- makeImmediate rep signed y
4480 (src1, code1) <- getSomeReg x
4481 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4482 return (Any rep code)
4484 trivialCode rep signed instr x y = do
4485 (src1, code1) <- getSomeReg x
4486 (src2, code2) <- getSomeReg y
4487 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4488 return (Any rep code)
-- | Like 'trivialCode', but for PowerPC instructions that have no
-- immediate form: both operands are forced into registers and the
-- instruction is applied as (dst, src1, src2).
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = (xCode `appOL` yCode) `snocOL` instr dst xReg yReg
    return (Any rep assemble)
-- | PowerPC unary op: evaluate the operand into a register and apply
-- the single-source instruction, result in @dst@.
trivialUCode rep instr x = do
    (opReg, opCode) <- getSomeReg x
    let gen dst = opCode `snocOL` instr dst opReg
    return (Any rep gen)
4503 -- There is no "remainder" instruction on the PPC, so we have to do
4505 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | PowerPC has no remainder instruction, so compute
-- rem = x - (x `div` y) * y using the supplied division instruction
-- (DIVW or DIVWU, chosen by the caller for signedness).
-- NOTE(review): the divide instruction itself (line 4513) and the
-- final subtract (4515-4516) are elided in this dump; only the MULLW
-- back-multiplication is visible.
4507 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4508 -> CmmExpr -> CmmExpr -> NatM Register
4509 remainderCode rep div x y = do
4510 (src1, code1) <- getSomeReg x
4511 (src2, code2) <- getSomeReg y
4512 let code dst = code1 `appOL` code2 `appOL` toOL [
4514 MULLW dst dst (RIReg src2),
4517 return (Any rep code)
4519 #endif /* powerpc_TARGET_ARCH */
4522 -- -----------------------------------------------------------------------------
4523 -- Coercing to/from integer/floating-point...
4525 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4526 -- conversions. We have to store temporaries in memory to move
4527 -- between the integer and the floating point register sets.
4529 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4530 pretend, on sparc at least, that double and float regs are separate
4531 -- kinds, so the value has to be computed into one kind before being
4532 -- explicitly "converted" to live in the other kind.
-- Shared type signatures for the per-architecture int<->FP coercions
-- defined below; the Dbl<->Flt pair exists only on SPARC, where F32
-- and F64 are treated as distinct register kinds.
4534 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4535 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4537 #if sparc_TARGET_ARCH
4538 coerceDbl2Flt :: CmmExpr -> NatM Register
4539 coerceFlt2Dbl :: CmmExpr -> NatM Register
4542 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4544 #if alpha_TARGET_ARCH
-- Alpha int<->FP coercions: move the value through a stack slot
-- (spRel 0) because integer and FP register files cannot be moved
-- between directly -- store from one side, load into the other.
-- NOTE(review): both equation heads (lines 4546, 4561) and several
-- instruction lines are elided in this dump; the first fragment is
-- presumably coerceInt2FP, the second coerceFP2Int -- confirm against
-- the full source.
4547 = getRegister x `thenNat` \ register ->
4548 getNewRegNat IntRep `thenNat` \ reg ->
4550 code = registerCode register reg
4551 src = registerName register reg
4553 code__2 dst = code . mkSeqInstrs [
4555 LD TF dst (spRel 0),
4558 return (Any F64 code__2)
4562 = getRegister x `thenNat` \ register ->
4563 getNewRegNat F64 `thenNat` \ tmp ->
4565 code = registerCode register tmp
4566 src = registerName register tmp
4568 code__2 dst = code . mkSeqInstrs [
4570 ST TF tmp (spRel 0),
4573 return (Any IntRep code__2)
4575 #endif /* alpha_TARGET_ARCH */
4577 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4579 #if i386_TARGET_ARCH
-- | i386 (x87) int -> float: a single GITOF/GITOD pseudo-instruction
-- chosen by the target rep.
-- NOTE(review): line 4583 (the 'let' header) is elided in this dump.
4581 coerceInt2FP from to x = do
4582 (x_reg, x_code) <- getSomeReg x
4584 opc = case to of F32 -> GITOF; F64 -> GITOD
4585 code dst = x_code `snocOL` opc x_reg dst
4586 -- ToDo: works for non-I32 reps?
4588 return (Any to code)
-- | i386 (x87) float -> int: GFTOI/GDTOI chosen by the source rep.
-- NOTE(review): line 4594 (the 'let' header) is elided in this dump.
4592 coerceFP2Int from to x = do
4593 (x_reg, x_code) <- getSomeReg x
4595 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4596 code dst = x_code `snocOL` opc x_reg dst
4597 -- ToDo: works for non-I32 reps?
4599 return (Any to code)
4601 #endif /* i386_TARGET_ARCH */
4603 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4605 #if x86_64_TARGET_ARCH
-- | x86-64 (SSE) float -> int via CVTSS2SI/CVTSD2SI; the source may be
-- a memory operand.
-- NOTE(review): line 4609 (the 'let' header) is elided in this dump.
4607 coerceFP2Int from to x = do
4608 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4610 opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4611 code dst = x_code `snocOL` opc x_op dst
4613 return (Any to code) -- works even if the destination rep is <I32
-- | x86-64 (SSE) int -> float via CVTSI2SS/CVTSI2SD; the source may be
-- a memory operand.
-- NOTE(review): line 4617 (the 'let' header) is elided in this dump.
4615 coerceInt2FP from to x = do
4616 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4618 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4619 code dst = x_code `snocOL` opc x_op dst
4621 return (Any to code) -- works even if the destination rep is <I32
-- | x86-64 only: convert between the two SSE float widths
-- (CVTSD2SS narrows to F32, CVTSS2SD widens to F64).
-- NOTE(review): line 4626 (the 'let' header) is elided in this dump.
4623 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4624 coerceFP2FP to x = do
4625 (x_reg, x_code) <- getSomeReg x
4627 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4628 code dst = x_code `snocOL` opc x_reg dst
4630 return (Any to code)
4634 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4636 #if sparc_TARGET_ARCH
-- | SPARC int -> float: bounce the bits through a stack slot below the
-- stack pointer (spRel (-2)) to cross from the integer to the FP
-- register file, then convert in place with FxTOy.
-- NOTE(review): line 4640 (the 'let' header) is elided in this dump.
4638 coerceInt2FP pk1 pk2 x = do
4639 (src, code) <- getSomeReg x
4641 code__2 dst = code `appOL` toOL [
4642 ST pk1 src (spRel (-2)),
4643 LD pk1 (spRel (-2)) dst,
4644 FxTOy pk1 pk2 dst dst]
4645 return (Any pk2 code__2)
-- | SPARC float -> int: convert within the FP file (FxTOy), then
-- bounce through a stack slot to reach the integer register file.
-- The ASSERT checks the source really is a float rep.
-- NOTE(review): 'reg' is unused in the visible lines; lines 4652 and
-- 4654 are elided in this dump.
4648 coerceFP2Int pk fprep x = do
4649 (src, code) <- getSomeReg x
4650 reg <- getNewRegNat fprep
4651 tmp <- getNewRegNat pk
4653 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4655 FxTOy fprep pk src tmp,
4656 ST pk tmp (spRel (-2)),
4657 LD pk (spRel (-2)) dst]
4658 return (Any pk code__2)
-- | SPARC: narrow a double to single precision.  F32 and F64 are
-- distinct register kinds here, so this is a real FxTOy conversion.
coerceDbl2Flt x = do
    (srcReg, srcCode) <- getSomeReg x
    let conv dst = srcCode `snocOL` FxTOy F64 F32 srcReg dst
    return (Any F32 conv)
-- | SPARC: widen a single-precision value to double precision,
-- the mirror image of 'coerceDbl2Flt'.
coerceFlt2Dbl x = do
    (srcReg, srcCode) <- getSomeReg x
    let conv dst = srcCode `snocOL` FxTOy F32 F64 srcReg dst
    return (Any F64 conv)
4670 #endif /* sparc_TARGET_ARCH */
4672 #if powerpc_TARGET_ARCH
-- | PowerPC int -> float.  Looks like the classic magic-number trick:
-- build the double 0x43300000_80000000 (2^52 + 2^31) as static data,
-- store (0x4330, src XOR 0x8000_0000) to the stack, load it as an F64
-- and subtract the magic constant, leaving the signed-int value as a
-- double -- TODO confirm elided lines match this reading.
-- Sub-word sources are sign-extended first (maybe_exts); an F32 target
-- gets a final FRSP rounding.
-- NOTE(review): the 'let' header (4680), the static-data emission
-- around 4682-4683, the magic-constant load near 4692, and the
-- fall-through cases of maybe_exts/maybe_frsp (4699, 4702) are elided
-- in this dump.
4673 coerceInt2FP fromRep toRep x = do
4674 (src, code) <- getSomeReg x
4675 lbl <- getNewLabelNat
4676 itmp <- getNewRegNat I32
4677 ftmp <- getNewRegNat F64
4678 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4679 Amode addr addr_code <- getAmode dynRef
4681 code' dst = code `appOL` maybe_exts `appOL` toOL [
4684 CmmStaticLit (CmmInt 0x43300000 I32),
4685 CmmStaticLit (CmmInt 0x80000000 I32)],
4686 XORIS itmp src (ImmInt 0x8000),
4687 ST I32 itmp (spRel 3),
4688 LIS itmp (ImmInt 0x4330),
4689 ST I32 itmp (spRel 2),
4690 LD F64 ftmp (spRel 2)
4691 ] `appOL` addr_code `appOL` toOL [
4693 FSUB F64 dst ftmp dst
4694 ] `appOL` maybe_frsp dst
4696 maybe_exts = case fromRep of
4697 I8 -> unitOL $ EXTS I8 src src
4698 I16 -> unitOL $ EXTS I16 src src
4700 maybe_frsp dst = case toRep of
4701 F32 -> unitOL $ FRSP dst dst
4703 return (Any toRep code')
-- | PowerPC float -> int: convert to an integer inside an FP register
-- (elided line 4712, presumably FCTIWZ into 'tmp'), spill the 64-bit
-- FP register to the stack, and reload only the low word, which holds
-- the 32-bit result.
4705 coerceFP2Int fromRep toRep x = do
4706 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4707 (src, code) <- getSomeReg x
4708 tmp <- getNewRegNat F64
4710 code' dst = code `appOL` toOL [
4711 -- convert to int in FP reg
4713 -- store value (64bit) from FP to stack
4714 ST F64 tmp (spRel 2),
4715 -- read low word of value (high word is undefined)
4716 LD I32 dst (spRel 3)]
4717 return (Any toRep code')
4718 #endif /* powerpc_TARGET_ARCH */
4721 -- -----------------------------------------------------------------------------
4722 -- eXTRA_STK_ARGS_HERE
4724 -- We (allegedly) put the first six C-call arguments in registers;
4725 -- where do we start putting the rest of them?
4727 -- Moved from MachInstrs (SDM):
4729 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4730 eXTRA_STK_ARGS_HERE :: Int
4732 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))