1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
34 import StaticFlags ( opt_PIC )
35 import ForeignCall ( CCallConv(..) )
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import Debug.Trace ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Data.Maybe ( fromJust )
53 -- -----------------------------------------------------------------------------
54 -- Top-level of the instruction selector
56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
57 -- They are really trees of insns to facilitate fast appending, where a
58 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.  'OrdList' gives cheap snoc/append while code is being built;
-- it is flattened back to a list when basic blocks are reassembled.
61 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm item.  A proc has each basic block
-- translated (collecting any data sections the selector emits along the
-- way) and, when a PIC base register is in use, is prefixed with PIC
-- initialisation code.  Plain data passes through untranslated.
63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
64 cmmTopCodeGen (CmmProc info lab params blocks) = do
65 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
66 picBaseMb <- getPicBaseMaybeNat
67 let proc = CmmProc info lab params (concat nat_blocks)
68 tops = proc : concat statics
-- NOTE(review): the 'case picBaseMb of' scrutinee line is elided in
-- this listing; the two alternatives below dispatch on it.
70 Just picBase -> initializePicBase picBase tops
71 Nothing -> return tops
73 cmmTopCodeGen (CmmData sec dat) = do
74 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Select instructions for one basic block.  Because selection can emit
-- NEWBLOCK markers and LDATA pseudo-instructions, the flat result stream
-- is re-partitioned afterwards into proper basic blocks plus a list of
-- data sections.
76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
77 basicBlockCodeGen (BasicBlock id stmts) = do
78 instrs <- stmtsToInstrs stmts
79 -- code generation may introduce new basic block boundaries, which
80 -- are indicated by the NEWBLOCK instruction. We must split up the
81 -- instruction stream into basic blocks again. Also, we extract
-- LDATA pseudo-instructions into the statics list (see mkBlocks).
84 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- NEWBLOCK ends the run of instructions accumulated so far and
-- starts a fresh labelled block.
86 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
87 = ([], BasicBlock id instrs : blocks, statics)
-- LDATA is not a real instruction; route it to the statics.
88 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
89 = (instrs, blocks, CmmData sec dat:statics)
-- Ordinary instruction: prepend to the current block.  We fold
-- from the right, so instruction order is preserved.
90 mkBlocks instr (instrs,blocks,statics)
91 = (instr:instrs, blocks, statics)
93 return (BasicBlock id top : other_blocks, statics)
-- | Translate a statement list and concatenate the per-statement
-- instruction trees.  (The equation's LHS line is elided in this
-- listing.)
95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
97 = do instrss <- mapM stmtToInstrs stmts
98 return (concatOL instrss)
-- | Dispatch on one Cmm statement.  Assignments are split three ways by
-- rep: floating point, 64-bit-on-32-bit, and ordinary integer.
100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
101 stmtToInstrs stmt = case stmt of
102 CmmNop -> return nilOL
103 CmmComment s -> return (unitOL (COMMENT s))
-- NOTE(review): the CmmAssign alternative's header is elided in this
-- listing; the guards below pick the register-assignment generator.
106 | isFloatingRep kind -> assignReg_FltCode kind reg src
107 #if WORD_SIZE_IN_BITS==32
108 | kind == I64 -> assignReg_I64Code reg src
110 | otherwise -> assignReg_IntCode kind reg src
111 where kind = cmmRegRep reg
-- NOTE(review): likewise the CmmStore alternative's header is elided;
-- these guards pick the memory-store generator.
114 | isFloatingRep kind -> assignMem_FltCode kind addr src
115 #if WORD_SIZE_IN_BITS==32
116 | kind == I64 -> assignMem_I64Code addr src
118 | otherwise -> assignMem_IntCode kind addr src
119 where kind = cmmExprRep src
121 CmmCall target result_regs args vols
122 -> genCCall target result_regs args vols
124 CmmBranch id -> genBranch id
125 CmmCondBranch arg id -> genCondJump id arg
126 CmmSwitch arg ids -> genSwitch arg ids
-- The params of CmmJump are not needed for code generation.
127 CmmJump arg params -> genJump arg
129 -- -----------------------------------------------------------------------------
130 -- General things for putting together code sequences
132 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
133 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' node into the equivalent explicit addition,
-- @reg + off@, performed at the register's 'MachRep'.  (The ToDo above
-- asks whether the conversion should instead go the other way.)
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
  let rep    = cmmRegRep reg
      offLit = CmmLit (CmmInt (fromIntegral off) rep)
  in  CmmMachOp (MO_Add rep) [CmmReg reg, offLit]
139 -- -----------------------------------------------------------------------------
140 -- Code gen for 64-bit arithmetic on 32-bit platforms
143 Simple support for generating 64-bit code (ie, 64 bit values and 64
144 bit assignments) on 32-bit platforms. Unlike the main code generator
145 we merely shoot for generating working code as simply as possible, and
146 pay little attention to code quality. Specifically, there is no
147 attempt to deal cleverly with the fixed-vs-floating register
148 distinction; all values are generated into (pairs of) floating
149 registers, even if this would mean some redundant reg-reg moves as a
150 result. Only one of the VRegUniques is returned, since it will be
151 of the VRegUniqueLo form, and the upper-half VReg can be determined
152 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit platform.
-- NOTE(review): the constructor header line is elided in this listing;
-- from the uses below it appears to pair an InstrBlock with the
-- low-word Reg -- confirm against the full source.
155 data ChildCode64 -- a.k.a "Register64"
158 Reg -- the lower 32-bit temporary which contains the
159 -- result; use getHiVRegFromLo to find the other
160 -- VRegUnique. Rules of this simplified insn
161 -- selection game are therefore that the returned
162 -- Reg may be modified
164 #if WORD_SIZE_IN_BITS==32
-- Store a 64-bit value (second argument) at an address (first).
165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- Assign a 64-bit value to a local register (pair of vregs).
166 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
169 #ifndef x86_64_TARGET_ARCH
-- Compute a 64-bit expression into a vreg pair; see 'ChildCode64'.
170 iselExpr64 :: CmmExpr -> NatM ChildCode64
173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- x86 (little-endian): low word stored at addr, high word at addr+4.
177 assignMem_I64Code addrTree valueTree = do
178 Amode addr addr_code <- getAmode addrTree
179 ChildCode64 vcode rlo <- iselExpr64 valueTree
181 rhi = getHiVRegFromLo rlo
183 -- Little-endian store
184 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
-- fromJust: assumes addrOffset can always fold a +4 into this amode
-- on x86 -- TODO confirm it cannot fail here.
185 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
187 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- x86: vreg-to-vreg copy of both halves of a 64-bit local.
190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
191 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
193 r_dst_lo = mkVReg u_dst I32
194 r_dst_hi = getHiVRegFromLo r_dst_lo
195 r_src_hi = getHiVRegFromLo r_src_lo
196 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
197 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
200 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers may hold I64 values on a 32-bit target.
203 assignReg_I64Code lvalue valueTree
204 = panic "assignReg_I64Code(i386): invalid lvalue"
-- | 64-bit integer literal: move each 32-bit half into its register.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        -- BUG FIX: shift the full Integer FIRST, then truncate to
        -- Word32.  The original expression
        --   (fromIntegral i `shiftR` 32) :: Word32
        -- truncated 'i' to Word32 *before* the shift, and a Word32
        -- shifted right by 32 is always 0 -- so the high word of
        -- every 64-bit literal was lost.
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, low word from addr, high from addr+4.
220 iselExpr64 (CmmLoad addrTree I64) = do
221 Amode addr addr_code <- getAmode addrTree
222 (rlo,rhi) <- getNewRegPairNat I32
224 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
225 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
228 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A local I64 register already lives in a vreg pair; no code needed.
232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
233 = return (ChildCode64 nilOL (mkVReg vu I32))
-- | 64-bit addition of an expression and a literal: 32-bit ADD on the
-- low words, then ADC to propagate the carry into the high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        -- BUG FIX: shift the Integer before truncating to Word32.
        -- The original '(fromIntegral i `shiftR` 32) :: Word32'
        -- truncated first, so the shift-by-32 on a Word32 always
        -- produced 0 and the literal's high word was dropped.
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code = code1 `appOL`
               toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                      ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
                      MOV I32 (OpReg r1hi) (OpReg rhi),
                      ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)
-- General 64-bit addition: ADD the low words, ADC the high words.
251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
252 ChildCode64 code1 r1lo <- iselExpr64 e1
253 ChildCode64 code2 r2lo <- iselExpr64 e2
254 (rlo,rhi) <- getNewRegPairNat I32
256 r1hi = getHiVRegFromLo r1lo
257 r2hi = getHiVRegFromLo r2lo
260 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
261 ADD I32 (OpReg r2lo) (OpReg rlo),
262 MOV I32 (OpReg r1hi) (OpReg rhi),
263 ADC I32 (OpReg r2hi) (OpReg rhi) ]
265 return (ChildCode64 code rlo)
-- Any other 64-bit expression is unsupported on i386.
268 = pprPanic "iselExpr64(i386)" (ppr expr)
270 #endif /* i386_TARGET_ARCH */
272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
274 #if sparc_TARGET_ARCH
-- SPARC (big-endian): high word stored at offset 0, low word at +4.
276 assignMem_I64Code addrTree valueTree = do
-- NOTE(review): 'addr' and 'addr_code' from getAmode are unused (the
-- address is recomputed via getSomeReg below) -- looks redundant;
-- confirm against the full source.
277 Amode addr addr_code <- getAmode addrTree
278 ChildCode64 vcode rlo <- iselExpr64 valueTree
279 (src, code) <- getSomeReg addrTree
281 rhi = getHiVRegFromLo rlo
283 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
284 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
285 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- SPARC: copy both halves; OR with %g0 (which always reads as zero on
-- SPARC) is the canonical register-to-register move.
287 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
288 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
-- NOTE(review): the destination vreg uses the local's rep 'pk' here,
-- where the i386/ppc versions use I32 -- confirm intentional.
290 r_dst_lo = mkVReg u_dst pk
291 r_dst_hi = getHiVRegFromLo r_dst_lo
292 r_src_hi = getHiVRegFromLo r_src_lo
293 mov_lo = mkMOV r_src_lo r_dst_lo
294 mov_hi = mkMOV r_src_hi r_dst_hi
295 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
296 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
297 assignReg_I64Code lvalue valueTree
298 = panic "assignReg_I64Code(sparc): invalid lvalue"
301 -- Don't delete this -- it's very handy for debugging.
303 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
304 -- = panic "iselExpr64(???)"
-- SPARC load: high word from offset 0, low word from +4 (big-endian).
-- NOTE(review): the AddrRegReg pattern is partial, and 'r2' is never
-- used in the loads below -- this looks wrong for a genuine reg+reg
-- amode; confirm against the full source.
306 iselExpr64 (CmmLoad addrTree I64) = do
307 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
308 rlo <- getNewRegNat I32
309 let rhi = getHiVRegFromLo rlo
310 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
311 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
313 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- Local I64 register: copy both halves into a fresh vreg pair.
317 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
318 r_dst_lo <- getNewRegNat I32
319 let r_dst_hi = getHiVRegFromLo r_dst_lo
320 r_src_lo = mkVReg uq I32
321 r_src_hi = getHiVRegFromLo r_src_lo
322 mov_lo = mkMOV r_src_lo r_dst_lo
323 mov_hi = mkMOV r_src_hi r_dst_hi
324 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
326 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
330 = pprPanic "iselExpr64(sparc)" (ppr expr)
332 #endif /* sparc_TARGET_ARCH */
334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
336 #if powerpc_TARGET_ARCH
-- | Produce amodes for both halves of a 64-bit memory operand: the
-- given address and address+4.  If the +4 offset cannot be folded into
-- the amode, fall back to materialising the pointer in a register.
338 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
339 getI64Amodes addrTree = do
340 Amode hi_addr addr_code <- getAmode addrTree
341 case addrOffset hi_addr 4 of
342 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
343 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
344 return (AddrRegImm hi_ptr (ImmInt 0),
345 AddrRegImm hi_ptr (ImmInt 4),
-- NOTE(review): the third tuple component (the combined code) is
-- elided in this listing.
-- PowerPC (big-endian): high word at the base amode, low word at +4.
348 assignMem_I64Code addrTree valueTree = do
349 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
350 ChildCode64 vcode rlo <- iselExpr64 valueTree
352 rhi = getHiVRegFromLo rlo
355 mov_hi = ST I32 rhi hi_addr
356 mov_lo = ST I32 rlo lo_addr
358 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- PowerPC: MR (move register) on both halves of the pair.
360 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
361 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
363 r_dst_lo = mkVReg u_dst I32
364 r_dst_hi = getHiVRegFromLo r_dst_lo
365 r_src_hi = getHiVRegFromLo r_src_lo
366 mov_lo = MR r_dst_lo r_src_lo
367 mov_hi = MR r_dst_hi r_src_hi
370 vcode `snocOL` mov_lo `snocOL` mov_hi
373 assignReg_I64Code lvalue valueTree
374 = panic "assignReg_I64Code(powerpc): invalid lvalue"
377 -- Don't delete this -- it's very handy for debugging.
379 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
380 -- = panic "iselExpr64(???)"
-- PowerPC load: high word from hi_addr, low word from lo_addr.
382 iselExpr64 (CmmLoad addrTree I64) = do
383 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
384 (rlo, rhi) <- getNewRegPairNat I32
385 let mov_hi = LD I32 rhi hi_addr
386 mov_lo = LD I32 rlo lo_addr
387 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Local I64 register: already a vreg pair, no code needed.
390 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
391 = return (ChildCode64 nilOL (mkVReg vu I32))
-- | 64-bit literal on PowerPC: build each 32-bit half with LIS
-- (load-immediate-shifted, setting the upper halfword) followed by an
-- OR-immediate supplying the lower halfword.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        -- BUG FIX: shift the full Integer before truncating to
        -- Word16.  The original truncated first, so every shift of
        -- 16 or more operated on a Word16 and yielded 0, destroying
        -- halfwords 1-3 of the literal.
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: half2 belongs in the HIGH register; the
                -- original OR'd it into rlo, corrupting the low word
                -- and leaving the high word incomplete.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- 64-bit addition: ADDC (add, setting carry) on the low words; the
-- high-word instruction is elided in this listing -- presumably the
-- carry-propagating extended add, confirm against the full source.
410 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
411 ChildCode64 code1 r1lo <- iselExpr64 e1
412 ChildCode64 code2 r2lo <- iselExpr64 e2
413 (rlo,rhi) <- getNewRegPairNat I32
415 r1hi = getHiVRegFromLo r1lo
416 r2hi = getHiVRegFromLo r2lo
419 toOL [ ADDC rlo r1lo r2lo,
422 return (ChildCode64 code rlo)
425 = pprPanic "iselExpr64(powerpc)" (ppr expr)
427 #endif /* powerpc_TARGET_ARCH */
430 -- -----------------------------------------------------------------------------
431 -- The 'Register' type
433 -- 'Register's passed up the tree. If the stix code forces the register
434 -- to live in a pre-decided machine register, it comes out as @Fixed@;
435 -- otherwise, it comes out as @Any@, and the parent can decide which
436 -- register to put it in.
-- NOTE(review): the 'data Register' header line is elided in this
-- listing; only the two constructors are visible.
439 = Fixed MachRep Reg InstrBlock
440 | Any MachRep (Reg -> InstrBlock)
-- | Re-label a 'Register' with a different 'MachRep', leaving the
-- underlying register / code-generation function untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep r rep = case r of
  Fixed _ reg code -> Fixed rep reg code
  Any   _ codefn   -> Any   rep codefn
447 -- -----------------------------------------------------------------------------
448 -- Utils based on getRegister, below
450 -- The dual to getAnyReg: compute an expression into a register, but
451 -- we don't mind which one it is.
452 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation LHS and the 'case' over the Register
-- result are elided in this listing; the visible branch handles 'Any'
-- by allocating a fresh temporary and running the code function on it.
454 r <- getRegister expr
457 tmp <- getNewRegNat rep
458 return (tmp, code tmp)
462 -- -----------------------------------------------------------------------------
463 -- Grab the Reg for a CmmReg
-- | The Reg holding a CmmReg.  Locals map to virtual registers (the
-- RHS of that equation is elided in this listing); globals must map to
-- real machine registers by this stage.
465 getRegisterReg :: CmmReg -> Reg
467 getRegisterReg (CmmLocal (LocalReg u pk))
470 getRegisterReg (CmmGlobal mid)
471 = case get_GlobalReg_reg_or_addr mid of
472 Left (RealReg rrno) -> RealReg rrno
473 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
474 -- By this stage, the only MagicIds remaining should be the
475 -- ones which map to a real machine register on this
476 -- platform. Hence ...
479 -- -----------------------------------------------------------------------------
480 -- Generate code to get a subtree into a Register
482 -- Don't delete this -- it's very handy for debugging.
484 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
485 -- = panic "getRegister(???)"
487 getRegister :: CmmExpr -> NatM Register
-- The PIC base lives in a fixed register obtained from the monad.
489 getRegister (CmmReg (CmmGlobal PicBaseReg))
491 reg <- getPicBaseNat wordRep
492 return (Fixed wordRep reg nilOL)
494 getRegister (CmmReg reg)
495 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
-- Register-plus-offset is rewritten into an explicit addition first.
497 getRegister tree@(CmmRegOff _ _)
498 = getRegister (mangleIndexTree tree)
501 #if WORD_SIZE_IN_BITS==32
502 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
503 -- TO_W_(x), TO_W_(x >> 32)
-- Narrowing (x >> 32) to 32 bits just selects the HIGH register of
-- the 64-bit pair; no shift is emitted.
505 getRegister (CmmMachOp (MO_U_Conv I64 I32)
506 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
507 ChildCode64 code rlo <- iselExpr64 x
508 return $ Fixed I32 (getHiVRegFromLo rlo) code
510 getRegister (CmmMachOp (MO_S_Conv I64 I32)
511 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
512 ChildCode64 code rlo <- iselExpr64 x
513 return $ Fixed I32 (getHiVRegFromLo rlo) code
-- Plain narrowing takes the LOW register of the pair.
515 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
516 ChildCode64 code rlo <- iselExpr64 x
517 return $ Fixed I32 rlo code
519 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
520 ChildCode64 code rlo <- iselExpr64 x
521 return $ Fixed I32 rlo code
525 -- end of machine-"independent" bit; here we go on the rest...
527 #if alpha_TARGET_ARCH
-- LEGACY: the DEC Alpha section below is still written against the old
-- Stix representation (StDouble/StPrim/StCall/StInt) and the
-- thenNat/mkSeqInstr(s) style, not the Cmm types used elsewhere in this
-- module.  Many lines are elided in this listing; treat it as
-- historical reference code, not live selection logic.
529 getRegister (StDouble d)
530 = getBlockIdNat `thenNat` \ lbl ->
531 getNewRegNat PtrRep `thenNat` \ tmp ->
532 let code dst = mkSeqInstrs [
533 LDATA RoDataSegment lbl [
534 DATA TF [ImmLab (rational d)]
536 LDA tmp (AddrImm (ImmCLbl lbl)),
537 LD TF dst (AddrReg tmp)]
539 return (Any F64 code)
541 getRegister (StPrim primop [x]) -- unary PrimOps
543 IntNegOp -> trivialUCode (NEG Q False) x
545 NotOp -> trivialUCode NOT x
547 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
548 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
550 OrdOp -> coerceIntCode IntRep x
553 Float2IntOp -> coerceFP2Int x
554 Int2FloatOp -> coerceInt2FP pr x
555 Double2IntOp -> coerceFP2Int x
556 Int2DoubleOp -> coerceInt2FP pr x
558 Double2FloatOp -> coerceFltCode x
559 Float2DoubleOp -> coerceFltCode x
-- Transcendental ops fall through to a C call on the matching libm
-- function, chosen by the table below.
561 other_op -> getRegister (StCall fn CCallConv F64 [x])
563 fn = case other_op of
564 FloatExpOp -> FSLIT("exp")
565 FloatLogOp -> FSLIT("log")
566 FloatSqrtOp -> FSLIT("sqrt")
567 FloatSinOp -> FSLIT("sin")
568 FloatCosOp -> FSLIT("cos")
569 FloatTanOp -> FSLIT("tan")
570 FloatAsinOp -> FSLIT("asin")
571 FloatAcosOp -> FSLIT("acos")
572 FloatAtanOp -> FSLIT("atan")
573 FloatSinhOp -> FSLIT("sinh")
574 FloatCoshOp -> FSLIT("cosh")
575 FloatTanhOp -> FSLIT("tanh")
576 DoubleExpOp -> FSLIT("exp")
577 DoubleLogOp -> FSLIT("log")
578 DoubleSqrtOp -> FSLIT("sqrt")
579 DoubleSinOp -> FSLIT("sin")
580 DoubleCosOp -> FSLIT("cos")
581 DoubleTanOp -> FSLIT("tan")
582 DoubleAsinOp -> FSLIT("asin")
583 DoubleAcosOp -> FSLIT("acos")
584 DoubleAtanOp -> FSLIT("atan")
585 DoubleSinhOp -> FSLIT("sinh")
586 DoubleCoshOp -> FSLIT("cosh")
587 DoubleTanhOp -> FSLIT("tanh")
589 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
591 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
593 CharGtOp -> trivialCode (CMP LTT) y x
594 CharGeOp -> trivialCode (CMP LE) y x
595 CharEqOp -> trivialCode (CMP EQQ) x y
596 CharNeOp -> int_NE_code x y
597 CharLtOp -> trivialCode (CMP LTT) x y
598 CharLeOp -> trivialCode (CMP LE) x y
600 IntGtOp -> trivialCode (CMP LTT) y x
601 IntGeOp -> trivialCode (CMP LE) y x
602 IntEqOp -> trivialCode (CMP EQQ) x y
603 IntNeOp -> int_NE_code x y
604 IntLtOp -> trivialCode (CMP LTT) x y
605 IntLeOp -> trivialCode (CMP LE) x y
607 WordGtOp -> trivialCode (CMP ULT) y x
608 WordGeOp -> trivialCode (CMP ULE) x y
609 WordEqOp -> trivialCode (CMP EQQ) x y
610 WordNeOp -> int_NE_code x y
611 WordLtOp -> trivialCode (CMP ULT) x y
612 WordLeOp -> trivialCode (CMP ULE) x y
614 AddrGtOp -> trivialCode (CMP ULT) y x
615 AddrGeOp -> trivialCode (CMP ULE) y x
616 AddrEqOp -> trivialCode (CMP EQQ) x y
617 AddrNeOp -> int_NE_code x y
618 AddrLtOp -> trivialCode (CMP ULT) x y
619 AddrLeOp -> trivialCode (CMP ULE) x y
621 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
622 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
623 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
624 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
625 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
626 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
628 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
629 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
630 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
631 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
632 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
633 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
635 IntAddOp -> trivialCode (ADD Q False) x y
636 IntSubOp -> trivialCode (SUB Q False) x y
637 IntMulOp -> trivialCode (MUL Q False) x y
638 IntQuotOp -> trivialCode (DIV Q False) x y
639 IntRemOp -> trivialCode (REM Q False) x y
641 WordAddOp -> trivialCode (ADD Q False) x y
642 WordSubOp -> trivialCode (SUB Q False) x y
643 WordMulOp -> trivialCode (MUL Q False) x y
644 WordQuotOp -> trivialCode (DIV Q True) x y
645 WordRemOp -> trivialCode (REM Q True) x y
647 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
648 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
649 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
650 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
652 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
653 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
654 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
655 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
657 AddrAddOp -> trivialCode (ADD Q False) x y
658 AddrSubOp -> trivialCode (SUB Q False) x y
659 AddrRemOp -> trivialCode (REM Q True) x y
661 AndOp -> trivialCode AND x y
662 OrOp -> trivialCode OR x y
663 XorOp -> trivialCode XOR x y
664 SllOp -> trivialCode SLL x y
665 SrlOp -> trivialCode SRL x y
667 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
668 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
669 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
671 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
672 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
674 {- ------------------------------------------------------------
675 Some bizarre special code for getting condition codes into
676 registers. Integer non-equality is a test for equality
677 followed by an XOR with 1. (Integer comparisons always set
678 the result register to 0 or 1.) Floating point comparisons of
679 any kind leave the result in a floating point register, so we
680 need to wrangle an integer register out of things.
682 int_NE_code :: StixTree -> StixTree -> NatM Register
685 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
686 getNewRegNat IntRep `thenNat` \ tmp ->
688 code = registerCode register tmp
689 src = registerName register tmp
690 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
692 return (Any IntRep code__2)
694 {- ------------------------------------------------------------
695 Comments for int_NE_code also apply to cmpF_code
698 :: (Reg -> Reg -> Reg -> Instr)
700 -> StixTree -> StixTree
703 cmpF_code instr cond x y
704 = trivialFCode pr instr x y `thenNat` \ register ->
705 getNewRegNat F64 `thenNat` \ tmp ->
706 getBlockIdNat `thenNat` \ lbl ->
708 code = registerCode register tmp
709 result = registerName register tmp
711 code__2 dst = code . mkSeqInstrs [
712 OR zeroh (RIImm (ImmInt 1)) dst,
713 BF cond result (ImmCLbl lbl),
714 OR zeroh (RIReg zeroh) dst,
717 return (Any IntRep code__2)
719 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
720 ------------------------------------------------------------
722 getRegister (CmmLoad pk mem)
723 = getAmode mem `thenNat` \ amode ->
725 code = amodeCode amode
726 src = amodeAddr amode
727 size = primRepToSize pk
728 code__2 dst = code . mkSeqInstr (LD size dst src)
730 return (Any pk code__2)
732 getRegister (StInt i)
735 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
737 return (Any IntRep code)
740 code dst = mkSeqInstr (LDI Q dst src)
742 return (Any IntRep code)
744 src = ImmInt (fromInteger i)
749 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
751 return (Any PtrRep code)
754 imm__2 = case imm of Just x -> x
756 #endif /* alpha_TARGET_ARCH */
758 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: an F32 literal is placed in a static data section and loaded
-- through a (possibly dynamic/PIC) reference.
762 getRegister (CmmLit (CmmFloat f F32)) = do
763 lbl <- getNewLabelNat
764 dynRef <- cmmMakeDynamicReference addImportNat False lbl
765 Amode addr addr_code <- getAmode dynRef
769 CmmStaticLit (CmmFloat f F32)]
770 `consOL` (addr_code `snocOL`
773 return (Any F32 code)
-- F64 literals: the two special cases below use the x87 load-zero /
-- load-one instructions; their guard lines (presumably d == 0.0 and
-- d == 1.0 -- confirm) are elided in this listing.
776 getRegister (CmmLit (CmmFloat d F64))
778 = let code dst = unitOL (GLDZ dst)
779 in return (Any F64 code)
782 = let code dst = unitOL (GLD1 dst)
783 in return (Any F64 code)
-- General F64 literal: same static-data-section scheme as F32.
786 lbl <- getNewLabelNat
787 dynRef <- cmmMakeDynamicReference addImportNat False lbl
788 Amode addr addr_code <- getAmode dynRef
792 CmmStaticLit (CmmFloat d F64)]
793 `consOL` (addr_code `snocOL`
796 return (Any F64 code)
798 #endif /* i386_TARGET_ARCH */
800 #if x86_64_TARGET_ARCH
-- x86_64/SSE: a zero float literal is made by xor'ing the destination
-- with itself.
-- NOTE(review): a literal pattern matches via (==), and 0.0 == -0.0,
-- so this also catches negative zero and loses its sign bit -- confirm
-- whether that matters to callers.
802 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
803 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
804 -- I don't know why there are xorpd, xorps, and pxor instructions.
805 -- They all appear to do the same thing --SDM
806 return (Any rep code)
-- Non-zero float literal: emit it as static data and load rip-relative.
808 getRegister (CmmLit (CmmFloat f rep)) = do
809 lbl <- getNewLabelNat
810 let code dst = toOL [
813 CmmStaticLit (CmmFloat f rep)],
814 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
817 return (Any rep code)
819 #endif /* x86_64_TARGET_ARCH */
821 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
823 -- catch simple cases of zero- or sign-extended load
-- (fusing the load and the extension into a single movzx/movsx).
824 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
825 code <- intLoadCode (MOVZxL I8) addr
826 return (Any I32 code)
828 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
829 code <- intLoadCode (MOVSxL I8) addr
830 return (Any I32 code)
832 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
833 code <- intLoadCode (MOVZxL I16) addr
834 return (Any I32 code)
836 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
837 code <- intLoadCode (MOVSxL I16) addr
838 return (Any I32 code)
842 #if x86_64_TARGET_ARCH
844 -- catch simple cases of zero- or sign-extended load
-- (64-bit destinations; same movzx/movsx fusion as the 32-bit cases).
845 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
846 code <- intLoadCode (MOVZxL I8) addr
847 return (Any I64 code)
849 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
850 code <- intLoadCode (MOVSxL I8) addr
851 return (Any I64 code)
853 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
854 code <- intLoadCode (MOVZxL I16) addr
855 return (Any I64 code)
857 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
858 code <- intLoadCode (MOVSxL I16) addr
859 return (Any I64 code)
861 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
862 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
863 return (Any I64 code)
865 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
866 code <- intLoadCode (MOVSxL I32) addr
867 return (Any I64 code)
871 #if x86_64_TARGET_ARCH
-- SSE has no FP negate instruction; flip the sign bit by xor'ing with
-- a sign-bit mask kept in a 16-byte-aligned read-only data section.
872 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
873 x_code <- getAnyReg x
874 lbl <- getNewLabelNat
876 code dst = x_code dst `appOL` toOL [
877 -- This is how gcc does it, so it can't be that bad:
878 LDATA ReadOnlyData16 [
881 CmmStaticLit (CmmInt 0x80000000 I32),
882 CmmStaticLit (CmmInt 0 I32),
883 CmmStaticLit (CmmInt 0 I32),
884 CmmStaticLit (CmmInt 0 I32)
886 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
887 -- xorps, so we need the 128-bit constant
888 -- ToDo: rip-relative
891 return (Any F32 code)
893 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
894 x_code <- getAnyReg x
895 lbl <- getNewLabelNat
897 -- This is how gcc does it, so it can't be that bad:
898 code dst = x_code dst `appOL` toOL [
899 LDATA ReadOnlyData16 [
902 CmmStaticLit (CmmInt 0x8000000000000000 I64),
903 CmmStaticLit (CmmInt 0 I64)
905 -- gcc puts an unpck here. Wonder if we need it.
906 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
907 -- xorpd, so we need the 128-bit constant
910 return (Any F64 code)
913 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Unary MachOps: negation/complement, then the table of integer and
-- floating-point width conversions.  Several alternative headers and
-- #if/#endif lines are elided in this listing.
915 getRegister (CmmMachOp mop [x]) -- unary MachOps
918 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
919 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
922 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
923 MO_Not rep -> trivialUCode rep (NOT rep) x
-- Narrowing to I8/I16 only constrains the register choice; no
-- instruction is emitted (see toI8Reg below).
926 MO_U_Conv I32 I8 -> toI8Reg I32 x
927 MO_S_Conv I32 I8 -> toI8Reg I32 x
928 MO_U_Conv I16 I8 -> toI8Reg I16 x
929 MO_S_Conv I16 I8 -> toI8Reg I16 x
930 MO_U_Conv I32 I16 -> toI16Reg I32 x
931 MO_S_Conv I32 I16 -> toI16Reg I32 x
932 #if x86_64_TARGET_ARCH
933 MO_U_Conv I64 I32 -> conversionNop I64 x
934 MO_S_Conv I64 I32 -> conversionNop I64 x
935 MO_U_Conv I64 I16 -> toI16Reg I64 x
936 MO_S_Conv I64 I16 -> toI16Reg I64 x
937 MO_U_Conv I64 I8 -> toI8Reg I64 x
938 MO_S_Conv I64 I8 -> toI8Reg I64 x
941 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
942 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
-- Widening needs a real movzx/movsx.
945 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
946 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
947 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
949 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
950 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
951 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
953 #if x86_64_TARGET_ARCH
954 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
955 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
956 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
957 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
958 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
959 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
960 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
961 -- However, we don't want the register allocator to throw it
962 -- away as an unnecessary reg-to-reg move, so we keep it in
963 -- the form of a movzl and print it as a movl later.
967 MO_S_Conv F32 F64 -> conversionNop F64 x
968 MO_S_Conv F64 F32 -> conversionNop F32 x
970 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
971 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
-- NOTE(review): the enclosing 'MO_S_Conv from to' alternative header
-- for these guards is elided in this listing.
975 | isFloatingRep from -> coerceFP2Int from to x
976 | isFloatingRep to -> coerceInt2FP from to x
978 other -> pprPanic "getRegister" (pprMachOp mop)
980 -- signed or unsigned extension.
981 integerExtend from to instr expr = do
982 (reg,e_code) <- if from == I8 then getByteReg expr
987 instr from (OpReg reg) (OpReg dst)
991 = do codefn <- getAnyReg expr
992 return (Any new_rep codefn)
993 -- HACK: use getAnyReg to get a byte-addressable register.
994 -- If the source was a Fixed register, this will add the
995 -- mov instruction to put it into the desired destination.
996 -- We're assuming that the destination won't be a fixed
997 -- non-byte-addressable register; it won't be, because all
998 -- fixed registers are word-sized.
1000 toI16Reg = toI8Reg -- for now
-- Re-label the Register with the new rep; emits no code.
1002 conversionNop new_rep expr
1003 = do e_code <- getRegister expr
1004 return (swizzleRegisterRep e_code new_rep)
1007 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1008 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1010 MO_Eq F32 -> condFltReg EQQ x y
1011 MO_Ne F32 -> condFltReg NE x y
1012 MO_S_Gt F32 -> condFltReg GTT x y
1013 MO_S_Ge F32 -> condFltReg GE x y
1014 MO_S_Lt F32 -> condFltReg LTT x y
1015 MO_S_Le F32 -> condFltReg LE x y
1017 MO_Eq F64 -> condFltReg EQQ x y
1018 MO_Ne F64 -> condFltReg NE x y
1019 MO_S_Gt F64 -> condFltReg GTT x y
1020 MO_S_Ge F64 -> condFltReg GE x y
1021 MO_S_Lt F64 -> condFltReg LTT x y
1022 MO_S_Le F64 -> condFltReg LE x y
1024 MO_Eq rep -> condIntReg EQQ x y
1025 MO_Ne rep -> condIntReg NE x y
1027 MO_S_Gt rep -> condIntReg GTT x y
1028 MO_S_Ge rep -> condIntReg GE x y
1029 MO_S_Lt rep -> condIntReg LTT x y
1030 MO_S_Le rep -> condIntReg LE x y
1032 MO_U_Gt rep -> condIntReg GU x y
1033 MO_U_Ge rep -> condIntReg GEU x y
1034 MO_U_Lt rep -> condIntReg LU x y
1035 MO_U_Le rep -> condIntReg LEU x y
1037 #if i386_TARGET_ARCH
1038 MO_Add F32 -> trivialFCode F32 GADD x y
1039 MO_Sub F32 -> trivialFCode F32 GSUB x y
1041 MO_Add F64 -> trivialFCode F64 GADD x y
1042 MO_Sub F64 -> trivialFCode F64 GSUB x y
1044 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1045 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1048 #if x86_64_TARGET_ARCH
1049 MO_Add F32 -> trivialFCode F32 ADD x y
1050 MO_Sub F32 -> trivialFCode F32 SUB x y
1052 MO_Add F64 -> trivialFCode F64 ADD x y
1053 MO_Sub F64 -> trivialFCode F64 SUB x y
1055 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1056 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1059 MO_Add rep -> add_code rep x y
1060 MO_Sub rep -> sub_code rep x y
1062 MO_S_Quot rep -> div_code rep True True x y
1063 MO_S_Rem rep -> div_code rep True False x y
1064 MO_U_Quot rep -> div_code rep False True x y
1065 MO_U_Rem rep -> div_code rep False False x y
1067 #if i386_TARGET_ARCH
1068 MO_Mul F32 -> trivialFCode F32 GMUL x y
1069 MO_Mul F64 -> trivialFCode F64 GMUL x y
1072 #if x86_64_TARGET_ARCH
1073 MO_Mul F32 -> trivialFCode F32 MUL x y
1074 MO_Mul F64 -> trivialFCode F64 MUL x y
1077 MO_Mul rep -> let op = IMUL rep in
1078 trivialCode rep op (Just op) x y
1080 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1082 MO_And rep -> let op = AND rep in
1083 trivialCode rep op (Just op) x y
1084 MO_Or rep -> let op = OR rep in
1085 trivialCode rep op (Just op) x y
1086 MO_Xor rep -> let op = XOR rep in
1087 trivialCode rep op (Just op) x y
1089 {- Shift ops on x86s have constraints on their source, it
1090 either has to be Imm, CL or 1
1091 => trivialCode is not restrictive enough (sigh.)
1093 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1094 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1095 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1097 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1099 --------------------
1100 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1101 imulMayOflo rep a b = do
1102 (a_reg, a_code) <- getNonClobberedReg a
1103 b_code <- getAnyReg b
1105 shift_amt = case rep of
1108 _ -> panic "shift_amt"
1110 code = a_code `appOL` b_code eax `appOL`
1112 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1113 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1114 -- sign extend lower part
1115 SUB rep (OpReg edx) (OpReg eax)
1116 -- compare against upper
1117 -- eax==0 if high part == sign extended low part
1120 return (Fixed rep eax code)
1122 --------------------
1123 shift_code :: MachRep
1124 -> (Operand -> Operand -> Instr)
1129 {- Case1: shift length as immediate -}
1130 shift_code rep instr x y@(CmmLit lit) = do
1131 x_code <- getAnyReg x
1134 = x_code dst `snocOL`
1135 instr (OpImm (litToImm lit)) (OpReg dst)
1137 return (Any rep code)
1139 {- Case2: shift length is complex (non-immediate) -}
1140 shift_code rep instr x y{-amount-} = do
1141 (x_reg, x_code) <- getNonClobberedReg x
1142 y_code <- getAnyReg y
1144 code = x_code `appOL`
1146 instr (OpReg ecx) (OpReg x_reg)
1148 return (Fixed rep x_reg code)
1150 --------------------
-- | Integer addition.  When the right operand is a literal that fits
-- in 32 bits we use the three-operand 'add_int' form; otherwise we
-- fall back to a two-operand ADD via 'trivialCode' (ADD is
-- commutative, hence the Just for the swapped-operand variant).
add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
add_code rep x y = case y of
  CmmLit (CmmInt i _) | not (is64BitInteger i) -> add_int rep x i
  _ -> trivialCode rep (ADD rep) (Just (ADD rep)) x y
1156 --------------------
-- | Integer subtraction.  Subtracting a small literal is rewritten as
-- adding its negation (when the negated value still fits in 32 bits);
-- otherwise emit SUB.  SUB is not commutative, so no swapped-operand
-- variant is offered to 'trivialCode' (Nothing).
sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
sub_code rep x y = case y of
  CmmLit (CmmInt i _) | not (is64BitInteger (negate i)) -> add_int rep x (negate i)
  _ -> trivialCode rep (SUB rep) Nothing x y
1162 -- our three-operand add instruction:
1163 add_int rep x y = do
1164 (x_reg, x_code) <- getSomeReg x
1166 imm = ImmInt (fromInteger y)
1170 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1173 return (Any rep code)
1175 ----------------------
1176 div_code rep signed quotient x y = do
1177 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1178 x_code <- getAnyReg x
1180 widen | signed = CLTD rep
1181 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1183 instr | signed = IDIV
1186 code = y_code `appOL`
1188 toOL [widen, instr rep y_op]
1190 result | quotient = eax
1194 return (Fixed rep result code)
1197 getRegister (CmmLoad mem pk)
1200 Amode src mem_code <- getAmode mem
1202 code dst = mem_code `snocOL`
1203 IF_ARCH_i386(GLD pk src dst,
1204 MOV pk (OpAddr src) (OpReg dst))
1206 return (Any pk code)
1208 #if i386_TARGET_ARCH
1209 getRegister (CmmLoad mem pk)
1212 code <- intLoadCode (instr pk) mem
1213 return (Any pk code)
1215 instr I8 = MOVZxL pk
1218 -- we always zero-extend 8-bit loads, if we
1219 -- can't think of anything better. This is because
1220 -- we can't guarantee access to an 8-bit variant of every register
1221 -- (esi and edi don't have 8-bit variants), so to make things
1222 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1225 #if x86_64_TARGET_ARCH
1226 -- Simpler memory load code on x86_64
1227 getRegister (CmmLoad mem pk)
1229 code <- intLoadCode (MOV pk) mem
1230 return (Any pk code)
1233 getRegister (CmmLit (CmmInt 0 rep))
1235 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1236 adj_rep = case rep of I64 -> I32; _ -> rep
1237 rep1 = IF_ARCH_i386( rep, adj_rep )
1239 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1241 return (Any rep code)
1243 #if x86_64_TARGET_ARCH
1244 -- optimisation for loading small literals on x86_64: take advantage
1245 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1246 -- instruction forms are shorter.
1247 getRegister (CmmLit lit)
1248 | I64 <- cmmLitRep lit, not (isBigLit lit)
1251 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1253 return (Any I64 code)
1255 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1257 -- note1: not the same as is64BitLit, because that checks for
1258 -- signed literals that fit in 32 bits, but we want unsigned
1260 -- note2: all labels are small, because we're assuming the
1261 -- small memory model (see gcc docs, -mcmodel=small).
1264 getRegister (CmmLit lit)
1268 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1270 return (Any rep code)
1272 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build code for an integer load: compute an addressing mode for
-- the address expression, then append one load instruction (built by
-- 'mk_load') from that address into the eventual destination register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mk_load addr_expr = do
  Amode addr addr_code <- getAmode addr_expr
  let load dst = addr_code `snocOL` mk_load (OpAddr addr) (OpReg dst)
  return load
1281 -- Compute an expression into *any* register, adding the appropriate
1282 -- move instruction if necessary.
1283 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1285 r <- getRegister expr
-- Turn a Register (the result of instruction selection) into a
-- function from destination Reg to the InstrBlock computing into it.
1288 anyReg :: Register -> NatM (Reg -> InstrBlock)
-- An 'Any' register is already such a function.
1289 anyReg (Any _ code) = return code
-- A 'Fixed' register must be copied to the requested destination with
-- an extra reg-to-reg move.
1290 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1292 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1293 -- Fixed registers might not be byte-addressable, so we make sure we've
1294 -- got a temporary, inserting an extra reg copy if necessary.
1295 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1296 #if x86_64_TARGET_ARCH
1297 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1299 getByteReg expr = do
1300 r <- getRegister expr
1303 tmp <- getNewRegNat rep
1304 return (tmp, code tmp)
1306 | isVirtualReg reg -> return (reg,code)
1308 tmp <- getNewRegNat rep
1309 return (tmp, code `snocOL` reg2reg rep reg tmp)
1310 -- ToDo: could optimise slightly by checking for byte-addressable
1311 -- real registers, but that will happen very rarely if at all.
1314 -- Another variant: this time we want the result in a register that cannot
1315 -- be modified by code to evaluate an arbitrary expression.
1316 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1317 getNonClobberedReg expr = do
1318 r <- getRegister expr
1321 tmp <- getNewRegNat rep
1322 return (tmp, code tmp)
1324 -- only free regs can be clobbered
1325 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1326 tmp <- getNewRegNat rep
1327 return (tmp, code `snocOL` reg2reg rep reg tmp)
1331 reg2reg :: MachRep -> Reg -> Reg -> Instr
1333 #if i386_TARGET_ARCH
1334 | isFloatingRep rep = GMOV src dst
1336 | otherwise = MOV rep (OpReg src) (OpReg dst)
1338 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1340 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1342 #if sparc_TARGET_ARCH
1344 getRegister (CmmLit (CmmFloat f F32)) = do
1345 lbl <- getNewLabelNat
1346 let code dst = toOL [
1349 CmmStaticLit (CmmFloat f F32)],
1350 SETHI (HI (ImmCLbl lbl)) dst,
1351 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1352 return (Any F32 code)
1354 getRegister (CmmLit (CmmFloat d F64)) = do
1355 lbl <- getNewLabelNat
1356 let code dst = toOL [
1359 CmmStaticLit (CmmFloat d F64)],
1360 SETHI (HI (ImmCLbl lbl)) dst,
1361 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1362 return (Any F64 code)
1364 getRegister (CmmMachOp mop [x]) -- unary MachOps
1366 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1367 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1369 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1370 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1372 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1374 MO_U_Conv F64 F32-> coerceDbl2Flt x
1375 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1377 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1378 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1379 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1380 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1382 -- Conversions which are a nop on sparc
1384 | from == to -> conversionNop to x
1385 MO_U_Conv I32 to -> conversionNop to x
1386 MO_S_Conv I32 to -> conversionNop to x
1389 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1390 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1391 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1392 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1394 other_op -> panic "Unknown unary mach op"
1397 integerExtend signed from to expr = do
1398 (reg, e_code) <- getSomeReg expr
1402 ((if signed then SRA else SRL)
1403 reg (RIImm (ImmInt 0)) dst)
1404 return (Any to code)
-- A width conversion that needs no instructions on this target:
-- compile the sub-expression and just re-tag the resulting Register
-- with the new MachRep.
1405 conversionNop new_rep expr
1406 = do e_code <- getRegister expr
1407 return (swizzleRegisterRep e_code new_rep)
1409 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1411 MO_Eq F32 -> condFltReg EQQ x y
1412 MO_Ne F32 -> condFltReg NE x y
1414 MO_S_Gt F32 -> condFltReg GTT x y
1415 MO_S_Ge F32 -> condFltReg GE x y
1416 MO_S_Lt F32 -> condFltReg LTT x y
1417 MO_S_Le F32 -> condFltReg LE x y
1419 MO_Eq F64 -> condFltReg EQQ x y
1420 MO_Ne F64 -> condFltReg NE x y
1422 MO_S_Gt F64 -> condFltReg GTT x y
1423 MO_S_Ge F64 -> condFltReg GE x y
1424 MO_S_Lt F64 -> condFltReg LTT x y
1425 MO_S_Le F64 -> condFltReg LE x y
1427 MO_Eq rep -> condIntReg EQQ x y
1428 MO_Ne rep -> condIntReg NE x y
1430 MO_S_Gt rep -> condIntReg GTT x y
1431 MO_S_Ge rep -> condIntReg GE x y
1432 MO_S_Lt rep -> condIntReg LTT x y
1433 MO_S_Le rep -> condIntReg LE x y
1435 MO_U_Gt I32 -> condIntReg GTT x y
1436 MO_U_Ge I32 -> condIntReg GE x y
1437 MO_U_Lt I32 -> condIntReg LTT x y
1438 MO_U_Le I32 -> condIntReg LE x y
1440 MO_U_Gt I16 -> condIntReg GU x y
1441 MO_U_Ge I16 -> condIntReg GEU x y
1442 MO_U_Lt I16 -> condIntReg LU x y
1443 MO_U_Le I16 -> condIntReg LEU x y
1445 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1446 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1448 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1450 -- ToDo: teach about V8+ SPARC div instructions
1451 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1452 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1453 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1454 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1456 MO_Add F32 -> trivialFCode F32 FADD x y
1457 MO_Sub F32 -> trivialFCode F32 FSUB x y
1458 MO_Mul F32 -> trivialFCode F32 FMUL x y
1459 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1461 MO_Add F64 -> trivialFCode F64 FADD x y
1462 MO_Sub F64 -> trivialFCode F64 FSUB x y
1463 MO_Mul F64 -> trivialFCode F64 FMUL x y
1464 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1466 MO_And rep -> trivialCode rep (AND False) x y
1467 MO_Or rep -> trivialCode rep (OR False) x y
1468 MO_Xor rep -> trivialCode rep (XOR False) x y
1470 MO_Mul rep -> trivialCode rep (SMUL False) x y
1472 MO_Shl rep -> trivialCode rep SLL x y
1473 MO_U_Shr rep -> trivialCode rep SRL x y
1474 MO_S_Shr rep -> trivialCode rep SRA x y
1477 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1478 [promote x, promote y])
1479 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1480 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1483 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1485 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1487 --------------------
1488 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1489 imulMayOflo rep a b = do
1490 (a_reg, a_code) <- getSomeReg a
1491 (b_reg, b_code) <- getSomeReg b
1492 res_lo <- getNewRegNat I32
1493 res_hi <- getNewRegNat I32
1495 shift_amt = case rep of
1498 _ -> panic "shift_amt"
1499 code dst = a_code `appOL` b_code `appOL`
1501 SMUL False a_reg (RIReg b_reg) res_lo,
1503 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1504 SUB False False res_lo (RIReg res_hi) dst
1506 return (Any I32 code)
1508 getRegister (CmmLoad mem pk) = do
1509 Amode src code <- getAmode mem
1511 code__2 dst = code `snocOL` LD pk src dst
1512 return (Any pk code__2)
1514 getRegister (CmmLit (CmmInt i _))
1517 src = ImmInt (fromInteger i)
1518 code dst = unitOL (OR False g0 (RIImm src) dst)
1520 return (Any I32 code)
1522 getRegister (CmmLit lit)
1523 = let rep = cmmLitRep lit
1527 OR False dst (RIImm (LO imm)) dst]
1528 in return (Any I32 code)
1530 #endif /* sparc_TARGET_ARCH */
1532 #if powerpc_TARGET_ARCH
1533 getRegister (CmmLoad mem pk)
1536 Amode addr addr_code <- getAmode mem
1537 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1538 addr_code `snocOL` LD pk dst addr
1539 return (Any pk code)
1541 -- catch simple cases of zero- or sign-extended load
-- zero-extending byte load: the plain byte load zero-extends by
-- itself, so LD at I8 into a full register is all that is needed
1542 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1543 Amode addr addr_code <- getAmode mem
1544 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1546 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
-- zero-extending halfword load
1548 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1549 Amode addr addr_code <- getAmode mem
1550 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- sign-extending halfword load: LA is presumably the sign-extending
-- ("load algebraic") counterpart of LD -- NOTE(review): confirm
-- against the Instr definition
1552 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1553 Amode addr addr_code <- getAmode mem
1554 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1556 getRegister (CmmMachOp mop [x]) -- unary MachOps
1558 MO_Not rep -> trivialUCode rep NOT x
1560 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1561 MO_S_Conv F32 F64 -> conversionNop F64 x
1564 | from == to -> conversionNop to x
1565 | isFloatingRep from -> coerceFP2Int from to x
1566 | isFloatingRep to -> coerceInt2FP from to x
1568 -- narrowing is a nop: we treat the high bits as undefined
1569 MO_S_Conv I32 to -> conversionNop to x
1570 MO_S_Conv I16 I8 -> conversionNop I8 x
1571 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1572 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1575 | from == to -> conversionNop to x
1576 -- narrowing is a nop: we treat the high bits as undefined
1577 MO_U_Conv I32 to -> conversionNop to x
1578 MO_U_Conv I16 I8 -> conversionNop I8 x
1579 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1580 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1582 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1583 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1584 MO_S_Neg rep -> trivialUCode rep NEG x
-- A width conversion that needs no instructions on this target:
-- compile the sub-expression and just re-tag the resulting Register
-- with the new MachRep.
1587 conversionNop new_rep expr
1588 = do e_code <- getRegister expr
1589 return (swizzleRegisterRep e_code new_rep)
1591 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1593 MO_Eq F32 -> condFltReg EQQ x y
1594 MO_Ne F32 -> condFltReg NE x y
1596 MO_S_Gt F32 -> condFltReg GTT x y
1597 MO_S_Ge F32 -> condFltReg GE x y
1598 MO_S_Lt F32 -> condFltReg LTT x y
1599 MO_S_Le F32 -> condFltReg LE x y
1601 MO_Eq F64 -> condFltReg EQQ x y
1602 MO_Ne F64 -> condFltReg NE x y
1604 MO_S_Gt F64 -> condFltReg GTT x y
1605 MO_S_Ge F64 -> condFltReg GE x y
1606 MO_S_Lt F64 -> condFltReg LTT x y
1607 MO_S_Le F64 -> condFltReg LE x y
1609 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1610 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1612 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1613 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1614 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1615 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1617 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1618 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1619 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1620 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1622 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1623 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1624 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1625 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1627 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1628 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1629 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1630 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1632 -- optimize addition with 32-bit immediate
1636 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1637 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1640 (src, srcCode) <- getSomeReg x
1641 let imm = litToImm lit
1642 code dst = srcCode `appOL` toOL [
1643 ADDIS dst src (HA imm),
1644 ADD dst dst (RIImm (LO imm))
1646 return (Any I32 code)
1647 _ -> trivialCode I32 True ADD x y
1649 MO_Add rep -> trivialCode rep True ADD x y
1651 case y of -- subfi ('substract from' with immediate) doesn't exist
1652 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1653 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1654 _ -> trivialCodeNoImm rep SUBF y x
1656 MO_Mul rep -> trivialCode rep True MULLW x y
1658 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1660 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1661 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1663 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1664 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1667 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1669 MO_And rep -> trivialCode rep False AND x y
1670 MO_Or rep -> trivialCode rep False OR x y
1671 MO_Xor rep -> trivialCode rep False XOR x y
1673 MO_Shl rep -> trivialCode rep False SLW x y
1674 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1675 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1677 getRegister (CmmLit (CmmInt i rep))
1678 | Just imm <- makeImmediate rep True i
1680 code dst = unitOL (LI dst imm)
1682 return (Any rep code)
1684 getRegister (CmmLit (CmmFloat f frep)) = do
1685 lbl <- getNewLabelNat
1686 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1687 Amode addr addr_code <- getAmode dynRef
1689 LDATA ReadOnlyData [CmmDataLabel lbl,
1690 CmmStaticLit (CmmFloat f frep)]
1691 `consOL` (addr_code `snocOL` LD frep dst addr)
1692 return (Any frep code)
1694 getRegister (CmmLit lit)
1695 = let rep = cmmLitRep lit
1699 OR dst dst (RIImm (LO imm))
1701 in return (Any rep code)
1703 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extend{S,U}Expr: wrap an integer expression of width rep in a
-- sign-/zero-extending conversion to I32; expressions that are
-- already I32 pass through untouched.
extendSExpr :: MachRep -> CmmExpr -> CmmExpr
extendSExpr rep x = case rep of
  I32 -> x
  _   -> CmmMachOp (MO_S_Conv rep I32) [x]

extendUExpr :: MachRep -> CmmExpr -> CmmExpr
extendUExpr rep x = case rep of
  I32 -> x
  _   -> CmmMachOp (MO_U_Conv rep I32) [x]
1712 #endif /* powerpc_TARGET_ARCH */
1715 -- -----------------------------------------------------------------------------
1716 -- The 'Amode' type: Memory addressing modes passed up the tree.
1718 data Amode = Amode AddrMode InstrBlock
1721 Now, given a tree (the argument to a CmmLoad) that references memory,
1722 produce a suitable addressing mode.
1724 A Rule of the Game (tm) for Amodes: use of the addr bit must
1725 immediately follow use of the code part, since the code part puts
1726 values in registers which the addr then refers to. So you can't put
1727 anything in between, lest it overwrite some of those registers. If
1728 you need to do some other computation between the code part and use of
1729 the addr bit, first store the effective address from the amode in a
1730 temporary, then do the other computation, and then use the temporary:
1734 ... other computation ...
1738 getAmode :: CmmExpr -> NatM Amode
-- CmmRegOff is just reg+offset in disguise: rewrite it into the
-- equivalent CmmMachOp tree and try again.
1739 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1741 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1743 #if alpha_TARGET_ARCH
1745 getAmode (StPrim IntSubOp [x, StInt i])
1746 = getNewRegNat PtrRep `thenNat` \ tmp ->
1747 getRegister x `thenNat` \ register ->
1749 code = registerCode register tmp
1750 reg = registerName register tmp
1751 off = ImmInt (-(fromInteger i))
1753 return (Amode (AddrRegImm reg off) code)
1755 getAmode (StPrim IntAddOp [x, StInt i])
1756 = getNewRegNat PtrRep `thenNat` \ tmp ->
1757 getRegister x `thenNat` \ register ->
1759 code = registerCode register tmp
1760 reg = registerName register tmp
1761 off = ImmInt (fromInteger i)
1763 return (Amode (AddrRegImm reg off) code)
1767 = return (Amode (AddrImm imm__2) id)
1770 imm__2 = case imm of Just x -> x
1773 = getNewRegNat PtrRep `thenNat` \ tmp ->
1774 getRegister other `thenNat` \ register ->
1776 code = registerCode register tmp
1777 reg = registerName register tmp
1779 return (Amode (AddrReg reg) code)
1781 #endif /* alpha_TARGET_ARCH */
1783 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1785 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1787 -- This is all just ridiculous, since it carefully undoes
1788 -- what mangleIndexTree has just done.
1789 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1790 | not (is64BitLit lit)
1791 -- ASSERT(rep == I32)???
1792 = do (x_reg, x_code) <- getSomeReg x
1793 let off = ImmInt (-(fromInteger i))
1794 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1796 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1797 | not (is64BitLit lit)
1798 -- ASSERT(rep == I32)???
1799 = do (x_reg, x_code) <- getSomeReg x
1800 let off = ImmInt (fromInteger i)
1801 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1803 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1804 -- recognised by the next rule.
1805 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1807 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1809 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1810 [y, CmmLit (CmmInt shift _)]])
1811 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1812 = do (x_reg, x_code) <- getNonClobberedReg x
1813 -- x must be in a temp, because it has to stay live over y_code
1814 -- we could compare x_reg and y_reg and do something better here...
1815 (y_reg, y_code) <- getSomeReg y
1817 code = x_code `appOL` y_code
1818 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1819 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1822 getAmode (CmmLit lit) | not (is64BitLit lit)
1823 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1826 (reg,code) <- getSomeReg expr
1827 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1829 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1831 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1833 #if sparc_TARGET_ARCH
1835 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1838 (reg, code) <- getSomeReg x
1840 off = ImmInt (-(fromInteger i))
1841 return (Amode (AddrRegImm reg off) code)
1844 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1847 (reg, code) <- getSomeReg x
1849 off = ImmInt (fromInteger i)
1850 return (Amode (AddrRegImm reg off) code)
1852 getAmode (CmmMachOp (MO_Add rep) [x, y])
1854 (regX, codeX) <- getSomeReg x
1855 (regY, codeY) <- getSomeReg y
1857 code = codeX `appOL` codeY
1858 return (Amode (AddrRegReg regX regY) code)
1860 -- XXX Is this same as "leaf" in Stix?
1861 getAmode (CmmLit lit)
1863 tmp <- getNewRegNat I32
1865 code = unitOL (SETHI (HI imm__2) tmp)
1866 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1868 imm__2 = litToImm lit
1872 (reg, code) <- getSomeReg other
1875 return (Amode (AddrRegImm reg off) code)
1877 #endif /* sparc_TARGET_ARCH */
1879 #ifdef powerpc_TARGET_ARCH
1880 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1881 | Just off <- makeImmediate I32 True (-i)
1883 (reg, code) <- getSomeReg x
1884 return (Amode (AddrRegImm reg off) code)
1887 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1888 | Just off <- makeImmediate I32 True i
1890 (reg, code) <- getSomeReg x
1891 return (Amode (AddrRegImm reg off) code)
1893 -- optimize addition with 32-bit immediate
1895 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1897 tmp <- getNewRegNat I32
1898 (src, srcCode) <- getSomeReg x
1899 let imm = litToImm lit
1900 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1901 return (Amode (AddrRegImm tmp (LO imm)) code)
1903 getAmode (CmmLit lit)
1905 tmp <- getNewRegNat I32
1906 let imm = litToImm lit
1907 code = unitOL (LIS tmp (HA imm))
1908 return (Amode (AddrRegImm tmp (LO imm)) code)
1910 getAmode (CmmMachOp (MO_Add I32) [x, y])
1912 (regX, codeX) <- getSomeReg x
1913 (regY, codeY) <- getSomeReg y
1914 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1918 (reg, code) <- getSomeReg other
1921 return (Amode (AddrRegImm reg off) code)
1922 #endif /* powerpc_TARGET_ARCH */
1924 -- -----------------------------------------------------------------------------
1925 -- getOperand: sometimes any operand will do.
1927 -- getNonClobberedOperand: the value of the operand will remain valid across
1928 -- the computation of an arbitrary expression, unless the expression
1929 -- is computed directly into a register which the operand refers to
1930 -- (see trivialCode where this function is used for an example).
1932 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1934 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1935 #if x86_64_TARGET_ARCH
1936 getNonClobberedOperand (CmmLit lit)
1937 | isSuitableFloatingPointLit lit = do
1938 lbl <- getNewLabelNat
1939 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1941 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1943 getNonClobberedOperand (CmmLit lit)
1944 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1945 return (OpImm (litToImm lit), nilOL)
1946 getNonClobberedOperand (CmmLoad mem pk)
1947 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1948 Amode src mem_code <- getAmode mem
1950 if (amodeCouldBeClobbered src)
1952 tmp <- getNewRegNat wordRep
1953 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1954 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1957 return (OpAddr src', save_code `appOL` mem_code)
1958 getNonClobberedOperand e = do
1959 (reg, code) <- getNonClobberedReg e
1960 return (OpReg reg, code)
1962 amodeCouldBeClobbered :: AddrMode -> Bool
1963 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1965 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1966 regClobbered _ = False
1968 -- getOperand: the operand is not required to remain valid across the
1969 -- computation of an arbitrary expression.
1970 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1971 #if x86_64_TARGET_ARCH
1972 getOperand (CmmLit lit)
1973 | isSuitableFloatingPointLit lit = do
1974 lbl <- getNewLabelNat
1975 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1977 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1979 getOperand (CmmLit lit)
1980 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1981 return (OpImm (litToImm lit), nilOL)
1982 getOperand (CmmLoad mem pk)
1983 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1984 Amode src mem_code <- getAmode mem
1985 return (OpAddr src, mem_code)
1987 (reg, code) <- getSomeReg e
1988 return (OpReg reg, code)
1990 isOperand :: CmmExpr -> Bool
1991 isOperand (CmmLoad _ _) = True
1992 isOperand (CmmLit lit) = not (is64BitLit lit)
1993 || isSuitableFloatingPointLit lit
1996 -- if we want a floating-point literal as an operand, we can
1997 -- use it directly from memory. However, if the literal is
1998 -- zero, we're better off generating it into a register using
-- Nonzero float literals can be used directly from memory; zero is
-- rejected because it is better generated into a register.
2000 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2001 isSuitableFloatingPointLit _ = False
2003 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2004 getRegOrMem (CmmLoad mem pk)
2005 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2006 Amode src mem_code <- getAmode mem
2007 return (OpAddr src, mem_code)
2009 (reg, code) <- getNonClobberedReg e
2010 return (OpReg reg, code)
2012 #if x86_64_TARGET_ARCH
2013 is64BitLit (CmmInt i I64) = is64BitInteger i
2014 -- assume that labels are in the range 0-2^31-1: this assumes the
2015 -- small memory model (see gcc docs, -mcmodel=small).
2017 is64BitLit x = False
-- | True when @i@ lies outside the signed 32-bit range
-- [-0x80000000 .. 0x7fffffff] and therefore needs a full 64-bit
-- representation (e.g. cannot be encoded as a 32-bit immediate).
is64BitInteger :: Integer -> Bool
is64BitInteger i = i < lo32 || i > hi32
  where
    lo32 = -0x80000000
    hi32 =  0x7fffffff
2023 -- -----------------------------------------------------------------------------
2024 -- The 'CondCode' type: Condition codes passed up the tree.
2026 data CondCode = CondCode Bool Cond InstrBlock
2028 -- Set up a condition code for a conditional branch.
2030 getCondCode :: CmmExpr -> NatM CondCode
2032 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2034 #if alpha_TARGET_ARCH
2035 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2036 #endif /* alpha_TARGET_ARCH */
2038 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2040 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2041 -- yes, they really do seem to want exactly the same!
2043 getCondCode (CmmMachOp mop [x, y])
2044 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2046 MO_Eq F32 -> condFltCode EQQ x y
2047 MO_Ne F32 -> condFltCode NE x y
2049 MO_S_Gt F32 -> condFltCode GTT x y
2050 MO_S_Ge F32 -> condFltCode GE x y
2051 MO_S_Lt F32 -> condFltCode LTT x y
2052 MO_S_Le F32 -> condFltCode LE x y
2054 MO_Eq F64 -> condFltCode EQQ x y
2055 MO_Ne F64 -> condFltCode NE x y
2057 MO_S_Gt F64 -> condFltCode GTT x y
2058 MO_S_Ge F64 -> condFltCode GE x y
2059 MO_S_Lt F64 -> condFltCode LTT x y
2060 MO_S_Le F64 -> condFltCode LE x y
2062 MO_Eq rep -> condIntCode EQQ x y
2063 MO_Ne rep -> condIntCode NE x y
2065 MO_S_Gt rep -> condIntCode GTT x y
2066 MO_S_Ge rep -> condIntCode GE x y
2067 MO_S_Lt rep -> condIntCode LTT x y
2068 MO_S_Le rep -> condIntCode LE x y
2070 MO_U_Gt rep -> condIntCode GU x y
2071 MO_U_Ge rep -> condIntCode GEU x y
2072 MO_U_Lt rep -> condIntCode LU x y
2073 MO_U_Le rep -> condIntCode LEU x y
2075 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2077 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2079 #elif powerpc_TARGET_ARCH
2081 -- almost the same as everywhere else - but we need to
2082 -- extend small integers to 32 bit first
2084 getCondCode (CmmMachOp mop [x, y])
2086 MO_Eq F32 -> condFltCode EQQ x y
2087 MO_Ne F32 -> condFltCode NE x y
2089 MO_S_Gt F32 -> condFltCode GTT x y
2090 MO_S_Ge F32 -> condFltCode GE x y
2091 MO_S_Lt F32 -> condFltCode LTT x y
2092 MO_S_Le F32 -> condFltCode LE x y
2094 MO_Eq F64 -> condFltCode EQQ x y
2095 MO_Ne F64 -> condFltCode NE x y
2097 MO_S_Gt F64 -> condFltCode GTT x y
2098 MO_S_Ge F64 -> condFltCode GE x y
2099 MO_S_Lt F64 -> condFltCode LTT x y
2100 MO_S_Le F64 -> condFltCode LE x y
2102 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2103 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2105 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2106 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2107 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2108 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2110 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2111 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2112 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2113 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2115 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2117 getCondCode other = panic "getCondCode(2)(powerpc)"
2123 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2124 -- passed back up the tree.
2126 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2128 #if alpha_TARGET_ARCH
2129 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2130 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2131 #endif /* alpha_TARGET_ARCH */
2133 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2134 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- The clauses below are ordered from most to least specific operand
-- pattern, to pick the cheapest x86 addressing form for CMP/TEST.
2136 -- memory vs immediate
2137 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2138 Amode x_addr x_code <- getAmode x
2141 code = x_code `snocOL`
2142 CMP pk (OpImm imm) (OpAddr x_addr)
2144 return (CondCode False cond code)
-- Comparison against zero: TEST reg,reg sets the flags without
-- needing an immediate operand.
2147 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2148 (x_reg, x_code) <- getSomeReg x
2150 code = x_code `snocOL`
2151 TEST pk (OpReg x_reg) (OpReg x_reg)
2153 return (CondCode False cond code)
2155 -- anything vs operand
2156 condIntCode cond x y | isOperand y = do
2157 (x_reg, x_code) <- getNonClobberedReg x
2158 (y_op, y_code) <- getOperand y
2160 code = x_code `appOL` y_code `snocOL`
2161 CMP (cmmExprRep x) y_op (OpReg x_reg)
2163 return (CondCode False cond code)
2165 -- anything vs anything
2166 condIntCode cond x y = do
2167 (y_reg, y_code) <- getNonClobberedReg y
2168 (x_op, x_code) <- getRegOrMem x
2170 code = y_code `appOL`
2172 CMP (cmmExprRep x) (OpReg y_reg) x_op
2174 return (CondCode False cond code)
2177 #if i386_TARGET_ARCH
-- i386 float compare via the x87 stack (GCMP pseudo-insn).
2178 condFltCode cond x y
2179 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2180 (x_reg, x_code) <- getNonClobberedReg x
2181 (y_reg, y_code) <- getSomeReg y
2183 code = x_code `appOL` y_code `snocOL`
2184 GCMP cond x_reg y_reg
2185 -- The GCMP insn does the test and sets the zero flag if comparable
2186 -- and true. Hence we always supply EQQ as the condition to test.
2187 return (CondCode True EQQ code)
2188 #endif /* i386_TARGET_ARCH */
2190 #if x86_64_TARGET_ARCH
2191 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2192 -- an operand, but the right must be a reg. We can probably do better
2193 -- than this general case...
2194 condFltCode cond x y = do
2195 (x_reg, x_code) <- getNonClobberedReg x
2196 (y_op, y_code) <- getOperand y
2198 code = x_code `appOL`
2200 CMP (cmmExprRep x) y_op (OpReg x_reg)
2201 -- NB(1): we need to use the unsigned comparison operators on the
2202 -- result of this comparison.
2204 return (CondCode True (condToUnsigned cond) code)
2207 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2209 #if sparc_TARGET_ARCH
-- Integer comparison: SPARC has no dedicated compare instruction, so we
-- subtract into %g0 (the always-zero register), discarding the result
-- but keeping the condition codes. NOTE(review): of SUB's two Bool
-- flags, the second presumably requests cc-setting (subcc) -- confirm
-- against the SPARC instr definitions.
2211 condIntCode cond x (CmmLit (CmmInt y rep))
2214 (src1, code) <- getSomeReg x
2216 src2 = ImmInt (fromInteger y)
2217 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2218 return (CondCode False cond code')
2220 condIntCode cond x y = do
2221 (src1, code1) <- getSomeReg x
2222 (src2, code2) <- getSomeReg y
2224 code__2 = code1 `appOL` code2 `snocOL`
2225 SUB False True src1 (RIReg src2) g0
2226 return (CondCode False cond code__2)
-- Float comparison: FCMP requires both operands at the same precision,
-- so a lone F32 operand is first promoted to F64 via FxTOy into 'tmp'.
2229 condFltCode cond x y = do
2230 (src1, code1) <- getSomeReg x
2231 (src2, code2) <- getSomeReg y
2232 tmp <- getNewRegNat F64
2234 promote x = FxTOy F32 F64 x tmp
2241 code1 `appOL` code2 `snocOL`
2242 FCMP True pk1 src1 src2
2243 else if pk1 == F32 then
2244 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2245 FCMP True F64 tmp src2
2247 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2248 FCMP True F64 src1 tmp
2249 return (CondCode True cond code__2)
2251 #endif /* sparc_TARGET_ARCH */
2253 #if powerpc_TARGET_ARCH
2254 -- ###FIXME: I16 and I8!
-- Immediate form: only taken when the literal fits a PPC immediate
-- field (makeImmediate, signedness chosen to match the comparison).
2255 condIntCode cond x (CmmLit (CmmInt y rep))
2256 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2258 (src1, code) <- getSomeReg x
2260 code' = code `snocOL`
2261 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2262 return (CondCode False cond code')
-- General case: CMPL (logical/unsigned) vs CMP (signed) on registers.
2264 condIntCode cond x y = do
2265 (src1, code1) <- getSomeReg x
2266 (src2, code2) <- getSomeReg y
2268 code' = code1 `appOL` code2 `snocOL`
2269 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2270 return (CondCode False cond code')
2272 condFltCode cond x y = do
2273 (src1, code1) <- getSomeReg x
2274 (src2, code2) <- getSomeReg y
2276 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2277 code'' = case cond of -- twiddle CR to handle unordered case
-- For GE/LE the "unordered" CR outcome must not count as true, so the
-- relevant CR field bits are recombined with CRNOR.
2278 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2279 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2282 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2283 return (CondCode True cond code'')
2285 #endif /* powerpc_TARGET_ARCH */
2287 -- -----------------------------------------------------------------------------
2288 -- Generating assignments
2290 -- Assignments are really at the heart of the whole code generation
2291 -- business. Almost all top-level nodes of any real importance are
2292 -- assignments, which correspond to loads, stores, or register
2293 -- transfers. If we're really lucky, some of the register transfers
2294 -- will go away, because we can use the destination register to
2295 -- complete the code generation for the right hand side. This only
2296 -- fails when the right hand side is forced into a fixed register
2297 -- (e.g. the result of a call).
-- Four variants: {Mem,Reg} destination x {Int,Flt} value. The MachRep
-- is the width of the value being assigned; for Mem the first CmmExpr
-- is the destination address, the second the source value.
2299 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2300 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2302 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2303 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2305 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2307 #if alpha_TARGET_ARCH
-- NOTE(review): this Alpha code predates the Cmm conversion (old
-- 'Register'/'thenNat' style, and it defines assignIntCode rather than
-- the assignMem/assignReg pair declared above); presumably bit-rotted
-- and never compiled -- confirm before attempting to resurrect Alpha.
2309 assignIntCode pk (CmmLoad dst _) src
2310 = getNewRegNat IntRep `thenNat` \ tmp ->
2311 getAmode dst `thenNat` \ amode ->
2312 getRegister src `thenNat` \ register ->
2314 code1 = amodeCode amode []
2315 dst__2 = amodeAddr amode
2316 code2 = registerCode register tmp []
2317 src__2 = registerName register tmp
2318 sz = primRepToSize pk
2319 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2323 assignIntCode pk dst src
2324 = getRegister dst `thenNat` \ register1 ->
2325 getRegister src `thenNat` \ register2 ->
2327 dst__2 = registerName register1 zeroh
2328 code = registerCode register2 dst__2
2329 src__2 = registerName register2 dst__2
-- A Fixed register cannot be redirected to the destination, so an
-- explicit register-to-register move (OR with itself) is emitted.
2330 code__2 = if isFixed register2
2331 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2336 #endif /* alpha_TARGET_ARCH */
2338 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2340 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2342 -- integer assignment to memory
2343 assignMem_IntCode pk addr src = do
2344 Amode addr code_addr <- getAmode addr
2345 (code_src, op_src) <- get_op_RI src
2347 code = code_src `appOL`
2349 MOV pk op_src (OpAddr addr)
2350 -- NOTE: op_src is stable, so it will still be valid
2351 -- after code_addr. This may involve the introduction
2352 -- of an extra MOV to a temporary register, but we hope
2353 -- the register allocator will get rid of it.
-- Helper: produce either an immediate operand (for literals that fit)
-- or a non-clobberable register operand for the RHS.
2357 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2358 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2359 = return (nilOL, OpImm (litToImm lit))
2361 = do (reg,code) <- getNonClobberedReg op
2362 return (code, OpReg reg)
2365 -- Assign; dst is a reg, rhs is mem
2366 assignReg_IntCode pk reg (CmmLoad src _) = do
2367 load_code <- intLoadCode (MOV pk) src
2368 return (load_code (getRegisterReg reg))
2370 -- dst is a reg, but src could be anything
2371 assignReg_IntCode pk reg src = do
2372 code <- getAnyReg src
2373 return (code (getRegisterReg reg))
2375 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2377 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2379 #if sparc_TARGET_ARCH
-- Store the source register at the computed address.
2381 assignMem_IntCode pk addr src = do
2382 (srcReg, code) <- getSomeReg src
2383 Amode dstAddr addr_code <- getAmode addr
2384 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Any-result code can target dst directly; a Fixed result needs an
-- explicit move, done here as OR %g0, src -> dst.
2386 assignReg_IntCode pk reg src = do
2387 r <- getRegister src
2389 Any _ code -> code dst
2390 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2392 dst = getRegisterReg reg
2395 #endif /* sparc_TARGET_ARCH */
2397 #if powerpc_TARGET_ARCH
-- Store the source register at the computed address.
2399 assignMem_IntCode pk addr src = do
2400 (srcReg, code) <- getSomeReg src
2401 Amode dstAddr addr_code <- getAmode addr
2402 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2404 -- dst is a reg, but src could be anything
-- Any-result code targets dst directly; a Fixed result needs an
-- explicit MR (move-register) into dst.
2405 assignReg_IntCode pk reg src
2407 r <- getRegister src
2409 Any _ code -> code dst
2410 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2412 dst = getRegisterReg reg
2414 #endif /* powerpc_TARGET_ARCH */
2417 -- -----------------------------------------------------------------------------
2418 -- Floating-point assignments
2420 #if alpha_TARGET_ARCH
-- NOTE(review): pre-Cmm Alpha code (old Register/thenNat style,
-- single assignFltCode instead of the Mem/Reg pair); presumably
-- bit-rotted -- see the matching note on the integer version.
2422 assignFltCode pk (CmmLoad dst _) src
2423 = getNewRegNat pk `thenNat` \ tmp ->
2424 getAmode dst `thenNat` \ amode ->
2425 getRegister src `thenNat` \ register ->
2427 code1 = amodeCode amode []
2428 dst__2 = amodeAddr amode
2429 code2 = registerCode register tmp []
2430 src__2 = registerName register tmp
2431 sz = primRepToSize pk
2432 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2436 assignFltCode pk dst src
2437 = getRegister dst `thenNat` \ register1 ->
2438 getRegister src `thenNat` \ register2 ->
2440 dst__2 = registerName register1 zeroh
2441 code = registerCode register2 dst__2
2442 src__2 = registerName register2 dst__2
2443 code__2 = if isFixed register2
2444 then code . mkSeqInstr (FMOV src__2 dst__2)
2449 #endif /* alpha_TARGET_ARCH */
2451 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2453 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2455 -- Floating point assignment to memory
-- i386 stores via the x87 GST pseudo-insn; x86_64 uses a plain
-- (SSE) MOV -- selected at build time by IF_ARCH_i386.
2456 assignMem_FltCode pk addr src = do
2457 (src_reg, src_code) <- getNonClobberedReg src
2458 Amode addr addr_code <- getAmode addr
2460 code = src_code `appOL`
2462 IF_ARCH_i386(GST pk src_reg addr,
2463 MOV pk (OpReg src_reg) (OpAddr addr))
2466 -- Floating point assignment to a register/temporary
2467 assignReg_FltCode pk reg src = do
2468 src_code <- getAnyReg src
2469 return (src_code (getRegisterReg reg))
2471 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2473 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2475 #if sparc_TARGET_ARCH
2477 -- Floating point assignment to memory
-- If the source's width (pk__2) differs from the store width (pk),
-- convert through a temporary with FxTOy before the ST.
2478 assignMem_FltCode pk addr src = do
2479 Amode dst__2 code1 <- getAmode addr
2480 (src__2, code2) <- getSomeReg src
2481 tmp1 <- getNewRegNat pk
2483 pk__2 = cmmExprRep src
2484 code__2 = code1 `appOL` code2 `appOL`
2486 then unitOL (ST pk src__2 dst__2)
2487 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2490 -- Floating point assignment to a register/temporary
2491 -- ToDo: Verify correctness
-- NOTE(review): the Fixed branch moves freg into the fresh temp v1,
-- not into dst -- that looks suspicious (cf. the int version, which
-- targets dst); the elided lines may fix this up -- verify.
2492 assignReg_FltCode pk reg src = do
2493 r <- getRegister src
2494 v1 <- getNewRegNat pk
2496 Any _ code -> code dst
2497 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2499 dst = getRegisterReg reg
2501 #endif /* sparc_TARGET_ARCH */
2503 #if powerpc_TARGET_ARCH
-- On PPC the integer assignment paths (getRegister/ST/MR) already
-- handle float reps, so the float variants simply alias them.
2506 assignMem_FltCode = assignMem_IntCode
2507 assignReg_FltCode = assignReg_IntCode
2509 #endif /* powerpc_TARGET_ARCH */
2512 -- -----------------------------------------------------------------------------
2513 -- Generating an non-local jump
2515 -- (If applicable) Do not fill the delay slots here; you will confuse the
2516 -- register allocator.
-- | Generate a non-local (tail-call style) jump to the given target.
2518 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2520 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2522 #if alpha_TARGET_ARCH
-- NOTE(review): pre-Cmm Alpha code; presumably bit-rotted.
-- Local (assembler-temp) labels can be reached with a direct BR;
-- anything else loads the address into pv and jumps through it.
2524 genJump (CmmLabel lbl)
2525 | isAsmTemp lbl = returnInstr (BR target)
2526 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2528 target = ImmCLbl lbl
2531 = getRegister tree `thenNat` \ register ->
2532 getNewRegNat PtrRep `thenNat` \ tmp ->
2534 dst = registerName register pv
2535 code = registerCode register pv
2536 target = registerName register pv
2538 if isFixed register then
2539 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2541 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2543 #endif /* alpha_TARGET_ARCH */
2545 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2547 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: JMP can take a memory, immediate, or register operand directly.
2549 genJump (CmmLoad mem pk) = do
2550 Amode target code <- getAmode mem
2551 return (code `snocOL` JMP (OpAddr target))
2553 genJump (CmmLit lit) = do
2554 return (unitOL (JMP (OpImm (litToImm lit))))
2557 (reg,code) <- getSomeReg expr
2558 return (code `snocOL` JMP (OpReg reg))
2560 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2562 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2564 #if sparc_TARGET_ARCH
-- SPARC: every control transfer has a delay slot; the trailing NOP
-- fills it (the delay-slot filler must not be run here -- see the
-- module comment about confusing the register allocator).
2566 genJump (CmmLit (CmmLabel lbl))
2567 = return (toOL [CALL (Left target) 0 True, NOP])
2569 target = ImmCLbl lbl
2573 (target, code) <- getSomeReg tree
2574 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2576 #endif /* sparc_TARGET_ARCH */
2578 #if powerpc_TARGET_ARCH
-- PPC: direct jump to a label, or indirect via the CTR register.
2579 genJump (CmmLit (CmmLabel lbl))
2580 = return (unitOL $ JMP lbl)
2584 (target,code) <- getSomeReg tree
2585 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2586 #endif /* powerpc_TARGET_ARCH */
2589 -- -----------------------------------------------------------------------------
2590 -- Unconditional branches
-- | Unconditional branch to a local block; mkBranchInstr supplies the
-- arch-specific instruction sequence.
2592 genBranch :: BlockId -> NatM InstrBlock
2594 genBranch = return . toOL . mkBranchInstr
2596 -- -----------------------------------------------------------------------------
2597 -- Conditional jumps
2600 Conditional jumps are always to local labels, so we can use branch
2601 instructions. We peek at the arguments to decide what kind of
2604 ALPHA: For comparisons with 0, we're laughing, because we can just do
2605 the desired conditional branch.
2607 I386: First, we have to ensure that the condition
2608 codes are set according to the supplied comparison operation.
2610 SPARC: First, we have to ensure that the condition codes are set
2611 according to the supplied comparison operation. We generate slightly
2612 different code for floating point comparisons, because a floating
2613 point operation cannot directly precede a @BF@. We assume the worst
2614 and fill that slot with a @NOP@.
2616 SPARC: Do not fill the delay slots here; you will confuse the register
2622 :: BlockId -- the branch target
2623 -> CmmExpr -- the condition on which to branch
2626 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2628 #if alpha_TARGET_ARCH
-- NOTE(review): this whole Alpha section still pattern-matches on the
-- pre-Cmm Stix types (StPrim, StInt, StDouble) and old PrimOps, unlike
-- the rest of the file; presumably dead/bit-rotted code kept for
-- reference -- confirm before attempting to re-enable Alpha.
2630 genCondJump id (StPrim op [x, StInt 0])
2631 = getRegister x `thenNat` \ register ->
2632 getNewRegNat (registerRep register)
2635 code = registerCode register tmp
2636 value = registerName register tmp
2637 pk = registerRep register
2638 target = ImmCLbl lbl
2640 returnSeq code [BI (cmpOp op) value target]
2642 cmpOp CharGtOp = GTT
2644 cmpOp CharEqOp = EQQ
2646 cmpOp CharLtOp = LTT
-- NOTE(review): the unsigned mappings below (WordGeOp -> ALWAYS,
-- WordLtOp -> NEVER, WordLeOp -> EQQ, and the Addr equivalents) only
-- make sense for comparison against literal zero, which is what this
-- equation matches.
2655 cmpOp WordGeOp = ALWAYS
2656 cmpOp WordEqOp = EQQ
2658 cmpOp WordLtOp = NEVER
2659 cmpOp WordLeOp = EQQ
2661 cmpOp AddrGeOp = ALWAYS
2662 cmpOp AddrEqOp = EQQ
2664 cmpOp AddrLtOp = NEVER
2665 cmpOp AddrLeOp = EQQ
-- Comparison against literal 0.0: branch on the FP value directly (BF).
2667 genCondJump lbl (StPrim op [x, StDouble 0.0])
2668 = getRegister x `thenNat` \ register ->
2669 getNewRegNat (registerRep register)
2672 code = registerCode register tmp
2673 value = registerName register tmp
2674 pk = registerRep register
2675 target = ImmCLbl lbl
2677 return (code . mkSeqInstr (BF (cmpOp op) value target))
2679 cmpOp FloatGtOp = GTT
2680 cmpOp FloatGeOp = GE
2681 cmpOp FloatEqOp = EQQ
2682 cmpOp FloatNeOp = NE
2683 cmpOp FloatLtOp = LTT
2684 cmpOp FloatLeOp = LE
2685 cmpOp DoubleGtOp = GTT
2686 cmpOp DoubleGeOp = GE
2687 cmpOp DoubleEqOp = EQQ
2688 cmpOp DoubleNeOp = NE
2689 cmpOp DoubleLtOp = LTT
2690 cmpOp DoubleLeOp = LE
-- General FP comparison: Alpha only tests LE/LTT/EQQ directly, so the
-- other orderings are expressed by comparing with the complementary
-- op and inverting the branch condition.
2692 genCondJump lbl (StPrim op [x, y])
2694 = trivialFCode pr instr x y `thenNat` \ register ->
2695 getNewRegNat F64 `thenNat` \ tmp ->
2697 code = registerCode register tmp
2698 result = registerName register tmp
2699 target = ImmCLbl lbl
2701 return (code . mkSeqInstr (BF cond result target))
2703 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2705 fltCmpOp op = case op of
2719 (instr, cond) = case op of
2720 FloatGtOp -> (FCMP TF LE, EQQ)
2721 FloatGeOp -> (FCMP TF LTT, EQQ)
2722 FloatEqOp -> (FCMP TF EQQ, NE)
2723 FloatNeOp -> (FCMP TF EQQ, EQQ)
2724 FloatLtOp -> (FCMP TF LTT, NE)
2725 FloatLeOp -> (FCMP TF LE, NE)
2726 DoubleGtOp -> (FCMP TF LE, EQQ)
2727 DoubleGeOp -> (FCMP TF LTT, EQQ)
2728 DoubleEqOp -> (FCMP TF EQQ, NE)
2729 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2730 DoubleLtOp -> (FCMP TF LTT, NE)
2731 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same complement-and-invert scheme.
2733 genCondJump lbl (StPrim op [x, y])
2734 = trivialCode instr x y `thenNat` \ register ->
2735 getNewRegNat IntRep `thenNat` \ tmp ->
2737 code = registerCode register tmp
2738 result = registerName register tmp
2739 target = ImmCLbl lbl
2741 return (code . mkSeqInstr (BI cond result target))
2743 (instr, cond) = case op of
2744 CharGtOp -> (CMP LE, EQQ)
2745 CharGeOp -> (CMP LTT, EQQ)
2746 CharEqOp -> (CMP EQQ, NE)
2747 CharNeOp -> (CMP EQQ, EQQ)
2748 CharLtOp -> (CMP LTT, NE)
2749 CharLeOp -> (CMP LE, NE)
2750 IntGtOp -> (CMP LE, EQQ)
2751 IntGeOp -> (CMP LTT, EQQ)
2752 IntEqOp -> (CMP EQQ, NE)
2753 IntNeOp -> (CMP EQQ, EQQ)
2754 IntLtOp -> (CMP LTT, NE)
2755 IntLeOp -> (CMP LE, NE)
2756 WordGtOp -> (CMP ULE, EQQ)
2757 WordGeOp -> (CMP ULT, EQQ)
2758 WordEqOp -> (CMP EQQ, NE)
2759 WordNeOp -> (CMP EQQ, EQQ)
2760 WordLtOp -> (CMP ULT, NE)
2761 WordLeOp -> (CMP ULE, NE)
2762 AddrGtOp -> (CMP ULE, EQQ)
2763 AddrGeOp -> (CMP ULT, EQQ)
2764 AddrEqOp -> (CMP EQQ, NE)
2765 AddrNeOp -> (CMP EQQ, EQQ)
2766 AddrLtOp -> (CMP ULT, NE)
2767 AddrLeOp -> (CMP ULE, NE)
2769 #endif /* alpha_TARGET_ARCH */
2771 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2773 #if i386_TARGET_ARCH
-- i386: getCondCode sets the flags; a single conditional jump follows.
2775 genCondJump id bool = do
2776 CondCode _ cond code <- getCondCode bool
2777 return (code `snocOL` JXX cond id)
2781 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2783 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions map to one JXX, but SSE float comparisons
-- (ucomiss/ucomisd) leave an "unordered" (NaN) outcome in the flags,
-- so some conditions need a two-branch sequence via a fresh block
-- label (see comment with condFltReg).
2785 genCondJump id bool = do
2786 CondCode is_float cond cond_code <- getCondCode bool
2789 return (cond_code `snocOL` JXX cond id)
2791 lbl <- getBlockIdNat
2793 -- see comment with condFltReg
2794 let code = case cond of
2800 plain_test = unitOL (
2803 or_unordered = toOL [
2807 and_ordered = toOL [
2813 return (cond_code `appOL` code)
2817 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2819 #if sparc_TARGET_ARCH
-- SPARC: float branches (BF) cannot directly follow the FP compare,
-- so a NOP is inserted before them; every branch also gets a trailing
-- NOP for its delay slot.
2821 genCondJump (BlockId id) bool = do
2822 CondCode is_float cond code <- getCondCode bool
2827 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2828 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2832 #endif /* sparc_TARGET_ARCH */
2835 #if powerpc_TARGET_ARCH
-- PPC: condition already lives in the CR; one conditional branch (BCC).
2837 genCondJump id bool = do
2838 CondCode is_float cond code <- getCondCode bool
2839 return (code `snocOL` BCC cond id)
2841 #endif /* powerpc_TARGET_ARCH */
2844 -- -----------------------------------------------------------------------------
2845 -- Generating C calls
2847 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2848 -- @get_arg@, which moves the arguments to the correct registers/stack
2849 -- locations. Apart from that, the code is easy.
2851 -- (If applicable) Do not fill the delay slots here; you will confuse the
2852 -- register allocator.
2855 :: CmmCallTarget -- function to call
2856 -> [(CmmReg,MachHint)] -- where to put the result
2857 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2858 -> Maybe [GlobalReg] -- volatile regs to save
2861 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2863 #if alpha_TARGET_ARCH
-- NOTE(review): pre-Cmm Alpha code (StixTree, thenNat, mapAccumLNat);
-- presumably bit-rotted -- see the notes on the other alpha sections.
2867 genCCall fn cconv result_regs args
2868 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2869 `thenNat` \ ((unused,_), argCode) ->
2871 nRegs = length allArgRegs - length unused
2872 code = asmSeqThen (map ($ []) argCode)
2875 LDA pv (AddrImm (ImmLab (ptext fn))),
2876 JSR ra (AddrReg pv) nRegs,
2877 LDGP gp (AddrReg ra)]
2879 ------------------------
2880 {- Try to get a value into a specific register (or registers) for
2881 a call. The first 6 arguments go into the appropriate
2882 argument register (separate registers for integer and floating
2883 point arguments, but used in lock-step), and the remaining
2884 arguments are dumped to the stack, beginning at 0(sp). Our
2885 first argument is a pair of the list of remaining argument
2886 registers to be assigned for this call and the next stack
2887 offset to use for overflowing arguments. This way,
2888 @get_Arg@ can be applied to all of a call's arguments using
2892 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2893 -> StixTree -- Current argument
2894 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2896 -- We have to use up all of our argument registers first...
2898 get_arg ((iDst,fDst):dsts, offset) arg
2899 = getRegister arg `thenNat` \ register ->
2901 reg = if isFloatingRep pk then fDst else iDst
2902 code = registerCode register reg
2903 src = registerName register reg
2904 pk = registerRep register
2907 if isFloatingRep pk then
2908 ((dsts, offset), if isFixed register then
2909 code . mkSeqInstr (FMOV src fDst)
2912 ((dsts, offset), if isFixed register then
2913 code . mkSeqInstr (OR src (RIReg src) iDst)
2916 -- Once we have run out of argument registers, we move to the
2919 get_arg ([], offset) arg
2920 = getRegister arg `thenNat` \ register ->
2921 getNewRegNat (registerRep register)
2924 code = registerCode register tmp
2925 src = registerName register tmp
2926 pk = registerRep register
2927 sz = primRepToSize pk
2929 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2931 #endif /* alpha_TARGET_ARCH */
2933 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2935 #if i386_TARGET_ARCH
2937 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
2938 -- write barrier compiles to no code on x86/x86-64;
2939 -- we keep it this long in order to prevent earlier optimisations.
2941 -- we only cope with a single result for foreign calls
-- Float primops that map onto a single x87 instruction are inlined;
-- everything else becomes an out-of-line libm call.
2942 genCCall (CmmPrim op) [(r,_)] args vols = do
2944 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2945 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2947 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2948 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2950 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2951 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
2953 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
2954 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
2956 other_op -> outOfLineFloatOp op r args vols
2958 actuallyInlineFloatOp rep instr [(x,_)]
2959 = do res <- trivialUFCode rep instr x
2961 return (any (getRegisterReg r))
-- General C call: push args right-to-left (cdecl), call, pop, then
-- move the result out of eax/edx/fake0.
2963 genCCall target dest_regs args vols = do
2965 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
-- Darwin requires 16-byte stack alignment at the call, hence the
-- padding computed below; other OSes just push the raw size.
2966 #if !darwin_TARGET_OS
2967 tot_arg_size = sum sizes
2969 raw_arg_size = sum sizes
2970 tot_arg_size = roundTo 16 raw_arg_size
2971 arg_pad_size = tot_arg_size - raw_arg_size
2972 delta0 <- getDeltaNat
2973 setDeltaNat (delta0 - arg_pad_size)
2976 push_codes <- mapM push_arg (reverse args)
2977 delta <- getDeltaNat
2980 -- deal with static vs dynamic call targets
2981 (callinsns,cconv) <-
2984 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2985 -> -- ToDo: stdcall arg sizes
2986 return (unitOL (CALL (Left fn_imm) []), conv)
2987 where fn_imm = ImmCLbl lbl
2988 CmmForeignCall expr conv
2989 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2990 ASSERT(dyn_rep == I32)
2991 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2994 #if darwin_TARGET_OS
2996 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2997 DELTA (delta0 - arg_pad_size)]
2998 `appOL` concatOL push_codes
3001 = concatOL push_codes
3002 call = callinsns `appOL`
3004 -- Deallocate parameters after call for ccall;
3005 -- but not for stdcall (callee does it)
3006 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3007 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3009 [DELTA (delta + tot_arg_size)]
3012 setDeltaNat (delta + tot_arg_size)
3015 -- assign the results, if necessary
-- cdecl result conventions: I64 in eax:edx, floats on the x87 stack
-- (fake0), everything else in eax.
3016 assign_code [] = nilOL
3017 assign_code [(dest,_hint)] =
3019 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3020 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3021 F32 -> unitOL (GMOV fake0 r_dest)
3022 F64 -> unitOL (GMOV fake0 r_dest)
3023 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3025 r_dest_hi = getHiVRegFromLo r_dest
3026 rep = cmmRegRep dest
3027 r_dest = getRegisterReg dest
3028 assign_code many = panic "genCCall.assign_code many"
3030 return (push_code `appOL`
3032 assign_code dest_regs)
-- Round x up to the next multiple of a.
3040 roundTo a x | x `mod` a == 0 = x
3041 | otherwise = x + a - (x `mod` a)
3044 push_arg :: (CmmExpr,MachHint){-current argument-}
3045 -> NatM InstrBlock -- code
3047 push_arg (arg,_hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit words, high half first.
3048 | arg_rep == I64 = do
3049 ChildCode64 code r_lo <- iselExpr64 arg
3050 delta <- getDeltaNat
3051 setDeltaNat (delta - 8)
3053 r_hi = getHiVRegFromLo r_lo
3055 return ( code `appOL`
3056 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3057 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3062 (code, reg, sz) <- get_op arg
3063 delta <- getDeltaNat
3064 let size = arg_size sz
3065 setDeltaNat (delta-size)
-- Floats cannot be PUSHed from an x87 register: make room on the
-- stack with SUB, then store with GST.
3066 if (case sz of F64 -> True; F32 -> True; _ -> False)
3067 then return (code `appOL`
3068 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3070 GST sz reg (AddrBaseIndex (EABaseReg esp)
3074 else return (code `snocOL`
3075 PUSH I32 (OpReg reg) `snocOL`
3079 arg_rep = cmmExprRep arg
3082 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3084 (reg,code) <- getSomeReg op
3085 return (code, reg, cmmExprRep op)
3087 #endif /* i386_TARGET_ARCH */
3089 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Compile a float CallishMachOp as an out-of-line ccall to the
-- corresponding libm routine (sinf/sin etc.), recursing back into
-- stmtToInstrs with a synthesised CmmCall.
3091 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3092 -> Maybe [GlobalReg] -> NatM InstrBlock
3093 outOfLineFloatOp mop res args vols
3095 targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3096 let target = CmmForeignCall targetExpr CCallConv
-- The libm routines return F64; an F64 destination takes the result
-- directly, otherwise it goes via a fresh F64 local and a narrowing
-- assignment.
3098 if cmmRegRep res == F64
3100 stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3104 tmp = CmmLocal (LocalReg uq F64)
3106 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3107 code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3108 return (code1 `appOL` code2)
3110 lbl = mkForeignLabel fn Nothing False
-- Map each MachOp to its C library function name.
3113 MO_F32_Sqrt -> FSLIT("sqrtf")
3114 MO_F32_Sin -> FSLIT("sinf")
3115 MO_F32_Cos -> FSLIT("cosf")
3116 MO_F32_Tan -> FSLIT("tanf")
3117 MO_F32_Exp -> FSLIT("expf")
3118 MO_F32_Log -> FSLIT("logf")
3120 MO_F32_Asin -> FSLIT("asinf")
3121 MO_F32_Acos -> FSLIT("acosf")
3122 MO_F32_Atan -> FSLIT("atanf")
3124 MO_F32_Sinh -> FSLIT("sinhf")
3125 MO_F32_Cosh -> FSLIT("coshf")
3126 MO_F32_Tanh -> FSLIT("tanhf")
3127 MO_F32_Pwr -> FSLIT("powf")
3129 MO_F64_Sqrt -> FSLIT("sqrt")
3130 MO_F64_Sin -> FSLIT("sin")
3131 MO_F64_Cos -> FSLIT("cos")
3132 MO_F64_Tan -> FSLIT("tan")
3133 MO_F64_Exp -> FSLIT("exp")
3134 MO_F64_Log -> FSLIT("log")
3136 MO_F64_Asin -> FSLIT("asin")
3137 MO_F64_Acos -> FSLIT("acos")
3138 MO_F64_Atan -> FSLIT("atan")
3140 MO_F64_Sinh -> FSLIT("sinh")
3141 MO_F64_Cosh -> FSLIT("cosh")
3142 MO_F64_Tanh -> FSLIT("tanh")
3143 MO_F64_Pwr -> FSLIT("pow")
3145 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3147 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3149 #if x86_64_TARGET_ARCH
3151 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3152 -- write barrier compiles to no code on x86/x86-64;
3153 -- we keep it this long in order to prevent earlier optimisations.
-- On x86_64 there is no x87 inlining: every float primop is an
-- out-of-line libm call.
3155 genCCall (CmmPrim op) [(r,_)] args vols =
3156 outOfLineFloatOp op r args vols
-- General C call, System V x86-64 ABI: first args in registers
-- (ints in allArgRegs, floats in allFPArgRegs), rest on the stack.
3158 genCCall target dest_regs args vols = do
3160 -- load up the register arguments
3161 (stack_args, aregs, fregs, load_args_code)
3162 <- load_args args allArgRegs allFPArgRegs nilOL
-- aregs/fregs are the registers left over, so the used ones are the
-- prefix of each list up to what remains.
3165 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3166 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3167 arg_regs = int_regs_used ++ fp_regs_used
3168 -- for annotating the call instruction with
3170 sse_regs = length fp_regs_used
3172 tot_arg_size = arg_size * length stack_args
3174 -- On entry to the called function, %rsp should be aligned
3175 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3176 -- the return address is 16-byte aligned). In STG land
3177 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3178 -- need to make sure we push a multiple of 16-bytes of args,
3179 -- plus the return address, to get the correct alignment.
3180 -- Urg, this is hard. We need to feed the delta back into
3181 -- the arg pushing code.
3182 (real_size, adjust_rsp) <-
3183 if tot_arg_size `rem` 16 == 0
3184 then return (tot_arg_size, nilOL)
3185 else do -- we need to adjust...
3186 delta <- getDeltaNat
3187 setDeltaNat (delta-8)
3188 return (tot_arg_size+8, toOL [
3189 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3193 -- push the stack args, right to left
3194 push_code <- push_args (reverse stack_args) nilOL
3195 delta <- getDeltaNat
3197 -- deal with static vs dynamic call targets
3198 (callinsns,cconv) <-
3201 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3202 -> -- ToDo: stdcall arg sizes
3203 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3204 where fn_imm = ImmCLbl lbl
3205 CmmForeignCall expr conv
3206 -> do (dyn_r, dyn_c) <- getSomeReg expr
3207 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3210 -- The x86_64 ABI requires us to set %al to the number of SSE
3211 -- registers that contain arguments, if the called routine
3212 -- is a varargs function. We don't know whether it's a
3213 -- varargs function or not, so we have to assume it is.
3215 -- It's not safe to omit this assignment, even if the number
3216 -- of SSE regs in use is zero. If %al is larger than 8
3217 -- on entry to a varargs function, seg faults ensue.
3218 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3220 let call = callinsns `appOL`
3222 -- Deallocate parameters after call for ccall;
3223 -- but not for stdcall (callee does it)
3224 (if cconv == StdCallConv || real_size==0 then [] else
3225 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3227 [DELTA (delta + real_size)]
3230 setDeltaNat (delta + real_size)
3233 -- assign the results, if necessary
-- SysV result conventions: floats in xmm0, integers in rax.
3234 assign_code [] = nilOL
3235 assign_code [(dest,_hint)] =
3237 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3238 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3239 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3241 rep = cmmRegRep dest
3242 r_dest = getRegisterReg dest
3243 assign_code many = panic "genCCall.assign_code many"
3245 return (load_args_code `appOL`
3248 assign_eax sse_regs `appOL`
3250 assign_code dest_regs)
3253 arg_size = 8 -- always, at the mo
-- Distribute args over the int/FP register lists; once both run dry,
-- the remaining args are returned for pushing onto the stack.
3255 load_args :: [(CmmExpr,MachHint)]
3256 -> [Reg] -- int regs avail for args
3257 -> [Reg] -- FP regs avail for args
3259 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3260 load_args args [] [] code = return (args, [], [], code)
3261 -- no more regs to use
3262 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3263 -- no more args to push
3264 load_args ((arg,hint) : rest) aregs fregs code
3265 | isFloatingRep arg_rep =
3269 arg_code <- getAnyReg arg
3270 load_args rest aregs rs (code `appOL` arg_code r)
3275 arg_code <- getAnyReg arg
3276 load_args rest rs fregs (code `appOL` arg_code r)
3278 arg_rep = cmmExprRep arg
3281 (args',ars,frs,code') <- load_args rest aregs fregs code
3282 return ((arg,hint):args', ars, frs, code')
-- Push leftover args; floats go via a MOV to the stack slot since
-- PUSH only takes integer operands.
3284 push_args [] code = return code
3285 push_args ((arg,hint):rest) code
3286 | isFloatingRep arg_rep = do
3287 (arg_reg, arg_code) <- getSomeReg arg
3288 delta <- getDeltaNat
3289 setDeltaNat (delta-arg_size)
3290 let code' = code `appOL` toOL [
3291 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3292 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3293 DELTA (delta-arg_size)]
3294 push_args rest code'
3297 -- we only ever generate word-sized function arguments. Promotion
3298 -- has already happened: our Int8# type is kept sign-extended
3299 -- in an Int#, for example.
3300 ASSERT(arg_rep == I64) return ()
3301 (arg_op, arg_code) <- getOperand arg
3302 delta <- getDeltaNat
3303 setDeltaNat (delta-arg_size)
3304 let code' = code `appOL` toOL [PUSH I64 arg_op,
3305 DELTA (delta-arg_size)]
3306 push_args rest code'
3308 arg_rep = cmmExprRep arg
3311 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3313 #if sparc_TARGET_ARCH
3315 The SPARC calling convention is an absolute
3316 nightmare. The first 6x32 bits of arguments are mapped into
3317 %o0 through %o5, and the remaining arguments are dumped to the
3318 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3320 If we have to put args on the stack, move %o6==%sp down by
3321 the number of words to go on the stack, to ensure there's enough space.
3323 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3324 16 words above the stack pointer is a word for the address of
3325 a structure return value. I use this as a temporary location
3326 for moving values from float to int regs. Certainly it isn't
3327 safe to put anything in the 16 words starting at %sp, since
3328 this area can get trashed at any time due to window overflows
3329 caused by signal handlers.
3331 A final complication (if the above isn't enough) is that
3332 we can't blithely calculate the arguments one by one into
3333 %o0 .. %o5. Consider the following nested calls:
3337 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3338 the inner call will itself use %o0, which trashes the value put there
3339 in preparation for the outer call. Upshot: we need to calculate the
3340 args into temporary regs, and move those to arg regs or onto the
3341 stack only immediately prior to the call proper. Sigh.
3344 genCCall target dest_regs argsAndHints vols = do
3346 args = map fst argsAndHints
3347 argcode_and_vregs <- mapM arg_to_int_vregs args
3349 (argcodes, vregss) = unzip argcode_and_vregs
3350 n_argRegs = length allArgRegs
3351 n_argRegs_used = min (length vregs) n_argRegs
3352 vregs = concat vregss
3353 -- deal with static vs dynamic call targets
3354 callinsns <- (case target of
3355 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3356 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3357 CmmForeignCall expr conv -> do
3358 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3359 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3361 (res, reduce) <- outOfLineFloatOp mop
3362 lblOrMopExpr <- case res of
3364 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3366 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3367 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3368 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3372 argcode = concatOL argcodes
3373 (move_sp_down, move_sp_up)
3374 = let diff = length vregs - n_argRegs
3375 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3378 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3380 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3381 return (argcode `appOL`
3382 move_sp_down `appOL`
3383 transfer_code `appOL`
3388 -- move args from the integer vregs into which they have been
3389 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3390 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3392 move_final [] _ offset -- all args done
3395 move_final (v:vs) [] offset -- out of aregs; move to stack
3396 = ST I32 v (spRel offset)
3397 : move_final vs [] (offset+1)
3399 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3400 = OR False g0 (RIReg v) a
3401 : move_final vs az offset
3403 -- generate code to calculate an argument, and move it into one
3404 -- or two integer vregs.
3405 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3406 arg_to_int_vregs arg
3407 | (cmmExprRep arg) == I64
3409 (ChildCode64 code r_lo) <- iselExpr64 arg
3411 r_hi = getHiVRegFromLo r_lo
3412 return (code, [r_hi, r_lo])
3415 (src, code) <- getSomeReg arg
3416 tmp <- getNewRegNat (cmmExprRep arg)
3421 v1 <- getNewRegNat I32
3422 v2 <- getNewRegNat I32
3425 FMOV F64 src f0 `snocOL`
3426 ST F32 f0 (spRel 16) `snocOL`
3427 LD I32 (spRel 16) v1 `snocOL`
3428 ST F32 (fPair f0) (spRel 16) `snocOL`
3429 LD I32 (spRel 16) v2
3434 v1 <- getNewRegNat I32
3437 ST F32 src (spRel 16) `snocOL`
3438 LD I32 (spRel 16) v1
3443 v1 <- getNewRegNat I32
3445 code `snocOL` OR False g0 (RIReg src) v1
3449 outOfLineFloatOp mop =
3451 mopExpr <- cmmMakeDynamicReference addImportNat True $
3452 mkForeignLabel functionName Nothing True
3453 let mopLabelOrExpr = case mopExpr of
3454 CmmLit (CmmLabel lbl) -> Left lbl
3456 return (mopLabelOrExpr, reduce)
3458 (reduce, functionName) = case mop of
3459 MO_F32_Exp -> (True, FSLIT("exp"))
3460 MO_F32_Log -> (True, FSLIT("log"))
3461 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3463 MO_F32_Sin -> (True, FSLIT("sin"))
3464 MO_F32_Cos -> (True, FSLIT("cos"))
3465 MO_F32_Tan -> (True, FSLIT("tan"))
3467 MO_F32_Asin -> (True, FSLIT("asin"))
3468 MO_F32_Acos -> (True, FSLIT("acos"))
3469 MO_F32_Atan -> (True, FSLIT("atan"))
3471 MO_F32_Sinh -> (True, FSLIT("sinh"))
3472 MO_F32_Cosh -> (True, FSLIT("cosh"))
3473 MO_F32_Tanh -> (True, FSLIT("tanh"))
3475 MO_F64_Exp -> (False, FSLIT("exp"))
3476 MO_F64_Log -> (False, FSLIT("log"))
3477 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3479 MO_F64_Sin -> (False, FSLIT("sin"))
3480 MO_F64_Cos -> (False, FSLIT("cos"))
3481 MO_F64_Tan -> (False, FSLIT("tan"))
3483 MO_F64_Asin -> (False, FSLIT("asin"))
3484 MO_F64_Acos -> (False, FSLIT("acos"))
3485 MO_F64_Atan -> (False, FSLIT("atan"))
3487 MO_F64_Sinh -> (False, FSLIT("sinh"))
3488 MO_F64_Cosh -> (False, FSLIT("cosh"))
3489 MO_F64_Tanh -> (False, FSLIT("tanh"))
3491 other -> pprPanic "outOfLineFloatOp(sparc) "
3492 (pprCallishMachOp mop)
3494 #endif /* sparc_TARGET_ARCH */
3496 #if powerpc_TARGET_ARCH
3498 #if darwin_TARGET_OS || linux_TARGET_OS
3500 The PowerPC calling convention for Darwin/Mac OS X
3501 is described in Apple's document
3502 "Inside Mac OS X - Mach-O Runtime Architecture".
3504 PowerPC Linux uses the System V Release 4 Calling Convention
3505 for PowerPC. It is described in the
3506 "System V Application Binary Interface PowerPC Processor Supplement".
3508 Both conventions are similar:
3509 Parameters may be passed in general-purpose registers starting at r3, in
3510 floating point registers starting at f1, or on the stack.
3512 But there are substantial differences:
3513 * The number of registers used for parameter passing and the exact set of
3514 nonvolatile registers differs (see MachRegs.lhs).
3515 * On Darwin, stack space is always reserved for parameters, even if they are
3516 passed in registers. The called routine may choose to save parameters from
3517 registers to the corresponding space on the stack.
3518 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3519 parameter is passed in an FPR.
3520 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3521 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3522 Darwin just treats an I64 like two separate I32s (high word first).
3523 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3524 4-byte aligned like everything else on Darwin.
3525 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3526 PowerPC Linux does not agree, so neither do we.
3528 According to both conventions, the parameter area should be part of the
3529 caller's stack frame, allocated in the caller's prologue code (large enough
3530 to hold the parameter lists for all called routines). The NCG already
3531 uses the stack for register spilling, leaving 64 bytes free at the top.
3532 If we need a larger parameter area than that, we just allocate a new stack
3533 frame just before ccalling.
3536 genCCall target dest_regs argsAndHints vols
3537 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3538 -- we rely on argument promotion in the codeGen
3540 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3542 allArgRegs allFPArgRegs
3546 (labelOrExpr, reduceToF32) <- case target of
3547 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3548 CmmForeignCall expr conv -> return (Right expr, False)
3549 CmmPrim mop -> outOfLineFloatOp mop
3551 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3552 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3557 `snocOL` BL lbl usedRegs
3560 (dynReg, dynCode) <- getSomeReg dyn
3562 `snocOL` MTCTR dynReg
3564 `snocOL` BCTRL usedRegs
3567 #if darwin_TARGET_OS
3568 initialStackOffset = 24
3569 -- size of linkage area + size of arguments, in bytes
3570 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3571 map machRepByteWidth argReps
3572 #elif linux_TARGET_OS
3573 initialStackOffset = 8
3574 stackDelta finalStack = roundTo 16 finalStack
3576 args = map fst argsAndHints
3577 argReps = map cmmExprRep args
3579 roundTo a x | x `mod` a == 0 = x
3580 | otherwise = x + a - (x `mod` a)
3582 move_sp_down finalStack
3584 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3587 where delta = stackDelta finalStack
3588 move_sp_up finalStack
3590 toOL [ADD sp sp (RIImm (ImmInt delta)),
3593 where delta = stackDelta finalStack
3596 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3597 passArguments ((arg,I64):args) gprs fprs stackOffset
3598 accumCode accumUsed =
3600 ChildCode64 code vr_lo <- iselExpr64 arg
3601 let vr_hi = getHiVRegFromLo vr_lo
3603 #if darwin_TARGET_OS
3608 (accumCode `appOL` code
3609 `snocOL` storeWord vr_hi gprs stackOffset
3610 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3611 ((take 2 gprs) ++ accumUsed)
3613 storeWord vr (gpr:_) offset = MR gpr vr
3614 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3616 #elif linux_TARGET_OS
3617 let stackOffset' = roundTo 8 stackOffset
3618 stackCode = accumCode `appOL` code
3619 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3620 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3621 regCode hireg loreg =
3622 accumCode `appOL` code
3623 `snocOL` MR hireg vr_hi
3624 `snocOL` MR loreg vr_lo
3627 hireg : loreg : regs | even (length gprs) ->
3628 passArguments args regs fprs stackOffset
3629 (regCode hireg loreg) (hireg : loreg : accumUsed)
3630 _skipped : hireg : loreg : regs ->
3631 passArguments args regs fprs stackOffset
3632 (regCode hireg loreg) (hireg : loreg : accumUsed)
3633 _ -> -- only one or no regs left
3634 passArguments args [] fprs (stackOffset'+8)
3638 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3639 | reg : _ <- regs = do
3640 register <- getRegister arg
3641 let code = case register of
3642 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3643 Any _ acode -> acode reg
3647 #if darwin_TARGET_OS
3648 -- The Darwin ABI requires that we reserve stack slots for register parameters
3649 (stackOffset + stackBytes)
3650 #elif linux_TARGET_OS
3651 -- ... the SysV ABI doesn't.
3654 (accumCode `appOL` code)
3657 (vr, code) <- getSomeReg arg
3661 (stackOffset' + stackBytes)
3662 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3665 #if darwin_TARGET_OS
3666 -- stackOffset is at least 4-byte aligned
3667 -- The Darwin ABI is happy with that.
3668 stackOffset' = stackOffset
3670 -- ... the SysV ABI requires 8-byte alignment for doubles.
3671 stackOffset' | rep == F64 = roundTo 8 stackOffset
3672 | otherwise = stackOffset
3674 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3675 (nGprs, nFprs, stackBytes, regs) = case rep of
3676 I32 -> (1, 0, 4, gprs)
3677 #if darwin_TARGET_OS
3678 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3680 F32 -> (1, 1, 4, fprs)
3681 F64 -> (2, 1, 8, fprs)
3682 #elif linux_TARGET_OS
3683 -- ... the SysV ABI doesn't.
3684 F32 -> (0, 1, 4, fprs)
3685 F64 -> (0, 1, 8, fprs)
3688 moveResult reduceToF32 =
3692 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3693 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3694 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3696 | otherwise -> unitOL (MR r_dest r3)
3697 where rep = cmmRegRep dest
3698 r_dest = getRegisterReg dest
3700 outOfLineFloatOp mop =
3702 mopExpr <- cmmMakeDynamicReference addImportNat True $
3703 mkForeignLabel functionName Nothing True
3704 let mopLabelOrExpr = case mopExpr of
3705 CmmLit (CmmLabel lbl) -> Left lbl
3707 return (mopLabelOrExpr, reduce)
3709 (functionName, reduce) = case mop of
3710 MO_F32_Exp -> (FSLIT("exp"), True)
3711 MO_F32_Log -> (FSLIT("log"), True)
3712 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3714 MO_F32_Sin -> (FSLIT("sin"), True)
3715 MO_F32_Cos -> (FSLIT("cos"), True)
3716 MO_F32_Tan -> (FSLIT("tan"), True)
3718 MO_F32_Asin -> (FSLIT("asin"), True)
3719 MO_F32_Acos -> (FSLIT("acos"), True)
3720 MO_F32_Atan -> (FSLIT("atan"), True)
3722 MO_F32_Sinh -> (FSLIT("sinh"), True)
3723 MO_F32_Cosh -> (FSLIT("cosh"), True)
3724 MO_F32_Tanh -> (FSLIT("tanh"), True)
3725 MO_F32_Pwr -> (FSLIT("pow"), True)
3727 MO_F64_Exp -> (FSLIT("exp"), False)
3728 MO_F64_Log -> (FSLIT("log"), False)
3729 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3731 MO_F64_Sin -> (FSLIT("sin"), False)
3732 MO_F64_Cos -> (FSLIT("cos"), False)
3733 MO_F64_Tan -> (FSLIT("tan"), False)
3735 MO_F64_Asin -> (FSLIT("asin"), False)
3736 MO_F64_Acos -> (FSLIT("acos"), False)
3737 MO_F64_Atan -> (FSLIT("atan"), False)
3739 MO_F64_Sinh -> (FSLIT("sinh"), False)
3740 MO_F64_Cosh -> (FSLIT("cosh"), False)
3741 MO_F64_Tanh -> (FSLIT("tanh"), False)
3742 MO_F64_Pwr -> (FSLIT("pow"), False)
3743 other -> pprPanic "genCCall(ppc): unknown callish op"
3744 (pprCallishMachOp other)
3746 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3748 #endif /* powerpc_TARGET_ARCH */
3751 -- -----------------------------------------------------------------------------
3752 -- Generating a table-branch
3754 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3756 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3760 (reg,e_code) <- getSomeReg expr
3761 lbl <- getNewLabelNat
3762 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3763 (tableReg,t_code) <- getSomeReg $ dynRef
3765 jumpTable = map jumpTableEntryRel ids
3767 jumpTableEntryRel Nothing
3768 = CmmStaticLit (CmmInt 0 wordRep)
3769 jumpTableEntryRel (Just (BlockId id))
3770 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3771 where blockLabel = mkAsmTempLabel id
3773 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3774 (EAIndex reg wORD_SIZE) (ImmInt 0))
3776 code = e_code `appOL` t_code `appOL` toOL [
3777 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3778 ADD wordRep op (OpReg tableReg),
3779 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3784 (reg,e_code) <- getSomeReg expr
3785 lbl <- getNewLabelNat
3787 jumpTable = map jumpTableEntry ids
3788 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3789 code = e_code `appOL` toOL [
3790 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3791 JMP_TBL op [ id | Just id <- ids ]
3795 #elif powerpc_TARGET_ARCH
3799 (reg,e_code) <- getSomeReg expr
3800 tmp <- getNewRegNat I32
3801 lbl <- getNewLabelNat
3802 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3803 (tableReg,t_code) <- getSomeReg $ dynRef
3805 jumpTable = map jumpTableEntryRel ids
3807 jumpTableEntryRel Nothing
3808 = CmmStaticLit (CmmInt 0 wordRep)
3809 jumpTableEntryRel (Just (BlockId id))
3810 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3811 where blockLabel = mkAsmTempLabel id
3813 code = e_code `appOL` t_code `appOL` toOL [
3814 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3815 SLW tmp reg (RIImm (ImmInt 2)),
3816 LD I32 tmp (AddrRegReg tableReg tmp),
3817 ADD tmp tmp (RIReg tableReg),
3819 BCTR [ id | Just id <- ids ]
3824 (reg,e_code) <- getSomeReg expr
3825 tmp <- getNewRegNat I32
3826 lbl <- getNewLabelNat
3828 jumpTable = map jumpTableEntry ids
3830 code = e_code `appOL` toOL [
3831 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3832 SLW tmp reg (RIImm (ImmInt 2)),
3833 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3834 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3836 BCTR [ id | Just id <- ids ]
-- Fallback: architectures with no jump-table implementation end up here.
genSwitch _ _ = panic "ToDo: genSwitch"
-- | One word of a (non-relative) jump table: the label of the target
-- block, or a zero word where there is no branch target.
jumpTableEntry Nothing             = CmmStaticLit (CmmInt 0 wordRep)
jumpTableEntry (Just (BlockId i))  = CmmStaticLit (CmmLabel (mkAsmTempLabel i))
3847 -- -----------------------------------------------------------------------------
3849 -- -----------------------------------------------------------------------------
3852 -- -----------------------------------------------------------------------------
3853 -- 'condIntReg' and 'condFltReg': condition codes into registers
3855 -- Turn those condition codes into integers now (when they appear on
3856 -- the right hand side of an assignment).
3858 -- (If applicable) Do not fill the delay slots here; you will confuse the
3859 -- register allocator.
3861 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3863 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3865 #if alpha_TARGET_ARCH
-- Not implemented for Alpha in this code generator; any call is a bug
-- in instruction selection, so fail loudly.
condIntReg = panic "MachCode.condIntReg (not on Alpha)"
condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3868 #endif /* alpha_TARGET_ARCH */
3870 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3872 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3874 condIntReg cond x y = do
3875 CondCode _ cond cond_code <- condIntCode cond x y
3876 tmp <- getNewRegNat I8
3878 code dst = cond_code `appOL` toOL [
3879 SETCC cond (OpReg tmp),
3880 MOVZxL I8 (OpReg tmp) (OpReg dst)
3883 return (Any I32 code)
3887 #if i386_TARGET_ARCH
3889 condFltReg cond x y = do
3890 CondCode _ cond cond_code <- condFltCode cond x y
3891 tmp <- getNewRegNat I8
3893 code dst = cond_code `appOL` toOL [
3894 SETCC cond (OpReg tmp),
3895 MOVZxL I8 (OpReg tmp) (OpReg dst)
3898 return (Any I32 code)
3902 #if x86_64_TARGET_ARCH
3904 condFltReg cond x y = do
3905 CondCode _ cond cond_code <- condFltCode cond x y
3906 tmp1 <- getNewRegNat wordRep
3907 tmp2 <- getNewRegNat wordRep
3909 -- We have to worry about unordered operands (eg. comparisons
3910 -- against NaN). If the operands are unordered, the comparison
3911 -- sets the parity flag, carry flag and zero flag.
3912 -- All comparisons are supposed to return false for unordered
3913 -- operands except for !=, which returns true.
3915 -- Optimisation: we don't have to test the parity flag if we
3916 -- know the test has already excluded the unordered case: eg >
3917 -- and >= test for a zero carry flag, which can only occur for
3918 -- ordered operands.
3920 -- ToDo: by reversing comparisons we could avoid testing the
3921 -- parity flag in more cases.
3926 NE -> or_unordered dst
3927 GU -> plain_test dst
3928 GEU -> plain_test dst
3929 _ -> and_ordered dst)
3931 plain_test dst = toOL [
3932 SETCC cond (OpReg tmp1),
3933 MOVZxL I8 (OpReg tmp1) (OpReg dst)
3935 or_unordered dst = toOL [
3936 SETCC cond (OpReg tmp1),
3937 SETCC PARITY (OpReg tmp2),
3938 OR I8 (OpReg tmp1) (OpReg tmp2),
3939 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3941 and_ordered dst = toOL [
3942 SETCC cond (OpReg tmp1),
3943 SETCC NOTPARITY (OpReg tmp2),
3944 AND I8 (OpReg tmp1) (OpReg tmp2),
3945 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3948 return (Any I32 code)
3952 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3954 #if sparc_TARGET_ARCH
3956 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3957 (src, code) <- getSomeReg x
3958 tmp <- getNewRegNat I32
3960 code__2 dst = code `appOL` toOL [
3961 SUB False True g0 (RIReg src) g0,
3962 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3963 return (Any I32 code__2)
3965 condIntReg EQQ x y = do
3966 (src1, code1) <- getSomeReg x
3967 (src2, code2) <- getSomeReg y
3968 tmp1 <- getNewRegNat I32
3969 tmp2 <- getNewRegNat I32
3971 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3972 XOR False src1 (RIReg src2) dst,
3973 SUB False True g0 (RIReg dst) g0,
3974 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3975 return (Any I32 code__2)
3977 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3978 (src, code) <- getSomeReg x
3979 tmp <- getNewRegNat I32
3981 code__2 dst = code `appOL` toOL [
3982 SUB False True g0 (RIReg src) g0,
3983 ADD True False g0 (RIImm (ImmInt 0)) dst]
3984 return (Any I32 code__2)
3986 condIntReg NE x y = do
3987 (src1, code1) <- getSomeReg x
3988 (src2, code2) <- getSomeReg y
3989 tmp1 <- getNewRegNat I32
3990 tmp2 <- getNewRegNat I32
3992 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3993 XOR False src1 (RIReg src2) dst,
3994 SUB False True g0 (RIReg dst) g0,
3995 ADD True False g0 (RIImm (ImmInt 0)) dst]
3996 return (Any I32 code__2)
3998 condIntReg cond x y = do
3999 BlockId lbl1 <- getBlockIdNat
4000 BlockId lbl2 <- getBlockIdNat
4001 CondCode _ cond cond_code <- condIntCode cond x y
4003 code__2 dst = cond_code `appOL` toOL [
4004 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4005 OR False g0 (RIImm (ImmInt 0)) dst,
4006 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4007 NEWBLOCK (BlockId lbl1),
4008 OR False g0 (RIImm (ImmInt 1)) dst,
4009 NEWBLOCK (BlockId lbl2)]
4010 return (Any I32 code__2)
4012 condFltReg cond x y = do
4013 BlockId lbl1 <- getBlockIdNat
4014 BlockId lbl2 <- getBlockIdNat
4015 CondCode _ cond cond_code <- condFltCode cond x y
4017 code__2 dst = cond_code `appOL` toOL [
4019 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4020 OR False g0 (RIImm (ImmInt 0)) dst,
4021 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4022 NEWBLOCK (BlockId lbl1),
4023 OR False g0 (RIImm (ImmInt 1)) dst,
4024 NEWBLOCK (BlockId lbl2)]
4025 return (Any I32 code__2)
4027 #endif /* sparc_TARGET_ARCH */
4029 #if powerpc_TARGET_ARCH
4030 condReg getCond = do
4031 lbl1 <- getBlockIdNat
4032 lbl2 <- getBlockIdNat
4033 CondCode _ cond cond_code <- getCond
4035 {- code dst = cond_code `appOL` toOL [
4044 code dst = cond_code
4048 RLWINM dst dst (bit + 1) 31 31
4051 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4054 (bit, do_negate) = case cond of
4068 return (Any I32 code)
-- Integer and floating-point comparisons share the same register
-- materialisation (condReg); only the condition-code generator differs.
condIntReg cond x y = condReg (condIntCode cond x y)
condFltReg cond x y = condReg (condFltCode cond x y)
4072 #endif /* powerpc_TARGET_ARCH */
4075 -- -----------------------------------------------------------------------------
4076 -- 'trivial*Code': deal with trivial instructions
4078 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4079 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4080 -- Only look for constants on the right hand side, because that's
4081 -- where the generic optimizer will have put them.
4083 -- Similarly, for unary instructions, we don't have to worry about
4084 -- matching an StInt as the argument, because genericOpt will already
4085 -- have handled the constant-folding.
4089 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4090 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4091 -> Maybe (Operand -> Operand -> Instr)
4092 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4093 -> Maybe (Operand -> Operand -> Instr)
4094 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4095 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4097 -> CmmExpr -> CmmExpr -- the two arguments
4100 #ifndef powerpc_TARGET_ARCH
4103 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4104 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4105 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4106 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4108 -> CmmExpr -> CmmExpr -- the two arguments
4114 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4115 ,IF_ARCH_i386 ((Operand -> Instr)
4116 ,IF_ARCH_x86_64 ((Operand -> Instr)
4117 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4118 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4120 -> CmmExpr -- the one argument
4123 #ifndef powerpc_TARGET_ARCH
4126 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4127 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4128 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4129 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4131 -> CmmExpr -- the one argument
4135 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4137 #if alpha_TARGET_ARCH
4139 trivialCode instr x (StInt y)
4141 = getRegister x `thenNat` \ register ->
4142 getNewRegNat IntRep `thenNat` \ tmp ->
4144 code = registerCode register tmp
4145 src1 = registerName register tmp
4146 src2 = ImmInt (fromInteger y)
4147 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4149 return (Any IntRep code__2)
4151 trivialCode instr x y
4152 = getRegister x `thenNat` \ register1 ->
4153 getRegister y `thenNat` \ register2 ->
4154 getNewRegNat IntRep `thenNat` \ tmp1 ->
4155 getNewRegNat IntRep `thenNat` \ tmp2 ->
4157 code1 = registerCode register1 tmp1 []
4158 src1 = registerName register1 tmp1
4159 code2 = registerCode register2 tmp2 []
4160 src2 = registerName register2 tmp2
4161 code__2 dst = asmSeqThen [code1, code2] .
4162 mkSeqInstr (instr src1 (RIReg src2) dst)
4164 return (Any IntRep code__2)
4167 trivialUCode instr x
4168 = getRegister x `thenNat` \ register ->
4169 getNewRegNat IntRep `thenNat` \ tmp ->
4171 code = registerCode register tmp
4172 src = registerName register tmp
4173 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4175 return (Any IntRep code__2)
4178 trivialFCode _ instr x y
4179 = getRegister x `thenNat` \ register1 ->
4180 getRegister y `thenNat` \ register2 ->
4181 getNewRegNat F64 `thenNat` \ tmp1 ->
4182 getNewRegNat F64 `thenNat` \ tmp2 ->
4184 code1 = registerCode register1 tmp1
4185 src1 = registerName register1 tmp1
4187 code2 = registerCode register2 tmp2
4188 src2 = registerName register2 tmp2
4190 code__2 dst = asmSeqThen [code1 [], code2 []] .
4191 mkSeqInstr (instr src1 src2 dst)
4193 return (Any F64 code__2)
4195 trivialUFCode _ instr x
4196 = getRegister x `thenNat` \ register ->
4197 getNewRegNat F64 `thenNat` \ tmp ->
4199 code = registerCode register tmp
4200 src = registerName register tmp
4201 code__2 dst = code . mkSeqInstr (instr src dst)
4203 return (Any F64 code__2)
4205 #endif /* alpha_TARGET_ARCH */
4207 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4209 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4212 The Rules of the Game are:
4214 * You cannot assume anything about the destination register dst;
4215 it may be anything, including a fixed reg.
4217 * You may compute an operand into a fixed reg, but you may not
4218 subsequently change the contents of that fixed reg. If you
4219 want to do so, first copy the value either to a temporary
4220 or into dst. You are free to modify dst even if it happens
4221 to be a fixed reg -- that's not your problem.
4223 * You cannot assume that a fixed reg will stay live over an
4224 arbitrary computation. The same applies to the dst reg.
4226 * Temporary regs obtained from getNewRegNat are distinct from
4227 each other and from all other regs, and stay live over
4228 arbitrary computations.
4230 --------------------
4232 SDM's version of The Rules:
4234 * If getRegister returns Any, that means it can generate correct
4235 code which places the result in any register, period. Even if that
4236 register happens to be read during the computation.
4238 Corollary #1: this means that if you are generating code for an
4239 operation with two arbitrary operands, you cannot assign the result
4240 of the first operand into the destination register before computing
4241 the second operand. The second operand might require the old value
4242 of the destination register.
4244 Corollary #2: A function might be able to generate more efficient
4245 code if it knows the destination register is a new temporary (and
4246 therefore not read by any of the sub-computations).
4248 * If getRegister returns Any, then the code it generates may modify only:
4249 (a) fresh temporaries
4250 (b) the destination register
4251 (c) known registers (eg. %ecx is used by shifts)
4252 In particular, it may *not* modify global registers, unless the global
4253 register happens to be the destination register.
4256 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4257 | not (is64BitLit lit_a) = do
4258 b_code <- getAnyReg b
4261 = b_code dst `snocOL`
4262 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4264 return (Any rep code)
4266 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4268 -- This is re-used for floating pt instructions too.
4269 genTrivialCode rep instr a b = do
4270 (b_op, b_code) <- getNonClobberedOperand b
4271 a_code <- getAnyReg a
4272 tmp <- getNewRegNat rep
4274 -- We want the value of b to stay alive across the computation of a.
4275 -- But, we want to calculate a straight into the destination register,
4276 -- because the instruction only has two operands (dst := dst `op` src).
4277 -- The troublesome case is when the result of b is in the same register
4278 -- as the destination reg. In this case, we have to save b in a
4279 -- new temporary across the computation of a.
4281 | dst `regClashesWithOp` b_op =
4283 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4285 instr (OpReg tmp) (OpReg dst)
4289 instr b_op (OpReg dst)
4291 return (Any rep code)
-- Does the operand mention the given register, either directly or as a
-- component of its addressing mode?
reg `regClashesWithOp` op = case op of
   OpReg reg2   -> reg == reg2
   OpAddr amode -> reg `elem` addrModeRegs amode
   _            -> False
4299 trivialUCode rep instr x = do
4300 x_code <- getAnyReg x
4306 return (Any rep code)
4310 #if i386_TARGET_ARCH
4312 trivialFCode pk instr x y = do
4313 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4314 (y_reg, y_code) <- getSomeReg y
4319 instr pk x_reg y_reg dst
4321 return (Any pk code)
4325 #if x86_64_TARGET_ARCH
-- x86_64: float operations go through the same two-operand code path as
-- integer ones, so reuse genTrivialCode, fixing the instruction at rep pk.
trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4333 trivialUFCode rep instr x = do
4334 (x_reg, x_code) <- getSomeReg x
4340 return (Any rep code)
4342 #endif /* i386_TARGET_ARCH */
4344 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4346 #if sparc_TARGET_ARCH
4348 trivialCode pk instr x (CmmLit (CmmInt y d))
4351 (src1, code) <- getSomeReg x
4352 tmp <- getNewRegNat I32
4354 src2 = ImmInt (fromInteger y)
4355 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4356 return (Any I32 code__2)
4358 trivialCode pk instr x y = do
4359 (src1, code1) <- getSomeReg x
4360 (src2, code2) <- getSomeReg y
4361 tmp1 <- getNewRegNat I32
4362 tmp2 <- getNewRegNat I32
4364 code__2 dst = code1 `appOL` code2 `snocOL`
4365 instr src1 (RIReg src2) dst
4366 return (Any I32 code__2)
4369 trivialFCode pk instr x y = do
4370 (src1, code1) <- getSomeReg x
4371 (src2, code2) <- getSomeReg y
4372 tmp1 <- getNewRegNat (cmmExprRep x)
4373 tmp2 <- getNewRegNat (cmmExprRep y)
4374 tmp <- getNewRegNat F64
4376 promote x = FxTOy F32 F64 x tmp
4383 code1 `appOL` code2 `snocOL`
4384 instr pk src1 src2 dst
4385 else if pk1 == F32 then
4386 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4387 instr F64 tmp src2 dst
4389 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4390 instr F64 src1 tmp dst
4391 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4394 trivialUCode pk instr x = do
4395 (src, code) <- getSomeReg x
4396 tmp <- getNewRegNat pk
4398 code__2 dst = code `snocOL` instr (RIReg src) dst
4399 return (Any pk code__2)
4402 trivialUFCode pk instr x = do
4403 (src, code) <- getSomeReg x
4404 tmp <- getNewRegNat pk
4406 code__2 dst = code `snocOL` instr src dst
4407 return (Any pk code__2)
4409 #endif /* sparc_TARGET_ARCH */
4411 #if powerpc_TARGET_ARCH
4414 Wolfgang's PowerPC version of The Rules:
4416 A slightly modified version of The Rules to take advantage of the fact
4417 that PowerPC instructions work on all registers and don't implicitly
4418 clobber any fixed registers.
4420 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4422 * If getRegister returns Any, then the code it generates may modify only:
4423 (a) fresh temporaries
4424 (b) the destination register
4425 It may *not* modify global registers, unless the global
4426 register happens to be the destination register.
4427 It may not clobber any other registers. In fact, only ccalls clobber any
4429 Also, it may not modify the counter register (used by genCCall).
4431 Corollary: If a getRegister for a subexpression returns Fixed, you need
4432 not move it to a fresh temporary before evaluating the next subexpression.
4433 The Fixed register won't be modified.
4434 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4436 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4437 the value of the destination register.
-- | PPC: generate code for a two-operand operation.  If the second
-- operand is an integer literal that fits in the instruction's immediate
-- field (per 'makeImmediate', which respects @signed@), use the
-- register-immediate form; otherwise force both operands into registers.
-- Fix: the @= do@ line of the first equation was missing (truncated
-- source), making the guard clause invalid.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
    = do
        (src1, code1) <- getSomeReg x
        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
        return (Any rep code)

trivialCode rep signed instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
    return (Any rep code)
-- | PPC: generate code for a two-operand operation whose instruction has
-- no immediate form — both operands are forced into registers.
-- Instruction operand order is @instr dst srcA srcB@.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
	-> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (x_reg, x_code) <- getSomeReg x
    (y_reg, y_code) <- getSomeReg y
    let
        assemble dst = x_code `appOL` y_code `snocOL` instr dst x_reg y_reg
    return (Any rep assemble)
-- | PPC: generate code for a unary operation: evaluate the operand into
-- a register, then emit @instr dst src@.
trivialUCode rep instr x = do
    (operand_reg, operand_code) <- getSomeReg x
    let assemble dst = operand_code `snocOL` instr dst operand_reg
    return (Any rep assemble)
4466 -- There is no "remainder" instruction on the PPC, so we have to do
4468 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | PPC has no remainder instruction, so compute
-- @rem = x - (x `div` y) * y@ by hand.  The @div@ parameter selects the
-- division instruction (DIVW for signed, DIVWU for unsigned).
-- Fix: the instruction list was truncated — the initial @div@ step and
-- the final @SUBF@ step were missing.  Reconstructed as:
--   dst <- src1 `div` src2; dst <- dst * src2; dst <- src1 - dst
-- (SUBF d,a,b computes b - a on PowerPC).
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
	-> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
                div dst src1 src2,
                MULLW dst dst (RIReg src2),
                SUBF dst dst src1
            ]
    return (Any rep code)
4482 #endif /* powerpc_TARGET_ARCH */
4485 -- -----------------------------------------------------------------------------
4486 -- Coercing to/from integer/floating-point...
4488 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4489 -- conversions. We have to store temporaries in memory to move
4490 -- between the integer and the floating point register sets.
4492 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4494 -- kinds, so the value has to be computed into one kind before being
4495 -- explicitly "converted" to live in the other kind.
4497 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4498 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4500 #if sparc_TARGET_ARCH
4501 coerceDbl2Flt :: CmmExpr -> NatM Register
4502 coerceFlt2Dbl :: CmmExpr -> NatM Register
4505 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4507 #if alpha_TARGET_ARCH
-- NOTE(review): dead @#if alpha@ code, and this copy is truncated — the
-- defining equation's head line (presumably @coerceInt2FP _ x@), the
-- @let@ keyword, and part of the instruction list (the initial store and
-- the final convert) are missing; restore from upstream before use.
-- Moves the int through memory (spRel 0) into an FP register, since the
-- integer and FP register files cannot transfer directly.
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ reg ->
        code = registerCode register reg
        src = registerName register reg
        code__2 dst = code . mkSeqInstrs [
        LD TF dst (spRel 0),
    return (Any F64 code__2)
-- NOTE(review): dead @#if alpha@ code, truncated like the block above —
-- the head line (presumably @coerceFP2Int _ x@), the @let@ keyword, and
-- part of the instruction list are missing; restore from upstream.
-- Converts in an FP register, then moves the result through memory
-- (spRel 0) into an integer register.
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code . mkSeqInstrs [
        ST TF tmp (spRel 0),
    return (Any IntRep code__2)
4538 #endif /* alpha_TARGET_ARCH */
4540 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4542 #if i386_TARGET_ARCH
-- | i386: coerce an integer to floating point using the x87
-- pseudo-instructions GITOF (-> F32) / GITOD (-> F64).
-- Fix: the @let@ introducing @opc@/@code@ was missing (truncated source).
coerceInt2FP from to x = do
    (x_reg, x_code) <- getSomeReg x
    let
        opc  = case to of F32 -> GITOF; F64 -> GITOD
        code dst = x_code `snocOL` opc x_reg dst
        -- ToDo: works for non-I32 reps?
    return (Any to code)
-- | i386: coerce a floating-point value to an integer using the x87
-- pseudo-instructions GFTOI (from F32) / GDTOI (from F64).
-- Fix: the @let@ introducing @opc@/@code@ was missing (truncated source).
coerceFP2Int from to x = do
    (x_reg, x_code) <- getSomeReg x
    let
        opc  = case from of F32 -> GFTOI; F64 -> GDTOI
        code dst = x_code `snocOL` opc x_reg dst
        -- ToDo: works for non-I32 reps?
    return (Any to code)
4564 #endif /* i386_TARGET_ARCH */
4566 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4568 #if x86_64_TARGET_ARCH
-- | x86_64: coerce floating point to integer with the SSE truncating
-- conversions CVTSS2SI / CVTSD2SI.
-- Fix: the @let@ introducing @opc@/@code@ was missing (truncated source).
coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
    let
        opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- | x86_64: coerce integer to floating point with the SSE conversions
-- CVTSI2SS / CVTSI2SD.
-- Fix: the @let@ introducing @opc@/@code@ was missing (truncated source).
coerceInt2FP from to x = do
    (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
    let
        opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- | x86_64: convert between the two floating-point widths with
-- CVTSD2SS / CVTSS2SD; @to@ is the target rep.
-- Fix: the @let@ introducing @opc@/@code@ was missing (truncated source).
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
    (x_reg, x_code) <- getSomeReg x
    let
        opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
        code dst = x_code `snocOL` opc x_reg dst
    return (Any to code)
4597 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4599 #if sparc_TARGET_ARCH
-- | SPARC: coerce integer to floating point.  The value is bounced
-- through memory (below the stack pointer, spRel (-2)) because the
-- integer and FP register files cannot transfer directly, then
-- converted in place with FxTOy.
-- Fix: the @let@ introducing @code__2@ was missing (truncated source).
coerceInt2FP pk1 pk2 x = do
    (src, code) <- getSomeReg x
    let
        code__2 dst = code `appOL` toOL [
            ST pk1 src (spRel (-2)),
            LD pk1 (spRel (-2)) dst,
            FxTOy pk1 pk2 dst dst]
    return (Any pk2 code__2)
-- | SPARC: coerce floating point to integer: convert in an FP register
-- with FxTOy, then bounce the result through memory (spRel (-2)) into an
-- integer register.
-- Fix: the @let@ keyword and the @code `appOL` toOL [@ line after the
-- ASSERT were missing (truncated source); reconstructed.
coerceFP2Int pk fprep x = do
    (src, code) <- getSomeReg x
    reg <- getNewRegNat fprep
    tmp <- getNewRegNat pk
    let
        code__2 dst = ASSERT(fprep == F64 || fprep == F32)
            code `appOL` toOL [
            FxTOy fprep pk src tmp,
            ST pk tmp (spRel (-2)),
            LD pk (spRel (-2)) dst]
    return (Any pk code__2)
-- | SPARC: narrow a double value to a float register with FxTOy, since
-- float and double regs are treated as separate kinds on this target.
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    let narrowed dst = code `snocOL` FxTOy F64 F32 src dst
    return (Any F32 narrowed)
-- | SPARC: widen a float value to a double register with FxTOy — the
-- inverse of 'coerceDbl2Flt'.
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    let widened dst = code `snocOL` FxTOy F32 F64 src dst
    return (Any F64 widened)
4633 #endif /* sparc_TARGET_ARCH */
4635 #if powerpc_TARGET_ARCH
-- | PPC: coerce integer to floating point using the classic
-- magic-number trick: build the double 0x4330000080000000 (2^52 + 2^31)
-- as a static literal, form (2^52 + 2^31 + (src XOR 0x80000000)) by
-- storing the two words on the stack and loading them as an F64, then
-- subtract the magic constant to recover the signed value.
-- NOTE(review): this copy of the source is truncated — the @let@
-- keyword, the LDATA directive that emits the static literal at @lbl@,
-- the load of that literal via @addr@, and the @_ -> nilOL@ default
-- cases of maybe_exts/maybe_frsp are missing; restore from upstream
-- before use.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat I32
    ftmp <- getNewRegNat F64
    dynRef <- cmmMakeDynamicReference addImportNat False lbl
    Amode addr addr_code <- getAmode dynRef
        code' dst = code `appOL` maybe_exts `appOL` toOL [
                -- high word 0x43300000, low word 0x80000000: the double
                -- 2^52 + 2^31, emitted as static data at lbl
                CmmStaticLit (CmmInt 0x43300000 I32),
                CmmStaticLit (CmmInt 0x80000000 I32)],
            -- flip the sign bit so src is biased into [0, 2^32)
            XORIS itmp src (ImmInt 0x8000),
            ST I32 itmp (spRel 3),
            -- high word of the magic double
            LIS itmp (ImmInt 0x4330),
            ST I32 itmp (spRel 2),
            -- reinterpret the two stacked words as one F64
            LD F64 ftmp (spRel 2)
            ] `appOL` addr_code `appOL` toOL [
            -- subtract the magic constant to recover the value
            FSUB F64 dst ftmp dst
            ] `appOL` maybe_frsp dst
        -- sign-extend sub-word sources before the trick above
        maybe_exts = case fromRep of
                I8 -> unitOL $ EXTS I8 src src
                I16 -> unitOL $ EXTS I16 src src
        -- round down to single precision when the target is F32
        maybe_frsp dst = case toRep of
                F32 -> unitOL $ FRSP dst dst
    return (Any toRep code')
-- | PPC: coerce floating point to integer via FCTIWZ (floating convert
-- to integer word, round toward zero), storing the 64-bit FP result to
-- the stack and reading back the low word.
-- Fix: the @let@ keyword and the @FCTIWZ tmp src@ line were missing
-- (truncated source); reconstructed from the surviving comments.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
    let
        code' dst = code `appOL` toOL [
                -- convert to int in FP reg
            FCTIWZ tmp src,
                -- store value (64bit) from FP to stack
            ST F64 tmp (spRel 2),
                -- read low word of value (high word is undefined)
            LD I32 dst (spRel 3)]
    return (Any toRep code')
4681 #endif /* powerpc_TARGET_ARCH */
4684 -- -----------------------------------------------------------------------------
4685 -- eXTRA_STK_ARGS_HERE
4687 -- We (allegedly) put the first six C-call arguments in registers;
4688 -- where do we start putting the rest of them?
4690 -- Moved from MachInstrs (SDM):
4692 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4693 eXTRA_STK_ARGS_HERE :: Int
4695 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))