1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH} things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
34 import StaticFlags ( opt_PIC )
35 import ForeignCall ( CCallConv(..) )
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import TRACE ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Maybe ( fromJust )
53 -- -----------------------------------------------------------------------------
54 -- Top-level of the instruction selector
56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
57 -- They are really trees of insns to facilitate fast appending, where a
58 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- OrdList gives O(1) cons/snoc/append; it is flattened to a list only
-- once selection of a whole block is finished.
61 type InstrBlock = OrdList Instr
63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
-- Translate one top-level Cmm entity to native code.  For a procedure we
-- select code for every basic block, then (on PIC platforms, when a pic
-- base register was allocated) prepend the pic-base initialisation code.
-- NOTE(review): the 'case picBaseMb of' header for the Just/Nothing
-- alternatives below is not visible in this excerpt -- confirm upstream.
64 cmmTopCodeGen (CmmProc info lab params blocks) = do
65 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
66 picBaseMb <- getPicBaseMaybeNat
67 let proc = CmmProc info lab params (concat nat_blocks)
68 tops = proc : concat statics
70 Just picBase -> initializePicBase picBase tops
71 Nothing -> return tops
-- Data sections require no instruction selection; pass them through.
73 cmmTopCodeGen (CmmData sec dat) = do
74 return [CmmData sec dat] -- no translation, we just use CmmStatic
76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
-- Generate code for one basic block.  The selected instruction stream may
-- contain NEWBLOCK pseudo-instructions (block boundaries introduced during
-- selection) and LDATA (literal data); the right fold below splits these
-- back out into separate basic blocks and static data tops.
77 basicBlockCodeGen (BasicBlock id stmts) = do
78 instrs <- stmtsToInstrs stmts
79 -- code generation may introduce new basic block boundaries, which
80 -- are indicated by the NEWBLOCK instruction. We must split up the
81 -- instruction stream into basic blocks again. Also, we extract
84 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- NEWBLOCK closes the run accumulated so far and opens a fresh block.
86 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
87 = ([], BasicBlock id instrs : blocks, statics)
-- LDATA is lifted out of the instruction stream into the statics list.
88 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
89 = (instrs, blocks, CmmData sec dat:statics)
-- Ordinary instructions stay in the current block.
90 mkBlocks instr (instrs,blocks,statics)
91 = (instr:instrs, blocks, statics)
93 return (BasicBlock id top : other_blocks, statics)
95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- Concatenate the code selected for a sequence of statements.
97 = do instrss <- mapM stmtToInstrs stmts
98 return (concatOL instrss)
100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
-- Dispatch on statement shape.  Assignments are split three ways by the
-- representation of the assigned value: floating point, 64-bit values on
-- 32-bit targets, and plain integers.
101 stmtToInstrs stmt = case stmt of
102 CmmNop -> return nilOL
103 CmmComment s -> return (unitOL (COMMENT s))
-- Register assignment, keyed on the representation of the target register.
106 | isFloatingRep kind -> assignReg_FltCode kind reg src
107 #if WORD_SIZE_IN_BITS==32
108 | kind == I64 -> assignReg_I64Code reg src
110 | otherwise -> assignReg_IntCode kind reg src
111 where kind = cmmRegRep reg
-- Memory store, keyed on the representation of the stored expression.
114 | isFloatingRep kind -> assignMem_FltCode kind addr src
115 #if WORD_SIZE_IN_BITS==32
116 | kind == I64 -> assignMem_I64Code addr src
118 | otherwise -> assignMem_IntCode kind addr src
119 where kind = cmmExprRep src
121 CmmCall target result_regs args vols
122 -> genCCall target result_regs args vols
124 CmmBranch id -> genBranch id
125 CmmCondBranch arg id -> genCondJump id arg
126 CmmSwitch arg ids -> genSwitch arg ids
-- NOTE(review): 'params' of CmmJump is dropped here -- presumably genJump
-- does not need it; verify against the other backends.
127 CmmJump arg params -> genJump arg
129 -- -----------------------------------------------------------------------------
130 -- General things for putting together code sequences
132 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
133 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' node into an explicit machine add of the register
-- and its (sign-extended) byte offset, carried at the register's own
-- representation.  Only 'CmmRegOff' is handled; other shapes are not
-- expected here.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let offRep = cmmRegRep reg
        offLit = CmmLit (CmmInt (fromIntegral off) offRep)
    in  CmmMachOp (MO_Add offRep) [CmmReg reg, offLit]
139 -- -----------------------------------------------------------------------------
140 -- Code gen for 64-bit arithmetic on 32-bit platforms
143 Simple support for generating 64-bit code (ie, 64 bit values and 64
144 bit assignments) on 32-bit platforms. Unlike the main code generator
145 we merely shoot for generating working code as simply as possible, and
146 pay little attention to code quality. Specifically, there is no
147 attempt to deal cleverly with the fixed-vs-floating register
148 distinction; all values are generated into (pairs of) floating
149 registers, even if this would mean some redundant reg-reg moves as a
150 result. Only one of the VRegUniques is returned, since it will be
151 of the VRegUniqueLo form, and the upper-half VReg can be determined
152 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit target: the code
-- plus the LOW 32-bit temporary; the high half is found by applying
-- getHiVRegFromLo to it (see the commentary above).
155 data ChildCode64 -- a.k.a "Register64"
158 Reg -- the lower 32-bit temporary which contains the
159 -- result; use getHiVRegFromLo to find the other
160 -- VRegUnique. Rules of this simplified insn
161 -- selection game are therefore that the returned
162 -- Reg may be modified
164 #if WORD_SIZE_IN_BITS==32
165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
166 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
169 #ifndef x86_64_TARGET_ARCH
170 iselExpr64 :: CmmExpr -> NatM ChildCode64
173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value to memory as two 32-bit MOVs, low word first.
-- NOTE(review): fromJust on addrOffset is partial -- presumably the +4
-- offset is always representable in an x86 addressing mode; verify.
177 assignMem_I64Code addrTree valueTree = do
178 Amode addr addr_code <- getAmode addrTree
179 ChildCode64 vcode rlo <- iselExpr64 valueTree
181 rhi = getHiVRegFromLo rlo
183 -- Little-endian store
184 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
185 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
187 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair with two reg-reg MOVs.
190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
191 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
193 r_dst_lo = mkVReg u_dst I32
194 r_dst_hi = getHiVRegFromLo r_dst_lo
195 r_src_hi = getHiVRegFromLo r_src_lo
196 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
197 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
200 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers can hold I64 on i386; anything else is a bug upstream.
203 assignReg_I64Code lvalue valueTree
204 = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: move the low and high 32-bit immediates into a fresh pair.
208 iselExpr64 (CmmLit (CmmInt i _)) = do
209 (rlo,rhi) <- getNewRegPairNat I32
211 r = fromIntegral (fromIntegral i :: Word32)
212 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
214 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
215 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
218 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads from addr and addr+4 (little-endian).
220 iselExpr64 (CmmLoad addrTree I64) = do
221 Amode addr addr_code <- getAmode addrTree
222 (rlo,rhi) <- getNewRegPairNat I32
224 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
225 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
228 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Reading a local 64-bit register needs no code at all.
232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
233 = return (ChildCode64 nilOL (mkVReg vu I32))
235 -- we handle addition, but rather badly
-- Add a 64-bit literal: 32-bit ADD of the low words, then ADC of the high
-- words so the carry propagates.
236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
237 ChildCode64 code1 r1lo <- iselExpr64 e1
238 (rlo,rhi) <- getNewRegPairNat I32
240 r = fromIntegral (fromIntegral i :: Word32)
241 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
242 r1hi = getHiVRegFromLo r1lo
244 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
245 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
246 MOV I32 (OpReg r1hi) (OpReg rhi),
247 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
249 return (ChildCode64 code rlo)
-- General 64-bit addition: ADD low halves, ADC high halves.
251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
252 ChildCode64 code1 r1lo <- iselExpr64 e1
253 ChildCode64 code2 r2lo <- iselExpr64 e2
254 (rlo,rhi) <- getNewRegPairNat I32
256 r1hi = getHiVRegFromLo r1lo
257 r2hi = getHiVRegFromLo r2lo
260 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
261 ADD I32 (OpReg r2lo) (OpReg rlo),
262 MOV I32 (OpReg r1hi) (OpReg rhi),
263 ADC I32 (OpReg r2hi) (OpReg rhi) ]
265 return (ChildCode64 code rlo)
-- Any other 64-bit expression shape is unsupported on i386.
268 = pprPanic "iselExpr64(i386)" (ppr expr)
270 #endif /* i386_TARGET_ARCH */
272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
274 #if sparc_TARGET_ARCH
-- sparc: store a 64-bit value as two 32-bit STs.  Note the big-endian
-- layout: the high word goes to offset 0, the low word to offset 4.
276 assignMem_I64Code addrTree valueTree = do
277 Amode addr addr_code <- getAmode addrTree
278 ChildCode64 vcode rlo <- iselExpr64 valueTree
279 (src, code) <- getSomeReg addrTree
281 rhi = getHiVRegFromLo rlo
283 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
284 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
285 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: reg-to-reg 64-bit assignment; mkMOV encodes a move as OR with %g0.
287 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
288 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
290 r_dst_lo = mkVReg u_dst pk
291 r_dst_hi = getHiVRegFromLo r_dst_lo
292 r_src_hi = getHiVRegFromLo r_src_lo
293 mov_lo = mkMOV r_src_lo r_dst_lo
294 mov_hi = mkMOV r_src_hi r_dst_hi
295 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
296 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
297 assignReg_I64Code lvalue valueTree
298 = panic "assignReg_I64Code(sparc): invalid lvalue"
301 -- Don't delete this -- it's very handy for debugging.
303 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
304 -- = panic "iselExpr64(???)"
-- 64-bit load: two 32-bit LDs, high word at offset 0 (big-endian).
-- NOTE(review): the Amode pattern binds r1 AND r2 but only r1 is used for
-- addressing -- confirm the reg+reg mode is deliberately discarded here.
306 iselExpr64 (CmmLoad addrTree I64) = do
307 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
308 rlo <- getNewRegNat I32
309 let rhi = getHiVRegFromLo rlo
310 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
311 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
313 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- Reading a local 64-bit register: copy both halves into a fresh pair.
317 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
318 r_dst_lo <- getNewRegNat I32
319 let r_dst_hi = getHiVRegFromLo r_dst_lo
320 r_src_lo = mkVReg uq I32
321 r_src_hi = getHiVRegFromLo r_src_lo
322 mov_lo = mkMOV r_src_lo r_dst_lo
323 mov_hi = mkMOV r_src_hi r_dst_hi
324 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
326 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
-- Any other 64-bit expression shape is unsupported on sparc.
330 = pprPanic "iselExpr64(sparc)" (ppr expr)
332 #endif /* sparc_TARGET_ARCH */
334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
336 #if powerpc_TARGET_ARCH
338 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
-- Compute addressing modes for both halves of a 64-bit memory operand:
-- the high word at the base address, the low word at +4 (big-endian).
-- If the +4 offset does not fit the addressing mode, fall back to
-- materialising the base pointer in a register.
339 getI64Amodes addrTree = do
340 Amode hi_addr addr_code <- getAmode addrTree
341 case addrOffset hi_addr 4 of
342 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
343 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
344 return (AddrRegImm hi_ptr (ImmInt 0),
345 AddrRegImm hi_ptr (ImmInt 4),
-- ppc: store a 64-bit value with two 32-bit STs.
348 assignMem_I64Code addrTree valueTree = do
349 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
350 ChildCode64 vcode rlo <- iselExpr64 valueTree
352 rhi = getHiVRegFromLo rlo
355 mov_hi = ST I32 rhi hi_addr
356 mov_lo = ST I32 rlo lo_addr
358 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- ppc: reg-to-reg 64-bit assignment via two MR (move register) insns.
360 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
361 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
363 r_dst_lo = mkVReg u_dst I32
364 r_dst_hi = getHiVRegFromLo r_dst_lo
365 r_src_hi = getHiVRegFromLo r_src_lo
366 mov_lo = MR r_dst_lo r_src_lo
367 mov_hi = MR r_dst_hi r_src_hi
370 vcode `snocOL` mov_lo `snocOL` mov_hi
373 assignReg_I64Code lvalue valueTree
374 = panic "assignReg_I64Code(powerpc): invalid lvalue"
377 -- Don't delete this -- it's very handy for debugging.
379 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
380 -- = panic "iselExpr64(???)"
-- 64-bit load: two 32-bit LDs into a fresh register pair.
382 iselExpr64 (CmmLoad addrTree I64) = do
383 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
384 (rlo, rhi) <- getNewRegPairNat I32
385 let mov_hi = LD I32 rhi hi_addr
386 mov_lo = LD I32 rlo lo_addr
387 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Reading a local 64-bit register needs no code at all.
390 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
391 = return (ChildCode64 nilOL (mkVReg vu I32))
-- | Materialise a 64-bit integer literal into a fresh register pair on
-- 32-bit PowerPC.  Each 32-bit half is built in the standard two-insn
-- idiom: LIS loads the upper 16 bits (shifted), then OR merges in the
-- lower 16 bits.  Returns the low register; the high register is
-- recoverable via getHiVRegFromLo.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: half2 (bits 16..31 of the high word) must be
                -- OR'd into rhi, not rlo.  Previously rhi's bottom 16 bits
                -- were left unset and rlo was corrupted.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- General 64-bit addition on ppc: ADDC sets the carry from the low words,
-- which the (not fully visible here) high-word add then consumes.
410 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
411 ChildCode64 code1 r1lo <- iselExpr64 e1
412 ChildCode64 code2 r2lo <- iselExpr64 e2
413 (rlo,rhi) <- getNewRegPairNat I32
415 r1hi = getHiVRegFromLo r1lo
416 r2hi = getHiVRegFromLo r2lo
419 toOL [ ADDC rlo r1lo r2lo,
422 return (ChildCode64 code rlo)
-- Any other 64-bit expression shape is unsupported on powerpc.
425 = pprPanic "iselExpr64(powerpc)" (ppr expr)
427 #endif /* powerpc_TARGET_ARCH */
430 -- -----------------------------------------------------------------------------
431 -- The 'Register' type
433 -- 'Register's passed up the tree. If the stix code forces the register
434 -- to live in a pre-decided machine register, it comes out as @Fixed@;
435 -- otherwise, it comes out as @Any@, and the parent can decide which
436 -- register to put it in.
-- Fixed: result is already in the given Reg; code computes it there.
-- Any: caller supplies the destination Reg and the function builds the code.
439 = Fixed MachRep Reg InstrBlock
440 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register', leaving the register
-- assignment and the generated code untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep reg rep = case reg of
    Fixed _ r code -> Fixed rep r code
    Any   _ gen    -> Any   rep gen
447 -- -----------------------------------------------------------------------------
448 -- Utils based on getRegister, below
450 -- The dual to getAnyReg: compute an expression into a register, but
451 -- we don't mind which one it is.
452 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- For an Any result we allocate a fresh temporary and run the code
-- generator into it; the Fixed alternative is not visible in this excerpt.
454 r <- getRegister expr
457 tmp <- getNewRegNat rep
458 return (tmp, code tmp)
462 -- -----------------------------------------------------------------------------
463 -- Grab the Reg for a CmmReg
465 getRegisterReg :: CmmReg -> Reg
-- Local registers map to virtual registers (body not visible in this
-- excerpt); globals must already map to a real machine register.
467 getRegisterReg (CmmLocal (LocalReg u pk))
470 getRegisterReg (CmmGlobal mid)
471 = case get_GlobalReg_reg_or_addr mid of
472 Left (RealReg rrno) -> RealReg rrno
473 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
474 -- By this stage, the only MagicIds remaining should be the
475 -- ones which map to a real machine register on this
476 -- platform. Hence ...
479 -- -----------------------------------------------------------------------------
480 -- Generate code to get a subtree into a Register
482 -- Don't delete this -- it's very handy for debugging.
484 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
485 -- = panic "getRegister(???)"
487 getRegister :: CmmExpr -> NatM Register
-- The pic base register is a special case: it is fetched via the NatM
-- state rather than the ordinary global-register mapping.
489 getRegister (CmmReg (CmmGlobal PicBaseReg))
491 reg <- getPicBaseNat wordRep
492 return (Fixed wordRep reg nilOL)
-- An ordinary register read needs no code: it is Fixed in its home register.
494 getRegister (CmmReg reg)
495 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
-- Reg+offset is normalised to an explicit add first.
497 getRegister tree@(CmmRegOff _ _)
498 = getRegister (mangleIndexTree tree)
500 -- end of machine-"independent" bit; here we go on the rest...
502 #if alpha_TARGET_ARCH
-- Alpha backend (legacy, still using StixTree-era names).  This whole
-- section is compiled only under alpha_TARGET_ARCH.
-- Float literal: emit it into a read-only data section, then load it.
504 getRegister (StDouble d)
505 = getBlockIdNat `thenNat` \ lbl ->
506 getNewRegNat PtrRep `thenNat` \ tmp ->
507 let code dst = mkSeqInstrs [
508 LDATA RoDataSegment lbl [
509 DATA TF [ImmLab (rational d)]
511 LDA tmp (AddrImm (ImmCLbl lbl)),
512 LD TF dst (AddrReg tmp)]
514 return (Any F64 code)
-- Unary primops: simple ALU/FPU cases inline; transcendentals become
-- C calls on the function named in 'fn' below.
516 getRegister (StPrim primop [x]) -- unary PrimOps
518 IntNegOp -> trivialUCode (NEG Q False) x
520 NotOp -> trivialUCode NOT x
522 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
523 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
525 OrdOp -> coerceIntCode IntRep x
528 Float2IntOp -> coerceFP2Int x
529 Int2FloatOp -> coerceInt2FP pr x
530 Double2IntOp -> coerceFP2Int x
531 Int2DoubleOp -> coerceInt2FP pr x
533 Double2FloatOp -> coerceFltCode x
534 Float2DoubleOp -> coerceFltCode x
536 other_op -> getRegister (StCall fn CCallConv F64 [x])
538 fn = case other_op of
539 FloatExpOp -> FSLIT("exp")
540 FloatLogOp -> FSLIT("log")
541 FloatSqrtOp -> FSLIT("sqrt")
542 FloatSinOp -> FSLIT("sin")
543 FloatCosOp -> FSLIT("cos")
544 FloatTanOp -> FSLIT("tan")
545 FloatAsinOp -> FSLIT("asin")
546 FloatAcosOp -> FSLIT("acos")
547 FloatAtanOp -> FSLIT("atan")
548 FloatSinhOp -> FSLIT("sinh")
549 FloatCoshOp -> FSLIT("cosh")
550 FloatTanhOp -> FSLIT("tanh")
551 DoubleExpOp -> FSLIT("exp")
552 DoubleLogOp -> FSLIT("log")
553 DoubleSqrtOp -> FSLIT("sqrt")
554 DoubleSinOp -> FSLIT("sin")
555 DoubleCosOp -> FSLIT("cos")
556 DoubleTanOp -> FSLIT("tan")
557 DoubleAsinOp -> FSLIT("asin")
558 DoubleAcosOp -> FSLIT("acos")
559 DoubleAtanOp -> FSLIT("atan")
560 DoubleSinhOp -> FSLIT("sinh")
561 DoubleCoshOp -> FSLIT("cosh")
562 DoubleTanhOp -> FSLIT("tanh")
564 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Dyadic primops.  Note the argument swaps (y x) that turn Gt/Ge into
-- the Lt/Le comparisons the hardware provides.
566 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
568 CharGtOp -> trivialCode (CMP LTT) y x
569 CharGeOp -> trivialCode (CMP LE) y x
570 CharEqOp -> trivialCode (CMP EQQ) x y
571 CharNeOp -> int_NE_code x y
572 CharLtOp -> trivialCode (CMP LTT) x y
573 CharLeOp -> trivialCode (CMP LE) x y
575 IntGtOp -> trivialCode (CMP LTT) y x
576 IntGeOp -> trivialCode (CMP LE) y x
577 IntEqOp -> trivialCode (CMP EQQ) x y
578 IntNeOp -> int_NE_code x y
579 IntLtOp -> trivialCode (CMP LTT) x y
580 IntLeOp -> trivialCode (CMP LE) x y
582 WordGtOp -> trivialCode (CMP ULT) y x
-- NOTE(review): WordGeOp uses (x y) while AddrGeOp below uses (y x) for
-- the same ULE comparison -- one of the two looks inverted; verify.
583 WordGeOp -> trivialCode (CMP ULE) x y
584 WordEqOp -> trivialCode (CMP EQQ) x y
585 WordNeOp -> int_NE_code x y
586 WordLtOp -> trivialCode (CMP ULT) x y
587 WordLeOp -> trivialCode (CMP ULE) x y
589 AddrGtOp -> trivialCode (CMP ULT) y x
590 AddrGeOp -> trivialCode (CMP ULE) y x
591 AddrEqOp -> trivialCode (CMP EQQ) x y
592 AddrNeOp -> int_NE_code x y
593 AddrLtOp -> trivialCode (CMP ULT) x y
594 AddrLeOp -> trivialCode (CMP ULE) x y
596 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
597 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
598 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
599 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
600 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
601 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
603 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
604 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
605 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
606 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
607 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
608 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
610 IntAddOp -> trivialCode (ADD Q False) x y
611 IntSubOp -> trivialCode (SUB Q False) x y
612 IntMulOp -> trivialCode (MUL Q False) x y
613 IntQuotOp -> trivialCode (DIV Q False) x y
614 IntRemOp -> trivialCode (REM Q False) x y
616 WordAddOp -> trivialCode (ADD Q False) x y
617 WordSubOp -> trivialCode (SUB Q False) x y
618 WordMulOp -> trivialCode (MUL Q False) x y
619 WordQuotOp -> trivialCode (DIV Q True) x y
620 WordRemOp -> trivialCode (REM Q True) x y
622 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
623 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
624 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
625 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
627 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
628 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
629 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
630 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
632 AddrAddOp -> trivialCode (ADD Q False) x y
633 AddrSubOp -> trivialCode (SUB Q False) x y
634 AddrRemOp -> trivialCode (REM Q True) x y
636 AndOp -> trivialCode AND x y
637 OrOp -> trivialCode OR x y
638 XorOp -> trivialCode XOR x y
639 SllOp -> trivialCode SLL x y
640 SrlOp -> trivialCode SRL x y
642 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
643 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
644 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
646 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
647 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
649 {- ------------------------------------------------------------
650 Some bizarre special code for getting condition codes into
651 registers. Integer non-equality is a test for equality
652 followed by an XOR with 1. (Integer comparisons always set
653 the result register to 0 or 1.) Floating point comparisons of
654 any kind leave the result in a floating point register, so we
655 need to wrangle an integer register out of things.
657 int_NE_code :: StixTree -> StixTree -> NatM Register
-- Equality test then XOR with 1 flips the 0/1 result into non-equality.
660 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
661 getNewRegNat IntRep `thenNat` \ tmp ->
663 code = registerCode register tmp
664 src = registerName register tmp
665 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
667 return (Any IntRep code__2)
669 {- ------------------------------------------------------------
670 Comments for int_NE_code also apply to cmpF_code
673 :: (Reg -> Reg -> Reg -> Instr)
675 -> StixTree -> StixTree
-- FP compare, then branch-on-condition to select an integer 0/1 result.
678 cmpF_code instr cond x y
679 = trivialFCode pr instr x y `thenNat` \ register ->
680 getNewRegNat F64 `thenNat` \ tmp ->
681 getBlockIdNat `thenNat` \ lbl ->
683 code = registerCode register tmp
684 result = registerName register tmp
686 code__2 dst = code . mkSeqInstrs [
687 OR zeroh (RIImm (ImmInt 1)) dst,
688 BF cond result (ImmCLbl lbl),
689 OR zeroh (RIReg zeroh) dst,
692 return (Any IntRep code__2)
694 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
695 ------------------------------------------------------------
-- Memory load at the requested representation.
697 getRegister (CmmLoad pk mem)
698 = getAmode mem `thenNat` \ amode ->
700 code = amodeCode amode
701 src = amodeAddr amode
702 size = primRepToSize pk
703 code__2 dst = code . mkSeqInstr (LD size dst src)
705 return (Any pk code__2)
-- Integer literal: small immediates via OR with %zero, larger via LDI/LDA.
707 getRegister (StInt i)
710 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
712 return (Any IntRep code)
715 code dst = mkSeqInstr (LDI Q dst src)
717 return (Any IntRep code)
719 src = ImmInt (fromInteger i)
724 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
726 return (Any PtrRep code)
729 imm__2 = case imm of Just x -> x
731 #endif /* alpha_TARGET_ARCH */
733 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: a float literal is placed in a data section and loaded via an
-- address that is PIC-safe (cmmMakeDynamicReference).
737 getRegister (CmmLit (CmmFloat f F32)) = do
738 lbl <- getNewLabelNat
739 dynRef <- cmmMakeDynamicReference addImportNat False lbl
740 Amode addr addr_code <- getAmode dynRef
744 CmmStaticLit (CmmFloat f F32)]
745 `consOL` (addr_code `snocOL`
748 return (Any F32 code)
-- i386 doubles: 0.0 and 1.0 have dedicated x87 load instructions
-- (GLDZ / GLD1); anything else goes through a data section like F32.
751 getRegister (CmmLit (CmmFloat d F64))
753 = let code dst = unitOL (GLDZ dst)
754 in return (Any F64 code)
757 = let code dst = unitOL (GLD1 dst)
758 in return (Any F64 code)
761 lbl <- getNewLabelNat
762 dynRef <- cmmMakeDynamicReference addImportNat False lbl
763 Amode addr addr_code <- getAmode dynRef
767 CmmStaticLit (CmmFloat d F64)]
768 `consOL` (addr_code `snocOL`
771 return (Any F64 code)
773 #endif /* i386_TARGET_ARCH */
775 #if x86_64_TARGET_ARCH
-- x86_64: 0.0 is cheapest as xor-with-self of the destination SSE register.
777 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
778 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
779 -- I don't know why there are xorpd, xorps, and pxor instructions.
780 -- They all appear to do the same thing --SDM
781 return (Any rep code)
-- Other float literals: rip-relative load from a data section.
783 getRegister (CmmLit (CmmFloat f rep)) = do
784 lbl <- getNewLabelNat
785 let code dst = toOL [
788 CmmStaticLit (CmmFloat f rep)],
789 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
792 return (Any rep code)
794 #endif /* x86_64_TARGET_ARCH */
796 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
798 -- catch simple cases of zero- or sign-extended load
-- These fuse a load with the following widening conversion into a single
-- movzx / movsx, avoiding a separate extension instruction.
799 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
800 code <- intLoadCode (MOVZxL I8) addr
801 return (Any I32 code)
803 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
804 code <- intLoadCode (MOVSxL I8) addr
805 return (Any I32 code)
807 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
808 code <- intLoadCode (MOVZxL I16) addr
809 return (Any I32 code)
811 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
812 code <- intLoadCode (MOVSxL I16) addr
813 return (Any I32 code)
817 #if x86_64_TARGET_ARCH
819 -- catch simple cases of zero- or sign-extended load
820 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
821 code <- intLoadCode (MOVZxL I8) addr
822 return (Any I64 code)
824 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
825 code <- intLoadCode (MOVSxL I8) addr
826 return (Any I64 code)
828 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
829 code <- intLoadCode (MOVZxL I16) addr
830 return (Any I64 code)
832 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
833 code <- intLoadCode (MOVSxL I16) addr
834 return (Any I64 code)
836 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
837 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
838 return (Any I64 code)
840 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
841 code <- intLoadCode (MOVSxL I32) addr
842 return (Any I64 code)
846 #if x86_64_TARGET_ARCH
-- x86_64 has no FP negate instruction; following gcc, flip the sign bit
-- by XORing with a sign-bit mask held in a 16-byte-aligned data section.
847 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
848 x_code <- getAnyReg x
849 lbl <- getNewLabelNat
851 code dst = x_code dst `appOL` toOL [
852 -- This is how gcc does it, so it can't be that bad:
853 LDATA ReadOnlyData16 [
856 CmmStaticLit (CmmInt 0x80000000 I32),
857 CmmStaticLit (CmmInt 0 I32),
858 CmmStaticLit (CmmInt 0 I32),
859 CmmStaticLit (CmmInt 0 I32)
861 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
862 -- xorps, so we need the 128-bit constant
863 -- ToDo: rip-relative
866 return (Any F32 code)
-- Same trick for doubles, with the sign bit in bit 63.
868 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
869 x_code <- getAnyReg x
870 lbl <- getNewLabelNat
872 -- This is how gcc does it, so it can't be that bad:
873 code dst = x_code dst `appOL` toOL [
874 LDATA ReadOnlyData16 [
877 CmmStaticLit (CmmInt 0x8000000000000000 I64),
878 CmmStaticLit (CmmInt 0 I64)
880 -- gcc puts an unpck here. Wonder if we need it.
881 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
882 -- xorpd, so we need the 128-bit constant
885 return (Any F64 code)
888 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Unary MachOps on x86: negation/not inline; narrowing conversions go via
-- (possibly byte-addressable) registers; widening uses movzx/movsx; same-
-- size conversions are no-ops that just relabel the MachRep.
890 getRegister (CmmMachOp mop [x]) -- unary MachOps
893 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
894 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
897 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
898 MO_Not rep -> trivialUCode rep (NOT rep) x
901 MO_U_Conv I32 I8 -> toI8Reg I32 x
902 MO_S_Conv I32 I8 -> toI8Reg I32 x
903 MO_U_Conv I16 I8 -> toI8Reg I16 x
904 MO_S_Conv I16 I8 -> toI8Reg I16 x
905 MO_U_Conv I32 I16 -> toI16Reg I32 x
906 MO_S_Conv I32 I16 -> toI16Reg I32 x
907 #if x86_64_TARGET_ARCH
908 MO_U_Conv I64 I32 -> conversionNop I64 x
909 MO_S_Conv I64 I32 -> conversionNop I64 x
910 MO_U_Conv I64 I16 -> toI16Reg I64 x
911 MO_S_Conv I64 I16 -> toI16Reg I64 x
912 MO_U_Conv I64 I8 -> toI8Reg I64 x
913 MO_S_Conv I64 I8 -> toI8Reg I64 x
916 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
917 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
920 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
921 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
922 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
924 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
925 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
926 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
928 #if x86_64_TARGET_ARCH
929 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
930 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
931 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
932 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
933 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
934 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
935 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
936 -- However, we don't want the register allocator to throw it
937 -- away as an unnecessary reg-to-reg move, so we keep it in
938 -- the form of a movzl and print it as a movl later.
942 MO_S_Conv F32 F64 -> conversionNop F64 x
943 MO_S_Conv F64 F32 -> conversionNop F32 x
945 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
946 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
950 | isFloatingRep from -> coerceFP2Int from to x
951 | isFloatingRep to -> coerceInt2FP from to x
953 other -> pprPanic "getRegister" (pprMachOp mop)
955 -- signed or unsigned extension.
956 integerExtend from to instr expr = do
957 (reg,e_code) <- if from == I8 then getByteReg expr
962 instr from (OpReg reg) (OpReg dst)
-- toI8Reg relies on getAnyReg producing a byte-addressable register;
-- see the HACK note below.
966 = do codefn <- getAnyReg expr
967 return (Any new_rep codefn)
968 -- HACK: use getAnyReg to get a byte-addressable register.
969 -- If the source was a Fixed register, this will add the
970 -- mov instruction to put it into the desired destination.
971 -- We're assuming that the destination won't be a fixed
972 -- non-byte-addressable register; it won't be, because all
973 -- fixed registers are word-sized.
975 toI16Reg = toI8Reg -- for now
-- A conversion that changes only the MachRep label, not the bits.
977 conversionNop new_rep expr
978 = do e_code <- getRegister expr
979 return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps on x86.  Comparisons produce condition-code results via
-- condFltReg/condIntReg; arithmetic dispatches to the trivial*Code helpers
-- or to the local helpers defined below (shift_code, add_code, div_code...).
982 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
983 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
985 MO_Eq F32 -> condFltReg EQQ x y
986 MO_Ne F32 -> condFltReg NE x y
987 MO_S_Gt F32 -> condFltReg GTT x y
988 MO_S_Ge F32 -> condFltReg GE x y
989 MO_S_Lt F32 -> condFltReg LTT x y
990 MO_S_Le F32 -> condFltReg LE x y
992 MO_Eq F64 -> condFltReg EQQ x y
993 MO_Ne F64 -> condFltReg NE x y
994 MO_S_Gt F64 -> condFltReg GTT x y
995 MO_S_Ge F64 -> condFltReg GE x y
996 MO_S_Lt F64 -> condFltReg LTT x y
997 MO_S_Le F64 -> condFltReg LE x y
999 MO_Eq rep -> condIntReg EQQ x y
1000 MO_Ne rep -> condIntReg NE x y
1002 MO_S_Gt rep -> condIntReg GTT x y
1003 MO_S_Ge rep -> condIntReg GE x y
1004 MO_S_Lt rep -> condIntReg LTT x y
1005 MO_S_Le rep -> condIntReg LE x y
1007 MO_U_Gt rep -> condIntReg GU x y
1008 MO_U_Ge rep -> condIntReg GEU x y
1009 MO_U_Lt rep -> condIntReg LU x y
1010 MO_U_Le rep -> condIntReg LEU x y
1012 #if i386_TARGET_ARCH
1013 MO_Add F32 -> trivialFCode F32 GADD x y
1014 MO_Sub F32 -> trivialFCode F32 GSUB x y
1016 MO_Add F64 -> trivialFCode F64 GADD x y
1017 MO_Sub F64 -> trivialFCode F64 GSUB x y
1019 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1020 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1023 #if x86_64_TARGET_ARCH
1024 MO_Add F32 -> trivialFCode F32 ADD x y
1025 MO_Sub F32 -> trivialFCode F32 SUB x y
1027 MO_Add F64 -> trivialFCode F64 ADD x y
1028 MO_Sub F64 -> trivialFCode F64 SUB x y
1030 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1031 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1034 MO_Add rep -> add_code rep x y
1035 MO_Sub rep -> sub_code rep x y
1037 MO_S_Quot rep -> div_code rep True True x y
1038 MO_S_Rem rep -> div_code rep True False x y
1039 MO_U_Quot rep -> div_code rep False True x y
1040 MO_U_Rem rep -> div_code rep False False x y
1042 #if i386_TARGET_ARCH
1043 MO_Mul F32 -> trivialFCode F32 GMUL x y
1044 MO_Mul F64 -> trivialFCode F64 GMUL x y
1047 #if x86_64_TARGET_ARCH
1048 MO_Mul F32 -> trivialFCode F32 MUL x y
1049 MO_Mul F64 -> trivialFCode F64 MUL x y
1052 MO_Mul rep -> let op = IMUL rep in
1053 trivialCode rep op (Just op) x y
1055 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1057 MO_And rep -> let op = AND rep in
1058 trivialCode rep op (Just op) x y
1059 MO_Or rep -> let op = OR rep in
1060 trivialCode rep op (Just op) x y
1061 MO_Xor rep -> let op = XOR rep in
1062 trivialCode rep op (Just op) x y
1064 {- Shift ops on x86s have constraints on their source, it
1065 either has to be Imm, CL or 1
1066 => trivialCode is not restrictive enough (sigh.)
1068 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1069 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1070 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1072 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1074 --------------------
-- Overflow check for signed multiply: widen via IMUL2 (result in edx:eax),
-- then compare the high part against the sign-extended low part; the
-- result register is 0 exactly when no overflow occurred.
1075 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1076 imulMayOflo rep a b = do
1077 (a_reg, a_code) <- getNonClobberedReg a
1078 b_code <- getAnyReg b
1080 shift_amt = case rep of
1083 _ -> panic "shift_amt"
1085 code = a_code `appOL` b_code eax `appOL`
1087 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1088 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1089 -- sign extend lower part
1090 SUB rep (OpReg edx) (OpReg eax)
1091 -- compare against upper
1092 -- eax==0 if high part == sign extended low part
1095 return (Fixed rep eax code)
1097 --------------------
1098 shift_code :: MachRep
1099 -> (Operand -> Operand -> Instr)
1104 {- Case1: shift length as immediate -}
1105 shift_code rep instr x y@(CmmLit lit) = do
1106 x_code <- getAnyReg x
1109 = x_code dst `snocOL`
1110 instr (OpImm (litToImm lit)) (OpReg dst)
1112 return (Any rep code)
1114 {- Case2: shift length is complex (non-immediate) -}
-- Non-immediate shift amounts must live in %cl on x86.
1115 shift_code rep instr x y{-amount-} = do
1116 (x_reg, x_code) <- getNonClobberedReg x
1117 y_code <- getAnyReg y
1119 code = x_code `appOL`
1121 instr (OpReg ecx) (OpReg x_reg)
1123 return (Fixed rep x_reg code)
1125 --------------------
-- Addition: a small literal operand is turned into a single LEA via
-- add_int; otherwise a plain two-operand ADD.
1126 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1127 add_code rep x (CmmLit (CmmInt y _))
1128 | not (is64BitInteger y) = add_int rep x y
1129 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1131 --------------------
-- Subtraction of a literal is addition of its negation (SUB itself is
-- not commutative, so no commutative variant is passed to trivialCode).
1132 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1133 sub_code rep x (CmmLit (CmmInt y _))
1134 | not (is64BitInteger (-y)) = add_int rep x (-y)
1135 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1137 -- our three-operand add instruction:
1138 add_int rep x y = do
1139 (x_reg, x_code) <- getSomeReg x
1141 imm = ImmInt (fromInteger y)
1145 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1148 return (Any rep code)
1150 ----------------------
-- x86 division: dividend in %edx:%eax (widened by CLTD for signed, or by
-- zeroing %edx for unsigned); quotient lands in %eax, remainder in %edx.
1151 div_code rep signed quotient x y = do
1152 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1153 x_code <- getAnyReg x
1155 widen | signed = CLTD rep
1156 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1158 instr | signed = IDIV
1161 code = y_code `appOL`
1163 toOL [widen, instr rep y_op]
1165 result | quotient = eax
1169 return (Fixed rep result code)
-- Floating-point load: x87 GLD on i386, plain MOV (SSE regs) on x86_64.
-- NOTE(review): the guard restricting this equation to floating reps is
-- elided in this listing — TODO confirm.
1172 getRegister (CmmLoad mem pk)
1175 Amode src mem_code <- getAmode mem
1177 code dst = mem_code `snocOL`
1178 IF_ARCH_i386(GLD pk src dst,
1179 MOV pk (OpAddr src) (OpReg dst))
1181 return (Any pk code)
1183 #if i386_TARGET_ARCH
-- i386 integer load; sub-word loads pick a zero-extending instruction.
1184 getRegister (CmmLoad mem pk)
1187 code <- intLoadCode (instr pk) mem
1188 return (Any pk code)
1190 instr I8 = MOVZxL pk
1193 -- we always zero-extend 8-bit loads, if we
1194 -- can't think of anything better. This is because
1195 -- we can't guarantee access to an 8-bit variant of every register
1196 -- (esi and edi don't have 8-bit variants), so to make things
1197 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1200 #if x86_64_TARGET_ARCH
1201 -- Simpler memory load code on x86_64
1202 getRegister (CmmLoad mem pk)
1204 code <- intLoadCode (MOV pk) mem
1205 return (Any pk code)
-- Zero literal: synthesised with XOR reg,reg rather than MOV $0.
1208 getRegister (CmmLit (CmmInt 0 rep))
1210 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1211 adj_rep = case rep of I64 -> I32; _ -> rep
1212 rep1 = IF_ARCH_i386( rep, adj_rep )
1214 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1216 return (Any rep code)
1218 #if x86_64_TARGET_ARCH
1219 -- optimisation for loading small literals on x86_64: take advantage
1220 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1221 -- instruction forms are shorter.
1222 getRegister (CmmLit lit)
1223 | I64 <- cmmLitRep lit, not (isBigLit lit)
1226 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1228 return (Any I64 code)
-- "big" = does not fit in 32 bits unsigned (zero-extension would change it).
1230 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1232 -- note1: not the same as is64BitLit, because that checks for
1233 -- signed literals that fit in 32 bits, but we want unsigned
1235 -- note2: all labels are small, because we're assuming the
1236 -- small memory model (see gcc docs, -mcmodel=small).
-- Generic literal: move the immediate at the literal's own rep.
1239 getRegister (CmmLit lit)
1243 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1245 return (Any rep code)
1247 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build the code for an integer load: compute the address with getAmode,
-- then apply the supplied load instruction from that address into the
-- (as yet unknown) destination register.
1250 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1251 -> NatM (Reg -> InstrBlock)
1252 intLoadCode instr mem = do
1253 Amode src mem_code <- getAmode mem
1254 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1256 -- Compute an expression into *any* register, adding the appropriate
1257 -- move instruction if necessary.
1258 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1260 r <- getRegister expr
-- anyReg adapts a Register result: an Any already takes the destination;
-- a Fixed gets a reg-to-reg move appended to retarget it.
1263 anyReg :: Register -> NatM (Reg -> InstrBlock)
1264 anyReg (Any _ code) = return code
1265 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1267 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1268 -- Fixed registers might not be byte-addressable, so we make sure we've
1269 -- got a temporary, inserting an extra reg copy if necessary.
1270 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1271 #if x86_64_TARGET_ARCH
1272 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 version (the matching #else is elided in this listing):
1274 getByteReg expr = do
1275 r <- getRegister expr
-- Any result: materialise into a fresh virtual reg (always byte-safe).
1278 tmp <- getNewRegNat rep
1279 return (tmp, code tmp)
-- Fixed virtual reg: usable as-is; fixed real reg: copy to a temporary.
1281 | isVirtualReg reg -> return (reg,code)
1283 tmp <- getNewRegNat rep
1284 return (tmp, code `snocOL` reg2reg rep reg tmp)
1285 -- ToDo: could optimise slightly by checking for byte-addressable
1286 -- real registers, but that will happen very rarely if at all.
1289 -- Another variant: this time we want the result in a register that cannot
1290 -- be modified by code to evaluate an arbitrary expression.
1291 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1292 getNonClobberedReg expr = do
1293 r <- getRegister expr
-- Any result: put it in a fresh temporary, which nothing else will touch.
1296 tmp <- getNewRegNat rep
1297 return (tmp, code tmp)
1299 -- only free regs can be clobbered
-- Fixed real reg that is caller-clobberable: copy it out to a temporary.
1300 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1301 tmp <- getNewRegNat rep
1302 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Register-to-register move at the given rep: x87 GMOV for floats on
-- i386, a plain MOV otherwise.
1306 reg2reg :: MachRep -> Reg -> Reg -> Instr
1308 #if i386_TARGET_ARCH
1309 | isFloatingRep rep = GMOV src dst
1311 | otherwise = MOV rep (OpReg src) (OpReg dst)
1313 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1315 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1317 #if sparc_TARGET_ARCH
-- SPARC has no float-immediate form: float literals are placed in a data
-- section under a fresh label and loaded via SETHI (high 22 bits of the
-- label address) + LD with the low-part offset.
1319 getRegister (CmmLit (CmmFloat f F32)) = do
1320 lbl <- getNewLabelNat
1321 let code dst = toOL [
1324 CmmStaticLit (CmmFloat f F32)],
1325 SETHI (HI (ImmCLbl lbl)) dst,
1326 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1327 return (Any F32 code)
1329 getRegister (CmmLit (CmmFloat d F64)) = do
1330 lbl <- getNewLabelNat
1331 let code dst = toOL [
1334 CmmStaticLit (CmmFloat d F64)],
1335 SETHI (HI (ImmCLbl lbl)) dst,
1336 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1337 return (Any F64 code)
1339 getRegister (CmmMachOp mop [x]) -- unary MachOps
1341 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1342 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
-- Integer negate/not are synthesised from SUB/XNOR against %g0 (zero reg).
1344 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1345 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1347 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1349 MO_U_Conv F64 F32-> coerceDbl2Flt x
1350 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1352 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1353 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1354 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1355 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1357 -- Conversions which are a nop on sparc
1359 | from == to -> conversionNop to x
1360 MO_U_Conv I32 to -> conversionNop to x
1361 MO_S_Conv I32 to -> conversionNop to x
-- Widening: shift left then right (arith. or logical) to sign/zero-extend.
1364 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1365 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1366 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1367 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1369 other_op -> panic "Unknown unary mach op"
-- integerExtend: shift-based extension helper; signed selects SRA,
-- unsigned SRL.  (The SLL half of the pair is elided in this listing.)
1372 integerExtend signed from to expr = do
1373 (reg, e_code) <- getSomeReg expr
1377 ((if signed then SRA else SRL)
1378 reg (RIImm (ImmInt 0)) dst)
1379 return (Any to code)
-- conversionNop: reuse the operand's code, merely relabelling its rep.
1380 conversionNop new_rep expr
1381 = do e_code <- getRegister expr
1382 return (swizzleRegisterRep e_code new_rep)
1384 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
-- Comparisons materialise their result into a register via
-- condFltReg/condIntReg rather than producing a CondCode.
1386 MO_Eq F32 -> condFltReg EQQ x y
1387 MO_Ne F32 -> condFltReg NE x y
1389 MO_S_Gt F32 -> condFltReg GTT x y
1390 MO_S_Ge F32 -> condFltReg GE x y
1391 MO_S_Lt F32 -> condFltReg LTT x y
1392 MO_S_Le F32 -> condFltReg LE x y
1394 MO_Eq F64 -> condFltReg EQQ x y
1395 MO_Ne F64 -> condFltReg NE x y
1397 MO_S_Gt F64 -> condFltReg GTT x y
1398 MO_S_Ge F64 -> condFltReg GE x y
1399 MO_S_Lt F64 -> condFltReg LTT x y
1400 MO_S_Le F64 -> condFltReg LE x y
1402 MO_Eq rep -> condIntReg EQQ x y
1403 MO_Ne rep -> condIntReg NE x y
1405 MO_S_Gt rep -> condIntReg GTT x y
1406 MO_S_Ge rep -> condIntReg GE x y
1407 MO_S_Lt rep -> condIntReg LTT x y
1408 MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): unsigned I32 comparisons use the *signed* condition codes
-- here while I16 uses the unsigned ones (GU/GEU/LU/LEU) — looks suspicious;
-- verify against the full source before relying on it.
1410 MO_U_Gt I32 -> condIntReg GTT x y
1411 MO_U_Ge I32 -> condIntReg GE x y
1412 MO_U_Lt I32 -> condIntReg LTT x y
1413 MO_U_Le I32 -> condIntReg LE x y
1415 MO_U_Gt I16 -> condIntReg GU x y
1416 MO_U_Ge I16 -> condIntReg GEU x y
1417 MO_U_Lt I16 -> condIntReg LU x y
1418 MO_U_Le I16 -> condIntReg LEU x y
1420 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1421 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1423 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1425 -- ToDo: teach about V8+ SPARC div instructions
-- Division is done by calling out to the millicode routines .div/.rem etc.
1426 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1427 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1428 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1429 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1431 MO_Add F32 -> trivialFCode F32 FADD x y
1432 MO_Sub F32 -> trivialFCode F32 FSUB x y
1433 MO_Mul F32 -> trivialFCode F32 FMUL x y
1434 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1436 MO_Add F64 -> trivialFCode F64 FADD x y
1437 MO_Sub F64 -> trivialFCode F64 FSUB x y
1438 MO_Mul F64 -> trivialFCode F64 FMUL x y
1439 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1441 MO_And rep -> trivialCode rep (AND False) x y
1442 MO_Or rep -> trivialCode rep (OR False) x y
1443 MO_Xor rep -> trivialCode rep (XOR False) x y
1445 MO_Mul rep -> trivialCode rep (SMUL False) x y
1447 MO_Shl rep -> trivialCode rep SLL x y
1448 MO_U_Shr rep -> trivialCode rep SRL x y
1449 MO_S_Shr rep -> trivialCode rep SRA x y
-- pow is implemented by a C call, promoting F32 operands to double first.
1452 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1453 [promote x, promote y])
1454 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1455 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1458 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1460 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1462 --------------------
-- SPARC imulMayOflo: 32x32 -> 64-bit SMUL, then compare the high word
-- (RDY; elided here) against the sign-extension of the low word, as on x86.
1463 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1464 imulMayOflo rep a b = do
1465 (a_reg, a_code) <- getSomeReg a
1466 (b_reg, b_code) <- getSomeReg b
1467 res_lo <- getNewRegNat I32
1468 res_hi <- getNewRegNat I32
-- rep-specific shift amounts are elided in this listing.
1470 shift_amt = case rep of
1473 _ -> panic "shift_amt"
1474 code dst = a_code `appOL` b_code `appOL`
1476 SMUL False a_reg (RIReg b_reg) res_lo,
1478 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1479 SUB False False res_lo (RIReg res_hi) dst
1481 return (Any I32 code)
-- Memory load: address computation followed by a single LD.
1483 getRegister (CmmLoad mem pk) = do
1484 Amode src code <- getAmode mem
1486 code__2 dst = code `snocOL` LD pk src dst
1487 return (Any pk code__2)
-- Small integer literal (guard elided): OR immediate against %g0.
1489 getRegister (CmmLit (CmmInt i _))
1492 src = ImmInt (fromInteger i)
1493 code dst = unitOL (OR False g0 (RIImm src) dst)
1495 return (Any I32 code)
-- General literal: SETHI high part (elided) then OR in the low part.
1497 getRegister (CmmLit lit)
1498 = let rep = cmmLitRep lit
1502 OR False dst (RIImm (LO imm)) dst]
1503 in return (Any I32 code)
1505 #endif /* sparc_TARGET_ARCH */
1507 #if powerpc_TARGET_ARCH
-- PowerPC load: the assertion checks that floating values end up in a
-- double-class register, matching the rep.
1508 getRegister (CmmLoad mem pk)
1511 Amode addr addr_code <- getAmode mem
1512 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1513 addr_code `snocOL` LD pk dst addr
1514 return (Any pk code)
1516 -- catch simple cases of zero- or sign-extended load
1517 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1518 Amode addr addr_code <- getAmode mem
1519 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1521 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1523 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1524 Amode addr addr_code <- getAmode mem
1525 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- LA = load-algebraic (sign-extending) halfword load.
1527 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1528 Amode addr addr_code <- getAmode mem
1529 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1531 getRegister (CmmMachOp mop [x]) -- unary MachOps
1533 MO_Not rep -> trivialUCode rep NOT x
-- FRSP rounds double to single; widening F32->F64 is free in FP regs.
1535 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1536 MO_S_Conv F32 F64 -> conversionNop F64 x
1539 | from == to -> conversionNop to x
1540 | isFloatingRep from -> coerceFP2Int from to x
1541 | isFloatingRep to -> coerceInt2FP from to x
1543 -- narrowing is a nop: we treat the high bits as undefined
1544 MO_S_Conv I32 to -> conversionNop to x
1545 MO_S_Conv I16 I8 -> conversionNop I8 x
-- Signed widening uses the EXTS (extend-sign) instruction.
1546 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1547 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1550 | from == to -> conversionNop to x
1551 -- narrowing is a nop: we treat the high bits as undefined
1552 MO_U_Conv I32 to -> conversionNop to x
1553 MO_U_Conv I16 I8 -> conversionNop I8 x
-- Unsigned widening is a mask with 0xff / 0xffff.
1554 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1555 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1557 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1558 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1559 MO_S_Neg rep -> trivialUCode rep NEG x
-- conversionNop: keep the operand's code, just relabel its rep.
1562 conversionNop new_rep expr
1563 = do e_code <- getRegister expr
1564 return (swizzleRegisterRep e_code new_rep)
1566 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1568 MO_Eq F32 -> condFltReg EQQ x y
1569 MO_Ne F32 -> condFltReg NE x y
1571 MO_S_Gt F32 -> condFltReg GTT x y
1572 MO_S_Ge F32 -> condFltReg GE x y
1573 MO_S_Lt F32 -> condFltReg LTT x y
1574 MO_S_Le F32 -> condFltReg LE x y
1576 MO_Eq F64 -> condFltReg EQQ x y
1577 MO_Ne F64 -> condFltReg NE x y
1579 MO_S_Gt F64 -> condFltReg GTT x y
1580 MO_S_Ge F64 -> condFltReg GE x y
1581 MO_S_Lt F64 -> condFltReg LTT x y
1582 MO_S_Le F64 -> condFltReg LE x y
-- Sub-word comparisons must widen both operands to I32 first (PPC compares
-- whole registers); extendSExpr/extendUExpr are defined below.
1584 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1585 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1587 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1588 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1589 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1590 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1592 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1593 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1594 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1595 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1597 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1598 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1599 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1600 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1602 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1603 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1604 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1605 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1607 -- optimize addition with 32-bit immediate
-- An immediate that fits is handled by trivialCode; one that does not is
-- split into ADDIS (high-adjusted half) + ADD of the low half.
1611 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1612 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1615 (src, srcCode) <- getSomeReg x
1616 let imm = litToImm lit
1617 code dst = srcCode `appOL` toOL [
1618 ADDIS dst src (HA imm),
1619 ADD dst dst (RIImm (LO imm))
1621 return (Any I32 code)
1622 _ -> trivialCode I32 True ADD x y
1624 MO_Add rep -> trivialCode rep True ADD x y
1626 case y of -- subfi ('subtract from' with immediate) doesn't exist
1627 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1628 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
-- Note SUBF computes y-x with swapped operands, hence "SUBF y x".
1629 _ -> trivialCodeNoImm rep SUBF y x
1631 MO_Mul rep -> trivialCode rep True MULLW x y
1633 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1635 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1636 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1638 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1639 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
-- Remainder is synthesised from divide + multiply + subtract (remainderCode).
1641 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1642 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1644 MO_And rep -> trivialCode rep False AND x y
1645 MO_Or rep -> trivialCode rep False OR x y
1646 MO_Xor rep -> trivialCode rep False XOR x y
1648 MO_Shl rep -> trivialCode rep False SLW x y
-- Arithmetic right shift needs a sign-extended operand; logical does not.
1649 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1650 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- Integer literal that fits a signed 16-bit immediate: a single LI.
1652 getRegister (CmmLit (CmmInt i rep))
1653 | Just imm <- makeImmediate rep True i
1655 code dst = unitOL (LI dst imm)
1657 return (Any rep code)
-- Float literal: emit it into a read-only data section and load it via a
-- (possibly PIC-indirected) address to the fresh label.
1659 getRegister (CmmLit (CmmFloat f frep)) = do
1660 lbl <- getNewLabelNat
1661 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1662 Amode addr addr_code <- getAmode dynRef
1664 LDATA ReadOnlyData [CmmDataLabel lbl,
1665 CmmStaticLit (CmmFloat f frep)]
1666 `consOL` (addr_code `snocOL` LD frep dst addr)
1667 return (Any frep code)
-- General literal: LIS high half (elided) then OR in the low half.
1669 getRegister (CmmLit lit)
1670 = let rep = cmmLitRep lit
1674 OR dst dst (RIImm (LO imm))
1676 in return (Any rep code)
1678 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1680 -- extend?Rep: wrap integer expression of type rep
1681 -- in a conversion to I32
-- (identity at I32; otherwise inserts an explicit sign/zero extension node)
1682 extendSExpr I32 x = x
1683 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1684 extendUExpr I32 x = x
1685 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1687 #endif /* powerpc_TARGET_ARCH */
1690 -- -----------------------------------------------------------------------------
1691 -- The 'Amode' type: Memory addressing modes passed up the tree.
1693 data Amode = Amode AddrMode InstrBlock
1696 Now, given a tree (the argument to a CmmLoad) that references memory,
1697 produce a suitable addressing mode.
1699 A Rule of the Game (tm) for Amodes: use of the addr bit must
1700 immediately follow use of the code part, since the code part puts
1701 values in registers which the addr then refers to. So you can't put
1702 anything in between, lest it overwrite some of those registers. If
1703 you need to do some other computation between the code part and use of
1704 the addr bit, first store the effective address from the amode in a
1705 temporary, then do the other computation, and then use the temporary:
1709 ... other computation ...
1713 getAmode :: CmmExpr -> NatM Amode
-- CmmRegOff is normalised into a reg+offset MachOp tree first, so the
-- arch-specific equations below only need to handle MachOp shapes.
1714 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1716 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1718 #if alpha_TARGET_ARCH
-- Alpha getAmode, still in the old Stix/thenNat style: reg +/- immediate
-- displacement forms, an absolute-immediate form, and a catch-all that
-- computes the address into a register.
1720 getAmode (StPrim IntSubOp [x, StInt i])
1721 = getNewRegNat PtrRep `thenNat` \ tmp ->
1722 getRegister x `thenNat` \ register ->
1724 code = registerCode register tmp
1725 reg = registerName register tmp
1726 off = ImmInt (-(fromInteger i))
1728 return (Amode (AddrRegImm reg off) code)
1730 getAmode (StPrim IntAddOp [x, StInt i])
1731 = getNewRegNat PtrRep `thenNat` \ tmp ->
1732 getRegister x `thenNat` \ register ->
1734 code = registerCode register tmp
1735 reg = registerName register tmp
1736 off = ImmInt (fromInteger i)
1738 return (Amode (AddrRegImm reg off) code)
-- Absolute address known at compile time (pattern head elided).
1742 = return (Amode (AddrImm imm__2) id)
1745 imm__2 = case imm of Just x -> x
-- Catch-all: evaluate the expression and address through the register.
1748 = getNewRegNat PtrRep `thenNat` \ tmp ->
1749 getRegister other `thenNat` \ register ->
1751 code = registerCode register tmp
1752 reg = registerName register tmp
1754 return (Amode (AddrReg reg) code)
1756 #endif /* alpha_TARGET_ARCH */
1758 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1760 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1762 -- This is all just ridiculous, since it carefully undoes
1763 -- what mangleIndexTree has just done.
-- base - imm32  ==>  disp(base)
1764 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1765 | not (is64BitLit lit)
1766 -- ASSERT(rep == I32)???
1767 = do (x_reg, x_code) <- getSomeReg x
1768 let off = ImmInt (-(fromInteger i))
1769 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- base + imm32  ==>  disp(base)
1771 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1772 | not (is64BitLit lit)
1773 -- ASSERT(rep == I32)???
1774 = do (x_reg, x_code) <- getSomeReg x
1775 let off = ImmInt (fromInteger i)
1776 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1778 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1779 -- recognised by the next rule.
1780 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1782 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << 0|1|2|3)  ==>  scaled-index addressing (base,index,scale)
1784 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1785 [y, CmmLit (CmmInt shift _)]])
1786 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1787 = do (x_reg, x_code) <- getNonClobberedReg x
1788 -- x must be in a temp, because it has to stay live over y_code
1789 -- we could compare x_reg and y_reg and do something better here...
1790 (y_reg, y_code) <- getSomeReg y
1792 code = x_code `appOL` y_code
1793 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1794 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-- Plain (small) literal: absolute address, no code needed.
1797 getAmode (CmmLit lit) | not (is64BitLit lit)
1798 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Catch-all (pattern head elided): compute into a reg, zero displacement.
1801 (reg,code) <- getSomeReg expr
1802 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1804 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1808 #if sparc_TARGET_ARCH
-- reg +/- small immediate  ==>  AddrRegImm (the fits-in-13-bits guards
-- are elided in this listing).
1810 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1813 (reg, code) <- getSomeReg x
1815 off = ImmInt (-(fromInteger i))
1816 return (Amode (AddrRegImm reg off) code)
1819 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1822 (reg, code) <- getSomeReg x
1824 off = ImmInt (fromInteger i)
1825 return (Amode (AddrRegImm reg off) code)
-- reg + reg  ==>  AddrRegReg
1827 getAmode (CmmMachOp (MO_Add rep) [x, y])
1829 (regX, codeX) <- getSomeReg x
1830 (regY, codeY) <- getSomeReg y
1832 code = codeX `appOL` codeY
1833 return (Amode (AddrRegReg regX regY) code)
1835 -- XXX Is this same as "leaf" in Stix?
-- Literal address: SETHI the high 22 bits into a temp, low bits as offset.
1836 getAmode (CmmLit lit)
1838 tmp <- getNewRegNat I32
1840 code = unitOL (SETHI (HI imm__2) tmp)
1841 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1843 imm__2 = litToImm lit
-- Catch-all (pattern head elided): compute address into a register.
1847 (reg, code) <- getSomeReg other
1850 return (Amode (AddrRegImm reg off) code)
1852 #endif /* sparc_TARGET_ARCH */
1854 #ifdef powerpc_TARGET_ARCH
-- reg - imm16  ==>  AddrRegImm (makeImmediate checks the signed-16-bit fit).
1855 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1856 | Just off <- makeImmediate I32 True (-i)
1858 (reg, code) <- getSomeReg x
1859 return (Amode (AddrRegImm reg off) code)
1862 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1863 | Just off <- makeImmediate I32 True i
1865 (reg, code) <- getSomeReg x
1866 return (Amode (AddrRegImm reg off) code)
1868 -- optimize addition with 32-bit immediate
-- Immediate too big for 16 bits: ADDIS adds the high-adjusted half into a
-- temp, and the low half becomes the addressing-mode displacement.
1870 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1872 tmp <- getNewRegNat I32
1873 (src, srcCode) <- getSomeReg x
1874 let imm = litToImm lit
1875 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1876 return (Amode (AddrRegImm tmp (LO imm)) code)
-- Bare literal address: LIS high half into a temp, low half as displacement.
1878 getAmode (CmmLit lit)
1880 tmp <- getNewRegNat I32
1881 let imm = litToImm lit
1882 code = unitOL (LIS tmp (HA imm))
1883 return (Amode (AddrRegImm tmp (LO imm)) code)
-- reg + reg  ==>  indexed addressing.
1885 getAmode (CmmMachOp (MO_Add I32) [x, y])
1887 (regX, codeX) <- getSomeReg x
1888 (regY, codeY) <- getSomeReg y
1889 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Catch-all (pattern head elided): compute address into a register.
1893 (reg, code) <- getSomeReg other
1896 return (Amode (AddrRegImm reg off) code)
1897 #endif /* powerpc_TARGET_ARCH */
1899 -- -----------------------------------------------------------------------------
1900 -- getOperand: sometimes any operand will do.
1902 -- getNonClobberedOperand: the value of the operand will remain valid across
1903 -- the computation of an arbitrary expression, unless the expression
1904 -- is computed directly into a register which the operand refers to
1905 -- (see trivialCode where this function is used for an example).
1907 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1909 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1910 #if x86_64_TARGET_ARCH
-- x86_64: non-zero float literals come from a rip-relative constant pool.
1911 getNonClobberedOperand (CmmLit lit)
1912 | isSuitableFloatingPointLit lit = do
1913 lbl <- getNewLabelNat
1914 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1916 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literal: a plain immediate operand, no code.
1918 getNonClobberedOperand (CmmLit lit)
1919 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1920 return (OpImm (litToImm lit), nilOL)
-- Memory operand: allowed directly, but if the amode uses a clobberable
-- real register, first LEA the effective address into a fresh temp.
1921 getNonClobberedOperand (CmmLoad mem pk)
1922 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1923 Amode src mem_code <- getAmode mem
1925 if (amodeCouldBeClobbered src)
1927 tmp <- getNewRegNat wordRep
1928 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1929 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1932 return (OpAddr src', save_code `appOL` mem_code)
-- Fallback: force the value into a non-clobbered register.
1933 getNonClobberedOperand e = do
1934 (reg, code) <- getNonClobberedReg e
1935 return (OpReg reg, code)
1937 amodeCouldBeClobbered :: AddrMode -> Bool
1938 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-- A real register is clobberable exactly when it is a free (caller-usable) reg.
1940 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1941 regClobbered _ = False
1943 -- getOperand: the operand is not required to remain valid across the
1944 -- computation of an arbitrary expression.
1945 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1946 #if x86_64_TARGET_ARCH
-- Same constant-pool trick as getNonClobberedOperand, x86_64 only.
1947 getOperand (CmmLit lit)
1948 | isSuitableFloatingPointLit lit = do
1949 lbl <- getNewLabelNat
1950 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1952 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1954 getOperand (CmmLit lit)
1955 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1956 return (OpImm (litToImm lit), nilOL)
-- Loads can be used as memory operands directly (i386 excludes floats/I64).
1957 getOperand (CmmLoad mem pk)
1958 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1959 Amode src mem_code <- getAmode mem
1960 return (OpAddr src, mem_code)
-- Fallback (pattern head elided): any register will do.
1962 (reg, code) <- getSomeReg e
1963 return (OpReg reg, code)
-- isOperand: can this expression be used as an instruction operand
-- without first being computed into a register?
1965 isOperand :: CmmExpr -> Bool
1966 isOperand (CmmLoad _ _) = True
1967 isOperand (CmmLit lit) = not (is64BitLit lit)
1968 || isSuitableFloatingPointLit lit
1971 -- if we want a floating-point literal as an operand, we can
1972 -- use it directly from memory. However, if the literal is
1973 -- zero, we're better off generating it into a register using
1975 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1976 isSuitableFloatingPointLit _ = False
-- getRegOrMem: like getOperand but never yields an immediate, and the
-- register case must survive arbitrary code (used e.g. by div_code).
1978 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1979 getRegOrMem (CmmLoad mem pk)
1980 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1981 Amode src mem_code <- getAmode mem
1982 return (OpAddr src, mem_code)
-- Fallback (pattern head elided): non-clobbered register.
1984 (reg, code) <- getNonClobberedReg e
1985 return (OpReg reg, code)
1987 #if x86_64_TARGET_ARCH
-- A literal needs 64-bit handling when it does not fit a signed 32-bit
-- immediate.  On i386 the matching definition (elided) is presumably
-- constant False — TODO confirm.
1988 is64BitLit (CmmInt i I64) = is64BitInteger i
1989 -- assume that labels are in the range 0-2^31-1: this assumes the
1990 -- small memory model (see gcc docs, -mcmodel=small).
1992 is64BitLit x = False
-- True iff i falls outside the signed 32-bit range [-2^31, 2^31-1].
1995 is64BitInteger :: Integer -> Bool
1996 is64BitInteger i = i > 0x7fffffff || i < -0x80000000
1998 -- -----------------------------------------------------------------------------
1999 -- The 'CondCode' type: Condition codes passed up the tree.
-- The Bool records whether the comparison was floating-point (affects how
-- the condition must be interpreted by the consumer — TODO confirm).
2001 data CondCode = CondCode Bool Cond InstrBlock
2003 -- Set up a condition code for a conditional branch.
2005 getCondCode :: CmmExpr -> NatM CondCode
2007 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2009 #if alpha_TARGET_ARCH
2010 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2011 #endif /* alpha_TARGET_ARCH */
2013 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2015 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2016 -- yes, they really do seem to want exactly the same!
2018 getCondCode (CmmMachOp mop [x, y])
2019 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2021 MO_Eq F32 -> condFltCode EQQ x y
2022 MO_Ne F32 -> condFltCode NE x y
2024 MO_S_Gt F32 -> condFltCode GTT x y
2025 MO_S_Ge F32 -> condFltCode GE x y
2026 MO_S_Lt F32 -> condFltCode LTT x y
2027 MO_S_Le F32 -> condFltCode LE x y
2029 MO_Eq F64 -> condFltCode EQQ x y
2030 MO_Ne F64 -> condFltCode NE x y
2032 MO_S_Gt F64 -> condFltCode GTT x y
2033 MO_S_Ge F64 -> condFltCode GE x y
2034 MO_S_Lt F64 -> condFltCode LTT x y
2035 MO_S_Le F64 -> condFltCode LE x y
-- Integer comparisons: unsigned ops map to the unsigned condition codes.
2037 MO_Eq rep -> condIntCode EQQ x y
2038 MO_Ne rep -> condIntCode NE x y
2040 MO_S_Gt rep -> condIntCode GTT x y
2041 MO_S_Ge rep -> condIntCode GE x y
2042 MO_S_Lt rep -> condIntCode LTT x y
2043 MO_S_Le rep -> condIntCode LE x y
2045 MO_U_Gt rep -> condIntCode GU x y
2046 MO_U_Ge rep -> condIntCode GEU x y
2047 MO_U_Lt rep -> condIntCode LU x y
2048 MO_U_Le rep -> condIntCode LEU x y
2050 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2052 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2054 #elif powerpc_TARGET_ARCH
2056 -- almost the same as everywhere else - but we need to
2057 -- extend small integers to 32 bit first
2059 getCondCode (CmmMachOp mop [x, y])
2061 MO_Eq F32 -> condFltCode EQQ x y
2062 MO_Ne F32 -> condFltCode NE x y
2064 MO_S_Gt F32 -> condFltCode GTT x y
2065 MO_S_Ge F32 -> condFltCode GE x y
2066 MO_S_Lt F32 -> condFltCode LTT x y
2067 MO_S_Le F32 -> condFltCode LE x y
2069 MO_Eq F64 -> condFltCode EQQ x y
2070 MO_Ne F64 -> condFltCode NE x y
2072 MO_S_Gt F64 -> condFltCode GTT x y
2073 MO_S_Ge F64 -> condFltCode GE x y
2074 MO_S_Lt F64 -> condFltCode LTT x y
2075 MO_S_Le F64 -> condFltCode LE x y
-- As in getRegister (ppc): sub-word operands are widened to I32 before
-- comparing, signed via extendSExpr, unsigned via extendUExpr.
2077 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2078 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2080 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2081 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2082 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2083 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2085 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2086 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2087 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2088 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2090 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2092 getCondCode other = panic "getCondCode(2)(powerpc)"
2098 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2099 -- passed back up the tree.
2101 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2103 #if alpha_TARGET_ARCH
2104 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2105 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2106 #endif /* alpha_TARGET_ARCH */
2108 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2109 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2111 -- memory vs immediate
-- CMP imm, mem: compare straight against the memory operand, no load.
2112 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2113 Amode x_addr x_code <- getAmode x
2116 code = x_code `snocOL`
2117 CMP pk (OpImm imm) (OpAddr x_addr)
2119 return (CondCode False cond code)
-- Comparison against zero: TEST reg,reg is shorter than CMP $0,reg.
2122 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2123 (x_reg, x_code) <- getSomeReg x
2125 code = x_code `snocOL`
2126 TEST pk (OpReg x_reg) (OpReg x_reg)
2128 return (CondCode False cond code)
2130 -- anything vs operand
-- x lives in a non-clobbered reg so it survives the evaluation of y.
2131 condIntCode cond x y | isOperand y = do
2132 (x_reg, x_code) <- getNonClobberedReg x
2133 (y_op, y_code) <- getOperand y
2135 code = x_code `appOL` y_code `snocOL`
2136 CMP (cmmExprRep x) y_op (OpReg x_reg)
2138 return (CondCode False cond code)
2140 -- anything vs anything
-- General case: y into a non-clobbered reg, x as reg-or-mem operand.
2141 condIntCode cond x y = do
2142 (y_reg, y_code) <- getNonClobberedReg y
2143 (x_op, x_code) <- getRegOrMem x
2145 code = y_code `appOL`
2147 CMP (cmmExprRep x) (OpReg y_reg) x_op
2149 return (CondCode False cond code)
2152 #if i386_TARGET_ARCH
-- i386 (x87) float comparison: GCMP performs the comparison itself, so the
-- returned CondCode always tests EQQ (see comment below).  The ASSERT
-- restricts the conditions this clause is prepared to handle.
2153 condFltCode cond x y
2154 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2155 (x_reg, x_code) <- getNonClobberedReg x
2156 (y_reg, y_code) <- getSomeReg y
2158 code = x_code `appOL` y_code `snocOL`
2159 GCMP cond x_reg y_reg
2160 -- The GCMP insn does the test and sets the zero flag if comparable
2161 -- and true. Hence we always supply EQQ as the condition to test.
2162 return (CondCode True EQQ code)
2163 #endif /* i386_TARGET_ARCH */
2165 #if x86_64_TARGET_ARCH
2166 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2167 -- an operand, but the right must be a reg. We can probably do better
2168 -- than this general case...
-- Result CondCode carries the *unsigned* form of the condition (NB(1)),
-- because SSE unordered compares set flags the way unsigned compares do.
2169 condFltCode cond x y = do
2170 (x_reg, x_code) <- getNonClobberedReg x
2171 (y_op, y_code) <- getOperand y
2173 code = x_code `appOL`
2175 CMP (cmmExprRep x) y_op (OpReg x_reg)
2176 -- NB(1): we need to use the unsigned comparison operators on the
2177 -- result of this comparison.
2179 return (CondCode True (condToUnsigned cond) code)
2182 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2184 #if sparc_TARGET_ARCH
-- SPARC: integer compares are done with a subtract into %g0 (SUB with the
-- condition-code-setting flag True), discarding the result.
-- NOTE(review): guard/let/where lines are elided here (numbering gaps
-- 2186 -> 2189, 2207 -> 2209, 2209 -> 2216); the immediate clause presumably
-- had a fits-in-13-bits guard — verify against the original.
2186 condIntCode cond x (CmmLit (CmmInt y rep))
2189 (src1, code) <- getSomeReg x
2191 src2 = ImmInt (fromInteger y)
2192 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2193 return (CondCode False cond code')
2195 condIntCode cond x y = do
2196 (src1, code1) <- getSomeReg x
2197 (src2, code2) <- getSomeReg y
2199 code__2 = code1 `appOL` code2 `snocOL`
2200 SUB False True src1 (RIReg src2) g0
2201 return (CondCode False cond code__2)
-- Float compare: if exactly one operand is F32, promote it to F64 via a
-- temporary (FxTOy) so FCMP compares at a single width.
2204 condFltCode cond x y = do
2205 (src1, code1) <- getSomeReg x
2206 (src2, code2) <- getSomeReg y
2207 tmp <- getNewRegNat F64
2209 promote x = FxTOy F32 F64 x tmp
2216 code1 `appOL` code2 `snocOL`
2217 FCMP True pk1 src1 src2
2218 else if pk1 == F32 then
2219 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2220 FCMP True F64 tmp src2
2222 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2223 FCMP True F64 src1 tmp
2224 return (CondCode True cond code__2)
2226 #endif /* sparc_TARGET_ARCH */
2228 #if powerpc_TARGET_ARCH
2229 -- ###FIXME: I16 and I8!
-- PowerPC: choose CMPL (logical/unsigned) vs CMP (signed) based on the
-- condition's signedness; immediates only when makeImmediate can encode them.
2230 condIntCode cond x (CmmLit (CmmInt y rep))
2231 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2233 (src1, code) <- getSomeReg x
2235 code' = code `snocOL`
2236 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2237 return (CondCode False cond code')
2239 condIntCode cond x y = do
2240 (src1, code1) <- getSomeReg x
2241 (src2, code2) <- getSomeReg y
2243 code' = code1 `appOL` code2 `snocOL`
2244 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2245 return (CondCode False cond code')
-- Float compare: FCMP, then for GE/LE fold the "unordered" CR bit in with
-- CRNOR so NaN comparisons take the false branch.
-- NOTE(review): the other case alternatives (e.g. a default `_ -> code'`)
-- appear to be elided (numbering gap 2254 -> 2257) — confirm upstream.
2247 condFltCode cond x y = do
2248 (src1, code1) <- getSomeReg x
2249 (src2, code2) <- getSomeReg y
2251 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2252 code'' = case cond of -- twiddle CR to handle unordered case
2253 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2254 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2257 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2258 return (CondCode True cond code'')
2260 #endif /* powerpc_TARGET_ARCH */
2262 -- -----------------------------------------------------------------------------
2263 -- Generating assignments
2265 -- Assignments are really at the heart of the whole code generation
2266 -- business. Almost all top-level nodes of any real importance are
2267 -- assignments, which correspond to loads, stores, or register
2268 -- transfers. If we're really lucky, some of the register transfers
2269 -- will go away, because we can use the destination register to
2270 -- complete the code generation for the right hand side. This only
2271 -- fails when the right hand side is forced into a fixed register
2272 -- (e.g. the result of a call).
-- Mem variants store an rhs CmmExpr to an address; Reg variants assign to a
-- CmmReg.  Int vs Flt selects the integer or floating-point code path.
2274 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2275 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2277 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2278 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2280 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2282 #if alpha_TARGET_ARCH
-- Alpha integer assignment, old `thenNat` continuation style.
-- First clause: store to memory (ST); second: register-to-register, where a
-- Fixed source needs an explicit OR-move into the destination.
-- NOTE(review): `let`/`in`, final result lines and the `else` arm are elided
-- (gaps 2294 -> 2298, 2306 -> 2311) — this extract is not complete code.
2284 assignIntCode pk (CmmLoad dst _) src
2285 = getNewRegNat IntRep `thenNat` \ tmp ->
2286 getAmode dst `thenNat` \ amode ->
2287 getRegister src `thenNat` \ register ->
2289 code1 = amodeCode amode []
2290 dst__2 = amodeAddr amode
2291 code2 = registerCode register tmp []
2292 src__2 = registerName register tmp
2293 sz = primRepToSize pk
2294 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2298 assignIntCode pk dst src
2299 = getRegister dst `thenNat` \ register1 ->
2300 getRegister src `thenNat` \ register2 ->
2302 dst__2 = registerName register1 zeroh
2303 code = registerCode register2 dst__2
2304 src__2 = registerName register2 dst__2
2305 code__2 = if isFixed register2
2306 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2311 #endif /* alpha_TARGET_ARCH */
2313 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2315 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2317 -- integer assignment to memory
-- Store src (immediate or register, via get_op_RI) to the computed address.
2318 assignMem_IntCode pk addr src = do
2319 Amode addr code_addr <- getAmode addr
2320 (code_src, op_src) <- get_op_RI src
2322 code = code_src `appOL`
2324 MOV pk op_src (OpAddr addr)
2325 -- NOTE: op_src is stable, so it will still be valid
2326 -- after code_addr. This may involve the introduction
2327 -- of an extra MOV to a temporary register, but we hope
2328 -- the register allocator will get rid of it.
-- Helper: literals that fit become immediates; everything else goes through
-- a non-clobbered register.  NOTE(review): the fallthrough clause head
-- (`get_op_RI op`) is elided at the 2334 -> 2336 numbering gap.
2332 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2333 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2334 = return (nilOL, OpImm (litToImm lit))
2336 = do (reg,code) <- getNonClobberedReg op
2337 return (code, OpReg reg)
2340 -- Assign; dst is a reg, rhs is mem
2341 assignReg_IntCode pk reg (CmmLoad src _) = do
2342 load_code <- intLoadCode (MOV pk) src
2343 return (load_code (getRegisterReg reg))
2345 -- dst is a reg, but src could be anything
2346 assignReg_IntCode pk reg src = do
2347 code <- getAnyReg src
2348 return (code (getRegisterReg reg))
2350 #endif /* i386_TARGET_ARCH */
2352 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2354 #if sparc_TARGET_ARCH
-- SPARC: store via ST; register assignment distinguishes Any (generate into
-- dst directly) from Fixed (copy with an OR-move from the fixed register).
2356 assignMem_IntCode pk addr src = do
2357 (srcReg, code) <- getSomeReg src
2358 Amode dstAddr addr_code <- getAmode addr
2359 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2361 assignReg_IntCode pk reg src = do
2362 r <- getRegister src
2364 Any _ code -> code dst
2365 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2367 dst = getRegisterReg reg
2370 #endif /* sparc_TARGET_ARCH */
2372 #if powerpc_TARGET_ARCH
-- PowerPC: same structure as SPARC above, with MR as the register move.
-- NOTE(review): the `return $ case r of` / `where` scaffolding lines are
-- elided (numbering gaps 2382 -> 2384, 2385 -> 2387).
2374 assignMem_IntCode pk addr src = do
2375 (srcReg, code) <- getSomeReg src
2376 Amode dstAddr addr_code <- getAmode addr
2377 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2379 -- dst is a reg, but src could be anything
2380 assignReg_IntCode pk reg src
2382 r <- getRegister src
2384 Any _ code -> code dst
2385 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2387 dst = getRegisterReg reg
2389 #endif /* powerpc_TARGET_ARCH */
2392 -- -----------------------------------------------------------------------------
2393 -- Floating-point assignments
2395 #if alpha_TARGET_ARCH
-- Alpha float assignment; mirrors assignIntCode above but moves with FMOV.
-- NOTE(review): result lines and the `else` arm are elided here too
-- (gaps 2407 -> 2411, 2419 -> 2424).
2397 assignFltCode pk (CmmLoad dst _) src
2398 = getNewRegNat pk `thenNat` \ tmp ->
2399 getAmode dst `thenNat` \ amode ->
2400 getRegister src `thenNat` \ register ->
2402 code1 = amodeCode amode []
2403 dst__2 = amodeAddr amode
2404 code2 = registerCode register tmp []
2405 src__2 = registerName register tmp
2406 sz = primRepToSize pk
2407 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2411 assignFltCode pk dst src
2412 = getRegister dst `thenNat` \ register1 ->
2413 getRegister src `thenNat` \ register2 ->
2415 dst__2 = registerName register1 zeroh
2416 code = registerCode register2 dst__2
2417 src__2 = registerName register2 dst__2
2418 code__2 = if isFixed register2
2419 then code . mkSeqInstr (FMOV src__2 dst__2)
2424 #endif /* alpha_TARGET_ARCH */
2426 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2428 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2430 -- Floating point assignment to memory
-- i386 stores via the x87 GST instruction; x86_64 uses a plain (SSE) MOV --
-- selected at compile time by the IF_ARCH_i386 macro.
2431 assignMem_FltCode pk addr src = do
2432 (src_reg, src_code) <- getNonClobberedReg src
2433 Amode addr addr_code <- getAmode addr
2435 code = src_code `appOL`
2437 IF_ARCH_i386(GST pk src_reg addr,
2438 MOV pk (OpReg src_reg) (OpAddr addr))
2441 -- Floating point assignment to a register/temporary
2442 assignReg_FltCode pk reg src = do
2443 src_code <- getAnyReg src
2444 return (src_code (getRegisterReg reg))
2446 #endif /* i386_TARGET_ARCH */
2448 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2450 #if sparc_TARGET_ARCH
2452 -- Floating point assignment to memory
-- If the source's rep differs from the target rep pk, convert through tmp1
-- with FxTOy before storing; otherwise store directly.
-- NOTE(review): the `if pk__2 == pk` test line appears elided
-- (2459 -> 2461), as does the final return of code__2.
2453 assignMem_FltCode pk addr src = do
2454 Amode dst__2 code1 <- getAmode addr
2455 (src__2, code2) <- getSomeReg src
2456 tmp1 <- getNewRegNat pk
2458 pk__2 = cmmExprRep src
2459 code__2 = code1 `appOL` code2 `appOL`
2461 then unitOL (ST pk src__2 dst__2)
2462 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2465 -- Floating point assignment to a register/temporary
2466 -- ToDo: Verify correctness
2467 assignReg_FltCode pk reg src = do
2468 r <- getRegister src
2469 v1 <- getNewRegNat pk
2471 Any _ code -> code dst
2472 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2474 dst = getRegisterReg reg
2476 #endif /* sparc_TARGET_ARCH */
2478 #if powerpc_TARGET_ARCH
-- PowerPC: float assignment is identical to the integer path (ST / MR work
-- for FP registers too), so just alias the integer implementations.
2481 assignMem_FltCode = assignMem_IntCode
2482 assignReg_FltCode = assignReg_IntCode
2484 #endif /* powerpc_TARGET_ARCH */
2487 -- -----------------------------------------------------------------------------
2488 -- Generating a non-local jump
2490 -- (If applicable) Do not fill the delay slots here; you will confuse the
2491 -- register allocator.
-- Tail-call-style jump to an arbitrary CmmExpr target (label, load, or reg).
2493 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2495 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2497 #if alpha_TARGET_ARCH
-- Alpha: local asm temps get a direct BR; other labels load the address into
-- pv then JMP.  The dynamic-target clause head is elided (2503 -> 2506 gap).
2499 genJump (CmmLabel lbl)
2500 | isAsmTemp lbl = returnInstr (BR target)
2501 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2503 target = ImmCLbl lbl
2506 = getRegister tree `thenNat` \ register ->
2507 getNewRegNat PtrRep `thenNat` \ tmp ->
2509 dst = registerName register pv
2510 code = registerCode register pv
2511 target = registerName register pv
2513 if isFixed register then
2514 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2516 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2518 #endif /* alpha_TARGET_ARCH */
2520 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2522 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: JMP takes memory, immediate, or register operands directly.
-- NOTE(review): the general clause head (`genJump expr = do`) is elided at
-- the 2529 -> 2532 numbering gap.
2524 genJump (CmmLoad mem pk) = do
2525 Amode target code <- getAmode mem
2526 return (code `snocOL` JMP (OpAddr target))
2528 genJump (CmmLit lit) = do
2529 return (unitOL (JMP (OpImm (litToImm lit))))
2532 (reg,code) <- getSomeReg expr
2533 return (code `snocOL` JMP (OpReg reg))
2535 #endif /* i386_TARGET_ARCH */
2537 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2539 #if sparc_TARGET_ARCH
-- SPARC: both forms append a NOP for the branch delay slot.
2541 genJump (CmmLit (CmmLabel lbl))
2542 = return (toOL [CALL (Left target) 0 True, NOP])
2544 target = ImmCLbl lbl
2548 (target, code) <- getSomeReg tree
2549 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2551 #endif /* sparc_TARGET_ARCH */
2553 #if powerpc_TARGET_ARCH
-- PowerPC: direct JMP for known labels; otherwise move target to CTR and BCTR.
2554 genJump (CmmLit (CmmLabel lbl))
2555 = return (unitOL $ JMP lbl)
2559 (target,code) <- getSomeReg tree
2560 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2561 #endif /* powerpc_TARGET_ARCH */
2564 -- -----------------------------------------------------------------------------
2565 -- Unconditional branches
-- Architecture-independent: mkBranchInstr (RegAllocInfo) produces the
-- target-specific instruction sequence for a jump to a local block.
2567 genBranch :: BlockId -> NatM InstrBlock
2569 genBranch = return . toOL . mkBranchInstr
2571 -- -----------------------------------------------------------------------------
2572 -- Conditional jumps
2575 Conditional jumps are always to local labels, so we can use branch
2576 instructions. We peek at the arguments to decide what kind of
2579 ALPHA: For comparisons with 0, we're laughing, because we can just do
2580 the desired conditional branch.
2582 I386: First, we have to ensure that the condition
2583 codes are set according to the supplied comparison operation.
2585 SPARC: First, we have to ensure that the condition codes are set
2586 according to the supplied comparison operation. We generate slightly
2587 different code for floating point comparisons, because a floating
2588 point operation cannot directly precede a @BF@. We assume the worst
2589 and fill that slot with a @NOP@.
2591 SPARC: Do not fill the delay slots here; you will confuse the register
-- NOTE(review): the `genCondJump` name line of this signature is elided
-- (numbering gap 2591 -> 2597); only the argument lines survive here.
2597 :: BlockId -- the branch target
2598 -> CmmExpr -- the condition on which to branch
2601 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2603 #if alpha_TARGET_ARCH
-- Alpha conditional jumps over old Stix trees.  Strategy: for each StixPrim
-- comparison, pick a compare instruction plus the branch condition to test
-- on its result.  Note the inverted pairs, e.g. (CMP LE, EQQ) means
-- "compute x<=y, branch-if-zero" to realise x>y.
-- NOTE(review): many lines are elided throughout (e.g. most of the Char/Int
-- cmpOp table at 2617..2640, the fltCmpOp body after 2680, `where`/`let`
-- keywords) — treat this block as an incomplete extract.
2605 genCondJump id (StPrim op [x, StInt 0])
2606 = getRegister x `thenNat` \ register ->
2607 getNewRegNat (registerRep register)
2610 code = registerCode register tmp
2611 value = registerName register tmp
2612 pk = registerRep register
2613 target = ImmCLbl lbl
2615 returnSeq code [BI (cmpOp op) value target]
2617 cmpOp CharGtOp = GTT
2619 cmpOp CharEqOp = EQQ
2621 cmpOp CharLtOp = LTT
2630 cmpOp WordGeOp = ALWAYS
2631 cmpOp WordEqOp = EQQ
2633 cmpOp WordLtOp = NEVER
2634 cmpOp WordLeOp = EQQ
2636 cmpOp AddrGeOp = ALWAYS
2637 cmpOp AddrEqOp = EQQ
2639 cmpOp AddrLtOp = NEVER
2640 cmpOp AddrLeOp = EQQ
-- Comparison of a float against literal 0.0: branch on the FP value directly.
2642 genCondJump lbl (StPrim op [x, StDouble 0.0])
2643 = getRegister x `thenNat` \ register ->
2644 getNewRegNat (registerRep register)
2647 code = registerCode register tmp
2648 value = registerName register tmp
2649 pk = registerRep register
2650 target = ImmCLbl lbl
2652 return (code . mkSeqInstr (BF (cmpOp op) value target))
2654 cmpOp FloatGtOp = GTT
2655 cmpOp FloatGeOp = GE
2656 cmpOp FloatEqOp = EQQ
2657 cmpOp FloatNeOp = NE
2658 cmpOp FloatLtOp = LTT
2659 cmpOp FloatLeOp = LE
2660 cmpOp DoubleGtOp = GTT
2661 cmpOp DoubleGeOp = GE
2662 cmpOp DoubleEqOp = EQQ
2663 cmpOp DoubleNeOp = NE
2664 cmpOp DoubleLtOp = LTT
2665 cmpOp DoubleLeOp = LE
-- General float-vs-float comparison: compute FCMP into a temp, then BF on it.
2667 genCondJump lbl (StPrim op [x, y])
2669 = trivialFCode pr instr x y `thenNat` \ register ->
2670 getNewRegNat F64 `thenNat` \ tmp ->
2672 code = registerCode register tmp
2673 result = registerName register tmp
2674 target = ImmCLbl lbl
2676 return (code . mkSeqInstr (BF cond result target))
2678 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2680 fltCmpOp op = case op of
2694 (instr, cond) = case op of
2695 FloatGtOp -> (FCMP TF LE, EQQ)
2696 FloatGeOp -> (FCMP TF LTT, EQQ)
2697 FloatEqOp -> (FCMP TF EQQ, NE)
2698 FloatNeOp -> (FCMP TF EQQ, EQQ)
2699 FloatLtOp -> (FCMP TF LTT, NE)
2700 FloatLeOp -> (FCMP TF LE, NE)
2701 DoubleGtOp -> (FCMP TF LE, EQQ)
2702 DoubleGeOp -> (FCMP TF LTT, EQQ)
2703 DoubleEqOp -> (FCMP TF EQQ, NE)
2704 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2705 DoubleLtOp -> (FCMP TF LTT, NE)
2706 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: CMP into a temp, branch with BI.
2708 genCondJump lbl (StPrim op [x, y])
2709 = trivialCode instr x y `thenNat` \ register ->
2710 getNewRegNat IntRep `thenNat` \ tmp ->
2712 code = registerCode register tmp
2713 result = registerName register tmp
2714 target = ImmCLbl lbl
2716 return (code . mkSeqInstr (BI cond result target))
2718 (instr, cond) = case op of
2719 CharGtOp -> (CMP LE, EQQ)
2720 CharGeOp -> (CMP LTT, EQQ)
2721 CharEqOp -> (CMP EQQ, NE)
2722 CharNeOp -> (CMP EQQ, EQQ)
2723 CharLtOp -> (CMP LTT, NE)
2724 CharLeOp -> (CMP LE, NE)
2725 IntGtOp -> (CMP LE, EQQ)
2726 IntGeOp -> (CMP LTT, EQQ)
2727 IntEqOp -> (CMP EQQ, NE)
2728 IntNeOp -> (CMP EQQ, EQQ)
2729 IntLtOp -> (CMP LTT, NE)
2730 IntLeOp -> (CMP LE, NE)
2731 WordGtOp -> (CMP ULE, EQQ)
2732 WordGeOp -> (CMP ULT, EQQ)
2733 WordEqOp -> (CMP EQQ, NE)
2734 WordNeOp -> (CMP EQQ, EQQ)
2735 WordLtOp -> (CMP ULT, NE)
2736 WordLeOp -> (CMP ULE, NE)
2737 AddrGtOp -> (CMP ULE, EQQ)
2738 AddrGeOp -> (CMP ULT, EQQ)
2739 AddrEqOp -> (CMP EQQ, NE)
2740 AddrNeOp -> (CMP EQQ, EQQ)
2741 AddrLtOp -> (CMP ULT, NE)
2742 AddrLeOp -> (CMP ULE, NE)
2744 #endif /* alpha_TARGET_ARCH */
2746 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2748 #if i386_TARGET_ARCH
-- i386: evaluate the condition into the flags, then a single JXX.
-- (The matching #endif is among the elided lines after 2752.)
2750 genCondJump id bool = do
2751 CondCode _ cond code <- getCondCode bool
2752 return (code `snocOL` JXX cond id)
2756 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2758 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions branch directly; float conditions need extra
-- parity-flag handling for NaN (unordered) results, via plain_test /
-- or_unordered / and_ordered sequences.
-- NOTE(review): the is_float test, the case alternatives and the instruction
-- lists inside plain_test/or_unordered/and_ordered are elided (gaps
-- 2764 -> 2766, 2769 -> 2775, 2775 -> 2778, 2782 -> 2788) — do not rely on
-- this extract for the exact sequences.
2760 genCondJump id bool = do
2761 CondCode is_float cond cond_code <- getCondCode bool
2764 return (cond_code `snocOL` JXX cond id)
2766 lbl <- getBlockIdNat
2768 -- see comment with condFltReg
2769 let code = case cond of
2775 plain_test = unitOL (
2778 or_unordered = toOL [
2782 and_ordered = toOL [
2788 return (cond_code `appOL` code)
2792 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2794 #if sparc_TARGET_ARCH
-- SPARC: BF (float) vs BI (int) branch; the float path inserts a leading NOP
-- because an FP compare may not immediately precede a BF (see note above).
2796 genCondJump (BlockId id) bool = do
2797 CondCode is_float cond code <- getCondCode bool
2802 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2803 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2807 #endif /* sparc_TARGET_ARCH */
2810 #if powerpc_TARGET_ARCH
-- PowerPC: single conditional branch on the condition register set by
-- getCondCode.
2812 genCondJump id bool = do
2813 CondCode is_float cond code <- getCondCode bool
2814 return (code `snocOL` BCC cond id)
2816 #endif /* powerpc_TARGET_ARCH */
2819 -- -----------------------------------------------------------------------------
2820 -- Generating C calls
2822 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2823 -- @get_arg@, which moves the arguments to the correct registers/stack
2824 -- locations. Apart from that, the code is easy.
2826 -- (If applicable) Do not fill the delay slots here; you will confuse the
2827 -- register allocator.
-- NOTE(review): the `genCCall` name line and result type of this signature
-- are elided (gaps around 2828-2830 and after 2833).
2830 :: CmmCallTarget -- function to call
2831 -> [(CmmReg,MachHint)] -- where to put the result
2832 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2833 -> Maybe [GlobalReg] -- volatile regs to save
2836 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2838 #if alpha_TARGET_ARCH
-- Alpha C call: fold get_arg over the args threading (remaining arg-reg
-- pairs, stack offset), then LDA/JSR/LDGP call sequence.
-- NOTE(review): numerous lines elided (gaps at 2844 -> 2846, 2847 -> 2850,
-- 2863 -> 2867, 2869 -> 2871, ...) including `let`s, `else` arms and the
-- end of the block comment — incomplete extract.
2842 genCCall fn cconv result_regs args
2843 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2844 `thenNat` \ ((unused,_), argCode) ->
2846 nRegs = length allArgRegs - length unused
2847 code = asmSeqThen (map ($ []) argCode)
2850 LDA pv (AddrImm (ImmLab (ptext fn))),
2851 JSR ra (AddrReg pv) nRegs,
2852 LDGP gp (AddrReg ra)]
2854 ------------------------
2855 {- Try to get a value into a specific register (or registers) for
2856 a call. The first 6 arguments go into the appropriate
2857 argument register (separate registers for integer and floating
2858 point arguments, but used in lock-step), and the remaining
2859 arguments are dumped to the stack, beginning at 0(sp). Our
2860 first argument is a pair of the list of remaining argument
2861 registers to be assigned for this call and the next stack
2862 offset to use for overflowing arguments. This way,
2863 @get_Arg@ can be applied to all of a call's arguments using
2867 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2868 -> StixTree -- Current argument
2869 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2871 -- We have to use up all of our argument registers first...
2873 get_arg ((iDst,fDst):dsts, offset) arg
2874 = getRegister arg `thenNat` \ register ->
2876 reg = if isFloatingRep pk then fDst else iDst
2877 code = registerCode register reg
2878 src = registerName register reg
2879 pk = registerRep register
2882 if isFloatingRep pk then
2883 ((dsts, offset), if isFixed register then
2884 code . mkSeqInstr (FMOV src fDst)
2887 ((dsts, offset), if isFixed register then
2888 code . mkSeqInstr (OR src (RIReg src) iDst)
2891 -- Once we have run out of argument registers, we move to the
-- Overflow arguments go onto the stack at increasing spRel offsets.
2894 get_arg ([], offset) arg
2895 = getRegister arg `thenNat` \ register ->
2896 getNewRegNat (registerRep register)
2899 code = registerCode register tmp
2900 src = registerName register tmp
2901 pk = registerRep register
2902 sz = primRepToSize pk
2904 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2906 #endif /* alpha_TARGET_ARCH */
2908 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2910 #if i386_TARGET_ARCH
-- i386 C calls.  Three clauses: write barrier (no code), inlinable FP prims
-- (x87 sqrt/sin/cos/tan), and the general push-args / CALL / pop sequence.
-- On Darwin the outgoing-arg area is padded to a 16-byte multiple (ABI
-- stack-alignment requirement); the stack-pointer DELTA is tracked around
-- every push so spill slots stay addressable.
-- NOTE(review): many scaffolding lines (`let`/`in`, case heads, `where`,
-- some list elements) are elided throughout, per the numbering gaps.
2912 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
2913 -- write barrier compiles to no code on x86/x86-64;
2914 -- we keep it this long in order to prevent earlier optimisations.
2916 -- we only cope with a single result for foreign calls
2917 genCCall (CmmPrim op) [(r,_)] args vols = do
2919 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2920 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2922 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2923 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2925 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2926 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
2928 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
2929 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
2931 other_op -> outOfLineFloatOp op r args vols
2933 actuallyInlineFloatOp rep instr [(x,_)]
2934 = do res <- trivialUFCode rep instr x
2936 return (any (getRegisterReg r))
-- General case: push args right-to-left (C convention), call, then the
-- caller pops for ccall while stdcall leaves cleanup to the callee.
2938 genCCall target dest_regs args vols = do
2940 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
2941 #if !darwin_TARGET_OS
2942 tot_arg_size = sum sizes
2944 raw_arg_size = sum sizes
2945 tot_arg_size = roundTo 16 raw_arg_size
2946 arg_pad_size = tot_arg_size - raw_arg_size
2947 delta0 <- getDeltaNat
2948 setDeltaNat (delta0 - arg_pad_size)
2951 push_codes <- mapM push_arg (reverse args)
2952 delta <- getDeltaNat
2955 -- deal with static vs dynamic call targets
2956 (callinsns,cconv) <-
2959 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2960 -> -- ToDo: stdcall arg sizes
2961 return (unitOL (CALL (Left fn_imm) []), conv)
2962 where fn_imm = ImmCLbl lbl
2963 CmmForeignCall expr conv
2964 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2965 ASSERT(dyn_rep == I32)
2966 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2969 #if darwin_TARGET_OS
2971 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2972 DELTA (delta0 - arg_pad_size)]
2973 `appOL` concatOL push_codes
2976 = concatOL push_codes
2977 call = callinsns `appOL`
2979 -- Deallocate parameters after call for ccall;
2980 -- but not for stdcall (callee does it)
2981 (if cconv == StdCallConv || tot_arg_size==0 then [] else
2982 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2984 [DELTA (delta + tot_arg_size)]
2987 setDeltaNat (delta + tot_arg_size)
2990 -- assign the results, if necessary
-- Results come back in eax (ints), edx:eax (I64), or x87 fake0 (floats).
2991 assign_code [] = nilOL
2992 assign_code [(dest,_hint)] =
2994 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2995 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2996 F32 -> unitOL (GMOV fake0 r_dest)
2997 F64 -> unitOL (GMOV fake0 r_dest)
2998 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3000 r_dest_hi = getHiVRegFromLo r_dest
3001 rep = cmmRegRep dest
3002 r_dest = getRegisterReg dest
3003 assign_code many = panic "genCCall.assign_code many"
3005 return (push_code `appOL`
3007 assign_code dest_regs)
-- Round x up to the next multiple of a.
3015 roundTo a x | x `mod` a == 0 = x
3016 | otherwise = x + a - (x `mod` a)
3019 push_arg :: (CmmExpr,MachHint){-current argument-}
3020 -> NatM InstrBlock -- code
3022 push_arg (arg,_hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit halves, high word first.
3023 | arg_rep == I64 = do
3024 ChildCode64 code r_lo <- iselExpr64 arg
3025 delta <- getDeltaNat
3026 setDeltaNat (delta - 8)
3028 r_hi = getHiVRegFromLo r_lo
3030 return ( code `appOL`
3031 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3032 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Floats can't be PUSHed: make room with SUB esp, then GST into the slot.
3037 (code, reg, sz) <- get_op arg
3038 delta <- getDeltaNat
3039 let size = arg_size sz
3040 setDeltaNat (delta-size)
3041 if (case sz of F64 -> True; F32 -> True; _ -> False)
3042 then return (code `appOL`
3043 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3045 GST sz reg (AddrBaseIndex (EABaseReg esp)
3049 else return (code `snocOL`
3050 PUSH I32 (OpReg reg) `snocOL`
3054 arg_rep = cmmExprRep arg
3057 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3059 (reg,code) <- getSomeReg op
3060 return (code, reg, cmmExprRep op)
3062 #endif /* i386_TARGET_ARCH */
3064 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3066 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3067 -> Maybe [GlobalReg] -> NatM InstrBlock
-- Lower an FP CallishMachOp to a real C call on the matching libm routine
-- (sinf/sin, powf/pow, ...).  F64 results go straight to res; otherwise the
-- call lands in a fresh F64 temp which is then assigned (converted) to res.
-- NOTE(review): the F32 branch scaffolding and a `where` line are elided
-- (gaps 3075 -> 3079, 3085 -> 3088).
3068 outOfLineFloatOp mop res args vols
3070 targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3071 let target = CmmForeignCall targetExpr CCallConv
3073 if cmmRegRep res == F64
3075 stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3079 tmp = CmmLocal (LocalReg uq F64)
3081 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3082 code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3083 return (code1 `appOL` code2)
3085 lbl = mkForeignLabel fn Nothing True
-- Map each mach-op to the corresponding C math-library symbol name.
3088 MO_F32_Sqrt -> FSLIT("sqrtf")
3089 MO_F32_Sin -> FSLIT("sinf")
3090 MO_F32_Cos -> FSLIT("cosf")
3091 MO_F32_Tan -> FSLIT("tanf")
3092 MO_F32_Exp -> FSLIT("expf")
3093 MO_F32_Log -> FSLIT("logf")
3095 MO_F32_Asin -> FSLIT("asinf")
3096 MO_F32_Acos -> FSLIT("acosf")
3097 MO_F32_Atan -> FSLIT("atanf")
3099 MO_F32_Sinh -> FSLIT("sinhf")
3100 MO_F32_Cosh -> FSLIT("coshf")
3101 MO_F32_Tanh -> FSLIT("tanhf")
3102 MO_F32_Pwr -> FSLIT("powf")
3104 MO_F64_Sqrt -> FSLIT("sqrt")
3105 MO_F64_Sin -> FSLIT("sin")
3106 MO_F64_Cos -> FSLIT("cos")
3107 MO_F64_Tan -> FSLIT("tan")
3108 MO_F64_Exp -> FSLIT("exp")
3109 MO_F64_Log -> FSLIT("log")
3111 MO_F64_Asin -> FSLIT("asin")
3112 MO_F64_Acos -> FSLIT("acos")
3113 MO_F64_Atan -> FSLIT("atan")
3115 MO_F64_Sinh -> FSLIT("sinh")
3116 MO_F64_Cosh -> FSLIT("cosh")
3117 MO_F64_Tanh -> FSLIT("tanh")
3118 MO_F64_Pwr -> FSLIT("pow")
3120 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3122 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3124 #if x86_64_TARGET_ARCH
-- x86-64 C calls (System V AMD64 convention): first args in integer and SSE
-- registers (load_args), overflow on the stack (push_args, right-to-left),
-- %rsp kept 16-byte aligned at the call, and %al set to the number of SSE
-- registers used, as the ABI requires for possible varargs callees.
-- NOTE(review): scaffolding lines are elided throughout (`let`s, case heads,
-- guard arms — see the numbering gaps); treat as an incomplete extract.
3126 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3127 -- write barrier compiles to no code on x86/x86-64;
3128 -- we keep it this long in order to prevent earlier optimisations.
3130 genCCall (CmmPrim op) [(r,_)] args vols =
3131 outOfLineFloatOp op r args vols
3133 genCCall target dest_regs args vols = do
3135 -- load up the register arguments
3136 (stack_args, aregs, fregs, load_args_code)
3137 <- load_args args allArgRegs allFPArgRegs nilOL
-- Registers actually consumed = the prefix of the avail lists not returned.
3140 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3141 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3142 arg_regs = int_regs_used ++ fp_regs_used
3143 -- for annotating the call instruction with
3145 sse_regs = length fp_regs_used
3147 tot_arg_size = arg_size * length stack_args
3149 -- On entry to the called function, %rsp should be aligned
3150 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3151 -- the return address is 16-byte aligned). In STG land
3152 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3153 -- need to make sure we push a multiple of 16-bytes of args,
3154 -- plus the return address, to get the correct alignment.
3155 -- Urg, this is hard. We need to feed the delta back into
3156 -- the arg pushing code.
3157 (real_size, adjust_rsp) <-
3158 if tot_arg_size `rem` 16 == 0
3159 then return (tot_arg_size, nilOL)
3160 else do -- we need to adjust...
3161 delta <- getDeltaNat
3162 setDeltaNat (delta-8)
3163 return (tot_arg_size+8, toOL [
3164 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3168 -- push the stack args, right to left
3169 push_code <- push_args (reverse stack_args) nilOL
3170 delta <- getDeltaNat
3172 -- deal with static vs dynamic call targets
3173 (callinsns,cconv) <-
3176 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3177 -> -- ToDo: stdcall arg sizes
3178 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3179 where fn_imm = ImmCLbl lbl
3180 CmmForeignCall expr conv
3181 -> do (dyn_r, dyn_c) <- getSomeReg expr
3182 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3185 -- The x86_64 ABI requires us to set %al to the number of SSE
3186 -- registers that contain arguments, if the called routine
3187 -- is a varargs function. We don't know whether it's a
3188 -- varargs function or not, so we have to assume it is.
3190 -- It's not safe to omit this assignment, even if the number
3191 -- of SSE regs in use is zero. If %al is larger than 8
3192 -- on entry to a varargs function, seg faults ensue.
3193 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3195 let call = callinsns `appOL`
3197 -- Deallocate parameters after call for ccall;
3198 -- but not for stdcall (callee does it)
3199 (if cconv == StdCallConv || real_size==0 then [] else
3200 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3202 [DELTA (delta + real_size)]
3205 setDeltaNat (delta + real_size)
3208 -- assign the results, if necessary
-- Results come back in %xmm0 (floats) or %rax (everything else).
3209 assign_code [] = nilOL
3210 assign_code [(dest,_hint)] =
3212 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3213 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3214 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3216 rep = cmmRegRep dest
3217 r_dest = getRegisterReg dest
3218 assign_code many = panic "genCCall.assign_code many"
3220 return (load_args_code `appOL`
3223 assign_eax sse_regs `appOL`
3225 assign_code dest_regs)
3228 arg_size = 8 -- always, at the mo
-- Peel args into available int/FP registers; leftovers are returned for
-- pushing onto the stack.
3230 load_args :: [(CmmExpr,MachHint)]
3231 -> [Reg] -- int regs avail for args
3232 -> [Reg] -- FP regs avail for args
3234 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3235 load_args args [] [] code = return (args, [], [], code)
3236 -- no more regs to use
3237 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3238 -- no more args to push
3239 load_args ((arg,hint) : rest) aregs fregs code
3240 | isFloatingRep arg_rep =
3244 arg_code <- getAnyReg arg
3245 load_args rest aregs rs (code `appOL` arg_code r)
3250 arg_code <- getAnyReg arg
3251 load_args rest rs fregs (code `appOL` arg_code r)
3253 arg_rep = cmmExprRep arg
3256 (args',ars,frs,code') <- load_args rest aregs fregs code
3257 return ((arg,hint):args', ars, frs, code')
-- Stack overflow args: floats are stored via MOV-to-[rsp] then SUB rsp;
-- integers use a plain 64-bit PUSH.
3259 push_args [] code = return code
3260 push_args ((arg,hint):rest) code
3261 | isFloatingRep arg_rep = do
3262 (arg_reg, arg_code) <- getSomeReg arg
3263 delta <- getDeltaNat
3264 setDeltaNat (delta-arg_size)
3265 let code' = code `appOL` toOL [
3266 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3267 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3268 DELTA (delta-arg_size)]
3269 push_args rest code'
3272 -- we only ever generate word-sized function arguments. Promotion
3273 -- has already happened: our Int8# type is kept sign-extended
3274 -- in an Int#, for example.
3275 ASSERT(arg_rep == I64) return ()
3276 (arg_op, arg_code) <- getOperand arg
3277 delta <- getDeltaNat
3278 setDeltaNat (delta-arg_size)
3279 let code' = code `appOL` toOL [PUSH I64 arg_op,
3280 DELTA (delta-arg_size)]
3281 push_args rest code'
3283 arg_rep = cmmExprRep arg
3288 #if sparc_TARGET_ARCH
3290 The SPARC calling convention is an absolute
3291 nightmare. The first 6x32 bits of arguments are mapped into
3292 %o0 through %o5, and the remaining arguments are dumped to the
3293 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3295 If we have to put args on the stack, move %o6==%sp down by
3296 the number of words to go on the stack, to ensure there's enough space.
3298 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3299 16 words above the stack pointer is a word for the address of
3300 a structure return value. I use this as a temporary location
3301 for moving values from float to int regs. Certainly it isn't
3302 safe to put anything in the 16 words starting at %sp, since
3303 this area can get trashed at any time due to window overflows
3304 caused by signal handlers.
3306 A final complication (if the above isn't enough) is that
3307 we can't blithely calculate the arguments one by one into
3308 %o0 .. %o5. Consider the following nested calls:
3312 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3313 the inner call will itself use %o0, which trashes the value put there
3314 in preparation for the outer call. Upshot: we need to calculate the
3315 args into temporary regs, and move those to arg regs or onto the
3316 stack only immediately prior to the call proper. Sigh.
3319 genCCall target dest_regs argsAndHints vols = do
3321 args = map fst argsAndHints
3322 argcode_and_vregs <- mapM arg_to_int_vregs args
3324 (argcodes, vregss) = unzip argcode_and_vregs
3325 n_argRegs = length allArgRegs
3326 n_argRegs_used = min (length vregs) n_argRegs
3327 vregs = concat vregss
3328 -- deal with static vs dynamic call targets
3329 callinsns <- (case target of
3330 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3331 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3332 CmmForeignCall expr conv -> do
3333 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3334 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3336 (res, reduce) <- outOfLineFloatOp mop
3337 lblOrMopExpr <- case res of
3339 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3341 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3342 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3343 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3347 argcode = concatOL argcodes
3348 (move_sp_down, move_sp_up)
3349 = let diff = length vregs - n_argRegs
3350 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3353 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3355 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3356 return (argcode `appOL`
3357 move_sp_down `appOL`
3358 transfer_code `appOL`
3363 -- move args from the integer vregs into which they have been
3364 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3365 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3367 move_final [] _ offset -- all args done
3370 move_final (v:vs) [] offset -- out of aregs; move to stack
3371 = ST I32 v (spRel offset)
3372 : move_final vs [] (offset+1)
3374 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3375 = OR False g0 (RIReg v) a
3376 : move_final vs az offset
3378 -- generate code to calculate an argument, and move it into one
3379 -- or two integer vregs.
3380 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3381 arg_to_int_vregs arg
3382 | (cmmExprRep arg) == I64
3384 (ChildCode64 code r_lo) <- iselExpr64 arg
3386 r_hi = getHiVRegFromLo r_lo
3387 return (code, [r_hi, r_lo])
3390 (src, code) <- getSomeReg arg
3391 tmp <- getNewRegNat (cmmExprRep arg)
3396 v1 <- getNewRegNat I32
3397 v2 <- getNewRegNat I32
3400 FMOV F64 src f0 `snocOL`
3401 ST F32 f0 (spRel 16) `snocOL`
3402 LD I32 (spRel 16) v1 `snocOL`
3403 ST F32 (fPair f0) (spRel 16) `snocOL`
3404 LD I32 (spRel 16) v2
3409 v1 <- getNewRegNat I32
3412 ST F32 src (spRel 16) `snocOL`
3413 LD I32 (spRel 16) v1
3418 v1 <- getNewRegNat I32
3420 code `snocOL` OR False g0 (RIReg src) v1
3424 outOfLineFloatOp mop =
3426 mopExpr <- cmmMakeDynamicReference addImportNat True $
3427 mkForeignLabel functionName Nothing True
3428 let mopLabelOrExpr = case mopExpr of
3429 CmmLit (CmmLabel lbl) -> Left lbl
3431 return (mopLabelOrExpr, reduce)
3433 (reduce, functionName) = case mop of
3434 MO_F32_Exp -> (True, FSLIT("exp"))
3435 MO_F32_Log -> (True, FSLIT("log"))
3436 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3438 MO_F32_Sin -> (True, FSLIT("sin"))
3439 MO_F32_Cos -> (True, FSLIT("cos"))
3440 MO_F32_Tan -> (True, FSLIT("tan"))
3442 MO_F32_Asin -> (True, FSLIT("asin"))
3443 MO_F32_Acos -> (True, FSLIT("acos"))
3444 MO_F32_Atan -> (True, FSLIT("atan"))
3446 MO_F32_Sinh -> (True, FSLIT("sinh"))
3447 MO_F32_Cosh -> (True, FSLIT("cosh"))
3448 MO_F32_Tanh -> (True, FSLIT("tanh"))
3450 MO_F64_Exp -> (False, FSLIT("exp"))
3451 MO_F64_Log -> (False, FSLIT("log"))
3452 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3454 MO_F64_Sin -> (False, FSLIT("sin"))
3455 MO_F64_Cos -> (False, FSLIT("cos"))
3456 MO_F64_Tan -> (False, FSLIT("tan"))
3458 MO_F64_Asin -> (False, FSLIT("asin"))
3459 MO_F64_Acos -> (False, FSLIT("acos"))
3460 MO_F64_Atan -> (False, FSLIT("atan"))
3462 MO_F64_Sinh -> (False, FSLIT("sinh"))
3463 MO_F64_Cosh -> (False, FSLIT("cosh"))
3464 MO_F64_Tanh -> (False, FSLIT("tanh"))
3466 other -> pprPanic "outOfLineFloatOp(sparc) "
3467 (pprCallishMachOp mop)
3469 #endif /* sparc_TARGET_ARCH */
3471 #if powerpc_TARGET_ARCH
3473 #if darwin_TARGET_OS || linux_TARGET_OS
3475 The PowerPC calling convention for Darwin/Mac OS X
3476 is described in Apple's document
3477 "Inside Mac OS X - Mach-O Runtime Architecture".
3479 PowerPC Linux uses the System V Release 4 Calling Convention
3480 for PowerPC. It is described in the
3481 "System V Application Binary Interface PowerPC Processor Supplement".
3483 Both conventions are similar:
3484 Parameters may be passed in general-purpose registers starting at r3, in
3485 floating point registers starting at f1, or on the stack.
3487 But there are substantial differences:
3488 * The number of registers used for parameter passing and the exact set of
3489 nonvolatile registers differs (see MachRegs.lhs).
3490 * On Darwin, stack space is always reserved for parameters, even if they are
3491 passed in registers. The called routine may choose to save parameters from
3492 registers to the corresponding space on the stack.
3493 * On Darwin, a corresponding number of GPRs is skipped when a floating point
3494 parameter is passed in an FPR.
3495 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3496 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3497 Darwin just treats an I64 like two separate I32s (high word first).
3498 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3499 4-byte aligned like everything else on Darwin.
3500 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3501 PowerPC Linux does not agree, so neither do we.
3503 According to both conventions, the parameter area should be part of the
3504 caller's stack frame, allocated in the caller's prologue code (large enough
3505 to hold the parameter lists for all called routines). The NCG already
3506 uses the stack for register spilling, leaving 64 bytes free at the top.
3507 If we need a larger parameter area than that, we just allocate a new stack
3508 frame just before ccalling.
3511 genCCall target dest_regs argsAndHints vols
3512 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3513 -- we rely on argument promotion in the codeGen
3515 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3517 allArgRegs allFPArgRegs
3521 (labelOrExpr, reduceToF32) <- case target of
3522 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3523 CmmForeignCall expr conv -> return (Right expr, False)
3524 CmmPrim mop -> outOfLineFloatOp mop
3526 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3527 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3532 `snocOL` BL lbl usedRegs
3535 (dynReg, dynCode) <- getSomeReg dyn
3537 `snocOL` MTCTR dynReg
3539 `snocOL` BCTRL usedRegs
3542 #if darwin_TARGET_OS
3543 initialStackOffset = 24
3544 -- size of linkage area + size of arguments, in bytes
3545 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3546 map machRepByteWidth argReps
3547 #elif linux_TARGET_OS
3548 initialStackOffset = 8
3549 stackDelta finalStack = roundTo 16 finalStack
3551 args = map fst argsAndHints
3552 argReps = map cmmExprRep args
-- Round x up to the next multiple of a (identity when already aligned).
3554 roundTo a x | x `mod` a == 0 = x
3555 | otherwise = x + a - (x `mod` a)
3557 move_sp_down finalStack
3559 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3562 where delta = stackDelta finalStack
3563 move_sp_up finalStack
3565 toOL [ADD sp sp (RIImm (ImmInt delta)),
3568 where delta = stackDelta finalStack
3571 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3572 passArguments ((arg,I64):args) gprs fprs stackOffset
3573 accumCode accumUsed =
3575 ChildCode64 code vr_lo <- iselExpr64 arg
3576 let vr_hi = getHiVRegFromLo vr_lo
3578 #if darwin_TARGET_OS
3583 (accumCode `appOL` code
3584 `snocOL` storeWord vr_hi gprs stackOffset
3585 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3586 ((take 2 gprs) ++ accumUsed)
-- Move one word of the I64 argument into the next free argument GPR,
-- or, when the argument GPRs are exhausted, store it to its reserved
-- slot in the caller's stack frame (Darwin always reserves the slot).
3588 storeWord vr (gpr:_) offset = MR gpr vr
3589 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3591 #elif linux_TARGET_OS
3592 let stackOffset' = roundTo 8 stackOffset
3593 stackCode = accumCode `appOL` code
3594 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3595 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3596 regCode hireg loreg =
3597 accumCode `appOL` code
3598 `snocOL` MR hireg vr_hi
3599 `snocOL` MR loreg vr_lo
3602 hireg : loreg : regs | even (length gprs) ->
3603 passArguments args regs fprs stackOffset
3604 (regCode hireg loreg) (hireg : loreg : accumUsed)
3605 _skipped : hireg : loreg : regs ->
3606 passArguments args regs fprs stackOffset
3607 (regCode hireg loreg) (hireg : loreg : accumUsed)
3608 _ -> -- only one or no regs left
3609 passArguments args [] fprs (stackOffset'+8)
3613 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3614 | reg : _ <- regs = do
3615 register <- getRegister arg
3616 let code = case register of
3617 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3618 Any _ acode -> acode reg
3622 #if darwin_TARGET_OS
3623 -- The Darwin ABI requires that we reserve stack slots for register parameters
3624 (stackOffset + stackBytes)
3625 #elif linux_TARGET_OS
3626 -- ... the SysV ABI doesn't.
3629 (accumCode `appOL` code)
3632 (vr, code) <- getSomeReg arg
3636 (stackOffset' + stackBytes)
3637 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3640 #if darwin_TARGET_OS
3641 -- stackOffset is at least 4-byte aligned
3642 -- The Darwin ABI is happy with that.
3643 stackOffset' = stackOffset
3645 -- ... the SysV ABI requires 8-byte alignment for doubles.
3646 stackOffset' | rep == F64 = roundTo 8 stackOffset
3647 | otherwise = stackOffset
3649 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3650 (nGprs, nFprs, stackBytes, regs) = case rep of
3651 I32 -> (1, 0, 4, gprs)
3652 #if darwin_TARGET_OS
3653 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3655 F32 -> (1, 1, 4, fprs)
3656 F64 -> (2, 1, 8, fprs)
3657 #elif linux_TARGET_OS
3658 -- ... the SysV ABI doesn't.
3659 F32 -> (0, 1, 4, fprs)
3660 F64 -> (0, 1, 8, fprs)
3663 moveResult reduceToF32 =
3667 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3668 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3669 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3671 | otherwise -> unitOL (MR r_dest r3)
3672 where rep = cmmRegRep dest
3673 r_dest = getRegisterReg dest
3675 outOfLineFloatOp mop =
3677 mopExpr <- cmmMakeDynamicReference addImportNat True $
3678 mkForeignLabel functionName Nothing True
3679 let mopLabelOrExpr = case mopExpr of
3680 CmmLit (CmmLabel lbl) -> Left lbl
3682 return (mopLabelOrExpr, reduce)
3684 (functionName, reduce) = case mop of
3685 MO_F32_Exp -> (FSLIT("exp"), True)
3686 MO_F32_Log -> (FSLIT("log"), True)
3687 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3689 MO_F32_Sin -> (FSLIT("sin"), True)
3690 MO_F32_Cos -> (FSLIT("cos"), True)
3691 MO_F32_Tan -> (FSLIT("tan"), True)
3693 MO_F32_Asin -> (FSLIT("asin"), True)
3694 MO_F32_Acos -> (FSLIT("acos"), True)
3695 MO_F32_Atan -> (FSLIT("atan"), True)
3697 MO_F32_Sinh -> (FSLIT("sinh"), True)
3698 MO_F32_Cosh -> (FSLIT("cosh"), True)
3699 MO_F32_Tanh -> (FSLIT("tanh"), True)
3700 MO_F32_Pwr -> (FSLIT("pow"), True)
3702 MO_F64_Exp -> (FSLIT("exp"), False)
3703 MO_F64_Log -> (FSLIT("log"), False)
3704 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3706 MO_F64_Sin -> (FSLIT("sin"), False)
3707 MO_F64_Cos -> (FSLIT("cos"), False)
3708 MO_F64_Tan -> (FSLIT("tan"), False)
3710 MO_F64_Asin -> (FSLIT("asin"), False)
3711 MO_F64_Acos -> (FSLIT("acos"), False)
3712 MO_F64_Atan -> (FSLIT("atan"), False)
3714 MO_F64_Sinh -> (FSLIT("sinh"), False)
3715 MO_F64_Cosh -> (FSLIT("cosh"), False)
3716 MO_F64_Tanh -> (FSLIT("tanh"), False)
3717 MO_F64_Pwr -> (FSLIT("pow"), False)
3718 other -> pprPanic "genCCall(ppc): unknown callish op"
3719 (pprCallishMachOp other)
3721 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3723 #endif /* powerpc_TARGET_ARCH */
3726 -- -----------------------------------------------------------------------------
3727 -- Generating a table-branch
3729 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3731 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3735 (reg,e_code) <- getSomeReg expr
3736 lbl <- getNewLabelNat
3737 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3738 (tableReg,t_code) <- getSomeReg $ dynRef
3740 jumpTable = map jumpTableEntryRel ids
3742 jumpTableEntryRel Nothing
3743 = CmmStaticLit (CmmInt 0 wordRep)
3744 jumpTableEntryRel (Just (BlockId id))
3745 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3746 where blockLabel = mkAsmTempLabel id
3748 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3749 (EAIndex reg wORD_SIZE) (ImmInt 0))
3751 code = e_code `appOL` t_code `appOL` toOL [
3752 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3753 ADD wordRep op (OpReg tableReg),
3754 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3759 (reg,e_code) <- getSomeReg expr
3760 lbl <- getNewLabelNat
3762 jumpTable = map jumpTableEntry ids
3763 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3764 code = e_code `appOL` toOL [
3765 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3766 JMP_TBL op [ id | Just id <- ids ]
3770 #elif powerpc_TARGET_ARCH
3774 (reg,e_code) <- getSomeReg expr
3775 tmp <- getNewRegNat I32
3776 lbl <- getNewLabelNat
3777 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3778 (tableReg,t_code) <- getSomeReg $ dynRef
3780 jumpTable = map jumpTableEntryRel ids
3782 jumpTableEntryRel Nothing
3783 = CmmStaticLit (CmmInt 0 wordRep)
3784 jumpTableEntryRel (Just (BlockId id))
3785 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3786 where blockLabel = mkAsmTempLabel id
3788 code = e_code `appOL` t_code `appOL` toOL [
3789 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3790 SLW tmp reg (RIImm (ImmInt 2)),
3791 LD I32 tmp (AddrRegReg tableReg tmp),
3792 ADD tmp tmp (RIReg tableReg),
3794 BCTR [ id | Just id <- ids ]
3799 (reg,e_code) <- getSomeReg expr
3800 tmp <- getNewRegNat I32
3801 lbl <- getNewLabelNat
3803 jumpTable = map jumpTableEntry ids
3805 code = e_code `appOL` toOL [
3806 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3807 SLW tmp reg (RIImm (ImmInt 2)),
3808 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3809 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3811 BCTR [ id | Just id <- ids ]
-- Fallback: jump-table generation is not implemented for this target.
3815 genSwitch expr ids = panic "ToDo: genSwitch"
-- Build one absolute jump-table entry: the label of the target basic
-- block, or a zero word for an unfilled (Nothing) slot.
3818 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3819 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3820 where blockLabel = mkAsmTempLabel id
3822 -- -----------------------------------------------------------------------------
3824 -- -----------------------------------------------------------------------------
3827 -- -----------------------------------------------------------------------------
3828 -- 'condIntReg' and 'condFltReg': condition codes into registers
3830 -- Turn those condition codes into integers now (when they appear on
3831 -- the right hand side of an assignment).
3833 -- (If applicable) Do not fill the delay slots here; you will confuse the
3834 -- register allocator.
3836 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3838 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3840 #if alpha_TARGET_ARCH
-- Condition codes are never materialised into registers on Alpha, so
-- reaching either of these is a compiler bug -- hence the panics.
3841 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3842 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3843 #endif /* alpha_TARGET_ARCH */
3845 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3847 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3849 condIntReg cond x y = do
3850 CondCode _ cond cond_code <- condIntCode cond x y
3851 tmp <- getNewRegNat I8
3853 code dst = cond_code `appOL` toOL [
3854 SETCC cond (OpReg tmp),
3855 MOVZxL I8 (OpReg tmp) (OpReg dst)
3858 return (Any I32 code)
3862 #if i386_TARGET_ARCH
3864 condFltReg cond x y = do
3865 CondCode _ cond cond_code <- condFltCode cond x y
3866 tmp <- getNewRegNat I8
3868 code dst = cond_code `appOL` toOL [
3869 SETCC cond (OpReg tmp),
3870 MOVZxL I8 (OpReg tmp) (OpReg dst)
3873 return (Any I32 code)
3877 #if x86_64_TARGET_ARCH
3879 condFltReg cond x y = do
3880 CondCode _ cond cond_code <- condFltCode cond x y
3881 tmp1 <- getNewRegNat wordRep
3882 tmp2 <- getNewRegNat wordRep
3884 -- We have to worry about unordered operands (eg. comparisons
3885 -- against NaN). If the operands are unordered, the comparison
3886 -- sets the parity flag, carry flag and zero flag.
3887 -- All comparisons are supposed to return false for unordered
3888 -- operands except for !=, which returns true.
3890 -- Optimisation: we don't have to test the parity flag if we
3891 -- know the test has already excluded the unordered case: eg >
3892 -- and >= test for a zero carry flag, which can only occur for
3893 -- ordered operands.
3895 -- ToDo: by reversing comparisons we could avoid testing the
3896 -- parity flag in more cases.
3901 NE -> or_unordered dst
3902 GU -> plain_test dst
3903 GEU -> plain_test dst
3904 _ -> and_ordered dst)
3906 plain_test dst = toOL [
3907 SETCC cond (OpReg tmp1),
3908 MOVZxL I8 (OpReg tmp1) (OpReg dst)
3910 or_unordered dst = toOL [
3911 SETCC cond (OpReg tmp1),
3912 SETCC PARITY (OpReg tmp2),
3913 OR I8 (OpReg tmp1) (OpReg tmp2),
3914 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3916 and_ordered dst = toOL [
3917 SETCC cond (OpReg tmp1),
3918 SETCC NOTPARITY (OpReg tmp2),
3919 AND I8 (OpReg tmp1) (OpReg tmp2),
3920 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3923 return (Any I32 code)
3927 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3929 #if sparc_TARGET_ARCH
-- Equality against literal zero, specialised: subcc g0-src sets the
-- carry iff src /= 0, and the following subx folds that carry into a
-- 0/1 value in dst.  NOTE(review): relies on the SPARC subcc/subx
-- carry idiom -- confirm against the SPARC architecture manual.
3931 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3932 (src, code) <- getSomeReg x
3933 tmp <- getNewRegNat I32
3935 code__2 dst = code `appOL` toOL [
3936 SUB False True g0 (RIReg src) g0,
3937 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3938 return (Any I32 code__2)
-- General equality: xor the two operands into dst (zero iff equal),
-- then use the same subcc/subx carry trick as the compare-to-zero case
-- to turn that into a 0/1 result in dst.
3940 condIntReg EQQ x y = do
3941 (src1, code1) <- getSomeReg x
3942 (src2, code2) <- getSomeReg y
3943 tmp1 <- getNewRegNat I32
3944 tmp2 <- getNewRegNat I32
3946 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3947 XOR False src1 (RIReg src2) dst,
3948 SUB False True g0 (RIReg dst) g0,
3949 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3950 return (Any I32 code__2)
3952 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3953 (src, code) <- getSomeReg x
3954 tmp <- getNewRegNat I32
3956 code__2 dst = code `appOL` toOL [
3957 SUB False True g0 (RIReg src) g0,
3958 ADD True False g0 (RIImm (ImmInt 0)) dst]
3959 return (Any I32 code__2)
3961 condIntReg NE x y = do
3962 (src1, code1) <- getSomeReg x
3963 (src2, code2) <- getSomeReg y
3964 tmp1 <- getNewRegNat I32
3965 tmp2 <- getNewRegNat I32
3967 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3968 XOR False src1 (RIReg src2) dst,
3969 SUB False True g0 (RIReg dst) g0,
3970 ADD True False g0 (RIImm (ImmInt 0)) dst]
3971 return (Any I32 code__2)
3973 condIntReg cond x y = do
3974 BlockId lbl1 <- getBlockIdNat
3975 BlockId lbl2 <- getBlockIdNat
3976 CondCode _ cond cond_code <- condIntCode cond x y
3978 code__2 dst = cond_code `appOL` toOL [
3979 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3980 OR False g0 (RIImm (ImmInt 0)) dst,
3981 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3982 NEWBLOCK (BlockId lbl1),
3983 OR False g0 (RIImm (ImmInt 1)) dst,
3984 NEWBLOCK (BlockId lbl2)]
3985 return (Any I32 code__2)
3987 condFltReg cond x y = do
3988 BlockId lbl1 <- getBlockIdNat
3989 BlockId lbl2 <- getBlockIdNat
3990 CondCode _ cond cond_code <- condFltCode cond x y
3992 code__2 dst = cond_code `appOL` toOL [
3994 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3995 OR False g0 (RIImm (ImmInt 0)) dst,
3996 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3997 NEWBLOCK (BlockId lbl1),
3998 OR False g0 (RIImm (ImmInt 1)) dst,
3999 NEWBLOCK (BlockId lbl2)]
4000 return (Any I32 code__2)
4002 #endif /* sparc_TARGET_ARCH */
4004 #if powerpc_TARGET_ARCH
4005 condReg getCond = do
4006 lbl1 <- getBlockIdNat
4007 lbl2 <- getBlockIdNat
4008 CondCode _ cond cond_code <- getCond
4010 {- code dst = cond_code `appOL` toOL [
4019 code dst = cond_code
4023 RLWINM dst dst (bit + 1) 31 31
4026 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4029 (bit, do_negate) = case cond of
4043 return (Any I32 code)
-- Integer and floating-point comparisons share condReg, which turns
-- the generated condition code into a 0/1 integer result.
4045 condIntReg cond x y = condReg (condIntCode cond x y)
4046 condFltReg cond x y = condReg (condFltCode cond x y)
4047 #endif /* powerpc_TARGET_ARCH */
4050 -- -----------------------------------------------------------------------------
4051 -- 'trivial*Code': deal with trivial instructions
4053 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4054 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4055 -- Only look for constants on the right hand side, because that's
4056 -- where the generic optimizer will have put them.
4058 -- Similarly, for unary instructions, we don't have to worry about
4059 -- matching an StInt as the argument, because genericOpt will already
4060 -- have handled the constant-folding.
4064 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4065 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4066 -> Maybe (Operand -> Operand -> Instr)
4067 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4068 -> Maybe (Operand -> Operand -> Instr)
4069 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4070 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4072 -> CmmExpr -> CmmExpr -- the two arguments
4075 #ifndef powerpc_TARGET_ARCH
4078 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4079 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4080 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4081 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4083 -> CmmExpr -> CmmExpr -- the two arguments
4089 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4090 ,IF_ARCH_i386 ((Operand -> Instr)
4091 ,IF_ARCH_x86_64 ((Operand -> Instr)
4092 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4093 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4095 -> CmmExpr -- the one argument
4098 #ifndef powerpc_TARGET_ARCH
4101 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4102 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4103 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4104 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4106 -> CmmExpr -- the one argument
4110 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4112 #if alpha_TARGET_ARCH
4114 trivialCode instr x (StInt y)
4116 = getRegister x `thenNat` \ register ->
4117 getNewRegNat IntRep `thenNat` \ tmp ->
4119 code = registerCode register tmp
4120 src1 = registerName register tmp
4121 src2 = ImmInt (fromInteger y)
4122 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4124 return (Any IntRep code__2)
4126 trivialCode instr x y
4127 = getRegister x `thenNat` \ register1 ->
4128 getRegister y `thenNat` \ register2 ->
4129 getNewRegNat IntRep `thenNat` \ tmp1 ->
4130 getNewRegNat IntRep `thenNat` \ tmp2 ->
4132 code1 = registerCode register1 tmp1 []
4133 src1 = registerName register1 tmp1
4134 code2 = registerCode register2 tmp2 []
4135 src2 = registerName register2 tmp2
4136 code__2 dst = asmSeqThen [code1, code2] .
4137 mkSeqInstr (instr src1 (RIReg src2) dst)
4139 return (Any IntRep code__2)
4142 trivialUCode instr x
4143 = getRegister x `thenNat` \ register ->
4144 getNewRegNat IntRep `thenNat` \ tmp ->
4146 code = registerCode register tmp
4147 src = registerName register tmp
4148 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4150 return (Any IntRep code__2)
4153 trivialFCode _ instr x y
4154 = getRegister x `thenNat` \ register1 ->
4155 getRegister y `thenNat` \ register2 ->
4156 getNewRegNat F64 `thenNat` \ tmp1 ->
4157 getNewRegNat F64 `thenNat` \ tmp2 ->
4159 code1 = registerCode register1 tmp1
4160 src1 = registerName register1 tmp1
4162 code2 = registerCode register2 tmp2
4163 src2 = registerName register2 tmp2
4165 code__2 dst = asmSeqThen [code1 [], code2 []] .
4166 mkSeqInstr (instr src1 src2 dst)
4168 return (Any F64 code__2)
4170 trivialUFCode _ instr x
4171 = getRegister x `thenNat` \ register ->
4172 getNewRegNat F64 `thenNat` \ tmp ->
4174 code = registerCode register tmp
4175 src = registerName register tmp
4176 code__2 dst = code . mkSeqInstr (instr src dst)
4178 return (Any F64 code__2)
4180 #endif /* alpha_TARGET_ARCH */
4182 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4184 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4187 The Rules of the Game are:
4189 * You cannot assume anything about the destination register dst;
4190 it may be anything, including a fixed reg.
4192 * You may compute an operand into a fixed reg, but you may not
4193 subsequently change the contents of that fixed reg. If you
4194 want to do so, first copy the value either to a temporary
4195 or into dst. You are free to modify dst even if it happens
4196 to be a fixed reg -- that's not your problem.
4198 * You cannot assume that a fixed reg will stay live over an
4199 arbitrary computation. The same applies to the dst reg.
4201 * Temporary regs obtained from getNewRegNat are distinct from
4202 each other and from all other regs, and stay live over
4203 arbitrary computations.
4205 --------------------
4207 SDM's version of The Rules:
4209 * If getRegister returns Any, that means it can generate correct
4210 code which places the result in any register, period. Even if that
4211 register happens to be read during the computation.
4213 Corollary #1: this means that if you are generating code for an
4214 operation with two arbitrary operands, you cannot assign the result
4215 of the first operand into the destination register before computing
4216 the second operand. The second operand might require the old value
4217 of the destination register.
4219 Corollary #2: A function might be able to generate more efficient
4220 code if it knows the destination register is a new temporary (and
4221 therefore not read by any of the sub-computations).
4223 * If getRegister returns Any, then the code it generates may modify only:
4224 (a) fresh temporaries
4225 (b) the destination register
4226 (c) known registers (eg. %ecx is used by shifts)
4227 In particular, it may *not* modify global registers, unless the global
4228 register happens to be the destination register.
4231 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4232 | not (is64BitLit lit_a) = do
4233 b_code <- getAnyReg b
4236 = b_code dst `snocOL`
4237 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4239 return (Any rep code)
-- Fallback equation: no usable reversed-operand form for this case, so
-- emit the generic two-operand code (maybe_revinstr is deliberately
-- unused here).
4241 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4243 -- This is re-used for floating pt instructions too.
4244 genTrivialCode rep instr a b = do
4245 (b_op, b_code) <- getNonClobberedOperand b
4246 a_code <- getAnyReg a
4247 tmp <- getNewRegNat rep
4249 -- We want the value of b to stay alive across the computation of a.
4250 -- But, we want to calculate a straight into the destination register,
4251 -- because the instruction only has two operands (dst := dst `op` src).
4252 -- The troublesome case is when the result of b is in the same register
4253 -- as the destination reg. In this case, we have to save b in a
4254 -- new temporary across the computation of a.
4256 | dst `regClashesWithOp` b_op =
4258 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4260 instr (OpReg tmp) (OpReg dst)
4264 instr b_op (OpReg dst)
4266 return (Any rep code)
-- True when the register is read by the operand, either directly or
-- via one of the registers of an addressing mode; used by
-- genTrivialCode to detect destination/source aliasing.
4268 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4269 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4270 reg `regClashesWithOp` _ = False
4274 trivialUCode rep instr x = do
4275 x_code <- getAnyReg x
4281 return (Any rep code)
4285 #if i386_TARGET_ARCH
4287 trivialFCode pk instr x y = do
4288 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4289 (y_reg, y_code) <- getSomeReg y
4294 instr pk x_reg y_reg dst
4296 return (Any pk code)
4300 #if x86_64_TARGET_ARCH
-- On x86_64, floating point goes through the same two-operand scheme
-- as the integer ops, so we simply reuse genTrivialCode.
4302 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4308 trivialUFCode rep instr x = do
4309 (x_reg, x_code) <- getSomeReg x
4315 return (Any rep code)
4317 #endif /* i386_TARGET_ARCH */
4319 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4321 #if sparc_TARGET_ARCH
4323 trivialCode pk instr x (CmmLit (CmmInt y d))
4326 (src1, code) <- getSomeReg x
4327 tmp <- getNewRegNat I32
4329 src2 = ImmInt (fromInteger y)
4330 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4331 return (Any I32 code__2)
4333 trivialCode pk instr x y = do
4334 (src1, code1) <- getSomeReg x
4335 (src2, code2) <- getSomeReg y
4336 tmp1 <- getNewRegNat I32
4337 tmp2 <- getNewRegNat I32
4339 code__2 dst = code1 `appOL` code2 `snocOL`
4340 instr src1 (RIReg src2) dst
4341 return (Any I32 code__2)
4344 trivialFCode pk instr x y = do
4345 (src1, code1) <- getSomeReg x
4346 (src2, code2) <- getSomeReg y
4347 tmp1 <- getNewRegNat (cmmExprRep x)
4348 tmp2 <- getNewRegNat (cmmExprRep y)
4349 tmp <- getNewRegNat F64
4351 promote x = FxTOy F32 F64 x tmp
4358 code1 `appOL` code2 `snocOL`
4359 instr pk src1 src2 dst
4360 else if pk1 == F32 then
4361 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4362 instr F64 tmp src2 dst
4364 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4365 instr F64 src1 tmp dst
4366 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- Unary integer op (SPARC): evaluate the argument into a register and
-- apply the instruction into the destination.
-- NOTE(review): tmp appears unused in the visible code -- candidate
-- for removal, confirm before deleting.
4369 trivialUCode pk instr x = do
4370 (src, code) <- getSomeReg x
4371 tmp <- getNewRegNat pk
4373 code__2 dst = code `snocOL` instr (RIReg src) dst
4374 return (Any pk code__2)
-- Unary floating-point op (SPARC): argument into a register, then one
-- reg-to-reg instruction into the destination.
-- NOTE(review): tmp appears unused in the visible code -- candidate
-- for removal, confirm before deleting.
4377 trivialUFCode pk instr x = do
4378 (src, code) <- getSomeReg x
4379 tmp <- getNewRegNat pk
4381 code__2 dst = code `snocOL` instr src dst
4382 return (Any pk code__2)
4384 #endif /* sparc_TARGET_ARCH */
4386 #if powerpc_TARGET_ARCH
4389 Wolfgang's PowerPC version of The Rules:
4391 A slightly modified version of The Rules to take advantage of the fact
4392 that PowerPC instructions work on all registers and don't implicitly
4393 clobber any fixed registers.
4395 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4397 * If getRegister returns Any, then the code it generates may modify only:
4398 (a) fresh temporaries
4399 (b) the destination register
4400 It may *not* modify global registers, unless the global
4401 register happens to be the destination register.
4402 It may not clobber any other registers. In fact, only ccalls clobber any
4404 Also, it may not modify the counter register (used by genCCall).
4406 Corollary: If a getRegister for a subexpression returns Fixed, you need
4407 not move it to a fresh temporary before evaluating the next subexpression.
4408 The Fixed register won't be modified.
4409 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4411 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4412 the value of the destination register.
4415 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4416 | Just imm <- makeImmediate rep signed y
4418 (src1, code1) <- getSomeReg x
4419 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4420 return (Any rep code)
-- Register/register case: evaluate both operands into registers and
-- combine them with the given three-register instruction.  (The
-- 'signed' flag is only consulted by the immediate-operand equation,
-- via makeImmediate.)
4422 trivialCode rep signed instr x y = do
4423 (src1, code1) <- getSomeReg x
4424 (src2, code2) <- getSomeReg y
4425 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4426 return (Any rep code)
-- | Like trivialCode, but for instructions with no immediate-operand
-- form: both arguments are always forced into registers.
4428 trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4429 -> CmmExpr -> CmmExpr -> NatM Register
4430 trivialCodeNoImm rep instr x y = do
4431 (src1, code1) <- getSomeReg x
4432 (src2, code2) <- getSomeReg y
4433 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4434 return (Any rep code)
-- | trivialUCode: unary counterpart of 'trivialCode' — evaluate the
-- single operand and apply the (dst, src) instruction constructor.
4436 trivialUCode rep instr x = do
4437 (src, code) <- getSomeReg x
4438 let code' dst = code `snocOL` instr dst src
4439 return (Any rep code')
4441 -- There is no "remainder" instruction on the PPC, so we have to do
-- it ourselves: presumably rem = src1 - (src1 `div` src2) * src2; the
-- elided listing lines 4451 and 4453-4454 would hold the divide
-- instruction and the final subtract closing the toOL list — confirm
-- against the full source.
4443 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4445 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4446 -> CmmExpr -> CmmExpr -> NatM Register
4447 remainderCode rep div x y = do
4448 (src1, code1) <- getSomeReg x
4449 (src2, code2) <- getSomeReg y
4450 let code dst = code1 `appOL` code2 `appOL` toOL [
-- (divide instruction elided here); then multiply the quotient in dst
-- back by the divisor:
4452 MULLW dst dst (RIReg src2),
4455 return (Any rep code)
4457 #endif /* powerpc_TARGET_ARCH */
4460 -- -----------------------------------------------------------------------------
4461 -- Coercing to/from integer/floating-point...
4463 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4464 -- conversions. We have to store temporaries in memory to move
4465 -- between the integer and the floating point register sets.
4467 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4468 pretend, on sparc at least, that double and float regs are separate
4469 -- kinds, so the value has to be computed into one kind before being
4470 -- explicitly "converted" to live in the other kind.
-- Type signatures for the int<->FP coercions; each architecture section
-- below supplies its own definitions.
4472 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4473 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
-- SPARC treats float and double registers as separate kinds (see the
-- comment above), so it additionally needs explicit F32<->F64 moves.
4475 #if sparc_TARGET_ARCH
4476 coerceDbl2Flt :: CmmExpr -> NatM Register
4477 coerceFlt2Dbl :: CmmExpr -> NatM Register
4480 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4482 #if alpha_TARGET_ARCH
-- Alpha coerceInt2FP (its header, listing lines 4483-4484, is elided):
-- written in pre-do-notation `thenNat` style.  Crosses from the integer
-- to the FP register set by going through a stack slot (spRel 0): the
-- elided line before 4493 presumably stores @src@, then LD TF reloads it
-- into an FP register, with a convert instruction elided at 4494-4495.
4485 = getRegister x `thenNat` \ register ->
4486 getNewRegNat IntRep `thenNat` \ reg ->
4488 code = registerCode register reg
4489 src = registerName register reg
4491 code__2 dst = code . mkSeqInstrs [
4493 LD TF dst (spRel 0),
4496 return (Any F64 code__2)
-- Alpha coerceFP2Int (header elided at 4497-4499): the mirror image —
-- ST TF spills the FP value to the stack slot; the elided lines
-- (4509-4510) presumably reload it into the integer destination.
4500 = getRegister x `thenNat` \ register ->
4501 getNewRegNat F64 `thenNat` \ tmp ->
4503 code = registerCode register tmp
4504 src = registerName register tmp
4506 code__2 dst = code . mkSeqInstrs [
4508 ST TF tmp (spRel 0),
4511 return (Any IntRep code__2)
4513 #endif /* alpha_TARGET_ARCH */
4515 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4517 #if i386_TARGET_ARCH
-- i386 int->FP: a single G* floating-point pseudo-op; the target
-- MachRep selects GITOF (to F32) or GITOD (to F64).
-- NOTE(review): the `let` keyword (listing line 4521) is elided here.
4519 coerceInt2FP from to x = do
4520 (x_reg, x_code) <- getSomeReg x
4522 opc = case to of F32 -> GITOF; F64 -> GITOD
4523 code dst = x_code `snocOL` opc x_reg dst
4524 -- ToDo: works for non-I32 reps?
4526 return (Any to code)
-- i386 FP->int: symmetric with the above, dispatching on the *source*
-- rep instead (GFTOI for F32, GDTOI for F64).  (`let` at 4532 elided.)
4530 coerceFP2Int from to x = do
4531 (x_reg, x_code) <- getSomeReg x
4533 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4534 code dst = x_code `snocOL` opc x_reg dst
4535 -- ToDo: works for non-I32 reps?
4537 return (Any to code)
4539 #endif /* i386_TARGET_ARCH */
4541 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4543 #if x86_64_TARGET_ARCH
-- x86_64 FP->int: one SSE scalar conversion (CVTSS2SI / CVTSD2SI),
-- chosen by the source rep.  The operand may come from memory via
-- getOperand.  (`let` at listing line 4547 elided.)
4545 coerceFP2Int from to x = do
4546 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4548 opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4549 code dst = x_code `snocOL` opc x_op dst
4551 return (Any to code) -- works even if the destination rep is <I32
-- x86_64 int->FP: the reverse direction (CVTSI2SS / CVTSI2SD), chosen
-- by the destination rep.  (`let` at 4555 elided.)
4553 coerceInt2FP from to x = do
4554 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4556 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4557 code dst = x_code `snocOL` opc x_op dst
4559 return (Any to code) -- works even if the destination rep is <I32
-- x86_64 float<->double: CVTSD2SS / CVTSS2SD on registers, chosen by
-- the target rep.  (`let` at 4564 elided.)
4561 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4562 coerceFP2FP to x = do
4563 (x_reg, x_code) <- getSomeReg x
4565 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4566 code dst = x_code `snocOL` opc x_reg dst
4568 return (Any to code)
4572 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4574 #if sparc_TARGET_ARCH
-- SPARC int->FP: store the integer to a stack slot (spRel (-2)), reload
-- that slot straight into the FP destination register, then convert in
-- place with FxTOy.  (`let` at listing line 4578 elided.)
4576 coerceInt2FP pk1 pk2 x = do
4577 (src, code) <- getSomeReg x
4579 code__2 dst = code `appOL` toOL [
4580 ST pk1 src (spRel (-2)),
4581 LD pk1 (spRel (-2)) dst,
4582 FxTOy pk1 pk2 dst dst]
4583 return (Any pk2 code__2)
-- SPARC FP->int: convert within the FP registers (FxTOy into @tmp@),
-- then bounce through the stack slot to reach the integer register set.
-- NOTE(review): the `let`/`toOL [` opener (listing lines 4590/4592) is
-- elided; @reg@ appears unused in the visible lines — confirm.
4586 coerceFP2Int pk fprep x = do
4587 (src, code) <- getSomeReg x
4588 reg <- getNewRegNat fprep
4589 tmp <- getNewRegNat pk
4591 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4593 FxTOy fprep pk src tmp,
4594 ST pk tmp (spRel (-2)),
4595 LD pk (spRel (-2)) dst]
4596 return (Any pk code__2)
-- Double->float and float->double are each one FxTOy register move,
-- since float and double regs are treated as separate kinds here.
4599 coerceDbl2Flt x = do
4600 (src, code) <- getSomeReg x
4601 return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst))
4604 coerceFlt2Dbl x = do
4605 (src, code) <- getSomeReg x
4606 return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
4608 #endif /* sparc_TARGET_ARCH */
4610 #if powerpc_TARGET_ARCH
-- PPC int->FP: there is no direct int->float instruction, so this uses
-- the classic magic-number trick.  The static data 0x43300000/0x80000000
-- is presumably the double 2^52 + 2^31.  The code builds the same bit
-- pattern from the operand — XORIS flips the integer's sign bit, the
-- low word goes to spRel 3 and the 0x4330 high word to spRel 2 — then
-- reloads the pair as an F64 and FSUBs the magic constant, leaving the
-- signed integer value as a double.
-- NOTE(review): several listing lines are elided (the `let` at 4618,
-- the LDATA emitting the constant at 4620-4621, the load of the
-- constant into dst at 4630, and the catch-all `_ -> nilOL` arms of
-- maybe_exts/maybe_frsp at 4637/4640) — confirm against full source.
4611 coerceInt2FP fromRep toRep x = do
4612 (src, code) <- getSomeReg x
4613 lbl <- getNewLabelNat
4614 itmp <- getNewRegNat I32
4615 ftmp <- getNewRegNat F64
4616 dynRef <- cmmMakeDynamicReference addImportNat False lbl
4617 Amode addr addr_code <- getAmode dynRef
4619 code' dst = code `appOL` maybe_exts `appOL` toOL [
4622 CmmStaticLit (CmmInt 0x43300000 I32),
4623 CmmStaticLit (CmmInt 0x80000000 I32)],
4624 XORIS itmp src (ImmInt 0x8000),
4625 ST I32 itmp (spRel 3),
4626 LIS itmp (ImmInt 0x4330),
4627 ST I32 itmp (spRel 2),
4628 LD F64 ftmp (spRel 2)
4629 ] `appOL` addr_code `appOL` toOL [
4631 FSUB F64 dst ftmp dst
4632 ] `appOL` maybe_frsp dst
-- Sign-extend sub-word sources before use.
4634 maybe_exts = case fromRep of
4635 I8 -> unitOL $ EXTS I8 src src
4636 I16 -> unitOL $ EXTS I16 src src
-- Round down to single precision when the target is F32.
4638 maybe_frsp dst = case toRep of
4639 F32 -> unitOL $ FRSP dst dst
4641 return (Any toRep code')
-- PPC FP->int: convert to an integer held in an FP register (the
-- convert instruction at listing line 4650 is elided — presumably
-- FCTIWZ), store the 64-bit FP value to the stack, and read back the
-- low 32-bit word, which holds the result.
4643 coerceFP2Int fromRep toRep x = do
4644 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4645 (src, code) <- getSomeReg x
4646 tmp <- getNewRegNat F64
4648 code' dst = code `appOL` toOL [
4649 -- convert to int in FP reg
4651 -- store value (64bit) from FP to stack
4652 ST F64 tmp (spRel 2),
4653 -- read low word of value (high word is undefined)
4654 LD I32 dst (spRel 3)]
4655 return (Any toRep code')
4656 #endif /* powerpc_TARGET_ARCH */
4659 -- -----------------------------------------------------------------------------
4660 -- eXTRA_STK_ARGS_HERE
4662 -- We (allegedly) put the first six C-call arguments in registers;
4663 -- where do we start putting the rest of them?
4665 -- Moved from MachInstrs (SDM):
4667 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4668 eXTRA_STK_ARGS_HERE :: Int
4670 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))