1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
23 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
25 -- Our intermediate code:
26 import PprCmm ( pprExpr )
32 import StaticFlags ( opt_PIC )
33 import ForeignCall ( CCallConv(..) )
37 import qualified Outputable
39 import FastTypes ( isFastTrue )
40 import Constants ( wORD_SIZE )
43 import Outputable ( assertPanic )
44 import TRACE ( trace )
47 import Control.Monad ( mapAndUnzipM )
48 import Maybe ( fromJust )
52 -- -----------------------------------------------------------------------------
53 -- Top-level of the instruction selector
55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
56 -- They are really trees of insns to facilitate fast appending, where a
57 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- NOTE(review): the tail of this comment is missing from this extraction;
-- presumably it ends "...order" -- confirm against upstream.
60 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm entity.  A 'CmmProc' has each of its
-- basic blocks selected into native code (collecting any floated-out
-- statics); 'CmmData' passes through untranslated.
62 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
63 cmmTopCodeGen (CmmProc info lab params blocks) = do
64 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
65 picBaseMb <- getPicBaseMaybeNat
66 let proc = CmmProc info lab params (concat nat_blocks)
67 tops = proc : concat statics
-- NOTE(review): the 'case picBaseMb of' scrutinee line appears to be
-- missing from this extraction; the two alternatives below require it.
69 Just picBase -> initializePicBase picBase tops
70 Nothing -> return tops
72 cmmTopCodeGen (CmmData sec dat) = do
73 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Select instructions for one basic block.  Instruction selection may
-- introduce new block boundaries (NEWBLOCK) and floated data (LDATA);
-- this re-splits the flat instruction stream back into proper blocks.
75 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
76 basicBlockCodeGen (BasicBlock id stmts) = do
77 instrs <- stmtsToInstrs stmts
78 -- code generation may introduce new basic block boundaries, which
79 -- are indicated by the NEWBLOCK instruction. We must split up the
80 -- instruction stream into basic blocks again. Also, we extract
-- NOTE(review): a couple of comment/'where' lines seem to be missing
-- here in this extraction -- verify against upstream.
83 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- Right fold: a NEWBLOCK closes off the instructions accumulated so far
-- into a fresh block; LDATA is floated out to the statics list.
85 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
86 = ([], BasicBlock id instrs : blocks, statics)
87 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
88 = (instrs, blocks, CmmData sec dat:statics)
89 mkBlocks instr (instrs,blocks,statics)
90 = (instr:instrs, blocks, statics)
92 return (BasicBlock id top : other_blocks, statics)
-- | Select instructions for a list of statements, concatenating the
-- per-statement instruction trees.
94 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the equation's left-hand side ('stmtsToInstrs stmts')
-- is missing from this extraction.
96 = do instrss <- mapM stmtToInstrs stmts
97 return (concatOL instrss)
-- | Dispatch on a single Cmm statement.  Assignments split three ways:
-- floating-point, 64-bit-on-32-bit (I64), and ordinary integer.
99 stmtToInstrs :: CmmStmt -> NatM InstrBlock
100 stmtToInstrs stmt = case stmt of
101 CmmNop -> return nilOL
102 CmmComment s -> return (unitOL (COMMENT s))
-- NOTE(review): the 'CmmAssign reg src' case head is missing from this
-- extraction; the guards below belong to it.
105 | isFloatingRep kind -> assignReg_FltCode kind reg src
106 #if WORD_SIZE_IN_BITS==32
107 | kind == I64 -> assignReg_I64Code reg src
109 | otherwise -> assignReg_IntCode kind reg src
110 where kind = cmmRegRep reg
-- NOTE(review): likewise the 'CmmStore addr src' case head (and the
-- matching #endif lines) are missing here.
113 | isFloatingRep kind -> assignMem_FltCode kind addr src
114 #if WORD_SIZE_IN_BITS==32
115 | kind == I64 -> assignMem_I64Code addr src
117 | otherwise -> assignMem_IntCode kind addr src
118 where kind = cmmExprRep src
120 CmmCall target result_regs args vols
121 -> genCCall target result_regs args vols
123 CmmBranch id -> genBranch id
124 CmmCondBranch arg id -> genCondJump id arg
125 CmmSwitch arg ids -> genSwitch arg ids
126 CmmJump arg params -> genJump arg
128 -- -----------------------------------------------------------------------------
129 -- General things for putting together code sequences
131 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
132 -- CmmExprs into CmmRegOff?
-- Rewrites reg+offset into an explicit MO_Add at the register's rep.
-- NOTE(review): no catch-all equation is visible; presumably callers only
-- ever pass CmmRegOff -- confirm, else this is partial.
133 mangleIndexTree :: CmmExpr -> CmmExpr
134 mangleIndexTree (CmmRegOff reg off)
135 = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
136 where rep = cmmRegRep reg
138 -- -----------------------------------------------------------------------------
139 -- Code gen for 64-bit arithmetic on 32-bit platforms
142 Simple support for generating 64-bit code (ie, 64 bit values and 64
143 bit assignments) on 32-bit platforms. Unlike the main code generator
144 we merely shoot for generating working code as simply as possible, and
145 pay little attention to code quality. Specifically, there is no
146 attempt to deal cleverly with the fixed-vs-floating register
147 distinction; all values are generated into (pairs of) floating
148 registers, even if this would mean some redundant reg-reg moves as a
149 result. Only one of the VRegUniques is returned, since it will be
150 of the VRegUniqueLo form, and the upper-half VReg can be determined
151 by applying getHiVRegFromLo to it.
-- Result of 64-bit instruction selection on a 32-bit target: the code,
-- plus the low-half register (high half derived via getHiVRegFromLo).
154 data ChildCode64 -- a.k.a "Register64"
-- NOTE(review): the constructor line ('= ChildCode64 InstrBlock' or
-- similar) is missing from this extraction.
157 Reg -- the lower 32-bit temporary which contains the
158 -- result; use getHiVRegFromLo to find the other
159 -- VRegUnique. Rules of this simplified insn
160 -- selection game are therefore that the returned
161 -- Reg may be modified
163 #if WORD_SIZE_IN_BITS==32
164 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
165 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
168 #ifndef x86_64_TARGET_ARCH
169 iselExpr64 :: CmmExpr -> NatM ChildCode64
172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value as two 32-bit little-endian halves.
176 assignMem_I64Code addrTree valueTree = do
177 Amode addr addr_code <- getAmode addrTree
178 ChildCode64 vcode rlo <- iselExpr64 valueTree
180 rhi = getHiVRegFromLo rlo
182 -- Little-endian store
183 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
-- High half goes at addr+4; fromJust assumes the amode can take the
-- offset -- TODO(review) confirm addrOffset never fails here.
184 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
186 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair with two
-- 32-bit reg-reg moves (low then high).
189 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
190 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
192 r_dst_lo = mkVReg u_dst I32
193 r_dst_hi = getHiVRegFromLo r_dst_lo
194 r_src_hi = getHiVRegFromLo r_src_lo
195 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
196 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
199 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers are legal 64-bit assignment targets here.
202 assignReg_I64Code lvalue valueTree
203 = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386 64-bit selection.  Literal: split into low/high 32-bit words.
207 iselExpr64 (CmmLit (CmmInt i _)) = do
208 (rlo,rhi) <- getNewRegPairNat I32
210 r = fromIntegral (fromIntegral i :: Word32)
211 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
213 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
214 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
217 return (ChildCode64 code rlo)
-- Load: two 32-bit loads, high half from addr+4 (little-endian).
219 iselExpr64 (CmmLoad addrTree I64) = do
220 Amode addr addr_code <- getAmode addrTree
221 (rlo,rhi) <- getNewRegPairNat I32
223 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
224 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
227 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Local I64 register: no code, just hand back its low-half vreg.
231 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
232 = return (ChildCode64 nilOL (mkVReg vu I32))
234 -- we handle addition, but rather badly
-- Add immediate: ADD on the low word, ADC to propagate the carry.
235 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
236 ChildCode64 code1 r1lo <- iselExpr64 e1
237 (rlo,rhi) <- getNewRegPairNat I32
239 r = fromIntegral (fromIntegral i :: Word32)
240 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
241 r1hi = getHiVRegFromLo r1lo
243 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
244 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
245 MOV I32 (OpReg r1hi) (OpReg rhi),
246 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
248 return (ChildCode64 code rlo)
-- General add: same ADD/ADC pattern with both operands in registers.
250 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
251 ChildCode64 code1 r1lo <- iselExpr64 e1
252 ChildCode64 code2 r2lo <- iselExpr64 e2
253 (rlo,rhi) <- getNewRegPairNat I32
255 r1hi = getHiVRegFromLo r1lo
256 r2hi = getHiVRegFromLo r2lo
259 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
260 ADD I32 (OpReg r2lo) (OpReg rlo),
261 MOV I32 (OpReg r1hi) (OpReg rhi),
262 ADC I32 (OpReg r2hi) (OpReg rhi) ]
264 return (ChildCode64 code rlo)
-- Anything else is unsupported 64-bit selection on i386.
267 = pprPanic "iselExpr64(i386)" (ppr expr)
269 #endif /* i386_TARGET_ARCH */
271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
273 #if sparc_TARGET_ARCH
-- sparc: big-endian, so the HIGH half goes at offset 0, low at +4 --
-- note the opposite order from the i386 code above.
275 assignMem_I64Code addrTree valueTree
276 = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vrlo) ->
277 getRegister addrTree `thenNat` \ register_addr ->
278 getNewRegNat IntRep `thenNat` \ t_addr ->
279 let rlo = VirtualRegI vrlo
280 rhi = getHiVRegFromLo rlo
281 code_addr = registerCode register_addr t_addr
282 reg_addr = registerName register_addr t_addr
284 mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
285 mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
287 return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- sparc register-to-register 64-bit assign: two OR-with-%g0 moves.
290 assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
291 = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
293 r_dst_lo = mkVReg u_dst IntRep
294 r_src_lo = VirtualRegI vr_src_lo
295 r_dst_hi = getHiVRegFromLo r_dst_lo
296 r_src_hi = getHiVRegFromLo r_src_lo
297 mov_lo = mkMOV r_src_lo r_dst_lo
298 mov_hi = mkMOV r_src_hi r_dst_hi
-- "mov" on sparc is OR with the always-zero register g0.
299 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
302 vcode `snocOL` mov_hi `snocOL` mov_lo
304 assignReg_I64Code lvalue valueTree
305 = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
309 -- Don't delete this -- it's very handy for debugging.
311 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
312 -- = panic "iselExpr64(???)"
-- sparc 64-bit selection.  Load: big-endian, high half at offset 0.
314 iselExpr64 (CmmLoad I64 addrTree)
315 = getRegister addrTree `thenNat` \ register_addr ->
316 getNewRegNat IntRep `thenNat` \ t_addr ->
317 getNewRegNat IntRep `thenNat` \ rlo ->
318 let rhi = getHiVRegFromLo rlo
319 code_addr = registerCode register_addr t_addr
320 reg_addr = registerName register_addr t_addr
321 mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
322 mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
325 ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- Local I64 register: copy both halves into a fresh pair.
-- NOTE(review): binds 'uq' but uses 'vu' below -- looks like a
-- mismatch in this extraction; verify against upstream.
329 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
330 = getNewRegNat IntRep `thenNat` \ r_dst_lo ->
331 let r_dst_hi = getHiVRegFromLo r_dst_lo
332 r_src_lo = mkVReg vu IntRep
333 r_src_hi = getHiVRegFromLo r_src_lo
334 mov_lo = mkMOV r_src_lo r_dst_lo
335 mov_hi = mkMOV r_src_hi r_dst_hi
336 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
339 ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
-- 64-bit call result arrives in %o0 (high) / %o1 (low) per the sparc
-- calling convention; copy into a fresh register pair.
342 iselExpr64 (StCall fn cconv I64 args)
343 = genCCall fn cconv kind args `thenNat` \ call ->
344 getNewRegNat IntRep `thenNat` \ r_dst_lo ->
345 let r_dst_hi = getHiVRegFromLo r_dst_lo
346 mov_lo = mkMOV o0 r_dst_lo
347 mov_hi = mkMOV o1 r_dst_hi
348 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
351 ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
352 (getVRegUnique r_dst_lo)
356 = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
358 #endif /* sparc_TARGET_ARCH */
360 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
362 #if powerpc_TARGET_ARCH
-- powerpc: compute addressing modes for both halves of a 64-bit slot
-- (high half at the base address, low half at +4: big-endian).
364 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
365 getI64Amodes addrTree = do
366 Amode hi_addr addr_code <- getAmode addrTree
367 case addrOffset hi_addr 4 of
368 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
-- If the amode can't absorb a +4 offset, force the address into a
-- register and use reg+imm modes for both halves.
369 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
370 return (AddrRegImm hi_ptr (ImmInt 0),
371 AddrRegImm hi_ptr (ImmInt 4),
-- powerpc: store a 64-bit value as two 32-bit halves via getI64Amodes.
374 assignMem_I64Code addrTree valueTree = do
375 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
376 ChildCode64 vcode rlo <- iselExpr64 valueTree
378 rhi = getHiVRegFromLo rlo
381 mov_hi = ST I32 rhi hi_addr
382 mov_lo = ST I32 rlo lo_addr
384 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- powerpc register-to-register 64-bit assign: two MR (move register)
-- instructions, one per half.
386 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
387 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
389 r_dst_lo = mkVReg u_dst I32
390 r_dst_hi = getHiVRegFromLo r_dst_lo
391 r_src_hi = getHiVRegFromLo r_src_lo
392 mov_lo = MR r_dst_lo r_src_lo
393 mov_hi = MR r_dst_hi r_src_hi
396 vcode `snocOL` mov_lo `snocOL` mov_hi
399 assignReg_I64Code lvalue valueTree
400 = panic "assignReg_I64Code(powerpc): invalid lvalue"
403 -- Don't delete this -- it's very handy for debugging.
405 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
406 -- = panic "iselExpr64(???)"
-- powerpc 64-bit selection.  Load: two 32-bit loads via getI64Amodes.
408 iselExpr64 (CmmLoad addrTree I64) = do
409 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
410 (rlo, rhi) <- getNewRegPairNat I32
411 let mov_hi = LD I32 rhi hi_addr
412 mov_lo = LD I32 rlo lo_addr
413 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
416 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
417 = return (ChildCode64 nilOL (mkVReg vu I32))
-- Literal: build each 32-bit half from two 16-bit halves with LIS/OR.
419 iselExpr64 (CmmLit (CmmInt i _)) = do
420 (rlo,rhi) <- getNewRegPairNat I32
422 half0 = fromIntegral (fromIntegral i :: Word16)
423 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
424 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
425 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
428 LIS rlo (ImmInt half1),
429 OR rlo rlo (RIImm $ ImmInt half0),
430 LIS rhi (ImmInt half3),
-- NOTE(review): this ORs half2 into rlo, not rhi -- the high word
-- never receives its low 16 bits.  Looks like a bug; verify upstream
-- whether this should read 'OR rhi rhi (RIImm $ ImmInt half2)'.
431 OR rlo rlo (RIImm $ ImmInt half2)
434 return (ChildCode64 code rlo)
-- Add: ADDC (carrying) on the low halves, then add-with-carry on high.
436 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
437 ChildCode64 code1 r1lo <- iselExpr64 e1
438 ChildCode64 code2 r2lo <- iselExpr64 e2
439 (rlo,rhi) <- getNewRegPairNat I32
441 r1hi = getHiVRegFromLo r1lo
442 r2hi = getHiVRegFromLo r2lo
445 toOL [ ADDC rlo r1lo r2lo,
448 return (ChildCode64 code rlo)
451 = pprPanic "iselExpr64(powerpc)" (ppr expr)
453 #endif /* powerpc_TARGET_ARCH */
456 -- -----------------------------------------------------------------------------
457 -- The 'Register' type
459 -- 'Register's passed up the tree. If the stix code forces the register
460 -- to live in a pre-decided machine register, it comes out as @Fixed@;
461 -- otherwise, it comes out as @Any@, and the parent can decide which
462 -- register to put it in.
-- NOTE(review): the 'data Register' head line is missing from this
-- extraction; only the two constructors are visible.
465 = Fixed MachRep Reg InstrBlock
466 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register', keeping the
-- register\/code payload exactly as it was.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep =
  case register of
    Fixed _ reg code -> Fixed rep reg code
    Any _ codefn -> Any rep codefn
473 -- -----------------------------------------------------------------------------
474 -- Utils based on getRegister, below
476 -- The dual to getAnyReg: compute an expression into a register, but
477 -- we don't mind which one it is.
478 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head and the 'case'/'Fixed' lines are
-- missing from this extraction; only the 'Any' path is visible, which
-- materialises the result into a fresh virtual register.
480 r <- getRegister expr
483 tmp <- getNewRegNat rep
484 return (tmp, code tmp)
488 -- -----------------------------------------------------------------------------
489 -- Grab the Reg for a CmmReg
491 getRegisterReg :: CmmReg -> Reg
-- Local registers map to virtual registers.
-- NOTE(review): this equation's right-hand side (presumably
-- '= mkVReg u pk') is missing from this extraction.
493 getRegisterReg (CmmLocal (LocalReg u pk))
-- Globals must map onto a real machine register at this stage; a
-- memory-mapped global here is a panic.
496 getRegisterReg (CmmGlobal mid)
497 = case get_GlobalReg_reg_or_addr mid of
498 Left (RealReg rrno) -> RealReg rrno
499 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
500 -- By this stage, the only MagicIds remaining should be the
501 -- ones which map to a real machine register on this
502 -- platform. Hence ...
505 -- -----------------------------------------------------------------------------
506 -- Generate code to get a subtree into a Register
508 -- Don't delete this -- it's very handy for debugging.
510 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
511 -- = panic "getRegister(???)"
513 getRegister :: CmmExpr -> NatM Register
-- A plain register reference: no code, result is Fixed in that reg.
515 getRegister (CmmReg reg)
516 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
-- reg+offset is first rewritten into an explicit MO_Add.
518 getRegister tree@(CmmRegOff _ _)
519 = getRegister (mangleIndexTree tree)
-- NOTE(review): the '= do' line for this clause is missing from this
-- extraction.
521 getRegister CmmPicBaseReg
523 reg <- getPicBaseNat wordRep
524 return (Fixed wordRep reg nilOL)
526 -- end of machine-"independent" bit; here we go on the rest...
528 #if alpha_TARGET_ARCH
-- Alpha section.  This still consumes the old Stix-style constructors
-- (StDouble, StPrim, StInt, StCall) rather than Cmm ones -- it predates
-- the Cmm conversion and is likely bit-rotted; verify before enabling.
-- Double literal: emit it into a read-only data block and load it.
530 getRegister (StDouble d)
531 = getBlockIdNat `thenNat` \ lbl ->
532 getNewRegNat PtrRep `thenNat` \ tmp ->
533 let code dst = mkSeqInstrs [
534 LDATA RoDataSegment lbl [
535 DATA TF [ImmLab (rational d)]
537 LDA tmp (AddrImm (ImmCLbl lbl)),
538 LD TF dst (AddrReg tmp)]
540 return (Any F64 code)
542 getRegister (StPrim primop [x]) -- unary PrimOps
544 IntNegOp -> trivialUCode (NEG Q False) x
546 NotOp -> trivialUCode NOT x
548 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
549 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
551 OrdOp -> coerceIntCode IntRep x
554 Float2IntOp -> coerceFP2Int x
555 Int2FloatOp -> coerceInt2FP pr x
556 Double2IntOp -> coerceFP2Int x
557 Int2DoubleOp -> coerceInt2FP pr x
559 Double2FloatOp -> coerceFltCode x
560 Float2DoubleOp -> coerceFltCode x
-- Transcendental ops have no alpha instructions; call out to libm.
562 other_op -> getRegister (StCall fn CCallConv F64 [x])
564 fn = case other_op of
565 FloatExpOp -> FSLIT("exp")
566 FloatLogOp -> FSLIT("log")
567 FloatSqrtOp -> FSLIT("sqrt")
568 FloatSinOp -> FSLIT("sin")
569 FloatCosOp -> FSLIT("cos")
570 FloatTanOp -> FSLIT("tan")
571 FloatAsinOp -> FSLIT("asin")
572 FloatAcosOp -> FSLIT("acos")
573 FloatAtanOp -> FSLIT("atan")
574 FloatSinhOp -> FSLIT("sinh")
575 FloatCoshOp -> FSLIT("cosh")
576 FloatTanhOp -> FSLIT("tanh")
577 DoubleExpOp -> FSLIT("exp")
578 DoubleLogOp -> FSLIT("log")
579 DoubleSqrtOp -> FSLIT("sqrt")
580 DoubleSinOp -> FSLIT("sin")
581 DoubleCosOp -> FSLIT("cos")
582 DoubleTanOp -> FSLIT("tan")
583 DoubleAsinOp -> FSLIT("asin")
584 DoubleAcosOp -> FSLIT("acos")
585 DoubleAtanOp -> FSLIT("atan")
586 DoubleSinhOp -> FSLIT("sinh")
587 DoubleCoshOp -> FSLIT("cosh")
588 DoubleTanhOp -> FSLIT("tanh")
590 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Dyadic primops.  Note the argument swaps (y x) that implement
-- Gt/Ge via the Lt/Le comparison instructions.
592 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
594 CharGtOp -> trivialCode (CMP LTT) y x
595 CharGeOp -> trivialCode (CMP LE) y x
596 CharEqOp -> trivialCode (CMP EQQ) x y
597 CharNeOp -> int_NE_code x y
598 CharLtOp -> trivialCode (CMP LTT) x y
599 CharLeOp -> trivialCode (CMP LE) x y
601 IntGtOp -> trivialCode (CMP LTT) y x
602 IntGeOp -> trivialCode (CMP LE) y x
603 IntEqOp -> trivialCode (CMP EQQ) x y
604 IntNeOp -> int_NE_code x y
605 IntLtOp -> trivialCode (CMP LTT) x y
606 IntLeOp -> trivialCode (CMP LE) x y
608 WordGtOp -> trivialCode (CMP ULT) y x
-- NOTE(review): WordGeOp uses (x y) while AddrGeOp below uses (y x);
-- one of the two looks inconsistent -- verify against upstream.
609 WordGeOp -> trivialCode (CMP ULE) x y
610 WordEqOp -> trivialCode (CMP EQQ) x y
611 WordNeOp -> int_NE_code x y
612 WordLtOp -> trivialCode (CMP ULT) x y
613 WordLeOp -> trivialCode (CMP ULE) x y
615 AddrGtOp -> trivialCode (CMP ULT) y x
616 AddrGeOp -> trivialCode (CMP ULE) y x
617 AddrEqOp -> trivialCode (CMP EQQ) x y
618 AddrNeOp -> int_NE_code x y
619 AddrLtOp -> trivialCode (CMP ULT) x y
620 AddrLeOp -> trivialCode (CMP ULE) x y
622 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
623 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
624 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
625 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
626 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
627 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
629 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
630 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
631 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
632 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
633 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
634 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
636 IntAddOp -> trivialCode (ADD Q False) x y
637 IntSubOp -> trivialCode (SUB Q False) x y
638 IntMulOp -> trivialCode (MUL Q False) x y
639 IntQuotOp -> trivialCode (DIV Q False) x y
640 IntRemOp -> trivialCode (REM Q False) x y
642 WordAddOp -> trivialCode (ADD Q False) x y
643 WordSubOp -> trivialCode (SUB Q False) x y
644 WordMulOp -> trivialCode (MUL Q False) x y
645 WordQuotOp -> trivialCode (DIV Q True) x y
646 WordRemOp -> trivialCode (REM Q True) x y
648 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
649 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
650 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
651 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
653 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
654 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
655 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
656 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
658 AddrAddOp -> trivialCode (ADD Q False) x y
659 AddrSubOp -> trivialCode (SUB Q False) x y
660 AddrRemOp -> trivialCode (REM Q True) x y
662 AndOp -> trivialCode AND x y
663 OrOp -> trivialCode OR x y
664 XorOp -> trivialCode XOR x y
665 SllOp -> trivialCode SLL x y
666 SrlOp -> trivialCode SRL x y
668 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
669 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
670 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
672 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
673 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
675 {- ------------------------------------------------------------
676 Some bizarre special code for getting condition codes into
677 registers. Integer non-equality is a test for equality
678 followed by an XOR with 1. (Integer comparisons always set
679 the result register to 0 or 1.) Floating point comparisons of
680 any kind leave the result in a floating point register, so we
681 need to wrangle an integer register out of things.
683 int_NE_code :: StixTree -> StixTree -> NatM Register
686 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
687 getNewRegNat IntRep `thenNat` \ tmp ->
689 code = registerCode register tmp
690 src = registerName register tmp
691 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
693 return (Any IntRep code__2)
695 {- ------------------------------------------------------------
696 Comments for int_NE_code also apply to cmpF_code
699 :: (Reg -> Reg -> Reg -> Instr)
701 -> StixTree -> StixTree
704 cmpF_code instr cond x y
705 = trivialFCode pr instr x y `thenNat` \ register ->
706 getNewRegNat F64 `thenNat` \ tmp ->
707 getBlockIdNat `thenNat` \ lbl ->
709 code = registerCode register tmp
710 result = registerName register tmp
-- Set dst to 1, branch over the reset if the FP condition holds,
-- otherwise zero dst (OR of zeroh with itself).
712 code__2 dst = code . mkSeqInstrs [
713 OR zeroh (RIImm (ImmInt 1)) dst,
714 BF cond result (ImmCLbl lbl),
715 OR zeroh (RIReg zeroh) dst,
718 return (Any IntRep code__2)
720 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
721 ------------------------------------------------------------
723 getRegister (CmmLoad pk mem)
724 = getAmode mem `thenNat` \ amode ->
726 code = amodeCode amode
727 src = amodeAddr amode
728 size = primRepToSize pk
729 code__2 dst = code . mkSeqInstr (LD size dst src)
731 return (Any pk code__2)
-- Integer literal: small ones via OR-immediate, otherwise LDI.
733 getRegister (StInt i)
736 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
738 return (Any IntRep code)
741 code dst = mkSeqInstr (LDI Q dst src)
743 return (Any IntRep code)
745 src = ImmInt (fromInteger i)
750 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
752 return (Any PtrRep code)
-- NOTE(review): partial 'case' -- the Nothing alternative is not
-- visible in this extraction.
755 imm__2 = case imm of Just x -> x
757 #endif /* alpha_TARGET_ARCH */
759 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386 x87 float literals: emitted as read-only data and loaded via GLD.
763 getRegister (CmmLit (CmmFloat f F32)) = do
764 lbl <- getNewLabelNat
765 let code dst = toOL [
768 CmmStaticLit (CmmFloat f F32)],
769 GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
772 return (Any F32 code)
-- 0.0 and 1.0 get dedicated x87 load instructions (fldz/fld1),
-- avoiding a memory constant.
775 getRegister (CmmLit (CmmFloat d F64))
777 = let code dst = unitOL (GLDZ dst)
778 in return (Any F64 code)
781 = let code dst = unitOL (GLD1 dst)
782 in return (Any F64 code)
785 lbl <- getNewLabelNat
786 let code dst = toOL [
789 CmmStaticLit (CmmFloat d F64)],
790 GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
793 return (Any F64 code)
795 #endif /* i386_TARGET_ARCH */
797 #if x86_64_TARGET_ARCH
-- SSE: 0.0 is materialised by xor-ing the register with itself.
799 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
800 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
801 -- I don't know why there are xorpd, xorps, and pxor instructions.
802 -- They all appear to do the same thing --SDM
803 return (Any rep code)
-- Other float literals: read-only data constant plus a MOV load.
805 getRegister (CmmLit (CmmFloat f rep)) = do
806 lbl <- getNewLabelNat
807 let code dst = toOL [
810 CmmStaticLit (CmmFloat f rep)],
811 MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
812 -- ToDo: should use %rip-relative
815 return (Any rep code)
817 #endif /* x86_64_TARGET_ARCH */
819 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
821 -- catch simple cases of zero- or sign-extended load
-- Fuse a widening conversion with the load it wraps, using the
-- movzx/movsx instruction families directly.
822 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
823 code <- intLoadCode (MOVZxL I8) addr
824 return (Any I32 code)
826 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
827 code <- intLoadCode (MOVSxL I8) addr
828 return (Any I32 code)
830 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
831 code <- intLoadCode (MOVZxL I16) addr
832 return (Any I32 code)
834 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
835 code <- intLoadCode (MOVSxL I16) addr
836 return (Any I32 code)
840 #if x86_64_TARGET_ARCH
842 -- catch simple cases of zero- or sign-extended load
843 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
844 code <- intLoadCode (MOVZxL I8) addr
845 return (Any I64 code)
847 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
848 code <- intLoadCode (MOVSxL I8) addr
849 return (Any I64 code)
851 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
852 code <- intLoadCode (MOVZxL I16) addr
853 return (Any I64 code)
855 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
856 code <- intLoadCode (MOVSxL I16) addr
857 return (Any I64 code)
-- On amd64 a 32-bit MOV implicitly zeroes the upper 32 bits, so plain
-- MOV I32 suffices for the unsigned 32->64 widening.
859 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
860 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
861 return (Any I64 code)
863 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
864 code <- intLoadCode (MOVSxL I32) addr
865 return (Any I64 code)
869 #if x86_64_TARGET_ARCH
-- SSE has no FP negate instruction: flip the sign bit by xor-ing with
-- a sign-bit mask held in a 16-byte-aligned read-only constant.
870 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
871 lbl <- getNewLabelNat
874 -- This is how gcc does it, so it can't be that bad:
875 LDATA ReadOnlyData16 [
878 CmmStaticLit (CmmInt 0x80000000 I32),
879 CmmStaticLit (CmmInt 0 I32),
880 CmmStaticLit (CmmInt 0 I32),
881 CmmStaticLit (CmmInt 0 I32)
883 XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
884 -- xorps, so we need the 128-bit constant
885 -- ToDo: rip-relative
888 return (Any F32 code)
890 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
891 lbl <- getNewLabelNat
893 -- This is how gcc does it, so it can't be that bad:
895 LDATA ReadOnlyData16 [
898 CmmStaticLit (CmmInt 0x8000000000000000 I64),
899 CmmStaticLit (CmmInt 0 I64)
901 -- gcc puts an unpck here. Wonder if we need it.
902 XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
903 -- xorpd, so we need the 128-bit constant
904 -- ToDo: rip-relative
907 return (Any F64 code)
910 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Unary MachOps: negation, complement, and the conversion zoo.
912 getRegister (CmmMachOp mop [x]) -- unary MachOps
915 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
916 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
919 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
920 MO_Not rep -> trivialUCode rep (NOT rep) x
-- Narrowing conversions are no-ops at the register level: the value
-- simply keeps its wider rep and callers use the low bits.
923 -- TODO: these are only nops if the arg is not a fixed register that
924 -- can't be byte-addressed.
925 MO_U_Conv I32 I8 -> conversionNop I32 x
926 MO_S_Conv I32 I8 -> conversionNop I32 x
927 MO_U_Conv I16 I8 -> conversionNop I16 x
928 MO_S_Conv I16 I8 -> conversionNop I16 x
929 MO_U_Conv I32 I16 -> conversionNop I32 x
930 MO_S_Conv I32 I16 -> conversionNop I32 x
931 #if x86_64_TARGET_ARCH
932 MO_U_Conv I64 I32 -> conversionNop I64 x
933 MO_S_Conv I64 I32 -> conversionNop I64 x
934 MO_U_Conv I64 I16 -> conversionNop I64 x
935 MO_S_Conv I64 I16 -> conversionNop I64 x
936 MO_U_Conv I64 I8 -> conversionNop I64 x
937 MO_S_Conv I64 I8 -> conversionNop I64 x
940 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
941 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
-- Widening conversions need a real movzx/movsx instruction.
944 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
945 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
946 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
948 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
949 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
950 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
952 #if x86_64_TARGET_ARCH
953 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
954 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
955 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
956 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
957 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
958 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
959 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
960 -- However, we don't want the register allocator to throw it
961 -- away as an unnecessary reg-to-reg move, so we keep it in
962 -- the form of a movzl and print it as a movl later.
-- On i386's x87, F32 and F64 are both 80-bit internally, so the
-- conversions are no-ops; on SSE they need a real coercion.
966 MO_S_Conv F32 F64 -> conversionNop F64 x
967 MO_S_Conv F64 F32 -> conversionNop F32 x
969 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
970 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
974 | isFloatingRep from -> coerceFP2Int from to x
975 | isFloatingRep to -> coerceInt2FP from to x
977 other -> pprPanic "getRegister" (pprMachOp mop)
979 -- signed or unsigned extension.
980 integerExtend from to instr expr = do
-- Byte sources must live in a byte-addressable register on i386.
981 (reg,e_code) <- if from == I8 then getByteReg expr
986 instr from (OpReg reg) (OpReg dst)
-- Reuse the operand's register/code, just relabel its rep.
989 conversionNop new_rep expr
990 = do e_code <- getRegister expr
991 return (swizzleRegisterRep e_code new_rep)
994 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
995 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
997 MO_Eq F32 -> condFltReg EQQ x y
998 MO_Ne F32 -> condFltReg NE x y
999 MO_S_Gt F32 -> condFltReg GTT x y
1000 MO_S_Ge F32 -> condFltReg GE x y
1001 MO_S_Lt F32 -> condFltReg LTT x y
1002 MO_S_Le F32 -> condFltReg LE x y
1004 MO_Eq F64 -> condFltReg EQQ x y
1005 MO_Ne F64 -> condFltReg NE x y
1006 MO_S_Gt F64 -> condFltReg GTT x y
1007 MO_S_Ge F64 -> condFltReg GE x y
1008 MO_S_Lt F64 -> condFltReg LTT x y
1009 MO_S_Le F64 -> condFltReg LE x y
1011 MO_Eq rep -> condIntReg EQQ x y
1012 MO_Ne rep -> condIntReg NE x y
1014 MO_S_Gt rep -> condIntReg GTT x y
1015 MO_S_Ge rep -> condIntReg GE x y
1016 MO_S_Lt rep -> condIntReg LTT x y
1017 MO_S_Le rep -> condIntReg LE x y
1019 MO_U_Gt rep -> condIntReg GU x y
1020 MO_U_Ge rep -> condIntReg GEU x y
1021 MO_U_Lt rep -> condIntReg LU x y
1022 MO_U_Le rep -> condIntReg LEU x y
1024 #if i386_TARGET_ARCH
1025 MO_Add F32 -> trivialFCode F32 GADD x y
1026 MO_Sub F32 -> trivialFCode F32 GSUB x y
1028 MO_Add F64 -> trivialFCode F64 GADD x y
1029 MO_Sub F64 -> trivialFCode F64 GSUB x y
1031 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1032 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1035 #if x86_64_TARGET_ARCH
1036 MO_Add F32 -> trivialFCode F32 ADD x y
1037 MO_Sub F32 -> trivialFCode F32 SUB x y
1039 MO_Add F64 -> trivialFCode F64 ADD x y
1040 MO_Sub F64 -> trivialFCode F64 SUB x y
1042 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1043 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1046 MO_Add rep -> add_code rep x y
1047 MO_Sub rep -> sub_code rep x y
1049 MO_S_Quot rep -> div_code rep True True x y
1050 MO_S_Rem rep -> div_code rep True False x y
1051 MO_U_Quot rep -> div_code rep False True x y
1052 MO_U_Rem rep -> div_code rep False False x y
1054 #if i386_TARGET_ARCH
1055 MO_Mul F32 -> trivialFCode F32 GMUL x y
1056 MO_Mul F64 -> trivialFCode F64 GMUL x y
1059 #if x86_64_TARGET_ARCH
1060 MO_Mul F32 -> trivialFCode F32 MUL x y
1061 MO_Mul F64 -> trivialFCode F64 MUL x y
1064 MO_Mul rep -> let op = IMUL rep in
1065 trivialCode rep op (Just op) x y
1067 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1069 MO_And rep -> let op = AND rep in
1070 trivialCode rep op (Just op) x y
1071 MO_Or rep -> let op = OR rep in
1072 trivialCode rep op (Just op) x y
1073 MO_Xor rep -> let op = XOR rep in
1074 trivialCode rep op (Just op) x y
1076 {- Shift ops on x86s have constraints on their source, it
1077 either has to be Imm, CL or 1
1078 => trivialCode is not restrictive enough (sigh.)
1080 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1081 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1082 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1084 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1086 --------------------
1087 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1088 imulMayOflo rep a b = do
1089 res_lo <- getNewRegNat rep
1090 res_hi <- getNewRegNat rep
1091 (a_reg, a_code) <- getNonClobberedReg a
1092 (b_reg, b_code) <- getSomeReg b
1094 code dst = a_code `appOL` b_code `appOL`
1096 MOV rep (OpReg a_reg) (OpReg res_hi),
1097 MOV rep (OpReg b_reg) (OpReg res_lo),
1098 IMUL64 res_hi res_lo, -- result in res_hi:res_lo
1099 SAR rep (OpImm (ImmInt 31)) (OpReg res_lo), -- sign extend lower part
1100 SUB rep (OpReg res_hi) (OpReg res_lo), -- compare against upper
1101 MOV rep (OpReg res_lo) (OpReg dst)
1102 -- dst==0 if high part == sign extended low part
1105 return (Any rep code)
1107 --------------------
1108 shift_code :: MachRep
1109 -> (Operand -> Operand -> Instr)
1114 {- Case1: shift length as immediate -}
1115 shift_code rep instr x y@(CmmLit lit) = do
1116 x_code <- getAnyReg x
1119 = x_code dst `snocOL`
1120 instr (OpImm (litToImm lit)) (OpReg dst)
1122 return (Any rep code)
1124 {- Case2: shift length is complex (non-immediate) -}
1125 shift_code rep instr x y{-amount-} = do
1126 (x_reg, x_code) <- getNonClobberedReg x
1127 y_code <- getAnyReg y
1129 code = x_code `appOL`
1131 instr (OpReg ecx) (OpReg x_reg)
1133 return (Fixed rep x_reg code)
1135 --------------------
-- | Generate code for an integer addition at the given width.
-- When the second operand is an integer literal, use the
-- three-operand form via 'add_int' (see below); otherwise fall
-- back to 'trivialCode' with ADD.  ADD is commutative, hence the
-- @Just (ADD rep)@ reversed-operand alternative.
1136 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1137 add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1138 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1140 --------------------
-- | Generate code for an integer subtraction at the given width.
-- A literal subtrahend is folded into an addition of its negation,
-- reusing 'add_int'.  SUB is not commutative, hence 'Nothing' for
-- the reversed-operand form in 'trivialCode'.
1141 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1142 sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1143 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1145 -- our three-operand add instruction:
1146 add_int rep x y = do
1147 (x_reg, x_code) <- getSomeReg x
1149 imm = ImmInt (fromInteger y)
1153 (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
1156 return (Any rep code)
1158 ----------------------
1159 div_code rep signed quotient x y = do
1160 (y_op, y_code) <- getOperand y -- cannot be clobbered
1161 x_code <- getAnyReg x
1163 widen | signed = CLTD rep
1164 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1166 instr | signed = IDIV
1169 code = y_code `appOL`
1171 toOL [widen, instr rep y_op]
1173 result | quotient = eax
1177 return (Fixed rep result code)
1180 getRegister (CmmLoad mem pk)
1183 Amode src mem_code <- getAmode mem
1185 code dst = mem_code `snocOL`
1186 IF_ARCH_i386(GLD pk src dst,
1187 MOV pk (OpAddr src) (OpReg dst))
1189 return (Any pk code)
1191 #if i386_TARGET_ARCH
1192 getRegister (CmmLoad mem pk)
1195 code <- intLoadCode (instr pk) mem
1196 return (Any pk code)
1198 instr I8 = MOVZxL pk
1201 -- we always zero-extend 8-bit loads, if we
1202 -- can't think of anything better. This is because
1203 -- we can't guarantee access to an 8-bit variant of every register
1204 -- (esi and edi don't have 8-bit variants), so to make things
1205 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1208 #if x86_64_TARGET_ARCH
1209 -- Simpler memory load code on x86_64
1210 getRegister (CmmLoad mem pk)
1212 code <- intLoadCode (MOV pk) mem
1213 return (Any pk code)
1216 getRegister (CmmLit (CmmInt 0 rep))
1219 = unitOL (XOR rep (OpReg dst) (OpReg dst))
1221 return (Any rep code)
1223 getRegister (CmmLit lit)
1227 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1229 return (Any rep code)
1231 getRegister other = panic "getRegister(x86)"
-- | Build an integer load: compute the addressing mode for the memory
-- expression, then return a function that, given a destination
-- register, appends the supplied load instruction
-- (address operand -> register operand) to the amode's setup code.
1234 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1235 -> NatM (Reg -> InstrBlock)
1236 intLoadCode instr mem = do
1237 Amode src mem_code <- getAmode mem
1238 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1240 -- Compute an expression into *any* register, adding the appropriate
1241 -- move instruction if necessary.
1242 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1244 r <- getRegister expr
-- | Convert a 'Register' into a function from destination register to
-- code.  An 'Any' register already computes into whatever destination
-- it is given; a 'Fixed' register needs an extra reg-to-reg move
-- ('reg2reg') appended to land the value in the requested destination.
1247 anyReg :: Register -> NatM (Reg -> InstrBlock)
1248 anyReg (Any _ code) = return code
1249 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1251 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1252 -- Fixed registers might not be byte-addressable, so we make sure we've
1253 -- got a temporary, inserting an extra reg copy if necessary.
1254 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1255 #if x86_64_TARGET_ARCH
1256 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1258 getByteReg expr = do
1259 r <- getRegister expr
1262 tmp <- getNewRegNat rep
1263 return (tmp, code tmp)
1265 | isVirtualReg reg -> return (reg,code)
1267 tmp <- getNewRegNat rep
1268 return (tmp, code `snocOL` reg2reg rep reg tmp)
1269 -- ToDo: could optimise slightly by checking for byte-addressable
1270 -- real registers, but that will happen very rarely if at all.
1273 -- Another variant: this time we want the result in a register that cannot
1274 -- be modified by code to evaluate an arbitrary expression.
1275 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1276 getNonClobberedReg expr = do
1277 r <- getRegister expr
1280 tmp <- getNewRegNat rep
1281 return (tmp, code tmp)
1283 -- only free regs can be clobbered
1284 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1285 tmp <- getNewRegNat rep
1286 return (tmp, code `snocOL` reg2reg rep reg tmp)
1290 reg2reg :: MachRep -> Reg -> Reg -> Instr
1292 #if i386_TARGET_ARCH
1293 | isFloatingRep rep = GMOV src dst
1295 | otherwise = MOV rep (OpReg src) (OpReg dst)
1297 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1299 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1301 #if sparc_TARGET_ARCH
1303 getRegister (StFloat d)
1304 = getBlockIdNat `thenNat` \ lbl ->
1305 getNewRegNat PtrRep `thenNat` \ tmp ->
1306 let code dst = toOL [
1307 SEGMENT DataSegment,
1309 DATA F [ImmFloat d],
1310 SEGMENT TextSegment,
1311 SETHI (HI (ImmCLbl lbl)) tmp,
1312 LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1314 return (Any F32 code)
1316 getRegister (StDouble d)
1317 = getBlockIdNat `thenNat` \ lbl ->
1318 getNewRegNat PtrRep `thenNat` \ tmp ->
1319 let code dst = toOL [
1320 SEGMENT DataSegment,
1322 DATA DF [ImmDouble d],
1323 SEGMENT TextSegment,
1324 SETHI (HI (ImmCLbl lbl)) tmp,
1325 LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1327 return (Any F64 code)
1330 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1332 MO_NatS_Neg -> trivialUCode (SUB False False g0) x
1333 MO_Nat_Not -> trivialUCode (XNOR False g0) x
1334 MO_32U_to_8U -> trivialCode (AND False) x (StInt 255)
1336 MO_F32_Neg -> trivialUFCode F32 (FNEG F) x
1337 MO_F64_Neg -> trivialUFCode F64 (FNEG DF) x
1339 MO_F64_to_Flt -> coerceDbl2Flt x
1340 MO_F32_to_Dbl -> coerceFlt2Dbl x
1342 MO_F32_to_NatS -> coerceFP2Int F32 x
1343 MO_NatS_to_Flt -> coerceInt2FP F32 x
1344 MO_F64_to_NatS -> coerceFP2Int F64 x
1345 MO_NatS_to_Dbl -> coerceInt2FP F64 x
1347 -- Conversions which are a nop on sparc
1348 MO_32U_to_NatS -> conversionNop IntRep x
1349 MO_32S_to_NatS -> conversionNop IntRep x
1350 MO_NatS_to_32U -> conversionNop WordRep x
1351 MO_32U_to_NatU -> conversionNop WordRep x
1353 MO_NatU_to_NatS -> conversionNop IntRep x
1354 MO_NatS_to_NatU -> conversionNop WordRep x
1355 MO_NatP_to_NatU -> conversionNop WordRep x
1356 MO_NatU_to_NatP -> conversionNop PtrRep x
1357 MO_NatS_to_NatP -> conversionNop PtrRep x
1358 MO_NatP_to_NatS -> conversionNop IntRep x
1360 -- sign-extending widenings
1361 MO_8U_to_32U -> integerExtend False 24 x
1362 MO_8U_to_NatU -> integerExtend False 24 x
1363 MO_8S_to_NatS -> integerExtend True 24 x
1364 MO_16U_to_NatU -> integerExtend False 16 x
1365 MO_16S_to_NatS -> integerExtend True 16 x
1368 let fixed_x = if is_float_op -- promote to double
1369 then CmmMachOp MO_F32_to_Dbl [x]
1372 getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
1374 integerExtend signed nBits x
1376 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1377 [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1379 conversionNop new_rep expr
1380 = getRegister expr `thenNat` \ e_code ->
1381 return (swizzleRegisterRep e_code new_rep)
1385 MO_F32_Exp -> (True, FSLIT("exp"))
1386 MO_F32_Log -> (True, FSLIT("log"))
1387 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
1389 MO_F32_Sin -> (True, FSLIT("sin"))
1390 MO_F32_Cos -> (True, FSLIT("cos"))
1391 MO_F32_Tan -> (True, FSLIT("tan"))
1393 MO_F32_Asin -> (True, FSLIT("asin"))
1394 MO_F32_Acos -> (True, FSLIT("acos"))
1395 MO_F32_Atan -> (True, FSLIT("atan"))
1397 MO_F32_Sinh -> (True, FSLIT("sinh"))
1398 MO_F32_Cosh -> (True, FSLIT("cosh"))
1399 MO_F32_Tanh -> (True, FSLIT("tanh"))
1401 MO_F64_Exp -> (False, FSLIT("exp"))
1402 MO_F64_Log -> (False, FSLIT("log"))
1403 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
1405 MO_F64_Sin -> (False, FSLIT("sin"))
1406 MO_F64_Cos -> (False, FSLIT("cos"))
1407 MO_F64_Tan -> (False, FSLIT("tan"))
1409 MO_F64_Asin -> (False, FSLIT("asin"))
1410 MO_F64_Acos -> (False, FSLIT("acos"))
1411 MO_F64_Atan -> (False, FSLIT("atan"))
1413 MO_F64_Sinh -> (False, FSLIT("sinh"))
1414 MO_F64_Cosh -> (False, FSLIT("cosh"))
1415 MO_F64_Tanh -> (False, FSLIT("tanh"))
1417 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
1421 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1423 MO_32U_Gt -> condIntReg GTT x y
1424 MO_32U_Ge -> condIntReg GE x y
1425 MO_32U_Eq -> condIntReg EQQ x y
1426 MO_32U_Ne -> condIntReg NE x y
1427 MO_32U_Lt -> condIntReg LTT x y
1428 MO_32U_Le -> condIntReg LE x y
1430 MO_Nat_Eq -> condIntReg EQQ x y
1431 MO_Nat_Ne -> condIntReg NE x y
1433 MO_NatS_Gt -> condIntReg GTT x y
1434 MO_NatS_Ge -> condIntReg GE x y
1435 MO_NatS_Lt -> condIntReg LTT x y
1436 MO_NatS_Le -> condIntReg LE x y
1438 MO_NatU_Gt -> condIntReg GU x y
1439 MO_NatU_Ge -> condIntReg GEU x y
1440 MO_NatU_Lt -> condIntReg LU x y
1441 MO_NatU_Le -> condIntReg LEU x y
1443 MO_F32_Gt -> condFltReg GTT x y
1444 MO_F32_Ge -> condFltReg GE x y
1445 MO_F32_Eq -> condFltReg EQQ x y
1446 MO_F32_Ne -> condFltReg NE x y
1447 MO_F32_Lt -> condFltReg LTT x y
1448 MO_F32_Le -> condFltReg LE x y
1450 MO_F64_Gt -> condFltReg GTT x y
1451 MO_F64_Ge -> condFltReg GE x y
1452 MO_F64_Eq -> condFltReg EQQ x y
1453 MO_F64_Ne -> condFltReg NE x y
1454 MO_F64_Lt -> condFltReg LTT x y
1455 MO_F64_Le -> condFltReg LE x y
1457 MO_Nat_Add -> trivialCode (ADD False False) x y
1458 MO_Nat_Sub -> trivialCode (SUB False False) x y
1460 MO_NatS_Mul -> trivialCode (SMUL False) x y
1461 MO_NatU_Mul -> trivialCode (UMUL False) x y
1462 MO_NatS_MulMayOflo -> imulMayOflo x y
1464 -- ToDo: teach about V8+ SPARC div instructions
1465 MO_NatS_Quot -> idiv FSLIT(".div") x y
1466 MO_NatS_Rem -> idiv FSLIT(".rem") x y
1467 MO_NatU_Quot -> idiv FSLIT(".udiv") x y
1468 MO_NatU_Rem -> idiv FSLIT(".urem") x y
1470 MO_F32_Add -> trivialFCode F32 FADD x y
1471 MO_F32_Sub -> trivialFCode F32 FSUB x y
1472 MO_F32_Mul -> trivialFCode F32 FMUL x y
1473 MO_F32_Div -> trivialFCode F32 FDIV x y
1475 MO_F64_Add -> trivialFCode F64 FADD x y
1476 MO_F64_Sub -> trivialFCode F64 FSUB x y
1477 MO_F64_Mul -> trivialFCode F64 FMUL x y
1478 MO_F64_Div -> trivialFCode F64 FDIV x y
1480 MO_Nat_And -> trivialCode (AND False) x y
1481 MO_Nat_Or -> trivialCode (OR False) x y
1482 MO_Nat_Xor -> trivialCode (XOR False) x y
1484 MO_Nat_Shl -> trivialCode SLL x y
1485 MO_Nat_Shr -> trivialCode SRL x y
1486 MO_Nat_Sar -> trivialCode SRA x y
1488 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1489 [promote x, promote y])
1490 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1491 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1494 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1496 idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1498 --------------------
1499 imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1501 = getNewRegNat IntRep `thenNat` \ t1 ->
1502 getNewRegNat IntRep `thenNat` \ t2 ->
1503 getNewRegNat IntRep `thenNat` \ res_lo ->
1504 getNewRegNat IntRep `thenNat` \ res_hi ->
1505 getRegister a1 `thenNat` \ reg1 ->
1506 getRegister a2 `thenNat` \ reg2 ->
1507 let code1 = registerCode reg1 t1
1508 code2 = registerCode reg2 t2
1509 src1 = registerName reg1 t1
1510 src2 = registerName reg2 t2
1511 code dst = code1 `appOL` code2 `appOL`
1513 SMUL False src1 (RIReg src2) res_lo,
1515 SRA res_lo (RIImm (ImmInt 31)) res_lo,
1516 SUB False False res_lo (RIReg res_hi) dst
1519 return (Any IntRep code)
1521 getRegister (CmmLoad pk mem) = do
1522 Amode src code <- getAmode mem
1524 size = primRepToSize pk
1525 code__2 dst = code `snocOL` LD size src dst
1527 return (Any pk code__2)
1529 getRegister (StInt i)
1532 src = ImmInt (fromInteger i)
1533 code dst = unitOL (OR False g0 (RIImm src) dst)
1535 return (Any IntRep code)
1541 SETHI (HI imm__2) dst,
1542 OR False dst (RIImm (LO imm__2)) dst]
1544 return (Any PtrRep code)
1546 = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1549 imm__2 = case imm of Just x -> x
1551 #endif /* sparc_TARGET_ARCH */
1553 #if powerpc_TARGET_ARCH
1554 getRegister (CmmLoad mem pk)
1557 Amode addr addr_code <- getAmode mem
1558 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1559 addr_code `snocOL` LD pk dst addr
1560 return (Any pk code)
1562 -- catch simple cases of zero- or sign-extended load
1563 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1564 Amode addr addr_code <- getAmode mem
1565 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1567 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1569 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1570 Amode addr addr_code <- getAmode mem
1571 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1573 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1574 Amode addr addr_code <- getAmode mem
1575 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1577 getRegister (CmmMachOp mop [x]) -- unary MachOps
1579 MO_Not rep -> trivialUCode rep NOT x
1581 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1582 MO_S_Conv F32 F64 -> conversionNop F64 x
1585 | from == to -> conversionNop to x
1586 | isFloatingRep from -> coerceFP2Int from to x
1587 | isFloatingRep to -> coerceInt2FP from to x
1589 -- narrowing is a nop: we treat the high bits as undefined
1590 MO_S_Conv I32 to -> conversionNop to x
1591 MO_S_Conv I16 I8 -> conversionNop I8 x
1592 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1593 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1596 | from == to -> conversionNop to x
1597 -- narrowing is a nop: we treat the high bits as undefined
1598 MO_U_Conv I32 to -> conversionNop to x
1599 MO_U_Conv I16 I8 -> conversionNop I8 x
1600 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1601 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1603 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1604 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1605 MO_S_Neg rep -> trivialUCode rep NEG x
-- A width conversion that requires no instructions on this target:
-- generate code for the operand and simply re-tag the resulting
-- register with the new representation via swizzleRegisterRep.
1608 conversionNop new_rep expr
1609 = do e_code <- getRegister expr
1610 return (swizzleRegisterRep e_code new_rep)
1612 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1614 MO_Eq F32 -> condFltReg EQQ x y
1615 MO_Ne F32 -> condFltReg NE x y
1617 MO_S_Gt F32 -> condFltReg GTT x y
1618 MO_S_Ge F32 -> condFltReg GE x y
1619 MO_S_Lt F32 -> condFltReg LTT x y
1620 MO_S_Le F32 -> condFltReg LE x y
1622 MO_Eq F64 -> condFltReg EQQ x y
1623 MO_Ne F64 -> condFltReg NE x y
1625 MO_S_Gt F64 -> condFltReg GTT x y
1626 MO_S_Ge F64 -> condFltReg GE x y
1627 MO_S_Lt F64 -> condFltReg LTT x y
1628 MO_S_Le F64 -> condFltReg LE x y
1630 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1631 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1633 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1634 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1635 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1636 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1638 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1639 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1640 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1641 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1643 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1644 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1645 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1646 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1648 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1649 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1650 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1651 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1653 -- optimize addition with 32-bit immediate
1657 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1658 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1661 (src, srcCode) <- getSomeReg x
1662 let imm = litToImm lit
1663 code dst = srcCode `appOL` toOL [
1664 ADDIS dst src (HA imm),
1665 ADD dst dst (RIImm (LO imm))
1667 return (Any I32 code)
1668 _ -> trivialCode I32 True ADD x y
1670 MO_Add rep -> trivialCode rep True ADD x y
1672 case y of -- subfi ('subtract from' with immediate) doesn't exist
1673 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1674 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1675 _ -> trivialCodeNoImm rep SUBF y x
1677 MO_Mul rep -> trivialCode rep True MULLW x y
1679 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1681 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1682 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1684 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1685 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1687 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1688 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1690 MO_And rep -> trivialCode rep False AND x y
1691 MO_Or rep -> trivialCode rep False OR x y
1692 MO_Xor rep -> trivialCode rep False XOR x y
1694 MO_Shl rep -> trivialCode rep False SLW x y
1695 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1696 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1698 getRegister (CmmLit (CmmInt i rep))
1699 | Just imm <- makeImmediate rep True i
1701 code dst = unitOL (LI dst imm)
1703 return (Any rep code)
1705 getRegister (CmmLit (CmmFloat f frep)) = do
1706 lbl <- getNewLabelNat
1707 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1708 Amode addr addr_code <- getAmode dynRef
1710 LDATA ReadOnlyData [CmmDataLabel lbl,
1711 CmmStaticLit (CmmFloat f frep)]
1712 `consOL` (addr_code `snocOL` LD frep dst addr)
1713 return (Any frep code)
1715 getRegister (CmmLit lit)
1716 = let rep = cmmLitRep lit
1720 OR dst dst (RIImm (LO imm))
1722 in return (Any rep code)
1724 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1726 -- extend?Rep: wrap integer expression of type rep
1727 -- in a conversion to I32
-- Sign-extend a narrower integer expression to I32; a no-op when the
-- expression is already I32.
1728 extendSExpr I32 x = x
1729 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
-- Zero-extend a narrower integer expression to I32; a no-op when the
-- expression is already I32.
1730 extendUExpr I32 x = x
1731 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1733 #endif /* powerpc_TARGET_ARCH */
1736 -- -----------------------------------------------------------------------------
1737 -- The 'Amode' type: Memory addressing modes passed up the tree.
1739 data Amode = Amode AddrMode InstrBlock
1742 Now, given a tree (the argument to an CmmLoad) that references memory,
1743 produce a suitable addressing mode.
1745 A Rule of the Game (tm) for Amodes: use of the addr bit must
1746 immediately follow use of the code part, since the code part puts
1747 values in registers which the addr then refers to. So you can't put
1748 anything in between, lest it overwrite some of those registers. If
1749 you need to do some other computation between the code part and use of
1750 the addr bit, first store the effective address from the amode in a
1751 temporary, then do the other computation, and then use the temporary:
1755 ... other computation ...
1759 getAmode :: CmmExpr -> NatM Amode
1760 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1762 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1764 #if alpha_TARGET_ARCH
1766 getAmode (StPrim IntSubOp [x, StInt i])
1767 = getNewRegNat PtrRep `thenNat` \ tmp ->
1768 getRegister x `thenNat` \ register ->
1770 code = registerCode register tmp
1771 reg = registerName register tmp
1772 off = ImmInt (-(fromInteger i))
1774 return (Amode (AddrRegImm reg off) code)
1776 getAmode (StPrim IntAddOp [x, StInt i])
1777 = getNewRegNat PtrRep `thenNat` \ tmp ->
1778 getRegister x `thenNat` \ register ->
1780 code = registerCode register tmp
1781 reg = registerName register tmp
1782 off = ImmInt (fromInteger i)
1784 return (Amode (AddrRegImm reg off) code)
1788 = return (Amode (AddrImm imm__2) id)
1791 imm__2 = case imm of Just x -> x
1794 = getNewRegNat PtrRep `thenNat` \ tmp ->
1795 getRegister other `thenNat` \ register ->
1797 code = registerCode register tmp
1798 reg = registerName register tmp
1800 return (Amode (AddrReg reg) code)
1802 #endif /* alpha_TARGET_ARCH */
1804 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1806 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1808 -- This is all just ridiculous, since it carefully undoes
1809 -- what mangleIndexTree has just done.
1810 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1811 | not (is64BitLit lit)
1812 -- ASSERT(rep == I32)???
1813 = do (x_reg, x_code) <- getSomeReg x
1814 let off = ImmInt (-(fromInteger i))
1815 return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1817 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1818 | not (is64BitLit lit)
1819 -- ASSERT(rep == I32)???
1820 = do (x_reg, x_code) <- getSomeReg x
1821 let off = ImmInt (fromInteger i)
1822 return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1824 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1825 -- recognised by the next rule.
1826 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1828 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1830 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1831 [y, CmmLit (CmmInt shift _)]])
1832 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1833 = do (x_reg, x_code) <- getNonClobberedReg x
1834 -- x must be in a temp, because it has to stay live over y_code
1835 -- we could compare x_reg and y_reg and do something better here...
1836 (y_reg, y_code) <- getSomeReg y
1838 code = x_code `appOL` y_code
1839 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1840 return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
1843 getAmode (CmmLit lit) | not (is64BitLit lit)
1844 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1847 (reg,code) <- getSomeReg expr
1848 return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
1850 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1852 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1854 #if sparc_TARGET_ARCH
1856 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1858 = getNewRegNat PtrRep `thenNat` \ tmp ->
1859 getRegister x `thenNat` \ register ->
1861 code = registerCode register tmp
1862 reg = registerName register tmp
1863 off = ImmInt (-(fromInteger i))
1865 return (Amode (AddrRegImm reg off) code)
1868 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1870 = getNewRegNat PtrRep `thenNat` \ tmp ->
1871 getRegister x `thenNat` \ register ->
1873 code = registerCode register tmp
1874 reg = registerName register tmp
1875 off = ImmInt (fromInteger i)
1877 return (Amode (AddrRegImm reg off) code)
1879 getAmode (CmmMachOp MO_Nat_Add [x, y])
1880 = getNewRegNat PtrRep `thenNat` \ tmp1 ->
1881 getNewRegNat IntRep `thenNat` \ tmp2 ->
1882 getRegister x `thenNat` \ register1 ->
1883 getRegister y `thenNat` \ register2 ->
1885 code1 = registerCode register1 tmp1
1886 reg1 = registerName register1 tmp1
1887 code2 = registerCode register2 tmp2
1888 reg2 = registerName register2 tmp2
1889 code__2 = code1 `appOL` code2
1891 return (Amode (AddrRegReg reg1 reg2) code__2)
1895 = getNewRegNat PtrRep `thenNat` \ tmp ->
1897 code = unitOL (SETHI (HI imm__2) tmp)
1899 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1902 imm__2 = case imm of Just x -> x
1905 = getNewRegNat PtrRep `thenNat` \ tmp ->
1906 getRegister other `thenNat` \ register ->
1908 code = registerCode register tmp
1909 reg = registerName register tmp
1912 return (Amode (AddrRegImm reg off) code)
1914 #endif /* sparc_TARGET_ARCH */
1916 #ifdef powerpc_TARGET_ARCH
1917 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1918 | Just off <- makeImmediate I32 True (-i)
1920 (reg, code) <- getSomeReg x
1921 return (Amode (AddrRegImm reg off) code)
1924 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1925 | Just off <- makeImmediate I32 True i
1927 (reg, code) <- getSomeReg x
1928 return (Amode (AddrRegImm reg off) code)
1930 -- optimize addition with 32-bit immediate
1932 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1934 tmp <- getNewRegNat I32
1935 (src, srcCode) <- getSomeReg x
1936 let imm = litToImm lit
1937 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1938 return (Amode (AddrRegImm tmp (LO imm)) code)
1940 getAmode (CmmLit lit)
1942 tmp <- getNewRegNat I32
1943 let imm = litToImm lit
1944 code = unitOL (LIS tmp (HA imm))
1945 return (Amode (AddrRegImm tmp (LO imm)) code)
1947 getAmode (CmmMachOp (MO_Add I32) [x, y])
1949 (regX, codeX) <- getSomeReg x
1950 (regY, codeY) <- getSomeReg y
1951 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1955 (reg, code) <- getSomeReg other
1958 return (Amode (AddrRegImm reg off) code)
1959 #endif /* powerpc_TARGET_ARCH */
1961 -- -----------------------------------------------------------------------------
1962 -- getOperand: sometimes any operand will do.
1964 -- getNonClobberedOperand: the value of the operand will remain valid across
1965 -- the computation of an arbitrary expression, unless the expression
1966 -- is computed directly into a register which the operand refers to
1967 -- (see trivialCode where this function is used for an example).
1969 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1971 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1972 getNonClobberedOperand (CmmLit lit)
1973 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1974 return (OpImm (litToImm lit), nilOL)
1975 getNonClobberedOperand (CmmLoad mem pk)
1976 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1977 Amode src mem_code <- getAmode mem
1979 if (amodeCouldBeClobbered src)
1981 tmp <- getNewRegNat wordRep
1982 return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
1983 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1986 return (OpAddr src', save_code `appOL` mem_code)
1987 getNonClobberedOperand e = do
1988 (reg, code) <- getNonClobberedReg e
1989 return (OpReg reg, code)
1991 amodeCouldBeClobbered :: AddrMode -> Bool
1992 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1994 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1995 regClobbered _ = False
1997 -- getOperand: the operand is not required to remain valid across the
1998 -- computation of an arbitrary expression.
1999 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2000 getOperand (CmmLit lit)
2001 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2002 return (OpImm (litToImm lit), nilOL)
2003 getOperand (CmmLoad mem pk)
2004 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2005 Amode src mem_code <- getAmode mem
2006 return (OpAddr src, mem_code)
2008 (reg, code) <- getNonClobberedReg e
2009 return (OpReg reg, code)
2011 isOperand :: CmmExpr -> Bool
2012 isOperand (CmmLoad _ _) = True
2013 isOperand (CmmLit lit) = not (is64BitLit lit) &&
2014 not (isFloatingRep (cmmLitRep lit))
2017 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2018 getRegOrMem (CmmLoad mem pk)
2019 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2020 Amode src mem_code <- getAmode mem
2021 return (OpAddr src, mem_code)
2023 (reg, code) <- getNonClobberedReg e
2024 return (OpReg reg, code)
2026 #if x86_64_TARGET_ARCH
2027 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2029 is64BitLit x = False
2032 -- -----------------------------------------------------------------------------
2033 -- The 'CondCode' type: Condition codes passed up the tree.
2035 data CondCode = CondCode Bool Cond InstrBlock
2037 -- Set up a condition code for a conditional branch.
2039 getCondCode :: CmmExpr -> NatM CondCode
2041 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2043 #if alpha_TARGET_ARCH
2044 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2045 #endif /* alpha_TARGET_ARCH */
2047 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2049 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2050 -- yes, they really do seem to want exactly the same!
2052 getCondCode (CmmMachOp mop [x, y])
2053 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2055 MO_Eq F32 -> condFltCode EQQ x y
2056 MO_Ne F32 -> condFltCode NE x y
2058 MO_S_Gt F32 -> condFltCode GTT x y
2059 MO_S_Ge F32 -> condFltCode GE x y
2060 MO_S_Lt F32 -> condFltCode LTT x y
2061 MO_S_Le F32 -> condFltCode LE x y
2063 MO_Eq F64 -> condFltCode EQQ x y
2064 MO_Ne F64 -> condFltCode NE x y
2066 MO_S_Gt F64 -> condFltCode GTT x y
2067 MO_S_Ge F64 -> condFltCode GE x y
2068 MO_S_Lt F64 -> condFltCode LTT x y
2069 MO_S_Le F64 -> condFltCode LE x y
2071 MO_Eq rep -> condIntCode EQQ x y
2072 MO_Ne rep -> condIntCode NE x y
2074 MO_S_Gt rep -> condIntCode GTT x y
2075 MO_S_Ge rep -> condIntCode GE x y
2076 MO_S_Lt rep -> condIntCode LTT x y
2077 MO_S_Le rep -> condIntCode LE x y
2079 MO_U_Gt rep -> condIntCode GU x y
2080 MO_U_Ge rep -> condIntCode GEU x y
2081 MO_U_Lt rep -> condIntCode LU x y
2082 MO_U_Le rep -> condIntCode LEU x y
2084 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2086 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2088 #elif powerpc_TARGET_ARCH
2090 -- almost the same as everywhere else - but we need to
2091 -- extend small integers to 32 bit first
2093 getCondCode (CmmMachOp mop [x, y])
2095 MO_Eq F32 -> condFltCode EQQ x y
2096 MO_Ne F32 -> condFltCode NE x y
2098 MO_S_Gt F32 -> condFltCode GTT x y
2099 MO_S_Ge F32 -> condFltCode GE x y
2100 MO_S_Lt F32 -> condFltCode LTT x y
2101 MO_S_Le F32 -> condFltCode LE x y
2103 MO_Eq F64 -> condFltCode EQQ x y
2104 MO_Ne F64 -> condFltCode NE x y
2106 MO_S_Gt F64 -> condFltCode GTT x y
2107 MO_S_Ge F64 -> condFltCode GE x y
2108 MO_S_Lt F64 -> condFltCode LTT x y
2109 MO_S_Le F64 -> condFltCode LE x y
2111 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2112 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2114 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2115 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2116 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2117 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2119 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2120 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2121 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2122 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2124 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2126 getCondCode other = panic "getCondCode(2)(powerpc)"
2132 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2133 -- passed back up the tree.
-- | Produce a CondCode: the instruction sequence that sets the machine
-- condition flags, together with the Cond to test afterwards.
2135 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2137 #if alpha_TARGET_ARCH
-- These are never reached on Alpha (per the panic messages below);
-- Alpha conditional branches are generated by a different route.
2138 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2139 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2140 #endif /* alpha_TARGET_ARCH */
2142 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2143 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- NOTE(review): several "let"/"where" lines are missing from this extract
-- (internal numbering gaps, e.g. 2148-2149); the "code = ..." bindings
-- below originally sat under a "let".
2145 -- memory vs immediate
-- CMP can take a memory operand directly, so compare the load's address
-- against the immediate without going through a register.
2146 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2147 Amode x_addr x_code <- getAmode x
2150 code = x_code `snocOL`
2151 CMP pk (OpImm imm) (OpAddr x_addr)
2153 return (CondCode False cond code)
-- Comparison against zero: TEST reg,reg sets the flags and is shorter
-- than CMP with an immediate 0.
2156 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2157 (x_reg, x_code) <- getSomeReg x
2159 code = x_code `snocOL`
2160 TEST pk (OpReg x_reg) (OpReg x_reg)
2162 return (CondCode False cond code)
2164 -- anything vs operand
-- x must go in a register that evaluating y cannot clobber.
2165 condIntCode cond x y | isOperand y = do
2166 (x_reg, x_code) <- getNonClobberedReg x
2167 (y_op, y_code) <- getOperand y
2169 code = x_code `appOL` y_code `snocOL`
2170 CMP (cmmExprRep x) y_op (OpReg x_reg)
2172 return (CondCode False cond code)
2174 -- anything vs anything
2175 condIntCode cond x y = do
2176 (y_reg, y_code) <- getNonClobberedReg y
2177 (x_op, x_code) <- getRegOrMem x
2179 code = y_code `appOL`
2181 CMP (cmmExprRep x) (OpReg y_reg) x_op
2183 return (CondCode False cond code)
2186 #if i386_TARGET_ARCH
-- x87 float comparison; only the listed conditions are supported here.
2187 condFltCode cond x y
2188 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2189 (x_reg, x_code) <- getNonClobberedReg x
2190 (y_reg, y_code) <- getSomeReg y
2192 code = x_code `appOL` y_code `snocOL`
2193 GCMP cond x_reg y_reg
2194 -- The GCMP insn does the test and sets the zero flag if comparable
2195 -- and true. Hence we always supply EQQ as the condition to test.
2196 return (CondCode True EQQ code)
2197 #endif /* i386_TARGET_ARCH */
2199 #if x86_64_TARGET_ARCH
2200 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2201 -- an operand, but the right must be a reg. We can probably do better
2202 -- than this general case...
2203 condFltCode cond x y = do
2204 (x_reg, x_code) <- getNonClobberedReg x
2205 (y_op, y_code) <- getOperand y
2207 code = x_code `appOL`
2209 CMP (cmmExprRep x) y_op (OpReg x_reg)
2211 return (CondCode False (condToUnsigned cond) code)
2212 -- we need to use the unsigned comparison operators on the
2213 -- result of this comparison.
2216 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2218 #if sparc_TARGET_ARCH
-- Old Stix-era SPARC code (pattern-matches on StInt, uses thenNat);
-- several "let"/"in" lines are missing from this extract.
-- Integer compare against an immediate: SUBcc with result to %g0 sets
-- the condition codes without keeping the difference.
2220 condIntCode cond x (StInt y)
2222 = getRegister x `thenNat` \ register ->
2223 getNewRegNat IntRep `thenNat` \ tmp ->
2225 code = registerCode register tmp
2226 src1 = registerName register tmp
2227 src2 = ImmInt (fromInteger y)
2228 code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2230 return (CondCode False cond code__2)
-- General register-register integer compare.
2232 condIntCode cond x y
2233 = getRegister x `thenNat` \ register1 ->
2234 getRegister y `thenNat` \ register2 ->
2235 getNewRegNat IntRep `thenNat` \ tmp1 ->
2236 getNewRegNat IntRep `thenNat` \ tmp2 ->
2238 code1 = registerCode register1 tmp1
2239 src1 = registerName register1 tmp1
2240 code2 = registerCode register2 tmp2
2241 src2 = registerName register2 tmp2
2242 code__2 = code1 `appOL` code2 `snocOL`
2243 SUB False True src1 (RIReg src2) g0
2245 return (CondCode False cond code__2)
-- Float compare.  If operand widths differ, the F32 side is promoted to
-- F64 (via FxTOy into tmp) before the FCMP, so both sides match.
2248 condFltCode cond x y
2249 = getRegister x `thenNat` \ register1 ->
2250 getRegister y `thenNat` \ register2 ->
2251 getNewRegNat (registerRep register1)
2253 getNewRegNat (registerRep register2)
2255 getNewRegNat F64 `thenNat` \ tmp ->
2257 promote x = FxTOy F DF x tmp
2259 pk1 = registerRep register1
2260 code1 = registerCode register1 tmp1
2261 src1 = registerName register1 tmp1
2263 pk2 = registerRep register2
2264 code2 = registerCode register2 tmp2
2265 src2 = registerName register2 tmp2
-- NOTE(review): the "if pk1 == pk2 then" line is missing from this
-- extract; the three branches below correspond to equal reps,
-- pk1 == F32, and (presumably) pk2 == F32.
2269 code1 `appOL` code2 `snocOL`
2270 FCMP True (primRepToSize pk1) src1 src2
2271 else if pk1 == F32 then
2272 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2273 FCMP True DF tmp src2
2275 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2276 FCMP True DF src1 tmp
2278 return (CondCode True cond code__2)
2280 #endif /* sparc_TARGET_ARCH */
2282 #if powerpc_TARGET_ARCH
2283 -- ###FIXME: I16 and I8!
-- Compare against an immediate when it fits in the instruction's
-- immediate field; CMPL (logical) for unsigned conditions, CMP (signed)
-- otherwise.
2284 condIntCode cond x (CmmLit (CmmInt y rep))
2285 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2287 (src1, code) <- getSomeReg x
2289 code' = code `snocOL`
2290 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2291 return (CondCode False cond code')
-- General register-register compare.
2293 condIntCode cond x y = do
2294 (src1, code1) <- getSomeReg x
2295 (src2, code2) <- getSomeReg y
2297 code' = code1 `appOL` code2 `snocOL`
2298 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2299 return (CondCode False cond code')
2301 condFltCode cond x y = do
2302 (src1, code1) <- getSomeReg x
2303 (src2, code2) <- getSomeReg y
2305 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2306 code'' = case cond of -- twiddle CR to handle unordered case
-- For GE/LE, fold the "unordered" outcome into the branch-false sense
-- by rewriting the CR field with CRNOR.  Bit numbers below follow the
-- PPC CR0 layout used here: lt=0, gt=1, eq=2.
2307 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2308 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2311 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2312 return (CondCode True cond code'')
2314 #endif /* powerpc_TARGET_ARCH */
2316 -- -----------------------------------------------------------------------------
2317 -- Generating assignments
2319 -- Assignments are really at the heart of the whole code generation
2320 -- business. Almost all top-level nodes of any real importance are
2321 -- assignments, which correspond to loads, stores, or register
2322 -- transfers. If we're really lucky, some of the register transfers
2323 -- will go away, because we can use the destination register to
2324 -- complete the code generation for the right hand side. This only
2325 -- fails when the right hand side is forced into a fixed register
2326 -- (e.g. the result of a call).
-- | Store an integer-typed rhs to memory / a register, and likewise for
-- floating point.  The MachRep is the width of the value being moved.
2328 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2329 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2331 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2332 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2334 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2336 #if alpha_TARGET_ARCH
-- NOTE(review): this Alpha code still defines the old "assignIntCode"
-- name rather than the assignMem_/assignReg_ pair declared above, and
-- uses the pre-OrdList combinators (asmSeqThen, mkSeqInstr).  It looks
-- bit-rotted relative to the rest of the file -- TODO confirm it still
-- builds on an Alpha-targeted configuration.
2338 assignIntCode pk (CmmLoad dst _) src
2339 = getNewRegNat IntRep `thenNat` \ tmp ->
2340 getAmode dst `thenNat` \ amode ->
2341 getRegister src `thenNat` \ register ->
2343 code1 = amodeCode amode []
2344 dst__2 = amodeAddr amode
2345 code2 = registerCode register tmp []
2346 src__2 = registerName register tmp
2347 sz = primRepToSize pk
2348 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register-to-register assignment: if the source is in a fixed
-- register, copy it with OR; otherwise compute it directly into dst.
2352 assignIntCode pk dst src
2353 = getRegister dst `thenNat` \ register1 ->
2354 getRegister src `thenNat` \ register2 ->
2356 dst__2 = registerName register1 zeroh
2357 code = registerCode register2 dst__2
2358 src__2 = registerName register2 dst__2
2359 code__2 = if isFixed register2
2360 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2365 #endif /* alpha_TARGET_ARCH */
2367 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2369 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2371 -- integer assignment to memory
2372 assignMem_IntCode pk addr src = do
2373 Amode addr code_addr <- getAmode addr
2374 (code_src, op_src) <- get_op_RI src
2376 code = code_src `appOL`
2378 MOV pk op_src (OpAddr addr)
2379 -- NOTE: op_src is stable, so it will still be valid
2380 -- after code_addr. This may involve the introduction
2381 -- of an extra MOV to a temporary register, but we hope
2382 -- the register allocator will get rid of it.
-- Pick an immediate operand when the literal fits, otherwise a
-- non-clobberable register.
2386 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2387 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2388 = return (nilOL, OpImm (litToImm lit))
2390 = do (reg,code) <- getNonClobberedReg op
2391 return (code, OpReg reg)
2394 -- Assign; dst is a reg, rhs is mem
2395 assignReg_IntCode pk reg (CmmLoad src _) = do
2396 load_code <- intLoadCode (MOV pk) src
2397 return (load_code (getRegisterReg reg))
2399 -- dst is a reg, but src could be anything
2400 assignReg_IntCode pk reg src = do
2401 code <- getAnyReg src
2402 return (code (getRegisterReg reg))
-- NOTE(review): stale comment -- this #endif closes the
-- "i386 || x86_64" conditional opened at 2369, not i386 alone.
2404 #endif /* i386_TARGET_ARCH */
2406 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2408 #if sparc_TARGET_ARCH
-- Integer store: compute the value into a temp, then ST it at the
-- addressing mode for addr.
2410 assignMem_IntCode pk addr src
2411 = getNewRegNat IntRep `thenNat` \ tmp ->
2412 getAmode addr `thenNat` \ amode ->
2413 getRegister src `thenNat` \ register ->
2415 code1 = amodeCode amode
2416 dst__2 = amodeAddr amode
2417 code2 = registerCode register tmp
2418 src__2 = registerName register tmp
2419 sz = primRepToSize pk
2420 code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
-- Register assignment: if src ended up in a fixed register, copy it
-- into dst with "or %g0, src, dst"; otherwise compute straight into dst.
2424 assignReg_IntCode pk reg src
2425 = getRegister src `thenNat` \ register2 ->
2426 getRegisterReg reg `thenNat` \ register1 ->
2427 getNewRegNat IntRep `thenNat` \ tmp ->
2429 dst__2 = registerName register1 tmp
2430 code = registerCode register2 dst__2
2431 src__2 = registerName register2 dst__2
2432 code__2 = if isFixed register2
2433 then code `snocOL` OR False g0 (RIReg src__2) dst__2
2438 #endif /* sparc_TARGET_ARCH */
2440 #if powerpc_TARGET_ARCH
-- Store: evaluate src to a register, then ST it at the address.
2442 assignMem_IntCode pk addr src = do
2443 (srcReg, code) <- getSomeReg src
2444 Amode dstAddr addr_code <- getAmode addr
2445 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2447 -- dst is a reg, but src could be anything
-- An Any register can be materialised straight into dst; a Fixed one
-- needs an extra MR (move-register) into dst.
2448 assignReg_IntCode pk reg src
2450 r <- getRegister src
2452 Any _ code -> code dst
2453 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2455 dst = getRegisterReg reg
2457 #endif /* powerpc_TARGET_ARCH */
2460 -- -----------------------------------------------------------------------------
2461 -- Floating-point assignments
2463 #if alpha_TARGET_ARCH
-- NOTE(review): same bit-rot as the integer case above -- old
-- "assignFltCode" name and pre-OrdList combinators.
2465 assignFltCode pk (CmmLoad dst _) src
2466 = getNewRegNat pk `thenNat` \ tmp ->
2467 getAmode dst `thenNat` \ amode ->
2468 getRegister src `thenNat` \ register ->
2470 code1 = amodeCode amode []
2471 dst__2 = amodeAddr amode
2472 code2 = registerCode register tmp []
2473 src__2 = registerName register tmp
2474 sz = primRepToSize pk
2475 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register-to-register FP assignment; FMOV copies out of a fixed reg.
2479 assignFltCode pk dst src
2480 = getRegister dst `thenNat` \ register1 ->
2481 getRegister src `thenNat` \ register2 ->
2483 dst__2 = registerName register1 zeroh
2484 code = registerCode register2 dst__2
2485 src__2 = registerName register2 dst__2
2486 code__2 = if isFixed register2
2487 then code . mkSeqInstr (FMOV src__2 dst__2)
2492 #endif /* alpha_TARGET_ARCH */
2494 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2496 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2498 -- Floating point assignment to memory
-- i386 stores via the x87 GST instruction; x86_64 uses a plain SSE MOV
-- (selected by the IF_ARCH_i386 macro).
2499 assignMem_FltCode pk addr src = do
2500 (src_reg, src_code) <- getNonClobberedReg src
2501 Amode addr addr_code <- getAmode addr
2503 code = src_code `appOL`
2505 IF_ARCH_i386(GST pk src_reg addr,
2506 MOV pk (OpReg src_reg) (OpAddr addr))
2509 -- Floating point assignment to a register/temporary
2510 assignReg_FltCode pk reg src = do
2511 src_code <- getAnyReg src
2512 return (src_code (getRegisterReg reg))
-- NOTE(review): stale comment -- closes the "i386 || x86_64"
-- conditional opened at 2496.
2514 #endif /* i386_TARGET_ARCH */
2516 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2518 #if sparc_TARGET_ARCH
2520 -- Floating point assignment to memory
-- If the value's width (pk__2) differs from the store width (pk),
-- convert with FxTOy through tmp1 before the ST.
2521 assignMem_FltCode pk addr src
2522 = getNewRegNat pk `thenNat` \ tmp1 ->
2523 getAmode addr `thenNat` \ amode ->
2524 getRegister src `thenNat` \ register ->
2526 sz = primRepToSize pk
2527 dst__2 = amodeAddr amode
2529 code1 = amodeCode amode
2530 code2 = registerCode register tmp1
2532 src__2 = registerName register tmp1
2533 pk__2 = registerRep register
2534 sz__2 = primRepToSize pk__2
-- NOTE(review): the "if pk == pk__2" guard line appears to be missing
-- from this extract between 2536 and 2538.
2536 code__2 = code1 `appOL` code2 `appOL`
2538 then unitOL (ST sz src__2 dst__2)
2539 else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2543 -- Floating point assignment to a register/temporary
2544 -- Why is this so bizarrely ugly?
-- Three cases: width mismatch (convert with FxTOy), fixed source
-- register (FMOV copy), or compute directly into the destination.
2545 assignReg_FltCode pk reg src
2546 = getRegisterReg reg `thenNat` \ register1 ->
2547 getRegister src `thenNat` \ register2 ->
2549 pk__2 = registerRep register2
2550 sz__2 = primRepToSize pk__2
2552 getNewRegNat pk__2 `thenNat` \ tmp ->
2554 sz = primRepToSize pk
2555 dst__2 = registerName register1 g0 -- must be Fixed
2556 reg__2 = if pk /= pk__2 then tmp else dst__2
2557 code = registerCode register2 reg__2
2558 src__2 = registerName register2 reg__2
2561 code `snocOL` FxTOy sz__2 sz src__2 dst__2
2562 else if isFixed register2 then
2563 code `snocOL` FMOV sz src__2 dst__2
2569 #endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH

-- On PowerPC the floating-point cases need no special handling here:
-- the integer assignment code paths already do the right thing, so we
-- simply delegate to them.
assignMem_FltCode pk addr src = assignMem_IntCode pk addr src
assignReg_FltCode pk reg src  = assignReg_IntCode pk reg src

#endif /* powerpc_TARGET_ARCH */
2580 -- -----------------------------------------------------------------------------
2581 -- Generating an non-local jump
2583 -- (If applicable) Do not fill the delay slots here; you will confuse the
2584 -- register allocator.
2586 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2588 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2590 #if alpha_TARGET_ARCH
-- Local (assembler-temp) labels can be reached with a direct BR;
-- anything else loads the address into pv and jumps through it.
2592 genJump (CmmLabel lbl)
2593 | isAsmTemp lbl = returnInstr (BR target)
2594 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2596 target = ImmCLbl lbl
-- Computed jump: evaluate the target into pv, then jump through it.
2599 = getRegister tree `thenNat` \ register ->
2600 getNewRegNat PtrRep `thenNat` \ tmp ->
2602 dst = registerName register pv
2603 code = registerCode register pv
2604 target = registerName register pv
2606 if isFixed register then
2607 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2609 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2611 #endif /* alpha_TARGET_ARCH */
2613 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2615 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump through memory: JMP can take a memory operand directly.
2617 genJump (CmmLoad mem pk) = do
2618 Amode target code <- getAmode mem
2619 return (code `snocOL` JMP (OpAddr target))
-- Jump to a literal (label) target.
2621 genJump (CmmLit lit) = do
2622 return (unitOL (JMP (OpImm (litToImm lit))))
-- General computed jump: evaluate target into a register first.
2625 (reg,code) <- getSomeReg expr
2626 return (code `snocOL` JMP (OpReg reg))
-- NOTE(review): stale comment -- closes "i386 || x86_64".
2628 #endif /* i386_TARGET_ARCH */
2630 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2632 #if sparc_TARGET_ARCH
-- The NOPs below fill SPARC branch delay slots (see the module comment:
-- do not fill delay slots with real work here).
2634 genJump (CmmLabel lbl)
2635 = return (toOL [CALL (Left target) 0 True, NOP])
2637 target = ImmCLbl lbl
-- Computed jump.
2640 = getRegister tree `thenNat` \ register ->
2641 getNewRegNat PtrRep `thenNat` \ tmp ->
2643 code = registerCode register tmp
2644 target = registerName register tmp
-- NOTE(review): "dsts" is not bound anywhere in the visible lines --
-- either defined on a line lost from this extract, or a latent bug.
2646 return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2648 #endif /* sparc_TARGET_ARCH */
2650 #if powerpc_TARGET_ARCH
-- Direct jump to a known label.
2651 genJump (CmmLit (CmmLabel lbl))
2652 = return (unitOL $ JMP lbl)
-- Computed jump: load target into the count register, branch via BCTR.
2656 (target,code) <- getSomeReg tree
2657 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2658 #endif /* powerpc_TARGET_ARCH */
2661 -- -----------------------------------------------------------------------------
2662 -- Unconditional branches
-- | Unconditional branch to a local basic block.
2664 genBranch :: BlockId -> NatM InstrBlock
-- NOTE(review): the per-arch #endif lines are missing from this extract
-- (internal numbering gaps after each definition).
2666 #if alpha_TARGET_ARCH
2667 genBranch id = return (unitOL (BR id))
2670 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2671 genBranch id = return (unitOL (JXX ALWAYS id))
-- SPARC: trailing NOP fills the branch delay slot.
2674 #if sparc_TARGET_ARCH
2675 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2678 #if powerpc_TARGET_ARCH
2679 genBranch id = return (unitOL (BCC ALWAYS id))
2683 -- -----------------------------------------------------------------------------
2684 -- Conditional jumps
2687 Conditional jumps are always to local labels, so we can use branch
2688 instructions. We peek at the arguments to decide what kind of
2688a comparison to do.
2691 ALPHA: For comparisons with 0, we're laughing, because we can just do
2692 the desired conditional branch.
2694 I386: First, we have to ensure that the condition
2695 codes are set according to the supplied comparison operation.
2697 SPARC: First, we have to ensure that the condition codes are set
2698 according to the supplied comparison operation. We generate slightly
2699 different code for floating point comparisons, because a floating
2700 point operation cannot directly precede a @BF@. We assume the worst
2701 and fill that slot with a @NOP@.
2703 SPARC: Do not fill the delay slots here; you will confuse the register
-- NOTE(review): the "genCondJump" head line of this signature (and its
-- result type) are missing from this extract.
2709 :: BlockId -- the branch target
2710 -> CmmExpr -- the condition on which to branch
2713 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2715 #if alpha_TARGET_ARCH
-- Comparison against literal 0: branch directly on the value.
-- NOTE(review): the head binds "id" but the body uses "lbl"
-- (target = ImmCLbl lbl) -- inconsistent as visible; presumably this
-- equation never compiled in this form, or a line was lost.
2717 genCondJump id (StPrim op [x, StInt 0])
2718 = getRegister x `thenNat` \ register ->
2719 getNewRegNat (registerRep register)
2722 code = registerCode register tmp
2723 value = registerName register tmp
2724 pk = registerRep register
2725 target = ImmCLbl lbl
2727 returnSeq code [BI (cmpOp op) value target]
-- Because the comparison is against 0, unsigned >= is ALWAYS true and
-- unsigned < is NEVER true; <= 0 degenerates to == 0 (EQQ).
2729 cmpOp CharGtOp = GTT
2731 cmpOp CharEqOp = EQQ
2733 cmpOp CharLtOp = LTT
2742 cmpOp WordGeOp = ALWAYS
2743 cmpOp WordEqOp = EQQ
2745 cmpOp WordLtOp = NEVER
2746 cmpOp WordLeOp = EQQ
2748 cmpOp AddrGeOp = ALWAYS
2749 cmpOp AddrEqOp = EQQ
2751 cmpOp AddrLtOp = NEVER
2752 cmpOp AddrLeOp = EQQ
-- Float comparison against literal 0.0: branch with BF.
2754 genCondJump lbl (StPrim op [x, StDouble 0.0])
2755 = getRegister x `thenNat` \ register ->
2756 getNewRegNat (registerRep register)
2759 code = registerCode register tmp
2760 value = registerName register tmp
2761 pk = registerRep register
2762 target = ImmCLbl lbl
2764 return (code . mkSeqInstr (BF (cmpOp op) value target))
2766 cmpOp FloatGtOp = GTT
2767 cmpOp FloatGeOp = GE
2768 cmpOp FloatEqOp = EQQ
2769 cmpOp FloatNeOp = NE
2770 cmpOp FloatLtOp = LTT
2771 cmpOp FloatLeOp = LE
2772 cmpOp DoubleGtOp = GTT
2773 cmpOp DoubleGeOp = GE
2774 cmpOp DoubleEqOp = EQQ
2775 cmpOp DoubleNeOp = NE
2776 cmpOp DoubleLtOp = LTT
2777 cmpOp DoubleLeOp = LE
-- General float comparison: Alpha FCMP only computes some relations
-- directly, so each op is expressed as an (FCMP relation, branch sense)
-- pair -- e.g. Gt becomes "not (x <= y)".
2779 genCondJump lbl (StPrim op [x, y])
2781 = trivialFCode pr instr x y `thenNat` \ register ->
2782 getNewRegNat F64 `thenNat` \ tmp ->
2784 code = registerCode register tmp
2785 result = registerName register tmp
2786 target = ImmCLbl lbl
2788 return (code . mkSeqInstr (BF cond result target))
2790 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2792 fltCmpOp op = case op of
2806 (instr, cond) = case op of
2807 FloatGtOp -> (FCMP TF LE, EQQ)
2808 FloatGeOp -> (FCMP TF LTT, EQQ)
2809 FloatEqOp -> (FCMP TF EQQ, NE)
2810 FloatNeOp -> (FCMP TF EQQ, EQQ)
2811 FloatLtOp -> (FCMP TF LTT, NE)
2812 FloatLeOp -> (FCMP TF LE, NE)
2813 DoubleGtOp -> (FCMP TF LE, EQQ)
2814 DoubleGeOp -> (FCMP TF LTT, EQQ)
2815 DoubleEqOp -> (FCMP TF EQQ, NE)
2816 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2817 DoubleLtOp -> (FCMP TF LTT, NE)
2818 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same complemented-relation trick; the
-- U-prefixed CMP variants are the unsigned comparisons.
2820 genCondJump lbl (StPrim op [x, y])
2821 = trivialCode instr x y `thenNat` \ register ->
2822 getNewRegNat IntRep `thenNat` \ tmp ->
2824 code = registerCode register tmp
2825 result = registerName register tmp
2826 target = ImmCLbl lbl
2828 return (code . mkSeqInstr (BI cond result target))
2830 (instr, cond) = case op of
2831 CharGtOp -> (CMP LE, EQQ)
2832 CharGeOp -> (CMP LTT, EQQ)
2833 CharEqOp -> (CMP EQQ, NE)
2834 CharNeOp -> (CMP EQQ, EQQ)
2835 CharLtOp -> (CMP LTT, NE)
2836 CharLeOp -> (CMP LE, NE)
2837 IntGtOp -> (CMP LE, EQQ)
2838 IntGeOp -> (CMP LTT, EQQ)
2839 IntEqOp -> (CMP EQQ, NE)
2840 IntNeOp -> (CMP EQQ, EQQ)
2841 IntLtOp -> (CMP LTT, NE)
2842 IntLeOp -> (CMP LE, NE)
2843 WordGtOp -> (CMP ULE, EQQ)
2844 WordGeOp -> (CMP ULT, EQQ)
2845 WordEqOp -> (CMP EQQ, NE)
2846 WordNeOp -> (CMP EQQ, EQQ)
2847 WordLtOp -> (CMP ULT, NE)
2848 WordLeOp -> (CMP ULE, NE)
2849 AddrGtOp -> (CMP ULE, EQQ)
2850 AddrGeOp -> (CMP ULT, EQQ)
2851 AddrEqOp -> (CMP EQQ, NE)
2852 AddrNeOp -> (CMP EQQ, EQQ)
2853 AddrLtOp -> (CMP ULT, NE)
2854 AddrLeOp -> (CMP ULE, NE)
2856 #endif /* alpha_TARGET_ARCH */
2858 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2860 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: set the flags via getCondCode, then a single conditional jump.
2862 genCondJump id bool = do
2863 CondCode _ cond code <- getCondCode bool
2864 return (code `snocOL` JXX cond id)
-- NOTE(review): stale comment -- closes "i386 || x86_64".
2866 #endif /* i386_TARGET_ARCH */
2869 #if sparc_TARGET_ARCH
-- SPARC: float branches (BF) cannot directly follow an FP compare, so a
-- NOP is inserted first; trailing NOPs fill the branch delay slots.
-- NOTE(review): the lines assembling the returned OrdList (between 2872
-- and 2877) are missing from this extract.
2871 genCondJump id bool = do
2872 CondCode is_float cond code <- getCondCode bool
2877 then [NOP, BF cond False id, NOP]
2878 else [BI cond False id, NOP]
2882 #endif /* sparc_TARGET_ARCH */
2885 #if powerpc_TARGET_ARCH
-- PowerPC: flags then one conditional branch.
2887 genCondJump id bool = do
2888 CondCode is_float cond code <- getCondCode bool
2889 return (code `snocOL` BCC cond id)
2891 #endif /* powerpc_TARGET_ARCH */
2894 -- -----------------------------------------------------------------------------
2895 -- Generating C calls
2897 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2898 -- @get_arg@, which moves the arguments to the correct registers/stack
2899 -- locations. Apart from that, the code is easy.
2901 -- (If applicable) Do not fill the delay slots here; you will confuse the
2902 -- register allocator.
-- NOTE(review): the "genCCall" head line of this signature (and its
-- result type) are missing from this extract.
2905 :: CmmCallTarget -- function to call
2906 -> [(CmmReg,MachHint)] -- where to put the result
2907 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2908 -> Maybe [GlobalReg] -- volatile regs to save
2911 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2913 #if alpha_TARGET_ARCH
-- Alpha call sequence: marshal args (registers first, then stack),
-- load the callee address into pv, JSR, then reload gp.
2917 genCCall fn cconv result_regs args
2918 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2919 `thenNat` \ ((unused,_), argCode) ->
2921 nRegs = length allArgRegs - length unused
2922 code = asmSeqThen (map ($ []) argCode)
2925 LDA pv (AddrImm (ImmLab (ptext fn))),
2926 JSR ra (AddrReg pv) nRegs,
2927 LDGP gp (AddrReg ra)]
2929 ------------------------
2930 {- Try to get a value into a specific register (or registers) for
2931 a call. The first 6 arguments go into the appropriate
2932 argument register (separate registers for integer and floating
2933 point arguments, but used in lock-step), and the remaining
2934 arguments are dumped to the stack, beginning at 0(sp). Our
2935 first argument is a pair of the list of remaining argument
2936 registers to be assigned for this call and the next stack
2937 offset to use for overflowing arguments. This way,
2938 @get_Arg@ can be applied to all of a call's arguments using
2942 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2943 -> StixTree -- Current argument
2944 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2946 -- We have to use up all of our argument registers first...
-- Each dst pair is (integer reg, float reg); pick one by the arg's rep.
2948 get_arg ((iDst,fDst):dsts, offset) arg
2949 = getRegister arg `thenNat` \ register ->
2951 reg = if isFloatingRep pk then fDst else iDst
2952 code = registerCode register reg
2953 src = registerName register reg
2954 pk = registerRep register
2957 if isFloatingRep pk then
2958 ((dsts, offset), if isFixed register then
2959 code . mkSeqInstr (FMOV src fDst)
2962 ((dsts, offset), if isFixed register then
2963 code . mkSeqInstr (OR src (RIReg src) iDst)
2966 -- Once we have run out of argument registers, we move to the
-- ... stack: each overflow argument is stored at the next word offset.
2969 get_arg ([], offset) arg
2970 = getRegister arg `thenNat` \ register ->
2971 getNewRegNat (registerRep register)
2974 code = registerCode register tmp
2975 src = registerName register tmp
2976 pk = registerRep register
2977 sz = primRepToSize pk
2979 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2981 #endif /* alpha_TARGET_ARCH */
2983 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2985 #if i386_TARGET_ARCH
2987 -- we only cope with a single result for foreign calls
-- CmmPrim float ops: sqrt/sin/cos/tan are inlined as x87 instructions;
-- everything else goes out of line via outOfLineFloatOp.
2988 genCCall (CmmPrim op) [(r,_)] args vols = do
2990 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2991 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2993 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2994 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2996 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2997 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
2999 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3000 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3002 other_op -> outOfLineFloatOp op r args vols
3004 actuallyInlineFloatOp rep instr [(x,_)]
3005 = do res <- trivialUFCode rep instr x
-- NOTE(review): the line binding "any" (original line 3006, presumably
-- "any <- anyReg res") is missing from this extract.
3007 return (any (getRegisterReg r))
-- General i386 C call: args are pushed right-to-left on the C stack,
-- the call is emitted, then the caller pops the args (ccall) -- stdcall
-- callees pop their own.
3009 genCCall target dest_regs args vols = do
3010 sizes_n_codes <- mapM push_arg (reverse args)
3011 delta <- getDeltaNat
3013 (sizes, push_codes) = unzip sizes_n_codes
3014 tot_arg_size = sum sizes
3016 -- deal with static vs dynamic call targets
3017 (callinsns,cconv) <-
3020 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3021 -> -- ToDo: stdcall arg sizes
3022 return (unitOL (CALL (Left fn_imm)), conv)
3023 where fn_imm = ImmCLbl lbl
3024 CmmForeignCall expr conv
3025 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3026 ASSERT(dyn_rep == I32)
3027 return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3029 let push_code = concatOL push_codes
3030 call = callinsns `appOL`
3032 -- Deallocate parameters after call for ccall;
3033 -- but not for stdcall (callee does it)
3034 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3035 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3037 [DELTA (delta + tot_arg_size)]
-- Restore the recorded stack delta now the args are popped.
3040 setDeltaNat (delta + tot_arg_size)
3043 -- assign the results, if necessary
-- Results come back in eax (ints), eax:edx (I64), or fake0 (x87 floats).
3044 assign_code [] = nilOL
3045 assign_code [(dest,_hint)] =
3047 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3048 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3049 F32 -> unitOL (GMOV fake0 r_dest)
3050 F64 -> unitOL (GMOV fake0 r_dest)
3051 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3053 r_dest_hi = getHiVRegFromLo r_dest
3054 rep = cmmRegRep dest
3055 r_dest = getRegisterReg dest
3056 assign_code many = panic "genCCall.assign_code many"
3058 return (push_code `appOL`
3060 assign_code dest_regs)
-- Push one argument; returns (bytes pushed, code).  Keeps the DELTA
-- pseudo-instructions in step with every esp adjustment.
3067 push_arg :: (CmmExpr,MachHint){-current argument-}
3068 -> NatM (Int, InstrBlock) -- argsz, code
3070 push_arg (arg,_hint) -- we don't need the hints on x86
-- I64 args are pushed as two I32 words, high word first.
3071 | arg_rep == I64 = do
3072 ChildCode64 code r_lo <- iselExpr64 arg
3073 delta <- getDeltaNat
3074 setDeltaNat (delta - 8)
3076 r_hi = getHiVRegFromLo r_lo
3078 return (8, code `appOL`
3079 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3080 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Floats can't be PUSHed: make room on the stack and GST into it.
3085 (code, reg, sz) <- get_op arg
3086 delta <- getDeltaNat
3087 let size = arg_size sz
3088 setDeltaNat (delta-size)
3089 if (case sz of F64 -> True; F32 -> True; _ -> False)
3092 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3094 GST sz reg (AddrBaseIndex (Just esp)
3100 PUSH I32 (OpReg reg) `snocOL`
3104 arg_rep = cmmExprRep arg
3107 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3109 (reg,code) <- getSomeReg op
3110 return (code, reg, cmmExprRep op)
-- Out-of-line float op: call the named libm routine.  F32 operands are
-- promoted to F64 for the call and the result demoted afterwards.
3113 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3114 -> Maybe [GlobalReg] -> NatM InstrBlock
3115 outOfLineFloatOp mop res args vols
3116 | cmmRegRep res == F64
3117 = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3120 = do uq <- getUniqueNat
3122 tmp = CmmLocal (LocalReg uq F64)
3124 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3125 code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3126 return (code1 `appOL` code2)
3128 promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3129 demote x = CmmMachOp (MO_S_Conv F64 F32) [x]
3131 target = CmmForeignCall (CmmLit lbl) CCallConv
3132 lbl = CmmLabel (mkForeignLabel fn Nothing False)
-- Map each CallishMachOp to its C math-library entry point.
3135 MO_F32_Exp -> FSLIT("exp")
3136 MO_F32_Log -> FSLIT("log")
3138 MO_F32_Asin -> FSLIT("asin")
3139 MO_F32_Acos -> FSLIT("acos")
3140 MO_F32_Atan -> FSLIT("atan")
3142 MO_F32_Sinh -> FSLIT("sinh")
3143 MO_F32_Cosh -> FSLIT("cosh")
3144 MO_F32_Tanh -> FSLIT("tanh")
3145 MO_F32_Pwr -> FSLIT("pow")
3147 MO_F64_Exp -> FSLIT("exp")
3148 MO_F64_Log -> FSLIT("log")
3150 MO_F64_Asin -> FSLIT("asin")
3151 MO_F64_Acos -> FSLIT("acos")
3152 MO_F64_Atan -> FSLIT("atan")
3154 MO_F64_Sinh -> FSLIT("sinh")
3155 MO_F64_Cosh -> FSLIT("cosh")
3156 MO_F64_Tanh -> FSLIT("tanh")
3157 MO_F64_Pwr -> FSLIT("pow")
3159 other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3161 #endif /* i386_TARGET_ARCH */
3163 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3165 #if x86_64_TARGET_ARCH
-- CmmPrim calls are not handled on x86_64 yet.
3167 genCCall (CmmPrim op) [(r,_)] args vols =
3168 panic "genCCall(CmmPrim)(x86_64)"
-- x86_64 C call per the SysV AMD64 convention used here: first args in
-- integer / SSE registers, the rest on the stack, keeping the stack
-- 16-byte aligned at the call and %al holding the SSE-arg count.
3170 genCCall target dest_regs args vols = do
3172 -- load up the register arguments
3173 (stack_args, sse_regs, load_args_code)
3174 <- load_args args allArgRegs allFPArgRegs 0 nilOL
3177 tot_arg_size = arg_size * length stack_args
3179 -- On entry to the called function, %rsp should be aligned
3180 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3181 -- the return address is 16-byte aligned). In STG land
3182 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3183 -- need to make sure we push a multiple of 16-bytes of args,
3184 -- plus the return address, to get the correct alignment.
3185 -- Urg, this is hard. We need to feed the delta back into
3186 -- the arg pushing code.
3187 (real_size, adjust_rsp) <-
3188 if tot_arg_size `rem` 16 == 0
3189 then return (tot_arg_size, nilOL)
3190 else do -- we need to adjust...
3191 delta <- getDeltaNat
3192 setDeltaNat (delta-8)
3193 return (tot_arg_size+8, toOL [
3194 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3198 -- push the stack args, right to left
3199 push_code <- push_args (reverse stack_args) nilOL
3200 delta <- getDeltaNat
3202 -- deal with static vs dynamic call targets
3203 (callinsns,cconv) <-
3206 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3207 -> -- ToDo: stdcall arg sizes
3208 return (unitOL (CALL (Left fn_imm)), conv)
3209 where fn_imm = ImmCLbl lbl
3210 CmmForeignCall expr conv
3211 -> do (dyn_r, dyn_c) <- getSomeReg expr
3212 return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3215 -- The x86_64 ABI requires us to set %al to the number of SSE
3216 -- registers that contain arguments, if the called routine
3217 -- is a varargs function. We don't know whether it's a
3218 -- varargs function or not, so we have to assume it is.
3220 -- It's not safe to omit this assignment, even if the number
3221 -- of SSE regs in use is zero. If %al is larger than 8
3222 -- on entry to a varargs function, seg faults ensue.
3223 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3225 let call = callinsns `appOL`
3227 -- Deallocate parameters after call for ccall;
3228 -- but not for stdcall (callee does it)
3229 (if cconv == StdCallConv || real_size==0 then [] else
3230 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3232 [DELTA (delta + real_size)]
3235 setDeltaNat (delta + real_size)
3238 -- assign the results, if necessary
-- Results: floats in xmm0, everything else in rax.
3239 assign_code [] = nilOL
3240 assign_code [(dest,_hint)] =
3242 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3243 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3244 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3246 rep = cmmRegRep dest
3247 r_dest = getRegisterReg dest
3248 assign_code many = panic "genCCall.assign_code many"
3250 return (load_args_code `appOL`
3253 assign_eax sse_regs `appOL`
3255 assign_code dest_regs)
3258 arg_size = 8 -- always, at the mo
-- Walk the args, assigning them to int regs / SSE regs while any
-- remain; args that don't fit are returned for the stack.
3260 load_args :: [(CmmExpr,MachHint)]
3261 -> [Reg] -- int regs avail for args
3262 -> [Reg] -- FP regs avail for args
3263 -> Int -> InstrBlock
3264 -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
3265 load_args args [] [] sse_regs code = return (args, sse_regs, code)
3266 -- no more regs to use
3267 load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
3268 -- no more args to push
3269 load_args ((arg,hint) : rest) aregs fregs sse_regs code
3270 | isFloatingRep arg_rep =
3274 arg_code <- getAnyReg arg
3275 load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
3280 arg_code <- getAnyReg arg
3281 load_args rest rs fregs sse_regs (code `appOL` arg_code r)
3283 arg_rep = cmmExprRep arg
3286 (args',sse',code') <- load_args rest aregs fregs sse_regs code
3287 return ((arg,hint):args', sse', code')
-- Stack args: floats are stored via MOV after making room (no PUSH for
-- SSE regs); integer args are PUSHed.  DELTA tracks each rsp change.
3289 push_args [] code = return code
3290 push_args ((arg,hint):rest) code
3291 | isFloatingRep arg_rep = do
3292 (arg_reg, arg_code) <- getSomeReg arg
3293 delta <- getDeltaNat
3294 setDeltaNat (delta-arg_size)
3295 let code' = code `appOL` toOL [
3296 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3297 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3298 DELTA (delta-arg_size)]
3299 push_args rest code'
3302 -- we only ever generate word-sized function arguments. Promotion
3303 -- has already happened: our Int8# type is kept sign-extended
3304 -- in an Int#, for example.
3305 ASSERT(arg_rep == I64) return ()
3306 (arg_op, arg_code) <- getOperand arg
3307 delta <- getDeltaNat
3308 setDeltaNat (delta-arg_size)
3309 let code' = code `appOL` toOL [PUSH I64 arg_op,
3310 DELTA (delta-arg_size)]
3311 push_args rest code'
3313 arg_rep = cmmExprRep arg
3316 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3318 #if sparc_TARGET_ARCH
3320 The SPARC calling convention is an absolute
3321 nightmare. The first 6x32 bits of arguments are mapped into
3322 %o0 through %o5, and the remaining arguments are dumped to the
3323 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3325 If we have to put args on the stack, move %o6==%sp down by
3326 the number of words to go on the stack, to ensure there's enough space.
3328 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3329 16 words above the stack pointer is a word for the address of
3330 a structure return value. I use this as a temporary location
3331 for moving values from float to int regs. Certainly it isn't
3332 safe to put anything in the 16 words starting at %sp, since
3333 this area can get trashed at any time due to window overflows
3334 caused by signal handlers.
3336 A final complication (if the above isn't enough) is that
3337 we can't blithely calculate the arguments one by one into
3338 %o0 .. %o5. Consider the following nested calls:
3342 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3343 the inner call will itself use %o0, which trashes the value put there
3344 in preparation for the outer call. Upshot: we need to calculate the
3345 args into temporary regs, and move those to arg regs or onto the
3346 stack only immediately prior to the call proper. Sigh.
-- | SPARC: generate code for a C call.  Every argument is first
-- computed into a fresh integer vreg; the vregs are moved into the
-- real argument registers (%o0..%o5) or stack slots only immediately
-- before the CALL, so a nested call cannot trash a partially
-- set-up argument list (see the long note above).
-- NOTE(review): this region was extracted with lines missing; some
-- 'let'/'in'/'case' heads are absent below.
genCCall fn cconv kind args
  = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
    (argcodes, vregss) = unzip argcode_and_vregs
    n_argRegs = length allArgRegs
    n_argRegs_used = min (length vregs) n_argRegs
    vregs = concat vregss
    -- deal with static vs dynamic call targets
    -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
    -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
    `thenNat` \ callinsns ->
    argcode = concatOL argcodes
    -- if stack args are needed, move %sp down by an even number of
    -- words (8-byte alignment) and back up again afterwards
    (move_sp_down, move_sp_up)
       = let diff = length vregs - n_argRegs
             nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
         else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
    = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
    return (argcode `appOL`
            move_sp_down `appOL`
            transfer_code `appOL`
    -- function names that begin with '.' are assumed to be special
    -- internally generated names like '.mul,' which don't get an
    -- underscore prefix
    -- ToDo:needed (WDP 96/03) ???
    fn_static = unLeft fn
    fn__2 = case (headFS fn_static) of
              '.' -> ImmLit (ftext fn_static)
              _ -> ImmCLbl (mkForeignLabel fn_static False)

    -- move args from the integer vregs into which they have been
    -- marshalled, into %o0 .. %o5, and the rest onto the stack.
    move_final :: [Reg] -> [Reg] -> Int -> [Instr]
    move_final [] _ offset -- all args done
    move_final (v:vs) [] offset -- out of aregs; move to stack
       = ST W v (spRel offset)
         : move_final vs [] (offset+1)
    move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
       = OR False g0 (RIReg v) a
         : move_final vs az offset

    -- generate code to calculate an argument, and move it into one
    -- or two integer vregs.
    arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
    arg_to_int_vregs arg
       -- a 64-bit value occupies a vreg pair, high word first
       | is64BitRep (repOfCmmExpr arg)
       = iselExpr64 arg `thenNat` \ (ChildCode64 code vr_lo) ->
         let r_lo = VirtualRegI vr_lo
             r_hi = getHiVRegFromLo r_lo
         in return (code, [r_hi, r_lo])
       = getRegister arg `thenNat` \ register ->
         getNewRegNat (registerRep register) `thenNat` \ tmp ->
         let code = registerCode register tmp
             src = registerName register tmp
             pk = registerRep register
         -- the value is in src. Get it into 1 or 2 int vregs.
         -- Floats are bounced through the scratch slot at [%sp+16]
         -- to cross from the FP to the integer register file (see the
         -- note above on why that slot is safe to use).
         getNewRegNat WordRep `thenNat` \ v1 ->
         getNewRegNat WordRep `thenNat` \ v2 ->
         FMOV DF src f0 `snocOL`
         ST F f0 (spRel 16) `snocOL`
         LD W (spRel 16) v1 `snocOL`
         ST F (fPair f0) (spRel 16) `snocOL`
         getNewRegNat WordRep `thenNat` \ v1 ->
         ST F src (spRel 16) `snocOL`
         getNewRegNat WordRep `thenNat` \ v1 ->
         code `snocOL` OR False g0 (RIReg src) v1
3455 #endif /* sparc_TARGET_ARCH */
3457 #if powerpc_TARGET_ARCH
3459 #if darwin_TARGET_OS || linux_TARGET_OS
3461 The PowerPC calling convention for Darwin/Mac OS X
3462 is described in Apple's document
3463 "Inside Mac OS X - Mach-O Runtime Architecture".
3465 PowerPC Linux uses the System V Release 4 Calling Convention
3466 for PowerPC. It is described in the
3467 "System V Application Binary Interface PowerPC Processor Supplement".
3469 Both conventions are similar:
3470 Parameters may be passed in general-purpose registers starting at r3, in
3471 floating point registers starting at f1, or on the stack.
3473 But there are substantial differences:
3474 * The number of registers used for parameter passing and the exact set of
3475 nonvolatile registers differs (see MachRegs.lhs).
3476 * On Darwin, stack space is always reserved for parameters, even if they are
3477 passed in registers. The called routine may choose to save parameters from
3478 registers to the corresponding space on the stack.
3479 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3480 parameter is passed in an FPR.
3481 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3482 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3483 Darwin just treats an I64 like two separate I32s (high word first).
3484 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3485 4-byte aligned like everything else on Darwin.
3486 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3487 PowerPC Linux does not agree, so neither do we.
According to both conventions, the parameter area should be part of the
3490 caller's stack frame, allocated in the caller's prologue code (large enough
3491 to hold the parameter lists for all called routines). The NCG already
3492 uses the stack for register spilling, leaving 64 bytes free at the top.
3493 If we need a larger parameter area than that, we just allocate a new stack
3494 frame just before ccalling.
-- | PowerPC (Darwin & Linux/SysV): generate code for a C call.
-- Arguments go into GPRs from r3 / FPRs from f1 / stack slots as the
-- respective ABI dictates (see the note above); %sp is moved down
-- around the call when the parameter list needs more than the space
-- the NCG leaves free at the top of the frame.
-- NOTE(review): this region was extracted with lines missing; several
-- 'do'/'let'/'where' heads are absent below.
genCCall target dest_regs argsAndHints vols
  = ASSERT (not $ any (`elem` [I8,I16]) argReps)
    -- we rely on argument promotion in the codeGen
    (finalStack,passArgumentsCode,usedRegs) <- passArguments
        allArgRegs allFPArgRegs
    -- pick the call target: static label, dynamic address, or an
    -- out-of-line libm helper for a floating-point primop
    (labelOrExpr, reduceToF32) <- case target of
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
        CmmForeignCall expr conv -> return (Right expr, False)
        CmmPrim mop -> outOfLineFloatOp mop
    let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
        codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
            `snocOL` BL lbl usedRegs
        (dynReg, dynCode) <- getSomeReg dyn
            `snocOL` MTCTR dynReg
            `snocOL` BCTRL usedRegs
#if darwin_TARGET_OS
        initialStackOffset = 24
        -- size of linkage area + size of arguments, in bytes
        stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
            map machRepByteWidth argReps
#elif linux_TARGET_OS
        initialStackOffset = 8
        stackDelta finalStack = roundTo 16 finalStack
        args = map fst argsAndHints
        argReps = map cmmExprRep args

        -- round x up to the next multiple of a
        roundTo a x | x `mod` a == 0 = x
                    | otherwise = x + a - (x `mod` a)

        move_sp_down finalStack
            toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
            where delta = stackDelta finalStack
        move_sp_up finalStack
            toOL [ADD sp sp (RIImm (ImmInt delta)),
            where delta = stackDelta finalStack

        -- walk the argument list, assigning each argument to registers
        -- or a stack slot; yields (final stack offset, code, used regs)
        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
        passArguments ((arg,I64):args) gprs fprs stackOffset
                      accumCode accumUsed =
            ChildCode64 code vr_lo <- iselExpr64 arg
            let vr_hi = getHiVRegFromLo vr_lo
#if darwin_TARGET_OS
            -- Darwin: an I64 is just two I32s, high word first
                (accumCode `appOL` code
                    `snocOL` storeWord vr_hi gprs stackOffset
                    `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
                ((take 2 gprs) ++ accumUsed)
                storeWord vr (gpr:_) offset = MR gpr vr
                storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
#elif linux_TARGET_OS
            -- SysV: an I64 goes in an odd/even GPR pair, or an 8-byte
            -- aligned stack slot; a GPR may be skipped to achieve this
            let stackOffset' = roundTo 8 stackOffset
                stackCode = accumCode `appOL` code
                    `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
                    `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
                regCode hireg loreg =
                    accumCode `appOL` code
                    `snocOL` MR hireg vr_hi
                    `snocOL` MR loreg vr_lo
                hireg : loreg : regs | even (length gprs) ->
                    passArguments args regs fprs stackOffset
                        (regCode hireg loreg) (hireg : loreg : accumUsed)
                _skipped : hireg : loreg : regs ->
                    passArguments args regs fprs stackOffset
                        (regCode hireg loreg) (hireg : loreg : accumUsed)
                _ -> -- only one or no regs left
                    passArguments args [] fprs (stackOffset'+8)
        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
            | reg : _ <- regs = do
                register <- getRegister arg
                let code = case register of
                        Fixed _ freg fcode -> fcode `snocOL` MR reg freg
                        Any _ acode -> acode reg
#if darwin_TARGET_OS
        -- The Darwin ABI requires that we reserve stack slots for register parameters
                    (stackOffset + stackBytes)
#elif linux_TARGET_OS
        -- ... the SysV ABI doesn't.
                    (accumCode `appOL` code)
                (vr, code) <- getSomeReg arg
                    (stackOffset' + stackBytes)
                    (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
#if darwin_TARGET_OS
            -- stackOffset is at least 4-byte aligned
            -- The Darwin ABI is happy with that.
                stackOffset' = stackOffset
            -- ... the SysV ABI requires 8-byte alignment for doubles.
                stackOffset' | rep == F64 = roundTo 8 stackOffset
                             | otherwise = stackOffset
                stackSlot = AddrRegImm sp (ImmInt stackOffset')
                (nGprs, nFprs, stackBytes, regs) = case rep of
                    I32 -> (1, 0, 4, gprs)
#if darwin_TARGET_OS
        -- The Darwin ABI requires that we skip a corresponding number of GPRs when
                    F32 -> (1, 1, 4, fprs)
                    F64 -> (2, 1, 8, fprs)
#elif linux_TARGET_OS
        -- ... the SysV ABI doesn't.
                    F32 -> (0, 1, 4, fprs)
                    F64 -> (0, 1, 8, fprs)

        -- move the C result out of r3(/r4) or f1 into the destination;
        -- FRSP narrows f1 back to F32 for the out-of-line F32 primops
        moveResult reduceToF32 =
                | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
                | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
                | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
                | otherwise -> unitOL (MR r_dest r3)
                where rep = cmmRegRep dest
                      r_dest = getRegisterReg dest

        -- a floating-point primop becomes a call to the corresponding
        -- libm routine; the Bool records whether the double-precision
        -- result must afterwards be reduced to F32
        outOfLineFloatOp mop =
            mopExpr <- cmmMakeDynamicReference addImportNat True $
                mkForeignLabel functionName Nothing True
            let mopLabelOrExpr = case mopExpr of
                    CmmLit (CmmLabel lbl) -> Left lbl
            return (mopLabelOrExpr, reduce)
            (functionName, reduce) = case mop of
                MO_F32_Exp -> (FSLIT("exp"), True)
                MO_F32_Log -> (FSLIT("log"), True)
                MO_F32_Sqrt -> (FSLIT("sqrt"), True)
                MO_F32_Sin -> (FSLIT("sin"), True)
                MO_F32_Cos -> (FSLIT("cos"), True)
                MO_F32_Tan -> (FSLIT("tan"), True)
                MO_F32_Asin -> (FSLIT("asin"), True)
                MO_F32_Acos -> (FSLIT("acos"), True)
                MO_F32_Atan -> (FSLIT("atan"), True)
                MO_F32_Sinh -> (FSLIT("sinh"), True)
                MO_F32_Cosh -> (FSLIT("cosh"), True)
                MO_F32_Tanh -> (FSLIT("tanh"), True)
                MO_F32_Pwr -> (FSLIT("pow"), True)
                MO_F64_Exp -> (FSLIT("exp"), False)
                MO_F64_Log -> (FSLIT("log"), False)
                MO_F64_Sqrt -> (FSLIT("sqrt"), False)
                MO_F64_Sin -> (FSLIT("sin"), False)
                MO_F64_Cos -> (FSLIT("cos"), False)
                MO_F64_Tan -> (FSLIT("tan"), False)
                MO_F64_Asin -> (FSLIT("asin"), False)
                MO_F64_Acos -> (FSLIT("acos"), False)
                MO_F64_Atan -> (FSLIT("atan"), False)
                MO_F64_Sinh -> (FSLIT("sinh"), False)
                MO_F64_Cosh -> (FSLIT("cosh"), False)
                MO_F64_Tanh -> (FSLIT("tanh"), False)
                MO_F64_Pwr -> (FSLIT("pow"), False)
                other -> pprPanic "genCCall(ppc): unknown callish op"
                    (pprCallishMachOp other)
3707 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3709 #endif /* powerpc_TARGET_ARCH */
3712 -- -----------------------------------------------------------------------------
3713 -- Generating a table-branch
-- | Generate a table-based branch for a Cmm switch.  A 'Nothing' entry
-- in the table marks an unreachable case (it becomes a zero word).
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: one scaled indexed jump through an absolute jump table.
genSwitch expr ids = do
  (reg,e_code) <- getSomeReg expr
  lbl <- getNewLabelNat
      jumpTable = map jumpTableEntry ids
      op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
      code = e_code `appOL` toOL [
            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
            JMP_TBL op [ id | Just id <- ids ]
#elif powerpc_TARGET_ARCH
-- PPC, position-independent variant: the table holds label-minus-table
-- offsets, so the loaded entry is added back to the table address.
  (reg,e_code) <- getSomeReg expr
  tmp <- getNewRegNat I32
  lbl <- getNewLabelNat
  dynRef <- cmmMakeDynamicReference addImportNat False lbl
  (tableReg,t_code) <- getSomeReg $ dynRef
      jumpTable = map jumpTableEntryRel ids

      jumpTableEntryRel Nothing
        = CmmStaticLit (CmmInt 0 wordRep)
      jumpTableEntryRel (Just (BlockId id))
        = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
        where blockLabel = mkAsmTempLabel id

      code = e_code `appOL` t_code `appOL` toOL [
            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
            SLW tmp reg (RIImm (ImmInt 2)), -- scale index by 4
            LD I32 tmp (AddrRegReg tableReg tmp),
            ADD tmp tmp (RIReg tableReg),
            BCTR [ id | Just id <- ids ]
-- PPC, absolute variant: the table holds absolute label addresses,
-- reached via the HA/LO halves of the table label.
  (reg,e_code) <- getSomeReg expr
  tmp <- getNewRegNat I32
  lbl <- getNewLabelNat
      jumpTable = map jumpTableEntry ids
      code = e_code `appOL` toOL [
            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
            SLW tmp reg (RIImm (ImmInt 2)), -- scale index by 4
            ADDIS tmp tmp (HA (ImmCLbl lbl)),
            LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
            BCTR [ id | Just id <- ids ]
genSwitch expr ids = panic "ToDo: genSwitch"
-- | Build one word of a jump table: a zero word for an unreachable
-- ('Nothing') entry, otherwise the address of the branch target's
-- local assembler label.
jumpTableEntry mbBlock =
    case mbBlock of
        Nothing            -> CmmStaticLit (CmmInt 0 wordRep)
        Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
3782 -- -----------------------------------------------------------------------------
3784 -- -----------------------------------------------------------------------------
3787 -- -----------------------------------------------------------------------------
3788 -- 'condIntReg' and 'condFltReg': condition codes into registers
3790 -- Turn those condition codes into integers now (when they appear on
3791 -- the right hand side of an assignment).
3793 -- (If applicable) Do not fill the delay slots here; you will confuse the
3794 -- register allocator.
3796 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3798 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3800 #if alpha_TARGET_ARCH
-- Not implemented for Alpha: reaching either of these is a compiler bug.
condIntReg = panic "MachCode.condIntReg (not on Alpha)"
condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3803 #endif /* alpha_TARGET_ARCH */
3805 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3807 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | x86: materialise an integer comparison result as 0/1 in a register
-- via SETCC on a byte temporary.
condIntReg cond x y = do
    CondCode _ cond cond_code <- condIntCode cond x y
    tmp <- getNewRegNat I8
        code dst = cond_code `appOL` toOL [
                    SETCC cond (OpReg tmp),
                    MOV I32 (OpReg tmp) (OpReg dst),
                    AND I32 (OpImm (ImmInt 1)) (OpReg dst)
    -- NB. (1) The AND is needed here because the x86 only
    -- sets the low byte in the SETCC instruction.
    -- NB. (2) The extra temporary register is a hack to
    -- work around the fact that the setcc instructions only
    -- accept byte registers.  dst might not be a byte-able reg,
    -- but currently all free registers are byte-able, so we're
    -- guaranteed that a new temporary is byte-able.
    return (Any I32 code)
-- | x86: materialise a floating-point comparison result as 0/1 by
-- branching between a load of 0 and a load of 1 (labels lbl1/lbl2).
condFltReg cond x y = do
    lbl1 <- getBlockIdNat
    lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- condFltCode cond x y
        code dst = cond_code `appOL` toOL [
            MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
            MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
    -- SIGH, have to split up this block somehow...
    return (Any I32 code)
3846 #endif /* i386_TARGET_ARCH */
3848 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3850 #if sparc_TARGET_ARCH
-- SPARC: turn a condition into an integer 0/1 result.  The EQQ/NE
-- cases use branch-free subtract-with-carry sequences; the general
-- cases branch between loads of 0 and 1.
-- NOTE(review): extracted with lines missing (some equation heads and
-- 'let'/'in' lines are absent below).
condIntReg EQQ x (StInt 0)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any IntRep code__2)
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1
    src1 = registerName register1 tmp1
    code2 = registerCode register2 tmp2
    src2 = registerName register2 tmp2
    -- x == y  <=>  (x `xor` y) == 0
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any IntRep code__2)

condIntReg NE x (StInt 0)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any IntRep code__2)
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1
    src1 = registerName register1 tmp1
    code2 = registerCode register2 tmp2
    src2 = registerName register2 tmp2
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any IntRep code__2)

-- general integer case: branch on the condition between 0 and 1 loads
  = getBlockIdNat `thenNat` \ lbl1 ->
    getBlockIdNat `thenNat` \ lbl2 ->
    condIntCode cond x y `thenNat` \ condition ->
    code = condCode condition
    cond = condName condition
    code__2 dst = code `appOL` toOL [
        BI cond False (ImmCLbl lbl1), NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False (ImmCLbl lbl2), NOP,
        OR False g0 (RIImm (ImmInt 1)) dst,
    return (Any IntRep code__2)

-- floating-point case: same scheme, branching with BF instead of BI
  = getBlockIdNat `thenNat` \ lbl1 ->
    getBlockIdNat `thenNat` \ lbl2 ->
    condFltCode cond x y `thenNat` \ condition ->
    code = condCode condition
    cond = condName condition
    code__2 dst = code `appOL` toOL [
        BF cond False (ImmCLbl lbl1), NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False (ImmCLbl lbl2), NOP,
        OR False g0 (RIImm (ImmInt 1)) dst,
    return (Any IntRep code__2)
3945 #endif /* sparc_TARGET_ARCH */
3947 #if powerpc_TARGET_ARCH
-- PPC: materialise a condition as 0/1.  The CR bit selected by 'bit'
-- (negated via CRNOR when 'do_negate' holds) is rotated into the low
-- bit of the destination with RLWINM; a branching version is kept in
-- the block comment below.
-- NOTE(review): extracted with lines missing (including the closer of
-- the '{-' comment below).
condReg getCond = do
    lbl1 <- getBlockIdNat
    lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- getCond
{-  code dst = cond_code `appOL` toOL [
    code dst = cond_code
        RLWINM dst dst (bit + 1) 31 31
    negate_code | do_negate = unitOL (CRNOR bit bit bit)
    (bit, do_negate) = case cond of
    return (Any I32 code)

condIntReg cond x y = condReg (condIntCode cond x y)
condFltReg cond x y = condReg (condFltCode cond x y)
3990 #endif /* powerpc_TARGET_ARCH */
3993 -- -----------------------------------------------------------------------------
3994 -- 'trivial*Code': deal with trivial instructions
3996 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
3997 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
3998 -- Only look for constants on the right hand side, because that's
3999 -- where the generic optimizer will have put them.
4001 -- Similarly, for unary instructions, we don't have to worry about
4002 -- matching an StInt as the argument, because genericOpt will already
4003 -- have handled the constant-folding.
4007 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4008 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4009 -> Maybe (Operand -> Operand -> Instr)
4010 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4011 -> Maybe (Operand -> Operand -> Instr)
4012 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4013 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4015 -> CmmExpr -> CmmExpr -- the two arguments
4018 #ifndef powerpc_TARGET_ARCH
4021 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4022 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4023 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4024 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4026 -> CmmExpr -> CmmExpr -- the two arguments
4032 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4033 ,IF_ARCH_i386 ((Operand -> Instr)
4034 ,IF_ARCH_x86_64 ((Operand -> Instr)
4035 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4036 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4038 -> CmmExpr -- the one argument
4041 #ifndef powerpc_TARGET_ARCH
4044 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4045 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4046 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4047 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4049 -> CmmExpr -- the one argument
4053 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4055 #if alpha_TARGET_ARCH
-- Alpha: dyadic ALU op.  A literal (StInt) second operand is folded
-- into the instruction's RIImm form; otherwise both operands are
-- evaluated into registers.
trivialCode instr x (StInt y)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src1 = registerName register tmp
    src2 = ImmInt (fromInteger y)
    code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
    return (Any IntRep code__2)

trivialCode instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1 []
    src1 = registerName register1 tmp1
    code2 = registerCode register2 tmp2 []
    src2 = registerName register2 tmp2
    code__2 dst = asmSeqThen [code1, code2] .
                  mkSeqInstr (instr src1 (RIReg src2) dst)
    return (Any IntRep code__2)
-- Alpha: unary ALU op on an integer operand.
trivialUCode instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
    return (Any IntRep code__2)
-- Alpha: dyadic floating-point op; both operands via F64 temporaries.
trivialFCode _ instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat F64 `thenNat` \ tmp1 ->
    getNewRegNat F64 `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1
    src1 = registerName register1 tmp1

    code2 = registerCode register2 tmp2
    src2 = registerName register2 tmp2

    code__2 dst = asmSeqThen [code1 [], code2 []] .
                  mkSeqInstr (instr src1 src2 dst)
    return (Any F64 code__2)
-- Alpha: unary floating-point op.
trivialUFCode _ instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code . mkSeqInstr (instr src dst)
    return (Any F64 code__2)
4123 #endif /* alpha_TARGET_ARCH */
4125 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4127 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4130 The Rules of the Game are:
4132 * You cannot assume anything about the destination register dst;
4133 it may be anything, including a fixed reg.
4135 * You may compute an operand into a fixed reg, but you may not
4136 subsequently change the contents of that fixed reg. If you
4137 want to do so, first copy the value either to a temporary
4138 or into dst. You are free to modify dst even if it happens
4139 to be a fixed reg -- that's not your problem.
4141 * You cannot assume that a fixed reg will stay live over an
4142 arbitrary computation. The same applies to the dst reg.
4144 * Temporary regs obtained from getNewRegNat are distinct from
4145 each other and from all other regs, and stay live over
4146 arbitrary computations.
4148 --------------------
4150 SDM's version of The Rules:
4152 * If getRegister returns Any, that means it can generate correct
4153 code which places the result in any register, period. Even if that
4154 register happens to be read during the computation.
4156 Corollary #1: this means that if you are generating code for an
4157 operation with two arbitrary operands, you cannot assign the result
4158 of the first operand into the destination register before computing
4159 the second operand. The second operand might require the old value
4160 of the destination register.
4162 Corollary #2: A function might be able to generate more efficient
4163 code if it knows the destination register is a new temporary (and
4164 therefore not read by any of the sub-computations).
4166 * If getRegister returns Any, then the code it generates may modify only:
4167 (a) fresh temporaries
4168 (b) the destination register
4169 (c) known registers (eg. %ecx is used by shifts)
4170 In particular, it may *not* modify global registers, unless the global
4171 register happens to be the destination register.
-- x86: dyadic op for two-address instructions (dst := dst `op` src).
-- When the FIRST operand is a small literal and a commuted form of
-- the instruction exists, apply it directly to the destination.
trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
  | not (is64BitLit lit_a) = do
    b_code <- getAnyReg b
        = b_code dst `snocOL`
          revinstr (OpImm (litToImm lit_a)) (OpReg dst)
    return (Any rep code)

trivialCode rep instr maybe_revinstr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep
  -- We want the value of b to stay alive across the computation of a.
  -- But, we want to calculate a straight into the destination register,
  -- because the instruction only has two operands (dst := dst `op` src).
  -- The troublesome case is when the result of b is in the same register
  -- as the destination reg.  In this case, we have to save b in a
  -- new temporary across the computation of a.
     | dst `clashesWith` b_op =
          unitOL (MOV rep b_op (OpReg tmp)) `appOL`
          instr (OpReg tmp) (OpReg dst)
          instr b_op (OpReg dst)
  return (Any rep code)
  -- does this operand read the given register?
  reg `clashesWith` OpReg reg2 = reg == reg2
  reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
  reg `clashesWith` _ = False
-- x86: unary two-address op, applied in place on the destination.
-- NOTE(review): the 'let code dst = ...' lines were elided from this
-- extract.
trivialUCode rep instr x = do
    x_code <- getAnyReg x
    return (Any rep code)
4225 #if i386_TARGET_ARCH
-- i386 (x87): dyadic FP op; x is kept non-clobbered across the
-- evaluation of y.
trivialFCode pk instr x y = do
  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
  (y_reg, y_code) <- getSomeReg y
       instr pk x_reg y_reg dst
  return (Any pk code)
4240 #if x86_64_TARGET_ARCH
-- x86_64: we use the 2-operand SSE2 floating pt instructions.  ToDo:
-- improve on this by using some of the special cases in trivialCode
-- above.  y is kept non-clobbered across the evaluation of x.
trivialFCode pk instr x y = do
  (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
  x_code <- getAnyReg x
       instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
                (IF_ARCH_x86_64(OpReg,) dst)
  return (Any pk code)
-- unary floating-point op.
-- NOTE(review): the lines building 'code' were elided from this
-- extract, as was the surrounding CPP context.
trivialUFCode rep instr x = do
  (x_reg, x_code) <- getSomeReg x
  return (Any rep code)
4269 #endif /* i386_TARGET_ARCH */
4271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4273 #if sparc_TARGET_ARCH
-- SPARC: dyadic ALU op; a literal (StInt) second operand is encoded
-- with RIImm, otherwise both operands go via registers.
trivialCode instr x (StInt y)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src1 = registerName register tmp
    src2 = ImmInt (fromInteger y)
    code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
    return (Any IntRep code__2)

trivialCode instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1
    src1 = registerName register1 tmp1
    code2 = registerCode register2 tmp2
    src2 = registerName register2 tmp2
    code__2 dst = code1 `appOL` code2 `snocOL`
                  instr src1 (RIReg src2) dst
    return (Any IntRep code__2)
-- SPARC: dyadic FP op.  When the operand precisions differ, the F32
-- side is promoted to double (FxTOy F DF) before the operation, and
-- the result rep is F64.
trivialFCode pk instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat (registerRep register1)
    getNewRegNat (registerRep register2)
    getNewRegNat F64 `thenNat` \ tmp ->
    promote x = FxTOy F DF x tmp

    pk1 = registerRep register1
    code1 = registerCode register1 tmp1
    src1 = registerName register1 tmp1

    pk2 = registerRep register2
    code2 = registerCode register2 tmp2
    src2 = registerName register2 tmp2

    -- same precision: operate directly; otherwise promote the F32 side
    code1 `appOL` code2 `snocOL`
    instr (primRepToSize pk) src1 src2 dst
    else if pk1 == F32 then
    code1 `snocOL` promote src1 `appOL` code2 `snocOL`
    instr DF tmp src2 dst
    code1 `appOL` code2 `snocOL` promote src2 `snocOL`
    instr DF src1 tmp dst
    return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- SPARC: unary ALU op.
trivialUCode instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any IntRep code__2)
-- SPARC: unary FP op at the given precision pk.
trivialUFCode pk instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat pk `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)
4357 #endif /* sparc_TARGET_ARCH */
4359 #if powerpc_TARGET_ARCH
4362 Wolfgang's PowerPC version of The Rules:
4364 A slightly modified version of The Rules to take advantage of the fact
4365 that PowerPC instructions work on all registers and don't implicitly
4366 clobber any fixed registers.
4368 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4370 * If getRegister returns Any, then the code it generates may modify only:
4371 (a) fresh temporaries
4372 (b) the destination register
4373 It may *not* modify global registers, unless the global
4374 register happens to be the destination register.
4375 It may not clobber any other registers. In fact, only ccalls clobber any
4377 Also, it may not modify the counter register (used by genCCall).
4379 Corollary: If a getRegister for a subexpression returns Fixed, you need
4380 not move it to a fresh temporary before evaluating the next subexpression.
4381 The Fixed register won't be modified.
4382 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4384 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4385 the value of the destination register.
-- PPC: dyadic op.  A literal that 'makeImmediate' accepts for this rep
-- and signedness is encoded as RIImm; otherwise both operands are
-- evaluated into registers.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
        (src1, code1) <- getSomeReg x
        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
        return (Any rep code)

trivialCode rep signed instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
    return (Any rep code)
-- | Three-register PPC operation with no immediate form: evaluate both
-- operands into registers, then emit a single @instr dst src1 src2@.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = (xCode `appOL` yCode) `snocOL` instr dst xReg yReg
    return (Any rep assemble)
-- | Generate code for a unary operation: evaluate the single operand into
-- a register, then apply the two-register instruction constructor to a
-- caller-chosen destination.
trivialUCode rep instr x = do
    (operand, prolog) <- getSomeReg x
    let emit dst = prolog `snocOL` instr dst operand
    return (Any rep emit)
4414 -- There is no "remainder" instruction on the PPC, so we have to do it manually (divide, multiply back, and subtract).
4416 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | Compute a remainder in software: q = x `div` y, remainder = x - q*y.
-- The 'div' argument selects the signed or unsigned divide instruction
-- (DIVW or DIVWU), per the comment above.
4418 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4419 -> CmmExpr -> CmmExpr -> NatM Register
4420 remainderCode rep div x y = do
4421 (src1, code1) <- getSomeReg x
4422 (src2, code2) <- getSomeReg y
-- NOTE(review): lines inside the instruction list are elided in this
-- extract -- presumably the divide (div dst src1 src2) before the MULLW
-- and a subtract after it; confirm against the full source.
4423 let code dst = code1 `appOL` code2 `appOL` toOL [
4425 MULLW dst dst (RIReg src2),
4428 return (Any rep code)
4430 #endif /* powerpc_TARGET_ARCH */
4433 -- -----------------------------------------------------------------------------
4434 -- Coercing to/from integer/floating-point...
4436 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4437 -- conversions. We have to store temporaries in memory to move
4438 -- between the integer and the floating point register sets.
4440 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4441 -- pretend, on sparc at least, that double and float regs are separate
4442 -- kinds, so the value has to be computed into one kind before being
4443 -- explicitly "converted" to live in the other kind.
4445 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4446 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4448 #if sparc_TARGET_ARCH
4449 coerceDbl2Flt :: CmmExpr -> NatM Register
4450 coerceFlt2Dbl :: CmmExpr -> NatM Register
4453 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4455 #if alpha_TARGET_ARCH
-- Alpha: int -> float conversion body.  As the section comment above says,
-- the value is moved between the integer and floating-point register sets
-- through memory (a stack slot at spRel 0).
-- NOTE(review): the function-head lines (coerceInt2FP ... / coerceFP2Int
-- ...) and some instructions in each mkSeqInstrs list are elided in this
-- extract -- confirm against the full source.
4458 = getRegister x `thenNat` \ register ->
4459 getNewRegNat IntRep `thenNat` \ reg ->
4461 code = registerCode register reg
4462 src = registerName register reg
-- Load from the stack slot into the FP destination (LD TF); result is a
-- fresh F64 register.
4464 code__2 dst = code . mkSeqInstrs [
4466 LD TF dst (spRel 0),
4469 return (Any F64 code__2)
-- Alpha: float -> int conversion body, the mirror image: store from the
-- FP register (ST TF) to the stack slot and reload into an integer
-- register; result is a fresh IntRep register.
4473 = getRegister x `thenNat` \ register ->
4474 getNewRegNat F64 `thenNat` \ tmp ->
4476 code = registerCode register tmp
4477 src = registerName register tmp
4479 code__2 dst = code . mkSeqInstrs [
4481 ST TF tmp (spRel 0),
4484 return (Any IntRep code__2)
4486 #endif /* alpha_TARGET_ARCH */
4488 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4490 #if i386_TARGET_ARCH
-- i386: int -> float using the GITOF/GITOD pseudo-instructions, selected
-- by the *target* float width 'to'.
-- NOTE(review): the "let" line introducing these bindings is elided in
-- this extract -- confirm against the full source.
4492 coerceInt2FP from to x = do
4493 (x_reg, x_code) <- getSomeReg x
4495 opc = case to of F32 -> GITOF; F64 -> GITOD
4496 code dst = x_code `snocOL` opc x_reg dst
4497 -- ToDo: works for non-I32 reps?
4499 return (Any to code)
-- i386: float -> int using GFTOI/GDTOI, selected by the *source* float
-- width 'from' (the destination 'to' is an integer rep).
-- NOTE(review): the "let" line introducing these bindings is elided in
-- this extract -- confirm against the full source.
4503 coerceFP2Int from to x = do
4504 (x_reg, x_code) <- getSomeReg x
4506 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4507 code dst = x_code `snocOL` opc x_reg dst
4508 -- ToDo: works for non-I32 reps?
4510 return (Any to code)
4512 #endif /* i386_TARGET_ARCH */
4514 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4516 #if x86_64_TARGET_ARCH
-- x86_64: float -> int conversion via the SSE CVTSS2SI / CVTSD2SI
-- instructions.
--
-- BUG FIX: the conversion opcode must be chosen by the *source* float
-- width 'from'.  The destination rep 'to' is an integer rep, so matching
-- it against F32/F64 (as the original "case to of ..." did) could never
-- succeed and was a guaranteed pattern-match failure at code-generation
-- time.  This now mirrors the i386 coerceFP2Int, which dispatches on
-- 'from'.
coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
    let
        opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- x86_64: int -> float via the SSE CVTSI2SS / CVTSI2SD instructions,
-- selected by the *target* float width 'to'.
-- NOTE(review): the "let" line introducing these bindings is elided in
-- this extract -- confirm against the full source.
4526 coerceInt2FP from to x = do
4527 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4529 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4530 code dst = x_code `snocOL` opc x_op dst
4532 return (Any to code) -- works even if the destination rep is <I32
-- x86_64: float <-> float width change.  'to' names the target width, so
-- F32 narrows a double (CVTSD2SS) and F64 widens a single (CVTSS2SD).
-- NOTE(review): the "let" line introducing these bindings is elided in
-- this extract -- confirm against the full source.
4534 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4535 coerceFP2FP to x = do
4536 (x_reg, x_code) <- getSomeReg x
4538 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4539 code dst = x_code `snocOL` opc x_reg dst
4541 return (Any to code)
4545 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4547 #if sparc_TARGET_ARCH
-- SPARC: int -> float.  The int is bounced through a stack slot at
-- spRel (-2): store from the integer register (ST W), load into the FP
-- destination (LD W), then convert in place with FxTOy.
-- NOTE(review): the function head (binding 'pk' and 'x') is elided in
-- this extract; 'pk' is presumably the target float rep -- confirm
-- against the full source.
4550 = getRegister x `thenNat` \ register ->
4551 getNewRegNat IntRep `thenNat` \ reg ->
4553 code = registerCode register reg
4554 src = registerName register reg
4556 code__2 dst = code `appOL` toOL [
4557 ST W src (spRel (-2)),
4558 LD W (spRel (-2)) dst,
4559 FxTOy W (primRepToSize pk) dst dst]
4561 return (Any pk code__2)
-- SPARC: float -> int.  Convert inside the FP register file (FxTOy down
-- to word size), then bounce the 32-bit result through a stack slot at
-- spRel (-2) to move it into an integer register.
-- NOTE(review): the "let" line introducing these bindings is elided in
-- this extract -- confirm against the full source.
4564 coerceFP2Int fprep x
4565 = ASSERT(fprep == F64 || fprep == F32)
4566 getRegister x `thenNat` \ register ->
4567 getNewRegNat fprep `thenNat` \ reg ->
4568 getNewRegNat F32 `thenNat` \ tmp ->
4570 code = registerCode register reg
4571 src = registerName register reg
4572 code__2 dst = code `appOL` toOL [
-- Convert, store the word-sized result, reload into the int destination.
4573 FxTOy (primRepToSize fprep) W src tmp,
4574 ST W tmp (spRel (-2)),
4575 LD W (spRel (-2)) dst]
4577 return (Any IntRep code__2)
-- SPARC: double -> float narrowing.  Per the section comment, double and
-- float registers are treated as separate kinds here, so the value is
-- computed as a double and explicitly converted with FxTOy DF F.
-- NOTE(review): the head line ("coerceDbl2Flt x") and the enclosing
-- "return (Any F32 ...)" appear to be elided in this extract -- confirm
-- against the full source.
4581 = getRegister x `thenNat` \ register ->
4582 getNewRegNat F64 `thenNat` \ tmp ->
4583 let code = registerCode register tmp
4584 src = registerName register tmp
4587 (\dst -> code `snocOL` FxTOy DF F src dst))
-- SPARC: float -> double widening, the mirror of coerceDbl2Flt: compute
-- as a float and explicitly convert with FxTOy F DF.
-- NOTE(review): the head line ("coerceFlt2Dbl x") and the enclosing
-- "return (Any F64 ...)" appear to be elided in this extract -- confirm
-- against the full source.
4591 = getRegister x `thenNat` \ register ->
4592 getNewRegNat F32 `thenNat` \ tmp ->
4593 let code = registerCode register tmp
4594 src = registerName register tmp
4597 (\dst -> code `snocOL` FxTOy F DF src dst))
4599 #endif /* sparc_TARGET_ARCH */
4601 #if powerpc_TARGET_ARCH
-- PowerPC: int -> float using the classic magic-constant trick: build the
-- 64-bit bit pattern (0x43300000, x ^ 0x80000000) on the stack, load it
-- as a double, then subtract the constant double (0x43300000, 0x80000000)
-- held in a static literal at 'lbl' -- the difference is the signed value
-- of x as a double.
-- NOTE(review): several lines are elided in this extract (the "let", the
-- LDATA directive emitting the literal, the load of the constant via
-- 'addr', and the catch-all arms of maybe_exts/maybe_frsp); confirm
-- against the full source.
4602 coerceInt2FP fromRep toRep x = do
4603 (src, code) <- getSomeReg x
4604 lbl <- getNewLabelNat
4605 itmp <- getNewRegNat I32
4606 ftmp <- getNewRegNat F64
4607 dynRef <- cmmMakeDynamicReference addImportNat False lbl
4608 Amode addr addr_code <- getAmode dynRef
4610 code' dst = code `appOL` maybe_exts `appOL` toOL [
-- Static literal: the magic double's two 32-bit halves.
4613 CmmStaticLit (CmmInt 0x43300000 I32),
4614 CmmStaticLit (CmmInt 0x80000000 I32)],
-- Flip the int's sign bit (XORIS on the high halfword), store it as the
-- low word, store the 0x4330 exponent pattern as the high word, then
-- reload the pair as an F64.
4615 XORIS itmp src (ImmInt 0x8000),
4616 ST I32 itmp (spRel 3),
4617 LIS itmp (ImmInt 0x4330),
4618 ST I32 itmp (spRel 2),
4619 LD F64 ftmp (spRel 2)
4620 ] `appOL` addr_code `appOL` toOL [
4622 FSUB F64 dst ftmp dst
4623 ] `appOL` maybe_frsp dst
-- Sub-word ints must be sign-extended before the trick is applied.
4625 maybe_exts = case fromRep of
4626 I8 -> unitOL $ EXTS I8 src src
4627 I16 -> unitOL $ EXTS I16 src src
-- Round to single precision only when the target is F32.
4629 maybe_frsp dst = case toRep of
4630 F32 -> unitOL $ FRSP dst dst
4632 return (Any toRep code')
-- PowerPC: float -> int.  Convert inside the FP register file, spill the
-- 64-bit result to the stack (spRel 2), and reload only the low word
-- (spRel 3) into the integer destination.
-- NOTE(review): the "let" line and the convert instruction (presumably
-- FCTIWZ, per the "convert to int in FP reg" comment) are elided in this
-- extract -- confirm against the full source.
4634 coerceFP2Int fromRep toRep x = do
4635 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4636 (src, code) <- getSomeReg x
4637 tmp <- getNewRegNat F64
4639 code' dst = code `appOL` toOL [
4640 -- convert to int in FP reg
4642 -- store value (64bit) from FP to stack
4643 ST F64 tmp (spRel 2),
4644 -- read low word of value (high word is undefined)
4645 LD I32 dst (spRel 3)]
4646 return (Any toRep code')
4647 #endif /* powerpc_TARGET_ARCH */
4650 -- -----------------------------------------------------------------------------
4651 -- eXTRA_STK_ARGS_HERE
4653 -- We (allegedly) put the first six C-call arguments in registers;
4654 -- where do we start putting the rest of them?
4656 -- Moved from MachInstrs (SDM):
4658 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4659 eXTRA_STK_ARGS_HERE :: Int
4661 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))