1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
26 -- Our intermediate code:
27 import PprCmm ( pprExpr )
33 import StaticFlags ( opt_PIC )
34 import ForeignCall ( CCallConv(..) )
38 import qualified Outputable
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import TRACE ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Maybe ( fromJust )
53 -- -----------------------------------------------------------------------------
54 -- Top-level of the instruction selector
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.
type InstrBlock = OrdList Instr
-- | Select native code for one top-level Cmm unit.  A 'CmmProc' has each
-- basic block translated (which may also produce out-of-line data), and
-- is patched for PIC when a picBase register is in use; 'CmmData' passes
-- through unchanged.
cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): the scrutinee line ("case picBaseMb of") is elided in
  -- this listing; the two alternatives below are its arms.
  Just picBase -> initializePicBase picBase tops
  Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Select instructions for one basic block, then re-partition the flat
-- instruction stream: NEWBLOCK pseudo-instructions mark fresh block
-- boundaries introduced during selection, and LDATA pseudo-instructions
-- are lifted out into separate data sections.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- LDATA as out-of-line statics.  (A 'let' line is elided in this
  -- listing.)
  (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

  -- NEWBLOCK: everything accumulated so far becomes a block labelled 'id'.
  mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
    = ([], BasicBlock id instrs : blocks, statics)
  -- LDATA: out-of-line data, lifted into its own data section.
  mkBlocks (LDATA sec dat) (instrs,blocks,statics)
    = (instrs, blocks, CmmData sec dat:statics)
  -- ordinary instruction: prepend to the current block.
  mkBlocks instr (instrs,blocks,statics)
    = (instr:instrs, blocks, statics)

  return (BasicBlock id top : other_blocks, statics)
-- | Concatenate the instruction sequences of a statement list.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the equation head ("stmtsToInstrs stmts") is elided in
-- this listing.
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)
-- | Select instructions for a single Cmm statement.  Register and memory
-- assignments dispatch on the MachRep of the lhs/rhs: floating, 64-bit
-- (on 32-bit targets), or plain integer.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop       -> return nilOL
    CmmComment s -> return (unitOL (COMMENT s))

    -- NOTE(review): the "CmmAssign reg src" pattern line is elided in
    -- this listing; these are its guards.
    | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
    | kind == I64        -> assignReg_I64Code      reg src
    | otherwise          -> assignReg_IntCode kind reg src
    where kind = cmmRegRep reg

    -- NOTE(review): the "CmmStore addr src" pattern line is elided.
    | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
    | kind == I64        -> assignMem_I64Code      addr src
    | otherwise          -> assignMem_IntCode kind addr src
    where kind = cmmExprRep src

    CmmCall target result_regs args vols
       -> genCCall target result_regs args vols

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params    -> genJump arg
129 -- -----------------------------------------------------------------------------
130 -- General things for putting together code sequences
-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = let rep = cmmRegRep reg
    in  CmmMachOp (MO_Add rep)
                  [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
139 -- -----------------------------------------------------------------------------
140 -- Code gen for 64-bit arithmetic on 32-bit platforms
143 Simple support for generating 64-bit code (ie, 64 bit values and 64
144 bit assignments) on 32-bit platforms. Unlike the main code generator
145 we merely shoot for generating working code as simply as possible, and
146 pay little attention to code quality. Specifically, there is no
147 attempt to deal cleverly with the fixed-vs-floating register
148 distinction; all values are generated into (pairs of) floating
149 registers, even if this would mean some redundant reg-reg moves as a
150 result. Only one of the VRegUniques is returned, since it will be
151 of the VRegUniqueLo form, and the upper-half VReg can be determined
152 by applying getHiVRegFromLo to it.
-- | Result of selecting a 64-bit value on a 32-bit target: an
-- instruction sequence plus the virtual register holding the LOW word.
-- NOTE(review): the constructor line ("= ChildCode64 InstrBlock") is
-- elided in this listing.
data ChildCode64        -- a.k.a "Register64"
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified
#if WORD_SIZE_IN_BITS==32

-- Store a 64-bit rhs at the address denoted by the first expression.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- Assign a 64-bit rhs to a local register (pair).
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
-- Select code for a 64-bit expression; see 'ChildCode64'.
iselExpr64 :: CmmExpr -> NatM ChildCode64
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: little-endian, so the low word goes at addr and the high word
-- at addr+4.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  -- NOTE(review): a 'let' line is elided in this listing.
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: copy both halves of a 64-bit value into the register pair for
-- a local.  The high vregs of both sides follow via getHiVRegFromLo.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
  -- NOTE(review): a 'let' line is elided in this listing.
        r_dst_lo = mkVReg u_dst I32
        r_dst_hi = getHiVRegFromLo r_dst_lo
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
        mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
  -- NOTE(review): the 'return $' line is elided.
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- any other lvalue (e.g. a global) is not a valid 64-bit destination
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386: literal constant -- load each 32-bit half with a MOV immediate.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  -- r is the low word, q the high word of the literal
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
        MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- 64-bit load: two 32-bit loads, low word at addr, high at addr+4.
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- a local 64-bit register: no code needed, just hand back the low vreg
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))

-- we handle addition, but rather badly
-- add of a literal: ADD into the low half, ADC the carry into the high
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- general 64-bit add: ADD the low halves, ADC the high halves
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpReg r2lo) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- fallthrough: unsupported 64-bit expression
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */
272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

-- sparc: big-endian, so the HIGH word is stored at offset 0 and the
-- low word at offset 4.
assignMem_I64Code addrTree valueTree
   = iselExpr64 valueTree       `thenNat` \ (ChildCode64 vcode vrlo) ->
     getRegister addrTree       `thenNat` \ register_addr ->
     getNewRegNat IntRep        `thenNat` \ t_addr ->
     let rlo = VirtualRegI vrlo
         rhi = getHiVRegFromLo rlo
         code_addr = registerCode register_addr t_addr
         reg_addr = registerName register_addr t_addr

         mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
         mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
     -- NOTE(review): the 'in' line is elided in this listing.
     return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: move each half with an OR against %g0 (a reg-reg move).
-- NOTE(review): this still pattern-matches the old Stix 'StixTemp'
-- constructor -- this sparc branch looks bit-rotted; confirm.
assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
   = iselExpr64 valueTree       `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
     -- NOTE(review): the 'let' line is elided in this listing.
         r_dst_lo = mkVReg u_dst IntRep
         r_src_lo = VirtualRegI vr_src_lo
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         vcode `snocOL` mov_hi `snocOL` mov_lo

assignReg_I64Code lvalue valueTree
   = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

-- sparc 64-bit load: big-endian, high word at offset 0, low at 4.
-- NOTE(review): argument order here is "CmmLoad I64 addrTree" while
-- the x86/ppc equations use "CmmLoad addrTree I64" -- looks
-- bit-rotted; confirm against the CmmLoad constructor.
iselExpr64 (CmmLoad I64 addrTree)
   = getRegister addrTree       `thenNat` \ register_addr ->
     getNewRegNat IntRep        `thenNat` \ t_addr ->
     getNewRegNat IntRep        `thenNat` \ rlo ->
     let rhi = getHiVRegFromLo rlo
         code_addr = registerCode register_addr t_addr
         reg_addr = registerName register_addr t_addr
         mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
         mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
         -- NOTE(review): the 'in return (' lines are elided.
         ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: a local 64-bit register is copied into a fresh pair with two
-- reg-reg moves (OR against %g0), and the low vreg's unique returned.
-- BUG FIX(review): the body previously referenced an unbound name 'vu'
-- ("mkVReg vu IntRep"); the pattern binds the unique as 'uq'.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
   = getNewRegNat IntRep        `thenNat` \ r_dst_lo ->
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq IntRep
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- OR with %g0 is the canonical sparc register-to-register move
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     in
     return (
        ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
     )
-- sparc: a 64-bit C call returns its result in %o0 (high) and %o1
-- (low, per big-endian convention here); copy both into a fresh pair.
-- NOTE(review): this still matches the old Stix 'StCall' constructor
-- and passes an unbound 'kind' to genCCall -- this sparc branch looks
-- bit-rotted; confirm before relying on it.
iselExpr64 (StCall fn cconv I64 args)
  = genCCall fn cconv kind args `thenNat` \ call ->
    getNewRegNat IntRep         `thenNat` \ r_dst_lo ->
    let r_dst_hi = getHiVRegFromLo r_dst_lo
        mov_lo = mkMOV o0 r_dst_lo
        mov_hi = mkMOV o1 r_dst_hi
        mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
        -- NOTE(review): the 'in return (' lines are elided.
        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
                    (getVRegUnique r_dst_lo)

-- fallthrough: unsupported 64-bit expression (equation head elided)
   = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)

#endif /* sparc_TARGET_ARCH */
361 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if powerpc_TARGET_ARCH

-- | Compute addressing modes for the high word (offset 0) and low word
-- (offset 4) of a 64-bit memory operand, plus the set-up code.  Falls
-- back to materialising the address in a register when addr+4 cannot
-- be expressed as an addressing mode.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                          return (AddrRegImm hi_ptr (ImmInt 0),
                                  AddrRegImm hi_ptr (ImmInt 4),
-- powerpc: big-endian 64-bit store; the high word goes through
-- hi_addr (offset 0), the low word through lo_addr (offset 4).
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        -- NOTE(review): a 'let' line is elided in this listing.
                rhi = getHiVRegFromLo rlo

                mov_hi = ST I32 rhi hi_addr
                mov_lo = ST I32 rlo lo_addr
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- powerpc: assign to a local 64-bit register pair using MR (move
-- register) for each half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): a 'let' line is elided in this listing.
        r_dst_lo = mkVReg u_dst I32
        r_dst_hi = getHiVRegFromLo r_dst_lo
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = MR r_dst_lo r_src_lo
        mov_hi = MR r_dst_hi r_src_hi
        -- NOTE(review): the 'return $' line is elided.
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- any other lvalue is not a valid 64-bit destination
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

-- powerpc 64-bit load: big-endian, high word via hi_addr, low via
-- lo_addr.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- a local 64-bit register needs no code
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- powerpc: build a 64-bit literal 16 bits at a time.  Each 32-bit half
-- is assembled with LIS (load immediate shifted, sets the upper 16
-- bits) followed by OR-immediate of the lower 16 bits.
-- BUG FIX(review): the final instruction previously read
-- "OR rlo rlo (RIImm $ ImmInt half2)" -- half2 is bits 32..47 of the
-- literal and must be OR'd into the HIGH register to pair with
-- "LIS rhi half3", not clobbered into rlo.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- powerpc 64-bit add: ADDC (add carrying) on the low halves; the
-- carry is folded into the high halves by the following instruction
-- (elided in this listing).
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   -- NOTE(review): a 'let' line is elided in this listing.
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ ADDC rlo r1lo r2lo,
   return (ChildCode64 code rlo)

-- fallthrough: unsupported 64-bit expression (equation head elided)
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */
-- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
-- NOTE(review): the "data Register" header line is elided in this
-- listing; these are its two constructors.
        = Fixed   MachRep Reg InstrBlock
        | Any     MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register', leaving the
-- register / code payload untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep =
    case register of
      Fixed _ reg code -> Fixed rep reg code
      Any   _ codefn   -> Any   rep codefn
-- -----------------------------------------------------------------------------
-- Utils based on getRegister, below

-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head and the 'case'/'Fixed' alternative
-- are elided in this listing; only the 'Any' arm's body is visible.
  r <- getRegister expr
  tmp <- getNewRegNat rep
  return (tmp, code tmp)
-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg

getRegisterReg :: CmmReg -> Reg

-- local: its virtual register (body "= mkVReg u pk" elided in listing)
getRegisterReg (CmmLocal (LocalReg u pk))

-- global: must map to a real machine register on this platform
getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
--getRegister expr
--   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "getRegister(???)"

getRegister :: CmmExpr -> NatM Register

-- a CmmReg is already in a register: Fixed, with no code
getRegister (CmmReg reg)
  = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)

-- normalise reg+offset into a MachOp tree first
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

-- the PIC base register ("= do" line elided in this listing)
getRegister CmmPicBaseReg
      reg <- getPicBaseNat wordRep
      return (Fixed wordRep reg nilOL)

-- end of machine-"independent" bit; here we go on the rest...
#if alpha_TARGET_ARCH

-- alpha: a double literal lives in a read-only data segment and is
-- loaded through a temporary holding the label's address.
getRegister (StDouble d)
  = getBlockIdNat                   `thenNat` \ lbl ->
    getNewRegNat PtrRep             `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
            LDATA RoDataSegment lbl [
                  DATA TF [ImmLab (rational d)]
            LDA tmp (AddrImm (ImmCLbl lbl)),
            LD TF dst (AddrReg tmp)]
    -- NOTE(review): the 'in' line is elided in this listing.
    return (Any F64 code)
-- alpha unary PrimOps.  Negation/complement/coercions are selected
-- directly; the float transcendentals all fall through to a C call on
-- the corresponding libm routine ('fn' below).
getRegister (StPrim primop [x]) -- unary PrimOps
      -- NOTE(review): the "= case primop of" line is elided in listing.
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp    -> trivialUCode NOT x

      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode F64 (FNEG TF) x

      OrdOp -> coerceIntCode IntRep x

      Float2IntOp  -> coerceFP2Int    x
      Int2FloatOp  -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int    x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

      other_op -> getRegister (StCall fn CCallConv F64 [x])
        -- NOTE(review): a 'where' line is elided in this listing.
          fn = case other_op of
                 FloatExpOp   -> FSLIT("exp")
                 FloatLogOp   -> FSLIT("log")
                 FloatSqrtOp  -> FSLIT("sqrt")
                 FloatSinOp   -> FSLIT("sin")
                 FloatCosOp   -> FSLIT("cos")
                 FloatTanOp   -> FSLIT("tan")
                 FloatAsinOp  -> FSLIT("asin")
                 FloatAcosOp  -> FSLIT("acos")
                 FloatAtanOp  -> FSLIT("atan")
                 FloatSinhOp  -> FSLIT("sinh")
                 FloatCoshOp  -> FSLIT("cosh")
                 FloatTanhOp  -> FSLIT("tanh")
                 DoubleExpOp  -> FSLIT("exp")
                 DoubleLogOp  -> FSLIT("log")
                 DoubleSqrtOp -> FSLIT("sqrt")
                 DoubleSinOp  -> FSLIT("sin")
                 DoubleCosOp  -> FSLIT("cos")
                 DoubleTanOp  -> FSLIT("tan")
                 DoubleAsinOp -> FSLIT("asin")
                 DoubleAcosOp -> FSLIT("acos")
                 DoubleAtanOp -> FSLIT("atan")
                 DoubleSinhOp -> FSLIT("sinh")
                 DoubleCoshOp -> FSLIT("cosh")
                 DoubleTanhOp -> FSLIT("tanh")
    -- NOTE(review): a 'where' line is elided; alpha needs no PrimRep.
    pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- alpha dyadic PrimOps.  The machine only has LT/LE/EQ compares, so
-- every Gt/Ge is encoded by swapping the operand order.
-- BUG FIX(review): WordGeOp previously read "trivialCode (CMP ULE) x y";
-- like IntGeOp, AddrGeOp and WordGtOp it must swap to "y x".
getRegister (StPrim primop [x, y]) -- dyadic PrimOps
  = case primop of
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp  -> trivialCode (CMP LTT) y x
      IntGeOp  -> trivialCode (CMP LE) y x
      IntEqOp  -> trivialCode (CMP EQQ) x y
      IntNeOp  -> int_NE_code x y
      IntLtOp  -> trivialCode (CMP LTT) x y
      IntLeOp  -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      WordGeOp -> trivialCode (CMP ULE) y x
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp  -> trivialCode (ADD Q False) x y
      IntSubOp  -> trivialCode (SUB Q False) x y
      IntMulOp  -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp  -> trivialCode (REM Q False) x y

      WordAddOp  -> trivialCode (ADD Q False) x y
      WordSubOp  -> trivialCode (SUB Q False) x y
      WordMulOp  -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp  -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
      FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
      FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
      FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y

      DoubleAddOp -> trivialFCode F64 (FADD TF) x y
      DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode F64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp  -> trivialCode OR x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
      DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
    {- ------------------------------------------------------------
        Some bizarre special code for getting condition codes into
        registers.  Integer non-equality is a test for equality
        followed by an XOR with 1.  (Integer comparisons always set
        the result register to 0 or 1.)  Floating point comparisons of
        any kind leave the result in a floating point register, so we
        need to wrangle an integer register out of things.
    -}
    int_NE_code :: StixTree -> StixTree -> NatM Register
    -- NOTE(review): the equation head is elided in this listing.
       = trivialCode (CMP EQQ) x y      `thenNat` \ register ->
         getNewRegNat IntRep            `thenNat` \ tmp ->
         -- NOTE(review): a 'let' line is elided.
             code = registerCode register tmp
             src = registerName register tmp
             -- flip the 0/1 result of the equality test
             code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
         return (Any IntRep code__2)

    {- ------------------------------------------------------------
        Comments for int_NE_code also apply to cmpF_code
    -}
    -- NOTE(review): the "cmpF_code" signature head is elided.
        :: (Reg -> Reg -> Reg -> Instr)
        -> StixTree -> StixTree
    cmpF_code instr cond x y
       = trivialFCode pr instr x y      `thenNat` \ register ->
         getNewRegNat F64               `thenNat` \ tmp ->
         getBlockIdNat                  `thenNat` \ lbl ->
         -- NOTE(review): a 'let' line is elided.
             code = registerCode register tmp
             result = registerName register tmp
             -- set dst to 1, branch over the reset if cond holds
             code__2 dst = code . mkSeqInstrs [
                 OR zeroh (RIImm (ImmInt 1)) dst,
                 BF cond result (ImmCLbl lbl),
                 OR zeroh (RIReg zeroh) dst,
         return (Any IntRep code__2)
         -- NOTE(review): a 'where' line is elided.
         pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
------------------------------------------------------------
-- alpha: load from memory at the size of the rep
getRegister (CmmLoad pk mem)
  = getAmode mem                    `thenNat` \ amode ->
    -- NOTE(review): a 'let' line is elided in this listing.
        code = amodeCode amode
        src = amodeAddr amode
        size = primRepToSize pk
        code__2 dst = code . mkSeqInstr (LD size dst src)
    return (Any pk code__2)

-- alpha: small integer literals via OR with the zero register,
-- larger ones via LDI.  (The guard lines are elided in this listing.)
getRegister (StInt i)
        code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    return (Any IntRep code)
        code dst = mkSeqInstr (LDI Q dst src)
    return (Any IntRep code)
        src = ImmInt (fromInteger i)

-- alpha: address-valued literal via LDA.  (The pattern and the lines
-- binding 'imm' are elided in this listing.)
        code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    return (Any PtrRep code)
        imm__2 = case imm of Just x -> x

#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- NOTE(review): the "#if i386_TARGET_ARCH" line is elided in listing.

-- i386 (x87): a float literal is placed in a data section and loaded
-- through its label; x87 has no immediate float loads.
getRegister (CmmLit (CmmFloat f F32)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            CmmStaticLit (CmmFloat f F32)],
            GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
    return (Any F32 code)

-- i386: 0.0 and 1.0 have dedicated x87 loads (GLDZ / GLD1); other
-- doubles go through memory like the F32 case.  (The guard lines
-- "| d == 0.0" / "| d == 1.0" are elided in this listing.)
getRegister (CmmLit (CmmFloat d F64))
  = let code dst = unitOL (GLDZ dst)
    in  return (Any F64 code)

  = let code dst = unitOL (GLD1 dst)
    in  return (Any F64 code)

-- general double literal ("= do" equation head elided)
    lbl <- getNewLabelNat
    let code dst = toOL [
            CmmStaticLit (CmmFloat d F64)],
            GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
    return (Any F64 code)

#endif /* i386_TARGET_ARCH */
#if x86_64_TARGET_ARCH

-- x86_64 (SSE2): 0.0 is cheapest as xor reg,reg
getRegister (CmmLit (CmmFloat 0.0 rep)) = do
   let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
   return (Any rep code)

-- other float literals are loaded from a labelled literal pool
getRegister (CmmLit (CmmFloat f rep)) = do
   lbl <- getNewLabelNat
   let code dst = toOL [
            CmmStaticLit (CmmFloat f rep)],
            MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
            -- ToDo: should use %rip-relative
   return (Any rep code)

#endif /* x86_64_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I32 code)
#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I64 code)

-- on amd64, a plain 32-bit MOV already zero-extends to 64 bits
getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I32) addr
  return (Any I64 code)
#if x86_64_TARGET_ARCH

-- SSE has no FP negate instruction: flip the sign bit by XORing with
-- a 128-bit constant whose low lane holds the sign mask.
getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
  lbl <- getNewLabelNat
  -- NOTE(review): intermediate do-block lines are elided in listing.
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
            CmmStaticLit (CmmInt 0x80000000 I32),
            CmmStaticLit (CmmInt 0 I32),
            CmmStaticLit (CmmInt 0 I32),
            CmmStaticLit (CmmInt 0 I32)
        XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
            -- xorps, so we need the 128-bit constant
            -- ToDo: rip-relative
  return (Any F32 code)

getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
  lbl <- getNewLabelNat
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
            CmmStaticLit (CmmInt 0x8000000000000000 I64),
            CmmStaticLit (CmmInt 0 I64)
        -- gcc puts an unpck here.  Wonder if we need it.
        XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
            -- xorpd, so we need the 128-bit constant
            -- ToDo: rip-relative
  return (Any F64 code)
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- unary MachOps: negation, complement, and width conversions.  Many
-- "conversions" are no-ops at the machine level and merely re-tag the
-- result's representation (conversionNop).
getRegister (CmmMachOp mop [x]) -- unary MachOps
    -- NOTE(review): the "= case mop of" line is elided in listing.
      MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
      MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x

      MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
      MO_Not rep   -> trivialUCode rep (NOT rep) x

      -- narrowings: nops at the machine level.
      -- TODO: these are only nops if the arg is not a fixed register that
      -- can't be byte-addressed.
      MO_U_Conv I32 I8  -> conversionNop I32 x
      MO_S_Conv I32 I8  -> conversionNop I32 x
      MO_U_Conv I16 I8  -> conversionNop I16 x
      MO_S_Conv I16 I8  -> conversionNop I16 x
      MO_U_Conv I32 I16 -> conversionNop I32 x
      MO_S_Conv I32 I16 -> conversionNop I32 x
#if x86_64_TARGET_ARCH
      MO_U_Conv I64 I32 -> conversionNop I64 x
      MO_S_Conv I64 I32 -> conversionNop I64 x
      MO_U_Conv I64 I16 -> conversionNop I64 x
      MO_S_Conv I64 I16 -> conversionNop I64 x
      MO_U_Conv I64 I8  -> conversionNop I64 x
      MO_S_Conv I64 I8  -> conversionNop I64 x

      MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
      MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x

      -- widenings: zero-extend with MOVZxL, sign-extend with MOVSxL
      MO_U_Conv I8 I32  -> integerExtend I8 I32 MOVZxL x
      MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
      MO_U_Conv I8 I16  -> integerExtend I8 I16 MOVZxL x

      MO_S_Conv I8 I32  -> integerExtend I8 I32 MOVSxL x
      MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
      MO_S_Conv I8 I16  -> integerExtend I8 I16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_U_Conv I8 I64  -> integerExtend I8 I64 MOVZxL x
      MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
      MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
      MO_S_Conv I8 I64  -> integerExtend I8 I64 MOVSxL x
      MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
      MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      -- NOTE(review): the CPP lines selecting between the next two
      -- pairs (x87 nop vs. SSE coerceFP2FP) are elided in listing.
      MO_S_Conv F32 F64 -> conversionNop F64 x
      MO_S_Conv F64 F32 -> conversionNop F32 x

      MO_S_Conv F32 F64 -> coerceFP2FP F64 x
      MO_S_Conv F64 F32 -> coerceFP2FP F32 x

      -- NOTE(review): the "MO_S_Conv from to" pattern line is elided;
      -- these guards convert between int and float reps.
        | isFloatingRep from -> coerceFP2Int from to x
        | isFloatingRep to   -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)

    -- signed or unsigned extension.
    integerExtend from to instr expr = do
        (reg,e_code) <- if from == I8 then getByteReg expr
        -- NOTE(review): the else-branch and code-assembly lines
        -- are elided in this listing.
            instr from (OpReg reg) (OpReg dst)

    -- re-tag the representation without emitting any code
    conversionNop new_rep expr
        = do e_code <- getRegister expr
             return (swizzleRegisterRep e_code new_rep)
995 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
996 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
998 MO_Eq F32 -> condFltReg EQQ x y
999 MO_Ne F32 -> condFltReg NE x y
1000 MO_S_Gt F32 -> condFltReg GTT x y
1001 MO_S_Ge F32 -> condFltReg GE x y
1002 MO_S_Lt F32 -> condFltReg LTT x y
1003 MO_S_Le F32 -> condFltReg LE x y
1005 MO_Eq F64 -> condFltReg EQQ x y
1006 MO_Ne F64 -> condFltReg NE x y
1007 MO_S_Gt F64 -> condFltReg GTT x y
1008 MO_S_Ge F64 -> condFltReg GE x y
1009 MO_S_Lt F64 -> condFltReg LTT x y
1010 MO_S_Le F64 -> condFltReg LE x y
1012 MO_Eq rep -> condIntReg EQQ x y
1013 MO_Ne rep -> condIntReg NE x y
1015 MO_S_Gt rep -> condIntReg GTT x y
1016 MO_S_Ge rep -> condIntReg GE x y
1017 MO_S_Lt rep -> condIntReg LTT x y
1018 MO_S_Le rep -> condIntReg LE x y
1020 MO_U_Gt rep -> condIntReg GU x y
1021 MO_U_Ge rep -> condIntReg GEU x y
1022 MO_U_Lt rep -> condIntReg LU x y
1023 MO_U_Le rep -> condIntReg LEU x y
1025 #if i386_TARGET_ARCH
1026 MO_Add F32 -> trivialFCode F32 GADD x y
1027 MO_Sub F32 -> trivialFCode F32 GSUB x y
1029 MO_Add F64 -> trivialFCode F64 GADD x y
1030 MO_Sub F64 -> trivialFCode F64 GSUB x y
1032 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1033 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1036 #if x86_64_TARGET_ARCH
1037 MO_Add F32 -> trivialFCode F32 ADD x y
1038 MO_Sub F32 -> trivialFCode F32 SUB x y
1040 MO_Add F64 -> trivialFCode F64 ADD x y
1041 MO_Sub F64 -> trivialFCode F64 SUB x y
1043 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1044 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1047 MO_Add rep -> add_code rep x y
1048 MO_Sub rep -> sub_code rep x y
1050 MO_S_Quot rep -> div_code rep True True x y
1051 MO_S_Rem rep -> div_code rep True False x y
1052 MO_U_Quot rep -> div_code rep False True x y
1053 MO_U_Rem rep -> div_code rep False False x y
1055 #if i386_TARGET_ARCH
1056 MO_Mul F32 -> trivialFCode F32 GMUL x y
1057 MO_Mul F64 -> trivialFCode F64 GMUL x y
1060 #if x86_64_TARGET_ARCH
1061 MO_Mul F32 -> trivialFCode F32 MUL x y
1062 MO_Mul F64 -> trivialFCode F64 MUL x y
1065 MO_Mul rep -> let op = IMUL rep in
1066 trivialCode rep op (Just op) x y
1068 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1070 MO_And rep -> let op = AND rep in
1071 trivialCode rep op (Just op) x y
1072 MO_Or rep -> let op = OR rep in
1073 trivialCode rep op (Just op) x y
1074 MO_Xor rep -> let op = XOR rep in
1075 trivialCode rep op (Just op) x y
1077 {- Shift ops on x86s have constraints on their source, it
1078 either has to be Imm, CL or 1
1079 => trivialCode is not restrictive enough (sigh.)
1081 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1082 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1083 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1085 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1087 --------------------
-- | x86: overflow check for signed multiply.  Forms the widened product
-- of @a@ and @b@ (IMUL64 leaves it in res_hi:res_lo), then sets dst to 0
-- iff the high word equals the sign-extension of the low word, i.e. iff
-- the multiply did NOT overflow.  Non-zero dst means overflow.
-- NOTE(review): this extract is a numbered listing with gaps in the
-- embedded numbering (e.g. 1094, 1096); code kept verbatim.
1088 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1089 imulMayOflo rep a b = do
1090 res_lo <- getNewRegNat rep
1091 res_hi <- getNewRegNat rep
-- a must survive the evaluation of b, hence getNonClobberedReg for a.
1092 (a_reg, a_code) <- getNonClobberedReg a
1093 (b_reg, b_code) <- getSomeReg b
1095 code dst = a_code `appOL` b_code `appOL`
1097 MOV rep (OpReg a_reg) (OpReg res_hi),
1098 MOV rep (OpReg b_reg) (OpReg res_lo),
1099 IMUL64 res_hi res_lo, -- result in res_hi:res_lo
1100 SAR rep (OpImm (ImmInt 31)) (OpReg res_lo), -- sign extend lower part
1101 SUB rep (OpReg res_hi) (OpReg res_lo), -- compare against upper
1102 MOV rep (OpReg res_lo) (OpReg dst)
1103 -- dst==0 if high part == sign extended low part
1106 return (Any rep code)
1108 --------------------
-- | x86 shifts: the shift amount must be either an immediate or live in
-- CL (register ecx) -- see the block comment above about trivialCode not
-- being restrictive enough.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1109 shift_code :: MachRep
1110 -> (Operand -> Operand -> Instr)
-- Case 1: shift amount is a literal, so it can be an immediate operand.
1115 {- Case1: shift length as immediate -}
1116 shift_code rep instr x y@(CmmLit lit) = do
1117 x_code <- getAnyReg x
1120 = x_code dst `snocOL`
1121 instr (OpImm (litToImm lit)) (OpReg dst)
1123 return (Any rep code)
-- Case 2: shift amount is computed; it is evaluated into ecx (so the
-- count sits in CL) and x_reg is shifted in place, hence the Fixed result.
1125 {- Case2: shift length is complex (non-immediate) -}
1126 shift_code rep instr x y{-amount-} = do
1127 (x_reg, x_code) <- getNonClobberedReg x
1128 y_code <- getAnyReg y
1130 code = x_code `appOL`
1132 instr (OpReg ecx) (OpReg x_reg)
1134 return (Fixed rep x_reg code)
1136 --------------------
-- | Addition/subtraction helpers.  A literal second operand is routed
-- through add_int, which uses LEA-style three-operand addressing (base
-- register + immediate displacement) instead of a two-operand ADD/SUB.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1137 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1138 add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1139 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1141 --------------------
1142 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
-- x - lit is rewritten as x + (-lit) so add_int can handle both.
1143 sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
-- SUB is not commutative, hence Nothing for the reversed-operand variant.
1144 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1146 -- our three-operand add instruction:
1147 add_int rep x y = do
1148 (x_reg, x_code) <- getSomeReg x
1150 imm = ImmInt (fromInteger y)
1154 (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
1157 return (Any rep code)
1159 ----------------------
1160 div_code rep signed quotient x y = do
1161 (y_op, y_code) <- getOperand y -- cannot be clobbered
1162 x_code <- getAnyReg x
1164 widen | signed = CLTD rep
1165 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1167 instr | signed = IDIV
1170 code = y_code `appOL`
1172 toOL [widen, instr rep y_op]
1174 result | quotient = eax
1178 return (Fixed rep result code)
-- | x86/x86_64 getRegister cases for memory loads and literals.
-- NOTE(review): numbered listing with missing lines (guards, #else
-- branches); code kept verbatim.
-- Floating-point load: GLD on i386 (x87), plain MOV on x86_64 (SSE regs).
1181 getRegister (CmmLoad mem pk)
1184 Amode src mem_code <- getAmode mem
1186 code dst = mem_code `snocOL`
1187 IF_ARCH_i386(GLD pk src dst,
1188 MOV pk (OpAddr src) (OpReg dst))
1190 return (Any pk code)
1192 #if i386_TARGET_ARCH
1193 getRegister (CmmLoad mem pk)
1196 code <- intLoadCode (instr pk) mem
1197 return (Any pk code)
1199 instr I8 = MOVZxL pk
1202 -- we always zero-extend 8-bit loads, if we
1203 -- can't think of anything better. This is because
1204 -- we can't guarantee access to an 8-bit variant of every register
1205 -- (esi and edi don't have 8-bit variants), so to make things
1206 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1209 #if x86_64_TARGET_ARCH
1210 -- Simpler memory load code on x86_64
1211 getRegister (CmmLoad mem pk)
1213 code <- intLoadCode (MOV pk) mem
1214 return (Any pk code)
-- Zero literal: XOR reg,reg is the idiomatic (shorter) way to load 0.
1217 getRegister (CmmLit (CmmInt 0 rep))
1220 = unitOL (XOR rep (OpReg dst) (OpReg dst))
1222 return (Any rep code)
-- Other literals become an immediate MOV.
1224 getRegister (CmmLit lit)
1228 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1230 return (Any rep code)
1232 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build loader code for an integer load: compute the addressing mode
-- for @mem@, then hand back a function that, given a destination
-- register, appends the supplied load instruction (address -> register)
-- to the address-computation code.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mk_load mem = do
    Amode addr addr_code <- getAmode mem
    let load_into dst = addr_code `snocOL` mk_load (OpAddr addr) (OpReg dst)
    return load_into
1241 -- Compute an expression into *any* register, adding the appropriate
1242 -- move instruction if necessary.
-- NOTE(review): numbered listing; the body of getAnyReg (presumably
-- @anyReg =<< getRegister expr@) is partly on missing lines.
1243 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1245 r <- getRegister expr
-- An Any register already is dst-parameterised; a Fixed one needs a
-- trailing reg-to-reg move into the caller-chosen dst.
1248 anyReg :: Register -> NatM (Reg -> InstrBlock)
1249 anyReg (Any _ code) = return code
1250 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1252 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1253 -- Fixed registers might not be byte-addressable, so we make sure we've
1254 -- got a temporary, inserting an extra reg copy if necessary.
1255 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1256 #if x86_64_TARGET_ARCH
1257 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 variant (the #else is on a missing listing line): virtual regs are
-- fine as-is; anything else is copied into a fresh virtual register.
1259 getByteReg expr = do
1260 r <- getRegister expr
1263 tmp <- getNewRegNat rep
1264 return (tmp, code tmp)
1266 | isVirtualReg reg -> return (reg,code)
1268 tmp <- getNewRegNat rep
1269 return (tmp, code `snocOL` reg2reg rep reg tmp)
1270 -- ToDo: could optimise slightly by checking for byte-addressable
1271 -- real registers, but that will happen very rarely if at all.
1274 -- Another variant: this time we want the result in a register that cannot
1275 -- be modified by code to evaluate an arbitrary expression.
1276 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1277 getNonClobberedReg expr = do
1278 r <- getRegister expr
-- Any result: materialise it directly into a fresh (virtual) register.
1281 tmp <- getNewRegNat rep
1282 return (tmp, code tmp)
1284 -- only free regs can be clobbered
-- Fixed in a free real register: copy it out to a temp so later code
-- cannot overwrite it.  (Fixed virtual/pinned regs are safe as-is, on a
-- missing listing line.)
1285 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1286 tmp <- getNewRegNat rep
1287 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Emit a register-to-register move of the given rep: GMOV for x87
-- floats on i386, plain MOV otherwise.  (The equation head and the
-- x86_64 branch are on missing listing lines.)
1291 reg2reg :: MachRep -> Reg -> Reg -> Instr
1293 #if i386_TARGET_ARCH
1294 | isFloatingRep rep = GMOV src dst
1296 | otherwise = MOV rep (OpReg src) (OpReg dst)
1298 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1300 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1302 #if sparc_TARGET_ARCH
-- Float/double literals on sparc: plant the constant in the data
-- segment under a fresh label, then SETHI/LD it back into a register.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1304 getRegister (StFloat d)
1305 = getBlockIdNat `thenNat` \ lbl ->
1306 getNewRegNat PtrRep `thenNat` \ tmp ->
1307 let code dst = toOL [
1308 SEGMENT DataSegment,
1310 DATA F [ImmFloat d],
1311 SEGMENT TextSegment,
1312 SETHI (HI (ImmCLbl lbl)) tmp,
1313 LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1315 return (Any F32 code)
1317 getRegister (StDouble d)
1318 = getBlockIdNat `thenNat` \ lbl ->
1319 getNewRegNat PtrRep `thenNat` \ tmp ->
1320 let code dst = toOL [
1321 SEGMENT DataSegment,
1323 DATA DF [ImmDouble d],
1324 SEGMENT TextSegment,
1325 SETHI (HI (ImmCLbl lbl)) tmp,
1326 LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1328 return (Any F64 code)
-- | sparc getRegister for unary MachOps: negation, bit-complement,
-- conversions (many are no-ops on sparc), widenings via shift pairs, and
-- float transcendentals lowered to C library calls (the table below).
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1331 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1333 MO_NatS_Neg -> trivialUCode (SUB False False g0) x
1334 MO_Nat_Not -> trivialUCode (XNOR False g0) x
1335 MO_32U_to_8U -> trivialCode (AND False) x (StInt 255)
1337 MO_F32_Neg -> trivialUFCode F32 (FNEG F) x
1338 MO_F64_Neg -> trivialUFCode F64 (FNEG DF) x
1340 MO_F64_to_Flt -> coerceDbl2Flt x
1341 MO_F32_to_Dbl -> coerceFlt2Dbl x
1343 MO_F32_to_NatS -> coerceFP2Int F32 x
1344 MO_NatS_to_Flt -> coerceInt2FP F32 x
1345 MO_F64_to_NatS -> coerceFP2Int F64 x
1346 MO_NatS_to_Dbl -> coerceInt2FP F64 x
1348 -- Conversions which are a nop on sparc
1349 MO_32U_to_NatS -> conversionNop IntRep x
1350 MO_32S_to_NatS -> conversionNop IntRep x
1351 MO_NatS_to_32U -> conversionNop WordRep x
1352 MO_32U_to_NatU -> conversionNop WordRep x
1354 MO_NatU_to_NatS -> conversionNop IntRep x
1355 MO_NatS_to_NatU -> conversionNop WordRep x
1356 MO_NatP_to_NatU -> conversionNop WordRep x
1357 MO_NatU_to_NatP -> conversionNop PtrRep x
1358 MO_NatS_to_NatP -> conversionNop PtrRep x
1359 MO_NatP_to_NatS -> conversionNop IntRep x
1361 -- zero- and sign-extending widenings (shift left then right)
1362 MO_8U_to_32U -> integerExtend False 24 x
1363 MO_8U_to_NatU -> integerExtend False 24 x
1364 MO_8S_to_NatS -> integerExtend True 24 x
1365 MO_16U_to_NatU -> integerExtend False 16 x
1366 MO_16S_to_NatS -> integerExtend True 16 x
-- Single-precision transcendentals: promote to double, call the C
-- routine (which works at double precision), then narrow.
1369 let fixed_x = if is_float_op -- promote to double
1370 then CmmMachOp MO_F32_to_Dbl [x]
1373 getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
-- Widen by shifting left then arithmetically (signed) or logically
-- (unsigned) right by the same bit count.
1375 integerExtend signed nBits x
1377 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1378 [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1380 conversionNop new_rep expr
1381 = getRegister expr `thenNat` \ e_code ->
1382 return (swizzleRegisterRep e_code new_rep)
-- (Bool, routine name) table: True marks the single-precision ops that
-- need the promote-to-double treatment above.
1386 MO_F32_Exp -> (True, FSLIT("exp"))
1387 MO_F32_Log -> (True, FSLIT("log"))
1388 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
1390 MO_F32_Sin -> (True, FSLIT("sin"))
1391 MO_F32_Cos -> (True, FSLIT("cos"))
1392 MO_F32_Tan -> (True, FSLIT("tan"))
1394 MO_F32_Asin -> (True, FSLIT("asin"))
1395 MO_F32_Acos -> (True, FSLIT("acos"))
1396 MO_F32_Atan -> (True, FSLIT("atan"))
1398 MO_F32_Sinh -> (True, FSLIT("sinh"))
1399 MO_F32_Cosh -> (True, FSLIT("cosh"))
1400 MO_F32_Tanh -> (True, FSLIT("tanh"))
1402 MO_F64_Exp -> (False, FSLIT("exp"))
1403 MO_F64_Log -> (False, FSLIT("log"))
1404 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
1406 MO_F64_Sin -> (False, FSLIT("sin"))
1407 MO_F64_Cos -> (False, FSLIT("cos"))
1408 MO_F64_Tan -> (False, FSLIT("tan"))
1410 MO_F64_Asin -> (False, FSLIT("asin"))
1411 MO_F64_Acos -> (False, FSLIT("acos"))
1412 MO_F64_Atan -> (False, FSLIT("atan"))
1414 MO_F64_Sinh -> (False, FSLIT("sinh"))
1415 MO_F64_Cosh -> (False, FSLIT("cosh"))
1416 MO_F64_Tanh -> (False, FSLIT("tanh"))
1418 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
-- | sparc getRegister for dyadic MachOps: comparisons, integer and float
-- arithmetic, bit ops, shifts, and pow/div/rem via C library calls.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1422 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1424 MO_32U_Gt -> condIntReg GTT x y
1425 MO_32U_Ge -> condIntReg GE x y
1426 MO_32U_Eq -> condIntReg EQQ x y
1427 MO_32U_Ne -> condIntReg NE x y
1428 MO_32U_Lt -> condIntReg LTT x y
1429 MO_32U_Le -> condIntReg LE x y
1431 MO_Nat_Eq -> condIntReg EQQ x y
1432 MO_Nat_Ne -> condIntReg NE x y
1434 MO_NatS_Gt -> condIntReg GTT x y
1435 MO_NatS_Ge -> condIntReg GE x y
1436 MO_NatS_Lt -> condIntReg LTT x y
1437 MO_NatS_Le -> condIntReg LE x y
1439 MO_NatU_Gt -> condIntReg GU x y
1440 MO_NatU_Ge -> condIntReg GEU x y
1441 MO_NatU_Lt -> condIntReg LU x y
1442 MO_NatU_Le -> condIntReg LEU x y
1444 MO_F32_Gt -> condFltReg GTT x y
1445 MO_F32_Ge -> condFltReg GE x y
1446 MO_F32_Eq -> condFltReg EQQ x y
1447 MO_F32_Ne -> condFltReg NE x y
1448 MO_F32_Lt -> condFltReg LTT x y
1449 MO_F32_Le -> condFltReg LE x y
1451 MO_F64_Gt -> condFltReg GTT x y
1452 MO_F64_Ge -> condFltReg GE x y
1453 MO_F64_Eq -> condFltReg EQQ x y
1454 MO_F64_Ne -> condFltReg NE x y
1455 MO_F64_Lt -> condFltReg LTT x y
1456 MO_F64_Le -> condFltReg LE x y
1458 MO_Nat_Add -> trivialCode (ADD False False) x y
1459 MO_Nat_Sub -> trivialCode (SUB False False) x y
1461 MO_NatS_Mul -> trivialCode (SMUL False) x y
1462 MO_NatU_Mul -> trivialCode (UMUL False) x y
1463 MO_NatS_MulMayOflo -> imulMayOflo x y
-- Division has no native instruction used here; call libc millicode.
1465 -- ToDo: teach about V8+ SPARC div instructions
1466 MO_NatS_Quot -> idiv FSLIT(".div") x y
1467 MO_NatS_Rem -> idiv FSLIT(".rem") x y
1468 MO_NatU_Quot -> idiv FSLIT(".udiv") x y
1469 MO_NatU_Rem -> idiv FSLIT(".urem") x y
1471 MO_F32_Add -> trivialFCode F32 FADD x y
1472 MO_F32_Sub -> trivialFCode F32 FSUB x y
1473 MO_F32_Mul -> trivialFCode F32 FMUL x y
1474 MO_F32_Div -> trivialFCode F32 FDIV x y
1476 MO_F64_Add -> trivialFCode F64 FADD x y
1477 MO_F64_Sub -> trivialFCode F64 FSUB x y
1478 MO_F64_Mul -> trivialFCode F64 FMUL x y
1479 MO_F64_Div -> trivialFCode F64 FDIV x y
1481 MO_Nat_And -> trivialCode (AND False) x y
1482 MO_Nat_Or -> trivialCode (OR False) x y
1483 MO_Nat_Xor -> trivialCode (XOR False) x y
1485 MO_Nat_Shl -> trivialCode SLL x y
1486 MO_Nat_Shr -> trivialCode SRL x y
1487 MO_Nat_Sar -> trivialCode SRA x y
-- pow has no float variant in libc here: promote both args to double.
1489 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1490 [promote x, promote y])
1491 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1492 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1495 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1497 idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1499 --------------------
-- | sparc signed-multiply overflow check, mirroring the x86 version:
-- SMUL gives the low word, the high word (from a missing RDY line) is
-- compared against the sign-extension of the low word; dst /= 0 on
-- overflow.  Followed by sparc load/literal getRegister cases.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1500 imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1502 = getNewRegNat IntRep `thenNat` \ t1 ->
1503 getNewRegNat IntRep `thenNat` \ t2 ->
1504 getNewRegNat IntRep `thenNat` \ res_lo ->
1505 getNewRegNat IntRep `thenNat` \ res_hi ->
1506 getRegister a1 `thenNat` \ reg1 ->
1507 getRegister a2 `thenNat` \ reg2 ->
1508 let code1 = registerCode reg1 t1
1509 code2 = registerCode reg2 t2
1510 src1 = registerName reg1 t1
1511 src2 = registerName reg2 t2
1512 code dst = code1 `appOL` code2 `appOL`
1514 SMUL False src1 (RIReg src2) res_lo,
1516 SRA res_lo (RIImm (ImmInt 31)) res_lo,
1517 SUB False False res_lo (RIReg res_hi) dst
1520 return (Any IntRep code)
-- Memory load: address via getAmode, then a single LD of the right size.
1522 getRegister (CmmLoad pk mem) = do
1523 Amode src code <- getAmode mem
1525 size = primRepToSize pk
1526 code__2 dst = code `snocOL` LD size src dst
1528 return (Any pk code__2)
-- Small int literal: OR g0, imm -> dst (g0 is the hardwired zero reg).
1530 getRegister (StInt i)
1533 src = ImmInt (fromInteger i)
1534 code dst = unitOL (OR False g0 (RIImm src) dst)
1536 return (Any IntRep code)
-- Large immediates are built with SETHI of the high bits then OR of the
-- low bits.
1542 SETHI (HI imm__2) dst,
1543 OR False dst (RIImm (LO imm__2)) dst]
1545 return (Any PtrRep code)
1547 = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1550 imm__2 = case imm of Just x -> x
1552 #endif /* sparc_TARGET_ARCH */
1554 #if powerpc_TARGET_ARCH
-- PowerPC memory loads.  The ASSERT checks that floating loads go to a
-- floating-point register class.
1555 getRegister (CmmLoad mem pk)
1558 Amode addr addr_code <- getAmode mem
1559 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1560 addr_code `snocOL` LD pk dst addr
1561 return (Any pk code)
1563 -- catch simple cases of zero- or sign-extended load
-- lbz zero-extends by itself, so an 8-bit unsigned widening load is free.
1564 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1565 Amode addr addr_code <- getAmode mem
1566 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1568 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1570 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1571 Amode addr addr_code <- getAmode mem
1572 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- LA is the load-algebraic (sign-extending) variant used for signed I16.
1574 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1575 Amode addr addr_code <- getAmode mem
1576 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- | PowerPC getRegister for unary MachOps: not/neg, float conversions,
-- and integer conversions (narrowing is a no-op, widening uses EXTS for
-- signed or an AND mask for unsigned).
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1578 getRegister (CmmMachOp mop [x]) -- unary MachOps
1580 MO_Not rep -> trivialUCode rep NOT x
1582 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1583 MO_S_Conv F32 F64 -> conversionNop F64 x
1586 | from == to -> conversionNop to x
1587 | isFloatingRep from -> coerceFP2Int from to x
1588 | isFloatingRep to -> coerceInt2FP from to x
1590 -- narrowing is a nop: we treat the high bits as undefined
1591 MO_S_Conv I32 to -> conversionNop to x
1592 MO_S_Conv I16 I8 -> conversionNop I8 x
1593 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1594 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1597 | from == to -> conversionNop to x
1598 -- narrowing is a nop: we treat the high bits as undefined
1599 MO_U_Conv I32 to -> conversionNop to x
1600 MO_U_Conv I16 I8 -> conversionNop I8 x
1601 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1602 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1604 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1605 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1606 MO_S_Neg rep -> trivialUCode rep NEG x
1609 conversionNop new_rep expr
1610 = do e_code <- getRegister expr
1611 return (swizzleRegisterRep e_code new_rep)
-- | PowerPC getRegister for dyadic MachOps.  Sub-word integer operands
-- are first widened to I32 via extendSExpr/extendUExpr, since the
-- machine ops work on full words.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1613 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1615 MO_Eq F32 -> condFltReg EQQ x y
1616 MO_Ne F32 -> condFltReg NE x y
1618 MO_S_Gt F32 -> condFltReg GTT x y
1619 MO_S_Ge F32 -> condFltReg GE x y
1620 MO_S_Lt F32 -> condFltReg LTT x y
1621 MO_S_Le F32 -> condFltReg LE x y
1623 MO_Eq F64 -> condFltReg EQQ x y
1624 MO_Ne F64 -> condFltReg NE x y
1626 MO_S_Gt F64 -> condFltReg GTT x y
1627 MO_S_Ge F64 -> condFltReg GE x y
1628 MO_S_Lt F64 -> condFltReg LTT x y
1629 MO_S_Le F64 -> condFltReg LE x y
1631 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1632 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1634 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1635 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1636 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1637 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1639 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1640 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1641 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1642 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1644 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1645 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1646 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1647 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1649 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1650 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1651 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1652 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1654 -- optimize addition with 32-bit immediate
-- Immediates that don't fit addi are split into ADDIS (high adjusted
-- half, HA) plus ADD of the low half.
1658 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1659 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1662 (src, srcCode) <- getSomeReg x
1663 let imm = litToImm lit
1664 code dst = srcCode `appOL` toOL [
1665 ADDIS dst src (HA imm),
1666 ADD dst dst (RIImm (LO imm))
1668 return (Any I32 code)
1669 _ -> trivialCode I32 True ADD x y
1671 MO_Add rep -> trivialCode rep True ADD x y
1673 case y of -- subfi ('subtract from' with immediate) doesn't exist
1674 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1675 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1676 _ -> trivialCodeNoImm rep SUBF y x
1678 MO_Mul rep -> trivialCode rep True MULLW x y
1680 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1682 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1683 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1685 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1686 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1688 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1689 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1691 MO_And rep -> trivialCode rep False AND x y
1692 MO_Or rep -> trivialCode rep False OR x y
1693 MO_Xor rep -> trivialCode rep False XOR x y
1695 MO_Shl rep -> trivialCode rep False SLW x y
1696 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1697 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- | PowerPC literals: small ints become an LI; floats go to a read-only
-- data section (PIC-aware via cmmMakeDynamicReference) and are loaded
-- back; other literals are built with LIS/OR halves.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1699 getRegister (CmmLit (CmmInt i rep))
1700 | Just imm <- makeImmediate rep True i
1702 code dst = unitOL (LI dst imm)
1704 return (Any rep code)
1706 getRegister (CmmLit (CmmFloat f frep)) = do
1707 lbl <- getNewLabelNat
1708 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1709 Amode addr addr_code <- getAmode dynRef
1711 LDATA ReadOnlyData [CmmDataLabel lbl,
1712 CmmStaticLit (CmmFloat f frep)]
1713 `consOL` (addr_code `snocOL` LD frep dst addr)
1714 return (Any frep code)
1716 getRegister (CmmLit lit)
1717 = let rep = cmmLitRep lit
1721 OR dst dst (RIImm (LO imm))
1723 in return (Any rep code)
1725 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1727 -- extend?Rep: wrap integer expression of type rep
1728 -- in a conversion to I32
-- | Wrap an integer expression of width @rep@ in a sign-extending
-- conversion to I32; a no-op when it is already I32.
extendSExpr rep x
  | I32 <- rep = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]
-- | As 'extendSExpr', but zero-extending.
extendUExpr rep x
  | I32 <- rep = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1734 #endif /* powerpc_TARGET_ARCH */
1737 -- -----------------------------------------------------------------------------
1738 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An Amode pairs the addressing mode itself with the code that computes
-- the registers it mentions (see the usage rule in the comment below).
1740 data Amode = Amode AddrMode InstrBlock
1743 Now, given a tree (the argument to an CmmLoad) that references memory,
1744 produce a suitable addressing mode.
1746 A Rule of the Game (tm) for Amodes: use of the addr bit must
1747 immediately follow use of the code part, since the code part puts
1748 values in registers which the addr then refers to. So you can't put
1749 anything in between, lest it overwrite some of those registers. If
1750 you need to do some other computation between the code part and use of
1751 the addr bit, first store the effective address from the amode in a
1752 temporary, then do the other computation, and then use the temporary:
1756 ... other computation ...
1760 getAmode :: CmmExpr -> NatM Amode
-- Normalise reg+offset forms first so the arch-specific equations below
-- only have to deal with MachOp shapes.
1761 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1763 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1765 #if alpha_TARGET_ARCH
-- Alpha addressing modes: reg +/- immediate, plain immediate, or a
-- fallback that evaluates the address into a register.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1767 getAmode (StPrim IntSubOp [x, StInt i])
1768 = getNewRegNat PtrRep `thenNat` \ tmp ->
1769 getRegister x `thenNat` \ register ->
1771 code = registerCode register tmp
1772 reg = registerName register tmp
1773 off = ImmInt (-(fromInteger i))
1775 return (Amode (AddrRegImm reg off) code)
1777 getAmode (StPrim IntAddOp [x, StInt i])
1778 = getNewRegNat PtrRep `thenNat` \ tmp ->
1779 getRegister x `thenNat` \ register ->
1781 code = registerCode register tmp
1782 reg = registerName register tmp
1783 off = ImmInt (fromInteger i)
1785 return (Amode (AddrRegImm reg off) code)
1789 = return (Amode (AddrImm imm__2) id)
1792 imm__2 = case imm of Just x -> x
1795 = getNewRegNat PtrRep `thenNat` \ tmp ->
1796 getRegister other `thenNat` \ register ->
1798 code = registerCode register tmp
1799 reg = registerName register tmp
1801 return (Amode (AddrReg reg) code)
1803 #endif /* alpha_TARGET_ARCH */
1805 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1807 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86 addressing modes: base +/- displacement, base + scaled index
-- (scale 1/2/4/8 from a shift of 0..3), or absolute immediate.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1809 -- This is all just ridiculous, since it carefully undoes
1810 -- what mangleIndexTree has just done.
1811 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1812 | not (is64BitLit lit)
1813 -- ASSERT(rep == I32)???
1814 = do (x_reg, x_code) <- getSomeReg x
1815 let off = ImmInt (-(fromInteger i))
1816 return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1818 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1819 | not (is64BitLit lit)
1820 -- ASSERT(rep == I32)???
1821 = do (x_reg, x_code) <- getSomeReg x
1822 let off = ImmInt (fromInteger i)
1823 return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1825 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1826 -- recognised by the next rule.
1827 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1829 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1831 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1832 [y, CmmLit (CmmInt shift _)]])
1833 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1834 = do (x_reg, x_code) <- getNonClobberedReg x
1835 -- x must be in a temp, because it has to stay live over y_code
1836 -- we could compare x_reg and y_reg and do something better here...
1837 (y_reg, y_code) <- getSomeReg y
1839 code = x_code `appOL` y_code
1840 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1841 return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
1844 getAmode (CmmLit lit) | not (is64BitLit lit)
1845 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1848 (reg,code) <- getSomeReg expr
1849 return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
1851 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1853 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1855 #if sparc_TARGET_ARCH
-- sparc addressing modes: reg +/- 13-bit immediate, reg + reg, SETHI'd
-- large immediate, or a register fallback.
-- NOTE(review): numbered listing with missing lines (the fitsIn13Bits
-- guards, among others); code kept verbatim.
1857 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1859 = getNewRegNat PtrRep `thenNat` \ tmp ->
1860 getRegister x `thenNat` \ register ->
1862 code = registerCode register tmp
1863 reg = registerName register tmp
1864 off = ImmInt (-(fromInteger i))
1866 return (Amode (AddrRegImm reg off) code)
1869 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1871 = getNewRegNat PtrRep `thenNat` \ tmp ->
1872 getRegister x `thenNat` \ register ->
1874 code = registerCode register tmp
1875 reg = registerName register tmp
1876 off = ImmInt (fromInteger i)
1878 return (Amode (AddrRegImm reg off) code)
1880 getAmode (CmmMachOp MO_Nat_Add [x, y])
1881 = getNewRegNat PtrRep `thenNat` \ tmp1 ->
1882 getNewRegNat IntRep `thenNat` \ tmp2 ->
1883 getRegister x `thenNat` \ register1 ->
1884 getRegister y `thenNat` \ register2 ->
1886 code1 = registerCode register1 tmp1
1887 reg1 = registerName register1 tmp1
1888 code2 = registerCode register2 tmp2
1889 reg2 = registerName register2 tmp2
1890 code__2 = code1 `appOL` code2
1892 return (Amode (AddrRegReg reg1 reg2) code__2)
-- Large immediate: SETHI the high bits into a temp, LO bits as offset.
1896 = getNewRegNat PtrRep `thenNat` \ tmp ->
1898 code = unitOL (SETHI (HI imm__2) tmp)
1900 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1903 imm__2 = case imm of Just x -> x
1906 = getNewRegNat PtrRep `thenNat` \ tmp ->
1907 getRegister other `thenNat` \ register ->
1909 code = registerCode register tmp
1910 reg = registerName register tmp
1913 return (Amode (AddrRegImm reg off) code)
1915 #endif /* sparc_TARGET_ARCH */
1917 #ifdef powerpc_TARGET_ARCH
-- PowerPC addressing modes: reg +/- 16-bit immediate (makeImmediate
-- checks the fit), large displacements via ADDIS of the HA half, an
-- absolute literal via LIS, reg + reg, or a register fallback.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1918 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1919 | Just off <- makeImmediate I32 True (-i)
1921 (reg, code) <- getSomeReg x
1922 return (Amode (AddrRegImm reg off) code)
1925 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1926 | Just off <- makeImmediate I32 True i
1928 (reg, code) <- getSomeReg x
1929 return (Amode (AddrRegImm reg off) code)
1931 -- optimize addition with 32-bit immediate
1933 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1935 tmp <- getNewRegNat I32
1936 (src, srcCode) <- getSomeReg x
1937 let imm = litToImm lit
1938 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1939 return (Amode (AddrRegImm tmp (LO imm)) code)
1941 getAmode (CmmLit lit)
1943 tmp <- getNewRegNat I32
1944 let imm = litToImm lit
1945 code = unitOL (LIS tmp (HA imm))
1946 return (Amode (AddrRegImm tmp (LO imm)) code)
1948 getAmode (CmmMachOp (MO_Add I32) [x, y])
1950 (regX, codeX) <- getSomeReg x
1951 (regY, codeY) <- getSomeReg y
1952 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1956 (reg, code) <- getSomeReg other
1959 return (Amode (AddrRegImm reg off) code)
1960 #endif /* powerpc_TARGET_ARCH */
1962 -- -----------------------------------------------------------------------------
1963 -- getOperand: sometimes any operand will do.
1965 -- getNonClobberedOperand: the value of the operand will remain valid across
1966 -- the computation of an arbitrary expression, unless the expression
1967 -- is computed directly into a register which the operand refers to
1968 -- (see trivialCode where this function is used for an example).
1970 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Get an operand whose value survives the evaluation of an arbitrary
-- expression: immediates are always safe; memory operands are safe only
-- if the amode's registers can't be clobbered (otherwise LEA the address
-- into a fresh temp); anything else goes through getNonClobberedReg.
-- NOTE(review): numbered listing with missing lines; code kept verbatim.
1972 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1973 getNonClobberedOperand (CmmLit lit)
1974 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1975 return (OpImm (litToImm lit), nilOL)
1976 getNonClobberedOperand (CmmLoad mem pk)
-- i386 x87 floats and I64 can't be plain memory operands; x86_64 is fine.
1977 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1978 Amode src mem_code <- getAmode mem
1980 if (amodeCouldBeClobbered src)
1982 tmp <- getNewRegNat wordRep
1983 return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
1984 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1987 return (OpAddr src', save_code `appOL` mem_code)
1988 getNonClobberedOperand e = do
1989 (reg, code) <- getNonClobberedReg e
1990 return (OpReg reg, code)
-- | Does this addressing mode mention any register that evaluating an
-- arbitrary expression might overwrite?  Only free real registers are
-- at risk of being clobbered.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered = any regClobbered . addrModeRegs

regClobbered reg = case reg of
  RealReg rr -> isFastTrue (freeReg rr)
  _          -> False
1998 -- getOperand: the operand is not required to remain valid across the
1999 -- computation of an arbitrary expression.
1998 -- getOperand: the operand is not required to remain valid across the
1999 -- computation of an arbitrary expression.
-- NOTE(review): listing line 2008 (the fallback equation head) is
-- missing from this extract; code kept verbatim.
2000 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2001 getOperand (CmmLit lit)
2002 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2003 return (OpImm (litToImm lit), nilOL)
2004 getOperand (CmmLoad mem pk)
2005 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2006 Amode src mem_code <- getAmode mem
2007 return (OpAddr src, mem_code)
2009 (reg, code) <- getNonClobberedReg e
2010 return (OpReg reg, code)
-- | Can this expression be used directly as an x86 operand (memory
-- reference, or a non-64-bit non-float immediate)?
-- NOTE(review): the catch-all @isOperand _ = False@ equation presumably
-- sits on a missing listing line after 2015 -- confirm against source.
2012 isOperand :: CmmExpr -> Bool
2013 isOperand (CmmLoad _ _) = True
2014 isOperand (CmmLit lit) = not (is64BitLit lit) &&
2015 not (isFloatingRep (cmmLitRep lit))
-- | Like getOperand but never yields an immediate: a suitable load
-- becomes a memory operand, everything else a non-clobbered register.
-- NOTE(review): the fallback equation head (listing line 2023) is
-- missing from this extract; code kept verbatim.
2018 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2019 getRegOrMem (CmmLoad mem pk)
2020 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2021 Amode src mem_code <- getAmode mem
2022 return (OpAddr src, mem_code)
2024 (reg, code) <- getNonClobberedReg e
2025 return (OpReg reg, code)
2027 #if x86_64_TARGET_ARCH
-- True iff the literal does not fit in a sign-extended 32-bit immediate
-- (the range most x86_64 instructions accept).  On other targets (the
-- #else is on a missing listing line) every literal fits.
2028 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2030 is64BitLit x = False
2033 -- -----------------------------------------------------------------------------
2034 -- The 'CondCode' type: Condition codes passed up the tree.
-- The Bool records whether the comparison was floating-point (see the
-- i386 condFltCode, which always tests EQQ after GCMP).
2036 data CondCode = CondCode Bool Cond InstrBlock
2038 -- Set up a condition code for a conditional branch.
2040 getCondCode :: CmmExpr -> NatM CondCode
2042 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2044 #if alpha_TARGET_ARCH
2045 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2046 #endif /* alpha_TARGET_ARCH */
2048 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2050 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2051 -- yes, they really do seem to want exactly the same!
-- Dispatch a comparison MachOp to condFltCode (float reps) or
-- condIntCode (everything else) with the matching condition.
2053 getCondCode (CmmMachOp mop [x, y])
2054 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2056 MO_Eq F32 -> condFltCode EQQ x y
2057 MO_Ne F32 -> condFltCode NE x y
2059 MO_S_Gt F32 -> condFltCode GTT x y
2060 MO_S_Ge F32 -> condFltCode GE x y
2061 MO_S_Lt F32 -> condFltCode LTT x y
2062 MO_S_Le F32 -> condFltCode LE x y
2064 MO_Eq F64 -> condFltCode EQQ x y
2065 MO_Ne F64 -> condFltCode NE x y
2067 MO_S_Gt F64 -> condFltCode GTT x y
2068 MO_S_Ge F64 -> condFltCode GE x y
2069 MO_S_Lt F64 -> condFltCode LTT x y
2070 MO_S_Le F64 -> condFltCode LE x y
2072 MO_Eq rep -> condIntCode EQQ x y
2073 MO_Ne rep -> condIntCode NE x y
2075 MO_S_Gt rep -> condIntCode GTT x y
2076 MO_S_Ge rep -> condIntCode GE x y
2077 MO_S_Lt rep -> condIntCode LTT x y
2078 MO_S_Le rep -> condIntCode LE x y
2080 MO_U_Gt rep -> condIntCode GU x y
2081 MO_U_Ge rep -> condIntCode GEU x y
2082 MO_U_Lt rep -> condIntCode LU x y
2083 MO_U_Le rep -> condIntCode LEU x y
2085 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2087 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2089 #elif powerpc_TARGET_ARCH
2091 -- almost the same as everywhere else - but we need to
2092 -- extend small integers to 32 bit first
-- Integer comparisons widen sub-word operands with extendSExpr (signed)
-- or extendUExpr (unsigned/equality) before comparing.
2094 getCondCode (CmmMachOp mop [x, y])
2096 MO_Eq F32 -> condFltCode EQQ x y
2097 MO_Ne F32 -> condFltCode NE x y
2099 MO_S_Gt F32 -> condFltCode GTT x y
2100 MO_S_Ge F32 -> condFltCode GE x y
2101 MO_S_Lt F32 -> condFltCode LTT x y
2102 MO_S_Le F32 -> condFltCode LE x y
2104 MO_Eq F64 -> condFltCode EQQ x y
2105 MO_Ne F64 -> condFltCode NE x y
2107 MO_S_Gt F64 -> condFltCode GTT x y
2108 MO_S_Ge F64 -> condFltCode GE x y
2109 MO_S_Lt F64 -> condFltCode LTT x y
2110 MO_S_Le F64 -> condFltCode LE x y
2112 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2113 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2115 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2116 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2117 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2118 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2120 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2121 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2122 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2123 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2125 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2127 getCondCode other = panic "getCondCode(2)(powerpc)"
2133 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2134 -- passed back up the tree.
2136 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2138 #if alpha_TARGET_ARCH
2139 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2140 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2141 #endif /* alpha_TARGET_ARCH */
2143 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2144 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2146 -- memory vs immediate
-- x86/x86_64 'condIntCode': four equations, ordered from most to least
-- specific operand shapes, each ending in CondCode False (integer
-- comparison, not float).
-- NOTE(review): this listing has gaps; the "let"/"in" lines and the
-- binding of 'imm' (presumably imm = litToImm lit) are missing here.

-- memory vs immediate: compare the literal directly against the memory
-- operand, no register load needed.  Guarded on the literal fitting in
-- 32 bits (is64BitLit) because CMP takes at most a 32-bit immediate.
2147 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2148 Amode x_addr x_code <- getAmode x
2151 code = x_code `snocOL`
2152 CMP pk (OpImm imm) (OpAddr x_addr)
2154 return (CondCode False cond code)

-- anything vs zero: TEST reg,reg sets the flags without needing an
-- immediate, and is the idiomatic x86 compare-with-zero.
2157 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2158 (x_reg, x_code) <- getSomeReg x
2160 code = x_code `snocOL`
2161 TEST pk (OpReg x_reg) (OpReg x_reg)
2163 return (CondCode False cond code)

2165 -- anything vs operand
-- x goes in a non-clobbered register so evaluating y cannot trash it.
2166 condIntCode cond x y | isOperand y = do
2167 (x_reg, x_code) <- getNonClobberedReg x
2168 (y_op, y_code) <- getOperand y
2170 code = x_code `appOL` y_code `snocOL`
2171 CMP (cmmExprRep x) y_op (OpReg x_reg)
2173 return (CondCode False cond code)

2175 -- anything vs anything
-- General case: y into a non-clobbered reg, x as register-or-memory.
2176 condIntCode cond x y = do
2177 (y_reg, y_code) <- getNonClobberedReg y
2178 (x_op, x_code) <- getRegOrMem x
2180 code = y_code `appOL`
2182 CMP (cmmExprRep x) (OpReg y_reg) x_op
2184 return (CondCode False cond code)
2187 #if i386_TARGET_ARCH
-- i386 'condFltCode': x87-based float comparison via the GCMP
-- pseudo-instruction.  Per the comment below, GCMP itself performs the
-- requested test and leaves only the zero flag, so the returned
-- CondCode always tests EQQ.  The ASSERT documents that GCMP supports
-- only the ordered comparisons listed.
2188 condFltCode cond x y
2189 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2190 (x_reg, x_code) <- getNonClobberedReg x
2191 (y_reg, y_code) <- getSomeReg y
2193 code = x_code `appOL` y_code `snocOL`
2194 GCMP cond x_reg y_reg
2195 -- The GCMP insn does the test and sets the zero flag if comparable
2196 -- and true. Hence we always supply EQQ as the condition to test.
2197 return (CondCode True EQQ code)
2198 #endif /* i386_TARGET_ARCH */
2200 #if x86_64_TARGET_ARCH
2201 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2202 -- an operand, but the right must be a reg. We can probably do better
2203 -- than this general case...
-- x86_64: SSE2 comparison.  CMP at a float MachRep is emitted as
-- ucomiss/ucomisd; the result flags require the unsigned condition
-- variants, hence condToUnsigned on the way out.
2204 condFltCode cond x y = do
2205 (x_reg, x_code) <- getNonClobberedReg x
2206 (y_op, y_code) <- getOperand y
2208 code = x_code `appOL`
2210 CMP (cmmExprRep x) y_op (OpReg x_reg)
2212 return (CondCode False (condToUnsigned cond) code)
2213 -- we need to use the unsigned comparison operators on the
2214 -- result of this comparison.
2217 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2219 #if sparc_TARGET_ARCH
2221 condIntCode cond x (StInt y)
2223 = getRegister x `thenNat` \ register ->
2224 getNewRegNat IntRep `thenNat` \ tmp ->
2226 code = registerCode register tmp
2227 src1 = registerName register tmp
2228 src2 = ImmInt (fromInteger y)
2229 code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2231 return (CondCode False cond code__2)
2233 condIntCode cond x y
2234 = getRegister x `thenNat` \ register1 ->
2235 getRegister y `thenNat` \ register2 ->
2236 getNewRegNat IntRep `thenNat` \ tmp1 ->
2237 getNewRegNat IntRep `thenNat` \ tmp2 ->
2239 code1 = registerCode register1 tmp1
2240 src1 = registerName register1 tmp1
2241 code2 = registerCode register2 tmp2
2242 src2 = registerName register2 tmp2
2243 code__2 = code1 `appOL` code2 `snocOL`
2244 SUB False True src1 (RIReg src2) g0
2246 return (CondCode False cond code__2)
2249 condFltCode cond x y
2250 = getRegister x `thenNat` \ register1 ->
2251 getRegister y `thenNat` \ register2 ->
2252 getNewRegNat (registerRep register1)
2254 getNewRegNat (registerRep register2)
2256 getNewRegNat F64 `thenNat` \ tmp ->
2258 promote x = FxTOy F DF x tmp
2260 pk1 = registerRep register1
2261 code1 = registerCode register1 tmp1
2262 src1 = registerName register1 tmp1
2264 pk2 = registerRep register2
2265 code2 = registerCode register2 tmp2
2266 src2 = registerName register2 tmp2
2270 code1 `appOL` code2 `snocOL`
2271 FCMP True (primRepToSize pk1) src1 src2
2272 else if pk1 == F32 then
2273 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2274 FCMP True DF tmp src2
2276 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2277 FCMP True DF src1 tmp
2279 return (CondCode True cond code__2)
2281 #endif /* sparc_TARGET_ARCH */
2283 #if powerpc_TARGET_ARCH
2284 -- ###FIXME: I16 and I8!
2284 -- ###FIXME: I16 and I8!
-- PowerPC 'condIntCode': if the literal fits in an immediate field
-- (makeImmediate succeeds; signedness chosen by the condition), emit a
-- single cmpi/cmpli-style compare, else fall through to the two
-- register case below.  CMPL is the unsigned compare, CMP the signed
-- one; both are emitted at I32 (operands were widened by getCondCode).
2285 condIntCode cond x (CmmLit (CmmInt y rep))
2286 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2288 (src1, code) <- getSomeReg x
2290 code' = code `snocOL`
2291 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2292 return (CondCode False cond code')

-- General register/register integer compare.
2294 condIntCode cond x y = do
2295 (src1, code1) <- getSomeReg x
2296 (src2, code2) <- getSomeReg y
2298 code' = code1 `appOL` code2 `snocOL`
2299 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2300 return (CondCode False cond code')

-- Float compare: FCMP, then for GE/LE a CRNOR on the condition
-- register bits so that an unordered result (NaN) does not satisfy the
-- condition.  NOTE(review): listing gap here -- the remaining case
-- alternatives (presumably a default returning code') and the start of
-- the 'where' introducing ltbit/eqbit/gtbit are missing.
2302 condFltCode cond x y = do
2303 (src1, code1) <- getSomeReg x
2304 (src2, code2) <- getSomeReg y
2306 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2307 code'' = case cond of -- twiddle CR to handle unordered case
2308 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2309 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2312 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2313 return (CondCode True cond code'')
2315 #endif /* powerpc_TARGET_ARCH */
2317 -- -----------------------------------------------------------------------------
2318 -- Generating assignments
2320 -- Assignments are really at the heart of the whole code generation
2321 -- business. Almost all top-level nodes of any real importance are
2322 -- assignments, which correspond to loads, stores, or register
2323 -- transfers. If we're really lucky, some of the register transfers
2324 -- will go away, because we can use the destination register to
2325 -- complete the code generation for the right hand side. This only
2326 -- fails when the right hand side is forced into a fixed register
2327 -- (e.g. the result of a call).
2329 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2330 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2332 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2333 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2335 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2337 #if alpha_TARGET_ARCH
2339 assignIntCode pk (CmmLoad dst _) src
2340 = getNewRegNat IntRep `thenNat` \ tmp ->
2341 getAmode dst `thenNat` \ amode ->
2342 getRegister src `thenNat` \ register ->
2344 code1 = amodeCode amode []
2345 dst__2 = amodeAddr amode
2346 code2 = registerCode register tmp []
2347 src__2 = registerName register tmp
2348 sz = primRepToSize pk
2349 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2353 assignIntCode pk dst src
2354 = getRegister dst `thenNat` \ register1 ->
2355 getRegister src `thenNat` \ register2 ->
2357 dst__2 = registerName register1 zeroh
2358 code = registerCode register2 dst__2
2359 src__2 = registerName register2 dst__2
2360 code__2 = if isFixed register2
2361 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2366 #endif /* alpha_TARGET_ARCH */
2368 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2370 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2372 -- integer assignment to memory
-- x86/x86_64 integer assignment to memory: evaluate the source first
-- (as a register-or-immediate operand), then the address, then a
-- single MOV.  The ordering is safe per the NOTE below.
2373 assignMem_IntCode pk addr src = do
2374 Amode addr code_addr <- getAmode addr
2375 (code_src, op_src) <- get_op_RI src
2377 code = code_src `appOL`
2379 MOV pk op_src (OpAddr addr)
2380 -- NOTE: op_src is stable, so it will still be valid
2381 -- after code_addr. This may involve the introduction
2382 -- of an extra MOV to a temporary register, but we hope
2383 -- the register allocator will get rid of it.

-- Helper: a source expression as either an immediate (small literal)
-- or a non-clobbered register.
-- NOTE(review): listing gap -- the left-hand side of the second
-- equation (presumably "get_op_RI op") is missing before line 2391.
2387 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2388 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2389 = return (nilOL, OpImm (litToImm lit))
2391 = do (reg,code) <- getNonClobberedReg op
2392 return (code, OpReg reg)

2395 -- Assign; dst is a reg, rhs is mem
-- Load straight from memory into the destination register.
2396 assignReg_IntCode pk reg (CmmLoad src _) = do
2397 load_code <- intLoadCode (MOV pk) src
2398 return (load_code (getRegisterReg reg))

2400 -- dst is a reg, but src could be anything
-- getAnyReg yields code parameterised on the destination register, so
-- the result can be computed directly into it.
2401 assignReg_IntCode pk reg src = do
2402 code <- getAnyReg src
2403 return (code (getRegisterReg reg))
2405 #endif /* i386_TARGET_ARCH */
2407 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2409 #if sparc_TARGET_ARCH
2411 assignMem_IntCode pk addr src
2412 = getNewRegNat IntRep `thenNat` \ tmp ->
2413 getAmode addr `thenNat` \ amode ->
2414 getRegister src `thenNat` \ register ->
2416 code1 = amodeCode amode
2417 dst__2 = amodeAddr amode
2418 code2 = registerCode register tmp
2419 src__2 = registerName register tmp
2420 sz = primRepToSize pk
2421 code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2425 assignReg_IntCode pk reg src
2426 = getRegister src `thenNat` \ register2 ->
2427 getRegisterReg reg `thenNat` \ register1 ->
2428 getNewRegNat IntRep `thenNat` \ tmp ->
2430 dst__2 = registerName register1 tmp
2431 code = registerCode register2 dst__2
2432 src__2 = registerName register2 dst__2
2433 code__2 = if isFixed register2
2434 then code `snocOL` OR False g0 (RIReg src__2) dst__2
2439 #endif /* sparc_TARGET_ARCH */
2441 #if powerpc_TARGET_ARCH
-- PowerPC integer store: source into some register, compute the
-- address, then a single ST of the given MachRep.
2443 assignMem_IntCode pk addr src = do
2444 (srcReg, code) <- getSomeReg src
2445 Amode dstAddr addr_code <- getAmode addr
2446 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr

2448 -- dst is a reg, but src could be anything
-- If the source's Register is Any, compute straight into dst; if it is
-- Fixed to freg, append an MR (move register) into dst.
-- NOTE(review): listing gap -- the "= do" line and the scrutinising
-- "return $ case r of" line appear to be missing from this listing.
2449 assignReg_IntCode pk reg src
2451 r <- getRegister src
2453 Any _ code -> code dst
2454 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2456 dst = getRegisterReg reg
2458 #endif /* powerpc_TARGET_ARCH */
2461 -- -----------------------------------------------------------------------------
2462 -- Floating-point assignments
2464 #if alpha_TARGET_ARCH
2466 assignFltCode pk (CmmLoad dst _) src
2467 = getNewRegNat pk `thenNat` \ tmp ->
2468 getAmode dst `thenNat` \ amode ->
2469 getRegister src `thenNat` \ register ->
2471 code1 = amodeCode amode []
2472 dst__2 = amodeAddr amode
2473 code2 = registerCode register tmp []
2474 src__2 = registerName register tmp
2475 sz = primRepToSize pk
2476 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2480 assignFltCode pk dst src
2481 = getRegister dst `thenNat` \ register1 ->
2482 getRegister src `thenNat` \ register2 ->
2484 dst__2 = registerName register1 zeroh
2485 code = registerCode register2 dst__2
2486 src__2 = registerName register2 dst__2
2487 code__2 = if isFixed register2
2488 then code . mkSeqInstr (FMOV src__2 dst__2)
2493 #endif /* alpha_TARGET_ARCH */
2495 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2497 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2499 -- Floating point assignment to memory
2499 -- Floating point assignment to memory
-- The IF_ARCH_i386 CPP macro picks the store instruction: GST (x87
-- float store pseudo-op) on i386, a plain MOV on x86_64 (SSE regs).
-- NOTE(review): listing gap -- the line returning 'code' after the
-- definition is missing here.
2500 assignMem_FltCode pk addr src = do
2501 (src_reg, src_code) <- getNonClobberedReg src
2502 Amode addr addr_code <- getAmode addr
2504 code = src_code `appOL`
2506 IF_ARCH_i386(GST pk src_reg addr,
2507 MOV pk (OpReg src_reg) (OpAddr addr))
2510 -- Floating point assignment to a register/temporary
-- Same shape as the integer case: compute directly into the
-- destination register.
2511 assignReg_FltCode pk reg src = do
2512 src_code <- getAnyReg src
2513 return (src_code (getRegisterReg reg))
2515 #endif /* i386_TARGET_ARCH */
2517 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2519 #if sparc_TARGET_ARCH
2521 -- Floating point assignment to memory
2522 assignMem_FltCode pk addr src
2523 = getNewRegNat pk `thenNat` \ tmp1 ->
2524 getAmode addr `thenNat` \ amode ->
2525 getRegister src `thenNat` \ register ->
2527 sz = primRepToSize pk
2528 dst__2 = amodeAddr amode
2530 code1 = amodeCode amode
2531 code2 = registerCode register tmp1
2533 src__2 = registerName register tmp1
2534 pk__2 = registerRep register
2535 sz__2 = primRepToSize pk__2
2537 code__2 = code1 `appOL` code2 `appOL`
2539 then unitOL (ST sz src__2 dst__2)
2540 else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2544 -- Floating point assignment to a register/temporary
2545 -- Why is this so bizarrely ugly?
2546 assignReg_FltCode pk reg src
2547 = getRegisterReg reg `thenNat` \ register1 ->
2548 getRegister src `thenNat` \ register2 ->
2550 pk__2 = registerRep register2
2551 sz__2 = primRepToSize pk__2
2553 getNewRegNat pk__2 `thenNat` \ tmp ->
2555 sz = primRepToSize pk
2556 dst__2 = registerName register1 g0 -- must be Fixed
2557 reg__2 = if pk /= pk__2 then tmp else dst__2
2558 code = registerCode register2 reg__2
2559 src__2 = registerName register2 reg__2
2562 code `snocOL` FxTOy sz__2 sz src__2 dst__2
2563 else if isFixed register2 then
2564 code `snocOL` FMOV sz src__2 dst__2
2570 #endif /* sparc_TARGET_ARCH */
2572 #if powerpc_TARGET_ARCH
-- On PowerPC the float assignment paths are identical to the integer
-- ones (ST / MR handle float registers too), so simply alias them.
2575 assignMem_FltCode = assignMem_IntCode
2576 assignReg_FltCode = assignReg_IntCode
2578 #endif /* powerpc_TARGET_ARCH */
2581 -- -----------------------------------------------------------------------------
2582 -- Generating an non-local jump
2584 -- (If applicable) Do not fill the delay slots here; you will confuse the
2585 -- register allocator.
2587 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2589 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2591 #if alpha_TARGET_ARCH
2593 genJump (CmmLabel lbl)
2594 | isAsmTemp lbl = returnInstr (BR target)
2595 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2597 target = ImmCLbl lbl
2600 = getRegister tree `thenNat` \ register ->
2601 getNewRegNat PtrRep `thenNat` \ tmp ->
2603 dst = registerName register pv
2604 code = registerCode register pv
2605 target = registerName register pv
2607 if isFixed register then
2608 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2610 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2612 #endif /* alpha_TARGET_ARCH */
2614 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2616 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64 'genJump': three operand shapes.
-- Jump through memory: compute the addressing mode, JMP indirect.
2618 genJump (CmmLoad mem pk) = do
2619 Amode target code <- getAmode mem
2620 return (code `snocOL` JMP (OpAddr target))

-- Jump to a literal (label): direct immediate JMP.
2622 genJump (CmmLit lit) = do
2623 return (unitOL (JMP (OpImm (litToImm lit))))

-- General case: evaluate into a register, JMP through it.
-- NOTE(review): listing gap -- the equation header (presumably
-- "genJump expr = do") is missing before line 2626.
2626 (reg,code) <- getSomeReg expr
2627 return (code `snocOL` JMP (OpReg reg))
2629 #endif /* i386_TARGET_ARCH */
2631 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2633 #if sparc_TARGET_ARCH
2635 genJump (CmmLabel lbl)
2636 = return (toOL [CALL (Left target) 0 True, NOP])
2638 target = ImmCLbl lbl
2641 = getRegister tree `thenNat` \ register ->
2642 getNewRegNat PtrRep `thenNat` \ tmp ->
2644 code = registerCode register tmp
2645 target = registerName register tmp
2647 return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2649 #endif /* sparc_TARGET_ARCH */
2651 #if powerpc_TARGET_ARCH
2652 genJump (CmmLit (CmmLabel lbl))
2653 = return (unitOL $ JMP lbl)
2657 (target,code) <- getSomeReg tree
2658 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2659 #endif /* powerpc_TARGET_ARCH */
2662 -- -----------------------------------------------------------------------------
2663 -- Unconditional branches
-- Unconditional branch to a local basic block, one definition per
-- architecture.  SPARC additionally emits a NOP to fill the branch
-- delay slot (see the delay-slot comments elsewhere in this file).
-- NOTE(review): the "#endif" line closing each arch section is absent
-- from this listing (numbering gaps at 2669/2673/2677).
2665 genBranch :: BlockId -> NatM InstrBlock
2667 #if alpha_TARGET_ARCH
2668 genBranch id = return (unitOL (BR id))
2671 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2672 genBranch id = return (unitOL (JXX ALWAYS id))
2675 #if sparc_TARGET_ARCH
2676 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2679 #if powerpc_TARGET_ARCH
2680 genBranch id = return (unitOL (BCC ALWAYS id))
2684 -- -----------------------------------------------------------------------------
2685 -- Conditional jumps
2688 Conditional jumps are always to local labels, so we can use branch
2689 instructions. We peek at the arguments to decide what kind of
2692 ALPHA: For comparisons with 0, we're laughing, because we can just do
2693 the desired conditional branch.
2695 I386: First, we have to ensure that the condition
2696 codes are set according to the supplied comparison operation.
2698 SPARC: First, we have to ensure that the condition codes are set
2699 according to the supplied comparison operation. We generate slightly
2700 different code for floating point comparisons, because a floating
2701 point operation cannot directly precede a @BF@. We assume the worst
2702 and fill that slot with a @NOP@.
2704 SPARC: Do not fill the delay slots here; you will confuse the register
2710 :: BlockId -- the branch target
2711 -> CmmExpr -- the condition on which to branch
2714 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2716 #if alpha_TARGET_ARCH
2718 genCondJump id (StPrim op [x, StInt 0])
2719 = getRegister x `thenNat` \ register ->
2720 getNewRegNat (registerRep register)
2723 code = registerCode register tmp
2724 value = registerName register tmp
2725 pk = registerRep register
2726 target = ImmCLbl lbl
2728 returnSeq code [BI (cmpOp op) value target]
2730 cmpOp CharGtOp = GTT
2732 cmpOp CharEqOp = EQQ
2734 cmpOp CharLtOp = LTT
2743 cmpOp WordGeOp = ALWAYS
2744 cmpOp WordEqOp = EQQ
2746 cmpOp WordLtOp = NEVER
2747 cmpOp WordLeOp = EQQ
2749 cmpOp AddrGeOp = ALWAYS
2750 cmpOp AddrEqOp = EQQ
2752 cmpOp AddrLtOp = NEVER
2753 cmpOp AddrLeOp = EQQ
2755 genCondJump lbl (StPrim op [x, StDouble 0.0])
2756 = getRegister x `thenNat` \ register ->
2757 getNewRegNat (registerRep register)
2760 code = registerCode register tmp
2761 value = registerName register tmp
2762 pk = registerRep register
2763 target = ImmCLbl lbl
2765 return (code . mkSeqInstr (BF (cmpOp op) value target))
2767 cmpOp FloatGtOp = GTT
2768 cmpOp FloatGeOp = GE
2769 cmpOp FloatEqOp = EQQ
2770 cmpOp FloatNeOp = NE
2771 cmpOp FloatLtOp = LTT
2772 cmpOp FloatLeOp = LE
2773 cmpOp DoubleGtOp = GTT
2774 cmpOp DoubleGeOp = GE
2775 cmpOp DoubleEqOp = EQQ
2776 cmpOp DoubleNeOp = NE
2777 cmpOp DoubleLtOp = LTT
2778 cmpOp DoubleLeOp = LE
2780 genCondJump lbl (StPrim op [x, y])
2782 = trivialFCode pr instr x y `thenNat` \ register ->
2783 getNewRegNat F64 `thenNat` \ tmp ->
2785 code = registerCode register tmp
2786 result = registerName register tmp
2787 target = ImmCLbl lbl
2789 return (code . mkSeqInstr (BF cond result target))
2791 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2793 fltCmpOp op = case op of
2807 (instr, cond) = case op of
2808 FloatGtOp -> (FCMP TF LE, EQQ)
2809 FloatGeOp -> (FCMP TF LTT, EQQ)
2810 FloatEqOp -> (FCMP TF EQQ, NE)
2811 FloatNeOp -> (FCMP TF EQQ, EQQ)
2812 FloatLtOp -> (FCMP TF LTT, NE)
2813 FloatLeOp -> (FCMP TF LE, NE)
2814 DoubleGtOp -> (FCMP TF LE, EQQ)
2815 DoubleGeOp -> (FCMP TF LTT, EQQ)
2816 DoubleEqOp -> (FCMP TF EQQ, NE)
2817 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2818 DoubleLtOp -> (FCMP TF LTT, NE)
2819 DoubleLeOp -> (FCMP TF LE, NE)
2821 genCondJump lbl (StPrim op [x, y])
2822 = trivialCode instr x y `thenNat` \ register ->
2823 getNewRegNat IntRep `thenNat` \ tmp ->
2825 code = registerCode register tmp
2826 result = registerName register tmp
2827 target = ImmCLbl lbl
2829 return (code . mkSeqInstr (BI cond result target))
2831 (instr, cond) = case op of
2832 CharGtOp -> (CMP LE, EQQ)
2833 CharGeOp -> (CMP LTT, EQQ)
2834 CharEqOp -> (CMP EQQ, NE)
2835 CharNeOp -> (CMP EQQ, EQQ)
2836 CharLtOp -> (CMP LTT, NE)
2837 CharLeOp -> (CMP LE, NE)
2838 IntGtOp -> (CMP LE, EQQ)
2839 IntGeOp -> (CMP LTT, EQQ)
2840 IntEqOp -> (CMP EQQ, NE)
2841 IntNeOp -> (CMP EQQ, EQQ)
2842 IntLtOp -> (CMP LTT, NE)
2843 IntLeOp -> (CMP LE, NE)
2844 WordGtOp -> (CMP ULE, EQQ)
2845 WordGeOp -> (CMP ULT, EQQ)
2846 WordEqOp -> (CMP EQQ, NE)
2847 WordNeOp -> (CMP EQQ, EQQ)
2848 WordLtOp -> (CMP ULT, NE)
2849 WordLeOp -> (CMP ULE, NE)
2850 AddrGtOp -> (CMP ULE, EQQ)
2851 AddrGeOp -> (CMP ULT, EQQ)
2852 AddrEqOp -> (CMP EQQ, NE)
2853 AddrNeOp -> (CMP EQQ, EQQ)
2854 AddrLtOp -> (CMP ULT, NE)
2855 AddrLeOp -> (CMP ULE, NE)
2857 #endif /* alpha_TARGET_ARCH */
2859 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2861 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64 'genCondJump': emit the comparison via getCondCode, then
-- a conditional jump (JXX) on the resulting condition.
2863 genCondJump id bool = do
2864 CondCode _ cond code <- getCondCode bool
2865 return (code `snocOL` JXX cond id)
2867 #endif /* i386_TARGET_ARCH */
2870 #if sparc_TARGET_ARCH
-- SPARC: float branches (BF) need a NOP before them because a float
-- compare cannot directly precede BF, and both branch forms get a NOP
-- in the delay slot (see the section comment above).
-- NOTE(review): listing gap -- the lines between 2873 and 2878
-- (presumably "return (code `appOL` toOL (if is_float ...") are
-- missing here.
2872 genCondJump id bool = do
2873 CondCode is_float cond code <- getCondCode bool
2878 then [NOP, BF cond False id, NOP]
2879 else [BI cond False id, NOP]
2883 #endif /* sparc_TARGET_ARCH */
2886 #if powerpc_TARGET_ARCH
-- PowerPC: comparison already set the condition register; a single
-- conditional branch (BCC) suffices for both int and float.
2888 genCondJump id bool = do
2889 CondCode is_float cond code <- getCondCode bool
2890 return (code `snocOL` BCC cond id)
2892 #endif /* powerpc_TARGET_ARCH */
2895 -- -----------------------------------------------------------------------------
2896 -- Generating C calls
2898 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2899 -- @get_arg@, which moves the arguments to the correct registers/stack
2900 -- locations. Apart from that, the code is easy.
2902 -- (If applicable) Do not fill the delay slots here; you will confuse the
2903 -- register allocator.
2906 :: CmmCallTarget -- function to call
2907 -> [(CmmReg,MachHint)] -- where to put the result
2908 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2909 -> Maybe [GlobalReg] -- volatile regs to save
2912 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2914 #if alpha_TARGET_ARCH
2918 genCCall fn cconv result_regs args
2919 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2920 `thenNat` \ ((unused,_), argCode) ->
2922 nRegs = length allArgRegs - length unused
2923 code = asmSeqThen (map ($ []) argCode)
2926 LDA pv (AddrImm (ImmLab (ptext fn))),
2927 JSR ra (AddrReg pv) nRegs,
2928 LDGP gp (AddrReg ra)]
2930 ------------------------
2931 {- Try to get a value into a specific register (or registers) for
2932 a call. The first 6 arguments go into the appropriate
2933 argument register (separate registers for integer and floating
2934 point arguments, but used in lock-step), and the remaining
2935 arguments are dumped to the stack, beginning at 0(sp). Our
2936 first argument is a pair of the list of remaining argument
2937 registers to be assigned for this call and the next stack
2938 offset to use for overflowing arguments. This way,
2939 @get_Arg@ can be applied to all of a call's arguments using
2943 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2944 -> StixTree -- Current argument
2945 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2947 -- We have to use up all of our argument registers first...
2949 get_arg ((iDst,fDst):dsts, offset) arg
2950 = getRegister arg `thenNat` \ register ->
2952 reg = if isFloatingRep pk then fDst else iDst
2953 code = registerCode register reg
2954 src = registerName register reg
2955 pk = registerRep register
2958 if isFloatingRep pk then
2959 ((dsts, offset), if isFixed register then
2960 code . mkSeqInstr (FMOV src fDst)
2963 ((dsts, offset), if isFixed register then
2964 code . mkSeqInstr (OR src (RIReg src) iDst)
2967 -- Once we have run out of argument registers, we move to the
2970 get_arg ([], offset) arg
2971 = getRegister arg `thenNat` \ register ->
2972 getNewRegNat (registerRep register)
2975 code = registerCode register tmp
2976 src = registerName register tmp
2977 pk = registerRep register
2978 sz = primRepToSize pk
2980 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2982 #endif /* alpha_TARGET_ARCH */
2984 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2986 #if i386_TARGET_ARCH
2988 -- we only cope with a single result for foreign calls
2989 genCCall (CmmPrim op) [(r,_)] args vols = do
2991 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2992 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2994 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2995 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2997 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2998 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3000 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3001 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3003 other_op -> outOfLineFloatOp op r args vols
3005 actuallyInlineFloatOp rep instr [(x,_)]
3006 = do res <- trivialUFCode rep instr x
3008 return (any (getRegisterReg r))
3010 genCCall target dest_regs args vols = do
3011 sizes_n_codes <- mapM push_arg (reverse args)
3012 delta <- getDeltaNat
3014 (sizes, push_codes) = unzip sizes_n_codes
3015 tot_arg_size = sum sizes
3017 -- deal with static vs dynamic call targets
3018 (callinsns,cconv) <-
3021 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3022 -> -- ToDo: stdcall arg sizes
3023 return (unitOL (CALL (Left fn_imm)), conv)
3024 where fn_imm = ImmCLbl lbl
3025 CmmForeignCall expr conv
3026 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3027 ASSERT(dyn_rep == I32)
3028 return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3030 let push_code = concatOL push_codes
3031 call = callinsns `appOL`
3033 -- Deallocate parameters after call for ccall;
3034 -- but not for stdcall (callee does it)
3035 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3036 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3038 [DELTA (delta + tot_arg_size)]
3041 setDeltaNat (delta + tot_arg_size)
3044 -- assign the results, if necessary
3045 assign_code [] = nilOL
3046 assign_code [(dest,_hint)] =
3048 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3049 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3050 F32 -> unitOL (GMOV fake0 r_dest)
3051 F64 -> unitOL (GMOV fake0 r_dest)
3052 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3054 r_dest_hi = getHiVRegFromLo r_dest
3055 rep = cmmRegRep dest
3056 r_dest = getRegisterReg dest
3057 assign_code many = panic "genCCall.assign_code many"
3059 return (push_code `appOL`
3061 assign_code dest_regs)
3068 push_arg :: (CmmExpr,MachHint){-current argument-}
3069 -> NatM (Int, InstrBlock) -- argsz, code
3071 push_arg (arg,_hint) -- we don't need the hints on x86
3072 | arg_rep == I64 = do
3073 ChildCode64 code r_lo <- iselExpr64 arg
3074 delta <- getDeltaNat
3075 setDeltaNat (delta - 8)
3077 r_hi = getHiVRegFromLo r_lo
3079 return (8, code `appOL`
3080 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3081 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3086 (code, reg, sz) <- get_op arg
3087 delta <- getDeltaNat
3088 let size = arg_size sz
3089 setDeltaNat (delta-size)
3090 if (case sz of F64 -> True; F32 -> True; _ -> False)
3093 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3095 GST sz reg (AddrBaseIndex (Just esp)
3101 PUSH I32 (OpReg reg) `snocOL`
3105 arg_rep = cmmExprRep arg
3108 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3110 (reg,code) <- getSomeReg op
3111 return (code, reg, cmmExprRep op)
3114 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3115 -> Maybe [GlobalReg] -> NatM InstrBlock
3116 outOfLineFloatOp mop res args vols
3117 | cmmRegRep res == F64
3118 = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3121 = do uq <- getUniqueNat
3123 tmp = CmmLocal (LocalReg uq F64)
3125 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3126 code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3127 return (code1 `appOL` code2)
3129 promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3130 demote x = CmmMachOp (MO_S_Conv F64 F32) [x]
3132 target = CmmForeignCall (CmmLit lbl) CCallConv
3133 lbl = CmmLabel (mkForeignLabel fn Nothing False)
3136 MO_F32_Exp -> FSLIT("exp")
3137 MO_F32_Log -> FSLIT("log")
3139 MO_F32_Asin -> FSLIT("asin")
3140 MO_F32_Acos -> FSLIT("acos")
3141 MO_F32_Atan -> FSLIT("atan")
3143 MO_F32_Sinh -> FSLIT("sinh")
3144 MO_F32_Cosh -> FSLIT("cosh")
3145 MO_F32_Tanh -> FSLIT("tanh")
3146 MO_F32_Pwr -> FSLIT("pow")
3148 MO_F64_Exp -> FSLIT("exp")
3149 MO_F64_Log -> FSLIT("log")
3151 MO_F64_Asin -> FSLIT("asin")
3152 MO_F64_Acos -> FSLIT("acos")
3153 MO_F64_Atan -> FSLIT("atan")
3155 MO_F64_Sinh -> FSLIT("sinh")
3156 MO_F64_Cosh -> FSLIT("cosh")
3157 MO_F64_Tanh -> FSLIT("tanh")
3158 MO_F64_Pwr -> FSLIT("pow")
3160 other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3162 #endif /* i386_TARGET_ARCH */
3164 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3166 #if x86_64_TARGET_ARCH
3168 genCCall (CmmPrim op) [(r,_)] args vols =
3169 panic "genCCall(CmmPrim)(x86_64)"
3171 genCCall target dest_regs args vols = do
3173 -- load up the register arguments
3174 (stack_args, sse_regs, load_args_code)
3175 <- load_args args allArgRegs allFPArgRegs 0 nilOL
3178 tot_arg_size = arg_size * length stack_args
3180 -- On entry to the called function, %rsp should be aligned
3181 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3182 -- the return address is 16-byte aligned). In STG land
3183 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3184 -- need to make sure we push a multiple of 16-bytes of args,
3185 -- plus the return address, to get the correct alignment.
3186 -- Urg, this is hard. We need to feed the delta back into
3187 -- the arg pushing code.
3188 (real_size, adjust_rsp) <-
3189 if tot_arg_size `rem` 16 == 0
3190 then return (tot_arg_size, nilOL)
3191 else do -- we need to adjust...
3192 delta <- getDeltaNat
3193 setDeltaNat (delta-8)
3194 return (tot_arg_size+8, toOL [
3195 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3199 -- push the stack args, right to left
3200 push_code <- push_args (reverse stack_args) nilOL
3201 delta <- getDeltaNat
3203 -- deal with static vs dynamic call targets
3204 (callinsns,cconv) <-
3207 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3208 -> -- ToDo: stdcall arg sizes
3209 return (unitOL (CALL (Left fn_imm)), conv)
3210 where fn_imm = ImmCLbl lbl
3211 CmmForeignCall expr conv
3212 -> do (dyn_r, dyn_c) <- getSomeReg expr
3213 return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3216 -- The x86_64 ABI requires us to set %al to the number of SSE
3217 -- registers that contain arguments, if the called routine
3218 -- is a varargs function. We don't know whether it's a
3219 -- varargs function or not, so we have to assume it is.
3221 -- It's not safe to omit this assignment, even if the number
3222 -- of SSE regs in use is zero. If %al is larger than 8
3223 -- on entry to a varargs function, seg faults ensue.
3224 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3226 let call = callinsns `appOL`
3228 -- Deallocate parameters after call for ccall;
3229 -- but not for stdcall (callee does it)
3230 (if cconv == StdCallConv || real_size==0 then [] else
3231 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3233 [DELTA (delta + real_size)]
3236 setDeltaNat (delta + real_size)
3239 -- assign the results, if necessary
3240 assign_code [] = nilOL
3241 assign_code [(dest,_hint)] =
3243 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3244 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3245 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3247 rep = cmmRegRep dest
3248 r_dest = getRegisterReg dest
3249 assign_code many = panic "genCCall.assign_code many"
3251 return (load_args_code `appOL`
3254 assign_eax sse_regs `appOL`
3256 assign_code dest_regs)
3259 arg_size = 8 -- always, at the mo
3261 load_args :: [(CmmExpr,MachHint)]
3262 -> [Reg] -- int regs avail for args
3263 -> [Reg] -- FP regs avail for args
3264 -> Int -> InstrBlock
3265 -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
3266 load_args args [] [] sse_regs code = return (args, sse_regs, code)
3267 -- no more regs to use
3268 load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
3269 -- no more args to push
3270 load_args ((arg,hint) : rest) aregs fregs sse_regs code
3271 | isFloatingRep arg_rep =
3275 arg_code <- getAnyReg arg
3276 load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
3281 arg_code <- getAnyReg arg
3282 load_args rest rs fregs sse_regs (code `appOL` arg_code r)
3284 arg_rep = cmmExprRep arg
3287 (args',sse',code') <- load_args rest aregs fregs sse_regs code
3288 return ((arg,hint):args', sse', code')
3290 push_args [] code = return code
3291 push_args ((arg,hint):rest) code
3292 | isFloatingRep arg_rep = do
3293 (arg_reg, arg_code) <- getSomeReg arg
3294 delta <- getDeltaNat
3295 setDeltaNat (delta-arg_size)
3296 let code' = code `appOL` toOL [
3297 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3298 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3299 DELTA (delta-arg_size)]
3300 push_args rest code'
3303 -- we only ever generate word-sized function arguments. Promotion
3304 -- has already happened: our Int8# type is kept sign-extended
3305 -- in an Int#, for example.
3306 ASSERT(arg_rep == I64) return ()
3307 (arg_op, arg_code) <- getOperand arg
3308 delta <- getDeltaNat
3309 setDeltaNat (delta-arg_size)
3310 let code' = code `appOL` toOL [PUSH I64 arg_op,
3311 DELTA (delta-arg_size)]
3312 push_args rest code'
3314 arg_rep = cmmExprRep arg
3317 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3319 #if sparc_TARGET_ARCH
3321 The SPARC calling convention is an absolute
3322 nightmare. The first 6x32 bits of arguments are mapped into
3323 %o0 through %o5, and the remaining arguments are dumped to the
3324 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3326 If we have to put args on the stack, move %o6==%sp down by
3327 the number of words to go on the stack, to ensure there's enough space.
3329 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3330 16 words above the stack pointer is a word for the address of
3331 a structure return value. I use this as a temporary location
3332 for moving values from float to int regs. Certainly it isn't
3333 safe to put anything in the 16 words starting at %sp, since
3334 this area can get trashed at any time due to window overflows
3335 caused by signal handlers.
3337 A final complication (if the above isn't enough) is that
3338 we can't blithely calculate the arguments one by one into
3339 %o0 .. %o5. Consider the following nested calls:
3343 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3344 the inner call will itself use %o0, which trashes the value put there
3345 in preparation for the outer call. Upshot: we need to calculate the
3346 args into temporary regs, and move those to arg regs or onto the
3347 stack only immediately prior to the call proper. Sigh.
-- | C-call code generation for SPARC.  Per the long comment above:
-- every argument is first computed into fresh integer vregs (floats and
-- 64-bit values are bounced through the scratch word at [%sp+16*4] or
-- split into two vregs), and only immediately before the CALL are those
-- vregs moved into %o0..%o5 / stack slots, so that nested calls cannot
-- clobber already-placed argument registers.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
3350 genCCall fn cconv kind args
3351 = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3353 (argcodes, vregss) = unzip argcode_and_vregs
3354 n_argRegs = length allArgRegs
3355 n_argRegs_used = min (length vregs) n_argRegs
3356 vregs = concat vregss
3358 -- deal with static vs dynamic call targets
3361 -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3363 -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3364 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3366 `thenNat` \ callinsns ->
3368 argcode = concatOL argcodes
-- If more args than arg registers, move %sp down to make room,
-- rounding to an even word count to keep 8-byte stack alignment.
3369 (move_sp_down, move_sp_up)
3370 = let diff = length vregs - n_argRegs
3371 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3374 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3376 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3378 return (argcode `appOL`
3379 move_sp_down `appOL`
3380 transfer_code `appOL`
3385 -- function names that begin with '.' are assumed to be special
3386 -- internally generated names like '.mul,' which don't get an
3387 -- underscore prefix
3388 -- ToDo:needed (WDP 96/03) ???
3389 fn_static = unLeft fn
3390 fn__2 = case (headFS fn_static) of
3391 '.' -> ImmLit (ftext fn_static)
3392 _ -> ImmCLbl (mkForeignLabel fn_static False)
3394 -- move args from the integer vregs into which they have been
3395 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3396 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3398 move_final [] _ offset -- all args done
3401 move_final (v:vs) [] offset -- out of aregs; move to stack
3402 = ST W v (spRel offset)
3403 : move_final vs [] (offset+1)
3405 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3406 = OR False g0 (RIReg v) a
3407 : move_final vs az offset
3409 -- generate code to calculate an argument, and move it into one
3410 -- or two integer vregs.
3411 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3412 arg_to_int_vregs arg
-- 64-bit args: select via iselExpr64, yielding lo vreg, with the
-- paired hi vreg recovered by getHiVRegFromLo; passed hi-then-lo.
3413 | is64BitRep (repOfCmmExpr arg)
3414 = iselExpr64 arg `thenNat` \ (ChildCode64 code vr_lo) ->
3415 let r_lo = VirtualRegI vr_lo
3416 r_hi = getHiVRegFromLo r_lo
3417 in return (code, [r_hi, r_lo])
3419 = getRegister arg `thenNat` \ register ->
3420 getNewRegNat (registerRep register) `thenNat` \ tmp ->
3421 let code = registerCode register tmp
3422 src = registerName register tmp
3423 pk = registerRep register
3425 -- the value is in src. Get it into 1 or 2 int vregs.
-- Doubles: bounce both halves through the scratch slot at %sp+16
-- words (see comment above about the struct-return word) to move
-- them from float regs into two int vregs v1/v2.
3428 getNewRegNat WordRep `thenNat` \ v1 ->
3429 getNewRegNat WordRep `thenNat` \ v2 ->
3432 FMOV DF src f0 `snocOL`
3433 ST F f0 (spRel 16) `snocOL`
3434 LD W (spRel 16) v1 `snocOL`
3435 ST F (fPair f0) (spRel 16) `snocOL`
-- Floats: one bounce through the scratch slot into a single int vreg.
3441 getNewRegNat WordRep `thenNat` \ v1 ->
3444 ST F src (spRel 16) `snocOL`
-- Integers: just copy src into a fresh vreg (OR with %g0 is a move).
3450 getNewRegNat WordRep `thenNat` \ v1 ->
3452 code `snocOL` OR False g0 (RIReg src) v1
3456 #endif /* sparc_TARGET_ARCH */
3458 #if powerpc_TARGET_ARCH
3460 #if darwin_TARGET_OS || linux_TARGET_OS
3462 The PowerPC calling convention for Darwin/Mac OS X
3463 is described in Apple's document
3464 "Inside Mac OS X - Mach-O Runtime Architecture".
3466 PowerPC Linux uses the System V Release 4 Calling Convention
3467 for PowerPC. It is described in the
3468 "System V Application Binary Interface PowerPC Processor Supplement".
3470 Both conventions are similar:
3471 Parameters may be passed in general-purpose registers starting at r3, in
3472 floating point registers starting at f1, or on the stack.
3474 But there are substantial differences:
3475 * The number of registers used for parameter passing and the exact set of
3476 nonvolatile registers differs (see MachRegs.lhs).
3477 * On Darwin, stack space is always reserved for parameters, even if they are
3478 passed in registers. The called routine may choose to save parameters from
3479 registers to the corresponding space on the stack.
3480 * On Darwin, a corresponding number of GPRs is skipped when a floating point
3481 parameter is passed in an FPR.
3482 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3483 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3484 Darwin just treats an I64 like two separate I32s (high word first).
3485 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3486 4-byte aligned like everything else on Darwin.
3487 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3488 PowerPC Linux does not agree, so neither do we.
3490 According to both conventions, the parameter area should be part of the
3491 caller's stack frame, allocated in the caller's prologue code (large enough
3492 to hold the parameter lists for all called routines). The NCG already
3493 uses the stack for register spilling, leaving 64 bytes free at the top.
3494 If we need a larger parameter area than that, we just allocate a new stack
3495 frame just before ccalling.
-- | C-call code generation for PowerPC (Darwin and Linux/SysV ABIs).
-- The two conventions share the overall shape — args in GPRs from r3 /
-- FPRs from f1, overflow on the stack — but differ in linkage-area
-- size, whether GPRs are reserved alongside FPR args, and alignment of
-- I64/F64 stack slots; the CPP below selects the right variant.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
3498 genCCall target dest_regs argsAndHints vols
-- I8/I16 must already have been promoted by the code generator.
3499 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3500 -- we rely on argument promotion in the codeGen
3502 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3504 allArgRegs allFPArgRegs
-- Static targets become a direct BL; dynamic ones go via CTR;
-- callish prims are mapped to out-of-line libm calls below.
3508 (labelOrExpr, reduceToF32) <- case target of
3509 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3510 CmmForeignCall expr conv -> return (Right expr, False)
3511 CmmPrim mop -> outOfLineFloatOp mop
3513 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3514 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3519 `snocOL` BL lbl usedRegs
3522 (dynReg, dynCode) <- getSomeReg dyn
3524 `snocOL` MTCTR dynReg
3526 `snocOL` BCTRL usedRegs
-- Per-OS layout constants: Darwin has a 24-byte linkage area and
-- always reserves parameter space; SysV starts parameters at byte 8.
3529 #if darwin_TARGET_OS
3530 initialStackOffset = 24
3531 -- size of linkage area + size of arguments, in bytes
3532 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3533 map machRepByteWidth argReps
3534 #elif linux_TARGET_OS
3535 initialStackOffset = 8
3536 stackDelta finalStack = roundTo 16 finalStack
3538 args = map fst argsAndHints
3539 argReps = map cmmExprRep args
-- Round x up to the next multiple of a.
3541 roundTo a x | x `mod` a == 0 = x
3542 | otherwise = x + a - (x `mod` a)
-- Allocate/deallocate a fresh frame around the call when the
-- parameter area outgrows the spill space (STU links the frames).
3544 move_sp_down finalStack
3546 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3549 where delta = stackDelta finalStack
3550 move_sp_up finalStack
3552 toOL [ADD sp sp (RIImm (ImmInt delta)),
3555 where delta = stackDelta finalStack
-- passArguments: walk the argument list, threading the remaining
-- GPRs, FPRs, current stack offset, accumulated code and used regs.
3558 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3559 passArguments ((arg,I64):args) gprs fprs stackOffset
3560 accumCode accumUsed =
3562 ChildCode64 code vr_lo <- iselExpr64 arg
3563 let vr_hi = getHiVRegFromLo vr_lo
3565 #if darwin_TARGET_OS
-- Darwin: an I64 is two I32s, high word first; each half lands in
-- the next GPR if one is left, otherwise in its reserved stack slot.
3570 (accumCode `appOL` code
3571 `snocOL` storeWord vr_hi gprs stackOffset
3572 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3573 ((take 2 gprs) ++ accumUsed)
3575 storeWord vr (gpr:_) offset = MR gpr vr
3576 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3578 #elif linux_TARGET_OS
-- SysV: an I64 needs an 8-byte-aligned stack slot, or an
-- odd-numbered GPR pair (skip one GPR if necessary to align).
3579 let stackOffset' = roundTo 8 stackOffset
3580 stackCode = accumCode `appOL` code
3581 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3582 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3583 regCode hireg loreg =
3584 accumCode `appOL` code
3585 `snocOL` MR hireg vr_hi
3586 `snocOL` MR loreg vr_lo
3589 hireg : loreg : regs | even (length gprs) ->
3590 passArguments args regs fprs stackOffset
3591 (regCode hireg loreg) (hireg : loreg : accumUsed)
3592 _skipped : hireg : loreg : regs ->
3593 passArguments args regs fprs stackOffset
3594 (regCode hireg loreg) (hireg : loreg : accumUsed)
3595 _ -> -- only one or no regs left
3596 passArguments args [] fprs (stackOffset'+8)
-- Non-I64 argument with a register still available for its class.
3600 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3601 | reg : _ <- regs = do
3602 register <- getRegister arg
3603 let code = case register of
3604 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3605 Any _ acode -> acode reg
3609 #if darwin_TARGET_OS
3610 -- The Darwin ABI requires that we reserve stack slots for register parameters
3611 (stackOffset + stackBytes)
3612 #elif linux_TARGET_OS
3613 -- ... the SysV ABI doesn't.
3616 (accumCode `appOL` code)
-- No register left: store the argument to its (aligned) stack slot.
3619 (vr, code) <- getSomeReg arg
3623 (stackOffset' + stackBytes)
3624 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3627 #if darwin_TARGET_OS
3628 -- stackOffset is at least 4-byte aligned
3629 -- The Darwin ABI is happy with that.
3630 stackOffset' = stackOffset
3632 -- ... the SysV ABI requires 8-byte alignment for doubles.
3633 stackOffset' | rep == F64 = roundTo 8 stackOffset
3634 | otherwise = stackOffset
3636 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep resource usage: (GPRs consumed, FPRs consumed, stack
-- bytes, which register pool this argument draws from).
3637 (nGprs, nFprs, stackBytes, regs) = case rep of
3638 I32 -> (1, 0, 4, gprs)
3639 #if darwin_TARGET_OS
3640 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3642 F32 -> (1, 1, 4, fprs)
3643 F64 -> (2, 1, 8, fprs)
3644 #elif linux_TARGET_OS
3645 -- ... the SysV ABI doesn't.
3646 F32 -> (0, 1, 4, fprs)
3647 F64 -> (0, 1, 8, fprs)
-- Copy the C result out of the return registers (f1 for floats,
-- r3 — presumably r3/r4 for I64, the second move is elided here —
-- for integers); FRSP narrows the double result of a float libm
-- call back to F32 when reduceToF32 is set.
3650 moveResult reduceToF32 =
3654 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3655 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3656 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3658 | otherwise -> unitOL (MR r_dest r3)
3659 where rep = cmmRegRep dest
3660 r_dest = getRegisterReg dest
-- Map a callish MachOp to an out-of-line C math function; the Bool
-- records whether the (double) result must be reduced to F32.
3662 outOfLineFloatOp mop =
3664 mopExpr <- cmmMakeDynamicReference addImportNat True $
3665 mkForeignLabel functionName Nothing True
3666 let mopLabelOrExpr = case mopExpr of
3667 CmmLit (CmmLabel lbl) -> Left lbl
3669 return (mopLabelOrExpr, reduce)
3671 (functionName, reduce) = case mop of
3672 MO_F32_Exp -> (FSLIT("exp"), True)
3673 MO_F32_Log -> (FSLIT("log"), True)
3674 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3676 MO_F32_Sin -> (FSLIT("sin"), True)
3677 MO_F32_Cos -> (FSLIT("cos"), True)
3678 MO_F32_Tan -> (FSLIT("tan"), True)
3680 MO_F32_Asin -> (FSLIT("asin"), True)
3681 MO_F32_Acos -> (FSLIT("acos"), True)
3682 MO_F32_Atan -> (FSLIT("atan"), True)
3684 MO_F32_Sinh -> (FSLIT("sinh"), True)
3685 MO_F32_Cosh -> (FSLIT("cosh"), True)
3686 MO_F32_Tanh -> (FSLIT("tanh"), True)
3687 MO_F32_Pwr -> (FSLIT("pow"), True)
3689 MO_F64_Exp -> (FSLIT("exp"), False)
3690 MO_F64_Log -> (FSLIT("log"), False)
3691 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3693 MO_F64_Sin -> (FSLIT("sin"), False)
3694 MO_F64_Cos -> (FSLIT("cos"), False)
3695 MO_F64_Tan -> (FSLIT("tan"), False)
3697 MO_F64_Asin -> (FSLIT("asin"), False)
3698 MO_F64_Acos -> (FSLIT("acos"), False)
3699 MO_F64_Atan -> (FSLIT("atan"), False)
3701 MO_F64_Sinh -> (FSLIT("sinh"), False)
3702 MO_F64_Cosh -> (FSLIT("cosh"), False)
3703 MO_F64_Tanh -> (FSLIT("tanh"), False)
3704 MO_F64_Pwr -> (FSLIT("pow"), False)
3705 other -> pprPanic "genCCall(ppc): unknown callish op"
3706 (pprCallishMachOp other)
3708 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3710 #endif /* powerpc_TARGET_ARCH */
3713 -- -----------------------------------------------------------------------------
3714 -- Generating a table-branch
-- | Generate a table-based branch: emit a read-only jump table labelled
-- lbl plus code that indexes it with the (already word-scaled or
-- shifted) scrutinee and jumps to the selected block.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
3716 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3718 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: absolute entries; JMP_TBL through a scaled-index
-- addressing mode (reg * wORD_SIZE + lbl).
3719 genSwitch expr ids = do
3720 (reg,e_code) <- getSomeReg expr
3721 lbl <- getNewLabelNat
3723 jumpTable = map jumpTableEntry ids
3724 op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
3725 code = e_code `appOL` toOL [
3726 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3727 JMP_TBL op [ id | Just id <- ids ]
3731 #elif powerpc_TARGET_ARCH
-- PPC position-independent variant: table entries are offsets
-- relative to the table label, added back to the table base before
-- branching via CTR.
3735 (reg,e_code) <- getSomeReg expr
3736 tmp <- getNewRegNat I32
3737 lbl <- getNewLabelNat
3738 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3739 (tableReg,t_code) <- getSomeReg $ dynRef
3741 jumpTable = map jumpTableEntryRel ids
3743 jumpTableEntryRel Nothing
3744 = CmmStaticLit (CmmInt 0 wordRep)
3745 jumpTableEntryRel (Just (BlockId id))
3746 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3747 where blockLabel = mkAsmTempLabel id
3749 code = e_code `appOL` t_code `appOL` toOL [
3750 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3751 SLW tmp reg (RIImm (ImmInt 2)),
3752 LD I32 tmp (AddrRegReg tableReg tmp),
3753 ADD tmp tmp (RIReg tableReg),
3755 BCTR [ id | Just id <- ids ]
-- PPC non-PIC variant: absolute entries, addressed with the
-- high-adjusted/low halves (HA/LO) of the table label.
3760 (reg,e_code) <- getSomeReg expr
3761 tmp <- getNewRegNat I32
3762 lbl <- getNewLabelNat
3764 jumpTable = map jumpTableEntry ids
3766 code = e_code `appOL` toOL [
3767 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3768 SLW tmp reg (RIImm (ImmInt 2)),
3769 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3770 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3772 BCTR [ id | Just id <- ids ]
-- Remaining architectures: not implemented yet.
3776 genSwitch expr ids = panic "ToDo: genSwitch"
-- | Build one absolute jump-table entry: a pointer-sized zero for a
-- hole in the table, or the address of the target block's local
-- assembler label.
jumpTableEntry mbBlockId =
    case mbBlockId of
        Nothing            -> CmmStaticLit (CmmInt 0 wordRep)
        Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
3783 -- -----------------------------------------------------------------------------
3785 -- -----------------------------------------------------------------------------
3788 -- -----------------------------------------------------------------------------
3789 -- 'condIntReg' and 'condFltReg': condition codes into registers
3791 -- Turn those condition codes into integers now (when they appear on
3792 -- the right hand side of an assignment).
3794 -- (If applicable) Do not fill the delay slots here; you will confuse the
3795 -- register allocator.
-- Materialise the result of a comparison (a condition code) into a
-- register, as needed when a comparison appears on the right-hand side
-- of an assignment.
3797 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3799 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3801 #if alpha_TARGET_ARCH
-- Not implemented for the Alpha backend; reaching these is a bug.
3802 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3803 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3804 #endif /* alpha_TARGET_ARCH */
3806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3808 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | x86/x86_64: set a byte with SETcc into a byte-able temporary, widen
-- it into dst, and mask to the low bit.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
3810 condIntReg cond x y = do
3811 CondCode _ cond cond_code <- condIntCode cond x y
3812 tmp <- getNewRegNat I8
3814 code dst = cond_code `appOL` toOL [
3815 SETCC cond (OpReg tmp),
3816 MOV I32 (OpReg tmp) (OpReg dst),
3817 AND I32 (OpImm (ImmInt 1)) (OpReg dst)
3819 -- NB. (1) The AND is needed here because the x86 only
3820 -- sets the low byte in the SETCC instruction.
3821 -- NB. (2) The extra temporary register is a hack to
3822 -- work around the fact that the setcc instructions only
3823 -- accept byte registers. dst might not be a byte-able reg,
3824 -- but currently all free registers are byte-able, so we're
3825 -- guaranteed that a new temporary is byte-able.
3827 return (Any I32 code)
-- | x86 float comparison into a register: branch on the condition to
-- select between a 0 and a 1 move into dst (requires splitting the
-- basic block at lbl1/lbl2).
3830 condFltReg cond x y = do
3831 lbl1 <- getBlockIdNat
3832 lbl2 <- getBlockIdNat
3833 CondCode _ cond cond_code <- condFltCode cond x y
3835 code dst = cond_code `appOL` toOL [
3837 MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
3840 MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
3843 -- SIGH, have to split up this block somehow...
3845 return (Any I32 code)
3847 #endif /* i386_TARGET_ARCH */
3849 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3851 #if sparc_TARGET_ARCH
-- | SPARC: materialise comparisons using the carry trick — SUBcc
-- against %g0 sets carry, then subtracting -1 with carry (SUBX) or
-- adding 0 with carry (ADDX) yields 0/1 in dst.  EQ/NE against zero
-- and against a general operand get branch-free sequences; all other
-- conditions fall back to a branch over 0/1 moves.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
3853 condIntReg EQQ x (StInt 0)
3854 = getRegister x `thenNat` \ register ->
3855 getNewRegNat IntRep `thenNat` \ tmp ->
3857 code = registerCode register tmp
3858 src = registerName register tmp
3859 code__2 dst = code `appOL` toOL [
3860 SUB False True g0 (RIReg src) g0,
3861 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3863 return (Any IntRep code__2)
-- EQ of two general operands: XOR them first, then the zero test.
3866 = getRegister x `thenNat` \ register1 ->
3867 getRegister y `thenNat` \ register2 ->
3868 getNewRegNat IntRep `thenNat` \ tmp1 ->
3869 getNewRegNat IntRep `thenNat` \ tmp2 ->
3871 code1 = registerCode register1 tmp1
3872 src1 = registerName register1 tmp1
3873 code2 = registerCode register2 tmp2
3874 src2 = registerName register2 tmp2
3875 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3876 XOR False src1 (RIReg src2) dst,
3877 SUB False True g0 (RIReg dst) g0,
3878 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3880 return (Any IntRep code__2)
3882 condIntReg NE x (StInt 0)
3883 = getRegister x `thenNat` \ register ->
3884 getNewRegNat IntRep `thenNat` \ tmp ->
3886 code = registerCode register tmp
3887 src = registerName register tmp
3888 code__2 dst = code `appOL` toOL [
3889 SUB False True g0 (RIReg src) g0,
3890 ADD True False g0 (RIImm (ImmInt 0)) dst]
3892 return (Any IntRep code__2)
-- NE of two general operands: XOR then the non-zero carry test.
3895 = getRegister x `thenNat` \ register1 ->
3896 getRegister y `thenNat` \ register2 ->
3897 getNewRegNat IntRep `thenNat` \ tmp1 ->
3898 getNewRegNat IntRep `thenNat` \ tmp2 ->
3900 code1 = registerCode register1 tmp1
3901 src1 = registerName register1 tmp1
3902 code2 = registerCode register2 tmp2
3903 src2 = registerName register2 tmp2
3904 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3905 XOR False src1 (RIReg src2) dst,
3906 SUB False True g0 (RIReg dst) g0,
3907 ADD True False g0 (RIImm (ImmInt 0)) dst]
3909 return (Any IntRep code__2)
-- General integer condition: branch (with delay-slot NOPs) over
-- moves of 0 and 1 into dst.
3912 = getBlockIdNat `thenNat` \ lbl1 ->
3913 getBlockIdNat `thenNat` \ lbl2 ->
3914 condIntCode cond x y `thenNat` \ condition ->
3916 code = condCode condition
3917 cond = condName condition
3918 code__2 dst = code `appOL` toOL [
3919 BI cond False (ImmCLbl lbl1), NOP,
3920 OR False g0 (RIImm (ImmInt 0)) dst,
3921 BI ALWAYS False (ImmCLbl lbl2), NOP,
3923 OR False g0 (RIImm (ImmInt 1)) dst,
3926 return (Any IntRep code__2)
-- Float condition: same branch-over-moves scheme, but the first
-- branch tests the float condition codes (BF).
3929 = getBlockIdNat `thenNat` \ lbl1 ->
3930 getBlockIdNat `thenNat` \ lbl2 ->
3931 condFltCode cond x y `thenNat` \ condition ->
3933 code = condCode condition
3934 cond = condName condition
3935 code__2 dst = code `appOL` toOL [
3937 BF cond False (ImmCLbl lbl1), NOP,
3938 OR False g0 (RIImm (ImmInt 0)) dst,
3939 BI ALWAYS False (ImmCLbl lbl2), NOP,
3941 OR False g0 (RIImm (ImmInt 1)) dst,
3944 return (Any IntRep code__2)
3946 #endif /* sparc_TARGET_ARCH */
3948 #if powerpc_TARGET_ARCH
-- | PPC: shared helper for condIntReg/condFltReg — run the comparison,
-- then extract the relevant CR bit into dst with RLWINM (rotating so
-- the chosen bit lands in the low position), inverting the bit first
-- with CRNOR for the negated conditions.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines (including a commented-out
-- branch-based alternative) appear to have been elided here.
3949 condReg getCond = do
3950 lbl1 <- getBlockIdNat
3951 lbl2 <- getBlockIdNat
3952 CondCode _ cond cond_code <- getCond
3954 {- code dst = cond_code `appOL` toOL [
3963 code dst = cond_code
3967 RLWINM dst dst (bit + 1) 31 31
3970 negate_code | do_negate = unitOL (CRNOR bit bit bit)
3973 (bit, do_negate) = case cond of
3987 return (Any I32 code)
3989 condIntReg cond x y = condReg (condIntCode cond x y)
3990 condFltReg cond x y = condReg (condFltCode cond x y)
3991 #endif /* powerpc_TARGET_ARCH */
3994 -- -----------------------------------------------------------------------------
3995 -- 'trivial*Code': deal with trivial instructions
3997 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
3998 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
3999 -- Only look for constants on the right hand side, because that's
4000 -- where the generic optimizer will have put them.
4002 -- Similarly, for unary instructions, we don't have to worry about
4003 -- matching an StInt as the argument, because genericOpt will already
4004 -- have handled the constant-folding.
-- Type signatures for the trivial*Code generators.  The IF_ARCH_*
-- CPP macros (from nativeGen/NCG.h, included at the top of this
-- module) select the per-architecture instruction-constructor type
-- for the current target.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — the leading "trivialCode ::" lines of each signature
-- appear to have been elided here.
4008 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4009 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4010 -> Maybe (Operand -> Operand -> Instr)
4011 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4012 -> Maybe (Operand -> Operand -> Instr)
4013 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4014 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4016 -> CmmExpr -> CmmExpr -- the two arguments
4019 #ifndef powerpc_TARGET_ARCH
-- trivialFCode: dyadic floating-point operation.
4022 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4023 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4024 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4025 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4027 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialUCode: unary integer operation.
4033 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4034 ,IF_ARCH_i386 ((Operand -> Instr)
4035 ,IF_ARCH_x86_64 ((Operand -> Instr)
4036 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4037 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4039 -> CmmExpr -- the one argument
4042 #ifndef powerpc_TARGET_ARCH
-- trivialUFCode: unary floating-point operation.
4045 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4046 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4047 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4048 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4050 -> CmmExpr -- the one argument
4054 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4056 #if alpha_TARGET_ARCH
-- | Alpha versions of the trivial*Code generators (three-address
-- machine: compute each operand into a register, emit one instruction
-- into dst).
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
-- Immediate special case: fold a literal second operand into the
-- instruction's immediate field.
4058 trivialCode instr x (StInt y)
4060 = getRegister x `thenNat` \ register ->
4061 getNewRegNat IntRep `thenNat` \ tmp ->
4063 code = registerCode register tmp
4064 src1 = registerName register tmp
4065 src2 = ImmInt (fromInteger y)
4066 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4068 return (Any IntRep code__2)
-- General register/register case.
4070 trivialCode instr x y
4071 = getRegister x `thenNat` \ register1 ->
4072 getRegister y `thenNat` \ register2 ->
4073 getNewRegNat IntRep `thenNat` \ tmp1 ->
4074 getNewRegNat IntRep `thenNat` \ tmp2 ->
4076 code1 = registerCode register1 tmp1 []
4077 src1 = registerName register1 tmp1
4078 code2 = registerCode register2 tmp2 []
4079 src2 = registerName register2 tmp2
4080 code__2 dst = asmSeqThen [code1, code2] .
4081 mkSeqInstr (instr src1 (RIReg src2) dst)
4083 return (Any IntRep code__2)
4086 trivialUCode instr x
4087 = getRegister x `thenNat` \ register ->
4088 getNewRegNat IntRep `thenNat` \ tmp ->
4090 code = registerCode register tmp
4091 src = registerName register tmp
4092 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4094 return (Any IntRep code__2)
-- Floating-point dyadic: both operands in F64 temporaries.
4097 trivialFCode _ instr x y
4098 = getRegister x `thenNat` \ register1 ->
4099 getRegister y `thenNat` \ register2 ->
4100 getNewRegNat F64 `thenNat` \ tmp1 ->
4101 getNewRegNat F64 `thenNat` \ tmp2 ->
4103 code1 = registerCode register1 tmp1
4104 src1 = registerName register1 tmp1
4106 code2 = registerCode register2 tmp2
4107 src2 = registerName register2 tmp2
4109 code__2 dst = asmSeqThen [code1 [], code2 []] .
4110 mkSeqInstr (instr src1 src2 dst)
4112 return (Any F64 code__2)
4114 trivialUFCode _ instr x
4115 = getRegister x `thenNat` \ register ->
4116 getNewRegNat F64 `thenNat` \ tmp ->
4118 code = registerCode register tmp
4119 src = registerName register tmp
4120 code__2 dst = code . mkSeqInstr (instr src dst)
4122 return (Any F64 code__2)
4124 #endif /* alpha_TARGET_ARCH */
4126 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4128 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4131 The Rules of the Game are:
4133 * You cannot assume anything about the destination register dst;
4134 it may be anything, including a fixed reg.
4136 * You may compute an operand into a fixed reg, but you may not
4137 subsequently change the contents of that fixed reg. If you
4138 want to do so, first copy the value either to a temporary
4139 or into dst. You are free to modify dst even if it happens
4140 to be a fixed reg -- that's not your problem.
4142 * You cannot assume that a fixed reg will stay live over an
4143 arbitrary computation. The same applies to the dst reg.
4145 * Temporary regs obtained from getNewRegNat are distinct from
4146 each other and from all other regs, and stay live over
4147 arbitrary computations.
4149 --------------------
4151 SDM's version of The Rules:
4153 * If getRegister returns Any, that means it can generate correct
4154 code which places the result in any register, period. Even if that
4155 register happens to be read during the computation.
4157 Corollary #1: this means that if you are generating code for an
4158 operation with two arbitrary operands, you cannot assign the result
4159 of the first operand into the destination register before computing
4160 the second operand. The second operand might require the old value
4161 of the destination register.
4163 Corollary #2: A function might be able to generate more efficient
4164 code if it knows the destination register is a new temporary (and
4165 therefore not read by any of the sub-computations).
4167 * If getRegister returns Any, then the code it generates may modify only:
4168 (a) fresh temporaries
4169 (b) the destination register
4170 (c) known registers (eg. %ecx is used by shifts)
4171 In particular, it may *not* modify global registers, unless the global
4172 register happens to be the destination register.
-- | x86/x86_64 versions of the trivial*Code generators.  x86 ops are
-- two-operand (dst := dst `op` src), hence the clash handling below.
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
-- Commutable-with-immediate special case: when a small literal is the
-- FIRST operand and a reversed instruction is available, compute b
-- straight into dst and apply the reversed op with the immediate.
4175 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4176 | not (is64BitLit lit_a) = do
4177 b_code <- getAnyReg b
4180 = b_code dst `snocOL`
4181 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4183 return (Any rep code)
4185 trivialCode rep instr maybe_revinstr a b = do
4186 (b_op, b_code) <- getNonClobberedOperand b
4187 a_code <- getAnyReg a
4188 tmp <- getNewRegNat rep
4190 -- We want the value of b to stay alive across the computation of a.
4191 -- But, we want to calculate a straight into the destination register,
4192 -- because the instruction only has two operands (dst := dst `op` src).
4193 -- The troublesome case is when the result of b is in the same register
4194 -- as the destination reg. In this case, we have to save b in a
4195 -- new temporary across the computation of a.
4197 | dst `clashesWith` b_op =
4199 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4201 instr (OpReg tmp) (OpReg dst)
4205 instr b_op (OpReg dst)
4207 return (Any rep code)
-- An operand clashes with dst if it is that very register, or an
-- address mode that mentions it.
4209 reg `clashesWith` OpReg reg2 = reg == reg2
4210 reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
4211 reg `clashesWith` _ = False
-- Unary op: compute the argument straight into dst, apply in place.
4215 trivialUCode rep instr x = do
4216 x_code <- getAnyReg x
4222 return (Any rep code)
4226 #if i386_TARGET_ARCH
-- i386 (x87): three-register float instruction form.
4228 trivialFCode pk instr x y = do
4229 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4230 (y_reg, y_code) <- getSomeReg y
4235 instr pk x_reg y_reg dst
4237 return (Any pk code)
4241 #if x86_64_TARGET_ARCH
4243 -- We use the 2-operand SSE2 floating pt instructions. ToDo: improve on
4244 -- this by using some of the special cases in trivialCode above.
4245 trivialFCode pk instr x y = do
4246 (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
4247 x_code <- getAnyReg x
4252 instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
4253 (IF_ARCH_x86_64(OpReg,) dst)
4255 return (Any pk code)
4261 trivialUFCode rep instr x = do
4262 (x_reg, x_code) <- getSomeReg x
4268 return (Any rep code)
4270 #endif /* i386_TARGET_ARCH */
4272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4274 #if sparc_TARGET_ARCH
-- | SPARC versions of the trivial*Code generators (three-address
-- machine, with a reg-or-immediate second operand).
-- NOTE(review): the stale line numbering in this listing is not
-- contiguous — some source lines appear to have been elided here.
-- Immediate special case for a literal second operand.
4276 trivialCode instr x (StInt y)
4278 = getRegister x `thenNat` \ register ->
4279 getNewRegNat IntRep `thenNat` \ tmp ->
4281 code = registerCode register tmp
4282 src1 = registerName register tmp
4283 src2 = ImmInt (fromInteger y)
4284 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4286 return (Any IntRep code__2)
4288 trivialCode instr x y
4289 = getRegister x `thenNat` \ register1 ->
4290 getRegister y `thenNat` \ register2 ->
4291 getNewRegNat IntRep `thenNat` \ tmp1 ->
4292 getNewRegNat IntRep `thenNat` \ tmp2 ->
4294 code1 = registerCode register1 tmp1
4295 src1 = registerName register1 tmp1
4296 code2 = registerCode register2 tmp2
4297 src2 = registerName register2 tmp2
4298 code__2 dst = code1 `appOL` code2 `snocOL`
4299 instr src1 (RIReg src2) dst
4301 return (Any IntRep code__2)
-- Float dyadic: if the operand kinds differ, promote the F32 side
-- to double with FxTOy before applying the instruction at DF size;
-- the result kind is F64 whenever a promotion happened.
4304 trivialFCode pk instr x y
4305 = getRegister x `thenNat` \ register1 ->
4306 getRegister y `thenNat` \ register2 ->
4307 getNewRegNat (registerRep register1)
4309 getNewRegNat (registerRep register2)
4311 getNewRegNat F64 `thenNat` \ tmp ->
4313 promote x = FxTOy F DF x tmp
4315 pk1 = registerRep register1
4316 code1 = registerCode register1 tmp1
4317 src1 = registerName register1 tmp1
4319 pk2 = registerRep register2
4320 code2 = registerCode register2 tmp2
4321 src2 = registerName register2 tmp2
4325 code1 `appOL` code2 `snocOL`
4326 instr (primRepToSize pk) src1 src2 dst
4327 else if pk1 == F32 then
4328 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4329 instr DF tmp src2 dst
4331 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4332 instr DF src1 tmp dst
4334 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4337 trivialUCode instr x
4338 = getRegister x `thenNat` \ register ->
4339 getNewRegNat IntRep `thenNat` \ tmp ->
4341 code = registerCode register tmp
4342 src = registerName register tmp
4343 code__2 dst = code `snocOL` instr (RIReg src) dst
4345 return (Any IntRep code__2)
4348 trivialUFCode pk instr x
4349 = getRegister x `thenNat` \ register ->
4350 getNewRegNat pk `thenNat` \ tmp ->
4352 code = registerCode register tmp
4353 src = registerName register tmp
4354 code__2 dst = code `snocOL` instr src dst
4356 return (Any pk code__2)
4358 #endif /* sparc_TARGET_ARCH */
4360 #if powerpc_TARGET_ARCH
4363 Wolfgang's PowerPC version of The Rules:
4365 A slightly modified version of The Rules to take advantage of the fact
4366 that PowerPC instructions work on all registers and don't implicitly
4367 clobber any fixed registers.
4369 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4371 * If getRegister returns Any, then the code it generates may modify only:
4372 (a) fresh temporaries
4373 (b) the destination register
4374 It may *not* modify global registers, unless the global
4375 register happens to be the destination register.
4376 It may not clobber any other registers. In fact, only ccalls clobber any
4378 Also, it may not modify the counter register (used by genCCall).
4380 Corollary: If a getRegister for a subexpression returns Fixed, you need
4381 not move it to a fresh temporary before evaluating the next subexpression.
4382 The Fixed register won't be modified.
4383 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4385 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4386 the value of the destination register.
-- | PPC dyadic operation.  If the second operand is an integer literal
-- that fits the instruction's (signed or unsigned, per the Bool flag)
-- immediate field — checked by makeImmediate — use the immediate form;
-- otherwise compute both operands into registers.
-- NOTE(review): the stale line numbering skips a line in the first
-- clause (presumably the "= do" line was elided from this listing).
4389 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4390 | Just imm <- makeImmediate rep signed y
4392 (src1, code1) <- getSomeReg x
4393 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4394 return (Any rep code)
4396 trivialCode rep signed instr x y = do
4397 (src1, code1) <- getSomeReg x
4398 (src2, code2) <- getSomeReg y
4399 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4400 return (Any rep code)
-- | Dyadic operation whose instruction has no immediate form: evaluate
-- both arguments into registers and emit one three-register
-- instruction targeting the destination.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
	-> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    let assemble dst = (xCode `appOL` yCode) `snocOL` instr dst xReg yReg
    return (Any rep assemble)
-- | Emit a unary instruction: evaluate the operand into a register and
-- apply @instr@ from that register into the (freely chosen) destination.
trivialUCode rep instr x = do
    (reg_x, prep_x) <- getSomeReg x
    let assemble dst = prep_x `snocOL` instr dst reg_x
    return (Any rep assemble)
-- There is no "remainder" instruction on the PPC, so we have to do
-- it by hand: rem = x - (x `quot` y) * y.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | Integer remainder for PowerPC, which has no remainder instruction.
-- Computed as  dst = src1 - (src1 `div` src2) * src2:
--   div   dst, src1, src2   -- quotient (DIVW for signed, DIVWU for unsigned)
--   mullw dst, dst, src2    -- quotient * divisor
--   subf  dst, dst, src1    -- dividend - (quotient * divisor)
-- The 'div' argument supplies the appropriate division instruction.
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
                div dst src1 src2,
                MULLW dst dst (RIReg src2),
                SUBF dst dst src1
            ]
    return (Any rep code)
4431 #endif /* powerpc_TARGET_ARCH */
4434 -- -----------------------------------------------------------------------------
4435 -- Coercing to/from integer/floating-point...
4437 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4438 -- conversions. We have to store temporaries in memory to move
4439 -- between the integer and the floating point register sets.
4441 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4443 -- kinds, so the value has to be computed into one kind before being
4444 -- explicitly "converted" to live in the other kind.
4446 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4447 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4449 #if sparc_TARGET_ARCH
4450 coerceDbl2Flt :: CmmExpr -> NatM Register
4451 coerceFlt2Dbl :: CmmExpr -> NatM Register
4454 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4456 #if alpha_TARGET_ARCH
4459 = getRegister x `thenNat` \ register ->
4460 getNewRegNat IntRep `thenNat` \ reg ->
4462 code = registerCode register reg
4463 src = registerName register reg
4465 code__2 dst = code . mkSeqInstrs [
4467 LD TF dst (spRel 0),
4470 return (Any F64 code__2)
4474 = getRegister x `thenNat` \ register ->
4475 getNewRegNat F64 `thenNat` \ tmp ->
4477 code = registerCode register tmp
4478 src = registerName register tmp
4480 code__2 dst = code . mkSeqInstrs [
4482 ST TF tmp (spRel 0),
4485 return (Any IntRep code__2)
4487 #endif /* alpha_TARGET_ARCH */
4489 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4491 #if i386_TARGET_ARCH
-- | Coerce an integer value to floating point (i386, x87).
-- The conversion opcode is selected by the destination float rep.
coerceInt2FP from to x = do
    (src, src_code) <- getSomeReg x
    let conv = case to of
                 F32 -> GITOF
                 F64 -> GITOD
        code dst = src_code `snocOL` conv src dst
        -- ToDo: works for non-I32 reps?
    return (Any to code)
-- | Coerce a floating-point value to an integer (i386, x87).
-- The conversion opcode is selected by the source float rep.
coerceFP2Int from to x = do
    (src, src_code) <- getSomeReg x
    let conv = case from of
                 F32 -> GFTOI
                 F64 -> GDTOI
        code dst = src_code `snocOL` conv src dst
        -- ToDo: works for non-I32 reps?
    return (Any to code)
4513 #endif /* i386_TARGET_ARCH */
4515 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4517 #if x86_64_TARGET_ARCH
-- | Coerce a floating-point value to an integer (x86_64, SSE).
-- The conversion opcode must be chosen by the *source* float rep:
-- 'to' is an integer rep here (cf. the comment on the result), so
-- matching it against F32/F64 — as this code previously did — would
-- fail at runtime.  The i386 version above correctly keys on 'from'.
coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
    let
        opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- | Coerce an integer value to floating point (x86_64, SSE).
-- The conversion opcode is selected by the destination float rep.
coerceInt2FP from to x = do
    (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
    let conv = case to of
                 F32 -> CVTSI2SS
                 F64 -> CVTSI2SD
        code dst = x_code `snocOL` conv x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- | Convert between the two floating-point formats (x86_64, SSE).
-- The opcode is selected by the destination rep: CVTSD2SS narrows to
-- F32, CVTSS2SD widens to F64.
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
    (src, src_code) <- getSomeReg x
    let conv = case to of
                 F32 -> CVTSD2SS
                 F64 -> CVTSS2SD
        code dst = src_code `snocOL` conv src dst
    return (Any to code)
4546 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4548 #if sparc_TARGET_ARCH
4551 = getRegister x `thenNat` \ register ->
4552 getNewRegNat IntRep `thenNat` \ reg ->
4554 code = registerCode register reg
4555 src = registerName register reg
4557 code__2 dst = code `appOL` toOL [
4558 ST W src (spRel (-2)),
4559 LD W (spRel (-2)) dst,
4560 FxTOy W (primRepToSize pk) dst dst]
4562 return (Any pk code__2)
-- | Coerce a float value (F32 or F64) to an integer (SPARC).
-- The conversion happens inside an FP register; the result is then
-- bounced through a stack slot because the integer and FP register
-- sets are disjoint on this architecture.
coerceFP2Int fprep x
    = ASSERT(fprep == F64 || fprep == F32)
      getRegister x         `thenNat` \ register ->
      getNewRegNat fprep    `thenNat` \ reg ->
      getNewRegNat F32      `thenNat` \ tmp ->
      let
          code = registerCode register reg
          src  = registerName register reg
          code__2 dst = code `appOL` toOL [
              -- convert in the FP register file, then spill and reload
              FxTOy (primRepToSize fprep) W src tmp,
              ST W tmp (spRel (-2)),
              LD W (spRel (-2)) dst]
      in
          return (Any IntRep code__2)
4582 = getRegister x `thenNat` \ register ->
4583 getNewRegNat F64 `thenNat` \ tmp ->
4584 let code = registerCode register tmp
4585 src = registerName register tmp
4588 (\dst -> code `snocOL` FxTOy DF F src dst))
4592 = getRegister x `thenNat` \ register ->
4593 getNewRegNat F32 `thenNat` \ tmp ->
4594 let code = registerCode register tmp
4595 src = registerName register tmp
4598 (\dst -> code `snocOL` FxTOy F DF src dst))
4600 #endif /* sparc_TARGET_ARCH */
4602 #if powerpc_TARGET_ARCH
-- | Coerce an integer value to floating point (PowerPC).
-- Uses the classic PPC bit trick: build the double 2^52 + (int XOR sign bit)
-- on the stack, load it into an FP register, then subtract the magic
-- constant 0x4330000080000000 (stored at label 'lbl') to recover the value.
--
-- NOTE(review): this extract appears to have lost several lines relative
-- to the surrounding structure: there is no 'let' introducing the local
-- bindings, the static data lines have no enclosing LDATA/CmmDataLabel,
-- no load of the constant at 'addr' precedes the FSUB, and both 'case'
-- expressions lack a default alternative — confirm against the original
-- file before relying on this block.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat                -- label for the magic double constant
    itmp <- getNewRegNat I32             -- integer scratch register
    ftmp <- getNewRegNat F64             -- FP scratch register
    -- position-independent reference to the constant's label
    dynRef <- cmmMakeDynamicReference addImportNat False lbl
    Amode addr addr_code <- getAmode dynRef
        code' dst = code `appOL` maybe_exts `appOL` toOL [
            -- the two halves of the magic constant 0x4330000080000000
            CmmStaticLit (CmmInt 0x43300000 I32),
            CmmStaticLit (CmmInt 0x80000000 I32)],
            -- flip the sign bit of the integer and assemble the double
            -- 2^52 + biased-int in the two stack words (spRel 2, spRel 3)
            XORIS itmp src (ImmInt 0x8000),
            ST I32 itmp (spRel 3),
            LIS itmp (ImmInt 0x4330),
            ST I32 itmp (spRel 2),
            LD F64 ftmp (spRel 2)
        ] `appOL` addr_code `appOL` toOL [
            -- subtract the magic constant to obtain the converted value
            FSUB F64 dst ftmp dst
        ] `appOL` maybe_frsp dst
        -- sign-extend sub-word sources before the conversion
        maybe_exts = case fromRep of
            I8 -> unitOL $ EXTS I8 src src
            I16 -> unitOL $ EXTS I16 src src
        -- round down to single precision when the target is F32
        maybe_frsp dst = case toRep of
            F32 -> unitOL $ FRSP dst dst
    return (Any toRep code')
-- | Coerce a floating-point value to an integer (PowerPC).
-- Converts in an FP register with FCTIWZ (round toward zero), stores the
-- 64-bit FP result to the stack, and reloads the low word into an integer
-- register — there is no direct FP->int register move on PPC.
-- The instruction under the "convert" comment had been lost in this copy;
-- FCTIWZ is the PPC floating convert-to-integer-word-with-truncate op.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
    let
        code' dst = code `appOL` toOL [
                -- convert to int in FP reg
            FCTIWZ tmp src,
                -- store value (64bit) from FP to stack
            ST F64 tmp (spRel 2),
                -- read low word of value (high word is undefined)
            LD I32 dst (spRel 3)]
    return (Any toRep code')
4648 #endif /* powerpc_TARGET_ARCH */
4651 -- -----------------------------------------------------------------------------
4652 -- eXTRA_STK_ARGS_HERE
4654 -- We (allegedly) put the first six C-call arguments in registers;
4655 -- where do we start putting the rest of them?
4657 -- Moved from MachInstrs (SDM):
4659 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4660 eXTRA_STK_ARGS_HERE :: Int
4662 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))