1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
26 -- Our intermediate code:
27 import PprCmm ( pprExpr )
33 import StaticFlags ( opt_PIC )
34 import ForeignCall ( CCallConv(..) )
39 import FastTypes ( isFastTrue )
40 import Constants ( wORD_SIZE )
43 import Outputable ( assertPanic )
44 import TRACE ( trace )
47 import Control.Monad ( mapAndUnzipM )
48 import Maybe ( fromJust )
52 -- -----------------------------------------------------------------------------
53 -- Top-level of the instruction selector
55 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
56 -- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct order.
60 type InstrBlock = OrdList Instr
-- | Translate one top-level Cmm item into native-code top-level items.
--
-- A 'CmmProc' is translated block by block; selecting code for one block
-- may itself produce extra blocks and static data (see
-- 'basicBlockCodeGen'), so the pieces are concatenated back together.
-- If a PIC base register is in use, the procedure's entry code is
-- patched via 'initializePicBase'.
cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): the scrutinee line of this case (presumably
  -- @case picBaseMb of@) is not visible in this excerpt.
        Just picBase -> initializePicBase picBase tops
        Nothing -> return tops

-- Static data needs no instruction selection; it is passed through.
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Generate native code for one Cmm basic block.  Returns the native
-- basic blocks produced (possibly more than one) together with any
-- static data (LDATA) the instruction selector decided to emit.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- NOTE(review): the keyword introducing these bindings (presumably
  -- @let@) is not visible in this excerpt.
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
        -- A NEWBLOCK marker closes the run of instructions accumulated
        -- so far and opens a fresh basic block with the given id.
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        -- Static data is hoisted out of the instruction stream.
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        -- Ordinary instructions accumulate; foldrOL keeps source order.
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Flatten a list of statements into one instruction stream by
-- translating each statement and concatenating the results.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the equation head (presumably @stmtsToInstrs stmts@)
-- is not visible in this excerpt.
 = do instrss <- mapM stmtToInstrs stmts
      return (concatOL instrss)
-- | Translate a single Cmm statement to native instructions,
-- dispatching on the statement shape.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    -- NOTE(review): the CmmAssign alternative's head is not visible in
    -- this excerpt; these guards dispatch on the target register's rep.
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      -- 64-bit assignments need the special register-pair path on
      -- 32-bit targets (the matching #endif is not visible here).
      | kind == I64 -> assignReg_I64Code reg src
      | otherwise -> assignReg_IntCode kind reg src
        where kind = cmmRegRep reg

    -- NOTE(review): the CmmStore alternative's head is likewise not
    -- visible; same dispatch, keyed on the rep of the stored value.
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64 -> assignMem_I64Code addr src
      | otherwise -> assignMem_IntCode kind addr src
        where kind = cmmExprRep src

    CmmCall target result_regs args vols
       -> genCCall target result_regs args vols

    CmmBranch id         -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids    -> genSwitch arg ids
    CmmJump arg params   -> genJump arg
128 -- -----------------------------------------------------------------------------
129 -- General things for putting together code sequences
-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?

-- | Rewrite a 'CmmRegOff' node into the equivalent explicit addition
-- @reg + off@, performed at the register's own 'MachRep'.  The code
-- generator only applies this to 'CmmRegOff' trees (see 'getRegister');
-- any other shape is a compiler bug, reported explicitly below rather
-- than as an opaque pattern-match failure.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
  where rep = cmmRegRep reg
mangleIndexTree _
  = panic "MachCodeGen.mangleIndexTree: no match"
138 -- -----------------------------------------------------------------------------
139 -- Code gen for 64-bit arithmetic on 32-bit platforms
142 Simple support for generating 64-bit code (ie, 64 bit values and 64
143 bit assignments) on 32-bit platforms. Unlike the main code generator
144 we merely shoot for generating working code as simply as possible, and
145 pay little attention to code quality. Specifically, there is no
146 attempt to deal cleverly with the fixed-vs-floating register
147 distinction; all values are generated into (pairs of) floating
148 registers, even if this would mean some redundant reg-reg moves as a
149 result. Only one of the VRegUniques is returned, since it will be
150 of the VRegUniqueLo form, and the upper-half VReg can be determined
151 by applying getHiVRegFromLo to it.
-- | The result of 64-bit instruction selection on a 32-bit target.
data ChildCode64   -- a.k.a "Register64"
-- NOTE(review): the constructor line (presumably
-- @= ChildCode64 InstrBlock@) is not visible in this excerpt.
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified

#if WORD_SIZE_IN_BITS==32

-- | Store a 64-bit value (2nd argument) at the given address.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-- | Assign a 64-bit value to a register (pair).
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
-- | Select instructions computing a 64-bit value into a register pair.
iselExpr64 :: CmmExpr -> NatM ChildCode64
172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value as two 32-bit moves; little-endian, so
-- the low word goes at @addr@ and the high word at @addr+4@.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  -- NOTE(review): the @let@ introducing these bindings is not visible
  -- in this excerpt.
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))

  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair with two
-- reg-reg moves (one per 32-bit half).
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
         r_dst_lo = mkVReg u_dst I32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers can hold an I64 here; anything else is a bug
-- in the caller.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: load each 32-bit half with an immediate move.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  -- r = low 32 bits, q = high 32 bits of the literal.
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
        MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- 64-bit load: two 32-bit loads; little-endian, low word at addr.
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- A 64-bit local already lives in a virtual register pair; no code.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))

-- we handle addition, but rather badly
-- Add a literal: low words with ADD (sets carry), high words with ADC.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General 64-bit addition: copy one operand into the result pair,
-- then ADD/ADC the other in (carry propagates low -> high).
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpReg r2lo) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Fallback: anything else is unsupported here.
-- NOTE(review): the equation head (presumably @iselExpr64 expr@) is
-- not visible in this excerpt.
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */
271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
273 #if sparc_TARGET_ARCH
-- sparc: store a 64-bit value; big-endian, so the HIGH word goes at
-- offset 0 and the low word at offset 4.
-- NOTE(review): this sparc section still uses the Stix-era API
-- (thenNat, registerCode/registerName, VirtualRegI) and looks
-- bit-rotted relative to the Cmm-based code above — confirm it builds.
assignMem_I64Code addrTree valueTree
   = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vrlo) ->
     getRegister addrTree `thenNat` \ register_addr ->
     getNewRegNat IntRep `thenNat` \ t_addr ->
     let rlo = VirtualRegI vrlo
         rhi = getHiVRegFromLo rlo
         code_addr = registerCode register_addr t_addr
         reg_addr = registerName register_addr t_addr

         mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
         mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
     return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: assign a 64-bit value to a register pair, using OR-with-%g0
-- as the register-to-register move idiom.
-- NOTE(review): the StixTemp/StixVReg pattern is Stix-era and does not
-- match the CmmLocal patterns used by the other architectures here.
assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
   = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
         r_dst_lo = mkVReg u_dst IntRep
         r_src_lo = VirtualRegI vr_src_lo
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- OR sreg with %g0 (always zero) into dreg == plain move.
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         vcode `snocOL` mov_hi `snocOL` mov_lo

assignReg_I64Code lvalue valueTree
   = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "iselExpr64(???)"

-- sparc 64-bit load: big-endian, high word at offset 0, low at 4.
-- NOTE(review): the argument order (CmmLoad I64 addrTree) is the
-- reverse of the i386/ppc clauses in this file — looks like bit-rot;
-- confirm against the CmmLoad constructor's field order.
iselExpr64 (CmmLoad I64 addrTree)
   = getRegister addrTree `thenNat` \ register_addr ->
     getNewRegNat IntRep `thenNat` \ t_addr ->
     getNewRegNat IntRep `thenNat` \ rlo ->
     let rhi = getHiVRegFromLo rlo
         code_addr = registerCode register_addr t_addr
         reg_addr = registerName register_addr t_addr
         mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
         mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
         ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)

-- Reading a 64-bit local: copy both halves into a fresh pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
   = getNewRegNat IntRep `thenNat` \ r_dst_lo ->
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         -- NOTE(review): 'vu' is unbound here — the pattern above binds
         -- 'uq'.  Almost certainly bit-rot; should be @mkVReg uq IntRep@.
         r_src_lo = mkVReg vu IntRep
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
         ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)

-- 64-bit C-call result: the two halves come back in %o0 and %o1;
-- copy them into a fresh register pair.
-- NOTE(review): StCall is a Stix-era pattern and 'kind' is not bound
-- in this excerpt — further bit-rot.
iselExpr64 (StCall fn cconv I64 args)
  = genCCall fn cconv kind args `thenNat` \ call ->
    getNewRegNat IntRep `thenNat` \ r_dst_lo ->
    let r_dst_hi = getHiVRegFromLo r_dst_lo
        mov_lo = mkMOV o0 r_dst_lo
        mov_hi = mkMOV o1 r_dst_hi
        mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
                    (getVRegUnique r_dst_lo)

-- Fallback: anything else is unsupported on sparc.
-- NOTE(review): the equation head is not visible in this excerpt.
   = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)

#endif /* sparc_TARGET_ARCH */
360 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
362 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for both halves of a 64-bit memory
-- operand: the address itself (high word, big-endian) and the address
-- 4 bytes above it (low word), plus the code to set them up.  If the
-- +4 offset cannot be folded into the amode, fall back to materialising
-- the base pointer in a register.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                          return (AddrRegImm hi_ptr (ImmInt 0),
                                  AddrRegImm hi_ptr (ImmInt 4),
                          -- NOTE(review): the final component of this
                          -- tuple is not visible in this excerpt.
-- powerpc: store a 64-bit value as two 32-bit stores, one per half,
-- using the amode pair from getI64Amodes.
assignMem_I64Code addrTree valueTree = do
   (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
   ChildCode64 vcode rlo <- iselExpr64 valueTree
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
        rhi = getHiVRegFromLo rlo

        -- Big-endian store: high word at the lower address.
        mov_hi = ST I32 rhi hi_addr
        mov_lo = ST I32 rlo lo_addr

   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- powerpc: assign a 64-bit value to a local register pair using two
-- MR (move register) instructions, one per half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
         r_dst_lo = mkVReg u_dst I32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MR r_dst_lo r_src_lo
         mov_hi = MR r_dst_hi r_src_hi
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers can hold an I64 here.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
-- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "iselExpr64(???)"

-- powerpc 64-bit load: two 32-bit loads; big-endian, so the high word
-- lives at the lower address and the low word 4 bytes above it.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    -- NOTE(review): the continuation of this expression (the low
    -- register argument to ChildCode64) is not visible in this excerpt.
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- A 64-bit local already lives in a virtual register pair; no code.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- | Build a 64-bit literal in a register pair, 16 bits at a time:
-- LIS places a halfword in the upper half of a 32-bit register, and an
-- OR-immediate fills in the lower halfword.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- The four 16-bit chunks of the literal, low to high.
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: this OR must target the HIGH-word register.
                -- It previously read @OR rlo rlo (RIImm $ ImmInt half2)@,
                -- clobbering the low word and leaving the upper 32 bits
                -- as just (half3 << 16).
                OR rhi rhi (RIImm $ ImmInt half2)
                ]

  return (ChildCode64 code rlo)
-- 64-bit addition: add the low words with carry-out (ADDC); the high
-- words must then be added with carry-in.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   -- NOTE(review): the @let@ introducing these bindings is not visible
   -- in this excerpt.
         r1hi = getHiVRegFromLo r1lo
         r2hi = getHiVRegFromLo r2lo
         toOL [ ADDC rlo r1lo r2lo,
   -- NOTE(review): the matching high-word instruction (presumably
   -- @ADDE rhi r1hi r2hi@) is not visible in this excerpt.
   return (ChildCode64 code rlo)

-- Fallback: anything else is unsupported on powerpc.
-- NOTE(review): the equation head is not visible in this excerpt.
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */
-- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.

-- NOTE(review): the @data Register@ head line is not visible in this
-- excerpt.
   = Fixed   MachRep Reg InstrBlock      -- value lives in this register,
                                         -- produced by this code
   | Any     MachRep (Reg -> InstrBlock) -- caller chooses the destination
                                         -- register and gets code for it
-- | Replace the 'MachRep' carried by a 'Register' without touching the
-- register choice or the code that computes the value.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep =
  case register of
    Fixed _ reg code -> Fixed rep reg code
    Any   _ codefn   -> Any   rep codefn
-- -----------------------------------------------------------------------------
-- Utils based on getRegister, below

-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head and the case analysis on the
-- 'Register' result are only partially visible in this excerpt; in the
-- visible 'Any' branch a fresh register is allocated and handed to the
-- code generator.
  r <- getRegister expr
    tmp <- getNewRegNat rep
    return (tmp, code tmp)
-- -----------------------------------------------------------------------------
-- Grab the Reg for a CmmReg

-- | Map a 'CmmReg' to the (virtual or real) machine register that
-- holds it.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))
-- NOTE(review): this clause's right-hand side (presumably
-- @= mkVReg u pk@) is not visible in this excerpt.

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
-- -----------------------------------------------------------------------------
-- Generate code to get a subtree into a Register

-- Don't delete this -- it's very handy for debugging.
-- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
-- = panic "getRegister(???)"

-- | Select instructions computing a Cmm expression, yielding either a
-- 'Fixed' register or an 'Any' code generator (see the 'Register' type).
getRegister :: CmmExpr -> NatM Register

-- The PIC base register needs no code once it has been initialised.
-- NOTE(review): the @= do@ of this equation is not visible in this
-- excerpt.
getRegister (CmmReg (CmmGlobal PicBaseReg))
      reg <- getPicBaseNat wordRep
      return (Fixed wordRep reg nilOL)

-- Any other register read: the value is already in a known register.
getRegister (CmmReg reg)
  = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)

-- Reg+offset is first rewritten into an explicit addition.
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

-- end of machine-"independent" bit; here we go on the rest...
528 #if alpha_TARGET_ARCH
530 getRegister (StDouble d)
531 = getBlockIdNat `thenNat` \ lbl ->
532 getNewRegNat PtrRep `thenNat` \ tmp ->
533 let code dst = mkSeqInstrs [
534 LDATA RoDataSegment lbl [
535 DATA TF [ImmLab (rational d)]
537 LDA tmp (AddrImm (ImmCLbl lbl)),
538 LD TF dst (AddrReg tmp)]
540 return (Any F64 code)
542 getRegister (StPrim primop [x]) -- unary PrimOps
544 IntNegOp -> trivialUCode (NEG Q False) x
546 NotOp -> trivialUCode NOT x
548 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
549 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
551 OrdOp -> coerceIntCode IntRep x
554 Float2IntOp -> coerceFP2Int x
555 Int2FloatOp -> coerceInt2FP pr x
556 Double2IntOp -> coerceFP2Int x
557 Int2DoubleOp -> coerceInt2FP pr x
559 Double2FloatOp -> coerceFltCode x
560 Float2DoubleOp -> coerceFltCode x
562 other_op -> getRegister (StCall fn CCallConv F64 [x])
564 fn = case other_op of
565 FloatExpOp -> FSLIT("exp")
566 FloatLogOp -> FSLIT("log")
567 FloatSqrtOp -> FSLIT("sqrt")
568 FloatSinOp -> FSLIT("sin")
569 FloatCosOp -> FSLIT("cos")
570 FloatTanOp -> FSLIT("tan")
571 FloatAsinOp -> FSLIT("asin")
572 FloatAcosOp -> FSLIT("acos")
573 FloatAtanOp -> FSLIT("atan")
574 FloatSinhOp -> FSLIT("sinh")
575 FloatCoshOp -> FSLIT("cosh")
576 FloatTanhOp -> FSLIT("tanh")
577 DoubleExpOp -> FSLIT("exp")
578 DoubleLogOp -> FSLIT("log")
579 DoubleSqrtOp -> FSLIT("sqrt")
580 DoubleSinOp -> FSLIT("sin")
581 DoubleCosOp -> FSLIT("cos")
582 DoubleTanOp -> FSLIT("tan")
583 DoubleAsinOp -> FSLIT("asin")
584 DoubleAcosOp -> FSLIT("acos")
585 DoubleAtanOp -> FSLIT("atan")
586 DoubleSinhOp -> FSLIT("sinh")
587 DoubleCoshOp -> FSLIT("cosh")
588 DoubleTanhOp -> FSLIT("tanh")
590 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
592 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
594 CharGtOp -> trivialCode (CMP LTT) y x
595 CharGeOp -> trivialCode (CMP LE) y x
596 CharEqOp -> trivialCode (CMP EQQ) x y
597 CharNeOp -> int_NE_code x y
598 CharLtOp -> trivialCode (CMP LTT) x y
599 CharLeOp -> trivialCode (CMP LE) x y
601 IntGtOp -> trivialCode (CMP LTT) y x
602 IntGeOp -> trivialCode (CMP LE) y x
603 IntEqOp -> trivialCode (CMP EQQ) x y
604 IntNeOp -> int_NE_code x y
605 IntLtOp -> trivialCode (CMP LTT) x y
606 IntLeOp -> trivialCode (CMP LE) x y
608 WordGtOp -> trivialCode (CMP ULT) y x
609 WordGeOp -> trivialCode (CMP ULE) x y
610 WordEqOp -> trivialCode (CMP EQQ) x y
611 WordNeOp -> int_NE_code x y
612 WordLtOp -> trivialCode (CMP ULT) x y
613 WordLeOp -> trivialCode (CMP ULE) x y
615 AddrGtOp -> trivialCode (CMP ULT) y x
616 AddrGeOp -> trivialCode (CMP ULE) y x
617 AddrEqOp -> trivialCode (CMP EQQ) x y
618 AddrNeOp -> int_NE_code x y
619 AddrLtOp -> trivialCode (CMP ULT) x y
620 AddrLeOp -> trivialCode (CMP ULE) x y
622 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
623 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
624 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
625 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
626 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
627 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
629 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
630 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
631 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
632 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
633 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
634 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
636 IntAddOp -> trivialCode (ADD Q False) x y
637 IntSubOp -> trivialCode (SUB Q False) x y
638 IntMulOp -> trivialCode (MUL Q False) x y
639 IntQuotOp -> trivialCode (DIV Q False) x y
640 IntRemOp -> trivialCode (REM Q False) x y
642 WordAddOp -> trivialCode (ADD Q False) x y
643 WordSubOp -> trivialCode (SUB Q False) x y
644 WordMulOp -> trivialCode (MUL Q False) x y
645 WordQuotOp -> trivialCode (DIV Q True) x y
646 WordRemOp -> trivialCode (REM Q True) x y
648 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
649 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
650 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
651 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
653 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
654 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
655 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
656 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
658 AddrAddOp -> trivialCode (ADD Q False) x y
659 AddrSubOp -> trivialCode (SUB Q False) x y
660 AddrRemOp -> trivialCode (REM Q True) x y
662 AndOp -> trivialCode AND x y
663 OrOp -> trivialCode OR x y
664 XorOp -> trivialCode XOR x y
665 SllOp -> trivialCode SLL x y
666 SrlOp -> trivialCode SRL x y
668 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
669 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
670 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
672 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
673 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
675 {- ------------------------------------------------------------
676 Some bizarre special code for getting condition codes into
677 registers. Integer non-equality is a test for equality
678 followed by an XOR with 1. (Integer comparisons always set
679 the result register to 0 or 1.) Floating point comparisons of
680 any kind leave the result in a floating point register, so we
681 need to wrangle an integer register out of things.
683 int_NE_code :: StixTree -> StixTree -> NatM Register
686 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
687 getNewRegNat IntRep `thenNat` \ tmp ->
689 code = registerCode register tmp
690 src = registerName register tmp
691 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
693 return (Any IntRep code__2)
695 {- ------------------------------------------------------------
696 Comments for int_NE_code also apply to cmpF_code
699 :: (Reg -> Reg -> Reg -> Instr)
701 -> StixTree -> StixTree
704 cmpF_code instr cond x y
705 = trivialFCode pr instr x y `thenNat` \ register ->
706 getNewRegNat F64 `thenNat` \ tmp ->
707 getBlockIdNat `thenNat` \ lbl ->
709 code = registerCode register tmp
710 result = registerName register tmp
712 code__2 dst = code . mkSeqInstrs [
713 OR zeroh (RIImm (ImmInt 1)) dst,
714 BF cond result (ImmCLbl lbl),
715 OR zeroh (RIReg zeroh) dst,
718 return (Any IntRep code__2)
720 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
721 ------------------------------------------------------------
723 getRegister (CmmLoad pk mem)
724 = getAmode mem `thenNat` \ amode ->
726 code = amodeCode amode
727 src = amodeAddr amode
728 size = primRepToSize pk
729 code__2 dst = code . mkSeqInstr (LD size dst src)
731 return (Any pk code__2)
733 getRegister (StInt i)
736 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
738 return (Any IntRep code)
741 code dst = mkSeqInstr (LDI Q dst src)
743 return (Any IntRep code)
745 src = ImmInt (fromInteger i)
750 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
752 return (Any PtrRep code)
755 imm__2 = case imm of Just x -> x
757 #endif /* alpha_TARGET_ARCH */
759 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
763 getRegister (CmmLit (CmmFloat f F32)) = do
764 lbl <- getNewLabelNat
765 let code dst = toOL [
768 CmmStaticLit (CmmFloat f F32)],
769 GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
772 return (Any F32 code)
775 getRegister (CmmLit (CmmFloat d F64))
777 = let code dst = unitOL (GLDZ dst)
778 in return (Any F64 code)
781 = let code dst = unitOL (GLD1 dst)
782 in return (Any F64 code)
785 lbl <- getNewLabelNat
786 let code dst = toOL [
789 CmmStaticLit (CmmFloat d F64)],
790 GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
793 return (Any F64 code)
795 #endif /* i386_TARGET_ARCH */
797 #if x86_64_TARGET_ARCH
799 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
800 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
801 -- I don't know why there are xorpd, xorps, and pxor instructions.
802 -- They all appear to do the same thing --SDM
803 return (Any rep code)
805 getRegister (CmmLit (CmmFloat f rep)) = do
806 lbl <- getNewLabelNat
807 let code dst = toOL [
810 CmmStaticLit (CmmFloat f rep)],
811 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
814 return (Any rep code)
816 #endif /* x86_64_TARGET_ARCH */
818 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
820 -- catch simple cases of zero- or sign-extended load
821 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
822 code <- intLoadCode (MOVZxL I8) addr
823 return (Any I32 code)
825 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
826 code <- intLoadCode (MOVSxL I8) addr
827 return (Any I32 code)
829 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
830 code <- intLoadCode (MOVZxL I16) addr
831 return (Any I32 code)
833 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
834 code <- intLoadCode (MOVSxL I16) addr
835 return (Any I32 code)
839 #if x86_64_TARGET_ARCH
841 -- catch simple cases of zero- or sign-extended load
842 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
843 code <- intLoadCode (MOVZxL I8) addr
844 return (Any I64 code)
846 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
847 code <- intLoadCode (MOVSxL I8) addr
848 return (Any I64 code)
850 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
851 code <- intLoadCode (MOVZxL I16) addr
852 return (Any I64 code)
854 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
855 code <- intLoadCode (MOVSxL I16) addr
856 return (Any I64 code)
858 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
859 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
860 return (Any I64 code)
862 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
863 code <- intLoadCode (MOVSxL I32) addr
864 return (Any I64 code)
868 #if x86_64_TARGET_ARCH
869 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
870 x_code <- getAnyReg x
871 lbl <- getNewLabelNat
873 code dst = x_code dst `appOL` toOL [
874 -- This is how gcc does it, so it can't be that bad:
875 LDATA ReadOnlyData16 [
878 CmmStaticLit (CmmInt 0x80000000 I32),
879 CmmStaticLit (CmmInt 0 I32),
880 CmmStaticLit (CmmInt 0 I32),
881 CmmStaticLit (CmmInt 0 I32)
883 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
884 -- xorps, so we need the 128-bit constant
885 -- ToDo: rip-relative
888 return (Any F32 code)
890 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
891 x_code <- getAnyReg x
892 lbl <- getNewLabelNat
894 -- This is how gcc does it, so it can't be that bad:
895 code dst = x_code dst `appOL` toOL [
896 LDATA ReadOnlyData16 [
899 CmmStaticLit (CmmInt 0x8000000000000000 I64),
900 CmmStaticLit (CmmInt 0 I64)
902 -- gcc puts an unpck here. Wonder if we need it.
903 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
904 -- xorpd, so we need the 128-bit constant
907 return (Any F64 code)
910 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
912 getRegister (CmmMachOp mop [x]) -- unary MachOps
915 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
916 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
919 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
920 MO_Not rep -> trivialUCode rep (NOT rep) x
923 -- TODO: these are only nops if the arg is not a fixed register that
924 -- can't be byte-addressed.
925 MO_U_Conv I32 I8 -> conversionNop I32 x
926 MO_S_Conv I32 I8 -> conversionNop I32 x
927 MO_U_Conv I16 I8 -> conversionNop I16 x
928 MO_S_Conv I16 I8 -> conversionNop I16 x
929 MO_U_Conv I32 I16 -> conversionNop I32 x
930 MO_S_Conv I32 I16 -> conversionNop I32 x
931 #if x86_64_TARGET_ARCH
932 MO_U_Conv I64 I32 -> conversionNop I64 x
933 MO_S_Conv I64 I32 -> conversionNop I64 x
934 MO_U_Conv I64 I16 -> conversionNop I64 x
935 MO_S_Conv I64 I16 -> conversionNop I64 x
936 MO_U_Conv I64 I8 -> conversionNop I64 x
937 MO_S_Conv I64 I8 -> conversionNop I64 x
940 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
941 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
944 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
945 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
946 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
948 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
949 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
950 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
952 #if x86_64_TARGET_ARCH
953 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
954 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
955 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
956 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
957 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
958 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
959 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
960 -- However, we don't want the register allocator to throw it
961 -- away as an unnecessary reg-to-reg move, so we keep it in
962 -- the form of a movzl and print it as a movl later.
966 MO_S_Conv F32 F64 -> conversionNop F64 x
967 MO_S_Conv F64 F32 -> conversionNop F32 x
969 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
970 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
974 | isFloatingRep from -> coerceFP2Int from to x
975 | isFloatingRep to -> coerceInt2FP from to x
977 other -> pprPanic "getRegister" (pprMachOp mop)
979 -- signed or unsigned extension.
980 integerExtend from to instr expr = do
981 (reg,e_code) <- if from == I8 then getByteReg expr
986 instr from (OpReg reg) (OpReg dst)
989 conversionNop new_rep expr
990 = do e_code <- getRegister expr
991 return (swizzleRegisterRep e_code new_rep)
994 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
995 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
997 MO_Eq F32 -> condFltReg EQQ x y
998 MO_Ne F32 -> condFltReg NE x y
999 MO_S_Gt F32 -> condFltReg GTT x y
1000 MO_S_Ge F32 -> condFltReg GE x y
1001 MO_S_Lt F32 -> condFltReg LTT x y
1002 MO_S_Le F32 -> condFltReg LE x y
1004 MO_Eq F64 -> condFltReg EQQ x y
1005 MO_Ne F64 -> condFltReg NE x y
1006 MO_S_Gt F64 -> condFltReg GTT x y
1007 MO_S_Ge F64 -> condFltReg GE x y
1008 MO_S_Lt F64 -> condFltReg LTT x y
1009 MO_S_Le F64 -> condFltReg LE x y
1011 MO_Eq rep -> condIntReg EQQ x y
1012 MO_Ne rep -> condIntReg NE x y
1014 MO_S_Gt rep -> condIntReg GTT x y
1015 MO_S_Ge rep -> condIntReg GE x y
1016 MO_S_Lt rep -> condIntReg LTT x y
1017 MO_S_Le rep -> condIntReg LE x y
1019 MO_U_Gt rep -> condIntReg GU x y
1020 MO_U_Ge rep -> condIntReg GEU x y
1021 MO_U_Lt rep -> condIntReg LU x y
1022 MO_U_Le rep -> condIntReg LEU x y
1024 #if i386_TARGET_ARCH
1025 MO_Add F32 -> trivialFCode F32 GADD x y
1026 MO_Sub F32 -> trivialFCode F32 GSUB x y
1028 MO_Add F64 -> trivialFCode F64 GADD x y
1029 MO_Sub F64 -> trivialFCode F64 GSUB x y
1031 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1032 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1035 #if x86_64_TARGET_ARCH
1036 MO_Add F32 -> trivialFCode F32 ADD x y
1037 MO_Sub F32 -> trivialFCode F32 SUB x y
1039 MO_Add F64 -> trivialFCode F64 ADD x y
1040 MO_Sub F64 -> trivialFCode F64 SUB x y
1042 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1043 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1046 MO_Add rep -> add_code rep x y
1047 MO_Sub rep -> sub_code rep x y
1049 MO_S_Quot rep -> div_code rep True True x y
1050 MO_S_Rem rep -> div_code rep True False x y
1051 MO_U_Quot rep -> div_code rep False True x y
1052 MO_U_Rem rep -> div_code rep False False x y
1054 #if i386_TARGET_ARCH
1055 MO_Mul F32 -> trivialFCode F32 GMUL x y
1056 MO_Mul F64 -> trivialFCode F64 GMUL x y
1059 #if x86_64_TARGET_ARCH
1060 MO_Mul F32 -> trivialFCode F32 MUL x y
1061 MO_Mul F64 -> trivialFCode F64 MUL x y
1064 MO_Mul rep -> let op = IMUL rep in
1065 trivialCode rep op (Just op) x y
1067 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1069 MO_And rep -> let op = AND rep in
1070 trivialCode rep op (Just op) x y
1071 MO_Or rep -> let op = OR rep in
1072 trivialCode rep op (Just op) x y
1073 MO_Xor rep -> let op = XOR rep in
1074 trivialCode rep op (Just op) x y
1076 {- Shift ops on x86s have constraints on their source, it
1077 either has to be Imm, CL or 1
1078 => trivialCode is not restrictive enough (sigh.)
1080 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1081 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1082 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1084 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1086 --------------------
-- | Signed multiply with overflow detection (x86): performs a widening
-- IMUL2 leaving the double-width product in %edx:%eax, sign-extends the
-- low half, and subtracts it from the high half, so the result register
-- is zero exactly when no overflow occurred.
-- NOTE(review): the instruction result is pinned to eax (Fixed).
1087 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1088 imulMayOflo rep a b = do
1089 (a_reg, a_code) <- getNonClobberedReg a
1090 b_code <- getAnyReg b
-- shift_amt is the bit width minus one for the given rep; the visible
-- alternatives of this case are elided in this chunk.
1092 shift_amt = case rep of
1095 _ -> panic "shift_amt"
1097 code = a_code `appOL` b_code eax `appOL`
1099 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1100 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1101 -- sign extend lower part
1102 SUB rep (OpReg edx) (OpReg eax)
1103 -- compare against upper
1104 -- eax==0 if high part == sign extended low part
1107 return (Fixed rep eax code)
1109 --------------------
-- | Shift-instruction selection.  x86 shifts only accept an immediate,
-- %cl, or 1 as the shift count, so trivialCode cannot be reused.
1110 shift_code :: MachRep
1111 -> (Operand -> Operand -> Instr)
1116 {- Case1: shift length as immediate -}
1117 shift_code rep instr x y@(CmmLit lit) = do
1118 x_code <- getAnyReg x
1121 = x_code dst `snocOL`
1122 instr (OpImm (litToImm lit)) (OpReg dst)
1124 return (Any rep code)
1126 {- Case2: shift length is complex (non-immediate) -}
-- The shift amount is computed into %ecx (only %cl is usable as a
-- variable count), so x must live in a non-clobberable register.
1127 shift_code rep instr x y{-amount-} = do
1128 (x_reg, x_code) <- getNonClobberedReg x
1129 y_code <- getAnyReg y
1131 code = x_code `appOL`
1133 instr (OpReg ecx) (OpReg x_reg)
1135 return (Fixed rep x_reg code)
1137 --------------------
-- | Integer addition: a literal second operand goes through the
-- three-operand LEA form (add_int); otherwise plain ADD via trivialCode.
1138 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1139 add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1140 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1142 --------------------
-- | Integer subtraction: subtracting a literal is addition of its
-- negation (reusing add_int/LEA); SUB is not commutative, hence Nothing
-- for the reversed-operand variant.
1143 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1144 sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1145 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1147 -- our three-operand add instruction:
-- Implements reg+immediate via an addressing-mode (LEA-style) operand,
-- leaving the destination register free (Any).
1148 add_int rep x y = do
1149 (x_reg, x_code) <- getSomeReg x
1151 imm = ImmInt (fromInteger y)
1155 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1158 return (Any rep code)
1160 ----------------------
-- | Division/remainder.  signed selects IDIV vs DIV and how %edx is
-- widened (CLTD sign-extends %eax into %edx; unsigned zeroes %edx with
-- XOR).  quotient selects the result register: quotient in %eax,
-- remainder in %edx.
1161 div_code rep signed quotient x y = do
1162 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1163 x_code <- getAnyReg x
1165 widen | signed = CLTD rep
1166 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1168 instr | signed = IDIV
1171 code = y_code `appOL`
1173 toOL [widen, instr rep y_op]
1175 result | quotient = eax
1179 return (Fixed rep result code)
-- Memory-load and literal equations of getRegister for i386/x86_64.
-- On i386 floating loads use the x87 GLD pseudo-instruction.
1182 getRegister (CmmLoad mem pk)
1185 Amode src mem_code <- getAmode mem
1187 code dst = mem_code `snocOL`
1188 IF_ARCH_i386(GLD pk src dst,
1189 MOV pk (OpAddr src) (OpReg dst))
1191 return (Any pk code)
1193 #if i386_TARGET_ARCH
1194 getRegister (CmmLoad mem pk)
1197 code <- intLoadCode (instr pk) mem
1198 return (Any pk code)
1200 instr I8 = MOVZxL pk
1203 -- we always zero-extend 8-bit loads, if we
1204 -- can't think of anything better. This is because
1205 -- we can't guarantee access to an 8-bit variant of every register
1206 -- (esi and edi don't have 8-bit variants), so to make things
1207 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1210 #if x86_64_TARGET_ARCH
1211 -- Simpler memory load code on x86_64
1212 getRegister (CmmLoad mem pk)
1214 code <- intLoadCode (MOV pk) mem
1215 return (Any pk code)
-- A zero literal is generated with xor reg,reg (shorter than mov $0).
1218 getRegister (CmmLit (CmmInt 0 rep))
1220 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1221 adj_rep = case rep of I64 -> I32; _ -> rep
1222 rep1 = IF_ARCH_i386( rep, adj_rep )
1224 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1226 return (Any rep code)
1228 #if x86_64_TARGET_ARCH
1229 -- optimisation for loading small literals on x86_64: take advantage
1230 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1231 -- instruction forms are shorter.
1232 getRegister (CmmLit lit)
1233 | I64 <- cmmLitRep lit, not (isBigLit lit)
1236 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1238 return (Any I64 code)
-- isBigLit: true when the value does not fit in an unsigned 32-bit
-- immediate.  NOTE(review): the catch-all equation for non-CmmInt
-- literals is not visible in this chunk.
1240 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1242 -- note1: not the same as is64BitLit, because that checks for
1243 -- signed literals that fit in 32 bits, but we want unsigned
1245 -- note2: all labels are small, because we're assuming the
1246 -- small memory model (see gcc docs, -mcmodel=small).
1249 getRegister (CmmLit lit)
1253 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1255 return (Any rep code)
1257 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build a loader for @mem@: first the address computation, then one
-- 'instr' reading from that address into a caller-chosen destination
-- register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode addr addr_code <- getAmode mem
    let load dst = addr_code `snocOL` instr (OpAddr addr) (OpReg dst)
    return load
1266 -- Compute an expression into *any* register, adding the appropriate
1267 -- move instruction if necessary.
-- NOTE(review): the equation head and the final call to anyReg are not
-- visible in this chunk; only the signature and one bind line remain.
1268 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1270 r <- getRegister expr
-- | Convert a 'Register' into a function producing code that leaves the
-- value in any caller-chosen register; a value pinned to a fixed
-- register gets an extra register-to-register move appended.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
    Any _ code        -> return code
    Fixed rep reg pre -> return (\dst -> pre `snocOL` reg2reg rep reg dst)
1277 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1278 -- Fixed registers might not be byte-addressable, so we make sure we've
1279 -- got a temporary, inserting an extra reg copy if necessary.
1280 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1281 #if x86_64_TARGET_ARCH
1282 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 version: virtual registers are fine (the allocator will pick a
-- byte-addressable one); a real fixed register is copied to a temp.
1284 getByteReg expr = do
1285 r <- getRegister expr
1288 tmp <- getNewRegNat rep
1289 return (tmp, code tmp)
1291 | isVirtualReg reg -> return (reg,code)
1293 tmp <- getNewRegNat rep
1294 return (tmp, code `snocOL` reg2reg rep reg tmp)
1295 -- ToDo: could optimise slightly by checking for byte-addressable
1296 -- real registers, but that will happen very rarely if at all.
1299 -- Another variant: this time we want the result in a register that cannot
1300 -- be modified by code to evaluate an arbitrary expression.
1301 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1302 getNonClobberedReg expr = do
1303 r <- getRegister expr
1306 tmp <- getNewRegNat rep
1307 return (tmp, code tmp)
1309 -- only free regs can be clobbered
-- A value sitting in a free (allocatable) real register is copied to a
-- fresh temporary so later code cannot clobber it.
1310 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1311 tmp <- getNewRegNat rep
1312 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Register-to-register move for the given rep.  On i386 floating
-- values live on the x87 stack and need GMOV; integers use MOV.
1316 reg2reg :: MachRep -> Reg -> Reg -> Instr
1318 #if i386_TARGET_ARCH
1319 | isFloatingRep rep = GMOV src dst
1321 | otherwise = MOV rep (OpReg src) (OpReg dst)
1323 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1325 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1327 #if sparc_TARGET_ARCH
-- SPARC: float/double literals are emitted into the data segment and
-- loaded via SETHI/LD with the label's high/low immediate halves.
1329 getRegister (StFloat d)
1330 = getBlockIdNat `thenNat` \ lbl ->
1331 getNewRegNat PtrRep `thenNat` \ tmp ->
1332 let code dst = toOL [
1333 SEGMENT DataSegment,
1335 DATA F [ImmFloat d],
1336 SEGMENT TextSegment,
1337 SETHI (HI (ImmCLbl lbl)) tmp,
1338 LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1340 return (Any F32 code)
1342 getRegister (StDouble d)
1343 = getBlockIdNat `thenNat` \ lbl ->
1344 getNewRegNat PtrRep `thenNat` \ tmp ->
1345 let code dst = toOL [
1346 SEGMENT DataSegment,
1348 DATA DF [ImmDouble d],
1349 SEGMENT TextSegment,
1350 SETHI (HI (ImmCLbl lbl)) tmp,
1351 LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1353 return (Any F64 code)
-- SPARC unary MachOps: negation/not via g0-based ALU forms, float
-- conversions, no-op conversions, shift-pair sign/zero extensions, and
-- transcendental float ops lowered to C library calls (promoted to
-- double first when is_float_op).
1356 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1358 MO_NatS_Neg -> trivialUCode (SUB False False g0) x
1359 MO_Nat_Not -> trivialUCode (XNOR False g0) x
1360 MO_32U_to_8U -> trivialCode (AND False) x (StInt 255)
1362 MO_F32_Neg -> trivialUFCode F32 (FNEG F) x
1363 MO_F64_Neg -> trivialUFCode F64 (FNEG DF) x
1365 MO_F64_to_Flt -> coerceDbl2Flt x
1366 MO_F32_to_Dbl -> coerceFlt2Dbl x
1368 MO_F32_to_NatS -> coerceFP2Int F32 x
1369 MO_NatS_to_Flt -> coerceInt2FP F32 x
1370 MO_F64_to_NatS -> coerceFP2Int F64 x
1371 MO_NatS_to_Dbl -> coerceInt2FP F64 x
1373 -- Conversions which are a nop on sparc
1374 MO_32U_to_NatS -> conversionNop IntRep x
1375 MO_32S_to_NatS -> conversionNop IntRep x
1376 MO_NatS_to_32U -> conversionNop WordRep x
1377 MO_32U_to_NatU -> conversionNop WordRep x
1379 MO_NatU_to_NatS -> conversionNop IntRep x
1380 MO_NatS_to_NatU -> conversionNop WordRep x
1381 MO_NatP_to_NatU -> conversionNop WordRep x
1382 MO_NatU_to_NatP -> conversionNop PtrRep x
1383 MO_NatS_to_NatP -> conversionNop PtrRep x
1384 MO_NatP_to_NatS -> conversionNop IntRep x
1386 -- sign-extending widenings
1387 MO_8U_to_32U -> integerExtend False 24 x
1388 MO_8U_to_NatU -> integerExtend False 24 x
1389 MO_8S_to_NatS -> integerExtend True 24 x
1390 MO_16U_to_NatU -> integerExtend False 16 x
1391 MO_16S_to_NatS -> integerExtend True 16 x
1394 let fixed_x = if is_float_op -- promote to double
1395 then CmmMachOp MO_F32_to_Dbl [x]
1398 getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
-- integerExtend: shift left then (arithmetic|logical) shift right by
-- the same amount to sign/zero extend a sub-word value.
1400 integerExtend signed nBits x
1402 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1403 [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1405 conversionNop new_rep expr
1406 = getRegister expr `thenNat` \ e_code ->
1407 return (swizzleRegisterRep e_code new_rep)
-- Table of transcendental ops: (needs-promotion-to-double, libm name).
1411 MO_F32_Exp -> (True, FSLIT("exp"))
1412 MO_F32_Log -> (True, FSLIT("log"))
1413 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
1415 MO_F32_Sin -> (True, FSLIT("sin"))
1416 MO_F32_Cos -> (True, FSLIT("cos"))
1417 MO_F32_Tan -> (True, FSLIT("tan"))
1419 MO_F32_Asin -> (True, FSLIT("asin"))
1420 MO_F32_Acos -> (True, FSLIT("acos"))
1421 MO_F32_Atan -> (True, FSLIT("atan"))
1423 MO_F32_Sinh -> (True, FSLIT("sinh"))
1424 MO_F32_Cosh -> (True, FSLIT("cosh"))
1425 MO_F32_Tanh -> (True, FSLIT("tanh"))
1427 MO_F64_Exp -> (False, FSLIT("exp"))
1428 MO_F64_Log -> (False, FSLIT("log"))
1429 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
1431 MO_F64_Sin -> (False, FSLIT("sin"))
1432 MO_F64_Cos -> (False, FSLIT("cos"))
1433 MO_F64_Tan -> (False, FSLIT("tan"))
1435 MO_F64_Asin -> (False, FSLIT("asin"))
1436 MO_F64_Acos -> (False, FSLIT("acos"))
1437 MO_F64_Atan -> (False, FSLIT("atan"))
1439 MO_F64_Sinh -> (False, FSLIT("sinh"))
1440 MO_F64_Cosh -> (False, FSLIT("cosh"))
1441 MO_F64_Tanh -> (False, FSLIT("tanh"))
-- NOTE(review): this panic message says "binary CmmMachOp (2)" but it
-- sits in the unary ([x]) equation -- message looks wrong; confirm and
-- fix upstream (a doc-only change cannot touch the runtime string).
1443 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
-- SPARC dyadic MachOps: comparisons via condIntReg/condFltReg,
-- arithmetic via trivialCode/trivialFCode, and division/power lowered
-- to C library calls (no native divide used here; see the V8+ ToDo).
1447 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1449 MO_32U_Gt -> condIntReg GTT x y
1450 MO_32U_Ge -> condIntReg GE x y
1451 MO_32U_Eq -> condIntReg EQQ x y
1452 MO_32U_Ne -> condIntReg NE x y
1453 MO_32U_Lt -> condIntReg LTT x y
1454 MO_32U_Le -> condIntReg LE x y
1456 MO_Nat_Eq -> condIntReg EQQ x y
1457 MO_Nat_Ne -> condIntReg NE x y
1459 MO_NatS_Gt -> condIntReg GTT x y
1460 MO_NatS_Ge -> condIntReg GE x y
1461 MO_NatS_Lt -> condIntReg LTT x y
1462 MO_NatS_Le -> condIntReg LE x y
1464 MO_NatU_Gt -> condIntReg GU x y
1465 MO_NatU_Ge -> condIntReg GEU x y
1466 MO_NatU_Lt -> condIntReg LU x y
1467 MO_NatU_Le -> condIntReg LEU x y
1469 MO_F32_Gt -> condFltReg GTT x y
1470 MO_F32_Ge -> condFltReg GE x y
1471 MO_F32_Eq -> condFltReg EQQ x y
1472 MO_F32_Ne -> condFltReg NE x y
1473 MO_F32_Lt -> condFltReg LTT x y
1474 MO_F32_Le -> condFltReg LE x y
1476 MO_F64_Gt -> condFltReg GTT x y
1477 MO_F64_Ge -> condFltReg GE x y
1478 MO_F64_Eq -> condFltReg EQQ x y
1479 MO_F64_Ne -> condFltReg NE x y
1480 MO_F64_Lt -> condFltReg LTT x y
1481 MO_F64_Le -> condFltReg LE x y
1483 MO_Nat_Add -> trivialCode (ADD False False) x y
1484 MO_Nat_Sub -> trivialCode (SUB False False) x y
1486 MO_NatS_Mul -> trivialCode (SMUL False) x y
1487 MO_NatU_Mul -> trivialCode (UMUL False) x y
1488 MO_NatS_MulMayOflo -> imulMayOflo x y
1490 -- ToDo: teach about V8+ SPARC div instructions
1491 MO_NatS_Quot -> idiv FSLIT(".div") x y
1492 MO_NatS_Rem -> idiv FSLIT(".rem") x y
1493 MO_NatU_Quot -> idiv FSLIT(".udiv") x y
1494 MO_NatU_Rem -> idiv FSLIT(".urem") x y
1496 MO_F32_Add -> trivialFCode F32 FADD x y
1497 MO_F32_Sub -> trivialFCode F32 FSUB x y
1498 MO_F32_Mul -> trivialFCode F32 FMUL x y
1499 MO_F32_Div -> trivialFCode F32 FDIV x y
1501 MO_F64_Add -> trivialFCode F64 FADD x y
1502 MO_F64_Sub -> trivialFCode F64 FSUB x y
1503 MO_F64_Mul -> trivialFCode F64 FMUL x y
1504 MO_F64_Div -> trivialFCode F64 FDIV x y
1506 MO_Nat_And -> trivialCode (AND False) x y
1507 MO_Nat_Or -> trivialCode (OR False) x y
1508 MO_Nat_Xor -> trivialCode (XOR False) x y
1510 MO_Nat_Shl -> trivialCode SLL x y
1511 MO_Nat_Shr -> trivialCode SRL x y
1512 MO_Nat_Sar -> trivialCode SRA x y
1514 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1515 [promote x, promote y])
1516 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1517 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1520 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
-- idiv: lower integer division to a call to the named millicode routine.
1522 idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1524 --------------------
-- SPARC overflow-checking multiply: widening SMUL, then compare the
-- high word against the sign-extension of the low word (SRA by 31);
-- the result is the difference (zero when no overflow).
1525 imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1527 = getNewRegNat IntRep `thenNat` \ t1 ->
1528 getNewRegNat IntRep `thenNat` \ t2 ->
1529 getNewRegNat IntRep `thenNat` \ res_lo ->
1530 getNewRegNat IntRep `thenNat` \ res_hi ->
1531 getRegister a1 `thenNat` \ reg1 ->
1532 getRegister a2 `thenNat` \ reg2 ->
1533 let code1 = registerCode reg1 t1
1534 code2 = registerCode reg2 t2
1535 src1 = registerName reg1 t1
1536 src2 = registerName reg2 t2
1537 code dst = code1 `appOL` code2 `appOL`
1539 SMUL False src1 (RIReg src2) res_lo,
1541 SRA res_lo (RIImm (ImmInt 31)) res_lo,
1542 SUB False False res_lo (RIReg res_hi) dst
1545 return (Any IntRep code)
-- Simple load: one LD of the rep-appropriate size from the amode.
1547 getRegister (CmmLoad pk mem) = do
1548 Amode src code <- getAmode mem
1550 size = primRepToSize pk
1551 code__2 dst = code `snocOL` LD size src dst
1553 return (Any pk code__2)
-- Small integer literal: OR with %g0 (which always reads as zero).
1555 getRegister (StInt i)
1558 src = ImmInt (fromInteger i)
1559 code dst = unitOL (OR False g0 (RIImm src) dst)
1561 return (Any IntRep code)
-- Large immediates are built with SETHI (high 22 bits) + OR (low bits).
-- NOTE(review): the equation heads for the fragments below are elided
-- in this chunk.
1567 SETHI (HI imm__2) dst,
1568 OR False dst (RIImm (LO imm__2)) dst]
1570 return (Any PtrRep code)
1572 = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1575 imm__2 = case imm of Just x -> x
1577 #endif /* sparc_TARGET_ARCH */
1579 #if powerpc_TARGET_ARCH
-- PowerPC loads: plain LD, plus fused zero-/sign-extending loads for
-- the common 8/16-bit widening patterns (LA = load algebraic, i.e.
-- sign-extending; there is no byte-sized algebraic load).
1580 getRegister (CmmLoad mem pk)
1583 Amode addr addr_code <- getAmode mem
1584 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1585 addr_code `snocOL` LD pk dst addr
1586 return (Any pk code)
1588 -- catch simple cases of zero- or sign-extended load
1589 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1590 Amode addr addr_code <- getAmode mem
1591 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1593 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1595 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1596 Amode addr addr_code <- getAmode mem
1597 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1599 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1600 Amode addr addr_code <- getAmode mem
1601 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- PowerPC unary MachOps: NOT/NEG, float conversions, and widening /
-- narrowing integer conversions.  Narrowing is a no-op because the high
-- bits are treated as undefined; signed widening uses EXTS, unsigned
-- widening masks with AND.
1603 getRegister (CmmMachOp mop [x]) -- unary MachOps
1605 MO_Not rep -> trivialUCode rep NOT x
1607 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1608 MO_S_Conv F32 F64 -> conversionNop F64 x
1611 | from == to -> conversionNop to x
1612 | isFloatingRep from -> coerceFP2Int from to x
1613 | isFloatingRep to -> coerceInt2FP from to x
1615 -- narrowing is a nop: we treat the high bits as undefined
1616 MO_S_Conv I32 to -> conversionNop to x
1617 MO_S_Conv I16 I8 -> conversionNop I8 x
1618 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1619 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1622 | from == to -> conversionNop to x
1623 -- narrowing is a nop: we treat the high bits as undefined
1624 MO_U_Conv I32 to -> conversionNop to x
1625 MO_U_Conv I16 I8 -> conversionNop I8 x
1626 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1627 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1629 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1630 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1631 MO_S_Neg rep -> trivialUCode rep NEG x
-- conversionNop: re-tag the register with the new rep, emitting no code.
1634 conversionNop new_rep expr
1635 = do e_code <- getRegister expr
1636 return (swizzleRegisterRep e_code new_rep)
-- PowerPC dyadic MachOps.  Sub-word integer operands are first widened
-- to I32 (extendSExpr/extendUExpr) since the ALU works on full words.
1638 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1640 MO_Eq F32 -> condFltReg EQQ x y
1641 MO_Ne F32 -> condFltReg NE x y
1643 MO_S_Gt F32 -> condFltReg GTT x y
1644 MO_S_Ge F32 -> condFltReg GE x y
1645 MO_S_Lt F32 -> condFltReg LTT x y
1646 MO_S_Le F32 -> condFltReg LE x y
1648 MO_Eq F64 -> condFltReg EQQ x y
1649 MO_Ne F64 -> condFltReg NE x y
1651 MO_S_Gt F64 -> condFltReg GTT x y
1652 MO_S_Ge F64 -> condFltReg GE x y
1653 MO_S_Lt F64 -> condFltReg LTT x y
1654 MO_S_Le F64 -> condFltReg LE x y
1656 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1657 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1659 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1660 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1661 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1662 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1664 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1665 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1667 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1669 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1670 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1671 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1672 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1674 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1675 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1676 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1677 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1679 -- optimize addition with 32-bit immediate
-- Non-immediate-fitting literal adds use ADDIS with the high-adjusted
-- half (HA) followed by ADD of the low half.
1683 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1684 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1687 (src, srcCode) <- getSomeReg x
1688 let imm = litToImm lit
1689 code dst = srcCode `appOL` toOL [
1690 ADDIS dst src (HA imm),
1691 ADD dst dst (RIImm (LO imm))
1693 return (Any I32 code)
1694 _ -> trivialCode I32 True ADD x y
1696 MO_Add rep -> trivialCode rep True ADD x y
1698 case y of -- subfi ('subtract from' with immediate) doesn't exist
1699 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1700 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1701 _ -> trivialCodeNoImm rep SUBF y x
1703 MO_Mul rep -> trivialCode rep True MULLW x y
1705 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1707 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1708 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1710 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1711 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1713 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1714 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1716 MO_And rep -> trivialCode rep False AND x y
1717 MO_Or rep -> trivialCode rep False OR x y
1718 MO_Xor rep -> trivialCode rep False XOR x y
1720 MO_Shl rep -> trivialCode rep False SLW x y
1721 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1722 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- PowerPC literals: small ints via LI; floats via a read-only data
-- label loaded through a (possibly PIC-indirected) amode; other wide
-- literals via LIS/OR of the high and low immediate halves.
1724 getRegister (CmmLit (CmmInt i rep))
1725 | Just imm <- makeImmediate rep True i
1727 code dst = unitOL (LI dst imm)
1729 return (Any rep code)
1731 getRegister (CmmLit (CmmFloat f frep)) = do
1732 lbl <- getNewLabelNat
1733 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1734 Amode addr addr_code <- getAmode dynRef
1736 LDATA ReadOnlyData [CmmDataLabel lbl,
1737 CmmStaticLit (CmmFloat f frep)]
1738 `consOL` (addr_code `snocOL` LD frep dst addr)
1739 return (Any frep code)
1741 getRegister (CmmLit lit)
1742 = let rep = cmmLitRep lit
1746 OR dst dst (RIImm (LO imm))
1748 in return (Any rep code)
1750 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- Widen a sub-word integer expression to I32 (the PowerPC ALU works on
-- full words); an expression already at I32 passes through unchanged.
extendSExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]

extendUExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1759 #endif /* powerpc_TARGET_ARCH */
1762 -- -----------------------------------------------------------------------------
1763 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An addressing mode paired with the code needed to set it up.
1765 data Amode = Amode AddrMode InstrBlock
1768 Now, given a tree (the argument to an CmmLoad) that references memory,
1769 produce a suitable addressing mode.
1771 A Rule of the Game (tm) for Amodes: use of the addr bit must
1772 immediately follow use of the code part, since the code part puts
1773 values in registers which the addr then refers to. So you can't put
1774 anything in between, lest it overwrite some of those registers. If
1775 you need to do some other computation between the code part and use of
1776 the addr bit, first store the effective address from the amode in a
1777 temporary, then do the other computation, and then use the temporary:
1781 ... other computation ...
1785 getAmode :: CmmExpr -> NatM Amode
-- Reg+offset trees are normalised first so the per-arch equations below
-- only need to handle MachOp shapes.
1786 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1788 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1790 #if alpha_TARGET_ARCH
-- Alpha amodes: base-register plus small immediate offset for +/- of a
-- literal; anything else is computed into a register.
1792 getAmode (StPrim IntSubOp [x, StInt i])
1793 = getNewRegNat PtrRep `thenNat` \ tmp ->
1794 getRegister x `thenNat` \ register ->
1796 code = registerCode register tmp
1797 reg = registerName register tmp
1798 off = ImmInt (-(fromInteger i))
1800 return (Amode (AddrRegImm reg off) code)
1802 getAmode (StPrim IntAddOp [x, StInt i])
1803 = getNewRegNat PtrRep `thenNat` \ tmp ->
1804 getRegister x `thenNat` \ register ->
1806 code = registerCode register tmp
1807 reg = registerName register tmp
1808 off = ImmInt (fromInteger i)
1810 return (Amode (AddrRegImm reg off) code)
-- NOTE(review): the equation heads for the next two cases are elided
-- in this chunk.
1814 = return (Amode (AddrImm imm__2) id)
1817 imm__2 = case imm of Just x -> x
1820 = getNewRegNat PtrRep `thenNat` \ tmp ->
1821 getRegister other `thenNat` \ register ->
1823 code = registerCode register tmp
1824 reg = registerName register tmp
1826 return (Amode (AddrReg reg) code)
1828 #endif /* alpha_TARGET_ARCH */
1830 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1832 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1834 -- This is all just ridiculous, since it carefully undoes
1835 -- what mangleIndexTree has just done.
1836 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1837 | not (is64BitLit lit)
1838 -- ASSERT(rep == I32)???
1839 = do (x_reg, x_code) <- getSomeReg x
1840 let off = ImmInt (-(fromInteger i))
1841 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1843 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1844 | not (is64BitLit lit)
1845 -- ASSERT(rep == I32)???
1846 = do (x_reg, x_code) <- getSomeReg x
1847 let off = ImmInt (fromInteger i)
1848 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1850 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1851 -- recognised by the next rule.
1852 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1854 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << shift) maps onto the scaled-index addressing mode
-- for scale factors 1, 2, 4, 8 (shift 0..3).
1856 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1857 [y, CmmLit (CmmInt shift _)]])
1858 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1859 = do (x_reg, x_code) <- getNonClobberedReg x
1860 -- x must be in a temp, because it has to stay live over y_code
1861 -- we could compare x_reg and y_reg and do something better here...
1862 (y_reg, y_code) <- getSomeReg y
1864 code = x_code `appOL` y_code
1865 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1866 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-- A small literal can be an absolute address; everything else is
-- computed into a register used as the base.
1869 getAmode (CmmLit lit) | not (is64BitLit lit)
1870 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1873 (reg,code) <- getSomeReg expr
1874 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1876 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1878 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1880 #if sparc_TARGET_ARCH
-- SPARC amodes: reg+imm for add/sub of a (fitting) literal, reg+reg
-- for a general add, SETHI+low-offset for bare immediates, and a
-- register fallback for everything else.
1882 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1884 = getNewRegNat PtrRep `thenNat` \ tmp ->
1885 getRegister x `thenNat` \ register ->
1887 code = registerCode register tmp
1888 reg = registerName register tmp
1889 off = ImmInt (-(fromInteger i))
1891 return (Amode (AddrRegImm reg off) code)
1894 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1896 = getNewRegNat PtrRep `thenNat` \ tmp ->
1897 getRegister x `thenNat` \ register ->
1899 code = registerCode register tmp
1900 reg = registerName register tmp
1901 off = ImmInt (fromInteger i)
1903 return (Amode (AddrRegImm reg off) code)
1905 getAmode (CmmMachOp MO_Nat_Add [x, y])
1906 = getNewRegNat PtrRep `thenNat` \ tmp1 ->
1907 getNewRegNat IntRep `thenNat` \ tmp2 ->
1908 getRegister x `thenNat` \ register1 ->
1909 getRegister y `thenNat` \ register2 ->
1911 code1 = registerCode register1 tmp1
1912 reg1 = registerName register1 tmp1
1913 code2 = registerCode register2 tmp2
1914 reg2 = registerName register2 tmp2
1915 code__2 = code1 `appOL` code2
1917 return (Amode (AddrRegReg reg1 reg2) code__2)
-- NOTE(review): the equation heads for the last two cases are elided
-- in this chunk.
1921 = getNewRegNat PtrRep `thenNat` \ tmp ->
1923 code = unitOL (SETHI (HI imm__2) tmp)
1925 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1928 imm__2 = case imm of Just x -> x
1931 = getNewRegNat PtrRep `thenNat` \ tmp ->
1932 getRegister other `thenNat` \ register ->
1934 code = registerCode register tmp
1935 reg = registerName register tmp
1938 return (Amode (AddrRegImm reg off) code)
1940 #endif /* sparc_TARGET_ARCH */
1942 #ifdef powerpc_TARGET_ARCH
-- PowerPC amodes: reg+imm when the literal fits a D-form displacement,
-- ADDIS/LIS high-half tricks for larger literals, reg+reg for a
-- general add, and a register fallback otherwise.
1943 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1944 | Just off <- makeImmediate I32 True (-i)
1946 (reg, code) <- getSomeReg x
1947 return (Amode (AddrRegImm reg off) code)
1950 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1951 | Just off <- makeImmediate I32 True i
1953 (reg, code) <- getSomeReg x
1954 return (Amode (AddrRegImm reg off) code)
1956 -- optimize addition with 32-bit immediate
1958 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1960 tmp <- getNewRegNat I32
1961 (src, srcCode) <- getSomeReg x
1962 let imm = litToImm lit
1963 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1964 return (Amode (AddrRegImm tmp (LO imm)) code)
1966 getAmode (CmmLit lit)
1968 tmp <- getNewRegNat I32
1969 let imm = litToImm lit
1970 code = unitOL (LIS tmp (HA imm))
1971 return (Amode (AddrRegImm tmp (LO imm)) code)
1973 getAmode (CmmMachOp (MO_Add I32) [x, y])
1975 (regX, codeX) <- getSomeReg x
1976 (regY, codeY) <- getSomeReg y
1977 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1981 (reg, code) <- getSomeReg other
1984 return (Amode (AddrRegImm reg off) code)
1985 #endif /* powerpc_TARGET_ARCH */
1987 -- -----------------------------------------------------------------------------
1988 -- getOperand: sometimes any operand will do.
1990 -- getNonClobberedOperand: the value of the operand will remain valid across
1991 -- the computation of an arbitrary expression, unless the expression
1992 -- is computed directly into a register which the operand refers to
1993 -- (see trivialCode where this function is used for an example).
1995 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1997 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1998 #if x86_64_TARGET_ARCH
-- x86_64: a non-zero FP literal is referenced rip-relative from a
-- read-only data label.
1999 getNonClobberedOperand (CmmLit lit)
2000 | isSuitableFloatingPointLit lit = do
2001 lbl <- getNewLabelNat
2002 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2004 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2006 getNonClobberedOperand (CmmLit lit)
2007 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2008 return (OpImm (litToImm lit), nilOL)
-- A memory operand is safe only if its amode uses no clobberable real
-- register; otherwise the effective address is first saved with LEA.
2009 getNonClobberedOperand (CmmLoad mem pk)
2010 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2011 Amode src mem_code <- getAmode mem
2013 if (amodeCouldBeClobbered src)
2015 tmp <- getNewRegNat wordRep
2016 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2017 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2020 return (OpAddr src', save_code `appOL` mem_code)
2021 getNonClobberedOperand e = do
2022 (reg, code) <- getNonClobberedReg e
2023 return (OpReg reg, code)
-- True when any real register in the amode is allocatable (free) and
-- could therefore be overwritten by later code.
2025 amodeCouldBeClobbered :: AddrMode -> Bool
2026 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2028 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2029 regClobbered _ = False
2031 -- getOperand: the operand is not required to remain valid across the
2032 -- computation of an arbitrary expression.
2033 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2034 #if x86_64_TARGET_ARCH
2035 getOperand (CmmLit lit)
2036 | isSuitableFloatingPointLit lit = do
2037 lbl <- getNewLabelNat
2038 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2040 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2042 getOperand (CmmLit lit)
2043 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2044 return (OpImm (litToImm lit), nilOL)
2045 getOperand (CmmLoad mem pk)
2046 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2047 Amode src mem_code <- getAmode mem
2048 return (OpAddr src, mem_code)
-- Fallback: compute the expression into some register.
2050 (reg, code) <- getSomeReg e
2051 return (OpReg reg, code)
-- | Can this expression be used directly as an instruction operand
-- (a memory load, or a literal that fits)?
2053 isOperand :: CmmExpr -> Bool
2054 isOperand (CmmLoad _ _) = True
2055 isOperand (CmmLit lit) = not (is64BitLit lit)
2056 || isSuitableFloatingPointLit lit
2059 -- if we want a floating-point literal as an operand, we can
2060 -- use it directly from memory. However, if the literal is
2061 -- zero, we're better off generating it into a register using
2063 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2064 isSuitableFloatingPointLit _ = False
-- | Like getOperand but never returns an immediate: either a memory
-- operand or a non-clobberable register (used e.g. for the divisor of
-- IDIV, which cannot take an immediate).
2066 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2067 getRegOrMem (CmmLoad mem pk)
2068 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2069 Amode src mem_code <- getAmode mem
2070 return (OpAddr src, mem_code)
2072 (reg, code) <- getNonClobberedReg e
2073 return (OpReg reg, code)
2075 #if x86_64_TARGET_ARCH
-- True for I64 literals outside the signed 32-bit immediate range.
2076 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2077 -- assume that labels are in the range 0-2^31-1: this assumes the
2078 -- small memory model (see gcc docs, -mcmodel=small).
2080 is64BitLit x = False
2083 -- -----------------------------------------------------------------------------
2084 -- The 'CondCode' type: Condition codes passed up the tree.
-- CondCode is-float cond code-to-set-the-flags.
2086 data CondCode = CondCode Bool Cond InstrBlock
2088 -- Set up a condition code for a conditional branch.
2090 getCondCode :: CmmExpr -> NatM CondCode
2092 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2094 #if alpha_TARGET_ARCH
2095 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2096 #endif /* alpha_TARGET_ARCH */
2098 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2101 -- yes, they really do seem to want exactly the same!
2103 getCondCode (CmmMachOp mop [x, y])
2104 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2106 MO_Eq F32 -> condFltCode EQQ x y
2107 MO_Ne F32 -> condFltCode NE x y
2109 MO_S_Gt F32 -> condFltCode GTT x y
2110 MO_S_Ge F32 -> condFltCode GE x y
2111 MO_S_Lt F32 -> condFltCode LTT x y
2112 MO_S_Le F32 -> condFltCode LE x y
2114 MO_Eq F64 -> condFltCode EQQ x y
2115 MO_Ne F64 -> condFltCode NE x y
2117 MO_S_Gt F64 -> condFltCode GTT x y
2118 MO_S_Ge F64 -> condFltCode GE x y
2119 MO_S_Lt F64 -> condFltCode LTT x y
2120 MO_S_Le F64 -> condFltCode LE x y
2122 MO_Eq rep -> condIntCode EQQ x y
2123 MO_Ne rep -> condIntCode NE x y
2125 MO_S_Gt rep -> condIntCode GTT x y
2126 MO_S_Ge rep -> condIntCode GE x y
2127 MO_S_Lt rep -> condIntCode LTT x y
2128 MO_S_Le rep -> condIntCode LE x y
2130 MO_U_Gt rep -> condIntCode GU x y
2131 MO_U_Ge rep -> condIntCode GEU x y
2132 MO_U_Lt rep -> condIntCode LU x y
2133 MO_U_Le rep -> condIntCode LEU x y
2135 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2137 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2139 #elif powerpc_TARGET_ARCH
2141 -- almost the same as everywhere else - but we need to
2142 -- extend small integers to 32 bit first
2144 getCondCode (CmmMachOp mop [x, y])
2146 MO_Eq F32 -> condFltCode EQQ x y
2147 MO_Ne F32 -> condFltCode NE x y
2149 MO_S_Gt F32 -> condFltCode GTT x y
2150 MO_S_Ge F32 -> condFltCode GE x y
2151 MO_S_Lt F32 -> condFltCode LTT x y
2152 MO_S_Le F32 -> condFltCode LE x y
2154 MO_Eq F64 -> condFltCode EQQ x y
2155 MO_Ne F64 -> condFltCode NE x y
2157 MO_S_Gt F64 -> condFltCode GTT x y
2158 MO_S_Ge F64 -> condFltCode GE x y
2159 MO_S_Lt F64 -> condFltCode LTT x y
2160 MO_S_Le F64 -> condFltCode LE x y
-- Integer comparisons widen sub-word operands first (signed compares
-- via sign extension, unsigned/equality via zero extension).
2162 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2163 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2165 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2166 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2167 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2168 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2170 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2171 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2172 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2173 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2175 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2177 getCondCode other = panic "getCondCode(2)(powerpc)"
2183 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2184 -- passed back up the tree.
-- | Turn @cond x y@ into a 'CondCode': instructions that perform the
-- comparison and set flags, plus the 'Cond' to test afterwards.
-- 'condIntCode' handles integer comparisons, 'condFltCode' floating point.
-- Per-architecture implementations follow, selected by CPP.
2186 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2188 #if alpha_TARGET_ARCH
2189 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2190 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2191 #endif /* alpha_TARGET_ARCH */
2193 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2194 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: special-case operand shapes to avoid loading both sides into
-- registers. CMP mem,imm / TEST reg,reg / CMP reg,op / CMP op,reg.
2196 -- memory vs immediate
2197 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2198 Amode x_addr x_code <- getAmode x
2201 code = x_code `snocOL`
2202 CMP pk (OpImm imm) (OpAddr x_addr)
2204 return (CondCode False cond code)
-- Comparison against zero: TEST sets the same flags and is shorter.
2207 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2208 (x_reg, x_code) <- getSomeReg x
2210 code = x_code `snocOL`
2211 TEST pk (OpReg x_reg) (OpReg x_reg)
2213 return (CondCode False cond code)
2215 -- anything vs operand
2216 condIntCode cond x y | isOperand y = do
2217 (x_reg, x_code) <- getNonClobberedReg x
2218 (y_op, y_code) <- getOperand y
2220 code = x_code `appOL` y_code `snocOL`
2221 CMP (cmmExprRep x) y_op (OpReg x_reg)
2223 return (CondCode False cond code)
2225 -- anything vs anything
2226 condIntCode cond x y = do
2227 (y_reg, y_code) <- getNonClobberedReg y
2228 (x_op, x_code) <- getRegOrMem x
2230 code = y_code `appOL`
2232 CMP (cmmExprRep x) (OpReg y_reg) x_op
2234 return (CondCode False cond code)
2237 #if i386_TARGET_ARCH
-- i386 (x87): GCMP folds the whole comparison; the chosen cond is baked
-- into the instruction and the caller only tests the zero flag.
2238 condFltCode cond x y
2239 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2240 (x_reg, x_code) <- getNonClobberedReg x
2241 (y_reg, y_code) <- getSomeReg y
2243 code = x_code `appOL` y_code `snocOL`
2244 GCMP cond x_reg y_reg
2245 -- The GCMP insn does the test and sets the zero flag if comparable
2246 -- and true. Hence we always supply EQQ as the condition to test.
2247 return (CondCode True EQQ code)
2248 #endif /* i386_TARGET_ARCH */
2250 #if x86_64_TARGET_ARCH
2251 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2252 -- an operand, but the right must be a reg. We can probably do better
2253 -- than this general case...
2254 condFltCode cond x y = do
2255 (x_reg, x_code) <- getNonClobberedReg x
2256 (y_op, y_code) <- getOperand y
2258 code = x_code `appOL`
2260 CMP (cmmExprRep x) y_op (OpReg x_reg)
2261 -- NB(1): we need to use the unsigned comparison operators on the
2262 -- result of this comparison.
2264 return (CondCode True (condToUnsigned cond) code)
2267 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2269 #if sparc_TARGET_ARCH
-- SPARC: SUB with the cc flag set (subcc) into g0 performs the compare.
2271 condIntCode cond x (StInt y)
2273 = getRegister x `thenNat` \ register ->
2274 getNewRegNat IntRep `thenNat` \ tmp ->
2276 code = registerCode register tmp
2277 src1 = registerName register tmp
2278 src2 = ImmInt (fromInteger y)
2279 code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2281 return (CondCode False cond code__2)
2283 condIntCode cond x y
2284 = getRegister x `thenNat` \ register1 ->
2285 getRegister y `thenNat` \ register2 ->
2286 getNewRegNat IntRep `thenNat` \ tmp1 ->
2287 getNewRegNat IntRep `thenNat` \ tmp2 ->
2289 code1 = registerCode register1 tmp1
2290 src1 = registerName register1 tmp1
2291 code2 = registerCode register2 tmp2
2292 src2 = registerName register2 tmp2
2293 code__2 = code1 `appOL` code2 `snocOL`
2294 SUB False True src1 (RIReg src2) g0
2296 return (CondCode False cond code__2)
-- If the two operands have different float widths, promote the F32 side
-- to F64 (FxTOy) before the FCMP so both sides agree.
2299 condFltCode cond x y
2300 = getRegister x `thenNat` \ register1 ->
2301 getRegister y `thenNat` \ register2 ->
2302 getNewRegNat (registerRep register1)
2304 getNewRegNat (registerRep register2)
2306 getNewRegNat F64 `thenNat` \ tmp ->
2308 promote x = FxTOy F DF x tmp
2310 pk1 = registerRep register1
2311 code1 = registerCode register1 tmp1
2312 src1 = registerName register1 tmp1
2314 pk2 = registerRep register2
2315 code2 = registerCode register2 tmp2
2316 src2 = registerName register2 tmp2
2320 code1 `appOL` code2 `snocOL`
2321 FCMP True (primRepToSize pk1) src1 src2
2322 else if pk1 == F32 then
2323 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2324 FCMP True DF tmp src2
2326 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2327 FCMP True DF src1 tmp
2329 return (CondCode True cond code__2)
2331 #endif /* sparc_TARGET_ARCH */
2333 #if powerpc_TARGET_ARCH
-- PowerPC: cmp/cmpl on 32-bit regs; callers are expected to have
-- extended narrower operands first (see getCondCode above).
2334 -- ###FIXME: I16 and I8!
2335 condIntCode cond x (CmmLit (CmmInt y rep))
2336 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2338 (src1, code) <- getSomeReg x
2340 code' = code `snocOL`
2341 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2342 return (CondCode False cond code')
2344 condIntCode cond x y = do
2345 (src1, code1) <- getSomeReg x
2346 (src2, code2) <- getSomeReg y
2348 code' = code1 `appOL` code2 `snocOL`
2349 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2350 return (CondCode False cond code')
2352 condFltCode cond x y = do
2353 (src1, code1) <- getSomeReg x
2354 (src2, code2) <- getSomeReg y
2356 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2357 code'' = case cond of -- twiddle CR to handle unordered case
2358 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2359 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR bit positions used by the CRNOR fixups above.
2362 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2363 return (CondCode True cond code'')
2365 #endif /* powerpc_TARGET_ARCH */
2367 -- -----------------------------------------------------------------------------
2368 -- Generating assignments
2370 -- Assignments are really at the heart of the whole code generation
2371 -- business. Almost all top-level nodes of any real importance are
2372 -- assignments, which correspond to loads, stores, or register
2373 -- transfers. If we're really lucky, some of the register transfers
2374 -- will go away, because we can use the destination register to
2375 -- complete the code generation for the right hand side. This only
2376 -- fails when the right hand side is forced into a fixed register
2377 -- (e.g. the result of a call).
-- | Code generators for assignments: integer/float, to memory or to a
-- register. One implementation per target architecture, selected by CPP.
2379 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2380 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2382 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2383 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2385 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2387 #if alpha_TARGET_ARCH
2389 assignIntCode pk (CmmLoad dst _) src
2390 = getNewRegNat IntRep `thenNat` \ tmp ->
2391 getAmode dst `thenNat` \ amode ->
2392 getRegister src `thenNat` \ register ->
2394 code1 = amodeCode amode []
2395 dst__2 = amodeAddr amode
2396 code2 = registerCode register tmp []
2397 src__2 = registerName register tmp
2398 sz = primRepToSize pk
2399 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2403 assignIntCode pk dst src
2404 = getRegister dst `thenNat` \ register1 ->
2405 getRegister src `thenNat` \ register2 ->
2407 dst__2 = registerName register1 zeroh
2408 code = registerCode register2 dst__2
2409 src__2 = registerName register2 dst__2
-- Fixed registers need an explicit move (OR with itself) into dst.
2410 code__2 = if isFixed register2
2411 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2416 #endif /* alpha_TARGET_ARCH */
2418 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2420 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2422 -- integer assignment to memory
2423 assignMem_IntCode pk addr src = do
2424 Amode addr code_addr <- getAmode addr
2425 (code_src, op_src) <- get_op_RI src
2427 code = code_src `appOL`
2429 MOV pk op_src (OpAddr addr)
2430 -- NOTE: op_src is stable, so it will still be valid
2431 -- after code_addr. This may involve the introduction
2432 -- of an extra MOV to a temporary register, but we hope
2433 -- the register allocator will get rid of it.
-- Helper: materialise the RHS as an immediate when it fits, otherwise
-- as a non-clobbered register.
2437 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2438 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2439 = return (nilOL, OpImm (litToImm lit))
2441 = do (reg,code) <- getNonClobberedReg op
2442 return (code, OpReg reg)
2445 -- Assign; dst is a reg, rhs is mem
2446 assignReg_IntCode pk reg (CmmLoad src _) = do
2447 load_code <- intLoadCode (MOV pk) src
2448 return (load_code (getRegisterReg reg))
2450 -- dst is a reg, but src could be anything
2451 assignReg_IntCode pk reg src = do
2452 code <- getAnyReg src
2453 return (code (getRegisterReg reg))
2455 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2457 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2459 #if sparc_TARGET_ARCH
2461 assignMem_IntCode pk addr src
2462 = getNewRegNat IntRep `thenNat` \ tmp ->
2463 getAmode addr `thenNat` \ amode ->
2464 getRegister src `thenNat` \ register ->
2466 code1 = amodeCode amode
2467 dst__2 = amodeAddr amode
2468 code2 = registerCode register tmp
2469 src__2 = registerName register tmp
2470 sz = primRepToSize pk
2471 code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2475 assignReg_IntCode pk reg src
2476 = getRegister src `thenNat` \ register2 ->
2477 getRegisterReg reg `thenNat` \ register1 ->
2478 getNewRegNat IntRep `thenNat` \ tmp ->
2480 dst__2 = registerName register1 tmp
2481 code = registerCode register2 dst__2
2482 src__2 = registerName register2 dst__2
-- OR with g0 (hardwired zero) is the SPARC register-to-register move.
2483 code__2 = if isFixed register2
2484 then code `snocOL` OR False g0 (RIReg src__2) dst__2
2489 #endif /* sparc_TARGET_ARCH */
2491 #if powerpc_TARGET_ARCH
2493 assignMem_IntCode pk addr src = do
2494 (srcReg, code) <- getSomeReg src
2495 Amode dstAddr addr_code <- getAmode addr
2496 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2498 -- dst is a reg, but src could be anything
2499 assignReg_IntCode pk reg src
-- Any: let the RHS write straight into dst; Fixed: MR (move register).
2501 r <- getRegister src
2503 Any _ code -> code dst
2504 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2506 dst = getRegisterReg reg
2508 #endif /* powerpc_TARGET_ARCH */
2511 -- -----------------------------------------------------------------------------
2512 -- Floating-point assignments
2514 #if alpha_TARGET_ARCH
2516 assignFltCode pk (CmmLoad dst _) src
2517 = getNewRegNat pk `thenNat` \ tmp ->
2518 getAmode dst `thenNat` \ amode ->
2519 getRegister src `thenNat` \ register ->
2521 code1 = amodeCode amode []
2522 dst__2 = amodeAddr amode
2523 code2 = registerCode register tmp []
2524 src__2 = registerName register tmp
2525 sz = primRepToSize pk
2526 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2530 assignFltCode pk dst src
2531 = getRegister dst `thenNat` \ register1 ->
2532 getRegister src `thenNat` \ register2 ->
2534 dst__2 = registerName register1 zeroh
2535 code = registerCode register2 dst__2
2536 src__2 = registerName register2 dst__2
2537 code__2 = if isFixed register2
2538 then code . mkSeqInstr (FMOV src__2 dst__2)
2543 #endif /* alpha_TARGET_ARCH */
2545 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2547 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2549 -- Floating point assignment to memory
2550 assignMem_FltCode pk addr src = do
2551 (src_reg, src_code) <- getNonClobberedReg src
2552 Amode addr addr_code <- getAmode addr
2554 code = src_code `appOL`
-- i386 uses the x87 store (GST); x86_64 uses a plain SSE MOV.
2556 IF_ARCH_i386(GST pk src_reg addr,
2557 MOV pk (OpReg src_reg) (OpAddr addr))
2560 -- Floating point assignment to a register/temporary
2561 assignReg_FltCode pk reg src = do
2562 src_code <- getAnyReg src
2563 return (src_code (getRegisterReg reg))
2565 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2567 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2569 #if sparc_TARGET_ARCH
2571 -- Floating point assignment to memory
2572 assignMem_FltCode pk addr src
2573 = getNewRegNat pk `thenNat` \ tmp1 ->
2574 getAmode addr `thenNat` \ amode ->
2575 getRegister src `thenNat` \ register ->
2577 sz = primRepToSize pk
2578 dst__2 = amodeAddr amode
2580 code1 = amodeCode amode
2581 code2 = registerCode register tmp1
2583 src__2 = registerName register tmp1
2584 pk__2 = registerRep register
2585 sz__2 = primRepToSize pk__2
-- If source and destination widths differ, convert (FxTOy) through
-- tmp1 before the store.
2587 code__2 = code1 `appOL` code2 `appOL`
2589 then unitOL (ST sz src__2 dst__2)
2590 else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2594 -- Floating point assignment to a register/temporary
2595 -- Why is this so bizarrely ugly?
2596 assignReg_FltCode pk reg src
2597 = getRegisterReg reg `thenNat` \ register1 ->
2598 getRegister src `thenNat` \ register2 ->
2600 pk__2 = registerRep register2
2601 sz__2 = primRepToSize pk__2
2603 getNewRegNat pk__2 `thenNat` \ tmp ->
2605 sz = primRepToSize pk
2606 dst__2 = registerName register1 g0 -- must be Fixed
-- Route through tmp only when a width conversion is required.
2607 reg__2 = if pk /= pk__2 then tmp else dst__2
2608 code = registerCode register2 reg__2
2609 src__2 = registerName register2 reg__2
2612 code `snocOL` FxTOy sz__2 sz src__2 dst__2
2613 else if isFixed register2 then
2614 code `snocOL` FMOV sz src__2 dst__2
2620 #endif /* sparc_TARGET_ARCH */
2622 #if powerpc_TARGET_ARCH
-- PowerPC stores/moves are uniform across int and float registers,
-- so the integer versions serve for floats too.
2625 assignMem_FltCode = assignMem_IntCode
2626 assignReg_FltCode = assignReg_IntCode
2628 #endif /* powerpc_TARGET_ARCH */
2631 -- -----------------------------------------------------------------------------
2632 -- Generating a non-local jump
2634 -- (If applicable) Do not fill the delay slots here; you will confuse the
2635 -- register allocator.
-- | Generate an unconditional, non-local jump to the given target
-- expression (a code label or a computed address).
2637 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2639 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2641 #if alpha_TARGET_ARCH
2643 genJump (CmmLabel lbl)
2644 | isAsmTemp lbl = returnInstr (BR target)
2645 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2647 target = ImmCLbl lbl
2650 = getRegister tree `thenNat` \ register ->
2651 getNewRegNat PtrRep `thenNat` \ tmp ->
2653 dst = registerName register pv
2654 code = registerCode register pv
2655 target = registerName register pv
2657 if isFixed register then
2658 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2660 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2662 #endif /* alpha_TARGET_ARCH */
2664 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2666 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump via memory operand, immediate label, or computed register.
2668 genJump (CmmLoad mem pk) = do
2669 Amode target code <- getAmode mem
2670 return (code `snocOL` JMP (OpAddr target))
2672 genJump (CmmLit lit) = do
2673 return (unitOL (JMP (OpImm (litToImm lit))))
2676 (reg,code) <- getSomeReg expr
2677 return (code `snocOL` JMP (OpReg reg))
2679 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2681 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2683 #if sparc_TARGET_ARCH
-- SPARC: trailing NOPs fill branch delay slots (see section comment).
2685 genJump (CmmLabel lbl)
2686 = return (toOL [CALL (Left target) 0 True, NOP])
2688 target = ImmCLbl lbl
2691 = getRegister tree `thenNat` \ register ->
2692 getNewRegNat PtrRep `thenNat` \ tmp ->
2694 code = registerCode register tmp
2695 target = registerName register tmp
2697 return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2699 #endif /* sparc_TARGET_ARCH */
2701 #if powerpc_TARGET_ARCH
2702 genJump (CmmLit (CmmLabel lbl))
2703 = return (unitOL $ JMP lbl)
-- Computed jumps go via the count register: mtctr then bctr.
2707 (target,code) <- getSomeReg tree
2708 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2709 #endif /* powerpc_TARGET_ARCH */
2712 -- -----------------------------------------------------------------------------
2713 -- Unconditional branches
-- | Generate an unconditional branch to a local basic block.
-- NOTE(review): the closing #endif lines for these CPP sections appear
-- to have been elided from this listing.
2715 genBranch :: BlockId -> NatM InstrBlock
2717 #if alpha_TARGET_ARCH
2718 genBranch id = return (unitOL (BR id))
2721 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2722 genBranch id = return (unitOL (JXX ALWAYS id))
2725 #if sparc_TARGET_ARCH
-- SPARC: NOP fills the branch delay slot.
2726 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2729 #if powerpc_TARGET_ARCH
2730 genBranch id = return (unitOL (BCC ALWAYS id))
2734 -- -----------------------------------------------------------------------------
2735 -- Conditional jumps
2738 Conditional jumps are always to local labels, so we can use branch
2739 instructions. We peek at the arguments to decide what kind of
2742 ALPHA: For comparisons with 0, we're laughing, because we can just do
2743 the desired conditional branch.
2745 I386: First, we have to ensure that the condition
2746 codes are set according to the supplied comparison operation.
2748 SPARC: First, we have to ensure that the condition codes are set
2749 according to the supplied comparison operation. We generate slightly
2750 different code for floating point comparisons, because a floating
2751 point operation cannot directly precede a @BF@. We assume the worst
2752 and fill that slot with a @NOP@.
2754 SPARC: Do not fill the delay slots here; you will confuse the register
-- | genCondJump: branch to the given block when the condition
-- expression evaluates to true. (The name line of this signature was
-- elided from this listing; the type follows.)
2760 :: BlockId -- the branch target
2761 -> CmmExpr -- the condition on which to branch
2764 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2766 #if alpha_TARGET_ARCH
-- Alpha: old Stix-based code. Compare-with-zero uses BI/BF directly
-- with a Cond derived from the primop.
2768 genCondJump id (StPrim op [x, StInt 0])
2769 = getRegister x `thenNat` \ register ->
2770 getNewRegNat (registerRep register)
2773 code = registerCode register tmp
2774 value = registerName register tmp
2775 pk = registerRep register
2776 target = ImmCLbl lbl
2778 returnSeq code [BI (cmpOp op) value target]
2780 cmpOp CharGtOp = GTT
2782 cmpOp CharEqOp = EQQ
2784 cmpOp CharLtOp = LTT
-- NOTE(review): unsigned compares with zero collapse to constant
-- outcomes (>=0 is ALWAYS, <0 is NEVER), hence these mappings.
2793 cmpOp WordGeOp = ALWAYS
2794 cmpOp WordEqOp = EQQ
2796 cmpOp WordLtOp = NEVER
2797 cmpOp WordLeOp = EQQ
2799 cmpOp AddrGeOp = ALWAYS
2800 cmpOp AddrEqOp = EQQ
2802 cmpOp AddrLtOp = NEVER
2803 cmpOp AddrLeOp = EQQ
2805 genCondJump lbl (StPrim op [x, StDouble 0.0])
2806 = getRegister x `thenNat` \ register ->
2807 getNewRegNat (registerRep register)
2810 code = registerCode register tmp
2811 value = registerName register tmp
2812 pk = registerRep register
2813 target = ImmCLbl lbl
2815 return (code . mkSeqInstr (BF (cmpOp op) value target))
2817 cmpOp FloatGtOp = GTT
2818 cmpOp FloatGeOp = GE
2819 cmpOp FloatEqOp = EQQ
2820 cmpOp FloatNeOp = NE
2821 cmpOp FloatLtOp = LTT
2822 cmpOp FloatLeOp = LE
2823 cmpOp DoubleGtOp = GTT
2824 cmpOp DoubleGeOp = GE
2825 cmpOp DoubleEqOp = EQQ
2826 cmpOp DoubleNeOp = NE
2827 cmpOp DoubleLtOp = LTT
2828 cmpOp DoubleLeOp = LE
-- General float comparison: FCMP computes the *complement* relation and
-- the branch condition is inverted accordingly (see table below).
2830 genCondJump lbl (StPrim op [x, y])
2832 = trivialFCode pr instr x y `thenNat` \ register ->
2833 getNewRegNat F64 `thenNat` \ tmp ->
2835 code = registerCode register tmp
2836 result = registerName register tmp
2837 target = ImmCLbl lbl
2839 return (code . mkSeqInstr (BF cond result target))
2841 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2843 fltCmpOp op = case op of
2857 (instr, cond) = case op of
2858 FloatGtOp -> (FCMP TF LE, EQQ)
2859 FloatGeOp -> (FCMP TF LTT, EQQ)
2860 FloatEqOp -> (FCMP TF EQQ, NE)
2861 FloatNeOp -> (FCMP TF EQQ, EQQ)
2862 FloatLtOp -> (FCMP TF LTT, NE)
2863 FloatLeOp -> (FCMP TF LE, NE)
2864 DoubleGtOp -> (FCMP TF LE, EQQ)
2865 DoubleGeOp -> (FCMP TF LTT, EQQ)
2866 DoubleEqOp -> (FCMP TF EQQ, NE)
2867 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2868 DoubleLtOp -> (FCMP TF LTT, NE)
2869 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, with the same complement-and-invert trick.
2871 genCondJump lbl (StPrim op [x, y])
2872 = trivialCode instr x y `thenNat` \ register ->
2873 getNewRegNat IntRep `thenNat` \ tmp ->
2875 code = registerCode register tmp
2876 result = registerName register tmp
2877 target = ImmCLbl lbl
2879 return (code . mkSeqInstr (BI cond result target))
2881 (instr, cond) = case op of
2882 CharGtOp -> (CMP LE, EQQ)
2883 CharGeOp -> (CMP LTT, EQQ)
2884 CharEqOp -> (CMP EQQ, NE)
2885 CharNeOp -> (CMP EQQ, EQQ)
2886 CharLtOp -> (CMP LTT, NE)
2887 CharLeOp -> (CMP LE, NE)
2888 IntGtOp -> (CMP LE, EQQ)
2889 IntGeOp -> (CMP LTT, EQQ)
2890 IntEqOp -> (CMP EQQ, NE)
2891 IntNeOp -> (CMP EQQ, EQQ)
2892 IntLtOp -> (CMP LTT, NE)
2893 IntLeOp -> (CMP LE, NE)
2894 WordGtOp -> (CMP ULE, EQQ)
2895 WordGeOp -> (CMP ULT, EQQ)
2896 WordEqOp -> (CMP EQQ, NE)
2897 WordNeOp -> (CMP EQQ, EQQ)
2898 WordLtOp -> (CMP ULT, NE)
2899 WordLeOp -> (CMP ULE, NE)
2900 AddrGtOp -> (CMP ULE, EQQ)
2901 AddrGeOp -> (CMP ULT, EQQ)
2902 AddrEqOp -> (CMP EQQ, NE)
2903 AddrNeOp -> (CMP EQQ, EQQ)
2904 AddrLtOp -> (CMP ULT, NE)
2905 AddrLeOp -> (CMP ULE, NE)
2907 #endif /* alpha_TARGET_ARCH */
2909 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2911 #if i386_TARGET_ARCH
-- i386: getCondCode does all the work; just emit the conditional jump.
2913 genCondJump id bool = do
2914 CondCode _ cond code <- getCondCode bool
2915 return (code `snocOL` JXX cond id)
2919 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2921 #if x86_64_TARGET_ARCH
-- x86_64: float comparisons need extra care for the unordered (NaN)
-- case; some conditions expand to two jumps via a fresh label.
2923 genCondJump id bool = do
2924 CondCode is_float cond cond_code <- getCondCode bool
2927 return (cond_code `snocOL` JXX cond id)
2929 lbl <- getBlockIdNat
2931 -- see comment with condFltReg
2932 let code = case cond of
2938 plain_test = unitOL (
2941 or_unordered = toOL [
2945 and_ordered = toOL [
2951 return (cond_code `appOL` code)
2955 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2957 #if sparc_TARGET_ARCH
-- SPARC: BF (branch-on-float-cond) cannot directly follow a float op,
-- so a NOP is inserted before it; NOPs also fill the delay slots.
2959 genCondJump id bool = do
2960 CondCode is_float cond code <- getCondCode bool
2965 then [NOP, BF cond False id, NOP]
2966 else [BI cond False id, NOP]
2970 #endif /* sparc_TARGET_ARCH */
2973 #if powerpc_TARGET_ARCH
2975 genCondJump id bool = do
2976 CondCode is_float cond code <- getCondCode bool
2977 return (code `snocOL` BCC cond id)
2979 #endif /* powerpc_TARGET_ARCH */
2982 -- -----------------------------------------------------------------------------
2983 -- Generating C calls
2985 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2986 -- @get_arg@, which moves the arguments to the correct registers/stack
2987 -- locations. Apart from that, the code is easy.
2989 -- (If applicable) Do not fill the delay slots here; you will confuse the
2990 -- register allocator.
-- | genCCall: generate code for a foreign C call. (The name line of
-- this signature was elided from this listing; the type follows.)
2993 :: CmmCallTarget -- function to call
2994 -> [(CmmReg,MachHint)] -- where to put the result
2995 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2996 -> Maybe [GlobalReg] -- volatile regs to save
2999 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3001 #if alpha_TARGET_ARCH
3005 genCCall fn cconv result_regs args
3006 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3007 `thenNat` \ ((unused,_), argCode) ->
3009 nRegs = length allArgRegs - length unused
3010 code = asmSeqThen (map ($ []) argCode)
3013 LDA pv (AddrImm (ImmLab (ptext fn))),
3014 JSR ra (AddrReg pv) nRegs,
3015 LDGP gp (AddrReg ra)]
3017 ------------------------
3018 {- Try to get a value into a specific register (or registers) for
3019 a call. The first 6 arguments go into the appropriate
3020 argument register (separate registers for integer and floating
3021 point arguments, but used in lock-step), and the remaining
3022 arguments are dumped to the stack, beginning at 0(sp). Our
3023 first argument is a pair of the list of remaining argument
3024 registers to be assigned for this call and the next stack
3025 offset to use for overflowing arguments. This way,
3026 @get_Arg@ can be applied to all of a call's arguments using
3030 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3031 -> StixTree -- Current argument
3032 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3034 -- We have to use up all of our argument registers first...
3036 get_arg ((iDst,fDst):dsts, offset) arg
3037 = getRegister arg `thenNat` \ register ->
3039 reg = if isFloatingRep pk then fDst else iDst
3040 code = registerCode register reg
3041 src = registerName register reg
3042 pk = registerRep register
3045 if isFloatingRep pk then
3046 ((dsts, offset), if isFixed register then
3047 code . mkSeqInstr (FMOV src fDst)
3050 ((dsts, offset), if isFixed register then
3051 code . mkSeqInstr (OR src (RIReg src) iDst)
3054 -- Once we have run out of argument registers, we move to the
3057 get_arg ([], offset) arg
3058 = getRegister arg `thenNat` \ register ->
3059 getNewRegNat (registerRep register)
3062 code = registerCode register tmp
3063 src = registerName register tmp
3064 pk = registerRep register
3065 sz = primRepToSize pk
3067 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3069 #endif /* alpha_TARGET_ARCH */
3071 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3073 #if i386_TARGET_ARCH
-- i386: a few float "callish" ops are inlined as x87 instructions
-- rather than real calls; all others go out of line.
3075 -- we only cope with a single result for foreign calls
3076 genCCall (CmmPrim op) [(r,_)] args vols = do
3078 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3079 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3081 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3082 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3084 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3085 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3087 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3088 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3090 other_op -> outOfLineFloatOp op r args vols
3092 actuallyInlineFloatOp rep instr [(x,_)]
3093 = do res <- trivialUFCode rep instr x
3095 return (any (getRegisterReg r))
-- General case: push args right-to-left (C convention), call, then pop.
3097 genCCall target dest_regs args vols = do
3098 sizes_n_codes <- mapM push_arg (reverse args)
3099 delta <- getDeltaNat
3101 (sizes, push_codes) = unzip sizes_n_codes
3102 tot_arg_size = sum sizes
3104 -- deal with static vs dynamic call targets
3105 (callinsns,cconv) <-
3108 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3109 -> -- ToDo: stdcall arg sizes
3110 return (unitOL (CALL (Left fn_imm) []), conv)
3111 where fn_imm = ImmCLbl lbl
3112 CmmForeignCall expr conv
3113 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3114 ASSERT(dyn_rep == I32)
3115 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3117 let push_code = concatOL push_codes
3118 call = callinsns `appOL`
3120 -- Deallocate parameters after call for ccall;
3121 -- but not for stdcall (callee does it)
3122 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3123 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3125 [DELTA (delta + tot_arg_size)]
3128 setDeltaNat (delta + tot_arg_size)
3131 -- assign the results, if necessary
-- Results per the i386 C ABI: I64 in eax:edx, floats on the x87 stack
-- top (fake0), everything else in eax.
3132 assign_code [] = nilOL
3133 assign_code [(dest,_hint)] =
3135 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3136 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3137 F32 -> unitOL (GMOV fake0 r_dest)
3138 F64 -> unitOL (GMOV fake0 r_dest)
3139 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3141 r_dest_hi = getHiVRegFromLo r_dest
3142 rep = cmmRegRep dest
3143 r_dest = getRegisterReg dest
3144 assign_code many = panic "genCCall.assign_code many"
3146 return (push_code `appOL`
3148 assign_code dest_regs)
-- Push one argument; DELTA pseudo-instrs track the stack-pointer
-- adjustment for the register allocator/spiller.
3155 push_arg :: (CmmExpr,MachHint){-current argument-}
3156 -> NatM (Int, InstrBlock) -- argsz, code
3158 push_arg (arg,_hint) -- we don't need the hints on x86
3159 | arg_rep == I64 = do
3160 ChildCode64 code r_lo <- iselExpr64 arg
3161 delta <- getDeltaNat
3162 setDeltaNat (delta - 8)
3164 r_hi = getHiVRegFromLo r_lo
3166 return (8, code `appOL`
3167 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3168 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3173 (code, reg, sz) <- get_op arg
3174 delta <- getDeltaNat
3175 let size = arg_size sz
3176 setDeltaNat (delta-size)
-- Floats cannot be PUSHed from x87 registers: make room on the
-- stack explicitly and store (GST) into it.
3177 if (case sz of F64 -> True; F32 -> True; _ -> False)
3180 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3182 GST sz reg (AddrBaseIndex (EABaseReg esp)
3188 PUSH I32 (OpReg reg) `snocOL`
3192 arg_rep = cmmExprRep arg
3195 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3197 (reg,code) <- getSomeReg op
3198 return (code, reg, cmmExprRep op)
3200 #endif /* i386_TARGET_ARCH */
3202 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Implement a float 'CallishMachOp' by calling the corresponding
-- C math-library routine (sqrt, sin, cos, ...). The libm entry points
-- are double-precision, so F32 operations promote their arguments to
-- F64, call through an F64 temporary, then demote the result.
3204 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3205 -> Maybe [GlobalReg] -> NatM InstrBlock
3206 outOfLineFloatOp mop res args vols
3207 | cmmRegRep res == F64
3208 = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3211 = do uq <- getUniqueNat
3213 tmp = CmmLocal (LocalReg uq F64)
3215 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3216 code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3217 return (code1 `appOL` code2)
3219 promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3220 demote x = CmmMachOp (MO_S_Conv F64 F32) [x]
3222 target = CmmForeignCall (CmmLit lbl) CCallConv
3223 lbl = CmmLabel (mkForeignLabel fn Nothing False)
-- Map each MachOp to its libm function name; note F32 variants call
-- the same double-precision routines as F64 (arguments were promoted).
3226 MO_F32_Sqrt -> FSLIT("sqrt")
3227 MO_F32_Sin -> FSLIT("sin")
3228 MO_F32_Cos -> FSLIT("cos")
3229 MO_F32_Tan -> FSLIT("tan")
3230 MO_F32_Exp -> FSLIT("exp")
3231 MO_F32_Log -> FSLIT("log")
3233 MO_F32_Asin -> FSLIT("asin")
3234 MO_F32_Acos -> FSLIT("acos")
3235 MO_F32_Atan -> FSLIT("atan")
3237 MO_F32_Sinh -> FSLIT("sinh")
3238 MO_F32_Cosh -> FSLIT("cosh")
3239 MO_F32_Tanh -> FSLIT("tanh")
3240 MO_F32_Pwr -> FSLIT("pow")
3242 MO_F64_Sqrt -> FSLIT("sqrt")
3243 MO_F64_Sin -> FSLIT("sin")
3244 MO_F64_Cos -> FSLIT("cos")
3245 MO_F64_Tan -> FSLIT("tan")
3246 MO_F64_Exp -> FSLIT("exp")
3247 MO_F64_Log -> FSLIT("log")
3249 MO_F64_Asin -> FSLIT("asin")
3250 MO_F64_Acos -> FSLIT("acos")
3251 MO_F64_Atan -> FSLIT("atan")
3253 MO_F64_Sinh -> FSLIT("sinh")
3254 MO_F64_Cosh -> FSLIT("cosh")
3255 MO_F64_Tanh -> FSLIT("tanh")
3256 MO_F64_Pwr -> FSLIT("pow")
3258 other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3260 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3264 #if x86_64_TARGET_ARCH
-- x86_64: C calls.  A CmmPrim op with a single result register is turned
-- into an out-of-line call to the corresponding libm routine.
3266 genCCall (CmmPrim op) [(r,_)] args vols =
3267 outOfLineFloatOp op r args vols
-- General case: load register arguments, push overflow arguments on the
-- stack right-to-left, keep %rsp 16-byte aligned, emit the call, then
-- deallocate the stack arguments and move results out of %rax/%xmm0.
-- (NOTE(review): some interleaving lines are elided in this view.)
3269 genCCall target dest_regs args vols = do
3271 -- load up the register arguments
3272 (stack_args, aregs, fregs, load_args_code)
3273 <- load_args args allArgRegs allFPArgRegs nilOL
-- aregs/fregs are the registers *left over*; the ones actually used are
-- recovered by dropping that many from the end of the full register lists.
3276 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3277 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3278 arg_regs = int_regs_used ++ fp_regs_used
3279 -- for annotating the call instruction with
3281 sse_regs = length fp_regs_used
3283 tot_arg_size = arg_size * length stack_args
3285 -- On entry to the called function, %rsp should be aligned
3286 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3287 -- the return address is 16-byte aligned). In STG land
3288 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3289 -- need to make sure we push a multiple of 16-bytes of args,
3290 -- plus the return address, to get the correct alignment.
3291 -- Urg, this is hard. We need to feed the delta back into
3292 -- the arg pushing code.
3293 (real_size, adjust_rsp) <-
3294 if tot_arg_size `rem` 16 == 0
3295 then return (tot_arg_size, nilOL)
3296 else do -- we need to adjust...
3297 delta <- getDeltaNat
3298 setDeltaNat (delta-8)
3299 return (tot_arg_size+8, toOL [
3300 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3304 -- push the stack args, right to left
3305 push_code <- push_args (reverse stack_args) nilOL
3306 delta <- getDeltaNat
3308 -- deal with static vs dynamic call targets
3309 (callinsns,cconv) <-
3312 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3313 -> -- ToDo: stdcall arg sizes
3314 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3315 where fn_imm = ImmCLbl lbl
3316 CmmForeignCall expr conv
3317 -> do (dyn_r, dyn_c) <- getSomeReg expr
3318 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3321 -- The x86_64 ABI requires us to set %al to the number of SSE
3322 -- registers that contain arguments, if the called routine
3323 -- is a varargs function. We don't know whether it's a
3324 -- varargs function or not, so we have to assume it is.
3326 -- It's not safe to omit this assignment, even if the number
3327 -- of SSE regs in use is zero. If %al is larger than 8
3328 -- on entry to a varargs function, seg faults ensue.
3329 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3331 let call = callinsns `appOL`
3333 -- Deallocate parameters after call for ccall;
3334 -- but not for stdcall (callee does it)
3335 (if cconv == StdCallConv || real_size==0 then [] else
3336 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3338 [DELTA (delta + real_size)]
3341 setDeltaNat (delta + real_size)
3344 -- assign the results, if necessary
-- Floating results come back in %xmm0, everything else in %rax;
-- more than one result register is not supported.
3345 assign_code [] = nilOL
3346 assign_code [(dest,_hint)] =
3348 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3349 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3350 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3352 rep = cmmRegRep dest
3353 r_dest = getRegisterReg dest
3354 assign_code many = panic "genCCall.assign_code many"
3356 return (load_args_code `appOL`
3359 assign_eax sse_regs `appOL`
3361 assign_code dest_regs)
3364 arg_size = 8 -- always, at the mo
-- load_args: greedily assign each argument to the next free FP or int
-- argument register; once a class of registers runs out, the remaining
-- arguments of that class are returned for pushing on the stack.
3366 load_args :: [(CmmExpr,MachHint)]
3367 -> [Reg] -- int regs avail for args
3368 -> [Reg] -- FP regs avail for args
3370 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3371 load_args args [] [] code = return (args, [], [], code)
3372 -- no more regs to use
3373 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3374 -- no more args to push
3375 load_args ((arg,hint) : rest) aregs fregs code
3376 | isFloatingRep arg_rep =
3380 arg_code <- getAnyReg arg
3381 load_args rest aregs rs (code `appOL` arg_code r)
3386 arg_code <- getAnyReg arg
3387 load_args rest rs fregs (code `appOL` arg_code r)
3389 arg_rep = cmmExprRep arg
3392 (args',ars,frs,code') <- load_args rest aregs fregs code
3393 return ((arg,hint):args', ars, frs, code')
-- push_args: emit stack pushes for the leftover arguments, keeping the
-- NCG's stack-delta annotations (DELTA) in sync with every %rsp change.
3395 push_args [] code = return code
3396 push_args ((arg,hint):rest) code
3397 | isFloatingRep arg_rep = do
-- Floats can't be PUSHed directly: store to 0(%rsp) then bump %rsp.
3398 (arg_reg, arg_code) <- getSomeReg arg
3399 delta <- getDeltaNat
3400 setDeltaNat (delta-arg_size)
3401 let code' = code `appOL` toOL [
3402 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3403 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3404 DELTA (delta-arg_size)]
3405 push_args rest code'
3408 -- we only ever generate word-sized function arguments. Promotion
3409 -- has already happened: our Int8# type is kept sign-extended
3410 -- in an Int#, for example.
3411 ASSERT(arg_rep == I64) return ()
3412 (arg_op, arg_code) <- getOperand arg
3413 delta <- getDeltaNat
3414 setDeltaNat (delta-arg_size)
3415 let code' = code `appOL` toOL [PUSH I64 arg_op,
3416 DELTA (delta-arg_size)]
3417 push_args rest code'
3419 arg_rep = cmmExprRep arg
3422 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3424 #if sparc_TARGET_ARCH
3426 The SPARC calling convention is an absolute
3427 nightmare. The first 6x32 bits of arguments are mapped into
3428 %o0 through %o5, and the remaining arguments are dumped to the
3429 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3431 If we have to put args on the stack, move %o6==%sp down by
3432 the number of words to go on the stack, to ensure there's enough space.
3434 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3435 16 words above the stack pointer is a word for the address of
3436 a structure return value. I use this as a temporary location
3437 for moving values from float to int regs. Certainly it isn't
3438 safe to put anything in the 16 words starting at %sp, since
3439 this area can get trashed at any time due to window overflows
3440 caused by signal handlers.
3442 A final complication (if the above isn't enough) is that
3443 we can't blithely calculate the arguments one by one into
3444 %o0 .. %o5. Consider the following nested calls:
3448 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3449 the inner call will itself use %o0, which trashes the value put there
3450 in preparation for the outer call. Upshot: we need to calculate the
3451 args into temporary regs, and move those to arg regs or onto the
3452 stack only immediately prior to the call proper. Sigh.
-- SPARC: C calls.  All arguments are first computed into fresh integer
-- vregs (see the long comment above on why we can't target %o0..%o5
-- directly), then moved into the arg registers / stack slots immediately
-- before the call.
3455 genCCall fn cconv kind args
3456 = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3458 (argcodes, vregss) = unzip argcode_and_vregs
3459 n_argRegs = length allArgRegs
3460 n_argRegs_used = min (length vregs) n_argRegs
3461 vregs = concat vregss
3463 -- deal with static vs dynamic call targets
3466 -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3468 -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3469 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3471 `thenNat` \ callinsns ->
3473 argcode = concatOL argcodes
-- If any args overflow onto the stack, move %sp down first (rounded to
-- an even number of words to keep 8-byte alignment) and back up after.
3474 (move_sp_down, move_sp_up)
3475 = let diff = length vregs - n_argRegs
3476 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3479 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3481 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3483 return (argcode `appOL`
3484 move_sp_down `appOL`
3485 transfer_code `appOL`
3490 -- function names that begin with '.' are assumed to be special
3491 -- internally generated names like '.mul,' which don't get an
3492 -- underscore prefix
3493 -- ToDo:needed (WDP 96/03) ???
3494 fn_static = unLeft fn
3495 fn__2 = case (headFS fn_static) of
3496 '.' -> ImmLit (ftext fn_static)
3497 _ -> ImmCLbl (mkForeignLabel fn_static False)
3499 -- move args from the integer vregs into which they have been
3500 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3501 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3503 move_final [] _ offset -- all args done
3506 move_final (v:vs) [] offset -- out of aregs; move to stack
3507 = ST W v (spRel offset)
3508 : move_final vs [] (offset+1)
3510 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3511 = OR False g0 (RIReg v) a
3512 : move_final vs az offset
3514 -- generate code to calculate an argument, and move it into one
3515 -- or two integer vregs.
3516 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3517 arg_to_int_vregs arg
-- 64-bit values occupy two vregs: high word first, then low word.
3518 | is64BitRep (repOfCmmExpr arg)
3519 = iselExpr64 arg `thenNat` \ (ChildCode64 code vr_lo) ->
3520 let r_lo = VirtualRegI vr_lo
3521 r_hi = getHiVRegFromLo r_lo
3522 in return (code, [r_hi, r_lo])
3524 = getRegister arg `thenNat` \ register ->
3525 getNewRegNat (registerRep register) `thenNat` \ tmp ->
3526 let code = registerCode register tmp
3527 src = registerName register tmp
3528 pk = registerRep register
3530 -- the value is in src. Get it into 1 or 2 int vregs.
-- Float values are bounced through the scratch slot at [%sp+16]
-- (see the comment above re. the structure-return word) to move
-- them from float regs into integer vregs.
3533 getNewRegNat WordRep `thenNat` \ v1 ->
3534 getNewRegNat WordRep `thenNat` \ v2 ->
3537 FMOV DF src f0 `snocOL`
3538 ST F f0 (spRel 16) `snocOL`
3539 LD W (spRel 16) v1 `snocOL`
3540 ST F (fPair f0) (spRel 16) `snocOL`
3546 getNewRegNat WordRep `thenNat` \ v1 ->
3549 ST F src (spRel 16) `snocOL`
-- Plain integer case: just copy src into a fresh vreg via OR with %g0.
3555 getNewRegNat WordRep `thenNat` \ v1 ->
3557 code `snocOL` OR False g0 (RIReg src) v1
3561 #endif /* sparc_TARGET_ARCH */
3563 #if powerpc_TARGET_ARCH
3565 #if darwin_TARGET_OS || linux_TARGET_OS
3567 The PowerPC calling convention for Darwin/Mac OS X
3568 is described in Apple's document
3569 "Inside Mac OS X - Mach-O Runtime Architecture".
3571 PowerPC Linux uses the System V Release 4 Calling Convention
3572 for PowerPC. It is described in the
3573 "System V Application Binary Interface PowerPC Processor Supplement".
3575 Both conventions are similar:
3576 Parameters may be passed in general-purpose registers starting at r3, in
3577 floating point registers starting at f1, or on the stack.
3579 But there are substantial differences:
3580 * The number of registers used for parameter passing and the exact set of
3581 nonvolatile registers differs (see MachRegs.lhs).
3582 * On Darwin, stack space is always reserved for parameters, even if they are
3583 passed in registers. The called routine may choose to save parameters from
3584 registers to the corresponding space on the stack.
3585 * On Darwin, a corresponding number of GPRs is skipped when a floating point
3586 parameter is passed in an FPR.
3587 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3588 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3589 Darwin just treats an I64 like two separate I32s (high word first).
3590 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3591 4-byte aligned like everything else on Darwin.
3592 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3593 PowerPC Linux does not agree, so neither do we.
3595 According to both conventions, the parameter area should be part of the
3596 caller's stack frame, allocated in the caller's prologue code (large enough
3597 to hold the parameter lists for all called routines). The NCG already
3598 uses the stack for register spilling, leaving 64 bytes free at the top.
3599 If we need a larger parameter area than that, we just allocate a new stack
3600 frame just before ccalling.
-- PowerPC (Darwin/Linux): C calls.  Arguments are passed per the two ABIs
-- described in the comment above; the differences are confined to CPP
-- blocks below (initial stack offset, GPR skipping, 8-byte alignment).
3603 genCCall target dest_regs argsAndHints vols
3604 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3605 -- we rely on argument promotion in the codeGen
3607 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3609 allArgRegs allFPArgRegs
-- Static calls become BL to a label; dynamic calls go via CTR (MTCTR +
-- BCTRL); CmmPrim ops are routed to out-of-line libm calls.
3613 (labelOrExpr, reduceToF32) <- case target of
3614 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3615 CmmForeignCall expr conv -> return (Right expr, False)
3616 CmmPrim mop -> outOfLineFloatOp mop
3618 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3619 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3624 `snocOL` BL lbl usedRegs
3627 (dynReg, dynCode) <- getSomeReg dyn
3629 `snocOL` MTCTR dynReg
3631 `snocOL` BCTRL usedRegs
3634 #if darwin_TARGET_OS
3635 initialStackOffset = 24
3636 -- size of linkage area + size of arguments, in bytes
3637 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3638 map machRepByteWidth argReps
3639 #elif linux_TARGET_OS
3640 initialStackOffset = 8
3641 stackDelta finalStack = roundTo 16 finalStack
3643 args = map fst argsAndHints
3644 argReps = map cmmExprRep args
-- roundTo a x: round x up to the next multiple of a.
3646 roundTo a x | x `mod` a == 0 = x
3647 | otherwise = x + a - (x `mod` a)
-- Allocate / deallocate a fresh stack frame around the call when the
-- parameter area is too big for the 64 bytes the NCG leaves free.
3649 move_sp_down finalStack
3651 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3654 where delta = stackDelta finalStack
3655 move_sp_up finalStack
3657 toOL [ADD sp sp (RIImm (ImmInt delta)),
3660 where delta = stackDelta finalStack
-- passArguments: thread (remaining gprs, remaining fprs, stack offset,
-- accumulated code, registers used so far) through the argument list.
3663 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 arguments: two words, high word first (Darwin) or an odd-even GPR
-- pair / 8-byte-aligned stack slot (SysV).
3664 passArguments ((arg,I64):args) gprs fprs stackOffset
3665 accumCode accumUsed =
3667 ChildCode64 code vr_lo <- iselExpr64 arg
3668 let vr_hi = getHiVRegFromLo vr_lo
3670 #if darwin_TARGET_OS
3675 (accumCode `appOL` code
3676 `snocOL` storeWord vr_hi gprs stackOffset
3677 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3678 ((take 2 gprs) ++ accumUsed)
3680 storeWord vr (gpr:_) offset = MR gpr vr
3681 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3683 #elif linux_TARGET_OS
3684 let stackOffset' = roundTo 8 stackOffset
3685 stackCode = accumCode `appOL` code
3686 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3687 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3688 regCode hireg loreg =
3689 accumCode `appOL` code
3690 `snocOL` MR hireg vr_hi
3691 `snocOL` MR loreg vr_lo
-- SysV: the pair must start in an odd-numbered GPR, so skip one
-- register if needed ("even (length gprs)" encodes the parity here).
3694 hireg : loreg : regs | even (length gprs) ->
3695 passArguments args regs fprs stackOffset
3696 (regCode hireg loreg) (hireg : loreg : accumUsed)
3697 _skipped : hireg : loreg : regs ->
3698 passArguments args regs fprs stackOffset
3699 (regCode hireg loreg) (hireg : loreg : accumUsed)
3700 _ -> -- only one or no regs left
3701 passArguments args [] fprs (stackOffset'+8)
-- All other reps: use the next register of the right class if one is
-- free, otherwise store to the stack slot at stackOffset'.
3705 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3706 | reg : _ <- regs = do
3707 register <- getRegister arg
3708 let code = case register of
3709 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3710 Any _ acode -> acode reg
3714 #if darwin_TARGET_OS
3715 -- The Darwin ABI requires that we reserve stack slots for register parameters
3716 (stackOffset + stackBytes)
3717 #elif linux_TARGET_OS
3718 -- ... the SysV ABI doesn't.
3721 (accumCode `appOL` code)
3724 (vr, code) <- getSomeReg arg
3728 (stackOffset' + stackBytes)
3729 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3732 #if darwin_TARGET_OS
3733 -- stackOffset is at least 4-byte aligned
3734 -- The Darwin ABI is happy with that.
3735 stackOffset' = stackOffset
3737 -- ... the SysV ABI requires 8-byte alignment for doubles.
3738 stackOffset' | rep == F64 = roundTo 8 stackOffset
3739 | otherwise = stackOffset
3741 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep resource usage: (GPRs consumed, FPRs consumed, stack bytes,
-- which register pool this rep draws from).
3742 (nGprs, nFprs, stackBytes, regs) = case rep of
3743 I32 -> (1, 0, 4, gprs)
3744 #if darwin_TARGET_OS
3745 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3747 F32 -> (1, 1, 4, fprs)
3748 F64 -> (2, 1, 8, fprs)
3749 #elif linux_TARGET_OS
3750 -- ... the SysV ABI doesn't.
3751 F32 -> (0, 1, 4, fprs)
3752 F64 -> (0, 1, 8, fprs)
-- moveResult: copy the C return value (f1 for floats, r3[/r4] for ints)
-- into the destination register; FRSP narrows F64->F32 for the
-- out-of-line float ops that compute in double precision.
3755 moveResult reduceToF32 =
3759 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3760 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3761 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3763 | otherwise -> unitOL (MR r_dest r3)
3764 where rep = cmmRegRep dest
3765 r_dest = getRegisterReg dest
-- outOfLineFloatOp: map a CallishMachOp onto a libm function name.  The
-- Bool says whether the result must be reduced back to F32 (the F32 ops
-- call the double-precision routine and then FRSP the result).
3767 outOfLineFloatOp mop =
3769 mopExpr <- cmmMakeDynamicReference addImportNat True $
3770 mkForeignLabel functionName Nothing True
3771 let mopLabelOrExpr = case mopExpr of
3772 CmmLit (CmmLabel lbl) -> Left lbl
3774 return (mopLabelOrExpr, reduce)
3776 (functionName, reduce) = case mop of
3777 MO_F32_Exp -> (FSLIT("exp"), True)
3778 MO_F32_Log -> (FSLIT("log"), True)
3779 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3781 MO_F32_Sin -> (FSLIT("sin"), True)
3782 MO_F32_Cos -> (FSLIT("cos"), True)
3783 MO_F32_Tan -> (FSLIT("tan"), True)
3785 MO_F32_Asin -> (FSLIT("asin"), True)
3786 MO_F32_Acos -> (FSLIT("acos"), True)
3787 MO_F32_Atan -> (FSLIT("atan"), True)
3789 MO_F32_Sinh -> (FSLIT("sinh"), True)
3790 MO_F32_Cosh -> (FSLIT("cosh"), True)
3791 MO_F32_Tanh -> (FSLIT("tanh"), True)
3792 MO_F32_Pwr -> (FSLIT("pow"), True)
3794 MO_F64_Exp -> (FSLIT("exp"), False)
3795 MO_F64_Log -> (FSLIT("log"), False)
3796 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3798 MO_F64_Sin -> (FSLIT("sin"), False)
3799 MO_F64_Cos -> (FSLIT("cos"), False)
3800 MO_F64_Tan -> (FSLIT("tan"), False)
3802 MO_F64_Asin -> (FSLIT("asin"), False)
3803 MO_F64_Acos -> (FSLIT("acos"), False)
3804 MO_F64_Atan -> (FSLIT("atan"), False)
3806 MO_F64_Sinh -> (FSLIT("sinh"), False)
3807 MO_F64_Cosh -> (FSLIT("cosh"), False)
3808 MO_F64_Tanh -> (FSLIT("tanh"), False)
3809 MO_F64_Pwr -> (FSLIT("pow"), False)
3810 other -> pprPanic "genCCall(ppc): unknown callish op"
3811 (pprCallishMachOp other)
3813 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3815 #endif /* powerpc_TARGET_ARCH */
3818 -- -----------------------------------------------------------------------------
3819 -- Generating a table-branch
-- genSwitch: compile a table branch.  Emits a read-only jump table and an
-- indexed indirect jump.  x86 uses an absolute-address table and a single
-- JMP_TBL; PowerPC (PIC and non-PIC variants) scales the index by 4, loads
-- the entry, and jumps via CTR.
3821 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3823 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3824 genSwitch expr ids = do
3825 (reg,e_code) <- getSomeReg expr
3826 lbl <- getNewLabelNat
3828 jumpTable = map jumpTableEntry ids
-- Scaled, label-based addressing: table[reg * wORD_SIZE].
3829 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3830 code = e_code `appOL` toOL [
3831 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3832 JMP_TBL op [ id | Just id <- ids ]
3836 #elif powerpc_TARGET_ARCH
-- PIC variant: table entries are offsets relative to the table label, so
-- the loaded word is added to the (dynamically obtained) table address.
3840 (reg,e_code) <- getSomeReg expr
3841 tmp <- getNewRegNat I32
3842 lbl <- getNewLabelNat
3843 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3844 (tableReg,t_code) <- getSomeReg $ dynRef
3846 jumpTable = map jumpTableEntryRel ids
3848 jumpTableEntryRel Nothing
3849 = CmmStaticLit (CmmInt 0 wordRep)
3850 jumpTableEntryRel (Just (BlockId id))
3851 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3852 where blockLabel = mkAsmTempLabel id
3854 code = e_code `appOL` t_code `appOL` toOL [
3855 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3856 SLW tmp reg (RIImm (ImmInt 2)),
3857 LD I32 tmp (AddrRegReg tableReg tmp),
3858 ADD tmp tmp (RIReg tableReg),
3860 BCTR [ id | Just id <- ids ]
-- Non-PIC variant: absolute entries, table address built with HA/LO
-- relocations on the table label.
3865 (reg,e_code) <- getSomeReg expr
3866 tmp <- getNewRegNat I32
3867 lbl <- getNewLabelNat
3869 jumpTable = map jumpTableEntry ids
3871 code = e_code `appOL` toOL [
3872 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3873 SLW tmp reg (RIImm (ImmInt 2)),
3874 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3875 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3877 BCTR [ id | Just id <- ids ]
3881 genSwitch expr ids = panic "ToDo: genSwitch"
-- Shared helper: absolute jump-table entry; Nothing (a hole in the
-- switch) becomes a zero word.
3884 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3885 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3886 where blockLabel = mkAsmTempLabel id
3888 -- -----------------------------------------------------------------------------
3890 -- -----------------------------------------------------------------------------
3893 -- -----------------------------------------------------------------------------
3894 -- 'condIntReg' and 'condFltReg': condition codes into registers
3896 -- Turn those condition codes into integers now (when they appear on
3897 -- the right hand side of an assignment).
3899 -- (If applicable) Do not fill the delay slots here; you will confuse the
3900 -- register allocator.
-- Materialise a comparison result as 0/1 in a register (used when a
-- condition appears on the RHS of an assignment rather than in a branch).
3902 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3904 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3906 #if alpha_TARGET_ARCH
-- Alpha: not implemented in this NCG.
3907 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3908 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3909 #endif /* alpha_TARGET_ARCH */
3911 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3913 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- i386/x86_64: SETcc into a byte-sized temp, then zero-extend to 32 bits.
3915 condIntReg cond x y = do
3916 CondCode _ cond cond_code <- condIntCode cond x y
3917 tmp <- getNewRegNat I8
3919 code dst = cond_code `appOL` toOL [
3920 SETCC cond (OpReg tmp),
3921 MOVZxL I8 (OpReg tmp) (OpReg dst)
3924 return (Any I32 code)
3928 #if i386_TARGET_ARCH
-- i386: float comparisons use the same SETcc + zero-extend scheme.
3930 condFltReg cond x y = do
3931 CondCode _ cond cond_code <- condFltCode cond x y
3932 tmp <- getNewRegNat I8
3934 code dst = cond_code `appOL` toOL [
3935 SETCC cond (OpReg tmp),
3936 MOVZxL I8 (OpReg tmp) (OpReg dst)
3939 return (Any I32 code)
3943 #if x86_64_TARGET_ARCH
3945 condFltReg cond x y = do
3946 CondCode _ cond cond_code <- condFltCode cond x y
3947 tmp1 <- getNewRegNat wordRep
3948 tmp2 <- getNewRegNat wordRep
3950 -- We have to worry about unordered operands (eg. comparisons
3951 -- against NaN). If the operands are unordered, the comparison
3952 -- sets the parity flag, carry flag and zero flag.
3953 -- All comparisons are supposed to return false for unordered
3954 -- operands except for !=, which returns true.
3956 -- Optimisation: we don't have to test the parity flag if we
3957 -- know the test has already excluded the unordered case: eg >
3958 -- and >= test for a zero carry flag, which can only occur for
3959 -- ordered operands.
3961 -- ToDo: by reversing comparisons we could avoid testing the
3962 -- parity flag in more cases.
3967 NE -> or_unordered dst
3968 GU -> plain_test dst
3969 GEU -> plain_test dst
3970 _ -> and_ordered dst)
-- plain_test: the condition already excludes the unordered case.
3972 plain_test dst = toOL [
3973 SETCC cond (OpReg tmp1),
3974 MOVZxL I8 (OpReg tmp1) (OpReg dst)
-- or_unordered: NE must also be true when operands are unordered,
-- so OR in the parity flag.
3976 or_unordered dst = toOL [
3977 SETCC cond (OpReg tmp1),
3978 SETCC PARITY (OpReg tmp2),
3979 OR I8 (OpReg tmp1) (OpReg tmp2),
3980 MOVZxL I8 (OpReg tmp2) (OpReg dst)
-- and_ordered: all other comparisons must be false when unordered,
-- so AND with NOT-parity.
3982 and_ordered dst = toOL [
3983 SETCC cond (OpReg tmp1),
3984 SETCC NOTPARITY (OpReg tmp2),
3985 AND I8 (OpReg tmp1) (OpReg tmp2),
3986 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3989 return (Any I32 code)
3993 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3995 #if sparc_TARGET_ARCH
-- SPARC: materialise comparisons as 0/1.  EQ/NE against zero (and the
-- XOR-combined two-operand forms) are done branch-free using the carry
-- trick: SUBcc against %g0 sets carry iff src /= 0, and a subsequent
-- SUBC/ADDC folds the carry into 0 or 1 (hedged reading — TODO confirm
-- against SPARC SUBcc/ADDX semantics).
3997 condIntReg EQQ x (StInt 0)
3998 = getRegister x `thenNat` \ register ->
3999 getNewRegNat IntRep `thenNat` \ tmp ->
4001 code = registerCode register tmp
4002 src = registerName register tmp
4003 code__2 dst = code `appOL` toOL [
4004 SUB False True g0 (RIReg src) g0,
4005 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4007 return (Any IntRep code__2)
-- EQ of two registers: XOR them first, then test the result against 0.
4010 = getRegister x `thenNat` \ register1 ->
4011 getRegister y `thenNat` \ register2 ->
4012 getNewRegNat IntRep `thenNat` \ tmp1 ->
4013 getNewRegNat IntRep `thenNat` \ tmp2 ->
4015 code1 = registerCode register1 tmp1
4016 src1 = registerName register1 tmp1
4017 code2 = registerCode register2 tmp2
4018 src2 = registerName register2 tmp2
4019 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4020 XOR False src1 (RIReg src2) dst,
4021 SUB False True g0 (RIReg dst) g0,
4022 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4024 return (Any IntRep code__2)
4026 condIntReg NE x (StInt 0)
4027 = getRegister x `thenNat` \ register ->
4028 getNewRegNat IntRep `thenNat` \ tmp ->
4030 code = registerCode register tmp
4031 src = registerName register tmp
4032 code__2 dst = code `appOL` toOL [
4033 SUB False True g0 (RIReg src) g0,
4034 ADD True False g0 (RIImm (ImmInt 0)) dst]
4036 return (Any IntRep code__2)
-- NE of two registers: XOR then test against 0, as for EQ above.
4039 = getRegister x `thenNat` \ register1 ->
4040 getRegister y `thenNat` \ register2 ->
4041 getNewRegNat IntRep `thenNat` \ tmp1 ->
4042 getNewRegNat IntRep `thenNat` \ tmp2 ->
4044 code1 = registerCode register1 tmp1
4045 src1 = registerName register1 tmp1
4046 code2 = registerCode register2 tmp2
4047 src2 = registerName register2 tmp2
4048 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4049 XOR False src1 (RIReg src2) dst,
4050 SUB False True g0 (RIReg dst) g0,
4051 ADD True False g0 (RIImm (ImmInt 0)) dst]
4053 return (Any IntRep code__2)
-- General case: fall back to a two-label branch diamond that writes
-- 0 on one path and 1 on the other (with delay-slot NOPs).
4056 = getBlockIdNat `thenNat` \ lbl1 ->
4057 getBlockIdNat `thenNat` \ lbl2 ->
4058 condIntCode cond x y `thenNat` \ condition ->
4060 code = condCode condition
4061 cond = condName condition
4062 code__2 dst = code `appOL` toOL [
4063 BI cond False (ImmCLbl lbl1), NOP,
4064 OR False g0 (RIImm (ImmInt 0)) dst,
4065 BI ALWAYS False (ImmCLbl lbl2), NOP,
4067 OR False g0 (RIImm (ImmInt 1)) dst,
4070 return (Any IntRep code__2)
-- Float comparisons: same branch-diamond scheme, using BF instead of BI.
4073 = getBlockIdNat `thenNat` \ lbl1 ->
4074 getBlockIdNat `thenNat` \ lbl2 ->
4075 condFltCode cond x y `thenNat` \ condition ->
4077 code = condCode condition
4078 cond = condName condition
4079 code__2 dst = code `appOL` toOL [
4081 BF cond False (ImmCLbl lbl1), NOP,
4082 OR False g0 (RIImm (ImmInt 0)) dst,
4083 BI ALWAYS False (ImmCLbl lbl2), NOP,
4085 OR False g0 (RIImm (ImmInt 1)) dst,
4088 return (Any IntRep code__2)
4090 #endif /* sparc_TARGET_ARCH */
4092 #if powerpc_TARGET_ARCH
-- PowerPC: turn a condition into 0/1 branch-free.  The generated CondCode
-- leaves the result in a CR bit; the bit is extracted with MFCR + RLWINM
-- (and CRNOR first when the wanted sense is the negation of the bit).
4093 condReg getCond = do
4094 lbl1 <- getBlockIdNat
4095 lbl2 <- getBlockIdNat
4096 CondCode _ cond cond_code <- getCond
4098 {- code dst = cond_code `appOL` toOL [
-- (The branchy version above is kept commented out; the live version
-- below extracts the CR bit directly.)
4107 code dst = cond_code
4111 RLWINM dst dst (bit + 1) 31 31
4114 negate_code | do_negate = unitOL (CRNOR bit bit bit)
-- Map the Cond onto (which CR bit to read, whether to negate it first).
4117 (bit, do_negate) = case cond of
4131 return (Any I32 code)
-- Both int and float conditions share the CR-bit extraction above.
4133 condIntReg cond x y = condReg (condIntCode cond x y)
4134 condFltReg cond x y = condReg (condFltCode cond x y)
4135 #endif /* powerpc_TARGET_ARCH */
4138 -- -----------------------------------------------------------------------------
4139 -- 'trivial*Code': deal with trivial instructions
4141 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4142 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4143 -- Only look for constants on the right hand side, because that's
4144 -- where the generic optimizer will have put them.
4146 -- Similarly, for unary instructions, we don't have to worry about
4147 -- matching an StInt as the argument, because genericOpt will already
4148 -- have handled the constant-folding.
4152 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4153 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4154 -> Maybe (Operand -> Operand -> Instr)
4155 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4156 -> Maybe (Operand -> Operand -> Instr)
4157 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4158 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4160 -> CmmExpr -> CmmExpr -- the two arguments
4163 #ifndef powerpc_TARGET_ARCH
4166 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4167 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4168 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4169 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4171 -> CmmExpr -> CmmExpr -- the two arguments
4177 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4178 ,IF_ARCH_i386 ((Operand -> Instr)
4179 ,IF_ARCH_x86_64 ((Operand -> Instr)
4180 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4181 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4183 -> CmmExpr -- the one argument
4186 #ifndef powerpc_TARGET_ARCH
4189 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4190 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4191 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4192 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4194 -> CmmExpr -- the one argument
4198 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4200 #if alpha_TARGET_ARCH
-- Alpha: trivial two-operand instructions.  Immediate second operand is
-- folded directly into the instruction (RIImm); otherwise both operands
-- are computed into registers first.
4202 trivialCode instr x (StInt y)
4204 = getRegister x `thenNat` \ register ->
4205 getNewRegNat IntRep `thenNat` \ tmp ->
4207 code = registerCode register tmp
4208 src1 = registerName register tmp
4209 src2 = ImmInt (fromInteger y)
4210 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4212 return (Any IntRep code__2)
4214 trivialCode instr x y
4215 = getRegister x `thenNat` \ register1 ->
4216 getRegister y `thenNat` \ register2 ->
4217 getNewRegNat IntRep `thenNat` \ tmp1 ->
4218 getNewRegNat IntRep `thenNat` \ tmp2 ->
4220 code1 = registerCode register1 tmp1 []
4221 src1 = registerName register1 tmp1
4222 code2 = registerCode register2 tmp2 []
4223 src2 = registerName register2 tmp2
4224 code__2 dst = asmSeqThen [code1, code2] .
4225 mkSeqInstr (instr src1 (RIReg src2) dst)
4227 return (Any IntRep code__2)
-- Unary integer op: compute operand, apply instr into dst.
4230 trivialUCode instr x
4231 = getRegister x `thenNat` \ register ->
4232 getNewRegNat IntRep `thenNat` \ tmp ->
4234 code = registerCode register tmp
4235 src = registerName register tmp
4236 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4238 return (Any IntRep code__2)
-- Binary float op; the first (size) argument is ignored on Alpha.
4241 trivialFCode _ instr x y
4242 = getRegister x `thenNat` \ register1 ->
4243 getRegister y `thenNat` \ register2 ->
4244 getNewRegNat F64 `thenNat` \ tmp1 ->
4245 getNewRegNat F64 `thenNat` \ tmp2 ->
4247 code1 = registerCode register1 tmp1
4248 src1 = registerName register1 tmp1
4250 code2 = registerCode register2 tmp2
4251 src2 = registerName register2 tmp2
4253 code__2 dst = asmSeqThen [code1 [], code2 []] .
4254 mkSeqInstr (instr src1 src2 dst)
4256 return (Any F64 code__2)
-- Unary float op; size argument likewise ignored.
4258 trivialUFCode _ instr x
4259 = getRegister x `thenNat` \ register ->
4260 getNewRegNat F64 `thenNat` \ tmp ->
4262 code = registerCode register tmp
4263 src = registerName register tmp
4264 code__2 dst = code . mkSeqInstr (instr src dst)
4266 return (Any F64 code__2)
4268 #endif /* alpha_TARGET_ARCH */
4270 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4272 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4275 The Rules of the Game are:
4277 * You cannot assume anything about the destination register dst;
4278 it may be anything, including a fixed reg.
4280 * You may compute an operand into a fixed reg, but you may not
4281 subsequently change the contents of that fixed reg. If you
4282 want to do so, first copy the value either to a temporary
4283 or into dst. You are free to modify dst even if it happens
4284 to be a fixed reg -- that's not your problem.
4286 * You cannot assume that a fixed reg will stay live over an
4287 arbitrary computation. The same applies to the dst reg.
4289 * Temporary regs obtained from getNewRegNat are distinct from
4290 each other and from all other regs, and stay live over
4291 arbitrary computations.
4293 --------------------
4295 SDM's version of The Rules:
4297 * If getRegister returns Any, that means it can generate correct
4298 code which places the result in any register, period. Even if that
4299 register happens to be read during the computation.
4301 Corollary #1: this means that if you are generating code for an
4302 operation with two arbitrary operands, you cannot assign the result
4303 of the first operand into the destination register before computing
4304 the second operand. The second operand might require the old value
4305 of the destination register.
4307 Corollary #2: A function might be able to generate more efficient
4308 code if it knows the destination register is a new temporary (and
4309 therefore not read by any of the sub-computations).
4311 * If getRegister returns Any, then the code it generates may modify only:
4312 (a) fresh temporaries
4313 (b) the destination register
4314 (c) known registers (eg. %ecx is used by shifts)
4315 In particular, it may *not* modify global registers, unless the global
4316 register happens to be the destination register.
-- i386/x86_64: trivial two-operand instructions (dst := dst `op` src).
-- If a reversed-operand form is available and the *first* operand is a
-- small literal, emit literal-as-source directly using revinstr.
4319 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4320 | not (is64BitLit lit_a) = do
4321 b_code <- getAnyReg b
4324 = b_code dst `snocOL`
4325 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4327 return (Any rep code)
4329 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4331 -- This is re-used for floating pt instructions too.
4332 genTrivialCode rep instr a b = do
4333 (b_op, b_code) <- getNonClobberedOperand b
4334 a_code <- getAnyReg a
4335 tmp <- getNewRegNat rep
4337 -- We want the value of b to stay alive across the computation of a.
4338 -- But, we want to calculate a straight into the destination register,
4339 -- because the instruction only has two operands (dst := dst `op` src).
4340 -- The troublesome case is when the result of b is in the same register
4341 -- as the destination reg. In this case, we have to save b in a
4342 -- new temporary across the computation of a.
4344 | dst `regClashesWithOp` b_op =
4346 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4348 instr (OpReg tmp) (OpReg dst)
4352 instr b_op (OpReg dst)
4354 return (Any rep code)
-- A register clashes with an operand if it *is* that operand's register,
-- or appears anywhere in the operand's addressing mode.
4356 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4357 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4358 reg `regClashesWithOp` _ = False
-- Unary op: compute x straight into dst, then apply instr in place.
4362 trivialUCode rep instr x = do
4363 x_code <- getAnyReg x
4369 return (Any rep code)
4373 #if i386_TARGET_ARCH
-- i386 float op: x must not be clobbered by the computation of y.
4375 trivialFCode pk instr x y = do
4376 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4377 (y_reg, y_code) <- getSomeReg y
4382 instr pk x_reg y_reg dst
4384 return (Any pk code)
4388 #if x86_64_TARGET_ARCH
-- x86_64 SSE float ops are ordinary two-operand instructions, so reuse
-- the integer scheme.
4390 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4396 trivialUFCode rep instr x = do
4397 (x_reg, x_code) <- getSomeReg x
4403 return (Any rep code)
4405 #endif /* i386_TARGET_ARCH */
4407 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4409 #if sparc_TARGET_ARCH
-- | SPARC integer two-operand code. The first equation uses the
-- immediate (RIImm) form when the right operand is an integer literal;
-- the second evaluates both operands into registers and uses RIReg.
-- NOTE(review): the "let"/"in" layout lines around the local bindings
-- are missing from this extract; the bindings themselves are as shown.
trivialCode instr x (StInt y)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
        -- reuse the operand's code; the literal rides as an immediate
        code = registerCode register tmp
        src1 = registerName register tmp
        src2 = ImmInt (fromInteger y)
        code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
    return (Any IntRep code__2)

trivialCode instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
        code1 = registerCode register1 tmp1
        src1 = registerName register1 tmp1
        code2 = registerCode register2 tmp2
        src2 = registerName register2 tmp2
        -- run both operand computations, then the register-register op
        code__2 dst = code1 `appOL` code2 `snocOL`
                      instr src1 (RIReg src2) dst
    return (Any IntRep code__2)
-- | SPARC floating-point two-operand code. Float (F) and double (DF)
-- live in notionally separate register kinds, so when the operand kinds
-- differ the F32 side is first widened via @promote@ (FxTOy F DF) and
-- the double-precision form of @instr@ is used; the result kind is F64
-- in that case (see the final 'return').
-- NOTE(review): this extract drops several lines (the `thenNat`
-- continuations binding tmp1/tmp2, the "let"/"in" keywords, and the
-- "if pk1 == pk2 then" line); surviving lines reproduced as-is.
trivialFCode pk instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat (registerRep register1)
    getNewRegNat (registerRep register2)
    getNewRegNat F64 `thenNat` \ tmp ->
        -- widen an F32 value into the shared F64 temporary
        promote x = FxTOy F DF x tmp

        pk1   = registerRep register1
        code1 = registerCode register1 tmp1
        src1  = registerName register1 tmp1

        pk2   = registerRep register2
        code2 = registerCode register2 tmp2
        src2  = registerName register2 tmp2

            -- same kinds: emit the instruction at that precision
            code1 `appOL` code2 `snocOL`
            instr (primRepToSize pk) src1 src2 dst
          else if pk1 == F32 then
            -- left operand is single: promote it, operate at DF
            code1 `snocOL` promote src1 `appOL` code2 `snocOL`
            instr DF tmp src2 dst
            -- right operand is single: promote it, operate at DF
            code1 `appOL` code2 `snocOL` promote src2 `snocOL`
            instr DF src1 tmp dst
    return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- | SPARC unary integer op: evaluate the operand, then apply @instr@
-- with the operand passed in register/immediate (RIReg) form.
trivialUCode instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
        -- NOTE(review): the "let"/"in" layout lines are missing from
        -- this extract; bindings reproduced as-is.
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any IntRep code__2)
-- | SPARC unary floating-point op at precision @pk@: evaluate the
-- operand and apply @instr@ register-to-register.
trivialUFCode pk instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat pk `thenNat` \ tmp ->
        -- NOTE(review): the "let"/"in" layout lines are missing from
        -- this extract; bindings reproduced as-is.
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)
4493 #endif /* sparc_TARGET_ARCH */
4495 #if powerpc_TARGET_ARCH
4498 Wolfgang's PowerPC version of The Rules:
4500 A slightly modified version of The Rules to take advantage of the fact
4501 that PowerPC instructions work on all registers and don't implicitly
4502 clobber any fixed registers.
4504 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4506 * If getRegister returns Any, then the code it generates may modify only:
4507 (a) fresh temporaries
4508 (b) the destination register
4509 It may *not* modify global registers, unless the global
4510 register happens to be the destination register.
4511 It may not clobber any other registers. In fact, only ccalls clobber any
4513 Also, it may not modify the counter register (used by genCCall).
4515 Corollary: If a getRegister for a subexpression returns Fixed, you need
4516 not move it to a fresh temporary before evaluating the next subexpression.
4517 The Fixed register won't be modified.
4518 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4520 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4521 the value of the destination register.
-- | PowerPC integer two-operand code. When the right operand is an
-- integer literal that fits the (signedness-dependent) immediate field
-- ('makeImmediate'), use the RIImm form; otherwise evaluate both
-- operands into registers and use RIReg.
-- NOTE(review): the "= do" line of the first equation is not visible in
-- this extract.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
        (src1, code1) <- getSomeReg x
        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
        return (Any rep code)

trivialCode rep signed instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
    return (Any rep code)
-- | Generate code for a PowerPC three-register instruction that has no
-- immediate form: evaluate both operand expressions into registers and
-- emit @instr dst src1 src2@ after their preparation code.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (reg_l, prep_l) <- getSomeReg x
    (reg_r, prep_r) <- getSomeReg y
    let assemble dst = (prep_l `appOL` prep_r) `snocOL` instr dst reg_l reg_r
    return (Any rep assemble)
-- | PowerPC unary op: evaluate the operand into a register, then apply
-- @instr@ from that register into the destination.
trivialUCode rep instr x = do
    (operand, prep) <- getSomeReg x
    let gen dst = prep `snocOL` instr dst operand
    return (Any rep gen)
-- There is no "remainder" instruction on the PPC, so we have to compute
-- it by hand as  x - (x `div` y) * y.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
    -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
                -- NOTE(review): the divide (div dst src1 src2) and the
                -- final subtract appear to be missing from this extract;
                -- only the multiply of the quotient survives. Verify
                -- against the full source.
                MULLW dst dst (RIReg src2),
    return (Any rep code)
4566 #endif /* powerpc_TARGET_ARCH */
4569 -- -----------------------------------------------------------------------------
4570 -- Coercing to/from integer/floating-point...
4572 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4573 -- conversions. We have to store temporaries in memory to move
4574 -- between the integer and the floating point register sets.
4576 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4578 -- kinds, so the value has to be computed into one kind before being
4579 -- explicitly "converted" to live in the other kind.
-- | Convert an integer of the first 'MachRep' to a float of the second.
coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
-- | Convert a float of the first 'MachRep' to an integer of the second.
coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register

#if sparc_TARGET_ARCH
-- SPARC treats float and double as separate register kinds, so it also
-- needs explicit double<->float conversions (see the FxTOy uses below).
coerceDbl2Flt :: CmmExpr -> NatM Register
coerceFlt2Dbl :: CmmExpr -> NatM Register
4589 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH

-- NOTE(review): the equation heads (name + arguments) of both
-- conversions below are missing from this extract; by position these
-- should be coerceInt2FP (first) and coerceFP2Int (second). Both bounce
-- the value through memory at (spRel 0) to move between the integer and
-- floating-point register files — confirm against the full source.

  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ reg ->
        code = registerCode register reg
        src = registerName register reg
        code__2 dst = code . mkSeqInstrs [
            -- reload from the stack slot into an FP register
            LD TF dst (spRel 0),
    return (Any F64 code__2)

  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
        code = registerCode register tmp
        src = registerName register tmp
        code__2 dst = code . mkSeqInstrs [
            -- spill the FP value so the integer side can load it
            ST TF tmp (spRel 0),
    return (Any IntRep code__2)

#endif /* alpha_TARGET_ARCH */
4624 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4626 #if i386_TARGET_ARCH
-- | i386: int -> float using the x87 GITOF/GITOD pseudo-instructions,
-- selected by the destination precision.
coerceInt2FP from to x = do
  (x_reg, x_code) <- getSomeReg x
  -- NOTE(review): the "let" introducing these bindings is not visible
  -- in this extract.
  opc  = case to of F32 -> GITOF; F64 -> GITOD
  code dst = x_code `snocOL` opc x_reg dst
  -- ToDo: works for non-I32 reps?
  return (Any to code)
-- | i386: float -> int using the x87 GFTOI/GDTOI pseudo-instructions,
-- selected by the source precision.
coerceFP2Int from to x = do
  (x_reg, x_code) <- getSomeReg x
  -- NOTE(review): the "let" introducing these bindings is not visible
  -- in this extract.
  opc  = case from of F32 -> GFTOI; F64 -> GDTOI
  code dst = x_code `snocOL` opc x_reg dst
  -- ToDo: works for non-I32 reps?
  return (Any to code)
4648 #endif /* i386_TARGET_ARCH */
4650 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4652 #if x86_64_TARGET_ARCH
-- | x86_64: float -> int via the SSE convert instructions
-- (CVTSS2SI / CVTSD2SI), selected by the source precision.
coerceFP2Int from to x = do
  (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
  -- NOTE(review): the "let" line is not visible in this extract.
  opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
  code dst = x_code `snocOL` opc x_op dst
  return (Any to code) -- works even if the destination rep is <I32
-- | x86_64: int -> float via the SSE convert instructions
-- (CVTSI2SS / CVTSI2SD), selected by the destination precision.
coerceInt2FP from to x = do
  (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
  -- NOTE(review): the "let" line is not visible in this extract.
  opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
  code dst = x_code `snocOL` opc x_op dst
  return (Any to code) -- works even if the destination rep is <I32
-- | x86_64: float <-> double via CVTSD2SS / CVTSS2SD, selected by the
-- destination precision (the source precision is implied).
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
  (x_reg, x_code) <- getSomeReg x
  -- NOTE(review): the "let" line is not visible in this extract.
  opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
  code dst = x_code `snocOL` opc x_reg dst
  return (Any to code)
4681 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

-- NOTE(review): the equation head (presumably coerceInt2FP's
-- name/arguments) and the let/in layout lines are missing from this
-- extract. The visible code stores the int to a stack slot, reloads it
-- into an FP register, and converts in place with FxTOy.
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ reg ->
        code = registerCode register reg
        src = registerName register reg
        code__2 dst = code `appOL` toOL [
            ST W src (spRel (-2)),
            LD W (spRel (-2)) dst,
            FxTOy W (primRepToSize pk) dst dst]
    return (Any pk code__2)
-- | SPARC: float -> int. Convert inside the FP register file
-- (FxTOy ... W), then bounce the result through memory at spRel (-2)
-- to reach the integer registers.
coerceFP2Int fprep x
  = ASSERT(fprep == F64 || fprep == F32)
    getRegister x `thenNat` \ register ->
    getNewRegNat fprep `thenNat` \ reg ->
    getNewRegNat F32 `thenNat` \ tmp ->
        -- NOTE(review): the let/in layout lines are missing from this
        -- extract; bindings reproduced as-is.
        code = registerCode register reg
        src = registerName register reg
        code__2 dst = code `appOL` toOL [
            FxTOy (primRepToSize fprep) W src tmp,
            ST W tmp (spRel (-2)),
            LD W (spRel (-2)) dst]
    return (Any IntRep code__2)
-- NOTE(review): the heads of the next two equations are missing from
-- this extract; judging by the FxTOy directions these are coerceDbl2Flt
-- (DF -> F) followed by coerceFlt2Dbl (F -> DF) — confirm against the
-- full source.
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
    let code = registerCode register tmp
        src = registerName register tmp
        -- single FxTOy converts between the two FP kinds in-register
        (\dst -> code `snocOL` FxTOy DF F src dst))

  = getRegister x `thenNat` \ register ->
    getNewRegNat F32 `thenNat` \ tmp ->
    let code = registerCode register tmp
        src = registerName register tmp
        (\dst -> code `snocOL` FxTOy F DF src dst))

#endif /* sparc_TARGET_ARCH */
4737 #if powerpc_TARGET_ARCH
-- | PowerPC: int -> float. There is no direct int->FP move, so this
-- builds the classic "magic double": 0x43300000 in the high word and
-- (src XOR 0x80000000) in the low word are stored to the stack, loaded
-- as an F64, and the bias constant (fetched via @lbl@/@dynRef@) is
-- subtracted off to recover the signed value; if the target is F32 the
-- result is rounded to single with FRSP.
-- NOTE(review): several lines are missing from this extract (the "let"
-- introducing code', the static-data emission around the two
-- CmmStaticLit lines, and the FP load of the bias constant) — verify
-- against the full source.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat I32
    ftmp <- getNewRegNat F64
    dynRef <- cmmMakeDynamicReference addImportNat False lbl
    Amode addr addr_code <- getAmode dynRef
        code' dst = code `appOL` maybe_exts `appOL` toOL [
                -- the bias constant, as two 32-bit static words
                CmmStaticLit (CmmInt 0x43300000 I32),
                CmmStaticLit (CmmInt 0x80000000 I32)],
            XORIS itmp src (ImmInt 0x8000),
            ST I32 itmp (spRel 3),
            LIS itmp (ImmInt 0x4330),
            ST I32 itmp (spRel 2),
            LD F64 ftmp (spRel 2)
            ] `appOL` addr_code `appOL` toOL [
            FSUB F64 dst ftmp dst
            ] `appOL` maybe_frsp dst

        -- sub-word ints must be sign-extended before the trick works
        maybe_exts = case fromRep of
                        I8 -> unitOL $ EXTS I8 src src
                        I16 -> unitOL $ EXTS I16 src src

        -- narrow to single precision only when the target is F32
        maybe_frsp dst = case toRep of
                            F32 -> unitOL $ FRSP dst dst
    return (Any toRep code')
-- | PowerPC: float -> int. Convert inside the FP register file, spill
-- the 64-bit result to the stack, and read back the low 32-bit word.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
    -- NOTE(review): the "let" line and the convert-to-int instruction
    -- (between "convert to int in FP reg" and the store) are missing
    -- from this extract — verify against the full source.
        code' dst = code `appOL` toOL [
            -- convert to int in FP reg
            -- store value (64bit) from FP to stack
            ST F64 tmp (spRel 2),
            -- read low word of value (high word is undefined)
            LD I32 dst (spRel 3)]
    return (Any toRep code')
4783 #endif /* powerpc_TARGET_ARCH */
4786 -- -----------------------------------------------------------------------------
4787 -- eXTRA_STK_ARGS_HERE
4789 -- We (allegedly) put the first six C-call arguments in registers;
4790 -- where do we start putting the rest of them?
4792 -- Moved from MachInstrs (SDM):
#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- | Stack offset (in words) at which C-call arguments beyond the
-- register-passed ones are placed: 0 on Alpha, 23 on SPARC; any other
-- arch falls into the (deliberately undefined) ??? branch.
-- NOTE(review): the line carrying the binder name is missing from this
-- extract; only the signature and right-hand side are visible.
eXTRA_STK_ARGS_HERE :: Int
  = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))