1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
26 -- Our intermediate code:
27 import PprCmm ( pprExpr )
33 import StaticFlags ( opt_PIC )
34 import ForeignCall ( CCallConv(..) )
39 import FastTypes ( isFastTrue )
40 import Constants ( wORD_SIZE )
43 import Outputable ( assertPanic )
44 import TRACE ( trace )
47 import Control.Monad ( mapAndUnzipM )
48 import Maybe ( fromJust )
52 -- -----------------------------------------------------------------------------
53 -- Top-level of the instruction selector
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.
type InstrBlock = OrdList Instr
-- | Translate one top-level Cmm entity to native code.
-- A 'CmmProc' is compiled block-by-block; any static data (LDATAs)
-- emitted along the way comes back as extra top-level entities.
-- When position-independent code is in effect, the PIC base register
-- set-up is spliced in at the front of the proc.
cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (concat nat_blocks)
      tops = proc : concat statics
  case picBaseMb of
      Just picBase -> initializePicBase picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Compile a single basic block.  The instruction selector may emit
-- NEWBLOCK pseudo-instructions (new basic-block boundaries) and LDATA
-- pseudo-instructions (static data); both are fissioned back out here,
-- so one input block can yield several output blocks plus data tops.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- LDATAs here too.
  let
        -- right fold so that 'top' accumulates the instructions of the
        -- first (entry) block, in order
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Flatten a list of statements into one appended instruction stream.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)
-- | Select instructions for one Cmm statement, dispatching on the
-- value's rep: floating, 64-bit-on-32-bit (register pairs), or integer.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignReg_I64Code      reg src
#endif
      | otherwise          -> assignReg_IntCode kind reg src
        where kind = cmmRegRep reg

    CmmStore addr src
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignMem_I64Code      addr src
#endif
      | otherwise          -> assignMem_IntCode kind addr src
        where kind = cmmExprRep src

    CmmCall target result_regs args vols
       -> genCCall target result_regs args vols

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    -- NOTE(review): 'params' is dropped here; genJump takes only the
    -- target -- confirm parameters are handled upstream.
    CmmJump arg params    -> genJump arg
128 -- -----------------------------------------------------------------------------
129 -- General things for putting together code sequences
131 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
132 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' into the equivalent explicit addition
-- @reg + off@, so the selectors below only ever see 'CmmMachOp'.
-- The offset literal is given the register's own rep.
--
-- Robustness: the original match was non-exhaustive (callers only pass
-- CmmRegOff); a catch-all now panics with the offending expression
-- instead of dying with a bare pattern-match failure.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
  where rep = cmmRegRep reg
mangleIndexTree expr
  = pprPanic "mangleIndexTree: unexpected expression" (pprExpr expr)
138 -- -----------------------------------------------------------------------------
139 -- Code gen for 64-bit arithmetic on 32-bit platforms
142 Simple support for generating 64-bit code (ie, 64 bit values and 64
143 bit assignments) on 32-bit platforms. Unlike the main code generator
144 we merely shoot for generating working code as simply as possible, and
145 pay little attention to code quality. Specifically, there is no
146 attempt to deal cleverly with the fixed-vs-floating register
147 distinction; all values are generated into (pairs of) floating
148 registers, even if this would mean some redundant reg-reg moves as a
149 result. Only one of the VRegUniques is returned, since it will be
150 of the VRegUniqueLo form, and the upper-half VReg can be determined
151 by applying getHiVRegFromLo to it.
-- | Result of 64-bit expression selection on a 32-bit platform: the
-- code plus the LOW-half virtual register; the high half is always
-- obtainable from it via getHiVRegFromLo.
data ChildCode64     -- a.k.a "Register64"
      = ChildCode64
           InstrBlock  -- code to compute the 64-bit value
           Reg         -- the lower 32-bit temporary which contains the
                       -- result; use getHiVRegFromLo to find the other
                       -- VRegUnique. Rules of this simplified insn
                       -- selection game are therefore that the returned
                       -- Reg may be modified
#if WORD_SIZE_IN_BITS==32

-- Store / register-assign a 64-bit value on a 32-bit platform.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock

-- x86_64 handles 64-bit arithmetic natively; no iselExpr64 there.
#ifndef x86_64_TARGET_ARCH
iselExpr64        :: CmmExpr -> NatM ChildCode64
#endif
172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- Store a 64-bit value to memory on i386: two 32-bit MOVs,
-- little-endian (low word at addr, high word at addr+4).
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Assign a 64-bit value into a local register pair on i386
-- (two reg-reg MOVs, low then high).
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = mkVReg u_dst I32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

-- Only local registers can hold an I64 on a 32-bit platform.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit expression selection for i386: every result lands in a
-- virtual register pair (low half returned; high via getHiVRegFromLo).

-- Literal: split into two 32-bit immediates, one MOV per half.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- low word then high word of the 64-bit literal
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        code = toOL [
                MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

-- Load: little-endian, low word at addr, high word at addr+4.
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
   let
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
          )

-- A local I64 already lives in a register pair; no code needed.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))

-- we handle addition, but rather badly
-- Add of a literal: ADD on the low words, ADC (add-with-carry) on high.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                       ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV I32 (OpReg r1hi) (OpReg rhi),
                       ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General add: same ADD/ADC carry chain, both operands in registers.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                       ADD I32 (OpReg r2lo) (OpReg rlo),
                       MOV I32 (OpReg r1hi) (OpReg rhi),
                       ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Anything else is not supported by this simple selector.
iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)

#endif /* i386_TARGET_ARCH */
271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
273 #if sparc_TARGET_ARCH
-- Store a 64-bit value on SPARC: big-endian, so the HIGH word goes to
-- offset 0 and the low word to offset 4.
-- NOTE(review): the Amode result 'addr'/'addr_code' is unused; the
-- address is recomputed into 'src' via getSomeReg -- confirm the
-- duplication is intentional before changing it.
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     (src, code) <- getSomeReg addrTree
     let
         rhi = getHiVRegFromLo rlo
         -- Big-endian store
         mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- Assign a 64-bit value into a local register pair on SPARC.
-- Register moves are encoded as OR with %g0 (the SPARC idiom).
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
     let
         r_dst_lo = mkVReg u_dst pk
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- reg-reg move via OR rd, %g0, rs
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)

-- Only local registers can hold an I64 on a 32-bit platform.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"
-- 64-bit expression selection for SPARC (big-endian register pairs).

-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
--   = panic "iselExpr64(???)"

-- Load: big-endian, HIGH word at offset 0, low word at offset 4.
-- NOTE(review): the index register r2 of the matched AddrRegReg is
-- ignored below; this is only correct when r2 is %g0 -- verify against
-- what getAmode can return before relying on this clause.
iselExpr64 (CmmLoad addrTree I64) = do
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
     rlo <- getNewRegNat I32
     let rhi = getHiVRegFromLo rlo
         mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
     return (
            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
                        rlo
          )

-- Copy a local I64 register pair into a fresh destination pair
-- (moves via OR with %g0, the SPARC register-move idiom).
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
     r_dst_lo <- getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq I32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (
            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
          )

-- Anything else is not supported by this simple selector.
iselExpr64 expr
   = pprPanic "iselExpr64(sparc)" (ppr expr)
333 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
335 #if powerpc_TARGET_ARCH
-- | Compute two addressing modes for the high and low words of an I64
-- in memory (PowerPC, big-endian: high word first, low word at +4).
-- If the +4 offset cannot be folded into the amode, fall back to
-- materialising the pointer into a register and using reg+imm modes.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                           return (AddrRegImm hi_ptr (ImmInt 0),
                                   AddrRegImm hi_ptr (ImmInt 4),
                                   code)
-- Store a 64-bit value on PowerPC: big-endian, high word to the lower
-- address, low word to the higher one.
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        let
                rhi = getHiVRegFromLo rlo

                -- Big-endian store
                mov_hi = ST I32 rhi hi_addr
                mov_lo = ST I32 rlo lo_addr
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Assign a 64-bit value into a local register pair on PowerPC, using
-- MR (move register) for each half.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = mkVReg u_dst I32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MR r_dst_lo r_src_lo
         mov_hi = MR r_dst_hi r_src_hi
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

-- Only local registers can hold an I64 on a 32-bit platform.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- 64-bit expression selection for PowerPC (big-endian register pairs).

-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

-- Load: big-endian, HIGH word at the lower address.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo

-- A local I64 already lives in a register pair; no code needed.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- Literal: build each 32-bit half with the standard PowerPC lis/ori
-- idiom -- LIS sets the upper halfword, OR-immediate folds in the
-- lower halfword.
--
-- Bug fix: the instruction materialising half2 (bits 32-47) previously
-- targeted rlo, clobbering the already-built low word and leaving the
-- high register holding only half3 << 16.  It must build rhi.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- the four 16-bit halfwords of the 64-bit literal, low to high
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- Add: ADDC (add, set carry) on the low words, ADDE (add extended,
-- consume carry) on the high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ ADDC rlo r1lo r2lo,
                       ADDE rhi r1hi r2hi ]
   return (ChildCode64 code rlo)

-- Anything else is not supported by this simple selector.
iselExpr64 expr
   = pprPanic "iselExpr64(powerpc)" (ppr expr)

#endif /* powerpc_TARGET_ARCH */
429 -- -----------------------------------------------------------------------------
-- The 'Register' type

-- 'Register's passed up the tree. If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.

data Register
  = Fixed   MachRep Reg InstrBlock       -- result is pinned to this Reg
  | Any     MachRep (Reg -> InstrBlock)  -- caller chooses the destination Reg
-- | Replace the 'MachRep' carried by a 'Register', leaving the
-- register / code-generation part untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep = case register of
    Fixed _ reg code -> Fixed rep reg code
    Any   _ codefn   -> Any   rep codefn
446 -- -----------------------------------------------------------------------------
447 -- Utils based on getRegister, below
-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        -- caller gets a fresh temporary holding the value
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)
461 -- -----------------------------------------------------------------------------
462 -- Grab the Reg for a CmmReg
-- | Map a Cmm register to the (virtual or real) machine register it
-- lives in.  Globals must map to a real register on this platform;
-- memory-resident globals are a panic here.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u pk

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform. Hence ...
478 -- -----------------------------------------------------------------------------
479 -- Generate code to get a subtree into a Register
481 -- Don't delete this -- it's very handy for debugging.
483 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
484 -- = panic "getRegister(???)"
486 getRegister :: CmmExpr -> NatM Register
488 getRegister (CmmReg (CmmGlobal PicBaseReg))
490 reg <- getPicBaseNat wordRep
491 return (Fixed wordRep reg nilOL)
493 getRegister (CmmReg reg)
494 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
496 getRegister tree@(CmmRegOff _ _)
497 = getRegister (mangleIndexTree tree)
499 -- end of machine-"independent" bit; here we go on the rest...
501 #if alpha_TARGET_ARCH
503 getRegister (StDouble d)
504 = getBlockIdNat `thenNat` \ lbl ->
505 getNewRegNat PtrRep `thenNat` \ tmp ->
506 let code dst = mkSeqInstrs [
507 LDATA RoDataSegment lbl [
508 DATA TF [ImmLab (rational d)]
510 LDA tmp (AddrImm (ImmCLbl lbl)),
511 LD TF dst (AddrReg tmp)]
513 return (Any F64 code)
515 getRegister (StPrim primop [x]) -- unary PrimOps
517 IntNegOp -> trivialUCode (NEG Q False) x
519 NotOp -> trivialUCode NOT x
521 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
522 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
524 OrdOp -> coerceIntCode IntRep x
527 Float2IntOp -> coerceFP2Int x
528 Int2FloatOp -> coerceInt2FP pr x
529 Double2IntOp -> coerceFP2Int x
530 Int2DoubleOp -> coerceInt2FP pr x
532 Double2FloatOp -> coerceFltCode x
533 Float2DoubleOp -> coerceFltCode x
535 other_op -> getRegister (StCall fn CCallConv F64 [x])
537 fn = case other_op of
538 FloatExpOp -> FSLIT("exp")
539 FloatLogOp -> FSLIT("log")
540 FloatSqrtOp -> FSLIT("sqrt")
541 FloatSinOp -> FSLIT("sin")
542 FloatCosOp -> FSLIT("cos")
543 FloatTanOp -> FSLIT("tan")
544 FloatAsinOp -> FSLIT("asin")
545 FloatAcosOp -> FSLIT("acos")
546 FloatAtanOp -> FSLIT("atan")
547 FloatSinhOp -> FSLIT("sinh")
548 FloatCoshOp -> FSLIT("cosh")
549 FloatTanhOp -> FSLIT("tanh")
550 DoubleExpOp -> FSLIT("exp")
551 DoubleLogOp -> FSLIT("log")
552 DoubleSqrtOp -> FSLIT("sqrt")
553 DoubleSinOp -> FSLIT("sin")
554 DoubleCosOp -> FSLIT("cos")
555 DoubleTanOp -> FSLIT("tan")
556 DoubleAsinOp -> FSLIT("asin")
557 DoubleAcosOp -> FSLIT("acos")
558 DoubleAtanOp -> FSLIT("atan")
559 DoubleSinhOp -> FSLIT("sinh")
560 DoubleCoshOp -> FSLIT("cosh")
561 DoubleTanhOp -> FSLIT("tanh")
563 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
565 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
567 CharGtOp -> trivialCode (CMP LTT) y x
568 CharGeOp -> trivialCode (CMP LE) y x
569 CharEqOp -> trivialCode (CMP EQQ) x y
570 CharNeOp -> int_NE_code x y
571 CharLtOp -> trivialCode (CMP LTT) x y
572 CharLeOp -> trivialCode (CMP LE) x y
574 IntGtOp -> trivialCode (CMP LTT) y x
575 IntGeOp -> trivialCode (CMP LE) y x
576 IntEqOp -> trivialCode (CMP EQQ) x y
577 IntNeOp -> int_NE_code x y
578 IntLtOp -> trivialCode (CMP LTT) x y
579 IntLeOp -> trivialCode (CMP LE) x y
581 WordGtOp -> trivialCode (CMP ULT) y x
582 WordGeOp -> trivialCode (CMP ULE) x y
583 WordEqOp -> trivialCode (CMP EQQ) x y
584 WordNeOp -> int_NE_code x y
585 WordLtOp -> trivialCode (CMP ULT) x y
586 WordLeOp -> trivialCode (CMP ULE) x y
588 AddrGtOp -> trivialCode (CMP ULT) y x
589 AddrGeOp -> trivialCode (CMP ULE) y x
590 AddrEqOp -> trivialCode (CMP EQQ) x y
591 AddrNeOp -> int_NE_code x y
592 AddrLtOp -> trivialCode (CMP ULT) x y
593 AddrLeOp -> trivialCode (CMP ULE) x y
595 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
596 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
597 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
598 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
599 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
600 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
602 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
603 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
604 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
605 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
606 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
607 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
609 IntAddOp -> trivialCode (ADD Q False) x y
610 IntSubOp -> trivialCode (SUB Q False) x y
611 IntMulOp -> trivialCode (MUL Q False) x y
612 IntQuotOp -> trivialCode (DIV Q False) x y
613 IntRemOp -> trivialCode (REM Q False) x y
615 WordAddOp -> trivialCode (ADD Q False) x y
616 WordSubOp -> trivialCode (SUB Q False) x y
617 WordMulOp -> trivialCode (MUL Q False) x y
618 WordQuotOp -> trivialCode (DIV Q True) x y
619 WordRemOp -> trivialCode (REM Q True) x y
621 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
622 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
623 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
624 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
626 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
627 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
628 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
629 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
631 AddrAddOp -> trivialCode (ADD Q False) x y
632 AddrSubOp -> trivialCode (SUB Q False) x y
633 AddrRemOp -> trivialCode (REM Q True) x y
635 AndOp -> trivialCode AND x y
636 OrOp -> trivialCode OR x y
637 XorOp -> trivialCode XOR x y
638 SllOp -> trivialCode SLL x y
639 SrlOp -> trivialCode SRL x y
641 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
642 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
643 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
645 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
646 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
648 {- ------------------------------------------------------------
649 Some bizarre special code for getting condition codes into
650 registers. Integer non-equality is a test for equality
651 followed by an XOR with 1. (Integer comparisons always set
652 the result register to 0 or 1.) Floating point comparisons of
653 any kind leave the result in a floating point register, so we
654 need to wrangle an integer register out of things.
656 int_NE_code :: StixTree -> StixTree -> NatM Register
659 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
660 getNewRegNat IntRep `thenNat` \ tmp ->
662 code = registerCode register tmp
663 src = registerName register tmp
664 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
666 return (Any IntRep code__2)
668 {- ------------------------------------------------------------
669 Comments for int_NE_code also apply to cmpF_code
672 :: (Reg -> Reg -> Reg -> Instr)
674 -> StixTree -> StixTree
677 cmpF_code instr cond x y
678 = trivialFCode pr instr x y `thenNat` \ register ->
679 getNewRegNat F64 `thenNat` \ tmp ->
680 getBlockIdNat `thenNat` \ lbl ->
682 code = registerCode register tmp
683 result = registerName register tmp
685 code__2 dst = code . mkSeqInstrs [
686 OR zeroh (RIImm (ImmInt 1)) dst,
687 BF cond result (ImmCLbl lbl),
688 OR zeroh (RIReg zeroh) dst,
691 return (Any IntRep code__2)
693 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
694 ------------------------------------------------------------
696 getRegister (CmmLoad pk mem)
697 = getAmode mem `thenNat` \ amode ->
699 code = amodeCode amode
700 src = amodeAddr amode
701 size = primRepToSize pk
702 code__2 dst = code . mkSeqInstr (LD size dst src)
704 return (Any pk code__2)
706 getRegister (StInt i)
709 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
711 return (Any IntRep code)
714 code dst = mkSeqInstr (LDI Q dst src)
716 return (Any IntRep code)
718 src = ImmInt (fromInteger i)
723 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
725 return (Any PtrRep code)
728 imm__2 = case imm of Just x -> x
730 #endif /* alpha_TARGET_ARCH */
732 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
736 getRegister (CmmLit (CmmFloat f F32)) = do
737 lbl <- getNewLabelNat
738 dynRef <- cmmMakeDynamicReference addImportNat False lbl
739 Amode addr addr_code <- getAmode dynRef
743 CmmStaticLit (CmmFloat f F32)]
744 `consOL` (addr_code `snocOL`
747 return (Any F32 code)
750 getRegister (CmmLit (CmmFloat d F64))
752 = let code dst = unitOL (GLDZ dst)
753 in return (Any F64 code)
756 = let code dst = unitOL (GLD1 dst)
757 in return (Any F64 code)
760 lbl <- getNewLabelNat
761 dynRef <- cmmMakeDynamicReference addImportNat False lbl
762 Amode addr addr_code <- getAmode dynRef
766 CmmStaticLit (CmmFloat d F64)]
767 `consOL` (addr_code `snocOL`
770 return (Any F64 code)
772 #endif /* i386_TARGET_ARCH */
774 #if x86_64_TARGET_ARCH
776 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
777 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
778 -- I don't know why there are xorpd, xorps, and pxor instructions.
779 -- They all appear to do the same thing --SDM
780 return (Any rep code)
782 getRegister (CmmLit (CmmFloat f rep)) = do
783 lbl <- getNewLabelNat
784 let code dst = toOL [
787 CmmStaticLit (CmmFloat f rep)],
788 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
791 return (Any rep code)
793 #endif /* x86_64_TARGET_ARCH */
795 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
797 -- catch simple cases of zero- or sign-extended load
798 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
799 code <- intLoadCode (MOVZxL I8) addr
800 return (Any I32 code)
802 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
803 code <- intLoadCode (MOVSxL I8) addr
804 return (Any I32 code)
806 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
807 code <- intLoadCode (MOVZxL I16) addr
808 return (Any I32 code)
810 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
811 code <- intLoadCode (MOVSxL I16) addr
812 return (Any I32 code)
816 #if x86_64_TARGET_ARCH
818 -- catch simple cases of zero- or sign-extended load
819 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
820 code <- intLoadCode (MOVZxL I8) addr
821 return (Any I64 code)
823 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
824 code <- intLoadCode (MOVSxL I8) addr
825 return (Any I64 code)
827 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
828 code <- intLoadCode (MOVZxL I16) addr
829 return (Any I64 code)
831 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
832 code <- intLoadCode (MOVSxL I16) addr
833 return (Any I64 code)
835 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
836 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
837 return (Any I64 code)
839 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
840 code <- intLoadCode (MOVSxL I32) addr
841 return (Any I64 code)
845 #if x86_64_TARGET_ARCH
846 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
847 x_code <- getAnyReg x
848 lbl <- getNewLabelNat
850 code dst = x_code dst `appOL` toOL [
851 -- This is how gcc does it, so it can't be that bad:
852 LDATA ReadOnlyData16 [
855 CmmStaticLit (CmmInt 0x80000000 I32),
856 CmmStaticLit (CmmInt 0 I32),
857 CmmStaticLit (CmmInt 0 I32),
858 CmmStaticLit (CmmInt 0 I32)
860 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
861 -- xorps, so we need the 128-bit constant
862 -- ToDo: rip-relative
865 return (Any F32 code)
867 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
868 x_code <- getAnyReg x
869 lbl <- getNewLabelNat
871 -- This is how gcc does it, so it can't be that bad:
872 code dst = x_code dst `appOL` toOL [
873 LDATA ReadOnlyData16 [
876 CmmStaticLit (CmmInt 0x8000000000000000 I64),
877 CmmStaticLit (CmmInt 0 I64)
879 -- gcc puts an unpck here. Wonder if we need it.
880 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
881 -- xorpd, so we need the 128-bit constant
884 return (Any F64 code)
887 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
889 getRegister (CmmMachOp mop [x]) -- unary MachOps
892 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
893 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
896 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
897 MO_Not rep -> trivialUCode rep (NOT rep) x
900 -- TODO: these are only nops if the arg is not a fixed register that
901 -- can't be byte-addressed.
902 MO_U_Conv I32 I8 -> conversionNop I32 x
903 MO_S_Conv I32 I8 -> conversionNop I32 x
904 MO_U_Conv I16 I8 -> conversionNop I16 x
905 MO_S_Conv I16 I8 -> conversionNop I16 x
906 MO_U_Conv I32 I16 -> conversionNop I32 x
907 MO_S_Conv I32 I16 -> conversionNop I32 x
908 #if x86_64_TARGET_ARCH
909 MO_U_Conv I64 I32 -> conversionNop I64 x
910 MO_S_Conv I64 I32 -> conversionNop I64 x
911 MO_U_Conv I64 I16 -> conversionNop I64 x
912 MO_S_Conv I64 I16 -> conversionNop I64 x
913 MO_U_Conv I64 I8 -> conversionNop I64 x
914 MO_S_Conv I64 I8 -> conversionNop I64 x
917 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
918 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
921 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
922 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
923 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
925 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
926 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
927 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
929 #if x86_64_TARGET_ARCH
930 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
931 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
932 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
933 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
934 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
935 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
936 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
937 -- However, we don't want the register allocator to throw it
938 -- away as an unnecessary reg-to-reg move, so we keep it in
939 -- the form of a movzl and print it as a movl later.
943 MO_S_Conv F32 F64 -> conversionNop F64 x
944 MO_S_Conv F64 F32 -> conversionNop F32 x
946 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
947 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
951 | isFloatingRep from -> coerceFP2Int from to x
952 | isFloatingRep to -> coerceInt2FP from to x
954 other -> pprPanic "getRegister" (pprMachOp mop)
956 -- signed or unsigned extension.
957 integerExtend from to instr expr = do
958 (reg,e_code) <- if from == I8 then getByteReg expr
963 instr from (OpReg reg) (OpReg dst)
966 conversionNop new_rep expr
967 = do e_code <- getRegister expr
968 return (swizzleRegisterRep e_code new_rep)
971 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
972 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
974 MO_Eq F32 -> condFltReg EQQ x y
975 MO_Ne F32 -> condFltReg NE x y
976 MO_S_Gt F32 -> condFltReg GTT x y
977 MO_S_Ge F32 -> condFltReg GE x y
978 MO_S_Lt F32 -> condFltReg LTT x y
979 MO_S_Le F32 -> condFltReg LE x y
981 MO_Eq F64 -> condFltReg EQQ x y
982 MO_Ne F64 -> condFltReg NE x y
983 MO_S_Gt F64 -> condFltReg GTT x y
984 MO_S_Ge F64 -> condFltReg GE x y
985 MO_S_Lt F64 -> condFltReg LTT x y
986 MO_S_Le F64 -> condFltReg LE x y
988 MO_Eq rep -> condIntReg EQQ x y
989 MO_Ne rep -> condIntReg NE x y
991 MO_S_Gt rep -> condIntReg GTT x y
992 MO_S_Ge rep -> condIntReg GE x y
993 MO_S_Lt rep -> condIntReg LTT x y
994 MO_S_Le rep -> condIntReg LE x y
996 MO_U_Gt rep -> condIntReg GU x y
997 MO_U_Ge rep -> condIntReg GEU x y
998 MO_U_Lt rep -> condIntReg LU x y
999 MO_U_Le rep -> condIntReg LEU x y
1001 #if i386_TARGET_ARCH
1002 MO_Add F32 -> trivialFCode F32 GADD x y
1003 MO_Sub F32 -> trivialFCode F32 GSUB x y
1005 MO_Add F64 -> trivialFCode F64 GADD x y
1006 MO_Sub F64 -> trivialFCode F64 GSUB x y
1008 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1009 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1012 #if x86_64_TARGET_ARCH
1013 MO_Add F32 -> trivialFCode F32 ADD x y
1014 MO_Sub F32 -> trivialFCode F32 SUB x y
1016 MO_Add F64 -> trivialFCode F64 ADD x y
1017 MO_Sub F64 -> trivialFCode F64 SUB x y
1019 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1020 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1023 MO_Add rep -> add_code rep x y
1024 MO_Sub rep -> sub_code rep x y
1026 MO_S_Quot rep -> div_code rep True True x y
1027 MO_S_Rem rep -> div_code rep True False x y
1028 MO_U_Quot rep -> div_code rep False True x y
1029 MO_U_Rem rep -> div_code rep False False x y
1031 #if i386_TARGET_ARCH
1032 MO_Mul F32 -> trivialFCode F32 GMUL x y
1033 MO_Mul F64 -> trivialFCode F64 GMUL x y
1036 #if x86_64_TARGET_ARCH
1037 MO_Mul F32 -> trivialFCode F32 MUL x y
1038 MO_Mul F64 -> trivialFCode F64 MUL x y
1041 MO_Mul rep -> let op = IMUL rep in
1042 trivialCode rep op (Just op) x y
1044 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1046 MO_And rep -> let op = AND rep in
1047 trivialCode rep op (Just op) x y
1048 MO_Or rep -> let op = OR rep in
1049 trivialCode rep op (Just op) x y
1050 MO_Xor rep -> let op = XOR rep in
1051 trivialCode rep op (Just op) x y
1053 {- Shift ops on x86s have constraints on their source, it
1054 either has to be Imm, CL or 1
1055 => trivialCode is not restrictive enough (sigh.)
1057 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1058 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1059 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1061 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1063 --------------------
1064 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1065 imulMayOflo rep a b = do
1066 (a_reg, a_code) <- getNonClobberedReg a
1067 b_code <- getAnyReg b
1069 shift_amt = case rep of
1072 _ -> panic "shift_amt"
1074 code = a_code `appOL` b_code eax `appOL`
1076 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1077 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1078 -- sign extend lower part
1079 SUB rep (OpReg edx) (OpReg eax)
1080 -- compare against upper
1081 -- eax==0 if high part == sign extended low part
1084 return (Fixed rep eax code)
1086 --------------------
-- | x86 shifts: the shift amount must be an immediate, %cl, or 1,
-- so trivialCode cannot be used.
1087 shift_code :: MachRep
1088 -> (Operand -> Operand -> Instr)
1093 {- Case1: shift length as immediate -}
1094 shift_code rep instr x y@(CmmLit lit) = do
1095 x_code <- getAnyReg x
1098 = x_code dst `snocOL`
1099 instr (OpImm (litToImm lit)) (OpReg dst)
1101 return (Any rep code)
1103 {- Case2: shift length is complex (non-immediate) -}
-- The amount is computed into %ecx (y_code applied to ecx, presumably
-- on a line missing from this extract); x must stay live across that,
-- hence getNonClobberedReg.  Result is Fixed in x_reg.
1104 shift_code rep instr x y{-amount-} = do
1105 (x_reg, x_code) <- getNonClobberedReg x
1106 y_code <- getAnyReg y
1108 code = x_code `appOL`
1110 instr (OpReg ecx) (OpReg x_reg)
1112 return (Fixed rep x_reg code)
1114 --------------------
-- | Addition: when the second operand is an immediate that fits in
-- 32 bits, use the three-operand LEA form (add_int); otherwise a plain
-- ADD via trivialCode (ADD is commutative, hence the Just).
1115 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1116 add_code rep x (CmmLit (CmmInt y _))
1117 | not (is64BitInteger y) = add_int rep x y
1118 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1120 --------------------
-- | Subtraction of an immediate becomes addition of its negation (so
-- the LEA form applies).  SUB is not commutative, hence Nothing.
1121 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1122 sub_code rep x (CmmLit (CmmInt y _))
1123 | not (is64BitInteger (-y)) = add_int rep x (-y)
1124 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1126 -- our three-operand add instruction:
-- reg + imm via an LEA-style addressing mode, leaving the source
-- register unclobbered.  NOTE(review): the LEA instruction line itself
-- is missing from this extract.
1127 add_int rep x y = do
1128 (x_reg, x_code) <- getSomeReg x
1130 imm = ImmInt (fromInteger y)
1134 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1137 return (Any rep code)
1139 ----------------------
-- | x86 division: IDIV/DIV divide %edx:%eax by the operand, leaving
-- quotient in %eax and remainder in %edx.  'signed' selects IDIV plus
-- CLTD sign-extension of %eax into %edx; unsigned clears %edx by XOR.
-- 'quotient' selects which result register the caller receives
-- (the edx alternative of 'result' is missing from this extract).
1140 div_code rep signed quotient x y = do
1141 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1142 x_code <- getAnyReg x
1144 widen | signed = CLTD rep
1145 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1147 instr | signed = IDIV
1150 code = y_code `appOL`
1152 toOL [widen, instr rep y_op]
1154 result | quotient = eax
1158 return (Fixed rep result code)
-- Floating-point load: GLD on i386 (x87 stack), plain MOV on x86_64.
-- NOTE(review): the guard line(s) for this equation are missing from
-- this extract.
1161 getRegister (CmmLoad mem pk)
1164 Amode src mem_code <- getAmode mem
1166 code dst = mem_code `snocOL`
1167 IF_ARCH_i386(GLD pk src dst,
1168 MOV pk (OpAddr src) (OpReg dst))
1170 return (Any pk code)
1172 #if i386_TARGET_ARCH
-- i386 integer loads; 8-bit loads go through MOVZxL (zero-extend),
-- see the comment below for why.
1173 getRegister (CmmLoad mem pk)
1176 code <- intLoadCode (instr pk) mem
1177 return (Any pk code)
1179 instr I8 = MOVZxL pk
1182 -- we always zero-extend 8-bit loads, if we
1183 -- can't think of anything better. This is because
1184 -- we can't guarantee access to an 8-bit variant of every register
1185 -- (esi and edi don't have 8-bit variants), so to make things
1186 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1189 #if x86_64_TARGET_ARCH
1190 -- Simpler memory load code on x86_64
1191 getRegister (CmmLoad mem pk)
1193 code <- intLoadCode (MOV pk) mem
1194 return (Any pk code)
-- Literal zero: XOR dst,dst instead of MOV $0,dst; the 32-bit form is
-- preferred on x86_64 since it zero-extends to 64 bits anyway.
1197 getRegister (CmmLit (CmmInt 0 rep))
1199 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1200 adj_rep = case rep of I64 -> I32; _ -> rep
1201 rep1 = IF_ARCH_i386( rep, adj_rep )
1203 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1205 return (Any rep code)
1207 #if x86_64_TARGET_ARCH
1208 -- optimisation for loading small literals on x86_64: take advantage
1209 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1210 -- instruction forms are shorter.
1211 getRegister (CmmLit lit)
1212 | I64 <- cmmLitRep lit, not (isBigLit lit)
1215 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1217 return (Any I64 code)
-- "big" = does not fit in an unsigned 32-bit immediate.
1219 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1221 -- note1: not the same as is64BitLit, because that checks for
1222 -- signed literals that fit in 32 bits, but we want unsigned
1224 -- note2: all labels are small, because we're assuming the
1225 -- small memory model (see gcc docs, -mcmodel=small).
-- Generic literal: move the immediate straight into the destination.
1228 getRegister (CmmLit lit)
1232 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1234 return (Any rep code)
1236 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build load code: given an instruction constructor (e.g. @MOV pk@
-- or a zero-/sign-extending variant) and a memory expression, produce
-- a function from destination register to the instruction sequence
-- that computes the address and performs the load into that register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
	-> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode src mem_code <- getAmode mem
    let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
    return load
1245 -- Compute an expression into *any* register, adding the appropriate
1246 -- move instruction if necessary.
1247 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-- NOTE(review): the equation head and the final line (presumably
-- 'anyReg r') are missing from this extract.
1249 r <- getRegister expr
-- | Turn a Register (the result of getRegister) into a function from
-- destination register to instruction block.  An 'Any' result already
-- is such a function; a 'Fixed' result gets a reg-to-reg move appended
-- so the value ends up in the requested destination.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register = case register of
    Any _ code          -> return code
    Fixed rep reg fcode -> return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1256 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1257 -- Fixed registers might not be byte-addressable, so we make sure we've
1258 -- got a temporary, inserting an extra reg copy if necessary.
1259 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1260 #if x86_64_TARGET_ARCH
1261 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 version.  NOTE(review): the #else and the 'case r of' scrutiny
-- lines are missing from this extract.  Any results go into a fresh
-- virtual reg; Fixed virtual regs are usable directly; Fixed real regs
-- are copied into a fresh temporary (e.g. esi/edi cannot be
-- byte-addressed).
1263 getByteReg expr = do
1264 r <- getRegister expr
1267 tmp <- getNewRegNat rep
1268 return (tmp, code tmp)
1270 | isVirtualReg reg -> return (reg,code)
1272 tmp <- getNewRegNat rep
1273 return (tmp, code `snocOL` reg2reg rep reg tmp)
1274 -- ToDo: could optimise slightly by checking for byte-addressable
1275 -- real registers, but that will happen very rarely if at all.
1278 -- Another variant: this time we want the result in a register that cannot
1279 -- be modified by code to evaluate an arbitrary expression.
1280 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1281 getNonClobberedReg expr = do
1282 r <- getRegister expr
-- NOTE(review): the case-analysis lines on r are missing from this
-- extract; 'Any' results get a fresh temporary:
1285 tmp <- getNewRegNat rep
1286 return (tmp, code tmp)
1288 -- only free regs can be clobbered
-- A Fixed real register that the allocator considers free may be
-- clobbered by later code, so copy it into a fresh temporary first.
1289 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1290 tmp <- getNewRegNat rep
1291 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Register-to-register move, choosing the instruction by rep:
-- GMOV for i386 x87 floating registers, plain MOV otherwise.
-- NOTE(review): the equation head and the x86_64 branch are missing
-- from this extract.
1295 reg2reg :: MachRep -> Reg -> Reg -> Instr
1297 #if i386_TARGET_ARCH
1298 | isFloatingRep rep = GMOV src dst
1300 | otherwise = MOV rep (OpReg src) (OpReg dst)
1302 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1304 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1306 #if sparc_TARGET_ARCH
-- SPARC: float literals cannot be immediates, so emit them into a
-- read-only data section (the LDATA line is partly missing from this
-- extract) and load them via SETHI/LD using the label's HI/LO halves.
1308 getRegister (CmmLit (CmmFloat f F32)) = do
1309 lbl <- getNewLabelNat
1310 let code dst = toOL [
1313 CmmStaticLit (CmmFloat f F32)],
1314 SETHI (HI (ImmCLbl lbl)) dst,
1315 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1316 return (Any F32 code)
1318 getRegister (CmmLit (CmmFloat d F64)) = do
1319 lbl <- getNewLabelNat
1320 let code dst = toOL [
1323 CmmStaticLit (CmmFloat d F64)],
1324 SETHI (HI (ImmCLbl lbl)) dst,
1325 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1326 return (Any F64 code)
-- Unary MachOps: float negation with FNEG, integer negation as
-- subtraction from %g0, NOT as XNOR with %g0, plus conversions.
1328 getRegister (CmmMachOp mop [x]) -- unary MachOps
1330 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1331 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1333 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1334 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1336 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1338 MO_U_Conv F64 F32-> coerceDbl2Flt x
1339 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1341 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1342 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1343 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1344 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1346 -- Conversions which are a nop on sparc
1348 | from == to -> conversionNop to x
1349 MO_U_Conv I32 to -> conversionNop to x
1350 MO_S_Conv I32 to -> conversionNop to x
1353 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1354 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1355 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1356 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1358 other_op -> panic "Unknown unary mach op"
-- integerExtend: shift left then shift right (SRA when signed, SRL
-- when unsigned) to sign-/zero-extend a sub-word value.
-- NOTE(review): the left-shift line appears to be missing from this
-- extract.
1361 integerExtend signed from to expr = do
1362 (reg, e_code) <- getSomeReg expr
1366 ((if signed then SRA else SRL)
1367 reg (RIImm (ImmInt 0)) dst)
1368 return (Any to code)
-- conversionNop: no instruction needed, just relabel the Register's rep.
1369 conversionNop new_rep expr
1370 = do e_code <- getRegister expr
1371 return (swizzleRegisterRep e_code new_rep)
-- SPARC dyadic MachOps: comparisons build condition registers via
-- condFltReg/condIntReg; arithmetic goes through trivialCode /
-- trivialFCode; integer division calls the .div/.rem/.udiv/.urem
-- millicode routines.
1373 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1375 MO_Eq F32 -> condFltReg EQQ x y
1376 MO_Ne F32 -> condFltReg NE x y
1378 MO_S_Gt F32 -> condFltReg GTT x y
1379 MO_S_Ge F32 -> condFltReg GE x y
1380 MO_S_Lt F32 -> condFltReg LTT x y
1381 MO_S_Le F32 -> condFltReg LE x y
1383 MO_Eq F64 -> condFltReg EQQ x y
1384 MO_Ne F64 -> condFltReg NE x y
1386 MO_S_Gt F64 -> condFltReg GTT x y
1387 MO_S_Ge F64 -> condFltReg GE x y
1388 MO_S_Lt F64 -> condFltReg LTT x y
1389 MO_S_Le F64 -> condFltReg LE x y
1391 MO_Eq rep -> condIntReg EQQ x y
1392 MO_Ne rep -> condIntReg NE x y
1394 MO_S_Gt rep -> condIntReg GTT x y
1395 MO_S_Ge rep -> condIntReg GE x y
1396 MO_S_Lt rep -> condIntReg LTT x y
1397 MO_S_Le rep -> condIntReg LE x y
1399 MO_U_Gt I32 -> condIntReg GTT x y
1400 MO_U_Ge I32 -> condIntReg GE x y
1401 MO_U_Lt I32 -> condIntReg LTT x y
1402 MO_U_Le I32 -> condIntReg LE x y
-- Unsigned 16-bit comparisons use the unsigned condition codes
-- (GU/GEU/LU/LEU); NOTE(review): the I32 cases above use the signed
-- codes — possibly a latent bug, but cannot be confirmed from this
-- extract alone.
1404 MO_U_Gt I16 -> condIntReg GU x y
1405 MO_U_Ge I16 -> condIntReg GEU x y
1406 MO_U_Lt I16 -> condIntReg LU x y
1407 MO_U_Le I16 -> condIntReg LEU x y
1409 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1410 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1412 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1414 -- ToDo: teach about V8+ SPARC div instructions
1415 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1416 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1417 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1418 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1420 MO_Add F32 -> trivialFCode F32 FADD x y
1421 MO_Sub F32 -> trivialFCode F32 FSUB x y
1422 MO_Mul F32 -> trivialFCode F32 FMUL x y
1423 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1425 MO_Add F64 -> trivialFCode F64 FADD x y
1426 MO_Sub F64 -> trivialFCode F64 FSUB x y
1427 MO_Mul F64 -> trivialFCode F64 FMUL x y
1428 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1430 MO_And rep -> trivialCode rep (AND False) x y
1431 MO_Or rep -> trivialCode rep (OR False) x y
1432 MO_Xor rep -> trivialCode rep (XOR False) x y
1434 MO_Mul rep -> trivialCode rep (SMUL False) x y
1436 MO_Shl rep -> trivialCode rep SLL x y
1437 MO_U_Shr rep -> trivialCode rep SRL x y
1438 MO_S_Shr rep -> trivialCode rep SRA x y
-- pow is delegated to the C library on doubles; F32 operands are
-- promoted first.  NOTE(review): StCall/MO_F32_Pwr look like old
-- Stix-era code retained here — confirm whether this branch is live.
1441 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1442 [promote x, promote y])
1443 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1444 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1447 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1449 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1451 --------------------
-- | SPARC overflow-detecting multiply: SMUL produces the low word;
-- the high word (read into res_hi, presumably via RDY on a line
-- missing from this extract) is compared with the arithmetically
-- shifted low word; dst /= 0 signals signed overflow.
1452 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1453 imulMayOflo rep a b = do
1454 (a_reg, a_code) <- getSomeReg a
1455 (b_reg, b_code) <- getSomeReg b
1456 res_lo <- getNewRegNat I32
1457 res_hi <- getNewRegNat I32
1459 shift_amt = case rep of
1462 _ -> panic "shift_amt"
1463 code dst = a_code `appOL` b_code `appOL`
1465 SMUL False a_reg (RIReg b_reg) res_lo,
1467 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1468 SUB False False res_lo (RIReg res_hi) dst
1470 return (Any I32 code)
-- SPARC loads and literals.
1472 getRegister (CmmLoad mem pk) = do
1473 Amode src code <- getAmode mem
1475 code__2 dst = code `snocOL` LD pk src dst
1476 return (Any pk code__2)
-- Small integer literal (the fits-in-13-bits guard is missing from
-- this extract): synthesised as OR %g0, imm, dst.
1478 getRegister (CmmLit (CmmInt i _))
1481 src = ImmInt (fromInteger i)
1482 code dst = unitOL (OR False g0 (RIImm src) dst)
1484 return (Any I32 code)
-- General literal: SETHI %hi(imm), dst then OR dst, %lo(imm), dst.
1486 getRegister (CmmLit lit)
1487 = let rep = cmmLitRep lit
1491 OR False dst (RIImm (LO imm)) dst]
1492 in return (Any I32 code)
1494 #endif /* sparc_TARGET_ARCH */
1496 #if powerpc_TARGET_ARCH
-- PowerPC loads: a single LD serves both int and float reps; the
-- assertion checks the destination's register class matches the rep.
1497 getRegister (CmmLoad mem pk)
1500 Amode addr addr_code <- getAmode mem
1501 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1502 addr_code `snocOL` LD pk dst addr
1503 return (Any pk code)
1505 -- catch simple cases of zero- or sign-extended load
1506 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1507 Amode addr addr_code <- getAmode mem
1508 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1510 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1512 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1513 Amode addr addr_code <- getAmode mem
1514 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- LA is the sign-extending ("algebraic") halfword load.
1516 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1517 Amode addr addr_code <- getAmode mem
1518 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- Unary MachOps: NOT, conversions, and negation.
1520 getRegister (CmmMachOp mop [x]) -- unary MachOps
1522 MO_Not rep -> trivialUCode rep NOT x
1524 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1525 MO_S_Conv F32 F64 -> conversionNop F64 x
1528 | from == to -> conversionNop to x
1529 | isFloatingRep from -> coerceFP2Int from to x
1530 | isFloatingRep to -> coerceInt2FP from to x
1532 -- narrowing is a nop: we treat the high bits as undefined
1533 MO_S_Conv I32 to -> conversionNop to x
1534 MO_S_Conv I16 I8 -> conversionNop I8 x
1535 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1536 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1539 | from == to -> conversionNop to x
1540 -- narrowing is a nop: we treat the high bits as undefined
1541 MO_U_Conv I32 to -> conversionNop to x
1542 MO_U_Conv I16 I8 -> conversionNop I8 x
-- Zero-extension is done by masking with AND.
1543 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1544 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1546 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1547 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1548 MO_S_Neg rep -> trivialUCode rep NEG x
-- conversionNop: no instruction needed, just relabel the rep.
1551 conversionNop new_rep expr
1552 = do e_code <- getRegister expr
1553 return (swizzleRegisterRep e_code new_rep)
-- PowerPC dyadic MachOps.  Sub-word operands are explicitly extended
-- to 32 bits (extendSExpr/extendUExpr) before comparisons, shifts and
-- division, since the hardware operates on full words.
1555 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1557 MO_Eq F32 -> condFltReg EQQ x y
1558 MO_Ne F32 -> condFltReg NE x y
1560 MO_S_Gt F32 -> condFltReg GTT x y
1561 MO_S_Ge F32 -> condFltReg GE x y
1562 MO_S_Lt F32 -> condFltReg LTT x y
1563 MO_S_Le F32 -> condFltReg LE x y
1565 MO_Eq F64 -> condFltReg EQQ x y
1566 MO_Ne F64 -> condFltReg NE x y
1568 MO_S_Gt F64 -> condFltReg GTT x y
1569 MO_S_Ge F64 -> condFltReg GE x y
1570 MO_S_Lt F64 -> condFltReg LTT x y
1571 MO_S_Le F64 -> condFltReg LE x y
1573 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1574 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1576 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1577 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1578 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1579 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1581 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1582 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1583 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1584 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1586 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1587 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1588 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1589 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1591 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1592 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1593 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1594 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1596 -- optimize addition with 32-bit immediate
-- NOTE(review): the MO_Add I32 case head is missing from this
-- extract.  Immediates that do not fit 16 bits are added with ADDIS
-- (HA-adjusted high half) followed by ADD of the LO half.
1600 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1601 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1604 (src, srcCode) <- getSomeReg x
1605 let imm = litToImm lit
1606 code dst = srcCode `appOL` toOL [
1607 ADDIS dst src (HA imm),
1608 ADD dst dst (RIImm (LO imm))
1610 return (Any I32 code)
1611 _ -> trivialCode I32 True ADD x y
1613 MO_Add rep -> trivialCode rep True ADD x y
1615 case y of -- subfi ('substract from' with immediate) doesn't exist
1616 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1617 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1618 _ -> trivialCodeNoImm rep SUBF y x
1620 MO_Mul rep -> trivialCode rep True MULLW x y
1622 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1624 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1625 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1627 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1628 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1630 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1631 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1633 MO_And rep -> trivialCode rep False AND x y
1634 MO_Or rep -> trivialCode rep False OR x y
1635 MO_Xor rep -> trivialCode rep False XOR x y
1637 MO_Shl rep -> trivialCode rep False SLW x y
1638 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1639 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- Integer literal that fits a 16-bit signed immediate: a single LI.
1641 getRegister (CmmLit (CmmInt i rep))
1642 | Just imm <- makeImmediate rep True i
1644 code dst = unitOL (LI dst imm)
1646 return (Any rep code)
-- Float literal: emitted as read-only data, addressed PIC-correctly
-- via cmmMakeDynamicReference, then loaded.
1648 getRegister (CmmLit (CmmFloat f frep)) = do
1649 lbl <- getNewLabelNat
1650 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1651 Amode addr addr_code <- getAmode dynRef
1653 LDATA ReadOnlyData [CmmDataLabel lbl,
1654 CmmStaticLit (CmmFloat f frep)]
1655 `consOL` (addr_code `snocOL` LD frep dst addr)
1656 return (Any frep code)
-- Remaining literals (big ints, labels): LIS high half, OR low half.
1658 getRegister (CmmLit lit)
1659 = let rep = cmmLitRep lit
1663 OR dst dst (RIImm (LO imm))
1665 in return (Any rep code)
1667 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extendSExpr / extendUExpr: wrap an integer expression of the given
-- rep in a sign- resp. zero-extending conversion to I32; expressions
-- that are already I32 are returned unchanged.
extendSExpr rep x = if rep == I32 then x else CmmMachOp (MO_S_Conv rep I32) [x]
extendUExpr rep x = if rep == I32 then x else CmmMachOp (MO_U_Conv rep I32) [x]
1676 #endif /* powerpc_TARGET_ARCH */
1679 -- -----------------------------------------------------------------------------
1680 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An Amode pairs an addressing mode with the instruction block that
-- computes the registers it refers to.
1682 data Amode = Amode AddrMode InstrBlock
1685 Now, given a tree (the argument to an CmmLoad) that references memory,
1686 produce a suitable addressing mode.
1688 A Rule of the Game (tm) for Amodes: use of the addr bit must
1689 immediately follow use of the code part, since the code part puts
1690 values in registers which the addr then refers to. So you can't put
1691 anything in between, lest it overwrite some of those registers. If
1692 you need to do some other computation between the code part and use of
1693 the addr bit, first store the effective address from the amode in a
1694 temporary, then do the other computation, and then use the temporary:
1698 ... other computation ...
1702 getAmode :: CmmExpr -> NatM Amode
-- CmmRegOff is first normalised into an explicit MachOp addition.
1703 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1705 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1707 #if alpha_TARGET_ARCH
-- Alpha (old Stix-style code, CPS-threaded with thenNat): reg +/-
-- constant folds into the register-plus-immediate addressing mode.
1709 getAmode (StPrim IntSubOp [x, StInt i])
1710 = getNewRegNat PtrRep `thenNat` \ tmp ->
1711 getRegister x `thenNat` \ register ->
1713 code = registerCode register tmp
1714 reg = registerName register tmp
1715 off = ImmInt (-(fromInteger i))
1717 return (Amode (AddrRegImm reg off) code)
1719 getAmode (StPrim IntAddOp [x, StInt i])
1720 = getNewRegNat PtrRep `thenNat` \ tmp ->
1721 getRegister x `thenNat` \ register ->
1723 code = registerCode register tmp
1724 reg = registerName register tmp
1725 off = ImmInt (fromInteger i)
1727 return (Amode (AddrRegImm reg off) code)
-- Absolute immediate address.  NOTE(review): this equation's head and
-- guard lines are missing from this extract.
1731 = return (Amode (AddrImm imm__2) id)
1734 imm__2 = case imm of Just x -> x
-- Fallback: compute the address into a register.
1737 = getNewRegNat PtrRep `thenNat` \ tmp ->
1738 getRegister other `thenNat` \ register ->
1740 code = registerCode register tmp
1741 reg = registerName register tmp
1743 return (Amode (AddrReg reg) code)
1745 #endif /* alpha_TARGET_ARCH */
1747 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1749 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1751 -- This is all just ridiculous, since it carefully undoes
1752 -- what mangleIndexTree has just done.
1753 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1754 | not (is64BitLit lit)
1755 -- ASSERT(rep == I32)???
1756 = do (x_reg, x_code) <- getSomeReg x
1757 let off = ImmInt (-(fromInteger i))
1758 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1760 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1761 | not (is64BitLit lit)
1762 -- ASSERT(rep == I32)???
1763 = do (x_reg, x_code) <- getSomeReg x
1764 let off = ImmInt (fromInteger i)
1765 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1767 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1768 -- recognised by the next rule.
1769 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1771 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << {0,1,2,3}) maps onto the scaled-index addressing
-- mode; the scale factor is 2^shift.
1773 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1774 [y, CmmLit (CmmInt shift _)]])
1775 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1776 = do (x_reg, x_code) <- getNonClobberedReg x
1777 -- x must be in a temp, because it has to stay live over y_code
1778 -- we could compare x_reg and y_reg and do something better here...
1779 (y_reg, y_code) <- getSomeReg y
1781 code = x_code `appOL` y_code
1782 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1783 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-- Small (non-64-bit) literals serve as absolute addresses.
1786 getAmode (CmmLit lit) | not (is64BitLit lit)
1787 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Fallback: compute the address into a register, zero displacement.
-- NOTE(review): this equation's head is missing from this extract.
1790 (reg,code) <- getSomeReg expr
1791 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1793 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1795 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1797 #if sparc_TARGET_ARCH
-- SPARC: reg +/- immediate uses the reg+imm mode (the fits-in-13-bits
-- guards appear to be on lines missing from this extract); reg+reg has
-- its own mode.
1799 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1802 (reg, code) <- getSomeReg x
1804 off = ImmInt (-(fromInteger i))
1805 return (Amode (AddrRegImm reg off) code)
1808 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1811 (reg, code) <- getSomeReg x
1813 off = ImmInt (fromInteger i)
1814 return (Amode (AddrRegImm reg off) code)
1816 getAmode (CmmMachOp (MO_Add rep) [x, y])
1818 (regX, codeX) <- getSomeReg x
1819 (regY, codeY) <- getSomeReg y
1821 code = codeX `appOL` codeY
1822 return (Amode (AddrRegReg regX regY) code)
1824 -- XXX Is this same as "leaf" in Stix?
-- Literal address: SETHI the high bits into a temp; the LO part
-- becomes the addressing-mode offset.
1825 getAmode (CmmLit lit)
1827 tmp <- getNewRegNat I32
1829 code = unitOL (SETHI (HI imm__2) tmp)
1830 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1832 imm__2 = litToImm lit
-- Fallback: compute the address into a register.
1836 (reg, code) <- getSomeReg other
1839 return (Amode (AddrRegImm reg off) code)
1841 #endif /* sparc_TARGET_ARCH */
1843 #ifdef powerpc_TARGET_ARCH
-- PowerPC: fold 16-bit signed immediates into the D-form reg+imm mode
-- (makeImmediate acts as the range check).
1844 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1845 | Just off <- makeImmediate I32 True (-i)
1847 (reg, code) <- getSomeReg x
1848 return (Amode (AddrRegImm reg off) code)
1851 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1852 | Just off <- makeImmediate I32 True i
1854 (reg, code) <- getSomeReg x
1855 return (Amode (AddrRegImm reg off) code)
1857 -- optimize addition with 32-bit immediate
-- ADDIS adds the HA-adjusted high half into a temp; the LO half
-- becomes the displacement.  NOTE(review): this equation's guard is
-- missing from this extract.
1859 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1861 tmp <- getNewRegNat I32
1862 (src, srcCode) <- getSomeReg x
1863 let imm = litToImm lit
1864 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1865 return (Amode (AddrRegImm tmp (LO imm)) code)
-- Bare literal: LIS the high half into a temp, LO as displacement.
1867 getAmode (CmmLit lit)
1869 tmp <- getNewRegNat I32
1870 let imm = litToImm lit
1871 code = unitOL (LIS tmp (HA imm))
1872 return (Amode (AddrRegImm tmp (LO imm)) code)
1874 getAmode (CmmMachOp (MO_Add I32) [x, y])
1876 (regX, codeX) <- getSomeReg x
1877 (regY, codeY) <- getSomeReg y
1878 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Fallback: compute the address into a register, zero displacement.
1882 (reg, code) <- getSomeReg other
1885 return (Amode (AddrRegImm reg off) code)
1886 #endif /* powerpc_TARGET_ARCH */
1888 -- -----------------------------------------------------------------------------
1889 -- getOperand: sometimes any operand will do.
1891 -- getNonClobberedOperand: the value of the operand will remain valid across
1892 -- the computation of an arbitrary expression, unless the expression
1893 -- is computed directly into a register which the operand refers to
1894 -- (see trivialCode where this function is used for an example).
1896 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1898 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1899 #if x86_64_TARGET_ARCH
-- x86_64: non-zero float literals become rip-relative loads from a
-- read-only data section (the CmmStaticLit line is missing from this
-- extract).
1900 getNonClobberedOperand (CmmLit lit)
1901 | isSuitableFloatingPointLit lit = do
1902 lbl <- getNewLabelNat
1903 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1905 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals can be immediate operands directly.
1907 getNonClobberedOperand (CmmLit lit)
1908 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1909 return (OpImm (litToImm lit), nilOL)
-- Loads stay memory operands, but if the amode mentions a clobberable
-- real register the effective address is first saved into a fresh
-- temp with LEA so the operand survives later computation.
1910 getNonClobberedOperand (CmmLoad mem pk)
1911 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1912 Amode src mem_code <- getAmode mem
1914 if (amodeCouldBeClobbered src)
1916 tmp <- getNewRegNat wordRep
1917 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1918 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1921 return (OpAddr src', save_code `appOL` mem_code)
-- Anything else: evaluate into a non-clobberable register.
1922 getNonClobberedOperand e = do
1923 (reg, code) <- getNonClobberedReg e
1924 return (OpReg reg, code)
-- | True if the addressing mode refers to any real register that the
-- allocator considers free — such registers may be clobbered by code
-- generated for a later expression.
1926 amodeCouldBeClobbered :: AddrMode -> Bool
1927 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1929 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1930 regClobbered _ = False
1932 -- getOperand: the operand is not required to remain valid across the
1933 -- computation of an arbitrary expression.
1934 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1935 #if x86_64_TARGET_ARCH
-- Non-zero float literals: rip-relative read-only data, as in
-- getNonClobberedOperand above.
1936 getOperand (CmmLit lit)
1937 | isSuitableFloatingPointLit lit = do
1938 lbl <- getNewLabelNat
1939 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1941 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1943 getOperand (CmmLit lit)
1944 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
1945 return (OpImm (litToImm lit), nilOL)
1946 getOperand (CmmLoad mem pk)
1947 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1948 Amode src mem_code <- getAmode mem
1949 return (OpAddr src, mem_code)
-- Fallback: any register will do.  NOTE(review): this equation's head
-- is missing from this extract.
1951 (reg, code) <- getSomeReg e
1952 return (OpReg reg, code)
-- | Can this expression be used directly as an x86 operand (memory or
-- immediate) rather than needing a register?
-- NOTE(review): the final catch-all equation (returning False) is
-- missing from this extract.
1954 isOperand :: CmmExpr -> Bool
1955 isOperand (CmmLoad _ _) = True
1956 isOperand (CmmLit lit) = not (is64BitLit lit)
1957 || isSuitableFloatingPointLit lit
-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor, so zero is deliberately not "suitable" here.
isSuitableFloatingPointLit lit = case lit of
    CmmFloat f _ -> f /= 0.0
    _            -> False
-- | Like getOperand, but the operand must stay valid across later
-- computation, so the register fallback uses getNonClobberedReg.
1967 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1968 getRegOrMem (CmmLoad mem pk)
1969 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1970 Amode src mem_code <- getAmode mem
1971 return (OpAddr src, mem_code)
-- NOTE(review): this fallback equation's head is missing from this
-- extract.
1973 (reg, code) <- getNonClobberedReg e
1974 return (OpReg reg, code)
1976 #if x86_64_TARGET_ARCH
-- | True if the literal cannot be encoded as a sign-extended 32-bit
-- immediate.  NOTE(review): the 32-bit-target #else branch appears to
-- be missing from this extract; the catch-all below returns False.
1977 is64BitLit (CmmInt i I64) = is64BitInteger i
1978 -- assume that labels are in the range 0-2^31-1: this assumes the
1979 -- small memory model (see gcc docs, -mcmodel=small).
1981 is64BitLit x = False
-- | True when the value lies outside the signed 32-bit range
-- [-0x80000000 .. 0x7fffffff], i.e. it cannot be used as a
-- sign-extended 32-bit immediate and needs a full 64-bit encoding.
is64BitInteger :: Integer -> Bool
is64BitInteger i = not (fitsIn32 i)
  where fitsIn32 n = n >= -0x80000000 && n <= 0x7fffffff
1987 -- -----------------------------------------------------------------------------
1988 -- The 'CondCode' type: Condition codes passed up the tree.
-- The Bool records whether this came from a floating-point comparison
-- (True in the condFltCode results below, False in condIntCode).
1990 data CondCode = CondCode Bool Cond InstrBlock
1992 -- Set up a condition code for a conditional branch.
1994 getCondCode :: CmmExpr -> NatM CondCode
1996 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1998 #if alpha_TARGET_ARCH
-- Alpha backend does not implement condition codes.
1999 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2000 #endif /* alpha_TARGET_ARCH */
2002 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2004 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2005 -- yes, they really do seem to want exactly the same!
-- Dispatch comparison MachOps to condFltCode (floats) or condIntCode
-- (integers) with the matching condition.
2007 getCondCode (CmmMachOp mop [x, y])
2008 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2010 MO_Eq F32 -> condFltCode EQQ x y
2011 MO_Ne F32 -> condFltCode NE x y
2013 MO_S_Gt F32 -> condFltCode GTT x y
2014 MO_S_Ge F32 -> condFltCode GE x y
2015 MO_S_Lt F32 -> condFltCode LTT x y
2016 MO_S_Le F32 -> condFltCode LE x y
2018 MO_Eq F64 -> condFltCode EQQ x y
2019 MO_Ne F64 -> condFltCode NE x y
2021 MO_S_Gt F64 -> condFltCode GTT x y
2022 MO_S_Ge F64 -> condFltCode GE x y
2023 MO_S_Lt F64 -> condFltCode LTT x y
2024 MO_S_Le F64 -> condFltCode LE x y
2026 MO_Eq rep -> condIntCode EQQ x y
2027 MO_Ne rep -> condIntCode NE x y
2029 MO_S_Gt rep -> condIntCode GTT x y
2030 MO_S_Ge rep -> condIntCode GE x y
2031 MO_S_Lt rep -> condIntCode LTT x y
2032 MO_S_Le rep -> condIntCode LE x y
-- Unsigned comparisons use the unsigned condition codes.
2034 MO_U_Gt rep -> condIntCode GU x y
2035 MO_U_Ge rep -> condIntCode GEU x y
2036 MO_U_Lt rep -> condIntCode LU x y
2037 MO_U_Le rep -> condIntCode LEU x y
2039 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2041 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2043 #elif powerpc_TARGET_ARCH
2045 -- almost the same as everywhere else - but we need to
2046 -- extend small integers to 32 bit first
2048 getCondCode (CmmMachOp mop [x, y])
2050 MO_Eq F32 -> condFltCode EQQ x y
2051 MO_Ne F32 -> condFltCode NE x y
2053 MO_S_Gt F32 -> condFltCode GTT x y
2054 MO_S_Ge F32 -> condFltCode GE x y
2055 MO_S_Lt F32 -> condFltCode LTT x y
2056 MO_S_Le F32 -> condFltCode LE x y
2058 MO_Eq F64 -> condFltCode EQQ x y
2059 MO_Ne F64 -> condFltCode NE x y
2061 MO_S_Gt F64 -> condFltCode GTT x y
2062 MO_S_Ge F64 -> condFltCode GE x y
2063 MO_S_Lt F64 -> condFltCode LTT x y
2064 MO_S_Le F64 -> condFltCode LE x y
-- Equality can use either extension; ordering must match signedness.
2066 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2067 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2069 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2070 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2071 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2072 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2074 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2075 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2076 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2077 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2079 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2081 getCondCode other = panic "getCondCode(2)(powerpc)"
2087 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2088 -- passed back up the tree.
2090 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2092 #if alpha_TARGET_ARCH
-- Not implemented for the Alpha backend.
2093 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2094 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2095 #endif /* alpha_TARGET_ARCH */
2097 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2098 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2100 -- memory vs immediate
-- CMP can compare memory directly against an immediate, avoiding a
-- register entirely.  (The 'let imm = ...' line is missing from this
-- extract.)
2101 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2102 Amode x_addr x_code <- getAmode x
2105 code = x_code `snocOL`
2106 CMP pk (OpImm imm) (OpAddr x_addr)
2108 return (CondCode False cond code)
-- Comparison against zero: TEST reg,reg is shorter than CMP $0,reg.
2111 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2112 (x_reg, x_code) <- getSomeReg x
2114 code = x_code `snocOL`
2115 TEST pk (OpReg x_reg) (OpReg x_reg)
2117 return (CondCode False cond code)
2119 -- anything vs operand
-- x must stay valid while y's code runs, hence getNonClobberedReg.
2120 condIntCode cond x y | isOperand y = do
2121 (x_reg, x_code) <- getNonClobberedReg x
2122 (y_op, y_code) <- getOperand y
2124 code = x_code `appOL` y_code `snocOL`
2125 CMP (cmmExprRep x) y_op (OpReg x_reg)
2127 return (CondCode False cond code)
2129 -- anything vs anything
2130 condIntCode cond x y = do
2131 (y_reg, y_code) <- getNonClobberedReg y
2132 (x_op, x_code) <- getRegOrMem x
2134 code = y_code `appOL`
2136 CMP (cmmExprRep x) (OpReg y_reg) x_op
2138 return (CondCode False cond code)
2141 #if i386_TARGET_ARCH
-- i386 float compare on the x87 stack via the GCMP pseudo-instruction.
2142 condFltCode cond x y
2143 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2144 (x_reg, x_code) <- getNonClobberedReg x
2145 (y_reg, y_code) <- getSomeReg y
2147 code = x_code `appOL` y_code `snocOL`
2148 GCMP cond x_reg y_reg
2149 -- The GCMP insn does the test and sets the zero flag if comparable
2150 -- and true. Hence we always supply EQQ as the condition to test.
2151 return (CondCode True EQQ code)
2152 #endif /* i386_TARGET_ARCH */
2154 #if x86_64_TARGET_ARCH
2155 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2156 -- an operand, but the right must be a reg. We can probably do better
2157 -- than this general case...
2158 condFltCode cond x y = do
2159 (x_reg, x_code) <- getNonClobberedReg x
2160 (y_op, y_code) <- getOperand y
2162 code = x_code `appOL`
2164 CMP (cmmExprRep x) y_op (OpReg x_reg)
2165 -- NB(1): we need to use the unsigned comparison operators on the
2166 -- result of this comparison.
2168 return (CondCode True (condToUnsigned cond) code)
2171 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2173 #if sparc_TARGET_ARCH
-- SPARC has no CMP instruction; comparisons are SUBs into %g0 (g0)
-- with the condition codes enabled (the True flag on SUB below).
2175 condIntCode cond x (CmmLit (CmmInt y rep))
2178 (src1, code) <- getSomeReg x
2180 src2 = ImmInt (fromInteger y)
2181 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2182 return (CondCode False cond code')
2184 condIntCode cond x y = do
2185 (src1, code1) <- getSomeReg x
2186 (src2, code2) <- getSomeReg y
2188 code__2 = code1 `appOL` code2 `snocOL`
2189 SUB False True src1 (RIReg src2) g0
2190 return (CondCode False cond code__2)
-- Float comparison: if the operands have mixed widths, the F32 one is
-- first promoted to F64 in a fresh temp so FCMP sees matching reps.
2193 condFltCode cond x y = do
2194 (src1, code1) <- getSomeReg x
2195 (src2, code2) <- getSomeReg y
2196 tmp <- getNewRegNat F64
2198 promote x = FxTOy F32 F64 x tmp
2205 code1 `appOL` code2 `snocOL`
2206 FCMP True pk1 src1 src2
2207 else if pk1 == F32 then
2208 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2209 FCMP True F64 tmp src2
2211 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2212 FCMP True F64 src1 tmp
2213 return (CondCode True cond code__2)
2215 #endif /* sparc_TARGET_ARCH */
2217 #if powerpc_TARGET_ARCH
2218 -- ###FIXME: I16 and I8!
-- Immediate form: only when the literal fits a PPC immediate field
-- (makeImmediate checks, signedness depending on the condition).
2219 condIntCode cond x (CmmLit (CmmInt y rep))
2220 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2222 (src1, code) <- getSomeReg x
2224 code' = code `snocOL`
2225 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2226 return (CondCode False cond code')
2228 condIntCode cond x y = do
2229 (src1, code1) <- getSomeReg x
2230 (src2, code2) <- getSomeReg y
2232 code' = code1 `appOL` code2 `snocOL`
2233 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2234 return (CondCode False cond code')
2236 condFltCode cond x y = do
2237 (src1, code1) <- getSomeReg x
2238 (src2, code2) <- getSomeReg y
2240 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2241 code'' = case cond of -- twiddle CR to handle unordered case
2242 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2243 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR bit positions used by the CRNOR fixups above.
2246 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2247 return (CondCode True cond code'')
2249 #endif /* powerpc_TARGET_ARCH */
2251 -- -----------------------------------------------------------------------------
2252 -- Generating assignments
2254 -- Assignments are really at the heart of the whole code generation
2255 -- business. Almost all top-level nodes of any real importance are
2256 -- assignments, which correspond to loads, stores, or register
2257 -- transfers. If we're really lucky, some of the register transfers
2258 -- will go away, because we can use the destination register to
2259 -- complete the code generation for the right hand side. This only
2260 -- fails when the right hand side is forced into a fixed register
2261 -- (e.g. the result of a call).
-- Arguments: the MachRep of the value, the destination (an address
-- expression for *Mem, a CmmReg for *Reg), and the source expression.
2263 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2264 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2266 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2267 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2269 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2271 #if alpha_TARGET_ARCH
-- Old-style (pre-OrdList) Alpha code using thenNat chains and
-- difference-list instruction sequences.
2273 assignIntCode pk (CmmLoad dst _) src
2274 = getNewRegNat IntRep `thenNat` \ tmp ->
2275 getAmode dst `thenNat` \ amode ->
2276 getRegister src `thenNat` \ register ->
2278 code1 = amodeCode amode []
2279 dst__2 = amodeAddr amode
2280 code2 = registerCode register tmp []
2281 src__2 = registerName register tmp
2282 sz = primRepToSize pk
2283 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- register-to-register case: if src is in a fixed register we must
-- copy it out (OR src,src -> dst); otherwise reuse dst directly.
2287 assignIntCode pk dst src
2288 = getRegister dst `thenNat` \ register1 ->
2289 getRegister src `thenNat` \ register2 ->
2291 dst__2 = registerName register1 zeroh
2292 code = registerCode register2 dst__2
2293 src__2 = registerName register2 dst__2
2294 code__2 = if isFixed register2
2295 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2300 #endif /* alpha_TARGET_ARCH */
2302 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2304 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2306 -- integer assignment to memory
2307 assignMem_IntCode pk addr src = do
2308 Amode addr code_addr <- getAmode addr
2309 (code_src, op_src) <- get_op_RI src
2311 code = code_src `appOL`
2313 MOV pk op_src (OpAddr addr)
2314 -- NOTE: op_src is stable, so it will still be valid
2315 -- after code_addr. This may involve the introduction
2316 -- of an extra MOV to a temporary register, but we hope
2317 -- the register allocator will get rid of it.
-- Pick an immediate operand for small literals, otherwise a
-- non-clobbered register.
2321 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2322 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2323 = return (nilOL, OpImm (litToImm lit))
2325 = do (reg,code) <- getNonClobberedReg op
2326 return (code, OpReg reg)
2329 -- Assign; dst is a reg, rhs is mem
2330 assignReg_IntCode pk reg (CmmLoad src _) = do
2331 load_code <- intLoadCode (MOV pk) src
2332 return (load_code (getRegisterReg reg))
2334 -- dst is a reg, but src could be anything
2335 assignReg_IntCode pk reg src = do
2336 code <- getAnyReg src
2337 return (code (getRegisterReg reg))
2339 #endif /* i386_TARGET_ARCH */
2341 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2343 #if sparc_TARGET_ARCH
-- store: evaluate src to a reg, then ST it at the computed address.
2345 assignMem_IntCode pk addr src = do
2346 (srcReg, code) <- getSomeReg src
2347 Amode dstAddr addr_code <- getAmode addr
2348 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- register assign: Any results are generated straight into dst;
-- Fixed results need an explicit move (OR with g0 acts as mov).
2350 assignReg_IntCode pk reg src = do
2351 r <- getRegister src
2353 Any _ code -> code dst
2354 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2356 dst = getRegisterReg reg
2359 #endif /* sparc_TARGET_ARCH */
2361 #if powerpc_TARGET_ARCH
-- store: evaluate src to a reg, then ST it at the computed address.
2363 assignMem_IntCode pk addr src = do
2364 (srcReg, code) <- getSomeReg src
2365 Amode dstAddr addr_code <- getAmode addr
2366 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2368 -- dst is a reg, but src could be anything
-- Any results go straight into dst; Fixed results need an MR (move
-- register) from the fixed reg into dst.
2369 assignReg_IntCode pk reg src
2371 r <- getRegister src
2373 Any _ code -> code dst
2374 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2376 dst = getRegisterReg reg
2378 #endif /* powerpc_TARGET_ARCH */
2381 -- -----------------------------------------------------------------------------
2382 -- Floating-point assignments
2384 #if alpha_TARGET_ARCH
-- Same structure as the Alpha integer case above, but moves use FMOV.
2386 assignFltCode pk (CmmLoad dst _) src
2387 = getNewRegNat pk `thenNat` \ tmp ->
2388 getAmode dst `thenNat` \ amode ->
2389 getRegister src `thenNat` \ register ->
2391 code1 = amodeCode amode []
2392 dst__2 = amodeAddr amode
2393 code2 = registerCode register tmp []
2394 src__2 = registerName register tmp
2395 sz = primRepToSize pk
2396 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2400 assignFltCode pk dst src
2401 = getRegister dst `thenNat` \ register1 ->
2402 getRegister src `thenNat` \ register2 ->
2404 dst__2 = registerName register1 zeroh
2405 code = registerCode register2 dst__2
2406 src__2 = registerName register2 dst__2
2407 code__2 = if isFixed register2
2408 then code . mkSeqInstr (FMOV src__2 dst__2)
2413 #endif /* alpha_TARGET_ARCH */
2415 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2417 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2419 -- Floating point assignment to memory
-- i386 stores via the x87 GST; x86_64 uses a plain (SSE) MOV.
2420 assignMem_FltCode pk addr src = do
2421 (src_reg, src_code) <- getNonClobberedReg src
2422 Amode addr addr_code <- getAmode addr
2424 code = src_code `appOL`
2426 IF_ARCH_i386(GST pk src_reg addr,
2427 MOV pk (OpReg src_reg) (OpAddr addr))
2430 -- Floating point assignment to a register/temporary
2431 assignReg_FltCode pk reg src = do
2432 src_code <- getAnyReg src
2433 return (src_code (getRegisterReg reg))
2435 #endif /* i386_TARGET_ARCH */
2437 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2439 #if sparc_TARGET_ARCH
2441 -- Floating point assignment to memory
-- If the source's rep differs from the store rep, convert through a
-- fresh temp with FxTOy before the ST.
2442 assignMem_FltCode pk addr src = do
2443 Amode dst__2 code1 <- getAmode addr
2444 (src__2, code2) <- getSomeReg src
2445 tmp1 <- getNewRegNat pk
2447 pk__2 = cmmExprRep src
2448 code__2 = code1 `appOL` code2 `appOL`
2450 then unitOL (ST pk src__2 dst__2)
2451 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2454 -- Floating point assignment to a register/temporary
2455 -- ToDo: Verify correctness
2456 assignReg_FltCode pk reg src = do
2457 r <- getRegister src
2458 v1 <- getNewRegNat pk
-- NOTE(review): the Fixed case moves freg into v1, not into dst --
-- looks suspicious given the ToDo above; confirm against callers.
2460 Any _ code -> code dst
2461 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2463 dst = getRegisterReg reg
2465 #endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH

-- On PowerPC the float cases simply delegate to the integer
-- assignment code: the same load/store and register-move forms work
-- for floating-point values of the right MachRep.
assignMem_FltCode pk addr src = assignMem_IntCode pk addr src
assignReg_FltCode pk reg src = assignReg_IntCode pk reg src

#endif /* powerpc_TARGET_ARCH */
2476 -- -----------------------------------------------------------------------------
2477 -- Generating a non-local jump
2479 -- (If applicable) Do not fill the delay slots here; you will confuse the
2480 -- register allocator.
-- Jump to a computed target; per-architecture implementations follow.
2482 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2484 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2486 #if alpha_TARGET_ARCH
2488 genJump (CmmLabel lbl)
2489 | isAsmTemp lbl = returnInstr (BR target)
2490 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2492 target = ImmCLbl lbl
2495 = getRegister tree `thenNat` \ register ->
2496 getNewRegNat PtrRep `thenNat` \ tmp ->
2498 dst = registerName register pv
2499 code = registerCode register pv
2500 target = registerName register pv
2502 if isFixed register then
2503 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2505 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2507 #endif /* alpha_TARGET_ARCH */
2509 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2511 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump through memory, immediate label, or register.
2513 genJump (CmmLoad mem pk) = do
2514 Amode target code <- getAmode mem
2515 return (code `snocOL` JMP (OpAddr target))
2517 genJump (CmmLit lit) = do
2518 return (unitOL (JMP (OpImm (litToImm lit))))
2521 (reg,code) <- getSomeReg expr
2522 return (code `snocOL` JMP (OpReg reg))
2524 #endif /* i386_TARGET_ARCH */
2526 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2528 #if sparc_TARGET_ARCH
-- SPARC: the trailing NOP after CALL/JMP is for the branch delay
-- slot (deliberately left unfilled; see comment further down).
2530 genJump (CmmLit (CmmLabel lbl))
2531 = return (toOL [CALL (Left target) 0 True, NOP])
2533 target = ImmCLbl lbl
2537 (target, code) <- getSomeReg tree
2538 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2540 #endif /* sparc_TARGET_ARCH */
2542 #if powerpc_TARGET_ARCH
-- PPC: direct jump to a label, or indirect via the count register.
2543 genJump (CmmLit (CmmLabel lbl))
2544 = return (unitOL $ JMP lbl)
2548 (target,code) <- getSomeReg tree
2549 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2550 #endif /* powerpc_TARGET_ARCH */
2553 -- -----------------------------------------------------------------------------
2554 -- Unconditional branches
-- Branch to a known basic block; one-liner per architecture.
2556 genBranch :: BlockId -> NatM InstrBlock
2558 #if alpha_TARGET_ARCH
2559 genBranch id = return (unitOL (BR id))
2562 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2563 genBranch id = return (unitOL (JXX ALWAYS id))
2566 #if sparc_TARGET_ARCH
-- NOP after the branch: SPARC delay slot (see note below).
2567 genBranch (BlockId id) = return (toOL [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP])
2570 #if powerpc_TARGET_ARCH
2571 genBranch id = return (unitOL (BCC ALWAYS id))
2575 -- -----------------------------------------------------------------------------
2576 -- Conditional jumps
2579 Conditional jumps are always to local labels, so we can use branch
2580 instructions. We peek at the arguments to decide what kind of
2583 ALPHA: For comparisons with 0, we're laughing, because we can just do
2584 the desired conditional branch.
2586 I386: First, we have to ensure that the condition
2587 codes are set according to the supplied comparison operation.
2589 SPARC: First, we have to ensure that the condition codes are set
2590 according to the supplied comparison operation. We generate slightly
2591 different code for floating point comparisons, because a floating
2592 point operation cannot directly precede a @BF@. We assume the worst
2593 and fill that slot with a @NOP@.
2595 SPARC: Do not fill the delay slots here; you will confuse the register
2601 :: BlockId -- the branch target
2602 -> CmmExpr -- the condition on which to branch
2605 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2607 #if alpha_TARGET_ARCH
-- Old Stix-era Alpha code. Comparison against zero can branch
-- directly on the value.
2609 genCondJump id (StPrim op [x, StInt 0])
2610 = getRegister x `thenNat` \ register ->
2611 getNewRegNat (registerRep register)
2614 code = registerCode register tmp
2615 value = registerName register tmp
2616 pk = registerRep register
2617 target = ImmCLbl lbl
2619 returnSeq code [BI (cmpOp op) value target]
-- Map each comparison primop to the Alpha branch condition used when
-- testing the value against zero.
2621 cmpOp CharGtOp = GTT
2623 cmpOp CharEqOp = EQQ
2625 cmpOp CharLtOp = LTT
-- Unsigned-vs-zero cases degenerate: >= 0 always holds, < 0 never.
2634 cmpOp WordGeOp = ALWAYS
2635 cmpOp WordEqOp = EQQ
2637 cmpOp WordLtOp = NEVER
2638 cmpOp WordLeOp = EQQ
2640 cmpOp AddrGeOp = ALWAYS
2641 cmpOp AddrEqOp = EQQ
2643 cmpOp AddrLtOp = NEVER
2644 cmpOp AddrLeOp = EQQ
-- Float comparison against 0.0 branches with BF.
2646 genCondJump lbl (StPrim op [x, StDouble 0.0])
2647 = getRegister x `thenNat` \ register ->
2648 getNewRegNat (registerRep register)
2651 code = registerCode register tmp
2652 value = registerName register tmp
2653 pk = registerRep register
2654 target = ImmCLbl lbl
2656 return (code . mkSeqInstr (BF (cmpOp op) value target))
2658 cmpOp FloatGtOp = GTT
2659 cmpOp FloatGeOp = GE
2660 cmpOp FloatEqOp = EQQ
2661 cmpOp FloatNeOp = NE
2662 cmpOp FloatLtOp = LTT
2663 cmpOp FloatLeOp = LE
2664 cmpOp DoubleGtOp = GTT
2665 cmpOp DoubleGeOp = GE
2666 cmpOp DoubleEqOp = EQQ
2667 cmpOp DoubleNeOp = NE
2668 cmpOp DoubleLtOp = LTT
2669 cmpOp DoubleLeOp = LE
-- General float comparison: compute the FCMP, then branch on the
-- (inverted) result -- note each pair below encodes comparison
-- instruction plus branch condition.
2671 genCondJump lbl (StPrim op [x, y])
2673 = trivialFCode pr instr x y `thenNat` \ register ->
2674 getNewRegNat F64 `thenNat` \ tmp ->
2676 code = registerCode register tmp
2677 result = registerName register tmp
2678 target = ImmCLbl lbl
2680 return (code . mkSeqInstr (BF cond result target))
2682 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2684 fltCmpOp op = case op of
2698 (instr, cond) = case op of
2699 FloatGtOp -> (FCMP TF LE, EQQ)
2700 FloatGeOp -> (FCMP TF LTT, EQQ)
2701 FloatEqOp -> (FCMP TF EQQ, NE)
2702 FloatNeOp -> (FCMP TF EQQ, EQQ)
2703 FloatLtOp -> (FCMP TF LTT, NE)
2704 FloatLeOp -> (FCMP TF LE, NE)
2705 DoubleGtOp -> (FCMP TF LE, EQQ)
2706 DoubleGeOp -> (FCMP TF LTT, EQQ)
2707 DoubleEqOp -> (FCMP TF EQQ, NE)
2708 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2709 DoubleLtOp -> (FCMP TF LTT, NE)
2710 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same inverted-condition scheme.
2712 genCondJump lbl (StPrim op [x, y])
2713 = trivialCode instr x y `thenNat` \ register ->
2714 getNewRegNat IntRep `thenNat` \ tmp ->
2716 code = registerCode register tmp
2717 result = registerName register tmp
2718 target = ImmCLbl lbl
2720 return (code . mkSeqInstr (BI cond result target))
2722 (instr, cond) = case op of
2723 CharGtOp -> (CMP LE, EQQ)
2724 CharGeOp -> (CMP LTT, EQQ)
2725 CharEqOp -> (CMP EQQ, NE)
2726 CharNeOp -> (CMP EQQ, EQQ)
2727 CharLtOp -> (CMP LTT, NE)
2728 CharLeOp -> (CMP LE, NE)
2729 IntGtOp -> (CMP LE, EQQ)
2730 IntGeOp -> (CMP LTT, EQQ)
2731 IntEqOp -> (CMP EQQ, NE)
2732 IntNeOp -> (CMP EQQ, EQQ)
2733 IntLtOp -> (CMP LTT, NE)
2734 IntLeOp -> (CMP LE, NE)
2735 WordGtOp -> (CMP ULE, EQQ)
2736 WordGeOp -> (CMP ULT, EQQ)
2737 WordEqOp -> (CMP EQQ, NE)
2738 WordNeOp -> (CMP EQQ, EQQ)
2739 WordLtOp -> (CMP ULT, NE)
2740 WordLeOp -> (CMP ULE, NE)
2741 AddrGtOp -> (CMP ULE, EQQ)
2742 AddrGeOp -> (CMP ULT, EQQ)
2743 AddrEqOp -> (CMP EQQ, NE)
2744 AddrNeOp -> (CMP EQQ, EQQ)
2745 AddrLtOp -> (CMP ULT, NE)
2746 AddrLeOp -> (CMP ULE, NE)
2748 #endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH

-- | Emit the flag-setting code for the condition, then a conditional
-- jump (JXX) to the target block.
genCondJump id bool =
    getCondCode bool >>= \(CondCode _ cc setup) ->
    return (setup `snocOL` JXX cc id)
2760 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2762 #if x86_64_TARGET_ARCH
-- Integer conditions jump directly; float conditions (SSE ucomis*)
-- additionally need parity-flag handling for the unordered (NaN)
-- case, built from the plain/or_unordered/and_ordered sequences
-- below. (Several alternatives of this definition are not visible in
-- this extract.)
2764 genCondJump id bool = do
2765 CondCode is_float cond cond_code <- getCondCode bool
2768 return (cond_code `snocOL` JXX cond id)
2770 lbl <- getBlockIdNat
2772 -- see comment with condFltReg
2773 let code = case cond of
2779 plain_test = unitOL (
2782 or_unordered = toOL [
2786 and_ordered = toOL [
2792 return (cond_code `appOL` code)
2796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2798 #if sparc_TARGET_ARCH
-- Float conditions branch with BF (with a leading NOP because a
-- floating-point op may not directly precede BF); integer conditions
-- use BI. The trailing NOP fills the branch delay slot.
2800 genCondJump (BlockId id) bool = do
2801 CondCode is_float cond code <- getCondCode bool
2806 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2807 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2811 #endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH

-- | Conditional jump on PowerPC: evaluate the condition into the
-- condition register via 'getCondCode', then branch with BCC.
genCondJump id bool = do
    CondCode _isFloat cc setup <- getCondCode bool
    return (setup `snocOL` BCC cc id)

#endif /* powerpc_TARGET_ARCH */
2823 -- -----------------------------------------------------------------------------
2824 -- Generating C calls
2826 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2827 -- @get_arg@, which moves the arguments to the correct registers/stack
2828 -- locations. Apart from that, the code is easy.
2830 -- (If applicable) Do not fill the delay slots here; you will confuse the
2831 -- register allocator.
2834 :: CmmCallTarget -- function to call
2835 -> [(CmmReg,MachHint)] -- where to put the result
2836 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2837 -> Maybe [GlobalReg] -- volatile regs to save
2840 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2842 #if alpha_TARGET_ARCH
-- Old Stix-era Alpha call code: marshal args with get_arg, then
-- LDA/JSR/LDGP around the call proper.
2846 genCCall fn cconv result_regs args
2847 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2848 `thenNat` \ ((unused,_), argCode) ->
2850 nRegs = length allArgRegs - length unused
2851 code = asmSeqThen (map ($ []) argCode)
2854 LDA pv (AddrImm (ImmLab (ptext fn))),
2855 JSR ra (AddrReg pv) nRegs,
2856 LDGP gp (AddrReg ra)]
2858 ------------------------
2859 {- Try to get a value into a specific register (or registers) for
2860 a call. The first 6 arguments go into the appropriate
2861 argument register (separate registers for integer and floating
2862 point arguments, but used in lock-step), and the remaining
2863 arguments are dumped to the stack, beginning at 0(sp). Our
2864 first argument is a pair of the list of remaining argument
2865 registers to be assigned for this call and the next stack
2866 offset to use for overflowing arguments. This way,
2867 @get_arg@ can be applied to all of a call's arguments using
2871 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2872 -> StixTree -- Current argument
2873 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2875 -- We have to use up all of our argument registers first...
2877 get_arg ((iDst,fDst):dsts, offset) arg
2878 = getRegister arg `thenNat` \ register ->
2880 reg = if isFloatingRep pk then fDst else iDst
2881 code = registerCode register reg
2882 src = registerName register reg
2883 pk = registerRep register
2886 if isFloatingRep pk then
2887 ((dsts, offset), if isFixed register then
2888 code . mkSeqInstr (FMOV src fDst)
2891 ((dsts, offset), if isFixed register then
2892 code . mkSeqInstr (OR src (RIReg src) iDst)
2895 -- Once we have run out of argument registers, we move to the
2898 get_arg ([], offset) arg
2899 = getRegister arg `thenNat` \ register ->
2900 getNewRegNat (registerRep register)
2903 code = registerCode register tmp
2904 src = registerName register tmp
2905 pk = registerRep register
2906 sz = primRepToSize pk
2908 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2910 #endif /* alpha_TARGET_ARCH */
2912 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2914 #if i386_TARGET_ARCH
2916 -- we only cope with a single result for foreign calls
-- A few float primops are inlined as x87 instructions instead of
-- becoming real calls; everything else goes out of line.
2917 genCCall (CmmPrim op) [(r,_)] args vols = do
2919 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2920 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2922 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2923 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2925 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2926 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
2928 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
2929 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
2931 other_op -> outOfLineFloatOp op r args vols
2933 actuallyInlineFloatOp rep instr [(x,_)]
2934 = do res <- trivialUFCode rep instr x
2936 return (any (getRegisterReg r))
-- General i386 C call: push args right-to-left, call, pop args
-- (unless stdcall, where the callee pops), move results out of
-- eax/edx/fake0.
2938 genCCall target dest_regs args vols = do
2940 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
2941 #if !darwin_TARGET_OS
2942 tot_arg_size = sum sizes
-- Darwin requires 16-byte stack alignment at the call, hence the
-- rounding and explicit pad below.
2944 raw_arg_size = sum sizes
2945 tot_arg_size = roundTo 16 raw_arg_size
2946 arg_pad_size = tot_arg_size - raw_arg_size
2947 delta0 <- getDeltaNat
2948 setDeltaNat (delta0 - arg_pad_size)
2951 push_codes <- mapM push_arg (reverse args)
2952 delta <- getDeltaNat
2955 -- deal with static vs dynamic call targets
2956 (callinsns,cconv) <-
2959 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
2960 -> -- ToDo: stdcall arg sizes
2961 return (unitOL (CALL (Left fn_imm) []), conv)
2962 where fn_imm = ImmCLbl lbl
2963 CmmForeignCall expr conv
2964 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
2965 ASSERT(dyn_rep == I32)
2966 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
2969 #if darwin_TARGET_OS
2971 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2972 DELTA (delta0 - arg_pad_size)]
2973 `appOL` concatOL push_codes
2976 = concatOL push_codes
2977 call = callinsns `appOL`
2979 -- Deallocate parameters after call for ccall;
2980 -- but not for stdcall (callee does it)
2981 (if cconv == StdCallConv || tot_arg_size==0 then [] else
2982 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
2984 [DELTA (delta + tot_arg_size)]
2987 setDeltaNat (delta + tot_arg_size)
2990 -- assign the results, if necessary
-- I64 comes back in eax/edx; floats in the x87 top (fake0); other
-- reps in eax.
2991 assign_code [] = nilOL
2992 assign_code [(dest,_hint)] =
2994 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
2995 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
2996 F32 -> unitOL (GMOV fake0 r_dest)
2997 F64 -> unitOL (GMOV fake0 r_dest)
2998 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3000 r_dest_hi = getHiVRegFromLo r_dest
3001 rep = cmmRegRep dest
3002 r_dest = getRegisterReg dest
3003 assign_code many = panic "genCCall.assign_code many"
3005 return (push_code `appOL`
3007 assign_code dest_regs)
-- round x up to the next multiple of a
3015 roundTo a x | x `mod` a == 0 = x
3016 | otherwise = x + a - (x `mod` a)
3019 push_arg :: (CmmExpr,MachHint){-current argument-}
3020 -> NatM InstrBlock -- code
3022 push_arg (arg,_hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit words, high word first.
3023 | arg_rep == I64 = do
3024 ChildCode64 code r_lo <- iselExpr64 arg
3025 delta <- getDeltaNat
3026 setDeltaNat (delta - 8)
3028 r_hi = getHiVRegFromLo r_lo
3030 return ( code `appOL`
3031 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3032 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Floats can't be PUSHed from x87 regs: make room on the stack and
-- GST into it instead.
3037 (code, reg, sz) <- get_op arg
3038 delta <- getDeltaNat
3039 let size = arg_size sz
3040 setDeltaNat (delta-size)
3041 if (case sz of F64 -> True; F32 -> True; _ -> False)
3042 then return (code `appOL`
3043 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3045 GST sz reg (AddrBaseIndex (EABaseReg esp)
3049 else return (code `snocOL`
3050 PUSH I32 (OpReg reg) `snocOL`
3054 arg_rep = cmmExprRep arg
3057 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3059 (reg,code) <- getSomeReg op
3060 return (code, reg, cmmExprRep op)
3062 #endif /* i386_TARGET_ARCH */
3064 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Lower a float CallishMachOp into a real C call to the matching
-- libm routine. F64 results call directly; F32 (on i386) is promoted
-- to a F64 call into a temp, then demoted back into the destination.
3066 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3067 -> Maybe [GlobalReg] -> NatM InstrBlock
3068 outOfLineFloatOp mop res args vols
3070 targetExpr <- cmmMakeDynamicReference addImportNat True lbl
3071 let target = CmmForeignCall targetExpr CCallConv
3073 if cmmRegRep res == F64
3075 stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3079 tmp = CmmLocal (LocalReg uq F64)
3081 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)]
3082 (map promote args) vols)
3083 code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3084 return (code1 `appOL` code2)
3086 #if i386_TARGET_ARCH
3087 promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3088 demote x = CmmMachOp (MO_S_Conv F64 F32) [x]
-- x86_64: no promotion needed (the *f libm variants are used only on
-- i386's F64-promoting path above).
3090 promote (x,hint) = (x,hint)
3094 lbl = mkForeignLabel fn Nothing True
-- Map each machop to the name of its libm implementation.
3097 MO_F32_Sqrt -> FSLIT("sqrtf")
3098 MO_F32_Sin -> FSLIT("sinf")
3099 MO_F32_Cos -> FSLIT("cosf")
3100 MO_F32_Tan -> FSLIT("tanf")
3101 MO_F32_Exp -> FSLIT("expf")
3102 MO_F32_Log -> FSLIT("logf")
3104 MO_F32_Asin -> FSLIT("asinf")
3105 MO_F32_Acos -> FSLIT("acosf")
3106 MO_F32_Atan -> FSLIT("atanf")
3108 MO_F32_Sinh -> FSLIT("sinhf")
3109 MO_F32_Cosh -> FSLIT("coshf")
3110 MO_F32_Tanh -> FSLIT("tanhf")
3111 MO_F32_Pwr -> FSLIT("powf")
3113 MO_F64_Sqrt -> FSLIT("sqrt")
3114 MO_F64_Sin -> FSLIT("sin")
3115 MO_F64_Cos -> FSLIT("cos")
3116 MO_F64_Tan -> FSLIT("tan")
3117 MO_F64_Exp -> FSLIT("exp")
3118 MO_F64_Log -> FSLIT("log")
3120 MO_F64_Asin -> FSLIT("asin")
3121 MO_F64_Acos -> FSLIT("acos")
3122 MO_F64_Atan -> FSLIT("atan")
3124 MO_F64_Sinh -> FSLIT("sinh")
3125 MO_F64_Cosh -> FSLIT("cosh")
3126 MO_F64_Tanh -> FSLIT("tanh")
3127 MO_F64_Pwr -> FSLIT("pow")
3129 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3131 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3133 #if x86_64_TARGET_ARCH
-- Float primops always go out of line on x86_64 (SSE, no x87 tricks).
3135 genCCall (CmmPrim op) [(r,_)] args vols =
3136 outOfLineFloatOp op r args vols
-- General x86_64 C call, roughly following the SysV AMD64 convention:
-- register args first (separate int/FP register files), remainder on
-- the stack; %al carries the number of SSE register args for varargs.
3138 genCCall target dest_regs args vols = do
3140 -- load up the register arguments
3141 (stack_args, aregs, fregs, load_args_code)
3142 <- load_args args allArgRegs allFPArgRegs nilOL
3145 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3146 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3147 arg_regs = int_regs_used ++ fp_regs_used
3148 -- for annotating the call instruction with
3150 sse_regs = length fp_regs_used
3152 tot_arg_size = arg_size * length stack_args
3154 -- On entry to the called function, %rsp should be aligned
3155 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3156 -- the return address is 16-byte aligned). In STG land
3157 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3158 -- need to make sure we push a multiple of 16-bytes of args,
3159 -- plus the return address, to get the correct alignment.
3160 -- Urg, this is hard. We need to feed the delta back into
3161 -- the arg pushing code.
3162 (real_size, adjust_rsp) <-
3163 if tot_arg_size `rem` 16 == 0
3164 then return (tot_arg_size, nilOL)
3165 else do -- we need to adjust...
3166 delta <- getDeltaNat
3167 setDeltaNat (delta-8)
3168 return (tot_arg_size+8, toOL [
3169 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3173 -- push the stack args, right to left
3174 push_code <- push_args (reverse stack_args) nilOL
3175 delta <- getDeltaNat
3177 -- deal with static vs dynamic call targets
3178 (callinsns,cconv) <-
3181 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3182 -> -- ToDo: stdcall arg sizes
3183 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3184 where fn_imm = ImmCLbl lbl
3185 CmmForeignCall expr conv
3186 -> do (dyn_r, dyn_c) <- getSomeReg expr
3187 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3190 -- The x86_64 ABI requires us to set %al to the number of SSE
3191 -- registers that contain arguments, if the called routine
3192 -- is a varargs function. We don't know whether it's a
3193 -- varargs function or not, so we have to assume it is.
3195 -- It's not safe to omit this assignment, even if the number
3196 -- of SSE regs in use is zero. If %al is larger than 8
3197 -- on entry to a varargs function, seg faults ensue.
3198 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3200 let call = callinsns `appOL`
3202 -- Deallocate parameters after call for ccall;
3203 -- but not for stdcall (callee does it)
-- NOTE(review): the ADD below names 'esp' while the rest of this
-- x86_64 code uses 'rsp' -- presumably an alias for the same
-- register; confirm in the Reg definitions.
3204 (if cconv == StdCallConv || real_size==0 then [] else
3205 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3207 [DELTA (delta + real_size)]
3210 setDeltaNat (delta + real_size)
3213 -- assign the results, if necessary
-- Float results come back in xmm0, everything else in rax.
3214 assign_code [] = nilOL
3215 assign_code [(dest,_hint)] =
3217 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3218 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3219 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3221 rep = cmmRegRep dest
3222 r_dest = getRegisterReg dest
3223 assign_code many = panic "genCCall.assign_code many"
3225 return (load_args_code `appOL`
3228 assign_eax sse_regs `appOL`
3230 assign_code dest_regs)
3233 arg_size = 8 -- always, at the mo
-- Assign args to registers while any remain, FP and int regs
-- consumed independently; leftovers are returned for stack pushing.
3235 load_args :: [(CmmExpr,MachHint)]
3236 -> [Reg] -- int regs avail for args
3237 -> [Reg] -- FP regs avail for args
3239 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3240 load_args args [] [] code = return (args, [], [], code)
3241 -- no more regs to use
3242 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3243 -- no more args to push
3244 load_args ((arg,hint) : rest) aregs fregs code
3245 | isFloatingRep arg_rep =
3249 arg_code <- getAnyReg arg
3250 load_args rest aregs rs (code `appOL` arg_code r)
3255 arg_code <- getAnyReg arg
3256 load_args rest rs fregs (code `appOL` arg_code r)
3258 arg_rep = cmmExprRep arg
3261 (args',ars,frs,code') <- load_args rest aregs fregs code
3262 return ((arg,hint):args', ars, frs, code')
-- Push leftover args on the stack; floats are stored via MOV since
-- PUSH only takes integer operands.
3264 push_args [] code = return code
3265 push_args ((arg,hint):rest) code
3266 | isFloatingRep arg_rep = do
3267 (arg_reg, arg_code) <- getSomeReg arg
3268 delta <- getDeltaNat
3269 setDeltaNat (delta-arg_size)
3270 let code' = code `appOL` toOL [
3271 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3272 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3273 DELTA (delta-arg_size)]
3274 push_args rest code'
3277 -- we only ever generate word-sized function arguments. Promotion
3278 -- has already happened: our Int8# type is kept sign-extended
3279 -- in an Int#, for example.
3280 ASSERT(arg_rep == I64) return ()
3281 (arg_op, arg_code) <- getOperand arg
3282 delta <- getDeltaNat
3283 setDeltaNat (delta-arg_size)
3284 let code' = code `appOL` toOL [PUSH I64 arg_op,
3285 DELTA (delta-arg_size)]
3286 push_args rest code'
3288 arg_rep = cmmExprRep arg
3291 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3293 #if sparc_TARGET_ARCH
3295 The SPARC calling convention is an absolute
3296 nightmare. The first 6x32 bits of arguments are mapped into
3297 %o0 through %o5, and the remaining arguments are dumped to the
3298 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3300 If we have to put args on the stack, move %o6==%sp down by
3301 the number of words to go on the stack, to ensure there's enough space.
3303 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3304 16 words above the stack pointer is a word for the address of
3305 a structure return value. I use this as a temporary location
3306 for moving values from float to int regs. Certainly it isn't
3307 safe to put anything in the 16 words starting at %sp, since
3308 this area can get trashed at any time due to window overflows
3309 caused by signal handlers.
3311 A final complication (if the above isn't enough) is that
3312 we can't blithely calculate the arguments one by one into
3313 %o0 .. %o5. Consider the following nested calls:
3317 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3318 the inner call will itself use %o0, which trashes the value put there
3319 in preparation for the outer call. Upshot: we need to calculate the
3320 args into temporary regs, and move those to arg regs or onto the
3321 stack only immediately prior to the call proper. Sigh.
-- SPARC genCCall: marshal every argument into fresh integer vregs first
-- (see the commentary above about nested calls clobbering %o0..%o5), then
-- move them into the argument registers / stack only immediately before
-- the call. NOTE(review): several interior lines (let keywords, a CmmPrim
-- case header, case alternative patterns) are elided in this excerpt.
3324 genCCall target dest_regs argsAndHints vols = do
3326 args = map fst argsAndHints
3328 argcode_and_vregs <- mapM arg_to_int_vregs args
3330 (argcodes, vregss) = unzip argcode_and_vregs
3331 n_argRegs = length allArgRegs
-- Only the first n_argRegs arguments travel in %o0..%o5.
3332 n_argRegs_used = min (length vregs) n_argRegs
3333 vregs = concat vregss
3334 -- deal with static vs dynamic call targets
3335 callinsns <- (case target of
-- Static target: call the label directly.
3336 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3337 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-- Dynamic target: evaluate the function expression into a vreg first.
3338 CmmForeignCall expr conv -> do
3339 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3340 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- Callish machops go through the out-of-line float-op table below.
3341 (res, reduce) <- outOfLineFloatOp mop
3342 lblOrMopExpr <- case res of
3344 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3346 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3347 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3348 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3352 argcode = concatOL argcodes
-- If extra args overflow onto the stack, move %sp down by an even number
-- of words to preserve 8-byte alignment, and back up after the call.
3353 (move_sp_down, move_sp_up)
3354 = let diff = length vregs - n_argRegs
3355 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3358 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3360 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
-- Final ordering: compute args, drop %sp, shuffle into place, call, raise %sp.
3361 return (argcode `appOL`
3362 move_sp_down `appOL`
3363 transfer_code `appOL`
3368 -- move args from the integer vregs into which they have been
3369 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3370 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3372 move_final [] _ offset -- all args done
-- Out of %o registers: spill remaining args to successive stack words.
3375 move_final (v:vs) [] offset -- out of aregs; move to stack
3376 = ST I32 v (spRel offset)
3377 : move_final vs [] (offset+1)
-- OR with %g0 (always zero) is the SPARC idiom for a register move.
3379 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3380 = OR False g0 (RIReg v) a
3381 : move_final vs az offset
3383 -- generate code to calculate an argument, and move it into one
3384 -- or two integer vregs.
3385 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3386 arg_to_int_vregs arg
-- 64-bit args occupy two 32-bit vregs (hi word first).
-- NOTE(review): several guard/case lines are elided in this excerpt.
3387 | (cmmExprRep arg) == I64
3389 (ChildCode64 code r_lo) <- iselExpr64 arg
3391 r_hi = getHiVRegFromLo r_lo
3392 return (code, [r_hi, r_lo])
3395 (src, code) <- getSomeReg arg
3396 tmp <- getNewRegNat (cmmExprRep arg)
-- F64 case: bounce the double through the scratch word at [%sp+16]
-- (see commentary above) to move it from float regs to two int vregs.
3401 v1 <- getNewRegNat I32
3402 v2 <- getNewRegNat I32
3405 FMOV F64 src f0 `snocOL`
3406 ST F32 f0 (spRel 16) `snocOL`
3407 LD I32 (spRel 16) v1 `snocOL`
3408 ST F32 (fPair f0) (spRel 16) `snocOL`
3409 LD I32 (spRel 16) v2
-- F32 case: one store/load bounce suffices.
3414 v1 <- getNewRegNat I32
3417 ST F32 src (spRel 16) `snocOL`
3418 LD I32 (spRel 16) v1
-- Plain integer case: just copy into a fresh vreg.
3423 v1 <- getNewRegNat I32
3425 code `snocOL` OR False g0 (RIReg src) v1
-- Map a callish MachOp to an out-of-line C library routine (libm).
-- Returns (Left label | Right expression, reduce); `reduce` is True for
-- the F32 ops, meaning the F64 libm routine's result would need narrowing
-- — which this backend cannot do, hence the panic at the call site.
3429 outOfLineFloatOp mop =
3431 mopExpr <- cmmMakeDynamicReference addImportNat True $
3432 mkForeignLabel functionName Nothing True
3433 let mopLabelOrExpr = case mopExpr of
3434 CmmLit (CmmLabel lbl) -> Left lbl
3436 return (mopLabelOrExpr, reduce)
3438 (reduce, functionName) = case mop of
3439 MO_F32_Exp -> (True, FSLIT("exp"))
3440 MO_F32_Log -> (True, FSLIT("log"))
3441 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3443 MO_F32_Sin -> (True, FSLIT("sin"))
3444 MO_F32_Cos -> (True, FSLIT("cos"))
3445 MO_F32_Tan -> (True, FSLIT("tan"))
3447 MO_F32_Asin -> (True, FSLIT("asin"))
3448 MO_F32_Acos -> (True, FSLIT("acos"))
3449 MO_F32_Atan -> (True, FSLIT("atan"))
3451 MO_F32_Sinh -> (True, FSLIT("sinh"))
3452 MO_F32_Cosh -> (True, FSLIT("cosh"))
3453 MO_F32_Tanh -> (True, FSLIT("tanh"))
-- F64 ops map directly onto the double-precision libm entry points.
3455 MO_F64_Exp -> (False, FSLIT("exp"))
3456 MO_F64_Log -> (False, FSLIT("log"))
3457 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3459 MO_F64_Sin -> (False, FSLIT("sin"))
3460 MO_F64_Cos -> (False, FSLIT("cos"))
3461 MO_F64_Tan -> (False, FSLIT("tan"))
3463 MO_F64_Asin -> (False, FSLIT("asin"))
3464 MO_F64_Acos -> (False, FSLIT("acos"))
3465 MO_F64_Atan -> (False, FSLIT("atan"))
3467 MO_F64_Sinh -> (False, FSLIT("sinh"))
3468 MO_F64_Cosh -> (False, FSLIT("cosh"))
3469 MO_F64_Tanh -> (False, FSLIT("tanh"))
3471 other -> pprPanic "outOfLineFloatOp(sparc) "
3472 (pprCallishMachOp mop)
3474 #endif /* sparc_TARGET_ARCH */
3476 #if powerpc_TARGET_ARCH
3478 #if darwin_TARGET_OS || linux_TARGET_OS
3480 The PowerPC calling convention for Darwin/Mac OS X
3481 is described in Apple's document
3482 "Inside Mac OS X - Mach-O Runtime Architecture".
3484 PowerPC Linux uses the System V Release 4 Calling Convention
3485 for PowerPC. It is described in the
3486 "System V Application Binary Interface PowerPC Processor Supplement".
3488 Both conventions are similar:
3489 Parameters may be passed in general-purpose registers starting at r3, in
3490 floating point registers starting at f1, or on the stack.
3492 But there are substantial differences:
3493 * The number of registers used for parameter passing and the exact set of
3494 nonvolatile registers differs (see MachRegs.lhs).
3495 * On Darwin, stack space is always reserved for parameters, even if they are
3496 passed in registers. The called routine may choose to save parameters from
3497 registers to the corresponding space on the stack.
3498 * On Darwin, a corresponding number of GPRs is skipped when a floating point
3499 parameter is passed in an FPR.
3500 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3501 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3502 Darwin just treats an I64 like two separate I32s (high word first).
3503 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3504 4-byte aligned like everything else on Darwin.
3505 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3506 PowerPC Linux does not agree, so neither do we.
3508 According to both conventions, the parameter area should be part of the
3509 caller's stack frame, allocated in the caller's prologue code (large enough
3510 to hold the parameter lists for all called routines). The NCG already
3511 uses the stack for register spilling, leaving 64 bytes free at the top.
3512 If we need a larger parameter area than that, we just allocate a new stack
3513 frame just before ccalling.
-- PowerPC (Darwin/Linux) genCCall: pass arguments per the ABI described in
-- the commentary above, allocating a temporary stack frame when the 64-byte
-- scratch area is not big enough. NOTE(review): several interior lines
-- (do keywords, case alternatives, #endif markers) are elided here.
3516 genCCall target dest_regs argsAndHints vols
-- Sub-word arguments must already have been promoted by the code generator.
3517 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3518 -- we rely on argument promotion in the codeGen
3520 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3522 allArgRegs allFPArgRegs
3526 (labelOrExpr, reduceToF32) <- case target of
3527 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3528 CmmForeignCall expr conv -> return (Right expr, False)
3529 CmmPrim mop -> outOfLineFloatOp mop
3531 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3532 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- Static call: branch-and-link to the label.
3537 `snocOL` BL lbl usedRegs
-- Dynamic call: load the target into CTR and branch through it.
3540 (dynReg, dynCode) <- getSomeReg dyn
3542 `snocOL` MTCTR dynReg
3544 `snocOL` BCTRL usedRegs
3547 #if darwin_TARGET_OS
-- Darwin: 24-byte linkage area precedes the parameter area.
3548 initialStackOffset = 24
3549 -- size of linkage area + size of arguments, in bytes
3550 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3551 map machRepByteWidth argReps
3552 #elif linux_TARGET_OS
-- SysV/Linux: 8-byte linkage area; frame rounded to 16 bytes.
3553 initialStackOffset = 8
3554 stackDelta finalStack = roundTo 16 finalStack
3556 args = map fst argsAndHints
3557 argReps = map cmmExprRep args
3559 roundTo a x | x `mod` a == 0 = x
3560 | otherwise = x + a - (x `mod` a)
-- STU updates the back-chain word while dropping the stack pointer.
3562 move_sp_down finalStack
3564 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3567 where delta = stackDelta finalStack
3568 move_sp_up finalStack
3570 toOL [ADD sp sp (RIImm (ImmInt delta)),
3573 where delta = stackDelta finalStack
-- passArguments: place each argument in GPRs/FPRs or on the stack,
-- accumulating (final stack offset, code, registers used).
3576 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 arguments: evaluated as two 32-bit halves (hi word from the lo vreg).
3577 passArguments ((arg,I64):args) gprs fprs stackOffset
3578 accumCode accumUsed =
3580 ChildCode64 code vr_lo <- iselExpr64 arg
3581 let vr_hi = getHiVRegFromLo vr_lo
3583 #if darwin_TARGET_OS
-- Darwin: an I64 is simply two consecutive I32s (high word first); each
-- half goes in the next GPR if one is left, else to its stack slot.
3588 (accumCode `appOL` code
3589 `snocOL` storeWord vr_hi gprs stackOffset
3590 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3591 ((take 2 gprs) ++ accumUsed)
3593 storeWord vr (gpr:_) offset = MR gpr vr
3594 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3596 #elif linux_TARGET_OS
-- SysV: I64 must start in an odd-numbered GPR pair (skipping one GPR if
-- needed) or go on the stack 8-byte aligned.
3597 let stackOffset' = roundTo 8 stackOffset
3598 stackCode = accumCode `appOL` code
3599 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3600 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3601 regCode hireg loreg =
3602 accumCode `appOL` code
3603 `snocOL` MR hireg vr_hi
3604 `snocOL` MR loreg vr_lo
-- Even remaining count means the next GPR starts an aligned pair.
3607 hireg : loreg : regs | even (length gprs) ->
3608 passArguments args regs fprs stackOffset
3609 (regCode hireg loreg) (hireg : loreg : accumUsed)
-- Odd count: skip one GPR to reach an aligned pair.
3610 _skipped : hireg : loreg : regs ->
3611 passArguments args regs fprs stackOffset
3612 (regCode hireg loreg) (hireg : loreg : accumUsed)
3613 _ -> -- only one or no regs left
3614 passArguments args [] fprs (stackOffset'+8)
-- General case: I32/F32/F64 argument, in a register when one is left.
3618 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3619 | reg : _ <- regs = do
3620 register <- getRegister arg
-- Fixed result must be copied into the arg register; Any computes there.
3621 let code = case register of
3622 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3623 Any _ acode -> acode reg
3627 #if darwin_TARGET_OS
3628 -- The Darwin ABI requires that we reserve stack slots for register parameters
3629 (stackOffset + stackBytes)
3630 #elif linux_TARGET_OS
3631 -- ... the SysV ABI doesn't.
3634 (accumCode `appOL` code)
-- No register left: store to the (possibly realigned) stack slot.
3637 (vr, code) <- getSomeReg arg
3641 (stackOffset' + stackBytes)
3642 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3645 #if darwin_TARGET_OS
3646 -- stackOffset is at least 4-byte aligned
3647 -- The Darwin ABI is happy with that.
3648 stackOffset' = stackOffset
3650 -- ... the SysV ABI requires 8-byte alignment for doubles.
3651 stackOffset' | rep == F64 = roundTo 8 stackOffset
3652 | otherwise = stackOffset
3654 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep accounting: GPRs consumed, FPRs consumed, stack bytes, reg pool.
3655 (nGprs, nFprs, stackBytes, regs) = case rep of
3656 I32 -> (1, 0, 4, gprs)
3657 #if darwin_TARGET_OS
3658 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
-- a float goes in an FPR (hence nGprs > 0 for F32/F64 on Darwin).
3660 F32 -> (1, 1, 4, fprs)
3661 F64 -> (2, 1, 8, fprs)
3662 #elif linux_TARGET_OS
3663 -- ... the SysV ABI doesn't.
3664 F32 -> (0, 1, 4, fprs)
3665 F64 -> (0, 1, 8, fprs)
-- Move the C return value (f1 for floats, r3/r4 for ints) into the Cmm
-- destination register. FRSP narrows to F32 when the out-of-line routine
-- produced an F64 for an F32 op. NOTE(review): the no-result case and
-- surrounding case header are elided in this excerpt.
3668 moveResult reduceToF32 =
3672 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3673 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
-- I64 results come back in r3 (hi) and r4 (lo).
3674 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3676 | otherwise -> unitOL (MR r_dest r3)
3677 where rep = cmmRegRep dest
3678 r_dest = getRegisterReg dest
-- PowerPC analogue of the SPARC table above: map a callish MachOp to a
-- libm routine. Here `reduce` (True for F32 ops) is handled by moveResult
-- via FRSP rather than being a panic.
3680 outOfLineFloatOp mop =
3682 mopExpr <- cmmMakeDynamicReference addImportNat True $
3683 mkForeignLabel functionName Nothing True
3684 let mopLabelOrExpr = case mopExpr of
3685 CmmLit (CmmLabel lbl) -> Left lbl
3687 return (mopLabelOrExpr, reduce)
3689 (functionName, reduce) = case mop of
3690 MO_F32_Exp -> (FSLIT("exp"), True)
3691 MO_F32_Log -> (FSLIT("log"), True)
3692 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3694 MO_F32_Sin -> (FSLIT("sin"), True)
3695 MO_F32_Cos -> (FSLIT("cos"), True)
3696 MO_F32_Tan -> (FSLIT("tan"), True)
3698 MO_F32_Asin -> (FSLIT("asin"), True)
3699 MO_F32_Acos -> (FSLIT("acos"), True)
3700 MO_F32_Atan -> (FSLIT("atan"), True)
3702 MO_F32_Sinh -> (FSLIT("sinh"), True)
3703 MO_F32_Cosh -> (FSLIT("cosh"), True)
3704 MO_F32_Tanh -> (FSLIT("tanh"), True)
3705 MO_F32_Pwr -> (FSLIT("pow"), True)
3707 MO_F64_Exp -> (FSLIT("exp"), False)
3708 MO_F64_Log -> (FSLIT("log"), False)
3709 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3711 MO_F64_Sin -> (FSLIT("sin"), False)
3712 MO_F64_Cos -> (FSLIT("cos"), False)
3713 MO_F64_Tan -> (FSLIT("tan"), False)
3715 MO_F64_Asin -> (FSLIT("asin"), False)
3716 MO_F64_Acos -> (FSLIT("acos"), False)
3717 MO_F64_Atan -> (FSLIT("atan"), False)
3719 MO_F64_Sinh -> (FSLIT("sinh"), False)
3720 MO_F64_Cosh -> (FSLIT("cosh"), False)
3721 MO_F64_Tanh -> (FSLIT("tanh"), False)
3722 MO_F64_Pwr -> (FSLIT("pow"), False)
3723 other -> pprPanic "genCCall(ppc): unknown callish op"
3724 (pprCallishMachOp other)
3726 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3728 #endif /* powerpc_TARGET_ARCH */
3731 -- -----------------------------------------------------------------------------
3732 -- Generating a table-branch
-- genSwitch: compile a Cmm switch into an indexed jump table.
-- Nothing entries (unmapped cases) become zero words in the table.
3734 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3736 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- PIC path (guard elided in this excerpt): the table holds label-minus-table
-- offsets so the data is position independent; the loaded offset is added
-- to the table base before the indirect jump.
3740 (reg,e_code) <- getSomeReg expr
3741 lbl <- getNewLabelNat
3742 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3743 (tableReg,t_code) <- getSomeReg $ dynRef
3745 jumpTable = map jumpTableEntryRel ids
3747 jumpTableEntryRel Nothing
3748 = CmmStaticLit (CmmInt 0 wordRep)
3749 jumpTableEntryRel (Just (BlockId id))
3750 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3751 where blockLabel = mkAsmTempLabel id
-- Scaled index: table base + index * word size.
3753 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3754 (EAIndex reg wORD_SIZE) (ImmInt 0))
3756 code = e_code `appOL` t_code `appOL` toOL [
3757 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3758 ADD wordRep op (OpReg tableReg),
3759 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC path: the table holds absolute labels, so a single indirect
-- jump through [lbl + reg*wORD_SIZE] suffices.
3764 (reg,e_code) <- getSomeReg expr
3765 lbl <- getNewLabelNat
3767 jumpTable = map jumpTableEntry ids
3768 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3769 code = e_code `appOL` toOL [
3770 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3771 JMP_TBL op [ id | Just id <- ids ]
3775 #elif powerpc_TARGET_ARCH
-- PowerPC PIC path: relative jump table; index is shifted left by 2
-- (I32 entries), the offset loaded and added to the table base, then the
-- target is reached via CTR. NOTE(review): the opt_PIC guard lines and the
-- MTCTR instruction line appear to be elided in this excerpt.
3779 (reg,e_code) <- getSomeReg expr
3780 tmp <- getNewRegNat I32
3781 lbl <- getNewLabelNat
3782 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3783 (tableReg,t_code) <- getSomeReg $ dynRef
3785 jumpTable = map jumpTableEntryRel ids
3787 jumpTableEntryRel Nothing
3788 = CmmStaticLit (CmmInt 0 wordRep)
3789 jumpTableEntryRel (Just (BlockId id))
3790 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3791 where blockLabel = mkAsmTempLabel id
3793 code = e_code `appOL` t_code `appOL` toOL [
3794 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3795 SLW tmp reg (RIImm (ImmInt 2)),
3796 LD I32 tmp (AddrRegReg tableReg tmp),
3797 ADD tmp tmp (RIReg tableReg),
3799 BCTR [ id | Just id <- ids ]
-- Non-PIC path: absolute-address table reached via HA/LO relocations.
3804 (reg,e_code) <- getSomeReg expr
3805 tmp <- getNewRegNat I32
3806 lbl <- getNewLabelNat
3808 jumpTable = map jumpTableEntry ids
3810 code = e_code `appOL` toOL [
3811 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3812 SLW tmp reg (RIImm (ImmInt 2)),
3813 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3814 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3816 BCTR [ id | Just id <- ids ]
-- Other architectures: not implemented yet.
3820 genSwitch expr ids = panic "ToDo: genSwitch"
-- Shared helper: an absolute jump-table entry (zero word for holes).
3823 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3824 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3825 where blockLabel = mkAsmTempLabel id
3827 -- -----------------------------------------------------------------------------
3829 -- -----------------------------------------------------------------------------
3832 -- -----------------------------------------------------------------------------
3833 -- 'condIntReg' and 'condFltReg': condition codes into registers
3835 -- Turn those condition codes into integers now (when they appear on
3836 -- the right hand side of an assignment).
3838 -- (If applicable) Do not fill the delay slots here; you will confuse the
3839 -- register allocator.
-- Materialise a comparison result as a 0/1 value in a register (used when
-- a condition appears on the right-hand side of an assignment).
3841 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3843 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3845 #if alpha_TARGET_ARCH
-- Not implemented for Alpha.
3846 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3847 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3848 #endif /* alpha_TARGET_ARCH */
3850 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3852 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: SETcc produces a byte; MOVZxL zero-extends it to a full I32.
-- SETcc only writes byte registers, hence the I8 temporary.
3854 condIntReg cond x y = do
3855 CondCode _ cond cond_code <- condIntCode cond x y
3856 tmp <- getNewRegNat I8
3858 code dst = cond_code `appOL` toOL [
3859 SETCC cond (OpReg tmp),
3860 MOVZxL I8 (OpReg tmp) (OpReg dst)
3863 return (Any I32 code)
3867 #if i386_TARGET_ARCH
-- i386 float comparison: same SETcc/MOVZxL scheme as the integer case
-- (x87 comparisons have already set the flags via condFltCode).
3869 condFltReg cond x y = do
3870 CondCode _ cond cond_code <- condFltCode cond x y
3871 tmp <- getNewRegNat I8
3873 code dst = cond_code `appOL` toOL [
3874 SETCC cond (OpReg tmp),
3875 MOVZxL I8 (OpReg tmp) (OpReg dst)
3878 return (Any I32 code)
3882 #if x86_64_TARGET_ARCH
-- x86_64 SSE float comparison into a register, with explicit handling of
-- unordered (NaN) operands via the parity flag — see the comment below.
3884 condFltReg cond x y = do
3885 CondCode _ cond cond_code <- condFltCode cond x y
3886 tmp1 <- getNewRegNat wordRep
3887 tmp2 <- getNewRegNat wordRep
3889 -- We have to worry about unordered operands (eg. comparisons
3890 -- against NaN). If the operands are unordered, the comparison
3891 -- sets the parity flag, carry flag and zero flag.
3892 -- All comparisons are supposed to return false for unordered
3893 -- operands except for !=, which returns true.
3895 -- Optimisation: we don't have to test the parity flag if we
3896 -- know the test has already excluded the unordered case: eg >
3897 -- and >= test for a zero carry flag, which can only occur for
3898 -- ordered operands.
3900 -- ToDo: by reversing comparisons we could avoid testing the
3901 -- parity flag in more cases.
-- Dispatch on the condition: NE must OR in the parity bit, GU/GEU are
-- already NaN-safe, everything else must AND with NOT-parity.
3906 NE -> or_unordered dst
3907 GU -> plain_test dst
3908 GEU -> plain_test dst
3909 _ -> and_ordered dst)
3911 plain_test dst = toOL [
3912 SETCC cond (OpReg tmp1),
3913 MOVZxL I8 (OpReg tmp1) (OpReg dst)
3915 or_unordered dst = toOL [
3916 SETCC cond (OpReg tmp1),
3917 SETCC PARITY (OpReg tmp2),
3918 OR I8 (OpReg tmp1) (OpReg tmp2),
3919 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3921 and_ordered dst = toOL [
3922 SETCC cond (OpReg tmp1),
3923 SETCC NOTPARITY (OpReg tmp2),
3924 AND I8 (OpReg tmp1) (OpReg tmp2),
3925 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3928 return (Any I32 code)
3932 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3934 #if sparc_TARGET_ARCH
-- SPARC: branch-free sequences for (==0), (==), (/=0), (/=) using the
-- carry flag from SUBcc, and a generic two-block branch sequence otherwise.
-- x == 0: subcc sets carry iff src /= 0; SUBX then computes the 0/1 result.
3936 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
3937 (src, code) <- getSomeReg x
3938 tmp <- getNewRegNat I32
3940 code__2 dst = code `appOL` toOL [
3941 SUB False True g0 (RIReg src) g0,
3942 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3943 return (Any I32 code__2)
-- x == y: XOR first, then the same ==0 trick on the difference.
3945 condIntReg EQQ x y = do
3946 (src1, code1) <- getSomeReg x
3947 (src2, code2) <- getSomeReg y
3948 tmp1 <- getNewRegNat I32
3949 tmp2 <- getNewRegNat I32
3951 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3952 XOR False src1 (RIReg src2) dst,
3953 SUB False True g0 (RIReg dst) g0,
3954 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3955 return (Any I32 code__2)
-- x /= 0: ADDX of zero picks up the carry bit directly.
3957 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
3958 (src, code) <- getSomeReg x
3959 tmp <- getNewRegNat I32
3961 code__2 dst = code `appOL` toOL [
3962 SUB False True g0 (RIReg src) g0,
3963 ADD True False g0 (RIImm (ImmInt 0)) dst]
3964 return (Any I32 code__2)
3966 condIntReg NE x y = do
3967 (src1, code1) <- getSomeReg x
3968 (src2, code2) <- getSomeReg y
3969 tmp1 <- getNewRegNat I32
3970 tmp2 <- getNewRegNat I32
3972 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3973 XOR False src1 (RIReg src2) dst,
3974 SUB False True g0 (RIReg dst) g0,
3975 ADD True False g0 (RIImm (ImmInt 0)) dst]
3976 return (Any I32 code__2)
-- Generic case: branch over "dst := 0" / "dst := 1" basic blocks.
-- NOPs fill the branch delay slots (not filled here by design; see the
-- note above about confusing the register allocator).
3978 condIntReg cond x y = do
3979 BlockId lbl1 <- getBlockIdNat
3980 BlockId lbl2 <- getBlockIdNat
3981 CondCode _ cond cond_code <- condIntCode cond x y
3983 code__2 dst = cond_code `appOL` toOL [
3984 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
3985 OR False g0 (RIImm (ImmInt 0)) dst,
3986 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
3987 NEWBLOCK (BlockId lbl1),
3988 OR False g0 (RIImm (ImmInt 1)) dst,
3989 NEWBLOCK (BlockId lbl2)]
3990 return (Any I32 code__2)
-- Float version: identical structure, but branches on the float condition
-- codes (BF) after condFltCode.
3992 condFltReg cond x y = do
3993 BlockId lbl1 <- getBlockIdNat
3994 BlockId lbl2 <- getBlockIdNat
3995 CondCode _ cond cond_code <- condFltCode cond x y
3997 code__2 dst = cond_code `appOL` toOL [
3999 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4000 OR False g0 (RIImm (ImmInt 0)) dst,
4001 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4002 NEWBLOCK (BlockId lbl1),
4003 OR False g0 (RIImm (ImmInt 1)) dst,
4004 NEWBLOCK (BlockId lbl2)]
4005 return (Any I32 code__2)
4007 #endif /* sparc_TARGET_ARCH */
4009 #if powerpc_TARGET_ARCH
-- PowerPC: extract a single CR bit into a register. The (elided) MFCR moves
-- the condition register into dst, RLWINM rotates the relevant bit into
-- position; CRNOR negates the bit first for the "negated" conditions.
-- NOTE(review): the commented-out branch-based version, the MFCR line and
-- the bit/negate case alternatives are elided in this excerpt.
4010 condReg getCond = do
4011 lbl1 <- getBlockIdNat
4012 lbl2 <- getBlockIdNat
4013 CondCode _ cond cond_code <- getCond
4015 {- code dst = cond_code `appOL` toOL [
4024 code dst = cond_code
4028 RLWINM dst dst (bit + 1) 31 31
4031 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4034 (bit, do_negate) = case cond of
4048 return (Any I32 code)
-- Both int and float versions share the CR-bit extraction above.
4050 condIntReg cond x y = condReg (condIntCode cond x y)
4051 condFltReg cond x y = condReg (condFltCode cond x y)
4052 #endif /* powerpc_TARGET_ARCH */
4055 -- -----------------------------------------------------------------------------
4056 -- 'trivial*Code': deal with trivial instructions
4058 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4059 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4060 -- Only look for constants on the right hand side, because that's
4061 -- where the generic optimizer will have put them.
4063 -- Similarly, for unary instructions, we don't have to worry about
4064 -- matching an StInt as the argument, because genericOpt will already
4065 -- have handled the constant-folding.
-- Per-architecture type signatures for the trivial*Code family, selected
-- via the IF_ARCH_* CPP macros from NCG.h. NOTE(review): the signature
-- head lines (e.g. "trivialCode ::") and several macro close-parens are
-- elided in this excerpt; only the per-arch instruction-constructor shapes
-- are visible. x86 variants take an optional reversed-operand instruction
-- for commutable ops; powerpc additionally takes a signedness Bool.
4069 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4070 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4071 -> Maybe (Operand -> Operand -> Instr)
4072 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4073 -> Maybe (Operand -> Operand -> Instr)
4074 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4075 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4076 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialFCode: dyadic floating-point (powerpc defines its own elsewhere).
4080 #ifndef powerpc_TARGET_ARCH
4083 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4084 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4085 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4086 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4088 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialUCode: unary integer operation.
4094 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4095 ,IF_ARCH_i386 ((Operand -> Instr)
4096 ,IF_ARCH_x86_64 ((Operand -> Instr)
4097 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4098 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4100 -> CmmExpr -- the one argument
-- trivialUFCode: unary floating-point (again, not on powerpc).
4103 #ifndef powerpc_TARGET_ARCH
4106 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4107 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4108 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4109 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4111 -> CmmExpr -- the one argument
4115 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4117 #if alpha_TARGET_ARCH
-- Alpha backend (pre-OrdList style: registerCode/mkSeqInstr plumbing).
-- Immediate-operand special case for StInt on the right.
4119 trivialCode instr x (StInt y)
4121 = getRegister x `thenNat` \ register ->
4122 getNewRegNat IntRep `thenNat` \ tmp ->
4124 code = registerCode register tmp
4125 src1 = registerName register tmp
4126 src2 = ImmInt (fromInteger y)
4127 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4129 return (Any IntRep code__2)
-- General register/register case.
4131 trivialCode instr x y
4132 = getRegister x `thenNat` \ register1 ->
4133 getRegister y `thenNat` \ register2 ->
4134 getNewRegNat IntRep `thenNat` \ tmp1 ->
4135 getNewRegNat IntRep `thenNat` \ tmp2 ->
4137 code1 = registerCode register1 tmp1 []
4138 src1 = registerName register1 tmp1
4139 code2 = registerCode register2 tmp2 []
4140 src2 = registerName register2 tmp2
4141 code__2 dst = asmSeqThen [code1, code2] .
4142 mkSeqInstr (instr src1 (RIReg src2) dst)
4144 return (Any IntRep code__2)
-- Unary integer op.
4147 trivialUCode instr x
4148 = getRegister x `thenNat` \ register ->
4149 getNewRegNat IntRep `thenNat` \ tmp ->
4151 code = registerCode register tmp
4152 src = registerName register tmp
4153 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4155 return (Any IntRep code__2)
-- Dyadic floating-point op (first argument, a MachRep, is ignored here).
4158 trivialFCode _ instr x y
4159 = getRegister x `thenNat` \ register1 ->
4160 getRegister y `thenNat` \ register2 ->
4161 getNewRegNat F64 `thenNat` \ tmp1 ->
4162 getNewRegNat F64 `thenNat` \ tmp2 ->
4164 code1 = registerCode register1 tmp1
4165 src1 = registerName register1 tmp1
4167 code2 = registerCode register2 tmp2
4168 src2 = registerName register2 tmp2
4170 code__2 dst = asmSeqThen [code1 [], code2 []] .
4171 mkSeqInstr (instr src1 src2 dst)
4173 return (Any F64 code__2)
-- Unary floating-point op.
4175 trivialUFCode _ instr x
4176 = getRegister x `thenNat` \ register ->
4177 getNewRegNat F64 `thenNat` \ tmp ->
4179 code = registerCode register tmp
4180 src = registerName register tmp
4181 code__2 dst = code . mkSeqInstr (instr src dst)
4183 return (Any F64 code__2)
4185 #endif /* alpha_TARGET_ARCH */
4187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4189 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4192 The Rules of the Game are:
4194 * You cannot assume anything about the destination register dst;
4195 it may be anything, including a fixed reg.
4197 * You may compute an operand into a fixed reg, but you may not
4198 subsequently change the contents of that fixed reg. If you
4199 want to do so, first copy the value either to a temporary
4200 or into dst. You are free to modify dst even if it happens
4201 to be a fixed reg -- that's not your problem.
4203 * You cannot assume that a fixed reg will stay live over an
4204 arbitrary computation. The same applies to the dst reg.
4206 * Temporary regs obtained from getNewRegNat are distinct from
4207 each other and from all other regs, and stay live over
4208 arbitrary computations.
4210 --------------------
4212 SDM's version of The Rules:
4214 * If getRegister returns Any, that means it can generate correct
4215 code which places the result in any register, period. Even if that
4216 register happens to be read during the computation.
4218 Corollary #1: this means that if you are generating code for an
4219 operation with two arbitrary operands, you cannot assign the result
4220 of the first operand into the destination register before computing
4221 the second operand. The second operand might require the old value
4222 of the destination register.
4224 Corollary #2: A function might be able to generate more efficient
4225 code if it knows the destination register is a new temporary (and
4226 therefore not read by any of the sub-computations).
4228 * If getRegister returns Any, then the code it generates may modify only:
4229 (a) fresh temporaries
4230 (b) the destination register
4231 (c) known registers (eg. %ecx is used by shifts)
4232 In particular, it may *not* modify global registers, unless the global
4233 register happens to be the destination register.
-- x86 dyadic ops. When a reversed-operand instruction is available and the
-- left operand is a small literal, emit `dst := b; dst := lit `revop` dst`
-- to save a register.
4236 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4237 | not (is64BitLit lit_a) = do
4238 b_code <- getAnyReg b
4241 = b_code dst `snocOL`
4242 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4244 return (Any rep code)
4246 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4248 -- This is re-used for floating pt instructions too.
4249 genTrivialCode rep instr a b = do
4250 (b_op, b_code) <- getNonClobberedOperand b
4251 a_code <- getAnyReg a
4252 tmp <- getNewRegNat rep
4254 -- We want the value of b to stay alive across the computation of a.
4255 -- But, we want to calculate a straight into the destination register,
4256 -- because the instruction only has two operands (dst := dst `op` src).
4257 -- The troublesome case is when the result of b is in the same register
4258 -- as the destination reg. In this case, we have to save b in a
4259 -- new temporary across the computation of a.
4261 | dst `regClashesWithOp` b_op =
4263 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4265 instr (OpReg tmp) (OpReg dst)
-- No clash: compute a into dst, then apply the two-operand instruction.
4269 instr b_op (OpReg dst)
4271 return (Any rep code)
-- True iff the register is read by the operand (directly or via an
-- address mode component).
4273 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4274 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4275 reg `regClashesWithOp` _ = False
-- Unary op: compute straight into dst, then apply the instruction in place.
4279 trivialUCode rep instr x = do
4280 x_code <- getAnyReg x
4286 return (Any rep code)
4290 #if i386_TARGET_ARCH
-- i386 (x87): three-operand float instruction; x must survive the
-- computation of y, hence getNonClobberedReg for x.
4292 trivialFCode pk instr x y = do
4293 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4294 (y_reg, y_code) <- getSomeReg y
4299 instr pk x_reg y_reg dst
4301 return (Any pk code)
4305 #if x86_64_TARGET_ARCH
-- x86_64 (SSE): float ops use the same two-operand scheme as integers.
4307 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- Unary float op: compute the operand, then instr src dst.
4313 trivialUFCode rep instr x = do
4314 (x_reg, x_code) <- getSomeReg x
4320 return (Any rep code)
4322 #endif /* i386_TARGET_ARCH */
4324 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4326 #if sparc_TARGET_ARCH
-- Immediate special case (guard for fits-in-13-bits is elided in this
-- excerpt — TODO confirm against the original source).
4328 trivialCode pk instr x (CmmLit (CmmInt y d))
4331 (src1, code) <- getSomeReg x
4332 tmp <- getNewRegNat I32
4334 src2 = ImmInt (fromInteger y)
4335 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4336 return (Any I32 code__2)
-- General register/register case.
4338 trivialCode pk instr x y = do
4339 (src1, code1) <- getSomeReg x
4340 (src2, code2) <- getSomeReg y
4341 tmp1 <- getNewRegNat I32
4342 tmp2 <- getNewRegNat I32
4344 code__2 dst = code1 `appOL` code2 `snocOL`
4345 instr src1 (RIReg src2) dst
4346 return (Any I32 code__2)
-- Dyadic float op. SPARC treats F32 and F64 regs as distinct kinds, so if
-- the operand reps differ, the F32 side is promoted to F64 via FxTOy first.
4349 trivialFCode pk instr x y = do
4350 (src1, code1) <- getSomeReg x
4351 (src2, code2) <- getSomeReg y
4352 tmp1 <- getNewRegNat (cmmExprRep x)
4353 tmp2 <- getNewRegNat (cmmExprRep y)
4354 tmp <- getNewRegNat F64
4356 promote x = FxTOy F32 F64 x tmp
-- Same-rep case: apply the instruction directly.
4363 code1 `appOL` code2 `snocOL`
4364 instr pk src1 src2 dst
4365 else if pk1 == F32 then
4366 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4367 instr F64 tmp src2 dst
4369 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4370 instr F64 src1 tmp dst
-- Result rep is F64 whenever a promotion happened.
4371 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- Unary integer op.
4374 trivialUCode pk instr x = do
4375 (src, code) <- getSomeReg x
4376 tmp <- getNewRegNat pk
4378 code__2 dst = code `snocOL` instr (RIReg src) dst
4379 return (Any pk code__2)
-- Unary float op.
4382 trivialUFCode pk instr x = do
4383 (src, code) <- getSomeReg x
4384 tmp <- getNewRegNat pk
4386 code__2 dst = code `snocOL` instr src dst
4387 return (Any pk code__2)
4389 #endif /* sparc_TARGET_ARCH */
4391 #if powerpc_TARGET_ARCH
4394 Wolfgang's PowerPC version of The Rules:
4396 A slightly modified version of The Rules to take advantage of the fact
4397 that PowerPC instructions work on all registers and don't implicitly
4398 clobber any fixed registers.
4400 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4402 * If getRegister returns Any, then the code it generates may modify only:
4403 (a) fresh temporaries
4404 (b) the destination register
4405 It may *not* modify global registers, unless the global
4406 register happens to be the destination register.
4407 It may not clobber any other registers. In fact, only ccalls clobber any
4409 Also, it may not modify the counter register (used by genCCall).
4411 Corollary: If a getRegister for a subexpression returns Fixed, you need
4412 not move it to a fresh temporary before evaluating the next subexpression.
4413 The Fixed register won't be modified.
4414 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4416 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4417 the value of the destination register.
4420 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4421 | Just imm <- makeImmediate rep signed y
4423 (src1, code1) <- getSomeReg x
4424 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4425 return (Any rep code)
4427 trivialCode rep signed instr x y = do
4428 (src1, code1) <- getSomeReg x
4429 (src2, code2) <- getSomeReg y
4430 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4431 return (Any rep code)
-- | Three-register operation with no immediate form: both operands are
-- forced into registers before the instruction is emitted.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (reg_x, code_x) <- getSomeReg x
    (reg_y, code_y) <- getSomeReg y
    let assemble dst = (code_x `appOL` code_y) `snocOL` instr dst reg_x reg_y
    return (Any rep assemble)
-- | Unary operation: evaluate the operand into a register and emit a
-- single instruction writing the destination.
trivialUCode rep instr x = do
    (src_reg, src_code) <- getSomeReg x
    let with_dst dst = src_code `snocOL` instr dst src_reg
    return (Any rep with_dst)
-- There is no "remainder" instruction on the PPC, so we have to do
-- it ourselves: dst = src1 - (src1 `div` src2) * src2.
--
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)

-- | Integer remainder on PowerPC via divide / multiply / subtract.
-- (SUBF rt ra rb computes rb - ra, hence "SUBF dst dst src1" below
-- yields src1 - dst.)
--
-- NOTE(review): the div and SUBF lines had been lost in extraction
-- (the surviving MULLW line ended with a comma); restored here.
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
              -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
                div dst src1 src2,          -- dst = src1 / src2
                MULLW dst dst (RIReg src2), -- dst = (src1 / src2) * src2
                SUBF dst dst src1           -- dst = src1 - dst
            ]
    return (Any rep code)
4462 #endif /* powerpc_TARGET_ARCH */
4465 -- -----------------------------------------------------------------------------
4466 -- Coercing to/from integer/floating-point...
4468 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4469 -- conversions. We have to store temporaries in memory to move
4470 -- between the integer and the floating point register sets.
4472 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4473 pretend, on sparc at least, that double and float regs are separate
4474 -- kinds, so the value has to be computed into one kind before being
4475 -- explicitly "converted" to live in the other kind.
4477 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4478 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4480 #if sparc_TARGET_ARCH
4481 coerceDbl2Flt :: CmmExpr -> NatM Register
4482 coerceFlt2Dbl :: CmmExpr -> NatM Register
4485 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4487 #if alpha_TARGET_ARCH
-- NOTE(review): the left-hand sides of these two alpha definitions,
-- the surrounding 'let'/'in' lines, and at least one instruction per
-- list appear to have been lost in extraction -- this section is not
-- syntactically complete as it stands. Recover from version control
-- before compiling for alpha.
-- Int -> FP (presumably coerceInt2FP): evaluate the integer, then go
-- through memory -- the elided first instruction is presumably a store,
-- followed by the visible FP-typed load from the same stack slot.
4490 = getRegister x `thenNat` \ register ->
4491 getNewRegNat IntRep `thenNat` \ reg ->
4493 code = registerCode register reg
4494 src = registerName register reg
4496 code__2 dst = code . mkSeqInstrs [
4498 LD TF dst (spRel 0),
4501 return (Any F64 code__2)
-- FP -> Int (presumably coerceFP2Int): the mirror image -- store the
-- FP temp to the stack, then reload it into an integer register
-- (the integer-typed load after the visible ST was elided).
4505 = getRegister x `thenNat` \ register ->
4506 getNewRegNat F64 `thenNat` \ tmp ->
4508 code = registerCode register tmp
4509 src = registerName register tmp
4511 code__2 dst = code . mkSeqInstrs [
4513 ST TF tmp (spRel 0),
4516 return (Any IntRep code__2)
4518 #endif /* alpha_TARGET_ARCH */
4520 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4522 #if i386_TARGET_ARCH
-- | Int -> FP coercion on i386 (x87): a single GITOF/GITOD
-- pseudo-instruction chosen by the destination rep.
--
-- NOTE(review): the 'let' keyword line had been lost in extraction;
-- restored here.
coerceInt2FP from to x = do
    (x_reg, x_code) <- getSomeReg x
    let opc  = case to of F32 -> GITOF; F64 -> GITOD
        code dst = x_code `snocOL` opc x_reg dst
    -- ToDo: works for non-I32 reps?
    return (Any to code)
-- | FP -> Int coercion on i386 (x87): a single GFTOI/GDTOI
-- pseudo-instruction chosen by the source rep.
--
-- NOTE(review): the 'let' keyword line had been lost in extraction;
-- restored here.
coerceFP2Int from to x = do
    (x_reg, x_code) <- getSomeReg x
    let opc  = case from of F32 -> GFTOI; F64 -> GDTOI
        code dst = x_code `snocOL` opc x_reg dst
    -- ToDo: works for non-I32 reps?
    return (Any to code)
4544 #endif /* i386_TARGET_ARCH */
4546 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4548 #if x86_64_TARGET_ARCH
-- | FP -> Int coercion on x86-64 via SSE cvtss2si/cvtsd2si.
--
-- NOTE(review): the 'let' keyword line had been lost in extraction;
-- restored here.
coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
    let opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- | Int -> FP coercion on x86-64 via SSE cvtsi2ss/cvtsi2sd.
--
-- NOTE(review): the 'let' keyword line had been lost in extraction;
-- restored here.
coerceInt2FP from to x = do
    (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
    let opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- | FP -> FP coercion on x86-64 (float <-> double) via SSE
-- cvtsd2ss/cvtss2sd; the argument is the *destination* rep, the source
-- rep being implicitly the other float rep.
--
-- NOTE(review): the 'let' keyword line had been lost in extraction;
-- restored here.
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
    (x_reg, x_code) <- getSomeReg x
    let opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
        code dst = x_code `snocOL` opc x_reg dst
    return (Any to code)
4577 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4579 #if sparc_TARGET_ARCH
-- | Int -> FP on SPARC: bounce the integer through a stack slot just
-- below %sp (spRel (-2)) to move it into the FP register file, then
-- convert in place with FxTOy.
--
-- NOTE(review): the 'let' keyword line had been lost in extraction;
-- restored here.
coerceInt2FP pk1 pk2 x = do
    (src, code) <- getSomeReg x
    let
        code__2 dst = code `appOL` toOL [
            ST pk1 src (spRel (-2)),
            LD pk1 (spRel (-2)) dst,
            FxTOy pk1 pk2 dst dst]
    return (Any pk2 code__2)
-- | FP -> Int on SPARC: convert into an FP temp with FxTOy, then
-- bounce through a stack slot below %sp to land in an integer register.
--
-- NOTE(review): the 'let' line and the "code `appOL` toOL [" line had
-- been lost in extraction (the ASSERT was left dangling); restored.
coerceFP2Int pk fprep x = do
    (src, code) <- getSomeReg x
    reg <- getNewRegNat fprep   -- note: apparently unused; kept to
                                -- preserve the unique supply behaviour
    tmp <- getNewRegNat pk
    let
        code__2 dst = ASSERT(fprep == F64 || fprep == F32)
            code `appOL` toOL [
            FxTOy fprep pk src tmp,
            ST pk tmp (spRel (-2)),
            LD pk (spRel (-2)) dst]
    return (Any pk code__2)
-- | Double -> float on SPARC: a single FxTOy conversion instruction.
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    let code' dst = code `snocOL` FxTOy F64 F32 src dst
    return (Any F32 code')
-- | Float -> double on SPARC: a single FxTOy conversion instruction.
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    let code' dst = code `snocOL` FxTOy F32 F64 src dst
    return (Any F64 code')
4613 #endif /* sparc_TARGET_ARCH */
4615 #if powerpc_TARGET_ARCH
-- NOTE(review): several lines of this definition were lost in
-- extraction (the 'let' keyword, the LDATA/CmmDataLabel lines that
-- open the read-only literal table, the load of that literal through
-- 'addr', and the catch-all "_ -> nilOL" arms of both case
-- expressions). It is not syntactically complete as it stands;
-- recover from version control before compiling for powerpc.
--
-- Int -> FP on PowerPC, which has no int-to-float move: build the
-- double 0x43300000_80000000 on the stack (exponent 0x433 with the
-- int, sign-bit-flipped via XORIS, as mantissa low word), load it as
-- an F64, and FSUB the magic constant referenced via 'dynRef'/'addr'
-- to recover the signed value. maybe_exts sign-extends sub-word
-- sources first; maybe_frsp rounds down to single precision when the
-- target rep is F32.
4616 coerceInt2FP fromRep toRep x = do
4617 (src, code) <- getSomeReg x
4618 lbl <- getNewLabelNat
4619 itmp <- getNewRegNat I32
4620 ftmp <- getNewRegNat F64
4621 dynRef <- cmmMakeDynamicReference addImportNat False lbl
4622 Amode addr addr_code <- getAmode dynRef
4624 code' dst = code `appOL` maybe_exts `appOL` toOL [
4627 CmmStaticLit (CmmInt 0x43300000 I32),
4628 CmmStaticLit (CmmInt 0x80000000 I32)],
4629 XORIS itmp src (ImmInt 0x8000),
4630 ST I32 itmp (spRel 3),
4631 LIS itmp (ImmInt 0x4330),
4632 ST I32 itmp (spRel 2),
4633 LD F64 ftmp (spRel 2)
4634 ] `appOL` addr_code `appOL` toOL [
4636 FSUB F64 dst ftmp dst
4637 ] `appOL` maybe_frsp dst
4639 maybe_exts = case fromRep of
4640 I8 -> unitOL $ EXTS I8 src src
4641 I16 -> unitOL $ EXTS I16 src src
4643 maybe_frsp dst = case toRep of
4644 F32 -> unitOL $ FRSP dst dst
4646 return (Any toRep code')
-- | FP -> Int on PowerPC: convert in an FP register with FCTIWZ
-- (round toward zero), store the 64-bit FP result to the stack, and
-- reload only the low word, which holds the 32-bit integer.
--
-- NOTE(review): the 'let' line and the FCTIWZ instruction (whose own
-- comment survived) had been lost in extraction; restored here.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
    let
        code' dst = code `appOL` toOL [
                -- convert to int in FP reg
            FCTIWZ tmp src,
                -- store value (64bit) from FP to stack
            ST F64 tmp (spRel 2),
                -- read low word of value (high word is undefined)
            LD I32 dst (spRel 3)]
    return (Any toRep code')
4661 #endif /* powerpc_TARGET_ARCH */
4664 -- -----------------------------------------------------------------------------
4665 -- eXTRA_STK_ARGS_HERE
4667 -- We (allegedly) put the first six C-call arguments in registers;
4668 -- where do we start putting the rest of them?
4670 -- Moved from MachInstrs (SDM):
4672 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- | Stack offset (in words) at which C-call arguments beyond the
-- register-passed ones begin: 0 on alpha, 23 on sparc (past the
-- register-save area and hidden parameter slots).
--
-- NOTE(review): the left-hand-side line had been lost in extraction;
-- restored here.
eXTRA_STK_ARGS_HERE :: Int
eXTRA_STK_ARGS_HERE
  = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))