1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
26 -- Our intermediate code:
27 import PprCmm ( pprExpr )
33 import StaticFlags ( opt_PIC )
34 import ForeignCall ( CCallConv(..) )
38 import qualified Outputable
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import TRACE ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Maybe ( fromJust )
53 -- -----------------------------------------------------------------------------
54 -- Top-level of the instruction selector
56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
57 -- They are really trees of insns to facilitate fast appending, where a
58 -- left-to-right traversal (pre-order?) yields the insns in the correct order.
61 type InstrBlock = OrdList Instr
-- | Entry point of the instruction selector for one top-level Cmm entity.
-- A 'CmmProc' has each basic block translated to native code; code
-- generation may emit extra data sections, which are collected in
-- @statics@ and appended after the proc.  A 'CmmData' passes through
-- untranslated, since static data needs no instruction selection.
63 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
64 cmmTopCodeGen (CmmProc info lab params blocks) = do
65 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
66 picBaseMb <- getPicBaseMaybeNat
67 let proc = CmmProc info lab params (concat nat_blocks)
68 tops = proc : concat statics
-- NOTE(review): the scrutinee line (presumably @case picBaseMb of@) is
-- elided in this listing; the two alternatives below dispatch on whether
-- a PIC base register was allocated for this proc.
70 Just picBase -> initializePicBase picBase tops
71 Nothing -> return tops
73 cmmTopCodeGen (CmmData sec dat) = do
74 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Translate one Cmm basic block to native code, then re-split the
-- resulting instruction stream back into basic blocks: instruction
-- selection can introduce new block boundaries (marked by the
-- pseudo-instruction NEWBLOCK) and hoist out data sections (LDATA).
76 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
77 basicBlockCodeGen (BasicBlock id stmts) = do
78 instrs <- stmtsToInstrs stmts
79 -- code generation may introduce new basic block boundaries, which
80 -- are indicated by the NEWBLOCK instruction. We must split up the
81 -- instruction stream into basic blocks again. Also, we extract
-- NOTE(review): a line or two is elided here (comment continuation and
-- the @let@ introducing the bindings below).
-- foldrOL runs right-to-left, so each instruction is consed onto the
-- block accumulated so far, preserving original order.
84 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- NEWBLOCK: everything accumulated so far becomes a fresh block.
86 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
87 = ([], BasicBlock id instrs : blocks, statics)
-- LDATA: hoisted out of the instruction stream as a data section.
88 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
89 = (instrs, blocks, CmmData sec dat:statics)
-- Any ordinary instruction: prepend to the current block.
90 mkBlocks instr (instrs,blocks,statics)
91 = (instr:instrs, blocks, statics)
93 return (BasicBlock id top : other_blocks, statics)
-- | Translate a statement sequence by selecting instructions for each
-- statement and concatenating the resulting 'InstrBlock's in order.
-- NOTE(review): the equation's left-hand side (@stmtsToInstrs stmts@)
-- is elided in this listing.
95 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
97 = do instrss <- mapM stmtToInstrs stmts
98 return (concatOL instrss)
-- | Select instructions for a single Cmm statement.  Assignments and
-- stores dispatch on the representation of the value: floating point,
-- 64-bit-on-32-bit-platform, or plain integer.  Control transfer
-- statements delegate to the dedicated gen* routines.
100 stmtToInstrs :: CmmStmt -> NatM InstrBlock
101 stmtToInstrs stmt = case stmt of
102 CmmNop -> return nilOL
103 CmmComment s -> return (unitOL (COMMENT s))
-- NOTE(review): the @CmmAssign reg src@ pattern head for the guarded
-- alternatives below is elided in this listing; likewise the matching
-- #endif for the WORD_SIZE_IN_BITS==32 conditional.
106 | isFloatingRep kind -> assignReg_FltCode kind reg src
107 #if WORD_SIZE_IN_BITS==32
108 | kind == I64 -> assignReg_I64Code reg src
110 | otherwise -> assignReg_IntCode kind reg src
111 where kind = cmmRegRep reg
-- NOTE(review): the @CmmStore addr src@ pattern head is elided here,
-- as is its #endif.
114 | isFloatingRep kind -> assignMem_FltCode kind addr src
115 #if WORD_SIZE_IN_BITS==32
116 | kind == I64 -> assignMem_I64Code addr src
118 | otherwise -> assignMem_IntCode kind addr src
119 where kind = cmmExprRep src
121 CmmCall target result_regs args vols
122 -> genCCall target result_regs args vols
124 CmmBranch id -> genBranch id
125 CmmCondBranch arg id -> genCondJump id arg
126 CmmSwitch arg ids -> genSwitch arg ids
-- The @params@ of CmmJump are intentionally unused by the selector.
127 CmmJump arg params -> genJump arg
129 -- -----------------------------------------------------------------------------
130 -- General things for putting together code sequences
132 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
133 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' node into an explicit machine add of the
-- register and an immediate, performed at the register's own 'MachRep'.
-- (See the ToDo above: the alternative would be to fold CmmExprs the
-- other way, into CmmRegOff.)
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let rep    = cmmRegRep reg
        offLit = CmmLit (CmmInt (fromIntegral off) rep)
    in  CmmMachOp (MO_Add rep) [CmmReg reg, offLit]
139 -- -----------------------------------------------------------------------------
140 -- Code gen for 64-bit arithmetic on 32-bit platforms
143 Simple support for generating 64-bit code (ie, 64 bit values and 64
144 bit assignments) on 32-bit platforms. Unlike the main code generator
145 we merely shoot for generating working code as simply as possible, and
146 pay little attention to code quality. Specifically, there is no
147 attempt to deal cleverly with the fixed-vs-floating register
148 distinction; all values are generated into (pairs of) floating
149 registers, even if this would mean some redundant reg-reg moves as a
150 result. Only one of the VRegUniques is returned, since it will be
151 of the VRegUniqueLo form, and the upper-half VReg can be determined
152 by applying getHiVRegFromLo to it.
155 data ChildCode64 -- a.k.a "Register64"
158 Reg -- the lower 32-bit temporary which contains the
159 -- result; use getHiVRegFromLo to find the other
160 -- VRegUnique. Rules of this simplified insn
161 -- selection game are therefore that the returned
162 -- Reg may be modified
164 #if WORD_SIZE_IN_BITS==32
165 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
166 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
169 #ifndef x86_64_TARGET_ARCH
170 iselExpr64 :: CmmExpr -> NatM ChildCode64
173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | i386: store a 64-bit value to memory as two 32-bit MOVs —
-- low word at @addr@, high word at @addr+4@ (little-endian layout,
-- as the comment below says).
-- NOTE(review): the @let@ line introducing the bindings is elided.
-- The 'fromJust' assumes @addr+4@ is always representable as an
-- addressing mode — TODO confirm 'addrOffset' cannot fail here.
177 assignMem_I64Code addrTree valueTree = do
178 Amode addr addr_code <- getAmode addrTree
179 ChildCode64 vcode rlo <- iselExpr64 valueTree
181 rhi = getHiVRegFromLo rlo
183 -- Little-endian store
184 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
185 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
187 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | i386: assign a 64-bit value to a local register pair with two
-- reg-to-reg MOVs.  The destination high half is derived from the low
-- half via 'getHiVRegFromLo', per the ChildCode64 convention.
-- NOTE(review): the @let@ line and the closing of the @return@ are
-- elided in this listing.
190 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
191 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
193 r_dst_lo = mkVReg u_dst I32
194 r_dst_hi = getHiVRegFromLo r_dst_lo
195 r_src_hi = getHiVRegFromLo r_src_lo
196 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
197 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
200 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Any other lvalue (e.g. a global) is unsupported for I64 on i386.
203 assignReg_I64Code lvalue valueTree
204 = panic "assignReg_I64Code(i386): invalid lvalue"
-- | i386: select instructions for a 64-bit expression, returning the
-- low-half register (the high half is implied via 'getHiVRegFromLo').
--
-- Literal: split into two 32-bit immediates (r = low word, q = high
-- word) and load each half with a MOV-immediate.
-- NOTE(review): the @let@ and the @code = toOL [@ / @]@ lines around
-- the instruction lists are elided throughout this listing.
208 iselExpr64 (CmmLit (CmmInt i _)) = do
209 (rlo,rhi) <- getNewRegPairNat I32
211 r = fromIntegral (fromIntegral i :: Word32)
212 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
214 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
215 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
218 return (ChildCode64 code rlo)
-- Load: two 32-bit loads, low word from addr, high word from addr+4
-- (little-endian; mirrors assignMem_I64Code above).
220 iselExpr64 (CmmLoad addrTree I64) = do
221 Amode addr addr_code <- getAmode addrTree
222 (rlo,rhi) <- getNewRegPairNat I32
224 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
225 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
228 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A local I64 register needs no code at all: just name its low half.
232 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
233 = return (ChildCode64 nilOL (mkVReg vu I32))
235 -- we handle addition, but rather badly
-- Add-with-constant: 32-bit ADD on the low halves then ADC (add with
-- carry) of the high halves, propagating the carry.
236 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
237 ChildCode64 code1 r1lo <- iselExpr64 e1
238 (rlo,rhi) <- getNewRegPairNat I32
240 r = fromIntegral (fromIntegral i :: Word32)
241 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
242 r1hi = getHiVRegFromLo r1lo
244 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
245 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
246 MOV I32 (OpReg r1hi) (OpReg rhi),
247 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
249 return (ChildCode64 code rlo)
-- General add: same ADD/ADC pattern with both operands in registers.
-- NOTE(review): the line combining @code1@/@code2@ with the toOL list
-- (presumably @code = code1 `appOL` code2 `appOL` ...@) is elided.
251 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
252 ChildCode64 code1 r1lo <- iselExpr64 e1
253 ChildCode64 code2 r2lo <- iselExpr64 e2
254 (rlo,rhi) <- getNewRegPairNat I32
256 r1hi = getHiVRegFromLo r1lo
257 r2hi = getHiVRegFromLo r2lo
260 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
261 ADD I32 (OpReg r2lo) (OpReg rlo),
262 MOV I32 (OpReg r1hi) (OpReg rhi),
263 ADC I32 (OpReg r2hi) (OpReg rhi) ]
265 return (ChildCode64 code rlo)
-- Catch-all: anything else is not yet handled on i386.
268 = pprPanic "iselExpr64(i386)" (ppr expr)
270 #endif /* i386_TARGET_ARCH */
272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
274 #if sparc_TARGET_ARCH
-- | SPARC: store a 64-bit value as two word stores, high word at
-- offset 0 and low word at offset 4 — consistent with SPARC being
-- big-endian (TODO confirm; inferred from the offsets, not stated).
-- Written in old-style `thenNat` monadic plumbing rather than do-notation.
-- NOTE(review): this whole #if sparc section appears bitrotted
-- (pre-Cmm 'VirtualRegI'/Stix-era names); treat with suspicion.
276 assignMem_I64Code addrTree valueTree
277 = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vrlo) ->
278 getRegister addrTree `thenNat` \ register_addr ->
279 getNewRegNat IntRep `thenNat` \ t_addr ->
280 let rlo = VirtualRegI vrlo
281 rhi = getHiVRegFromLo rlo
282 code_addr = registerCode register_addr t_addr
283 reg_addr = registerName register_addr t_addr
285 mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
286 mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
288 return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- | SPARC: assign a 64-bit value to a register pair using two
-- register-to-register moves, synthesised as @OR g0, src -> dst@
-- (the SPARC idiom for MOV).
-- NOTE(review): the 'StixTemp'/'StixVReg' pattern is from the old Stix
-- representation, not Cmm — this arm looks bitrotted and likely does
-- not compile against the Cmm types used elsewhere in this file.
291 assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
292 = iselExpr64 valueTree `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
294 r_dst_lo = mkVReg u_dst IntRep
295 r_src_lo = VirtualRegI vr_src_lo
296 r_dst_hi = getHiVRegFromLo r_dst_lo
297 r_src_hi = getHiVRegFromLo r_src_lo
298 mov_lo = mkMOV r_src_lo r_dst_lo
299 mov_hi = mkMOV r_src_hi r_dst_hi
300 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
303 vcode `snocOL` mov_hi `snocOL` mov_lo
305 assignReg_I64Code lvalue valueTree
306 = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
310 -- Don't delete this -- it's very handy for debugging.
312 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
313 -- = panic "iselExpr64(???)"
-- | SPARC iselExpr64.  Load: two word loads, high word at offset 0,
-- low word at offset 4 (mirrors assignMem_I64Code above).
-- NOTE(review): argument order @(CmmLoad I64 addrTree)@ here is the
-- reverse of the i386/ppc arms (@CmmLoad addrTree I64@) — another sign
-- this section is bitrotted; verify against the CmmLoad constructor.
315 iselExpr64 (CmmLoad I64 addrTree)
316 = getRegister addrTree `thenNat` \ register_addr ->
317 getNewRegNat IntRep `thenNat` \ t_addr ->
318 getNewRegNat IntRep `thenNat` \ rlo ->
319 let rhi = getHiVRegFromLo rlo
320 code_addr = registerCode register_addr t_addr
321 reg_addr = registerName register_addr t_addr
322 mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
323 mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
326 ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- Local register: copy both halves into a fresh pair via OR-with-g0.
-- NOTE(review): the pattern binds @uq@ but the body uses @vu@ —
-- an apparent bitrot bug; this equation cannot compile as written.
330 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
331 = getNewRegNat IntRep `thenNat` \ r_dst_lo ->
332 let r_dst_hi = getHiVRegFromLo r_dst_lo
333 r_src_lo = mkVReg vu IntRep
334 r_src_hi = getHiVRegFromLo r_src_lo
335 mov_lo = mkMOV r_src_lo r_dst_lo
336 mov_hi = mkMOV r_src_hi r_dst_hi
337 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
340 ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
-- C call returning I64: result arrives in %o0 (low per SPARC ABI? —
-- TODO confirm which of o0/o1 is high) and is copied into a pair.
-- NOTE(review): 'StCall' is a Stix-era constructor, and @kind@ is not
-- bound in this equation — more bitrot.
343 iselExpr64 (StCall fn cconv I64 args)
344 = genCCall fn cconv kind args `thenNat` \ call ->
345 getNewRegNat IntRep `thenNat` \ r_dst_lo ->
346 let r_dst_hi = getHiVRegFromLo r_dst_lo
347 mov_lo = mkMOV o0 r_dst_lo
348 mov_hi = mkMOV o1 r_dst_hi
349 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
352 ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
353 (getVRegUnique r_dst_lo)
-- Catch-all panic for unhandled 64-bit expressions on SPARC.
357 = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
359 #endif /* sparc_TARGET_ARCH */
361 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
363 #if powerpc_TARGET_ARCH
-- | PowerPC: compute addressing modes for both halves of a 64-bit
-- memory operand.  If @addr+4@ fits an addressing mode we reuse the
-- amode pair directly; otherwise force the address into a register and
-- use reg+0 / reg+4 displacements.
-- NOTE(review): the line closing the second return tuple (carrying
-- @code@) is elided in this listing.
365 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
366 getI64Amodes addrTree = do
367 Amode hi_addr addr_code <- getAmode addrTree
368 case addrOffset hi_addr 4 of
369 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
370 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
371 return (AddrRegImm hi_ptr (ImmInt 0),
372 AddrRegImm hi_ptr (ImmInt 4),
-- | PowerPC: store a 64-bit value as two 32-bit STs, high word at the
-- hi amode and low word at the lo amode (big-endian layout — hi at the
-- lower address, per getI64Amodes above).
-- NOTE(review): the @let@ line is elided in this listing.
375 assignMem_I64Code addrTree valueTree = do
376 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
377 ChildCode64 vcode rlo <- iselExpr64 valueTree
379 rhi = getHiVRegFromLo rlo
382 mov_hi = ST I32 rhi hi_addr
383 mov_lo = ST I32 rlo lo_addr
385 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | PowerPC: assign a 64-bit value to a local register pair with two
-- MR (move-register) instructions, one per half.
-- NOTE(review): the @let@ line and the closing of the @return@ are
-- elided in this listing.
387 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
388 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
390 r_dst_lo = mkVReg u_dst I32
391 r_dst_hi = getHiVRegFromLo r_dst_lo
392 r_src_hi = getHiVRegFromLo r_src_lo
393 mov_lo = MR r_dst_lo r_src_lo
394 mov_hi = MR r_dst_hi r_src_hi
397 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Any other lvalue is unsupported for I64 on powerpc.
400 assignReg_I64Code lvalue valueTree
401 = panic "assignReg_I64Code(powerpc): invalid lvalue"
404 -- Don't delete this -- it's very handy for debugging.
406 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
407 -- = panic "iselExpr64(???)"
-- | PowerPC iselExpr64.  Load: two 32-bit LDs through the amode pair
-- from getI64Amodes (high word first in memory).
-- NOTE(review): the line supplying 'rlo' as ChildCode64's second field
-- is elided after the @return $@ expression.
409 iselExpr64 (CmmLoad addrTree I64) = do
410 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
411 (rlo, rhi) <- getNewRegPairNat I32
412 let mov_hi = LD I32 rhi hi_addr
413 mov_lo = LD I32 rlo lo_addr
414 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A local I64 register needs no code: just name its low half.
417 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
418 = return (ChildCode64 nilOL (mkVReg vu I32))
-- | PowerPC: materialise a 64-bit integer literal.  The value is split
-- into four 16-bit chunks; each 32-bit half is built with LIS (load
-- immediate shifted, filling bits 16..31) followed by an OR-immediate
-- supplying bits 0..15.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: this OR must target the high register; the
                -- original OR'd half2 into rlo, corrupting the low word
                -- and leaving bits 32..47 of the literal unset in rhi.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- | PowerPC: 64-bit addition via ADDC on the low halves (setting the
-- carry) followed by a carry-propagating add of the high halves.
-- NOTE(review): the high-half instruction (presumably
-- @ADDE rhi r1hi r2hi@) and the @let@/@code =@ plumbing lines are
-- elided in this listing.
437 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
438 ChildCode64 code1 r1lo <- iselExpr64 e1
439 ChildCode64 code2 r2lo <- iselExpr64 e2
440 (rlo,rhi) <- getNewRegPairNat I32
442 r1hi = getHiVRegFromLo r1lo
443 r2hi = getHiVRegFromLo r2lo
446 toOL [ ADDC rlo r1lo r2lo,
449 return (ChildCode64 code rlo)
-- Catch-all panic for unhandled 64-bit expressions on powerpc.
452 = pprPanic "iselExpr64(powerpc)" (ppr expr)
454 #endif /* powerpc_TARGET_ARCH */
457 -- -----------------------------------------------------------------------------
458 -- The 'Register' type
460 -- 'Register's passed up the tree. If the stix code forces the register
461 -- to live in a pre-decided machine register, it comes out as @Fixed@;
462 -- otherwise, it comes out as @Any@, and the parent can decide which
463 -- register to put it in.
466 = Fixed MachRep Reg InstrBlock
467 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register' without touching the
-- register assignment or the generated code.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep = case register of
  Fixed _ reg code -> Fixed rep reg code
  Any _ codefn     -> Any rep codefn
474 -- -----------------------------------------------------------------------------
475 -- Utils based on getRegister, below
477 -- The dual to getAnyReg: compute an expression into a register, but
478 -- we don't mind which one it is.
-- | Compute an expression into some register — we don't care which.
-- For an 'Any' result a fresh virtual register is allocated and the
-- code function instantiated at it.
-- NOTE(review): the equation LHS, the @case r of@ scrutinee line, and
-- the @Fixed@ branch are elided in this listing; only the @Any@ arm's
-- body is visible below.
479 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
481 r <- getRegister expr
484 tmp <- getNewRegNat rep
485 return (tmp, code tmp)
489 -- -----------------------------------------------------------------------------
490 -- Grab the Reg for a CmmReg
-- | Map a 'CmmReg' to the machine 'Reg' that holds it: a virtual
-- register for locals, or the pinned real register for globals.
-- NOTE(review): the RHS of the local case (presumably @= mkVReg u pk@)
-- is elided in this listing.
492 getRegisterReg :: CmmReg -> Reg
494 getRegisterReg (CmmLocal (LocalReg u pk))
-- Globals must map to a real machine register on this platform;
-- memory-resident MagicIds should have been eliminated before here.
497 getRegisterReg (CmmGlobal mid)
498 = case get_GlobalReg_reg_or_addr mid of
499 Left (RealReg rrno) -> RealReg rrno
500 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
501 -- By this stage, the only MagicIds remaining should be the
502 -- ones which map to a real machine register on this
503 -- platform. Hence ...
506 -- -----------------------------------------------------------------------------
507 -- Generate code to get a subtree into a Register
509 -- Don't delete this -- it's very handy for debugging.
511 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
512 -- = panic "getRegister(???)"
514 getRegister :: CmmExpr -> NatM Register
516 getRegister (CmmReg (CmmGlobal PicBaseReg))
518 reg <- getPicBaseNat wordRep
519 return (Fixed wordRep reg nilOL)
521 getRegister (CmmReg reg)
522 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
524 getRegister tree@(CmmRegOff _ _)
525 = getRegister (mangleIndexTree tree)
527 -- end of machine-"independent" bit; here we go on the rest...
529 #if alpha_TARGET_ARCH
531 getRegister (StDouble d)
532 = getBlockIdNat `thenNat` \ lbl ->
533 getNewRegNat PtrRep `thenNat` \ tmp ->
534 let code dst = mkSeqInstrs [
535 LDATA RoDataSegment lbl [
536 DATA TF [ImmLab (rational d)]
538 LDA tmp (AddrImm (ImmCLbl lbl)),
539 LD TF dst (AddrReg tmp)]
541 return (Any F64 code)
543 getRegister (StPrim primop [x]) -- unary PrimOps
545 IntNegOp -> trivialUCode (NEG Q False) x
547 NotOp -> trivialUCode NOT x
549 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
550 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
552 OrdOp -> coerceIntCode IntRep x
555 Float2IntOp -> coerceFP2Int x
556 Int2FloatOp -> coerceInt2FP pr x
557 Double2IntOp -> coerceFP2Int x
558 Int2DoubleOp -> coerceInt2FP pr x
560 Double2FloatOp -> coerceFltCode x
561 Float2DoubleOp -> coerceFltCode x
563 other_op -> getRegister (StCall fn CCallConv F64 [x])
565 fn = case other_op of
566 FloatExpOp -> FSLIT("exp")
567 FloatLogOp -> FSLIT("log")
568 FloatSqrtOp -> FSLIT("sqrt")
569 FloatSinOp -> FSLIT("sin")
570 FloatCosOp -> FSLIT("cos")
571 FloatTanOp -> FSLIT("tan")
572 FloatAsinOp -> FSLIT("asin")
573 FloatAcosOp -> FSLIT("acos")
574 FloatAtanOp -> FSLIT("atan")
575 FloatSinhOp -> FSLIT("sinh")
576 FloatCoshOp -> FSLIT("cosh")
577 FloatTanhOp -> FSLIT("tanh")
578 DoubleExpOp -> FSLIT("exp")
579 DoubleLogOp -> FSLIT("log")
580 DoubleSqrtOp -> FSLIT("sqrt")
581 DoubleSinOp -> FSLIT("sin")
582 DoubleCosOp -> FSLIT("cos")
583 DoubleTanOp -> FSLIT("tan")
584 DoubleAsinOp -> FSLIT("asin")
585 DoubleAcosOp -> FSLIT("acos")
586 DoubleAtanOp -> FSLIT("atan")
587 DoubleSinhOp -> FSLIT("sinh")
588 DoubleCoshOp -> FSLIT("cosh")
589 DoubleTanhOp -> FSLIT("tanh")
591 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
593 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
595 CharGtOp -> trivialCode (CMP LTT) y x
596 CharGeOp -> trivialCode (CMP LE) y x
597 CharEqOp -> trivialCode (CMP EQQ) x y
598 CharNeOp -> int_NE_code x y
599 CharLtOp -> trivialCode (CMP LTT) x y
600 CharLeOp -> trivialCode (CMP LE) x y
602 IntGtOp -> trivialCode (CMP LTT) y x
603 IntGeOp -> trivialCode (CMP LE) y x
604 IntEqOp -> trivialCode (CMP EQQ) x y
605 IntNeOp -> int_NE_code x y
606 IntLtOp -> trivialCode (CMP LTT) x y
607 IntLeOp -> trivialCode (CMP LE) x y
609 WordGtOp -> trivialCode (CMP ULT) y x
610 WordGeOp -> trivialCode (CMP ULE) x y
611 WordEqOp -> trivialCode (CMP EQQ) x y
612 WordNeOp -> int_NE_code x y
613 WordLtOp -> trivialCode (CMP ULT) x y
614 WordLeOp -> trivialCode (CMP ULE) x y
616 AddrGtOp -> trivialCode (CMP ULT) y x
617 AddrGeOp -> trivialCode (CMP ULE) y x
618 AddrEqOp -> trivialCode (CMP EQQ) x y
619 AddrNeOp -> int_NE_code x y
620 AddrLtOp -> trivialCode (CMP ULT) x y
621 AddrLeOp -> trivialCode (CMP ULE) x y
623 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
624 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
625 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
626 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
627 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
628 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
630 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
631 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
632 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
633 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
634 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
635 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
637 IntAddOp -> trivialCode (ADD Q False) x y
638 IntSubOp -> trivialCode (SUB Q False) x y
639 IntMulOp -> trivialCode (MUL Q False) x y
640 IntQuotOp -> trivialCode (DIV Q False) x y
641 IntRemOp -> trivialCode (REM Q False) x y
643 WordAddOp -> trivialCode (ADD Q False) x y
644 WordSubOp -> trivialCode (SUB Q False) x y
645 WordMulOp -> trivialCode (MUL Q False) x y
646 WordQuotOp -> trivialCode (DIV Q True) x y
647 WordRemOp -> trivialCode (REM Q True) x y
649 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
650 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
651 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
652 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
654 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
655 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
656 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
657 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
659 AddrAddOp -> trivialCode (ADD Q False) x y
660 AddrSubOp -> trivialCode (SUB Q False) x y
661 AddrRemOp -> trivialCode (REM Q True) x y
663 AndOp -> trivialCode AND x y
664 OrOp -> trivialCode OR x y
665 XorOp -> trivialCode XOR x y
666 SllOp -> trivialCode SLL x y
667 SrlOp -> trivialCode SRL x y
669 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
670 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
671 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
673 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
674 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
676 {- ------------------------------------------------------------
677 Some bizarre special code for getting condition codes into
678 registers. Integer non-equality is a test for equality
679 followed by an XOR with 1. (Integer comparisons always set
680 the result register to 0 or 1.) Floating point comparisons of
681 any kind leave the result in a floating point register, so we
682 need to wrangle an integer register out of things.
684 int_NE_code :: StixTree -> StixTree -> NatM Register
687 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
688 getNewRegNat IntRep `thenNat` \ tmp ->
690 code = registerCode register tmp
691 src = registerName register tmp
692 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
694 return (Any IntRep code__2)
696 {- ------------------------------------------------------------
697 Comments for int_NE_code also apply to cmpF_code
700 :: (Reg -> Reg -> Reg -> Instr)
702 -> StixTree -> StixTree
705 cmpF_code instr cond x y
706 = trivialFCode pr instr x y `thenNat` \ register ->
707 getNewRegNat F64 `thenNat` \ tmp ->
708 getBlockIdNat `thenNat` \ lbl ->
710 code = registerCode register tmp
711 result = registerName register tmp
713 code__2 dst = code . mkSeqInstrs [
714 OR zeroh (RIImm (ImmInt 1)) dst,
715 BF cond result (ImmCLbl lbl),
716 OR zeroh (RIReg zeroh) dst,
719 return (Any IntRep code__2)
721 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
722 ------------------------------------------------------------
724 getRegister (CmmLoad pk mem)
725 = getAmode mem `thenNat` \ amode ->
727 code = amodeCode amode
728 src = amodeAddr amode
729 size = primRepToSize pk
730 code__2 dst = code . mkSeqInstr (LD size dst src)
732 return (Any pk code__2)
734 getRegister (StInt i)
737 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
739 return (Any IntRep code)
742 code dst = mkSeqInstr (LDI Q dst src)
744 return (Any IntRep code)
746 src = ImmInt (fromInteger i)
751 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
753 return (Any PtrRep code)
756 imm__2 = case imm of Just x -> x
758 #endif /* alpha_TARGET_ARCH */
760 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
764 getRegister (CmmLit (CmmFloat f F32)) = do
765 lbl <- getNewLabelNat
766 let code dst = toOL [
769 CmmStaticLit (CmmFloat f F32)],
770 GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
773 return (Any F32 code)
776 getRegister (CmmLit (CmmFloat d F64))
778 = let code dst = unitOL (GLDZ dst)
779 in return (Any F64 code)
782 = let code dst = unitOL (GLD1 dst)
783 in return (Any F64 code)
786 lbl <- getNewLabelNat
787 let code dst = toOL [
790 CmmStaticLit (CmmFloat d F64)],
791 GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
794 return (Any F64 code)
796 #endif /* i386_TARGET_ARCH */
798 #if x86_64_TARGET_ARCH
800 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
801 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
802 -- I don't know why there are xorpd, xorps, and pxor instructions.
803 -- They all appear to do the same thing --SDM
804 return (Any rep code)
806 getRegister (CmmLit (CmmFloat f rep)) = do
807 lbl <- getNewLabelNat
808 let code dst = toOL [
811 CmmStaticLit (CmmFloat f rep)],
812 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
815 return (Any rep code)
817 #endif /* x86_64_TARGET_ARCH */
819 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
821 -- catch simple cases of zero- or sign-extended load
822 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
823 code <- intLoadCode (MOVZxL I8) addr
824 return (Any I32 code)
826 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
827 code <- intLoadCode (MOVSxL I8) addr
828 return (Any I32 code)
830 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
831 code <- intLoadCode (MOVZxL I16) addr
832 return (Any I32 code)
834 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
835 code <- intLoadCode (MOVSxL I16) addr
836 return (Any I32 code)
840 #if x86_64_TARGET_ARCH
842 -- catch simple cases of zero- or sign-extended load
843 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
844 code <- intLoadCode (MOVZxL I8) addr
845 return (Any I64 code)
847 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
848 code <- intLoadCode (MOVSxL I8) addr
849 return (Any I64 code)
851 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
852 code <- intLoadCode (MOVZxL I16) addr
853 return (Any I64 code)
855 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
856 code <- intLoadCode (MOVSxL I16) addr
857 return (Any I64 code)
859 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
860 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
861 return (Any I64 code)
863 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
864 code <- intLoadCode (MOVSxL I32) addr
865 return (Any I64 code)
869 #if x86_64_TARGET_ARCH
870 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
871 x_code <- getAnyReg x
872 lbl <- getNewLabelNat
874 code dst = x_code dst `appOL` toOL [
875 -- This is how gcc does it, so it can't be that bad:
876 LDATA ReadOnlyData16 [
879 CmmStaticLit (CmmInt 0x80000000 I32),
880 CmmStaticLit (CmmInt 0 I32),
881 CmmStaticLit (CmmInt 0 I32),
882 CmmStaticLit (CmmInt 0 I32)
884 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
885 -- xorps, so we need the 128-bit constant
886 -- ToDo: rip-relative
889 return (Any F32 code)
891 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
892 x_code <- getAnyReg x
893 lbl <- getNewLabelNat
895 -- This is how gcc does it, so it can't be that bad:
896 code dst = x_code dst `appOL` toOL [
897 LDATA ReadOnlyData16 [
900 CmmStaticLit (CmmInt 0x8000000000000000 I64),
901 CmmStaticLit (CmmInt 0 I64)
903 -- gcc puts an unpck here. Wonder if we need it.
904 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
905 -- xorpd, so we need the 128-bit constant
908 return (Any F64 code)
911 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
913 getRegister (CmmMachOp mop [x]) -- unary MachOps
916 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
917 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
920 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
921 MO_Not rep -> trivialUCode rep (NOT rep) x
924 -- TODO: these are only nops if the arg is not a fixed register that
925 -- can't be byte-addressed.
926 MO_U_Conv I32 I8 -> conversionNop I32 x
927 MO_S_Conv I32 I8 -> conversionNop I32 x
928 MO_U_Conv I16 I8 -> conversionNop I16 x
929 MO_S_Conv I16 I8 -> conversionNop I16 x
930 MO_U_Conv I32 I16 -> conversionNop I32 x
931 MO_S_Conv I32 I16 -> conversionNop I32 x
932 #if x86_64_TARGET_ARCH
933 MO_U_Conv I64 I32 -> conversionNop I64 x
934 MO_S_Conv I64 I32 -> conversionNop I64 x
935 MO_U_Conv I64 I16 -> conversionNop I64 x
936 MO_S_Conv I64 I16 -> conversionNop I64 x
937 MO_U_Conv I64 I8 -> conversionNop I64 x
938 MO_S_Conv I64 I8 -> conversionNop I64 x
941 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
942 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
945 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
946 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
947 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
949 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
950 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
951 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
953 #if x86_64_TARGET_ARCH
954 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
955 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
956 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
957 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
958 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
959 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
960 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
961 -- However, we don't want the register allocator to throw it
962 -- away as an unnecessary reg-to-reg move, so we keep it in
963 -- the form of a movzl and print it as a movl later.
967 MO_S_Conv F32 F64 -> conversionNop F64 x
968 MO_S_Conv F64 F32 -> conversionNop F32 x
970 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
971 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
975 | isFloatingRep from -> coerceFP2Int from to x
976 | isFloatingRep to -> coerceInt2FP from to x
978 other -> pprPanic "getRegister" (pprMachOp mop)
980 -- signed or unsigned extension.
981 integerExtend from to instr expr = do
982 (reg,e_code) <- if from == I8 then getByteReg expr
987 instr from (OpReg reg) (OpReg dst)
990 conversionNop new_rep expr
991 = do e_code <- getRegister expr
992 return (swizzleRegisterRep e_code new_rep)
995 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
996 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
998 MO_Eq F32 -> condFltReg EQQ x y
999 MO_Ne F32 -> condFltReg NE x y
1000 MO_S_Gt F32 -> condFltReg GTT x y
1001 MO_S_Ge F32 -> condFltReg GE x y
1002 MO_S_Lt F32 -> condFltReg LTT x y
1003 MO_S_Le F32 -> condFltReg LE x y
1005 MO_Eq F64 -> condFltReg EQQ x y
1006 MO_Ne F64 -> condFltReg NE x y
1007 MO_S_Gt F64 -> condFltReg GTT x y
1008 MO_S_Ge F64 -> condFltReg GE x y
1009 MO_S_Lt F64 -> condFltReg LTT x y
1010 MO_S_Le F64 -> condFltReg LE x y
1012 MO_Eq rep -> condIntReg EQQ x y
1013 MO_Ne rep -> condIntReg NE x y
1015 MO_S_Gt rep -> condIntReg GTT x y
1016 MO_S_Ge rep -> condIntReg GE x y
1017 MO_S_Lt rep -> condIntReg LTT x y
1018 MO_S_Le rep -> condIntReg LE x y
1020 MO_U_Gt rep -> condIntReg GU x y
1021 MO_U_Ge rep -> condIntReg GEU x y
1022 MO_U_Lt rep -> condIntReg LU x y
1023 MO_U_Le rep -> condIntReg LEU x y
1025 #if i386_TARGET_ARCH
1026 MO_Add F32 -> trivialFCode F32 GADD x y
1027 MO_Sub F32 -> trivialFCode F32 GSUB x y
1029 MO_Add F64 -> trivialFCode F64 GADD x y
1030 MO_Sub F64 -> trivialFCode F64 GSUB x y
1032 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1033 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1036 #if x86_64_TARGET_ARCH
1037 MO_Add F32 -> trivialFCode F32 ADD x y
1038 MO_Sub F32 -> trivialFCode F32 SUB x y
1040 MO_Add F64 -> trivialFCode F64 ADD x y
1041 MO_Sub F64 -> trivialFCode F64 SUB x y
1043 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1044 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1047 MO_Add rep -> add_code rep x y
1048 MO_Sub rep -> sub_code rep x y
1050 MO_S_Quot rep -> div_code rep True True x y
1051 MO_S_Rem rep -> div_code rep True False x y
1052 MO_U_Quot rep -> div_code rep False True x y
1053 MO_U_Rem rep -> div_code rep False False x y
1055 #if i386_TARGET_ARCH
1056 MO_Mul F32 -> trivialFCode F32 GMUL x y
1057 MO_Mul F64 -> trivialFCode F64 GMUL x y
1060 #if x86_64_TARGET_ARCH
1061 MO_Mul F32 -> trivialFCode F32 MUL x y
1062 MO_Mul F64 -> trivialFCode F64 MUL x y
1065 MO_Mul rep -> let op = IMUL rep in
1066 trivialCode rep op (Just op) x y
1068 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1070 MO_And rep -> let op = AND rep in
1071 trivialCode rep op (Just op) x y
1072 MO_Or rep -> let op = OR rep in
1073 trivialCode rep op (Just op) x y
1074 MO_Xor rep -> let op = XOR rep in
1075 trivialCode rep op (Just op) x y
1077 {- Shift ops on x86s have constraints on their source, it
1078 either has to be Imm, CL or 1
1079 => trivialCode is not restrictive enough (sigh.)
1081 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1082 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1083 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1085 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1087 --------------------
-- imulMayOflo: signed multiply-may-overflow test (x86).  Performs a
-- widening IMUL2 (double-length product ends up in %edx:%eax), then
-- subtracts the high word from the sign-extension of the low word; the
-- fixed result in %eax is zero exactly when the product fitted, i.e.
-- no overflow occurred.
1088 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1089 imulMayOflo rep a b = do
1090 (a_reg, a_code) <- getNonClobberedReg a
1091 b_code <- getAnyReg b
-- shift_amt: amount SAR uses below to sign-extend the low word
-- (presumably the bit-width of rep minus one; the per-rep alternatives
-- are not visible in this extract -- TODO confirm).
1093 shift_amt = case rep of
1096 _ -> panic "shift_amt"
1098 code = a_code `appOL` b_code eax `appOL`
1100 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1101 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1102 -- sign extend lower part
1103 SUB rep (OpReg edx) (OpReg eax)
1104 -- compare against upper
1105 -- eax==0 if high part == sign extended low part
1108 return (Fixed rep eax code)
1110 --------------------
-- shift_code: x86 shift instructions may only take an immediate, %cl,
-- or 1 as the shift-amount operand, so shifts cannot go through the
-- generic trivialCode path.
1111 shift_code :: MachRep
1112 -> (Operand -> Operand -> Instr)
1117 {- Case1: shift length as immediate -}
1118 shift_code rep instr x y@(CmmLit lit) = do
1119 x_code <- getAnyReg x
1122 = x_code dst `snocOL`
1123 instr (OpImm (litToImm lit)) (OpReg dst)
1125 return (Any rep code)
1127 {- Case2: shift length is complex (non-immediate) -}
-- The amount is read from %ecx (i.e. %cl) by the shift instruction; x
-- must survive the evaluation of y, hence getNonClobberedReg.
1128 shift_code rep instr x y{-amount-} = do
1129 (x_reg, x_code) <- getNonClobberedReg x
1130 y_code <- getAnyReg y
1132 code = x_code `appOL`
1134 instr (OpReg ecx) (OpReg x_reg)
1136 return (Fixed rep x_reg code)
1138 --------------------
-- add_code: integer addition.  A literal second operand is routed
-- through add_int (the three-operand form); anything else uses a plain
-- two-operand ADD, which is commutative, so ADD is also supplied as the
-- reversed-operand variant.
add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
add_code rep e (CmmLit (CmmInt n _)) = add_int rep e n
add_code rep e1 e2 = trivialCode rep (ADD rep) (Just (ADD rep)) e1 e2
1143 --------------------
-- sub_code: integer subtraction.  Subtracting a literal is rewritten as
-- adding its negation (three-operand add_int form); otherwise a plain
-- SUB is used, with no reversed variant since SUB is not commutative.
sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
sub_code rep e (CmmLit (CmmInt n _)) = add_int rep e (negate n)
sub_code rep e1 e2 = trivialCode rep (SUB rep) Nothing e1 e2
1148 -- our three-operand add instruction:
-- add_int: reg + literal using an effective-address operand
-- [x_reg + imm]; the emitting instruction itself is elided in this
-- extract (presumably an LEA-style three-operand add -- TODO confirm).
1149 add_int rep x y = do
1150 (x_reg, x_code) <- getSomeReg x
1152 imm = ImmInt (fromInteger y)
1156 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1159 return (Any rep code)
1161 ----------------------
-- div_code: x86 division/remainder.  The dividend lives in %edx:%eax:
-- 'widen' either sign-extends %eax into %edx (CLTD, signed case) or
-- zeroes %edx (unsigned case), then the divide instruction (IDIV when
-- signed; the unsigned alternative is elided here, presumably DIV)
-- divides by y.  The quotient is returned from %eax; the remainder case
-- is elided but is presumably %edx -- TODO confirm.
1162 div_code rep signed quotient x y = do
1163 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1164 x_code <- getAnyReg x
1166 widen | signed = CLTD rep
1167 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1169 instr | signed = IDIV
1172 code = y_code `appOL`
1174 toOL [widen, instr rep y_op]
1176 result | quotient = eax
1180 return (Fixed rep result code)
-- getRegister: loads and literals (x86 / x86_64).  Several guard lines
-- and local bindings are elided in this extract.
1183 getRegister (CmmLoad mem pk)
1186 Amode src mem_code <- getAmode mem
1188 code dst = mem_code `snocOL`
1189 IF_ARCH_i386(GLD pk src dst,
1190 MOV pk (OpAddr src) (OpReg dst))
1192 return (Any pk code)
1194 #if i386_TARGET_ARCH
1195 getRegister (CmmLoad mem pk)
1198 code <- intLoadCode (instr pk) mem
1199 return (Any pk code)
1201 instr I8 = MOVZxL pk
1204 -- we always zero-extend 8-bit loads, if we
1205 -- can't think of anything better. This is because
1206 -- we can't guarantee access to an 8-bit variant of every register
1207 -- (esi and edi don't have 8-bit variants), so to make things
1208 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1211 #if x86_64_TARGET_ARCH
1212 -- Simpler memory load code on x86_64
1213 getRegister (CmmLoad mem pk)
1215 code <- intLoadCode (MOV pk) mem
1216 return (Any pk code)
-- A zero literal is materialised with xor reg,reg rather than mov $0.
1219 getRegister (CmmLit (CmmInt 0 rep))
1221 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1222 adj_rep = case rep of I64 -> I32; _ -> rep
1223 rep1 = IF_ARCH_i386( rep, adj_rep )
1225 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1227 return (Any rep code)
1229 #if x86_64_TARGET_ARCH
1230 -- optimisation for loading small literals on x86_64: take advantage
1231 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1232 -- instruction forms are shorter.
1233 getRegister (CmmLit lit)
1234 | I64 <- cmmLitRep lit, not (isBigLit lit)
1237 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1239 return (Any I64 code)
1241 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1243 -- note1: not the same as is64BitLit, because that checks for
1244 -- signed literals that fit in 32 bits, but we want unsigned
1246 -- note2: all labels are small, because we're assuming the
1247 -- small memory model (see gcc docs, -mcmodel=small).
-- Fallback literal case: a plain mov of the immediate into any register.
1250 getRegister (CmmLit lit)
1254 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1256 return (Any rep code)
1258 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- intLoadCode: given a load-style instruction and a memory expression,
-- produce destination-parameterised code that first computes the
-- address and then loads from it into the destination register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mk_load mem = do
  Amode addr addr_code <- getAmode mem
  return $ \dst -> addr_code `snocOL` mk_load (OpAddr addr) (OpReg dst)
1267 -- Compute an expression into *any* register, adding the appropriate
1268 -- move instruction if necessary.
1269 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-- (equation head elided) computes a Register via getRegister; presumably
-- finished by 'anyReg r' (defined just below) -- continuation not visible.
1271 r <- getRegister expr
-- anyReg: view a Register as destination-parameterised code.  An 'Any'
-- register already is one; a 'Fixed' register gets an explicit move
-- from its fixed location appended.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)         = return code
anyReg (Fixed rep src code) = return $ \dst -> code `snocOL` reg2reg rep src dst
1278 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1279 -- Fixed registers might not be byte-addressable, so we make sure we've
1280 -- got a temporary, inserting an extra reg copy if necessary.
1281 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1282 #if x86_64_TARGET_ARCH
1283 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 path: virtual registers are returned as-is; real (fixed)
-- registers are copied into a fresh temporary first.  (Some case
-- scaffolding lines are elided in this extract.)
1285 getByteReg expr = do
1286 r <- getRegister expr
1289 tmp <- getNewRegNat rep
1290 return (tmp, code tmp)
1292 | isVirtualReg reg -> return (reg,code)
1294 tmp <- getNewRegNat rep
1295 return (tmp, code `snocOL` reg2reg rep reg tmp)
1296 -- ToDo: could optimise slightly by checking for byte-addressable
1297 -- real registers, but that will happen very rarely if at all.
1300 -- Another variant: this time we want the result in a register that cannot
1301 -- be modified by code to evaluate an arbitrary expression.
1302 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1303 getNonClobberedReg expr = do
1304 r <- getRegister expr
1307 tmp <- getNewRegNat rep
1308 return (tmp, code tmp)
1310 -- only free regs can be clobbered
-- A fixed result in a free real register is copied into a fresh
-- temporary so later code cannot clobber it.
1311 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1312 tmp <- getNewRegNat rep
1313 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- reg2reg: register-to-register move.  On i386, floating values use the
-- FP-stack move GMOV; everything else a plain MOV.  (The equation head
-- and part of the CPP structure are elided in this extract.)
1317 reg2reg :: MachRep -> Reg -> Reg -> Instr
1319 #if i386_TARGET_ARCH
1320 | isFloatingRep rep = GMOV src dst
1322 | otherwise = MOV rep (OpReg src) (OpReg dst)
1324 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1326 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1328 #if sparc_TARGET_ARCH
-- getRegister, SPARC: float/double literals are placed in the data
-- segment behind a fresh label and loaded via SETHI/LD; then unary
-- PrimOps, no-op conversions, widenings, and libm-routed float ops.
1330 getRegister (StFloat d)
1331 = getBlockIdNat `thenNat` \ lbl ->
1332 getNewRegNat PtrRep `thenNat` \ tmp ->
1333 let code dst = toOL [
1334 SEGMENT DataSegment,
1336 DATA F [ImmFloat d],
1337 SEGMENT TextSegment,
1338 SETHI (HI (ImmCLbl lbl)) tmp,
1339 LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1341 return (Any F32 code)
1343 getRegister (StDouble d)
1344 = getBlockIdNat `thenNat` \ lbl ->
1345 getNewRegNat PtrRep `thenNat` \ tmp ->
1346 let code dst = toOL [
1347 SEGMENT DataSegment,
1349 DATA DF [ImmDouble d],
1350 SEGMENT TextSegment,
1351 SETHI (HI (ImmCLbl lbl)) tmp,
1352 LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1354 return (Any F64 code)
1357 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1359 MO_NatS_Neg -> trivialUCode (SUB False False g0) x
1360 MO_Nat_Not -> trivialUCode (XNOR False g0) x
1361 MO_32U_to_8U -> trivialCode (AND False) x (StInt 255)
1363 MO_F32_Neg -> trivialUFCode F32 (FNEG F) x
1364 MO_F64_Neg -> trivialUFCode F64 (FNEG DF) x
1366 MO_F64_to_Flt -> coerceDbl2Flt x
1367 MO_F32_to_Dbl -> coerceFlt2Dbl x
1369 MO_F32_to_NatS -> coerceFP2Int F32 x
1370 MO_NatS_to_Flt -> coerceInt2FP F32 x
1371 MO_F64_to_NatS -> coerceFP2Int F64 x
1372 MO_NatS_to_Dbl -> coerceInt2FP F64 x
1374 -- Conversions which are a nop on sparc
1375 MO_32U_to_NatS -> conversionNop IntRep x
1376 MO_32S_to_NatS -> conversionNop IntRep x
1377 MO_NatS_to_32U -> conversionNop WordRep x
1378 MO_32U_to_NatU -> conversionNop WordRep x
1380 MO_NatU_to_NatS -> conversionNop IntRep x
1381 MO_NatS_to_NatU -> conversionNop WordRep x
1382 MO_NatP_to_NatU -> conversionNop WordRep x
1383 MO_NatU_to_NatP -> conversionNop PtrRep x
1384 MO_NatS_to_NatP -> conversionNop PtrRep x
1385 MO_NatP_to_NatS -> conversionNop IntRep x
1387 -- zero- and sign-extending widenings (integerExtend's Bool selects signed)
1388 MO_8U_to_32U -> integerExtend False 24 x
1389 MO_8U_to_NatU -> integerExtend False 24 x
1390 MO_8S_to_NatS -> integerExtend True 24 x
1391 MO_16U_to_NatU -> integerExtend False 16 x
1392 MO_16S_to_NatS -> integerExtend True 16 x
-- Single-precision libm ops are promoted to double before the call.
1395 let fixed_x = if is_float_op -- promote to double
1396 then CmmMachOp MO_F32_to_Dbl [x]
1399 getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
-- integerExtend: widen by shifting left then (arithmetic/logical) right.
1401 integerExtend signed nBits x
1403 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1404 [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1406 conversionNop new_rep expr
1407 = getRegister expr `thenNat` \ e_code ->
1408 return (swizzleRegisterRep e_code new_rep)
-- Table of (is-single-precision, libm routine name) for float PrimOps.
1412 MO_F32_Exp -> (True, FSLIT("exp"))
1413 MO_F32_Log -> (True, FSLIT("log"))
1414 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
1416 MO_F32_Sin -> (True, FSLIT("sin"))
1417 MO_F32_Cos -> (True, FSLIT("cos"))
1418 MO_F32_Tan -> (True, FSLIT("tan"))
1420 MO_F32_Asin -> (True, FSLIT("asin"))
1421 MO_F32_Acos -> (True, FSLIT("acos"))
1422 MO_F32_Atan -> (True, FSLIT("atan"))
1424 MO_F32_Sinh -> (True, FSLIT("sinh"))
1425 MO_F32_Cosh -> (True, FSLIT("cosh"))
1426 MO_F32_Tanh -> (True, FSLIT("tanh"))
1428 MO_F64_Exp -> (False, FSLIT("exp"))
1429 MO_F64_Log -> (False, FSLIT("log"))
1430 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
1432 MO_F64_Sin -> (False, FSLIT("sin"))
1433 MO_F64_Cos -> (False, FSLIT("cos"))
1434 MO_F64_Tan -> (False, FSLIT("tan"))
1436 MO_F64_Asin -> (False, FSLIT("asin"))
1437 MO_F64_Acos -> (False, FSLIT("acos"))
1438 MO_F64_Atan -> (False, FSLIT("atan"))
1440 MO_F64_Sinh -> (False, FSLIT("sinh"))
1441 MO_F64_Cosh -> (False, FSLIT("cosh"))
1442 MO_F64_Tanh -> (False, FSLIT("tanh"))
-- NOTE(review): this is the unary-op fallthrough, yet the panic message
-- says "binary CmmMachOp (2)" -- the message looks misleading.
1444 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
-- getRegister, SPARC dyadic PrimOps: comparisons, arithmetic, division
-- via libc helper routines, bitwise and shift ops.  Several lines are
-- elided in this extract.
1448 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1450 MO_32U_Gt -> condIntReg GTT x y
1451 MO_32U_Ge -> condIntReg GE x y
1452 MO_32U_Eq -> condIntReg EQQ x y
1453 MO_32U_Ne -> condIntReg NE x y
1454 MO_32U_Lt -> condIntReg LTT x y
1455 MO_32U_Le -> condIntReg LE x y
1457 MO_Nat_Eq -> condIntReg EQQ x y
1458 MO_Nat_Ne -> condIntReg NE x y
1460 MO_NatS_Gt -> condIntReg GTT x y
1461 MO_NatS_Ge -> condIntReg GE x y
1462 MO_NatS_Lt -> condIntReg LTT x y
1463 MO_NatS_Le -> condIntReg LE x y
1465 MO_NatU_Gt -> condIntReg GU x y
1466 MO_NatU_Ge -> condIntReg GEU x y
1467 MO_NatU_Lt -> condIntReg LU x y
1468 MO_NatU_Le -> condIntReg LEU x y
1470 MO_F32_Gt -> condFltReg GTT x y
1471 MO_F32_Ge -> condFltReg GE x y
1472 MO_F32_Eq -> condFltReg EQQ x y
1473 MO_F32_Ne -> condFltReg NE x y
1474 MO_F32_Lt -> condFltReg LTT x y
1475 MO_F32_Le -> condFltReg LE x y
1477 MO_F64_Gt -> condFltReg GTT x y
1478 MO_F64_Ge -> condFltReg GE x y
1479 MO_F64_Eq -> condFltReg EQQ x y
1480 MO_F64_Ne -> condFltReg NE x y
1481 MO_F64_Lt -> condFltReg LTT x y
1482 MO_F64_Le -> condFltReg LE x y
1484 MO_Nat_Add -> trivialCode (ADD False False) x y
1485 MO_Nat_Sub -> trivialCode (SUB False False) x y
1487 MO_NatS_Mul -> trivialCode (SMUL False) x y
1488 MO_NatU_Mul -> trivialCode (UMUL False) x y
1489 MO_NatS_MulMayOflo -> imulMayOflo x y
1491 -- ToDo: teach about V8+ SPARC div instructions
-- Division is farmed out to the libc .div/.rem/.udiv/.urem routines.
1492 MO_NatS_Quot -> idiv FSLIT(".div") x y
1493 MO_NatS_Rem -> idiv FSLIT(".rem") x y
1494 MO_NatU_Quot -> idiv FSLIT(".udiv") x y
1495 MO_NatU_Rem -> idiv FSLIT(".urem") x y
1497 MO_F32_Add -> trivialFCode F32 FADD x y
1498 MO_F32_Sub -> trivialFCode F32 FSUB x y
1499 MO_F32_Mul -> trivialFCode F32 FMUL x y
1500 MO_F32_Div -> trivialFCode F32 FDIV x y
1502 MO_F64_Add -> trivialFCode F64 FADD x y
1503 MO_F64_Sub -> trivialFCode F64 FSUB x y
1504 MO_F64_Mul -> trivialFCode F64 FMUL x y
1505 MO_F64_Div -> trivialFCode F64 FDIV x y
1507 MO_Nat_And -> trivialCode (AND False) x y
1508 MO_Nat_Or -> trivialCode (OR False) x y
1509 MO_Nat_Xor -> trivialCode (XOR False) x y
1511 MO_Nat_Shl -> trivialCode SLL x y
1512 MO_Nat_Shr -> trivialCode SRL x y
1513 MO_Nat_Sar -> trivialCode SRA x y
-- pow has no single-precision libm entry here: promote to double.
1515 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1516 [promote x, promote y])
1517 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1518 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1521 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1523 idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1525 --------------------
-- SPARC overflow test: widening SMUL, then compare the high word of the
-- result against the sign-extension of the low word (SRA by 31); dst is
-- zero when no overflow occurred.
1526 imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1528 = getNewRegNat IntRep `thenNat` \ t1 ->
1529 getNewRegNat IntRep `thenNat` \ t2 ->
1530 getNewRegNat IntRep `thenNat` \ res_lo ->
1531 getNewRegNat IntRep `thenNat` \ res_hi ->
1532 getRegister a1 `thenNat` \ reg1 ->
1533 getRegister a2 `thenNat` \ reg2 ->
1534 let code1 = registerCode reg1 t1
1535 code2 = registerCode reg2 t2
1536 src1 = registerName reg1 t1
1537 src2 = registerName reg2 t2
1538 code dst = code1 `appOL` code2 `appOL`
1540 SMUL False src1 (RIReg src2) res_lo,
1542 SRA res_lo (RIImm (ImmInt 31)) res_lo,
1543 SUB False False res_lo (RIReg res_hi) dst
1546 return (Any IntRep code)
-- Loads and integer/pointer literals (SETHI/OR pair for big immediates).
1548 getRegister (CmmLoad pk mem) = do
1549 Amode src code <- getAmode mem
1551 size = primRepToSize pk
1552 code__2 dst = code `snocOL` LD size src dst
1554 return (Any pk code__2)
1556 getRegister (StInt i)
1559 src = ImmInt (fromInteger i)
1560 code dst = unitOL (OR False g0 (RIImm src) dst)
1562 return (Any IntRep code)
1568 SETHI (HI imm__2) dst,
1569 OR False dst (RIImm (LO imm__2)) dst]
1571 return (Any PtrRep code)
1573 = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1576 imm__2 = case imm of Just x -> x
1578 #endif /* sparc_TARGET_ARCH */
1580 #if powerpc_TARGET_ARCH
-- getRegister, PowerPC: loads, zero-/sign-extending load fusions, and
-- unary MachOps.  Several guard lines are elided in this extract.
1581 getRegister (CmmLoad mem pk)
1584 Amode addr addr_code <- getAmode mem
1585 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1586 addr_code `snocOL` LD pk dst addr
1587 return (Any pk code)
1589 -- catch simple cases of zero- or sign-extended load
1590 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1591 Amode addr addr_code <- getAmode mem
1592 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1594 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1596 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1597 Amode addr addr_code <- getAmode mem
1598 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- LA (load algebraic, lha) performs the sign-extending halfword load.
1600 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1601 Amode addr addr_code <- getAmode mem
1602 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1604 getRegister (CmmMachOp mop [x]) -- unary MachOps
1606 MO_Not rep -> trivialUCode rep NOT x
1608 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1609 MO_S_Conv F32 F64 -> conversionNop F64 x
1612 | from == to -> conversionNop to x
1613 | isFloatingRep from -> coerceFP2Int from to x
1614 | isFloatingRep to -> coerceInt2FP from to x
1616 -- narrowing is a nop: we treat the high bits as undefined
1617 MO_S_Conv I32 to -> conversionNop to x
1618 MO_S_Conv I16 I8 -> conversionNop I8 x
1619 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1620 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1623 | from == to -> conversionNop to x
1624 -- narrowing is a nop: we treat the high bits as undefined
1625 MO_U_Conv I32 to -> conversionNop to x
1626 MO_U_Conv I16 I8 -> conversionNop I8 x
-- Unsigned widening is an AND mask; signed widening uses EXTS above.
1627 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1628 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1630 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1631 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1632 MO_S_Neg rep -> trivialUCode rep NEG x
1635 conversionNop new_rep expr
1636 = do e_code <- getRegister expr
1637 return (swizzleRegisterRep e_code new_rep)
-- getRegister, PowerPC dyadic MachOps and literals.  Sub-word integer
-- operands are first widened to I32 via extendSExpr/extendUExpr.
1639 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1641 MO_Eq F32 -> condFltReg EQQ x y
1642 MO_Ne F32 -> condFltReg NE x y
1644 MO_S_Gt F32 -> condFltReg GTT x y
1645 MO_S_Ge F32 -> condFltReg GE x y
1646 MO_S_Lt F32 -> condFltReg LTT x y
1647 MO_S_Le F32 -> condFltReg LE x y
1649 MO_Eq F64 -> condFltReg EQQ x y
1650 MO_Ne F64 -> condFltReg NE x y
1652 MO_S_Gt F64 -> condFltReg GTT x y
1653 MO_S_Ge F64 -> condFltReg GE x y
1654 MO_S_Lt F64 -> condFltReg LTT x y
1655 MO_S_Le F64 -> condFltReg LE x y
1657 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1658 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1660 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1661 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1662 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1663 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1665 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1666 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1667 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1668 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1670 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1671 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1672 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1673 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1675 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1676 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1677 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1678 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1680 -- optimize addition with 32-bit immediate
-- NOTE(review): the guard tests that (-imm) fits a signed immediate but
-- then emits imm itself; surrounding lines are elided here, so whether
-- this is intentional cannot be judged from this extract -- TODO confirm
-- against the full source.
1684 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1685 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
-- Otherwise build the 32-bit constant with an ADDIS (high half adjusted
-- via HA) followed by an ADD of the low half.
1688 (src, srcCode) <- getSomeReg x
1689 let imm = litToImm lit
1690 code dst = srcCode `appOL` toOL [
1691 ADDIS dst src (HA imm),
1692 ADD dst dst (RIImm (LO imm))
1694 return (Any I32 code)
1695 _ -> trivialCode I32 True ADD x y
1697 MO_Add rep -> trivialCode rep True ADD x y
1699 case y of -- subfi ('subtract from' with immediate) doesn't exist
1700 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1701 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
-- SUBF's operand order: presumably computes x - y with y first, per the
-- subf rD,rA,rB = rB - rA convention -- TODO confirm.
1702 _ -> trivialCodeNoImm rep SUBF y x
1704 MO_Mul rep -> trivialCode rep True MULLW x y
1706 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1708 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1709 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1711 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1712 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1714 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1715 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1717 MO_And rep -> trivialCode rep False AND x y
1718 MO_Or rep -> trivialCode rep False OR x y
1719 MO_Xor rep -> trivialCode rep False XOR x y
1721 MO_Shl rep -> trivialCode rep False SLW x y
1722 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1723 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- Integer literals that fit a signed immediate become a single LI.
1725 getRegister (CmmLit (CmmInt i rep))
1726 | Just imm <- makeImmediate rep True i
1728 code dst = unitOL (LI dst imm)
1730 return (Any rep code)
-- Float literals are placed in a read-only data section and loaded
-- through a (possibly PIC-indirected) address.
1732 getRegister (CmmLit (CmmFloat f frep)) = do
1733 lbl <- getNewLabelNat
1734 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1735 Amode addr addr_code <- getAmode dynRef
1737 LDATA ReadOnlyData [CmmDataLabel lbl,
1738 CmmStaticLit (CmmFloat f frep)]
1739 `consOL` (addr_code `snocOL` LD frep dst addr)
1740 return (Any frep code)
-- Remaining literals: built with a high-half load plus OR of the low half.
1742 getRegister (CmmLit lit)
1743 = let rep = cmmLitRep lit
1747 OR dst dst (RIImm (LO imm))
1749 in return (Any rep code)
1751 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extendSExpr / extendUExpr: wrap an integer expression narrower than
-- I32 in an explicit sign- (S) or zero- (U) extending conversion to
-- I32; an expression that is already I32 is passed through untouched.
extendSExpr I32 e = e
extendSExpr rep e = CmmMachOp (MO_S_Conv rep I32) [e]
extendUExpr I32 e = e
extendUExpr rep e = CmmMachOp (MO_U_Conv rep I32) [e]
1760 #endif /* powerpc_TARGET_ARCH */
1763 -- -----------------------------------------------------------------------------
1764 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An Amode pairs an addressing mode with the code that must run first
-- to put the registers it mentions into place.
1766 data Amode = Amode AddrMode InstrBlock
1769 Now, given a tree (the argument to an CmmLoad) that references memory,
1770 produce a suitable addressing mode.
1772 A Rule of the Game (tm) for Amodes: use of the addr bit must
1773 immediately follow use of the code part, since the code part puts
1774 values in registers which the addr then refers to. So you can't put
1775 anything in between, lest it overwrite some of those registers. If
1776 you need to do some other computation between the code part and use of
1777 the addr bit, first store the effective address from the amode in a
1778 temporary, then do the other computation, and then use the temporary:
1782 ... other computation ...
1786 getAmode :: CmmExpr -> NatM Amode
-- CmmRegOff is normalised into a MachOp tree first, then retried.
1787 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1789 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1791 #if alpha_TARGET_ARCH
-- Alpha: fold an integer +/- literal offset into a register+immediate
-- addressing mode; otherwise compute the whole address into a register.
1793 getAmode (StPrim IntSubOp [x, StInt i])
1794 = getNewRegNat PtrRep `thenNat` \ tmp ->
1795 getRegister x `thenNat` \ register ->
1797 code = registerCode register tmp
1798 reg = registerName register tmp
1799 off = ImmInt (-(fromInteger i))
1801 return (Amode (AddrRegImm reg off) code)
1803 getAmode (StPrim IntAddOp [x, StInt i])
1804 = getNewRegNat PtrRep `thenNat` \ tmp ->
1805 getRegister x `thenNat` \ register ->
1807 code = registerCode register tmp
1808 reg = registerName register tmp
1809 off = ImmInt (fromInteger i)
1811 return (Amode (AddrRegImm reg off) code)
1815 = return (Amode (AddrImm imm__2) id)
1818 imm__2 = case imm of Just x -> x
1821 = getNewRegNat PtrRep `thenNat` \ tmp ->
1822 getRegister other `thenNat` \ register ->
1824 code = registerCode register tmp
1825 reg = registerName register tmp
1827 return (Amode (AddrReg reg) code)
1829 #endif /* alpha_TARGET_ARCH */
1831 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1833 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1835 -- This is all just ridiculous, since it carefully undoes
1836 -- what mangleIndexTree has just done.
-- reg +/- immediate offset -> base+displacement addressing mode.
1837 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1838 | not (is64BitLit lit)
1839 -- ASSERT(rep == I32)???
1840 = do (x_reg, x_code) <- getSomeReg x
1841 let off = ImmInt (-(fromInteger i))
1842 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1844 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1845 | not (is64BitLit lit)
1846 -- ASSERT(rep == I32)???
1847 = do (x_reg, x_code) <- getSomeReg x
1848 let off = ImmInt (fromInteger i)
1849 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1851 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1852 -- recognised by the next rule.
1853 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1855 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << {0,1,2,3}) -> scaled-index addressing mode.
1857 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1858 [y, CmmLit (CmmInt shift _)]])
1859 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1860 = do (x_reg, x_code) <- getNonClobberedReg x
1861 -- x must be in a temp, because it has to stay live over y_code
1862 -- we could compare x_reg and y_reg and do something better here...
1863 (y_reg, y_code) <- getSomeReg y
1865 code = x_code `appOL` y_code
1866 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1867 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-- Small literals can be absolute addresses; everything else is computed
-- into a register and used as a plain base.
1870 getAmode (CmmLit lit) | not (is64BitLit lit)
1871 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1874 (reg,code) <- getSomeReg expr
1875 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1877 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1879 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1881 #if sparc_TARGET_ARCH
-- SPARC: reg +/- immediate offset, reg+reg, or SETHI-built absolute
-- address.  Guard lines checking immediate fit are elided here.
1883 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1885 = getNewRegNat PtrRep `thenNat` \ tmp ->
1886 getRegister x `thenNat` \ register ->
1888 code = registerCode register tmp
1889 reg = registerName register tmp
1890 off = ImmInt (-(fromInteger i))
1892 return (Amode (AddrRegImm reg off) code)
1895 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1897 = getNewRegNat PtrRep `thenNat` \ tmp ->
1898 getRegister x `thenNat` \ register ->
1900 code = registerCode register tmp
1901 reg = registerName register tmp
1902 off = ImmInt (fromInteger i)
1904 return (Amode (AddrRegImm reg off) code)
1906 getAmode (CmmMachOp MO_Nat_Add [x, y])
1907 = getNewRegNat PtrRep `thenNat` \ tmp1 ->
1908 getNewRegNat IntRep `thenNat` \ tmp2 ->
1909 getRegister x `thenNat` \ register1 ->
1910 getRegister y `thenNat` \ register2 ->
1912 code1 = registerCode register1 tmp1
1913 reg1 = registerName register1 tmp1
1914 code2 = registerCode register2 tmp2
1915 reg2 = registerName register2 tmp2
1916 code__2 = code1 `appOL` code2
1918 return (Amode (AddrRegReg reg1 reg2) code__2)
-- Absolute address: SETHI loads the high bits, LO supplies the rest.
1922 = getNewRegNat PtrRep `thenNat` \ tmp ->
1924 code = unitOL (SETHI (HI imm__2) tmp)
1926 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1929 imm__2 = case imm of Just x -> x
1932 = getNewRegNat PtrRep `thenNat` \ tmp ->
1933 getRegister other `thenNat` \ register ->
1935 code = registerCode register tmp
1936 reg = registerName register tmp
1939 return (Amode (AddrRegImm reg off) code)
1941 #endif /* sparc_TARGET_ARCH */
1943 #ifdef powerpc_TARGET_ARCH
-- PowerPC: fold literal offsets that fit a signed 16-bit immediate into
-- register+immediate modes; larger 32-bit offsets go through an ADDIS
-- of the high half (HA) with the low half (LO) left in the displacement.
1944 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1945 | Just off <- makeImmediate I32 True (-i)
1947 (reg, code) <- getSomeReg x
1948 return (Amode (AddrRegImm reg off) code)
1951 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1952 | Just off <- makeImmediate I32 True i
1954 (reg, code) <- getSomeReg x
1955 return (Amode (AddrRegImm reg off) code)
1957 -- optimize addition with 32-bit immediate
1959 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1961 tmp <- getNewRegNat I32
1962 (src, srcCode) <- getSomeReg x
1963 let imm = litToImm lit
1964 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1965 return (Amode (AddrRegImm tmp (LO imm)) code)
1967 getAmode (CmmLit lit)
1969 tmp <- getNewRegNat I32
1970 let imm = litToImm lit
1971 code = unitOL (LIS tmp (HA imm))
1972 return (Amode (AddrRegImm tmp (LO imm)) code)
1974 getAmode (CmmMachOp (MO_Add I32) [x, y])
1976 (regX, codeX) <- getSomeReg x
1977 (regY, codeY) <- getSomeReg y
1978 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1982 (reg, code) <- getSomeReg other
1985 return (Amode (AddrRegImm reg off) code)
1986 #endif /* powerpc_TARGET_ARCH */
1988 -- -----------------------------------------------------------------------------
1989 -- getOperand: sometimes any operand will do.
1991 -- getNonClobberedOperand: the value of the operand will remain valid across
1992 -- the computation of an arbitrary expression, unless the expression
1993 -- is computed directly into a register which the operand refers to
1994 -- (see trivialCode where this function is used for an example).
1996 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- getNonClobberedOperand: like getOperand, but the operand must stay
-- valid across evaluation of an arbitrary expression; addressing modes
-- mentioning clobberable real registers are flattened with LEA first.
1998 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1999 #if x86_64_TARGET_ARCH
2000 getNonClobberedOperand (CmmLit lit)
2001 | isSuitableFloatingPointLit lit = do
2002 lbl <- getNewLabelNat
2003 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2005 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2007 getNonClobberedOperand (CmmLit lit)
2008 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2009 return (OpImm (litToImm lit), nilOL)
2010 getNonClobberedOperand (CmmLoad mem pk)
2011 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2012 Amode src mem_code <- getAmode mem
2014 if (amodeCouldBeClobbered src)
2016 tmp <- getNewRegNat wordRep
2017 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2018 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2021 return (OpAddr src', save_code `appOL` mem_code)
2022 getNonClobberedOperand e = do
2023 (reg, code) <- getNonClobberedReg e
2024 return (OpReg reg, code)
-- True if the addressing mode mentions any free (hence clobberable)
-- real register.
2026 amodeCouldBeClobbered :: AddrMode -> Bool
2027 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2029 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2030 regClobbered _ = False
2032 -- getOperand: the operand is not required to remain valid across the
2033 -- computation of an arbitrary expression.
2034 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2035 #if x86_64_TARGET_ARCH
-- Non-zero float literals are referenced rip-relative from a read-only
-- data section (x86_64 only).
2036 getOperand (CmmLit lit)
2037 | isSuitableFloatingPointLit lit = do
2038 lbl <- getNewLabelNat
2039 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2041 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2043 getOperand (CmmLit lit)
2044 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2045 return (OpImm (litToImm lit), nilOL)
2046 getOperand (CmmLoad mem pk)
2047 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2048 Amode src mem_code <- getAmode mem
2049 return (OpAddr src, mem_code)
-- Fallback (equation head elided): compute into some register.
2051 (reg, code) <- getSomeReg e
2052 return (OpReg reg, code)
-- isOperand: can this expression be used directly as an instruction
-- operand -- a memory load, or a literal that fits?  (A catch-all False
-- equation presumably follows; it is not visible in this extract.)
2054 isOperand :: CmmExpr -> Bool
2055 isOperand (CmmLoad _ _) = True
2056 isOperand (CmmLit lit) = not (is64BitLit lit)
2057 || isSuitableFloatingPointLit lit
2060 -- if we want a floating-point literal as an operand, we can
2061 -- use it directly from memory. However, if the literal is
2062 -- zero, we're better off generating it into a register using
-- A non-zero float literal may be used directly from memory; zero is
-- excluded because it is cheaper to materialise in a register (see the
-- note above).
isSuitableFloatingPointLit lit = case lit of
  CmmFloat f _ -> f /= 0.0
  _            -> False
-- getRegOrMem: integer loads may be used as in-memory operands; anything
-- else goes into a non-clobbered register.
2067 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2068 getRegOrMem (CmmLoad mem pk)
2069 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2070 Amode src mem_code <- getAmode mem
2071 return (OpAddr src, mem_code)
-- Fallback (equation head elided).
2073 (reg, code) <- getNonClobberedReg e
2074 return (OpReg reg, code)
2076 #if x86_64_TARGET_ARCH
-- is64BitLit: does this literal need a full 64-bit move?  Ints outside
-- the signed 32-bit range do; everything else (including labels, under
-- the small memory model) fits.
2077 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2078 -- assume that labels are in the range 0-2^31-1: this assumes the
2079 -- small memory model (see gcc docs, -mcmodel=small).
2081 is64BitLit x = False
2084 -- -----------------------------------------------------------------------------
2085 -- The 'CondCode' type: Condition codes passed up the tree.
-- CondCode carries a Bool (presumably whether the comparison was
-- floating-point -- TODO confirm), the condition to branch on, and the
-- code that sets the flags.
2087 data CondCode = CondCode Bool Cond InstrBlock
2089 -- Set up a condition code for a conditional branch.
2091 getCondCode :: CmmExpr -> NatM CondCode
2093 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2095 #if alpha_TARGET_ARCH
2096 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2097 #endif /* alpha_TARGET_ARCH */
2099 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2101 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2102 -- yes, they really do seem to want exactly the same!
-- Dispatch a dyadic comparison to condFltCode (float reps) or
-- condIntCode (integer reps), choosing the signed/unsigned condition.
2104 getCondCode (CmmMachOp mop [x, y])
2105 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2107 MO_Eq F32 -> condFltCode EQQ x y
2108 MO_Ne F32 -> condFltCode NE x y
2110 MO_S_Gt F32 -> condFltCode GTT x y
2111 MO_S_Ge F32 -> condFltCode GE x y
2112 MO_S_Lt F32 -> condFltCode LTT x y
2113 MO_S_Le F32 -> condFltCode LE x y
2115 MO_Eq F64 -> condFltCode EQQ x y
2116 MO_Ne F64 -> condFltCode NE x y
2118 MO_S_Gt F64 -> condFltCode GTT x y
2119 MO_S_Ge F64 -> condFltCode GE x y
2120 MO_S_Lt F64 -> condFltCode LTT x y
2121 MO_S_Le F64 -> condFltCode LE x y
2123 MO_Eq rep -> condIntCode EQQ x y
2124 MO_Ne rep -> condIntCode NE x y
2126 MO_S_Gt rep -> condIntCode GTT x y
2127 MO_S_Ge rep -> condIntCode GE x y
2128 MO_S_Lt rep -> condIntCode LTT x y
2129 MO_S_Le rep -> condIntCode LE x y
2131 MO_U_Gt rep -> condIntCode GU x y
2132 MO_U_Ge rep -> condIntCode GEU x y
2133 MO_U_Lt rep -> condIntCode LU x y
2134 MO_U_Le rep -> condIntCode LEU x y
2136 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2138 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2140 #elif powerpc_TARGET_ARCH
2142 -- almost the same as everywhere else - but we need to
2143 -- extend small integers to 32 bit first
2145 getCondCode (CmmMachOp mop [x, y])
2147 MO_Eq F32 -> condFltCode EQQ x y
2148 MO_Ne F32 -> condFltCode NE x y
2150 MO_S_Gt F32 -> condFltCode GTT x y
2151 MO_S_Ge F32 -> condFltCode GE x y
2152 MO_S_Lt F32 -> condFltCode LTT x y
2153 MO_S_Le F32 -> condFltCode LE x y
2155 MO_Eq F64 -> condFltCode EQQ x y
2156 MO_Ne F64 -> condFltCode NE x y
2158 MO_S_Gt F64 -> condFltCode GTT x y
2159 MO_S_Ge F64 -> condFltCode GE x y
2160 MO_S_Lt F64 -> condFltCode LTT x y
2161 MO_S_Le F64 -> condFltCode LE x y
-- Integer comparisons widen sub-word operands first: zero-extension for
-- equality and unsigned orderings, sign-extension for signed orderings.
2163 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2164 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2166 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2167 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2168 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2169 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2171 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2172 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2173 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2174 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2176 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2178 getCondCode other = panic "getCondCode(2)(powerpc)"
2184 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2185 -- passed back up the tree.
-- Both functions emit code that performs a comparison and return the
-- resulting CondCode; they never branch themselves.  Several clauses
-- exist per architecture, ordered from the most specific operand shape
-- (memory vs immediate, compare-against-zero) to the fully general case.
-- NOTE(review): interior lines ('let'/'where' keywords, some 'else'
-- branches, one '#endif') are missing from this excerpt -- do not edit
-- the code without the full original source.
2187 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2189 #if alpha_TARGET_ARCH
2190 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2191 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2192 #endif /* alpha_TARGET_ARCH */
2194 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2195 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2197 -- memory vs immediate
2198 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2199 Amode x_addr x_code <- getAmode x
2202 code = x_code `snocOL`
2203 CMP pk (OpImm imm) (OpAddr x_addr)
2205 return (CondCode False cond code)
-- comparison against zero: TEST reg,reg is shorter than CMP $0,reg
2208 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2209 (x_reg, x_code) <- getSomeReg x
2211 code = x_code `snocOL`
2212 TEST pk (OpReg x_reg) (OpReg x_reg)
2214 return (CondCode False cond code)
2216 -- anything vs operand
2217 condIntCode cond x y | isOperand y = do
2218 (x_reg, x_code) <- getNonClobberedReg x
2219 (y_op, y_code) <- getOperand y
2221 code = x_code `appOL` y_code `snocOL`
2222 CMP (cmmExprRep x) y_op (OpReg x_reg)
2224 return (CondCode False cond code)
2226 -- anything vs anything
2227 condIntCode cond x y = do
2228 (y_reg, y_code) <- getNonClobberedReg y
2229 (x_op, x_code) <- getRegOrMem x
2231 code = y_code `appOL`
2233 CMP (cmmExprRep x) (OpReg y_reg) x_op
2235 return (CondCode False cond code)
2238 #if i386_TARGET_ARCH
2239 condFltCode cond x y
2240 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2241 (x_reg, x_code) <- getNonClobberedReg x
2242 (y_reg, y_code) <- getSomeReg y
2244 code = x_code `appOL` y_code `snocOL`
2245 GCMP cond x_reg y_reg
2246 -- The GCMP insn does the test and sets the zero flag if comparable
2247 -- and true. Hence we always supply EQQ as the condition to test.
2248 return (CondCode True EQQ code)
2249 #endif /* i386_TARGET_ARCH */
2251 #if x86_64_TARGET_ARCH
2252 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2253 -- an operand, but the right must be a reg. We can probably do better
2254 -- than this general case...
2255 condFltCode cond x y = do
2256 (x_reg, x_code) <- getNonClobberedReg x
2257 (y_op, y_code) <- getOperand y
2259 code = x_code `appOL`
2261 CMP (cmmExprRep x) y_op (OpReg x_reg)
2262 -- NB(1): we need to use the unsigned comparison operators on the
2263 -- result of this comparison.
2265 return (CondCode True (condToUnsigned cond) code)
2268 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2270 #if sparc_TARGET_ARCH
-- SPARC: compare by subtracting into g0 with the condition-code bit set
-- (SUB False True ... g0 discards the result, keeping only the flags).
2272 condIntCode cond x (StInt y)
2274 = getRegister x `thenNat` \ register ->
2275 getNewRegNat IntRep `thenNat` \ tmp ->
2277 code = registerCode register tmp
2278 src1 = registerName register tmp
2279 src2 = ImmInt (fromInteger y)
2280 code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2282 return (CondCode False cond code__2)
2284 condIntCode cond x y
2285 = getRegister x `thenNat` \ register1 ->
2286 getRegister y `thenNat` \ register2 ->
2287 getNewRegNat IntRep `thenNat` \ tmp1 ->
2288 getNewRegNat IntRep `thenNat` \ tmp2 ->
2290 code1 = registerCode register1 tmp1
2291 src1 = registerName register1 tmp1
2292 code2 = registerCode register2 tmp2
2293 src2 = registerName register2 tmp2
2294 code__2 = code1 `appOL` code2 `snocOL`
2295 SUB False True src1 (RIReg src2) g0
2297 return (CondCode False cond code__2)
-- When the two float operands differ in width, the F32 side is first
-- promoted to F64 (FxTOy) so FCMP compares like with like.
2300 condFltCode cond x y
2301 = getRegister x `thenNat` \ register1 ->
2302 getRegister y `thenNat` \ register2 ->
2303 getNewRegNat (registerRep register1)
2305 getNewRegNat (registerRep register2)
2307 getNewRegNat F64 `thenNat` \ tmp ->
2309 promote x = FxTOy F DF x tmp
2311 pk1 = registerRep register1
2312 code1 = registerCode register1 tmp1
2313 src1 = registerName register1 tmp1
2315 pk2 = registerRep register2
2316 code2 = registerCode register2 tmp2
2317 src2 = registerName register2 tmp2
2321 code1 `appOL` code2 `snocOL`
2322 FCMP True (primRepToSize pk1) src1 src2
2323 else if pk1 == F32 then
2324 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2325 FCMP True DF tmp src2
2327 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2328 FCMP True DF src1 tmp
2330 return (CondCode True cond code__2)
2332 #endif /* sparc_TARGET_ARCH */
2334 #if powerpc_TARGET_ARCH
2335 -- ###FIXME: I16 and I8!
2336 condIntCode cond x (CmmLit (CmmInt y rep))
2337 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2339 (src1, code) <- getSomeReg x
2341 code' = code `snocOL`
2342 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2343 return (CondCode False cond code')
2345 condIntCode cond x y = do
2346 (src1, code1) <- getSomeReg x
2347 (src2, code2) <- getSomeReg y
2349 code' = code1 `appOL` code2 `snocOL`
2350 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2351 return (CondCode False cond code')
2353 condFltCode cond x y = do
2354 (src1, code1) <- getSomeReg x
2355 (src2, code2) <- getSomeReg y
2357 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2358 code'' = case cond of -- twiddle CR to handle unordered case
2359 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2360 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2363 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2364 return (CondCode True cond code'')
2366 #endif /* powerpc_TARGET_ARCH */
2368 -- -----------------------------------------------------------------------------
2369 -- Generating assignments
2371 -- Assignments are really at the heart of the whole code generation
2372 -- business. Almost all top-level nodes of any real importance are
2373 -- assignments, which correspond to loads, stores, or register
2374 -- transfers. If we're really lucky, some of the register transfers
2375 -- will go away, because we can use the destination register to
2376 -- complete the code generation for the right hand side. This only
2377 -- fails when the right hand side is forced into a fixed register
2378 -- (e.g. the result of a call).
-- The 'Mem' variants store an expression to a memory address (the
-- first CmmExpr is the address); the 'Reg' variants assign to a CmmReg.
-- Int vs Flt selects the integer or floating-point code path.
2380 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2381 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2383 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2384 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2386 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2388 #if alpha_TARGET_ARCH
-- Alpha integer assignment (dead code on other targets).
-- NOTE(review): this excerpt is missing interior lines (the 'in'
-- bodies and final returns); treat as read-only reference.
2390 assignIntCode pk (CmmLoad dst _) src
2391 = getNewRegNat IntRep `thenNat` \ tmp ->
2392 getAmode dst `thenNat` \ amode ->
2393 getRegister src `thenNat` \ register ->
2395 code1 = amodeCode amode []
2396 dst__2 = amodeAddr amode
2397 code2 = registerCode register tmp []
2398 src__2 = registerName register tmp
2399 sz = primRepToSize pk
2400 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2404 assignIntCode pk dst src
2405 = getRegister dst `thenNat` \ register1 ->
2406 getRegister src `thenNat` \ register2 ->
2408 dst__2 = registerName register1 zeroh
2409 code = registerCode register2 dst__2
2410 src__2 = registerName register2 dst__2
-- if the rhs is already in a fixed register, copy it into the
-- destination with an OR (register move); otherwise the rhs code
-- already targets dst__2 directly.
2411 code__2 = if isFixed register2
2412 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2417 #endif /* alpha_TARGET_ARCH */
2419 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2421 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2423 -- integer assignment to memory
-- Store 'src' at the address computed from 'addr' via a single MOV;
-- the source is either an immediate (small literal) or a register.
2424 assignMem_IntCode pk addr src = do
2425 Amode addr code_addr <- getAmode addr
2426 (code_src, op_src) <- get_op_RI src
2428 code = code_src `appOL`
2430 MOV pk op_src (OpAddr addr)
2431 -- NOTE: op_src is stable, so it will still be valid
2432 -- after code_addr. This may involve the introduction
2433 -- of an extra MOV to a temporary register, but we hope
2434 -- the register allocator will get rid of it.
-- Pick a register-or-immediate operand for the rhs: literals that fit
-- become immediates, everything else goes through a non-clobbered reg.
2438 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2439 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2440 = return (nilOL, OpImm (litToImm lit))
2442 = do (reg,code) <- getNonClobberedReg op
2443 return (code, OpReg reg)
2446 -- Assign; dst is a reg, rhs is mem
2447 assignReg_IntCode pk reg (CmmLoad src _) = do
2448 load_code <- intLoadCode (MOV pk) src
2449 return (load_code (getRegisterReg reg))
2451 -- dst is a reg, but src could be anything
2452 assignReg_IntCode pk reg src = do
2453 code <- getAnyReg src
2454 return (code (getRegisterReg reg))
2456 #endif /* i386_TARGET_ARCH */
2458 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2460 #if sparc_TARGET_ARCH
-- SPARC integer assignment: evaluate the rhs into a register, then ST
-- it at the computed address.
-- NOTE(review): interior lines ('let'/'in', the 'else' branch of the
-- isFixed test, final returns) are missing from this excerpt.
2462 assignMem_IntCode pk addr src
2463 = getNewRegNat IntRep `thenNat` \ tmp ->
2464 getAmode addr `thenNat` \ amode ->
2465 getRegister src `thenNat` \ register ->
2467 code1 = amodeCode amode
2468 dst__2 = amodeAddr amode
2469 code2 = registerCode register tmp
2470 src__2 = registerName register tmp
2471 sz = primRepToSize pk
2472 code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
2476 assignReg_IntCode pk reg src
2477 = getRegister src `thenNat` \ register2 ->
2478 getRegisterReg reg `thenNat` \ register1 ->
2479 getNewRegNat IntRep `thenNat` \ tmp ->
2481 dst__2 = registerName register1 tmp
2482 code = registerCode register2 dst__2
2483 src__2 = registerName register2 dst__2
-- a fixed rhs register must be copied into the destination; 'OR g0'
-- is the SPARC register-move idiom.
2484 code__2 = if isFixed register2
2485 then code `snocOL` OR False g0 (RIReg src__2) dst__2
2490 #endif /* sparc_TARGET_ARCH */
2492 #if powerpc_TARGET_ARCH
-- PowerPC: evaluate rhs into some register, compute the address, ST.
2494 assignMem_IntCode pk addr src = do
2495 (srcReg, code) <- getSomeReg src
2496 Amode dstAddr addr_code <- getAmode addr
2497 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2499 -- dst is a reg, but src could be anything
-- An 'Any' register can generate straight into dst; a 'Fixed' one
-- needs a trailing MR (register move) into dst.
-- NOTE(review): the '= do' / 'return $ case r of' / 'where' scaffolding
-- lines are missing from this excerpt.
2500 assignReg_IntCode pk reg src
2502 r <- getRegister src
2504 Any _ code -> code dst
2505 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2507 dst = getRegisterReg reg
2509 #endif /* powerpc_TARGET_ARCH */
2512 -- -----------------------------------------------------------------------------
2513 -- Floating-point assignments
2515 #if alpha_TARGET_ARCH
-- Alpha FP assignment; mirrors assignIntCode but uses FMOV for the
-- fixed-register copy.  Interior lines are missing from this excerpt.
2517 assignFltCode pk (CmmLoad dst _) src
2518 = getNewRegNat pk `thenNat` \ tmp ->
2519 getAmode dst `thenNat` \ amode ->
2520 getRegister src `thenNat` \ register ->
2522 code1 = amodeCode amode []
2523 dst__2 = amodeAddr amode
2524 code2 = registerCode register tmp []
2525 src__2 = registerName register tmp
2526 sz = primRepToSize pk
2527 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2531 assignFltCode pk dst src
2532 = getRegister dst `thenNat` \ register1 ->
2533 getRegister src `thenNat` \ register2 ->
2535 dst__2 = registerName register1 zeroh
2536 code = registerCode register2 dst__2
2537 src__2 = registerName register2 dst__2
2538 code__2 = if isFixed register2
2539 then code . mkSeqInstr (FMOV src__2 dst__2)
2544 #endif /* alpha_TARGET_ARCH */
2546 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2548 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2550 -- Floating point assignment to memory
-- i386 stores from the x87 stack with GST; x86_64 uses a plain SSE MOV
-- (selected at compile time by IF_ARCH_i386).
2551 assignMem_FltCode pk addr src = do
2552 (src_reg, src_code) <- getNonClobberedReg src
2553 Amode addr addr_code <- getAmode addr
2555 code = src_code `appOL`
2557 IF_ARCH_i386(GST pk src_reg addr,
2558 MOV pk (OpReg src_reg) (OpAddr addr))
2561 -- Floating point assignment to a register/temporary
2562 assignReg_FltCode pk reg src = do
2563 src_code <- getAnyReg src
2564 return (src_code (getRegisterReg reg))
2566 #endif /* i386_TARGET_ARCH */
2568 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2570 #if sparc_TARGET_ARCH
2572 -- Floating point assignment to memory
-- If the rhs representation differs from the target rep, convert via
-- FxTOy into a temporary before the store.
-- NOTE(review): 'let'/'in' scaffolding and the width-equality test line
-- are missing from this excerpt.
2573 assignMem_FltCode pk addr src
2574 = getNewRegNat pk `thenNat` \ tmp1 ->
2575 getAmode addr `thenNat` \ amode ->
2576 getRegister src `thenNat` \ register ->
2578 sz = primRepToSize pk
2579 dst__2 = amodeAddr amode
2581 code1 = amodeCode amode
2582 code2 = registerCode register tmp1
2584 src__2 = registerName register tmp1
2585 pk__2 = registerRep register
2586 sz__2 = primRepToSize pk__2
2588 code__2 = code1 `appOL` code2 `appOL`
2590 then unitOL (ST sz src__2 dst__2)
2591 else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2595 -- Floating point assignment to a register/temporary
2596 -- Why is this so bizarrely ugly?
2597 assignReg_FltCode pk reg src
2598 = getRegisterReg reg `thenNat` \ register1 ->
2599 getRegister src `thenNat` \ register2 ->
2601 pk__2 = registerRep register2
2602 sz__2 = primRepToSize pk__2
2604 getNewRegNat pk__2 `thenNat` \ tmp ->
2606 sz = primRepToSize pk
2607 dst__2 = registerName register1 g0 -- must be Fixed
-- generate into a temp when a width conversion is needed, otherwise
-- straight into the destination.
2608 reg__2 = if pk /= pk__2 then tmp else dst__2
2609 code = registerCode register2 reg__2
2610 src__2 = registerName register2 reg__2
2613 code `snocOL` FxTOy sz__2 sz src__2 dst__2
2614 else if isFixed register2 then
2615 code `snocOL` FMOV sz src__2 dst__2
2621 #endif /* sparc_TARGET_ARCH */
2623 #if powerpc_TARGET_ARCH
-- On PowerPC the floating-point cases reuse the integer assignment
-- code unchanged -- presumably because the ST/MR instructions used
-- there are parameterised on the MachRep, so they handle FP reps too
-- (TODO confirm against the instruction definitions).
2626 assignMem_FltCode = assignMem_IntCode
2627 assignReg_FltCode = assignReg_IntCode
2629 #endif /* powerpc_TARGET_ARCH */
2632 -- -----------------------------------------------------------------------------
2633 -- Generating an non-local jump
2635 -- (If applicable) Do not fill the delay slots here; you will confuse the
2636 -- register allocator.
-- Emit a tail-call style jump to an arbitrary Cmm expression: direct
-- for label literals, indirect (via register or memory operand) for
-- everything else.
2638 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2640 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2642 #if alpha_TARGET_ARCH
2644 genJump (CmmLabel lbl)
2645 | isAsmTemp lbl = returnInstr (BR target)
2646 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2648 target = ImmCLbl lbl
2651 = getRegister tree `thenNat` \ register ->
2652 getNewRegNat PtrRep `thenNat` \ tmp ->
2654 dst = registerName register pv
2655 code = registerCode register pv
2656 target = registerName register pv
2658 if isFixed register then
2659 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2661 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2663 #endif /* alpha_TARGET_ARCH */
2665 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2667 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2669 genJump (CmmLoad mem pk) = do
2670 Amode target code <- getAmode mem
2671 return (code `snocOL` JMP (OpAddr target))
2673 genJump (CmmLit lit) = do
2674 return (unitOL (JMP (OpImm (litToImm lit))))
-- fall-through clause (header line is missing from this excerpt):
-- general expression, jumped to via a register.
2677 (reg,code) <- getSomeReg expr
2678 return (code `snocOL` JMP (OpReg reg))
2680 #endif /* i386_TARGET_ARCH */
2682 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2684 #if sparc_TARGET_ARCH
2686 genJump (CmmLabel lbl)
2687 = return (toOL [CALL (Left target) 0 True, NOP])
2689 target = ImmCLbl lbl
2692 = getRegister tree `thenNat` \ register ->
2693 getNewRegNat PtrRep `thenNat` \ tmp ->
2695 code = registerCode register tmp
2696 target = registerName register tmp
-- trailing NOPs fill SPARC branch delay slots (see module comment).
2698 return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2700 #endif /* sparc_TARGET_ARCH */
2702 #if powerpc_TARGET_ARCH
2703 genJump (CmmLit (CmmLabel lbl))
2704 = return (unitOL $ JMP lbl)
-- general case: load the target into CTR and branch through it.
2708 (target,code) <- getSomeReg tree
2709 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2710 #endif /* powerpc_TARGET_ARCH */
2713 -- -----------------------------------------------------------------------------
2714 -- Unconditional branches
-- One-liner per architecture: an always-taken branch to a local block.
-- NOTE(review): the '#endif' lines between the arch cases are missing
-- from this excerpt (extraction gaps in the embedded numbering).
2716 genBranch :: BlockId -> NatM InstrBlock
2718 #if alpha_TARGET_ARCH
2719 genBranch id = return (unitOL (BR id))
2722 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2723 genBranch id = return (unitOL (JXX ALWAYS id))
2726 #if sparc_TARGET_ARCH
-- the NOP fills the SPARC branch delay slot.
2727 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2730 #if powerpc_TARGET_ARCH
2731 genBranch id = return (unitOL (BCC ALWAYS id))
2735 -- -----------------------------------------------------------------------------
2736 -- Conditional jumps
2739 Conditional jumps are always to local labels, so we can use branch
2740 instructions. We peek at the arguments to decide what kind of comparison to do.
2743 ALPHA: For comparisons with 0, we're laughing, because we can just do
2744 the desired conditional branch.
2746 I386: First, we have to ensure that the condition
2747 codes are set according to the supplied comparison operation.
2749 SPARC: First, we have to ensure that the condition codes are set
2750 according to the supplied comparison operation. We generate slightly
2751 different code for floating point comparisons, because a floating
2752 point operation cannot directly precede a @BF@. We assume the worst
2753 and fill that slot with a @NOP@.
2755 SPARC: Do not fill the delay slots here; you will confuse the register
-- genCondJump: evaluate 'bool' to a CondCode, then branch on it.
-- NOTE(review): the 'genCondJump ::' name line of this signature is
-- missing from the excerpt; only the argument lines survive.
2761 :: BlockId -- the branch target
2762 -> CmmExpr -- the condition on which to branch
2765 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2767 #if alpha_TARGET_ARCH
-- Alpha (dead code elsewhere): special-cases comparison against 0 /
-- 0.0, otherwise materialises the comparison result and branches on it.
-- Many interior lines (where/let, several cmpOp equations) are missing
-- from this excerpt.
2769 genCondJump id (StPrim op [x, StInt 0])
2770 = getRegister x `thenNat` \ register ->
2771 getNewRegNat (registerRep register)
2774 code = registerCode register tmp
2775 value = registerName register tmp
2776 pk = registerRep register
2777 target = ImmCLbl lbl
2779 returnSeq code [BI (cmpOp op) value target]
2781 cmpOp CharGtOp = GTT
2783 cmpOp CharEqOp = EQQ
2785 cmpOp CharLtOp = LTT
-- unsigned compares against zero degenerate to constant conditions:
-- any word is >= 0 (ALWAYS) and never < 0 (NEVER).
2794 cmpOp WordGeOp = ALWAYS
2795 cmpOp WordEqOp = EQQ
2797 cmpOp WordLtOp = NEVER
2798 cmpOp WordLeOp = EQQ
2800 cmpOp AddrGeOp = ALWAYS
2801 cmpOp AddrEqOp = EQQ
2803 cmpOp AddrLtOp = NEVER
2804 cmpOp AddrLeOp = EQQ
2806 genCondJump lbl (StPrim op [x, StDouble 0.0])
2807 = getRegister x `thenNat` \ register ->
2808 getNewRegNat (registerRep register)
2811 code = registerCode register tmp
2812 value = registerName register tmp
2813 pk = registerRep register
2814 target = ImmCLbl lbl
2816 return (code . mkSeqInstr (BF (cmpOp op) value target))
2818 cmpOp FloatGtOp = GTT
2819 cmpOp FloatGeOp = GE
2820 cmpOp FloatEqOp = EQQ
2821 cmpOp FloatNeOp = NE
2822 cmpOp FloatLtOp = LTT
2823 cmpOp FloatLeOp = LE
2824 cmpOp DoubleGtOp = GTT
2825 cmpOp DoubleGeOp = GE
2826 cmpOp DoubleEqOp = EQQ
2827 cmpOp DoubleNeOp = NE
2828 cmpOp DoubleLtOp = LTT
2829 cmpOp DoubleLeOp = LE
2831 genCondJump lbl (StPrim op [x, y])
2833 = trivialFCode pr instr x y `thenNat` \ register ->
2834 getNewRegNat F64 `thenNat` \ tmp ->
2836 code = registerCode register tmp
2837 result = registerName register tmp
2838 target = ImmCLbl lbl
2840 return (code . mkSeqInstr (BF cond result target))
2842 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2844 fltCmpOp op = case op of
-- each op maps to an (inverted compare, branch condition) pair: the
-- FCMP tests the negation and the branch condition undoes it.
2858 (instr, cond) = case op of
2859 FloatGtOp -> (FCMP TF LE, EQQ)
2860 FloatGeOp -> (FCMP TF LTT, EQQ)
2861 FloatEqOp -> (FCMP TF EQQ, NE)
2862 FloatNeOp -> (FCMP TF EQQ, EQQ)
2863 FloatLtOp -> (FCMP TF LTT, NE)
2864 FloatLeOp -> (FCMP TF LE, NE)
2865 DoubleGtOp -> (FCMP TF LE, EQQ)
2866 DoubleGeOp -> (FCMP TF LTT, EQQ)
2867 DoubleEqOp -> (FCMP TF EQQ, NE)
2868 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2869 DoubleLtOp -> (FCMP TF LTT, NE)
2870 DoubleLeOp -> (FCMP TF LE, NE)
2872 genCondJump lbl (StPrim op [x, y])
2873 = trivialCode instr x y `thenNat` \ register ->
2874 getNewRegNat IntRep `thenNat` \ tmp ->
2876 code = registerCode register tmp
2877 result = registerName register tmp
2878 target = ImmCLbl lbl
2880 return (code . mkSeqInstr (BI cond result target))
2882 (instr, cond) = case op of
2883 CharGtOp -> (CMP LE, EQQ)
2884 CharGeOp -> (CMP LTT, EQQ)
2885 CharEqOp -> (CMP EQQ, NE)
2886 CharNeOp -> (CMP EQQ, EQQ)
2887 CharLtOp -> (CMP LTT, NE)
2888 CharLeOp -> (CMP LE, NE)
2889 IntGtOp -> (CMP LE, EQQ)
2890 IntGeOp -> (CMP LTT, EQQ)
2891 IntEqOp -> (CMP EQQ, NE)
2892 IntNeOp -> (CMP EQQ, EQQ)
2893 IntLtOp -> (CMP LTT, NE)
2894 IntLeOp -> (CMP LE, NE)
2895 WordGtOp -> (CMP ULE, EQQ)
2896 WordGeOp -> (CMP ULT, EQQ)
2897 WordEqOp -> (CMP EQQ, NE)
2898 WordNeOp -> (CMP EQQ, EQQ)
2899 WordLtOp -> (CMP ULT, NE)
2900 WordLeOp -> (CMP ULE, NE)
2901 AddrGtOp -> (CMP ULE, EQQ)
2902 AddrGeOp -> (CMP ULT, EQQ)
2903 AddrEqOp -> (CMP EQQ, NE)
2904 AddrNeOp -> (CMP EQQ, EQQ)
2905 AddrLtOp -> (CMP ULT, NE)
2906 AddrLeOp -> (CMP ULE, NE)
2908 #endif /* alpha_TARGET_ARCH */
2910 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2912 #if i386_TARGET_ARCH
2914 genCondJump id bool = do
2915 CondCode _ cond code <- getCondCode bool
2916 return (code `snocOL` JXX cond id)
2920 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2922 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions branch directly; float conditions need
-- extra handling for the unordered (NaN) case.  The bodies of
-- plain_test / or_unordered / and_ordered are missing from this
-- excerpt.
2924 genCondJump id bool = do
2925 CondCode is_float cond cond_code <- getCondCode bool
2928 return (cond_code `snocOL` JXX cond id)
2930 lbl <- getBlockIdNat
2932 -- see comment with condFltReg
2933 let code = case cond of
2939 plain_test = unitOL (
2942 or_unordered = toOL [
2946 and_ordered = toOL [
2952 return (cond_code `appOL` code)
2956 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2958 #if sparc_TARGET_ARCH
-- SPARC: a float compare (FCMP) cannot directly precede BF, so a NOP
-- is inserted before it; every branch gets a delay-slot NOP after.
2960 genCondJump id bool = do
2961 CondCode is_float cond code <- getCondCode bool
2966 then [NOP, BF cond False id, NOP]
2967 else [BI cond False id, NOP]
2971 #endif /* sparc_TARGET_ARCH */
2974 #if powerpc_TARGET_ARCH
2976 genCondJump id bool = do
2977 CondCode is_float cond code <- getCondCode bool
2978 return (code `snocOL` BCC cond id)
2980 #endif /* powerpc_TARGET_ARCH */
2983 -- -----------------------------------------------------------------------------
2984 -- Generating C calls
2986 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2987 -- @get_arg@, which moves the arguments to the correct registers/stack
2988 -- locations. Apart from that, the code is easy.
2990 -- (If applicable) Do not fill the delay slots here; you will confuse the
2991 -- register allocator.
-- NOTE(review): the 'genCCall' name line and result type of this
-- signature are missing from the excerpt; only the argument lines
-- survive.  The result is NatM InstrBlock, judging by the bodies below.
2994 :: CmmCallTarget -- function to call
2995 -> [(CmmReg,MachHint)] -- where to put the result
2996 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2997 -> Maybe [GlobalReg] -- volatile regs to save
3000 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3002 #if alpha_TARGET_ARCH
-- Alpha C call: marshal arguments with get_arg, then the standard
-- LDA/JSR/LDGP call sequence.  Interior lines are missing from this
-- excerpt; treat as read-only reference.
3006 genCCall fn cconv result_regs args
3007 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3008 `thenNat` \ ((unused,_), argCode) ->
3010 nRegs = length allArgRegs - length unused
3011 code = asmSeqThen (map ($ []) argCode)
3014 LDA pv (AddrImm (ImmLab (ptext fn))),
3015 JSR ra (AddrReg pv) nRegs,
3016 LDGP gp (AddrReg ra)]
3018 ------------------------
3019 {- Try to get a value into a specific register (or registers) for
3020 a call. The first 6 arguments go into the appropriate
3021 argument register (separate registers for integer and floating
3022 point arguments, but used in lock-step), and the remaining
3023 arguments are dumped to the stack, beginning at 0(sp). Our
3024 first argument is a pair of the list of remaining argument
3025 registers to be assigned for this call and the next stack
3026 offset to use for overflowing arguments. This way,
3027 @get_Arg@ can be applied to all of a call's arguments using
3031 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3032 -> StixTree -- Current argument
3033 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3035 -- We have to use up all of our argument registers first...
3037 get_arg ((iDst,fDst):dsts, offset) arg
3038 = getRegister arg `thenNat` \ register ->
3040 reg = if isFloatingRep pk then fDst else iDst
3041 code = registerCode register reg
3042 src = registerName register reg
3043 pk = registerRep register
3046 if isFloatingRep pk then
3047 ((dsts, offset), if isFixed register then
3048 code . mkSeqInstr (FMOV src fDst)
3051 ((dsts, offset), if isFixed register then
3052 code . mkSeqInstr (OR src (RIReg src) iDst)
3055 -- Once we have run out of argument registers, we move to the
3058 get_arg ([], offset) arg
3059 = getRegister arg `thenNat` \ register ->
3060 getNewRegNat (registerRep register)
3063 code = registerCode register tmp
3064 src = registerName register tmp
3065 pk = registerRep register
3066 sz = primRepToSize pk
3068 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3070 #endif /* alpha_TARGET_ARCH */
3072 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3074 #if i386_TARGET_ARCH
3076 -- we only cope with a single result for foreign calls
-- CmmPrim float ops that map to a single x87 instruction are inlined;
-- everything else goes out of line via outOfLineFloatOp.
-- NOTE(review): the 'case op of' scrutinee line and several 'let'/'in'
-- scaffolding lines are missing from this excerpt.
3077 genCCall (CmmPrim op) [(r,_)] args vols = do
3079 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3080 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3082 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3083 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3085 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3086 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3088 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3089 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3091 other_op -> outOfLineFloatOp op r args vols
3093 actuallyInlineFloatOp rep instr [(x,_)]
3094 = do res <- trivialUFCode rep instr x
3096 return (any (getRegisterReg r))
-- General i386 C call: push args right-to-left, CALL, pop the args
-- (caller-cleanup for ccall; stdcall callee cleans up), then move the
-- results out of eax/edx/fake0.  DELTA pseudo-instrs keep the register
-- allocator's notion of the stack pointer in sync.
3098 genCCall target dest_regs args vols = do
3099 sizes_n_codes <- mapM push_arg (reverse args)
3100 delta <- getDeltaNat
3102 (sizes, push_codes) = unzip sizes_n_codes
3103 tot_arg_size = sum sizes
3105 -- deal with static vs dynamic call targets
3106 (callinsns,cconv) <-
3109 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3110 -> -- ToDo: stdcall arg sizes
3111 return (unitOL (CALL (Left fn_imm) []), conv)
3112 where fn_imm = ImmCLbl lbl
3113 CmmForeignCall expr conv
3114 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3115 ASSERT(dyn_rep == I32)
3116 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3118 let push_code = concatOL push_codes
3119 call = callinsns `appOL`
3121 -- Deallocate parameters after call for ccall;
3122 -- but not for stdcall (callee does it)
3123 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3124 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3126 [DELTA (delta + tot_arg_size)]
3129 setDeltaNat (delta + tot_arg_size)
3132 -- assign the results, if necessary
3133 assign_code [] = nilOL
3134 assign_code [(dest,_hint)] =
3136 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3137 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3138 F32 -> unitOL (GMOV fake0 r_dest)
3139 F64 -> unitOL (GMOV fake0 r_dest)
3140 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3142 r_dest_hi = getHiVRegFromLo r_dest
3143 rep = cmmRegRep dest
3144 r_dest = getRegisterReg dest
3145 assign_code many = panic "genCCall.assign_code many"
3147 return (push_code `appOL`
3149 assign_code dest_regs)
-- push one argument; returns the number of stack bytes it occupies.
3156 push_arg :: (CmmExpr,MachHint){-current argument-}
3157 -> NatM (Int, InstrBlock) -- argsz, code
3159 push_arg (arg,_hint) -- we don't need the hints on x86
3160 | arg_rep == I64 = do
3161 ChildCode64 code r_lo <- iselExpr64 arg
3162 delta <- getDeltaNat
3163 setDeltaNat (delta - 8)
3165 r_hi = getHiVRegFromLo r_lo
3167 return (8, code `appOL`
3168 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3169 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- general case: floats are stored with GST after making room on the
-- stack; everything else is a plain PUSH.
3174 (code, reg, sz) <- get_op arg
3175 delta <- getDeltaNat
3176 let size = arg_size sz
3177 setDeltaNat (delta-size)
3178 if (case sz of F64 -> True; F32 -> True; _ -> False)
3181 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3183 GST sz reg (AddrBaseIndex (EABaseReg esp)
3189 PUSH I32 (OpReg reg) `snocOL`
3193 arg_rep = cmmExprRep arg
3196 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3198 (reg,code) <- getSomeReg op
3199 return (code, reg, cmmExprRep op)
3201 #endif /* i386_TARGET_ARCH */
3203 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Lower a CallishMachOp float primitive to a C library call (sqrt,
-- sin, ...).  F64 results call the libm function directly; F32 results
-- are computed at F64 (promoting the arguments, demoting the result)
-- and go through a fresh temporary register.
3205 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3206 -> Maybe [GlobalReg] -> NatM InstrBlock
3207 outOfLineFloatOp mop res args vols
3208 | cmmRegRep res == F64
3209 = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3212 = do uq <- getUniqueNat
3214 tmp = CmmLocal (LocalReg uq F64)
3216 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3217 code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3218 return (code1 `appOL` code2)
3220 promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3221 demote x = CmmMachOp (MO_S_Conv F64 F32) [x]
3223 target = CmmForeignCall (CmmLit lbl) CCallConv
3224 lbl = CmmLabel (mkForeignLabel fn Nothing False)
-- map the MachOp to its C function name.  NOTE(review): both F32 and
-- F64 variants call the double-precision libm entry points; the F32
-- case relies on the promote/demote wrapping above.  The 'fn = case
-- mop of' scrutinee line is missing from this excerpt.
3227 MO_F32_Sqrt -> FSLIT("sqrt")
3228 MO_F32_Sin -> FSLIT("sin")
3229 MO_F32_Cos -> FSLIT("cos")
3230 MO_F32_Tan -> FSLIT("tan")
3231 MO_F32_Exp -> FSLIT("exp")
3232 MO_F32_Log -> FSLIT("log")
3234 MO_F32_Asin -> FSLIT("asin")
3235 MO_F32_Acos -> FSLIT("acos")
3236 MO_F32_Atan -> FSLIT("atan")
3238 MO_F32_Sinh -> FSLIT("sinh")
3239 MO_F32_Cosh -> FSLIT("cosh")
3240 MO_F32_Tanh -> FSLIT("tanh")
3241 MO_F32_Pwr -> FSLIT("pow")
3243 MO_F64_Sqrt -> FSLIT("sqrt")
3244 MO_F64_Sin -> FSLIT("sin")
3245 MO_F64_Cos -> FSLIT("cos")
3246 MO_F64_Tan -> FSLIT("tan")
3247 MO_F64_Exp -> FSLIT("exp")
3248 MO_F64_Log -> FSLIT("log")
3250 MO_F64_Asin -> FSLIT("asin")
3251 MO_F64_Acos -> FSLIT("acos")
3252 MO_F64_Atan -> FSLIT("atan")
3254 MO_F64_Sinh -> FSLIT("sinh")
3255 MO_F64_Cosh -> FSLIT("cosh")
3256 MO_F64_Tanh -> FSLIT("tanh")
3257 MO_F64_Pwr -> FSLIT("pow")
3259 other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3261 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3263 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3265 #if x86_64_TARGET_ARCH
-- | Generate native code for a foreign C call on x86_64 (System V AMD64
-- convention: integer args in registers, FP args in SSE registers, the
-- overflow on the stack).
-- NOTE(review): the embedded line numbering in this chunk is discontinuous,
-- so several interior lines of this definition are not visible here; the
-- comments below describe only what the visible code demonstrates.
3267 genCCall (CmmPrim op) [(r,_)] args vols =
-- Callish mach-ops (sin, cos, pow, ...) are routed to an out-of-line
-- C library call.
3268 outOfLineFloatOp op r args vols
3270 genCCall target dest_regs args vols = do
3272 -- load up the register arguments
3273 (stack_args, aregs, fregs, load_args_code)
3274 <- load_args args allArgRegs allFPArgRegs nilOL
-- aregs/fregs are the registers left over, so the "used" lists are the
-- prefixes of allArgRegs/allFPArgRegs that load_args consumed.
3277 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3278 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3279 arg_regs = int_regs_used ++ fp_regs_used
3280 -- for annotating the call instruction with
3282 sse_regs = length fp_regs_used
3284 tot_arg_size = arg_size * length stack_args
3286 -- On entry to the called function, %rsp should be aligned
3287 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3288 -- the return address is 16-byte aligned). In STG land
3289 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3290 -- need to make sure we push a multiple of 16-bytes of args,
3291 -- plus the return address, to get the correct alignment.
3292 -- Urg, this is hard. We need to feed the delta back into
3293 -- the arg pushing code.
3294 (real_size, adjust_rsp) <-
3295 if tot_arg_size `rem` 16 == 0
3296 then return (tot_arg_size, nilOL)
3297 else do -- we need to adjust...
3298 delta <- getDeltaNat
3299 setDeltaNat (delta-8)
3300 return (tot_arg_size+8, toOL [
-- Pad the stack by one word (8 bytes) to restore 16-byte alignment.
3301 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3305 -- push the stack args, right to left
3306 push_code <- push_args (reverse stack_args) nilOL
3307 delta <- getDeltaNat
3309 -- deal with static vs dynamic call targets
3310 (callinsns,cconv) <-
3313 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3314 -> -- ToDo: stdcall arg sizes
3315 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3316 where fn_imm = ImmCLbl lbl
3317 CmmForeignCall expr conv
-- Dynamic target: evaluate the function pointer into a register first.
3318 -> do (dyn_r, dyn_c) <- getSomeReg expr
3319 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3322 -- The x86_64 ABI requires us to set %al to the number of SSE
3323 -- registers that contain arguments, if the called routine
3324 -- is a varargs function. We don't know whether it's a
3325 -- varargs function or not, so we have to assume it is.
3327 -- It's not safe to omit this assignment, even if the number
3328 -- of SSE regs in use is zero. If %al is larger than 8
3329 -- on entry to a varargs function, seg faults ensue.
3330 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3332 let call = callinsns `appOL`
3334 -- Deallocate parameters after call for ccall;
3335 -- but not for stdcall (callee does it)
3336 (if cconv == StdCallConv || real_size==0 then [] else
3337 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3339 [DELTA (delta + real_size)]
3342 setDeltaNat (delta + real_size)
3345 -- assign the results, if necessary
3346 assign_code [] = nilOL
3347 assign_code [(dest,_hint)] =
-- FP results come back in %xmm0, everything else in %rax.
3349 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3350 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3351 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3353 rep = cmmRegRep dest
3354 r_dest = getRegisterReg dest
3355 assign_code many = panic "genCCall.assign_code many"
3357 return (load_args_code `appOL`
3360 assign_eax sse_regs `appOL`
3362 assign_code dest_regs)
3365 arg_size = 8 -- always, at the mo
-- load_args: walk the argument list, assigning each arg to the next free
-- int or FP register; returns the leftover (stack) args, the unused
-- registers of each class, and the accumulated load code.
3367 load_args :: [(CmmExpr,MachHint)]
3368 -> [Reg] -- int regs avail for args
3369 -> [Reg] -- FP regs avail for args
3371 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3372 load_args args [] [] code = return (args, [], [], code)
3373 -- no more regs to use
3374 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3375 -- no more args to push
3376 load_args ((arg,hint) : rest) aregs fregs code
3377 | isFloatingRep arg_rep =
3381 arg_code <- getAnyReg arg
3382 load_args rest aregs rs (code `appOL` arg_code r)
3387 arg_code <- getAnyReg arg
3388 load_args rest rs fregs (code `appOL` arg_code r)
3390 arg_rep = cmmExprRep arg
3393 (args',ars,frs,code') <- load_args rest aregs fregs code
3394 return ((arg,hint):args', ars, frs, code')
-- push_args: emit pushes for the args that did not fit in registers,
-- keeping the stack delta bookkeeping in sync with every adjustment.
3396 push_args [] code = return code
3397 push_args ((arg,hint):rest) code
3398 | isFloatingRep arg_rep = do
3399 (arg_reg, arg_code) <- getSomeReg arg
3400 delta <- getDeltaNat
3401 setDeltaNat (delta-arg_size)
3402 let code' = code `appOL` toOL [
-- There is no PUSH for SSE registers, so store then adjust %rsp.
3403 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3404 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3405 DELTA (delta-arg_size)]
3406 push_args rest code'
3409 -- we only ever generate word-sized function arguments. Promotion
3410 -- has already happened: our Int8# type is kept sign-extended
3411 -- in an Int#, for example.
3412 ASSERT(arg_rep == I64) return ()
3413 (arg_op, arg_code) <- getOperand arg
3414 delta <- getDeltaNat
3415 setDeltaNat (delta-arg_size)
3416 let code' = code `appOL` toOL [PUSH I64 arg_op,
3417 DELTA (delta-arg_size)]
3418 push_args rest code'
3420 arg_rep = cmmExprRep arg
3423 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3425 #if sparc_TARGET_ARCH
3427 The SPARC calling convention is an absolute
3428 nightmare. The first 6x32 bits of arguments are mapped into
3429 %o0 through %o5, and the remaining arguments are dumped to the
3430 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3432 If we have to put args on the stack, move %o6==%sp down by
3433 the number of words to go on the stack, to ensure there's enough space.
3435 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3436 16 words above the stack pointer is a word for the address of
3437 a structure return value. I use this as a temporary location
3438 for moving values from float to int regs. Certainly it isn't
3439 safe to put anything in the 16 words starting at %sp, since
3440 this area can get trashed at any time due to window overflows
3441 caused by signal handlers.
3443 A final complication (if the above isn't enough) is that
3444 we can't blithely calculate the arguments one by one into
3445 %o0 .. %o5. Consider the following nested calls:
3449 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3450 the inner call will itself use %o0, which trashes the value put there
3451 in preparation for the outer call. Upshot: we need to calculate the
3452 args into temporary regs, and move those to arg regs or onto the
3453 stack only immediately prior to the call proper. Sigh.
-- | Generate native code for a C call on SPARC.  Arguments are first
-- computed into fresh virtual registers and only moved into the real
-- argument registers (%o0..%o5) / stack slots immediately before the
-- call, to avoid nested calls trashing partially-set-up arg registers
-- (see the convention notes above).
-- NOTE(review): interior lines of this definition are elided in this
-- chunk (discontinuous embedded numbering); comments cover only the
-- visible code.
3456 genCCall fn cconv kind args
3457 = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3459 (argcodes, vregss) = unzip argcode_and_vregs
3460 n_argRegs = length allArgRegs
3461 n_argRegs_used = min (length vregs) n_argRegs
3462 vregs = concat vregss
3464 -- deal with static vs dynamic call targets
3467 -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3469 -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3470 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3472 `thenNat` \ callinsns ->
3474 argcode = concatOL argcodes
3475 (move_sp_down, move_sp_up)
3476 = let diff = length vregs - n_argRegs
3477 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3480 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3482 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3484 return (argcode `appOL`
3485 move_sp_down `appOL`
3486 transfer_code `appOL`
3491 -- function names that begin with '.' are assumed to be special
3492 -- internally generated names like '.mul,' which don't get an
3493 -- underscore prefix
3494 -- ToDo:needed (WDP 96/03) ???
3495 fn_static = unLeft fn
3496 fn__2 = case (headFS fn_static) of
3497 '.' -> ImmLit (ftext fn_static)
3498 _ -> ImmCLbl (mkForeignLabel fn_static False)
3500 -- move args from the integer vregs into which they have been
3501 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3502 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3504 move_final [] _ offset -- all args done
3507 move_final (v:vs) [] offset -- out of aregs; move to stack
3508 = ST W v (spRel offset)
3509 : move_final vs [] (offset+1)
3511 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
-- OR with %g0 (always zero) is the SPARC idiom for a register move.
3512 = OR False g0 (RIReg v) a
3513 : move_final vs az offset
3515 -- generate code to calculate an argument, and move it into one
3516 -- or two integer vregs.
3517 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3518 arg_to_int_vregs arg
3519 | is64BitRep (repOfCmmExpr arg)
-- 64-bit values occupy a hi/lo pair of 32-bit vregs, high word first.
3520 = iselExpr64 arg `thenNat` \ (ChildCode64 code vr_lo) ->
3521 let r_lo = VirtualRegI vr_lo
3522 r_hi = getHiVRegFromLo r_lo
3523 in return (code, [r_hi, r_lo])
3525 = getRegister arg `thenNat` \ register ->
3526 getNewRegNat (registerRep register) `thenNat` \ tmp ->
3527 let code = registerCode register tmp
3528 src = registerName register tmp
3529 pk = registerRep register
3531 -- the value is in src. Get it into 1 or 2 int vregs.
-- Float values are bounced through the scratch stack slot at [%sp+16]
-- (see the convention notes above) to move them into integer vregs.
3534 getNewRegNat WordRep `thenNat` \ v1 ->
3535 getNewRegNat WordRep `thenNat` \ v2 ->
3538 FMOV DF src f0 `snocOL`
3539 ST F f0 (spRel 16) `snocOL`
3540 LD W (spRel 16) v1 `snocOL`
3541 ST F (fPair f0) (spRel 16) `snocOL`
3547 getNewRegNat WordRep `thenNat` \ v1 ->
3550 ST F src (spRel 16) `snocOL`
3556 getNewRegNat WordRep `thenNat` \ v1 ->
3558 code `snocOL` OR False g0 (RIReg src) v1
3562 #endif /* sparc_TARGET_ARCH */
3564 #if powerpc_TARGET_ARCH
3566 #if darwin_TARGET_OS || linux_TARGET_OS
3568 The PowerPC calling convention for Darwin/Mac OS X
3569 is described in Apple's document
3570 "Inside Mac OS X - Mach-O Runtime Architecture".
3572 PowerPC Linux uses the System V Release 4 Calling Convention
3573 for PowerPC. It is described in the
3574 "System V Application Binary Interface PowerPC Processor Supplement".
3576 Both conventions are similar:
3577 Parameters may be passed in general-purpose registers starting at r3, in
3578 floating point registers starting at f1, or on the stack.
3580 But there are substantial differences:
3581 * The number of registers used for parameter passing and the exact set of
3582 nonvolatile registers differs (see MachRegs.lhs).
3583 * On Darwin, stack space is always reserved for parameters, even if they are
3584 passed in registers. The called routine may choose to save parameters from
3585 registers to the corresponding space on the stack.
3586 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3587 parameter is passed in an FPR.
3588 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3589 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3590 Darwin just treats an I64 like two separate I32s (high word first).
3591 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3592 4-byte aligned like everything else on Darwin.
3593 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3594 PowerPC Linux does not agree, so neither do we.
3596 According to both conventions, the parameter area should be part of the
3597 caller's stack frame, allocated in the caller's prologue code (large enough
3598 to hold the parameter lists for all called routines). The NCG already
3599 uses the stack for register spilling, leaving 64 bytes free at the top.
3600 If we need a larger parameter area than that, we just allocate a new stack
3601 frame just before ccalling.
-- | Generate native code for a C call on PowerPC (Darwin or SysV/Linux
-- conventions; the differences are described in the comment above and
-- selected by CPP below).
-- NOTE(review): interior lines of this definition are elided in this
-- chunk (discontinuous embedded numbering); comments cover only the
-- visible code.
3604 genCCall target dest_regs argsAndHints
3605 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3606 -- we rely on argument promotion in the codeGen
3608 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3610 allArgRegs allFPArgRegs
3614 (labelOrExpr, reduceToF32) <- case target of
3615 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3616 CmmForeignCall expr conv -> return (Right expr, False)
3617 CmmPrim mop -> outOfLineFloatOp mop
3619 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3620 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- Static target: direct branch-and-link.
3625 `snocOL` BL lbl usedRegs
-- Dynamic target: load the pointer into CTR and branch through it.
3628 (dynReg, dynCode) <- getSomeReg dyn
3630 `snocOL` MTCTR dynReg
3632 `snocOL` BCTRL usedRegs
3635 #if darwin_TARGET_OS
3636 initialStackOffset = 24
3637 -- size of linkage area + size of arguments, in bytes
3638 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3639 map machRepByteWidth argReps
3640 #elif linux_TARGET_OS
3641 initialStackOffset = 8
3642 stackDelta finalStack = roundTo 16 finalStack
3644 args = map fst argsAndHints
3645 argReps = map cmmExprRep args
-- Round x up to the next multiple of a.
3647 roundTo a x | x `mod` a == 0 = x
3648 | otherwise = x + a - (x `mod` a)
-- Allocate a temporary frame for the parameter area if needed; STU
-- stores the back-chain pointer while moving %sp down.
3650 move_sp_down finalStack
3652 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3655 where delta = stackDelta finalStack
3656 move_sp_up finalStack
3658 toOL [ADD sp sp (RIImm (ImmInt delta)),
3661 where delta = stackDelta finalStack
-- passArguments: place each argument into GPRs/FPRs or the stack
-- parameter area, threading the stack offset, accumulated code and
-- the list of registers used (for annotating the call).
3664 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3665 passArguments ((arg,I64):args) gprs fprs stackOffset
3666 accumCode accumUsed =
3668 ChildCode64 code vr_lo <- iselExpr64 arg
3669 let vr_hi = getHiVRegFromLo vr_lo
3671 #if darwin_TARGET_OS
-- Darwin: an I64 is simply two I32 words, high word first, each going
-- to the next GPR or (via storeWord) its reserved stack slot.
3676 (accumCode `appOL` code
3677 `snocOL` storeWord vr_hi gprs stackOffset
3678 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3679 ((take 2 gprs) ++ accumUsed)
3681 storeWord vr (gpr:_) offset = MR gpr vr
3682 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3684 #elif linux_TARGET_OS
-- SysV: an I64 goes either entirely in an odd/even GPR pair or
-- entirely on the (8-byte aligned) stack -- never split.
3685 let stackOffset' = roundTo 8 stackOffset
3686 stackCode = accumCode `appOL` code
3687 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3688 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3689 regCode hireg loreg =
3690 accumCode `appOL` code
3691 `snocOL` MR hireg vr_hi
3692 `snocOL` MR loreg vr_lo
3695 hireg : loreg : regs | even (length gprs) ->
3696 passArguments args regs fprs stackOffset
3697 (regCode hireg loreg) (hireg : loreg : accumUsed)
-- Skip one GPR so the pair starts at an odd-numbered register.
3698 _skipped : hireg : loreg : regs ->
3699 passArguments args regs fprs stackOffset
3700 (regCode hireg loreg) (hireg : loreg : accumUsed)
3701 _ -> -- only one or no regs left
3702 passArguments args [] fprs (stackOffset'+8)
-- Non-I64 argument with a register still available for its class.
3706 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3707 | reg : _ <- regs = do
3708 register <- getRegister arg
3709 let code = case register of
3710 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3711 Any _ acode -> acode reg
3715 #if darwin_TARGET_OS
3716 -- The Darwin ABI requires that we reserve stack slots for register parameters
3717 (stackOffset + stackBytes)
3718 #elif linux_TARGET_OS
3719 -- ... the SysV ABI doesn't.
3722 (accumCode `appOL` code)
-- No register left: the argument goes to the stack parameter area.
3725 (vr, code) <- getSomeReg arg
3729 (stackOffset' + stackBytes)
3730 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3733 #if darwin_TARGET_OS
3734 -- stackOffset is at least 4-byte aligned
3735 -- The Darwin ABI is happy with that.
3736 stackOffset' = stackOffset
3738 -- ... the SysV ABI requires 8-byte alignment for doubles.
3739 stackOffset' | rep == F64 = roundTo 8 stackOffset
3740 | otherwise = stackOffset
3742 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep resource usage: (GPRs consumed, FPRs consumed, stack bytes,
-- which register class to draw from).
3743 (nGprs, nFprs, stackBytes, regs) = case rep of
3744 I32 -> (1, 0, 4, gprs)
3745 #if darwin_TARGET_OS
3746 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3748 F32 -> (1, 1, 4, fprs)
3749 F64 -> (2, 1, 8, fprs)
3750 #elif linux_TARGET_OS
3751 -- ... the SysV ABI doesn't.
3752 F32 -> (0, 1, 4, fprs)
3753 F64 -> (0, 1, 8, fprs)
-- Move the call result from its ABI return register (f1 for floats,
-- r3/r4 for integers) into the destination; FRSP narrows F64 -> F32
-- when the out-of-line float op computed in double precision.
3756 moveResult reduceToF32 =
3760 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3761 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3762 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3764 | otherwise -> unitOL (MR r_dest r3)
3765 where rep = cmmRegRep dest
3766 r_dest = getRegisterReg dest
-- | Map a callish MachOp (PowerPC) to the C math-library routine that
-- implements it, returning the call target and a flag saying whether
-- the (double-precision) result must be reduced back to F32.
-- All F32 ops call the double-precision C function and reduce (True);
-- F64 ops call it directly (False).
3768 outOfLineFloatOp mop =
3770 mopExpr <- cmmMakeDynamicReference addImportNat True $
3771 mkForeignLabel functionName Nothing True
3772 let mopLabelOrExpr = case mopExpr of
3773 CmmLit (CmmLabel lbl) -> Left lbl
3775 return (mopLabelOrExpr, reduce)
3777 (functionName, reduce) = case mop of
3778 MO_F32_Exp -> (FSLIT("exp"), True)
3779 MO_F32_Log -> (FSLIT("log"), True)
3780 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3782 MO_F32_Sin -> (FSLIT("sin"), True)
3783 MO_F32_Cos -> (FSLIT("cos"), True)
3784 MO_F32_Tan -> (FSLIT("tan"), True)
3786 MO_F32_Asin -> (FSLIT("asin"), True)
3787 MO_F32_Acos -> (FSLIT("acos"), True)
3788 MO_F32_Atan -> (FSLIT("atan"), True)
3790 MO_F32_Sinh -> (FSLIT("sinh"), True)
3791 MO_F32_Cosh -> (FSLIT("cosh"), True)
3792 MO_F32_Tanh -> (FSLIT("tanh"), True)
3793 MO_F32_Pwr -> (FSLIT("pow"), True)
3795 MO_F64_Exp -> (FSLIT("exp"), False)
3796 MO_F64_Log -> (FSLIT("log"), False)
3797 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3799 MO_F64_Sin -> (FSLIT("sin"), False)
3800 MO_F64_Cos -> (FSLIT("cos"), False)
3801 MO_F64_Tan -> (FSLIT("tan"), False)
3803 MO_F64_Asin -> (FSLIT("asin"), False)
3804 MO_F64_Acos -> (FSLIT("acos"), False)
3805 MO_F64_Atan -> (FSLIT("atan"), False)
3807 MO_F64_Sinh -> (FSLIT("sinh"), False)
3808 MO_F64_Cosh -> (FSLIT("cosh"), False)
3809 MO_F64_Tanh -> (FSLIT("tanh"), False)
3810 MO_F64_Pwr -> (FSLIT("pow"), False)
3811 other -> pprPanic "genCCall(ppc): unknown callish op"
3812 (pprCallishMachOp other)
3814 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3816 #endif /* powerpc_TARGET_ARCH */
3819 -- -----------------------------------------------------------------------------
3820 -- Generating a table-branch
-- | Generate a table-based branch (for Cmm switch statements): emit a
-- jump table in read-only data and an indexed jump through it.
-- NOTE(review): interior lines are elided in this chunk; comments cover
-- only the visible code.
3822 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3824 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3825 genSwitch expr ids = do
3826 (reg,e_code) <- getSomeReg expr
3827 lbl <- getNewLabelNat
3829 jumpTable = map jumpTableEntry ids
-- Scaled indexed addressing: table base + index * word size.
3830 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3831 code = e_code `appOL` toOL [
3832 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3833 JMP_TBL op [ id | Just id <- ids ]
3837 #elif powerpc_TARGET_ARCH
-- PIC variant: the table holds label-minus-table offsets, added to the
-- table address at runtime.
3841 (reg,e_code) <- getSomeReg expr
3842 tmp <- getNewRegNat I32
3843 lbl <- getNewLabelNat
3844 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3845 (tableReg,t_code) <- getSomeReg $ dynRef
3847 jumpTable = map jumpTableEntryRel ids
3849 jumpTableEntryRel Nothing
3850 = CmmStaticLit (CmmInt 0 wordRep)
3851 jumpTableEntryRel (Just (BlockId id))
3852 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3853 where blockLabel = mkAsmTempLabel id
3855 code = e_code `appOL` t_code `appOL` toOL [
3856 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-- Shift left by 2: scale the index to 4-byte table entries.
3857 SLW tmp reg (RIImm (ImmInt 2)),
3858 LD I32 tmp (AddrRegReg tableReg tmp),
3859 ADD tmp tmp (RIReg tableReg),
3861 BCTR [ id | Just id <- ids ]
-- Non-PIC variant: absolute addresses in the table, addressed via
-- high/low halves of the table label.
3866 (reg,e_code) <- getSomeReg expr
3867 tmp <- getNewRegNat I32
3868 lbl <- getNewLabelNat
3870 jumpTable = map jumpTableEntry ids
3872 code = e_code `appOL` toOL [
3873 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3874 SLW tmp reg (RIImm (ImmInt 2)),
3875 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3876 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3878 BCTR [ id | Just id <- ids ]
3882 genSwitch expr ids = panic "ToDo: genSwitch"
-- A jump-table entry: 0 for a missing branch, otherwise the label of
-- the target basic block.
3885 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3886 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3887 where blockLabel = mkAsmTempLabel id
3889 -- -----------------------------------------------------------------------------
3891 -- -----------------------------------------------------------------------------
3894 -- -----------------------------------------------------------------------------
3895 -- 'condIntReg' and 'condFltReg': condition codes into registers
3897 -- Turn those condition codes into integers now (when they appear on
3898 -- the right hand side of an assignment).
3900 -- (If applicable) Do not fill the delay slots here; you will confuse the
3901 -- register allocator.
-- | Materialise a comparison result as a value in a register (used when
-- a condition appears on the right-hand side of an assignment).
3903 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3905 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3907 #if alpha_TARGET_ARCH
-- Not implemented for Alpha.
3908 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3909 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3910 #endif /* alpha_TARGET_ARCH */
3912 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3914 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- condIntReg (x86/x86_64): SETcc writes a byte, then zero-extend it to
-- a full I32 result register.
3916 condIntReg cond x y = do
3917 CondCode _ cond cond_code <- condIntCode cond x y
3918 tmp <- getNewRegNat I8
3920 code dst = cond_code `appOL` toOL [
3921 SETCC cond (OpReg tmp),
3922 MOVZxL I8 (OpReg tmp) (OpReg dst)
3925 return (Any I32 code)
3929 #if i386_TARGET_ARCH
-- condFltReg (i386): same SETcc + zero-extend scheme as condIntReg,
-- driven by the float comparison's condition code.
3931 condFltReg cond x y = do
3932 CondCode _ cond cond_code <- condFltCode cond x y
3933 tmp <- getNewRegNat I8
3935 code dst = cond_code `appOL` toOL [
3936 SETCC cond (OpReg tmp),
3937 MOVZxL I8 (OpReg tmp) (OpReg dst)
3940 return (Any I32 code)
3944 #if x86_64_TARGET_ARCH
-- condFltReg (x86_64): SSE float compare; must additionally handle the
-- unordered (NaN) case via the parity flag, as explained below.
3946 condFltReg cond x y = do
3947 CondCode _ cond cond_code <- condFltCode cond x y
3948 tmp1 <- getNewRegNat wordRep
3949 tmp2 <- getNewRegNat wordRep
3951 -- We have to worry about unordered operands (eg. comparisons
3952 -- against NaN). If the operands are unordered, the comparison
3953 -- sets the parity flag, carry flag and zero flag.
3954 -- All comparisons are supposed to return false for unordered
3955 -- operands except for !=, which returns true.
3957 -- Optimisation: we don't have to test the parity flag if we
3958 -- know the test has already excluded the unordered case: eg >
3959 -- and >= test for a zero carry flag, which can only occur for
3960 -- ordered operands.
3962 -- ToDo: by reversing comparisons we could avoid testing the
3963 -- parity flag in more cases.
3968 NE -> or_unordered dst
3969 GU -> plain_test dst
3970 GEU -> plain_test dst
3971 _ -> and_ordered dst)
-- Unordered case impossible for this cond: plain SETcc suffices.
3973 plain_test dst = toOL [
3974 SETCC cond (OpReg tmp1),
3975 MOVZxL I8 (OpReg tmp1) (OpReg dst)
-- NE must also be true when unordered: OR in the parity flag.
3977 or_unordered dst = toOL [
3978 SETCC cond (OpReg tmp1),
3979 SETCC PARITY (OpReg tmp2),
3980 OR I8 (OpReg tmp1) (OpReg tmp2),
3981 MOVZxL I8 (OpReg tmp2) (OpReg dst)
-- Other conds must be false when unordered: AND with not-parity.
3983 and_ordered dst = toOL [
3984 SETCC cond (OpReg tmp1),
3985 SETCC NOTPARITY (OpReg tmp2),
3986 AND I8 (OpReg tmp1) (OpReg tmp2),
3987 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3990 return (Any I32 code)
3994 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3996 #if sparc_TARGET_ARCH
-- condIntReg/condFltReg (SPARC).  EQQ/NE against zero (and the general
-- two-operand forms via XOR) are computed branch-free using the carry
-- trick: SUBcc against %g0 sets carry iff the value is non-zero, then
-- subtract-with-carry from -1 (EQQ) or add-with-carry to 0 (NE) yields
-- the boolean.  The remaining cases fall back to compare-and-branch
-- over two basic blocks.
-- NOTE(review): interior lines are elided in this chunk; comments cover
-- only the visible code.
3998 condIntReg EQQ x (StInt 0)
3999 = getRegister x `thenNat` \ register ->
4000 getNewRegNat IntRep `thenNat` \ tmp ->
4002 code = registerCode register tmp
4003 src = registerName register tmp
4004 code__2 dst = code `appOL` toOL [
4005 SUB False True g0 (RIReg src) g0,
4006 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4008 return (Any IntRep code__2)
-- General EQQ: XOR the operands first, then test the result for zero.
4011 = getRegister x `thenNat` \ register1 ->
4012 getRegister y `thenNat` \ register2 ->
4013 getNewRegNat IntRep `thenNat` \ tmp1 ->
4014 getNewRegNat IntRep `thenNat` \ tmp2 ->
4016 code1 = registerCode register1 tmp1
4017 src1 = registerName register1 tmp1
4018 code2 = registerCode register2 tmp2
4019 src2 = registerName register2 tmp2
4020 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4021 XOR False src1 (RIReg src2) dst,
4022 SUB False True g0 (RIReg dst) g0,
4023 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4025 return (Any IntRep code__2)
4027 condIntReg NE x (StInt 0)
4028 = getRegister x `thenNat` \ register ->
4029 getNewRegNat IntRep `thenNat` \ tmp ->
4031 code = registerCode register tmp
4032 src = registerName register tmp
4033 code__2 dst = code `appOL` toOL [
4034 SUB False True g0 (RIReg src) g0,
4035 ADD True False g0 (RIImm (ImmInt 0)) dst]
4037 return (Any IntRep code__2)
-- General NE: XOR then the carry trick, as above.
4040 = getRegister x `thenNat` \ register1 ->
4041 getRegister y `thenNat` \ register2 ->
4042 getNewRegNat IntRep `thenNat` \ tmp1 ->
4043 getNewRegNat IntRep `thenNat` \ tmp2 ->
4045 code1 = registerCode register1 tmp1
4046 src1 = registerName register1 tmp1
4047 code2 = registerCode register2 tmp2
4048 src2 = registerName register2 tmp2
4049 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4050 XOR False src1 (RIReg src2) dst,
4051 SUB False True g0 (RIReg dst) g0,
4052 ADD True False g0 (RIImm (ImmInt 0)) dst]
4054 return (Any IntRep code__2)
-- Fallback for other integer conditions: branch to load 0 or 1.
4057 = getBlockIdNat `thenNat` \ lbl1 ->
4058 getBlockIdNat `thenNat` \ lbl2 ->
4059 condIntCode cond x y `thenNat` \ condition ->
4061 code = condCode condition
4062 cond = condName condition
4063 code__2 dst = code `appOL` toOL [
4064 BI cond False (ImmCLbl lbl1), NOP,
4065 OR False g0 (RIImm (ImmInt 0)) dst,
4066 BI ALWAYS False (ImmCLbl lbl2), NOP,
4068 OR False g0 (RIImm (ImmInt 1)) dst,
4071 return (Any IntRep code__2)
-- Float conditions: same branch scheme, using the FP branch (BF).
4074 = getBlockIdNat `thenNat` \ lbl1 ->
4075 getBlockIdNat `thenNat` \ lbl2 ->
4076 condFltCode cond x y `thenNat` \ condition ->
4078 code = condCode condition
4079 cond = condName condition
4080 code__2 dst = code `appOL` toOL [
4082 BF cond False (ImmCLbl lbl1), NOP,
4083 OR False g0 (RIImm (ImmInt 0)) dst,
4084 BI ALWAYS False (ImmCLbl lbl2), NOP,
4086 OR False g0 (RIImm (ImmInt 1)) dst,
4089 return (Any IntRep code__2)
4091 #endif /* sparc_TARGET_ARCH */
4093 #if powerpc_TARGET_ARCH
-- condReg (PowerPC): shared implementation for condIntReg/condFltReg.
-- Extracts the relevant bit of the condition register (via MFCR +
-- RLWINM, possibly negating with CRNOR first) into the destination.
-- NOTE(review): interior lines are elided in this chunk; comments cover
-- only the visible code.
4094 condReg getCond = do
4095 lbl1 <- getBlockIdNat
4096 lbl2 <- getBlockIdNat
4097 CondCode _ cond cond_code <- getCond
4099 {- code dst = cond_code `appOL` toOL [
4108 code dst = cond_code
-- Rotate the selected CR bit into the low bit of dst.
4112 RLWINM dst dst (bit + 1) 31 31
4115 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4118 (bit, do_negate) = case cond of
4132 return (Any I32 code)
4134 condIntReg cond x y = condReg (condIntCode cond x y)
4135 condFltReg cond x y = condReg (condFltCode cond x y)
4136 #endif /* powerpc_TARGET_ARCH */
4139 -- -----------------------------------------------------------------------------
4140 -- 'trivial*Code': deal with trivial instructions
4142 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4143 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4144 -- Only look for constants on the right hand side, because that's
4145 -- where the generic optimizer will have put them.
4147 -- Similarly, for unary instructions, we don't have to worry about
4148 -- matching an StInt as the argument, because genericOpt will already
4149 -- have handled the constant-folding.
-- Type signatures for the trivial*Code family, assembled per-target via
-- the IF_ARCH_* CPP macros from NCG.h: each architecture supplies its
-- own instruction-constructor argument shape.
-- NOTE(review): the signature head lines are elided in this chunk
-- (discontinuous embedded numbering); only the per-arch alternatives
-- are visible below.
4153 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4154 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4155 -> Maybe (Operand -> Operand -> Instr)
4156 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4157 -> Maybe (Operand -> Operand -> Instr)
4158 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4159 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4161 -> CmmExpr -> CmmExpr -- the two arguments
4164 #ifndef powerpc_TARGET_ARCH
4167 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4168 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4169 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4170 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4172 -> CmmExpr -> CmmExpr -- the two arguments
4178 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4179 ,IF_ARCH_i386 ((Operand -> Instr)
4180 ,IF_ARCH_x86_64 ((Operand -> Instr)
4181 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4182 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4184 -> CmmExpr -- the one argument
4187 #ifndef powerpc_TARGET_ARCH
4190 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4191 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4192 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4193 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4195 -> CmmExpr -- the one argument
4199 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4201 #if alpha_TARGET_ARCH
-- trivial*Code (Alpha): emit a single dyadic/unary instruction over
-- freshly computed operands, using the older mkSeqInstr/asmSeqThen
-- code-assembly style.
-- NOTE(review): interior lines are elided in this chunk; comments cover
-- only the visible code.
4203 trivialCode instr x (StInt y)
-- Immediate-operand special case: fold the literal into the RI operand.
4205 = getRegister x `thenNat` \ register ->
4206 getNewRegNat IntRep `thenNat` \ tmp ->
4208 code = registerCode register tmp
4209 src1 = registerName register tmp
4210 src2 = ImmInt (fromInteger y)
4211 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4213 return (Any IntRep code__2)
4215 trivialCode instr x y
4216 = getRegister x `thenNat` \ register1 ->
4217 getRegister y `thenNat` \ register2 ->
4218 getNewRegNat IntRep `thenNat` \ tmp1 ->
4219 getNewRegNat IntRep `thenNat` \ tmp2 ->
4221 code1 = registerCode register1 tmp1 []
4222 src1 = registerName register1 tmp1
4223 code2 = registerCode register2 tmp2 []
4224 src2 = registerName register2 tmp2
4225 code__2 dst = asmSeqThen [code1, code2] .
4226 mkSeqInstr (instr src1 (RIReg src2) dst)
4228 return (Any IntRep code__2)
4231 trivialUCode instr x
4232 = getRegister x `thenNat` \ register ->
4233 getNewRegNat IntRep `thenNat` \ tmp ->
4235 code = registerCode register tmp
4236 src = registerName register tmp
4237 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4239 return (Any IntRep code__2)
4242 trivialFCode _ instr x y
4243 = getRegister x `thenNat` \ register1 ->
4244 getRegister y `thenNat` \ register2 ->
4245 getNewRegNat F64 `thenNat` \ tmp1 ->
4246 getNewRegNat F64 `thenNat` \ tmp2 ->
4248 code1 = registerCode register1 tmp1
4249 src1 = registerName register1 tmp1
4251 code2 = registerCode register2 tmp2
4252 src2 = registerName register2 tmp2
4254 code__2 dst = asmSeqThen [code1 [], code2 []] .
4255 mkSeqInstr (instr src1 src2 dst)
4257 return (Any F64 code__2)
4259 trivialUFCode _ instr x
4260 = getRegister x `thenNat` \ register ->
4261 getNewRegNat F64 `thenNat` \ tmp ->
4263 code = registerCode register tmp
4264 src = registerName register tmp
4265 code__2 dst = code . mkSeqInstr (instr src dst)
4267 return (Any F64 code__2)
4269 #endif /* alpha_TARGET_ARCH */
4271 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4273 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4276 The Rules of the Game are:
4278 * You cannot assume anything about the destination register dst;
4279 it may be anything, including a fixed reg.
4281 * You may compute an operand into a fixed reg, but you may not
4282 subsequently change the contents of that fixed reg. If you
4283 want to do so, first copy the value either to a temporary
4284 or into dst. You are free to modify dst even if it happens
4285 to be a fixed reg -- that's not your problem.
4287 * You cannot assume that a fixed reg will stay live over an
4288 arbitrary computation. The same applies to the dst reg.
4290 * Temporary regs obtained from getNewRegNat are distinct from
4291 each other and from all other regs, and stay live over
4292 arbitrary computations.
4294 --------------------
4296 SDM's version of The Rules:
4298 * If getRegister returns Any, that means it can generate correct
4299 code which places the result in any register, period. Even if that
4300 register happens to be read during the computation.
4302 Corollary #1: this means that if you are generating code for an
4303 operation with two arbitrary operands, you cannot assign the result
4304 of the first operand into the destination register before computing
4305 the second operand. The second operand might require the old value
4306 of the destination register.
4308 Corollary #2: A function might be able to generate more efficient
4309 code if it knows the destination register is a new temporary (and
4310 therefore not read by any of the sub-computations).
4312 * If getRegister returns Any, then the code it generates may modify only:
4313 (a) fresh temporaries
4314 (b) the destination register
4315 (c) known registers (eg. %ecx is used by shifts)
4316 In particular, it may *not* modify global registers, unless the global
4317 register happens to be the destination register.
-- trivial*Code (i386/x86_64): two-address instruction selection; see
-- "The Rules of the Game" above for the register-discipline this code
-- must obey.
-- NOTE(review): interior lines are elided in this chunk; comments cover
-- only the visible code.
4320 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
-- A literal LEFT operand can use the reversed instruction so the
-- literal becomes an immediate; only for non-64-bit-wide literals.
4321 | not (is64BitLit lit_a) = do
4322 b_code <- getAnyReg b
4325 = b_code dst `snocOL`
4326 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4328 return (Any rep code)
4330 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4332 -- This is re-used for floating pt instructions too.
4333 genTrivialCode rep instr a b = do
4334 (b_op, b_code) <- getNonClobberedOperand b
4335 a_code <- getAnyReg a
4336 tmp <- getNewRegNat rep
4338 -- We want the value of b to stay alive across the computation of a.
4339 -- But, we want to calculate a straight into the destination register,
4340 -- because the instruction only has two operands (dst := dst `op` src).
4341 -- The troublesome case is when the result of b is in the same register
4342 -- as the destination reg. In this case, we have to save b in a
4343 -- new temporary across the computation of a.
4345 | dst `regClashesWithOp` b_op =
4347 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4349 instr (OpReg tmp) (OpReg dst)
4353 instr b_op (OpReg dst)
4355 return (Any rep code)
-- Does this register appear in the operand (directly or inside an
-- addressing mode)?
4357 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4358 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4359 reg `regClashesWithOp` _ = False
4363 trivialUCode rep instr x = do
4364 x_code <- getAnyReg x
4370 return (Any rep code)
4374 #if i386_TARGET_ARCH
4376 trivialFCode pk instr x y = do
-- x87: x must survive the computation of y, so get a non-clobbered reg.
4377 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4378 (y_reg, y_code) <- getSomeReg y
4383 instr pk x_reg y_reg dst
4385 return (Any pk code)
4389 #if x86_64_TARGET_ARCH
-- SSE float ops are ordinary two-address instructions on x86_64.
4391 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
4397 trivialUFCode rep instr x = do
4398 (x_reg, x_code) <- getSomeReg x
4404 return (Any rep code)
4406 #endif /* i386_TARGET_ARCH */
4408 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4410 #if sparc_TARGET_ARCH
-- | SPARC integer binop, old-style 'thenNat' monadic plumbing.
-- First equation: when the second operand is a literal int it is folded
-- into the instruction as an immediate (RIImm), saving a register.
-- Second equation: both operands are computed into registers and
-- combined with the register/register form (RIReg).
-- NOTE(review): the 'let'/'in' keywords of these bindings sit on lines
-- elided from this view.
4412 trivialCode instr x (StInt y)
4414 = getRegister x `thenNat` \ register ->
4415 getNewRegNat IntRep `thenNat` \ tmp ->
4417 code = registerCode register tmp
4418 src1 = registerName register tmp
4419 src2 = ImmInt (fromInteger y)
4420 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4422 return (Any IntRep code__2)
4424 trivialCode instr x y
4425 = getRegister x `thenNat` \ register1 ->
4426 getRegister y `thenNat` \ register2 ->
4427 getNewRegNat IntRep `thenNat` \ tmp1 ->
4428 getNewRegNat IntRep `thenNat` \ tmp2 ->
4430 code1 = registerCode register1 tmp1
4431 src1 = registerName register1 tmp1
4432 code2 = registerCode register2 tmp2
4433 src2 = registerName register2 tmp2
4434 code__2 dst = code1 `appOL` code2 `snocOL`
4435 instr src1 (RIReg src2) dst
4437 return (Any IntRep code__2)
-- | SPARC binary floating-point op.  SPARC treats single- and
-- double-precision as distinct register kinds, so when the two operands
-- have different precisions the F32 one is first promoted to F64 via
-- 'FxTOy F DF' into a fresh temporary, and the instruction is emitted
-- at double precision.  If both precisions agree the instruction is
-- emitted directly at that precision.  Result rep: pk1 when equal,
-- otherwise F64.
-- NOTE(review): several plumbing lines ('thenNat' continuations for
-- tmp1/tmp2, the 'let'/'in', and the pk1==pk2 'if') are elided here.
4440 trivialFCode pk instr x y
4441 = getRegister x `thenNat` \ register1 ->
4442 getRegister y `thenNat` \ register2 ->
4443 getNewRegNat (registerRep register1)
4445 getNewRegNat (registerRep register2)
4447 getNewRegNat F64 `thenNat` \ tmp ->
4449 promote x = FxTOy F DF x tmp
4451 pk1 = registerRep register1
4452 code1 = registerCode register1 tmp1
4453 src1 = registerName register1 tmp1
4455 pk2 = registerRep register2
4456 code2 = registerCode register2 tmp2
4457 src2 = registerName register2 tmp2
4461 code1 `appOL` code2 `snocOL`
4462 instr (primRepToSize pk) src1 src2 dst
4463 else if pk1 == F32 then
4464 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4465 instr DF tmp src2 dst
4467 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4468 instr DF src1 tmp dst
4470 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- | SPARC unary integer op: compute the operand into a register, then
-- apply the instruction in its register form (RIReg src -> dst).
-- NOTE(review): 'let'/'in' keywords are on elided lines.
4473 trivialUCode instr x
4474 = getRegister x `thenNat` \ register ->
4475 getNewRegNat IntRep `thenNat` \ tmp ->
4477 code = registerCode register tmp
4478 src = registerName register tmp
4479 code__2 dst = code `snocOL` instr (RIReg src) dst
4481 return (Any IntRep code__2)
-- | SPARC unary floating-point op at precision 'pk': compute the
-- operand into a float register of that precision, then apply the
-- two-register instruction (src -> dst).
-- NOTE(review): 'let'/'in' keywords are on elided lines.
4484 trivialUFCode pk instr x
4485 = getRegister x `thenNat` \ register ->
4486 getNewRegNat pk `thenNat` \ tmp ->
4488 code = registerCode register tmp
4489 src = registerName register tmp
4490 code__2 dst = code `snocOL` instr src dst
4492 return (Any pk code__2)
4494 #endif /* sparc_TARGET_ARCH */
4496 #if powerpc_TARGET_ARCH
4499 Wolfgang's PowerPC version of The Rules:
4501 A slightly modified version of The Rules to take advantage of the fact
4502 that PowerPC instructions work on all registers and don't implicitly
4503 clobber any fixed registers.
4505 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4507 * If getRegister returns Any, then the code it generates may modify only:
4508 (a) fresh temporaries
4509 (b) the destination register
4510 It may *not* modify global registers, unless the global
4511 register happens to be the destination register.
4512 It may not clobber any other registers. In fact, only ccalls clobber any fixed registers.
4514 Also, it may not modify the counter register (used by genCCall).
4516 Corollary: If a getRegister for a subexpression returns Fixed, you need
4517 not move it to a fresh temporary before evaluating the next subexpression.
4518 The Fixed register won't be modified.
4519 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4521 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4522 the value of the destination register.
-- | PowerPC integer binop.  First equation: if the literal operand fits
-- in an immediate field for this rep/signedness (makeImmediate), fold
-- it into the instruction as RIImm and evaluate only the left operand.
-- Otherwise (second equation) both operands go into registers (RIReg).
-- NOTE(review): the '= do' of the guarded first equation is on an
-- elided line.
4525 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4526 | Just imm <- makeImmediate rep signed y
4528 (src1, code1) <- getSomeReg x
4529 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4530 return (Any rep code)
4532 trivialCode rep signed instr x y = do
4533 (src1, code1) <- getSomeReg x
4534 (src2, code2) <- getSomeReg y
4535 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4536 return (Any rep code)
-- | Three-register operation with no immediate form (PowerPC): force
-- both operands into registers and combine them with the given
-- three-address instruction.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
	-> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (reg_x, code_x) <- getSomeReg x
    (reg_y, code_y) <- getSomeReg y
    let assemble dst = (code_x `appOL` code_y) `snocOL` instr dst reg_x reg_y
    return (Any rep assemble)
-- | PowerPC unary operation: evaluate the operand into some register,
-- then append the single two-register instruction (dst <- op src).
trivialUCode rep instr x = do
    (operand, operand_code) <- getSomeReg x
    let with_op dst = operand_code `snocOL` instr dst operand
    return (Any rep with_op)
4551 -- There is no "remainder" instruction on the PPC, so we have to do
4553 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | Computes x `rem` y as  x - (x `div` y) * y : divide, multiply the
-- quotient back by the divisor, subtract from the dividend.
-- NOTE(review): the DIVW and SUBF instruction lines of the toOL list
-- are elided from this view; only the MULLW step is visible.
4555 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4556 -> CmmExpr -> CmmExpr -> NatM Register
4557 remainderCode rep div x y = do
4558 (src1, code1) <- getSomeReg x
4559 (src2, code2) <- getSomeReg y
4560 let code dst = code1 `appOL` code2 `appOL` toOL [
4562 MULLW dst dst (RIReg src2),
4565 return (Any rep code)
4567 #endif /* powerpc_TARGET_ARCH */
4570 -- -----------------------------------------------------------------------------
4571 -- Coercing to/from integer/floating-point...
4573 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4574 -- conversions. We have to store temporaries in memory to move
4575 -- between the integer and the floating point register sets.
4577 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4578 -- pretend, on sparc at least, that double and float regs are separate
4579 -- kinds, so the value has to be computed into one kind before being
4580 -- explicitly "converted" to live in the other kind.
-- Shared signatures for the per-architecture int<->FP coercions below:
-- from-rep, to-rep, and the expression to convert.
4582 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4583 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4585 #if sparc_TARGET_ARCH
-- SPARC-only: explicit float<->double conversions, since the sparc
-- backend treats F32 and F64 as separate register kinds.
4586 coerceDbl2Flt :: CmmExpr -> NatM Register
4587 coerceFlt2Dbl :: CmmExpr -> NatM Register
4590 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4592 #if alpha_TARGET_ARCH
4595 = getRegister x `thenNat` \ register ->
4596 getNewRegNat IntRep `thenNat` \ reg ->
4598 code = registerCode register reg
4599 src = registerName register reg
4601 code__2 dst = code . mkSeqInstrs [
4603 LD TF dst (spRel 0),
4606 return (Any F64 code__2)
4610 = getRegister x `thenNat` \ register ->
4611 getNewRegNat F64 `thenNat` \ tmp ->
4613 code = registerCode register tmp
4614 src = registerName register tmp
4616 code__2 dst = code . mkSeqInstrs [
4618 ST TF tmp (spRel 0),
4621 return (Any IntRep code__2)
4623 #endif /* alpha_TARGET_ARCH */
4625 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4627 #if i386_TARGET_ARCH
-- | i386 int -> float: one x87 conversion instruction (GITOF for F32,
-- GITOD for F64) applied to the operand register.
-- NOTE(review): the 'let' introducing opc/code is on an elided line.
4629 coerceInt2FP from to x = do
4630 (x_reg, x_code) <- getSomeReg x
4632 opc = case to of F32 -> GITOF; F64 -> GITOD
4633 code dst = x_code `snocOL` opc x_reg dst
4634 -- ToDo: works for non-I32 reps?
4636 return (Any to code)
-- | i386 float -> int: one x87 conversion instruction (GFTOI for F32,
-- GDTOI for F64) applied to the operand register.
-- NOTE(review): the 'let' introducing opc/code is on an elided line.
4640 coerceFP2Int from to x = do
4641 (x_reg, x_code) <- getSomeReg x
4643 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4644 code dst = x_code `snocOL` opc x_reg dst
4645 -- ToDo: works for non-I32 reps?
4647 return (Any to code)
4649 #endif /* i386_TARGET_ARCH */
4651 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4653 #if x86_64_TARGET_ARCH
-- | x86_64 float -> int via SSE truncating convert (CVTSS2SI / CVTSD2SI),
-- taking the source as a general operand (register or memory).
-- NOTE(review): the 'let' introducing opc/code is on an elided line.
4655 coerceFP2Int from to x = do
4656 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4658 opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4659 code dst = x_code `snocOL` opc x_op dst
4661 return (Any to code) -- works even if the destination rep is <I32
-- | x86_64 int -> float via SSE convert (CVTSI2SS / CVTSI2SD), taking
-- the source as a general operand (register or memory).
-- NOTE(review): the 'let' introducing opc/code is on an elided line.
4663 coerceInt2FP from to x = do
4664 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4666 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4667 code dst = x_code `snocOL` opc x_op dst
4669 return (Any to code) -- works even if the destination rep is <I32
-- | x86_64 float <-> double: a single SSE precision-conversion
-- instruction, selected by the *target* rep (CVTSD2SS to narrow to
-- F32, CVTSS2SD to widen to F64).
-- NOTE(review): the 'let' introducing opc/code is on an elided line.
4671 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4672 coerceFP2FP to x = do
4673 (x_reg, x_code) <- getSomeReg x
4675 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4676 code dst = x_code `snocOL` opc x_reg dst
4678 return (Any to code)
4682 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4684 #if sparc_TARGET_ARCH
4687 = getRegister x `thenNat` \ register ->
4688 getNewRegNat IntRep `thenNat` \ reg ->
4690 code = registerCode register reg
4691 src = registerName register reg
4693 code__2 dst = code `appOL` toOL [
4694 ST W src (spRel (-2)),
4695 LD W (spRel (-2)) dst,
4696 FxTOy W (primRepToSize pk) dst dst]
4698 return (Any pk code__2)
-- | SPARC float -> int: convert in the FP register file (FxTOy down to
-- word size), then move the bits across the int/FP register-set split
-- by storing to the stack (spRel (-2)) and reloading into an integer
-- register -- there is no direct FP<->int register move.
-- NOTE(review): the 'let'/'in' keywords are on elided lines.
4701 coerceFP2Int fprep x
4702 = ASSERT(fprep == F64 || fprep == F32)
4703 getRegister x `thenNat` \ register ->
4704 getNewRegNat fprep `thenNat` \ reg ->
4705 getNewRegNat F32 `thenNat` \ tmp ->
4707 code = registerCode register reg
4708 src = registerName register reg
4709 code__2 dst = code `appOL` toOL [
4710 FxTOy (primRepToSize fprep) W src tmp,
4711 ST W tmp (spRel (-2)),
4712 LD W (spRel (-2)) dst]
4714 return (Any IntRep code__2)
4718 = getRegister x `thenNat` \ register ->
4719 getNewRegNat F64 `thenNat` \ tmp ->
4720 let code = registerCode register tmp
4721 src = registerName register tmp
4724 (\dst -> code `snocOL` FxTOy DF F src dst))
4728 = getRegister x `thenNat` \ register ->
4729 getNewRegNat F32 `thenNat` \ tmp ->
4730 let code = registerCode register tmp
4731 src = registerName register tmp
4734 (\dst -> code `snocOL` FxTOy F DF src dst))
4736 #endif /* sparc_TARGET_ARCH */
4738 #if powerpc_TARGET_ARCH
-- | PowerPC int -> float using the classic "magic constant" trick:
-- build the double whose words are 0x43300000 / 0x80000000 (i.e.
-- 2^52 + 2^31) as a static literal, then construct on the stack the
-- double with high word 0x4330 and low word (src XOR 0x80000000),
-- load it as F64, and subtract the magic constant to recover the
-- signed integer value as a double.
-- Sub-word sources are sign-extended first (maybe_exts); an F32 target
-- gets a final round-to-single (maybe_frsp).
-- NOTE(review): the 'let', the label/LDATA line for the static
-- constant, the FP load of the constant, and the '_ -> nilOL' default
-- branches of maybe_exts/maybe_frsp fall on elided lines; operand
-- order of the FSUB should be confirmed against the Instr definition.
4739 coerceInt2FP fromRep toRep x = do
4740 (src, code) <- getSomeReg x
4741 lbl <- getNewLabelNat
4742 itmp <- getNewRegNat I32
4743 ftmp <- getNewRegNat F64
4744 dynRef <- cmmMakeDynamicReference addImportNat False lbl
4745 Amode addr addr_code <- getAmode dynRef
4747 code' dst = code `appOL` maybe_exts `appOL` toOL [
4750 CmmStaticLit (CmmInt 0x43300000 I32),
4751 CmmStaticLit (CmmInt 0x80000000 I32)],
4752 XORIS itmp src (ImmInt 0x8000),
4753 ST I32 itmp (spRel 3),
4754 LIS itmp (ImmInt 0x4330),
4755 ST I32 itmp (spRel 2),
4756 LD F64 ftmp (spRel 2)
4757 ] `appOL` addr_code `appOL` toOL [
4759 FSUB F64 dst ftmp dst
4760 ] `appOL` maybe_frsp dst
4762 maybe_exts = case fromRep of
4763 I8 -> unitOL $ EXTS I8 src src
4764 I16 -> unitOL $ EXTS I16 src src
4766 maybe_frsp dst = case toRep of
4767 F32 -> unitOL $ FRSP dst dst
4769 return (Any toRep code')
-- | PowerPC float -> int: convert to an integer inside the FP register
-- file, then move the bits to an integer register by storing the
-- 64-bit FP value to the stack and reloading only its low word.
-- NOTE(review): the 'let' and the convert-to-int instruction line
-- (presumably FCTIWZ) are elided from this view -- TODO confirm.
4771 coerceFP2Int fromRep toRep x = do
4772 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4773 (src, code) <- getSomeReg x
4774 tmp <- getNewRegNat F64
4776 code' dst = code `appOL` toOL [
4777 -- convert to int in FP reg
4779 -- store value (64bit) from FP to stack
4780 ST F64 tmp (spRel 2),
4781 -- read low word of value (high word is undefined)
4782 LD I32 dst (spRel 3)]
4783 return (Any toRep code')
4784 #endif /* powerpc_TARGET_ARCH */
4787 -- -----------------------------------------------------------------------------
4788 -- eXTRA_STK_ARGS_HERE
4790 -- We (allegedly) put the first six C-call arguments in registers;
4791 -- where do we start putting the rest of them?
4793 -- Moved from MachInstrs (SDM):
4795 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- | Stack offset (in words) where C-call arguments beyond the
-- register-passed ones start: 0 on alpha, 23 on sparc.
-- NOTE(review): the defining name line between the signature and the
-- '=' is elided from this view.
4796 eXTRA_STK_ARGS_HERE :: Int
4798 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))