1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
26 -- Our intermediate code:
27 import PprCmm ( pprExpr )
33 import StaticFlags ( opt_PIC )
34 import ForeignCall ( CCallConv(..) )
38 import qualified Outputable
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import TRACE ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Maybe ( fromJust )
53 -- -----------------------------------------------------------------------------
54 -- Top-level of the instruction selector
56 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
57 -- They are really trees of insns to facilitate fast appending, where a
58 -- left-to-right traversal (pre-order?) yields the insns in the correct
61 type InstrBlock = OrdList Instr
-- | Entry point of the instruction selector: translate one top-level
-- Cmm entity (a procedure or a data section) into native-code tops.
cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
  -- Select instructions block by block; each block may also emit
  -- extra static data (literal tables etc.), collected in 'statics'.
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (concat nat_blocks)
      tops = proc : concat statics
      -- When a PIC base register is in use, prepend the code that
      -- initialises it; otherwise the tops are returned unchanged.
      Just picBase -> initializePicBase picBase tops
      Nothing -> return tops

-- Data sections need no instruction selection: CmmStatic is used as-is.
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Generate instructions for one basic block.  Returns the resulting
-- block(s) plus any static data the selection produced.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

  -- NEWBLOCK starts a fresh block: the instructions accumulated so far
  -- belong to the block named by 'id'.
  mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
    = ([], BasicBlock id instrs : blocks, statics)
  -- LDATA is hoisted out of the instruction stream into static data.
  mkBlocks (LDATA sec dat) (instrs,blocks,statics)
    = (instrs, blocks, CmmData sec dat:statics)
  -- Ordinary instruction: prepend to the current (right-to-left) block.
  mkBlocks instr (instrs,blocks,statics)
    = (instr:instrs, blocks, statics)

  return (BasicBlock id top : other_blocks, statics)
-- | Select instructions for a list of statements, concatenating the
-- per-statement instruction trees.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
  = do instrss <- mapM stmtToInstrs stmts
       return (concatOL instrss)
-- | Select instructions for a single Cmm statement, dispatching on the
-- statement constructor.  Assignments are split by representation:
-- float, 64-bit-on-32-bit (under WORD_SIZE_IN_BITS==32), and integer.
-- NOTE(review): the CmmAssign/CmmStore alternative heads and the
-- matching #endif lines appear to be elided in this excerpt — confirm
-- against the full file.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

      -- register assignment, split by the register's representation
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignReg_I64Code reg src
      | otherwise          -> assignReg_IntCode kind reg src
      where kind = cmmRegRep reg

      -- memory store, split by the representation of the stored value
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignMem_I64Code addr src
      | otherwise          -> assignMem_IntCode kind addr src
      where kind = cmmExprRep src

    CmmCall target result_regs args vols
       -> genCCall target result_regs args vols

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params    -> genJump arg
129 -- -----------------------------------------------------------------------------
130 -- General things for putting together code sequences
132 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
133 -- CmmExprs into CmmRegOff?
-- | Rewrite a CmmRegOff node into the equivalent explicit addition of
-- the register and a literal offset, so later clauses only need to
-- handle CmmMachOp.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
  let width  = cmmRegRep reg
      offLit = CmmLit (CmmInt (fromIntegral off) width)
  in  CmmMachOp (MO_Add width) [CmmReg reg, offLit]
139 -- -----------------------------------------------------------------------------
140 -- Code gen for 64-bit arithmetic on 32-bit platforms
143 Simple support for generating 64-bit code (ie, 64 bit values and 64
144 bit assignments) on 32-bit platforms. Unlike the main code generator
145 we merely shoot for generating working code as simply as possible, and
146 pay little attention to code quality. Specifically, there is no
147 attempt to deal cleverly with the fixed-vs-floating register
148 distinction; all values are generated into (pairs of) floating
149 registers, even if this would mean some redundant reg-reg moves as a
150 result. Only one of the VRegUniques is returned, since it will be
151 of the VRegUniqueLo form, and the upper-half VReg can be determined
152 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit platform:
-- the code plus the LOW-half register; the high half is derived with
-- getHiVRegFromLo.
data ChildCode64     -- a.k.a "Register64"
        Reg          -- the lower 32-bit temporary which contains the
                     -- result; use getHiVRegFromLo to find the other
                     -- VRegUnique.  Rules of this simplified insn
                     -- selection game are therefore that the returned
                     -- Reg may be modified

#if WORD_SIZE_IN_BITS==32
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
#ifndef x86_64_TARGET_ARCH
iselExpr64        :: CmmExpr -> NatM ChildCode64
173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value as two 32-bit MOVs through the amode.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
        rhi = getHiVRegFromLo rlo

        -- Little-endian store: low word at the base address, high word
        -- 4 bytes above it.
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))

  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- i386: assign a 64-bit value to a local register pair with two
-- reg-to-reg MOVs.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
        r_dst_lo = mkVReg u_dst I32
        r_dst_hi = getHiVRegFromLo r_dst_lo
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
        mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Only local registers can hold I64 on this path.
assignReg_I64Code lvalue valueTree
  = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: materialise the two 32-bit halves into a fresh
-- register pair with immediate MOVs.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        -- low word: plain truncation of the literal to 32 bits
        r = fromIntegral (fromIntegral i :: Word32)
        -- high word: shift the literal BEFORE truncating to Word32.
        -- (Truncating first and then shifting a Word32 right by 32
        -- always yields 0, losing the upper half of the literal.)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, low word from the base address, high
-- word from base+4 (little-endian).
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- A 64-bit local register needs no code: just name its low-half vreg.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
  = return (ChildCode64 nilOL (mkVReg vu I32))
235 -- we handle addition, but rather badly
-- 64-bit addition of a literal: ADD the low words, then ADC the high
-- words so the carry propagates.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
   let
        -- low word of the literal
        r = fromIntegral (fromIntegral i :: Word32)
        -- high word: shift BEFORE truncating, otherwise the upper
        -- 32 bits of the literal are lost (Word32 `shiftR` 32 == 0)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code = code1 `appOL`
               toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                      ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
                      MOV I32 (OpReg r1hi) (OpReg rhi),
                      ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)
-- 64-bit addition of two expressions: ADD low words, ADC high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        -- copy e1's pair into the result pair, then add e2's pair with
        -- carry propagation from low to high
        toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
               ADD I32 (OpReg r2lo) (OpReg rlo),
               MOV I32 (OpReg r1hi) (OpReg rhi),
               ADC I32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)
268 = pprPanic "iselExpr64(i386)" (ppr expr)
270 #endif /* i386_TARGET_ARCH */
272 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
274 #if sparc_TARGET_ARCH
-- sparc: store a 64-bit value as two word STs.  High word goes at
-- offset 0, low word at offset 4 (big-endian layout).
assignMem_I64Code addrTree valueTree
  = iselExpr64 valueTree   `thenNat` \ (ChildCode64 vcode vrlo) ->
    getRegister addrTree   `thenNat` \ register_addr ->
    getNewRegNat IntRep    `thenNat` \ t_addr ->
    let rlo = VirtualRegI vrlo
        rhi = getHiVRegFromLo rlo
        code_addr = registerCode register_addr t_addr
        reg_addr  = registerName register_addr t_addr
        mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
        mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
    return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)

-- sparc: assign a 64-bit value to a temporary, moving both halves via
-- OR-with-%g0 (the sparc idiom for a register move).
assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
  = iselExpr64 valueTree  `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
        r_dst_lo = mkVReg u_dst IntRep
        r_src_lo = VirtualRegI vr_src_lo
        r_dst_hi = getHiVRegFromLo r_dst_lo
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = mkMOV r_src_lo r_dst_lo
        mov_hi = mkMOV r_src_hi r_dst_hi
        mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
        vcode `snocOL` mov_hi `snocOL` mov_lo

assignReg_I64Code lvalue valueTree
  = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
310 -- Don't delete this -- it's very handy for debugging.
312 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
313 -- = panic "iselExpr64(???)"
-- sparc: 64-bit load as two word LDs — high word from offset 0, low
-- word from offset 4 (big-endian).
iselExpr64 (CmmLoad I64 addrTree)
  = getRegister addrTree `thenNat` \ register_addr ->
    getNewRegNat IntRep  `thenNat` \ t_addr ->
    getNewRegNat IntRep  `thenNat` \ rlo ->
    let rhi = getHiVRegFromLo rlo
        code_addr = registerCode register_addr t_addr
        reg_addr  = registerName register_addr t_addr
        mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
        mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
        ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo)
-- sparc: a 64-bit local register is copied, half by half, into a fresh
-- destination pair using the OR-with-%g0 move idiom.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
  = getNewRegNat IntRep `thenNat` \ r_dst_lo ->
    let r_dst_hi = getHiVRegFromLo r_dst_lo
        -- the source vreg is named by the pattern variable 'uq'
        -- (previously this referenced the unbound name 'vu')
        r_src_lo = mkVReg uq IntRep
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = mkMOV r_src_lo r_dst_lo
        mov_hi = mkMOV r_src_hi r_dst_hi
        mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
    in
        return (
        ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
        )
-- sparc: a 64-bit call result arrives in %o0 (high) and %o1 (low? —
-- see the mov ordering); move both into a fresh pair.
-- NOTE(review): 'kind' is not bound anywhere visible in this excerpt —
-- confirm against the full file.
iselExpr64 (StCall fn cconv I64 args)
  = genCCall fn cconv kind args  `thenNat` \ call ->
    getNewRegNat IntRep          `thenNat` \ r_dst_lo ->
    let r_dst_hi = getHiVRegFromLo r_dst_lo
        mov_lo = mkMOV o0 r_dst_lo
        mov_hi = mkMOV o1 r_dst_hi
        mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
        ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo)
                    (getVRegUnique r_dst_lo)

-- catch-all: anything else is not handled on sparc
  = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
359 #endif /* sparc_TARGET_ARCH */
361 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
363 #if powerpc_TARGET_ARCH
-- | ppc: compute addressing modes for both 32-bit halves of a 64-bit
-- memory operand.  If base+4 is not expressible as an amode, force the
-- address into a register and use reg+0 / reg+4.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                          return (AddrRegImm hi_ptr (ImmInt 0),
                                  AddrRegImm hi_ptr (ImmInt 4),

-- ppc: store a 64-bit value as two word STs through the half-amodes.
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
                rhi = getHiVRegFromLo rlo
                mov_hi = ST I32 rhi hi_addr
                mov_lo = ST I32 rlo lo_addr
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- ppc: assign a 64-bit value to a local register pair using two MR
-- (move-register) instructions.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
        ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
                r_dst_lo = mkVReg u_dst I32
                r_dst_hi = getHiVRegFromLo r_dst_lo
                r_src_hi = getHiVRegFromLo r_src_lo
                mov_lo = MR r_dst_lo r_src_lo
                mov_hi = MR r_dst_hi r_src_hi
                vcode `snocOL` mov_lo `snocOL` mov_hi

-- only local registers can hold I64 on this path
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
404 -- Don't delete this -- it's very handy for debugging.
406 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
407 -- = panic "iselExpr64(???)"
-- ppc: 64-bit load as two word LDs via the half-amodes from
-- getI64Amodes.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- a 64-bit local register needs no code: just name its low-half vreg
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
-- ppc: 64-bit literal.  Each 32-bit half is built from two 16-bit
-- halfwords: LIS loads the upper halfword, OR merges in the lower one.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        -- shift the literal BEFORE truncating to 16 bits; truncating
        -- first would make every halfword except the lowest come out 0
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- half2 belongs to the HIGH register: OR it into rhi.
                -- (OR-ing it into rlo would corrupt the low word and
                -- leave rhi's lower halfword zero.)
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- ppc: 64-bit addition — ADDC on the low words generates the carry
-- consumed by the high-word add.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        toOL [ ADDC rlo r1lo r2lo,
   return (ChildCode64 code rlo)

-- catch-all: anything else is not handled on powerpc
   = pprPanic "iselExpr64(powerpc)" (ppr expr)
454 #endif /* powerpc_TARGET_ARCH */
457 -- -----------------------------------------------------------------------------
458 -- The 'Register' type
460 -- 'Register's passed up the tree. If the stix code forces the register
461 -- to live in a pre-decided machine register, it comes out as @Fixed@;
462 -- otherwise, it comes out as @Any@, and the parent can decide which
463 -- register to put it in.
466 = Fixed MachRep Reg InstrBlock
467 | Any MachRep (Reg -> InstrBlock)
-- | Re-tag a 'Register' with a different 'MachRep', leaving the
-- generated code (and any fixed register choice) untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep =
  case register of
    Fixed _ reg code -> Fixed rep reg code
    Any _ codefn     -> Any rep codefn
474 -- -----------------------------------------------------------------------------
475 -- Utils based on getRegister, below
477 -- The dual to getAnyReg: compute an expression into a register, but
478 -- we don't mind which one it is.
-- | The dual to getAnyReg: compute an expression into some register —
-- a fresh temporary when the result is 'Any', the fixed register
-- otherwise.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
    r <- getRegister expr
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
489 -- -----------------------------------------------------------------------------
490 -- Grab the Reg for a CmmReg
-- | Map a CmmReg to the machine (or virtual) register holding it.
-- Panics for global registers that live in memory on this platform.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
506 -- -----------------------------------------------------------------------------
507 -- Generate code to get a subtree into a Register
509 -- Don't delete this -- it's very handy for debugging.
511 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
512 -- = panic "getRegister(???)"
-- | Generate code to get a subtree into a 'Register'.  These first
-- clauses are machine-independent; arch-specific clauses follow under
-- CPP guards.
getRegister :: CmmExpr -> NatM Register

-- a register reference needs no code: it is already Fixed
getRegister (CmmReg reg)
  = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)

-- normalise CmmRegOff into an explicit add, then retry
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

getRegister CmmPicBaseReg
      reg <- getPicBaseNat wordRep
      return (Fixed wordRep reg nilOL)
527 -- end of machine-"independent" bit; here we go on the rest...
529 #if alpha_TARGET_ARCH
-- alpha (legacy backend): load a double literal from a read-only data
-- segment via a temporary address register.
getRegister (StDouble d)
  = getBlockIdNat       `thenNat` \ lbl ->
    getNewRegNat PtrRep `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
            LDATA RoDataSegment lbl [
                  DATA TF [ImmLab (rational d)]
            LDA tmp (AddrImm (ImmCLbl lbl)),
            LD TF dst (AddrReg tmp)]
    return (Any F64 code)

-- alpha: unary PrimOps.  Transcendental float ops fall through to a
-- C-call on the corresponding libm function.
getRegister (StPrim primop [x]) -- unary PrimOps
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp    -> trivialUCode NOT x

      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode F64 (FNEG TF) x

      OrdOp -> coerceIntCode IntRep x

      Float2IntOp  -> coerceFP2Int x
      Int2FloatOp  -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

      -- everything else becomes a call into libm
      other_op -> getRegister (StCall fn CCallConv F64 [x])
        fn = case other_op of
               FloatExpOp   -> FSLIT("exp")
               FloatLogOp   -> FSLIT("log")
               FloatSqrtOp  -> FSLIT("sqrt")
               FloatSinOp   -> FSLIT("sin")
               FloatCosOp   -> FSLIT("cos")
               FloatTanOp   -> FSLIT("tan")
               FloatAsinOp  -> FSLIT("asin")
               FloatAcosOp  -> FSLIT("acos")
               FloatAtanOp  -> FSLIT("atan")
               FloatSinhOp  -> FSLIT("sinh")
               FloatCoshOp  -> FSLIT("cosh")
               FloatTanhOp  -> FSLIT("tanh")
               DoubleExpOp  -> FSLIT("exp")
               DoubleLogOp  -> FSLIT("log")
               DoubleSqrtOp -> FSLIT("sqrt")
               DoubleSinOp  -> FSLIT("sin")
               DoubleCosOp  -> FSLIT("cos")
               DoubleTanOp  -> FSLIT("tan")
               DoubleAsinOp -> FSLIT("asin")
               DoubleAcosOp -> FSLIT("acos")
               DoubleAtanOp -> FSLIT("atan")
               DoubleSinhOp -> FSLIT("sinh")
               DoubleCoshOp -> FSLIT("cosh")
               DoubleTanhOp -> FSLIT("tanh")

    pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- alpha: dyadic PrimOps.  The ISA only has LT/LE/EQ style compares, so
-- Gt/Ge are implemented by swapping the operands of Lt/Le.
getRegister (StPrim primop [x, y]) -- dyadic PrimOps
  = case primop of
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp -> trivialCode (CMP LTT) y x
      IntGeOp -> trivialCode (CMP LE) y x
      IntEqOp -> trivialCode (CMP EQQ) x y
      IntNeOp -> int_NE_code x y
      IntLtOp -> trivialCode (CMP LTT) x y
      IntLeOp -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      -- x >= y is computed as y <= x, so the operands must be swapped,
      -- exactly as for IntGeOp/AddrGeOp.  (Previously this read
      -- 'x y', i.e. x <= y — the wrong relation.)
      WordGeOp -> trivialCode (CMP ULE) y x
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp  -> trivialCode (ADD Q False) x y
      IntSubOp  -> trivialCode (SUB Q False) x y
      IntMulOp  -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp  -> trivialCode (REM Q False) x y

      WordAddOp  -> trivialCode (ADD Q False) x y
      WordSubOp  -> trivialCode (SUB Q False) x y
      WordMulOp  -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp  -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
      FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
      FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
      FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y

      DoubleAddOp -> trivialFCode F64 (FADD TF) x y
      DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode F64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp  -> trivialCode OR x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp  -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
      DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
676 {- ------------------------------------------------------------
677 Some bizarre special code for getting condition codes into
678 registers. Integer non-equality is a test for equality
679 followed by an XOR with 1. (Integer comparisons always set
680 the result register to 0 or 1.) Floating point comparisons of
681 any kind leave the result in a floating point register, so we
682 need to wrangle an integer register out of things.
-- | alpha: integer non-equality — an equality compare (result 0/1)
-- followed by XOR with 1 to invert it.
int_NE_code :: StixTree -> StixTree -> NatM Register
  = trivialCode (CMP EQQ) x y `thenNat` \ register ->
    getNewRegNat IntRep       `thenNat` \ tmp ->
        code = registerCode register tmp
        src  = registerName register tmp
        code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
    return (Any IntRep code__2)

{- ------------------------------------------------------------
   Comments for int_NE_code also apply to cmpF_code
    :: (Reg -> Reg -> Reg -> Instr)
    -> StixTree -> StixTree

-- alpha: float compares leave the result in an FP register; wrangle an
-- integer 0/1 out of it via a conditional branch over a clearing move.
cmpF_code instr cond x y
  = trivialFCode pr instr x y `thenNat` \ register ->
    getNewRegNat F64          `thenNat` \ tmp ->
    getBlockIdNat             `thenNat` \ lbl ->
        code = registerCode register tmp
        result = registerName register tmp
        code__2 dst = code . mkSeqInstrs [
            OR zeroh (RIImm (ImmInt 1)) dst,
            BF cond result (ImmCLbl lbl),
            OR zeroh (RIReg zeroh) dst,
    return (Any IntRep code__2)

    pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
722 ------------------------------------------------------------
-- alpha: plain load at the representation's size
getRegister (CmmLoad pk mem)
  = getAmode mem `thenNat` \ amode ->
        code = amodeCode amode
        src  = amodeAddr amode
        size = primRepToSize pk
        code__2 dst = code . mkSeqInstr (LD size dst src)
    return (Any pk code__2)

-- alpha: integer literal — small immediates go via OR-with-zero,
-- larger ones via LDI (guard lines elided in this excerpt)
getRegister (StInt i)
        code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    return (Any IntRep code)
        code dst = mkSeqInstr (LDI Q dst src)
    return (Any IntRep code)
        src = ImmInt (fromInteger i)

-- address-of-label: LDA through the resolved immediate
        code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    return (Any PtrRep code)
        imm__2 = case imm of Just x -> x
758 #endif /* alpha_TARGET_ARCH */
760 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: float literal — emit it as static data and GLD it back.
-- NOTE(review): the LDATA line of each toOL appears to be elided in
-- this excerpt — confirm against the full file.
getRegister (CmmLit (CmmFloat f F32)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            CmmStaticLit (CmmFloat f F32)],
            GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
    return (Any F32 code)

-- i386: doubles — 0.0 and 1.0 have dedicated load-constant insns
-- (the guard lines distinguishing them are elided in this excerpt)
getRegister (CmmLit (CmmFloat d F64))
  = let code dst = unitOL (GLDZ dst)
    in  return (Any F64 code)

  = let code dst = unitOL (GLD1 dst)
    in  return (Any F64 code)

    lbl <- getNewLabelNat
    let code dst = toOL [
            CmmStaticLit (CmmFloat d F64)],
            GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
    return (Any F64 code)

#endif /* i386_TARGET_ARCH */

#if x86_64_TARGET_ARCH

-- x86_64: 0.0 is just xor-ing the destination with itself
getRegister (CmmLit (CmmFloat 0.0 rep)) = do
   let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
   return (Any rep code)

-- x86_64: other float literals come from a static table via MOV
getRegister (CmmLit (CmmFloat f rep)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            CmmStaticLit (CmmFloat f rep)],
            MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
            -- ToDo: should use %rip-relative
    return (Any rep code)

#endif /* x86_64_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
-- (MOVZxL/MOVSxL fold the widening into the load itself)
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I32 code)

getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I32 code)

#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I8) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL I16) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I16) addr
  return (Any I64 code)

getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
  return (Any I64 code)

getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL I32) addr
  return (Any I64 code)
#if x86_64_TARGET_ARCH
-- x86_64: floating negation — flip the sign bit by XOR-ing with a
-- 128-bit constant from a 16-byte-aligned read-only section.
getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
    code dst = x_code dst `appOL` toOL [
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
            CmmStaticLit (CmmInt 0x80000000 I32),
            CmmStaticLit (CmmInt 0 I32),
            CmmStaticLit (CmmInt 0 I32),
            CmmStaticLit (CmmInt 0 I32)
        XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
        -- xorps, so we need the 128-bit constant
        -- ToDo: rip-relative
  return (Any F32 code)

getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
    -- This is how gcc does it, so it can't be that bad:
    code dst = x_code dst `appOL` toOL [
        LDATA ReadOnlyData16 [
            CmmStaticLit (CmmInt 0x8000000000000000 I64),
            CmmStaticLit (CmmInt 0 I64)
        -- gcc puts an unpck here.  Wonder if we need it.
        XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
        -- xorpd, so we need the 128-bit constant
        -- ToDo: rip-relative
  return (Any F64 code)
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- x86/x86_64: unary MachOps — negation, complement, and the many
-- width/sign conversions (narrowing ones are nops here).
getRegister (CmmMachOp mop [x]) -- unary MachOps
      MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
      MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x

      MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
      MO_Not rep   -> trivialUCode rep (NOT rep) x

      -- Narrowing conversions: no code needed, just re-tag the rep.
      -- TODO: these are only nops if the arg is not a fixed register that
      -- can't be byte-addressed.
      MO_U_Conv I32 I8  -> conversionNop I32 x
      MO_S_Conv I32 I8  -> conversionNop I32 x
      MO_U_Conv I16 I8  -> conversionNop I16 x
      MO_S_Conv I16 I8  -> conversionNop I16 x
      MO_U_Conv I32 I16 -> conversionNop I32 x
      MO_S_Conv I32 I16 -> conversionNop I32 x
#if x86_64_TARGET_ARCH
      MO_U_Conv I64 I32 -> conversionNop I64 x
      MO_S_Conv I64 I32 -> conversionNop I64 x
      MO_U_Conv I64 I16 -> conversionNop I64 x
      MO_S_Conv I64 I16 -> conversionNop I64 x
      MO_U_Conv I64 I8  -> conversionNop I64 x
      MO_S_Conv I64 I8  -> conversionNop I64 x

      MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
      MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x

      -- widening conversions use an explicit movzx/movsx
      MO_U_Conv I8 I32  -> integerExtend I8 I32 MOVZxL x
      MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
      MO_U_Conv I8 I16  -> integerExtend I8 I16 MOVZxL x

      MO_S_Conv I8 I32  -> integerExtend I8 I32 MOVSxL x
      MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
      MO_S_Conv I8 I16  -> integerExtend I8 I16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_U_Conv I8 I64  -> integerExtend I8 I64 MOVZxL x
      MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
      MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
      MO_S_Conv I8 I64  -> integerExtend I8 I64 MOVSxL x
      MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
      MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_S_Conv F32 F64 -> conversionNop F64 x
      MO_S_Conv F64 F32 -> conversionNop F32 x

      MO_S_Conv F32 F64 -> coerceFP2FP F64 x
      MO_S_Conv F64 F32 -> coerceFP2FP F32 x

        | isFloatingRep from -> coerceFP2Int from to x
        | isFloatingRep to   -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)

     -- signed or unsigned extension.
     integerExtend from to instr expr = do
         (reg,e_code) <- if from == I8 then getByteReg expr
             instr from (OpReg reg) (OpReg dst)

     -- re-tag the rep without emitting any instructions
     conversionNop new_rep expr
         = do e_code <- getRegister expr
              return (swizzleRegisterRep e_code new_rep)
-- x86/x86_64: dyadic MachOps — comparisons, arithmetic, bitwise ops
-- and shifts.  I8 operands are ruled out by the assertion; they are
-- widened earlier.
getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
  = ASSERT2(cmmExprRep x /= I8, pprExpr e)
      MO_Eq F32   -> condFltReg EQQ x y
      MO_Ne F32   -> condFltReg NE x y
      MO_S_Gt F32 -> condFltReg GTT x y
      MO_S_Ge F32 -> condFltReg GE x y
      MO_S_Lt F32 -> condFltReg LTT x y
      MO_S_Le F32 -> condFltReg LE x y

      MO_Eq F64   -> condFltReg EQQ x y
      MO_Ne F64   -> condFltReg NE x y
      MO_S_Gt F64 -> condFltReg GTT x y
      MO_S_Ge F64 -> condFltReg GE x y
      MO_S_Lt F64 -> condFltReg LTT x y
      MO_S_Le F64 -> condFltReg LE x y

      MO_Eq rep   -> condIntReg EQQ x y
      MO_Ne rep   -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      -- unsigned comparisons use the unsigned condition codes
      MO_U_Gt rep -> condIntReg GU x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU x y
      MO_U_Le rep -> condIntReg LEU x y

#if i386_TARGET_ARCH
      MO_Add F32 -> trivialFCode F32 GADD x y
      MO_Sub F32 -> trivialFCode F32 GSUB x y

      MO_Add F64 -> trivialFCode F64 GADD x y
      MO_Sub F64 -> trivialFCode F64 GSUB x y

      MO_S_Quot F32 -> trivialFCode F32 GDIV x y
      MO_S_Quot F64 -> trivialFCode F64 GDIV x y

#if x86_64_TARGET_ARCH
      MO_Add F32 -> trivialFCode F32 ADD x y
      MO_Sub F32 -> trivialFCode F32 SUB x y

      MO_Add F64 -> trivialFCode F64 ADD x y
      MO_Sub F64 -> trivialFCode F64 SUB x y

      MO_S_Quot F32 -> trivialFCode F32 FDIV x y
      MO_S_Quot F64 -> trivialFCode F64 FDIV x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

#if i386_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 GMUL x y
      MO_Mul F64 -> trivialFCode F64 GMUL x y

#if x86_64_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 MUL x y
      MO_Mul F64 -> trivialFCode F64 MUL x y

      MO_Mul rep -> let op = IMUL rep in
                    trivialCode rep op (Just op) x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_And rep -> let op = AND rep in
                    trivialCode rep op (Just op) x y
      MO_Or  rep -> let op = OR rep in
                    trivialCode rep op (Just op) x y
      MO_Xor rep -> let op = XOR rep in
                    trivialCode rep op (Just op) x y

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
      MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
      MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
      MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1089 --------------------
-- imulMayOflo (x86): signed multiply with overflow detection.  Widening
-- IMUL2 leaves the full product in %edx:%eax; the low word is then
-- sign-extended by an arithmetic shift and subtracted from the high word,
-- so the result register is zero exactly when no overflow occurred.
-- shift_amt is presumably the rep's bit width minus one — the case
-- alternatives establishing that are elided here; confirm upstream.
1090 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1091 imulMayOflo rep a b = do
1092 (a_reg, a_code) <- getNonClobberedReg a
1093 b_code <- getAnyReg b
1095 shift_amt = case rep of
1098 _ -> panic "shift_amt"
1100 code = a_code `appOL` b_code eax `appOL`
1102 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1103 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1104 -- sign extend lower part
1105 SUB rep (OpReg edx) (OpReg eax)
1106 -- compare against upper
1107 -- eax==0 if high part == sign extended low part
1110 return (Fixed rep eax code)
1112 --------------------
-- shift_code: x86 shifts constrain the shift amount to an immediate or
-- the %cl register (see the comment block at the call sites above), so
-- trivialCode cannot be used.  Two cases: literal amount (encoded as an
-- immediate) and computed amount (evaluated into %ecx first — the step
-- loading %ecx is elided in this fragment).
1113 shift_code :: MachRep
1114 -> (Operand -> Operand -> Instr)
1119 {- Case1: shift length as immediate -}
1120 shift_code rep instr x y@(CmmLit lit) = do
1121 x_code <- getAnyReg x
1124 = x_code dst `snocOL`
1125 instr (OpImm (litToImm lit)) (OpReg dst)
1127 return (Any rep code)
1129 {- Case2: shift length is complex (non-immediate) -}
-- x must be non-clobbered because the amount's code runs after x's code.
1130 shift_code rep instr x y{-amount-} = do
1131 (x_reg, x_code) <- getNonClobberedReg x
1132 y_code <- getAnyReg y
1134 code = x_code `appOL`
1136 instr (OpReg ecx) (OpReg x_reg)
1138 return (Fixed rep x_reg code)
1140 --------------------
-- add_code / sub_code: integer addition and subtraction.  A literal
-- integer operand goes through the three-operand add_int path below;
-- subtracting a literal is rewritten as adding its negation.  SUB is not
-- commutative, hence the Nothing (no reversed form) in sub_code.
1141 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1142 add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
1143 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1145 --------------------
1146 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1147 sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
1148 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1150 -- our three-operand add instruction:
-- Builds reg+imm as an addressing mode (AddrBaseIndex base, no index),
-- presumably emitted as a LEA — the instruction line itself is elided
-- in this fragment; confirm upstream.
1151 add_int rep x y = do
1152 (x_reg, x_code) <- getSomeReg x
1154 imm = ImmInt (fromInteger y)
1158 (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
1161 return (Any rep code)
1163 ----------------------
-- div_code: x86 division.  signed selects IDIV and sign-extends %eax
-- into %edx via CLTD; unsigned zeroes %edx with XOR instead.  quotient
-- selects which half of the divide result to return: %eax holds the
-- quotient; the remainder presumably comes back in %edx (that guard
-- alternative is elided here).  The divisor must not be clobbered by
-- the dividend's code, hence getRegOrMem.
1164 div_code rep signed quotient x y = do
1165 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1166 x_code <- getAnyReg x
1168 widen | signed = CLTD rep
1169 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1171 instr | signed = IDIV
1174 code = y_code `appOL`
1176 toOL [widen, instr rep y_op]
1178 result | quotient = eax
1182 return (Fixed rep result code)
-- Memory load: i386 takes the GLD (FP pseudo-load) path, other arches a
-- plain MOV from the computed address.  The guard line(s) selecting this
-- equation are not visible in this fragment.
1185 getRegister (CmmLoad mem pk)
1188 Amode src mem_code <- getAmode mem
1190 code dst = mem_code `snocOL`
1191 IF_ARCH_i386(GLD pk src dst,
1192 MOV pk (OpAddr src) (OpReg dst))
1194 return (Any pk code)
1196 #if i386_TARGET_ARCH
-- i386 integer load: 8-bit loads always use MOVZxL (zero-extension) —
-- rationale in the comment below.
1197 getRegister (CmmLoad mem pk)
1200 code <- intLoadCode (instr pk) mem
1201 return (Any pk code)
1203 instr I8 = MOVZxL pk
1206 -- we always zero-extend 8-bit loads, if we
1207 -- can't think of anything better. This is because
1208 -- we can't guarantee access to an 8-bit variant of every register
1209 -- (esi and edi don't have 8-bit variants), so to make things
1210 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1213 #if x86_64_TARGET_ARCH
1214 -- Simpler memory load code on x86_64
1215 getRegister (CmmLoad mem pk)
1217 code <- intLoadCode (MOV pk) mem
1218 return (Any pk code)
-- Loading the literal 0: XOR the destination with itself instead of a
-- MOV of an immediate.
1221 getRegister (CmmLit (CmmInt 0 rep))
1223 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1224 adj_rep = case rep of I64 -> I32; _ -> rep
1225 rep1 = IF_ARCH_i386( rep, adj_rep )
1227 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1229 return (Any rep code)
1231 #if x86_64_TARGET_ARCH
1232 -- optimisation for loading small literals on x86_64: take advantage
1233 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1234 -- instruction forms are shorter.
1235 getRegister (CmmLit lit)
1236 | I64 <- cmmLitRep lit, not (isBigLit lit)
1239 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1241 return (Any I64 code)
-- "big" here means not representable as an unsigned 32-bit value.
1243 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1245 -- note1: not the same as is64BitLit, because that checks for
1246 -- signed literals that fit in 32 bits, but we want unsigned
1248 -- note2: all labels are small, because we're assuming the
1249 -- small memory model (see gcc docs, -mcmodel=small).
-- Generic literal: a single MOV-immediate at the literal's own rep.
1252 getRegister (CmmLit lit)
1256 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1258 return (Any rep code)
1260 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- intLoadCode: compute the addressing mode for 'mem' (getAmode), then
-- apply the supplied load instruction from that address into whatever
-- destination register the caller later picks.
1263 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1264 -> NatM (Reg -> InstrBlock)
1265 intLoadCode instr mem = do
1266 Amode src mem_code <- getAmode mem
1267 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1269 -- Compute an expression into *any* register, adding the appropriate
1270 -- move instruction if necessary.
1271 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1273 r <- getRegister expr
-- anyReg: an Any register's code already parameterises over the
-- destination; a Fixed register needs an extra reg-to-reg move into the
-- requested destination.
1276 anyReg :: Register -> NatM (Reg -> InstrBlock)
1277 anyReg (Any _ code) = return code
1278 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1280 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1281 -- Fixed registers might not be byte-addressable, so we make sure we've
1282 -- got a temporary, inserting an extra reg copy if necessary.
1283 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1284 #if x86_64_TARGET_ARCH
1285 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 path: virtual registers are returned as-is (the allocator can
-- pick a byte-addressable one); anything else is copied into a fresh
-- temporary.  Several alternative lines are elided in this fragment.
1287 getByteReg expr = do
1288 r <- getRegister expr
1291 tmp <- getNewRegNat rep
1292 return (tmp, code tmp)
1294 | isVirtualReg reg -> return (reg,code)
1296 tmp <- getNewRegNat rep
1297 return (tmp, code `snocOL` reg2reg rep reg tmp)
1298 -- ToDo: could optimise slightly by checking for byte-addressable
1299 -- real registers, but that will happen very rarely if at all.
1302 -- Another variant: this time we want the result in a register that cannot
1303 -- be modified by code to evaluate an arbitrary expression.
1304 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1305 getNonClobberedReg expr = do
1306 r <- getRegister expr
-- Any result: materialise into a fresh temporary, which nothing else
-- will write.
1309 tmp <- getNewRegNat rep
1310 return (tmp, code tmp)
1312 -- only free regs can be clobbered
-- Fixed result in a free real register: copy it out to a temporary so
-- later code cannot overwrite it.  (Other Fixed cases are elided here.)
1313 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1314 tmp <- getNewRegNat rep
1315 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- reg2reg: a single register-to-register move at the given rep.  On
-- i386, floating values move with the GMOV pseudo-op; everything else
-- is a plain MOV.  The equation's head line is elided in this fragment.
1319 reg2reg :: MachRep -> Reg -> Reg -> Instr
1321 #if i386_TARGET_ARCH
1322 | isFloatingRep rep = GMOV src dst
1324 | otherwise = MOV rep (OpReg src) (OpReg dst)
1326 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1328 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1330 #if sparc_TARGET_ARCH
-- SPARC float/double literals: emit the value into the data segment
-- under a fresh label, then load it back via SETHI/LD using the HI/LO
-- halves of the label address (SPARC immediates are too narrow for a
-- full address).  NOTE(review): this section still uses the old
-- StFloat/StDouble node names — it appears not to have been updated to
-- the Cmm-era constructors used by the other arches.
1332 getRegister (StFloat d)
1333 = getBlockIdNat `thenNat` \ lbl ->
1334 getNewRegNat PtrRep `thenNat` \ tmp ->
1335 let code dst = toOL [
1336 SEGMENT DataSegment,
1338 DATA F [ImmFloat d],
1339 SEGMENT TextSegment,
1340 SETHI (HI (ImmCLbl lbl)) tmp,
1341 LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1343 return (Any F32 code)
1345 getRegister (StDouble d)
1346 = getBlockIdNat `thenNat` \ lbl ->
1347 getNewRegNat PtrRep `thenNat` \ tmp ->
1348 let code dst = toOL [
1349 SEGMENT DataSegment,
1351 DATA DF [ImmDouble d],
1352 SEGMENT TextSegment,
1353 SETHI (HI (ImmCLbl lbl)) tmp,
1354 LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1356 return (Any F64 code)
-- SPARC unary MachOps.  Negation/complement are synthesised from g0
-- (the hardwired-zero register): neg = sub g0 x, not = xnor g0 x.
1359 getRegister (CmmMachOp mop [x]) -- unary PrimOps
1361 MO_NatS_Neg -> trivialUCode (SUB False False g0) x
1362 MO_Nat_Not -> trivialUCode (XNOR False g0) x
1363 MO_32U_to_8U -> trivialCode (AND False) x (StInt 255)
1365 MO_F32_Neg -> trivialUFCode F32 (FNEG F) x
1366 MO_F64_Neg -> trivialUFCode F64 (FNEG DF) x
1368 MO_F64_to_Flt -> coerceDbl2Flt x
1369 MO_F32_to_Dbl -> coerceFlt2Dbl x
1371 MO_F32_to_NatS -> coerceFP2Int F32 x
1372 MO_NatS_to_Flt -> coerceInt2FP F32 x
1373 MO_F64_to_NatS -> coerceFP2Int F64 x
1374 MO_NatS_to_Dbl -> coerceInt2FP F64 x
1376 -- Conversions which are a nop on sparc
1377 MO_32U_to_NatS -> conversionNop IntRep x
1378 MO_32S_to_NatS -> conversionNop IntRep x
1379 MO_NatS_to_32U -> conversionNop WordRep x
1380 MO_32U_to_NatU -> conversionNop WordRep x
1382 MO_NatU_to_NatS -> conversionNop IntRep x
1383 MO_NatS_to_NatU -> conversionNop WordRep x
1384 MO_NatP_to_NatU -> conversionNop WordRep x
1385 MO_NatU_to_NatP -> conversionNop PtrRep x
1386 MO_NatS_to_NatP -> conversionNop PtrRep x
1387 MO_NatP_to_NatS -> conversionNop IntRep x
1389 -- sign- and zero-extending widenings (shift left, then arithmetic or
1390 -- logical shift right — see integerExtend below)
1390 MO_8U_to_32U -> integerExtend False 24 x
1391 MO_8U_to_NatU -> integerExtend False 24 x
1392 MO_8S_to_NatS -> integerExtend True 24 x
1393 MO_16U_to_NatU -> integerExtend False 16 x
1394 MO_16S_to_NatS -> integerExtend True 16 x
-- Float transcendentals become C library calls; 32-bit operands are
-- promoted to double first (see the (is_float_op, name) table below).
1397 let fixed_x = if is_float_op -- promote to double
1398 then CmmMachOp MO_F32_to_Dbl [x]
1401 getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
-- integerExtend: shift left by nBits then right by nBits; the signed
-- flag picks arithmetic (Sar) vs. logical (Shr) right shift.
1403 integerExtend signed nBits x
1405 CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr)
1406 [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
1408 conversionNop new_rep expr
1409 = getRegister expr `thenNat` \ e_code ->
1410 return (swizzleRegisterRep e_code new_rep)
-- Table mapping float transcendental MachOps to (is_float_op, libm
-- routine name).  The Bool marks the F32 variant, which is promoted to
-- double before the C call (see fixed_x above); both widths call the
-- same double-precision libm entry point.
1414 MO_F32_Exp -> (True, FSLIT("exp"))
1415 MO_F32_Log -> (True, FSLIT("log"))
1416 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
1418 MO_F32_Sin -> (True, FSLIT("sin"))
1419 MO_F32_Cos -> (True, FSLIT("cos"))
1420 MO_F32_Tan -> (True, FSLIT("tan"))
1422 MO_F32_Asin -> (True, FSLIT("asin"))
1423 MO_F32_Acos -> (True, FSLIT("acos"))
1424 MO_F32_Atan -> (True, FSLIT("atan"))
1426 MO_F32_Sinh -> (True, FSLIT("sinh"))
1427 MO_F32_Cosh -> (True, FSLIT("cosh"))
1428 MO_F32_Tanh -> (True, FSLIT("tanh"))
1430 MO_F64_Exp -> (False, FSLIT("exp"))
1431 MO_F64_Log -> (False, FSLIT("log"))
1432 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
1434 MO_F64_Sin -> (False, FSLIT("sin"))
1435 MO_F64_Cos -> (False, FSLIT("cos"))
1436 MO_F64_Tan -> (False, FSLIT("tan"))
1438 MO_F64_Asin -> (False, FSLIT("asin"))
1439 MO_F64_Acos -> (False, FSLIT("acos"))
1440 MO_F64_Atan -> (False, FSLIT("atan"))
1442 MO_F64_Sinh -> (False, FSLIT("sinh"))
1443 MO_F64_Cosh -> (False, FSLIT("cosh"))
1444 MO_F64_Tanh -> (False, FSLIT("tanh"))
1446 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)"
-- SPARC dyadic MachOps (old MO_Nat*/MO_32* naming).
-- NOTE(review): the MO_32U_* comparisons map to the signed condition
-- codes GTT/GE/LTT/LE, while MO_NatU_* use the unsigned GU/GEU/LU/LEU.
-- Possibly deliberate for 32U, but it looks inconsistent — confirm
-- against the MachOp semantics before relying on it.
1450 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1452 MO_32U_Gt -> condIntReg GTT x y
1453 MO_32U_Ge -> condIntReg GE x y
1454 MO_32U_Eq -> condIntReg EQQ x y
1455 MO_32U_Ne -> condIntReg NE x y
1456 MO_32U_Lt -> condIntReg LTT x y
1457 MO_32U_Le -> condIntReg LE x y
1459 MO_Nat_Eq -> condIntReg EQQ x y
1460 MO_Nat_Ne -> condIntReg NE x y
1462 MO_NatS_Gt -> condIntReg GTT x y
1463 MO_NatS_Ge -> condIntReg GE x y
1464 MO_NatS_Lt -> condIntReg LTT x y
1465 MO_NatS_Le -> condIntReg LE x y
1467 MO_NatU_Gt -> condIntReg GU x y
1468 MO_NatU_Ge -> condIntReg GEU x y
1469 MO_NatU_Lt -> condIntReg LU x y
1470 MO_NatU_Le -> condIntReg LEU x y
1472 MO_F32_Gt -> condFltReg GTT x y
1473 MO_F32_Ge -> condFltReg GE x y
1474 MO_F32_Eq -> condFltReg EQQ x y
1475 MO_F32_Ne -> condFltReg NE x y
1476 MO_F32_Lt -> condFltReg LTT x y
1477 MO_F32_Le -> condFltReg LE x y
1479 MO_F64_Gt -> condFltReg GTT x y
1480 MO_F64_Ge -> condFltReg GE x y
1481 MO_F64_Eq -> condFltReg EQQ x y
1482 MO_F64_Ne -> condFltReg NE x y
1483 MO_F64_Lt -> condFltReg LTT x y
1484 MO_F64_Le -> condFltReg LE x y
1486 MO_Nat_Add -> trivialCode (ADD False False) x y
1487 MO_Nat_Sub -> trivialCode (SUB False False) x y
1489 MO_NatS_Mul -> trivialCode (SMUL False) x y
1490 MO_NatU_Mul -> trivialCode (UMUL False) x y
1491 MO_NatS_MulMayOflo -> imulMayOflo x y
1493 -- ToDo: teach about V8+ SPARC div instructions
-- Division is done by calling the .div/.rem/.udiv/.urem millicode
-- routines (see idiv below).
1494 MO_NatS_Quot -> idiv FSLIT(".div") x y
1495 MO_NatS_Rem -> idiv FSLIT(".rem") x y
1496 MO_NatU_Quot -> idiv FSLIT(".udiv") x y
1497 MO_NatU_Rem -> idiv FSLIT(".urem") x y
1499 MO_F32_Add -> trivialFCode F32 FADD x y
1500 MO_F32_Sub -> trivialFCode F32 FSUB x y
1501 MO_F32_Mul -> trivialFCode F32 FMUL x y
1502 MO_F32_Div -> trivialFCode F32 FDIV x y
1504 MO_F64_Add -> trivialFCode F64 FADD x y
1505 MO_F64_Sub -> trivialFCode F64 FSUB x y
1506 MO_F64_Mul -> trivialFCode F64 FMUL x y
1507 MO_F64_Div -> trivialFCode F64 FDIV x y
1509 MO_Nat_And -> trivialCode (AND False) x y
1510 MO_Nat_Or -> trivialCode (OR False) x y
1511 MO_Nat_Xor -> trivialCode (XOR False) x y
1513 MO_Nat_Shl -> trivialCode SLL x y
1514 MO_Nat_Shr -> trivialCode SRL x y
1515 MO_Nat_Sar -> trivialCode SRA x y
-- pow has no single-precision variant here: both widths call the
-- double-precision C "pow", promoting F32 arguments first.
1517 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1518 [promote x, promote y])
1519 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1520 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1523 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1525 idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
1527 --------------------
-- imulMayOflo (SPARC): same idea as the x86 version — compute the low
-- word with SMUL, sign-extend it with SRA #31, and subtract from the
-- high word; the result is zero iff the signed multiply did not
-- overflow.  (The instruction reading the high word is elided in this
-- fragment.)
1528 imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
1530 = getNewRegNat IntRep `thenNat` \ t1 ->
1531 getNewRegNat IntRep `thenNat` \ t2 ->
1532 getNewRegNat IntRep `thenNat` \ res_lo ->
1533 getNewRegNat IntRep `thenNat` \ res_hi ->
1534 getRegister a1 `thenNat` \ reg1 ->
1535 getRegister a2 `thenNat` \ reg2 ->
1536 let code1 = registerCode reg1 t1
1537 code2 = registerCode reg2 t2
1538 src1 = registerName reg1 t1
1539 src2 = registerName reg2 t2
1540 code dst = code1 `appOL` code2 `appOL`
1542 SMUL False src1 (RIReg src2) res_lo,
1544 SRA res_lo (RIImm (ImmInt 31)) res_lo,
1545 SUB False False res_lo (RIReg res_hi) dst
1548 return (Any IntRep code)
-- NOTE(review): this equation matches (CmmLoad pk mem) while the x86
-- and powerpc sections match (CmmLoad mem pk) — likely un-updated
-- legacy SPARC code; confirm the CmmLoad constructor's argument order.
1550 getRegister (CmmLoad pk mem) = do
1551 Amode src code <- getAmode mem
1553 size = primRepToSize pk
1554 code__2 dst = code `snocOL` LD size src dst
1556 return (Any pk code__2)
-- Small integer literal: synthesised with OR g0, imm (g0 is always 0).
1558 getRegister (StInt i)
1561 src = ImmInt (fromInteger i)
1562 code dst = unitOL (OR False g0 (RIImm src) dst)
1564 return (Any IntRep code)
-- Large immediates need the SETHI/OR two-instruction sequence for the
-- HI and LO halves of the value.
1570 SETHI (HI imm__2) dst,
1571 OR False dst (RIImm (LO imm__2)) dst]
1573 return (Any PtrRep code)
1575 = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
1578 imm__2 = case imm of Just x -> x
1580 #endif /* sparc_TARGET_ARCH */
1582 #if powerpc_TARGET_ARCH
1583 getRegister (CmmLoad mem pk)
1586 Amode addr addr_code <- getAmode mem
1587 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1588 addr_code `snocOL` LD pk dst addr
1589 return (Any pk code)
1591 -- catch simple cases of zero- or sign-extended load
1592 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1593 Amode addr addr_code <- getAmode mem
1594 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1596 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1598 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1599 Amode addr addr_code <- getAmode mem
1600 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- Signed 16-bit load uses LA — presumably the load-halfword-algebraic
-- (lha) pseudo-op; confirm against the PPC Instr definitions.
1602 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1603 Amode addr addr_code <- getAmode mem
1604 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1606 getRegister (CmmMachOp mop [x]) -- unary MachOps
1608 MO_Not rep -> trivialUCode rep NOT x
1610 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1611 MO_S_Conv F32 F64 -> conversionNop F64 x
1614 | from == to -> conversionNop to x
1615 | isFloatingRep from -> coerceFP2Int from to x
1616 | isFloatingRep to -> coerceInt2FP from to x
1618 -- narrowing is a nop: we treat the high bits as undefined
1619 MO_S_Conv I32 to -> conversionNop to x
1620 MO_S_Conv I16 I8 -> conversionNop I8 x
-- Sign extension by EXTS; zero extension by masking with AND below.
1621 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1622 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1625 | from == to -> conversionNop to x
1626 -- narrowing is a nop: we treat the high bits as undefined
1627 MO_U_Conv I32 to -> conversionNop to x
1628 MO_U_Conv I16 I8 -> conversionNop I8 x
1629 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1630 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1632 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1633 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1634 MO_S_Neg rep -> trivialUCode rep NEG x
1637 conversionNop new_rep expr
1638 = do e_code <- getRegister expr
1639 return (swizzleRegisterRep e_code new_rep)
-- PowerPC dyadic MachOps.  Sub-word integer operands are first widened
-- to I32 via extendSExpr/extendUExpr (defined below), because the PPC
-- instructions here operate on full words.
1641 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1643 MO_Eq F32 -> condFltReg EQQ x y
1644 MO_Ne F32 -> condFltReg NE x y
1646 MO_S_Gt F32 -> condFltReg GTT x y
1647 MO_S_Ge F32 -> condFltReg GE x y
1648 MO_S_Lt F32 -> condFltReg LTT x y
1649 MO_S_Le F32 -> condFltReg LE x y
1651 MO_Eq F64 -> condFltReg EQQ x y
1652 MO_Ne F64 -> condFltReg NE x y
1654 MO_S_Gt F64 -> condFltReg GTT x y
1655 MO_S_Ge F64 -> condFltReg GE x y
1656 MO_S_Lt F64 -> condFltReg LTT x y
1657 MO_S_Le F64 -> condFltReg LE x y
1659 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1660 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1662 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1663 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1664 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1665 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1667 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1668 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1669 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1670 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1672 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1673 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1674 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1675 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1677 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1678 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1679 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1680 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1682 -- optimize addition with 32-bit immediate
-- A literal that doesn't fit a 16-bit immediate is added as
-- ADDIS (high-adjusted half, HA) followed by ADD of the low half.
1686 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1687 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1690 (src, srcCode) <- getSomeReg x
1691 let imm = litToImm lit
1692 code dst = srcCode `appOL` toOL [
1693 ADDIS dst src (HA imm),
1694 ADD dst dst (RIImm (LO imm))
1696 return (Any I32 code)
1697 _ -> trivialCode I32 True ADD x y
1699 MO_Add rep -> trivialCode rep True ADD x y
1701 case y of -- subfi ('subtract from' with immediate) doesn't exist
1702 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1703 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1704 _ -> trivialCodeNoImm rep SUBF y x
1706 MO_Mul rep -> trivialCode rep True MULLW x y
1708 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1710 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1711 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1713 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1714 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1716 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1717 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1719 MO_And rep -> trivialCode rep False AND x y
1720 MO_Or rep -> trivialCode rep False OR x y
1721 MO_Xor rep -> trivialCode rep False XOR x y
1723 MO_Shl rep -> trivialCode rep False SLW x y
1724 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1725 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- Integer literal that fits a 16-bit signed immediate: a single LI.
1727 getRegister (CmmLit (CmmInt i rep))
1728 | Just imm <- makeImmediate rep True i
1730 code dst = unitOL (LI dst imm)
1732 return (Any rep code)
-- Float literal: placed in a read-only data section under a fresh
-- label and loaded back; the label reference goes through
-- cmmMakeDynamicReference so it also works position-independently.
1734 getRegister (CmmLit (CmmFloat f frep)) = do
1735 lbl <- getNewLabelNat
1736 dynRef <- cmmMakeDynamicReference addImportNat False lbl
1737 Amode addr addr_code <- getAmode dynRef
1739 LDATA ReadOnlyData [CmmDataLabel lbl,
1740 CmmStaticLit (CmmFloat f frep)]
1741 `consOL` (addr_code `snocOL` LD frep dst addr)
1742 return (Any frep code)
-- Other literals (e.g. labels): synthesised in two halves, the OR of
-- the LO part following a high-part load (elided in this fragment).
1744 getRegister (CmmLit lit)
1745 = let rep = cmmLitRep lit
1749 OR dst dst (RIImm (LO imm))
1751 in return (Any rep code)
1753 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1755 -- extend?Rep: wrap integer expression of type rep
1756 -- in a conversion to I32
-- I32 operands pass through unchanged; anything else is wrapped in a
-- signed (MO_S_Conv) or unsigned (MO_U_Conv) widening to I32.
1757 extendSExpr I32 x = x
1758 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1759 extendUExpr I32 x = x
1760 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1762 #endif /* powerpc_TARGET_ARCH */
1765 -- -----------------------------------------------------------------------------
1766 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An Amode pairs the addressing mode itself with the instructions that
-- set up the registers it mentions (see the usage rule spelled out in
-- the comment below).
1768 data Amode = Amode AddrMode InstrBlock
1771 Now, given a tree (the argument to a CmmLoad) that references memory,
1772 produce a suitable addressing mode.
1774 A Rule of the Game (tm) for Amodes: use of the addr bit must
1775 immediately follow use of the code part, since the code part puts
1776 values in registers which the addr then refers to. So you can't put
1777 anything in between, lest it overwrite some of those registers. If
1778 you need to do some other computation between the code part and use of
1779 the addr bit, first store the effective address from the amode in a
1780 temporary, then do the other computation, and then use the temporary:
1784 ... other computation ...
1788 getAmode :: CmmExpr -> NatM Amode
-- CmmRegOff is first normalised into an explicit MachOp tree
-- (mangleIndexTree), so the arch-specific equations below only have to
-- match MachOp shapes.
1789 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1791 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1793 #if alpha_TARGET_ARCH
-- Alpha: reg+imm addressing for base plus/minus a literal offset;
-- anything else falls through to a plain register-indirect amode.
-- (Still on the old StPrim/StInt node names.)
1795 getAmode (StPrim IntSubOp [x, StInt i])
1796 = getNewRegNat PtrRep `thenNat` \ tmp ->
1797 getRegister x `thenNat` \ register ->
1799 code = registerCode register tmp
1800 reg = registerName register tmp
1801 off = ImmInt (-(fromInteger i))
1803 return (Amode (AddrRegImm reg off) code)
1805 getAmode (StPrim IntAddOp [x, StInt i])
1806 = getNewRegNat PtrRep `thenNat` \ tmp ->
1807 getRegister x `thenNat` \ register ->
1809 code = registerCode register tmp
1810 reg = registerName register tmp
1811 off = ImmInt (fromInteger i)
1813 return (Amode (AddrRegImm reg off) code)
1817 = return (Amode (AddrImm imm__2) id)
1820 imm__2 = case imm of Just x -> x
1823 = getNewRegNat PtrRep `thenNat` \ tmp ->
1824 getRegister other `thenNat` \ register ->
1826 code = registerCode register tmp
1827 reg = registerName register tmp
1829 return (Amode (AddrReg reg) code)
1831 #endif /* alpha_TARGET_ARCH */
1833 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1835 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1837 -- This is all just ridiculous, since it carefully undoes
1838 -- what mangleIndexTree has just done.
-- base +/- displacement
1839 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1840 | not (is64BitLit lit)
1841 -- ASSERT(rep == I32)???
1842 = do (x_reg, x_code) <- getSomeReg x
1843 let off = ImmInt (-(fromInteger i))
1844 return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1846 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1847 | not (is64BitLit lit)
1848 -- ASSERT(rep == I32)???
1849 = do (x_reg, x_code) <- getSomeReg x
1850 let off = ImmInt (fromInteger i)
1851 return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
1853 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1854 -- recognised by the next rule.
1855 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1857 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + index*scale: shift counts 0..3 map to the hardware scale
-- factors 1/2/4/8.
1859 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1860 [y, CmmLit (CmmInt shift _)]])
1861 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1862 = do (x_reg, x_code) <- getNonClobberedReg x
1863 -- x must be in a temp, because it has to stay live over y_code
1864 -- we could compare x_reg and y_reg and do something better here...
1865 (y_reg, y_code) <- getSomeReg y
1867 code = x_code `appOL` y_code
1868 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1869 return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
-- Small literal: absolute address, no setup code needed.
1872 getAmode (CmmLit lit) | not (is64BitLit lit)
1873 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1876 (reg,code) <- getSomeReg expr
1877 return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
1879 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1881 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1883 #if sparc_TARGET_ARCH
-- SPARC amodes: reg+imm for base plus/minus literal, reg+reg for a
-- general sum, SETHI/LO for bare immediates too wide for one
-- instruction.  (Old MO_Nat_*/StInt node names.)
1885 getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
1887 = getNewRegNat PtrRep `thenNat` \ tmp ->
1888 getRegister x `thenNat` \ register ->
1890 code = registerCode register tmp
1891 reg = registerName register tmp
1892 off = ImmInt (-(fromInteger i))
1894 return (Amode (AddrRegImm reg off) code)
1897 getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
1899 = getNewRegNat PtrRep `thenNat` \ tmp ->
1900 getRegister x `thenNat` \ register ->
1902 code = registerCode register tmp
1903 reg = registerName register tmp
1904 off = ImmInt (fromInteger i)
1906 return (Amode (AddrRegImm reg off) code)
1908 getAmode (CmmMachOp MO_Nat_Add [x, y])
1909 = getNewRegNat PtrRep `thenNat` \ tmp1 ->
1910 getNewRegNat IntRep `thenNat` \ tmp2 ->
1911 getRegister x `thenNat` \ register1 ->
1912 getRegister y `thenNat` \ register2 ->
1914 code1 = registerCode register1 tmp1
1915 reg1 = registerName register1 tmp1
1916 code2 = registerCode register2 tmp2
1917 reg2 = registerName register2 tmp2
1918 code__2 = code1 `appOL` code2
1920 return (Amode (AddrRegReg reg1 reg2) code__2)
-- Wide immediate: SETHI loads the HI part into a temp, the LO part
-- becomes the amode's displacement.
1924 = getNewRegNat PtrRep `thenNat` \ tmp ->
1926 code = unitOL (SETHI (HI imm__2) tmp)
1928 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1931 imm__2 = case imm of Just x -> x
1934 = getNewRegNat PtrRep `thenNat` \ tmp ->
1935 getRegister other `thenNat` \ register ->
1937 code = registerCode register tmp
1938 reg = registerName register tmp
1941 return (Amode (AddrRegImm reg off) code)
1945 #ifdef powerpc_TARGET_ARCH
-- PowerPC amodes: reg+imm when the offset fits a 16-bit immediate
-- (makeImmediate), otherwise the HA/LO split with an ADDIS or LIS to
-- set up the high part.
1946 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1947 | Just off <- makeImmediate I32 True (-i)
1949 (reg, code) <- getSomeReg x
1950 return (Amode (AddrRegImm reg off) code)
1953 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1954 | Just off <- makeImmediate I32 True i
1956 (reg, code) <- getSomeReg x
1957 return (Amode (AddrRegImm reg off) code)
1959 -- optimize addition with 32-bit immediate
1961 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1963 tmp <- getNewRegNat I32
1964 (src, srcCode) <- getSomeReg x
1965 let imm = litToImm lit
1966 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1967 return (Amode (AddrRegImm tmp (LO imm)) code)
1969 getAmode (CmmLit lit)
1971 tmp <- getNewRegNat I32
1972 let imm = litToImm lit
1973 code = unitOL (LIS tmp (HA imm))
1974 return (Amode (AddrRegImm tmp (LO imm)) code)
1976 getAmode (CmmMachOp (MO_Add I32) [x, y])
1978 (regX, codeX) <- getSomeReg x
1979 (regY, codeY) <- getSomeReg y
1980 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1984 (reg, code) <- getSomeReg other
1987 return (Amode (AddrRegImm reg off) code)
1988 #endif /* powerpc_TARGET_ARCH */
1990 -- -----------------------------------------------------------------------------
1991 -- getOperand: sometimes any operand will do.
1993 -- getNonClobberedOperand: the value of the operand will remain valid across
1994 -- the computation of an arbitrary expression, unless the expression
1995 -- is computed directly into a register which the operand refers to
1996 -- (see trivialCode where this function is used for an example).
1998 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2000 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-- Small non-float literal: usable directly as an immediate operand.
2001 getNonClobberedOperand (CmmLit lit)
2002 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2003 return (OpImm (litToImm lit), nilOL)
-- Memory operand: if the computed amode mentions a clobberable (free
-- real) register, save the effective address into a fresh temp with
-- LEA so the operand survives later code.
2004 getNonClobberedOperand (CmmLoad mem pk)
2005 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2006 Amode src mem_code <- getAmode mem
2008 if (amodeCouldBeClobbered src)
2010 tmp <- getNewRegNat wordRep
2011 return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
2012 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2015 return (OpAddr src', save_code `appOL` mem_code)
2016 getNonClobberedOperand e = do
2017 (reg, code) <- getNonClobberedReg e
2018 return (OpReg reg, code)
-- An amode could be clobbered if any register it mentions is a *free*
-- real register — those may be reused by subsequently generated code.
2020 amodeCouldBeClobbered :: AddrMode -> Bool
2021 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2023 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2024 regClobbered _ = False
2026 -- getOperand: the operand is not required to remain valid across the
2027 -- computation of an arbitrary expression.
2028 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2029 getOperand (CmmLit lit)
2030 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2031 return (OpImm (litToImm lit), nilOL)
-- i386 excludes float and 64-bit loads from memory operands; on
-- x86_64 any load qualifies.
2032 getOperand (CmmLoad mem pk)
2033 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2034 Amode src mem_code <- getAmode mem
2035 return (OpAddr src, mem_code)
2037 (reg, code) <- getNonClobberedReg e
2038 return (OpReg reg, code)
-- Predicate matching the expressions getOperand can turn into a
-- memory or immediate operand (loads, and small non-float literals).
-- The catch-all False equation is elided in this fragment.
2040 isOperand :: CmmExpr -> Bool
2041 isOperand (CmmLoad _ _) = True
2042 isOperand (CmmLit lit) = not (is64BitLit lit) &&
2043 not (isFloatingRep (cmmLitRep lit))
-- Like getOperand but never produces an immediate: suitable loads
-- become memory operands, everything else a non-clobbered register
-- (used e.g. for the divisor in div_code).
2046 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2047 getRegOrMem (CmmLoad mem pk)
2048 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2049 Amode src mem_code <- getAmode mem
2050 return (OpAddr src, mem_code)
2052 (reg, code) <- getNonClobberedReg e
2053 return (OpReg reg, code)
2055 #if x86_64_TARGET_ARCH
-- True for I64 literals outside the signed 32-bit range: these cannot
-- be used as x86_64 immediates in most instruction forms.
2056 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
2057 -- assume that labels are in the range 0-2^31-1: this assumes the
2058 -- small memory model (see gcc docs, -mcmodel=small).
2060 is64BitLit x = False
2063 -- -----------------------------------------------------------------------------
2064 -- The 'CondCode' type: Condition codes passed up the tree.
-- NOTE(review): the Bool field's meaning is not spelled out here;
-- condIntCode below always constructs it as False, so it presumably
-- flags float comparisons — confirm at the consumers.
2066 data CondCode = CondCode Bool Cond InstrBlock
2070 getCondCode :: CmmExpr -> NatM CondCode
2072 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2074 #if alpha_TARGET_ARCH
2075 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2076 #endif /* alpha_TARGET_ARCH */
2078 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2080 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2081 -- yes, they really do seem to want exactly the same!
-- Dispatch each comparison MachOp to condFltCode (float reps) or
-- condIntCode (everything else), with the matching condition code.
2083 getCondCode (CmmMachOp mop [x, y])
2084 = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
2086 MO_Eq F32 -> condFltCode EQQ x y
2087 MO_Ne F32 -> condFltCode NE x y
2089 MO_S_Gt F32 -> condFltCode GTT x y
2090 MO_S_Ge F32 -> condFltCode GE x y
2091 MO_S_Lt F32 -> condFltCode LTT x y
2092 MO_S_Le F32 -> condFltCode LE x y
2094 MO_Eq F64 -> condFltCode EQQ x y
2095 MO_Ne F64 -> condFltCode NE x y
2097 MO_S_Gt F64 -> condFltCode GTT x y
2098 MO_S_Ge F64 -> condFltCode GE x y
2099 MO_S_Lt F64 -> condFltCode LTT x y
2100 MO_S_Le F64 -> condFltCode LE x y
2102 MO_Eq rep -> condIntCode EQQ x y
2103 MO_Ne rep -> condIntCode NE x y
2105 MO_S_Gt rep -> condIntCode GTT x y
2106 MO_S_Ge rep -> condIntCode GE x y
2107 MO_S_Lt rep -> condIntCode LTT x y
2108 MO_S_Le rep -> condIntCode LE x y
2110 MO_U_Gt rep -> condIntCode GU x y
2111 MO_U_Ge rep -> condIntCode GEU x y
2112 MO_U_Lt rep -> condIntCode LU x y
2113 MO_U_Le rep -> condIntCode LEU x y
2115 other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
2117 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2119 #elif powerpc_TARGET_ARCH
2121 -- almost the same as everywhere else - but we need to
2122 -- extend small integers to 32 bit first
2124 getCondCode (CmmMachOp mop [x, y])
2126 MO_Eq F32 -> condFltCode EQQ x y
2127 MO_Ne F32 -> condFltCode NE x y
2129 MO_S_Gt F32 -> condFltCode GTT x y
2130 MO_S_Ge F32 -> condFltCode GE x y
2131 MO_S_Lt F32 -> condFltCode LTT x y
2132 MO_S_Le F32 -> condFltCode LE x y
2134 MO_Eq F64 -> condFltCode EQQ x y
2135 MO_Ne F64 -> condFltCode NE x y
2137 MO_S_Gt F64 -> condFltCode GTT x y
2138 MO_S_Ge F64 -> condFltCode GE x y
2139 MO_S_Lt F64 -> condFltCode LTT x y
2140 MO_S_Le F64 -> condFltCode LE x y
-- Equality can use either extension; ordering comparisons must match
-- the extension's signedness.
2142 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2143 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2145 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2146 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2147 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2148 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2150 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2151 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2152 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2153 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2155 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2157 getCondCode other = panic "getCondCode(2)(powerpc)"
2163 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2164 -- passed back up the tree.
-- Each arch below supplies its own clauses; Alpha simply has no
-- implementation and panics if these are ever reached.
2166 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2168 #if alpha_TARGET_ARCH
2169 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2170 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2171 #endif /* alpha_TARGET_ARCH */
2173 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2174 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- NOTE(review): several let/in connective lines are missing from this
-- extract; the "code = ..." bindings below belong to elided lets.
2176 -- memory vs immediate
-- Compare a loaded value against an immediate directly in memory,
-- avoiding a register load (immediate must fit: not a 64-bit literal).
2177 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2178 Amode x_addr x_code <- getAmode x
2181 code = x_code `snocOL`
2182 CMP pk (OpImm imm) (OpAddr x_addr)
2184 return (CondCode False cond code)
-- Comparison against zero: TEST reg,reg sets the flags without
-- needing an immediate operand.
2187 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2188 (x_reg, x_code) <- getSomeReg x
2190 code = x_code `snocOL`
2191 TEST pk (OpReg x_reg) (OpReg x_reg)
2193 return (CondCode False cond code)
2195 -- anything vs operand
-- x must survive the evaluation of y, hence getNonClobberedReg.
2196 condIntCode cond x y | isOperand y = do
2197 (x_reg, x_code) <- getNonClobberedReg x
2198 (y_op, y_code) <- getOperand y
2200 code = x_code `appOL` y_code `snocOL`
2201 CMP (cmmExprRep x) y_op (OpReg x_reg)
2203 return (CondCode False cond code)
2205 -- anything vs anything
2206 condIntCode cond x y = do
2207 (y_reg, y_code) <- getNonClobberedReg y
2208 (x_op, x_code) <- getRegOrMem x
2210 code = y_code `appOL`
2212 CMP (cmmExprRep x) (OpReg y_reg) x_op
2214 return (CondCode False cond code)
2217 #if i386_TARGET_ARCH
-- i386 float compare via the x87 GCMP pseudo-instruction; only the
-- six ordered conditions asserted below are supported.
2218 condFltCode cond x y
2219 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2220 (x_reg, x_code) <- getNonClobberedReg x
2221 (y_reg, y_code) <- getSomeReg y
2223 code = x_code `appOL` y_code `snocOL`
2224 GCMP cond x_reg y_reg
2225 -- The GCMP insn does the test and sets the zero flag if comparable
2226 -- and true. Hence we always supply EQQ as the condition to test.
2227 return (CondCode True EQQ code)
2228 #endif /* i386_TARGET_ARCH */
2230 #if x86_64_TARGET_ARCH
2231 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2232 -- an operand, but the right must be a reg. We can probably do better
2233 -- than this general case...
2234 condFltCode cond x y = do
2235 (x_reg, x_code) <- getNonClobberedReg x
2236 (y_op, y_code) <- getOperand y
2238 code = x_code `appOL`
2240 CMP (cmmExprRep x) y_op (OpReg x_reg)
2242 return (CondCode False (condToUnsigned cond) code)
2243 -- we need to use the unsigned comparison operators on the
2244 -- result of this comparison.
2247 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2249 #if sparc_TARGET_ARCH
-- NOTE(review): let/in and thenNat continuation lines are elided in
-- this extract; bindings such as code/src1 belong to missing lets.
-- Integer compare against an immediate: SUBcc src1, imm, %g0 sets
-- the condition codes while discarding the result into %g0.
2251 condIntCode cond x (StInt y)
2253 = getRegister x `thenNat` \ register ->
2254 getNewRegNat IntRep `thenNat` \ tmp ->
2256 code = registerCode register tmp
2257 src1 = registerName register tmp
2258 src2 = ImmInt (fromInteger y)
2259 code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
2261 return (CondCode False cond code__2)
-- General integer compare: both operands into registers, then
-- SUBcc src1, src2, %g0.
2263 condIntCode cond x y
2264 = getRegister x `thenNat` \ register1 ->
2265 getRegister y `thenNat` \ register2 ->
2266 getNewRegNat IntRep `thenNat` \ tmp1 ->
2267 getNewRegNat IntRep `thenNat` \ tmp2 ->
2269 code1 = registerCode register1 tmp1
2270 src1 = registerName register1 tmp1
2271 code2 = registerCode register2 tmp2
2272 src2 = registerName register2 tmp2
2273 code__2 = code1 `appOL` code2 `snocOL`
2274 SUB False True src1 (RIReg src2) g0
2276 return (CondCode False cond code__2)
-- Float compare: if the operand reps differ, the F32 side is first
-- promoted to F64 (FxTOy into tmp) so FCMP compares like with like.
2279 condFltCode cond x y
2280 = getRegister x `thenNat` \ register1 ->
2281 getRegister y `thenNat` \ register2 ->
2282 getNewRegNat (registerRep register1)
2284 getNewRegNat (registerRep register2)
2286 getNewRegNat F64 `thenNat` \ tmp ->
2288 promote x = FxTOy F DF x tmp
2290 pk1 = registerRep register1
2291 code1 = registerCode register1 tmp1
2292 src1 = registerName register1 tmp1
2294 pk2 = registerRep register2
2295 code2 = registerCode register2 tmp2
2296 src2 = registerName register2 tmp2
-- Same rep: compare directly at that precision.
2300 code1 `appOL` code2 `snocOL`
2301 FCMP True (primRepToSize pk1) src1 src2
2302 else if pk1 == F32 then
2303 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2304 FCMP True DF tmp src2
2306 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2307 FCMP True DF src1 tmp
2309 return (CondCode True cond code__2)
2311 #endif /* sparc_TARGET_ARCH */
2313 #if powerpc_TARGET_ARCH
2314 -- ###FIXME: I16 and I8!
-- Immediate form: only when the literal fits the (signed for CMP,
-- unsigned for CMPL) 16-bit immediate field, per makeImmediate.
2315 condIntCode cond x (CmmLit (CmmInt y rep))
2316 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2318 (src1, code) <- getSomeReg x
2320 code' = code `snocOL`
2321 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2322 return (CondCode False cond code')
-- Register form: CMPL (logical/unsigned) vs CMP (signed), always I32.
2324 condIntCode cond x y = do
2325 (src1, code1) <- getSomeReg x
2326 (src2, code2) <- getSomeReg y
2328 code' = code1 `appOL` code2 `snocOL`
2329 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2330 return (CondCode False cond code')
-- Float compare; GE/LE need an extra CRNOR because FCMP records an
-- unordered result in a separate CR bit that must be folded in.
2332 condFltCode cond x y = do
2333 (src1, code1) <- getSomeReg x
2334 (src2, code2) <- getSomeReg y
2336 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2337 code'' = case cond of -- twiddle CR to handle unordered case
2338 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2339 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2342 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2343 return (CondCode True cond code'')
2345 #endif /* powerpc_TARGET_ARCH */
2347 -- -----------------------------------------------------------------------------
2348 -- Generating assignments
2350 -- Assignments are really at the heart of the whole code generation
2351 -- business. Almost all top-level nodes of any real importance are
2352 -- assignments, which correspond to loads, stores, or register
2353 -- transfers. If we're really lucky, some of the register transfers
2354 -- will go away, because we can use the destination register to
2355 -- complete the code generation for the right hand side. This only
2356 -- fails when the right hand side is forced into a fixed register
2357 -- (e.g. the result of a call).
-- Four entry points: {Mem,Reg} destination x {Int,Flt} payload.
-- assignMem_* store an expression to a memory address; assignReg_*
-- move it into a Cmm register.  Per-arch clauses follow.
2359 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2360 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2362 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2363 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2365 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2365 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2367 #if alpha_TARGET_ARCH
2369 assignIntCode pk (CmmLoad dst _) src
2370 = getNewRegNat IntRep `thenNat` \ tmp ->
2371 getAmode dst `thenNat` \ amode ->
2372 getRegister src `thenNat` \ register ->
2374 code1 = amodeCode amode []
2375 dst__2 = amodeAddr amode
2376 code2 = registerCode register tmp []
2377 src__2 = registerName register tmp
2378 sz = primRepToSize pk
2379 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2383 assignIntCode pk dst src
2384 = getRegister dst `thenNat` \ register1 ->
2385 getRegister src `thenNat` \ register2 ->
2387 dst__2 = registerName register1 zeroh
2388 code = registerCode register2 dst__2
2389 src__2 = registerName register2 dst__2
2390 code__2 = if isFixed register2
2391 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2396 #endif /* alpha_TARGET_ARCH */
2398 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2400 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2402 -- integer assignment to memory
2403 assignMem_IntCode pk addr src = do
2404 Amode addr code_addr <- getAmode addr
2405 (code_src, op_src) <- get_op_RI src
2407 code = code_src `appOL`
2409 MOV pk op_src (OpAddr addr)
2410 -- NOTE: op_src is stable, so it will still be valid
2411 -- after code_addr. This may involve the introduction
2412 -- of an extra MOV to a temporary register, but we hope
2413 -- the register allocator will get rid of it.
-- Helper: classify an operand as immediate (small literal) or
-- register, producing the code to materialise it.
2417 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2418 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2419 = return (nilOL, OpImm (litToImm lit))
2421 = do (reg,code) <- getNonClobberedReg op
2422 return (code, OpReg reg)
2425 -- Assign; dst is a reg, rhs is mem
-- Use a single MOV-load straight into the destination register.
2426 assignReg_IntCode pk reg (CmmLoad src _) = do
2427 load_code <- intLoadCode (MOV pk) src
2428 return (load_code (getRegisterReg reg))
2430 -- dst is a reg, but src could be anything
-- getAnyReg returns a function expecting the destination register.
2431 assignReg_IntCode pk reg src = do
2432 code <- getAnyReg src
2433 return (code (getRegisterReg reg))
2435 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2437 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2439 #if sparc_TARGET_ARCH
-- NOTE(review): let/in lines elided in this extract.
-- Store to memory: evaluate address and source, then ST of size pk.
2441 assignMem_IntCode pk addr src
2442 = getNewRegNat IntRep `thenNat` \ tmp ->
2443 getAmode addr `thenNat` \ amode ->
2444 getRegister src `thenNat` \ register ->
2446 code1 = amodeCode amode
2447 dst__2 = amodeAddr amode
2448 code2 = registerCode register tmp
2449 src__2 = registerName register tmp
2450 sz = primRepToSize pk
2451 code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
-- Register assignment: a Fixed source needs an explicit move,
-- written as OR %g0, src, dst (the SPARC idiom for reg-to-reg move).
2455 assignReg_IntCode pk reg src
2456 = getRegister src `thenNat` \ register2 ->
2457 getRegisterReg reg `thenNat` \ register1 ->
2458 getNewRegNat IntRep `thenNat` \ tmp ->
2460 dst__2 = registerName register1 tmp
2461 code = registerCode register2 dst__2
2462 src__2 = registerName register2 dst__2
2463 code__2 = if isFixed register2
2464 then code `snocOL` OR False g0 (RIReg src__2) dst__2
2469 #endif /* sparc_TARGET_ARCH */
2471 #if powerpc_TARGET_ARCH
-- Store: source register first, then address; single ST instruction.
2473 assignMem_IntCode pk addr src = do
2474 (srcReg, code) <- getSomeReg src
2475 Amode dstAddr addr_code <- getAmode addr
2476 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2478 -- dst is a reg, but src could be anything
-- Any: generate straight into dst.  Fixed: source lives in a
-- specific register, so append an MR (move register) into dst.
2479 assignReg_IntCode pk reg src
2481 r <- getRegister src
2483 Any _ code -> code dst
2484 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2486 dst = getRegisterReg reg
2488 #endif /* powerpc_TARGET_ARCH */
2491 -- -----------------------------------------------------------------------------
2492 -- Floating-point assignments
2494 #if alpha_TARGET_ARCH
-- NOTE(review): older Alpha naming (assignFltCode) and asmSeq style;
-- let/in lines elided in this extract.
-- Float store to memory: mirror of assignIntCode's store clause.
2496 assignFltCode pk (CmmLoad dst _) src
2497 = getNewRegNat pk `thenNat` \ tmp ->
2498 getAmode dst `thenNat` \ amode ->
2499 getRegister src `thenNat` \ register ->
2501 code1 = amodeCode amode []
2502 dst__2 = amodeAddr amode
2503 code2 = registerCode register tmp []
2504 src__2 = registerName register tmp
2505 sz = primRepToSize pk
2506 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Float register-to-register: Fixed sources get an explicit FMOV.
2510 assignFltCode pk dst src
2511 = getRegister dst `thenNat` \ register1 ->
2512 getRegister src `thenNat` \ register2 ->
2514 dst__2 = registerName register1 zeroh
2515 code = registerCode register2 dst__2
2516 src__2 = registerName register2 dst__2
2517 code__2 = if isFixed register2
2518 then code . mkSeqInstr (FMOV src__2 dst__2)
2523 #endif /* alpha_TARGET_ARCH */
2525 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2527 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2529 -- Floating point assignment to memory
-- i386 uses the x87 GST store; x86_64 stores SSE registers with a
-- plain MOV (selected by the IF_ARCH_i386 macro).
2530 assignMem_FltCode pk addr src = do
2531 (src_reg, src_code) <- getNonClobberedReg src
2532 Amode addr addr_code <- getAmode addr
2534 code = src_code `appOL`
2536 IF_ARCH_i386(GST pk src_reg addr,
2537 MOV pk (OpReg src_reg) (OpAddr addr))
2540 -- Floating point assignment to a register/temporary
2541 assignReg_FltCode pk reg src = do
2542 src_code <- getAnyReg src
2543 return (src_code (getRegisterReg reg))
2545 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2547 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2549 #if sparc_TARGET_ARCH
2551 -- Floating point assignment to memory
-- If the source's rep differs from the store size pk, convert with
-- FxTOy into a temporary first, then store; otherwise store directly.
-- NOTE(review): let/in lines elided in this extract.
2552 assignMem_FltCode pk addr src
2553 = getNewRegNat pk `thenNat` \ tmp1 ->
2554 getAmode addr `thenNat` \ amode ->
2555 getRegister src `thenNat` \ register ->
2557 sz = primRepToSize pk
2558 dst__2 = amodeAddr amode
2560 code1 = amodeCode amode
2561 code2 = registerCode register tmp1
2563 src__2 = registerName register tmp1
2564 pk__2 = registerRep register
2565 sz__2 = primRepToSize pk__2
2567 code__2 = code1 `appOL` code2 `appOL`
2569 then unitOL (ST sz src__2 dst__2)
2570 else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
2574 -- Floating point assignment to a register/temporary
2575 -- Why is this so bizarrely ugly?
-- Three cases: rep mismatch -> convert with FxTOy; Fixed source ->
-- explicit FMOV; otherwise generate directly into the destination.
2576 assignReg_FltCode pk reg src
2577 = getRegisterReg reg `thenNat` \ register1 ->
2578 getRegister src `thenNat` \ register2 ->
2580 pk__2 = registerRep register2
2581 sz__2 = primRepToSize pk__2
2583 getNewRegNat pk__2 `thenNat` \ tmp ->
2585 sz = primRepToSize pk
2586 dst__2 = registerName register1 g0 -- must be Fixed
2587 reg__2 = if pk /= pk__2 then tmp else dst__2
2588 code = registerCode register2 reg__2
2589 src__2 = registerName register2 reg__2
2592 code `snocOL` FxTOy sz__2 sz src__2 dst__2
2593 else if isFixed register2 then
2594 code `snocOL` FMOV sz src__2 dst__2
2600 #endif /* sparc_TARGET_ARCH */
2602 #if powerpc_TARGET_ARCH
-- On PPC the integer assignment code is rep-driven (ST/MR work for
-- FP registers too), so the float entry points are simple aliases.
2605 assignMem_FltCode = assignMem_IntCode
2606 assignReg_FltCode = assignReg_IntCode
2608 #endif /* powerpc_TARGET_ARCH */
2611 -- -----------------------------------------------------------------------------
2612 -- Generating an non-local jump
2614 -- (If applicable) Do not fill the delay slots here; you will confuse the
2615 -- register allocator.
2617 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2619 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2621 #if alpha_TARGET_ARCH
-- Alpha: known labels branch directly (BR) when local, otherwise
-- load the address into pv and JMP through it.
2623 genJump (CmmLabel lbl)
2624 | isAsmTemp lbl = returnInstr (BR target)
2625 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2627 target = ImmCLbl lbl
2630 = getRegister tree `thenNat` \ register ->
2631 getNewRegNat PtrRep `thenNat` \ tmp ->
2633 dst = registerName register pv
2634 code = registerCode register pv
2635 target = registerName register pv
2637 if isFixed register then
2638 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2640 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2642 #endif /* alpha_TARGET_ARCH */
2644 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2646 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: JMP can take a memory operand, an immediate, or a register
-- directly, so each shape gets its own clause.
2648 genJump (CmmLoad mem pk) = do
2649 Amode target code <- getAmode mem
2650 return (code `snocOL` JMP (OpAddr target))
2652 genJump (CmmLit lit) = do
2653 return (unitOL (JMP (OpImm (litToImm lit))))
2656 (reg,code) <- getSomeReg expr
2657 return (code `snocOL` JMP (OpReg reg))
2659 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2661 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2663 #if sparc_TARGET_ARCH
-- SPARC: the NOP after each jump fills the branch delay slot.
2665 genJump (CmmLabel lbl)
2666 = return (toOL [CALL (Left target) 0 True, NOP])
2668 target = ImmCLbl lbl
2671 = getRegister tree `thenNat` \ register ->
2672 getNewRegNat PtrRep `thenNat` \ tmp ->
2674 code = registerCode register tmp
2675 target = registerName register tmp
2677 return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
2679 #endif /* sparc_TARGET_ARCH */
2681 #if powerpc_TARGET_ARCH
-- PPC: direct label jump, or load the target into CTR and BCTR.
2682 genJump (CmmLit (CmmLabel lbl))
2683 = return (unitOL $ JMP lbl)
2687 (target,code) <- getSomeReg tree
2688 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2689 #endif /* powerpc_TARGET_ARCH */
2692 -- -----------------------------------------------------------------------------
2693 -- Unconditional branches
-- One unconditional branch to a local block label per architecture;
-- SPARC again pads the delay slot with a NOP.
2695 genBranch :: BlockId -> NatM InstrBlock
2697 #if alpha_TARGET_ARCH
2698 genBranch id = return (unitOL (BR id))
2701 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2702 genBranch id = return (unitOL (JXX ALWAYS id))
2705 #if sparc_TARGET_ARCH
2706 genBranch id = return (toOL [BI ALWAYS False id, NOP])
2709 #if powerpc_TARGET_ARCH
2710 genBranch id = return (unitOL (BCC ALWAYS id))
2714 -- -----------------------------------------------------------------------------
2715 -- Conditional jumps
2718 Conditional jumps are always to local labels, so we can use branch
2719 instructions. We peek at the arguments to decide what kind of
2722 ALPHA: For comparisons with 0, we're laughing, because we can just do
2723 the desired conditional branch.
2725 I386: First, we have to ensure that the condition
2726 codes are set according to the supplied comparison operation.
2728 SPARC: First, we have to ensure that the condition codes are set
2729 according to the supplied comparison operation. We generate slightly
2730 different code for floating point comparisons, because a floating
2731 point operation cannot directly precede a @BF@. We assume the worst
2732 and fill that slot with a @NOP@.
2734 SPARC: Do not fill the delay slots here; you will confuse the register
-- genCondJump: branch to the given BlockId when the condition
-- expression evaluates to true.
2740 :: BlockId -- the branch target
2741 -> CmmExpr -- the condition on which to branch
2744 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2746 #if alpha_TARGET_ARCH
-- NOTE(review): the Alpha arm still uses the pre-Cmm Stix types
-- (StPrim/StInt/StDouble); several where/let lines are elided.
-- Comparison against integer zero: branch directly with BI.
2748 genCondJump id (StPrim op [x, StInt 0])
2749 = getRegister x `thenNat` \ register ->
2750 getNewRegNat (registerRep register)
2753 code = registerCode register tmp
2754 value = registerName register tmp
2755 pk = registerRep register
2756 target = ImmCLbl lbl
2758 returnSeq code [BI (cmpOp op) value target]
2760 cmpOp CharGtOp = GTT
2762 cmpOp CharEqOp = EQQ
2764 cmpOp CharLtOp = LTT
-- Unsigned compares against zero degenerate: >=0 is ALWAYS, <0 NEVER.
2773 cmpOp WordGeOp = ALWAYS
2774 cmpOp WordEqOp = EQQ
2776 cmpOp WordLtOp = NEVER
2777 cmpOp WordLeOp = EQQ
2779 cmpOp AddrGeOp = ALWAYS
2780 cmpOp AddrEqOp = EQQ
2782 cmpOp AddrLtOp = NEVER
2783 cmpOp AddrLeOp = EQQ
-- Comparison against floating zero: branch with BF.
2785 genCondJump lbl (StPrim op [x, StDouble 0.0])
2786 = getRegister x `thenNat` \ register ->
2787 getNewRegNat (registerRep register)
2790 code = registerCode register tmp
2791 value = registerName register tmp
2792 pk = registerRep register
2793 target = ImmCLbl lbl
2795 return (code . mkSeqInstr (BF (cmpOp op) value target))
2797 cmpOp FloatGtOp = GTT
2798 cmpOp FloatGeOp = GE
2799 cmpOp FloatEqOp = EQQ
2800 cmpOp FloatNeOp = NE
2801 cmpOp FloatLtOp = LTT
2802 cmpOp FloatLeOp = LE
2803 cmpOp DoubleGtOp = GTT
2804 cmpOp DoubleGeOp = GE
2805 cmpOp DoubleEqOp = EQQ
2806 cmpOp DoubleNeOp = NE
2807 cmpOp DoubleLtOp = LTT
2808 cmpOp DoubleLeOp = LE
-- General float comparison: Alpha FCMP only supports a subset of
-- conditions, so each op is encoded as (comparison, branch-condition)
-- with the sense inverted where necessary (e.g. Gt = not (x <= y)).
2810 genCondJump lbl (StPrim op [x, y])
2812 = trivialFCode pr instr x y `thenNat` \ register ->
2813 getNewRegNat F64 `thenNat` \ tmp ->
2815 code = registerCode register tmp
2816 result = registerName register tmp
2817 target = ImmCLbl lbl
2819 return (code . mkSeqInstr (BF cond result target))
2821 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2823 fltCmpOp op = case op of
2837 (instr, cond) = case op of
2838 FloatGtOp -> (FCMP TF LE, EQQ)
2839 FloatGeOp -> (FCMP TF LTT, EQQ)
2840 FloatEqOp -> (FCMP TF EQQ, NE)
2841 FloatNeOp -> (FCMP TF EQQ, EQQ)
2842 FloatLtOp -> (FCMP TF LTT, NE)
2843 FloatLeOp -> (FCMP TF LE, NE)
2844 DoubleGtOp -> (FCMP TF LE, EQQ)
2845 DoubleGeOp -> (FCMP TF LTT, EQQ)
2846 DoubleEqOp -> (FCMP TF EQQ, NE)
2847 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2848 DoubleLtOp -> (FCMP TF LTT, NE)
2849 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: same inverted-sense encoding, with
-- unsigned (U*) comparison instructions for Word/Addr operands.
2851 genCondJump lbl (StPrim op [x, y])
2852 = trivialCode instr x y `thenNat` \ register ->
2853 getNewRegNat IntRep `thenNat` \ tmp ->
2855 code = registerCode register tmp
2856 result = registerName register tmp
2857 target = ImmCLbl lbl
2859 return (code . mkSeqInstr (BI cond result target))
2861 (instr, cond) = case op of
2862 CharGtOp -> (CMP LE, EQQ)
2863 CharGeOp -> (CMP LTT, EQQ)
2864 CharEqOp -> (CMP EQQ, NE)
2865 CharNeOp -> (CMP EQQ, EQQ)
2866 CharLtOp -> (CMP LTT, NE)
2867 CharLeOp -> (CMP LE, NE)
2868 IntGtOp -> (CMP LE, EQQ)
2869 IntGeOp -> (CMP LTT, EQQ)
2870 IntEqOp -> (CMP EQQ, NE)
2871 IntNeOp -> (CMP EQQ, EQQ)
2872 IntLtOp -> (CMP LTT, NE)
2873 IntLeOp -> (CMP LE, NE)
2874 WordGtOp -> (CMP ULE, EQQ)
2875 WordGeOp -> (CMP ULT, EQQ)
2876 WordEqOp -> (CMP EQQ, NE)
2877 WordNeOp -> (CMP EQQ, EQQ)
2878 WordLtOp -> (CMP ULT, NE)
2879 WordLeOp -> (CMP ULE, NE)
2880 AddrGtOp -> (CMP ULE, EQQ)
2881 AddrGeOp -> (CMP ULT, EQQ)
2882 AddrEqOp -> (CMP EQQ, NE)
2883 AddrNeOp -> (CMP EQQ, EQQ)
2884 AddrLtOp -> (CMP ULT, NE)
2885 AddrLeOp -> (CMP ULE, NE)
2887 #endif /* alpha_TARGET_ARCH */
2889 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2891 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: getCondCode sets the flags; a single JXX finishes the job.
2893 genCondJump id bool = do
2894 CondCode _ cond code <- getCondCode bool
2895 return (code `snocOL` JXX cond id)
2897 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2900 #if sparc_TARGET_ARCH
-- SPARC: BF (float branch) cannot directly follow an FP op, so a NOP
-- is inserted before it; both paths pad the delay slot with NOP.
2902 genCondJump id bool = do
2903 CondCode is_float cond code <- getCondCode bool
2908 then [NOP, BF cond False id, NOP]
2909 else [BI cond False id, NOP]
2913 #endif /* sparc_TARGET_ARCH */
2916 #if powerpc_TARGET_ARCH
2918 genCondJump id bool = do
2919 CondCode is_float cond code <- getCondCode bool
2920 return (code `snocOL` BCC cond id)
2922 #endif /* powerpc_TARGET_ARCH */
2925 -- -----------------------------------------------------------------------------
2926 -- Generating C calls
2928 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2929 -- @get_arg@, which moves the arguments to the correct registers/stack
2930 -- locations. Apart from that, the code is easy.
2932 -- (If applicable) Do not fill the delay slots here; you will confuse the
2933 -- register allocator.
2936 :: CmmCallTarget -- function to call
2937 -> [(CmmReg,MachHint)] -- where to put the result
2938 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2939 -> Maybe [GlobalReg] -- volatile regs to save
2942 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2944 #if alpha_TARGET_ARCH
-- Alpha call sequence: marshal args (get_arg), load the callee
-- address into pv, JSR, then reload gp from ra on return.
2948 genCCall fn cconv result_regs args
2949 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2950 `thenNat` \ ((unused,_), argCode) ->
2952 nRegs = length allArgRegs - length unused
2953 code = asmSeqThen (map ($ []) argCode)
2956 LDA pv (AddrImm (ImmLab (ptext fn))),
2957 JSR ra (AddrReg pv) nRegs,
2958 LDGP gp (AddrReg ra)]
2960 ------------------------
2961 {- Try to get a value into a specific register (or registers) for
2962 a call. The first 6 arguments go into the appropriate
2963 argument register (separate registers for integer and floating
2964 point arguments, but used in lock-step), and the remaining
2965 arguments are dumped to the stack, beginning at 0(sp). Our
2966 first argument is a pair of the list of remaining argument
2967 registers to be assigned for this call and the next stack
2968 offset to use for overflowing arguments. This way,
2969 @get_Arg@ can be applied to all of a call's arguments using
2973 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2974 -> StixTree -- Current argument
2975 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2977 -- We have to use up all of our argument registers first...
-- While register pairs remain: pick the FP or int register of the
-- pair depending on the argument's rep; Fixed sources need a move.
2979 get_arg ((iDst,fDst):dsts, offset) arg
2980 = getRegister arg `thenNat` \ register ->
2982 reg = if isFloatingRep pk then fDst else iDst
2983 code = registerCode register reg
2984 src = registerName register reg
2985 pk = registerRep register
2988 if isFloatingRep pk then
2989 ((dsts, offset), if isFixed register then
2990 code . mkSeqInstr (FMOV src fDst)
2993 ((dsts, offset), if isFixed register then
2994 code . mkSeqInstr (OR src (RIReg src) iDst)
2997 -- Once we have run out of argument registers, we move to the
-- ...stack: store each overflow argument at the next sp offset.
3000 get_arg ([], offset) arg
3001 = getRegister arg `thenNat` \ register ->
3002 getNewRegNat (registerRep register)
3005 code = registerCode register tmp
3006 src = registerName register tmp
3007 pk = registerRep register
3008 sz = primRepToSize pk
3010 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3012 #endif /* alpha_TARGET_ARCH */
3014 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3016 #if i386_TARGET_ARCH
3018 -- we only cope with a single result for foreign calls
-- MO_* sqrt/sin/cos/tan are inlined as x87 instructions; everything
-- else is dispatched out of line (outOfLineFloatOp).
3019 genCCall (CmmPrim op) [(r,_)] args vols = do
3021 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3022 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3024 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3025 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3027 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3028 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3030 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3031 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3033 other_op -> outOfLineFloatOp op r args vols
3035 actuallyInlineFloatOp rep instr [(x,_)]
3036 = do res <- trivialUFCode rep instr x
3038 return (any (getRegisterReg r))
-- General i386 C call: push args right-to-left, call, then pop the
-- argument area (ccall) or leave it to the callee (stdcall).
-- NOTE(review): several let/in lines are elided from this extract.
3040 genCCall target dest_regs args vols = do
3041 sizes_n_codes <- mapM push_arg (reverse args)
3042 delta <- getDeltaNat
3044 (sizes, push_codes) = unzip sizes_n_codes
3045 tot_arg_size = sum sizes
3047 -- deal with static vs dynamic call targets
3048 (callinsns,cconv) <-
3051 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3052 -> -- ToDo: stdcall arg sizes
3053 return (unitOL (CALL (Left fn_imm)), conv)
3054 where fn_imm = ImmCLbl lbl
3055 CmmForeignCall expr conv
3056 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3057 ASSERT(dyn_rep == I32)
3058 return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3060 let push_code = concatOL push_codes
3061 call = callinsns `appOL`
3063 -- Deallocate parameters after call for ccall;
3064 -- but not for stdcall (callee does it)
3065 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3066 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3068 [DELTA (delta + tot_arg_size)]
3071 setDeltaNat (delta + tot_arg_size)
3074 -- assign the results, if necessary
-- Results arrive per cdecl: I64 in eax:edx, floats on the x87
-- stack top (fake0), everything else in eax.
3075 assign_code [] = nilOL
3076 assign_code [(dest,_hint)] =
3078 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3079 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3080 F32 -> unitOL (GMOV fake0 r_dest)
3081 F64 -> unitOL (GMOV fake0 r_dest)
3082 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3084 r_dest_hi = getHiVRegFromLo r_dest
3085 rep = cmmRegRep dest
3086 r_dest = getRegisterReg dest
3087 assign_code many = panic "genCCall.assign_code many"
3089 return (push_code `appOL`
3091 assign_code dest_regs)
-- Push one argument, tracking the stack delta.  I64 is pushed as
-- two I32 halves (hi then lo); floats are stored via GST after
-- explicitly bumping esp; everything else is a plain PUSH.
3098 push_arg :: (CmmExpr,MachHint){-current argument-}
3099 -> NatM (Int, InstrBlock) -- argsz, code
3101 push_arg (arg,_hint) -- we don't need the hints on x86
3102 | arg_rep == I64 = do
3103 ChildCode64 code r_lo <- iselExpr64 arg
3104 delta <- getDeltaNat
3105 setDeltaNat (delta - 8)
3107 r_hi = getHiVRegFromLo r_lo
3109 return (8, code `appOL`
3110 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3111 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3116 (code, reg, sz) <- get_op arg
3117 delta <- getDeltaNat
3118 let size = arg_size sz
3119 setDeltaNat (delta-size)
3120 if (case sz of F64 -> True; F32 -> True; _ -> False)
3123 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3125 GST sz reg (AddrBaseIndex (Just esp)
3131 PUSH I32 (OpReg reg) `snocOL`
3135 arg_rep = cmmExprRep arg
3138 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3140 (reg,code) <- getSomeReg op
3141 return (code, reg, cmmExprRep op)
3143 #endif /* i386_TARGET_ARCH */
3145 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Compile a floating-point CallishMachOp as a call to the matching
-- C math-library routine (sqrt/sin/...).  The libm entry points are
-- double-precision, so an F32 result is computed by promoting the
-- arguments to F64, calling, and demoting the result back to F32.
3147 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3148 -> Maybe [GlobalReg] -> NatM InstrBlock
3149 outOfLineFloatOp mop res args vols
3150 | cmmRegRep res == F64
3151 = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3154 = do uq <- getUniqueNat
3156 tmp = CmmLocal (LocalReg uq F64)
-- F32 case: call at F64 into a fresh temporary, then narrow.
3158 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
3159 code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
3160 return (code1 `appOL` code2)
3162 promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
3163 demote x = CmmMachOp (MO_S_Conv F64 F32) [x]
3165 target = CmmForeignCall (CmmLit lbl) CCallConv
3166 lbl = CmmLabel (mkForeignLabel fn Nothing False)
-- Map each MachOp to its libm symbol name; note both F32 and F64
-- variants call the same double-precision routine.
3169 MO_F32_Sqrt -> FSLIT("sqrt")
3170 MO_F32_Sin -> FSLIT("sin")
3171 MO_F32_Cos -> FSLIT("cos")
3172 MO_F32_Tan -> FSLIT("tan")
3173 MO_F32_Exp -> FSLIT("exp")
3174 MO_F32_Log -> FSLIT("log")
3176 MO_F32_Asin -> FSLIT("asin")
3177 MO_F32_Acos -> FSLIT("acos")
3178 MO_F32_Atan -> FSLIT("atan")
3180 MO_F32_Sinh -> FSLIT("sinh")
3181 MO_F32_Cosh -> FSLIT("cosh")
3182 MO_F32_Tanh -> FSLIT("tanh")
3183 MO_F32_Pwr -> FSLIT("pow")
3185 MO_F64_Sqrt -> FSLIT("sqrt")
3186 MO_F64_Sin -> FSLIT("sin")
3187 MO_F64_Cos -> FSLIT("cos")
3188 MO_F64_Tan -> FSLIT("tan")
3189 MO_F64_Exp -> FSLIT("exp")
3190 MO_F64_Log -> FSLIT("log")
3192 MO_F64_Asin -> FSLIT("asin")
3193 MO_F64_Acos -> FSLIT("acos")
3194 MO_F64_Atan -> FSLIT("atan")
3196 MO_F64_Sinh -> FSLIT("sinh")
3197 MO_F64_Cosh -> FSLIT("cosh")
3198 MO_F64_Tanh -> FSLIT("tanh")
3199 MO_F64_Pwr -> FSLIT("pow")
3201 other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
3203 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3205 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3207 #if x86_64_TARGET_ARCH
3209 genCCall (CmmPrim op) [(r,_)] args vols =
3210 outOfLineFloatOp op r args vols
-- x86_64 (System V ABI) C call: first args go in registers (separate
-- int/SSE register files), overflow args are pushed on the stack,
-- which must stay 16-byte aligned at the call.
-- NOTE(review): several let/in lines are elided from this extract.
3212 genCCall target dest_regs args vols = do
3214 -- load up the register arguments
3215 (stack_args, sse_regs, load_args_code)
3216 <- load_args args allArgRegs allFPArgRegs 0 nilOL
3219 tot_arg_size = arg_size * length stack_args
3221 -- On entry to the called function, %rsp should be aligned
3222 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3223 -- the return address is 16-byte aligned). In STG land
3224 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3225 -- need to make sure we push a multiple of 16-bytes of args,
3226 -- plus the return address, to get the correct alignment.
3227 -- Urg, this is hard. We need to feed the delta back into
3228 -- the arg pushing code.
3229 (real_size, adjust_rsp) <-
3230 if tot_arg_size `rem` 16 == 0
3231 then return (tot_arg_size, nilOL)
3232 else do -- we need to adjust...
3233 delta <- getDeltaNat
3234 setDeltaNat (delta-8)
3235 return (tot_arg_size+8, toOL [
3236 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3240 -- push the stack args, right to left
3241 push_code <- push_args (reverse stack_args) nilOL
3242 delta <- getDeltaNat
3244 -- deal with static vs dynamic call targets
3245 (callinsns,cconv) <-
3248 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3249 -> -- ToDo: stdcall arg sizes
3250 return (unitOL (CALL (Left fn_imm)), conv)
3251 where fn_imm = ImmCLbl lbl
3252 CmmForeignCall expr conv
3253 -> do (dyn_r, dyn_c) <- getSomeReg expr
3254 return (dyn_c `snocOL` CALL (Right dyn_r), conv)
3257 -- The x86_64 ABI requires us to set %al to the number of SSE
3258 -- registers that contain arguments, if the called routine
3259 -- is a varargs function. We don't know whether it's a
3260 -- varargs function or not, so we have to assume it is.
3262 -- It's not safe to omit this assignment, even if the number
3263 -- of SSE regs in use is zero. If %al is larger than 8
3264 -- on entry to a varargs function, seg faults ensue.
3265 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3267 let call = callinsns `appOL`
3269 -- Deallocate parameters after call for ccall;
3270 -- but not for stdcall (callee does it)
3271 (if cconv == StdCallConv || real_size==0 then [] else
3272 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3274 [DELTA (delta + real_size)]
3277 setDeltaNat (delta + real_size)
3280 -- assign the results, if necessary
-- Results per the SysV ABI: floats in %xmm0, integers in %rax.
3281 assign_code [] = nilOL
3282 assign_code [(dest,_hint)] =
3284 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3285 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3286 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3288 rep = cmmRegRep dest
3289 r_dest = getRegisterReg dest
3290 assign_code many = panic "genCCall.assign_code many"
3292 return (load_args_code `appOL`
3295 assign_eax sse_regs `appOL`
3297 assign_code dest_regs)
3300 arg_size = 8 -- always, at the mo
-- Distribute arguments over the int/FP argument registers; whatever
-- does not fit is returned as stack_args, along with the count of
-- SSE registers used (needed for the %al varargs protocol above).
3302 load_args :: [(CmmExpr,MachHint)]
3303 -> [Reg] -- int regs avail for args
3304 -> [Reg] -- FP regs avail for args
3305 -> Int -> InstrBlock
3306 -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
3307 load_args args [] [] sse_regs code = return (args, sse_regs, code)
3308 -- no more regs to use
3309 load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
3310 -- no more args to push
3311 load_args ((arg,hint) : rest) aregs fregs sse_regs code
3312 | isFloatingRep arg_rep =
3316 arg_code <- getAnyReg arg
3317 load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
3322 arg_code <- getAnyReg arg
3323 load_args rest rs fregs sse_regs (code `appOL` arg_code r)
3325 arg_rep = cmmExprRep arg
3328 (args',sse',code') <- load_args rest aregs fregs sse_regs code
3329 return ((arg,hint):args', sse', code')
-- Push the overflow arguments: floats are stored via MOV after an
-- explicit rsp adjustment (no PUSH for SSE regs); integers use PUSH.
3331 push_args [] code = return code
3332 push_args ((arg,hint):rest) code
3333 | isFloatingRep arg_rep = do
3334 (arg_reg, arg_code) <- getSomeReg arg
3335 delta <- getDeltaNat
3336 setDeltaNat (delta-arg_size)
3337 let code' = code `appOL` toOL [
3338 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3339 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3340 DELTA (delta-arg_size)]
3341 push_args rest code'
3344 -- we only ever generate word-sized function arguments. Promotion
3345 -- has already happened: our Int8# type is kept sign-extended
3346 -- in an Int#, for example.
3347 ASSERT(arg_rep == I64) return ()
3348 (arg_op, arg_code) <- getOperand arg
3349 delta <- getDeltaNat
3350 setDeltaNat (delta-arg_size)
3351 let code' = code `appOL` toOL [PUSH I64 arg_op,
3352 DELTA (delta-arg_size)]
3353 push_args rest code'
3355 arg_rep = cmmExprRep arg
3358 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3360 #if sparc_TARGET_ARCH
3362 The SPARC calling convention is an absolute
3363 nightmare. The first 6x32 bits of arguments are mapped into
3364 %o0 through %o5, and the remaining arguments are dumped to the
3365 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3367 If we have to put args on the stack, move %o6==%sp down by
3368 the number of words to go on the stack, to ensure there's enough space.
3370 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3371 16 words above the stack pointer is a word for the address of
3372 a structure return value. I use this as a temporary location
3373 for moving values from float to int regs. Certainly it isn't
3374 safe to put anything in the 16 words starting at %sp, since
3375 this area can get trashed at any time due to window overflows
3376 caused by signal handlers.
3378 A final complication (if the above isn't enough) is that
3379 we can't blithely calculate the arguments one by one into
3380 %o0 .. %o5. Consider the following nested calls:
3384 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3385 the inner call will itself use %o0, which trashes the value put there
3386 in preparation for the outer call. Upshot: we need to calculate the
3387 args into temporary regs, and move those to arg regs or onto the
3388 stack only immediately prior to the call proper. Sigh.
3391 genCCall fn cconv kind args
3392 = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
3394 (argcodes, vregss) = unzip argcode_and_vregs
3395 n_argRegs = length allArgRegs
3396 n_argRegs_used = min (length vregs) n_argRegs
3397 vregs = concat vregss
3399 -- deal with static vs dynamic call targets
3402 -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
3404 -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
3405 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3407 `thenNat` \ callinsns ->
3409 argcode = concatOL argcodes
3410 (move_sp_down, move_sp_up)
3411 = let diff = length vregs - n_argRegs
3412 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3415 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3417 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3419 return (argcode `appOL`
3420 move_sp_down `appOL`
3421 transfer_code `appOL`
3426 -- function names that begin with '.' are assumed to be special
3427 -- internally generated names like '.mul,' which don't get an
3428 -- underscore prefix
3429 -- ToDo:needed (WDP 96/03) ???
3430 fn_static = unLeft fn
3431 fn__2 = case (headFS fn_static) of
3432 '.' -> ImmLit (ftext fn_static)
3433 _ -> ImmCLbl (mkForeignLabel fn_static False)
3435 -- move args from the integer vregs into which they have been
3436 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3437 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3439 move_final [] _ offset -- all args done
3442 move_final (v:vs) [] offset -- out of aregs; move to stack
3443 = ST W v (spRel offset)
3444 : move_final vs [] (offset+1)
3446 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3447 = OR False g0 (RIReg v) a
3448 : move_final vs az offset
3450 -- generate code to calculate an argument, and move it into one
3451 -- or two integer vregs.
3452 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3453 arg_to_int_vregs arg
3454 | is64BitRep (repOfCmmExpr arg)
3455 = iselExpr64 arg `thenNat` \ (ChildCode64 code vr_lo) ->
3456 let r_lo = VirtualRegI vr_lo
3457 r_hi = getHiVRegFromLo r_lo
3458 in return (code, [r_hi, r_lo])
3460 = getRegister arg `thenNat` \ register ->
3461 getNewRegNat (registerRep register) `thenNat` \ tmp ->
3462 let code = registerCode register tmp
3463 src = registerName register tmp
3464 pk = registerRep register
3466 -- the value is in src. Get it into 1 or 2 int vregs.
3469 getNewRegNat WordRep `thenNat` \ v1 ->
3470 getNewRegNat WordRep `thenNat` \ v2 ->
3473 FMOV DF src f0 `snocOL`
3474 ST F f0 (spRel 16) `snocOL`
3475 LD W (spRel 16) v1 `snocOL`
3476 ST F (fPair f0) (spRel 16) `snocOL`
3482 getNewRegNat WordRep `thenNat` \ v1 ->
3485 ST F src (spRel 16) `snocOL`
3491 getNewRegNat WordRep `thenNat` \ v1 ->
3493 code `snocOL` OR False g0 (RIReg src) v1
3497 #endif /* sparc_TARGET_ARCH */
3499 #if powerpc_TARGET_ARCH
3501 #if darwin_TARGET_OS || linux_TARGET_OS
3503 The PowerPC calling convention for Darwin/Mac OS X
3504 is described in Apple's document
3505 "Inside Mac OS X - Mach-O Runtime Architecture".
3507 PowerPC Linux uses the System V Release 4 Calling Convention
3508 for PowerPC. It is described in the
3509 "System V Application Binary Interface PowerPC Processor Supplement".
3511 Both conventions are similar:
3512 Parameters may be passed in general-purpose registers starting at r3, in
3513 floating point registers starting at f1, or on the stack.
3515 But there are substantial differences:
3516 * The number of registers used for parameter passing and the exact set of
3517 nonvolatile registers differs (see MachRegs.lhs).
3518 * On Darwin, stack space is always reserved for parameters, even if they are
3519 passed in registers. The called routine may choose to save parameters from
3520 registers to the corresponding space on the stack.
3521 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3522 parameter is passed in an FPR.
3523 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3524 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3525 Darwin just treats an I64 like two separate I32s (high word first).
3526 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3527 4-byte aligned like everything else on Darwin.
3528 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3529 PowerPC Linux does not agree, so neither do we.
3531 According to both conventions, The parameter area should be part of the
3532 caller's stack frame, allocated in the caller's prologue code (large enough
3533 to hold the parameter lists for all called routines). The NCG already
3534 uses the stack for register spilling, leaving 64 bytes free at the top.
3535 If we need a larger parameter area than that, we just allocate a new stack
3536 frame just before ccalling.
3539 genCCall target dest_regs argsAndHints vols
3540 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3541 -- we rely on argument promotion in the codeGen
3543 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3545 allArgRegs allFPArgRegs
3549 (labelOrExpr, reduceToF32) <- case target of
3550 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3551 CmmForeignCall expr conv -> return (Right expr, False)
3552 CmmPrim mop -> outOfLineFloatOp mop
3554 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3555 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
3560 `snocOL` BL lbl usedRegs
3563 (dynReg, dynCode) <- getSomeReg dyn
3565 `snocOL` MTCTR dynReg
3567 `snocOL` BCTRL usedRegs
3570 #if darwin_TARGET_OS
3571 initialStackOffset = 24
3572 -- size of linkage area + size of arguments, in bytes
3573 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3574 map machRepByteWidth argReps
3575 #elif linux_TARGET_OS
3576 initialStackOffset = 8
3577 stackDelta finalStack = roundTo 16 finalStack
3579 args = map fst argsAndHints
3580 argReps = map cmmExprRep args
3582 roundTo a x | x `mod` a == 0 = x
3583 | otherwise = x + a - (x `mod` a)
3585 move_sp_down finalStack
3587 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3590 where delta = stackDelta finalStack
3591 move_sp_up finalStack
3593 toOL [ADD sp sp (RIImm (ImmInt delta)),
3596 where delta = stackDelta finalStack
3599 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3600 passArguments ((arg,I64):args) gprs fprs stackOffset
3601 accumCode accumUsed =
3603 ChildCode64 code vr_lo <- iselExpr64 arg
3604 let vr_hi = getHiVRegFromLo vr_lo
3606 #if darwin_TARGET_OS
3611 (accumCode `appOL` code
3612 `snocOL` storeWord vr_hi gprs stackOffset
3613 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3614 ((take 2 gprs) ++ accumUsed)
3616 storeWord vr (gpr:_) offset = MR gpr vr
3617 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3619 #elif linux_TARGET_OS
3620 let stackOffset' = roundTo 8 stackOffset
3621 stackCode = accumCode `appOL` code
3622 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3623 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3624 regCode hireg loreg =
3625 accumCode `appOL` code
3626 `snocOL` MR hireg vr_hi
3627 `snocOL` MR loreg vr_lo
3630 hireg : loreg : regs | even (length gprs) ->
3631 passArguments args regs fprs stackOffset
3632 (regCode hireg loreg) (hireg : loreg : accumUsed)
3633 _skipped : hireg : loreg : regs ->
3634 passArguments args regs fprs stackOffset
3635 (regCode hireg loreg) (hireg : loreg : accumUsed)
3636 _ -> -- only one or no regs left
3637 passArguments args [] fprs (stackOffset'+8)
3641 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3642 | reg : _ <- regs = do
3643 register <- getRegister arg
3644 let code = case register of
3645 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3646 Any _ acode -> acode reg
3650 #if darwin_TARGET_OS
3651 -- The Darwin ABI requires that we reserve stack slots for register parameters
3652 (stackOffset + stackBytes)
3653 #elif linux_TARGET_OS
3654 -- ... the SysV ABI doesn't.
3657 (accumCode `appOL` code)
3660 (vr, code) <- getSomeReg arg
3664 (stackOffset' + stackBytes)
3665 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3668 #if darwin_TARGET_OS
3669 -- stackOffset is at least 4-byte aligned
3670 -- The Darwin ABI is happy with that.
3671 stackOffset' = stackOffset
3673 -- ... the SysV ABI requires 8-byte alignment for doubles.
3674 stackOffset' | rep == F64 = roundTo 8 stackOffset
3675 | otherwise = stackOffset
3677 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3678 (nGprs, nFprs, stackBytes, regs) = case rep of
3679 I32 -> (1, 0, 4, gprs)
3680 #if darwin_TARGET_OS
3681 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3683 F32 -> (1, 1, 4, fprs)
3684 F64 -> (2, 1, 8, fprs)
3685 #elif linux_TARGET_OS
3686 -- ... the SysV ABI doesn't.
3687 F32 -> (0, 1, 4, fprs)
3688 F64 -> (0, 1, 8, fprs)
3691 moveResult reduceToF32 =
3695 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3696 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3697 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3699 | otherwise -> unitOL (MR r_dest r3)
3700 where rep = cmmRegRep dest
3701 r_dest = getRegisterReg dest
3703 outOfLineFloatOp mop =
3705 mopExpr <- cmmMakeDynamicReference addImportNat True $
3706 mkForeignLabel functionName Nothing True
3707 let mopLabelOrExpr = case mopExpr of
3708 CmmLit (CmmLabel lbl) -> Left lbl
3710 return (mopLabelOrExpr, reduce)
3712 (functionName, reduce) = case mop of
3713 MO_F32_Exp -> (FSLIT("exp"), True)
3714 MO_F32_Log -> (FSLIT("log"), True)
3715 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3717 MO_F32_Sin -> (FSLIT("sin"), True)
3718 MO_F32_Cos -> (FSLIT("cos"), True)
3719 MO_F32_Tan -> (FSLIT("tan"), True)
3721 MO_F32_Asin -> (FSLIT("asin"), True)
3722 MO_F32_Acos -> (FSLIT("acos"), True)
3723 MO_F32_Atan -> (FSLIT("atan"), True)
3725 MO_F32_Sinh -> (FSLIT("sinh"), True)
3726 MO_F32_Cosh -> (FSLIT("cosh"), True)
3727 MO_F32_Tanh -> (FSLIT("tanh"), True)
3728 MO_F32_Pwr -> (FSLIT("pow"), True)
3730 MO_F64_Exp -> (FSLIT("exp"), False)
3731 MO_F64_Log -> (FSLIT("log"), False)
3732 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3734 MO_F64_Sin -> (FSLIT("sin"), False)
3735 MO_F64_Cos -> (FSLIT("cos"), False)
3736 MO_F64_Tan -> (FSLIT("tan"), False)
3738 MO_F64_Asin -> (FSLIT("asin"), False)
3739 MO_F64_Acos -> (FSLIT("acos"), False)
3740 MO_F64_Atan -> (FSLIT("atan"), False)
3742 MO_F64_Sinh -> (FSLIT("sinh"), False)
3743 MO_F64_Cosh -> (FSLIT("cosh"), False)
3744 MO_F64_Tanh -> (FSLIT("tanh"), False)
3745 MO_F64_Pwr -> (FSLIT("pow"), False)
3746 other -> pprPanic "genCCall(ppc): unknown callish op"
3747 (pprCallishMachOp other)
3749 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3751 #endif /* powerpc_TARGET_ARCH */
3754 -- -----------------------------------------------------------------------------
3755 -- Generating a table-branch
-- | Generate a table-based multi-way branch: lay out a jump table in
-- read-only data and emit an indexed jump through it.  'Nothing'
-- entries in the target list are table holes (value 0).
-- NOTE(review): this extract appears to be missing interior lines
-- (e.g. the 'let' keywords introducing the bindings below, and the
-- closing brackets / 'return code' of each branch) -- confirm against
-- the full source before editing code here.
3757 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3759 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: absolute-address table; jump via a base+index*wORD_SIZE
-- operand anchored at the table label.
3760 genSwitch expr ids = do
3761 (reg,e_code) <- getSomeReg expr
3762 lbl <- getNewLabelNat
3764 jumpTable = map jumpTableEntry ids
3765 op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
3766 code = e_code `appOL` toOL [
3767 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3768 JMP_TBL op [ id | Just id <- ids ]
3772 #elif powerpc_TARGET_ARCH
-- PowerPC, position-independent variant (presumably guarded by
-- opt_PIC on a line missing from this extract): the table holds
-- label-minus-table offsets, rebased onto the table address before
-- the branch-via-CTR.
3776 (reg,e_code) <- getSomeReg expr
3777 tmp <- getNewRegNat I32
3778 lbl <- getNewLabelNat
3779 dynRef <- cmmMakeDynamicReference addImportNat False lbl
3780 (tableReg,t_code) <- getSomeReg $ dynRef
3782 jumpTable = map jumpTableEntryRel ids
-- A relative entry is the distance from the table label to the
-- target block's local label (0 for a hole).
3784 jumpTableEntryRel Nothing
3785 = CmmStaticLit (CmmInt 0 wordRep)
3786 jumpTableEntryRel (Just (BlockId id))
3787 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3788 where blockLabel = mkAsmTempLabel id
3790 code = e_code `appOL` t_code `appOL` toOL [
3791 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-- scale the index by 4 (SLW by 2), load the offset, rebase it
3792 SLW tmp reg (RIImm (ImmInt 2)),
3793 LD I32 tmp (AddrRegReg tableReg tmp),
3794 ADD tmp tmp (RIReg tableReg),
3796 BCTR [ id | Just id <- ids ]
-- PowerPC, non-PIC variant: absolute addresses; the table is
-- addressed via the HA/LO halves of the table label.
3801 (reg,e_code) <- getSomeReg expr
3802 tmp <- getNewRegNat I32
3803 lbl <- getNewLabelNat
3805 jumpTable = map jumpTableEntry ids
3807 code = e_code `appOL` toOL [
3808 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3809 SLW tmp reg (RIImm (ImmInt 2)),
3810 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3811 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3813 BCTR [ id | Just id <- ids ]
-- Other architectures: not implemented yet.
3817 genSwitch expr ids = panic "ToDo: genSwitch"
-- | One word of a jump table: zero for a hole (no branch target),
-- otherwise the address of the block's local assembly label.
jumpTableEntry mbBlockId =
    case mbBlockId of
        Nothing            -> CmmStaticLit (CmmInt 0 wordRep)
        Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
3824 -- -----------------------------------------------------------------------------
3826 -- -----------------------------------------------------------------------------
3829 -- -----------------------------------------------------------------------------
3830 -- 'condIntReg' and 'condFltReg': condition codes into registers
3832 -- Turn those condition codes into integers now (when they appear on
3833 -- the right hand side of an assignment).
3835 -- (If applicable) Do not fill the delay slots here; you will confuse the
3836 -- register allocator.
-- | Materialise a comparison result as a 0/1 value in a register:
-- 'condIntReg' for integer compares, 'condFltReg' for floating-point.
3838 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3840 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3842 #if alpha_TARGET_ARCH
-- Alpha: never implemented in this code generator.
3843 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3844 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3845 #endif /* alpha_TARGET_ARCH */
3847 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3849 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: SETcc into a byte register, widen, then mask to 0/1.
-- NOTE(review): a 'let' line appears to be missing before the 'code'
-- binding in this extract.
3851 condIntReg cond x y = do
3852 CondCode _ cond cond_code <- condIntCode cond x y
3853 tmp <- getNewRegNat I8
3855 code dst = cond_code `appOL` toOL [
3856 SETCC cond (OpReg tmp),
3857 MOV I32 (OpReg tmp) (OpReg dst),
3858 AND I32 (OpImm (ImmInt 1)) (OpReg dst)
3860 -- NB. (1) The AND is needed here because the x86 only
3861 -- sets the low byte in the SETCC instruction.
3862 -- NB. (2) The extra temporary register is a hack to
3863 -- work around the fact that the setcc instructions only
3864 -- accept byte registers. dst might not be a byte-able reg,
3865 -- but currently all free registers are byte-able, so we're
3866 -- guaranteed that a new temporary is byte-able.
3868 return (Any I32 code)
-- Floating-point compare: pick 0 or 1 by branching on the condition
-- between two fresh block labels.  Several instruction lines (the
-- jumps and NEWBLOCK markers) are missing from this extract.
3871 condFltReg cond x y = do
3872 lbl1 <- getBlockIdNat
3873 lbl2 <- getBlockIdNat
3874 CondCode _ cond cond_code <- condFltCode cond x y
3876 code dst = cond_code `appOL` toOL [
3878 MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
3881 MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
3884 -- SIGH, have to split up this block somehow...
3886 return (Any I32 code)
3888 #endif /* i386_TARGET_ARCH */
3890 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3892 #if sparc_TARGET_ARCH
3894 condIntReg EQQ x (StInt 0)
3895 = getRegister x `thenNat` \ register ->
3896 getNewRegNat IntRep `thenNat` \ tmp ->
3898 code = registerCode register tmp
3899 src = registerName register tmp
3900 code__2 dst = code `appOL` toOL [
3901 SUB False True g0 (RIReg src) g0,
3902 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3904 return (Any IntRep code__2)
3907 = getRegister x `thenNat` \ register1 ->
3908 getRegister y `thenNat` \ register2 ->
3909 getNewRegNat IntRep `thenNat` \ tmp1 ->
3910 getNewRegNat IntRep `thenNat` \ tmp2 ->
3912 code1 = registerCode register1 tmp1
3913 src1 = registerName register1 tmp1
3914 code2 = registerCode register2 tmp2
3915 src2 = registerName register2 tmp2
3916 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3917 XOR False src1 (RIReg src2) dst,
3918 SUB False True g0 (RIReg dst) g0,
3919 SUB True False g0 (RIImm (ImmInt (-1))) dst]
3921 return (Any IntRep code__2)
3923 condIntReg NE x (StInt 0)
3924 = getRegister x `thenNat` \ register ->
3925 getNewRegNat IntRep `thenNat` \ tmp ->
3927 code = registerCode register tmp
3928 src = registerName register tmp
3929 code__2 dst = code `appOL` toOL [
3930 SUB False True g0 (RIReg src) g0,
3931 ADD True False g0 (RIImm (ImmInt 0)) dst]
3933 return (Any IntRep code__2)
3936 = getRegister x `thenNat` \ register1 ->
3937 getRegister y `thenNat` \ register2 ->
3938 getNewRegNat IntRep `thenNat` \ tmp1 ->
3939 getNewRegNat IntRep `thenNat` \ tmp2 ->
3941 code1 = registerCode register1 tmp1
3942 src1 = registerName register1 tmp1
3943 code2 = registerCode register2 tmp2
3944 src2 = registerName register2 tmp2
3945 code__2 dst = code1 `appOL` code2 `appOL` toOL [
3946 XOR False src1 (RIReg src2) dst,
3947 SUB False True g0 (RIReg dst) g0,
3948 ADD True False g0 (RIImm (ImmInt 0)) dst]
3950 return (Any IntRep code__2)
3953 = getBlockIdNat `thenNat` \ lbl1 ->
3954 getBlockIdNat `thenNat` \ lbl2 ->
3955 condIntCode cond x y `thenNat` \ condition ->
3957 code = condCode condition
3958 cond = condName condition
3959 code__2 dst = code `appOL` toOL [
3960 BI cond False (ImmCLbl lbl1), NOP,
3961 OR False g0 (RIImm (ImmInt 0)) dst,
3962 BI ALWAYS False (ImmCLbl lbl2), NOP,
3964 OR False g0 (RIImm (ImmInt 1)) dst,
3967 return (Any IntRep code__2)
3970 = getBlockIdNat `thenNat` \ lbl1 ->
3971 getBlockIdNat `thenNat` \ lbl2 ->
3972 condFltCode cond x y `thenNat` \ condition ->
3974 code = condCode condition
3975 cond = condName condition
3976 code__2 dst = code `appOL` toOL [
3978 BF cond False (ImmCLbl lbl1), NOP,
3979 OR False g0 (RIImm (ImmInt 0)) dst,
3980 BI ALWAYS False (ImmCLbl lbl2), NOP,
3982 OR False g0 (RIImm (ImmInt 1)) dst,
3985 return (Any IntRep code__2)
3987 #endif /* sparc_TARGET_ARCH */
3989 #if powerpc_TARGET_ARCH
-- PowerPC: evaluate the condition, then rotate the relevant CR bit
-- into the low position of the destination (RLWINM), negating it
-- first via CRNOR for inverted conditions.
-- NOTE(review): most of this definition's lines (a commented-out
-- alternative implementation and the per-Cond bit table) are missing
-- from this extract -- comments here are best-effort.
3990 condReg getCond = do
3991 lbl1 <- getBlockIdNat
3992 lbl2 <- getBlockIdNat
3993 CondCode _ cond cond_code <- getCond
3995 {- code dst = cond_code `appOL` toOL [
4004 code dst = cond_code
4008 RLWINM dst dst (bit + 1) 31 31
4011 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4014 (bit, do_negate) = case cond of
4028 return (Any I32 code)
-- Integer and FP variants share the same 0/1 materialisation.
4030 condIntReg cond x y = condReg (condIntCode cond x y)
4031 condFltReg cond x y = condReg (condFltCode cond x y)
4032 #endif /* powerpc_TARGET_ARCH */
4035 -- -----------------------------------------------------------------------------
4036 -- 'trivial*Code': deal with trivial instructions
4038 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4039 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4040 -- Only look for constants on the right hand side, because that's
4041 -- where the generic optimizer will have put them.
4043 -- Similarly, for unary instructions, we don't have to worry about
4044 -- matching an StInt as the argument, because genericOpt will already
4045 -- have handled the constant-folding.
4049 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4050 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4051 -> Maybe (Operand -> Operand -> Instr)
4052 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4053 -> Maybe (Operand -> Operand -> Instr)
4054 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4055 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4057 -> CmmExpr -> CmmExpr -- the two arguments
4060 #ifndef powerpc_TARGET_ARCH
4063 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4064 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4065 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4066 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4068 -> CmmExpr -> CmmExpr -- the two arguments
4074 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4075 ,IF_ARCH_i386 ((Operand -> Instr)
4076 ,IF_ARCH_x86_64 ((Operand -> Instr)
4077 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4078 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4080 -> CmmExpr -- the one argument
4083 #ifndef powerpc_TARGET_ARCH
4086 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4087 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4088 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4089 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4091 -> CmmExpr -- the one argument
4095 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4097 #if alpha_TARGET_ARCH
4099 trivialCode instr x (StInt y)
4101 = getRegister x `thenNat` \ register ->
4102 getNewRegNat IntRep `thenNat` \ tmp ->
4104 code = registerCode register tmp
4105 src1 = registerName register tmp
4106 src2 = ImmInt (fromInteger y)
4107 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4109 return (Any IntRep code__2)
4111 trivialCode instr x y
4112 = getRegister x `thenNat` \ register1 ->
4113 getRegister y `thenNat` \ register2 ->
4114 getNewRegNat IntRep `thenNat` \ tmp1 ->
4115 getNewRegNat IntRep `thenNat` \ tmp2 ->
4117 code1 = registerCode register1 tmp1 []
4118 src1 = registerName register1 tmp1
4119 code2 = registerCode register2 tmp2 []
4120 src2 = registerName register2 tmp2
4121 code__2 dst = asmSeqThen [code1, code2] .
4122 mkSeqInstr (instr src1 (RIReg src2) dst)
4124 return (Any IntRep code__2)
4127 trivialUCode instr x
4128 = getRegister x `thenNat` \ register ->
4129 getNewRegNat IntRep `thenNat` \ tmp ->
4131 code = registerCode register tmp
4132 src = registerName register tmp
4133 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4135 return (Any IntRep code__2)
4138 trivialFCode _ instr x y
4139 = getRegister x `thenNat` \ register1 ->
4140 getRegister y `thenNat` \ register2 ->
4141 getNewRegNat F64 `thenNat` \ tmp1 ->
4142 getNewRegNat F64 `thenNat` \ tmp2 ->
4144 code1 = registerCode register1 tmp1
4145 src1 = registerName register1 tmp1
4147 code2 = registerCode register2 tmp2
4148 src2 = registerName register2 tmp2
4150 code__2 dst = asmSeqThen [code1 [], code2 []] .
4151 mkSeqInstr (instr src1 src2 dst)
4153 return (Any F64 code__2)
4155 trivialUFCode _ instr x
4156 = getRegister x `thenNat` \ register ->
4157 getNewRegNat F64 `thenNat` \ tmp ->
4159 code = registerCode register tmp
4160 src = registerName register tmp
4161 code__2 dst = code . mkSeqInstr (instr src dst)
4163 return (Any F64 code__2)
4165 #endif /* alpha_TARGET_ARCH */
4167 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4169 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4172 The Rules of the Game are:
4174 * You cannot assume anything about the destination register dst;
4175 it may be anything, including a fixed reg.
4177 * You may compute an operand into a fixed reg, but you may not
4178 subsequently change the contents of that fixed reg. If you
4179 want to do so, first copy the value either to a temporary
4180 or into dst. You are free to modify dst even if it happens
4181 to be a fixed reg -- that's not your problem.
4183 * You cannot assume that a fixed reg will stay live over an
4184 arbitrary computation. The same applies to the dst reg.
4186 * Temporary regs obtained from getNewRegNat are distinct from
4187 each other and from all other regs, and stay live over
4188 arbitrary computations.
4190 --------------------
4192 SDM's version of The Rules:
4194 * If getRegister returns Any, that means it can generate correct
4195 code which places the result in any register, period. Even if that
4196 register happens to be read during the computation.
4198 Corollary #1: this means that if you are generating code for an
4199 operation with two arbitrary operands, you cannot assign the result
4200 of the first operand into the destination register before computing
4201 the second operand. The second operand might require the old value
4202 of the destination register.
4204 Corollary #2: A function might be able to generate more efficient
4205 code if it knows the destination register is a new temporary (and
4206 therefore not read by any of the sub-computations).
4208 * If getRegister returns Any, then the code it generates may modify only:
4209 (a) fresh temporaries
4210 (b) the destination register
4211 (c) known registers (eg. %ecx is used by shifts)
4212 In particular, it may *not* modify global registers, unless the global
4213 register happens to be the destination register.
-- | x86/x86_64 two-operand ALU operation.  First clause: when a
-- reversed instruction is supplied (the op is commutable) and the
-- first argument is a literal that fits an immediate, compute b into
-- the destination and apply the reversed instruction with the
-- literal as the immediate operand.
-- NOTE(review): the lines introducing the 'code' bindings (and the
-- surrounding 'let'/'where' keywords) are missing from this extract.
4216 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4217 | not (is64BitLit lit_a) = do
4218 b_code <- getAnyReg b
4221 = b_code dst `snocOL`
4222 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4224 return (Any rep code)
-- General case: evaluate b to an operand that survives the
-- computation of a, compute a straight into dst, then
-- dst := dst `op` b.
4226 trivialCode rep instr maybe_revinstr a b = do
4227 (b_op, b_code) <- getNonClobberedOperand b
4228 a_code <- getAnyReg a
4229 tmp <- getNewRegNat rep
4231 -- We want the value of b to stay alive across the computation of a.
4232 -- But, we want to calculate a straight into the destination register,
4233 -- because the instruction only has two operands (dst := dst `op` src).
4234 -- The troublesome case is when the result of b is in the same register
4235 -- as the destination reg. In this case, we have to save b in a
4236 -- new temporary across the computation of a.
4238 | dst `clashesWith` b_op =
4240 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4242 instr (OpReg tmp) (OpReg dst)
4246 instr b_op (OpReg dst)
4248 return (Any rep code)
-- Does this register appear in the operand, either directly or
-- inside an address mode?
4250 reg `clashesWith` OpReg reg2 = reg == reg2
4251 reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
4252 reg `clashesWith` _ = False
-- Unary ALU operation.  (The 'code' binding's lines are missing from
-- this extract.)
4256 trivialUCode rep instr x = do
4257 x_code <- getAnyReg x
4263 return (Any rep code)
4267 #if i386_TARGET_ARCH
-- i386 (x87): three-address floating-point op; x must survive the
-- computation of y, hence getNonClobberedReg.
-- NOTE(review): the 'let code dst = ...' plumbing lines are missing
-- from this extract.
4269 trivialFCode pk instr x y = do
4270 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4271 (y_reg, y_code) <- getSomeReg y
4276 instr pk x_reg y_reg dst
4278 return (Any pk code)
4282 #if x86_64_TARGET_ARCH
4284 -- We use the 2-operand SSE2 floating pt instructions. ToDo: improve on
4285 -- this by using some of the special cases in trivialCode above.
-- x86_64 (SSE2): two-operand form dst := dst `op` y, so compute x
-- straight into dst and keep y in a register that won't be clobbered.
4286 trivialFCode pk instr x y = do
4287 (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
4288 x_code <- getAnyReg x
4293 instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
4294 (IF_ARCH_x86_64(OpReg,) dst)
4296 return (Any pk code)
-- Unary floating-point operation.  (Body lines are missing from this
-- extract.)
4302 trivialUFCode rep instr x = do
4303 (x_reg, x_code) <- getSomeReg x
4309 return (Any rep code)
4311 #endif /* i386_TARGET_ARCH */
4313 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4315 #if sparc_TARGET_ARCH
4317 trivialCode instr x (StInt y)
4319 = getRegister x `thenNat` \ register ->
4320 getNewRegNat IntRep `thenNat` \ tmp ->
4322 code = registerCode register tmp
4323 src1 = registerName register tmp
4324 src2 = ImmInt (fromInteger y)
4325 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4327 return (Any IntRep code__2)
4329 trivialCode instr x y
4330 = getRegister x `thenNat` \ register1 ->
4331 getRegister y `thenNat` \ register2 ->
4332 getNewRegNat IntRep `thenNat` \ tmp1 ->
4333 getNewRegNat IntRep `thenNat` \ tmp2 ->
4335 code1 = registerCode register1 tmp1
4336 src1 = registerName register1 tmp1
4337 code2 = registerCode register2 tmp2
4338 src2 = registerName register2 tmp2
4339 code__2 dst = code1 `appOL` code2 `snocOL`
4340 instr src1 (RIReg src2) dst
4342 return (Any IntRep code__2)
4345 trivialFCode pk instr x y
4346 = getRegister x `thenNat` \ register1 ->
4347 getRegister y `thenNat` \ register2 ->
4348 getNewRegNat (registerRep register1)
4350 getNewRegNat (registerRep register2)
4352 getNewRegNat F64 `thenNat` \ tmp ->
4354 promote x = FxTOy F DF x tmp
4356 pk1 = registerRep register1
4357 code1 = registerCode register1 tmp1
4358 src1 = registerName register1 tmp1
4360 pk2 = registerRep register2
4361 code2 = registerCode register2 tmp2
4362 src2 = registerName register2 tmp2
4366 code1 `appOL` code2 `snocOL`
4367 instr (primRepToSize pk) src1 src2 dst
4368 else if pk1 == F32 then
4369 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4370 instr DF tmp src2 dst
4372 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4373 instr DF src1 tmp dst
4375 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
4378 trivialUCode instr x
4379 = getRegister x `thenNat` \ register ->
4380 getNewRegNat IntRep `thenNat` \ tmp ->
4382 code = registerCode register tmp
4383 src = registerName register tmp
4384 code__2 dst = code `snocOL` instr (RIReg src) dst
4386 return (Any IntRep code__2)
4389 trivialUFCode pk instr x
4390 = getRegister x `thenNat` \ register ->
4391 getNewRegNat pk `thenNat` \ tmp ->
4393 code = registerCode register tmp
4394 src = registerName register tmp
4395 code__2 dst = code `snocOL` instr src dst
4397 return (Any pk code__2)
4399 #endif /* sparc_TARGET_ARCH */
4401 #if powerpc_TARGET_ARCH
4404 Wolfgang's PowerPC version of The Rules:
4406 A slightly modified version of The Rules to take advantage of the fact
4407 that PowerPC instructions work on all registers and don't implicitly
4408 clobber any fixed registers.
4410 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4412 * If getRegister returns Any, then the code it generates may modify only:
4413 (a) fresh temporaries
4414 (b) the destination register
4415 It may *not* modify global registers, unless the global
4416 register happens to be the destination register.
4417 It may not clobber any other registers. In fact, only ccalls clobber any fixed registers.
4419 Also, it may not modify the counter register (used by genCCall).
4421 Corollary: If a getRegister for a subexpression returns Fixed, you need
4422 not move it to a fresh temporary before evaluating the next subexpression.
4423 The Fixed register won't be modified.
4424 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4426 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4427 the value of the destination register.
-- | PowerPC: two-operand integer operation.
-- First clause: when the right operand is an integer literal that fits
-- the instruction's immediate field ('makeImmediate' succeeds for this
-- rep/signedness), fold it in as an 'RIImm' and evaluate only the left
-- operand.  NOTE(review): the `= do` line of this clause is missing
-- from this chunk.
4430 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4431 | Just imm <- makeImmediate rep signed y
4433 (src1, code1) <- getSomeReg x
4434 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4435 return (Any rep code)
-- General case: evaluate both operands into registers and combine with
-- @instr dst src1 (RIReg src2)@.
4437 trivialCode rep signed instr x y = do
4438 (src1, code1) <- getSomeReg x
4439 (src2, code2) <- getSomeReg y
4440 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4441 return (Any rep code)
-- | Generate code for a three-register instruction that admits no
-- immediate operand: evaluate both subexpressions into registers, then
-- append the combining instruction with the destination supplied later.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
  (xReg, xCode) <- getSomeReg x
  (yReg, yCode) <- getSomeReg y
  -- the destination register is chosen by the caller, so return a
  -- function from dst to the finished instruction block
  let assemble dst = (xCode `appOL` yCode) `snocOL` instr dst xReg yReg
  return (Any rep assemble)
-- | Generate code for a unary (two-register) instruction: evaluate the
-- operand into a register, then emit @instr dst src@ for the
-- caller-chosen destination.
trivialUCode rep instr x = do
  (srcReg, srcCode) <- getSomeReg x
  let withDest dst = srcCode `snocOL` instr dst srcReg
  return (Any rep withDest)
4456 -- There is no "remainder" instruction on the PPC, so we have to do
-- it by hand: dst := src1 - (src1 `div` src2) * src2.
4458 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4460 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4461 -> CmmExpr -> CmmExpr -> NatM Register
4462 remainderCode rep div x y = do
4463 (src1, code1) <- getSomeReg x
4464 (src2, code2) <- getSomeReg y
-- NOTE(review): this chunk is missing the line that emits the `div`
-- instruction itself (before MULLW) and the final subtraction that
-- closes the instruction list — confirm against the full source.
4465 let code dst = code1 `appOL` code2 `appOL` toOL [
4467 MULLW dst dst (RIReg src2),
4470 return (Any rep code)
4472 #endif /* powerpc_TARGET_ARCH */
4475 -- -----------------------------------------------------------------------------
4476 -- Coercing to/from integer/floating-point...
4478 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4479 -- conversions. We have to store temporaries in memory to move
4480 -- between the integer and the floating point register sets.
4482 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4483 pretend, on sparc at least, that double and float regs are separate
4484 -- kinds, so the value has to be computed into one kind before being
4485 -- explicitly "converted" to live in the other kind.
-- | Type signatures shared by the per-architecture conversion routines
-- below: integer -> float and float -> integer coercions, taking the
-- source and destination 'MachRep's.
4487 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4488 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
-- SPARC additionally pretends float and double registers are separate
-- kinds (see the comment above), so it needs explicit conversions.
4490 #if sparc_TARGET_ARCH
4491 coerceDbl2Flt :: CmmExpr -> NatM Register
4492 coerceFlt2Dbl :: CmmExpr -> NatM Register
4495 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4497 #if alpha_TARGET_ARCH
-- Alpha int -> FP coercion: the value is bounced through a stack slot
-- (spRel 0) because values must move between the integer and
-- floating-point register files via memory (see section comment above).
-- NOTE(review): the heads of both functions and several interior lines
-- (including the stores/converts paired with the LD/ST shown) are
-- missing from this chunk.
4500 = getRegister x `thenNat` \ register ->
4501 getNewRegNat IntRep `thenNat` \ reg ->
4503 code = registerCode register reg
4504 src = registerName register reg
4506 code__2 dst = code . mkSeqInstrs [
4508 LD TF dst (spRel 0),
4511 return (Any F64 code__2)
-- Alpha FP -> int coercion: the same trick in the other direction —
-- store the FP temp to the stack slot and reload into an integer reg.
4515 = getRegister x `thenNat` \ register ->
4516 getNewRegNat F64 `thenNat` \ tmp ->
4518 code = registerCode register tmp
4519 src = registerName register tmp
4521 code__2 dst = code . mkSeqInstrs [
4523 ST TF tmp (spRel 0),
4526 return (Any IntRep code__2)
4528 #endif /* alpha_TARGET_ARCH */
4530 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4532 #if i386_TARGET_ARCH
-- | i386 (x87): integer -> float conversion; the pseudo-opcode is
-- chosen by the *destination* rep (GITOF for F32, GITOD for F64).
-- NOTE(review): the `let` introducing these bindings is missing from
-- this chunk.
4534 coerceInt2FP from to x = do
4535 (x_reg, x_code) <- getSomeReg x
4537 opc = case to of F32 -> GITOF; F64 -> GITOD
4538 code dst = x_code `snocOL` opc x_reg dst
4539 -- ToDo: works for non-I32 reps?
4541 return (Any to code)
-- | i386 (x87): float -> integer conversion; the pseudo-opcode is
-- chosen by the *source* rep (GFTOI for F32, GDTOI for F64).
-- NOTE(review): the `let` introducing these bindings is missing from
-- this chunk.
4545 coerceFP2Int from to x = do
4546 (x_reg, x_code) <- getSomeReg x
4548 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4549 code dst = x_code `snocOL` opc x_reg dst
4550 -- ToDo: works for non-I32 reps?
4552 return (Any to code)
4554 #endif /* i386_TARGET_ARCH */
4556 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4558 #if x86_64_TARGET_ARCH
-- | x86-64 (SSE): float -> int via CVTSS2SI/CVTSD2SI, chosen by the
-- *source* rep; the operand may live in memory ('getOperand').
-- NOTE(review): the `let` line is missing from this chunk.
4560 coerceFP2Int from to x = do
4561 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4563 opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4564 code dst = x_code `snocOL` opc x_op dst
4566 return (Any to code) -- works even if the destination rep is <I32
-- | x86-64 (SSE): int -> float via CVTSI2SS/CVTSI2SD, chosen by the
-- *destination* rep; the operand may live in memory ('getOperand').
-- NOTE(review): the `let` line is missing from this chunk.
4568 coerceInt2FP from to x = do
4569 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4571 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4572 code dst = x_code `snocOL` opc x_op dst
4574 return (Any to code) -- works even if the destination rep is <I32
-- | x86-64 (SSE): float <-> double conversion; the opcode is chosen by
-- the *destination* rep (to F32 narrows with CVTSD2SS, to F64 widens
-- with CVTSS2SD).  NOTE(review): the `let` line is missing from this
-- chunk.
4576 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4577 coerceFP2FP to x = do
4578 (x_reg, x_code) <- getSomeReg x
4580 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4581 code dst = x_code `snocOL` opc x_reg dst
4583 return (Any to code)
4587 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4589 #if sparc_TARGET_ARCH
-- SPARC int -> FP: bounce the integer through a word just below the
-- stack pointer (spRel (-2)) to cross from the integer to the FP
-- register file, then convert in place with FxTOy from W to the target
-- size.  NOTE(review): the function head and the enclosing `let`/`in`
-- lines are missing from this chunk.
4592 = getRegister x `thenNat` \ register ->
4593 getNewRegNat IntRep `thenNat` \ reg ->
4595 code = registerCode register reg
4596 src = registerName register reg
4598 code__2 dst = code `appOL` toOL [
4599 ST W src (spRel (-2)),
4600 LD W (spRel (-2)) dst,
4601 FxTOy W (primRepToSize pk) dst dst]
4603 return (Any pk code__2)
-- SPARC FP -> int: convert to a word-sized value in an FP temp with
-- FxTOy, then bounce it through the same stack slot back into an
-- integer register.  Only F32/F64 sources are legal (ASSERT).
4606 coerceFP2Int fprep x
4607 = ASSERT(fprep == F64 || fprep == F32)
4608 getRegister x `thenNat` \ register ->
4609 getNewRegNat fprep `thenNat` \ reg ->
4610 getNewRegNat F32 `thenNat` \ tmp ->
4612 code = registerCode register reg
4613 src = registerName register reg
4614 code__2 dst = code `appOL` toOL [
4615 FxTOy (primRepToSize fprep) W src tmp,
4616 ST W tmp (spRel (-2)),
4617 LD W (spRel (-2)) dst]
4619 return (Any IntRep code__2)
-- SPARC double -> float: a single register-to-register FxTOy DF F
-- conversion.  NOTE(review): head line missing from this chunk.
4623 = getRegister x `thenNat` \ register ->
4624 getNewRegNat F64 `thenNat` \ tmp ->
4625 let code = registerCode register tmp
4626 src = registerName register tmp
4629 (\dst -> code `snocOL` FxTOy DF F src dst))
-- SPARC float -> double: the inverse FxTOy F DF conversion.
-- NOTE(review): head line missing from this chunk.
4633 = getRegister x `thenNat` \ register ->
4634 getNewRegNat F32 `thenNat` \ tmp ->
4635 let code = registerCode register tmp
4636 src = registerName register tmp
4639 (\dst -> code `snocOL` FxTOy F DF src dst))
4641 #endif /* sparc_TARGET_ARCH */
4643 #if powerpc_TARGET_ARCH
-- | PowerPC int -> FP, using the magic-number trick (32-bit PPC has no
-- direct int->FP move):
--   * XORIS flips the source word's sign bit;
--   * that word is stored as the low half (spRel 3) and 0x4330 as the
--     high half (spRel 2) of an in-memory double, which is then loaded
--     into an FP temp;
--   * subtracting the constant double (0x43300000,0x80000000) — built
--     as static data behind a label reached via
--     'cmmMakeDynamicReference'/'getAmode' — appears to leave the
--     converted value in dst (the load of the subtrahend is on a line
--     missing from this chunk — confirm);
--   * FRSP rounds to single precision when the target rep is F32;
--   * sub-word sources are sign-extended first with EXTS.
-- NOTE(review): several lines are missing from this chunk (the `let`,
-- the LDATA/label emission around the static literals, the subtrahend
-- load, and the catch-all cases of maybe_exts/maybe_frsp).
4644 coerceInt2FP fromRep toRep x = do
4645 (src, code) <- getSomeReg x
4646 lbl <- getNewLabelNat
4647 itmp <- getNewRegNat I32
4648 ftmp <- getNewRegNat F64
4649 dynRef <- cmmMakeDynamicReference addImportNat False lbl
4650 Amode addr addr_code <- getAmode dynRef
4652 code' dst = code `appOL` maybe_exts `appOL` toOL [
4655 CmmStaticLit (CmmInt 0x43300000 I32),
4656 CmmStaticLit (CmmInt 0x80000000 I32)],
4657 XORIS itmp src (ImmInt 0x8000),
4658 ST I32 itmp (spRel 3),
4659 LIS itmp (ImmInt 0x4330),
4660 ST I32 itmp (spRel 2),
4661 LD F64 ftmp (spRel 2)
4662 ] `appOL` addr_code `appOL` toOL [
4664 FSUB F64 dst ftmp dst
4665 ] `appOL` maybe_frsp dst
4667 maybe_exts = case fromRep of
4668 I8 -> unitOL $ EXTS I8 src src
4669 I16 -> unitOL $ EXTS I16 src src
4671 maybe_frsp dst = case toRep of
4672 F32 -> unitOL $ FRSP dst dst
4674 return (Any toRep code')
-- | PowerPC FP -> int: convert to an integer value while still in an
-- FP register, spill the 64-bit FP value to the stack (spRel 2), and
-- reload only the low word (spRel 3) into the integer destination.
-- NOTE(review): the `let` line and the FP->int conversion instruction
-- itself (between the "convert" comment and the ST) are missing from
-- this chunk — confirm against the full source.
4676 coerceFP2Int fromRep toRep x = do
4677 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4678 (src, code) <- getSomeReg x
4679 tmp <- getNewRegNat F64
4681 code' dst = code `appOL` toOL [
4682 -- convert to int in FP reg
4684 -- store value (64bit) from FP to stack
4685 ST F64 tmp (spRel 2),
4686 -- read low word of value (high word is undefined)
4687 LD I32 dst (spRel 3)]
4688 return (Any toRep code')
4689 #endif /* powerpc_TARGET_ARCH */
4692 -- -----------------------------------------------------------------------------
4693 -- eXTRA_STK_ARGS_HERE
4695 -- We (allegedly) put the first six C-call arguments in registers;
4696 -- where do we start putting the rest of them?
4698 -- Moved from MachInstrs (SDM):
4700 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4701 eXTRA_STK_ARGS_HERE :: Int
4703 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))