1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
34 import StaticFlags ( opt_PIC )
35 import ForeignCall ( CCallConv(..) )
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import Debug.Trace ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Data.Maybe ( fromJust )
54 -- -----------------------------------------------------------------------------
55 -- Top-level of the instruction selector
57 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
58 -- They are really trees of insns to facilitate fast appending, where a
59 -- left-to-right traversal (pre-order?) yields the insns in the correct
62 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm item.  A CmmProc has each of its basic
-- blocks run through the instruction selector; any statics the selector
-- emits become extra top-level data items.  CmmData passes through unchanged.
64 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
65 cmmTopCodeGen (CmmProc info lab params blocks) = do
66 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
67 picBaseMb <- getPicBaseMaybeNat
68 let proc = CmmProc info lab params (concat nat_blocks)
69 tops = proc : concat statics
-- NOTE(review): the `case picBaseMb of` scrutinee line is missing from this
-- listing; the two alternatives below belong to that case.
71 Just picBase -> initializePicBase picBase tops
72 Nothing -> return tops
74 cmmTopCodeGen (CmmData sec dat) = do
75 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Generate native code for one basic block.  Selection may introduce new
-- block boundaries (NEWBLOCK pseudo-instructions) and literal data (LDATA);
-- the fold below re-splits the stream into blocks and floats the data out.
77 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
78 basicBlockCodeGen (BasicBlock id stmts) = do
79 instrs <- stmtsToInstrs stmts
80 -- code generation may introduce new basic block boundaries, which
81 -- are indicated by the NEWBLOCK instruction. We must split up the
82 -- instruction stream into basic blocks again. Also, we extract
85 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- NEWBLOCK: the instructions accumulated so far become the new block's body.
87 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
88 = ([], BasicBlock id instrs : blocks, statics)
-- LDATA: hoist literal data out of the instruction stream into a section.
89 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
90 = (instrs, blocks, CmmData sec dat:statics)
-- Ordinary instruction: prepend to the current (rightmost-first) block.
91 mkBlocks instr (instrs,blocks,statics)
92 = (instr:instrs, blocks, statics)
94 return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of statements, concatenating the generated code.
96 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
98 = do instrss <- mapM stmtToInstrs stmts
99 return (concatOL instrss)
-- | Translate a single Cmm statement.  Assignments dispatch on the MachRep
-- of the assigned value: floating-point, 64-bit-on-a-32-bit-target, or
-- plain integer.
101 stmtToInstrs :: CmmStmt -> NatM InstrBlock
102 stmtToInstrs stmt = case stmt of
103 CmmNop -> return nilOL
104 CmmComment s -> return (unitOL (COMMENT s))
-- NOTE(review): the CmmAssign case head is missing from this listing; the
-- guards below belong to it (reg := src).
107 | isFloatingRep kind -> assignReg_FltCode kind reg src
108 #if WORD_SIZE_IN_BITS==32
109 | kind == I64 -> assignReg_I64Code reg src
111 | otherwise -> assignReg_IntCode kind reg src
112 where kind = cmmRegRep reg
-- NOTE(review): likewise, the CmmStore case head (mem[addr] := src) is
-- missing here.
115 | isFloatingRep kind -> assignMem_FltCode kind addr src
116 #if WORD_SIZE_IN_BITS==32
117 | kind == I64 -> assignMem_I64Code addr src
119 | otherwise -> assignMem_IntCode kind addr src
120 where kind = cmmExprRep src
122 CmmCall target result_regs args vols
123 -> genCCall target result_regs args vols
125 CmmBranch id -> genBranch id
-- genCondJump takes the target id first, then the condition expression.
126 CmmCondBranch arg id -> genCondJump id arg
127 CmmSwitch arg ids -> genSwitch arg ids
-- the params argument is discarded; genJump only needs the target.
128 CmmJump arg params -> genJump arg
130 -- -----------------------------------------------------------------------------
131 -- General things for putting together code sequences
133 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
134 -- CmmExprs into CmmRegOff?
-- | Rewrite a 'CmmRegOff' into the equivalent explicit addition of the
-- register and an immediate offset, carried out at the register's own
-- 'MachRep'.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    CmmMachOp (MO_Add rep) [CmmReg reg, offExpr]
  where
    rep     = cmmRegRep reg
    offExpr = CmmLit (CmmInt (fromIntegral off) rep)
140 -- -----------------------------------------------------------------------------
141 -- Code gen for 64-bit arithmetic on 32-bit platforms
144 Simple support for generating 64-bit code (ie, 64 bit values and 64
145 bit assignments) on 32-bit platforms. Unlike the main code generator
146 we merely shoot for generating working code as simply as possible, and
147 pay little attention to code quality. Specifically, there is no
148 attempt to deal cleverly with the fixed-vs-floating register
149 distinction; all values are generated into (pairs of) floating
150 registers, even if this would mean some redundant reg-reg moves as a
151 result. Only one of the VRegUniques is returned, since it will be
152 of the VRegUniqueLo form, and the upper-half VReg can be determined
153 by applying getHiVRegFromLo to it.
-- | Result of 64-bit instruction selection on a 32-bit target: the code
-- plus the virtual register holding the LOW 32 bits of the value; the high
-- half's register is recovered with getHiVRegFromLo.
156 data ChildCode64 -- a.k.a "Register64"
-- NOTE(review): the constructor line (ChildCode64 InstrBlock …) is missing
-- from this listing; the field comments below describe its Reg field.
159 Reg -- the lower 32-bit temporary which contains the
160 -- result; use getHiVRegFromLo to find the other
161 -- VRegUnique. Rules of this simplified insn
162 -- selection game are therefore that the returned
163 -- Reg may be modified
165 #if WORD_SIZE_IN_BITS==32
-- Signatures for the per-architecture 64-bit helpers defined below.
166 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
167 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
170 #ifndef x86_64_TARGET_ARCH
171 iselExpr64 :: CmmExpr -> NatM ChildCode64
174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | i386: store a 64-bit value as two 32-bit MOVs.  Little-endian layout:
-- low word at addr, high word at addr+4.
178 assignMem_I64Code addrTree valueTree = do
179 Amode addr addr_code <- getAmode addrTree
180 ChildCode64 vcode rlo <- iselExpr64 valueTree
182 rhi = getHiVRegFromLo rlo
184 -- Little-endian store
185 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
186 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
188 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | i386: assign a 64-bit value to a local register pair via two reg-reg MOVs.
191 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
192 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
194 r_dst_lo = mkVReg u_dst I32
195 r_dst_hi = getHiVRegFromLo r_dst_lo
196 r_src_hi = getHiVRegFromLo r_src_lo
197 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
198 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
201 vcode `snocOL` mov_lo `snocOL` mov_hi
204 assignReg_I64Code lvalue valueTree
205 = panic "assignReg_I64Code(i386): invalid lvalue"
-- | i386 iselExpr64: 64-bit literal -> two 32-bit immediate MOVs
-- (r = low word, q = high word of the literal).
209 iselExpr64 (CmmLit (CmmInt i _)) = do
210 (rlo,rhi) <- getNewRegPairNat I32
212 r = fromIntegral (fromIntegral i :: Word32)
213 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
215 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
216 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
219 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads from addr and addr+4.
221 iselExpr64 (CmmLoad addrTree I64) = do
222 Amode addr addr_code <- getAmode addrTree
223 (rlo,rhi) <- getNewRegPairNat I32
225 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
226 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
229 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A local I64 register needs no code; just name its low-half vreg.
233 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
234 = return (ChildCode64 nilOL (mkVReg vu I32))
236 -- we handle addition, but rather badly
-- 64-bit add of a literal: ADD on the low words sets the carry flag,
-- which the ADC on the high words then consumes.
237 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
238 ChildCode64 code1 r1lo <- iselExpr64 e1
239 (rlo,rhi) <- getNewRegPairNat I32
241 r = fromIntegral (fromIntegral i :: Word32)
242 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
243 r1hi = getHiVRegFromLo r1lo
245 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
246 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
247 MOV I32 (OpReg r1hi) (OpReg rhi),
248 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
250 return (ChildCode64 code rlo)
-- General 64-bit add: same ADD/ADC pattern on two selected operands.
252 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
253 ChildCode64 code1 r1lo <- iselExpr64 e1
254 ChildCode64 code2 r2lo <- iselExpr64 e2
255 (rlo,rhi) <- getNewRegPairNat I32
257 r1hi = getHiVRegFromLo r1lo
258 r2hi = getHiVRegFromLo r2lo
261 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
262 ADD I32 (OpReg r2lo) (OpReg rlo),
263 MOV I32 (OpReg r1hi) (OpReg rhi),
264 ADC I32 (OpReg r2hi) (OpReg rhi) ]
266 return (ChildCode64 code rlo)
-- Catch-all (its `iselExpr64 expr` head is missing from this listing).
269 = pprPanic "iselExpr64(i386)" (ppr expr)
271 #endif /* i386_TARGET_ARCH */
273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
275 #if sparc_TARGET_ARCH
-- | sparc: store a 64-bit value as two 32-bit STs.  Big-endian layout:
-- high word at offset 0, low word at offset 4.
277 assignMem_I64Code addrTree valueTree = do
278 Amode addr addr_code <- getAmode addrTree
279 ChildCode64 vcode rlo <- iselExpr64 valueTree
280 (src, code) <- getSomeReg addrTree
282 rhi = getHiVRegFromLo rlo
284 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
285 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
286 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- | sparc: assign to a local 64-bit register pair; mkMOV is the usual
-- sparc move idiom, OR with the hard-wired zero register g0.
288 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
289 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
291 r_dst_lo = mkVReg u_dst pk
292 r_dst_hi = getHiVRegFromLo r_dst_lo
293 r_src_hi = getHiVRegFromLo r_src_lo
294 mov_lo = mkMOV r_src_lo r_dst_lo
295 mov_hi = mkMOV r_src_hi r_dst_hi
296 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
297 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
298 assignReg_I64Code lvalue valueTree
299 = panic "assignReg_I64Code(sparc): invalid lvalue"
302 -- Don't delete this -- it's very handy for debugging.
304 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
305 -- = panic "iselExpr64(???)"
-- NOTE(review): this case matches only AddrRegReg amodes but then addresses
-- through r1 alone, ignoring r2 — looks suspicious; verify against the
-- sparc getAmode before trusting it for non-zero r2.
307 iselExpr64 (CmmLoad addrTree I64) = do
308 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
309 rlo <- getNewRegNat I32
310 let rhi = getHiVRegFromLo rlo
311 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
312 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
314 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- Local I64 register: copy both halves into a fresh pair.
318 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
319 r_dst_lo <- getNewRegNat I32
320 let r_dst_hi = getHiVRegFromLo r_dst_lo
321 r_src_lo = mkVReg uq I32
322 r_src_hi = getHiVRegFromLo r_src_lo
323 mov_lo = mkMOV r_src_lo r_dst_lo
324 mov_hi = mkMOV r_src_hi r_dst_hi
325 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
327 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
-- Catch-all (its `iselExpr64 expr` head is missing from this listing).
331 = pprPanic "iselExpr64(sparc)" (ppr expr)
333 #endif /* sparc_TARGET_ARCH */
335 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
337 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for both 32-bit halves of a 64-bit memory
-- operand (big-endian: high word first, low word at +4).  Falls back to a
-- register base when the +4 offset cannot be folded into the amode.
339 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
340 getI64Amodes addrTree = do
341 Amode hi_addr addr_code <- getAmode addrTree
342 case addrOffset hi_addr 4 of
343 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
344 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
345 return (AddrRegImm hi_ptr (ImmInt 0),
346 AddrRegImm hi_ptr (ImmInt 4),
-- | ppc: store a 64-bit value as two 32-bit STs through the amodes above.
349 assignMem_I64Code addrTree valueTree = do
350 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
351 ChildCode64 vcode rlo <- iselExpr64 valueTree
353 rhi = getHiVRegFromLo rlo
356 mov_hi = ST I32 rhi hi_addr
357 mov_lo = ST I32 rlo lo_addr
359 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | ppc: assign to a local 64-bit register pair with two MR (move-register)
-- instructions.
361 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
362 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
364 r_dst_lo = mkVReg u_dst I32
365 r_dst_hi = getHiVRegFromLo r_dst_lo
366 r_src_hi = getHiVRegFromLo r_src_lo
367 mov_lo = MR r_dst_lo r_src_lo
368 mov_hi = MR r_dst_hi r_src_hi
371 vcode `snocOL` mov_lo `snocOL` mov_hi
374 assignReg_I64Code lvalue valueTree
375 = panic "assignReg_I64Code(powerpc): invalid lvalue"
378 -- Don't delete this -- it's very handy for debugging.
380 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
381 -- = panic "iselExpr64(???)"
-- | ppc iselExpr64: 64-bit load -> two 32-bit LDs.
383 iselExpr64 (CmmLoad addrTree I64) = do
384 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
385 (rlo, rhi) <- getNewRegPairNat I32
386 let mov_hi = LD I32 rhi hi_addr
387 mov_lo = LD I32 rlo lo_addr
388 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A local I64 register needs no code; just name its low-half vreg.
391 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
392 = return (ChildCode64 nilOL (mkVReg vu I32))
-- | ppc iselExpr64: 64-bit integer literal.  Each 32-bit half is built with
-- the standard PowerPC lis (load-immediate-shifted) + or idiom: lis sets the
-- upper 16 bits of the target register, or merges in the lower 16.
-- half0..half3 are the literal's 16-bit chunks, least significant first.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: the high half must be assembled in rhi.  The
                -- original read `OR rlo rlo (RIImm $ ImmInt half2)`, which
                -- corrupted the already-built low word and left bits 32..47
                -- of the result unset.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- | ppc iselExpr64: 64-bit addition.  ADDC adds the low words and records
-- the carry; the carry-consuming add of the high words (ADDE) is on a line
-- missing from this listing.
411 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
412 ChildCode64 code1 r1lo <- iselExpr64 e1
413 ChildCode64 code2 r2lo <- iselExpr64 e2
414 (rlo,rhi) <- getNewRegPairNat I32
416 r1hi = getHiVRegFromLo r1lo
417 r2hi = getHiVRegFromLo r2lo
420 toOL [ ADDC rlo r1lo r2lo,
423 return (ChildCode64 code rlo)
-- Catch-all (its `iselExpr64 expr` head is missing from this listing).
426 = pprPanic "iselExpr64(powerpc)" (ppr expr)
428 #endif /* powerpc_TARGET_ARCH */
431 -- -----------------------------------------------------------------------------
432 -- The 'Register' type
434 -- 'Register's passed up the tree. If the stix code forces the register
435 -- to live in a pre-decided machine register, it comes out as @Fixed@;
436 -- otherwise, it comes out as @Any@, and the parent can decide which
437 -- register to put it in.
-- NOTE(review): the `data Register` declaration head is missing from this
-- listing; these are its two alternatives.  Fixed pins the value to a
-- specific register; Any lets the consumer choose the destination.
440 = Fixed MachRep Reg InstrBlock
441 | Any MachRep (Reg -> InstrBlock)
-- | Replace the 'MachRep' carried by a 'Register', leaving the register /
-- code-generation payload untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep r rep = case r of
    Fixed _ reg code -> Fixed rep reg code
    Any _ codefn     -> Any rep codefn
448 -- -----------------------------------------------------------------------------
449 -- Utils based on getRegister, below
451 -- The dual to getAnyReg: compute an expression into a register, but
452 -- we don't mind which one it is.
-- | Compute an expression into some register, caller doesn't care which.
-- NOTE(review): the equation head and the `case` that distinguishes
-- Fixed from Any results are missing from this listing; the Any branch
-- below allocates a fresh temporary and materialises the code into it.
453 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
455 r <- getRegister expr
458 tmp <- getNewRegNat rep
459 return (tmp, code tmp)
463 -- -----------------------------------------------------------------------------
464 -- Grab the Reg for a CmmReg
-- | Map a CmmReg to the Reg that holds it.  Locals get a virtual register;
-- globals must map to a real machine register on this platform.
466 getRegisterReg :: CmmReg -> Reg
-- NOTE(review): the right-hand side of this local-register equation is
-- missing from this listing.
468 getRegisterReg (CmmLocal (LocalReg u pk))
471 getRegisterReg (CmmGlobal mid)
472 = case get_GlobalReg_reg_or_addr mid of
473 Left (RealReg rrno) -> RealReg rrno
474 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
475 -- By this stage, the only MagicIds remaining should be the
476 -- ones which map to a real machine register on this
477 -- platform. Hence ...
480 -- -----------------------------------------------------------------------------
481 -- Generate code to get a subtree into a Register
483 -- Don't delete this -- it's very handy for debugging.
485 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
486 -- = panic "getRegister(???)"
-- | Generate code to get a subtree into a Register.  These first equations
-- are the machine-independent cases; architecture-specific cases follow.
488 getRegister :: CmmExpr -> NatM Register
490 #if !x86_64_TARGET_ARCH
491 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
492 -- register, it can only be used for rip-relative addressing.
493 getRegister (CmmReg (CmmGlobal PicBaseReg))
495 reg <- getPicBaseNat wordRep
496 return (Fixed wordRep reg nilOL)
-- A plain register reference needs no code.
499 getRegister (CmmReg reg)
500 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
-- Reg+offset is normalised to an explicit add first.
502 getRegister tree@(CmmRegOff _ _)
503 = getRegister (mangleIndexTree tree)
506 #if WORD_SIZE_IN_BITS==32
507 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
508 -- TO_W_(x), TO_W_(x >> 32)
-- Truncating (x >> 32) to 32 bits is just the high half of the pair.
510 getRegister (CmmMachOp (MO_U_Conv I64 I32)
511 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
512 ChildCode64 code rlo <- iselExpr64 x
513 return $ Fixed I32 (getHiVRegFromLo rlo) code
515 getRegister (CmmMachOp (MO_S_Conv I64 I32)
516 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
517 ChildCode64 code rlo <- iselExpr64 x
518 return $ Fixed I32 (getHiVRegFromLo rlo) code
-- Plain 64->32 truncation is the low half of the pair.
520 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
521 ChildCode64 code rlo <- iselExpr64 x
522 return $ Fixed I32 rlo code
524 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
525 ChildCode64 code rlo <- iselExpr64 x
526 return $ Fixed I32 rlo code
530 -- end of machine-"independent" bit; here we go on the rest...
-- ALPHA SECTION.  This whole block still pattern-matches on the old Stix
-- types (StDouble, StPrim, StInt, StCall) rather than Cmm — it is legacy
-- code kept only under alpha_TARGET_ARCH and would not compile against the
-- Cmm-based getRegister signature without porting.
532 #if alpha_TARGET_ARCH
-- Float literal: emit it as read-only data and load it via a temporary.
534 getRegister (StDouble d)
535 = getBlockIdNat `thenNat` \ lbl ->
536 getNewRegNat PtrRep `thenNat` \ tmp ->
537 let code dst = mkSeqInstrs [
538 LDATA RoDataSegment lbl [
539 DATA TF [ImmLab (rational d)]
541 LDA tmp (AddrImm (ImmCLbl lbl)),
542 LD TF dst (AddrReg tmp)]
544 return (Any F64 code)
546 getRegister (StPrim primop [x]) -- unary PrimOps
548 IntNegOp -> trivialUCode (NEG Q False) x
550 NotOp -> trivialUCode NOT x
552 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
553 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
555 OrdOp -> coerceIntCode IntRep x
558 Float2IntOp -> coerceFP2Int x
559 Int2FloatOp -> coerceInt2FP pr x
560 Double2IntOp -> coerceFP2Int x
561 Int2DoubleOp -> coerceInt2FP pr x
563 Double2FloatOp -> coerceFltCode x
564 Float2DoubleOp -> coerceFltCode x
-- Transcendental ops become calls to the corresponding libm routine.
566 other_op -> getRegister (StCall fn CCallConv F64 [x])
568 fn = case other_op of
569 FloatExpOp -> FSLIT("exp")
570 FloatLogOp -> FSLIT("log")
571 FloatSqrtOp -> FSLIT("sqrt")
572 FloatSinOp -> FSLIT("sin")
573 FloatCosOp -> FSLIT("cos")
574 FloatTanOp -> FSLIT("tan")
575 FloatAsinOp -> FSLIT("asin")
576 FloatAcosOp -> FSLIT("acos")
577 FloatAtanOp -> FSLIT("atan")
578 FloatSinhOp -> FSLIT("sinh")
579 FloatCoshOp -> FSLIT("cosh")
580 FloatTanhOp -> FSLIT("tanh")
581 DoubleExpOp -> FSLIT("exp")
582 DoubleLogOp -> FSLIT("log")
583 DoubleSqrtOp -> FSLIT("sqrt")
584 DoubleSinOp -> FSLIT("sin")
585 DoubleCosOp -> FSLIT("cos")
586 DoubleTanOp -> FSLIT("tan")
587 DoubleAsinOp -> FSLIT("asin")
588 DoubleAcosOp -> FSLIT("acos")
589 DoubleAtanOp -> FSLIT("atan")
590 DoubleSinhOp -> FSLIT("sinh")
591 DoubleCoshOp -> FSLIT("cosh")
592 DoubleTanhOp -> FSLIT("tanh")
594 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Dyadic primops.  Note Gt/Ge are encoded as Lt/Le with the operands
-- swapped (alpha only has LTT/LE/EQQ-style compares).
596 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
598 CharGtOp -> trivialCode (CMP LTT) y x
599 CharGeOp -> trivialCode (CMP LE) y x
600 CharEqOp -> trivialCode (CMP EQQ) x y
601 CharNeOp -> int_NE_code x y
602 CharLtOp -> trivialCode (CMP LTT) x y
603 CharLeOp -> trivialCode (CMP LE) x y
605 IntGtOp -> trivialCode (CMP LTT) y x
606 IntGeOp -> trivialCode (CMP LE) y x
607 IntEqOp -> trivialCode (CMP EQQ) x y
608 IntNeOp -> int_NE_code x y
609 IntLtOp -> trivialCode (CMP LTT) x y
610 IntLeOp -> trivialCode (CMP LE) x y
612 WordGtOp -> trivialCode (CMP ULT) y x
-- NOTE(review): WordGeOp uses (x y) where the other Ge ops swap to (y x);
-- looks inconsistent — verify before relying on it.
613 WordGeOp -> trivialCode (CMP ULE) x y
614 WordEqOp -> trivialCode (CMP EQQ) x y
615 WordNeOp -> int_NE_code x y
616 WordLtOp -> trivialCode (CMP ULT) x y
617 WordLeOp -> trivialCode (CMP ULE) x y
619 AddrGtOp -> trivialCode (CMP ULT) y x
620 AddrGeOp -> trivialCode (CMP ULE) y x
621 AddrEqOp -> trivialCode (CMP EQQ) x y
622 AddrNeOp -> int_NE_code x y
623 AddrLtOp -> trivialCode (CMP ULT) x y
624 AddrLeOp -> trivialCode (CMP ULE) x y
626 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
627 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
628 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
629 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
630 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
631 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
633 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
634 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
635 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
636 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
637 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
638 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
640 IntAddOp -> trivialCode (ADD Q False) x y
641 IntSubOp -> trivialCode (SUB Q False) x y
642 IntMulOp -> trivialCode (MUL Q False) x y
643 IntQuotOp -> trivialCode (DIV Q False) x y
644 IntRemOp -> trivialCode (REM Q False) x y
646 WordAddOp -> trivialCode (ADD Q False) x y
647 WordSubOp -> trivialCode (SUB Q False) x y
648 WordMulOp -> trivialCode (MUL Q False) x y
649 WordQuotOp -> trivialCode (DIV Q True) x y
650 WordRemOp -> trivialCode (REM Q True) x y
652 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
653 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
654 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
655 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
657 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
658 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
659 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
660 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
662 AddrAddOp -> trivialCode (ADD Q False) x y
663 AddrSubOp -> trivialCode (SUB Q False) x y
664 AddrRemOp -> trivialCode (REM Q True) x y
666 AndOp -> trivialCode AND x y
667 OrOp -> trivialCode OR x y
668 XorOp -> trivialCode XOR x y
669 SllOp -> trivialCode SLL x y
670 SrlOp -> trivialCode SRL x y
672 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
673 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
674 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
676 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
677 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
679 {- ------------------------------------------------------------
680 Some bizarre special code for getting condition codes into
681 registers. Integer non-equality is a test for equality
682 followed by an XOR with 1. (Integer comparisons always set
683 the result register to 0 or 1.) Floating point comparisons of
684 any kind leave the result in a floating point register, so we
685 need to wrangle an integer register out of things.
687 int_NE_code :: StixTree -> StixTree -> NatM Register
690 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
691 getNewRegNat IntRep `thenNat` \ tmp ->
693 code = registerCode register tmp
694 src = registerName register tmp
695 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
697 return (Any IntRep code__2)
699 {- ------------------------------------------------------------
700 Comments for int_NE_code also apply to cmpF_code
703 :: (Reg -> Reg -> Reg -> Instr)
705 -> StixTree -> StixTree
-- Branch-over sequence: set dst to 1, skip the clear if the FP compare's
-- chosen condition holds, else zero dst via OR with the zero register.
708 cmpF_code instr cond x y
709 = trivialFCode pr instr x y `thenNat` \ register ->
710 getNewRegNat F64 `thenNat` \ tmp ->
711 getBlockIdNat `thenNat` \ lbl ->
713 code = registerCode register tmp
714 result = registerName register tmp
716 code__2 dst = code . mkSeqInstrs [
717 OR zeroh (RIImm (ImmInt 1)) dst,
718 BF cond result (ImmCLbl lbl),
719 OR zeroh (RIReg zeroh) dst,
722 return (Any IntRep code__2)
724 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
725 ------------------------------------------------------------
-- Memory load at the requested size.
727 getRegister (CmmLoad pk mem)
728 = getAmode mem `thenNat` \ amode ->
730 code = amodeCode amode
731 src = amodeAddr amode
732 size = primRepToSize pk
733 code__2 dst = code . mkSeqInstr (LD size dst src)
735 return (Any pk code__2)
-- Integer literal: small immediates via OR-with-zero, larger via LDI.
737 getRegister (StInt i)
740 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
742 return (Any IntRep code)
745 code dst = mkSeqInstr (LDI Q dst src)
747 return (Any IntRep code)
749 src = ImmInt (fromInteger i)
-- Address-of-label via LDA.
754 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
756 return (Any PtrRep code)
759 imm__2 = case imm of Just x -> x
761 #endif /* alpha_TARGET_ARCH */
763 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: float literal goes into a data section referenced via a
-- (possibly PIC) dynamic reference.
767 getRegister (CmmLit (CmmFloat f F32)) = do
768 lbl <- getNewLabelNat
769 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
770 Amode addr addr_code <- getAmode dynRef
774 CmmStaticLit (CmmFloat f F32)]
775 `consOL` (addr_code `snocOL`
778 return (Any F32 code)
-- i386 double literals: 0.0 and 1.0 have dedicated x87 load instructions
-- (GLDZ / GLD1); anything else goes through a data section like F32.
781 getRegister (CmmLit (CmmFloat d F64))
783 = let code dst = unitOL (GLDZ dst)
784 in return (Any F64 code)
788 = let code dst = unitOL (GLD1 dst)
789 in return (Any F64 code)
791 lbl <- getNewLabelNat
792 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
793 Amode addr addr_code <- getAmode dynRef
797 CmmStaticLit (CmmFloat d F64)]
798 `consOL` (addr_code `snocOL`
801 return (Any F64 code)
803 #endif /* i386_TARGET_ARCH */
805 #if x86_64_TARGET_ARCH
-- x86_64: 0.0 is xor dst,dst; other float literals load rip-relative
-- from a data section.
807 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
808 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
809 -- I don't know why there are xorpd, xorps, and pxor instructions.
810 -- They all appear to do the same thing --SDM
811 return (Any rep code)
813 getRegister (CmmLit (CmmFloat f rep)) = do
814 lbl <- getNewLabelNat
815 let code dst = toOL [
818 CmmStaticLit (CmmFloat f rep)],
819 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
822 return (Any rep code)
824 #endif /* x86_64_TARGET_ARCH */
826 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
828 -- catch simple cases of zero- or sign-extended load
-- Fuse load+extend into a single movzx/movsx.
829 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
830 code <- intLoadCode (MOVZxL I8) addr
831 return (Any I32 code)
833 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
834 code <- intLoadCode (MOVSxL I8) addr
835 return (Any I32 code)
837 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
838 code <- intLoadCode (MOVZxL I16) addr
839 return (Any I32 code)
841 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
842 code <- intLoadCode (MOVSxL I16) addr
843 return (Any I32 code)
847 #if x86_64_TARGET_ARCH
849 -- catch simple cases of zero- or sign-extended load
850 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
851 code <- intLoadCode (MOVZxL I8) addr
852 return (Any I64 code)
854 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
855 code <- intLoadCode (MOVSxL I8) addr
856 return (Any I64 code)
858 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
859 code <- intLoadCode (MOVZxL I16) addr
860 return (Any I64 code)
862 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
863 code <- intLoadCode (MOVSxL I16) addr
864 return (Any I64 code)
866 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
867 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
868 return (Any I64 code)
870 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
871 code <- intLoadCode (MOVSxL I32) addr
872 return (Any I64 code)
876 #if x86_64_TARGET_ARCH
-- PicBaseReg + literal is exactly a rip-relative LEA on amd64.
877 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
878 CmmLit displacement])
879 = return $ Any I64 (\dst -> unitOL $
880 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
883 #if x86_64_TARGET_ARCH
-- SSE float negate: xorps/xorpd with a sign-bit mask held in a 16-byte
-- aligned read-only data section (only the first lane's bit matters).
884 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
885 x_code <- getAnyReg x
886 lbl <- getNewLabelNat
888 code dst = x_code dst `appOL` toOL [
889 -- This is how gcc does it, so it can't be that bad:
890 LDATA ReadOnlyData16 [
893 CmmStaticLit (CmmInt 0x80000000 I32),
894 CmmStaticLit (CmmInt 0 I32),
895 CmmStaticLit (CmmInt 0 I32),
896 CmmStaticLit (CmmInt 0 I32)
898 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
899 -- xorps, so we need the 128-bit constant
900 -- ToDo: rip-relative
903 return (Any F32 code)
905 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
906 x_code <- getAnyReg x
907 lbl <- getNewLabelNat
909 -- This is how gcc does it, so it can't be that bad:
910 code dst = x_code dst `appOL` toOL [
911 LDATA ReadOnlyData16 [
914 CmmStaticLit (CmmInt 0x8000000000000000 I64),
915 CmmStaticLit (CmmInt 0 I64)
917 -- gcc puts an unpck here. Wonder if we need it.
918 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
919 -- xorpd, so we need the 128-bit constant
922 return (Any F64 code)
925 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Unary MachOps: negate/not, then the integer narrowings, widenings,
-- and float conversions, with helpers defined in the trailing where-block.
927 getRegister (CmmMachOp mop [x]) -- unary MachOps
930 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
931 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
934 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
935 MO_Not rep -> trivialUCode rep (NOT rep) x
-- Narrowing conversions: reuse the source register (see toI8Reg below).
938 MO_U_Conv I32 I8 -> toI8Reg I32 x
939 MO_S_Conv I32 I8 -> toI8Reg I32 x
940 MO_U_Conv I16 I8 -> toI8Reg I16 x
941 MO_S_Conv I16 I8 -> toI8Reg I16 x
942 MO_U_Conv I32 I16 -> toI16Reg I32 x
943 MO_S_Conv I32 I16 -> toI16Reg I32 x
944 #if x86_64_TARGET_ARCH
945 MO_U_Conv I64 I32 -> conversionNop I64 x
946 MO_S_Conv I64 I32 -> conversionNop I64 x
947 MO_U_Conv I64 I16 -> toI16Reg I64 x
948 MO_S_Conv I64 I16 -> toI16Reg I64 x
949 MO_U_Conv I64 I8 -> toI8Reg I64 x
950 MO_S_Conv I64 I8 -> toI8Reg I64 x
953 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
954 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
-- Widening conversions: movzx (zero-extend) / movsx (sign-extend).
957 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
958 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
959 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
961 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
962 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
963 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
965 #if x86_64_TARGET_ARCH
966 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
967 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
968 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
969 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
970 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
971 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
972 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
973 -- However, we don't want the register allocator to throw it
974 -- away as an unnecessary reg-to-reg move, so we keep it in
975 -- the form of a movzl and print it as a movl later.
979 MO_S_Conv F32 F64 -> conversionNop F64 x
980 MO_S_Conv F64 F32 -> conversionNop F32 x
982 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
983 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
987 | isFloatingRep from -> coerceFP2Int from to x
988 | isFloatingRep to -> coerceInt2FP from to x
990 other -> pprPanic "getRegister" (pprMachOp mop)
992 -- signed or unsigned extension.
993 integerExtend from to instr expr = do
994 (reg,e_code) <- if from == I8 then getByteReg expr
999 instr from (OpReg reg) (OpReg dst)
1000 return (Any to code)
1002 toI8Reg new_rep expr
1003 = do codefn <- getAnyReg expr
1004 return (Any new_rep codefn)
1005 -- HACK: use getAnyReg to get a byte-addressable register.
1006 -- If the source was a Fixed register, this will add the
1007 -- mov instruction to put it into the desired destination.
1008 -- We're assuming that the destination won't be a fixed
1009 -- non-byte-addressable register; it won't be, because all
1010 -- fixed registers are word-sized.
1012 toI16Reg = toI8Reg -- for now
-- A no-op conversion: just relabel the result's MachRep.
1014 conversionNop new_rep expr
1015 = do e_code <- getRegister expr
1016 return (swizzleRegisterRep e_code new_rep)
-- Dyadic MachOps: comparisons, arithmetic, logic, shifts.  I8 operands are
-- ruled out up front; narrower operations are done at I32 and narrowed.
1019 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1020 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1022 MO_Eq F32 -> condFltReg EQQ x y
1023 MO_Ne F32 -> condFltReg NE x y
1024 MO_S_Gt F32 -> condFltReg GTT x y
1025 MO_S_Ge F32 -> condFltReg GE x y
1026 MO_S_Lt F32 -> condFltReg LTT x y
1027 MO_S_Le F32 -> condFltReg LE x y
1029 MO_Eq F64 -> condFltReg EQQ x y
1030 MO_Ne F64 -> condFltReg NE x y
1031 MO_S_Gt F64 -> condFltReg GTT x y
1032 MO_S_Ge F64 -> condFltReg GE x y
1033 MO_S_Lt F64 -> condFltReg LTT x y
1034 MO_S_Le F64 -> condFltReg LE x y
1036 MO_Eq rep -> condIntReg EQQ x y
1037 MO_Ne rep -> condIntReg NE x y
1039 MO_S_Gt rep -> condIntReg GTT x y
1040 MO_S_Ge rep -> condIntReg GE x y
1041 MO_S_Lt rep -> condIntReg LTT x y
1042 MO_S_Le rep -> condIntReg LE x y
1044 MO_U_Gt rep -> condIntReg GU x y
1045 MO_U_Ge rep -> condIntReg GEU x y
1046 MO_U_Lt rep -> condIntReg LU x y
1047 MO_U_Le rep -> condIntReg LEU x y
-- i386 uses the x87 'G' pseudo-ops; x86_64 uses SSE scalar ops.
1049 #if i386_TARGET_ARCH
1050 MO_Add F32 -> trivialFCode F32 GADD x y
1051 MO_Sub F32 -> trivialFCode F32 GSUB x y
1053 MO_Add F64 -> trivialFCode F64 GADD x y
1054 MO_Sub F64 -> trivialFCode F64 GSUB x y
1056 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1057 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1060 #if x86_64_TARGET_ARCH
1061 MO_Add F32 -> trivialFCode F32 ADD x y
1062 MO_Sub F32 -> trivialFCode F32 SUB x y
1064 MO_Add F64 -> trivialFCode F64 ADD x y
1065 MO_Sub F64 -> trivialFCode F64 SUB x y
1067 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1068 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1071 MO_Add rep -> add_code rep x y
1072 MO_Sub rep -> sub_code rep x y
-- div_code flags: (signed?, want-quotient? / else remainder).
1074 MO_S_Quot rep -> div_code rep True True x y
1075 MO_S_Rem rep -> div_code rep True False x y
1076 MO_U_Quot rep -> div_code rep False True x y
1077 MO_U_Rem rep -> div_code rep False False x y
1079 #if i386_TARGET_ARCH
1080 MO_Mul F32 -> trivialFCode F32 GMUL x y
1081 MO_Mul F64 -> trivialFCode F64 GMUL x y
1084 #if x86_64_TARGET_ARCH
1085 MO_Mul F32 -> trivialFCode F32 MUL x y
1086 MO_Mul F64 -> trivialFCode F64 MUL x y
1089 MO_Mul rep -> let op = IMUL rep in
1090 trivialCode rep op (Just op) x y
1092 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1094 MO_And rep -> let op = AND rep in
1095 trivialCode rep op (Just op) x y
1096 MO_Or rep -> let op = OR rep in
1097 trivialCode rep op (Just op) x y
1098 MO_Xor rep -> let op = XOR rep in
1099 trivialCode rep op (Just op) x y
1101 {- Shift ops on x86s have constraints on their source, it
1102 either has to be Imm, CL or 1
1103 => trivialCode is not restrictive enough (sigh.)
1105 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1106 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1107 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1109 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1111 --------------------
1112 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1113 imulMayOflo rep a b = do
1114 (a_reg, a_code) <- getNonClobberedReg a
1115 b_code <- getAnyReg b
1117 shift_amt = case rep of
1120 _ -> panic "shift_amt"
1122 code = a_code `appOL` b_code eax `appOL`
1124 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1125 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1126 -- sign extend lower part
1127 SUB rep (OpReg edx) (OpReg eax)
1128 -- compare against upper
1129 -- eax==0 if high part == sign extended low part
1132 return (Fixed rep eax code)
1134 --------------------
1135 shift_code :: MachRep
1136 -> (Operand -> Operand -> Instr)
1141 {- Case1: shift length as immediate -}
1142 shift_code rep instr x y@(CmmLit lit) = do
1143 x_code <- getAnyReg x
1146 = x_code dst `snocOL`
1147 instr (OpImm (litToImm lit)) (OpReg dst)
1149 return (Any rep code)
1151 {- Case2: shift length is complex (non-immediate) -}
1152 shift_code rep instr x y{-amount-} = do
1153 (x_reg, x_code) <- getNonClobberedReg x
1154 y_code <- getAnyReg y
1156 code = x_code `appOL`
1158 instr (OpReg ecx) (OpReg x_reg)
1160 return (Fixed rep x_reg code)
1162 --------------------
1163 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1164 add_code rep x (CmmLit (CmmInt y _))
1165 | not (is64BitInteger y) = add_int rep x y
1166 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1168 --------------------
1169 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1170 sub_code rep x (CmmLit (CmmInt y _))
1171 | not (is64BitInteger (-y)) = add_int rep x (-y)
1172 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1174 -- our three-operand add instruction:
1175 add_int rep x y = do
1176 (x_reg, x_code) <- getSomeReg x
1178 imm = ImmInt (fromInteger y)
1182 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1185 return (Any rep code)
1187 ----------------------
1188 div_code rep signed quotient x y = do
1189 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1190 x_code <- getAnyReg x
1192 widen | signed = CLTD rep
1193 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1195 instr | signed = IDIV
1198 code = y_code `appOL`
1200 toOL [widen, instr rep y_op]
1202 result | quotient = eax
1206 return (Fixed rep result code)
1209 getRegister (CmmLoad mem pk)
1212 Amode src mem_code <- getAmode mem
1214 code dst = mem_code `snocOL`
1215 IF_ARCH_i386(GLD pk src dst,
1216 MOV pk (OpAddr src) (OpReg dst))
1218 return (Any pk code)
1220 #if i386_TARGET_ARCH
1221 getRegister (CmmLoad mem pk)
1224 code <- intLoadCode (instr pk) mem
1225 return (Any pk code)
1227 instr I8 = MOVZxL pk
1230 -- we always zero-extend 8-bit loads, if we
1231 -- can't think of anything better. This is because
1232 -- we can't guarantee access to an 8-bit variant of every register
1233 -- (esi and edi don't have 8-bit variants), so to make things
1234 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1237 #if x86_64_TARGET_ARCH
1238 -- Simpler memory load code on x86_64
1239 getRegister (CmmLoad mem pk)
1241 code <- intLoadCode (MOV pk) mem
1242 return (Any pk code)
1245 getRegister (CmmLit (CmmInt 0 rep))
1247 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1248 adj_rep = case rep of I64 -> I32; _ -> rep
1249 rep1 = IF_ARCH_i386( rep, adj_rep )
1251 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1253 return (Any rep code)
1255 #if x86_64_TARGET_ARCH
1256 -- optimisation for loading small literals on x86_64: take advantage
1257 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1258 -- instruction forms are shorter.
1259 getRegister (CmmLit lit)
1260 | I64 <- cmmLitRep lit, not (isBigLit lit)
1263 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1265 return (Any I64 code)
1267 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1269 -- note1: not the same as is64BitLit, because that checks for
1270 -- signed literals that fit in 32 bits, but we want unsigned
1272 -- note2: all labels are small, because we're assuming the
1273 -- small memory model (see gcc docs, -mcmodel=small).
1276 getRegister (CmmLit lit)
1280 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1282 return (Any rep code)
1284 getRegister other = pprPanic "getRegister(x86)" (ppr other)
1287 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
1288 -> NatM (Reg -> InstrBlock)
1289 intLoadCode instr mem = do
1290 Amode src mem_code <- getAmode mem
1291 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
1293 -- Compute an expression into *any* register, adding the appropriate
1294 -- move instruction if necessary.
1295 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1297 r <- getRegister expr
1300 anyReg :: Register -> NatM (Reg -> InstrBlock)
1301 anyReg (Any _ code) = return code
1302 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1304 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1305 -- Fixed registers might not be byte-addressable, so we make sure we've
1306 -- got a temporary, inserting an extra reg copy if necessary.
1307 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1308 #if x86_64_TARGET_ARCH
1309 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1311 getByteReg expr = do
1312 r <- getRegister expr
1315 tmp <- getNewRegNat rep
1316 return (tmp, code tmp)
1318 | isVirtualReg reg -> return (reg,code)
1320 tmp <- getNewRegNat rep
1321 return (tmp, code `snocOL` reg2reg rep reg tmp)
1322 -- ToDo: could optimise slightly by checking for byte-addressable
1323 -- real registers, but that will happen very rarely if at all.
1326 -- Another variant: this time we want the result in a register that cannot
1327 -- be modified by code to evaluate an arbitrary expression.
1328 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1329 getNonClobberedReg expr = do
1330 r <- getRegister expr
1333 tmp <- getNewRegNat rep
1334 return (tmp, code tmp)
1336 -- only free regs can be clobbered
1337 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1338 tmp <- getNewRegNat rep
1339 return (tmp, code `snocOL` reg2reg rep reg tmp)
1343 reg2reg :: MachRep -> Reg -> Reg -> Instr
1345 #if i386_TARGET_ARCH
1346 | isFloatingRep rep = GMOV src dst
1348 | otherwise = MOV rep (OpReg src) (OpReg dst)
1350 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1352 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1354 #if sparc_TARGET_ARCH
1356 getRegister (CmmLit (CmmFloat f F32)) = do
1357 lbl <- getNewLabelNat
1358 let code dst = toOL [
1361 CmmStaticLit (CmmFloat f F32)],
1362 SETHI (HI (ImmCLbl lbl)) dst,
1363 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1364 return (Any F32 code)
1366 getRegister (CmmLit (CmmFloat d F64)) = do
1367 lbl <- getNewLabelNat
1368 let code dst = toOL [
1371 CmmStaticLit (CmmFloat d F64)],
1372 SETHI (HI (ImmCLbl lbl)) dst,
1373 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1374 return (Any F64 code)
1376 getRegister (CmmMachOp mop [x]) -- unary MachOps
1378 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1379 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1381 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1382 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1384 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1386 MO_U_Conv F64 F32-> coerceDbl2Flt x
1387 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1389 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1390 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1391 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1392 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1394 -- Conversions which are a nop on sparc
1396 | from == to -> conversionNop to x
1397 MO_U_Conv I32 to -> conversionNop to x
1398 MO_S_Conv I32 to -> conversionNop to x
1401 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1402 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1403 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1404 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1406 other_op -> panic "Unknown unary mach op"
1409 integerExtend signed from to expr = do
1410 (reg, e_code) <- getSomeReg expr
1414 ((if signed then SRA else SRL)
1415 reg (RIImm (ImmInt 0)) dst)
1416 return (Any to code)
1417 conversionNop new_rep expr
1418 = do e_code <- getRegister expr
1419 return (swizzleRegisterRep e_code new_rep)
1421 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1423 MO_Eq F32 -> condFltReg EQQ x y
1424 MO_Ne F32 -> condFltReg NE x y
1426 MO_S_Gt F32 -> condFltReg GTT x y
1427 MO_S_Ge F32 -> condFltReg GE x y
1428 MO_S_Lt F32 -> condFltReg LTT x y
1429 MO_S_Le F32 -> condFltReg LE x y
1431 MO_Eq F64 -> condFltReg EQQ x y
1432 MO_Ne F64 -> condFltReg NE x y
1434 MO_S_Gt F64 -> condFltReg GTT x y
1435 MO_S_Ge F64 -> condFltReg GE x y
1436 MO_S_Lt F64 -> condFltReg LTT x y
1437 MO_S_Le F64 -> condFltReg LE x y
1439 MO_Eq rep -> condIntReg EQQ x y
1440 MO_Ne rep -> condIntReg NE x y
1442 MO_S_Gt rep -> condIntReg GTT x y
1443 MO_S_Ge rep -> condIntReg GE x y
1444 MO_S_Lt rep -> condIntReg LTT x y
1445 MO_S_Le rep -> condIntReg LE x y
1447 MO_U_Gt I32 -> condIntReg GTT x y
1448 MO_U_Ge I32 -> condIntReg GE x y
1449 MO_U_Lt I32 -> condIntReg LTT x y
1450 MO_U_Le I32 -> condIntReg LE x y
1452 MO_U_Gt I16 -> condIntReg GU x y
1453 MO_U_Ge I16 -> condIntReg GEU x y
1454 MO_U_Lt I16 -> condIntReg LU x y
1455 MO_U_Le I16 -> condIntReg LEU x y
1457 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1458 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1460 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1462 -- ToDo: teach about V8+ SPARC div instructions
1463 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1464 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1465 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1466 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1468 MO_Add F32 -> trivialFCode F32 FADD x y
1469 MO_Sub F32 -> trivialFCode F32 FSUB x y
1470 MO_Mul F32 -> trivialFCode F32 FMUL x y
1471 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1473 MO_Add F64 -> trivialFCode F64 FADD x y
1474 MO_Sub F64 -> trivialFCode F64 FSUB x y
1475 MO_Mul F64 -> trivialFCode F64 FMUL x y
1476 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1478 MO_And rep -> trivialCode rep (AND False) x y
1479 MO_Or rep -> trivialCode rep (OR False) x y
1480 MO_Xor rep -> trivialCode rep (XOR False) x y
1482 MO_Mul rep -> trivialCode rep (SMUL False) x y
1484 MO_Shl rep -> trivialCode rep SLL x y
1485 MO_U_Shr rep -> trivialCode rep SRL x y
1486 MO_S_Shr rep -> trivialCode rep SRA x y
1489 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1490 [promote x, promote y])
1491 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1492 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1495 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1497 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1499 --------------------
1500 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1501 imulMayOflo rep a b = do
1502 (a_reg, a_code) <- getSomeReg a
1503 (b_reg, b_code) <- getSomeReg b
1504 res_lo <- getNewRegNat I32
1505 res_hi <- getNewRegNat I32
1507 shift_amt = case rep of
1510 _ -> panic "shift_amt"
1511 code dst = a_code `appOL` b_code `appOL`
1513 SMUL False a_reg (RIReg b_reg) res_lo,
1515 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1516 SUB False False res_lo (RIReg res_hi) dst
1518 return (Any I32 code)
1520 getRegister (CmmLoad mem pk) = do
1521 Amode src code <- getAmode mem
1523 code__2 dst = code `snocOL` LD pk src dst
1524 return (Any pk code__2)
1526 getRegister (CmmLit (CmmInt i _))
1529 src = ImmInt (fromInteger i)
1530 code dst = unitOL (OR False g0 (RIImm src) dst)
1532 return (Any I32 code)
1534 getRegister (CmmLit lit)
1535 = let rep = cmmLitRep lit
1539 OR False dst (RIImm (LO imm)) dst]
1540 in return (Any I32 code)
1542 #endif /* sparc_TARGET_ARCH */
1544 #if powerpc_TARGET_ARCH
1545 getRegister (CmmLoad mem pk)
1548 Amode addr addr_code <- getAmode mem
1549 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1550 addr_code `snocOL` LD pk dst addr
1551 return (Any pk code)
1553 -- catch simple cases of zero- or sign-extended load
1554 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1555 Amode addr addr_code <- getAmode mem
1556 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1558 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1560 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1561 Amode addr addr_code <- getAmode mem
1562 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1564 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1565 Amode addr addr_code <- getAmode mem
1566 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1568 getRegister (CmmMachOp mop [x]) -- unary MachOps
1570 MO_Not rep -> trivialUCode rep NOT x
1572 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1573 MO_S_Conv F32 F64 -> conversionNop F64 x
1576 | from == to -> conversionNop to x
1577 | isFloatingRep from -> coerceFP2Int from to x
1578 | isFloatingRep to -> coerceInt2FP from to x
1580 -- narrowing is a nop: we treat the high bits as undefined
1581 MO_S_Conv I32 to -> conversionNop to x
1582 MO_S_Conv I16 I8 -> conversionNop I8 x
1583 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1584 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1587 | from == to -> conversionNop to x
1588 -- narrowing is a nop: we treat the high bits as undefined
1589 MO_U_Conv I32 to -> conversionNop to x
1590 MO_U_Conv I16 I8 -> conversionNop I8 x
1591 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1592 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1594 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1595 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1596 MO_S_Neg rep -> trivialUCode rep NEG x
1599 conversionNop new_rep expr
1600 = do e_code <- getRegister expr
1601 return (swizzleRegisterRep e_code new_rep)
1603 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1605 MO_Eq F32 -> condFltReg EQQ x y
1606 MO_Ne F32 -> condFltReg NE x y
1608 MO_S_Gt F32 -> condFltReg GTT x y
1609 MO_S_Ge F32 -> condFltReg GE x y
1610 MO_S_Lt F32 -> condFltReg LTT x y
1611 MO_S_Le F32 -> condFltReg LE x y
1613 MO_Eq F64 -> condFltReg EQQ x y
1614 MO_Ne F64 -> condFltReg NE x y
1616 MO_S_Gt F64 -> condFltReg GTT x y
1617 MO_S_Ge F64 -> condFltReg GE x y
1618 MO_S_Lt F64 -> condFltReg LTT x y
1619 MO_S_Le F64 -> condFltReg LE x y
1621 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1622 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1624 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1625 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1626 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1627 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1629 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1630 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1631 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1632 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1634 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1635 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1636 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1637 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1639 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1640 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1641 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1642 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1644 -- optimize addition with 32-bit immediate
1648 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1649 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1652 (src, srcCode) <- getSomeReg x
1653 let imm = litToImm lit
1654 code dst = srcCode `appOL` toOL [
1655 ADDIS dst src (HA imm),
1656 ADD dst dst (RIImm (LO imm))
1658 return (Any I32 code)
1659 _ -> trivialCode I32 True ADD x y
1661 MO_Add rep -> trivialCode rep True ADD x y
1663 case y of -- subfi ('substract from' with immediate) doesn't exist
1664 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1665 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1666 _ -> trivialCodeNoImm rep SUBF y x
1668 MO_Mul rep -> trivialCode rep True MULLW x y
1670 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1672 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1673 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1675 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1676 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1678 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1679 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1681 MO_And rep -> trivialCode rep False AND x y
1682 MO_Or rep -> trivialCode rep False OR x y
1683 MO_Xor rep -> trivialCode rep False XOR x y
1685 MO_Shl rep -> trivialCode rep False SLW x y
1686 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1687 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1689 getRegister (CmmLit (CmmInt i rep))
1690 | Just imm <- makeImmediate rep True i
1692 code dst = unitOL (LI dst imm)
1694 return (Any rep code)
1696 getRegister (CmmLit (CmmFloat f frep)) = do
1697 lbl <- getNewLabelNat
1698 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1699 Amode addr addr_code <- getAmode dynRef
1701 LDATA ReadOnlyData [CmmDataLabel lbl,
1702 CmmStaticLit (CmmFloat f frep)]
1703 `consOL` (addr_code `snocOL` LD frep dst addr)
1704 return (Any frep code)
1706 getRegister (CmmLit lit)
1707 = let rep = cmmLitRep lit
1711 OR dst dst (RIImm (LO imm))
1713 in return (Any rep code)
1715 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1717 -- extend?Rep: wrap integer expression of type rep
1718 -- in a conversion to I32
1719 extendSExpr I32 x = x
1720 extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
1721 extendUExpr I32 x = x
1722 extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1724 #endif /* powerpc_TARGET_ARCH */
1727 -- -----------------------------------------------------------------------------
1728 -- The 'Amode' type: Memory addressing modes passed up the tree.
1730 data Amode = Amode AddrMode InstrBlock
1733 Now, given a tree (the argument to an CmmLoad) that references memory,
1734 produce a suitable addressing mode.
1736 A Rule of the Game (tm) for Amodes: use of the addr bit must
1737 immediately follow use of the code part, since the code part puts
1738 values in registers which the addr then refers to. So you can't put
1739 anything in between, lest it overwrite some of those registers. If
1740 you need to do some other computation between the code part and use of
1741 the addr bit, first store the effective address from the amode in a
1742 temporary, then do the other computation, and then use the temporary:
1746 ... other computation ...
1750 getAmode :: CmmExpr -> NatM Amode
1751 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1753 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1755 #if alpha_TARGET_ARCH
1757 getAmode (StPrim IntSubOp [x, StInt i])
1758 = getNewRegNat PtrRep `thenNat` \ tmp ->
1759 getRegister x `thenNat` \ register ->
1761 code = registerCode register tmp
1762 reg = registerName register tmp
1763 off = ImmInt (-(fromInteger i))
1765 return (Amode (AddrRegImm reg off) code)
1767 getAmode (StPrim IntAddOp [x, StInt i])
1768 = getNewRegNat PtrRep `thenNat` \ tmp ->
1769 getRegister x `thenNat` \ register ->
1771 code = registerCode register tmp
1772 reg = registerName register tmp
1773 off = ImmInt (fromInteger i)
1775 return (Amode (AddrRegImm reg off) code)
1779 = return (Amode (AddrImm imm__2) id)
1782 imm__2 = case imm of Just x -> x
1785 = getNewRegNat PtrRep `thenNat` \ tmp ->
1786 getRegister other `thenNat` \ register ->
1788 code = registerCode register tmp
1789 reg = registerName register tmp
1791 return (Amode (AddrReg reg) code)
1793 #endif /* alpha_TARGET_ARCH */
1795 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1797 #if x86_64_TARGET_ARCH
1799 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1800 CmmLit displacement])
1801 = return $ Amode (ripRel (litToImm displacement)) nilOL
1805 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1807 -- This is all just ridiculous, since it carefully undoes
1808 -- what mangleIndexTree has just done.
1809 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1810 | not (is64BitLit lit)
1811 -- ASSERT(rep == I32)???
1812 = do (x_reg, x_code) <- getSomeReg x
1813 let off = ImmInt (-(fromInteger i))
1814 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1816 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1817 | not (is64BitLit lit)
1818 -- ASSERT(rep == I32)???
1819 = do (x_reg, x_code) <- getSomeReg x
1820 let off = ImmInt (fromInteger i)
1821 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1823 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1824 -- recognised by the next rule.
1825 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1827 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1829 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1830 [y, CmmLit (CmmInt shift _)]])
1831 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1832 = do (x_reg, x_code) <- getNonClobberedReg x
1833 -- x must be in a temp, because it has to stay live over y_code
1834 -- we could compre x_reg and y_reg and do something better here...
1835 (y_reg, y_code) <- getSomeReg y
1837 code = x_code `appOL` y_code
1838 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1839 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
1842 getAmode (CmmLit lit) | not (is64BitLit lit)
1843 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1846 (reg,code) <- getSomeReg expr
1847 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1849 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1851 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1853 #if sparc_TARGET_ARCH
1855 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1858 (reg, code) <- getSomeReg x
1860 off = ImmInt (-(fromInteger i))
1861 return (Amode (AddrRegImm reg off) code)
1864 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1867 (reg, code) <- getSomeReg x
1869 off = ImmInt (fromInteger i)
1870 return (Amode (AddrRegImm reg off) code)
1872 getAmode (CmmMachOp (MO_Add rep) [x, y])
1874 (regX, codeX) <- getSomeReg x
1875 (regY, codeY) <- getSomeReg y
1877 code = codeX `appOL` codeY
1878 return (Amode (AddrRegReg regX regY) code)
1880 -- XXX Is this same as "leaf" in Stix?
1881 getAmode (CmmLit lit)
1883 tmp <- getNewRegNat I32
1885 code = unitOL (SETHI (HI imm__2) tmp)
1886 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1888 imm__2 = litToImm lit
1892 (reg, code) <- getSomeReg other
1895 return (Amode (AddrRegImm reg off) code)
1897 #endif /* sparc_TARGET_ARCH */
1899 #ifdef powerpc_TARGET_ARCH
1900 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1901 | Just off <- makeImmediate I32 True (-i)
1903 (reg, code) <- getSomeReg x
1904 return (Amode (AddrRegImm reg off) code)
1907 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1908 | Just off <- makeImmediate I32 True i
1910 (reg, code) <- getSomeReg x
1911 return (Amode (AddrRegImm reg off) code)
1913 -- optimize addition with 32-bit immediate
1915 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1917 tmp <- getNewRegNat I32
1918 (src, srcCode) <- getSomeReg x
1919 let imm = litToImm lit
1920 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1921 return (Amode (AddrRegImm tmp (LO imm)) code)
1923 getAmode (CmmLit lit)
1925 tmp <- getNewRegNat I32
1926 let imm = litToImm lit
1927 code = unitOL (LIS tmp (HA imm))
1928 return (Amode (AddrRegImm tmp (LO imm)) code)
1930 getAmode (CmmMachOp (MO_Add I32) [x, y])
1932 (regX, codeX) <- getSomeReg x
1933 (regY, codeY) <- getSomeReg y
1934 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1938 (reg, code) <- getSomeReg other
1941 return (Amode (AddrRegImm reg off) code)
1942 #endif /* powerpc_TARGET_ARCH */
1944 -- -----------------------------------------------------------------------------
1945 -- getOperand: sometimes any operand will do.
1947 -- getNonClobberedOperand: the value of the operand will remain valid across
1948 -- the computation of an arbitrary expression, unless the expression
1949 -- is computed directly into a register which the operand refers to
1950 -- (see trivialCode where this function is used for an example).
1952 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1954 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1955 #if x86_64_TARGET_ARCH
1956 getNonClobberedOperand (CmmLit lit)
1957 | isSuitableFloatingPointLit lit = do
1958 lbl <- getNewLabelNat
1959 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1961 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1963 getNonClobberedOperand (CmmLit lit)
1964 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
1965 return (OpImm (litToImm lit), nilOL)
1966 getNonClobberedOperand (CmmLoad mem pk)
1967 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
1968 Amode src mem_code <- getAmode mem
1970 if (amodeCouldBeClobbered src)
1972 tmp <- getNewRegNat wordRep
1973 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1974 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
1977 return (OpAddr src', save_code `appOL` mem_code)
1978 getNonClobberedOperand e = do
1979 (reg, code) <- getNonClobberedReg e
1980 return (OpReg reg, code)
1982 amodeCouldBeClobbered :: AddrMode -> Bool
1983 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
1985 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
1986 regClobbered _ = False
1988 -- getOperand: the operand is not required to remain valid across the
1989 -- computation of an arbitrary expression.
1990 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1991 #if x86_64_TARGET_ARCH
1992 getOperand (CmmLit lit)
1993 | isSuitableFloatingPointLit lit = do
1994 lbl <- getNewLabelNat
1995 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
1997 return (OpAddr (ripRel (ImmCLbl lbl)), code)
1999 getOperand (CmmLit lit)
2000 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2001 return (OpImm (litToImm lit), nilOL)
2002 getOperand (CmmLoad mem pk)
2003 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2004 Amode src mem_code <- getAmode mem
2005 return (OpAddr src, mem_code)
2007 (reg, code) <- getSomeReg e
2008 return (OpReg reg, code)
2010 isOperand :: CmmExpr -> Bool
2011 isOperand (CmmLoad _ _) = True
2012 isOperand (CmmLit lit) = not (is64BitLit lit)
2013 || isSuitableFloatingPointLit lit
2016 -- if we want a floating-point literal as an operand, we can
2017 -- use it directly from memory. However, if the literal is
2018 -- zero, we're better off generating it into a register using
2020 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2021 isSuitableFloatingPointLit _ = False
2023 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2024 getRegOrMem (CmmLoad mem pk)
2025 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2026 Amode src mem_code <- getAmode mem
2027 return (OpAddr src, mem_code)
2029 (reg, code) <- getNonClobberedReg e
2030 return (OpReg reg, code)
2032 #if x86_64_TARGET_ARCH
2033 is64BitLit (CmmInt i I64) = is64BitInteger i
2034 -- assume that labels are in the range 0-2^31-1: this assumes the
2035 -- small memory model (see gcc docs, -mcmodel=small).
2037 is64BitLit x = False
2040 is64BitInteger :: Integer -> Bool
2041 is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
2042 where i64 = fromIntegral i :: Int64
2043 -- a CmmInt is intended to be truncated to the appropriate
2044 -- number of bits, so here we truncate it to Int64. This is
2045 -- important because e.g. -1 as a CmmInt might be either
2046 -- -1 or 18446744073709551615.
2048 -- -----------------------------------------------------------------------------
2049 -- The 'CondCode' type: Condition codes passed up the tree.
2051 data CondCode = CondCode Bool Cond InstrBlock
2053 -- Set up a condition code for a conditional branch.
2055 getCondCode :: CmmExpr -> NatM CondCode
2057 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2059 #if alpha_TARGET_ARCH
2060 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2061 #endif /* alpha_TARGET_ARCH */
2063 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2065 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2066 -- yes, they really do seem to want exactly the same!
2068 getCondCode (CmmMachOp mop [x, y])
2071 MO_Eq F32 -> condFltCode EQQ x y
2072 MO_Ne F32 -> condFltCode NE x y
2074 MO_S_Gt F32 -> condFltCode GTT x y
2075 MO_S_Ge F32 -> condFltCode GE x y
2076 MO_S_Lt F32 -> condFltCode LTT x y
2077 MO_S_Le F32 -> condFltCode LE x y
2079 MO_Eq F64 -> condFltCode EQQ x y
2080 MO_Ne F64 -> condFltCode NE x y
2082 MO_S_Gt F64 -> condFltCode GTT x y
2083 MO_S_Ge F64 -> condFltCode GE x y
2084 MO_S_Lt F64 -> condFltCode LTT x y
2085 MO_S_Le F64 -> condFltCode LE x y
2087 MO_Eq rep -> condIntCode EQQ x y
2088 MO_Ne rep -> condIntCode NE x y
2090 MO_S_Gt rep -> condIntCode GTT x y
2091 MO_S_Ge rep -> condIntCode GE x y
2092 MO_S_Lt rep -> condIntCode LTT x y
2093 MO_S_Le rep -> condIntCode LE x y
2095 MO_U_Gt rep -> condIntCode GU x y
2096 MO_U_Ge rep -> condIntCode GEU x y
2097 MO_U_Lt rep -> condIntCode LU x y
2098 MO_U_Le rep -> condIntCode LEU x y
2100 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2102 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2104 #elif powerpc_TARGET_ARCH
2106 -- almost the same as everywhere else - but we need to
2107 -- extend small integers to 32 bit first
2109 getCondCode (CmmMachOp mop [x, y])
-- NOTE(review): the "= case mop of" line appears to be elided from this
-- excerpt (numbering gap 2109 -> 2111); the arms below dispatch on the
-- comparison MachOp and the MachRep of its operands.
-- Float comparisons (F32/F64) need no widening and go straight through.
2111 MO_Eq F32 -> condFltCode EQQ x y
2112 MO_Ne F32 -> condFltCode NE x y
2114 MO_S_Gt F32 -> condFltCode GTT x y
2115 MO_S_Ge F32 -> condFltCode GE x y
2116 MO_S_Lt F32 -> condFltCode LTT x y
2117 MO_S_Le F32 -> condFltCode LE x y
2119 MO_Eq F64 -> condFltCode EQQ x y
2120 MO_Ne F64 -> condFltCode NE x y
2122 MO_S_Gt F64 -> condFltCode GTT x y
2123 MO_S_Ge F64 -> condFltCode GE x y
2124 MO_S_Lt F64 -> condFltCode LTT x y
2125 MO_S_Le F64 -> condFltCode LE x y
-- Integer comparisons: operands are widened to full register width first
-- (see the header comment above) -- sign-extended (extendSExpr) for signed
-- comparisons, zero-extended (extendUExpr) for unsigned ones.  Equality
-- uses the unsigned extension, which is equality-preserving either way.
2127 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2128 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2130 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2131 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2132 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2133 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2135 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2136 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2137 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2138 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
-- Any other MachOp is not a two-operand comparison.
2140 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
-- Fallback: 'getCondCode' was handed something that is not a two-argument
-- comparison MachOp.  Use pprPanic so the offending expression is shown in
-- the panic, matching the diagnostic style of the x86/sparc equation above.
getCondCode other = pprPanic "getCondCode(2)(powerpc)" (ppr other)
2148 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2149 -- passed back up the tree.
-- | Turn a boolean comparison into a 'CondCode': the condition to test
-- plus the instruction sequence that sets the machine's condition flags.
condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

#if alpha_TARGET_ARCH
-- Never implemented for Alpha.  (Panic messages updated to name the
-- current module, MachCodeGen; they previously said "MachCode".)
condIntCode = panic "MachCodeGen.condIntCode: not on Alphas"
condFltCode = panic "MachCodeGen.condFltCode: not on Alphas"
#endif /* alpha_TARGET_ARCH */
2158 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2159 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Integer comparisons on x86/x86_64.  The equations are ordered from most
-- specific addressing form to the general reg-vs-reg/mem case; each one
-- returns CondCode False (not-float) with the caller's condition.
-- NOTE(review): the "let"/"in" scaffolding lines are elided from this
-- excerpt (see the gaps in the embedded numbering).
2161 -- memory vs immediate
2162 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2163 Amode x_addr x_code <- getAmode x
2166 code = x_code `snocOL`
2167 CMP pk (OpImm imm) (OpAddr x_addr)
2169 return (CondCode False cond code)
-- something vs zero: TEST reg,reg sets the flags without needing an
-- immediate operand (shorter encoding than CMP $0,reg).
2172 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2173 (x_reg, x_code) <- getSomeReg x
2175 code = x_code `snocOL`
2176 TEST pk (OpReg x_reg) (OpReg x_reg)
2178 return (CondCode False cond code)
2180 -- anything vs operand
-- x goes in a non-clobbered reg so it survives the code that computes y.
2181 condIntCode cond x y | isOperand y = do
2182 (x_reg, x_code) <- getNonClobberedReg x
2183 (y_op, y_code) <- getOperand y
2185 code = x_code `appOL` y_code `snocOL`
2186 CMP (cmmExprRep x) y_op (OpReg x_reg)
2188 return (CondCode False cond code)
2190 -- anything vs anything
2191 condIntCode cond x y = do
2192 (y_reg, y_code) <- getNonClobberedReg y
2193 (x_op, x_code) <- getRegOrMem x
2195 code = y_code `appOL`
2197 CMP (cmmExprRep x) (OpReg y_reg) x_op
2199 return (CondCode False cond code)
2202 #if i386_TARGET_ARCH
-- Float comparison on plain i386 goes through the GCMP pseudo-instruction
-- (x87-based; contrast the SSE2 path in the x86_64 version below).
2203 condFltCode cond x y
2204 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2205 (x_reg, x_code) <- getNonClobberedReg x
2206 (y_reg, y_code) <- getSomeReg y
2208 code = x_code `appOL` y_code `snocOL`
2209 GCMP cond x_reg y_reg
2210 -- The GCMP insn does the test and sets the zero flag if comparable
2211 -- and true. Hence we always supply EQQ as the condition to test.
2212 return (CondCode True EQQ code)
2213 #endif /* i386_TARGET_ARCH */
2215 #if x86_64_TARGET_ARCH
2216 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2217 -- an operand, but the right must be a reg. We can probably do better
2218 -- than this general case...
2219 condFltCode cond x y = do
2220 (x_reg, x_code) <- getNonClobberedReg x
2221 (y_op, y_code) <- getOperand y
2223 code = x_code `appOL`
2225 CMP (cmmExprRep x) y_op (OpReg x_reg)
2226 -- NB(1): we need to use the unsigned comparison operators on the
2227 -- result of this comparison.
-- CondCode True marks this as a float comparison; the condition itself is
-- mapped to its unsigned counterpart per NB(1) above.
2229 return (CondCode True (condToUnsigned cond) code)
2232 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2234 #if sparc_TARGET_ARCH
-- SPARC comparisons.  NOTE(review): the guard and "let"/"in" lines are
-- elided from this excerpt; the SUB with destination g0 presumably acts
-- as a compare (subcc discarding its result) -- confirm against the
-- SUB constructor's flag meanings in the Instr definition.
2236 condIntCode cond x (CmmLit (CmmInt y rep))
2239 (src1, code) <- getSomeReg x
2241 src2 = ImmInt (fromInteger y)
2242 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2243 return (CondCode False cond code')
2245 condIntCode cond x y = do
2246 (src1, code1) <- getSomeReg x
2247 (src2, code2) <- getSomeReg y
2249 code__2 = code1 `appOL` code2 `snocOL`
2250 SUB False True src1 (RIReg src2) g0
2251 return (CondCode False cond code__2)
-- Float comparison: FCMP requires both operands at the same precision, so
-- a F32 operand is promoted to F64 (FxTOy into a fresh temp) when the two
-- operand sizes differ.
2254 condFltCode cond x y = do
2255 (src1, code1) <- getSomeReg x
2256 (src2, code2) <- getSomeReg y
2257 tmp <- getNewRegNat F64
2259 promote x = FxTOy F32 F64 x tmp
-- (the pk1/pk2 equality test introducing these branches is elided here)
2266 code1 `appOL` code2 `snocOL`
2267 FCMP True pk1 src1 src2
2268 else if pk1 == F32 then
2269 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2270 FCMP True F64 tmp src2
2272 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2273 FCMP True F64 src1 tmp
2274 return (CondCode True cond code__2)
2276 #endif /* sparc_TARGET_ARCH */
2278 #if powerpc_TARGET_ARCH
2279 -- ###FIXME: I16 and I8!
-- Compare against an immediate when the literal fits PPC's 16-bit
-- immediate field: makeImmediate checks the range (signedness chosen by
-- the condition) and the guard falls through to the reg-reg case if not.
2280 condIntCode cond x (CmmLit (CmmInt y rep))
2281 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2283 (src1, code) <- getSomeReg x
2285 code' = code `snocOL`
-- CMPL is the logical (unsigned) compare, CMP the signed one.
2286 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2287 return (CondCode False cond code')
2289 condIntCode cond x y = do
2290 (src1, code1) <- getSomeReg x
2291 (src2, code2) <- getSomeReg y
2293 code' = code1 `appOL` code2 `snocOL`
2294 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2295 return (CondCode False cond code')
2297 condFltCode cond x y = do
2298 (src1, code1) <- getSomeReg x
2299 (src2, code2) <- getSomeReg y
2301 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2302 code'' = case cond of -- twiddle CR to handle unordered case
2303 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2304 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- (default arm returning code' unchanged is elided in this excerpt)
-- CR-field bit indices used by the CRNOR fixups above.
2307 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2308 return (CondCode True cond code'')
2312 -- -----------------------------------------------------------------------------
2313 -- Generating assignments
2315 -- Assignments are really at the heart of the whole code generation
2316 -- business. Almost all top-level nodes of any real importance are
2317 -- assignments, which correspond to loads, stores, or register
2318 -- transfers. If we're really lucky, some of the register transfers
2319 -- will go away, because we can use the destination register to
2320 -- complete the code generation for the right hand side. This only
2321 -- fails when the right hand side is forced into a fixed register
2322 -- (e.g. the result of a call).
-- Per-architecture assignment code generators:
--   assignMem_* : store the rhs expression to the given address expression;
--   assignReg_* : bring the rhs expression into the given CmmReg.
-- The MachRep is the size/kind of the value being moved.
2324 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2325 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2327 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2328 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2330 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2332 #if alpha_TARGET_ARCH
2334 assignIntCode pk (CmmLoad dst _) src
2335 = getNewRegNat IntRep `thenNat` \ tmp ->
2336 getAmode dst `thenNat` \ amode ->
2337 getRegister src `thenNat` \ register ->
2339 code1 = amodeCode amode []
2340 dst__2 = amodeAddr amode
2341 code2 = registerCode register tmp []
2342 src__2 = registerName register tmp
2343 sz = primRepToSize pk
2344 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2348 assignIntCode pk dst src
2349 = getRegister dst `thenNat` \ register1 ->
2350 getRegister src `thenNat` \ register2 ->
2352 dst__2 = registerName register1 zeroh
2353 code = registerCode register2 dst__2
2354 src__2 = registerName register2 dst__2
2355 code__2 = if isFixed register2
2356 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2361 #endif /* alpha_TARGET_ARCH */
2363 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2365 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2367 -- integer assignment to memory
2368 assignMem_IntCode pk addr src = do
2369 Amode addr code_addr <- getAmode addr
2370 (code_src, op_src) <- get_op_RI src
2372 code = code_src `appOL`
2374 MOV pk op_src (OpAddr addr)
2375 -- NOTE: op_src is stable, so it will still be valid
2376 -- after code_addr. This may involve the introduction
2377 -- of an extra MOV to a temporary register, but we hope
2378 -- the register allocator will get rid of it.
-- Helper: turn an rhs into either an immediate operand (small literal)
-- or a non-clobbered register operand.
2382 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2383 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2384 = return (nilOL, OpImm (litToImm lit))
2386 = do (reg,code) <- getNonClobberedReg op
2387 return (code, OpReg reg)
2390 -- Assign; dst is a reg, rhs is mem
2391 assignReg_IntCode pk reg (CmmLoad src _) = do
2392 load_code <- intLoadCode (MOV pk) src
2393 return (load_code (getRegisterReg reg))
2395 -- dst is a reg, but src could be anything
2396 assignReg_IntCode pk reg src = do
2397 code <- getAnyReg src
2398 return (code (getRegisterReg reg))
2400 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2402 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2404 #if sparc_TARGET_ARCH
-- Store an integer rhs into memory: evaluate the source into some
-- register, work out the destination addressing mode, then emit one ST.
assignMem_IntCode sz dst src = do
    (src_reg, src_code) <- getSomeReg src
    Amode dst_addr amode_code <- getAmode dst
    return (src_code `appOL` amode_code `snocOL` ST sz src_reg dst_addr)
-- dst is a reg; src can be anything.
assignReg_IntCode pk reg src = do
    r <- getRegister src
    return $ case r of
        Any _ code -> code dst
        -- Copy the fixed source register INTO dst.  A reg-to-reg move on
        -- SPARC is "or %g0, src, dst"; the destination operand comes last.
        -- (Previously the operands were swapped -- OR False g0 (RIReg dst)
        -- freg -- which copied dst into the fixed register and left dst
        -- unassigned.  Compare the powerpc version: MR dst freg.)
        Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
  where
    dst = getRegisterReg reg
2420 #endif /* sparc_TARGET_ARCH */
2422 #if powerpc_TARGET_ARCH
-- Store an integer rhs into memory: evaluate the source into some
-- register, compute the destination addressing mode, then emit one ST.
assignMem_IntCode sz dst src = do
    (src_reg, src_code) <- getSomeReg src
    Amode dst_addr amode_code <- getAmode dst
    return (src_code `appOL` amode_code `snocOL` ST sz src_reg dst_addr)
2429 -- dst is a reg, but src could be anything
2429 -- dst is a reg, but src could be anything
2430 assignReg_IntCode pk reg src
-- NOTE(review): the "= do" and "return $ case r of" lines are elided from
-- this excerpt (numbering gaps); the alternatives below scrutinise r.
2432 r <- getRegister src
2434 Any _ code -> code dst
-- MR dst freg copies the fixed source register into dst.
2435 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2437 dst = getRegisterReg reg
2439 #endif /* powerpc_TARGET_ARCH */
2442 -- -----------------------------------------------------------------------------
2443 -- Floating-point assignments
2445 #if alpha_TARGET_ARCH
2447 assignFltCode pk (CmmLoad dst _) src
2448 = getNewRegNat pk `thenNat` \ tmp ->
2449 getAmode dst `thenNat` \ amode ->
2450 getRegister src `thenNat` \ register ->
2452 code1 = amodeCode amode []
2453 dst__2 = amodeAddr amode
2454 code2 = registerCode register tmp []
2455 src__2 = registerName register tmp
2456 sz = primRepToSize pk
2457 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2461 assignFltCode pk dst src
2462 = getRegister dst `thenNat` \ register1 ->
2463 getRegister src `thenNat` \ register2 ->
2465 dst__2 = registerName register1 zeroh
2466 code = registerCode register2 dst__2
2467 src__2 = registerName register2 dst__2
2468 code__2 = if isFixed register2
2469 then code . mkSeqInstr (FMOV src__2 dst__2)
2474 #endif /* alpha_TARGET_ARCH */
2476 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2478 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2480 -- Floating point assignment to memory
2481 assignMem_FltCode pk addr src = do
2482 (src_reg, src_code) <- getNonClobberedReg src
2483 Amode addr addr_code <- getAmode addr
2485 code = src_code `appOL`
-- i386 uses the x87 store pseudo-op GST; x86_64 keeps floats in ordinary
-- (SSE) registers, so a plain MOV to the address suffices.
2487 IF_ARCH_i386(GST pk src_reg addr,
2488 MOV pk (OpReg src_reg) (OpAddr addr))
2491 -- Floating point assignment to a register/temporary
2492 assignReg_FltCode pk reg src = do
2493 src_code <- getAnyReg src
2494 return (src_code (getRegisterReg reg))
2496 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2498 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2500 #if sparc_TARGET_ARCH
2502 -- Floating point assignment to memory
-- If the source's precision (pk__2) differs from the target size pk,
-- convert via FxTOy into a fresh temp before storing.
2503 assignMem_FltCode pk addr src = do
2504 Amode dst__2 code1 <- getAmode addr
2505 (src__2, code2) <- getSomeReg src
2506 tmp1 <- getNewRegNat pk
2508 pk__2 = cmmExprRep src
2509 code__2 = code1 `appOL` code2 `appOL`
2511 then unitOL (ST pk src__2 dst__2)
2512 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2515 -- Floating point assignment to a register/temporary
2516 -- ToDo: Verify correctness
2517 assignReg_FltCode pk reg src = do
2518 r <- getRegister src
2519 v1 <- getNewRegNat pk
2521 Any _ code -> code dst
-- NOTE(review): the FMOV targets the fresh temp v1, not dst, so in the
-- Fixed case the value never appears to reach dst -- this looks wrong
-- (cf. the int version, which moves into dst) and is presumably what the
-- "ToDo: Verify correctness" above refers to.
2522 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2524 dst = getRegisterReg reg
2526 #endif /* sparc_TARGET_ARCH */
2528 #if powerpc_TARGET_ARCH
-- On PowerPC the float assignment path is identical to the integer one:
-- the MachRep argument already selects the right load/store, so we simply
-- reuse the integer code generators.
2531 assignMem_FltCode = assignMem_IntCode
2532 assignReg_FltCode = assignReg_IntCode
2534 #endif /* powerpc_TARGET_ARCH */
2537 -- -----------------------------------------------------------------------------
2538 -- Generating an non-local jump
2540 -- (If applicable) Do not fill the delay slots here; you will confuse the
2541 -- register allocator.
2543 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2545 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2547 #if alpha_TARGET_ARCH
2549 genJump (CmmLabel lbl)
2550 | isAsmTemp lbl = returnInstr (BR target)
2551 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2553 target = ImmCLbl lbl
2556 = getRegister tree `thenNat` \ register ->
2557 getNewRegNat PtrRep `thenNat` \ tmp ->
2559 dst = registerName register pv
2560 code = registerCode register pv
2561 target = registerName register pv
2563 if isFixed register then
2564 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2566 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2568 #endif /* alpha_TARGET_ARCH */
2570 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2572 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump through a memory operand: no need to load the target into a
-- register first, JMP takes an address operand directly.
2574 genJump (CmmLoad mem pk) = do
2575 Amode target code <- getAmode mem
2576 return (code `snocOL` JMP (OpAddr target))
-- Jump to a literal (label/immediate) target.
2578 genJump (CmmLit lit) = do
2579 return (unitOL (JMP (OpImm (litToImm lit))))
-- General case: evaluate the target into a register and jump through it.
-- NOTE(review): this equation's head line ("genJump expr = do") is elided
-- from this excerpt.
2582 (reg,code) <- getSomeReg expr
2583 return (code `snocOL` JMP (OpReg reg))
2585 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2587 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2589 #if sparc_TARGET_ARCH
-- Known label: emit a CALL followed by a NOP to fill its delay slot.
2591 genJump (CmmLit (CmmLabel lbl))
2592 = return (toOL [CALL (Left target) 0 True, NOP])
2594 target = ImmCLbl lbl
-- General case (head line elided in this excerpt): evaluate the target
-- into a register, jump through it, and fill the delay slot with a NOP.
2598 (target, code) <- getSomeReg tree
2599 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2601 #endif /* sparc_TARGET_ARCH */
2603 #if powerpc_TARGET_ARCH
-- Known label: direct jump.
2604 genJump (CmmLit (CmmLabel lbl))
2605 = return (unitOL $ JMP lbl)
-- General case (head line elided in this excerpt): load the target into
-- the count register and branch through it (mtctr; bctr).
2609 (target,code) <- getSomeReg tree
2610 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2611 #endif /* powerpc_TARGET_ARCH */
2614 -- -----------------------------------------------------------------------------
2615 -- Unconditional branches
-- | Emit an unconditional branch to a local basic-block label.
genBranch :: BlockId -> NatM InstrBlock
genBranch block_id = return (toOL (mkBranchInstr block_id))
2621 -- -----------------------------------------------------------------------------
2622 -- Conditional jumps
2625 Conditional jumps are always to local labels, so we can use branch
2626 instructions. We peek at the arguments to decide what kind of
2629 ALPHA: For comparisons with 0, we're laughing, because we can just do
2630 the desired conditional branch.
2632 I386: First, we have to ensure that the condition
2633 codes are set according to the supplied comparison operation.
2635 SPARC: First, we have to ensure that the condition codes are set
2636 according to the supplied comparison operation. We generate slightly
2637 different code for floating point comparisons, because a floating
2638 point operation cannot directly precede a @BF@. We assume the worst
2639 and fill that slot with a @NOP@.
2641 SPARC: Do not fill the delay slots here; you will confuse the register
2647 :: BlockId -- the branch target
2648 -> CmmExpr -- the condition on which to branch
2651 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2653 #if alpha_TARGET_ARCH
2655 genCondJump id (StPrim op [x, StInt 0])
2656 = getRegister x `thenNat` \ register ->
2657 getNewRegNat (registerRep register)
2660 code = registerCode register tmp
2661 value = registerName register tmp
2662 pk = registerRep register
2663 target = ImmCLbl lbl
2665 returnSeq code [BI (cmpOp op) value target]
2667 cmpOp CharGtOp = GTT
2669 cmpOp CharEqOp = EQQ
2671 cmpOp CharLtOp = LTT
2680 cmpOp WordGeOp = ALWAYS
2681 cmpOp WordEqOp = EQQ
2683 cmpOp WordLtOp = NEVER
2684 cmpOp WordLeOp = EQQ
2686 cmpOp AddrGeOp = ALWAYS
2687 cmpOp AddrEqOp = EQQ
2689 cmpOp AddrLtOp = NEVER
2690 cmpOp AddrLeOp = EQQ
2692 genCondJump lbl (StPrim op [x, StDouble 0.0])
2693 = getRegister x `thenNat` \ register ->
2694 getNewRegNat (registerRep register)
2697 code = registerCode register tmp
2698 value = registerName register tmp
2699 pk = registerRep register
2700 target = ImmCLbl lbl
2702 return (code . mkSeqInstr (BF (cmpOp op) value target))
2704 cmpOp FloatGtOp = GTT
2705 cmpOp FloatGeOp = GE
2706 cmpOp FloatEqOp = EQQ
2707 cmpOp FloatNeOp = NE
2708 cmpOp FloatLtOp = LTT
2709 cmpOp FloatLeOp = LE
2710 cmpOp DoubleGtOp = GTT
2711 cmpOp DoubleGeOp = GE
2712 cmpOp DoubleEqOp = EQQ
2713 cmpOp DoubleNeOp = NE
2714 cmpOp DoubleLtOp = LTT
2715 cmpOp DoubleLeOp = LE
2717 genCondJump lbl (StPrim op [x, y])
2719 = trivialFCode pr instr x y `thenNat` \ register ->
2720 getNewRegNat F64 `thenNat` \ tmp ->
2722 code = registerCode register tmp
2723 result = registerName register tmp
2724 target = ImmCLbl lbl
2726 return (code . mkSeqInstr (BF cond result target))
2728 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2730 fltCmpOp op = case op of
2744 (instr, cond) = case op of
2745 FloatGtOp -> (FCMP TF LE, EQQ)
2746 FloatGeOp -> (FCMP TF LTT, EQQ)
2747 FloatEqOp -> (FCMP TF EQQ, NE)
2748 FloatNeOp -> (FCMP TF EQQ, EQQ)
2749 FloatLtOp -> (FCMP TF LTT, NE)
2750 FloatLeOp -> (FCMP TF LE, NE)
2751 DoubleGtOp -> (FCMP TF LE, EQQ)
2752 DoubleGeOp -> (FCMP TF LTT, EQQ)
2753 DoubleEqOp -> (FCMP TF EQQ, NE)
2754 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2755 DoubleLtOp -> (FCMP TF LTT, NE)
2756 DoubleLeOp -> (FCMP TF LE, NE)
2758 genCondJump lbl (StPrim op [x, y])
2759 = trivialCode instr x y `thenNat` \ register ->
2760 getNewRegNat IntRep `thenNat` \ tmp ->
2762 code = registerCode register tmp
2763 result = registerName register tmp
2764 target = ImmCLbl lbl
2766 return (code . mkSeqInstr (BI cond result target))
2768 (instr, cond) = case op of
2769 CharGtOp -> (CMP LE, EQQ)
2770 CharGeOp -> (CMP LTT, EQQ)
2771 CharEqOp -> (CMP EQQ, NE)
2772 CharNeOp -> (CMP EQQ, EQQ)
2773 CharLtOp -> (CMP LTT, NE)
2774 CharLeOp -> (CMP LE, NE)
2775 IntGtOp -> (CMP LE, EQQ)
2776 IntGeOp -> (CMP LTT, EQQ)
2777 IntEqOp -> (CMP EQQ, NE)
2778 IntNeOp -> (CMP EQQ, EQQ)
2779 IntLtOp -> (CMP LTT, NE)
2780 IntLeOp -> (CMP LE, NE)
2781 WordGtOp -> (CMP ULE, EQQ)
2782 WordGeOp -> (CMP ULT, EQQ)
2783 WordEqOp -> (CMP EQQ, NE)
2784 WordNeOp -> (CMP EQQ, EQQ)
2785 WordLtOp -> (CMP ULT, NE)
2786 WordLeOp -> (CMP ULE, NE)
2787 AddrGtOp -> (CMP ULE, EQQ)
2788 AddrGeOp -> (CMP ULT, EQQ)
2789 AddrEqOp -> (CMP EQQ, NE)
2790 AddrNeOp -> (CMP EQQ, EQQ)
2791 AddrLtOp -> (CMP ULT, NE)
2792 AddrLeOp -> (CMP ULE, NE)
2794 #endif /* alpha_TARGET_ARCH */
2796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2798 #if i386_TARGET_ARCH
-- | Conditional jump (i386): materialise the condition into the flags,
-- then branch on it with a JXX to the target block.
genCondJump lbl cond_expr = do
    CondCode _ cc setup <- getCondCode cond_expr
    return (setup `snocOL` JXX cc lbl)
2806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2808 #if x86_64_TARGET_ARCH
-- NOTE(review): large parts of this definition are elided from this
-- excerpt (the float/int dispatch, the case alternatives, and the
-- instruction lists of plain_test / or_unordered / and_ordered).
2810 genCondJump id bool = do
2811 CondCode is_float cond cond_code <- getCondCode bool
-- Simple (non-float, or directly testable) case: one JXX.
2814 return (cond_code `snocOL` JXX cond id)
2816 lbl <- getBlockIdNat
2818 -- see comment with condFltReg
-- Float comparisons need extra care: the elided sequences below
-- presumably handle the unordered (NaN) result -- confirm against
-- condFltReg's comment referenced above.
2819 let code = case cond of
2825 plain_test = unitOL (
2828 or_unordered = toOL [
2832 and_ordered = toOL [
2838 return (cond_code `appOL` code)
2842 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2844 #if sparc_TARGET_ARCH
2846 genCondJump (BlockId id) bool = do
2847 CondCode is_float cond code <- getCondCode bool
-- A floating-point op cannot directly precede a BF (see the commentary
-- above), so the float branch gets a leading NOP as well as the usual
-- trailing delay-slot NOP; the int branch (BI) only needs the latter.
-- (The "return ... toOL (...)" wrapper lines are elided in this excerpt.)
2852 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2853 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2857 #endif /* sparc_TARGET_ARCH */
2860 #if powerpc_TARGET_ARCH
-- | Conditional jump (powerpc): materialise the condition into the CR,
-- then emit a conditional branch (BCC) to the target block.
genCondJump lbl cond_expr = do
    CondCode _ cc setup <- getCondCode cond_expr
    return (setup `snocOL` BCC cc lbl)
2866 #endif /* powerpc_TARGET_ARCH */
2869 -- -----------------------------------------------------------------------------
2870 -- Generating C calls
2872 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2873 -- @get_arg@, which moves the arguments to the correct registers/stack
2874 -- locations. Apart from that, the code is easy.
2876 -- (If applicable) Do not fill the delay slots here; you will confuse the
2877 -- register allocator.
2880 :: CmmCallTarget -- function to call
2881 -> [(CmmReg,MachHint)] -- where to put the result
2882 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2883 -> Maybe [GlobalReg] -- volatile regs to save
2886 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2888 #if alpha_TARGET_ARCH
2892 genCCall fn cconv result_regs args
2893 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2894 `thenNat` \ ((unused,_), argCode) ->
2896 nRegs = length allArgRegs - length unused
2897 code = asmSeqThen (map ($ []) argCode)
2900 LDA pv (AddrImm (ImmLab (ptext fn))),
2901 JSR ra (AddrReg pv) nRegs,
2902 LDGP gp (AddrReg ra)]
2904 ------------------------
2905 {- Try to get a value into a specific register (or registers) for
2906 a call. The first 6 arguments go into the appropriate
2907 argument register (separate registers for integer and floating
2908 point arguments, but used in lock-step), and the remaining
2909 arguments are dumped to the stack, beginning at 0(sp). Our
2910 first argument is a pair of the list of remaining argument
2911 registers to be assigned for this call and the next stack
2912 offset to use for overflowing arguments. This way,
2913 @get_Arg@ can be applied to all of a call's arguments using
2917 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2918 -> StixTree -- Current argument
2919 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2921 -- We have to use up all of our argument registers first...
2923 get_arg ((iDst,fDst):dsts, offset) arg
2924 = getRegister arg `thenNat` \ register ->
2926 reg = if isFloatingRep pk then fDst else iDst
2927 code = registerCode register reg
2928 src = registerName register reg
2929 pk = registerRep register
2932 if isFloatingRep pk then
2933 ((dsts, offset), if isFixed register then
2934 code . mkSeqInstr (FMOV src fDst)
2937 ((dsts, offset), if isFixed register then
2938 code . mkSeqInstr (OR src (RIReg src) iDst)
2941 -- Once we have run out of argument registers, we move to the
2944 get_arg ([], offset) arg
2945 = getRegister arg `thenNat` \ register ->
2946 getNewRegNat (registerRep register)
2949 code = registerCode register tmp
2950 src = registerName register tmp
2951 pk = registerRep register
2952 sz = primRepToSize pk
2954 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
2956 #endif /* alpha_TARGET_ARCH */
2958 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2960 #if i386_TARGET_ARCH
2962 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
2963 -- write barrier compiles to no code on x86/x86-64;
2964 -- we keep it this long in order to prevent earlier optimisations.
2966 -- we only cope with a single result for foreign calls
2967 genCCall (CmmPrim op) [(r,_)] args vols = do
2969 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
2970 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
2972 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
2973 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
2975 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
2976 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
2978 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
2979 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
2981 other_op -> outOfLineFloatOp op r args vols
2983 actuallyInlineFloatOp rep instr [(x,_)]
2984 = do res <- trivialUFCode rep instr x
2986 return (any (getRegisterReg r))
2988 genCCall target dest_regs args vols = do
2990 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
2991 #if !darwin_TARGET_OS
2992 tot_arg_size = sum sizes
2994 raw_arg_size = sum sizes
2995 tot_arg_size = roundTo 16 raw_arg_size
2996 arg_pad_size = tot_arg_size - raw_arg_size
2997 delta0 <- getDeltaNat
2998 setDeltaNat (delta0 - arg_pad_size)
3001 push_codes <- mapM push_arg (reverse args)
3002 delta <- getDeltaNat
3005 -- deal with static vs dynamic call targets
3006 (callinsns,cconv) <-
3009 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3010 -> -- ToDo: stdcall arg sizes
3011 return (unitOL (CALL (Left fn_imm) []), conv)
3012 where fn_imm = ImmCLbl lbl
3013 CmmForeignCall expr conv
3014 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3015 ASSERT(dyn_rep == I32)
3016 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3019 #if darwin_TARGET_OS
3021 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3022 DELTA (delta0 - arg_pad_size)]
3023 `appOL` concatOL push_codes
3026 = concatOL push_codes
3027 call = callinsns `appOL`
3029 -- Deallocate parameters after call for ccall;
3030 -- but not for stdcall (callee does it)
3031 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3032 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3034 [DELTA (delta + tot_arg_size)]
3037 setDeltaNat (delta + tot_arg_size)
3040 -- assign the results, if necessary
3041 assign_code [] = nilOL
3042 assign_code [(dest,_hint)] =
3044 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3045 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3046 F32 -> unitOL (GMOV fake0 r_dest)
3047 F64 -> unitOL (GMOV fake0 r_dest)
3048 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3050 r_dest_hi = getHiVRegFromLo r_dest
3051 rep = cmmRegRep dest
3052 r_dest = getRegisterReg dest
3053 assign_code many = panic "genCCall.assign_code many"
3055 return (push_code `appOL`
3057 assign_code dest_regs)
3065 roundTo a x | x `mod` a == 0 = x
3066 | otherwise = x + a - (x `mod` a)
3069 push_arg :: (CmmExpr,MachHint){-current argument-}
3070 -> NatM InstrBlock -- code
3072 push_arg (arg,_hint) -- we don't need the hints on x86
3073 | arg_rep == I64 = do
3074 ChildCode64 code r_lo <- iselExpr64 arg
3075 delta <- getDeltaNat
3076 setDeltaNat (delta - 8)
3078 r_hi = getHiVRegFromLo r_lo
3080 return ( code `appOL`
3081 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3082 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3087 (code, reg, sz) <- get_op arg
3088 delta <- getDeltaNat
3089 let size = arg_size sz
3090 setDeltaNat (delta-size)
3091 if (case sz of F64 -> True; F32 -> True; _ -> False)
3092 then return (code `appOL`
3093 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3095 GST sz reg (AddrBaseIndex (EABaseReg esp)
3099 else return (code `snocOL`
3100 PUSH I32 (OpReg reg) `snocOL`
3104 arg_rep = cmmExprRep arg
3107 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3109 (reg,code) <- getSomeReg op
3110 return (code, reg, cmmExprRep op)
3112 #endif /* i386_TARGET_ARCH */
3114 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Implement a CallishMachOp float primitive by calling the corresponding
-- C math-library routine (the 'fn' table below) instead of generating
-- inline code.  NOTE(review): several scaffolding lines are elided from
-- this excerpt ("= do", the uniq binding for tmp, the else-branch header,
-- and the head of the fn table's case).
3116 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3117 -> Maybe [GlobalReg] -> NatM InstrBlock
3118 outOfLineFloatOp mop res args vols
-- Build a (possibly PIC-indirected) reference to the C function.
3120 targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3121 let target = CmmForeignCall targetExpr CCallConv
-- F64 results come back directly; otherwise go via an F64 temporary and
-- assign it to res afterwards.
3123 if cmmRegRep res == F64
3125 stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3129 tmp = CmmLocal (LocalReg uq F64)
3131 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3132 code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3133 return (code1 `appOL` code2)
-- Map each MachOp to its libm entry point (single-precision variants use
-- the f-suffixed names).
3135 lbl = mkForeignLabel fn Nothing False
3138 MO_F32_Sqrt -> FSLIT("sqrtf")
3139 MO_F32_Sin -> FSLIT("sinf")
3140 MO_F32_Cos -> FSLIT("cosf")
3141 MO_F32_Tan -> FSLIT("tanf")
3142 MO_F32_Exp -> FSLIT("expf")
3143 MO_F32_Log -> FSLIT("logf")
3145 MO_F32_Asin -> FSLIT("asinf")
3146 MO_F32_Acos -> FSLIT("acosf")
3147 MO_F32_Atan -> FSLIT("atanf")
3149 MO_F32_Sinh -> FSLIT("sinhf")
3150 MO_F32_Cosh -> FSLIT("coshf")
3151 MO_F32_Tanh -> FSLIT("tanhf")
3152 MO_F32_Pwr -> FSLIT("powf")
3154 MO_F64_Sqrt -> FSLIT("sqrt")
3155 MO_F64_Sin -> FSLIT("sin")
3156 MO_F64_Cos -> FSLIT("cos")
3157 MO_F64_Tan -> FSLIT("tan")
3158 MO_F64_Exp -> FSLIT("exp")
3159 MO_F64_Log -> FSLIT("log")
3161 MO_F64_Asin -> FSLIT("asin")
3162 MO_F64_Acos -> FSLIT("acos")
3163 MO_F64_Atan -> FSLIT("atan")
3165 MO_F64_Sinh -> FSLIT("sinh")
3166 MO_F64_Cosh -> FSLIT("cosh")
3167 MO_F64_Tanh -> FSLIT("tanh")
3168 MO_F64_Pwr -> FSLIT("pow")
3170 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3174 #if x86_64_TARGET_ARCH
3176 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3177 -- write barrier compiles to no code on x86/x86-64;
3178 -- we keep it this long in order to prevent earlier optimisations.
3180 genCCall (CmmPrim op) [(r,_)] args vols =
3181 outOfLineFloatOp op r args vols
3183 genCCall target dest_regs args vols = do
3185 -- load up the register arguments
3186 (stack_args, aregs, fregs, load_args_code)
3187 <- load_args args allArgRegs allFPArgRegs nilOL
3190 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3191 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3192 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3193 -- for annotating the call instruction with
3195 sse_regs = length fp_regs_used
3197 tot_arg_size = arg_size * length stack_args
3199 -- On entry to the called function, %rsp should be aligned
3200 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3201 -- the return address is 16-byte aligned). In STG land
3202 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3203 -- need to make sure we push a multiple of 16-bytes of args,
3204 -- plus the return address, to get the correct alignment.
3205 -- Urg, this is hard. We need to feed the delta back into
3206 -- the arg pushing code.
3207 (real_size, adjust_rsp) <-
3208 if tot_arg_size `rem` 16 == 0
3209 then return (tot_arg_size, nilOL)
3210 else do -- we need to adjust...
3211 delta <- getDeltaNat
3212 setDeltaNat (delta-8)
3213 return (tot_arg_size+8, toOL [
3214 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3218 -- push the stack args, right to left
3219 push_code <- push_args (reverse stack_args) nilOL
3220 delta <- getDeltaNat
3222 -- deal with static vs dynamic call targets
3223 (callinsns,cconv) <-
3226 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3227 -> -- ToDo: stdcall arg sizes
3228 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3229 where fn_imm = ImmCLbl lbl
3230 CmmForeignCall expr conv
3231 -> do (dyn_r, dyn_c) <- getSomeReg expr
3232 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3235 -- The x86_64 ABI requires us to set %al to the number of SSE
3236 -- registers that contain arguments, if the called routine
3237 -- is a varargs function. We don't know whether it's a
3238 -- varargs function or not, so we have to assume it is.
3240 -- It's not safe to omit this assignment, even if the number
3241 -- of SSE regs in use is zero. If %al is larger than 8
3242 -- on entry to a varargs function, seg faults ensue.
3243 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3245 let call = callinsns `appOL`
3247 -- Deallocate parameters after call for ccall;
3248 -- but not for stdcall (callee does it)
3249 (if cconv == StdCallConv || real_size==0 then [] else
3250 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3252 [DELTA (delta + real_size)]
3255 setDeltaNat (delta + real_size)
3258 -- assign the results, if necessary
3259 assign_code [] = nilOL
3260 assign_code [(dest,_hint)] =
3262 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3263 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3264 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3266 rep = cmmRegRep dest
3267 r_dest = getRegisterReg dest
3268 assign_code many = panic "genCCall.assign_code many"
3270 return (load_args_code `appOL`
3273 assign_eax sse_regs `appOL`
3275 assign_code dest_regs)
3278 arg_size = 8 -- always, at the mo
3280 load_args :: [(CmmExpr,MachHint)]
3281 -> [Reg] -- int regs avail for args
3282 -> [Reg] -- FP regs avail for args
3284 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3285 load_args args [] [] code = return (args, [], [], code)
3286 -- no more regs to use
3287 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3288 -- no more args to push
3289 load_args ((arg,hint) : rest) aregs fregs code
3290 | isFloatingRep arg_rep =
3294 arg_code <- getAnyReg arg
3295 load_args rest aregs rs (code `appOL` arg_code r)
3300 arg_code <- getAnyReg arg
3301 load_args rest rs fregs (code `appOL` arg_code r)
3303 arg_rep = cmmExprRep arg
3306 (args',ars,frs,code') <- load_args rest aregs fregs code
3307 return ((arg,hint):args', ars, frs, code')
3309 push_args [] code = return code
3310 push_args ((arg,hint):rest) code
3311 | isFloatingRep arg_rep = do
3312 (arg_reg, arg_code) <- getSomeReg arg
3313 delta <- getDeltaNat
3314 setDeltaNat (delta-arg_size)
3315 let code' = code `appOL` toOL [
3316 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
3317 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3318 DELTA (delta-arg_size)]
3319 push_args rest code'
3322 -- we only ever generate word-sized function arguments. Promotion
3323 -- has already happened: our Int8# type is kept sign-extended
3324 -- in an Int#, for example.
3325 ASSERT(arg_rep == I64) return ()
3326 (arg_op, arg_code) <- getOperand arg
3327 delta <- getDeltaNat
3328 setDeltaNat (delta-arg_size)
3329 let code' = code `appOL` toOL [PUSH I64 arg_op,
3330 DELTA (delta-arg_size)]
3331 push_args rest code'
3333 arg_rep = cmmExprRep arg
3336 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3338 #if sparc_TARGET_ARCH
3340 The SPARC calling convention is an absolute
3341 nightmare. The first 6x32 bits of arguments are mapped into
3342 %o0 through %o5, and the remaining arguments are dumped to the
3343 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3345 If we have to put args on the stack, move %o6==%sp down by
3346 the number of words to go on the stack, to ensure there's enough space.
3348 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3349 16 words above the stack pointer is a word for the address of
3350 a structure return value. I use this as a temporary location
3351 for moving values from float to int regs. Certainly it isn't
3352 safe to put anything in the 16 words starting at %sp, since
3353 this area can get trashed at any time due to window overflows
3354 caused by signal handlers.
3356 A final complication (if the above isn't enough) is that
3357 we can't blithely calculate the arguments one by one into
3358 %o0 .. %o5. Consider the following nested calls:
3362 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3363 the inner call will itself use %o0, which trashes the value put there
3364 in preparation for the outer call. Upshot: we need to calculate the
3365 args into temporary regs, and move those to arg regs or onto the
3366 stack only immediately prior to the call proper. Sigh.
-- SPARC genCCall: every argument is first computed into one or two fresh
-- integer vregs (arg_to_int_vregs); only immediately before the call are
-- those vregs moved into %o0..%o5 or stack slots (move_final).  This keeps
-- nested calls from trashing already-loaded argument registers -- see the
-- calling-convention commentary above.
-- NOTE(review): this listing is elided; source lines are missing between
-- some of the numbered lines below.
3369 genCCall target dest_regs argsAndHints vols = do
3371 args = map fst argsAndHints
3372 argcode_and_vregs <- mapM arg_to_int_vregs args
3374 (argcodes, vregss) = unzip argcode_and_vregs
3375 n_argRegs = length allArgRegs
3376 n_argRegs_used = min (length vregs) n_argRegs
3377 vregs = concat vregss
3378 -- deal with static vs dynamic call targets
3379 callinsns <- (case target of
3380 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3381 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3382 CmmForeignCall expr conv -> do
3383 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3384 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- Out-of-line floating-point primop: call the corresponding libm routine.
3386 (res, reduce) <- outOfLineFloatOp mop
3387 lblOrMopExpr <- case res of
3389 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3391 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3392 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- F32 ops set reduce=True below, and are rejected here outright.
3393 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3397 argcode = concatOL argcodes
-- If there are more args than arg regs, move %sp down (keeping 8-byte
-- alignment) to make room, and restore it after the call.
3398 (move_sp_down, move_sp_up)
3399 = let diff = length vregs - n_argRegs
3400 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3403 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3405 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3406 return (argcode `appOL`
3407 move_sp_down `appOL`
3408 transfer_code `appOL`
3413 -- move args from the integer vregs into which they have been
3414 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3415 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3417 move_final [] _ offset -- all args done
3420 move_final (v:vs) [] offset -- out of aregs; move to stack
3421 = ST I32 v (spRel offset)
3422 : move_final vs [] (offset+1)
3424 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3425 = OR False g0 (RIReg v) a
3426 : move_final vs az offset
3428 -- generate code to calculate an argument, and move it into one
3429 -- or two integer vregs.
3430 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3431 arg_to_int_vregs arg
-- An I64 argument occupies two vregs: high word first, then low word.
3432 | (cmmExprRep arg) == I64
3434 (ChildCode64 code r_lo) <- iselExpr64 arg
3436 r_hi = getHiVRegFromLo r_lo
3437 return (code, [r_hi, r_lo])
3440 (src, code) <- getSomeReg arg
3441 tmp <- getNewRegNat (cmmExprRep arg)
-- F64: bounce each 32-bit half through the scratch slot at [%sp+16]
-- (see the lcc note above) to move the bits from float to int regs.
3446 v1 <- getNewRegNat I32
3447 v2 <- getNewRegNat I32
3450 FMOV F64 src f0 `snocOL`
3451 ST F32 f0 (spRel 16) `snocOL`
3452 LD I32 (spRel 16) v1 `snocOL`
3453 ST F32 (fPair f0) (spRel 16) `snocOL`
3454 LD I32 (spRel 16) v2
-- F32: a single bounce through the scratch slot.
3459 v1 <- getNewRegNat I32
3462 ST F32 src (spRel 16) `snocOL`
3463 LD I32 (spRel 16) v1
-- Integer: copy into a fresh vreg (OR with %g0 acts as a register move).
3468 v1 <- getNewRegNat I32
3470 code `snocOL` OR False g0 (RIReg src) v1
-- Map a callish MachOp to the libm routine implementing it.  The Bool
-- ("reduce") marks the F32 variants, which this backend cannot handle
-- (genCCall panics on them above).
3474 outOfLineFloatOp mop =
3476 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3477 mkForeignLabel functionName Nothing True
3478 let mopLabelOrExpr = case mopExpr of
3479 CmmLit (CmmLabel lbl) -> Left lbl
3481 return (mopLabelOrExpr, reduce)
3483 (reduce, functionName) = case mop of
3484 MO_F32_Exp -> (True, FSLIT("exp"))
3485 MO_F32_Log -> (True, FSLIT("log"))
3486 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3488 MO_F32_Sin -> (True, FSLIT("sin"))
3489 MO_F32_Cos -> (True, FSLIT("cos"))
3490 MO_F32_Tan -> (True, FSLIT("tan"))
3492 MO_F32_Asin -> (True, FSLIT("asin"))
3493 MO_F32_Acos -> (True, FSLIT("acos"))
3494 MO_F32_Atan -> (True, FSLIT("atan"))
3496 MO_F32_Sinh -> (True, FSLIT("sinh"))
3497 MO_F32_Cosh -> (True, FSLIT("cosh"))
3498 MO_F32_Tanh -> (True, FSLIT("tanh"))
3500 MO_F64_Exp -> (False, FSLIT("exp"))
3501 MO_F64_Log -> (False, FSLIT("log"))
3502 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3504 MO_F64_Sin -> (False, FSLIT("sin"))
3505 MO_F64_Cos -> (False, FSLIT("cos"))
3506 MO_F64_Tan -> (False, FSLIT("tan"))
3508 MO_F64_Asin -> (False, FSLIT("asin"))
3509 MO_F64_Acos -> (False, FSLIT("acos"))
3510 MO_F64_Atan -> (False, FSLIT("atan"))
3512 MO_F64_Sinh -> (False, FSLIT("sinh"))
3513 MO_F64_Cosh -> (False, FSLIT("cosh"))
3514 MO_F64_Tanh -> (False, FSLIT("tanh"))
3516 other -> pprPanic "outOfLineFloatOp(sparc) "
3517 (pprCallishMachOp mop)
3519 #endif /* sparc_TARGET_ARCH */
3521 #if powerpc_TARGET_ARCH
3523 #if darwin_TARGET_OS || linux_TARGET_OS
3525 The PowerPC calling convention for Darwin/Mac OS X
3526 is described in Apple's document
3527 "Inside Mac OS X - Mach-O Runtime Architecture".
3529 PowerPC Linux uses the System V Release 4 Calling Convention
3530 for PowerPC. It is described in the
3531 "System V Application Binary Interface PowerPC Processor Supplement".
3533 Both conventions are similar:
3534 Parameters may be passed in general-purpose registers starting at r3, in
3535 floating point registers starting at f1, or on the stack.
3537 But there are substantial differences:
3538 * The number of registers used for parameter passing and the exact set of
3539 nonvolatile registers differs (see MachRegs.lhs).
3540 * On Darwin, stack space is always reserved for parameters, even if they are
3541 passed in registers. The called routine may choose to save parameters from
3542 registers to the corresponding space on the stack.
3543 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3544 parameter is passed in an FPR.
3545 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3546 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3547 Darwin just treats an I64 like two separate I32s (high word first).
3548 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3549 4-byte aligned like everything else on Darwin.
3550 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3551 PowerPC Linux does not agree, so neither do we.
3553 According to both conventions, the parameter area should be part of the
3554 caller's stack frame, allocated in the caller's prologue code (large enough
3555 to hold the parameter lists for all called routines). The NCG already
3556 uses the stack for register spilling, leaving 64 bytes free at the top.
3557 If we need a larger parameter area than that, we just allocate a new stack
3558 frame just before ccalling.
-- A Cmm write barrier on PowerPC is a single lwsync instruction; it takes
-- no arguments and produces no results.
genCCall (CmmPrim MO_WriteBarrier) _ _ _
    = return (unitOL LWSYNC)
-- PowerPC genCCall (Darwin and SysV/Linux ABIs): pass arguments per the
-- convention described in the long comment above, emit the call (BL for a
-- static label, MTCTR/BCTRL for a dynamic target), then move the result
-- out of r3/r3:r4/f1.  Callish float ops go out of line via libm.
-- NOTE(review): this listing is elided; source lines are missing between
-- some of the numbered lines below.
3565 genCCall target dest_regs argsAndHints vols
3566 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3567 -- we rely on argument promotion in the codeGen
3569 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3571 allArgRegs allFPArgRegs
3575 (labelOrExpr, reduceToF32) <- case target of
3576 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3577 CmmForeignCall expr conv -> return (Right expr, False)
3578 CmmPrim mop -> outOfLineFloatOp mop
3580 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3581 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- Static target: direct BL.  Dynamic target: load into CTR and BCTRL.
3586 `snocOL` BL lbl usedRegs
3589 (dynReg, dynCode) <- getSomeReg dyn
3591 `snocOL` MTCTR dynReg
3593 `snocOL` BCTRL usedRegs
3596 #if darwin_TARGET_OS
3597 initialStackOffset = 24
3598 -- size of linkage area + size of arguments, in bytes
3599 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3600 map machRepByteWidth argReps
3601 #elif linux_TARGET_OS
3602 initialStackOffset = 8
3603 stackDelta finalStack = roundTo 16 finalStack
3605 args = map fst argsAndHints
3606 argReps = map cmmExprRep args
-- Round x up to the next multiple of a.
3608 roundTo a x | x `mod` a == 0 = x
3609 | otherwise = x + a - (x `mod` a)
3611 move_sp_down finalStack
3613 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3616 where delta = stackDelta finalStack
3617 move_sp_up finalStack
3619 toOL [ADD sp sp (RIImm (ImmInt delta)),
3622 where delta = stackDelta finalStack
-- passArguments: thread (remaining gprs, remaining fprs, stack offset,
-- accumulated code, regs used so far) over the argument list.
3625 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 arguments: two words, handled per-ABI (see the comment block above).
3626 passArguments ((arg,I64):args) gprs fprs stackOffset
3627 accumCode accumUsed =
3629 ChildCode64 code vr_lo <- iselExpr64 arg
3630 let vr_hi = getHiVRegFromLo vr_lo
3632 #if darwin_TARGET_OS
-- Darwin: an I64 is simply two I32s, high word first; each word goes to
-- the next GPR if one is left, otherwise to its reserved stack slot.
3637 (accumCode `appOL` code
3638 `snocOL` storeWord vr_hi gprs stackOffset
3639 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3640 ((take 2 gprs) ++ accumUsed)
3642 storeWord vr (gpr:_) offset = MR gpr vr
3643 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3645 #elif linux_TARGET_OS
-- SysV: I64 goes in an odd/even GPR pair (skipping a GPR if needed) or,
-- failing that, to an 8-byte-aligned stack slot.
3646 let stackOffset' = roundTo 8 stackOffset
3647 stackCode = accumCode `appOL` code
3648 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3649 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3650 regCode hireg loreg =
3651 accumCode `appOL` code
3652 `snocOL` MR hireg vr_hi
3653 `snocOL` MR loreg vr_lo
3656 hireg : loreg : regs | even (length gprs) ->
3657 passArguments args regs fprs stackOffset
3658 (regCode hireg loreg) (hireg : loreg : accumUsed)
3659 _skipped : hireg : loreg : regs ->
3660 passArguments args regs fprs stackOffset
3661 (regCode hireg loreg) (hireg : loreg : accumUsed)
3662 _ -> -- only one or no regs left
3663 passArguments args [] fprs (stackOffset'+8)
-- All other reps: pass in the next suitable register if available,
-- otherwise on the stack.
3667 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3668 | reg : _ <- regs = do
3669 register <- getRegister arg
3670 let code = case register of
3671 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3672 Any _ acode -> acode reg
3676 #if darwin_TARGET_OS
3677 -- The Darwin ABI requires that we reserve stack slots for register parameters
3678 (stackOffset + stackBytes)
3679 #elif linux_TARGET_OS
3680 -- ... the SysV ABI doesn't.
3683 (accumCode `appOL` code)
3686 (vr, code) <- getSomeReg arg
3690 (stackOffset' + stackBytes)
3691 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3694 #if darwin_TARGET_OS
3695 -- stackOffset is at least 4-byte aligned
3696 -- The Darwin ABI is happy with that.
3697 stackOffset' = stackOffset
3699 -- ... the SysV ABI requires 8-byte alignment for doubles.
3700 stackOffset' | rep == F64 = roundTo 8 stackOffset
3701 | otherwise = stackOffset
3703 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep bookkeeping: GPRs consumed, FPRs consumed, stack bytes, and
-- which register pool this rep draws from.
3704 (nGprs, nFprs, stackBytes, regs) = case rep of
3705 I32 -> (1, 0, 4, gprs)
3706 #if darwin_TARGET_OS
3707 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3709 F32 -> (1, 1, 4, fprs)
3710 F64 -> (2, 1, 8, fprs)
3711 #elif linux_TARGET_OS
3712 -- ... the SysV ABI doesn't.
3713 F32 -> (0, 1, 4, fprs)
3714 F64 -> (0, 1, 8, fprs)
-- Copy the call result from the ABI return registers into dest; FRSP
-- narrows the F64 returned by a libm routine back to F32 when required.
3717 moveResult reduceToF32 =
3721 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3722 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3723 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3725 | otherwise -> unitOL (MR r_dest r3)
3726 where rep = cmmRegRep dest
3727 r_dest = getRegisterReg dest
-- Map a callish MachOp to its libm routine; reduce=True marks F32 ops
-- whose double result must be narrowed with FRSP (see moveResult).
3729 outOfLineFloatOp mop =
3731 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3732 mkForeignLabel functionName Nothing True
3733 let mopLabelOrExpr = case mopExpr of
3734 CmmLit (CmmLabel lbl) -> Left lbl
3736 return (mopLabelOrExpr, reduce)
3738 (functionName, reduce) = case mop of
3739 MO_F32_Exp -> (FSLIT("exp"), True)
3740 MO_F32_Log -> (FSLIT("log"), True)
3741 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3743 MO_F32_Sin -> (FSLIT("sin"), True)
3744 MO_F32_Cos -> (FSLIT("cos"), True)
3745 MO_F32_Tan -> (FSLIT("tan"), True)
3747 MO_F32_Asin -> (FSLIT("asin"), True)
3748 MO_F32_Acos -> (FSLIT("acos"), True)
3749 MO_F32_Atan -> (FSLIT("atan"), True)
3751 MO_F32_Sinh -> (FSLIT("sinh"), True)
3752 MO_F32_Cosh -> (FSLIT("cosh"), True)
3753 MO_F32_Tanh -> (FSLIT("tanh"), True)
3754 MO_F32_Pwr -> (FSLIT("pow"), True)
3756 MO_F64_Exp -> (FSLIT("exp"), False)
3757 MO_F64_Log -> (FSLIT("log"), False)
3758 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3760 MO_F64_Sin -> (FSLIT("sin"), False)
3761 MO_F64_Cos -> (FSLIT("cos"), False)
3762 MO_F64_Tan -> (FSLIT("tan"), False)
3764 MO_F64_Asin -> (FSLIT("asin"), False)
3765 MO_F64_Acos -> (FSLIT("acos"), False)
3766 MO_F64_Atan -> (FSLIT("atan"), False)
3768 MO_F64_Sinh -> (FSLIT("sinh"), False)
3769 MO_F64_Cosh -> (FSLIT("cosh"), False)
3770 MO_F64_Tanh -> (FSLIT("tanh"), False)
3771 MO_F64_Pwr -> (FSLIT("pow"), False)
3772 other -> pprPanic "genCCall(ppc): unknown callish op"
3773 (pprCallishMachOp other)
3775 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3777 #endif /* powerpc_TARGET_ARCH */
3780 -- -----------------------------------------------------------------------------
3781 -- Generating a table-branch
-- genSwitch: compile a table branch.  Each entry in ids is either the
-- target block for that index or Nothing (a hole, emitted as a zero word).
-- PIC builds use position-independent label-difference tables; non-PIC
-- builds use absolute-label tables.
-- NOTE(review): this listing is elided; source lines are missing between
-- some of the numbered lines below (including the #if opt_PIC branch
-- headers implied by the two code paths per architecture).
3783 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3785 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- PIC x86/x86-64: table of label-minus-table offsets; add the table base
-- back before jumping.
3789 (reg,e_code) <- getSomeReg expr
3790 lbl <- getNewLabelNat
3791 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3792 (tableReg,t_code) <- getSomeReg $ dynRef
3794 jumpTable = map jumpTableEntryRel ids
3796 jumpTableEntryRel Nothing
3797 = CmmStaticLit (CmmInt 0 wordRep)
3798 jumpTableEntryRel (Just (BlockId id))
3799 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3800 where blockLabel = mkAsmTempLabel id
3802 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3803 (EAIndex reg wORD_SIZE) (ImmInt 0))
3805 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3806 -- on Mac OS X/x86_64, put the jump table in the text section
3807 -- to work around a limitation of the linker.
3808 -- ld64 is unable to handle the relocations for
3810 -- if L0 is not preceded by a non-anonymous label in its section.
3812 code = e_code `appOL` t_code `appOL` toOL [
3813 ADD wordRep op (OpReg tableReg),
3814 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3815 LDATA Text (CmmDataLabel lbl : jumpTable)
3818 code = e_code `appOL` t_code `appOL` toOL [
3819 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3820 ADD wordRep op (OpReg tableReg),
3821 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC x86: jump indirect through a table of absolute labels.
3827 (reg,e_code) <- getSomeReg expr
3828 lbl <- getNewLabelNat
3830 jumpTable = map jumpTableEntry ids
3831 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3832 code = e_code `appOL` toOL [
3833 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3834 JMP_TBL op [ id | Just id <- ids ]
3838 #elif powerpc_TARGET_ARCH
-- PIC PowerPC: scale the index by 4 (SLW by 2), load the relative entry,
-- add the table base, and branch via CTR.
3842 (reg,e_code) <- getSomeReg expr
3843 tmp <- getNewRegNat I32
3844 lbl <- getNewLabelNat
3845 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3846 (tableReg,t_code) <- getSomeReg $ dynRef
3848 jumpTable = map jumpTableEntryRel ids
3850 jumpTableEntryRel Nothing
3851 = CmmStaticLit (CmmInt 0 wordRep)
3852 jumpTableEntryRel (Just (BlockId id))
3853 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3854 where blockLabel = mkAsmTempLabel id
3856 code = e_code `appOL` t_code `appOL` toOL [
3857 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3858 SLW tmp reg (RIImm (ImmInt 2)),
3859 LD I32 tmp (AddrRegReg tableReg tmp),
3860 ADD tmp tmp (RIReg tableReg),
3862 BCTR [ id | Just id <- ids ]
-- Non-PIC PowerPC: address the absolute-label table via HA/LO halves of
-- the table label.
3867 (reg,e_code) <- getSomeReg expr
3868 tmp <- getNewRegNat I32
3869 lbl <- getNewLabelNat
3871 jumpTable = map jumpTableEntry ids
3873 code = e_code `appOL` toOL [
3874 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3875 SLW tmp reg (RIImm (ImmInt 2)),
3876 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3877 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3879 BCTR [ id | Just id <- ids ]
-- Fallback for architectures without a table-branch implementation.
3883 genSwitch expr ids = panic "ToDo: genSwitch"
-- | Build one word of a (non-PIC) jump table: the label of the target
-- block, or a zero-word filler where there is no branch target.
jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
jumpTableEntry (Just (BlockId uniq)) =
    CmmStaticLit (CmmLabel (mkAsmTempLabel uniq))
3890 -- -----------------------------------------------------------------------------
3892 -- -----------------------------------------------------------------------------
3895 -- -----------------------------------------------------------------------------
3896 -- 'condIntReg' and 'condFltReg': condition codes into registers
3898 -- Turn those condition codes into integers now (when they appear on
3899 -- the right hand side of an assignment).
3901 -- (If applicable) Do not fill the delay slots here; you will confuse the
3902 -- register allocator.
-- Materialise a comparison result (condition codes) into a register.
3904 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3906 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3908 #if alpha_TARGET_ARCH
-- Alpha port: condition-into-register was never implemented.
3909 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3910 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3911 #endif /* alpha_TARGET_ARCH */
3913 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3915 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86-64: SETcc into a byte register, then zero-extend to 32 bits.
-- NOTE(review): elided listing; some source lines are missing below.
3917 condIntReg cond x y = do
3918 CondCode _ cond cond_code <- condIntCode cond x y
3919 tmp <- getNewRegNat I8
3921 code dst = cond_code `appOL` toOL [
3922 SETCC cond (OpReg tmp),
3923 MOVZxL I8 (OpReg tmp) (OpReg dst)
3926 return (Any I32 code)
3930 #if i386_TARGET_ARCH
-- i386: same SETcc/MOVZxL pattern for float comparisons.
3932 condFltReg cond x y = do
3933 CondCode _ cond cond_code <- condFltCode cond x y
3934 tmp <- getNewRegNat I8
3936 code dst = cond_code `appOL` toOL [
3937 SETCC cond (OpReg tmp),
3938 MOVZxL I8 (OpReg tmp) (OpReg dst)
3941 return (Any I32 code)
3945 #if x86_64_TARGET_ARCH
-- x86-64 condFltReg: like condIntReg, but must also consult the parity
-- flag to get the required result for unordered (NaN) operands.
-- NOTE(review): elided listing; some source lines are missing below.
3947 condFltReg cond x y = do
3948 CondCode _ cond cond_code <- condFltCode cond x y
3949 tmp1 <- getNewRegNat wordRep
3950 tmp2 <- getNewRegNat wordRep
3952 -- We have to worry about unordered operands (eg. comparisons
3953 -- against NaN). If the operands are unordered, the comparison
3954 -- sets the parity flag, carry flag and zero flag.
3955 -- All comparisons are supposed to return false for unordered
3956 -- operands except for !=, which returns true.
3958 -- Optimisation: we don't have to test the parity flag if we
3959 -- know the test has already excluded the unordered case: eg >
3960 -- and >= test for a zero carry flag, which can only occur for
3961 -- ordered operands.
3963 -- ToDo: by reversing comparisons we could avoid testing the
3964 -- parity flag in more cases.
3969 NE -> or_unordered dst
3970 GU -> plain_test dst
3971 GEU -> plain_test dst
3972 _ -> and_ordered dst)
3974 plain_test dst = toOL [
3975 SETCC cond (OpReg tmp1),
3976 MOVZxL I8 (OpReg tmp1) (OpReg dst)
3978 or_unordered dst = toOL [
3979 SETCC cond (OpReg tmp1),
3980 SETCC PARITY (OpReg tmp2),
3981 OR I8 (OpReg tmp1) (OpReg tmp2),
3982 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3984 and_ordered dst = toOL [
3985 SETCC cond (OpReg tmp1),
3986 SETCC NOTPARITY (OpReg tmp2),
3987 AND I8 (OpReg tmp1) (OpReg tmp2),
3988 MOVZxL I8 (OpReg tmp2) (OpReg dst)
3991 return (Any I32 code)
3995 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3997 #if sparc_TARGET_ARCH
-- SPARC condIntReg/condFltReg.  EQQ/NE against zero and register-register
-- are handled branch-free via carry tricks (SUBcc through %g0); the
-- general case branches over two basic blocks that load 0 or 1 into dst.
-- NOTE(review): elided listing; some source lines are missing below.
3999 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4000 (src, code) <- getSomeReg x
4001 tmp <- getNewRegNat I32
4003 code__2 dst = code `appOL` toOL [
4004 SUB False True g0 (RIReg src) g0,
4005 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4006 return (Any I32 code__2)
4008 condIntReg EQQ x y = do
4009 (src1, code1) <- getSomeReg x
4010 (src2, code2) <- getSomeReg y
4011 tmp1 <- getNewRegNat I32
4012 tmp2 <- getNewRegNat I32
4014 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4015 XOR False src1 (RIReg src2) dst,
4016 SUB False True g0 (RIReg dst) g0,
4017 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4018 return (Any I32 code__2)
4020 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4021 (src, code) <- getSomeReg x
4022 tmp <- getNewRegNat I32
4024 code__2 dst = code `appOL` toOL [
4025 SUB False True g0 (RIReg src) g0,
4026 ADD True False g0 (RIImm (ImmInt 0)) dst]
4027 return (Any I32 code__2)
4029 condIntReg NE x y = do
4030 (src1, code1) <- getSomeReg x
4031 (src2, code2) <- getSomeReg y
4032 tmp1 <- getNewRegNat I32
4033 tmp2 <- getNewRegNat I32
4035 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4036 XOR False src1 (RIReg src2) dst,
4037 SUB False True g0 (RIReg dst) g0,
4038 ADD True False g0 (RIImm (ImmInt 0)) dst]
4039 return (Any I32 code__2)
-- General case: branch to lbl1 if cond holds (dst := 1), else fall
-- through (dst := 0); both paths rejoin at lbl2.  NOPs fill delay slots.
4041 condIntReg cond x y = do
4042 BlockId lbl1 <- getBlockIdNat
4043 BlockId lbl2 <- getBlockIdNat
4044 CondCode _ cond cond_code <- condIntCode cond x y
4046 code__2 dst = cond_code `appOL` toOL [
4047 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4048 OR False g0 (RIImm (ImmInt 0)) dst,
4049 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4050 NEWBLOCK (BlockId lbl1),
4051 OR False g0 (RIImm (ImmInt 1)) dst,
4052 NEWBLOCK (BlockId lbl2)]
4053 return (Any I32 code__2)
-- Float version: identical control-flow shape, using the float-branch BF.
4055 condFltReg cond x y = do
4056 BlockId lbl1 <- getBlockIdNat
4057 BlockId lbl2 <- getBlockIdNat
4058 CondCode _ cond cond_code <- condFltCode cond x y
4060 code__2 dst = cond_code `appOL` toOL [
4062 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4063 OR False g0 (RIImm (ImmInt 0)) dst,
4064 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4065 NEWBLOCK (BlockId lbl1),
4066 OR False g0 (RIImm (ImmInt 1)) dst,
4067 NEWBLOCK (BlockId lbl2)]
4068 return (Any I32 code__2)
4070 #endif /* sparc_TARGET_ARCH */
4072 #if powerpc_TARGET_ARCH
-- PowerPC: shared worker for condIntReg/condFltReg.  Extracts the chosen
-- CR bit into dst (MFCR + RLWINM appear in elided lines), negating the bit
-- with CRNOR when the condition is the complement of a hardware bit.
-- NOTE(review): heavily elided listing; many source lines (including the
-- MFCR sequence and the full (bit, do_negate) table) are missing below.
4073 condReg getCond = do
4074 lbl1 <- getBlockIdNat
4075 lbl2 <- getBlockIdNat
4076 CondCode _ cond cond_code <- getCond
4078 {- code dst = cond_code `appOL` toOL [
4087 code dst = cond_code
4091 RLWINM dst dst (bit + 1) 31 31
4094 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4097 (bit, do_negate) = case cond of
4111 return (Any I32 code)
4113 condIntReg cond x y = condReg (condIntCode cond x y)
4114 condFltReg cond x y = condReg (condFltCode cond x y)
4115 #endif /* powerpc_TARGET_ARCH */
4118 -- -----------------------------------------------------------------------------
4119 -- 'trivial*Code': deal with trivial instructions
4121 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4122 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4123 -- Only look for constants on the right hand side, because that's
4124 -- where the generic optimizer will have put them.
4126 -- Similarly, for unary instructions, we don't have to worry about
4127 -- matching an StInt as the argument, because genericOpt will already
4128 -- have handled the constant-folding.
-- Per-architecture type signatures for trivialCode / trivialFCode /
-- trivialUCode / trivialUFCode, selected via the IF_ARCH_* CPP macros
-- from NCG.h.  On i386/x86-64 trivialCode also takes an optional
-- "reversed" instruction for commuted immediate operands.
-- NOTE(review): elided listing; the signature heads and several
-- alternatives are missing between the numbered lines below.
4132 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4133 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4134 -> Maybe (Operand -> Operand -> Instr)
4135 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4136 -> Maybe (Operand -> Operand -> Instr)
4137 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4138 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4140 -> CmmExpr -> CmmExpr -- the two arguments
4143 #ifndef powerpc_TARGET_ARCH
4146 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4147 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4148 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4149 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4151 -> CmmExpr -> CmmExpr -- the two arguments
4157 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4158 ,IF_ARCH_i386 ((Operand -> Instr)
4159 ,IF_ARCH_x86_64 ((Operand -> Instr)
4160 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4161 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4163 -> CmmExpr -- the one argument
4166 #ifndef powerpc_TARGET_ARCH
4169 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4170 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4171 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4172 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4174 -> CmmExpr -- the one argument
4178 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4180 #if alpha_TARGET_ARCH
-- Alpha trivial*Code: dyadic/unary int and float instruction selection in
-- the old pre-monadic style (thenNat / registerCode / mkSeqInstr).
-- NOTE(review): elided listing; some source lines are missing below.
-- Dyadic op with an immediate right operand.
4182 trivialCode instr x (StInt y)
4184 = getRegister x `thenNat` \ register ->
4185 getNewRegNat IntRep `thenNat` \ tmp ->
4187 code = registerCode register tmp
4188 src1 = registerName register tmp
4189 src2 = ImmInt (fromInteger y)
4190 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4192 return (Any IntRep code__2)
-- Dyadic op, both operands in registers.
4194 trivialCode instr x y
4195 = getRegister x `thenNat` \ register1 ->
4196 getRegister y `thenNat` \ register2 ->
4197 getNewRegNat IntRep `thenNat` \ tmp1 ->
4198 getNewRegNat IntRep `thenNat` \ tmp2 ->
4200 code1 = registerCode register1 tmp1 []
4201 src1 = registerName register1 tmp1
4202 code2 = registerCode register2 tmp2 []
4203 src2 = registerName register2 tmp2
4204 code__2 dst = asmSeqThen [code1, code2] .
4205 mkSeqInstr (instr src1 (RIReg src2) dst)
4207 return (Any IntRep code__2)
-- Unary integer op.
4210 trivialUCode instr x
4211 = getRegister x `thenNat` \ register ->
4212 getNewRegNat IntRep `thenNat` \ tmp ->
4214 code = registerCode register tmp
4215 src = registerName register tmp
4216 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4218 return (Any IntRep code__2)
-- Dyadic floating-point op (always at F64 here).
4221 trivialFCode _ instr x y
4222 = getRegister x `thenNat` \ register1 ->
4223 getRegister y `thenNat` \ register2 ->
4224 getNewRegNat F64 `thenNat` \ tmp1 ->
4225 getNewRegNat F64 `thenNat` \ tmp2 ->
4227 code1 = registerCode register1 tmp1
4228 src1 = registerName register1 tmp1
4230 code2 = registerCode register2 tmp2
4231 src2 = registerName register2 tmp2
4233 code__2 dst = asmSeqThen [code1 [], code2 []] .
4234 mkSeqInstr (instr src1 src2 dst)
4236 return (Any F64 code__2)
-- Unary floating-point op.
4238 trivialUFCode _ instr x
4239 = getRegister x `thenNat` \ register ->
4240 getNewRegNat F64 `thenNat` \ tmp ->
4242 code = registerCode register tmp
4243 src = registerName register tmp
4244 code__2 dst = code . mkSeqInstr (instr src dst)
4246 return (Any F64 code__2)
4248 #endif /* alpha_TARGET_ARCH */
4250 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4252 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4255 The Rules of the Game are:
4257 * You cannot assume anything about the destination register dst;
4258 it may be anything, including a fixed reg.
4260 * You may compute an operand into a fixed reg, but you may not
4261 subsequently change the contents of that fixed reg. If you
4262 want to do so, first copy the value either to a temporary
4263 or into dst. You are free to modify dst even if it happens
4264 to be a fixed reg -- that's not your problem.
4266 * You cannot assume that a fixed reg will stay live over an
4267 arbitrary computation. The same applies to the dst reg.
4269 * Temporary regs obtained from getNewRegNat are distinct from
4270 each other and from all other regs, and stay live over
4271 arbitrary computations.
4273 --------------------
4275 SDM's version of The Rules:
4277 * If getRegister returns Any, that means it can generate correct
4278 code which places the result in any register, period. Even if that
4279 register happens to be read during the computation.
4281 Corollary #1: this means that if you are generating code for an
4282 operation with two arbitrary operands, you cannot assign the result
4283 of the first operand into the destination register before computing
4284 the second operand. The second operand might require the old value
4285 of the destination register.
4287 Corollary #2: A function might be able to generate more efficient
4288 code if it knows the destination register is a new temporary (and
4289 therefore not read by any of the sub-computations).
4291 * If getRegister returns Any, then the code it generates may modify only:
4292 (a) fresh temporaries
4293 (b) the destination register
4294 (c) known registers (eg. %ecx is used by shifts)
4295 In particular, it may *not* modify global registers, unless the global
4296 register happens to be the destination register.
-- x86/x86-64 trivialCode: two-address dyadic ops, obeying The Rules above.
-- NOTE(review): elided listing; some source lines are missing below.
-- A non-64-bit literal on the LEFT can be folded as an immediate when the
-- caller supplies a reversed-operand form of the instruction.
4299 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4300 | not (is64BitLit lit_a) = do
4301 b_code <- getAnyReg b
4304 = b_code dst `snocOL`
4305 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4307 return (Any rep code)
4309 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4311 -- This is re-used for floating pt instructions too.
4312 genTrivialCode rep instr a b = do
4313 (b_op, b_code) <- getNonClobberedOperand b
4314 a_code <- getAnyReg a
4315 tmp <- getNewRegNat rep
4317 -- We want the value of b to stay alive across the computation of a.
4318 -- But, we want to calculate a straight into the destination register,
4319 -- because the instruction only has two operands (dst := dst `op` src).
4320 -- The troublesome case is when the result of b is in the same register
4321 -- as the destination reg. In this case, we have to save b in a
4322 -- new temporary across the computation of a.
4324 | dst `regClashesWithOp` b_op =
4326 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4328 instr (OpReg tmp) (OpReg dst)
4332 instr b_op (OpReg dst)
4334 return (Any rep code)
-- Does this operand read the given register (directly or via an address)?
4336 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4337 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4338 reg `regClashesWithOp` _ = False
-- Unary op: compute the argument straight into dst, then apply instr.
4342 trivialUCode rep instr x = do
4343 x_code <- getAnyReg x
4349 return (Any rep code)
4353 #if i386_TARGET_ARCH
-- i386 trivialFCode: x87-style three-register float op.
-- NOTE(review): elided listing; some source lines are missing below.
4355 trivialFCode pk instr x y = do
4356 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4357 (y_reg, y_code) <- getSomeReg y
4362 instr pk x_reg y_reg dst
4364 return (Any pk code)
4368 #if x86_64_TARGET_ARCH
-- x86-64 uses SSE two-operand float ops, so the generic dyadic path works.
4370 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- Unary float op: source reg in, fresh destination out.
4376 trivialUFCode rep instr x = do
4377 (x_reg, x_code) <- getSomeReg x
4383 return (Any rep code)
4385 #endif /* i386_TARGET_ARCH */
4387 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4389 #if sparc_TARGET_ARCH
4391 trivialCode pk instr x (CmmLit (CmmInt y d))
4394 (src1, code) <- getSomeReg x
4395 tmp <- getNewRegNat I32
4397 src2 = ImmInt (fromInteger y)
4398 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4399 return (Any I32 code__2)
4401 trivialCode pk instr x y = do
4402 (src1, code1) <- getSomeReg x
4403 (src2, code2) <- getSomeReg y
4404 tmp1 <- getNewRegNat I32
4405 tmp2 <- getNewRegNat I32
4407 code__2 dst = code1 `appOL` code2 `snocOL`
4408 instr src1 (RIReg src2) dst
4409 return (Any I32 code__2)
-- | trivialFCode (SPARC): two-operand floating-point operation.
-- SPARC treats F32 and F64 as distinct register kinds, so if the two
-- operands have different reps the F32 one is first promoted to F64
-- (via FxTOy into 'tmp') and the operation is performed at F64.
4412 trivialFCode pk instr x y = do
4413 (src1, code1) <- getSomeReg x
4414 (src2, code2) <- getSomeReg y
4415 tmp1 <- getNewRegNat (cmmExprRep x)
4416 tmp2 <- getNewRegNat (cmmExprRep y)
4417 tmp <- getNewRegNat F64
-- NOTE(review): this local 'x' shadows the parameter 'x' above.
4419 promote x = FxTOy F32 F64 x tmp
-- Same-rep case: no promotion needed.
4426 code1 `appOL` code2 `snocOL`
4427 instr pk src1 src2 dst
-- First operand is F32: promote it, then operate at F64.
4428 else if pk1 == F32 then
4429 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4430 instr F64 tmp src2 dst
-- Otherwise the second operand is F32: promote that one instead.
4432 code1 `appOL` code2 `appOL` promote src2 `snocOL`
4433 instr F64 src1 tmp dst
-- Result rep is the common rep, or F64 when promotion occurred.
4434 return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- | trivialUCode (SPARC): unary integer operation in register-immediate
-- form, with the source wrapped as RIReg.
4437 trivialUCode pk instr x = do
4438 (src, code) <- getSomeReg x
-- NOTE(review): 'tmp' is not used in the visible lines — confirm.
4439 tmp <- getNewRegNat pk
4441 code__2 dst = code `snocOL` instr (RIReg src) dst
4442 return (Any pk code__2)
-- | trivialUFCode (SPARC): unary floating-point operation, applied
-- directly from the source register to the destination.
4445 trivialUFCode pk instr x = do
4446 (src, code) <- getSomeReg x
-- NOTE(review): 'tmp' is not used in the visible lines — confirm.
4447 tmp <- getNewRegNat pk
4449 code__2 dst = code `snocOL` instr src dst
4450 return (Any pk code__2)
4452 #endif /* sparc_TARGET_ARCH */
4454 #if powerpc_TARGET_ARCH
4457 Wolfgang's PowerPC version of The Rules:
4459 A slightly modified version of The Rules to take advantage of the fact
4460 that PowerPC instructions work on all registers and don't implicitly
4461 clobber any fixed registers.
4463 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4465 * If getRegister returns Any, then the code it generates may modify only:
4466 (a) fresh temporaries
4467 (b) the destination register
4468 It may *not* modify global registers, unless the global
4469 register happens to be the destination register.
4470 It may not clobber any other registers. In fact, only ccalls clobber any
4472 Also, it may not modify the counter register (used by genCCall).
4474 Corollary: If a getRegister for a subexpression returns Fixed, you need
4475 not move it to a fresh temporary before evaluating the next subexpression.
4476 The Fixed register won't be modified.
4477 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4479 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4480 the value of the destination register.
-- | trivialCode (PowerPC): two-operand integer operation.
-- When the second operand is an integer literal that makeImmediate can
-- encode (respecting 'signed'ness), use the register-immediate form.
4483 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4484 | Just imm <- makeImmediate rep signed y
4486 (src1, code1) <- getSomeReg x
4487 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4488 return (Any rep code)
-- General case: both operands in registers; PPC destination comes first.
4490 trivialCode rep signed instr x y = do
4491 (src1, code1) <- getSomeReg x
4492 (src2, code2) <- getSomeReg y
4493 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4494 return (Any rep code)
-- | Two-operand PowerPC operation with no immediate form: evaluate both
-- operands into registers, then emit a single three-register instruction
-- targeting the caller-chosen destination.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (reg_x, prep_x) <- getSomeReg x
    (reg_y, prep_y) <- getSomeReg y
    let assemble dst = prep_x `appOL` prep_y `snocOL` instr dst reg_x reg_y
    return (Any rep assemble)
-- | Unary PowerPC operation: evaluate the operand into a register and
-- emit one instruction writing the caller-chosen destination.
trivialUCode rep instr x = do
    (operand, prep) <- getSomeReg x
    return (Any rep (\dst -> prep `snocOL` instr dst operand))
4509 -- There is no "remainder" instruction on the PPC, so we have to do
4511 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | remainderCode (PowerPC): synthesise a remainder as
-- x - (x `div` y) * y, since PPC has no remainder instruction.
-- 'div' is the division instruction to use (DIVW or DIVWU).
4513 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4514 -> CmmExpr -> CmmExpr -> NatM Register
4515 remainderCode rep div x y = do
4516 (src1, code1) <- getSomeReg x
4517 (src2, code2) <- getSomeReg y
4518 let code dst = code1 `appOL` code2 `appOL` toOL [
-- NOTE(review): the division instruction itself and the final subtract
-- are on lines elided from this excerpt; only the multiply of the
-- quotient by src2 is visible here.
4520 MULLW dst dst (RIReg src2),
4523 return (Any rep code)
4525 #endif /* powerpc_TARGET_ARCH */
4528 -- -----------------------------------------------------------------------------
4529 -- Coercing to/from integer/floating-point...
4531 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4532 -- conversions. We have to store temporaries in memory to move
4533 -- between the integer and the floating point register sets.
4535 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4536 -- pretend, on sparc at least, that double and float regs are separate
4537 -- kinds, so the value has to be computed into one kind before being
4538 -- explicitly "converted" to live in the other kind.
4540 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4541 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4543 #if sparc_TARGET_ARCH
4544 coerceDbl2Flt :: CmmExpr -> NatM Register
4545 coerceFlt2Dbl :: CmmExpr -> NatM Register
4548 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4550 #if alpha_TARGET_ARCH
-- Alpha int<->FP coercions. Both move the value through a stack slot
-- (spRel 0) because there is no direct int<->FP register move.
-- NOTE(review): the two equation headers (coerceInt2FP / coerceFP2Int)
-- are on lines elided from this excerpt.
4553 = getRegister x `thenNat` \ register ->
4554 getNewRegNat IntRep `thenNat` \ reg ->
4556 code = registerCode register reg
4557 src = registerName register reg
4559 code__2 dst = code . mkSeqInstrs [
-- Reload the stored bits into an FP register.
4561 LD TF dst (spRel 0),
4564 return (Any F64 code__2)
-- FP -> int direction: compute into an F64 temporary, spill it, reload
-- as an integer.
4568 = getRegister x `thenNat` \ register ->
4569 getNewRegNat F64 `thenNat` \ tmp ->
4571 code = registerCode register tmp
4572 src = registerName register tmp
4574 code__2 dst = code . mkSeqInstrs [
4576 ST TF tmp (spRel 0),
4579 return (Any IntRep code__2)
4581 #endif /* alpha_TARGET_ARCH */
4583 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4585 #if i386_TARGET_ARCH
-- | coerceInt2FP (i386/x87): integer to float via GITOF/GITOD,
-- selected by the target rep. The 'from' rep is currently ignored
-- (see the ToDo below).
4587 coerceInt2FP from to x = do
4588 (x_reg, x_code) <- getSomeReg x
4590 opc = case to of F32 -> GITOF; F64 -> GITOD
4591 code dst = x_code `snocOL` opc x_reg dst
4592 -- ToDo: works for non-I32 reps?
4594 return (Any to code)
-- | coerceFP2Int (i386/x87): float to integer via GFTOI/GDTOI,
-- selected by the source rep. The integer 'to' rep is not otherwise
-- consulted (see the ToDo below).
4598 coerceFP2Int from to x = do
4599 (x_reg, x_code) <- getSomeReg x
4601 opc = case from of F32 -> GFTOI; F64 -> GDTOI
4602 code dst = x_code `snocOL` opc x_reg dst
4603 -- ToDo: works for non-I32 reps?
4605 return (Any to code)
4607 #endif /* i386_TARGET_ARCH */
4609 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4611 #if x86_64_TARGET_ARCH
-- | coerceFP2Int (x86_64/SSE2): truncating float-to-int conversion via
-- CVTSS2SI / CVTSD2SI, selected by the source rep. The source may be a
-- memory operand.
4613 coerceFP2Int from to x = do
4614 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4616 opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
4617 code dst = x_code `snocOL` opc x_op dst
4619 return (Any to code) -- works even if the destination rep is <I32
-- | coerceInt2FP (x86_64/SSE2): int-to-float conversion via
-- CVTSI2SS / CVTSI2SD, selected by the target rep. The source may be a
-- memory operand.
4621 coerceInt2FP from to x = do
4622 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4624 opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4625 code dst = x_code `snocOL` opc x_op dst
4627 return (Any to code) -- works even if the destination rep is <I32
-- | coerceFP2FP (x86_64/SSE2): float<->double conversion. The target
-- rep selects the direction: F32 means narrowing (CVTSD2SS), F64 means
-- widening (CVTSS2SD).
4629 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4630 coerceFP2FP to x = do
4631 (x_reg, x_code) <- getSomeReg x
4633 opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4634 code dst = x_code `snocOL` opc x_reg dst
4636 return (Any to code)
4640 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4642 #if sparc_TARGET_ARCH
-- | coerceInt2FP (SPARC): move the integer bits through a stack slot
-- below the stack pointer (spRel (-2)) into an FP register, then
-- convert in place with FxTOy.
4644 coerceInt2FP pk1 pk2 x = do
4645 (src, code) <- getSomeReg x
4647 code__2 dst = code `appOL` toOL [
4648 ST pk1 src (spRel (-2)),
4649 LD pk1 (spRel (-2)) dst,
4650 FxTOy pk1 pk2 dst dst]
4651 return (Any pk2 code__2)
-- | coerceFP2Int (SPARC): convert in the FP register file with FxTOy,
-- then move the resulting bits through a stack slot (spRel (-2)) into
-- an integer register.
4654 coerceFP2Int pk fprep x = do
4655 (src, code) <- getSomeReg x
-- NOTE(review): 'reg' is not referenced in the visible lines — it may
-- be used on an elided line; confirm before removing.
4656 reg <- getNewRegNat fprep
4657 tmp <- getNewRegNat pk
4659 code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4661 FxTOy fprep pk src tmp,
4662 ST pk tmp (spRel (-2)),
4663 LD pk (spRel (-2)) dst]
4664 return (Any pk code__2)
-- | Narrow a double to single precision (SPARC). Floats and doubles
-- live in separate register kinds here, so an explicit FxTOy
-- register-to-register conversion is required.
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    let narrow dst = code `snocOL` FxTOy F64 F32 src dst
    return (Any F32 narrow)
-- | Widen a single-precision float to a double (SPARC), the mirror of
-- 'coerceDbl2Flt': one FxTOy register-to-register conversion.
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    let widen dst = code `snocOL` FxTOy F32 F64 src dst
    return (Any F64 widen)
4676 #endif /* sparc_TARGET_ARCH */
4678 #if powerpc_TARGET_ARCH
-- | coerceInt2FP (PowerPC): int-to-float using the classic
-- bias-and-subtract trick, since PPC has no direct int->FP move.
-- A double with bit pattern 0x43300000:(src XOR 0x80000000) is built on
-- the stack and loaded; subtracting the constant double whose bit
-- pattern is 0x4330000080000000 (stored in the data section, reached
-- via a dynamic reference so it is PIC-safe) then yields (double) src.
4679 coerceInt2FP fromRep toRep x = do
4680 (src, code) <- getSomeReg x
4681 lbl <- getNewLabelNat
4682 itmp <- getNewRegNat I32
4683 ftmp <- getNewRegNat F64
4684 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4685 Amode addr addr_code <- getAmode dynRef
4687 code' dst = code `appOL` maybe_exts `appOL` toOL [
-- The magic constant emitted into the data section.
4690 CmmStaticLit (CmmInt 0x43300000 I32),
4691 CmmStaticLit (CmmInt 0x80000000 I32)],
-- Flip the sign bit of src (the low word of the synthesised double).
4692 XORIS itmp src (ImmInt 0x8000),
4693 ST I32 itmp (spRel 3),
-- High word 0x43300000: the exponent part of the bias constant.
4694 LIS itmp (ImmInt 0x4330),
4695 ST I32 itmp (spRel 2),
-- Reload the two words as one F64.
4696 LD F64 ftmp (spRel 2)
-- NOTE(review): the load of the constant through 'addr' is on a line
-- elided from this excerpt.
4697 ] `appOL` addr_code `appOL` toOL [
4698 FSUB F64 dst ftmp dst
4700 ] `appOL` maybe_frsp dst
-- Narrow source ints must be sign-extended first.
4702 maybe_exts = case fromRep of
4703 I8 -> unitOL $ EXTS I8 src src
4704 I16 -> unitOL $ EXTS I16 src src
-- If the target is F32, round the F64 result to single precision.
4706 maybe_frsp dst = case toRep of
4707 F32 -> unitOL $ FRSP dst dst
4709 return (Any toRep code')
-- | coerceFP2Int (PowerPC): float-to-int by converting inside the FP
-- register file, spilling the 64-bit FP register to the stack, and
-- reloading only the low 32-bit word as the integer result.
4711 coerceFP2Int fromRep toRep x = do
4712 -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4713 (src, code) <- getSomeReg x
4714 tmp <- getNewRegNat F64
4716 code' dst = code `appOL` toOL [
4717 -- convert to int in FP reg
-- NOTE(review): the conversion instruction itself is on a line elided
-- from this excerpt.
4719 -- store value (64bit) from FP to stack
4720 ST F64 tmp (spRel 2),
4721 -- read low word of value (high word is undefined)
4722 LD I32 dst (spRel 3)]
4723 return (Any toRep code')
4724 #endif /* powerpc_TARGET_ARCH */
4727 -- -----------------------------------------------------------------------------
4728 -- eXTRA_STK_ARGS_HERE
4730 -- We (allegedly) put the first six C-call arguments in registers;
4731 -- where do we start putting the rest of them?
4733 -- Moved from MachInstrs (SDM):
4735 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4736 eXTRA_STK_ARGS_HERE :: Int
4738 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))