1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH} things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
34 import StaticFlags ( opt_PIC )
35 import ForeignCall ( CCallConv(..) )
40 import FastTypes ( isFastTrue )
41 import Constants ( wORD_SIZE )
44 import Outputable ( assertPanic )
45 import Debug.Trace ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Data.Maybe ( fromJust )
54 -- -----------------------------------------------------------------------------
55 -- Top-level of the instruction selector
57 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
58 -- They are really trees of insns to facilitate fast appending, where a
59 -- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.  (NOTE(review): the tail of this sentence is missing from this
-- extract; "order" is the natural completion given the OrdList type below.)
62 type InstrBlock = OrdList Instr
64 cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
-- Code-generate one top-level Cmm entity.  A procedure is translated
-- block-by-block via basicBlockCodeGen; any literal data the blocks
-- produce is floated out alongside the procedure.
65 cmmTopCodeGen (CmmProc info lab params blocks) = do
66 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
67 picBaseMb <- getPicBaseMaybeNat
68 let proc = CmmProc info lab params (concat nat_blocks)
69 tops = proc : concat statics
-- NOTE(review): a line is missing from this extract here — the two
-- alternatives below need a scrutinee, presumably `case picBaseMb of`.
71 Just picBase -> initializePicBase picBase tops
72 Nothing -> return tops
-- Static data needs no instruction selection; pass it through unchanged.
74 cmmTopCodeGen (CmmData sec dat) = do
75 return [CmmData sec dat] -- no translation, we just use CmmStatic
77 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
-- Translate one Cmm basic block to native instructions, then re-split
-- the flat instruction stream at NEWBLOCK markers (new block boundaries
-- introduced during selection) and float out any LDATA as CmmData tops.
78 basicBlockCodeGen (BasicBlock id stmts) = do
79 instrs <- stmtsToInstrs stmts
80 -- code generation may introduce new basic block boundaries, which
81 -- are indicated by the NEWBLOCK instruction. We must split up the
82 -- instruction stream into basic blocks again. Also, we extract
-- NOTE(review): one or two lines are missing from this extract here
-- (likely the rest of this comment and a `let`/`where` introducing the
-- bindings below).
85 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-- A NEWBLOCK marker starts a fresh block: the instructions accumulated
-- so far become the body of the block named by the marker.
87 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
88 = ([], BasicBlock id instrs : blocks, statics)
-- LDATA is not an instruction: hoist it out as a static data section.
89 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
90 = (instrs, blocks, CmmData sec dat:statics)
-- Ordinary instructions are prepended (foldrOL keeps source order).
91 mkBlocks instr (instrs,blocks,statics)
92 = (instr:instrs, blocks, statics)
94 return (BasicBlock id top : other_blocks, statics)
96 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- Translate a statement list by concatenating the per-statement
-- instruction blocks.
-- NOTE(review): the equation head (`stmtsToInstrs stmts`) is missing
-- from this extract.
98 = do instrss <- mapM stmtToInstrs stmts
99 return (concatOL instrss)
101 stmtToInstrs :: CmmStmt -> NatM InstrBlock
-- Dispatch a single Cmm statement to the appropriate code generator.
-- Assignments are split by representation: float, 64-bit-on-32-bit
-- (guarded by WORD_SIZE_IN_BITS), or ordinary integer.
102 stmtToInstrs stmt = case stmt of
103 CmmNop -> return nilOL
104 CmmComment s -> return (unitOL (COMMENT s))
-- NOTE(review): the `CmmAssign reg src` alternative head is missing
-- from this extract; the guards below belong to it.
107 | isFloatingRep kind -> assignReg_FltCode kind reg src
108 #if WORD_SIZE_IN_BITS==32
109 | kind == I64 -> assignReg_I64Code reg src
-- NOTE(review): the matching `#endif` line is missing from this extract.
111 | otherwise -> assignReg_IntCode kind reg src
112 where kind = cmmRegRep reg
-- NOTE(review): the `CmmStore addr src` alternative head is missing
-- from this extract; the guards below belong to it.
115 | isFloatingRep kind -> assignMem_FltCode kind addr src
116 #if WORD_SIZE_IN_BITS==32
117 | kind == I64 -> assignMem_I64Code addr src
-- NOTE(review): the matching `#endif` line is missing from this extract.
119 | otherwise -> assignMem_IntCode kind addr src
120 where kind = cmmExprRep src
122 CmmCall target result_regs args vols
123 -> genCCall target result_regs args vols
125 CmmBranch id -> genBranch id
126 CmmCondBranch arg id -> genCondJump id arg
127 CmmSwitch arg ids -> genSwitch arg ids
-- params is ignored: a jump carries no code for its live registers here.
128 CmmJump arg params -> genJump arg
130 -- -----------------------------------------------------------------------------
131 -- General things for putting together code sequences
-- Expand a CmmRegOff into an explicit MachOp addition of the register
-- and its (sign-extended) byte offset, at the register's own MachRep.
-- ToDo: should we do it this way around, or convert CmmExprs into
-- CmmRegOff instead?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    CmmMachOp (MO_Add rep) [base, offset]
  where
    rep    = cmmRegRep reg
    base   = CmmReg reg
    offset = CmmLit (CmmInt (fromIntegral off) rep)
140 -- -----------------------------------------------------------------------------
141 -- Code gen for 64-bit arithmetic on 32-bit platforms
144 Simple support for generating 64-bit code (ie, 64 bit values and 64
145 bit assignments) on 32-bit platforms. Unlike the main code generator
146 we merely shoot for generating working code as simply as possible, and
147 pay little attention to code quality. Specifically, there is no
148 attempt to deal cleverly with the fixed-vs-floating register
149 distinction; all values are generated into (pairs of) floating
150 registers, even if this would mean some redundant reg-reg moves as a
151 result. Only one of the VRegUniques is returned, since it will be
152 of the VRegUniqueLo form, and the upper-half VReg can be determined
153 by applying getHiVRegFromLo to it.
156 data ChildCode64 -- a.k.a "Register64"
-- NOTE(review): the constructor line (presumably
-- `= ChildCode64 InstrBlock`) is missing from this extract; only the
-- Reg field and its comments remain visible below.
159 Reg -- the lower 32-bit temporary which contains the
160 -- result; use getHiVRegFromLo to find the other
161 -- VRegUnique. Rules of this simplified insn
162 -- selection game are therefore that the returned
163 -- Reg may be modified
165 #if WORD_SIZE_IN_BITS==32
-- Shared type signatures for the per-architecture 64-bit-on-32-bit
-- helpers defined in the CPP sections below.
166 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
167 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
170 #ifndef x86_64_TARGET_ARCH
171 iselExpr64 :: CmmExpr -> NatM ChildCode64
174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386: store a 64-bit value to memory as two 32-bit MOVs, low word at
-- the base address, high word 4 bytes above (little-endian layout).
178 assignMem_I64Code addrTree valueTree = do
179 Amode addr addr_code <- getAmode addrTree
180 ChildCode64 vcode rlo <- iselExpr64 valueTree
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
182 rhi = getHiVRegFromLo rlo
184 -- Little-endian store
185 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
186 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
188 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- i386: assign a 64-bit value to a local register pair with two
-- reg-to-reg MOVs; the high halves are derived via getHiVRegFromLo.
191 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
192 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
194 r_dst_lo = mkVReg u_dst I32
195 r_dst_hi = getHiVRegFromLo r_dst_lo
196 r_src_hi = getHiVRegFromLo r_src_lo
197 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
198 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
-- NOTE(review): the `return (` wrapping the expression below is missing
-- from this extract.
201 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers can hold an I64 on i386; anything else is a
-- compiler bug.
204 assignReg_I64Code lvalue valueTree
205 = panic "assignReg_I64Code(i386): invalid lvalue"
-- i386: load a 64-bit literal as two 32-bit immediate moves.
-- r is the low word, q the high word of the literal.
209 iselExpr64 (CmmLit (CmmInt i _)) = do
210 (rlo,rhi) <- getNewRegPairNat I32
-- NOTE(review): the `let` introducing r/q is missing from this extract.
212 r = fromIntegral (fromIntegral i :: Word32)
213 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
-- NOTE(review): the `code = toOL [` line and closing `]` around the
-- instruction list below are missing from this extract.
215 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
216 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
219 return (ChildCode64 code rlo)
-- i386: 64-bit load = two 32-bit loads, low at the base address, high
-- at base+4 (little-endian).
221 iselExpr64 (CmmLoad addrTree I64) = do
222 Amode addr addr_code <- getAmode addrTree
223 (rlo,rhi) <- getNewRegPairNat I32
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
225 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
226 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
-- NOTE(review): the `return (` line and the trailing `rlo)` of this
-- result are missing from this extract.
229 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local register needs no code; its low vreg names the pair.
233 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
234 = return (ChildCode64 nilOL (mkVReg vu I32))
236 -- we handle addition, but rather badly
-- i386: 64-bit add of an expression and a literal: copy the operand
-- pair into fresh registers, then ADD the low words and ADC (add with
-- carry) the high words against the literal's two halves.
237 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
238 ChildCode64 code1 r1lo <- iselExpr64 e1
239 (rlo,rhi) <- getNewRegPairNat I32
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
241 r = fromIntegral (fromIntegral i :: Word32)
242 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
243 r1hi = getHiVRegFromLo r1lo
-- NOTE(review): a `code = code1 `appOL`` line appears to be missing
-- above the toOL below.
245 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
246 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
247 MOV I32 (OpReg r1hi) (OpReg rhi),
248 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
250 return (ChildCode64 code rlo)
-- i386: general 64-bit add: evaluate both operands, copy operand 1 into
-- the result pair, then ADD/ADC operand 2's low/high words onto it.
252 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
253 ChildCode64 code1 r1lo <- iselExpr64 e1
254 ChildCode64 code2 r2lo <- iselExpr64 e2
255 (rlo,rhi) <- getNewRegPairNat I32
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
257 r1hi = getHiVRegFromLo r1lo
258 r2hi = getHiVRegFromLo r2lo
-- NOTE(review): a `code = code1 `appOL` code2 `appOL`` line appears to
-- be missing above the toOL below.
261 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
262 ADD I32 (OpReg r2lo) (OpReg rlo),
263 MOV I32 (OpReg r1hi) (OpReg rhi),
264 ADC I32 (OpReg r2hi) (OpReg rhi) ]
266 return (ChildCode64 code rlo)
-- Catch-all: any other 64-bit expression form is unsupported on i386.
-- NOTE(review): the equation head (`iselExpr64 expr`) is missing from
-- this extract.
269 = pprPanic "iselExpr64(i386)" (ppr expr)
271 #endif /* i386_TARGET_ARCH */
273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
275 #if sparc_TARGET_ARCH
-- SPARC: store a 64-bit value as two 32-bit STs.  Note the big-endian
-- layout: the high word goes at offset 0, the low word at offset 4.
277 assignMem_I64Code addrTree valueTree = do
278 Amode addr addr_code <- getAmode addrTree
279 ChildCode64 vcode rlo <- iselExpr64 valueTree
-- The address is (re)computed into a plain register for the ST forms;
-- NOTE(review): `addr`/`addr_code` from getAmode above appear unused in
-- the visible code — confirm against the full source.
280 (src, code) <- getSomeReg addrTree
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
282 rhi = getHiVRegFromLo rlo
284 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
285 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
286 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- SPARC: assign a 64-bit value to a local register pair.  Register
-- moves are expressed as `OR %g0, src, dst` (the SPARC move idiom).
288 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
289 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.  Also note r_dst_lo is made at rep `pk`, unlike
-- the i386/PPC versions which use I32 — confirm against the full source.
291 r_dst_lo = mkVReg u_dst pk
292 r_dst_hi = getHiVRegFromLo r_dst_lo
293 r_src_hi = getHiVRegFromLo r_src_lo
294 mov_lo = mkMOV r_src_lo r_dst_lo
295 mov_hi = mkMOV r_src_hi r_dst_hi
296 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
297 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
-- Only local registers can hold an I64 on SPARC.
298 assignReg_I64Code lvalue valueTree
299 = panic "assignReg_I64Code(sparc): invalid lvalue"
302 -- Don't delete this -- it's very handy for debugging.
304 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
305 -- = panic "iselExpr64(???)"
-- SPARC: 64-bit load as two 32-bit LDs, high word first (big-endian).
-- NOTE(review): the pattern demands an AddrRegReg amode but only r1 is
-- used (with immediate offsets 0 and 4); r2 is ignored — this looks
-- wrong for a genuine reg+reg address.  Confirm against the full source.
307 iselExpr64 (CmmLoad addrTree I64) = do
308 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
309 rlo <- getNewRegNat I32
310 let rhi = getHiVRegFromLo rlo
311 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
312 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
-- NOTE(review): the `return (` line and the trailing `rlo)` of this
-- result are missing from this extract.
314 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- Reading a 64-bit local: copy both halves into a fresh pair using the
-- OR-with-%g0 move idiom.
318 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
319 r_dst_lo <- getNewRegNat I32
320 let r_dst_hi = getHiVRegFromLo r_dst_lo
321 r_src_lo = mkVReg uq I32
322 r_src_hi = getHiVRegFromLo r_src_lo
323 mov_lo = mkMOV r_src_lo r_dst_lo
324 mov_hi = mkMOV r_src_hi r_dst_hi
325 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
-- NOTE(review): the `return (` line and trailing `)` are missing from
-- this extract.
327 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
-- Catch-all: other 64-bit expression forms are unsupported on SPARC.
-- NOTE(review): the equation head (`iselExpr64 expr`) is missing from
-- this extract.
331 = pprPanic "iselExpr64(sparc)" (ppr expr)
333 #endif /* sparc_TARGET_ARCH */
335 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
337 #if powerpc_TARGET_ARCH
-- PPC: compute addressing modes for both halves of a 64-bit memory
-- operand: (address of high word, address of low word, setup code).
-- If the base amode cannot take a +4 offset, fall back to materialising
-- the address in a register and using immediate offsets 0 and 4.
339 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
340 getI64Amodes addrTree = do
341 Amode hi_addr addr_code <- getAmode addrTree
342 case addrOffset hi_addr 4 of
343 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
344 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
345 return (AddrRegImm hi_ptr (ImmInt 0),
346 AddrRegImm hi_ptr (ImmInt 4),
-- NOTE(review): the final tuple component (presumably `code)`) is
-- missing from this extract.
-- PPC: store a 64-bit value as two 32-bit STs; PPC is big-endian, so
-- the high word goes to the lower address (hi_addr).
349 assignMem_I64Code addrTree valueTree = do
350 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
351 ChildCode64 vcode rlo <- iselExpr64 valueTree
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
353 rhi = getHiVRegFromLo rlo
356 mov_hi = ST I32 rhi hi_addr
357 mov_lo = ST I32 rlo lo_addr
359 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- PPC: assign a 64-bit value to a local register pair using two MR
-- (move register) instructions.
361 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
362 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
364 r_dst_lo = mkVReg u_dst I32
365 r_dst_hi = getHiVRegFromLo r_dst_lo
366 r_src_hi = getHiVRegFromLo r_src_lo
367 mov_lo = MR r_dst_lo r_src_lo
368 mov_hi = MR r_dst_hi r_src_hi
-- NOTE(review): the `return (` wrapping the expression below is missing
-- from this extract.
371 vcode `snocOL` mov_lo `snocOL` mov_hi
-- Only local registers can hold an I64 on PPC.
374 assignReg_I64Code lvalue valueTree
375 = panic "assignReg_I64Code(powerpc): invalid lvalue"
378 -- Don't delete this -- it's very handy for debugging.
380 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
381 -- = panic "iselExpr64(???)"
-- PPC: 64-bit load as two 32-bit LDs via the hi/lo amodes from
-- getI64Amodes (big-endian: high word at the lower address).
383 iselExpr64 (CmmLoad addrTree I64) = do
384 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
385 (rlo, rhi) <- getNewRegPairNat I32
386 let mov_hi = LD I32 rhi hi_addr
387 mov_lo = LD I32 rlo lo_addr
-- NOTE(review): the trailing `rlo` argument of ChildCode64 is missing
-- from this extract.
388 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- A 64-bit local register needs no code; its low vreg names the pair.
391 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
392 = return (ChildCode64 nilOL (mkVReg vu I32))
-- PPC: load a 64-bit integer literal into a fresh register pair.
-- PPC has no 32-bit-immediate load, so each word is assembled from two
-- 16-bit halves: LIS sets bits 16-31, OR-immediate fills bits 0-15.
-- half0..half3 are the literal's 16-bit chunks, least significant first.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                -- BUG FIX: this OR previously targeted rlo, which
                -- clobbered the already-assembled low word and left the
                -- low 16 bits of the high word unset.  It must combine
                -- half2 into rhi, mirroring the rlo pair above.
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- PPC: general 64-bit add: ADDC (add carrying) the low words, then the
-- high words with the carry.
411 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
412 ChildCode64 code1 r1lo <- iselExpr64 e1
413 ChildCode64 code2 r2lo <- iselExpr64 e2
414 (rlo,rhi) <- getNewRegPairNat I32
-- NOTE(review): the `let` introducing the bindings below is missing
-- from this extract.
416 r1hi = getHiVRegFromLo r1lo
417 r2hi = getHiVRegFromLo r2lo
-- NOTE(review): the line adding the high words (presumably
-- `ADDE rhi r1hi r2hi ]`) and the `code = code1 `appOL` code2 `appOL``
-- prefix are missing from this extract.
420 toOL [ ADDC rlo r1lo r2lo,
423 return (ChildCode64 code rlo)
-- Catch-all: other 64-bit expression forms are unsupported on PPC.
-- NOTE(review): the equation head (`iselExpr64 expr`) is missing from
-- this extract.
426 = pprPanic "iselExpr64(powerpc)" (ppr expr)
428 #endif /* powerpc_TARGET_ARCH */
431 -- -----------------------------------------------------------------------------
432 -- The 'Register' type
434 -- 'Register's passed up the tree. If the stix code forces the register
435 -- to live in a pre-decided machine register, it comes out as @Fixed@;
436 -- otherwise, it comes out as @Any@, and the parent can decide which
437 -- register to put it in.
440 = Fixed MachRep Reg InstrBlock
441 | Any MachRep (Reg -> InstrBlock)
-- Replace the MachRep carried by a Register, leaving the register /
-- code-generation function untouched.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep register rep = case register of
    Fixed _ reg code -> Fixed rep reg code
    Any _ mkCode     -> Any rep mkCode
448 -- -----------------------------------------------------------------------------
449 -- Utils based on getRegister, below
451 -- The dual to getAnyReg: compute an expression into a register, but
452 -- we don't mind which one it is.
453 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head (`getSomeReg expr = do`) and the
-- `case r of` / `Any rep code -> do` scaffolding are missing from this
-- extract, as is the Fixed branch that presumably follows.
455 r <- getRegister expr
458 tmp <- getNewRegNat rep
459 return (tmp, code tmp)
463 -- -----------------------------------------------------------------------------
464 -- Grab the Reg for a CmmReg
-- Map a CmmReg to the Reg that holds it: locals become virtual
-- registers; globals must map to a real machine register on this
-- platform, otherwise it is a compiler bug to ask for one here.
466 getRegisterReg :: CmmReg -> Reg
468 getRegisterReg (CmmLocal (LocalReg u pk))
-- NOTE(review): this equation's right-hand side (presumably
-- `= mkVReg u pk`) is missing from this extract.
471 getRegisterReg (CmmGlobal mid)
472 = case get_GlobalReg_reg_or_addr mid of
473 Left (RealReg rrno) -> RealReg rrno
474 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
475 -- By this stage, the only MagicIds remaining should be the
476 -- ones which map to a real machine register on this
477 -- platform. Hence ...
480 -- -----------------------------------------------------------------------------
481 -- Generate code to get a subtree into a Register
483 -- Don't delete this -- it's very handy for debugging.
485 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
486 -- = panic "getRegister(???)"
488 getRegister :: CmmExpr -> NatM Register
490 #if !x86_64_TARGET_ARCH
491 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
492 -- register, it can only be used for rip-relative addressing.
493 getRegister (CmmReg (CmmGlobal PicBaseReg))
495 reg <- getPicBaseNat wordRep
496 return (Fixed wordRep reg nilOL)
499 getRegister (CmmReg reg)
500 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
502 getRegister tree@(CmmRegOff _ _)
503 = getRegister (mangleIndexTree tree)
506 #if WORD_SIZE_IN_BITS==32
507 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
508 -- TO_W_(x), TO_W_(x >> 32)
510 getRegister (CmmMachOp (MO_U_Conv I64 I32)
511 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
512 ChildCode64 code rlo <- iselExpr64 x
513 return $ Fixed I32 (getHiVRegFromLo rlo) code
515 getRegister (CmmMachOp (MO_S_Conv I64 I32)
516 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
517 ChildCode64 code rlo <- iselExpr64 x
518 return $ Fixed I32 (getHiVRegFromLo rlo) code
520 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
521 ChildCode64 code rlo <- iselExpr64 x
522 return $ Fixed I32 rlo code
524 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
525 ChildCode64 code rlo <- iselExpr64 x
526 return $ Fixed I32 rlo code
530 -- end of machine-"independent" bit; here we go on the rest...
532 #if alpha_TARGET_ARCH
534 getRegister (StDouble d)
535 = getBlockIdNat `thenNat` \ lbl ->
536 getNewRegNat PtrRep `thenNat` \ tmp ->
537 let code dst = mkSeqInstrs [
538 LDATA RoDataSegment lbl [
539 DATA TF [ImmLab (rational d)]
541 LDA tmp (AddrImm (ImmCLbl lbl)),
542 LD TF dst (AddrReg tmp)]
544 return (Any F64 code)
546 getRegister (StPrim primop [x]) -- unary PrimOps
548 IntNegOp -> trivialUCode (NEG Q False) x
550 NotOp -> trivialUCode NOT x
552 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
553 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
555 OrdOp -> coerceIntCode IntRep x
558 Float2IntOp -> coerceFP2Int x
559 Int2FloatOp -> coerceInt2FP pr x
560 Double2IntOp -> coerceFP2Int x
561 Int2DoubleOp -> coerceInt2FP pr x
563 Double2FloatOp -> coerceFltCode x
564 Float2DoubleOp -> coerceFltCode x
566 other_op -> getRegister (StCall fn CCallConv F64 [x])
568 fn = case other_op of
569 FloatExpOp -> FSLIT("exp")
570 FloatLogOp -> FSLIT("log")
571 FloatSqrtOp -> FSLIT("sqrt")
572 FloatSinOp -> FSLIT("sin")
573 FloatCosOp -> FSLIT("cos")
574 FloatTanOp -> FSLIT("tan")
575 FloatAsinOp -> FSLIT("asin")
576 FloatAcosOp -> FSLIT("acos")
577 FloatAtanOp -> FSLIT("atan")
578 FloatSinhOp -> FSLIT("sinh")
579 FloatCoshOp -> FSLIT("cosh")
580 FloatTanhOp -> FSLIT("tanh")
581 DoubleExpOp -> FSLIT("exp")
582 DoubleLogOp -> FSLIT("log")
583 DoubleSqrtOp -> FSLIT("sqrt")
584 DoubleSinOp -> FSLIT("sin")
585 DoubleCosOp -> FSLIT("cos")
586 DoubleTanOp -> FSLIT("tan")
587 DoubleAsinOp -> FSLIT("asin")
588 DoubleAcosOp -> FSLIT("acos")
589 DoubleAtanOp -> FSLIT("atan")
590 DoubleSinhOp -> FSLIT("sinh")
591 DoubleCoshOp -> FSLIT("cosh")
592 DoubleTanhOp -> FSLIT("tanh")
594 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
596 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
598 CharGtOp -> trivialCode (CMP LTT) y x
599 CharGeOp -> trivialCode (CMP LE) y x
600 CharEqOp -> trivialCode (CMP EQQ) x y
601 CharNeOp -> int_NE_code x y
602 CharLtOp -> trivialCode (CMP LTT) x y
603 CharLeOp -> trivialCode (CMP LE) x y
605 IntGtOp -> trivialCode (CMP LTT) y x
606 IntGeOp -> trivialCode (CMP LE) y x
607 IntEqOp -> trivialCode (CMP EQQ) x y
608 IntNeOp -> int_NE_code x y
609 IntLtOp -> trivialCode (CMP LTT) x y
610 IntLeOp -> trivialCode (CMP LE) x y
612 WordGtOp -> trivialCode (CMP ULT) y x
613 WordGeOp -> trivialCode (CMP ULE) x y
614 WordEqOp -> trivialCode (CMP EQQ) x y
615 WordNeOp -> int_NE_code x y
616 WordLtOp -> trivialCode (CMP ULT) x y
617 WordLeOp -> trivialCode (CMP ULE) x y
619 AddrGtOp -> trivialCode (CMP ULT) y x
620 AddrGeOp -> trivialCode (CMP ULE) y x
621 AddrEqOp -> trivialCode (CMP EQQ) x y
622 AddrNeOp -> int_NE_code x y
623 AddrLtOp -> trivialCode (CMP ULT) x y
624 AddrLeOp -> trivialCode (CMP ULE) x y
626 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
627 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
628 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
629 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
630 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
631 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
633 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
634 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
635 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
636 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
637 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
638 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
640 IntAddOp -> trivialCode (ADD Q False) x y
641 IntSubOp -> trivialCode (SUB Q False) x y
642 IntMulOp -> trivialCode (MUL Q False) x y
643 IntQuotOp -> trivialCode (DIV Q False) x y
644 IntRemOp -> trivialCode (REM Q False) x y
646 WordAddOp -> trivialCode (ADD Q False) x y
647 WordSubOp -> trivialCode (SUB Q False) x y
648 WordMulOp -> trivialCode (MUL Q False) x y
649 WordQuotOp -> trivialCode (DIV Q True) x y
650 WordRemOp -> trivialCode (REM Q True) x y
652 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
653 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
654 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
655 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
657 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
658 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
659 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
660 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
662 AddrAddOp -> trivialCode (ADD Q False) x y
663 AddrSubOp -> trivialCode (SUB Q False) x y
664 AddrRemOp -> trivialCode (REM Q True) x y
666 AndOp -> trivialCode AND x y
667 OrOp -> trivialCode OR x y
668 XorOp -> trivialCode XOR x y
669 SllOp -> trivialCode SLL x y
670 SrlOp -> trivialCode SRL x y
672 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
673 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
674 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
676 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
677 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
679 {- ------------------------------------------------------------
680 Some bizarre special code for getting condition codes into
681 registers. Integer non-equality is a test for equality
682 followed by an XOR with 1. (Integer comparisons always set
683 the result register to 0 or 1.) Floating point comparisons of
684 any kind leave the result in a floating point register, so we
685 need to wrangle an integer register out of things.
687 int_NE_code :: StixTree -> StixTree -> NatM Register
690 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
691 getNewRegNat IntRep `thenNat` \ tmp ->
693 code = registerCode register tmp
694 src = registerName register tmp
695 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
697 return (Any IntRep code__2)
699 {- ------------------------------------------------------------
700 Comments for int_NE_code also apply to cmpF_code
703 :: (Reg -> Reg -> Reg -> Instr)
705 -> StixTree -> StixTree
708 cmpF_code instr cond x y
709 = trivialFCode pr instr x y `thenNat` \ register ->
710 getNewRegNat F64 `thenNat` \ tmp ->
711 getBlockIdNat `thenNat` \ lbl ->
713 code = registerCode register tmp
714 result = registerName register tmp
716 code__2 dst = code . mkSeqInstrs [
717 OR zeroh (RIImm (ImmInt 1)) dst,
718 BF cond result (ImmCLbl lbl),
719 OR zeroh (RIReg zeroh) dst,
722 return (Any IntRep code__2)
724 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
725 ------------------------------------------------------------
727 getRegister (CmmLoad pk mem)
728 = getAmode mem `thenNat` \ amode ->
730 code = amodeCode amode
731 src = amodeAddr amode
732 size = primRepToSize pk
733 code__2 dst = code . mkSeqInstr (LD size dst src)
735 return (Any pk code__2)
737 getRegister (StInt i)
740 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
742 return (Any IntRep code)
745 code dst = mkSeqInstr (LDI Q dst src)
747 return (Any IntRep code)
749 src = ImmInt (fromInteger i)
754 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
756 return (Any PtrRep code)
759 imm__2 = case imm of Just x -> x
761 #endif /* alpha_TARGET_ARCH */
763 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
767 getRegister (CmmLit (CmmFloat f F32)) = do
768 lbl <- getNewLabelNat
769 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
770 Amode addr addr_code <- getAmode dynRef
774 CmmStaticLit (CmmFloat f F32)]
775 `consOL` (addr_code `snocOL`
778 return (Any F32 code)
781 getRegister (CmmLit (CmmFloat d F64))
783 = let code dst = unitOL (GLDZ dst)
784 in return (Any F64 code)
787 = let code dst = unitOL (GLD1 dst)
788 in return (Any F64 code)
791 lbl <- getNewLabelNat
792 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
793 Amode addr addr_code <- getAmode dynRef
797 CmmStaticLit (CmmFloat d F64)]
798 `consOL` (addr_code `snocOL`
801 return (Any F64 code)
803 #endif /* i386_TARGET_ARCH */
805 #if x86_64_TARGET_ARCH
807 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
808 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
809 -- I don't know why there are xorpd, xorps, and pxor instructions.
810 -- They all appear to do the same thing --SDM
811 return (Any rep code)
813 getRegister (CmmLit (CmmFloat f rep)) = do
814 lbl <- getNewLabelNat
815 let code dst = toOL [
818 CmmStaticLit (CmmFloat f rep)],
819 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
822 return (Any rep code)
824 #endif /* x86_64_TARGET_ARCH */
826 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
828 -- catch simple cases of zero- or sign-extended load
829 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
830 code <- intLoadCode (MOVZxL I8) addr
831 return (Any I32 code)
833 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
834 code <- intLoadCode (MOVSxL I8) addr
835 return (Any I32 code)
837 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
838 code <- intLoadCode (MOVZxL I16) addr
839 return (Any I32 code)
841 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
842 code <- intLoadCode (MOVSxL I16) addr
843 return (Any I32 code)
847 #if x86_64_TARGET_ARCH
849 -- catch simple cases of zero- or sign-extended load
850 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
851 code <- intLoadCode (MOVZxL I8) addr
852 return (Any I64 code)
854 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
855 code <- intLoadCode (MOVSxL I8) addr
856 return (Any I64 code)
858 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
859 code <- intLoadCode (MOVZxL I16) addr
860 return (Any I64 code)
862 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
863 code <- intLoadCode (MOVSxL I16) addr
864 return (Any I64 code)
866 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
867 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
868 return (Any I64 code)
870 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
871 code <- intLoadCode (MOVSxL I32) addr
872 return (Any I64 code)
876 #if x86_64_TARGET_ARCH
877 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
878 CmmLit displacement])
879 = return $ Any I64 (\dst -> unitOL $
880 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
883 #if x86_64_TARGET_ARCH
884 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
885 x_code <- getAnyReg x
886 lbl <- getNewLabelNat
888 code dst = x_code dst `appOL` toOL [
889 -- This is how gcc does it, so it can't be that bad:
890 LDATA ReadOnlyData16 [
893 CmmStaticLit (CmmInt 0x80000000 I32),
894 CmmStaticLit (CmmInt 0 I32),
895 CmmStaticLit (CmmInt 0 I32),
896 CmmStaticLit (CmmInt 0 I32)
898 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
899 -- xorps, so we need the 128-bit constant
900 -- ToDo: rip-relative
903 return (Any F32 code)
905 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
906 x_code <- getAnyReg x
907 lbl <- getNewLabelNat
909 -- This is how gcc does it, so it can't be that bad:
910 code dst = x_code dst `appOL` toOL [
911 LDATA ReadOnlyData16 [
914 CmmStaticLit (CmmInt 0x8000000000000000 I64),
915 CmmStaticLit (CmmInt 0 I64)
917 -- gcc puts an unpck here. Wonder if we need it.
918 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
919 -- xorpd, so we need the 128-bit constant
922 return (Any F64 code)
925 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
927 getRegister (CmmMachOp mop [x]) -- unary MachOps
930 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
931 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
934 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
935 MO_Not rep -> trivialUCode rep (NOT rep) x
938 MO_U_Conv I32 I8 -> toI8Reg I32 x
939 MO_S_Conv I32 I8 -> toI8Reg I32 x
940 MO_U_Conv I16 I8 -> toI8Reg I16 x
941 MO_S_Conv I16 I8 -> toI8Reg I16 x
942 MO_U_Conv I32 I16 -> toI16Reg I32 x
943 MO_S_Conv I32 I16 -> toI16Reg I32 x
944 #if x86_64_TARGET_ARCH
945 MO_U_Conv I64 I32 -> conversionNop I64 x
946 MO_S_Conv I64 I32 -> conversionNop I64 x
947 MO_U_Conv I64 I16 -> toI16Reg I64 x
948 MO_S_Conv I64 I16 -> toI16Reg I64 x
949 MO_U_Conv I64 I8 -> toI8Reg I64 x
950 MO_S_Conv I64 I8 -> toI8Reg I64 x
953 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
954 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
957 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
958 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
959 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
961 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
962 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
963 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
965 #if x86_64_TARGET_ARCH
966 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
967 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
968 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
969 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
970 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
971 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
972 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
973 -- However, we don't want the register allocator to throw it
974 -- away as an unnecessary reg-to-reg move, so we keep it in
975 -- the form of a movzl and print it as a movl later.
979 MO_S_Conv F32 F64 -> conversionNop F64 x
980 MO_S_Conv F64 F32 -> conversionNop F32 x
982 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
983 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
987 | isFloatingRep from -> coerceFP2Int from to x
988 | isFloatingRep to -> coerceInt2FP from to x
990 other -> pprPanic "getRegister" (pprMachOp mop)
992 -- signed or unsigned extension.
993 integerExtend from to instr expr = do
994 (reg,e_code) <- if from == I8 then getByteReg expr
999 instr from (OpReg reg) (OpReg dst)
1000 return (Any to code)
1002 toI8Reg new_rep expr
1003 = do codefn <- getAnyReg expr
1004 return (Any new_rep codefn)
1005 -- HACK: use getAnyReg to get a byte-addressable register.
1006 -- If the source was a Fixed register, this will add the
1007 -- mov instruction to put it into the desired destination.
1008 -- We're assuming that the destination won't be a fixed
1009 -- non-byte-addressable register; it won't be, because all
1010 -- fixed registers are word-sized.
1012 toI16Reg = toI8Reg -- for now
1014 conversionNop new_rep expr
1015 = do e_code <- getRegister expr
1016 return (swizzleRegisterRep e_code new_rep)
-- | Instruction selection for a dyadic MachOp on x86/x86_64.
-- Comparisons are routed to condFltReg/condIntReg (which materialise the
-- condition code into a register); arithmetic goes via the trivial*Code
-- helpers defined below.
-- NOTE(review): the enclosing 'case' header for these alternatives falls on
-- an elided line; the arms below belong to it.
getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
= ASSERT2(cmmExprRep x /= I8, pprExpr e)
-- floating-point comparisons, F32 first
MO_Eq F32 -> condFltReg EQQ x y
MO_Ne F32 -> condFltReg NE x y
MO_S_Gt F32 -> condFltReg GTT x y
MO_S_Ge F32 -> condFltReg GE x y
MO_S_Lt F32 -> condFltReg LTT x y
MO_S_Le F32 -> condFltReg LE x y
MO_Eq F64 -> condFltReg EQQ x y
MO_Ne F64 -> condFltReg NE x y
MO_S_Gt F64 -> condFltReg GTT x y
MO_S_Ge F64 -> condFltReg GE x y
MO_S_Lt F64 -> condFltReg LTT x y
MO_S_Le F64 -> condFltReg LE x y
-- integer comparisons: signed use GTT/GE/LTT/LE, unsigned use GU/GEU/LU/LEU
MO_Eq rep -> condIntReg EQQ x y
MO_Ne rep -> condIntReg NE x y
MO_S_Gt rep -> condIntReg GTT x y
MO_S_Ge rep -> condIntReg GE x y
MO_S_Lt rep -> condIntReg LTT x y
MO_S_Le rep -> condIntReg LE x y
MO_U_Gt rep -> condIntReg GU x y
MO_U_Ge rep -> condIntReg GEU x y
MO_U_Lt rep -> condIntReg LU x y
MO_U_Le rep -> condIntReg LEU x y
-- i386 uses the x87 G* instructions for FP arithmetic ...
#if i386_TARGET_ARCH
MO_Add F32 -> trivialFCode F32 GADD x y
MO_Sub F32 -> trivialFCode F32 GSUB x y
MO_Add F64 -> trivialFCode F64 GADD x y
MO_Sub F64 -> trivialFCode F64 GSUB x y
MO_S_Quot F32 -> trivialFCode F32 GDIV x y
MO_S_Quot F64 -> trivialFCode F64 GDIV x y
-- ... while x86_64 uses SSE ADD/SUB/FDIV
#if x86_64_TARGET_ARCH
MO_Add F32 -> trivialFCode F32 ADD x y
MO_Sub F32 -> trivialFCode F32 SUB x y
MO_Add F64 -> trivialFCode F64 ADD x y
MO_Sub F64 -> trivialFCode F64 SUB x y
MO_S_Quot F32 -> trivialFCode F32 FDIV x y
MO_S_Quot F64 -> trivialFCode F64 FDIV x y
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
MO_S_Quot rep -> div_code rep True True x y
MO_S_Rem rep -> div_code rep True False x y
MO_U_Quot rep -> div_code rep False True x y
MO_U_Rem rep -> div_code rep False False x y
#if i386_TARGET_ARCH
MO_Mul F32 -> trivialFCode F32 GMUL x y
MO_Mul F64 -> trivialFCode F64 GMUL x y
#if x86_64_TARGET_ARCH
MO_Mul F32 -> trivialFCode F32 MUL x y
MO_Mul F64 -> trivialFCode F64 MUL x y
-- IMUL is commutative, so it may be used for either operand order
MO_Mul rep -> let op = IMUL rep in
trivialCode rep op (Just op) x y
MO_S_MulMayOflo rep -> imulMayOflo rep x y
MO_And rep -> let op = AND rep in
trivialCode rep op (Just op) x y
MO_Or rep -> let op = OR rep in
trivialCode rep op (Just op) x y
MO_Xor rep -> let op = XOR rep in
trivialCode rep op (Just op) x y
{- Shift ops on x86s have constraints on their source, it
either has to be Imm, CL or 1
=> trivialCode is not restrictive enough (sigh.)
MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
--------------------
-- | Overflow-detecting multiply (x86): IMUL2 leaves the full product in
-- %edx:%eax; we then compare the high half against the sign-extension of the
-- low half.  The result register holds zero iff no overflow occurred.
imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
imulMayOflo rep a b = do
(a_reg, a_code) <- getNonClobberedReg a
b_code <- getAnyReg b
-- shift by (width-1) to reproduce the sign bit of the low half
-- NOTE(review): the I32/I64 arms of this case are on elided lines
shift_amt = case rep of
_ -> panic "shift_amt"
code = a_code `appOL` b_code eax `appOL`
IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
-- sign extend lower part
SUB rep (OpReg edx) (OpReg eax)
-- compare against upper
-- eax==0 if high part == sign extended low part
return (Fixed rep eax code)
--------------------
-- | Emit code for a shift MachOp.  x86 shift instructions only accept the
-- shift amount as an immediate, in %cl, or the constant 1, so the generic
-- trivialCode helper cannot be used (see the comment below).
shift_code :: MachRep
-> (Operand -> Operand -> Instr)
{- Case1: shift length as immediate -}
shift_code rep instr x y@(CmmLit lit) = do
x_code <- getAnyReg x
= x_code dst `snocOL`
instr (OpImm (litToImm lit)) (OpReg dst)
return (Any rep code)
{- Case2: shift length is complex (non-immediate)
* y must go in %ecx.
* we cannot do y first *and* put its result in %ecx, because
%ecx might be clobbered by x.
* if we do y second, then x cannot be
in a clobbered reg. Also, we cannot clobber x's reg
with the instruction itself.
- do y first, put its result in a fresh tmp, then copy it to %ecx later
- do y second and put its result into %ecx. x gets placed in a fresh
tmp. This is likely to be better, because the reg alloc can
eliminate this reg->reg move here (it won't eliminate the other one,
because the move is into the fixed %ecx).
shift_code rep instr x y{-amount-} = do
x_code <- getAnyReg x
tmp <- getNewRegNat rep
y_code <- getAnyReg y
code = x_code tmp `appOL`
instr (OpReg ecx) (OpReg tmp)
return (Fixed rep tmp code)
--------------------
-- | Addition: small (non-64-bit) immediates take the LEA-based add_int path
-- below; everything else falls through to a plain two-operand ADD.
add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
add_code rep x (CmmLit (CmmInt y _))
| not (is64BitInteger y) = add_int rep x y
add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
--------------------
-- | Subtraction: x - lit is rewritten as x + (-lit) so it can reuse the
-- three-operand add_int path; SUB is not commutative, hence Nothing for the
-- reversed-operand form in trivialCode.
sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
sub_code rep x (CmmLit (CmmInt y _))
| not (is64BitInteger (-y)) = add_int rep x (-y)
sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
-- our three-operand add instruction:
-- uses LEA with a base-register + displacement address, which (unlike ADD)
-- does not destroy its source operand
add_int rep x y = do
(x_reg, x_code) <- getSomeReg x
imm = ImmInt (fromInteger y)
(OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
return (Any rep code)
----------------------
-- | Division/remainder via the x86 IDIV/DIV family, which divides %edx:%eax
-- by the operand.  'signed' selects IDIV (with CLTD sign-extension of %eax
-- into %edx) vs. unsigned (with %edx zeroed by XOR); 'quotient' selects
-- which half of the result to return: quotient in %eax, remainder in %edx.
div_code rep signed quotient x y = do
(y_op, y_code) <- getRegOrMem y -- cannot be clobbered
x_code <- getAnyReg x
widen | signed = CLTD rep
| otherwise = XOR rep (OpReg edx) (OpReg edx)
instr | signed = IDIV
code = y_code `appOL`
toOL [widen, instr rep y_op]
result | quotient = eax
return (Fixed rep result code)
-- | Memory load; on i386 the x87 GLD is used (presumably the FP case — the
-- guard line is elided; TODO confirm), otherwise a plain MOV from memory.
getRegister (CmmLoad mem pk)
Amode src mem_code <- getAmode mem
code dst = mem_code `snocOL`
IF_ARCH_i386(GLD pk src dst,
MOV pk (OpAddr src) (OpReg dst))
return (Any pk code)
#if i386_TARGET_ARCH
-- | i386 integer load; I8 loads are zero-extended with MOVZxL (see note
-- below for why).
getRegister (CmmLoad mem pk)
code <- intLoadCode (instr pk) mem
return (Any pk code)
instr I8 = MOVZxL pk
-- we always zero-extend 8-bit loads, if we
-- can't think of anything better. This is because
-- we can't guarantee access to an 8-bit variant of every register
-- (esi and edi don't have 8-bit variants), so to make things
-- simpler we do our 8-bit arithmetic with full 32-bit registers.
#if x86_64_TARGET_ARCH
-- Simpler memory load code on x86_64
-- (a single MOV of the requested width suffices)
getRegister (CmmLoad mem pk)
code <- intLoadCode (MOV pk) mem
return (Any pk code)
-- | The constant zero is generated with a self-XOR, which is shorter than a
-- MOV of an immediate.
getRegister (CmmLit (CmmInt 0 rep))
-- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
adj_rep = case rep of I64 -> I32; _ -> rep
rep1 = IF_ARCH_i386( rep, adj_rep )
= unitOL (XOR rep1 (OpReg dst) (OpReg dst))
return (Any rep code)
#if x86_64_TARGET_ARCH
-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister (CmmLit lit)
| I64 <- cmmLitRep lit, not (isBigLit lit)
code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
return (Any I64 code)
-- "big" = does not fit in an unsigned 32-bit immediate
isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
-- note1: not the same as is64BitLit, because that checks for
-- signed literals that fit in 32 bits, but we want unsigned
-- note2: all labels are small, because we're assuming the
-- small memory model (see gcc docs, -mcmodel=small).
-- | Generic literal: a single MOV-immediate at the literal's own width.
getRegister (CmmLit lit)
code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
return (Any rep code)
-- anything not matched above is a bug in the selector
getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build a load: compute the address with getAmode, then apply the given
-- instruction constructor to (address, destination register).
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
-> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
Amode src mem_code <- getAmode mem
return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
r <- getRegister expr
-- | Turn a Register into a "code parameterised by destination": an Any is
-- already in that form; a Fixed needs a trailing reg-to-reg move.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code) = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
#if x86_64_TARGET_ARCH
getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
getByteReg expr = do
r <- getRegister expr
-- Any result: materialise it into a fresh (virtual, hence byte-safe) temp
tmp <- getNewRegNat rep
return (tmp, code tmp)
-- virtual registers are always byte-addressable after register allocation
| isVirtualReg reg -> return (reg,code)
tmp <- getNewRegNat rep
return (tmp, code `snocOL` reg2reg rep reg tmp)
-- ToDo: could optimise slightly by checking for byte-addressable
-- real registers, but that will happen very rarely if at all.
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
r <- getRegister expr
-- Any result: compute into a fresh temp, which nothing else will touch
tmp <- getNewRegNat rep
return (tmp, code tmp)
-- only free regs can be clobbered
| RealReg rr <- reg, isFastTrue (freeReg rr) -> do
tmp <- getNewRegNat rep
return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | A register-to-register move of the given width; on i386 floating-point
-- values live in x87 registers and need GMOV instead of MOV.
reg2reg :: MachRep -> Reg -> Reg -> Instr
#if i386_TARGET_ARCH
| isFloatingRep rep = GMOV src dst
| otherwise = MOV rep (OpReg src) (OpReg dst)
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1366 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1368 #if sparc_TARGET_ARCH
-- | SPARC float literals: there is no FP-immediate form, so the literal is
-- placed in a data section under a fresh label and loaded via
-- SETHI(hi)/LD(lo) address arithmetic.
getRegister (CmmLit (CmmFloat f F32)) = do
lbl <- getNewLabelNat
let code dst = toOL [
CmmStaticLit (CmmFloat f F32)],
SETHI (HI (ImmCLbl lbl)) dst,
LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
return (Any F32 code)
-- same scheme for doubles
getRegister (CmmLit (CmmFloat d F64)) = do
lbl <- getNewLabelNat
let code dst = toOL [
CmmStaticLit (CmmFloat d F64)],
SETHI (HI (ImmCLbl lbl)) dst,
LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
return (Any F64 code)
-- | SPARC unary MachOps: FP negation, integer negate/not (built from
-- SUB/XNOR against %g0), and the various width/FP conversions.
getRegister (CmmMachOp mop [x]) -- unary MachOps
MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
-- integer negate = 0 - x; not = x XNOR 0 (both use the zero register g0)
MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
MO_Not rep -> trivialUCode rep (XNOR False g0) x
MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
MO_U_Conv F64 F32-> coerceDbl2Flt x
MO_U_Conv F32 F64-> coerceFlt2Dbl x
MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
-- Conversions which are a nop on sparc
| from == to -> conversionNop to x
MO_U_Conv I32 to -> conversionNop to x
MO_S_Conv I32 to -> conversionNop to x
-- widening: shift left then (arithmetic or logical) shift right
MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
other_op -> panic "Unknown unary mach op"
-- sign- or zero-extend via SRA/SRL (the matching SLL is on an elided line)
integerExtend signed from to expr = do
(reg, e_code) <- getSomeReg expr
((if signed then SRA else SRL)
reg (RIImm (ImmInt 0)) dst)
return (Any to code)
-- a conversion that needs no code: just relabel the register's rep
conversionNop new_rep expr
= do e_code <- getRegister expr
return (swizzleRegisterRep e_code new_rep)
-- | SPARC dyadic MachOps.  Comparisons mirror the x86 selector; division
-- is punted to the .div/.rem/.udiv/.urem millicode routines via an
-- out-of-line call (see the idiv helper referenced below).
getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
MO_Eq F32 -> condFltReg EQQ x y
MO_Ne F32 -> condFltReg NE x y
MO_S_Gt F32 -> condFltReg GTT x y
MO_S_Ge F32 -> condFltReg GE x y
MO_S_Lt F32 -> condFltReg LTT x y
MO_S_Le F32 -> condFltReg LE x y
MO_Eq F64 -> condFltReg EQQ x y
MO_Ne F64 -> condFltReg NE x y
MO_S_Gt F64 -> condFltReg GTT x y
MO_S_Ge F64 -> condFltReg GE x y
MO_S_Lt F64 -> condFltReg LTT x y
MO_S_Le F64 -> condFltReg LE x y
MO_Eq rep -> condIntReg EQQ x y
MO_Ne rep -> condIntReg NE x y
MO_S_Gt rep -> condIntReg GTT x y
MO_S_Ge rep -> condIntReg GE x y
MO_S_Lt rep -> condIntReg LTT x y
MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): unsigned I32 compares use the signed conditions here while
-- I16 uses the unsigned ones — looks suspicious, but preserved as found;
-- confirm against upstream history before changing.
MO_U_Gt I32 -> condIntReg GTT x y
MO_U_Ge I32 -> condIntReg GE x y
MO_U_Lt I32 -> condIntReg LTT x y
MO_U_Le I32 -> condIntReg LE x y
MO_U_Gt I16 -> condIntReg GU x y
MO_U_Ge I16 -> condIntReg GEU x y
MO_U_Lt I16 -> condIntReg LU x y
MO_U_Le I16 -> condIntReg LEU x y
MO_Add I32 -> trivialCode I32 (ADD False False) x y
MO_Sub I32 -> trivialCode I32 (SUB False False) x y
MO_S_MulMayOflo rep -> imulMayOflo rep x y
-- ToDo: teach about V8+ SPARC div instructions
MO_S_Quot I32 -> idiv FSLIT(".div") x y
MO_S_Rem I32 -> idiv FSLIT(".rem") x y
MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
MO_U_Rem I32 -> idiv FSLIT(".urem") x y
MO_Add F32 -> trivialFCode F32 FADD x y
MO_Sub F32 -> trivialFCode F32 FSUB x y
MO_Mul F32 -> trivialFCode F32 FMUL x y
MO_S_Quot F32 -> trivialFCode F32 FDIV x y
MO_Add F64 -> trivialFCode F64 FADD x y
MO_Sub F64 -> trivialFCode F64 FSUB x y
MO_Mul F64 -> trivialFCode F64 FMUL x y
MO_S_Quot F64 -> trivialFCode F64 FDIV x y
MO_And rep -> trivialCode rep (AND False) x y
MO_Or rep -> trivialCode rep (OR False) x y
MO_Xor rep -> trivialCode rep (XOR False) x y
MO_Mul rep -> trivialCode rep (SMUL False) x y
MO_Shl rep -> trivialCode rep SLL x y
MO_U_Shr rep -> trivialCode rep SRL x y
MO_S_Shr rep -> trivialCode rep SRA x y
-- pow is done by promoting to double and calling libm's pow
MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
[promote x, promote y])
where promote x = CmmMachOp MO_F32_to_Dbl [x]
MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
--idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
--------------------
-- | SPARC overflow-detecting multiply: SMUL produces the low word; the high
-- word is compared (via SUB into dst) against the sign-extension of the low
-- word, so dst is zero iff the product fits in 32 bits.
imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
imulMayOflo rep a b = do
(a_reg, a_code) <- getSomeReg a
(b_reg, b_code) <- getSomeReg b
res_lo <- getNewRegNat I32
res_hi <- getNewRegNat I32
-- NOTE(review): the I32/I64 arms of this case are on elided lines
shift_amt = case rep of
_ -> panic "shift_amt"
code dst = a_code `appOL` b_code `appOL`
SMUL False a_reg (RIReg b_reg) res_lo,
SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
SUB False False res_lo (RIReg res_hi) dst
return (Any I32 code)
-- | SPARC memory load: address via getAmode, then a single LD.
getRegister (CmmLoad mem pk) = do
Amode src code <- getAmode mem
code__2 dst = code `snocOL` LD pk src dst
return (Any pk code__2)
-- small integer literal: OR against %g0 (i.e. a move-immediate);
-- the fits-in-13-bits guard is on an elided line — TODO confirm
getRegister (CmmLit (CmmInt i _))
src = ImmInt (fromInteger i)
code dst = unitOL (OR False g0 (RIImm src) dst)
return (Any I32 code)
-- general literal: SETHI for the high 22 bits, OR-immediate for the low 10
getRegister (CmmLit lit)
= let rep = cmmLitRep lit
OR False dst (RIImm (LO imm)) dst]
in return (Any I32 code)
1556 #endif /* sparc_TARGET_ARCH */
1558 #if powerpc_TARGET_ARCH
-- | PowerPC memory loads.  A plain load, plus special cases that fold a
-- zero-/sign-extending conversion into the load instruction itself.
getRegister (CmmLoad mem pk)
Amode addr addr_code <- getAmode mem
let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
addr_code `snocOL` LD pk dst addr
return (Any pk code)
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode mem
return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode mem
return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
-- signed 16-bit load uses LA (load halfword algebraic)
getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
Amode addr addr_code <- getAmode mem
return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
-- | PowerPC unary MachOps: NOT/NEG, FP width changes, and integer
-- conversions.  Narrowing conversions are nops because the high bits are
-- treated as undefined; widening uses EXTS (signed) or AND-masking
-- (unsigned).
getRegister (CmmMachOp mop [x]) -- unary MachOps
MO_Not rep -> trivialUCode rep NOT x
-- double->single rounds with FRSP; single->double needs no code
MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
MO_S_Conv F32 F64 -> conversionNop F64 x
| from == to -> conversionNop to x
| isFloatingRep from -> coerceFP2Int from to x
| isFloatingRep to -> coerceInt2FP from to x
-- narrowing is a nop: we treat the high bits as undefined
MO_S_Conv I32 to -> conversionNop to x
MO_S_Conv I16 I8 -> conversionNop I8 x
MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
| from == to -> conversionNop to x
-- narrowing is a nop: we treat the high bits as undefined
MO_U_Conv I32 to -> conversionNop to x
MO_U_Conv I16 I8 -> conversionNop I8 x
MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
MO_S_Neg F32 -> trivialUCode F32 FNEG x
MO_S_Neg F64 -> trivialUCode F64 FNEG x
MO_S_Neg rep -> trivialUCode rep NEG x
-- relabel the register's rep without emitting code
conversionNop new_rep expr
= do e_code <- getRegister expr
return (swizzleRegisterRep e_code new_rep)
-- | PowerPC dyadic MachOps.  Sub-word integer operands are first widened to
-- I32 with extendSExpr/extendUExpr (defined below) because the PPC compare
-- and divide instructions work on full words.
getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
MO_Eq F32 -> condFltReg EQQ x y
MO_Ne F32 -> condFltReg NE x y
MO_S_Gt F32 -> condFltReg GTT x y
MO_S_Ge F32 -> condFltReg GE x y
MO_S_Lt F32 -> condFltReg LTT x y
MO_S_Le F32 -> condFltReg LE x y
MO_Eq F64 -> condFltReg EQQ x y
MO_Ne F64 -> condFltReg NE x y
MO_S_Gt F64 -> condFltReg GTT x y
MO_S_Ge F64 -> condFltReg GE x y
MO_S_Lt F64 -> condFltReg LTT x y
MO_S_Le F64 -> condFltReg LE x y
MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
-- optimize addition with 32-bit immediate
CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
-> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
-- large immediate: split into high (ADDIS) and low (ADD) halves
(src, srcCode) <- getSomeReg x
let imm = litToImm lit
code dst = srcCode `appOL` toOL [
ADDIS dst src (HA imm),
ADD dst dst (RIImm (LO imm))
return (Any I32 code)
_ -> trivialCode I32 True ADD x y
MO_Add rep -> trivialCode rep True ADD x y
case y of -- subfi ('subtract from' with immediate) doesn't exist
CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
-> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
_ -> trivialCodeNoImm rep SUBF y x
MO_Mul rep -> trivialCode rep True MULLW x y
MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
MO_And rep -> trivialCode rep False AND x y
MO_Or rep -> trivialCode rep False OR x y
MO_Xor rep -> trivialCode rep False XOR x y
MO_Shl rep -> trivialCode rep False SLW x y
MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-- | PowerPC literals.  Small integers fit in a single LI; floats go through
-- a data-section label (PIC-aware via cmmMakeDynamicReference); other
-- literals are built with LIS/OR high-low halves.
getRegister (CmmLit (CmmInt i rep))
| Just imm <- makeImmediate rep True i
code dst = unitOL (LI dst imm)
return (Any rep code)
getRegister (CmmLit (CmmFloat f frep)) = do
lbl <- getNewLabelNat
dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
Amode addr addr_code <- getAmode dynRef
LDATA ReadOnlyData [CmmDataLabel lbl,
CmmStaticLit (CmmFloat f frep)]
`consOL` (addr_code `snocOL` LD frep dst addr)
return (Any frep code)
getRegister (CmmLit lit)
= let rep = cmmLitRep lit
OR dst dst (RIImm (LO imm))
in return (Any rep code)
getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extend?Rep: wrap integer expression of type rep
-- in a conversion to I32
extendSExpr I32 x = x
extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
extendUExpr I32 x = x
extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
1738 #endif /* powerpc_TARGET_ARCH */
1741 -- -----------------------------------------------------------------------------
1742 -- The 'Amode' type: Memory addressing modes passed up the tree.
1744 data Amode = Amode AddrMode InstrBlock
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.
1750 A Rule of the Game (tm) for Amodes: use of the addr bit must
1751 immediately follow use of the code part, since the code part puts
1752 values in registers which the addr then refers to. So you can't put
1753 anything in between, lest it overwrite some of those registers. If
1754 you need to do some other computation between the code part and use of
1755 the addr bit, first store the effective address from the amode in a
1756 temporary, then do the other computation, and then use the temporary:
1760 ... other computation ...
-- | Turn an address-producing expression into an Amode; CmmRegOff is first
-- normalised by mangleIndexTree into reg+offset form.
getAmode :: CmmExpr -> NatM Amode
getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1767 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1769 #if alpha_TARGET_ARCH
-- | Alpha getAmode (old Stix-style thenNat plumbing): reg+/-immediate
-- addressing for add/sub of a constant, plain register otherwise.
getAmode (StPrim IntSubOp [x, StInt i])
= getNewRegNat PtrRep `thenNat` \ tmp ->
getRegister x `thenNat` \ register ->
code = registerCode register tmp
reg = registerName register tmp
off = ImmInt (-(fromInteger i))
return (Amode (AddrRegImm reg off) code)
getAmode (StPrim IntAddOp [x, StInt i])
= getNewRegNat PtrRep `thenNat` \ tmp ->
getRegister x `thenNat` \ register ->
code = registerCode register tmp
reg = registerName register tmp
off = ImmInt (fromInteger i)
return (Amode (AddrRegImm reg off) code)
-- absolute-immediate case (its guard line is elided)
= return (Amode (AddrImm imm__2) id)
imm__2 = case imm of Just x -> x
= getNewRegNat PtrRep `thenNat` \ tmp ->
getRegister other `thenNat` \ register ->
code = registerCode register tmp
reg = registerName register tmp
return (Amode (AddrReg reg) code)
1807 #endif /* alpha_TARGET_ARCH */
1809 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1811 #if x86_64_TARGET_ARCH
-- | x86_64 PIC: PicBaseReg + displacement becomes a RIP-relative address,
-- needing no setup code at all.
getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
CmmLit displacement])
= return $ Amode (ripRel (litToImm displacement)) nilOL
1819 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
| not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (-(fromInteger i))
return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
| not (is64BitLit lit)
-- ASSERT(rep == I32)???
= do (x_reg, x_code) <- getSomeReg x
let off = ImmInt (fromInteger i)
return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
= getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << scale): maps directly onto x86 scaled-index addressing,
-- which supports scales of 1/2/4/8 (shift 0..3)
getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
[y, CmmLit (CmmInt shift _)]])
| shift == 0 || shift == 1 || shift == 2 || shift == 3
= x86_complex_amode x y shift 0
getAmode (CmmMachOp (MO_Add rep)
[x, CmmMachOp (MO_Add _)
[CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
CmmLit (CmmInt offset _)]])
| shift == 0 || shift == 1 || shift == 2 || shift == 3
&& not (is64BitInteger offset)
= x86_complex_amode x y shift offset
getAmode (CmmMachOp (MO_Add rep) [x,y])
= x86_complex_amode x y 0 0
getAmode (CmmLit lit) | not (is64BitLit lit)
= return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- fallback: compute the address into a register
(reg,code) <- getSomeReg expr
return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + scaled-index + offset amode.  The base expression must
-- survive evaluation of the index, hence getNonClobberedReg for it.
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
= do (x_reg, x_code) <- getNonClobberedReg base
-- x must be in a temp, because it has to stay live over y_code
-- we could compare x_reg and y_reg and do something better here...
(y_reg, y_code) <- getSomeReg index
code = x_code `appOL` y_code
base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1879 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1881 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1883 #if sparc_TARGET_ARCH
-- | SPARC getAmode: reg+imm for add/sub of a small constant (the
-- fits-in-13-bits guards are on elided lines — TODO confirm), reg+reg for a
-- general add, SETHI/LO splitting for bare literals.
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
(reg, code) <- getSomeReg x
off = ImmInt (-(fromInteger i))
return (Amode (AddrRegImm reg off) code)
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
(reg, code) <- getSomeReg x
off = ImmInt (fromInteger i)
return (Amode (AddrRegImm reg off) code)
getAmode (CmmMachOp (MO_Add rep) [x, y])
(regX, codeX) <- getSomeReg x
(regY, codeY) <- getSomeReg y
code = codeX `appOL` codeY
return (Amode (AddrRegReg regX regY) code)
-- XXX Is this same as "leaf" in Stix?
getAmode (CmmLit lit)
tmp <- getNewRegNat I32
code = unitOL (SETHI (HI imm__2) tmp)
return (Amode (AddrRegImm tmp (LO imm__2)) code)
imm__2 = litToImm lit
-- fallback: compute the address into a register, zero offset
(reg, code) <- getSomeReg other
return (Amode (AddrRegImm reg off) code)
1927 #endif /* sparc_TARGET_ARCH */
1929 #ifdef powerpc_TARGET_ARCH
-- | PowerPC getAmode: reg+imm when the constant fits a signed 16-bit
-- immediate (makeImmediate), otherwise ADDIS/LO splitting; reg+reg for a
-- general add; LIS/LO for bare literals.
getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
| Just off <- makeImmediate I32 True (-i)
(reg, code) <- getSomeReg x
return (Amode (AddrRegImm reg off) code)
getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
| Just off <- makeImmediate I32 True i
(reg, code) <- getSomeReg x
return (Amode (AddrRegImm reg off) code)
-- optimize addition with 32-bit immediate
getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
tmp <- getNewRegNat I32
(src, srcCode) <- getSomeReg x
let imm = litToImm lit
code = srcCode `snocOL` ADDIS tmp src (HA imm)
return (Amode (AddrRegImm tmp (LO imm)) code)
getAmode (CmmLit lit)
tmp <- getNewRegNat I32
let imm = litToImm lit
code = unitOL (LIS tmp (HA imm))
return (Amode (AddrRegImm tmp (LO imm)) code)
getAmode (CmmMachOp (MO_Add I32) [x, y])
(regX, codeX) <- getSomeReg x
(regY, codeY) <- getSomeReg y
return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- fallback: compute the address into a register
(reg, code) <- getSomeReg other
return (Amode (AddrRegImm reg off) code)
1972 #endif /* powerpc_TARGET_ARCH */
1974 -- -----------------------------------------------------------------------------
1975 -- getOperand: sometimes any operand will do.
1977 -- getNonClobberedOperand: the value of the operand will remain valid across
1978 -- the computation of an arbitrary expression, unless the expression
1979 -- is computed directly into a register which the operand refers to
1980 -- (see trivialCode where this function is used for an example).
1982 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Get an operand whose value survives evaluation of an arbitrary later
-- expression.  Memory operands whose amode uses clobberable (free real)
-- registers are snapshotted with an LEA into a fresh temp first.
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
#if x86_64_TARGET_ARCH
-- x86_64: non-zero FP literals are referenced RIP-relative from a data
-- section (no FP immediates in SSE)
getNonClobberedOperand (CmmLit lit)
| isSuitableFloatingPointLit lit = do
lbl <- getNewLabelNat
let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
return (OpAddr (ripRel (ImmCLbl lbl)), code)
getNonClobberedOperand (CmmLit lit)
| not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
return (OpImm (litToImm lit), nilOL)
getNonClobberedOperand (CmmLoad mem pk)
| IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
Amode src mem_code <- getAmode mem
if (amodeCouldBeClobbered src)
tmp <- getNewRegNat wordRep
return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
return (OpAddr src', save_code `appOL` mem_code)
getNonClobberedOperand e = do
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
-- an amode can be clobbered if any register it mentions is a free real reg
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
regClobbered (RealReg rr) = isFastTrue (freeReg rr)
regClobbered _ = False
-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
#if x86_64_TARGET_ARCH
-- non-zero FP literal: RIP-relative reference to a data-section constant
getOperand (CmmLit lit)
| isSuitableFloatingPointLit lit = do
lbl <- getNewLabelNat
let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
return (OpAddr (ripRel (ImmCLbl lbl)), code)
getOperand (CmmLit lit)
| not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
return (OpImm (litToImm lit), nilOL)
getOperand (CmmLoad mem pk)
| IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
Amode src mem_code <- getAmode mem
return (OpAddr src, mem_code)
-- fallback: evaluate into some register
(reg, code) <- getSomeReg e
return (OpReg reg, code)
-- | Can this expression be used directly as an x86 operand (memory,
-- immediate, or a usable FP literal)?
isOperand :: CmmExpr -> Bool
isOperand (CmmLoad _ _) = True
isOperand (CmmLit lit) = not (is64BitLit lit)
|| isSuitableFloatingPointLit lit
-- if we want a floating-point literal as an operand, we can
-- use it directly from memory. However, if the literal is
-- zero, we're better off generating it into a register using
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False
-- | Like getOperand but never yields an immediate: either a memory operand
-- or a non-clobberable register (used e.g. by div_code for the divisor).
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem (CmmLoad mem pk)
| IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
Amode src mem_code <- getAmode mem
return (OpAddr src, mem_code)
(reg, code) <- getNonClobberedReg e
return (OpReg reg, code)
2062 #if x86_64_TARGET_ARCH
-- | Does this literal need a full 64-bit encoding (i.e. it does not fit in
-- a sign-extended 32-bit immediate)?  Only meaningful on x86_64.
is64BitLit (CmmInt i I64) = is64BitInteger i
-- assume that labels are in the range 0-2^31-1: this assumes the
-- small memory model (see gcc docs, -mcmodel=small).
is64BitLit x = False
is64BitInteger :: Integer -> Bool
is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
where i64 = fromIntegral i :: Int64
-- a CmmInt is intended to be truncated to the appropriate
-- number of bits, so here we truncate it to Int64. This is
-- important because e.g. -1 as a CmmInt might be either
-- -1 or 18446744073709551615.
2078 -- -----------------------------------------------------------------------------
2079 -- The 'CondCode' type: Condition codes passed up the tree.
2081 data CondCode = CondCode Bool Cond InstrBlock
-- Set up a condition code for a conditional branch.
getCondCode :: CmmExpr -> NatM CondCode
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH
getCondCode = panic "MachCode.getCondCode: not on Alphas"
#endif /* alpha_TARGET_ARCH */
2093 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
-- yes, they really do seem to want exactly the same!
-- (comparisons dispatch to condFltCode for floats, condIntCode otherwise)
getCondCode (CmmMachOp mop [x, y])
MO_Eq F32 -> condFltCode EQQ x y
MO_Ne F32 -> condFltCode NE x y
MO_S_Gt F32 -> condFltCode GTT x y
MO_S_Ge F32 -> condFltCode GE x y
MO_S_Lt F32 -> condFltCode LTT x y
MO_S_Le F32 -> condFltCode LE x y
MO_Eq F64 -> condFltCode EQQ x y
MO_Ne F64 -> condFltCode NE x y
MO_S_Gt F64 -> condFltCode GTT x y
MO_S_Ge F64 -> condFltCode GE x y
MO_S_Lt F64 -> condFltCode LTT x y
MO_S_Le F64 -> condFltCode LE x y
MO_Eq rep -> condIntCode EQQ x y
MO_Ne rep -> condIntCode NE x y
MO_S_Gt rep -> condIntCode GTT x y
MO_S_Ge rep -> condIntCode GE x y
MO_S_Lt rep -> condIntCode LTT x y
MO_S_Le rep -> condIntCode LE x y
MO_U_Gt rep -> condIntCode GU x y
MO_U_Ge rep -> condIntCode GEU x y
MO_U_Lt rep -> condIntCode LU x y
MO_U_Le rep -> condIntCode LEU x y
other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2134 #elif powerpc_TARGET_ARCH
2136 -- almost the same as everywhere else - but we need to
2137 -- extend small integers to 32 bit first
-- Same dispatch as the x86/sparc version, except integer operands are
-- widened before comparison: extendSExpr (sign-extend) for signed ops,
-- extendUExpr (zero-extend) for unsigned ops and equality, since the
-- PPC compare instructions below operate at I32 width.
2139 getCondCode (CmmMachOp mop [x, y])
2141 MO_Eq F32 -> condFltCode EQQ x y
2142 MO_Ne F32 -> condFltCode NE x y
2144 MO_S_Gt F32 -> condFltCode GTT x y
2145 MO_S_Ge F32 -> condFltCode GE x y
2146 MO_S_Lt F32 -> condFltCode LTT x y
2147 MO_S_Le F32 -> condFltCode LE x y
2149 MO_Eq F64 -> condFltCode EQQ x y
2150 MO_Ne F64 -> condFltCode NE x y
2152 MO_S_Gt F64 -> condFltCode GTT x y
2153 MO_S_Ge F64 -> condFltCode GE x y
2154 MO_S_Lt F64 -> condFltCode LTT x y
2155 MO_S_Le F64 -> condFltCode LE x y
2157 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2158 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2160 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2161 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2162 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2163 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2165 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2166 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2167 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2168 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2170 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2172 getCondCode other = panic "getCondCode(2)(powerpc)"
2178 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2179 -- passed back up the tree.
-- | Compile one integer (resp. floating-point) comparison of the two
-- operand expressions, producing flag-setting code and the condition
-- to test.  Per-architecture definitions follow.
2181 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2183 #if alpha_TARGET_ARCH
-- Alpha does not use this path; reaching these is a compiler bug.
2184 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2185 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2186 #endif /* alpha_TARGET_ARCH */
2188 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2189 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2191 -- memory vs immediate
-- Compare a loaded memory operand directly against an immediate with CMP,
-- avoiding a register load (only for literals that fit; is64BitLit guards
-- against immediates too wide for a CMP immediate field).
2192 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2193 Amode x_addr x_code <- getAmode x
2196 code = x_code `snocOL`
2197 CMP pk (OpImm imm) (OpAddr x_addr)
2199 return (CondCode False cond code)
-- Comparison against zero: TEST reg,reg sets the flags without needing
-- an immediate.
2202 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2203 (x_reg, x_code) <- getSomeReg x
2205 code = x_code `snocOL`
2206 TEST pk (OpReg x_reg) (OpReg x_reg)
2208 return (CondCode False cond code)
2210 -- anything vs operand
-- x must survive the computation of y, hence getNonClobberedReg for x.
2211 condIntCode cond x y | isOperand y = do
2212 (x_reg, x_code) <- getNonClobberedReg x
2213 (y_op, y_code) <- getOperand y
2215 code = x_code `appOL` y_code `snocOL`
2216 CMP (cmmExprRep x) y_op (OpReg x_reg)
2218 return (CondCode False cond code)
2220 -- anything vs anything
-- General case: y in a non-clobbered register, x as register-or-memory.
2221 condIntCode cond x y = do
2222 (y_reg, y_code) <- getNonClobberedReg y
2223 (x_op, x_code) <- getRegOrMem x
2225 code = y_code `appOL`
2227 CMP (cmmExprRep x) (OpReg y_reg) x_op
2229 return (CondCode False cond code)
2232 #if i386_TARGET_ARCH
-- i386 float comparison goes through the x87 GCMP pseudo-instruction;
-- the ASSERT restricts it to the conditions GCMP can encode.
2233 condFltCode cond x y
2234 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2235 (x_reg, x_code) <- getNonClobberedReg x
2236 (y_reg, y_code) <- getSomeReg y
2238 code = x_code `appOL` y_code `snocOL`
2239 GCMP cond x_reg y_reg
2240 -- The GCMP insn does the test and sets the zero flag if comparable
2241 -- and true. Hence we always supply EQQ as the condition to test.
2242 return (CondCode True EQQ code)
2243 #endif /* i386_TARGET_ARCH */
2245 #if x86_64_TARGET_ARCH
2246 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2247 -- an operand, but the right must be a reg. We can probably do better
2248 -- than this general case...
2249 condFltCode cond x y = do
2250 (x_reg, x_code) <- getNonClobberedReg x
2251 (y_op, y_code) <- getOperand y
2253 code = x_code `appOL`
2255 CMP (cmmExprRep x) y_op (OpReg x_reg)
2256 -- NB(1): we need to use the unsigned comparison operators on the
2257 -- result of this comparison.
2259 return (CondCode True (condToUnsigned cond) code)
2262 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2264 #if sparc_TARGET_ARCH
-- SPARC has no dedicated compare instruction: a flag-setting SUB (second
-- argument True) with the result discarded into %g0 (the always-zero
-- register) sets the integer condition codes instead.
2266 condIntCode cond x (CmmLit (CmmInt y rep))
2269 (src1, code) <- getSomeReg x
2271 src2 = ImmInt (fromInteger y)
2272 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2273 return (CondCode False cond code')
-- General register/register case, same flag-setting SUB into g0.
2275 condIntCode cond x y = do
2276 (src1, code1) <- getSomeReg x
2277 (src2, code2) <- getSomeReg y
2279 code__2 = code1 `appOL` code2 `snocOL`
2280 SUB False True src1 (RIReg src2) g0
2281 return (CondCode False cond code__2)
-- Float comparison: FCMP needs both operands at the same precision, so
-- if exactly one side is F32 it is first promoted to F64 (FxTOy) via a
-- fresh F64 temporary.
2284 condFltCode cond x y = do
2285 (src1, code1) <- getSomeReg x
2286 (src2, code2) <- getSomeReg y
2287 tmp <- getNewRegNat F64
2289 promote x = FxTOy F32 F64 x tmp
2296 code1 `appOL` code2 `snocOL`
2297 FCMP True pk1 src1 src2
2298 else if pk1 == F32 then
2299 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2300 FCMP True F64 tmp src2
2302 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2303 FCMP True F64 src1 tmp
2304 return (CondCode True cond code__2)
2306 #endif /* sparc_TARGET_ARCH */
2308 #if powerpc_TARGET_ARCH
2309 -- ###FIXME: I16 and I8!
-- Immediate case: only taken when makeImmediate can encode the literal
-- for the (signed vs unsigned) comparison; CMPL is the unsigned compare,
-- CMP the signed one, both at I32 width.
2310 condIntCode cond x (CmmLit (CmmInt y rep))
2311 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2313 (src1, code) <- getSomeReg x
2315 code' = code `snocOL`
2316 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2317 return (CondCode False cond code')
-- General register/register comparison.
2319 condIntCode cond x y = do
2320 (src1, code1) <- getSomeReg x
2321 (src2, code2) <- getSomeReg y
2323 code' = code1 `appOL` code2 `snocOL`
2324 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2325 return (CondCode False cond code')
-- Float comparison: FCMP, plus a CRNOR fix-up for GE/LE so that an
-- unordered result (NaN) does not satisfy the condition.
2327 condFltCode cond x y = do
2328 (src1, code1) <- getSomeReg x
2329 (src2, code2) <- getSomeReg y
2331 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2332 code'' = case cond of -- twiddle CR to handle unordered case
2333 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2334 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2337 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2338 return (CondCode True cond code'')
2340 #endif /* powerpc_TARGET_ARCH */
2342 -- -----------------------------------------------------------------------------
2343 -- Generating assignments
2345 -- Assignments are really at the heart of the whole code generation
2346 -- business. Almost all top-level nodes of any real importance are
2347 -- assignments, which correspond to loads, stores, or register
2348 -- transfers. If we're really lucky, some of the register transfers
2349 -- will go away, because we can use the destination register to
2350 -- complete the code generation for the right hand side. This only
2351 -- fails when the right hand side is forced into a fixed register
2352 -- (e.g. the result of a call).
-- | Store an integer (resp. float) rhs into memory at the address given
-- by the first CmmExpr, or into a CmmReg.  Per-architecture definitions
-- follow, selected by CPP.
2354 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2355 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2357 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2358 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2360 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2362 #if alpha_TARGET_ARCH
-- NOTE(review): this pre-dates the Cmm split into Mem/Reg variants and
-- still uses the old thenNat / Register accessor style.
-- Store to memory: evaluate src into a temp, then ST to the amode of dst.
2364 assignIntCode pk (CmmLoad dst _) src
2365 = getNewRegNat IntRep `thenNat` \ tmp ->
2366 getAmode dst `thenNat` \ amode ->
2367 getRegister src `thenNat` \ register ->
2369 code1 = amodeCode amode []
2370 dst__2 = amodeAddr amode
2371 code2 = registerCode register tmp []
2372 src__2 = registerName register tmp
2373 sz = primRepToSize pk
2374 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register-to-register: if src came back in a fixed register, copy it
-- into the destination with OR (Alpha's register move idiom).
2378 assignIntCode pk dst src
2379 = getRegister dst `thenNat` \ register1 ->
2380 getRegister src `thenNat` \ register2 ->
2382 dst__2 = registerName register1 zeroh
2383 code = registerCode register2 dst__2
2384 src__2 = registerName register2 dst__2
2385 code__2 = if isFixed register2
2386 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2391 #endif /* alpha_TARGET_ARCH */
2393 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2395 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2397 -- integer assignment to memory
2399 -- specific case of adding/subtracting an integer to a particular address.
2400 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Peephole: "mem := mem op literal" on the same address becomes a single
-- ADD/SUB-to-memory instruction (guarded against 64-bit-wide immediates).
2402 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2403 CmmLit (CmmInt i _)])
2404 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2405 Just instr <- check op
2406 = do Amode amode code_addr <- getAmode addr
2407 let code = code_addr `snocOL`
2408 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2411 check (MO_Add _) = Just ADD
2412 check (MO_Sub _) = Just SUB
-- General store: src as a register-or-immediate operand, MOV to memory.
2417 assignMem_IntCode pk addr src = do
2418 Amode addr code_addr <- getAmode addr
2419 (code_src, op_src) <- get_op_RI src
2421 code = code_src `appOL`
2423 MOV pk op_src (OpAddr addr)
2424 -- NOTE: op_src is stable, so it will still be valid
2425 -- after code_addr. This may involve the introduction
2426 -- of an extra MOV to a temporary register, but we hope
2427 -- the register allocator will get rid of it.
-- Literals that fit an immediate field become OpImm; everything else
-- is evaluated into a non-clobbered register.
2431 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2432 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2433 = return (nilOL, OpImm (litToImm lit))
2435 = do (reg,code) <- getNonClobberedReg op
2436 return (code, OpReg reg)
2439 -- Assign; dst is a reg, rhs is mem
2440 assignReg_IntCode pk reg (CmmLoad src _) = do
2441 load_code <- intLoadCode (MOV pk) src
2442 return (load_code (getRegisterReg reg))
2444 -- dst is a reg, but src could be anything
2445 assignReg_IntCode pk reg src = do
2446 code <- getAnyReg src
2447 return (code (getRegisterReg reg))
2449 #endif /* i386_TARGET_ARCH */
2451 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2453 #if sparc_TARGET_ARCH
-- Store: evaluate src into a register, then ST to the computed address.
2455 assignMem_IntCode pk addr src = do
2456 (srcReg, code) <- getSomeReg src
2457 Amode dstAddr addr_code <- getAmode addr
2458 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Register assignment: Any results are materialised straight into dst;
-- a Fixed result is copied with OR g0,freg -> dst (SPARC's move idiom).
2460 assignReg_IntCode pk reg src = do
2461 r <- getRegister src
2463 Any _ code -> code dst
2464 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2466 dst = getRegisterReg reg
2469 #endif /* sparc_TARGET_ARCH */
2471 #if powerpc_TARGET_ARCH
-- Same store pattern as SPARC above.
2473 assignMem_IntCode pk addr src = do
2474 (srcReg, code) <- getSomeReg src
2475 Amode dstAddr addr_code <- getAmode addr
2476 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2478 -- dst is a reg, but src could be anything
-- Fixed results are copied into dst with MR (move register).
2479 assignReg_IntCode pk reg src
2481 r <- getRegister src
2483 Any _ code -> code dst
2484 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2486 dst = getRegisterReg reg
2488 #endif /* powerpc_TARGET_ARCH */
2491 -- -----------------------------------------------------------------------------
2492 -- Floating-point assignments
2494 #if alpha_TARGET_ARCH
-- Float store: same shape as the Alpha integer store above, using ST.
2496 assignFltCode pk (CmmLoad dst _) src
2497 = getNewRegNat pk `thenNat` \ tmp ->
2498 getAmode dst `thenNat` \ amode ->
2499 getRegister src `thenNat` \ register ->
2501 code1 = amodeCode amode []
2502 dst__2 = amodeAddr amode
2503 code2 = registerCode register tmp []
2504 src__2 = registerName register tmp
2505 sz = primRepToSize pk
2506 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Float register-to-register: fixed results are copied with FMOV.
2510 assignFltCode pk dst src
2511 = getRegister dst `thenNat` \ register1 ->
2512 getRegister src `thenNat` \ register2 ->
2514 dst__2 = registerName register1 zeroh
2515 code = registerCode register2 dst__2
2516 src__2 = registerName register2 dst__2
2517 code__2 = if isFixed register2
2518 then code . mkSeqInstr (FMOV src__2 dst__2)
2523 #endif /* alpha_TARGET_ARCH */
2525 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2527 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2529 -- Floating point assignment to memory
-- i386 stores through the x87 GST pseudo-instruction; x86_64 (SSE) uses
-- a plain MOV of the xmm register — selected by IF_ARCH_i386.
2530 assignMem_FltCode pk addr src = do
2531 (src_reg, src_code) <- getNonClobberedReg src
2532 Amode addr addr_code <- getAmode addr
2534 code = src_code `appOL`
2536 IF_ARCH_i386(GST pk src_reg addr,
2537 MOV pk (OpReg src_reg) (OpAddr addr))
2540 -- Floating point assignment to a register/temporary
2541 assignReg_FltCode pk reg src = do
2542 src_code <- getAnyReg src
2543 return (src_code (getRegisterReg reg))
2545 #endif /* i386_TARGET_ARCH */
2547 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2549 #if sparc_TARGET_ARCH
2551 -- Floating point assignment to memory
-- If the source expression's precision differs from the store width pk,
-- convert with FxTOy through a fresh temp before the ST.
2552 assignMem_FltCode pk addr src = do
2553 Amode dst__2 code1 <- getAmode addr
2554 (src__2, code2) <- getSomeReg src
2555 tmp1 <- getNewRegNat pk
2557 pk__2 = cmmExprRep src
2558 code__2 = code1 `appOL` code2 `appOL`
2560 then unitOL (ST pk src__2 dst__2)
2561 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2564 -- Floating point assignment to a register/temporary
2565 -- ToDo: Verify correctness
-- NOTE(review): in the Fixed case the FMOV targets the fresh temp v1
-- rather than dst — that looks suspicious given dst is the destination;
-- the ToDo above suggests this path was never verified.  Confirm before
-- relying on it.
2566 assignReg_FltCode pk reg src = do
2567 r <- getRegister src
2568 v1 <- getNewRegNat pk
2570 Any _ code -> code dst
2571 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2573 dst = getRegisterReg reg
2575 #endif /* sparc_TARGET_ARCH */
2577 #if powerpc_TARGET_ARCH
-- On PPC float and int assignments compile identically (FP values live
-- in ordinary load/store-addressable registers as far as ST/MR go).
2580 assignMem_FltCode = assignMem_IntCode
2581 assignReg_FltCode = assignReg_IntCode
2583 #endif /* powerpc_TARGET_ARCH */
2586 -- -----------------------------------------------------------------------------
2587 -- Generating a non-local jump
2589 -- (If applicable) Do not fill the delay slots here; you will confuse the
2590 -- register allocator.
-- | Compile an unconditional non-local jump to the given target
-- expression (label, loaded address, or computed address).
2592 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2594 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2596 #if alpha_TARGET_ARCH
-- Local assembler temps get a direct BR; other labels load the address
-- into pv and jump through it.
2598 genJump (CmmLabel lbl)
2599 | isAsmTemp lbl = returnInstr (BR target)
2600 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2602 target = ImmCLbl lbl
2605 = getRegister tree `thenNat` \ register ->
2606 getNewRegNat PtrRep `thenNat` \ tmp ->
2608 dst = registerName register pv
2609 code = registerCode register pv
2610 target = registerName register pv
2612 if isFixed register then
2613 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2615 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2617 #endif /* alpha_TARGET_ARCH */
2619 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2621 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump through a memory operand, an immediate label, or a register.
2623 genJump (CmmLoad mem pk) = do
2624 Amode target code <- getAmode mem
2625 return (code `snocOL` JMP (OpAddr target))
2627 genJump (CmmLit lit) = do
2628 return (unitOL (JMP (OpImm (litToImm lit))))
2631 (reg,code) <- getSomeReg expr
2632 return (code `snocOL` JMP (OpReg reg))
2634 #endif /* i386_TARGET_ARCH */
2636 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2638 #if sparc_TARGET_ARCH
-- NOPs fill the branch delay slots (see comment above: do not fill them
-- with real work here, it confuses the register allocator).
2640 genJump (CmmLit (CmmLabel lbl))
2641 = return (toOL [CALL (Left target) 0 True, NOP])
2643 target = ImmCLbl lbl
2647 (target, code) <- getSomeReg tree
2648 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2650 #endif /* sparc_TARGET_ARCH */
2652 #if powerpc_TARGET_ARCH
-- Computed jumps go through the count register: MTCTR then BCTR.
2653 genJump (CmmLit (CmmLabel lbl))
2654 = return (unitOL $ JMP lbl)
2658 (target,code) <- getSomeReg tree
2659 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2660 #endif /* powerpc_TARGET_ARCH */
2663 -- -----------------------------------------------------------------------------
2664 -- Unconditional branches
-- | Unconditional branch to a local basic block: delegate to the
-- per-arch mkBranchInstr (from RegAllocInfo) and wrap as an InstrBlock.
2666 genBranch :: BlockId -> NatM InstrBlock
2668 genBranch = return . toOL . mkBranchInstr
2670 -- -----------------------------------------------------------------------------
2671 -- Conditional jumps
2674 Conditional jumps are always to local labels, so we can use branch
2675 instructions. We peek at the arguments to decide what kind of
2678 ALPHA: For comparisons with 0, we're laughing, because we can just do
2679 the desired conditional branch.
2681 I386: First, we have to ensure that the condition
2682 codes are set according to the supplied comparison operation.
2684 SPARC: First, we have to ensure that the condition codes are set
2685 according to the supplied comparison operation. We generate slightly
2686 different code for floating point comparisons, because a floating
2687 point operation cannot directly precede a @BF@. We assume the worst
2688 and fill that slot with a @NOP@.
2690 SPARC: Do not fill the delay slots here; you will confuse the register
2696 :: BlockId -- the branch target
2697 -> CmmExpr -- the condition on which to branch
2700 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2702 #if alpha_TARGET_ARCH
-- Comparison against integer zero: branch directly on the operand with BI.
-- The Word*/Addr* entries exploit unsignedness against 0: >= 0 is ALWAYS,
-- < 0 is NEVER, <= 0 collapses to == 0 (EQQ).
2704 genCondJump id (StPrim op [x, StInt 0])
2705 = getRegister x `thenNat` \ register ->
2706 getNewRegNat (registerRep register)
2709 code = registerCode register tmp
2710 value = registerName register tmp
2711 pk = registerRep register
2712 target = ImmCLbl lbl
2714 returnSeq code [BI (cmpOp op) value target]
2716 cmpOp CharGtOp = GTT
2718 cmpOp CharEqOp = EQQ
2720 cmpOp CharLtOp = LTT
2729 cmpOp WordGeOp = ALWAYS
2730 cmpOp WordEqOp = EQQ
2732 cmpOp WordLtOp = NEVER
2733 cmpOp WordLeOp = EQQ
2735 cmpOp AddrGeOp = ALWAYS
2736 cmpOp AddrEqOp = EQQ
2738 cmpOp AddrLtOp = NEVER
2739 cmpOp AddrLeOp = EQQ
-- Comparison against floating zero: branch on the operand with BF.
2741 genCondJump lbl (StPrim op [x, StDouble 0.0])
2742 = getRegister x `thenNat` \ register ->
2743 getNewRegNat (registerRep register)
2746 code = registerCode register tmp
2747 value = registerName register tmp
2748 pk = registerRep register
2749 target = ImmCLbl lbl
2751 return (code . mkSeqInstr (BF (cmpOp op) value target))
2753 cmpOp FloatGtOp = GTT
2754 cmpOp FloatGeOp = GE
2755 cmpOp FloatEqOp = EQQ
2756 cmpOp FloatNeOp = NE
2757 cmpOp FloatLtOp = LTT
2758 cmpOp FloatLeOp = LE
2759 cmpOp DoubleGtOp = GTT
2760 cmpOp DoubleGeOp = GE
2761 cmpOp DoubleEqOp = EQQ
2762 cmpOp DoubleNeOp = NE
2763 cmpOp DoubleLtOp = LTT
2764 cmpOp DoubleLeOp = LE
-- General float comparison: Alpha's FCMP only tests LE/LTT/EQQ, so each
-- source op is mapped to an (FCMP variant, branch condition) pair whose
-- combination realises the original comparison (e.g. Gt = not (cmp LE)).
2766 genCondJump lbl (StPrim op [x, y])
2768 = trivialFCode pr instr x y `thenNat` \ register ->
2769 getNewRegNat F64 `thenNat` \ tmp ->
2771 code = registerCode register tmp
2772 result = registerName register tmp
2773 target = ImmCLbl lbl
2775 return (code . mkSeqInstr (BF cond result target))
2777 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2779 fltCmpOp op = case op of
2793 (instr, cond) = case op of
2794 FloatGtOp -> (FCMP TF LE, EQQ)
2795 FloatGeOp -> (FCMP TF LTT, EQQ)
2796 FloatEqOp -> (FCMP TF EQQ, NE)
2797 FloatNeOp -> (FCMP TF EQQ, EQQ)
2798 FloatLtOp -> (FCMP TF LTT, NE)
2799 FloatLeOp -> (FCMP TF LE, NE)
2800 DoubleGtOp -> (FCMP TF LE, EQQ)
2801 DoubleGeOp -> (FCMP TF LTT, EQQ)
2802 DoubleEqOp -> (FCMP TF EQQ, NE)
2803 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2804 DoubleLtOp -> (FCMP TF LTT, NE)
2805 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same inverted (CMP variant, branch
-- condition) encoding; ULE/ULT are the unsigned compares for Word/Addr.
2807 genCondJump lbl (StPrim op [x, y])
2808 = trivialCode instr x y `thenNat` \ register ->
2809 getNewRegNat IntRep `thenNat` \ tmp ->
2811 code = registerCode register tmp
2812 result = registerName register tmp
2813 target = ImmCLbl lbl
2815 return (code . mkSeqInstr (BI cond result target))
2817 (instr, cond) = case op of
2818 CharGtOp -> (CMP LE, EQQ)
2819 CharGeOp -> (CMP LTT, EQQ)
2820 CharEqOp -> (CMP EQQ, NE)
2821 CharNeOp -> (CMP EQQ, EQQ)
2822 CharLtOp -> (CMP LTT, NE)
2823 CharLeOp -> (CMP LE, NE)
2824 IntGtOp -> (CMP LE, EQQ)
2825 IntGeOp -> (CMP LTT, EQQ)
2826 IntEqOp -> (CMP EQQ, NE)
2827 IntNeOp -> (CMP EQQ, EQQ)
2828 IntLtOp -> (CMP LTT, NE)
2829 IntLeOp -> (CMP LE, NE)
2830 WordGtOp -> (CMP ULE, EQQ)
2831 WordGeOp -> (CMP ULT, EQQ)
2832 WordEqOp -> (CMP EQQ, NE)
2833 WordNeOp -> (CMP EQQ, EQQ)
2834 WordLtOp -> (CMP ULT, NE)
2835 WordLeOp -> (CMP ULE, NE)
2836 AddrGtOp -> (CMP ULE, EQQ)
2837 AddrGeOp -> (CMP ULT, EQQ)
2838 AddrEqOp -> (CMP EQQ, NE)
2839 AddrNeOp -> (CMP EQQ, EQQ)
2840 AddrLtOp -> (CMP ULT, NE)
2841 AddrLeOp -> (CMP ULE, NE)
2843 #endif /* alpha_TARGET_ARCH */
2845 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2847 #if i386_TARGET_ARCH
-- i386: the CondCode's code sets the flags, then a single JXX branches.
-- (The float/int distinction is already folded into cond by condFltCode.)
2849 genCondJump id bool = do
2850 CondCode _ cond code <- getCondCode bool
2851 return (code `snocOL` JXX cond id)
2855 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2857 #if x86_64_TARGET_ARCH
-- x86_64: integer conditions are a single JXX; float conditions need
-- extra parity-flag handling for NaN (the plain_test / or_unordered /
-- and_ordered sequences), selected by is_float.
2859 genCondJump id bool = do
2860 CondCode is_float cond cond_code <- getCondCode bool
2863 return (cond_code `snocOL` JXX cond id)
2865 lbl <- getBlockIdNat
2867 -- see comment with condFltReg
2868 let code = case cond of
2874 plain_test = unitOL (
2877 or_unordered = toOL [
2881 and_ordered = toOL [
2887 return (cond_code `appOL` code)
2891 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2893 #if sparc_TARGET_ARCH
-- SPARC: float branches (BF) get a NOP *before* them as well, because a
-- floating-point op cannot directly precede a BF (see comment above);
-- the trailing NOP fills the branch delay slot.
2895 genCondJump (BlockId id) bool = do
2896 CondCode is_float cond code <- getCondCode bool
2901 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2902 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2906 #endif /* sparc_TARGET_ARCH */
2909 #if powerpc_TARGET_ARCH
-- PPC: flags are in the condition register; BCC branches on them.
2911 genCondJump id bool = do
2912 CondCode is_float cond code <- getCondCode bool
2913 return (code `snocOL` BCC cond id)
2915 #endif /* powerpc_TARGET_ARCH */
2918 -- -----------------------------------------------------------------------------
2919 -- Generating C calls
2921 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2922 -- @get_arg@, which moves the arguments to the correct registers/stack
2923 -- locations. Apart from that, the code is easy.
2925 -- (If applicable) Do not fill the delay slots here; you will confuse the
2926 -- register allocator.
2929 :: CmmCallTarget -- function to call
2930 -> [(CmmReg,MachHint)] -- where to put the result
2931 -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
2932 -> Maybe [GlobalReg] -- volatile regs to save
2935 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2937 #if alpha_TARGET_ARCH
-- Alpha C call: marshal args with get_arg (registers first, then stack),
-- load the callee address into pv, JSR, then reload gp (LDGP) as the
-- Alpha calling convention requires after a call.
2941 genCCall fn cconv result_regs args
2942 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2943 `thenNat` \ ((unused,_), argCode) ->
2945 nRegs = length allArgRegs - length unused
2946 code = asmSeqThen (map ($ []) argCode)
2949 LDA pv (AddrImm (ImmLab (ptext fn))),
2950 JSR ra (AddrReg pv) nRegs,
2951 LDGP gp (AddrReg ra)]
2953 ------------------------
2954 {- Try to get a value into a specific register (or registers) for
2955 a call. The first 6 arguments go into the appropriate
2956 argument register (separate registers for integer and floating
2957 point arguments, but used in lock-step), and the remaining
2958 arguments are dumped to the stack, beginning at 0(sp). Our
2959 first argument is a pair of the list of remaining argument
2960 registers to be assigned for this call and the next stack
2961 offset to use for overflowing arguments. This way,
2962 @get_Arg@ can be applied to all of a call's arguments using
2966 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2967 -> StixTree -- Current argument
2968 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2970 -- We have to use up all of our argument registers first...
-- Register case: pick the int or float register of the pair based on the
-- argument's rep; fixed-register results are copied in (FMOV / OR move).
2972 get_arg ((iDst,fDst):dsts, offset) arg
2973 = getRegister arg `thenNat` \ register ->
2975 reg = if isFloatingRep pk then fDst else iDst
2976 code = registerCode register reg
2977 src = registerName register reg
2978 pk = registerRep register
2981 if isFloatingRep pk then
2982 ((dsts, offset), if isFixed register then
2983 code . mkSeqInstr (FMOV src fDst)
2986 ((dsts, offset), if isFixed register then
2987 code . mkSeqInstr (OR src (RIReg src) iDst)
2990 -- Once we have run out of argument registers, we move to the
-- Stack case: evaluate into a temp and ST it at the current sp-relative
-- offset, bumping the offset by one slot.
2993 get_arg ([], offset) arg
2994 = getRegister arg `thenNat` \ register ->
2995 getNewRegNat (registerRep register)
2998 code = registerCode register tmp
2999 src = registerName register tmp
3000 pk = registerRep register
3001 sz = primRepToSize pk
3003 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3005 #endif /* alpha_TARGET_ARCH */
3007 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3009 #if i386_TARGET_ARCH
3011 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3012 -- write barrier compiles to no code on x86/x86-64;
3013 -- we keep it this long in order to prevent earlier optimisations.
3015 -- we only cope with a single result for foreign calls
-- sqrt/sin/cos/tan are inlined as single x87 instructions (GSQRT etc.);
-- every other CallishMachOp falls through to an out-of-line C call.
3016 genCCall (CmmPrim op) [(r,_)] args vols = do
3018 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3019 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3021 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3022 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3024 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3025 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3027 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3028 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3030 other_op -> outOfLineFloatOp op r args vols
3032 actuallyInlineFloatOp rep instr [(x,_)]
3033 = do res <- trivialUFCode rep instr x
3035 return (any (getRegisterReg r))
-- General C call: push args right-to-left (hence the reverses), call,
-- pop args (ccall only — stdcall callee pops), move the result out of
-- eax/edx/fake0.  DELTA notes track the stack-pointer adjustment for
-- the register allocator's spill offsets.
3037 genCCall target dest_regs args vols = do
3039 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
3040 #if !darwin_TARGET_OS
3041 tot_arg_size = sum sizes
-- Darwin requires 16-byte stack alignment at the call, so the arg area
-- is rounded up and the difference pre-subtracted from esp.
3043 raw_arg_size = sum sizes
3044 tot_arg_size = roundTo 16 raw_arg_size
3045 arg_pad_size = tot_arg_size - raw_arg_size
3046 delta0 <- getDeltaNat
3047 setDeltaNat (delta0 - arg_pad_size)
3050 push_codes <- mapM push_arg (reverse args)
3051 delta <- getDeltaNat
3054 -- deal with static vs dynamic call targets
3055 (callinsns,cconv) <-
3058 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3059 -> -- ToDo: stdcall arg sizes
3060 return (unitOL (CALL (Left fn_imm) []), conv)
3061 where fn_imm = ImmCLbl lbl
3062 CmmForeignCall expr conv
3063 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3064 ASSERT(dyn_rep == I32)
3065 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3068 #if darwin_TARGET_OS
3070 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3071 DELTA (delta0 - arg_pad_size)]
3072 `appOL` concatOL push_codes
3075 = concatOL push_codes
3076 call = callinsns `appOL`
3078 -- Deallocate parameters after call for ccall;
3079 -- but not for stdcall (callee does it)
3080 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3081 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3083 [DELTA (delta + tot_arg_size)]
3086 setDeltaNat (delta + tot_arg_size)
3089 -- assign the results, if necessary
-- i386 C ABI result locations: I64 in edx:eax, floats on the x87 stack
-- (modelled here as fake0), everything else in eax.
3090 assign_code [] = nilOL
3091 assign_code [(dest,_hint)] =
3093 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3094 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3095 F32 -> unitOL (GMOV fake0 r_dest)
3096 F64 -> unitOL (GMOV fake0 r_dest)
3097 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3099 r_dest_hi = getHiVRegFromLo r_dest
3100 rep = cmmRegRep dest
3101 r_dest = getRegisterReg dest
3102 assign_code many = panic "genCCall.assign_code many"
3104 return (push_code `appOL`
3106 assign_code dest_regs)
-- Round x up to the next multiple of a.
3114 roundTo a x | x `mod` a == 0 = x
3115 | otherwise = x + a - (x `mod` a)
3118 push_arg :: (CmmExpr,MachHint){-current argument-}
3119 -> NatM InstrBlock -- code
-- I64 args go as two I32 pushes, hi word first; floats are stored with
-- GST after making room on the stack; everything else is a plain PUSH.
3121 push_arg (arg,_hint) -- we don't need the hints on x86
3122 | arg_rep == I64 = do
3123 ChildCode64 code r_lo <- iselExpr64 arg
3124 delta <- getDeltaNat
3125 setDeltaNat (delta - 8)
3127 r_hi = getHiVRegFromLo r_lo
3129 return ( code `appOL`
3130 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3131 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
3136 (code, reg, sz) <- get_op arg
3137 delta <- getDeltaNat
3138 let size = arg_size sz
3139 setDeltaNat (delta-size)
3140 if (case sz of F64 -> True; F32 -> True; _ -> False)
3141 then return (code `appOL`
3142 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3144 GST sz reg (AddrBaseIndex (EABaseReg esp)
3148 else return (code `snocOL`
3149 PUSH I32 (OpReg reg) `snocOL`
3153 arg_rep = cmmExprRep arg
3156 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3158 (reg,code) <- getSomeReg op
3159 return (code, reg, cmmExprRep op)
3161 #endif /* i386_TARGET_ARCH */
3163 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Compile a CallishMachOp as a call to the corresponding C math
-- library routine (sqrtf/sin/pow/...), reusing the ordinary foreign-call
-- path via stmtToInstrs.  An F64 result goes straight into res; an F32
-- result is taken at F64 in a fresh temp and then assigned to res
-- (getting the narrowing conversion from the assignment).
3165 outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
3166 -> Maybe [GlobalReg] -> NatM InstrBlock
3167 outOfLineFloatOp mop res args vols
3169 targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3170 let target = CmmForeignCall targetExpr CCallConv
3172 if cmmRegRep res == F64
3174 stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
3178 tmp = CmmLocal (LocalReg uq F64)
3180 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
3181 code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
3182 return (code1 `appOL` code2)
-- Map each MachOp to the libm symbol it calls.
3184 lbl = mkForeignLabel fn Nothing False
3187 MO_F32_Sqrt -> FSLIT("sqrtf")
3188 MO_F32_Sin -> FSLIT("sinf")
3189 MO_F32_Cos -> FSLIT("cosf")
3190 MO_F32_Tan -> FSLIT("tanf")
3191 MO_F32_Exp -> FSLIT("expf")
3192 MO_F32_Log -> FSLIT("logf")
3194 MO_F32_Asin -> FSLIT("asinf")
3195 MO_F32_Acos -> FSLIT("acosf")
3196 MO_F32_Atan -> FSLIT("atanf")
3198 MO_F32_Sinh -> FSLIT("sinhf")
3199 MO_F32_Cosh -> FSLIT("coshf")
3200 MO_F32_Tanh -> FSLIT("tanhf")
3201 MO_F32_Pwr -> FSLIT("powf")
3203 MO_F64_Sqrt -> FSLIT("sqrt")
3204 MO_F64_Sin -> FSLIT("sin")
3205 MO_F64_Cos -> FSLIT("cos")
3206 MO_F64_Tan -> FSLIT("tan")
3207 MO_F64_Exp -> FSLIT("exp")
3208 MO_F64_Log -> FSLIT("log")
3210 MO_F64_Asin -> FSLIT("asin")
3211 MO_F64_Acos -> FSLIT("acos")
3212 MO_F64_Atan -> FSLIT("atan")
3214 MO_F64_Sinh -> FSLIT("sinh")
3215 MO_F64_Cosh -> FSLIT("cosh")
3216 MO_F64_Tanh -> FSLIT("tanh")
3217 MO_F64_Pwr -> FSLIT("pow")
3219 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3221 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3223 #if x86_64_TARGET_ARCH
3225 genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
3226 -- write barrier compiles to no code on x86/x86-64;
3227 -- we keep it this long in order to prevent earlier optimisations.
3229 genCCall (CmmPrim op) [(r,_)] args vols =
3230 outOfLineFloatOp op r args vols
3232 genCCall target dest_regs args vols = do
3234 -- load up the register arguments
3235 (stack_args, aregs, fregs, load_args_code)
3236 <- load_args args allArgRegs allFPArgRegs nilOL
3239 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3240 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3241 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3242 -- for annotating the call instruction with
3244 sse_regs = length fp_regs_used
3246 tot_arg_size = arg_size * length stack_args
3248 -- On entry to the called function, %rsp should be aligned
3249 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3250 -- the return address is 16-byte aligned). In STG land
3251 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3252 -- need to make sure we push a multiple of 16-bytes of args,
3253 -- plus the return address, to get the correct alignment.
3254 -- Urg, this is hard. We need to feed the delta back into
3255 -- the arg pushing code.
3256 (real_size, adjust_rsp) <-
3257 if tot_arg_size `rem` 16 == 0
3258 then return (tot_arg_size, nilOL)
3259 else do -- we need to adjust...
3260 delta <- getDeltaNat
3261 setDeltaNat (delta-8)
3262 return (tot_arg_size+8, toOL [
3263 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3267 -- push the stack args, right to left
3268 push_code <- push_args (reverse stack_args) nilOL
3269 delta <- getDeltaNat
3271 -- deal with static vs dynamic call targets
3272 (callinsns,cconv) <-
3275 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3276 -> -- ToDo: stdcall arg sizes
3277 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3278 where fn_imm = ImmCLbl lbl
3279 CmmForeignCall expr conv
3280 -> do (dyn_r, dyn_c) <- getSomeReg expr
3281 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3284 -- The x86_64 ABI requires us to set %al to the number of SSE
3285 -- registers that contain arguments, if the called routine
3286 -- is a varargs function. We don't know whether it's a
3287 -- varargs function or not, so we have to assume it is.
3289 -- It's not safe to omit this assignment, even if the number
3290 -- of SSE regs in use is zero. If %al is larger than 8
3291 -- on entry to a varargs function, seg faults ensue.
3292 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3294 let call = callinsns `appOL`
3296 -- Deallocate parameters after call for ccall;
3297 -- but not for stdcall (callee does it)
3298 (if cconv == StdCallConv || real_size==0 then [] else
3299 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3301 [DELTA (delta + real_size)]
3304 setDeltaNat (delta + real_size)
3307 -- assign the results, if necessary
3308 assign_code [] = nilOL
3309 assign_code [(dest,_hint)] =
3311 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3312 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3313 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3315 rep = cmmRegRep dest
3316 r_dest = getRegisterReg dest
3317 assign_code many = panic "genCCall.assign_code many"
3319 return (load_args_code `appOL`
3322 assign_eax sse_regs `appOL`
3324 assign_code dest_regs)
3327 arg_size = 8 -- always, at the mo
3329 load_args :: [(CmmExpr,MachHint)]
3330 -> [Reg] -- int regs avail for args
3331 -> [Reg] -- FP regs avail for args
3333 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3334 load_args args [] [] code = return (args, [], [], code)
3335 -- no more regs to use
3336 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3337 -- no more args to push
3338 load_args ((arg,hint) : rest) aregs fregs code
3339 | isFloatingRep arg_rep =
3343 arg_code <- getAnyReg arg
3344 load_args rest aregs rs (code `appOL` arg_code r)
3349 arg_code <- getAnyReg arg
3350 load_args rest rs fregs (code `appOL` arg_code r)
3352 arg_rep = cmmExprRep arg
3355 (args',ars,frs,code') <- load_args rest aregs fregs code
3356 return ((arg,hint):args', ars, frs, code')
3358 push_args [] code = return code
3359 push_args ((arg,hint):rest) code
3360 | isFloatingRep arg_rep = do
3361 (arg_reg, arg_code) <- getSomeReg arg
3362 delta <- getDeltaNat
3363 setDeltaNat (delta-arg_size)
3364 let code' = code `appOL` arg_code `appOL` toOL [
3365 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3366 DELTA (delta-arg_size),
3367 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3368 push_args rest code'
3371 -- we only ever generate word-sized function arguments. Promotion
3372 -- has already happened: our Int8# type is kept sign-extended
3373 -- in an Int#, for example.
3374 ASSERT(arg_rep == I64) return ()
3375 (arg_op, arg_code) <- getOperand arg
3376 delta <- getDeltaNat
3377 setDeltaNat (delta-arg_size)
3378 let code' = code `appOL` toOL [PUSH I64 arg_op,
3379 DELTA (delta-arg_size)]
3380 push_args rest code'
3382 arg_rep = cmmExprRep arg
3385 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3387 #if sparc_TARGET_ARCH
3389 The SPARC calling convention is an absolute
3390 nightmare. The first 6x32 bits of arguments are mapped into
3391 %o0 through %o5, and the remaining arguments are dumped to the
3392 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3394 If we have to put args on the stack, move %o6==%sp down by
3395 the number of words to go on the stack, to ensure there's enough space.
3397 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3398 16 words above the stack pointer is a word for the address of
3399 a structure return value. I use this as a temporary location
3400 for moving values from float to int regs. Certainly it isn't
3401 safe to put anything in the 16 words starting at %sp, since
3402 this area can get trashed at any time due to window overflows
3403 caused by signal handlers.
3405 A final complication (if the above isn't enough) is that
3406 we can't blithely calculate the arguments one by one into
3407 %o0 .. %o5. Consider the following nested calls:
3411 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3412 the inner call will itself use %o0, which trashes the value put there
3413 in preparation for the outer call. Upshot: we need to calculate the
3414 args into temporary regs, and move those to arg regs or onto the
3415 stack only immediately prior to the call proper. Sigh.
-- SPARC: generate code for a foreign C call.
-- Per the long note above: every argument is first computed into fresh
-- integer vregs (arg_to_int_vregs), and only immediately before the CALL
-- are those vregs moved into the %o0..%o5 argument registers / onto the
-- stack (move_final), so that nested calls cannot clobber already-placed
-- argument registers.
-- NOTE(review): the embedded original line numbers are not contiguous
-- here (e.g. 3418->3420, 3433->3435), so some source lines are missing
-- from this extract; the code below is incomplete as shown.
3418 genCCall target dest_regs argsAndHints vols = do
3420 args = map fst argsAndHints
3421 argcode_and_vregs <- mapM arg_to_int_vregs args
3423 (argcodes, vregss) = unzip argcode_and_vregs
3424 n_argRegs = length allArgRegs
3425 n_argRegs_used = min (length vregs) n_argRegs
3426 vregs = concat vregss
3427 -- deal with static vs dynamic call targets
3428 callinsns <- (case target of
3429 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
3430 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3431 CmmForeignCall expr conv -> do
-- Dynamic target: evaluate the function pointer into a vreg and do an
-- indirect call.  The [dyn_r] pattern assumes a word-sized (single-vreg)
-- result for the target expression.
3432 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3433 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- CmmPrim case (alternative header missing from extract): out-of-line
-- floating-point op, turned into a call to the libm routine.
3435 (res, reduce) <- outOfLineFloatOp mop
3436 lblOrMopExpr <- case res of
3438 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3440 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3441 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- "reduce" (F32 ops wanting a narrowing of the F64 libm result) is not
-- supported on this path.
3442 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3446 argcode = concatOL argcodes
-- If more args than fit in %o0..%o5, move %sp down to make room for the
-- overflow args; nn is rounded up to an even word count for 8-byte
-- alignment.
3447 (move_sp_down, move_sp_up)
3448 = let diff = length vregs - n_argRegs
3449 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3452 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3454 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3455 return (argcode `appOL`
3456 move_sp_down `appOL`
3457 transfer_code `appOL`
3462 -- move args from the integer vregs into which they have been
3463 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3464 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3466 move_final [] _ offset -- all args done
3469 move_final (v:vs) [] offset -- out of aregs; move to stack
3470 = ST I32 v (spRel offset)
3471 : move_final vs [] (offset+1)
3473 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
-- OR with %g0 (always zero) is the SPARC idiom for a register move.
3474 = OR False g0 (RIReg v) a
3475 : move_final vs az offset
3477 -- generate code to calculate an argument, and move it into one
3478 -- or two integer vregs.
3479 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3480 arg_to_int_vregs arg
-- 64-bit args occupy two 32-bit vregs (hi word first).
3481 | (cmmExprRep arg) == I64
3483 (ChildCode64 code r_lo) <- iselExpr64 arg
3485 r_hi = getHiVRegFromLo r_lo
3486 return (code, [r_hi, r_lo])
3489 (src, code) <- getSomeReg arg
3490 tmp <- getNewRegNat (cmmExprRep arg)
-- F64 case (guard missing from extract): bounce the double through the
-- scratch word at [%sp+16] (see note above about fig 17.2) to move it
-- from float regs into two integer vregs.
3495 v1 <- getNewRegNat I32
3496 v2 <- getNewRegNat I32
3499 FMOV F64 src f0 `snocOL`
3500 ST F32 f0 (spRel 16) `snocOL`
3501 LD I32 (spRel 16) v1 `snocOL`
3502 ST F32 (fPair f0) (spRel 16) `snocOL`
3503 LD I32 (spRel 16) v2
-- F32 case: one bounce through [%sp+16] into a single integer vreg.
3508 v1 <- getNewRegNat I32
3511 ST F32 src (spRel 16) `snocOL`
3512 LD I32 (spRel 16) v1
-- Integer case: plain register move into a fresh vreg.
3517 v1 <- getNewRegNat I32
3519 code `snocOL` OR False g0 (RIReg src) v1
-- SPARC: map a floating-point CallishMachOp to an out-of-line call to the
-- corresponding libm routine.  Returns either the static label or a
-- dynamic expression for the routine (PIC-aware, via
-- cmmMakeDynamicReference), plus a "reduce" flag.  reduce is True for
-- all the F32 ops -- presumably meaning the F64 libm result must be
-- narrowed back to F32 by the caller; the caller above panics if it is
-- set (TODO confirm against the callers outside this extract).
3523 outOfLineFloatOp mop =
3525 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3526 mkForeignLabel functionName Nothing True
3527 let mopLabelOrExpr = case mopExpr of
3528 CmmLit (CmmLabel lbl) -> Left lbl
-- Right/expr alternative missing from this extract (non-contiguous
-- embedded line numbers).
3530 return (mopLabelOrExpr, reduce)
3532 (reduce, functionName) = case mop of
3533 MO_F32_Exp -> (True, FSLIT("exp"))
3534 MO_F32_Log -> (True, FSLIT("log"))
3535 MO_F32_Sqrt -> (True, FSLIT("sqrt"))
3537 MO_F32_Sin -> (True, FSLIT("sin"))
3538 MO_F32_Cos -> (True, FSLIT("cos"))
3539 MO_F32_Tan -> (True, FSLIT("tan"))
3541 MO_F32_Asin -> (True, FSLIT("asin"))
3542 MO_F32_Acos -> (True, FSLIT("acos"))
3543 MO_F32_Atan -> (True, FSLIT("atan"))
3545 MO_F32_Sinh -> (True, FSLIT("sinh"))
3546 MO_F32_Cosh -> (True, FSLIT("cosh"))
3547 MO_F32_Tanh -> (True, FSLIT("tanh"))
3549 MO_F64_Exp -> (False, FSLIT("exp"))
3550 MO_F64_Log -> (False, FSLIT("log"))
3551 MO_F64_Sqrt -> (False, FSLIT("sqrt"))
3553 MO_F64_Sin -> (False, FSLIT("sin"))
3554 MO_F64_Cos -> (False, FSLIT("cos"))
3555 MO_F64_Tan -> (False, FSLIT("tan"))
3557 MO_F64_Asin -> (False, FSLIT("asin"))
3558 MO_F64_Acos -> (False, FSLIT("acos"))
3559 MO_F64_Atan -> (False, FSLIT("atan"))
3561 MO_F64_Sinh -> (False, FSLIT("sinh"))
3562 MO_F64_Cosh -> (False, FSLIT("cosh"))
3563 MO_F64_Tanh -> (False, FSLIT("tanh"))
3565 other -> pprPanic "outOfLineFloatOp(sparc) "
3566 (pprCallishMachOp mop)
3568 #endif /* sparc_TARGET_ARCH */
3570 #if powerpc_TARGET_ARCH
3572 #if darwin_TARGET_OS || linux_TARGET_OS
3574 The PowerPC calling convention for Darwin/Mac OS X
3575 is described in Apple's document
3576 "Inside Mac OS X - Mach-O Runtime Architecture".
3578 PowerPC Linux uses the System V Release 4 Calling Convention
3579 for PowerPC. It is described in the
3580 "System V Application Binary Interface PowerPC Processor Supplement".
3582 Both conventions are similar:
3583 Parameters may be passed in general-purpose registers starting at r3, in
3584 floating point registers starting at f1, or on the stack.
3586 But there are substantial differences:
3587 * The number of registers used for parameter passing and the exact set of
3588 nonvolatile registers differs (see MachRegs.lhs).
3589 * On Darwin, stack space is always reserved for parameters, even if they are
3590 passed in registers. The called routine may choose to save parameters from
3591 registers to the corresponding space on the stack.
3592 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3593 parameter is passed in an FPR.
3594 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3595 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3596 Darwin just treats an I64 like two separate I32s (high word first).
3597 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3598 4-byte aligned like everything else on Darwin.
3599 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3600 PowerPC Linux does not agree, so neither do we.
3602 According to both conventions, the parameter area should be part of the
3603 caller's stack frame, allocated in the caller's prologue code (large enough
3604 to hold the parameter lists for all called routines). The NCG already
3605 uses the stack for register spilling, leaving 64 bytes free at the top.
3606 If we need a larger parameter area than that, we just allocate a new stack
3607 frame just before ccalling.
-- PowerPC (Darwin/Linux): generate code for a foreign C call, following
-- the two similar-but-different ABIs described in the long note above.
-- MO_WriteBarrier needs no call at all: a single LWSYNC suffices.
3611 genCCall (CmmPrim MO_WriteBarrier) _ _ _
3612 = return $ unitOL LWSYNC
-- General case: pass arguments in GPRs from r3 / FPRs from f1 / on the
-- stack (passArguments), move %sp down if the parameter area outgrows
-- the 64 spill bytes, emit BL (static) or MTCTR+BCTRL (dynamic), then
-- move %sp back up and fetch the result (moveResult).
-- NOTE(review): the embedded original line numbers are not contiguous
-- in this extract (e.g. 3616->3618, 3630->3635), so several source
-- lines are missing; the code below is incomplete as shown.
3614 genCCall target dest_regs argsAndHints vols
-- I8/I16 arguments must already have been promoted to I32 by codeGen.
3615 = ASSERT (not $ any (`elem` [I8,I16]) argReps)
3616 -- we rely on argument promotion in the codeGen
3618 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3620 allArgRegs allFPArgRegs
3624 (labelOrExpr, reduceToF32) <- case target of
3625 CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3626 CmmForeignCall expr conv -> return (Right expr, False)
3627 CmmPrim mop -> outOfLineFloatOp mop
3629 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3630 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
-- Static call: direct BL to the label.
3635 `snocOL` BL lbl usedRegs
-- Dynamic call: load target into CTR and branch-and-link through it.
3638 (dynReg, dynCode) <- getSomeReg dyn
3640 `snocOL` MTCTR dynReg
3642 `snocOL` BCTRL usedRegs
3645 #if darwin_TARGET_OS
3646 initialStackOffset = 24
3647 -- size of linkage area + size of arguments, in bytes
3648 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3649 map machRepByteWidth argReps
3650 #elif linux_TARGET_OS
3651 initialStackOffset = 8
3652 stackDelta finalStack = roundTo 16 finalStack
3654 args = map fst argsAndHints
3655 argReps = map cmmExprRep args
-- Round x up to the next multiple of a.
3657 roundTo a x | x `mod` a == 0 = x
3658 | otherwise = x + a - (x `mod` a)
-- STU stores the old %sp at the new top-of-stack, keeping the back-chain
-- required by both ABIs.
3660 move_sp_down finalStack
3662 toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
3665 where delta = stackDelta finalStack
3666 move_sp_up finalStack
3668 toOL [ADD sp sp (RIImm (ImmInt delta)),
3671 where delta = stackDelta finalStack
-- passArguments walks the (arg, rep) list threading: remaining GPRs,
-- remaining FPRs, current stack offset, accumulated code, and the list
-- of registers used (for the call instruction's liveness annotation).
3674 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-- I64 args: two words, hi first.
3675 passArguments ((arg,I64):args) gprs fprs stackOffset
3676 accumCode accumUsed =
3678 ChildCode64 code vr_lo <- iselExpr64 arg
3679 let vr_hi = getHiVRegFromLo vr_lo
3681 #if darwin_TARGET_OS
-- Darwin: an I64 is simply two I32s (hi word first); storeWord picks a
-- GPR if one is left, else the stack slot.
3686 (accumCode `appOL` code
3687 `snocOL` storeWord vr_hi gprs stackOffset
3688 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3689 ((take 2 gprs) ++ accumUsed)
3691 storeWord vr (gpr:_) offset = MR gpr vr
3692 storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
3694 #elif linux_TARGET_OS
-- SysV: I64 goes either entirely in an odd/even GPR pair or entirely on
-- the (8-byte aligned) stack; a GPR may be skipped to get the pairing.
3695 let stackOffset' = roundTo 8 stackOffset
3696 stackCode = accumCode `appOL` code
3697 `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3698 `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3699 regCode hireg loreg =
3700 accumCode `appOL` code
3701 `snocOL` MR hireg vr_hi
3702 `snocOL` MR loreg vr_lo
-- even (length gprs) here encodes "next GPR is odd-numbered" for the
-- pairing rule -- TODO confirm against allArgRegs in MachRegs.lhs.
3705 hireg : loreg : regs | even (length gprs) ->
3706 passArguments args regs fprs stackOffset
3707 (regCode hireg loreg) (hireg : loreg : accumUsed)
3708 _skipped : hireg : loreg : regs ->
3709 passArguments args regs fprs stackOffset
3710 (regCode hireg loreg) (hireg : loreg : accumUsed)
3711 _ -> -- only one or no regs left
3712 passArguments args [] fprs (stackOffset'+8)
-- Non-I64 args: use a register of the right class if available ...
3716 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3717 | reg : _ <- regs = do
3718 register <- getRegister arg
3719 let code = case register of
3720 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3721 Any _ acode -> acode reg
3725 #if darwin_TARGET_OS
3726 -- The Darwin ABI requires that we reserve stack slots for register parameters
3727 (stackOffset + stackBytes)
3728 #elif linux_TARGET_OS
3729 -- ... the SysV ABI doesn't.
3732 (accumCode `appOL` code)
-- ... otherwise store the arg to its (suitably aligned) stack slot.
3735 (vr, code) <- getSomeReg arg
3739 (stackOffset' + stackBytes)
3740 (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
3743 #if darwin_TARGET_OS
3744 -- stackOffset is at least 4-byte aligned
3745 -- The Darwin ABI is happy with that.
3746 stackOffset' = stackOffset
3748 -- ... the SysV ABI requires 8-byte alignment for doubles.
3749 stackOffset' | rep == F64 = roundTo 8 stackOffset
3750 | otherwise = stackOffset
3752 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per-rep resource usage: (#GPRs consumed, #FPRs consumed, stack bytes,
-- which register pool this arg draws from).
3753 (nGprs, nFprs, stackBytes, regs) = case rep of
3754 I32 -> (1, 0, 4, gprs)
3755 #if darwin_TARGET_OS
3756 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3758 F32 -> (1, 1, 4, fprs)
3759 F64 -> (2, 1, 8, fprs)
3760 #elif linux_TARGET_OS
3761 -- ... the SysV ABI doesn't.
3762 F32 -> (0, 1, 4, fprs)
3763 F64 -> (0, 1, 8, fprs)
-- Move the C result from its ABI home (f1 for floats, r3/r4 for ints)
-- into the Cmm destination register; FRSP narrows F64->F32 for the
-- reduced out-of-line float ops.
3766 moveResult reduceToF32 =
3770 | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
3771 | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
3772 | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
3774 | otherwise -> unitOL (MR r_dest r3)
3775 where rep = cmmRegRep dest
3776 r_dest = getRegisterReg dest
-- PowerPC: map a floating-point CallishMachOp to an out-of-line libm
-- call.  Returns the (PIC-aware) static label or dynamic expression for
-- the routine, plus reduce :: Bool -- True for the F32 ops, telling the
-- caller (moveResult above) to FRSP the F64 libm result down to F32.
3778 outOfLineFloatOp mop =
3780 mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
3781 mkForeignLabel functionName Nothing True
3782 let mopLabelOrExpr = case mopExpr of
3783 CmmLit (CmmLabel lbl) -> Left lbl
-- Right/expr alternative missing from this extract (non-contiguous
-- embedded line numbers).
3785 return (mopLabelOrExpr, reduce)
3787 (functionName, reduce) = case mop of
3788 MO_F32_Exp -> (FSLIT("exp"), True)
3789 MO_F32_Log -> (FSLIT("log"), True)
3790 MO_F32_Sqrt -> (FSLIT("sqrt"), True)
3792 MO_F32_Sin -> (FSLIT("sin"), True)
3793 MO_F32_Cos -> (FSLIT("cos"), True)
3794 MO_F32_Tan -> (FSLIT("tan"), True)
3796 MO_F32_Asin -> (FSLIT("asin"), True)
3797 MO_F32_Acos -> (FSLIT("acos"), True)
3798 MO_F32_Atan -> (FSLIT("atan"), True)
3800 MO_F32_Sinh -> (FSLIT("sinh"), True)
3801 MO_F32_Cosh -> (FSLIT("cosh"), True)
3802 MO_F32_Tanh -> (FSLIT("tanh"), True)
3803 MO_F32_Pwr -> (FSLIT("pow"), True)
3805 MO_F64_Exp -> (FSLIT("exp"), False)
3806 MO_F64_Log -> (FSLIT("log"), False)
3807 MO_F64_Sqrt -> (FSLIT("sqrt"), False)
3809 MO_F64_Sin -> (FSLIT("sin"), False)
3810 MO_F64_Cos -> (FSLIT("cos"), False)
3811 MO_F64_Tan -> (FSLIT("tan"), False)
3813 MO_F64_Asin -> (FSLIT("asin"), False)
3814 MO_F64_Acos -> (FSLIT("acos"), False)
3815 MO_F64_Atan -> (FSLIT("atan"), False)
3817 MO_F64_Sinh -> (FSLIT("sinh"), False)
3818 MO_F64_Cosh -> (FSLIT("cosh"), False)
3819 MO_F64_Tanh -> (FSLIT("tanh"), False)
3820 MO_F64_Pwr -> (FSLIT("pow"), False)
3821 other -> pprPanic "genCCall(ppc): unknown callish op"
3822 (pprCallishMachOp other)
3824 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3826 #endif /* powerpc_TARGET_ARCH */
3829 -- -----------------------------------------------------------------------------
3830 -- Generating a table-branch
-- Generate a table-based branch: emit a jump table in the data (or, on
-- Mac OS X/x86_64, text) section and an indexed indirect jump through it.
-- Nothing entries in the BlockId list become 0 padding words.
-- When compiling PIC (opt_PIC), table entries are label *differences*
-- relative to the table's own label, so the code/data stays relocatable.
-- NOTE(review): some source lines are missing from this extract (the
-- embedded line numbers are non-contiguous), including the equation
-- heads and the PIC/non-PIC guards.
3832 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3834 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64, PIC case: relative jump table.
3838 (reg,e_code) <- getSomeReg expr
3839 lbl <- getNewLabelNat
3840 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3841 (tableReg,t_code) <- getSomeReg $ dynRef
3843 jumpTable = map jumpTableEntryRel ids
3845 jumpTableEntryRel Nothing
3846 = CmmStaticLit (CmmInt 0 wordRep)
3847 jumpTableEntryRel (Just (BlockId id))
3848 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3849 where blockLabel = mkAsmTempLabel id
-- Indexed operand: table base + (index * word size).
3851 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3852 (EAIndex reg wORD_SIZE) (ImmInt 0))
3854 #if x86_64_TARGET_ARCH && darwin_TARGET_OS
3855 -- on Mac OS X/x86_64, put the jump table in the text section
3856 -- to work around a limitation of the linker.
3857 -- ld64 is unable to handle the relocations for
3859 -- if L0 is not preceded by a non-anonymous label in its section.
3861 code = e_code `appOL` t_code `appOL` toOL [
3862 ADD wordRep op (OpReg tableReg),
3863 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3864 LDATA Text (CmmDataLabel lbl : jumpTable)
3867 code = e_code `appOL` t_code `appOL` toOL [
3868 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3869 ADD wordRep op (OpReg tableReg),
3870 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- x86/x86_64, non-PIC case: absolute addresses, jump straight through
-- the table with a single indexed operand.
3876 (reg,e_code) <- getSomeReg expr
3877 lbl <- getNewLabelNat
3879 jumpTable = map jumpTableEntry ids
3880 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3881 code = e_code `appOL` toOL [
3882 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3883 JMP_TBL op [ id | Just id <- ids ]
3887 #elif powerpc_TARGET_ARCH
-- PowerPC, PIC case: load a table-relative entry, add the table base,
-- and branch through CTR.
3891 (reg,e_code) <- getSomeReg expr
3892 tmp <- getNewRegNat I32
3893 lbl <- getNewLabelNat
3894 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
3895 (tableReg,t_code) <- getSomeReg $ dynRef
3897 jumpTable = map jumpTableEntryRel ids
3899 jumpTableEntryRel Nothing
3900 = CmmStaticLit (CmmInt 0 wordRep)
3901 jumpTableEntryRel (Just (BlockId id))
3902 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3903 where blockLabel = mkAsmTempLabel id
3905 code = e_code `appOL` t_code `appOL` toOL [
3906 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-- SLW ... 2: index * 4 (word-sized entries).
3907 SLW tmp reg (RIImm (ImmInt 2)),
3908 LD I32 tmp (AddrRegReg tableReg tmp),
3909 ADD tmp tmp (RIReg tableReg),
3911 BCTR [ id | Just id <- ids ]
-- PowerPC, non-PIC case: address the absolute table via HA/LO halves of
-- the table label.
3916 (reg,e_code) <- getSomeReg expr
3917 tmp <- getNewRegNat I32
3918 lbl <- getNewLabelNat
3920 jumpTable = map jumpTableEntry ids
3922 code = e_code `appOL` toOL [
3923 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3924 SLW tmp reg (RIImm (ImmInt 2)),
3925 ADDIS tmp tmp (HA (ImmCLbl lbl)),
3926 LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
3928 BCTR [ id | Just id <- ids ]
-- Fallback for arches without an implementation yet.
3932 genSwitch expr ids = panic "ToDo: genSwitch"
-- One absolute jump-table entry: the block's asm-temp label, or a zero
-- word for a hole (Nothing) in the table.  Used by the non-PIC cases of
-- genSwitch above.
3935 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep)
3936 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
3937 where blockLabel = mkAsmTempLabel id
3939 -- -----------------------------------------------------------------------------
3941 -- -----------------------------------------------------------------------------
3944 -- -----------------------------------------------------------------------------
3945 -- 'condIntReg' and 'condFltReg': condition codes into registers
3947 -- Turn those condition codes into integers now (when they appear on
3948 -- the right hand side of an assignment).
3950 -- (If applicable) Do not fill the delay slots here; you will confuse the
3951 -- register allocator.
-- Materialise the result of an integer/float comparison as a 0/1 value
-- in a register (used when a condition appears on the RHS of an
-- assignment rather than in a branch).
3953 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
3957 #if alpha_TARGET_ARCH
-- Alpha backend never implemented these.
3958 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
3959 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
3960 #endif /* alpha_TARGET_ARCH */
3962 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3964 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86/x86_64: compare, SETcc into a byte temp, then zero-extend the
-- byte to a full I32 result in dst.
3966 condIntReg cond x y = do
3967 CondCode _ cond cond_code <- condIntCode cond x y
3968 tmp <- getNewRegNat I8
3970 code dst = cond_code `appOL` toOL [
3971 SETCC cond (OpReg tmp),
3972 MOVZxL I8 (OpReg tmp) (OpReg dst)
3975 return (Any I32 code)
3979 #if i386_TARGET_ARCH
-- i386 (x87 float comparisons): same SETcc + zero-extend scheme as
-- condIntReg; unordered-operand handling is done in condFltCode.
3981 condFltReg cond x y = do
3982 CondCode _ cond cond_code <- condFltCode cond x y
3983 tmp <- getNewRegNat I8
3985 code dst = cond_code `appOL` toOL [
3986 SETCC cond (OpReg tmp),
3987 MOVZxL I8 (OpReg tmp) (OpReg dst)
3990 return (Any I32 code)
3994 #if x86_64_TARGET_ARCH
-- x86_64 (SSE float comparisons): must handle NaN (unordered) operands
-- explicitly -- see the comment block below.  NE ORs in the parity
-- flag (unordered => True); other tests that can see unordered operands
-- AND in NOT-parity (unordered => False); GU/GEU need no parity test.
3996 condFltReg cond x y = do
3997 CondCode _ cond cond_code <- condFltCode cond x y
3998 tmp1 <- getNewRegNat wordRep
3999 tmp2 <- getNewRegNat wordRep
4001 -- We have to worry about unordered operands (eg. comparisons
4002 -- against NaN). If the operands are unordered, the comparison
4003 -- sets the parity flag, carry flag and zero flag.
4004 -- All comparisons are supposed to return false for unordered
4005 -- operands except for !=, which returns true.
4007 -- Optimisation: we don't have to test the parity flag if we
4008 -- know the test has already excluded the unordered case: eg >
4009 -- and >= test for a zero carry flag, which can only occur for
4010 -- ordered operands.
4012 -- ToDo: by reversing comparisons we could avoid testing the
4013 -- parity flag in more cases.
4018 NE -> or_unordered dst
4019 GU -> plain_test dst
4020 GEU -> plain_test dst
4021 _ -> and_ordered dst)
4023 plain_test dst = toOL [
4024 SETCC cond (OpReg tmp1),
4025 MOVZxL I8 (OpReg tmp1) (OpReg dst)
4027 or_unordered dst = toOL [
4028 SETCC cond (OpReg tmp1),
4029 SETCC PARITY (OpReg tmp2),
4030 OR I8 (OpReg tmp1) (OpReg tmp2),
4031 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4033 and_ordered dst = toOL [
4034 SETCC cond (OpReg tmp1),
4035 SETCC NOTPARITY (OpReg tmp2),
4036 AND I8 (OpReg tmp1) (OpReg tmp2),
4037 MOVZxL I8 (OpReg tmp2) (OpReg dst)
4040 return (Any I32 code)
4044 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4046 #if sparc_TARGET_ARCH
-- SPARC condIntReg/condFltReg.
-- EQQ/NE get branch-free special cases built from the carry flag:
-- SUBcc against %g0 sets carry iff the operand is non-zero, and the
-- second SUB/ADD-with-carry turns that into a 0/1 result.  The general
-- case falls back to a two-block branch sequence (materialise 0, branch
-- over a block that materialises 1).
4048 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4049 (src, code) <- getSomeReg x
4050 tmp <- getNewRegNat I32
4052 code__2 dst = code `appOL` toOL [
4053 SUB False True g0 (RIReg src) g0,
4054 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4055 return (Any I32 code__2)
-- x == y  via  (x `xor` y) == 0.
4057 condIntReg EQQ x y = do
4058 (src1, code1) <- getSomeReg x
4059 (src2, code2) <- getSomeReg y
4060 tmp1 <- getNewRegNat I32
4061 tmp2 <- getNewRegNat I32
4063 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4064 XOR False src1 (RIReg src2) dst,
4065 SUB False True g0 (RIReg dst) g0,
4066 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4067 return (Any I32 code__2)
4069 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4070 (src, code) <- getSomeReg x
4071 tmp <- getNewRegNat I32
4073 code__2 dst = code `appOL` toOL [
4074 SUB False True g0 (RIReg src) g0,
4075 ADD True False g0 (RIImm (ImmInt 0)) dst]
4076 return (Any I32 code__2)
-- x /= y  via  (x `xor` y) /= 0.
4078 condIntReg NE x y = do
4079 (src1, code1) <- getSomeReg x
4080 (src2, code2) <- getSomeReg y
4081 tmp1 <- getNewRegNat I32
4082 tmp2 <- getNewRegNat I32
4084 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4085 XOR False src1 (RIReg src2) dst,
4086 SUB False True g0 (RIReg dst) g0,
4087 ADD True False g0 (RIImm (ImmInt 0)) dst]
4088 return (Any I32 code__2)
-- General case: dst := 0; branch on cond to lbl1; (fallthrough keeps 0)
-- ... lbl1: dst := 1; lbl2: done.  NOPs fill the branch delay slots.
4090 condIntReg cond x y = do
4091 BlockId lbl1 <- getBlockIdNat
4092 BlockId lbl2 <- getBlockIdNat
4093 CondCode _ cond cond_code <- condIntCode cond x y
4095 code__2 dst = cond_code `appOL` toOL [
4096 BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4097 OR False g0 (RIImm (ImmInt 0)) dst,
4098 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4099 NEWBLOCK (BlockId lbl1),
4100 OR False g0 (RIImm (ImmInt 1)) dst,
4101 NEWBLOCK (BlockId lbl2)]
4102 return (Any I32 code__2)
-- Float version: identical scheme but branching on the FP condition
-- codes (BF instead of BI).
4104 condFltReg cond x y = do
4105 BlockId lbl1 <- getBlockIdNat
4106 BlockId lbl2 <- getBlockIdNat
4107 CondCode _ cond cond_code <- condFltCode cond x y
4109 code__2 dst = cond_code `appOL` toOL [
4111 BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
4112 OR False g0 (RIImm (ImmInt 0)) dst,
4113 BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
4114 NEWBLOCK (BlockId lbl1),
4115 OR False g0 (RIImm (ImmInt 1)) dst,
4116 NEWBLOCK (BlockId lbl2)]
4117 return (Any I32 code__2)
4119 #endif /* sparc_TARGET_ARCH */
4121 #if powerpc_TARGET_ARCH
-- PowerPC: shared worker for condIntReg/condFltReg.  Extracts the
-- relevant bit of the condition register into a GPR (RLWINM), negating
-- it first via CRNOR for conditions that are the complement of an
-- available CR bit.
-- NOTE(review): this extract is missing many lines here (embedded
-- numbers jump 4127->4136 and 4146->4160), including most of the
-- (bit, do_negate) case table; the code below is incomplete as shown.
4122 condReg getCond = do
4123 lbl1 <- getBlockIdNat
4124 lbl2 <- getBlockIdNat
4125 CondCode _ cond cond_code <- getCond
4127 {- code dst = cond_code `appOL` toOL [
4136 code dst = cond_code
4140 RLWINM dst dst (bit + 1) 31 31
4143 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4146 (bit, do_negate) = case cond of
4160 return (Any I32 code)
4162 condIntReg cond x y = condReg (condIntCode cond x y)
4163 condFltReg cond x y = condReg (condFltCode cond x y)
4164 #endif /* powerpc_TARGET_ARCH */
4167 -- -----------------------------------------------------------------------------
4168 -- 'trivial*Code': deal with trivial instructions
4170 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4171 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4172 -- Only look for constants on the right hand side, because that's
4173 -- where the generic optimizer will have put them.
4175 -- Similarly, for unary instructions, we don't have to worry about
4176 -- matching an StInt as the argument, because genericOpt will already
4177 -- have handled the constant-folding.
-- Per-architecture type signatures for trivialCode / trivialFCode /
-- trivialUCode / trivialUFCode, selected with the IF_ARCH_* CPP macros
-- from NCG.h: each arch supplies a different instruction-constructor
-- shape (e.g. x86 takes Operand->Operand two-address instructions plus
-- an optional reversed form; SPARC/alpha take three-register forms).
-- NOTE(review): the "name ::" head lines of these signatures are
-- missing from this extract (non-contiguous embedded line numbers).
4181 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4182 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4183 -> Maybe (Operand -> Operand -> Instr)
4184 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4185 -> Maybe (Operand -> Operand -> Instr)
4186 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4187 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4189 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialFCode signature (PowerPC has its own elsewhere).
4192 #ifndef powerpc_TARGET_ARCH
4195 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4196 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4197 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4198 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4200 -> CmmExpr -> CmmExpr -- the two arguments
-- trivialUCode signature (unary).
4206 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4207 ,IF_ARCH_i386 ((Operand -> Instr)
4208 ,IF_ARCH_x86_64 ((Operand -> Instr)
4209 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4210 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4212 -> CmmExpr -- the one argument
-- trivialUFCode signature (unary floating point; not on PowerPC).
4215 #ifndef powerpc_TARGET_ARCH
4218 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4219 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4220 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4221 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4223 -> CmmExpr -- the one argument
4227 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4229 #if alpha_TARGET_ARCH
-- Alpha (dead code): dyadic integer op.  Immediate second operand gets
-- the reg-imm form; otherwise both operands go through registers.
-- Uses the old pre-monadic thenNat/mkSeqInstr style.
4231 trivialCode instr x (StInt y)
-- Guard line (immediate-fits check, presumably) missing from extract.
4233 = getRegister x `thenNat` \ register ->
4234 getNewRegNat IntRep `thenNat` \ tmp ->
4236 code = registerCode register tmp
4237 src1 = registerName register tmp
4238 src2 = ImmInt (fromInteger y)
4239 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4241 return (Any IntRep code__2)
4243 trivialCode instr x y
4244 = getRegister x `thenNat` \ register1 ->
4245 getRegister y `thenNat` \ register2 ->
4246 getNewRegNat IntRep `thenNat` \ tmp1 ->
4247 getNewRegNat IntRep `thenNat` \ tmp2 ->
4249 code1 = registerCode register1 tmp1 []
4250 src1 = registerName register1 tmp1
4251 code2 = registerCode register2 tmp2 []
4252 src2 = registerName register2 tmp2
4253 code__2 dst = asmSeqThen [code1, code2] .
4254 mkSeqInstr (instr src1 (RIReg src2) dst)
4256 return (Any IntRep code__2)
-- Alpha (dead code): unary integer op -- evaluate x, apply instr.
4259 trivialUCode instr x
4260 = getRegister x `thenNat` \ register ->
4261 getNewRegNat IntRep `thenNat` \ tmp ->
4263 code = registerCode register tmp
4264 src = registerName register tmp
4265 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4267 return (Any IntRep code__2)
-- Alpha (dead code): dyadic floating-point op; the MachRep argument is
-- ignored (everything is done at F64).
4270 trivialFCode _ instr x y
4271 = getRegister x `thenNat` \ register1 ->
4272 getRegister y `thenNat` \ register2 ->
4273 getNewRegNat F64 `thenNat` \ tmp1 ->
4274 getNewRegNat F64 `thenNat` \ tmp2 ->
4276 code1 = registerCode register1 tmp1
4277 src1 = registerName register1 tmp1
4279 code2 = registerCode register2 tmp2
4280 src2 = registerName register2 tmp2
4282 code__2 dst = asmSeqThen [code1 [], code2 []] .
4283 mkSeqInstr (instr src1 src2 dst)
4285 return (Any F64 code__2)
-- Alpha (dead code): unary floating-point op.
4287 trivialUFCode _ instr x
4288 = getRegister x `thenNat` \ register ->
4289 getNewRegNat F64 `thenNat` \ tmp ->
4291 code = registerCode register tmp
4292 src = registerName register tmp
4293 code__2 dst = code . mkSeqInstr (instr src dst)
4295 return (Any F64 code__2)
4297 #endif /* alpha_TARGET_ARCH */
4299 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4301 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4304 The Rules of the Game are:
4306 * You cannot assume anything about the destination register dst;
4307 it may be anything, including a fixed reg.
4309 * You may compute an operand into a fixed reg, but you may not
4310 subsequently change the contents of that fixed reg. If you
4311 want to do so, first copy the value either to a temporary
4312 or into dst. You are free to modify dst even if it happens
4313 to be a fixed reg -- that's not your problem.
4315 * You cannot assume that a fixed reg will stay live over an
4316 arbitrary computation. The same applies to the dst reg.
4318 * Temporary regs obtained from getNewRegNat are distinct from
4319 each other and from all other regs, and stay live over
4320 arbitrary computations.
4322 --------------------
4324 SDM's version of The Rules:
4326 * If getRegister returns Any, that means it can generate correct
4327 code which places the result in any register, period. Even if that
4328 register happens to be read during the computation.
4330 Corollary #1: this means that if you are generating code for an
4331 operation with two arbitrary operands, you cannot assign the result
4332 of the first operand into the destination register before computing
4333 the second operand. The second operand might require the old value
4334 of the destination register.
4336 Corollary #2: A function might be able to generate more efficient
4337 code if it knows the destination register is a new temporary (and
4338 therefore not read by any of the sub-computations).
4340 * If getRegister returns Any, then the code it generates may modify only:
4341 (a) fresh temporaries
4342 (b) the destination register
4343 (c) known registers (eg. %ecx is used by shifts)
4344 In particular, it may *not* modify global registers, unless the global
4345 register happens to be the destination register.
-- x86/x86_64 trivialCode: if a commuted/reversed instruction form is
-- available and the *first* operand is a small literal, compute b
-- straight into dst and apply the reversed instruction with the
-- immediate -- saving a register.  (The generic optimiser only moves
-- literals to the right, so the literal-on-the-left case is handled
-- here.)  Otherwise defer to genTrivialCode.
4348 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4349 | not (is64BitLit lit_a) = do
4350 b_code <- getAnyReg b
4353 = b_code dst `snocOL`
4354 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4356 return (Any rep code)
4358 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
4360 -- This is re-used for floating pt instructions too.
4361 genTrivialCode rep instr a b = do
4362 (b_op, b_code) <- getNonClobberedOperand b
4363 a_code <- getAnyReg a
4364 tmp <- getNewRegNat rep
4366 -- We want the value of b to stay alive across the computation of a.
4367 -- But, we want to calculate a straight into the destination register,
4368 -- because the instruction only has two operands (dst := dst `op` src).
4369 -- The troublesome case is when the result of b is in the same register
4370 -- as the destination reg. In this case, we have to save b in a
4371 -- new temporary across the computation of a.
4373 | dst `regClashesWithOp` b_op =
4375 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4377 instr (OpReg tmp) (OpReg dst)
4381 instr b_op (OpReg dst)
4383 return (Any rep code)
4385 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4386 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4387 reg `regClashesWithOp` _ = False
-- | Unary integer op: compute the operand with getAnyReg so the result
-- can be produced directly in the (caller-chosen) destination register.
-- NOTE(review): the `code dst = ...` binding lines are elided in this
-- excerpt.
4391 trivialUCode rep instr x = do
4392     x_code <- getAnyReg x
4398     return (Any rep code)
4402 #if i386_TARGET_ARCH
-- | i386 (x87) floating-point binary op: three-register form
-- (instr pk x y dst).  x is fetched with getNonClobberedReg so it
-- survives the computation of y.
4404 trivialFCode pk instr x y = do
4405   (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4406   (y_reg, y_code) <- getSomeReg y
4411          instr pk x_reg y_reg dst
4413   return (Any pk code)
4417 #if x86_64_TARGET_ARCH
-- | x86_64 (SSE2) floating-point ops are two-address, exactly like the
-- integer ones, so we can reuse the generic two-address generator; the
-- rep is applied to the instruction constructor up front.
trivialFCode rep mkInstr a b = genTrivialCode rep (mkInstr rep) a b
-- | Unary floating-point op: source register in, destination register
-- out.  NOTE(review): the `code dst = ...` binding lines are elided in
-- this excerpt.
4425 trivialUFCode rep instr x = do
4426   (x_reg, x_code) <- getSomeReg x
4432   return (Any rep code)
4434 #endif /* i386_TARGET_ARCH */
4436 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4438 #if sparc_TARGET_ARCH
-- | SPARC integer op.  First equation: the second operand is an integer
-- literal, folded in as an immediate (RIImm) -- presumably guarded on
-- fitting the 13-bit immediate field; the guard line is elided here.
4440 trivialCode pk instr x (CmmLit (CmmInt y d))
4443     (src1, code) <- getSomeReg x
4444     tmp <- getNewRegNat I32
4446         src2 = ImmInt (fromInteger y)
4447         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4448     return (Any I32 code__2)
-- General case: both operands in registers (RIReg form).
4450 trivialCode pk instr x y = do
4451     (src1, code1) <- getSomeReg x
4452     (src2, code2) <- getSomeReg y
4453     tmp1 <- getNewRegNat I32
4454     tmp2 <- getNewRegNat I32
4456         code__2 dst = code1 `appOL` code2 `snocOL`
4457                       instr src1 (RIReg src2) dst
4458     return (Any I32 code__2)
-- | SPARC floating-point binary op.  If the two operands have different
-- reps, the F32 one is first promoted to F64 with FxTOy (through `tmp`)
-- and the operation is done at F64; the result rep is F64 in the mixed
-- case, otherwise the common rep.  NOTE(review): the pk1/pk2 bindings
-- and part of the if-chain are elided in this excerpt.
4461 trivialFCode pk instr x y = do
4462     (src1, code1) <- getSomeReg x
4463     (src2, code2) <- getSomeReg y
4464     tmp1 <- getNewRegNat (cmmExprRep x)
4465     tmp2 <- getNewRegNat (cmmExprRep y)
4466     tmp <- getNewRegNat F64
4468         promote x = FxTOy F32 F64 x tmp
-- Same-rep case: operate directly on src1/src2.
4475             code1 `appOL` code2 `snocOL`
4476             instr pk src1 src2 dst
4477           else if pk1 == F32 then
-- First operand is F32: promote it, then operate at F64.
4478             code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4479             instr F64 tmp src2 dst
-- Otherwise the second operand is F32: promote that one instead.
4481             code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4482             instr F64 src1 tmp dst
4483     return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- | SPARC unary op: single register-or-immediate source, destination
-- register chosen by the caller.
4486 trivialUCode pk instr x = do
4487     (src, code) <- getSomeReg x
4488     tmp <- getNewRegNat pk
4490         code__2 dst = code `snocOL` instr (RIReg src) dst
4491     return (Any pk code__2)
-- | SPARC unary floating-point op: plain src -> dst instruction.
4494 trivialUFCode pk instr x = do
4495     (src, code) <- getSomeReg x
4496     tmp <- getNewRegNat pk
4498         code__2 dst = code `snocOL` instr src dst
4499     return (Any pk code__2)
4501 #endif /* sparc_TARGET_ARCH */
4503 #if powerpc_TARGET_ARCH
4506 Wolfgang's PowerPC version of The Rules:
4508 A slightly modified version of The Rules to take advantage of the fact
4509 that PowerPC instructions work on all registers and don't implicitly
4510 clobber any fixed registers.
4512 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4514 * If getRegister returns Any, then the code it generates may modify only:
4515 (a) fresh temporaries
4516 (b) the destination register
4517 It may *not* modify global registers, unless the global
4518 register happens to be the destination register.
4519 It may not clobber any other registers. In fact, only ccalls clobber any
4521 Also, it may not modify the counter register (used by genCCall).
4523 Corollary: If a getRegister for a subexpression returns Fixed, you need
4524 not move it to a fresh temporary before evaluating the next subexpression.
4525 The Fixed register won't be modified.
4526 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4528 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4529 the value of the destination register.
-- | PowerPC integer op.  First equation: the second operand is an
-- integer literal that makeImmediate can encode for this rep/signedness,
-- so fold it in as an immediate.  Note the PPC operand order:
-- instr dst src1 src2.
4532 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4533     | Just imm <- makeImmediate rep signed y
4535         (src1, code1) <- getSomeReg x
4536         let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4537         return (Any rep code)
-- General case: both operands evaluated into registers.
4539 trivialCode rep signed instr x y = do
4540     (src1, code1) <- getSomeReg x
4541     (src2, code2) <- getSomeReg y
4542     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4543     return (Any rep code)
-- | Three-register PPC op with no immediate form: evaluate both
-- operands into registers and emit a single dst/src1/src2 instruction.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (reg_x, code_x) <- getSomeReg x
    (reg_y, code_y) <- getSomeReg y
    let assemble dst = (code_x `appOL` code_y) `snocOL` instr dst reg_x reg_y
    return (Any rep assemble)
-- | PPC unary op: one source register, result written into the
-- caller-chosen destination register (instr dst src).
trivialUCode rep instr x = do
    (reg, setup) <- getSomeReg x
    return (Any rep (\dst -> setup `snocOL` instr dst reg))
4558 -- There is no "remainder" instruction on the PPC, so we have to do
4560 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | Computes rem as x - (x `div` y) * y: divide, multiply back, then
-- subtract.  NOTE(review): the DIVW/SUBF lines are elided in this
-- excerpt; only the multiply step is visible.
4562 remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
4563     -> CmmExpr -> CmmExpr -> NatM Register
4564 remainderCode rep div x y = do
4565     (src1, code1) <- getSomeReg x
4566     (src2, code2) <- getSomeReg y
4567     let code dst = code1 `appOL` code2 `appOL` toOL [
4569             MULLW dst dst (RIReg src2),
4572     return (Any rep code)
4574 #endif /* powerpc_TARGET_ARCH */
4577 -- -----------------------------------------------------------------------------
4578 -- Coercing to/from integer/floating-point...
4580 -- When going to integer, we truncate (round towards 0).
4582 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4583 -- conversions. We have to store temporaries in memory to move
4584 -- between the integer and the floating point register sets.
4586 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4587 -- pretend, on sparc at least, that double and float regs are separate
4588 -- kinds, so the value has to be computed into one kind before being
4589 -- explicitly "converted" to live in the other kind.
-- Type signatures for the per-architecture coercion implementations
-- below: from-rep, to-rep, expression.  The Dbl/Flt pair exists only on
-- sparc, where float and double registers are treated as distinct kinds.
4591 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4592 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4594 #if sparc_TARGET_ARCH
4595 coerceDbl2Flt :: CmmExpr -> NatM Register
4596 coerceFlt2Dbl :: CmmExpr -> NatM Register
4599 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4601 #if alpha_TARGET_ARCH
-- Alpha coercions (old thenNat style).  Both move the value through a
-- stack slot (spRel 0) -- presumably because the integer and FP register
-- files are disjoint; TODO confirm, the defining LHS lines are elided in
-- this excerpt.  First body: int -> FP, result rep F64.
= getRegister x `thenNat` \ register ->
4605   getNewRegNat IntRep `thenNat` \ reg ->
4607     code = registerCode register reg
4608     src = registerName register reg
4610     code__2 dst = code . mkSeqInstrs [
4612         LD TF dst (spRel 0),
4615   return (Any F64 code__2)
-- Second body: FP -> int, stores the FP temp then reloads; result rep
-- IntRep.
4619   = getRegister x `thenNat` \ register ->
4620     getNewRegNat F64 `thenNat` \ tmp ->
4622     code = registerCode register tmp
4623     src = registerName register tmp
4625     code__2 dst = code . mkSeqInstrs [
4627         ST TF tmp (spRel 0),
4630   return (Any IntRep code__2)
4632 #endif /* alpha_TARGET_ARCH */
4634 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4636 #if i386_TARGET_ARCH
-- | i386 (x87) int -> float: single GITOF/GITOD instruction chosen by
-- the target rep.
4638 coerceInt2FP from to x = do
4639   (x_reg, x_code) <- getSomeReg x
4641         opc  = case to of F32 -> GITOF; F64 -> GITOD
4642         code dst = x_code `snocOL` opc x_reg dst
4643         -- ToDo: works for non-I32 reps?
4645   return (Any to code)
-- | i386 (x87) float -> int: single GFTOI/GDTOI instruction chosen by
-- the source rep.
4649 coerceFP2Int from to x = do
4650   (x_reg, x_code) <- getSomeReg x
4652         opc  = case from of F32 -> GFTOI; F64 -> GDTOI
4653         code dst = x_code `snocOL` opc x_reg dst
4654         -- ToDo: works for non-I32 reps?
4656   return (Any to code)
4658 #endif /* i386_TARGET_ARCH */
4660 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4662 #if x86_64_TARGET_ARCH
-- | x86_64 (SSE2) float -> int: truncating convert, CVTTSS2SIQ or
-- CVTTSD2SIQ depending on the source rep; source may be a memory
-- operand.
4664 coerceFP2Int from to x = do
4665   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4667        opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
4668        code dst = x_code `snocOL` opc x_op dst
4670   return (Any to code) -- works even if the destination rep is <I32
-- | x86_64 (SSE2) int -> float: CVTSI2SS or CVTSI2SD depending on the
-- target rep; source may be a memory operand.
4672 coerceInt2FP from to x = do
4673   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
4675        opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
4676        code dst = x_code `snocOL` opc x_op dst
4678   return (Any to code) -- works even if the destination rep is <I32
-- | x86_64 float <-> double conversion: CVTSD2SS to narrow, CVTSS2SD to
-- widen, selected by the target rep.
4680 coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
4681 coerceFP2FP to x = do
4682   (x_reg, x_code) <- getSomeReg x
4684        opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
4685        code dst = x_code `snocOL` opc x_reg dst
4687   return (Any to code)
4691 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4693 #if sparc_TARGET_ARCH
-- | SPARC int -> FP: bounce the value through a stack slot below the
-- stack pointer (spRel (-2)) to move it between the integer and FP
-- register files, then convert in place with FxTOy.
4695 coerceInt2FP pk1 pk2 x = do
4696     (src, code) <- getSomeReg x
4698         code__2 dst = code `appOL` toOL [
4699             ST pk1 src (spRel (-2)),
4700             LD pk1 (spRel (-2)) dst,
4701             FxTOy pk1 pk2 dst dst]
4702     return (Any pk2 code__2)
-- | SPARC FP -> int: convert in an FP temp with FxTOy, then bounce the
-- result through a stack slot (spRel (-2)) into an integer register.
-- The ASSERT restricts the FP rep to F32/F64.
4705 coerceFP2Int pk fprep x = do
4706     (src, code) <- getSomeReg x
4707     reg <- getNewRegNat fprep
4708     tmp <- getNewRegNat pk
4710         code__2 dst = ASSERT(fprep == F64 || fprep == F32)
4712             FxTOy fprep pk src tmp,
4713             ST pk tmp (spRel (-2)),
4714             LD pk (spRel (-2)) dst]
4715     return (Any pk code__2)
-- | Narrow a double to a float with an explicit FxTOy F64->F32
-- conversion (on sparc, float and double registers are separate kinds).
coerceDbl2Flt x = do
    (reg, setup) <- getSomeReg x
    let narrow dst = setup `snocOL` FxTOy F64 F32 reg dst
    return (Any F32 narrow)
-- | Widen a float to a double with an explicit FxTOy F32->F64
-- conversion (on sparc, float and double registers are separate kinds).
coerceFlt2Dbl x = do
    (reg, setup) <- getSomeReg x
    let widen dst = setup `snocOL` FxTOy F32 F64 reg dst
    return (Any F64 widen)
4727 #endif /* sparc_TARGET_ARCH */
4729 #if powerpc_TARGET_ARCH
-- | PowerPC int -> FP, using the classic magic-constant trick: build the
-- IEEE double with high word 0x4330_0000 and low word (src xor
-- 0x8000_0000) on the stack, load it as F64, and subtract the constant
-- 0x43300000_80000000 (stored at a fresh data label) -- the difference
-- is the signed integer value as a double.
4730 coerceInt2FP fromRep toRep x = do
4731     (src, code) <- getSomeReg x
4732     lbl <- getNewLabelNat
4733     itmp <- getNewRegNat I32
4734     ftmp <- getNewRegNat F64
4735     dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
4736     Amode addr addr_code <- getAmode dynRef
4738         code' dst = code `appOL` maybe_exts `appOL` toOL [
4741                 CmmStaticLit (CmmInt 0x43300000 I32),
4742                 CmmStaticLit (CmmInt 0x80000000 I32)],
4743             XORIS itmp src (ImmInt 0x8000),
4744             ST I32 itmp (spRel 3),
4745             LIS itmp (ImmInt 0x4330),
4746             ST I32 itmp (spRel 2),
4747             LD F64 ftmp (spRel 2)
4748             ] `appOL` addr_code `appOL` toOL [
4750             FSUB F64 dst ftmp dst
4751             ] `appOL` maybe_frsp dst
-- Sub-word sources are sign-extended to I32 first.
4753         maybe_exts = case fromRep of
4754                         I8 ->  unitOL $ EXTS I8 src src
4755                         I16 -> unitOL $ EXTS I16 src src
-- When the target is F32, round the double result to single precision.
4757         maybe_frsp dst = case toRep of
4758                         F32 -> unitOL $ FRSP dst dst
4760     return (Any toRep code')
-- | PowerPC FP -> int: convert to an integer inside an FP register
-- (conversion instruction elided in this excerpt), store the 64-bit FP
-- temp to the stack, and reload only the low word as the result.
4762 coerceFP2Int fromRep toRep x = do
4763     -- the reps don't really matter: F*->F64 and I32->I* are no-ops
4764     (src, code) <- getSomeReg x
4765     tmp <- getNewRegNat F64
4767         code' dst = code `appOL` toOL [
4768                 -- convert to int in FP reg
4770                 -- store value (64bit) from FP to stack
4771                 ST F64 tmp (spRel 2),
4772                 -- read low word of value (high word is undefined)
4773                 LD I32 dst (spRel 3)]
4774     return (Any toRep code')
4775 #endif /* powerpc_TARGET_ARCH */
4778 -- -----------------------------------------------------------------------------
4779 -- eXTRA_STK_ARGS_HERE
4781 -- We (allegedly) put the first six C-call arguments in registers;
4782 -- where do we start putting the rest of them?
4784 -- Moved from MachInstrs (SDM):
-- | Stack offset (in words) at which C-call arguments beyond the
-- register-passed ones are placed: 0 on alpha, 23 on sparc.
-- NOTE(review): the binding's LHS line is elided in this excerpt.
4786 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4787 eXTRA_STK_ARGS_HERE :: Int
4789     = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))