1 -----------------------------------------------------------------------------
3 -- Generating machine code (instruction selection)
5 -- (c) The University of Glasgow 1996-2004
7 -----------------------------------------------------------------------------
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
32 import ClosureInfo ( C_SRT(..) )
35 import StaticFlags ( opt_PIC )
36 import ForeignCall ( CCallConv(..) )
41 import FastTypes ( isFastTrue )
42 import Constants ( wORD_SIZE )
45 import Debug.Trace ( trace )
48 import Control.Monad ( mapAndUnzipM )
49 import Data.Maybe ( fromJust )
54 -- -----------------------------------------------------------------------------
55 -- Top-level of the instruction selector
57 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
58 -- They are really trees of insns to facilitate fast appending, where a
59 -- left-to-right traversal (pre-order?) yields the insns in the correct order.
-- OrdList gives O(1) append/snoc/cons, which matters because instruction
-- sequences are built by repeated concatenation.
62 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm item.
--
-- For a procedure: translate each basic block (which may itself split into
-- several native blocks plus out-of-line data), then, if a PIC base register
-- is in use on this platform, patch the proc to initialise it.
-- For data: passed through untranslated, since CmmStatic is used directly.
64 cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
65 cmmTopCodeGen (CmmProc info lab params blocks) = do
66 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
67 picBaseMb <- getPicBaseMaybeNat
68 let proc = CmmProc info lab params (concat nat_blocks)
69 tops = proc : concat statics
-- scrutinee is picBaseMb: only PIC builds get the initialisation prologue
71 Just picBase -> initializePicBase picBase tops
72 Nothing -> return tops
74 cmmTopCodeGen (CmmData sec dat) = do
75 return [CmmData sec dat] -- no translation, we just use CmmStatic
-- | Translate one Cmm basic block to native code.
--
-- Returns the (possibly several) native basic blocks it expands to, plus
-- any out-of-line data (literal tables etc.) the selector emitted inline
-- via LDATA pseudo-instructions.
77 basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
78 basicBlockCodeGen (BasicBlock id stmts) = do
79 instrs <- stmtsToInstrs stmts
80 -- code generation may introduce new basic block boundaries, which
81 -- are indicated by the NEWBLOCK instruction. We must split up the
82 -- instruction stream into basic blocks again. Also, we extract
-- ...LDATA pseudo-instructions into a separate list of data items.
-- foldrOL walks the instruction tree right-to-left, so each NEWBLOCK
-- closes off the instructions accumulated so far into a new block.
85 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
87 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
88 = ([], BasicBlock id instrs : blocks, statics)
89 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
90 = (instrs, blocks, CmmData sec dat:statics)
91 mkBlocks instr (instrs,blocks,statics)
92 = (instr:instrs, blocks, statics)
-- 'top' keeps the original block id; blocks introduced by NEWBLOCK follow
94 return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements, concatenating the per-statement
-- instruction sequences in order.
96 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
98 = do instrss <- mapM stmtToInstrs stmts
99 return (concatOL instrss)
-- | Translate a single Cmm statement to native instructions.
--
-- Assignments dispatch on the MachRep of the target: floating-point,
-- 64-bit-on-32-bit (handled by the I64 helpers below), or ordinary integer.
101 stmtToInstrs :: CmmStmt -> NatM InstrBlock
102 stmtToInstrs stmt = case stmt of
103 CmmNop -> return nilOL
104 CmmComment s -> return (unitOL (COMMENT s))
-- register assignment: rep taken from the destination register
107 | isFloatingRep kind -> assignReg_FltCode kind reg src
108 #if WORD_SIZE_IN_BITS==32
109 | kind == I64 -> assignReg_I64Code reg src
111 | otherwise -> assignReg_IntCode kind reg src
112 where kind = cmmRegRep reg
-- memory store: rep taken from the source expression
115 | isFloatingRep kind -> assignMem_FltCode kind addr src
116 #if WORD_SIZE_IN_BITS==32
117 | kind == I64 -> assignMem_I64Code addr src
119 | otherwise -> assignMem_IntCode kind addr src
120 where kind = cmmExprRep src
122 CmmCall target result_regs args _
123 -> genCCall target result_regs args
125 CmmBranch id -> genBranch id
126 CmmCondBranch arg id -> genCondJump id arg
127 CmmSwitch arg ids -> genSwitch arg ids
-- 'params' are ignored here; register-passing is handled elsewhere
128 CmmJump arg params -> genJump arg
130 -- -----------------------------------------------------------------------------
131 -- General things for putting together code sequences
133 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
134 -- CmmExprs into CmmRegOff?
-- | Rewrite a CmmRegOff node into an explicit (reg + literal-offset)
-- addition at the register's own rep, so the generic MachOp selection
-- code below can handle it. Only the CmmRegOff case is visible here;
-- other constructors are presumably handled (or unreachable) elsewhere.
135 mangleIndexTree :: CmmExpr -> CmmExpr
136 mangleIndexTree (CmmRegOff reg off)
137 = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)]
138 where rep = cmmRegRep reg
140 -- -----------------------------------------------------------------------------
141 -- Code gen for 64-bit arithmetic on 32-bit platforms
144 Simple support for generating 64-bit code (ie, 64 bit values and 64
145 bit assignments) on 32-bit platforms. Unlike the main code generator
146 we merely shoot for generating working code as simply as possible, and
147 pay little attention to code quality. Specifically, there is no
148 attempt to deal cleverly with the fixed-vs-floating register
149 distinction; all values are generated into (pairs of) floating
150 registers, even if this would mean some redundant reg-reg moves as a
151 result. Only one of the VRegUniques is returned, since it will be
152 of the VRegUniqueLo form, and the upper-half VReg can be determined
153 by applying getHiVRegFromLo to it.
-- | Result of selecting code for a 64-bit value on a 32-bit platform:
-- an instruction sequence plus the LOW-half virtual register. The high
-- half is always the paired register obtained via getHiVRegFromLo, so
-- only one Reg needs to be returned.
156 data ChildCode64 -- a.k.a "Register64"
159 Reg -- the lower 32-bit temporary which contains the
160 -- result; use getHiVRegFromLo to find the other
161 -- VRegUnique. Rules of this simplified insn
162 -- selection game are therefore that the returned
163 -- Reg may be modified
-- 64-bit helpers for 32-bit platforms. The i386 versions follow; each
-- arch section provides the same three entry points.
165 #if WORD_SIZE_IN_BITS==32
166 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
167 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
170 #ifndef x86_64_TARGET_ARCH
171 iselExpr64 :: CmmExpr -> NatM ChildCode64
174 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- Store a 64-bit value: low word at addr, high word at addr+4
-- (x86 is little-endian).
178 assignMem_I64Code addrTree valueTree = do
179 Amode addr addr_code <- getAmode addrTree
180 ChildCode64 vcode rlo <- iselExpr64 valueTree
182 rhi = getHiVRegFromLo rlo
184 -- Little-endian store
185 mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
186 mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
-- fromJust is safe here only if addrOffset always succeeds for +4;
-- assumed but not visible in this listing — TODO confirm
188 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Assign to a local 64-bit register pair via two reg-reg moves.
191 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
192 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
194 r_dst_lo = mkVReg u_dst I32
195 r_dst_hi = getHiVRegFromLo r_dst_lo
196 r_src_hi = getHiVRegFromLo r_src_lo
197 mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo)
198 mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi)
201 vcode `snocOL` mov_lo `snocOL` mov_hi
204 assignReg_I64Code lvalue valueTree
205 = panic "assignReg_I64Code(i386): invalid lvalue"
-- 64-bit literal: split into two 32-bit immediates.
209 iselExpr64 (CmmLit (CmmInt i _)) = do
210 (rlo,rhi) <- getNewRegPairNat I32
212 r = fromIntegral (fromIntegral i :: Word32)
213 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
215 MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
216 MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
219 return (ChildCode64 code rlo)
-- 64-bit load: two 32-bit loads, low at addr, high at addr+4.
221 iselExpr64 (CmmLoad addrTree I64) = do
222 Amode addr addr_code <- getAmode addrTree
223 (rlo,rhi) <- getNewRegPairNat I32
225 mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
226 mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
229 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- Reading a local 64-bit register: no code, just name the low vreg.
233 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
234 = return (ChildCode64 nilOL (mkVReg vu I32))
236 -- we handle addition, but rather badly
-- add-with-constant: ADD on the low words sets carry, ADC folds it
-- into the high words.
237 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
238 ChildCode64 code1 r1lo <- iselExpr64 e1
239 (rlo,rhi) <- getNewRegPairNat I32
241 r = fromIntegral (fromIntegral i :: Word32)
242 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
243 r1hi = getHiVRegFromLo r1lo
245 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
246 ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
247 MOV I32 (OpReg r1hi) (OpReg rhi),
248 ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
250 return (ChildCode64 code rlo)
-- general 64-bit add: same ADD/ADC carry-chain pattern.
-- (the 'code = code1 `appOL` code2 `appOL` ...' line is elided from
-- this listing; code2 is presumably appended there)
252 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
253 ChildCode64 code1 r1lo <- iselExpr64 e1
254 ChildCode64 code2 r2lo <- iselExpr64 e2
255 (rlo,rhi) <- getNewRegPairNat I32
257 r1hi = getHiVRegFromLo r1lo
258 r2hi = getHiVRegFromLo r2lo
261 toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
262 ADD I32 (OpReg r2lo) (OpReg rlo),
263 MOV I32 (OpReg r1hi) (OpReg rhi),
264 ADC I32 (OpReg r2hi) (OpReg rhi) ]
266 return (ChildCode64 code rlo)
-- zero-extension to 64 bits: compute the 32-bit value into the low
-- register, then clear the high register.
268 iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
270 r_dst_lo <- getNewRegNat I32
271 let r_dst_hi = getHiVRegFromLo r_dst_lo
274 ChildCode64 (code `snocOL`
275 MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
280 = pprPanic "iselExpr64(i386)" (ppr expr)
282 #endif /* i386_TARGET_ARCH */
284 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
286 #if sparc_TARGET_ARCH
-- SPARC is big-endian: high word at offset 0, low word at offset 4.
288 assignMem_I64Code addrTree valueTree = do
289 Amode addr addr_code <- getAmode addrTree
290 ChildCode64 vcode rlo <- iselExpr64 valueTree
291 (src, code) <- getSomeReg addrTree
293 rhi = getHiVRegFromLo rlo
-- NOTE(review): addr/addr_code from getAmode appear unused — the
-- address is recomputed via getSomeReg; possibly dead code, verify.
295 mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
296 mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
297 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- moves via OR with %g0 (the SPARC idiom for reg-reg move)
299 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
300 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
302 r_dst_lo = mkVReg u_dst pk
303 r_dst_hi = getHiVRegFromLo r_dst_lo
304 r_src_hi = getHiVRegFromLo r_src_lo
305 mov_lo = mkMOV r_src_lo r_dst_lo
306 mov_hi = mkMOV r_src_hi r_dst_hi
307 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
308 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
309 assignReg_I64Code lvalue valueTree
310 = panic "assignReg_I64Code(sparc): invalid lvalue"
313 -- Don't delete this -- it's very handy for debugging.
315 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
316 -- = panic "iselExpr64(???)"
-- 64-bit load; only matches an AddrRegReg amode here, then loads
-- relative to r1 (big-endian: high first).
318 iselExpr64 (CmmLoad addrTree I64) = do
319 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
320 rlo <- getNewRegNat I32
321 let rhi = getHiVRegFromLo rlo
322 mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
323 mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
325 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
-- copy a local 64-bit register pair into fresh temporaries
329 iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
330 r_dst_lo <- getNewRegNat I32
331 let r_dst_hi = getHiVRegFromLo r_dst_lo
332 r_src_lo = mkVReg uq I32
333 r_src_hi = getHiVRegFromLo r_src_lo
334 mov_lo = mkMOV r_src_lo r_dst_lo
335 mov_hi = mkMOV r_src_hi r_dst_hi
336 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
338 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
342 = pprPanic "iselExpr64(sparc)" (ppr expr)
344 #endif /* sparc_TARGET_ARCH */
346 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
348 #if powerpc_TARGET_ARCH
-- | Compute addressing modes for both halves of a 64-bit memory operand
-- on PowerPC (big-endian: high word first, low word at +4). If the +4
-- offset cannot be folded into the amode, fall back to materialising the
-- base address in a register and using reg+imm amodes.
350 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
351 getI64Amodes addrTree = do
352 Amode hi_addr addr_code <- getAmode addrTree
353 case addrOffset hi_addr 4 of
354 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
355 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
356 return (AddrRegImm hi_ptr (ImmInt 0),
357 AddrRegImm hi_ptr (ImmInt 4),
-- PowerPC: store a 64-bit value as two 32-bit stores via getI64Amodes.
360 assignMem_I64Code addrTree valueTree = do
361 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
362 ChildCode64 vcode rlo <- iselExpr64 valueTree
364 rhi = getHiVRegFromLo rlo
367 mov_hi = ST I32 rhi hi_addr
368 mov_lo = ST I32 rlo lo_addr
370 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- assign to a local 64-bit register pair with two MR (move-register) insns
372 assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
373 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
375 r_dst_lo = mkVReg u_dst I32
376 r_dst_hi = getHiVRegFromLo r_dst_lo
377 r_src_hi = getHiVRegFromLo r_src_lo
378 mov_lo = MR r_dst_lo r_src_lo
379 mov_hi = MR r_dst_hi r_src_hi
382 vcode `snocOL` mov_lo `snocOL` mov_hi
385 assignReg_I64Code lvalue valueTree
386 = panic "assignReg_I64Code(powerpc): invalid lvalue"
389 -- Don't delete this -- it's very handy for debugging.
391 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
392 -- = panic "iselExpr64(???)"
-- PowerPC 64-bit load: two 32-bit loads through the paired amodes.
394 iselExpr64 (CmmLoad addrTree I64) = do
395 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
396 (rlo, rhi) <- getNewRegPairNat I32
397 let mov_hi = LD I32 rhi hi_addr
398 mov_lo = LD I32 rlo lo_addr
399 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- reading a local 64-bit register: no code needed
402 iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
403 = return (ChildCode64 nilOL (mkVReg vu I32))
-- PowerPC 64-bit integer literal: split the constant into four 16-bit
-- halves. Each 32-bit register is built with LIS (load-immediate-shifted,
-- which sets the HIGH halfword) followed by an OR-immediate that fills in
-- the LOW halfword. half0/half1 form the low word (rlo), half2/half3 the
-- high word (rhi).
405 iselExpr64 (CmmLit (CmmInt i _)) = do
406 (rlo,rhi) <- getNewRegPairNat I32
408 half0 = fromIntegral (fromIntegral i :: Word16)
409 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
410 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
411 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
414 LIS rlo (ImmInt half1),
415 OR rlo rlo (RIImm $ ImmInt half0),
416 LIS rhi (ImmInt half3),
-- BUG FIX: half2 is the low halfword of the HIGH word, so it must be
-- OR'd into rhi (just loaded with half3 via LIS). The previous code
-- OR'd it into rlo, corrupting the low word and leaving rhi's low
-- halfword zero.
417 OR rhi rhi (RIImm $ ImmInt half2)
420 return (ChildCode64 code rlo)
-- PowerPC 64-bit add: ADDC sets the carry from the low words; the
-- (elided) following instruction presumably adds the high words with
-- carry (ADDE). Catch-all panics on unsupported 64-bit expressions.
422 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
423 ChildCode64 code1 r1lo <- iselExpr64 e1
424 ChildCode64 code2 r2lo <- iselExpr64 e2
425 (rlo,rhi) <- getNewRegPairNat I32
427 r1hi = getHiVRegFromLo r1lo
428 r2hi = getHiVRegFromLo r2lo
431 toOL [ ADDC rlo r1lo r2lo,
434 return (ChildCode64 code rlo)
437 = pprPanic "iselExpr64(powerpc)" (ppr expr)
439 #endif /* powerpc_TARGET_ARCH */
442 -- -----------------------------------------------------------------------------
443 -- The 'Register' type
445 -- 'Register's passed up the tree. If the stix code forces the register
446 -- to live in a pre-decided machine register, it comes out as @Fixed@;
447 -- otherwise, it comes out as @Any@, and the parent can decide which
448 -- register to put it in.
-- Fixed: the value lives in a pre-decided register; code is complete.
-- Any: the parent chooses the destination; the function produces the
-- code once given that register.
451 = Fixed MachRep Reg InstrBlock
452 | Any MachRep (Reg -> InstrBlock)
-- | Change only the MachRep of a Register, keeping its code/register.
454 swizzleRegisterRep :: Register -> MachRep -> Register
455 swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code
456 swizzleRegisterRep (Any _ codefn) rep = Any rep codefn
459 -- -----------------------------------------------------------------------------
460 -- Utils based on getRegister, below
462 -- The dual to getAnyReg: compute an expression into a register, but
463 -- we don't mind which one it is.
-- | Compute an expression into some register — caller doesn't care which.
-- For an 'Any' result a fresh temporary is allocated; the 'Fixed'
-- alternative (elided in this listing) presumably returns its register
-- directly.
464 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
466 r <- getRegister expr
469 tmp <- getNewRegNat rep
470 return (tmp, code tmp)
474 -- -----------------------------------------------------------------------------
475 -- Grab the Reg for a CmmReg
-- | Map a CmmReg to the machine/virtual register holding it. Globals
-- that live in memory (rather than a real register) are an error at
-- this stage.
477 getRegisterReg :: CmmReg -> Reg
479 getRegisterReg (CmmLocal (LocalReg u pk _))
482 getRegisterReg (CmmGlobal mid)
483 = case get_GlobalReg_reg_or_addr mid of
484 Left (RealReg rrno) -> RealReg rrno
485 _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
486 -- By this stage, the only MagicIds remaining should be the
487 -- ones which map to a real machine register on this
488 -- platform. Hence ...
491 -- -----------------------------------------------------------------------------
492 -- Generate code to get a subtree into a Register
494 -- Don't delete this -- it's very handy for debugging.
496 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
497 -- = panic "getRegister(???)"
-- | The main instruction selector: compute a CmmExpr into a Register.
499 getRegister :: CmmExpr -> NatM Register
501 #if !x86_64_TARGET_ARCH
502 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
503 -- register, it can only be used for rip-relative addressing.
504 getRegister (CmmReg (CmmGlobal PicBaseReg))
506 reg <- getPicBaseNat wordRep
507 return (Fixed wordRep reg nilOL)
-- a plain register read needs no code at all
510 getRegister (CmmReg reg)
511 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
-- reg+offset is normalised to an explicit add first
513 getRegister tree@(CmmRegOff _ _)
514 = getRegister (mangleIndexTree tree)
517 #if WORD_SIZE_IN_BITS==32
518 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
519 -- TO_W_(x), TO_W_(x >> 32)
-- TO_W_(x >> 32) is just "take the high half": select the 64-bit value
-- and return its paired high register, no shift instructions needed.
521 getRegister (CmmMachOp (MO_U_Conv I64 I32)
522 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
523 ChildCode64 code rlo <- iselExpr64 x
524 return $ Fixed I32 (getHiVRegFromLo rlo) code
526 getRegister (CmmMachOp (MO_S_Conv I64 I32)
527 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
528 ChildCode64 code rlo <- iselExpr64 x
529 return $ Fixed I32 (getHiVRegFromLo rlo) code
-- plain 64->32 truncation: just use the low half
531 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
532 ChildCode64 code rlo <- iselExpr64 x
533 return $ Fixed I32 rlo code
535 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
536 ChildCode64 code rlo <- iselExpr64 x
537 return $ Fixed I32 rlo code
541 -- end of machine-"independent" bit; here we go on the rest...
-- Alpha target section. NOTE(review): this code predates the Cmm
-- conversion — it still pattern-matches on Stix trees (StDouble, StPrim,
-- StInt, StCall) and is almost certainly bit-rotted dead code kept
-- behind the #if.
543 #if alpha_TARGET_ARCH
-- double literal: emit it into a read-only data segment and load it
545 getRegister (StDouble d)
546 = getBlockIdNat `thenNat` \ lbl ->
547 getNewRegNat PtrRep `thenNat` \ tmp ->
548 let code dst = mkSeqInstrs [
549 LDATA RoDataSegment lbl [
550 DATA TF [ImmLab (rational d)]
552 LDA tmp (AddrImm (ImmCLbl lbl)),
553 LD TF dst (AddrReg tmp)]
555 return (Any F64 code)
557 getRegister (StPrim primop [x]) -- unary PrimOps
559 IntNegOp -> trivialUCode (NEG Q False) x
561 NotOp -> trivialUCode NOT x
563 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
564 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
566 OrdOp -> coerceIntCode IntRep x
569 Float2IntOp -> coerceFP2Int x
570 Int2FloatOp -> coerceInt2FP pr x
571 Double2IntOp -> coerceFP2Int x
572 Int2DoubleOp -> coerceInt2FP pr x
574 Double2FloatOp -> coerceFltCode x
575 Float2DoubleOp -> coerceFltCode x
-- transcendental float ops become out-of-line C calls to libm
577 other_op -> getRegister (StCall fn CCallConv F64 [x])
579 fn = case other_op of
580 FloatExpOp -> FSLIT("exp")
581 FloatLogOp -> FSLIT("log")
582 FloatSqrtOp -> FSLIT("sqrt")
583 FloatSinOp -> FSLIT("sin")
584 FloatCosOp -> FSLIT("cos")
585 FloatTanOp -> FSLIT("tan")
586 FloatAsinOp -> FSLIT("asin")
587 FloatAcosOp -> FSLIT("acos")
588 FloatAtanOp -> FSLIT("atan")
589 FloatSinhOp -> FSLIT("sinh")
590 FloatCoshOp -> FSLIT("cosh")
591 FloatTanhOp -> FSLIT("tanh")
592 DoubleExpOp -> FSLIT("exp")
593 DoubleLogOp -> FSLIT("log")
594 DoubleSqrtOp -> FSLIT("sqrt")
595 DoubleSinOp -> FSLIT("sin")
596 DoubleCosOp -> FSLIT("cos")
597 DoubleTanOp -> FSLIT("tan")
598 DoubleAsinOp -> FSLIT("asin")
599 DoubleAcosOp -> FSLIT("acos")
600 DoubleAtanOp -> FSLIT("atan")
601 DoubleSinhOp -> FSLIT("sinh")
602 DoubleCoshOp -> FSLIT("cosh")
603 DoubleTanhOp -> FSLIT("tanh")
605 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
607 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
-- Alpha has no Gt/Ge compares: x>y is encoded as y<x, x>=y as y<=x
609 CharGtOp -> trivialCode (CMP LTT) y x
610 CharGeOp -> trivialCode (CMP LE) y x
611 CharEqOp -> trivialCode (CMP EQQ) x y
612 CharNeOp -> int_NE_code x y
613 CharLtOp -> trivialCode (CMP LTT) x y
614 CharLeOp -> trivialCode (CMP LE) x y
616 IntGtOp -> trivialCode (CMP LTT) y x
617 IntGeOp -> trivialCode (CMP LE) y x
618 IntEqOp -> trivialCode (CMP EQQ) x y
619 IntNeOp -> int_NE_code x y
620 IntLtOp -> trivialCode (CMP LTT) x y
621 IntLeOp -> trivialCode (CMP LE) x y
623 WordGtOp -> trivialCode (CMP ULT) y x
-- NOTE(review): WordGeOp uses '(CMP ULE) x y', identical to WordLeOp
-- below and inconsistent with AddrGeOp's '(CMP ULE) y x'. Looks like
-- a latent bug in this (dead) Alpha code — the operands should
-- probably be swapped. Left as-is; verify before resurrecting.
624 WordGeOp -> trivialCode (CMP ULE) x y
625 WordEqOp -> trivialCode (CMP EQQ) x y
626 WordNeOp -> int_NE_code x y
627 WordLtOp -> trivialCode (CMP ULT) x y
628 WordLeOp -> trivialCode (CMP ULE) x y
630 AddrGtOp -> trivialCode (CMP ULT) y x
631 AddrGeOp -> trivialCode (CMP ULE) y x
632 AddrEqOp -> trivialCode (CMP EQQ) x y
633 AddrNeOp -> int_NE_code x y
634 AddrLtOp -> trivialCode (CMP ULT) x y
635 AddrLeOp -> trivialCode (CMP ULE) x y
637 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
638 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
639 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
640 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
641 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
642 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
644 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
645 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
646 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
647 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
648 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
649 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
651 IntAddOp -> trivialCode (ADD Q False) x y
652 IntSubOp -> trivialCode (SUB Q False) x y
653 IntMulOp -> trivialCode (MUL Q False) x y
654 IntQuotOp -> trivialCode (DIV Q False) x y
655 IntRemOp -> trivialCode (REM Q False) x y
657 WordAddOp -> trivialCode (ADD Q False) x y
658 WordSubOp -> trivialCode (SUB Q False) x y
659 WordMulOp -> trivialCode (MUL Q False) x y
660 WordQuotOp -> trivialCode (DIV Q True) x y
661 WordRemOp -> trivialCode (REM Q True) x y
663 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
664 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
665 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
666 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
668 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
669 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
670 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
671 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
673 AddrAddOp -> trivialCode (ADD Q False) x y
674 AddrSubOp -> trivialCode (SUB Q False) x y
675 AddrRemOp -> trivialCode (REM Q True) x y
677 AndOp -> trivialCode AND x y
678 OrOp -> trivialCode OR x y
679 XorOp -> trivialCode XOR x y
680 SllOp -> trivialCode SLL x y
681 SrlOp -> trivialCode SRL x y
683 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
684 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
685 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
687 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
688 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
690 {- ------------------------------------------------------------
691 Some bizarre special code for getting condition codes into
692 registers. Integer non-equality is a test for equality
693 followed by an XOR with 1. (Integer comparisons always set
694 the result register to 0 or 1.) Floating point comparisons of
695 any kind leave the result in a floating point register, so we
696 need to wrangle an integer register out of things.
698 int_NE_code :: StixTree -> StixTree -> NatM Register
701 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
702 getNewRegNat IntRep `thenNat` \ tmp ->
704 code = registerCode register tmp
705 src = registerName register tmp
706 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
708 return (Any IntRep code__2)
710 {- ------------------------------------------------------------
711 Comments for int_NE_code also apply to cmpF_code
714 :: (Reg -> Reg -> Reg -> Instr)
716 -> StixTree -> StixTree
719 cmpF_code instr cond x y
720 = trivialFCode pr instr x y `thenNat` \ register ->
721 getNewRegNat F64 `thenNat` \ tmp ->
722 getBlockIdNat `thenNat` \ lbl ->
724 code = registerCode register tmp
725 result = registerName register tmp
-- set dst to 1, branch over the reset if the FP condition holds,
-- otherwise fall through and zero dst (OR of zeroh with itself)
727 code__2 dst = code . mkSeqInstrs [
728 OR zeroh (RIImm (ImmInt 1)) dst,
729 BF cond result (ImmCLbl lbl),
730 OR zeroh (RIReg zeroh) dst,
733 return (Any IntRep code__2)
735 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
736 ------------------------------------------------------------
738 getRegister (CmmLoad pk mem)
739 = getAmode mem `thenNat` \ amode ->
741 code = amodeCode amode
742 src = amodeAddr amode
743 size = primRepToSize pk
744 code__2 dst = code . mkSeqInstr (LD size dst src)
746 return (Any pk code__2)
-- small integer literals via OR-immediate, larger via LDI
748 getRegister (StInt i)
751 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
753 return (Any IntRep code)
756 code dst = mkSeqInstr (LDI Q dst src)
758 return (Any IntRep code)
760 src = ImmInt (fromInteger i)
765 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
767 return (Any PtrRep code)
770 imm__2 = case imm of Just x -> x
772 #endif /* alpha_TARGET_ARCH */
774 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- i386 float literals: emitted as out-of-line data, then loaded.
-- The label is referenced through cmmMakeDynamicReference so PIC
-- builds get the right addressing.
778 getRegister (CmmLit (CmmFloat f F32)) = do
779 lbl <- getNewLabelNat
780 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
781 Amode addr addr_code <- getAmode dynRef
785 CmmStaticLit (CmmFloat f F32)]
786 `consOL` (addr_code `snocOL`
789 return (Any F32 code)
-- special-cased doubles: the x87 has dedicated load-zero (GLDZ) and
-- load-one (GLD1) instructions, so 0.0 and 1.0 need no data segment
792 getRegister (CmmLit (CmmFloat d F64))
794 = let code dst = unitOL (GLDZ dst)
795 in return (Any F64 code)
798 = let code dst = unitOL (GLD1 dst)
799 in return (Any F64 code)
-- general double literal: same data-segment scheme as F32 above
802 lbl <- getNewLabelNat
803 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
804 Amode addr addr_code <- getAmode dynRef
808 CmmStaticLit (CmmFloat d F64)]
809 `consOL` (addr_code `snocOL`
812 return (Any F64 code)
814 #endif /* i386_TARGET_ARCH */
816 #if x86_64_TARGET_ARCH
-- SSE: 0.0 is cheapest as xor-with-self (also breaks dependencies)
818 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
819 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
820 -- I don't know why there are xorpd, xorps, and pxor instructions.
821 -- They all appear to do the same thing --SDM
822 return (Any rep code)
-- other float literals: data segment + rip-relative load
824 getRegister (CmmLit (CmmFloat f rep)) = do
825 lbl <- getNewLabelNat
826 let code dst = toOL [
829 CmmStaticLit (CmmFloat f rep)],
830 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
833 return (Any rep code)
835 #endif /* x86_64_TARGET_ARCH */
837 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
839 -- catch simple cases of zero- or sign-extended load
-- these fuse the load and the extension into one movzx/movsx,
-- instead of a plain load followed by a separate extend
840 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
841 code <- intLoadCode (MOVZxL I8) addr
842 return (Any I32 code)
844 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
845 code <- intLoadCode (MOVSxL I8) addr
846 return (Any I32 code)
848 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
849 code <- intLoadCode (MOVZxL I16) addr
850 return (Any I32 code)
852 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
853 code <- intLoadCode (MOVSxL I16) addr
854 return (Any I32 code)
858 #if x86_64_TARGET_ARCH
860 -- catch simple cases of zero- or sign-extended load
861 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
862 code <- intLoadCode (MOVZxL I8) addr
863 return (Any I64 code)
865 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
866 code <- intLoadCode (MOVSxL I8) addr
867 return (Any I64 code)
869 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
870 code <- intLoadCode (MOVZxL I16) addr
871 return (Any I64 code)
873 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
874 code <- intLoadCode (MOVSxL I16) addr
875 return (Any I64 code)
877 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
878 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
879 return (Any I64 code)
881 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
882 code <- intLoadCode (MOVSxL I32) addr
883 return (Any I64 code)
887 #if x86_64_TARGET_ARCH
-- PicBaseReg + literal on x86_64 is exactly rip-relative addressing,
-- so it becomes a single LEA
888 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
889 CmmLit displacement])
890 = return $ Any I64 (\dst -> unitOL $
891 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
894 #if x86_64_TARGET_ARCH
-- SSE has no FP negate instruction; flip the sign bit by XORing with
-- a sign-bit mask held in a 16-byte-aligned read-only constant
895 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
896 x_code <- getAnyReg x
897 lbl <- getNewLabelNat
899 code dst = x_code dst `appOL` toOL [
900 -- This is how gcc does it, so it can't be that bad:
901 LDATA ReadOnlyData16 [
904 CmmStaticLit (CmmInt 0x80000000 I32),
905 CmmStaticLit (CmmInt 0 I32),
906 CmmStaticLit (CmmInt 0 I32),
907 CmmStaticLit (CmmInt 0 I32)
909 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
910 -- xorps, so we need the 128-bit constant
911 -- ToDo: rip-relative
914 return (Any F32 code)
916 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
917 x_code <- getAnyReg x
918 lbl <- getNewLabelNat
920 -- This is how gcc does it, so it can't be that bad:
921 code dst = x_code dst `appOL` toOL [
922 LDATA ReadOnlyData16 [
925 CmmStaticLit (CmmInt 0x8000000000000000 I64),
926 CmmStaticLit (CmmInt 0 I64)
928 -- gcc puts an unpck here. Wonder if we need it.
929 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
930 -- xorpd, so we need the 128-bit constant
933 return (Any F64 code)
936 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- unary MachOps: negation, complement, and the conversion matrix.
-- Same-rep conversions are no-ops; narrowing to I8/I16 goes through a
-- byte-addressable register; widening uses movzx/movsx.
938 getRegister (CmmMachOp mop [x]) -- unary MachOps
941 MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
942 MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
945 MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
946 MO_Not rep -> trivialUCode rep (NOT rep) x
-- conversions to smaller reps: value already fits, just ensure the
-- register is byte/word addressable
949 MO_U_Conv I32 I8 -> toI8Reg I32 x
950 MO_S_Conv I32 I8 -> toI8Reg I32 x
951 MO_U_Conv I16 I8 -> toI8Reg I16 x
952 MO_S_Conv I16 I8 -> toI8Reg I16 x
953 MO_U_Conv I32 I16 -> toI16Reg I32 x
954 MO_S_Conv I32 I16 -> toI16Reg I32 x
955 #if x86_64_TARGET_ARCH
956 MO_U_Conv I64 I32 -> conversionNop I64 x
957 MO_S_Conv I64 I32 -> conversionNop I64 x
958 MO_U_Conv I64 I16 -> toI16Reg I64 x
959 MO_S_Conv I64 I16 -> toI16Reg I64 x
960 MO_U_Conv I64 I8 -> toI8Reg I64 x
961 MO_S_Conv I64 I8 -> toI8Reg I64 x
964 MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
965 MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
-- widening: movzx (zero-extend) / movsx (sign-extend)
968 MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x
969 MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
970 MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x
972 MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x
973 MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
974 MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x
976 #if x86_64_TARGET_ARCH
977 MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x
978 MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
979 MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
980 MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x
981 MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
982 MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
983 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
984 -- However, we don't want the register allocator to throw it
985 -- away as an unnecessary reg-to-reg move, so we keep it in
986 -- the form of a movzl and print it as a movl later.
-- FP conversions: no-ops on x87 (everything is 80-bit internally),
-- real coercions on SSE
990 MO_S_Conv F32 F64 -> conversionNop F64 x
991 MO_S_Conv F64 F32 -> conversionNop F32 x
993 MO_S_Conv F32 F64 -> coerceFP2FP F64 x
994 MO_S_Conv F64 F32 -> coerceFP2FP F32 x
998 | isFloatingRep from -> coerceFP2Int from to x
999 | isFloatingRep to -> coerceInt2FP from to x
1001 other -> pprPanic "getRegister" (pprMachOp mop)
1003 -- signed or unsigned extension.
1004 integerExtend from to instr expr = do
1005 (reg,e_code) <- if from == I8 then getByteReg expr
1006 else getSomeReg expr
1010 instr from (OpReg reg) (OpReg dst)
1011 return (Any to code)
1013 toI8Reg new_rep expr
1014 = do codefn <- getAnyReg expr
1015 return (Any new_rep codefn)
1016 -- HACK: use getAnyReg to get a byte-addressable register.
1017 -- If the source was a Fixed register, this will add the
1018 -- mov instruction to put it into the desired destination.
1019 -- We're assuming that the destination won't be a fixed
1020 -- non-byte-addressable register; it won't be, because all
1021 -- fixed registers are word-sized.
1023 toI16Reg = toI8Reg -- for now
1025 conversionNop new_rep expr
1026 = do e_code <- getRegister expr
1027 return (swizzleRegisterRep e_code new_rep)
-- dyadic MachOps: comparisons produce a condition-flag-based Register;
-- arithmetic dispatches to the trivial*Code helpers. The ASSERT guards
-- against I8 operands, which x86 condition-code handling can't take here.
1030 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1031 = ASSERT2(cmmExprRep x /= I8, pprExpr e)
1033 MO_Eq F32 -> condFltReg EQQ x y
1034 MO_Ne F32 -> condFltReg NE x y
1035 MO_S_Gt F32 -> condFltReg GTT x y
1036 MO_S_Ge F32 -> condFltReg GE x y
1037 MO_S_Lt F32 -> condFltReg LTT x y
1038 MO_S_Le F32 -> condFltReg LE x y
1040 MO_Eq F64 -> condFltReg EQQ x y
1041 MO_Ne F64 -> condFltReg NE x y
1042 MO_S_Gt F64 -> condFltReg GTT x y
1043 MO_S_Ge F64 -> condFltReg GE x y
1044 MO_S_Lt F64 -> condFltReg LTT x y
1045 MO_S_Le F64 -> condFltReg LE x y
1047 MO_Eq rep -> condIntReg EQQ x y
1048 MO_Ne rep -> condIntReg NE x y
1050 MO_S_Gt rep -> condIntReg GTT x y
1051 MO_S_Ge rep -> condIntReg GE x y
1052 MO_S_Lt rep -> condIntReg LTT x y
1053 MO_S_Le rep -> condIntReg LE x y
-- unsigned comparisons use the unsigned condition codes
1055 MO_U_Gt rep -> condIntReg GU x y
1056 MO_U_Ge rep -> condIntReg GEU x y
1057 MO_U_Lt rep -> condIntReg LU x y
1058 MO_U_Le rep -> condIntReg LEU x y
-- FP arithmetic: x87 G* instructions on i386, SSE on x86_64
1060 #if i386_TARGET_ARCH
1061 MO_Add F32 -> trivialFCode F32 GADD x y
1062 MO_Sub F32 -> trivialFCode F32 GSUB x y
1064 MO_Add F64 -> trivialFCode F64 GADD x y
1065 MO_Sub F64 -> trivialFCode F64 GSUB x y
1067 MO_S_Quot F32 -> trivialFCode F32 GDIV x y
1068 MO_S_Quot F64 -> trivialFCode F64 GDIV x y
1071 #if x86_64_TARGET_ARCH
1072 MO_Add F32 -> trivialFCode F32 ADD x y
1073 MO_Sub F32 -> trivialFCode F32 SUB x y
1075 MO_Add F64 -> trivialFCode F64 ADD x y
1076 MO_Sub F64 -> trivialFCode F64 SUB x y
1078 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1079 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1082 MO_Add rep -> add_code rep x y
1083 MO_Sub rep -> sub_code rep x y
-- div/mod share div_code; the Bools select signed-ness and
-- quotient-vs-remainder
1085 MO_S_Quot rep -> div_code rep True True x y
1086 MO_S_Rem rep -> div_code rep True False x y
1087 MO_U_Quot rep -> div_code rep False True x y
1088 MO_U_Rem rep -> div_code rep False False x y
1090 #if i386_TARGET_ARCH
1091 MO_Mul F32 -> trivialFCode F32 GMUL x y
1092 MO_Mul F64 -> trivialFCode F64 GMUL x y
1095 #if x86_64_TARGET_ARCH
1096 MO_Mul F32 -> trivialFCode F32 MUL x y
1097 MO_Mul F64 -> trivialFCode F64 MUL x y
1100 MO_Mul rep -> let op = IMUL rep in
1101 trivialCode rep op (Just op) x y
1103 MO_S_MulMayOflo rep -> imulMayOflo rep x y
-- bitwise ops are commutative, hence the (Just op) reverse form
1105 MO_And rep -> let op = AND rep in
1106 trivialCode rep op (Just op) x y
1107 MO_Or rep -> let op = OR rep in
1108 trivialCode rep op (Just op) x y
1109 MO_Xor rep -> let op = XOR rep in
1110 trivialCode rep op (Just op) x y
1112 {- Shift ops on x86s have constraints on their source, it
1113 either has to be Imm, CL or 1
1114 => trivialCode is not restrictive enough (sigh.)
1116 MO_Shl rep -> shift_code rep (SHL rep) x y {-False-}
1117 MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
1118 MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}
1120 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1122 --------------------
1123 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1124 imulMayOflo rep a b = do
1125 (a_reg, a_code) <- getNonClobberedReg a
1126 b_code <- getAnyReg b
1128 shift_amt = case rep of
1131 _ -> panic "shift_amt"
1133 code = a_code `appOL` b_code eax `appOL`
1135 IMUL2 rep (OpReg a_reg), -- result in %edx:%eax
1136 SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
1137 -- sign extend lower part
1138 SUB rep (OpReg edx) (OpReg eax)
1139 -- compare against upper
1140 -- eax==0 if high part == sign extended low part
1143 return (Fixed rep eax code)
1145 --------------------
1146 shift_code :: MachRep
1147 -> (Operand -> Operand -> Instr)
1152 {- Case1: shift length as immediate -}
1153 shift_code rep instr x y@(CmmLit lit) = do
1154 x_code <- getAnyReg x
1157 = x_code dst `snocOL`
1158 instr (OpImm (litToImm lit)) (OpReg dst)
1160 return (Any rep code)
1162 {- Case2: shift length is complex (non-immediate)
1163 * y must go in %ecx.
1164 * we cannot do y first *and* put its result in %ecx, because
1165 %ecx might be clobbered by x.
1166 * if we do y second, then x cannot be
1167 in a clobbered reg. Also, we cannot clobber x's reg
1168 with the instruction itself.
1170 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1171 - do y second and put its result into %ecx. x gets placed in a fresh
1172 tmp. This is likely to be better, because the reg alloc can
1173 eliminate this reg->reg move here (it won't eliminate the other one,
1174 because the move is into the fixed %ecx).
1176 shift_code rep instr x y{-amount-} = do
1177 x_code <- getAnyReg x
1178 tmp <- getNewRegNat rep
1179 y_code <- getAnyReg y
1181 code = x_code tmp `appOL`
1183 instr (OpReg ecx) (OpReg tmp)
1185 return (Fixed rep tmp code)
1187 --------------------
1188 add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1189 add_code rep x (CmmLit (CmmInt y _))
1190 | not (is64BitInteger y) = add_int rep x y
1191 add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
1193 --------------------
1194 sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1195 sub_code rep x (CmmLit (CmmInt y _))
1196 | not (is64BitInteger (-y)) = add_int rep x (-y)
1197 sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
1199 -- our three-operand add instruction:
1200 add_int rep x y = do
1201 (x_reg, x_code) <- getSomeReg x
1203 imm = ImmInt (fromInteger y)
1207 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1210 return (Any rep code)
1212 ----------------------
1213 div_code rep signed quotient x y = do
1214 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1215 x_code <- getAnyReg x
1217 widen | signed = CLTD rep
1218 | otherwise = XOR rep (OpReg edx) (OpReg edx)
1220 instr | signed = IDIV
1223 code = y_code `appOL`
1225 toOL [widen, instr rep y_op]
1227 result | quotient = eax
1231 return (Fixed rep result code)
1234 getRegister (CmmLoad mem pk)
1237 Amode src mem_code <- getAmode mem
1239 code dst = mem_code `snocOL`
1240 IF_ARCH_i386(GLD pk src dst,
1241 MOV pk (OpAddr src) (OpReg dst))
1243 return (Any pk code)
1245 #if i386_TARGET_ARCH
1246 getRegister (CmmLoad mem pk)
1249 code <- intLoadCode (instr pk) mem
1250 return (Any pk code)
1252 instr I8 = MOVZxL pk
1255 -- we always zero-extend 8-bit loads, if we
1256 -- can't think of anything better. This is because
1257 -- we can't guarantee access to an 8-bit variant of every register
1258 -- (esi and edi don't have 8-bit variants), so to make things
1259 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1262 #if x86_64_TARGET_ARCH
1263 -- Simpler memory load code on x86_64
1264 getRegister (CmmLoad mem pk)
1266 code <- intLoadCode (MOV pk) mem
1267 return (Any pk code)
1270 getRegister (CmmLit (CmmInt 0 rep))
1272 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1273 adj_rep = case rep of I64 -> I32; _ -> rep
1274 rep1 = IF_ARCH_i386( rep, adj_rep )
1276 = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
1278 return (Any rep code)
1280 #if x86_64_TARGET_ARCH
1281 -- optimisation for loading small literals on x86_64: take advantage
1282 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1283 -- instruction forms are shorter.
1284 getRegister (CmmLit lit)
1285 | I64 <- cmmLitRep lit, not (isBigLit lit)
1288 code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
1290 return (Any I64 code)
1292 isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
1294 -- note1: not the same as is64BitLit, because that checks for
1295 -- signed literals that fit in 32 bits, but we want unsigned
1297 -- note2: all labels are small, because we're assuming the
1298 -- small memory model (see gcc docs, -mcmodel=small).
1301 getRegister (CmmLit lit)
1305 code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))
1307 return (Any rep code)
1309 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Build loader code for an integer load: compute the addressing mode
-- for 'mem', then apply the supplied load instruction to move the
-- addressed value into whatever destination register the caller picks.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  let load dst = instr (OpAddr src) (OpReg dst)
  return (\dst -> mem_code `snocOL` load dst)
1318 -- Compute an expression into *any* register, adding the appropriate
1319 -- move instruction if necessary.
1320 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1322 r <- getRegister expr
-- | Turn a 'Register' into a dst-parameterised instruction sequence.
-- An 'Any' already is one; a 'Fixed' gets an extra reg-to-reg move
-- appended so the value ends up in the requested destination.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
  Any _ code          -> return code
  Fixed rep reg fcode -> return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1329 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1330 -- Fixed registers might not be byte-addressable, so we make sure we've
1331 -- got a temporary, inserting an extra reg copy if necessary.
1332 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1333 #if x86_64_TARGET_ARCH
1334 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1336 getByteReg expr = do
1337 r <- getRegister expr
1340 tmp <- getNewRegNat rep
1341 return (tmp, code tmp)
1343 | isVirtualReg reg -> return (reg,code)
1345 tmp <- getNewRegNat rep
1346 return (tmp, code `snocOL` reg2reg rep reg tmp)
1347 -- ToDo: could optimise slightly by checking for byte-addressable
1348 -- real registers, but that will happen very rarely if at all.
1351 -- Another variant: this time we want the result in a register that cannot
1352 -- be modified by code to evaluate an arbitrary expression.
1353 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1354 getNonClobberedReg expr = do
1355 r <- getRegister expr
1358 tmp <- getNewRegNat rep
1359 return (tmp, code tmp)
1361 -- only free regs can be clobbered
1362 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1363 tmp <- getNewRegNat rep
1364 return (tmp, code `snocOL` reg2reg rep reg tmp)
1368 reg2reg :: MachRep -> Reg -> Reg -> Instr
1370 #if i386_TARGET_ARCH
1371 | isFloatingRep rep = GMOV src dst
1373 | otherwise = MOV rep (OpReg src) (OpReg dst)
1375 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1377 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1379 #if sparc_TARGET_ARCH
1381 getRegister (CmmLit (CmmFloat f F32)) = do
1382 lbl <- getNewLabelNat
1383 let code dst = toOL [
1386 CmmStaticLit (CmmFloat f F32)],
1387 SETHI (HI (ImmCLbl lbl)) dst,
1388 LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1389 return (Any F32 code)
1391 getRegister (CmmLit (CmmFloat d F64)) = do
1392 lbl <- getNewLabelNat
1393 let code dst = toOL [
1396 CmmStaticLit (CmmFloat d F64)],
1397 SETHI (HI (ImmCLbl lbl)) dst,
1398 LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
1399 return (Any F64 code)
1401 getRegister (CmmMachOp mop [x]) -- unary MachOps
1403 MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
1404 MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x
1406 MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
1407 MO_Not rep -> trivialUCode rep (XNOR False g0) x
1409 MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
1411 MO_U_Conv F64 F32-> coerceDbl2Flt x
1412 MO_U_Conv F32 F64-> coerceFlt2Dbl x
1414 MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
1415 MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
1416 MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
1417 MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
1419 -- Conversions which are a nop on sparc
1421 | from == to -> conversionNop to x
1422 MO_U_Conv I32 to -> conversionNop to x
1423 MO_S_Conv I32 to -> conversionNop to x
1426 MO_U_Conv I8 I32 -> integerExtend False I8 I32 x
1427 MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
1428 MO_U_Conv I8 I16 -> integerExtend False I8 I16 x
1429 MO_S_Conv I16 I32 -> integerExtend True I16 I32 x
1431 other_op -> panic "Unknown unary mach op"
1434 integerExtend signed from to expr = do
1435 (reg, e_code) <- getSomeReg expr
1439 ((if signed then SRA else SRL)
1440 reg (RIImm (ImmInt 0)) dst)
1441 return (Any to code)
1442 conversionNop new_rep expr
1443 = do e_code <- getRegister expr
1444 return (swizzleRegisterRep e_code new_rep)
1446 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1448 MO_Eq F32 -> condFltReg EQQ x y
1449 MO_Ne F32 -> condFltReg NE x y
1451 MO_S_Gt F32 -> condFltReg GTT x y
1452 MO_S_Ge F32 -> condFltReg GE x y
1453 MO_S_Lt F32 -> condFltReg LTT x y
1454 MO_S_Le F32 -> condFltReg LE x y
1456 MO_Eq F64 -> condFltReg EQQ x y
1457 MO_Ne F64 -> condFltReg NE x y
1459 MO_S_Gt F64 -> condFltReg GTT x y
1460 MO_S_Ge F64 -> condFltReg GE x y
1461 MO_S_Lt F64 -> condFltReg LTT x y
1462 MO_S_Le F64 -> condFltReg LE x y
1464 MO_Eq rep -> condIntReg EQQ x y
1465 MO_Ne rep -> condIntReg NE x y
1467 MO_S_Gt rep -> condIntReg GTT x y
1468 MO_S_Ge rep -> condIntReg GE x y
1469 MO_S_Lt rep -> condIntReg LTT x y
1470 MO_S_Le rep -> condIntReg LE x y
1472 MO_U_Gt I32 -> condIntReg GTT x y
1473 MO_U_Ge I32 -> condIntReg GE x y
1474 MO_U_Lt I32 -> condIntReg LTT x y
1475 MO_U_Le I32 -> condIntReg LE x y
1477 MO_U_Gt I16 -> condIntReg GU x y
1478 MO_U_Ge I16 -> condIntReg GEU x y
1479 MO_U_Lt I16 -> condIntReg LU x y
1480 MO_U_Le I16 -> condIntReg LEU x y
1482 MO_Add I32 -> trivialCode I32 (ADD False False) x y
1483 MO_Sub I32 -> trivialCode I32 (SUB False False) x y
1485 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1487 -- ToDo: teach about V8+ SPARC div instructions
1488 MO_S_Quot I32 -> idiv FSLIT(".div") x y
1489 MO_S_Rem I32 -> idiv FSLIT(".rem") x y
1490 MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
1491 MO_U_Rem I32 -> idiv FSLIT(".urem") x y
1493 MO_Add F32 -> trivialFCode F32 FADD x y
1494 MO_Sub F32 -> trivialFCode F32 FSUB x y
1495 MO_Mul F32 -> trivialFCode F32 FMUL x y
1496 MO_S_Quot F32 -> trivialFCode F32 FDIV x y
1498 MO_Add F64 -> trivialFCode F64 FADD x y
1499 MO_Sub F64 -> trivialFCode F64 FSUB x y
1500 MO_Mul F64 -> trivialFCode F64 FMUL x y
1501 MO_S_Quot F64 -> trivialFCode F64 FDIV x y
1503 MO_And rep -> trivialCode rep (AND False) x y
1504 MO_Or rep -> trivialCode rep (OR False) x y
1505 MO_Xor rep -> trivialCode rep (XOR False) x y
1507 MO_Mul rep -> trivialCode rep (SMUL False) x y
1509 MO_Shl rep -> trivialCode rep SLL x y
1510 MO_U_Shr rep -> trivialCode rep SRL x y
1511 MO_S_Shr rep -> trivialCode rep SRA x y
1514 MO_F32_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1515 [promote x, promote y])
1516 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1517 MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
1520 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1522 --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
1524 --------------------
1525 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
1526 imulMayOflo rep a b = do
1527 (a_reg, a_code) <- getSomeReg a
1528 (b_reg, b_code) <- getSomeReg b
1529 res_lo <- getNewRegNat I32
1530 res_hi <- getNewRegNat I32
1532 shift_amt = case rep of
1535 _ -> panic "shift_amt"
1536 code dst = a_code `appOL` b_code `appOL`
1538 SMUL False a_reg (RIReg b_reg) res_lo,
1540 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1541 SUB False False res_lo (RIReg res_hi) dst
1543 return (Any I32 code)
1545 getRegister (CmmLoad mem pk) = do
1546 Amode src code <- getAmode mem
1548 code__2 dst = code `snocOL` LD pk src dst
1549 return (Any pk code__2)
1551 getRegister (CmmLit (CmmInt i _))
1554 src = ImmInt (fromInteger i)
1555 code dst = unitOL (OR False g0 (RIImm src) dst)
1557 return (Any I32 code)
1559 getRegister (CmmLit lit)
1560 = let rep = cmmLitRep lit
1564 OR False dst (RIImm (LO imm)) dst]
1565 in return (Any I32 code)
1567 #endif /* sparc_TARGET_ARCH */
1569 #if powerpc_TARGET_ARCH
1570 getRegister (CmmLoad mem pk)
1573 Amode addr addr_code <- getAmode mem
1574 let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
1575 addr_code `snocOL` LD pk dst addr
1576 return (Any pk code)
1578 -- catch simple cases of zero- or sign-extended load
1579 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
1580 Amode addr addr_code <- getAmode mem
1581 return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))
1583 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1585 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
1586 Amode addr addr_code <- getAmode mem
1587 return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))
1589 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
1590 Amode addr addr_code <- getAmode mem
1591 return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1593 getRegister (CmmMachOp mop [x]) -- unary MachOps
1595 MO_Not rep -> trivialUCode rep NOT x
1597 MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
1598 MO_S_Conv F32 F64 -> conversionNop F64 x
1601 | from == to -> conversionNop to x
1602 | isFloatingRep from -> coerceFP2Int from to x
1603 | isFloatingRep to -> coerceInt2FP from to x
1605 -- narrowing is a nop: we treat the high bits as undefined
1606 MO_S_Conv I32 to -> conversionNop to x
1607 MO_S_Conv I16 I8 -> conversionNop I8 x
1608 MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x
1609 MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x
1612 | from == to -> conversionNop to x
1613 -- narrowing is a nop: we treat the high bits as undefined
1614 MO_U_Conv I32 to -> conversionNop to x
1615 MO_U_Conv I16 I8 -> conversionNop I8 x
1616 MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
1617 MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))
1619 MO_S_Neg F32 -> trivialUCode F32 FNEG x
1620 MO_S_Neg F64 -> trivialUCode F64 FNEG x
1621 MO_S_Neg rep -> trivialUCode rep NEG x
1624 conversionNop new_rep expr
1625 = do e_code <- getRegister expr
1626 return (swizzleRegisterRep e_code new_rep)
1628 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1630 MO_Eq F32 -> condFltReg EQQ x y
1631 MO_Ne F32 -> condFltReg NE x y
1633 MO_S_Gt F32 -> condFltReg GTT x y
1634 MO_S_Ge F32 -> condFltReg GE x y
1635 MO_S_Lt F32 -> condFltReg LTT x y
1636 MO_S_Le F32 -> condFltReg LE x y
1638 MO_Eq F64 -> condFltReg EQQ x y
1639 MO_Ne F64 -> condFltReg NE x y
1641 MO_S_Gt F64 -> condFltReg GTT x y
1642 MO_S_Ge F64 -> condFltReg GE x y
1643 MO_S_Lt F64 -> condFltReg LTT x y
1644 MO_S_Le F64 -> condFltReg LE x y
1646 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1647 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1649 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1650 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1651 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1652 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1654 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1655 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1656 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1657 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1659 MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y
1660 MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y
1661 MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y
1662 MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y
1664 MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y
1665 MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y
1666 MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y
1667 MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y
1669 -- optimize addition with 32-bit immediate
1673 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
1674 -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
1677 (src, srcCode) <- getSomeReg x
1678 let imm = litToImm lit
1679 code dst = srcCode `appOL` toOL [
1680 ADDIS dst src (HA imm),
1681 ADD dst dst (RIImm (LO imm))
1683 return (Any I32 code)
1684 _ -> trivialCode I32 True ADD x y
1686 MO_Add rep -> trivialCode rep True ADD x y
1688 case y of -- subfi ('substract from' with immediate) doesn't exist
1689 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1690 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1691 _ -> trivialCodeNoImm rep SUBF y x
1693 MO_Mul rep -> trivialCode rep True MULLW x y
1695 MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y
1697 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
1698 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1700 MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1701 MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1703 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1704 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1706 MO_And rep -> trivialCode rep False AND x y
1707 MO_Or rep -> trivialCode rep False OR x y
1708 MO_Xor rep -> trivialCode rep False XOR x y
1710 MO_Shl rep -> trivialCode rep False SLW x y
1711 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1712 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1714 getRegister (CmmLit (CmmInt i rep))
1715 | Just imm <- makeImmediate rep True i
1717 code dst = unitOL (LI dst imm)
1719 return (Any rep code)
1721 getRegister (CmmLit (CmmFloat f frep)) = do
1722 lbl <- getNewLabelNat
1723 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
1724 Amode addr addr_code <- getAmode dynRef
1726 LDATA ReadOnlyData [CmmDataLabel lbl,
1727 CmmStaticLit (CmmFloat f frep)]
1728 `consOL` (addr_code `snocOL` LD frep dst addr)
1729 return (Any frep code)
1731 getRegister (CmmLit lit)
1732 = let rep = cmmLitRep lit
1736 OR dst dst (RIImm (LO imm))
1738 in return (Any rep code)
1740 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extend(S|U)Expr: wrap an integer expression of the given rep in a
-- sign- or zero-extending conversion to I32; a no-op when the rep is
-- already I32.  PowerPC arithmetic and comparisons operate on full
-- registers, so narrower operands must be widened first.
extendSExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]
extendUExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1749 #endif /* powerpc_TARGET_ARCH */
1752 -- -----------------------------------------------------------------------------
1753 -- The 'Amode' type: Memory addressing modes passed up the tree.
1755 data Amode = Amode AddrMode InstrBlock
1758 Now, given a tree (the argument to an CmmLoad) that references memory,
1759 produce a suitable addressing mode.
1761 A Rule of the Game (tm) for Amodes: use of the addr bit must
1762 immediately follow use of the code part, since the code part puts
1763 values in registers which the addr then refers to. So you can't put
1764 anything in between, lest it overwrite some of those registers. If
1765 you need to do some other computation between the code part and use of
1766 the addr bit, first store the effective address from the amode in a
1767 temporary, then do the other computation, and then use the temporary:
1771 ... other computation ...
1775 getAmode :: CmmExpr -> NatM Amode
1776 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1778 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1780 #if alpha_TARGET_ARCH
1782 getAmode (StPrim IntSubOp [x, StInt i])
1783 = getNewRegNat PtrRep `thenNat` \ tmp ->
1784 getRegister x `thenNat` \ register ->
1786 code = registerCode register tmp
1787 reg = registerName register tmp
1788 off = ImmInt (-(fromInteger i))
1790 return (Amode (AddrRegImm reg off) code)
1792 getAmode (StPrim IntAddOp [x, StInt i])
1793 = getNewRegNat PtrRep `thenNat` \ tmp ->
1794 getRegister x `thenNat` \ register ->
1796 code = registerCode register tmp
1797 reg = registerName register tmp
1798 off = ImmInt (fromInteger i)
1800 return (Amode (AddrRegImm reg off) code)
1804 = return (Amode (AddrImm imm__2) id)
1807 imm__2 = case imm of Just x -> x
1810 = getNewRegNat PtrRep `thenNat` \ tmp ->
1811 getRegister other `thenNat` \ register ->
1813 code = registerCode register tmp
1814 reg = registerName register tmp
1816 return (Amode (AddrReg reg) code)
1818 #endif /* alpha_TARGET_ARCH */
1820 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1822 #if x86_64_TARGET_ARCH
1824 getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
1825 CmmLit displacement])
1826 = return $ Amode (ripRel (litToImm displacement)) nilOL
1830 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1832 -- This is all just ridiculous, since it carefully undoes
1833 -- what mangleIndexTree has just done.
1834 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1835 | not (is64BitLit lit)
1836 -- ASSERT(rep == I32)???
1837 = do (x_reg, x_code) <- getSomeReg x
1838 let off = ImmInt (-(fromInteger i))
1839 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1841 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1842 | not (is64BitLit lit)
1843 -- ASSERT(rep == I32)???
1844 = do (x_reg, x_code) <- getSomeReg x
1845 let off = ImmInt (fromInteger i)
1846 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1848 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1849 -- recognised by the next rule.
1850 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1852 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1854 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1855 [y, CmmLit (CmmInt shift _)]])
1856 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1857 = x86_complex_amode x y shift 0
1859 getAmode (CmmMachOp (MO_Add rep)
1860 [x, CmmMachOp (MO_Add _)
1861 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1862 CmmLit (CmmInt offset _)]])
1863 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1864 && not (is64BitInteger offset)
1865 = x86_complex_amode x y shift offset
1867 getAmode (CmmMachOp (MO_Add rep) [x,y])
1868 = x86_complex_amode x y 0 0
1870 getAmode (CmmLit lit) | not (is64BitLit lit)
1871 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1874 (reg,code) <- getSomeReg expr
1875 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1878 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1879 x86_complex_amode base index shift offset
1880 = do (x_reg, x_code) <- getNonClobberedReg base
1881 -- x must be in a temp, because it has to stay live over y_code
1882 -- we could compare x_reg and y_reg and do something better here...
1883 (y_reg, y_code) <- getSomeReg index
1885 code = x_code `appOL` y_code
1886 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1887 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1890 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1892 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1894 #if sparc_TARGET_ARCH
1896 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1899 (reg, code) <- getSomeReg x
1901 off = ImmInt (-(fromInteger i))
1902 return (Amode (AddrRegImm reg off) code)
1905 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1908 (reg, code) <- getSomeReg x
1910 off = ImmInt (fromInteger i)
1911 return (Amode (AddrRegImm reg off) code)
1913 getAmode (CmmMachOp (MO_Add rep) [x, y])
1915 (regX, codeX) <- getSomeReg x
1916 (regY, codeY) <- getSomeReg y
1918 code = codeX `appOL` codeY
1919 return (Amode (AddrRegReg regX regY) code)
1921 -- XXX Is this same as "leaf" in Stix?
1922 getAmode (CmmLit lit)
1924 tmp <- getNewRegNat I32
1926 code = unitOL (SETHI (HI imm__2) tmp)
1927 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1929 imm__2 = litToImm lit
1933 (reg, code) <- getSomeReg other
1936 return (Amode (AddrRegImm reg off) code)
1938 #endif /* sparc_TARGET_ARCH */
1940 #ifdef powerpc_TARGET_ARCH
1941 getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
1942 | Just off <- makeImmediate I32 True (-i)
1944 (reg, code) <- getSomeReg x
1945 return (Amode (AddrRegImm reg off) code)
1948 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
1949 | Just off <- makeImmediate I32 True i
1951 (reg, code) <- getSomeReg x
1952 return (Amode (AddrRegImm reg off) code)
1954 -- optimize addition with 32-bit immediate
1956 getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
1958 tmp <- getNewRegNat I32
1959 (src, srcCode) <- getSomeReg x
1960 let imm = litToImm lit
1961 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1962 return (Amode (AddrRegImm tmp (LO imm)) code)
1964 getAmode (CmmLit lit)
1966 tmp <- getNewRegNat I32
1967 let imm = litToImm lit
1968 code = unitOL (LIS tmp (HA imm))
1969 return (Amode (AddrRegImm tmp (LO imm)) code)
1971 getAmode (CmmMachOp (MO_Add I32) [x, y])
1973 (regX, codeX) <- getSomeReg x
1974 (regY, codeY) <- getSomeReg y
1975 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1979 (reg, code) <- getSomeReg other
1982 return (Amode (AddrRegImm reg off) code)
1983 #endif /* powerpc_TARGET_ARCH */
1985 -- -----------------------------------------------------------------------------
1986 -- getOperand: sometimes any operand will do.
1988 -- getNonClobberedOperand: the value of the operand will remain valid across
1989 -- the computation of an arbitrary expression, unless the expression
1990 -- is computed directly into a register which the operand refers to
1991 -- (see trivialCode where this function is used for an example).
1993 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1995 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1996 #if x86_64_TARGET_ARCH
1997 getNonClobberedOperand (CmmLit lit)
1998 | isSuitableFloatingPointLit lit = do
1999 lbl <- getNewLabelNat
2000 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2002 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2004 getNonClobberedOperand (CmmLit lit)
2005 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
2006 return (OpImm (litToImm lit), nilOL)
2007 getNonClobberedOperand (CmmLoad mem pk)
2008 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2009 Amode src mem_code <- getAmode mem
2011 if (amodeCouldBeClobbered src)
2013 tmp <- getNewRegNat wordRep
2014 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2015 unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
2018 return (OpAddr src', save_code `appOL` mem_code)
2019 getNonClobberedOperand e = do
2020 (reg, code) <- getNonClobberedReg e
2021 return (OpReg reg, code)
2023 amodeCouldBeClobbered :: AddrMode -> Bool
2024 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2026 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2027 regClobbered _ = False
2029 -- getOperand: the operand is not required to remain valid across the
2030 -- computation of an arbitrary expression.
2031 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2032 #if x86_64_TARGET_ARCH
2033 getOperand (CmmLit lit)
2034 | isSuitableFloatingPointLit lit = do
2035 lbl <- getNewLabelNat
2036 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2038 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2040 getOperand (CmmLit lit)
2041 | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
2042 return (OpImm (litToImm lit), nilOL)
2043 getOperand (CmmLoad mem pk)
2044 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2045 Amode src mem_code <- getAmode mem
2046 return (OpAddr src, mem_code)
2048 (reg, code) <- getSomeReg e
2049 return (OpReg reg, code)
2051 isOperand :: CmmExpr -> Bool
2052 isOperand (CmmLoad _ _) = True
2053 isOperand (CmmLit lit) = not (is64BitLit lit)
2054 || isSuitableFloatingPointLit lit
-- A floating-point literal can be used as a memory operand directly,
-- with one exception: zero.  Generating 0.0 into a register (e.g. via
-- xor) is better than loading it from memory, so we reject zero here
-- and let the register path handle it.
isSuitableFloatingPointLit lit = case lit of
  CmmFloat f _ -> f /= 0.0
  _            -> False
2064 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2065 getRegOrMem (CmmLoad mem pk)
2066 | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
2067 Amode src mem_code <- getAmode mem
2068 return (OpAddr src, mem_code)
2070 (reg, code) <- getNonClobberedReg e
2071 return (OpReg reg, code)
2073 #if x86_64_TARGET_ARCH
2074 is64BitLit (CmmInt i I64) = is64BitInteger i
2075 -- assume that labels are in the range 0-2^31-1: this assumes the
2076 -- small memory model (see gcc docs, -mcmodel=small).
2078 is64BitLit x = False
-- | True iff the value, truncated to 64 bits, does not fit in a signed
-- 32-bit immediate.  We truncate via Int64 first because a CmmInt is
-- intended to be truncated to the appropriate number of bits: e.g. -1
-- as a CmmInt might arrive as either -1 or 18446744073709551615.
is64BitInteger :: Integer -> Bool
is64BitInteger i = i64 < lo32 || i64 > hi32
  where
    i64  = fromIntegral i :: Int64
    lo32 = -0x80000000
    hi32 = 0x7fffffff
2089 -- -----------------------------------------------------------------------------
2090 -- The 'CondCode' type: Condition codes passed up the tree.
2092 data CondCode = CondCode Bool Cond InstrBlock
2094 -- Set up a condition code for a conditional branch.
-- | Turn a boolean-valued comparison CmmExpr into a CondCode that a
-- conditional branch can consume.  Only two-argument comparison
-- MachOps are handled; anything else panics.
2096 getCondCode :: CmmExpr -> NatM CondCode
2098 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100 #if alpha_TARGET_ARCH
2101 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2102 #endif /* alpha_TARGET_ARCH */
2104 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2106 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2107 -- yes, they really do seem to want exactly the same!
2109 getCondCode (CmmMachOp mop [x, y])
-- F32/F64 comparisons route to condFltCode; everything else is an
-- integer comparison via condIntCode (signed GTT/GE/LTT/LE vs
-- unsigned GU/GEU/LU/LEU).
2112 MO_Eq F32 -> condFltCode EQQ x y
2113 MO_Ne F32 -> condFltCode NE x y
2115 MO_S_Gt F32 -> condFltCode GTT x y
2116 MO_S_Ge F32 -> condFltCode GE x y
2117 MO_S_Lt F32 -> condFltCode LTT x y
2118 MO_S_Le F32 -> condFltCode LE x y
2120 MO_Eq F64 -> condFltCode EQQ x y
2121 MO_Ne F64 -> condFltCode NE x y
2123 MO_S_Gt F64 -> condFltCode GTT x y
2124 MO_S_Ge F64 -> condFltCode GE x y
2125 MO_S_Lt F64 -> condFltCode LTT x y
2126 MO_S_Le F64 -> condFltCode LE x y
2128 MO_Eq rep -> condIntCode EQQ x y
2129 MO_Ne rep -> condIntCode NE x y
2131 MO_S_Gt rep -> condIntCode GTT x y
2132 MO_S_Ge rep -> condIntCode GE x y
2133 MO_S_Lt rep -> condIntCode LTT x y
2134 MO_S_Le rep -> condIntCode LE x y
2136 MO_U_Gt rep -> condIntCode GU x y
2137 MO_U_Ge rep -> condIntCode GEU x y
2138 MO_U_Lt rep -> condIntCode LU x y
2139 MO_U_Le rep -> condIntCode LEU x y
2141 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2143 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2145 #elif powerpc_TARGET_ARCH
2147 -- almost the same as everywhere else - but we need to
2148 -- extend small integers to 32 bit first
2150 getCondCode (CmmMachOp mop [x, y])
2152 MO_Eq F32 -> condFltCode EQQ x y
2153 MO_Ne F32 -> condFltCode NE x y
2155 MO_S_Gt F32 -> condFltCode GTT x y
2156 MO_S_Ge F32 -> condFltCode GE x y
2157 MO_S_Lt F32 -> condFltCode LTT x y
2158 MO_S_Le F32 -> condFltCode LE x y
2160 MO_Eq F64 -> condFltCode EQQ x y
2161 MO_Ne F64 -> condFltCode NE x y
2163 MO_S_Gt F64 -> condFltCode GTT x y
2164 MO_S_Ge F64 -> condFltCode GE x y
2165 MO_S_Lt F64 -> condFltCode LTT x y
2166 MO_S_Le F64 -> condFltCode LE x y
-- PPC integer compares operate at I32 (see condIntCode below), so
-- operands are widened first: zero-extended (extendUExpr) for equality
-- and unsigned orderings, sign-extended (extendSExpr) for signed ones.
2168 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2169 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2171 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2172 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2173 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2174 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2176 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2177 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2178 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2179 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2181 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2183 getCondCode other = panic "getCondCode(2)(powerpc)"
2189 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2190 -- passed back up the tree.
2192 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2194 #if alpha_TARGET_ARCH
2195 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2196 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2197 #endif /* alpha_TARGET_ARCH */
2199 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2200 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2202 -- memory vs immediate
-- CMP directly against memory avoids a load into a register; the
-- is64BitLit guard keeps immediates encodable on x86_64.
2203 condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
2204 Amode x_addr x_code <- getAmode x
2207 code = x_code `snocOL`
2208 CMP pk (OpImm imm) (OpAddr x_addr)
2210 return (CondCode False cond code)
-- comparison against zero: TEST reg,reg sets the flags without an
-- immediate operand.
2213 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2214 (x_reg, x_code) <- getSomeReg x
2216 code = x_code `snocOL`
2217 TEST pk (OpReg x_reg) (OpReg x_reg)
2219 return (CondCode False cond code)
2221 -- anything vs operand
2222 condIntCode cond x y | isOperand y = do
2223 (x_reg, x_code) <- getNonClobberedReg x
2224 (y_op, y_code) <- getOperand y
2226 code = x_code `appOL` y_code `snocOL`
2227 CMP (cmmExprRep x) y_op (OpReg x_reg)
2229 return (CondCode False cond code)
2231 -- anything vs anything
2232 condIntCode cond x y = do
2233 (y_reg, y_code) <- getNonClobberedReg y
2234 (x_op, x_code) <- getRegOrMem x
2236 code = y_code `appOL`
2238 CMP (cmmExprRep x) (OpReg y_reg) x_op
2240 return (CondCode False cond code)
2243 #if i386_TARGET_ARCH
2244 condFltCode cond x y
2245 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2246 (x_reg, x_code) <- getNonClobberedReg x
2247 (y_reg, y_code) <- getSomeReg y
2249 code = x_code `appOL` y_code `snocOL`
2250 GCMP cond x_reg y_reg
2251 -- The GCMP insn does the test and sets the zero flag if comparable
2252 -- and true. Hence we always supply EQQ as the condition to test.
2253 return (CondCode True EQQ code)
2254 #endif /* i386_TARGET_ARCH */
2256 #if x86_64_TARGET_ARCH
2257 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2258 -- an operand, but the right must be a reg. We can probably do better
2259 -- than this general case...
2260 condFltCode cond x y = do
2261 (x_reg, x_code) <- getNonClobberedReg x
2262 (y_op, y_code) <- getOperand y
2264 code = x_code `appOL`
2266 CMP (cmmExprRep x) y_op (OpReg x_reg)
2267 -- NB(1): we need to use the unsigned comparison operators on the
2268 -- result of this comparison.
2270 return (CondCode True (condToUnsigned cond) code)
2273 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2275 #if sparc_TARGET_ARCH
-- NOTE(review): SUB ... g0 presumably performs a flag-setting subtract
-- whose result is discarded into %g0 -- confirm against the SPARC
-- Instr definition.
2277 condIntCode cond x (CmmLit (CmmInt y rep))
2280 (src1, code) <- getSomeReg x
2282 src2 = ImmInt (fromInteger y)
2283 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2284 return (CondCode False cond code')
2286 condIntCode cond x y = do
2287 (src1, code1) <- getSomeReg x
2288 (src2, code2) <- getSomeReg y
2290 code__2 = code1 `appOL` code2 `snocOL`
2291 SUB False True src1 (RIReg src2) g0
2292 return (CondCode False cond code__2)
-- Mixed-width float compare: if exactly one operand is F32 it is
-- promoted to F64 (FxTOy) into a fresh temp before the FCMP.
2295 condFltCode cond x y = do
2296 (src1, code1) <- getSomeReg x
2297 (src2, code2) <- getSomeReg y
2298 tmp <- getNewRegNat F64
2300 promote x = FxTOy F32 F64 x tmp
2307 code1 `appOL` code2 `snocOL`
2308 FCMP True pk1 src1 src2
2309 else if pk1 == F32 then
2310 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2311 FCMP True F64 tmp src2
2313 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2314 FCMP True F64 src1 tmp
2315 return (CondCode True cond code__2)
2317 #endif /* sparc_TARGET_ARCH */
2319 #if powerpc_TARGET_ARCH
2320 -- ###FIXME: I16 and I8!
-- All integer compares are done at I32; CMPL for unsigned conditions,
-- CMP for signed.  Sub-word operands must be widened by the caller
-- (see getCondCode's extendSExpr/extendUExpr).
2321 condIntCode cond x (CmmLit (CmmInt y rep))
2322 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2324 (src1, code) <- getSomeReg x
2326 code' = code `snocOL`
2327 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
2328 return (CondCode False cond code')
2330 condIntCode cond x y = do
2331 (src1, code1) <- getSomeReg x
2332 (src2, code2) <- getSomeReg y
2334 code' = code1 `appOL` code2 `snocOL`
2335 (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
2336 return (CondCode False cond code')
2338 condFltCode cond x y = do
2339 (src1, code1) <- getSomeReg x
2340 (src2, code2) <- getSomeReg y
2342 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2343 code'' = case cond of -- twiddle CR to handle unordered case
2344 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2345 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- bit indices into the CR field; this matches the usual PowerPC
-- layout LT=0, GT=1, EQ=2 -- note eqbit/gtbit are listed out of
-- numeric order here.
2348 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2349 return (CondCode True cond code'')
2351 #endif /* powerpc_TARGET_ARCH */
2353 -- -----------------------------------------------------------------------------
2354 -- Generating assignments
2356 -- Assignments are really at the heart of the whole code generation
2357 -- business. Almost all top-level nodes of any real importance are
2358 -- assignments, which correspond to loads, stores, or register
2359 -- transfers. If we're really lucky, some of the register transfers
2360 -- will go away, because we can use the destination register to
2361 -- complete the code generation for the right hand side. This only
2362 -- fails when the right hand side is forced into a fixed register
2363 -- (e.g. the result of a call).
-- Mem variants store an rhs CmmExpr to an address CmmExpr; Reg
-- variants move an rhs into a CmmReg.  The MachRep gives the width
-- of the assignment.
2365 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2366 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2368 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2369 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2371 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2373 #if alpha_TARGET_ARCH
-- Alpha backend, written in the older thenNat / Register style.
-- Store case: evaluate the address and the source, then ST.
2375 assignIntCode pk (CmmLoad dst _) src
2376 = getNewRegNat IntRep `thenNat` \ tmp ->
2377 getAmode dst `thenNat` \ amode ->
2378 getRegister src `thenNat` \ register ->
2380 code1 = amodeCode amode []
2381 dst__2 = amodeAddr amode
2382 code2 = registerCode register tmp []
2383 src__2 = registerName register tmp
2384 sz = primRepToSize pk
2385 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register case: if the rhs landed in a fixed register, copy it into
-- the destination with OR (register move); otherwise the rhs was
-- generated directly into the destination.
2389 assignIntCode pk dst src
2390 = getRegister dst `thenNat` \ register1 ->
2391 getRegister src `thenNat` \ register2 ->
2393 dst__2 = registerName register1 zeroh
2394 code = registerCode register2 dst__2
2395 src__2 = registerName register2 dst__2
2396 code__2 = if isFixed register2
2397 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2402 #endif /* alpha_TARGET_ARCH */
2404 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2406 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2408 -- integer assignment to memory
2410 -- specific case of adding/subtracting an integer to a particular address.
2411 -- ToDo: catch other cases where we can use an operation directly on a memory
-- Peephole: mem[addr] := mem[addr] `op` imm becomes a single
-- read-modify-write ADD/SUB on the address.  Guarded so a 64-bit-wide
-- immediate is never emitted.
2413 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2414 CmmLit (CmmInt i _)])
2415 | addr == addr2, pk /= I64 || not (is64BitInteger i),
2416 Just instr <- check op
2417 = do Amode amode code_addr <- getAmode addr
2418 let code = code_addr `snocOL`
2419 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2422 check (MO_Add _) = Just ADD
2423 check (MO_Sub _) = Just SUB
-- General store: source as register-or-immediate, then MOV to memory.
2428 assignMem_IntCode pk addr src = do
2429 Amode addr code_addr <- getAmode addr
2430 (code_src, op_src) <- get_op_RI src
2432 code = code_src `appOL`
2434 MOV pk op_src (OpAddr addr)
2435 -- NOTE: op_src is stable, so it will still be valid
2436 -- after code_addr. This may involve the introduction
2437 -- of an extra MOV to a temporary register, but we hope
2438 -- the register allocator will get rid of it.
2442 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2443 get_op_RI (CmmLit lit) | not (is64BitLit lit)
2444 = return (nilOL, OpImm (litToImm lit))
2446 = do (reg,code) <- getNonClobberedReg op
2447 return (code, OpReg reg)
2450 -- Assign; dst is a reg, rhs is mem
2451 assignReg_IntCode pk reg (CmmLoad src _) = do
2452 load_code <- intLoadCode (MOV pk) src
2453 return (load_code (getRegisterReg reg))
2455 -- dst is a reg, but src could be anything
2456 assignReg_IntCode pk reg src = do
2457 code <- getAnyReg src
2458 return (code (getRegisterReg reg))
2460 #endif /* i386_TARGET_ARCH */
2462 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2464 #if sparc_TARGET_ARCH
-- Store: evaluate source into a register, then ST to the amode.
2466 assignMem_IntCode pk addr src = do
2467 (srcReg, code) <- getSomeReg src
2468 Amode dstAddr addr_code <- getAmode addr
2469 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Register assign: Any lets the rhs target dst directly; a Fixed rhs
-- needs an explicit move (OR with %g0 acts as a register copy).
2471 assignReg_IntCode pk reg src = do
2472 r <- getRegister src
2474 Any _ code -> code dst
2475 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
2477 dst = getRegisterReg reg
2480 #endif /* sparc_TARGET_ARCH */
2482 #if powerpc_TARGET_ARCH
2484 assignMem_IntCode pk addr src = do
2485 (srcReg, code) <- getSomeReg src
2486 Amode dstAddr addr_code <- getAmode addr
2487 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2489 -- dst is a reg, but src could be anything
-- Same shape as sparc, but the register copy is the MR instruction.
2490 assignReg_IntCode pk reg src
2492 r <- getRegister src
2494 Any _ code -> code dst
2495 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2497 dst = getRegisterReg reg
2499 #endif /* powerpc_TARGET_ARCH */
2502 -- -----------------------------------------------------------------------------
2503 -- Floating-point assignments
2505 #if alpha_TARGET_ARCH
-- Alpha: identical structure to assignIntCode above, but the
-- register-to-register move is FMOV.
2507 assignFltCode pk (CmmLoad dst _) src
2508 = getNewRegNat pk `thenNat` \ tmp ->
2509 getAmode dst `thenNat` \ amode ->
2510 getRegister src `thenNat` \ register ->
2512 code1 = amodeCode amode []
2513 dst__2 = amodeAddr amode
2514 code2 = registerCode register tmp []
2515 src__2 = registerName register tmp
2516 sz = primRepToSize pk
2517 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2521 assignFltCode pk dst src
2522 = getRegister dst `thenNat` \ register1 ->
2523 getRegister src `thenNat` \ register2 ->
2525 dst__2 = registerName register1 zeroh
2526 code = registerCode register2 dst__2
2527 src__2 = registerName register2 dst__2
2528 code__2 = if isFixed register2
2529 then code . mkSeqInstr (FMOV src__2 dst__2)
2534 #endif /* alpha_TARGET_ARCH */
2536 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2538 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2540 -- Floating point assignment to memory
-- i386 uses the x87 store (GST); x86_64 stores an SSE register with a
-- plain MOV.
2541 assignMem_FltCode pk addr src = do
2542 (src_reg, src_code) <- getNonClobberedReg src
2543 Amode addr addr_code <- getAmode addr
2545 code = src_code `appOL`
2547 IF_ARCH_i386(GST pk src_reg addr,
2548 MOV pk (OpReg src_reg) (OpAddr addr))
2551 -- Floating point assignment to a register/temporary
2552 assignReg_FltCode pk reg src = do
2553 src_code <- getAnyReg src
2554 return (src_code (getRegisterReg reg))
2556 #endif /* i386_TARGET_ARCH */
2558 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2560 #if sparc_TARGET_ARCH
2562 -- Floating point assignment to memory
-- If the source's rep differs from the store width, convert via
-- FxTOy into a temp before storing.
2563 assignMem_FltCode pk addr src = do
2564 Amode dst__2 code1 <- getAmode addr
2565 (src__2, code2) <- getSomeReg src
2566 tmp1 <- getNewRegNat pk
2568 pk__2 = cmmExprRep src
2569 code__2 = code1 `appOL` code2 `appOL`
2571 then unitOL (ST pk src__2 dst__2)
2572 else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
2575 -- Floating point assignment to a register/temporary
2576 -- ToDo: Verify correctness
2577 assignReg_FltCode pk reg src = do
2578 r <- getRegister src
2579 v1 <- getNewRegNat pk
2581 Any _ code -> code dst
2582 Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
2584 dst = getRegisterReg reg
2586 #endif /* sparc_TARGET_ARCH */
2588 #if powerpc_TARGET_ARCH
-- PPC float moves use the same ST / MR-style code paths as integers.
2591 assignMem_FltCode = assignMem_IntCode
2592 assignReg_FltCode = assignReg_IntCode
2594 #endif /* powerpc_TARGET_ARCH */
2597 -- -----------------------------------------------------------------------------
2598 -- Generating an non-local jump
2600 -- (If applicable) Do not fill the delay slots here; you will confuse the
2601 -- register allocator.
2603 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2605 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2607 #if alpha_TARGET_ARCH
2609 genJump (CmmLabel lbl)
2610 | isAsmTemp lbl = returnInstr (BR target)
2611 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2613 target = ImmCLbl lbl
2616 = getRegister tree `thenNat` \ register ->
2617 getNewRegNat PtrRep `thenNat` \ tmp ->
2619 dst = registerName register pv
2620 code = registerCode register pv
2621 target = registerName register pv
2623 if isFixed register then
2624 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2626 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2628 #endif /* alpha_TARGET_ARCH */
2630 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2632 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump through memory, to an immediate label, or via a register.
2634 genJump (CmmLoad mem pk) = do
2635 Amode target code <- getAmode mem
2636 return (code `snocOL` JMP (OpAddr target))
2638 genJump (CmmLit lit) = do
2639 return (unitOL (JMP (OpImm (litToImm lit))))
2642 (reg,code) <- getSomeReg expr
2643 return (code `snocOL` JMP (OpReg reg))
2645 #endif /* i386_TARGET_ARCH */
2647 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2649 #if sparc_TARGET_ARCH
-- sparc: trailing NOPs fill the branch delay slot (see note above).
2651 genJump (CmmLit (CmmLabel lbl))
2652 = return (toOL [CALL (Left target) 0 True, NOP])
2654 target = ImmCLbl lbl
2658 (target, code) <- getSomeReg tree
2659 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2661 #endif /* sparc_TARGET_ARCH */
2663 #if powerpc_TARGET_ARCH
-- ppc: computed jumps go via the count register (MTCTR then BCTR).
2664 genJump (CmmLit (CmmLabel lbl))
2665 = return (unitOL $ JMP lbl)
2669 (target,code) <- getSomeReg tree
2670 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2671 #endif /* powerpc_TARGET_ARCH */
2674 -- -----------------------------------------------------------------------------
2675 -- Unconditional branches
-- | Unconditional branch to a local basic block, expanded by the
-- target-specific 'mkBranchInstr'.
genBranch :: BlockId -> NatM InstrBlock
genBranch block_id = return (toOL (mkBranchInstr block_id))
2681 -- -----------------------------------------------------------------------------
2682 -- Conditional jumps
2685 Conditional jumps are always to local labels, so we can use branch
2686 instructions. We peek at the arguments to decide what kind of
2689 ALPHA: For comparisons with 0, we're laughing, because we can just do
2690 the desired conditional branch.
2692 I386: First, we have to ensure that the condition
2693 codes are set according to the supplied comparison operation.
2695 SPARC: First, we have to ensure that the condition codes are set
2696 according to the supplied comparison operation. We generate slightly
2697 different code for floating point comparisons, because a floating
2698 point operation cannot directly precede a @BF@. We assume the worst
2699 and fill that slot with a @NOP@.
2701 SPARC: Do not fill the delay slots here; you will confuse the register
2707 :: BlockId -- the branch target
2708 -> CmmExpr -- the condition on which to branch
2711 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2713 #if alpha_TARGET_ARCH
-- Alpha, pre-Cmm (StixTree) code.  Special-cases comparison against
-- an integer or double literal zero so BI/BF can branch directly;
-- otherwise materialises the comparison result first.
2715 genCondJump id (StPrim op [x, StInt 0])
2716 = getRegister x `thenNat` \ register ->
2717 getNewRegNat (registerRep register)
2720 code = registerCode register tmp
2721 value = registerName register tmp
2722 pk = registerRep register
2723 target = ImmCLbl lbl
2725 returnSeq code [BI (cmpOp op) value target]
2727 cmpOp CharGtOp = GTT
2729 cmpOp CharEqOp = EQQ
2731 cmpOp CharLtOp = LTT
-- NOTE(review): ALWAYS/NEVER for the unsigned-vs-zero cases encode
-- that an unsigned value is always >= 0 and never < 0.
2740 cmpOp WordGeOp = ALWAYS
2741 cmpOp WordEqOp = EQQ
2743 cmpOp WordLtOp = NEVER
2744 cmpOp WordLeOp = EQQ
2746 cmpOp AddrGeOp = ALWAYS
2747 cmpOp AddrEqOp = EQQ
2749 cmpOp AddrLtOp = NEVER
2750 cmpOp AddrLeOp = EQQ
2752 genCondJump lbl (StPrim op [x, StDouble 0.0])
2753 = getRegister x `thenNat` \ register ->
2754 getNewRegNat (registerRep register)
2757 code = registerCode register tmp
2758 value = registerName register tmp
2759 pk = registerRep register
2760 target = ImmCLbl lbl
2762 return (code . mkSeqInstr (BF (cmpOp op) value target))
2764 cmpOp FloatGtOp = GTT
2765 cmpOp FloatGeOp = GE
2766 cmpOp FloatEqOp = EQQ
2767 cmpOp FloatNeOp = NE
2768 cmpOp FloatLtOp = LTT
2769 cmpOp FloatLeOp = LE
2770 cmpOp DoubleGtOp = GTT
2771 cmpOp DoubleGeOp = GE
2772 cmpOp DoubleEqOp = EQQ
2773 cmpOp DoubleNeOp = NE
2774 cmpOp DoubleLtOp = LTT
2775 cmpOp DoubleLeOp = LE
-- General float comparison: each op maps to an inverted FCMP plus the
-- condition to branch on afterwards.
2777 genCondJump lbl (StPrim op [x, y])
2779 = trivialFCode pr instr x y `thenNat` \ register ->
2780 getNewRegNat F64 `thenNat` \ tmp ->
2782 code = registerCode register tmp
2783 result = registerName register tmp
2784 target = ImmCLbl lbl
2786 return (code . mkSeqInstr (BF cond result target))
2788 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2790 fltCmpOp op = case op of
2804 (instr, cond) = case op of
2805 FloatGtOp -> (FCMP TF LE, EQQ)
2806 FloatGeOp -> (FCMP TF LTT, EQQ)
2807 FloatEqOp -> (FCMP TF EQQ, NE)
2808 FloatNeOp -> (FCMP TF EQQ, EQQ)
2809 FloatLtOp -> (FCMP TF LTT, NE)
2810 FloatLeOp -> (FCMP TF LE, NE)
2811 DoubleGtOp -> (FCMP TF LE, EQQ)
2812 DoubleGeOp -> (FCMP TF LTT, EQQ)
2813 DoubleEqOp -> (FCMP TF EQQ, NE)
2814 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2815 DoubleLtOp -> (FCMP TF LTT, NE)
2816 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison, same inverted-compare scheme.
2818 genCondJump lbl (StPrim op [x, y])
2819 = trivialCode instr x y `thenNat` \ register ->
2820 getNewRegNat IntRep `thenNat` \ tmp ->
2822 code = registerCode register tmp
2823 result = registerName register tmp
2824 target = ImmCLbl lbl
2826 return (code . mkSeqInstr (BI cond result target))
2828 (instr, cond) = case op of
2829 CharGtOp -> (CMP LE, EQQ)
2830 CharGeOp -> (CMP LTT, EQQ)
2831 CharEqOp -> (CMP EQQ, NE)
2832 CharNeOp -> (CMP EQQ, EQQ)
2833 CharLtOp -> (CMP LTT, NE)
2834 CharLeOp -> (CMP LE, NE)
2835 IntGtOp -> (CMP LE, EQQ)
2836 IntGeOp -> (CMP LTT, EQQ)
2837 IntEqOp -> (CMP EQQ, NE)
2838 IntNeOp -> (CMP EQQ, EQQ)
2839 IntLtOp -> (CMP LTT, NE)
2840 IntLeOp -> (CMP LE, NE)
2841 WordGtOp -> (CMP ULE, EQQ)
2842 WordGeOp -> (CMP ULT, EQQ)
2843 WordEqOp -> (CMP EQQ, NE)
2844 WordNeOp -> (CMP EQQ, EQQ)
2845 WordLtOp -> (CMP ULT, NE)
2846 WordLeOp -> (CMP ULE, NE)
2847 AddrGtOp -> (CMP ULE, EQQ)
2848 AddrGeOp -> (CMP ULT, EQQ)
2849 AddrEqOp -> (CMP EQQ, NE)
2850 AddrNeOp -> (CMP EQQ, EQQ)
2851 AddrLtOp -> (CMP ULT, NE)
2852 AddrLeOp -> (CMP ULE, NE)
2854 #endif /* alpha_TARGET_ARCH */
2856 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2858 #if i386_TARGET_ARCH
-- i386: the CondCode's code sets the flags; JXX does the branch.
2860 genCondJump id bool = do
2861 CondCode _ cond code <- getCondCode bool
2862 return (code `snocOL` JXX cond id)
2866 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2868 #if x86_64_TARGET_ARCH
-- x86_64: float comparisons need extra care for the unordered (NaN)
-- case -- see the comment with condFltReg.  An extra local label is
-- used to combine the parity and condition tests.
2870 genCondJump id bool = do
2871 CondCode is_float cond cond_code <- getCondCode bool
2874 return (cond_code `snocOL` JXX cond id)
2876 lbl <- getBlockIdNat
2878 -- see comment with condFltReg
2879 let code = case cond of
2885 plain_test = unitOL (
2888 or_unordered = toOL [
2892 and_ordered = toOL [
2898 return (cond_code `appOL` code)
2902 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2904 #if sparc_TARGET_ARCH
-- sparc: BF for float conditions, BI for integer; NOPs fill the
-- branch delay slots.
2906 genCondJump (BlockId id) bool = do
2907 CondCode is_float cond code <- getCondCode bool
2912 then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2913 else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
2917 #endif /* sparc_TARGET_ARCH */
2920 #if powerpc_TARGET_ARCH
2922 genCondJump id bool = do
2923 CondCode is_float cond code <- getCondCode bool
2924 return (code `snocOL` BCC cond id)
2926 #endif /* powerpc_TARGET_ARCH */
2929 -- -----------------------------------------------------------------------------
2930 -- Generating C calls
2932 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2933 -- @get_arg@, which moves the arguments to the correct registers/stack
2934 -- locations. Apart from that, the code is easy.
2936 -- (If applicable) Do not fill the delay slots here; you will confuse the
2937 -- register allocator.
2940 :: CmmCallTarget -- function to call
2941 -> CmmHintFormals -- where to put the result
2942 -> CmmActuals -- arguments (of mixed type)
2945 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2947 #if alpha_TARGET_ARCH
-- Alpha C call: marshal args into registers/stack, load the callee
-- address into pv, JSR, then reload gp.
2951 genCCall fn cconv result_regs args
2952 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2953 `thenNat` \ ((unused,_), argCode) ->
2955 nRegs = length allArgRegs - length unused
2956 code = asmSeqThen (map ($ []) argCode)
2959 LDA pv (AddrImm (ImmLab (ptext fn))),
2960 JSR ra (AddrReg pv) nRegs,
2961 LDGP gp (AddrReg ra)]
2963 ------------------------
2964 {- Try to get a value into a specific register (or registers) for
2965 a call. The first 6 arguments go into the appropriate
2966 argument register (separate registers for integer and floating
2967 point arguments, but used in lock-step), and the remaining
2968 arguments are dumped to the stack, beginning at 0(sp). Our
2969 first argument is a pair of the list of remaining argument
2970 registers to be assigned for this call and the next stack
2971 offset to use for overflowing arguments. This way,
2972 @get_Arg@ can be applied to all of a call's arguments using
2976 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2977 -> StixTree -- Current argument
2978 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2980 -- We have to use up all of our argument registers first...
2982 get_arg ((iDst,fDst):dsts, offset) arg
2983 = getRegister arg `thenNat` \ register ->
2985 reg = if isFloatingRep pk then fDst else iDst
2986 code = registerCode register reg
2987 src = registerName register reg
2988 pk = registerRep register
2991 if isFloatingRep pk then
2992 ((dsts, offset), if isFixed register then
2993 code . mkSeqInstr (FMOV src fDst)
2996 ((dsts, offset), if isFixed register then
2997 code . mkSeqInstr (OR src (RIReg src) iDst)
3000 -- Once we have run out of argument registers, we move to the
3003 get_arg ([], offset) arg
3004 = getRegister arg `thenNat` \ register ->
3005 getNewRegNat (registerRep register)
3008 code = registerCode register tmp
3009 src = registerName register tmp
3010 pk = registerRep register
3011 sz = primRepToSize pk
3013 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3015 #endif /* alpha_TARGET_ARCH */
3017 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3019 #if i386_TARGET_ARCH
3021 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3022 -- write barrier compiles to no code on x86/x86-64;
3023 -- we keep it this long in order to prevent earlier optimisations.
3025 -- we only cope with a single result for foreign calls
-- sqrt/sin/cos/tan are inlined as x87 instructions; every other
-- CallishMachOp becomes an out-of-line libm call.
3026 genCCall (CmmPrim op) [(r,_)] args = do
3028 MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
3029 MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
3031 MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
3032 MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
3034 MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
3035 MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
3037 MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
3038 MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
3040 other_op -> outOfLineFloatOp op r args
3042 actuallyInlineFloatOp rep instr [(x,_)]
3043 = do res <- trivialUFCode rep instr x
3045 return (any (getRegisterReg (CmmLocal r)))
-- General i386 C call: push args right-to-left, tracking the stack
-- delta; on Darwin, pad first so the outgoing area is 16-byte aligned.
3047 genCCall target dest_regs args = do
3049 sizes = map (arg_size . cmmExprRep . fst) (reverse args)
3050 #if !darwin_TARGET_OS
3051 tot_arg_size = sum sizes
3053 raw_arg_size = sum sizes
3054 tot_arg_size = roundTo 16 raw_arg_size
3055 arg_pad_size = tot_arg_size - raw_arg_size
3056 delta0 <- getDeltaNat
3057 setDeltaNat (delta0 - arg_pad_size)
3060 push_codes <- mapM push_arg (reverse args)
3061 delta <- getDeltaNat
3064 -- deal with static vs dynamic call targets
3065 (callinsns,cconv) <-
3068 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3069 -> -- ToDo: stdcall arg sizes
3070 return (unitOL (CALL (Left fn_imm) []), conv)
3071 where fn_imm = ImmCLbl lbl
3072 CmmForeignCall expr conv
3073 -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
3074 ASSERT(dyn_rep == I32)
3075 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
3078 #if darwin_TARGET_OS
3080 = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3081 DELTA (delta0 - arg_pad_size)]
3082 `appOL` concatOL push_codes
3085 = concatOL push_codes
3086 call = callinsns `appOL`
3088 -- Deallocate parameters after call for ccall;
3089 -- but not for stdcall (callee does it)
3090 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3091 [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3093 [DELTA (delta + tot_arg_size)]
3096 setDeltaNat (delta + tot_arg_size)
3099 -- assign the results, if necessary
-- Results come back in eax (ints; edx:eax for I64) or x87 fake0
-- (floats) and are moved into the destination virtual register.
3100 assign_code [] = nilOL
3101 assign_code [(dest,_hint)] =
3103 I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
3104 MOV I32 (OpReg edx) (OpReg r_dest_hi)]
3105 F32 -> unitOL (GMOV fake0 r_dest)
3106 F64 -> unitOL (GMOV fake0 r_dest)
3107 rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
3109 r_dest_hi = getHiVRegFromLo r_dest
3110 rep = localRegRep dest
3111 r_dest = getRegisterReg (CmmLocal dest)
3112 assign_code many = panic "genCCall.assign_code many"
3114 return (push_code `appOL`
3116 assign_code dest_regs)
-- round x up to the next multiple of a
3124 roundTo a x | x `mod` a == 0 = x
3125 | otherwise = x + a - (x `mod` a)
3128 push_arg :: (CmmExpr,MachHint){-current argument-}
3129 -> NatM InstrBlock -- code
3131 push_arg (arg,_hint) -- we don't need the hints on x86
-- 64-bit args are pushed as two 32-bit words, high word first.
3132 | arg_rep == I64 = do
3133 ChildCode64 code r_lo <- iselExpr64 arg
3134 delta <- getDeltaNat
3135 setDeltaNat (delta - 8)
3137 r_hi = getHiVRegFromLo r_lo
3139 return ( code `appOL`
3140 toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
3141 PUSH I32 (OpReg r_lo), DELTA (delta - 8),
-- Floats can't be PUSHed from x87 registers: make room on the
-- stack with SUB and store with GST instead.
3146 (code, reg, sz) <- get_op arg
3147 delta <- getDeltaNat
3148 let size = arg_size sz
3149 setDeltaNat (delta-size)
3150 if (case sz of F64 -> True; F32 -> True; _ -> False)
3151 then return (code `appOL`
3152 toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
3154 GST sz reg (AddrBaseIndex (EABaseReg esp)
3158 else return (code `snocOL`
3159 PUSH I32 (OpReg reg) `snocOL`
3163 arg_rep = cmmExprRep arg
3166 get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
3168 (reg,code) <- getSomeReg op
3169 return (code, reg, cmmExprRep op)
3171 #endif /* i386_TARGET_ARCH */
3173 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3175 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
-- Compile a CallishMachOp as an unsafe foreign call to the matching
-- C math-library routine.  An F64 result is assigned directly; other
-- result reps go via a fresh F64 temporary and a second assignment.
3177 outOfLineFloatOp mop res args
3179 targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
3180 let target = CmmForeignCall targetExpr CCallConv
3182 if localRegRep res == F64
3184 stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe)
3188 tmp = LocalReg uq F64 KindNonPtr
3190 code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe)
3191 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3192 return (code1 `appOL` code2)
-- libm entry points: the F32 ops call the C99 float variants
-- (sqrtf, sinf, ...), the F64 ops the double variants.
3194 lbl = mkForeignLabel fn Nothing False
3197 MO_F32_Sqrt -> FSLIT("sqrtf")
3198 MO_F32_Sin -> FSLIT("sinf")
3199 MO_F32_Cos -> FSLIT("cosf")
3200 MO_F32_Tan -> FSLIT("tanf")
3201 MO_F32_Exp -> FSLIT("expf")
3202 MO_F32_Log -> FSLIT("logf")
3204 MO_F32_Asin -> FSLIT("asinf")
3205 MO_F32_Acos -> FSLIT("acosf")
3206 MO_F32_Atan -> FSLIT("atanf")
3208 MO_F32_Sinh -> FSLIT("sinhf")
3209 MO_F32_Cosh -> FSLIT("coshf")
3210 MO_F32_Tanh -> FSLIT("tanhf")
3211 MO_F32_Pwr -> FSLIT("powf")
3213 MO_F64_Sqrt -> FSLIT("sqrt")
3214 MO_F64_Sin -> FSLIT("sin")
3215 MO_F64_Cos -> FSLIT("cos")
3216 MO_F64_Tan -> FSLIT("tan")
3217 MO_F64_Exp -> FSLIT("exp")
3218 MO_F64_Log -> FSLIT("log")
3220 MO_F64_Asin -> FSLIT("asin")
3221 MO_F64_Acos -> FSLIT("acos")
3222 MO_F64_Atan -> FSLIT("atan")
3224 MO_F64_Sinh -> FSLIT("sinh")
3225 MO_F64_Cosh -> FSLIT("cosh")
3226 MO_F64_Tanh -> FSLIT("tanh")
3227 MO_F64_Pwr -> FSLIT("pow")
3229 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3231 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3233 #if x86_64_TARGET_ARCH
3235 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3236 -- write barrier compiles to no code on x86/x86-64;
3237 -- we keep it this long in order to prevent earlier optimisations.
3239 genCCall (CmmPrim op) [(r,_)] args =
3240 outOfLineFloatOp op r args
3242 genCCall target dest_regs args = do
3244 -- load up the register arguments
3245 (stack_args, aregs, fregs, load_args_code)
3246 <- load_args args allArgRegs allFPArgRegs nilOL
3249 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3250 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3251 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3252 -- for annotating the call instruction with
3254 sse_regs = length fp_regs_used
3256 tot_arg_size = arg_size * length stack_args
3258 -- On entry to the called function, %rsp should be aligned
3259 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3260 -- the return address is 16-byte aligned). In STG land
3261 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3262 -- need to make sure we push a multiple of 16-bytes of args,
3263 -- plus the return address, to get the correct alignment.
3264 -- Urg, this is hard. We need to feed the delta back into
3265 -- the arg pushing code.
3266 (real_size, adjust_rsp) <-
3267 if tot_arg_size `rem` 16 == 0
3268 then return (tot_arg_size, nilOL)
3269 else do -- we need to adjust...
3270 delta <- getDeltaNat
3271 setDeltaNat (delta-8)
3272 return (tot_arg_size+8, toOL [
3273 SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
3277 -- push the stack args, right to left
3278 push_code <- push_args (reverse stack_args) nilOL
3279 delta <- getDeltaNat
3281 -- deal with static vs dynamic call targets
3282 (callinsns,cconv) <-
3285 CmmForeignCall (CmmLit (CmmLabel lbl)) conv
3286 -> -- ToDo: stdcall arg sizes
3287 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3288 where fn_imm = ImmCLbl lbl
3289 CmmForeignCall expr conv
3290 -> do (dyn_r, dyn_c) <- getSomeReg expr
3291 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3294 -- The x86_64 ABI requires us to set %al to the number of SSE
3295 -- registers that contain arguments, if the called routine
3296 -- is a varargs function. We don't know whether it's a
3297 -- varargs function or not, so we have to assume it is.
3299 -- It's not safe to omit this assignment, even if the number
3300 -- of SSE regs in use is zero. If %al is larger than 8
3301 -- on entry to a varargs function, seg faults ensue.
3302 assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
3304 let call = callinsns `appOL`
3306 -- Deallocate parameters after call for ccall;
3307 -- but not for stdcall (callee does it)
3308 (if cconv == StdCallConv || real_size==0 then [] else
3309 [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
3311 [DELTA (delta + real_size)]
3314 setDeltaNat (delta + real_size)
3317 -- assign the results, if necessary
3318 assign_code [] = nilOL
3319 assign_code [(dest,_hint)] =
3321 F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3322 F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
3323 rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
3325 rep = localRegRep dest
3326 r_dest = getRegisterReg (CmmLocal dest)
3327 assign_code many = panic "genCCall.assign_code many"
3329 return (load_args_code `appOL`
3332 assign_eax sse_regs `appOL`
3334 assign_code dest_regs)
3337 arg_size = 8 -- always, at the mo
3339 load_args :: [(CmmExpr,MachHint)]
3340 -> [Reg] -- int regs avail for args
3341 -> [Reg] -- FP regs avail for args
3343 -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
3344 load_args args [] [] code = return (args, [], [], code)
3345 -- no more regs to use
3346 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3347 -- no more args to push
3348 load_args ((arg,hint) : rest) aregs fregs code
3349 | isFloatingRep arg_rep =
3353 arg_code <- getAnyReg arg
3354 load_args rest aregs rs (code `appOL` arg_code r)
3359 arg_code <- getAnyReg arg
3360 load_args rest rs fregs (code `appOL` arg_code r)
3362 arg_rep = cmmExprRep arg
3365 (args',ars,frs,code') <- load_args rest aregs fregs code
3366 return ((arg,hint):args', ars, frs, code')
3368 push_args [] code = return code
3369 push_args ((arg,hint):rest) code
3370 | isFloatingRep arg_rep = do
3371 (arg_reg, arg_code) <- getSomeReg arg
3372 delta <- getDeltaNat
3373 setDeltaNat (delta-arg_size)
3374 let code' = code `appOL` arg_code `appOL` toOL [
3375 SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3376 DELTA (delta-arg_size),
3377 MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
3378 push_args rest code'
3381 -- we only ever generate word-sized function arguments. Promotion
3382 -- has already happened: our Int8# type is kept sign-extended
3383 -- in an Int#, for example.
3384 ASSERT(arg_rep == I64) return ()
3385 (arg_op, arg_code) <- getOperand arg
3386 delta <- getDeltaNat
3387 setDeltaNat (delta-arg_size)
3388 let code' = code `appOL` toOL [PUSH I64 arg_op,
3389 DELTA (delta-arg_size)]
3390 push_args rest code'
3392 arg_rep = cmmExprRep arg
3395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3397 #if sparc_TARGET_ARCH
3399 The SPARC calling convention is an absolute
3400 nightmare. The first 6x32 bits of arguments are mapped into
3401 %o0 through %o5, and the remaining arguments are dumped to the
3402 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3404 If we have to put args on the stack, move %o6==%sp down by
3405 the number of words to go on the stack, to ensure there's enough space.
3407 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3408 16 words above the stack pointer is a word for the address of
3409 a structure return value. I use this as a temporary location
3410 for moving values from float to int regs. Certainly it isn't
3411 safe to put anything in the 16 words starting at %sp, since
3412 this area can get trashed at any time due to window overflows
3413 caused by signal handlers.
3415 A final complication (if the above isn't enough) is that
3416 we can't blithely calculate the arguments one by one into
3417 %o0 .. %o5. Consider the following nested calls:
3421 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3422 the inner call will itself use %o0, which trashes the value put there
3423 in preparation for the outer call. Upshot: we need to calculate the
3424 args into temporary regs, and move those to arg regs or onto the
3425 stack only immediately prior to the call proper. Sigh.
-- | Generate code for a C call on SPARC.
--
-- Strategy (see the large comment above): every argument is first
-- computed into one or two fresh integer vregs ('arg_to_int_vregs');
-- only immediately before the call proper are those vregs moved into
-- the outgoing-argument registers %o0..%o5 or onto the stack
-- ('move_final').  This avoids a nested call clobbering %o0..%o5
-- while the outer call's arguments are being computed.
genCCall target dest_regs argsAndHints = do
    -- drop the MachHints; everything goes through integer vregs here
    args = map fst argsAndHints
    -- one (code, vregs) pair per argument
    argcode_and_vregs <- mapM arg_to_int_vregs args
    (argcodes, vregss) = unzip argcode_and_vregs
    -- how many of %o0..%o5 actually get filled
    n_argRegs = length allArgRegs
    n_argRegs_used = min (length vregs) n_argRegs
    vregs = concat vregss
    -- deal with static vs dynamic call targets
    callinsns <- (case target of
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
                -- static target: call the label directly
                return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
        CmmForeignCall expr conv -> do
                -- dynamic target: compute the address into a vreg first
                (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
                return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
                -- callish mach-op: route to an out-of-line libm routine
                (res, reduce) <- outOfLineFloatOp mop
                lblOrMopExpr <- case res of
                     return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
                     (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
                     return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
                -- 'reduce' (the F32 variants) is not supported on this path
                if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
    argcode = concatOL argcodes
    -- if there are more args than arg registers, move %sp down to make
    -- room, rounding to an even number of words to keep 8-byte alignment
    (move_sp_down, move_sp_up)
       = let diff = length vregs - n_argRegs
             nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
         else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
    -- move the marshalled vregs into %o0..%o5 / onto the stack
    = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
    -- argument marshalling, stack adjust, register transfer, then the
    -- call itself, then the stack restore
    return (argcode `appOL`
            move_sp_down `appOL`
            transfer_code `appOL`

-- move args from the integer vregs into which they have been
-- marshalled, into %o0 .. %o5, and the rest onto the stack.
move_final :: [Reg] -> [Reg] -> Int -> [Instr]
move_final [] _ offset -- all args done
move_final (v:vs) [] offset -- out of aregs; move to stack
   = ST I32 v (spRel offset)
   : move_final vs [] (offset+1)
move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
   -- %g0 is hardwired zero on SPARC, so OR with it is a register move
   = OR False g0 (RIReg v) a
   : move_final vs az offset
-- | Map a floating-point 'CallishMachOp' to an out-of-line C routine
-- (the libm function of the same name).  Returns either the routine's
-- static label or a dynamic expression for its address, plus a flag:
-- True means the F32 variant was requested, which calls the
-- double-precision routine and so needs its result reduced to single
-- precision afterwards.
--
-- NOTE(review): unlike the PowerPC version below, MO_F32_Pwr and
-- MO_F64_Pwr are not handled here and fall into the panic — confirm
-- that is intended on SPARC.
outOfLineFloatOp mop =
    mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                mkForeignLabel functionName Nothing True
    -- a plain label can be called directly; anything else is a
    -- dynamic reference (PIC)
    let mopLabelOrExpr = case mopExpr of
            CmmLit (CmmLabel lbl) -> Left lbl
    return (mopLabelOrExpr, reduce)
    -- F32 ops use the F64 libm entry points (reduce = True);
    -- F64 ops call them directly (reduce = False)
    (reduce, functionName) = case mop of
        MO_F32_Exp -> (True, FSLIT("exp"))
        MO_F32_Log -> (True, FSLIT("log"))
        MO_F32_Sqrt -> (True, FSLIT("sqrt"))
        MO_F32_Sin -> (True, FSLIT("sin"))
        MO_F32_Cos -> (True, FSLIT("cos"))
        MO_F32_Tan -> (True, FSLIT("tan"))
        MO_F32_Asin -> (True, FSLIT("asin"))
        MO_F32_Acos -> (True, FSLIT("acos"))
        MO_F32_Atan -> (True, FSLIT("atan"))
        MO_F32_Sinh -> (True, FSLIT("sinh"))
        MO_F32_Cosh -> (True, FSLIT("cosh"))
        MO_F32_Tanh -> (True, FSLIT("tanh"))
        MO_F64_Exp -> (False, FSLIT("exp"))
        MO_F64_Log -> (False, FSLIT("log"))
        MO_F64_Sqrt -> (False, FSLIT("sqrt"))
        MO_F64_Sin -> (False, FSLIT("sin"))
        MO_F64_Cos -> (False, FSLIT("cos"))
        MO_F64_Tan -> (False, FSLIT("tan"))
        MO_F64_Asin -> (False, FSLIT("asin"))
        MO_F64_Acos -> (False, FSLIT("acos"))
        MO_F64_Atan -> (False, FSLIT("atan"))
        MO_F64_Sinh -> (False, FSLIT("sinh"))
        MO_F64_Cosh -> (False, FSLIT("cosh"))
        MO_F64_Tanh -> (False, FSLIT("tanh"))
        other -> pprPanic "outOfLineFloatOp(sparc) "
                          (pprCallishMachOp mop)
3578 #endif /* sparc_TARGET_ARCH */
3580 #if powerpc_TARGET_ARCH
3582 #if darwin_TARGET_OS || linux_TARGET_OS
3584 The PowerPC calling convention for Darwin/Mac OS X
3585 is described in Apple's document
3586 "Inside Mac OS X - Mach-O Runtime Architecture".
3588 PowerPC Linux uses the System V Release 4 Calling Convention
3589 for PowerPC. It is described in the
3590 "System V Application Binary Interface PowerPC Processor Supplement".
3592 Both conventions are similar:
3593 Parameters may be passed in general-purpose registers starting at r3, in
3594 floating point registers starting at f1, or on the stack.
3596 But there are substantial differences:
3597 * The number of registers used for parameter passing and the exact set of
3598 nonvolatile registers differs (see MachRegs.lhs).
3599 * On Darwin, stack space is always reserved for parameters, even if they are
3600 passed in registers. The called routine may choose to save parameters from
3601 registers to the corresponding space on the stack.
3602 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3603 parameter is passed in an FPR.
3604 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3605 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3606 Darwin just treats an I64 like two separate I32s (high word first).
3607 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3608 4-byte aligned like everything else on Darwin.
3609 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3610 PowerPC Linux does not agree, so neither do we.
3612 According to both conventions, The parameter area should be part of the
3613 caller's stack frame, allocated in the caller's prologue code (large enough
3614 to hold the parameter lists for all called routines). The NCG already
3615 uses the stack for register spilling, leaving 64 bytes free at the top.
3616 If we need a larger parameter area than that, we just allocate a new stack
3617 frame just before ccalling.
-- | A memory write barrier on PowerPC is a single lwsync instruction;
--   it consumes no arguments and assigns no results.
genCCall (CmmPrim MO_WriteBarrier) _ _ =
    return (unitOL LWSYNC)
-- | Generate code for a C call on PowerPC (Darwin or Linux/SysV; see
-- the convention comparison in the comment above).  Arguments are
-- placed in GPRs from r3, FPRs from f1, or on the stack, per the
-- platform ABI; the two ABIs differ in alignment, GPR skipping and
-- reserved stack slots, handled by the CPP sections below.
genCCall target dest_regs argsAndHints
  -- sub-word arguments must already have been promoted by the code
  -- generator; this NCG only handles word-sized (or larger) args
  = ASSERT (not $ any (`elem` [I8,I16]) argReps)
    -- we rely on argument promotion in the codeGen
    (finalStack,passArgumentsCode,usedRegs) <- passArguments
        allArgRegs allFPArgRegs
    -- resolve the call target: static label, dynamic address, or an
    -- out-of-line floating-point primitive
    (labelOrExpr, reduceToF32) <- case target of
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
        CmmForeignCall expr conv -> return (Right expr, False)
        CmmPrim mop -> outOfLineFloatOp mop
    let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
        codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
        -- static call: branch-and-link to the label
        `snocOL` BL lbl usedRegs
        -- dynamic call: load the target into CTR and branch there
        (dynReg, dynCode) <- getSomeReg dyn
        `snocOL` MTCTR dynReg
        `snocOL` BCTRL usedRegs
#if darwin_TARGET_OS
        initialStackOffset = 24
        -- size of linkage area + size of arguments, in bytes
        stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
                                 map machRepByteWidth argReps
#elif linux_TARGET_OS
        initialStackOffset = 8
        stackDelta finalStack = roundTo 16 finalStack
        args = map fst argsAndHints
        argReps = map cmmExprRep args
        -- round x up to the next multiple of a
        roundTo a x | x `mod` a == 0 = x
                    | otherwise = x + a - (x `mod` a)
        -- allocate a fresh stack frame for the parameter area
        -- (store-with-update keeps the back-chain pointer intact)
        move_sp_down finalStack
            toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
          where delta = stackDelta finalStack
        -- pop the parameter frame again after the call
        move_sp_up finalStack
            toOL [ADD sp sp (RIImm (ImmInt delta)),
          where delta = stackDelta finalStack

        -- recurse over the arguments, tracking remaining GPRs/FPRs,
        -- the running stack offset, accumulated code and used regs
        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
        -- I64 arguments: split into hi/lo 32-bit halves
        passArguments ((arg,I64):args) gprs fprs stackOffset
               accumCode accumUsed =
            ChildCode64 code vr_lo <- iselExpr64 arg
            let vr_hi = getHiVRegFromLo vr_lo
#if darwin_TARGET_OS
            -- Darwin: an I64 is simply two I32s, high word first;
            -- stack slots are reserved even when regs are used
                (accumCode `appOL` code
                    `snocOL` storeWord vr_hi gprs stackOffset
                    `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
                ((take 2 gprs) ++ accumUsed)
                -- move to a reg if one is left, else store to the slot
                storeWord vr (gpr:_) offset = MR gpr vr
                storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
#elif linux_TARGET_OS
            -- SysV: I64 goes in an odd/even GPR pair (skipping a GPR
            -- if needed) or in an 8-byte-aligned stack slot
            let stackOffset' = roundTo 8 stackOffset
                stackCode = accumCode `appOL` code
                    `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
                    `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
                regCode hireg loreg =
                    accumCode `appOL` code
                        `snocOL` MR hireg vr_hi
                        `snocOL` MR loreg vr_lo
                hireg : loreg : regs | even (length gprs) ->
                    passArguments args regs fprs stackOffset
                                  (regCode hireg loreg) (hireg : loreg : accumUsed)
                _skipped : hireg : loreg : regs ->
                    passArguments args regs fprs stackOffset
                                  (regCode hireg loreg) (hireg : loreg : accumUsed)
                _ -> -- only one or no regs left
                    passArguments args [] fprs (stackOffset'+8)
        -- non-I64 arguments
        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
          | reg : _ <- regs = do
            -- a register of the right class is available: move into it
            register <- getRegister arg
            let code = case register of
                    Fixed _ freg fcode -> fcode `snocOL` MR reg freg
                    Any _ acode -> acode reg
#if darwin_TARGET_OS
            -- The Darwin ABI requires that we reserve stack slots for register parameters
                (stackOffset + stackBytes)
#elif linux_TARGET_OS
            -- ... the SysV ABI doesn't.
                (accumCode `appOL` code)
            -- no register left: store to the stack slot
            (vr, code) <- getSomeReg arg
                (stackOffset' + stackBytes)
                (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
#if darwin_TARGET_OS
            -- stackOffset is at least 4-byte aligned
            -- The Darwin ABI is happy with that.
                stackOffset' = stackOffset
            -- ... the SysV ABI requires 8-byte alignment for doubles.
                stackOffset' | rep == F64 = roundTo 8 stackOffset
                             | otherwise  = stackOffset
                stackSlot = AddrRegImm sp (ImmInt stackOffset')
                -- regs consumed / stack bytes used per argument kind
                (nGprs, nFprs, stackBytes, regs) = case rep of
                    I32 -> (1, 0, 4, gprs)
#if darwin_TARGET_OS
            -- The Darwin ABI requires that we skip a corresponding number of GPRs when
                    F32 -> (1, 1, 4, fprs)
                    F64 -> (2, 1, 8, fprs)
#elif linux_TARGET_OS
            -- ... the SysV ABI doesn't.
                    F32 -> (0, 1, 4, fprs)
                    F64 -> (0, 1, 8, fprs)

        -- move the call result from the ABI return registers (r3/r4,
        -- f1) into the destination local register
        moveResult reduceToF32 =
            -- F32 result of a 'reduce' libm call: round f1 to single
            | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
            | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
            | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
            | otherwise -> unitOL (MR r_dest r3)
          where rep = cmmRegRep (CmmLocal dest)
                r_dest = getRegisterReg (CmmLocal dest)
-- | Map a floating-point 'CallishMachOp' to an out-of-line C routine
-- (the libm function of the same name), returning either its static
-- label or a dynamic address expression, plus a flag: True means the
-- F32 variant was requested, so the double-precision result must be
-- rounded back to single precision (see 'moveResult' above).
outOfLineFloatOp mop =
    mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                mkForeignLabel functionName Nothing True
    -- a plain label can be branched to directly; anything else is a
    -- dynamic reference (PIC)
    let mopLabelOrExpr = case mopExpr of
            CmmLit (CmmLabel lbl) -> Left lbl
    return (mopLabelOrExpr, reduce)
    -- F32 ops call the F64 libm entry points and reduce afterwards
    (functionName, reduce) = case mop of
        MO_F32_Exp -> (FSLIT("exp"), True)
        MO_F32_Log -> (FSLIT("log"), True)
        MO_F32_Sqrt -> (FSLIT("sqrt"), True)
        MO_F32_Sin -> (FSLIT("sin"), True)
        MO_F32_Cos -> (FSLIT("cos"), True)
        MO_F32_Tan -> (FSLIT("tan"), True)
        MO_F32_Asin -> (FSLIT("asin"), True)
        MO_F32_Acos -> (FSLIT("acos"), True)
        MO_F32_Atan -> (FSLIT("atan"), True)
        MO_F32_Sinh -> (FSLIT("sinh"), True)
        MO_F32_Cosh -> (FSLIT("cosh"), True)
        MO_F32_Tanh -> (FSLIT("tanh"), True)
        MO_F32_Pwr -> (FSLIT("pow"), True)
        MO_F64_Exp -> (FSLIT("exp"), False)
        MO_F64_Log -> (FSLIT("log"), False)
        MO_F64_Sqrt -> (FSLIT("sqrt"), False)
        MO_F64_Sin -> (FSLIT("sin"), False)
        MO_F64_Cos -> (FSLIT("cos"), False)
        MO_F64_Tan -> (FSLIT("tan"), False)
        MO_F64_Asin -> (FSLIT("asin"), False)
        MO_F64_Acos -> (FSLIT("acos"), False)
        MO_F64_Atan -> (FSLIT("atan"), False)
        MO_F64_Sinh -> (FSLIT("sinh"), False)
        MO_F64_Cosh -> (FSLIT("cosh"), False)
        MO_F64_Tanh -> (FSLIT("tanh"), False)
        MO_F64_Pwr -> (FSLIT("pow"), False)
        other -> pprPanic "genCCall(ppc): unknown callish op"
                          (pprCallishMachOp other)
3834 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3836 #endif /* powerpc_TARGET_ARCH */
3839 -- -----------------------------------------------------------------------------
3840 -- Generating a table-branch
-- | Generate a table-based branch ('switch') for the given index
-- expression.  Each entry of @ids@ is a possible target block;
-- Nothing entries become zero words (holes) in the jump table.
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
    -- PIC case: the jump table holds label-relative offsets, so the
    -- code is position independent; the table's own address is
    -- obtained via a dynamic reference
    (reg,e_code) <- getSomeReg expr
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    (tableReg,t_code) <- getSomeReg $ dynRef
        jumpTable = map jumpTableEntryRel ids
        -- table entries are offsets of the target block label from
        -- the table label (zero word for a missing target)
        jumpTableEntryRel Nothing
            = CmmStaticLit (CmmInt 0 wordRep)
        jumpTableEntryRel (Just (BlockId id))
            = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
            where blockLabel = mkAsmTempLabel id
        -- index into the table: table base + index * word size
        op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                   (EAIndex reg wORD_SIZE) (ImmInt 0))
#if x86_64_TARGET_ARCH && darwin_TARGET_OS
    -- on Mac OS X/x86_64, put the jump table in the text section
    -- to work around a limitation of the linker.
    -- ld64 is unable to handle the relocations for
    --     .quad L1 - L0
    -- if L0 is not preceded by a non-anonymous label in its section.
        code = e_code `appOL` t_code `appOL` toOL [
                ADD wordRep op (OpReg tableReg),
                JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
                LDATA Text (CmmDataLabel lbl : jumpTable)
        -- non-Darwin: table lives in read-only data as usual
        code = e_code `appOL` t_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                ADD wordRep op (OpReg tableReg),
                JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
    -- non-PIC case: table entries are absolute labels, and the jump
    -- goes through a scaled-index addressing mode directly
    (reg,e_code) <- getSomeReg expr
    lbl <- getNewLabelNat
        jumpTable = map jumpTableEntry ids
        op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
        code = e_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                JMP_TBL op [ id | Just id <- ids ]
#elif powerpc_TARGET_ARCH
    -- PIC case: label-relative table entries; load the offset, add
    -- the table base, and branch via the count register
    (reg,e_code) <- getSomeReg expr
    tmp <- getNewRegNat I32
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    (tableReg,t_code) <- getSomeReg $ dynRef
        jumpTable = map jumpTableEntryRel ids
        -- entries are offsets of the target from the table label
        jumpTableEntryRel Nothing
            = CmmStaticLit (CmmInt 0 wordRep)
        jumpTableEntryRel (Just (BlockId id))
            = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
            where blockLabel = mkAsmTempLabel id
        code = e_code `appOL` t_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                -- scale the index by 4 (shift left by 2) to get a
                -- byte offset into the table of 32-bit entries
                SLW tmp reg (RIImm (ImmInt 2)),
                LD I32 tmp (AddrRegReg tableReg tmp),
                ADD tmp tmp (RIReg tableReg),
                BCTR [ id | Just id <- ids ]
    -- non-PIC case: absolute entries, addressed via the high/low
    -- halves of the table label
    (reg,e_code) <- getSomeReg expr
    tmp <- getNewRegNat I32
    lbl <- getNewLabelNat
        jumpTable = map jumpTableEntry ids
        code = e_code `appOL` toOL [
                LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                SLW tmp reg (RIImm (ImmInt 2)),
                ADDIS tmp tmp (HA (ImmCLbl lbl)),
                LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
                BCTR [ id | Just id <- ids ]
-- other architectures: not yet implemented
genSwitch expr ids = panic "ToDo: genSwitch"
-- | One entry of an absolute jump table: the label of the target
--   block, or a zero word where the table has no target for that index.
jumpTableEntry mbid = case mbid of
    Nothing            -> CmmStaticLit (CmmInt 0 wordRep)
    Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
3949 -- -----------------------------------------------------------------------------
3951 -- -----------------------------------------------------------------------------
3954 -- -----------------------------------------------------------------------------
3955 -- 'condIntReg' and 'condFltReg': condition codes into registers
3957 -- Turn those condition codes into integers now (when they appear on
3958 -- the right hand side of an assignment).
3960 -- (If applicable) Do not fill the delay slots here; you will confuse the
3961 -- register allocator.
-- | Materialise a comparison result (condition codes) into a register,
-- for when a comparison appears on the right-hand side of an
-- assignment.  'condIntReg' handles integer comparisons, 'condFltReg'
-- floating-point ones.
condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#if alpha_TARGET_ARCH
-- Not implemented on Alpha.
condIntReg = panic "MachCode.condIntReg (not on Alpha)"
condFltReg = panic "MachCode.condFltReg (not on Alpha)"
#endif /* alpha_TARGET_ARCH */
3972 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

-- SETcc writes a single byte, so set a byte-sized temporary from the
-- condition flags and zero-extend it into the full destination reg.
condIntReg cond x y = do
    CondCode _ cond cond_code <- condIntCode cond x y
    tmp <- getNewRegNat I8
    code dst = cond_code `appOL` toOL [
                SETCC cond (OpReg tmp),            -- tmp := 0 or 1
                MOVZxL I8 (OpReg tmp) (OpReg dst)  -- zero-extend into dst
    return (Any I32 code)
#if i386_TARGET_ARCH

-- Same SETcc + zero-extend scheme as condIntReg, driven by the
-- floating-point comparison's condition code.
condFltReg cond x y = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp <- getNewRegNat I8
    code dst = cond_code `appOL` toOL [
                SETCC cond (OpReg tmp),
                MOVZxL I8 (OpReg tmp) (OpReg dst)
    return (Any I32 code)
#if x86_64_TARGET_ARCH

-- SSE comparisons: must also account for unordered (NaN) operands via
-- the parity flag, see the comment below.
condFltReg cond x y = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp1 <- getNewRegNat wordRep
    tmp2 <- getNewRegNat wordRep
    -- We have to worry about unordered operands (eg. comparisons
    -- against NaN).  If the operands are unordered, the comparison
    -- sets the parity flag, carry flag and zero flag.
    -- All comparisons are supposed to return false for unordered
    -- operands except for !=, which returns true.
    --
    -- Optimisation: we don't have to test the parity flag if we
    -- know the test has already excluded the unordered case: eg >
    -- and >= test for a zero carry flag, which can only occur for
    -- ordered operands.
    --
    -- ToDo: by reversing comparisons we could avoid testing the
    -- parity flag in more cases.
        NE  -> or_unordered dst
        GU  -> plain_test dst
        GEU -> plain_test dst
        _   -> and_ordered dst)
    -- unordered case already excluded: plain SETcc
    plain_test dst = toOL [
        SETCC cond (OpReg tmp1),
        MOVZxL I8 (OpReg tmp1) (OpReg dst)
    -- != : true if condition holds OR operands were unordered
    or_unordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC PARITY (OpReg tmp2),
        OR I8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL I8 (OpReg tmp2) (OpReg dst)
    -- others: true only if condition holds AND operands were ordered
    and_ordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC NOTPARITY (OpReg tmp2),
        AND I8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL I8 (OpReg tmp2) (OpReg dst)
    return (Any I32 code)
4054 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

-- x == 0 : subtract-with-carry tricks turn the zero test into a 0/1
-- value without branching (SUBs set the condition codes)
condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat I32
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any I32 code__2)

-- x == y : XOR the operands, then the same carry trick on the result
condIntReg EQQ x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat I32
    tmp2 <- getNewRegNat I32
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any I32 code__2)

-- x /= 0 : carry-based 0/1 materialisation, inverted sense
condIntReg NE x (CmmLit (CmmInt 0 d)) = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat I32
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any I32 code__2)

-- x /= y : XOR then the inverted carry trick
condIntReg NE x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat I32
    tmp2 <- getNewRegNat I32
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any I32 code__2)

-- general case: branch over dst := 0 / dst := 1
-- (delay slots deliberately left as NOPs; see comment above)
condIntReg cond x y = do
    BlockId lbl1 <- getBlockIdNat
    BlockId lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- condIntCode cond x y
    code__2 dst = cond_code `appOL` toOL [
        BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
        NEWBLOCK (BlockId lbl1),
        OR False g0 (RIImm (ImmInt 1)) dst,
        NEWBLOCK (BlockId lbl2)]
    return (Any I32 code__2)

-- floating point: same branch-over scheme, but using the FP branch
condFltReg cond x y = do
    BlockId lbl1 <- getBlockIdNat
    BlockId lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- condFltCode cond x y
    code__2 dst = cond_code `appOL` toOL [
        NOP,
        BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
        NEWBLOCK (BlockId lbl1),
        OR False g0 (RIImm (ImmInt 1)) dst,
        NEWBLOCK (BlockId lbl2)]
    return (Any I32 code__2)

#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
-- Shared worker for condIntReg/condFltReg: extract the relevant bit
-- of the condition register into a GPR (no branches needed).
condReg getCond = do
    lbl1 <- getBlockIdNat
    lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- getCond
    {- code dst = cond_code `appOL` toOL [
    code dst = cond_code
        -- rotate the wanted CR bit into the low bit and mask it
        RLWINM dst dst (bit + 1) 31 31
    -- some conditions are only available negated; invert the CR bit
    negate_code | do_negate = unitOL (CRNOR bit bit bit)
    -- which CR bit encodes this condition, and whether to negate it
    (bit, do_negate) = case cond of
    return (Any I32 code)

condIntReg cond x y = condReg (condIntCode cond x y)
condFltReg cond x y = condReg (condFltCode cond x y)
#endif /* powerpc_TARGET_ARCH */
4177 -- -----------------------------------------------------------------------------
4178 -- 'trivial*Code': deal with trivial instructions
4180 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4181 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4182 -- Only look for constants on the right hand side, because that's
4183 -- where the generic optimizer will have put them.
4185 -- Similarly, for unary instructions, we don't have to worry about
4186 -- matching an StInt as the argument, because genericOpt will already
4187 -- have handled the constant-folding.
4191 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4192 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4193 -> Maybe (Operand -> Operand -> Instr)
4194 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4195 -> Maybe (Operand -> Operand -> Instr)
4196 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4197 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4199 -> CmmExpr -> CmmExpr -- the two arguments
4202 #ifndef powerpc_TARGET_ARCH
4205 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4206 ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
4207 ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
4208 ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
4210 -> CmmExpr -> CmmExpr -- the two arguments
4216 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4217 ,IF_ARCH_i386 ((Operand -> Instr)
4218 ,IF_ARCH_x86_64 ((Operand -> Instr)
4219 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4220 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4222 -> CmmExpr -- the one argument
4225 #ifndef powerpc_TARGET_ARCH
4228 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4229 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4230 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4231 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4233 -> CmmExpr -- the one argument
4237 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH

-- literal second operand: use the reg-immediate form of the instruction
trivialCode instr x (StInt y)
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src1 = registerName register tmp
    src2 = ImmInt (fromInteger y)
    code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
    return (Any IntRep code__2)

-- general case: both operands in registers
trivialCode instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat IntRep `thenNat` \ tmp1 ->
    getNewRegNat IntRep `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1 []
    src1  = registerName register1 tmp1
    code2 = registerCode register2 tmp2 []
    src2  = registerName register2 tmp2
    code__2 dst = asmSeqThen [code1, code2] .
                  mkSeqInstr (instr src1 (RIReg src2) dst)
    return (Any IntRep code__2)

-- unary integer op
trivialUCode instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ tmp ->
    code = registerCode register tmp
    src  = registerName register tmp
    code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
    return (Any IntRep code__2)

-- dyadic floating-point op (the first argument, the MachRep, is ignored)
trivialFCode _ instr x y
  = getRegister x `thenNat` \ register1 ->
    getRegister y `thenNat` \ register2 ->
    getNewRegNat F64 `thenNat` \ tmp1 ->
    getNewRegNat F64 `thenNat` \ tmp2 ->
    code1 = registerCode register1 tmp1
    src1  = registerName register1 tmp1
    code2 = registerCode register2 tmp2
    src2  = registerName register2 tmp2
    code__2 dst = asmSeqThen [code1 [], code2 []] .
                  mkSeqInstr (instr src1 src2 dst)
    return (Any F64 code__2)

-- unary floating-point op
trivialUFCode _ instr x
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
    code = registerCode register tmp
    src  = registerName register tmp
    code__2 dst = code . mkSeqInstr (instr src dst)
    return (Any F64 code__2)

#endif /* alpha_TARGET_ARCH */
4309 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4311 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4314 The Rules of the Game are:
4316 * You cannot assume anything about the destination register dst;
4317 it may be anything, including a fixed reg.
4319 * You may compute an operand into a fixed reg, but you may not
4320 subsequently change the contents of that fixed reg. If you
4321 want to do so, first copy the value either to a temporary
4322 or into dst. You are free to modify dst even if it happens
4323 to be a fixed reg -- that's not your problem.
4325 * You cannot assume that a fixed reg will stay live over an
4326 arbitrary computation. The same applies to the dst reg.
4328 * Temporary regs obtained from getNewRegNat are distinct from
4329 each other and from all other regs, and stay live over
4330 arbitrary computations.
4332 --------------------
4334 SDM's version of The Rules:
4336 * If getRegister returns Any, that means it can generate correct
4337 code which places the result in any register, period. Even if that
4338 register happens to be read during the computation.
4340 Corollary #1: this means that if you are generating code for an
4341 operation with two arbitrary operands, you cannot assign the result
4342 of the first operand into the destination register before computing
4343 the second operand. The second operand might require the old value
4344 of the destination register.
4346 Corollary #2: A function might be able to generate more efficient
4347 code if it knows the destination register is a new temporary (and
4348 therefore not read by any of the sub-computations).
4350 * If getRegister returns Any, then the code it generates may modify only:
4351 (a) fresh temporaries
4352 (b) the destination register
4353 (c) known registers (eg. %ecx is used by shifts)
4354 In particular, it may *not* modify global registers, unless the global
4355 register happens to be the destination register.
4358 trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
4359 | not (is64BitLit lit_a) = do
4360 b_code <- getAnyReg b
4363 = b_code dst `snocOL`
4364 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4366 return (Any rep code)
4368 trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
-- This is re-used for floating pt instructions too.
-- Two-operand code generation (dst := dst `op` src): b is evaluated to
-- a non-clobberable operand, a is computed straight into the
-- destination register, then `instr` combines them.
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep
  -- We want the value of b to stay alive across the computation of a.
  -- But, we want to calculate a straight into the destination register,
  -- because the instruction only has two operands (dst := dst `op` src).
  -- The troublesome case is when the result of b is in the same register
  -- as the destination reg.  In this case, we have to save b in a
  -- new temporary across the computation of a.
  -- NOTE(review): the `let code dst` line and the clash-free branch's
  -- header are elided in this excerpt.
    | dst `regClashesWithOp` b_op =
        -- save b into the fresh temporary before a overwrites dst
        unitOL (MOV rep b_op (OpReg tmp)) `appOL`
        instr (OpReg tmp) (OpReg dst)
        -- no clash: b's operand can be used directly
        instr b_op (OpReg dst)
  return (Any rep code)
-- | Does the register clash with the operand — i.e. is it the operand
-- register itself, or does it appear in the operand's address mode?
regClashesWithOp reg op = case op of
    OpReg reg'   -> reg == reg'
    OpAddr amode -> reg `elem` addrModeRegs amode
    _            -> False
-- Unary operation: compute the operand straight into the destination
-- register and apply `instr` to it in place.
-- NOTE(review): the `let code dst = ...` binding is elided in this
-- excerpt.
trivialUCode rep instr x = do
  x_code <- getAnyReg x
  return (Any rep code)
4412 #if i386_TARGET_ARCH
-- i386 (x87) floating-point two-operand code: both arguments are
-- evaluated into registers for the three-operand GXXX pseudo-instrs.
-- NOTE(review): the `code dst =` binding is elided in this excerpt.
trivialFCode pk instr x y = do
  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
  (y_reg, y_code) <- getSomeReg y
    instr pk x_reg y_reg dst
  return (Any pk code)
4427 #if x86_64_TARGET_ARCH
4429 trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
-- Unary floating-point operation: operand in a register, result placed
-- in the destination register.
-- NOTE(review): the `let code dst = ...` binding is elided in this
-- excerpt.
trivialUFCode rep instr x = do
  (x_reg, x_code) <- getSomeReg x
  return (Any rep code)
4444 #endif /* i386_TARGET_ARCH */
4446 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4448 #if sparc_TARGET_ARCH
-- SPARC: operator with an integer-literal second operand, using the
-- register/immediate (RIImm) form of the instruction.
-- NOTE(review): the guard on the literal's range, the `= do`, and the
-- `let` header are elided in this excerpt; `tmp` is not referenced in
-- the visible lines.
trivialCode pk instr x (CmmLit (CmmInt y d))
  (src1, code) <- getSomeReg x
  tmp <- getNewRegNat I32
    src2 = ImmInt (fromInteger y)
    code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
  return (Any I32 code__2)
-- SPARC: general register/register case — evaluate both operands,
-- then dst := src1 `op` src2.
-- NOTE(review): the `let` header is elided in this excerpt; tmp1/tmp2
-- are not referenced in the visible lines.
trivialCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat I32
    tmp2 <- getNewRegNat I32
    code__2 dst = code1 `appOL` code2 `snocOL`
                  instr src1 (RIReg src2) dst
    return (Any I32 code__2)
-- SPARC floating point: when the two operand widths differ, the F32
-- side is promoted to F64 (via FxTOy into the scratch register `tmp`)
-- before the operation; the result rep is F64 unless both sides agree.
-- NOTE(review): the `let` header and the `if pk1 == pk2` line are
-- elided in this excerpt; tmp1/tmp2 are not referenced in the visible
-- lines.
trivialFCode pk instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat (cmmExprRep x)
    tmp2 <- getNewRegNat (cmmExprRep y)
    tmp <- getNewRegNat F64
    -- widen an F32 value into the F64 scratch register
    promote x = FxTOy F32 F64 x tmp
        -- same-rep case: operate directly
        code1 `appOL` code2 `snocOL`
        instr pk src1 src2 dst
      else if pk1 == F32 then
        -- left operand is F32: widen it, then operate at F64
        code1 `snocOL` promote src1 `appOL` code2 `snocOL`
        instr F64 tmp src2 dst
        -- right operand is F32: widen that one instead
        code1 `appOL` code2 `snocOL` promote src2 `snocOL`
        instr F64 src1 tmp dst
    return (Any (if pk1 == pk2 then pk1 else F64) code__2)
-- SPARC unary operation, register/immediate (RIReg) source form.
-- NOTE(review): the `let` header is elided in this excerpt; `tmp` is
-- not referenced in the visible lines.
trivialUCode pk instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat pk
    code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any pk code__2)
-- SPARC unary floating-point operation: plain register source.
-- NOTE(review): the `let` header is elided in this excerpt; `tmp` is
-- not referenced in the visible lines.
trivialUFCode pk instr x = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat pk
    code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)
4511 #endif /* sparc_TARGET_ARCH */
4513 #if powerpc_TARGET_ARCH
4516 Wolfgang's PowerPC version of The Rules:
4518 A slightly modified version of The Rules to take advantage of the fact
4519 that PowerPC instructions work on all registers and don't implicitly
4520 clobber any fixed registers.
4522 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4524 * If getRegister returns Any, then the code it generates may modify only:
4525 (a) fresh temporaries
4526 (b) the destination register
4527 It may *not* modify global registers, unless the global
4528 register happens to be the destination register.
4529 It may not clobber any other registers. In fact, only ccalls clobber any
4531 Also, it may not modify the counter register (used by genCCall).
4533 Corollary: If a getRegister for a subexpression returns Fixed, you need
4534 not move it to a fresh temporary before evaluating the next subexpression.
4535 The Fixed register won't be modified.
4536 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4538 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4539 the value of the destination register.
-- PowerPC: if the literal fits an immediate field for this rep and
-- signedness (checked by makeImmediate), use the register/immediate
-- form of the instruction.
-- NOTE(review): the `= do` line of this equation is elided in this
-- excerpt.
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
    (src1, code1) <- getSomeReg x
    let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
    return (Any rep code)
-- General PowerPC case: both operands evaluated into registers, then
-- dst := src1 `op` src2.  (The signedness flag only matters for the
-- immediate form.)
trivialCode rep signed instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    return $ Any rep (\dst -> xCode `appOL` yCode `snocOL` instr dst xReg (RIReg yReg))
-- | Operation with no immediate form: both operands must be brought
-- into registers; emits @instr dst src1 src2@.
trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm rep instr x y = do
    (xReg, xCode) <- getSomeReg x
    (yReg, yCode) <- getSomeReg y
    return $ Any rep (\dst -> xCode `appOL` yCode `snocOL` instr dst xReg yReg)
-- | PowerPC unary operation: evaluate the operand, then dst := op src.
trivialUCode rep instr x = do
    (srcReg, srcCode) <- getSomeReg x
    return $ Any rep (\dst -> srcCode `snocOL` instr dst srcReg)
-- There is no "remainder" instruction on the PPC, so we have to do
-- it ourselves: divide, multiply the quotient back, and subtract.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- NOTE(review): the division and subtraction instructions of the
-- emitted sequence are elided in this excerpt; only the multiply-back
-- step is visible.
remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr)
              -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let code dst = code1 `appOL` code2 `appOL` toOL [
            MULLW dst dst (RIReg src2),
    return (Any rep code)
4584 #endif /* powerpc_TARGET_ARCH */
4587 -- -----------------------------------------------------------------------------
4588 -- Coercing to/from integer/floating-point...
4590 -- When going to integer, we truncate (round towards 0).
4592 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4593 -- conversions. We have to store temporaries in memory to move
4594 -- between the integer and the floating point register sets.
4596 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
-- pretend, on sparc at least, that double and float regs are separate
4598 -- kinds, so the value has to be computed into one kind before being
4599 -- explicitly "converted" to live in the other kind.
4601 coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
4602 coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
4604 #if sparc_TARGET_ARCH
4605 coerceDbl2Flt :: CmmExpr -> NatM Register
4606 coerceFlt2Dbl :: CmmExpr -> NatM Register
4609 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4611 #if alpha_TARGET_ARCH
-- NOTE(review): Alpha int->float coercion fragment — the equation's
-- left-hand side and several emitted instructions are elided in this
-- excerpt.  The value moves between register banks through the stack
-- (spRel 0); the result rep is F64.
  = getRegister x `thenNat` \ register ->
    getNewRegNat IntRep `thenNat` \ reg ->
    code = registerCode register reg
    src = registerName register reg
    code__2 dst = code . mkSeqInstrs [
        LD TF dst (spRel 0),
  return (Any F64 code__2)
-- NOTE(review): Alpha float->int coercion fragment — the equation's
-- left-hand side and several emitted instructions are elided in this
-- excerpt.  The float temporary is stored to the stack (spRel 0) to
-- cross register banks; the result rep is IntRep.
  = getRegister x `thenNat` \ register ->
    getNewRegNat F64 `thenNat` \ tmp ->
    code = registerCode register tmp
    src = registerName register tmp
    code__2 dst = code . mkSeqInstrs [
        ST TF tmp (spRel 0),
  return (Any IntRep code__2)
4642 #endif /* alpha_TARGET_ARCH */
4644 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4646 #if i386_TARGET_ARCH
-- i386 (x87): integer -> float via the GITOF/GITOD pseudo-instructions.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceInt2FP from to x = do
   (x_reg, x_code) <- getSomeReg x
        -- choose the pseudo-op for the target float width
        opc  = case to of F32 -> GITOF; F64 -> GITOD
        code dst = x_code `snocOL` opc x_reg dst
        -- ToDo: works for non-I32 reps?
   return (Any to code)
-- i386 (x87): float -> integer (truncating, per the section comment)
-- via the GFTOI/GDTOI pseudo-instructions.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceFP2Int from to x = do
   (x_reg, x_code) <- getSomeReg x
        -- choose the pseudo-op for the source float width
        opc  = case from of F32 -> GFTOI; F64 -> GDTOI
        code dst = x_code `snocOL` opc x_reg dst
        -- ToDo: works for non-I32 reps?
   return (Any to code)
4668 #endif /* i386_TARGET_ARCH */
4670 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4672 #if x86_64_TARGET_ARCH
-- x86_64 (SSE2): truncating float -> int via CVTTSS2SIQ/CVTTSD2SIQ.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
        opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- x86_64 (SSE2): int -> float via CVTSI2SS/CVTSI2SD.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceInt2FP from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
        opc = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
        code dst = x_code `snocOL` opc x_op dst
    return (Any to code) -- works even if the destination rep is <I32
-- Float <-> double conversion (SSE2): CVTSD2SS narrows to F32,
-- CVTSS2SD widens to F64, selected by the target rep.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
coerceFP2FP to x = do
    (x_reg, x_code) <- getSomeReg x
        opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
        code dst = x_code `snocOL` opc x_reg dst
    return (Any to code)
4701 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4703 #if sparc_TARGET_ARCH
-- SPARC: move the integer across register banks through the stack
-- (just below the stack pointer, spRel (-2)), then convert in the FP
-- bank with FxTOy.
-- NOTE(review): the `let` header is elided in this excerpt.
coerceInt2FP pk1 pk2 x = do
    (src, code) <- getSomeReg x
    code__2 dst = code `appOL` toOL [
        ST pk1 src (spRel (-2)),
        LD pk1 (spRel (-2)) dst,
        FxTOy pk1 pk2 dst dst]
    return (Any pk2 code__2)
-- SPARC: convert inside the FP bank with FxTOy, then move the result
-- to an integer register through the stack (spRel (-2)).
-- NOTE(review): the `let` header and the start of the instruction list
-- are elided in this excerpt; `reg` is not referenced in the visible
-- lines.
coerceFP2Int pk fprep x = do
    (src, code) <- getSomeReg x
    reg <- getNewRegNat fprep
    tmp <- getNewRegNat pk
    code__2 dst = ASSERT(fprep == F64 || fprep == F32)
        FxTOy fprep pk src tmp,
        ST pk tmp (spRel (-2)),
        LD pk (spRel (-2)) dst]
    return (Any pk code__2)
-- | Narrow a double to a float with a single FxTOy conversion.
coerceDbl2Flt x = do
    (srcReg, srcCode) <- getSomeReg x
    let code dst = srcCode `snocOL` FxTOy F64 F32 srcReg dst
    return (Any F32 code)
-- | Widen a float to a double with a single FxTOy conversion.
coerceFlt2Dbl x = do
    (srcReg, srcCode) <- getSomeReg x
    let code dst = srcCode `snocOL` FxTOy F32 F64 srcReg dst
    return (Any F64 code)
4737 #endif /* sparc_TARGET_ARCH */
4739 #if powerpc_TARGET_ARCH
-- PowerPC int -> float: no direct conversion instruction, so the
-- classic bias-constant trick is used.  The double image
-- 0x43300000_(src XOR 0x80000000) is assembled on the stack and
-- loaded, then the constant 0x43300000_80000000 (emitted at `lbl`,
-- addressed via `dynRef` so it also works under PIC) is subtracted,
-- leaving the true value.
-- NOTE(review): the `let` header, the LDATA line for `lbl`, the load
-- of the constant, and the nilOL default cases of maybe_exts /
-- maybe_frsp are elided in this excerpt.
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat I32
    ftmp <- getNewRegNat F64
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
        code' dst = code `appOL` maybe_exts `appOL` toOL [
            -- in-memory image of the bias constant
            CmmStaticLit (CmmInt 0x43300000 I32),
            CmmStaticLit (CmmInt 0x80000000 I32)],
            -- flip the integer's sign bit...
            XORIS itmp src (ImmInt 0x8000),
            ST I32 itmp (spRel 3),
            -- ...and pair it with the 0x4330 high word on the stack
            LIS itmp (ImmInt 0x4330),
            ST I32 itmp (spRel 2),
            LD F64 ftmp (spRel 2)
            ] `appOL` addr_code `appOL` toOL [
            -- subtract the bias to recover the true value
            FSUB F64 dst ftmp dst
            ] `appOL` maybe_frsp dst
        -- narrow source reps must be sign-extended first
        maybe_exts = case fromRep of
                        I8 ->  unitOL $ EXTS I8 src src
                        I16 -> unitOL $ EXTS I16 src src
        -- round down to single precision when the target rep is F32
        maybe_frsp dst = case toRep of
                        F32 -> unitOL $ FRSP dst dst
    return (Any toRep code')
-- PowerPC float -> int: convert to an integer image inside an FP
-- register, spill the 64-bit result to the stack, and reload its low
-- word as the integer value.
-- NOTE(review): the `let` header and the actual convert instruction
-- (presumably FCTIWZ — confirm against the full source) are elided in
-- this excerpt.
coerceFP2Int fromRep toRep x = do
    -- the reps don't really matter: F*->F64 and I32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat F64
        code' dst = code `appOL` toOL [
            -- convert to int in FP reg
            -- store value (64bit) from FP to stack
            ST F64 tmp (spRel 2),
            -- read low word of value (high word is undefined)
            LD I32 dst (spRel 3)]
    return (Any toRep code')
4785 #endif /* powerpc_TARGET_ARCH */
4788 -- -----------------------------------------------------------------------------
4789 -- eXTRA_STK_ARGS_HERE
4791 -- We (allegedly) put the first six C-call arguments in registers;
4792 -- where do we start putting the rest of them?
4794 -- Moved from MachInstrs (SDM):
4796 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4797 eXTRA_STK_ARGS_HERE :: Int
4799 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))