2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
36 -- Our intermediate code:
38 import PprCmm ( pprExpr )
41 import ClosureInfo ( C_SRT(..) )
44 import StaticFlags ( opt_PIC )
45 import ForeignCall ( CCallConv(..) )
48 import qualified Outputable as O
51 import FastBool ( isFastTrue )
52 import Constants ( wORD_SIZE )
54 import Debug.Trace ( trace )
56 import Control.Monad ( mapAndUnzipM )
57 import Data.Maybe ( fromJust )
63 -- -----------------------------------------------------------------------------
64 -- Top-level of the instruction selector
66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
67 -- They are really trees of insns to facilitate fast appending, where a
68 -- left-to-right traversal (pre-order?) yields the insns in the correct
71 type InstrBlock = OrdList Instr
-- | Instruction-select one top-level Cmm chunk.
--
-- For a procedure we code-gen each basic block, then (on PIC platforms)
-- prepend the position-independent-code base initialisation if the monad
-- recorded a PIC base register.  Any statics the blocks emitted become
-- sibling 'CmmData' tops.  Data sections pass through untranslated: the
-- backend consumes 'CmmStatic' directly.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): the scrutinee line was lost in this dump; restored from
  -- the two surviving alternatives.
  case picBaseMb of
      Just picBase -> initializePicBase picBase tops
      Nothing      -> return tops
cmmTopCodeGen (CmmData sec dat) =
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Instruction-select one Cmm basic block.
--
-- Code generation may introduce new basic block boundaries, which are
-- indicated by the NEWBLOCK pseudo-instruction.  We must split up the
-- instruction stream into basic blocks again.  Also, we extract LDATAs
-- emitted mid-stream into separate data tops.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock], [NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- NOTE(review): the 'let' keyword introducing these bindings was lost
  -- in this dump; restored.
  let
        -- Right fold so blocks come out in source order; the accumulator
        -- is (instrs of current block, later blocks, extracted statics).
        (top, other_blocks, statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs, blocks, statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs, blocks, statics)
          = (instrs, blocks, CmmData sec dat : statics)
        mkBlocks instr (instrs, blocks, statics)
          = (instr : instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements, concatenating the generated
-- instruction blocks in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)
-- | Translate a single Cmm statement into an instruction block.
--
-- Assignments and stores dispatch on the type of the value: float,
-- 64-bit-on-32-bit-platform, or plain integer.  'CmmReturn' must have
-- been removed by the CPS pass before we get here.
--
-- NOTE(review): the 'CmmAssign'/'CmmStore' case headers, the '#endif'
-- lines and the 'CmmReturn' alternative were lost in this dump; they are
-- restored here from the surviving guard bodies.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty -> assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignReg_I64Code      reg src
#endif
      | otherwise      -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty -> assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignMem_I64Code      addr src
#endif
      | otherwise      -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty

    CmmCall target result_regs args _ _
       -> genCCall target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params    -> genJump arg
    CmmReturn params      ->
      panic "stmtToInstrs: return statement should have been cps'd away"
143 -- -----------------------------------------------------------------------------
144 -- General things for putting together code sequences
146 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
147 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' node into the equivalent explicit addition,
-- @reg + off@, at the register's own width.  Callers only pass
-- 'CmmRegOff' nodes, hence the single equation.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off) =
    let w = typeWidth (cmmRegType reg)
    in CmmMachOp (MO_Add w)
                 [CmmReg reg, CmmLit (CmmInt (fromIntegral off) w)]
153 -- -----------------------------------------------------------------------------
154 -- Code gen for 64-bit arithmetic on 32-bit platforms
157 Simple support for generating 64-bit code (ie, 64 bit values and 64
158 bit assignments) on 32-bit platforms. Unlike the main code generator
159 we merely shoot for generating working code as simply as possible, and
160 pay little attention to code quality. Specifically, there is no
161 attempt to deal cleverly with the fixed-vs-floating register
162 distinction; all values are generated into (pairs of) floating
163 registers, even if this would mean some redundant reg-reg moves as a
164 result. Only one of the VRegUniques is returned, since it will be
165 of the VRegUniqueLo form, and the upper-half VReg can be determined
166 by applying getHiVRegFromLo to it.
169 data ChildCode64 -- a.k.a "Register64"
172 Reg -- the lower 32-bit temporary which contains the
173 -- result; use getHiVRegFromLo to find the other
174 -- VRegUnique. Rules of this simplified insn
175 -- selection game are therefore that the returned
176 -- Reg may be modified
178 #if WORD_SIZE_IN_BITS==32
179 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
180 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
183 #ifndef x86_64_TARGET_ARCH
184 iselExpr64 :: CmmExpr -> NatM ChildCode64
187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | (i386) Store a 64-bit value to memory as two 32-bit moves.
--
-- The value comes back from 'iselExpr64' as the low-word vreg; the high
-- word is its paired vreg via 'getHiVRegFromLo'.  Little-endian layout:
-- low word at @addr@, high word at @addr+4@.
--
-- NOTE(review): the 'let' keyword was lost in this dump; restored.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        -- fromJust: addrOffset on an x86 Amode by a small constant
        -- always succeeds -- presumably, TODO confirm
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
204 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
205 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
207 r_dst_lo = mkVReg u_dst II32
208 r_dst_hi = getHiVRegFromLo r_dst_lo
209 r_src_hi = getHiVRegFromLo r_src_lo
210 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
211 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
214 vcode `snocOL` mov_lo `snocOL` mov_hi
217 assignReg_I64Code lvalue valueTree
218 = panic "assignReg_I64Code(i386): invalid lvalue"
222 iselExpr64 (CmmLit (CmmInt i _)) = do
223 (rlo,rhi) <- getNewRegPairNat II32
225 r = fromIntegral (fromIntegral i :: Word32)
226 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
228 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
229 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
232 return (ChildCode64 code rlo)
234 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
235 Amode addr addr_code <- getAmode addrTree
236 (rlo,rhi) <- getNewRegPairNat II32
238 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
239 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
242 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
246 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
247 = return (ChildCode64 nilOL (mkVReg vu II32))
249 -- we handle addition, but rather badly
250 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
251 ChildCode64 code1 r1lo <- iselExpr64 e1
252 (rlo,rhi) <- getNewRegPairNat II32
254 r = fromIntegral (fromIntegral i :: Word32)
255 q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
256 r1hi = getHiVRegFromLo r1lo
258 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
259 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
260 MOV II32 (OpReg r1hi) (OpReg rhi),
261 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
263 return (ChildCode64 code rlo)
265 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
266 ChildCode64 code1 r1lo <- iselExpr64 e1
267 ChildCode64 code2 r2lo <- iselExpr64 e2
268 (rlo,rhi) <- getNewRegPairNat II32
270 r1hi = getHiVRegFromLo r1lo
271 r2hi = getHiVRegFromLo r2lo
274 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
275 ADD II32 (OpReg r2lo) (OpReg rlo),
276 MOV II32 (OpReg r1hi) (OpReg rhi),
277 ADC II32 (OpReg r2hi) (OpReg rhi) ]
279 return (ChildCode64 code rlo)
281 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
283 r_dst_lo <- getNewRegNat II32
284 let r_dst_hi = getHiVRegFromLo r_dst_lo
287 ChildCode64 (code `snocOL`
288 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
293 = pprPanic "iselExpr64(i386)" (ppr expr)
295 #endif /* i386_TARGET_ARCH */
297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
299 #if sparc_TARGET_ARCH
301 assignMem_I64Code addrTree valueTree = do
302 Amode addr addr_code <- getAmode addrTree
303 ChildCode64 vcode rlo <- iselExpr64 valueTree
304 (src, code) <- getSomeReg addrTree
306 rhi = getHiVRegFromLo rlo
308 mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
309 mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
310 return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
312 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
313 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
315 r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
316 r_dst_hi = getHiVRegFromLo r_dst_lo
317 r_src_hi = getHiVRegFromLo r_src_lo
318 mov_lo = mkMOV r_src_lo r_dst_lo
319 mov_hi = mkMOV r_src_hi r_dst_hi
320 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
321 return (vcode `snocOL` mov_hi `snocOL` mov_lo)
322 assignReg_I64Code lvalue valueTree
323 = panic "assignReg_I64Code(sparc): invalid lvalue"
326 -- Don't delete this -- it's very handy for debugging.
328 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
329 -- = panic "iselExpr64(???)"
331 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
332 Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
333 rlo <- getNewRegNat II32
334 let rhi = getHiVRegFromLo rlo
335 mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
336 mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
338 ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
342 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
343 r_dst_lo <- getNewRegNat II32
344 let r_dst_hi = getHiVRegFromLo r_dst_lo
345 r_src_lo = mkVReg uq II32
346 r_src_hi = getHiVRegFromLo r_src_lo
347 mov_lo = mkMOV r_src_lo r_dst_lo
348 mov_hi = mkMOV r_src_hi r_dst_hi
349 mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
351 ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
355 = pprPanic "iselExpr64(sparc)" (ppr expr)
357 #endif /* sparc_TARGET_ARCH */
359 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
361 #if powerpc_TARGET_ARCH
363 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
364 getI64Amodes addrTree = do
365 Amode hi_addr addr_code <- getAmode addrTree
366 case addrOffset hi_addr 4 of
367 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
368 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
369 return (AddrRegImm hi_ptr (ImmInt 0),
370 AddrRegImm hi_ptr (ImmInt 4),
373 assignMem_I64Code addrTree valueTree = do
374 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
375 ChildCode64 vcode rlo <- iselExpr64 valueTree
377 rhi = getHiVRegFromLo rlo
380 mov_hi = ST II32 rhi hi_addr
381 mov_lo = ST II32 rlo lo_addr
383 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
385 assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
386 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
388 r_dst_lo = mkVReg u_dst II32
389 r_dst_hi = getHiVRegFromLo r_dst_lo
390 r_src_hi = getHiVRegFromLo r_src_lo
391 mov_lo = MR r_dst_lo r_src_lo
392 mov_hi = MR r_dst_hi r_src_hi
395 vcode `snocOL` mov_lo `snocOL` mov_hi
398 assignReg_I64Code lvalue valueTree
399 = panic "assignReg_I64Code(powerpc): invalid lvalue"
402 -- Don't delete this -- it's very handy for debugging.
404 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
405 -- = panic "iselExpr64(???)"
407 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
408 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
409 (rlo, rhi) <- getNewRegPairNat II32
410 let mov_hi = LD II32 rhi hi_addr
411 mov_lo = LD II32 rlo lo_addr
412 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
415 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
416 = return (ChildCode64 nilOL (mkVReg vu II32))
-- | (powerpc) Materialise a 64-bit integer literal into a register pair.
--
-- Each 32-bit half is built with LIS (load-immediate-shifted: puts the
-- 16-bit operand in the top half, zeroes the bottom) followed by OR-ing
-- in the low 16 bits.
--
-- BUG FIX: the final OR previously targeted rlo
-- (@OR rlo rlo (RIImm $ ImmInt half2)@), which clobbered the already
-- correct low word and left the bottom 16 bits of rhi zero.  The low
-- half of the HIGH word (half2) must be OR'd into rhi.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)

        code = toOL [
            LIS rlo (ImmInt half1),
            OR rlo rlo (RIImm $ ImmInt half0),
            LIS rhi (ImmInt half3),
            OR rhi rhi (RIImm $ ImmInt half2)
            ]
  return (ChildCode64 code rlo)
435 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
436 ChildCode64 code1 r1lo <- iselExpr64 e1
437 ChildCode64 code2 r2lo <- iselExpr64 e2
438 (rlo,rhi) <- getNewRegPairNat II32
440 r1hi = getHiVRegFromLo r1lo
441 r2hi = getHiVRegFromLo r2lo
444 toOL [ ADDC rlo r1lo r2lo,
447 return (ChildCode64 code rlo)
449 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
450 (expr_reg,expr_code) <- getSomeReg expr
451 (rlo, rhi) <- getNewRegPairNat II32
452 let mov_hi = LI rhi (ImmInt 0)
453 mov_lo = MR rlo expr_reg
454 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
457 = pprPanic "iselExpr64(powerpc)" (ppr expr)
459 #endif /* powerpc_TARGET_ARCH */
462 -- -----------------------------------------------------------------------------
463 -- The 'Register' type
465 -- 'Register's passed up the tree. If the stix code forces the register
466 -- to live in a pre-decided machine register, it comes out as @Fixed@;
467 -- otherwise, it comes out as @Any@, and the parent can decide which
468 -- register to put it in.
471 = Fixed Size Reg InstrBlock
472 | Any Size (Reg -> InstrBlock)
-- | Re-label a 'Register' with a different width.  No instructions are
-- generated or changed -- the register/code inside is passed through
-- untouched; only the 'Size' tag is replaced.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep r size =
    case r of
      Fixed _ reg code -> Fixed size reg code
      Any   _ codefn   -> Any   size codefn
480 -- -----------------------------------------------------------------------------
481 -- Utils based on getRegister, below
483 -- The dual to getAnyReg: compute an expression into a register, but
484 -- we don't mind which one it is.
-- | The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.  For an 'Any' result we allocate a
-- fresh virtual register; for a 'Fixed' result we hand back the fixed
-- register itself.
--
-- NOTE(review): the defining LHS, the 'case', and the 'Fixed' branch
-- were lost in this dump; restored from the surviving 'Any' branch.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)
495 -- -----------------------------------------------------------------------------
496 -- Grab the Reg for a CmmReg
-- | Grab the 'Reg' for a 'CmmReg'.
--
-- Local registers map onto fresh-uniqued virtual registers.  By this
-- stage the only MagicIds remaining should be the ones which map to a
-- real machine register on this platform; anything else is a compiler
-- bug, hence the panic.
getRegisterReg :: CmmReg -> Reg
getRegisterReg (CmmLocal (LocalReg u pk)) = mkVReg u (cmmTypeSize pk)
getRegisterReg (CmmGlobal mid) =
    case get_GlobalReg_reg_or_addr mid of
      Left (RealReg rrno) -> RealReg rrno
      _other              -> pprPanic "getRegisterReg-memory"
                                      (ppr $ CmmGlobal mid)
512 -- -----------------------------------------------------------------------------
513 -- Generate code to get a subtree into a Register
515 -- Don't delete this -- it's very handy for debugging.
517 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
518 -- = panic "getRegister(???)"
520 getRegister :: CmmExpr -> NatM Register
522 #if !x86_64_TARGET_ARCH
523 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
524 -- register, it can only be used for rip-relative addressing.
525 getRegister (CmmReg (CmmGlobal PicBaseReg))
527 reg <- getPicBaseNat wordSize
528 return (Fixed wordSize reg nilOL)
531 getRegister (CmmReg reg)
532 = return (Fixed (cmmTypeSize (cmmRegType reg))
533 (getRegisterReg reg) nilOL)
535 getRegister tree@(CmmRegOff _ _)
536 = getRegister (mangleIndexTree tree)
539 #if WORD_SIZE_IN_BITS==32
540 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
541 -- TO_W_(x), TO_W_(x >> 32)
543 getRegister (CmmMachOp (MO_UU_Conv W64 W32)
544 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
545 ChildCode64 code rlo <- iselExpr64 x
546 return $ Fixed II32 (getHiVRegFromLo rlo) code
548 getRegister (CmmMachOp (MO_SS_Conv W64 W32)
549 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
550 ChildCode64 code rlo <- iselExpr64 x
551 return $ Fixed II32 (getHiVRegFromLo rlo) code
553 getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
554 ChildCode64 code rlo <- iselExpr64 x
555 return $ Fixed II32 rlo code
557 getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
558 ChildCode64 code rlo <- iselExpr64 x
559 return $ Fixed II32 rlo code
563 -- end of machine-"independent" bit; here we go on the rest...
565 #if alpha_TARGET_ARCH
567 getRegister (StDouble d)
568 = getBlockIdNat `thenNat` \ lbl ->
569 getNewRegNat PtrRep `thenNat` \ tmp ->
570 let code dst = mkSeqInstrs [
571 LDATA RoDataSegment lbl [
572 DATA TF [ImmLab (rational d)]
574 LDA tmp (AddrImm (ImmCLbl lbl)),
575 LD TF dst (AddrReg tmp)]
577 return (Any FF64 code)
579 getRegister (StPrim primop [x]) -- unary PrimOps
581 IntNegOp -> trivialUCode (NEG Q False) x
583 NotOp -> trivialUCode NOT x
585 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
586 DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
588 OrdOp -> coerceIntCode IntRep x
591 Float2IntOp -> coerceFP2Int x
592 Int2FloatOp -> coerceInt2FP pr x
593 Double2IntOp -> coerceFP2Int x
594 Int2DoubleOp -> coerceInt2FP pr x
596 Double2FloatOp -> coerceFltCode x
597 Float2DoubleOp -> coerceFltCode x
599 other_op -> getRegister (StCall fn CCallConv FF64 [x])
601 fn = case other_op of
602 FloatExpOp -> fsLit "exp"
603 FloatLogOp -> fsLit "log"
604 FloatSqrtOp -> fsLit "sqrt"
605 FloatSinOp -> fsLit "sin"
606 FloatCosOp -> fsLit "cos"
607 FloatTanOp -> fsLit "tan"
608 FloatAsinOp -> fsLit "asin"
609 FloatAcosOp -> fsLit "acos"
610 FloatAtanOp -> fsLit "atan"
611 FloatSinhOp -> fsLit "sinh"
612 FloatCoshOp -> fsLit "cosh"
613 FloatTanhOp -> fsLit "tanh"
614 DoubleExpOp -> fsLit "exp"
615 DoubleLogOp -> fsLit "log"
616 DoubleSqrtOp -> fsLit "sqrt"
617 DoubleSinOp -> fsLit "sin"
618 DoubleCosOp -> fsLit "cos"
619 DoubleTanOp -> fsLit "tan"
620 DoubleAsinOp -> fsLit "asin"
621 DoubleAcosOp -> fsLit "acos"
622 DoubleAtanOp -> fsLit "atan"
623 DoubleSinhOp -> fsLit "sinh"
624 DoubleCoshOp -> fsLit "cosh"
625 DoubleTanhOp -> fsLit "tanh"
627 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
629 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
631 CharGtOp -> trivialCode (CMP LTT) y x
632 CharGeOp -> trivialCode (CMP LE) y x
633 CharEqOp -> trivialCode (CMP EQQ) x y
634 CharNeOp -> int_NE_code x y
635 CharLtOp -> trivialCode (CMP LTT) x y
636 CharLeOp -> trivialCode (CMP LE) x y
638 IntGtOp -> trivialCode (CMP LTT) y x
639 IntGeOp -> trivialCode (CMP LE) y x
640 IntEqOp -> trivialCode (CMP EQQ) x y
641 IntNeOp -> int_NE_code x y
642 IntLtOp -> trivialCode (CMP LTT) x y
643 IntLeOp -> trivialCode (CMP LE) x y
645 WordGtOp -> trivialCode (CMP ULT) y x
646 WordGeOp -> trivialCode (CMP ULE) x y
647 WordEqOp -> trivialCode (CMP EQQ) x y
648 WordNeOp -> int_NE_code x y
649 WordLtOp -> trivialCode (CMP ULT) x y
650 WordLeOp -> trivialCode (CMP ULE) x y
652 AddrGtOp -> trivialCode (CMP ULT) y x
653 AddrGeOp -> trivialCode (CMP ULE) y x
654 AddrEqOp -> trivialCode (CMP EQQ) x y
655 AddrNeOp -> int_NE_code x y
656 AddrLtOp -> trivialCode (CMP ULT) x y
657 AddrLeOp -> trivialCode (CMP ULE) x y
659 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
660 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
661 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
662 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
663 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
664 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
666 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
667 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
668 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
669 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
670 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
671 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
673 IntAddOp -> trivialCode (ADD Q False) x y
674 IntSubOp -> trivialCode (SUB Q False) x y
675 IntMulOp -> trivialCode (MUL Q False) x y
676 IntQuotOp -> trivialCode (DIV Q False) x y
677 IntRemOp -> trivialCode (REM Q False) x y
679 WordAddOp -> trivialCode (ADD Q False) x y
680 WordSubOp -> trivialCode (SUB Q False) x y
681 WordMulOp -> trivialCode (MUL Q False) x y
682 WordQuotOp -> trivialCode (DIV Q True) x y
683 WordRemOp -> trivialCode (REM Q True) x y
685 FloatAddOp -> trivialFCode W32 (FADD TF) x y
686 FloatSubOp -> trivialFCode W32 (FSUB TF) x y
687 FloatMulOp -> trivialFCode W32 (FMUL TF) x y
688 FloatDivOp -> trivialFCode W32 (FDIV TF) x y
690 DoubleAddOp -> trivialFCode W64 (FADD TF) x y
691 DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
692 DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
693 DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
695 AddrAddOp -> trivialCode (ADD Q False) x y
696 AddrSubOp -> trivialCode (SUB Q False) x y
697 AddrRemOp -> trivialCode (REM Q True) x y
699 AndOp -> trivialCode AND x y
700 OrOp -> trivialCode OR x y
701 XorOp -> trivialCode XOR x y
702 SllOp -> trivialCode SLL x y
703 SrlOp -> trivialCode SRL x y
705 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
706 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
707 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
709 FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
710 DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
712 {- ------------------------------------------------------------
713 Some bizarre special code for getting condition codes into
714 registers. Integer non-equality is a test for equality
715 followed by an XOR with 1. (Integer comparisons always set
716 the result register to 0 or 1.) Floating point comparisons of
717 any kind leave the result in a floating point register, so we
718 need to wrangle an integer register out of things.
720 int_NE_code :: StixTree -> StixTree -> NatM Register
723 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
724 getNewRegNat IntRep `thenNat` \ tmp ->
726 code = registerCode register tmp
727 src = registerName register tmp
728 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
730 return (Any IntRep code__2)
732 {- ------------------------------------------------------------
733 Comments for int_NE_code also apply to cmpF_code
736 :: (Reg -> Reg -> Reg -> Instr)
738 -> StixTree -> StixTree
741 cmpF_code instr cond x y
742 = trivialFCode pr instr x y `thenNat` \ register ->
743 getNewRegNat FF64 `thenNat` \ tmp ->
744 getBlockIdNat `thenNat` \ lbl ->
746 code = registerCode register tmp
747 result = registerName register tmp
749 code__2 dst = code . mkSeqInstrs [
750 OR zeroh (RIImm (ImmInt 1)) dst,
751 BF cond result (ImmCLbl lbl),
752 OR zeroh (RIReg zeroh) dst,
755 return (Any IntRep code__2)
757 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
758 ------------------------------------------------------------
760 getRegister (CmmLoad pk mem)
761 = getAmode mem `thenNat` \ amode ->
763 code = amodeCode amode
764 src = amodeAddr amode
765 size = primRepToSize pk
766 code__2 dst = code . mkSeqInstr (LD size dst src)
768 return (Any pk code__2)
770 getRegister (StInt i)
773 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
775 return (Any IntRep code)
778 code dst = mkSeqInstr (LDI Q dst src)
780 return (Any IntRep code)
782 src = ImmInt (fromInteger i)
787 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
789 return (Any PtrRep code)
792 imm__2 = case imm of Just x -> x
794 #endif /* alpha_TARGET_ARCH */
796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
800 getRegister (CmmLit (CmmFloat f W32)) = do
801 lbl <- getNewLabelNat
802 dflags <- getDynFlagsNat
803 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
804 Amode addr addr_code <- getAmode dynRef
808 CmmStaticLit (CmmFloat f W32)]
809 `consOL` (addr_code `snocOL`
812 return (Any FF32 code)
815 getRegister (CmmLit (CmmFloat d W64))
817 = let code dst = unitOL (GLDZ dst)
818 in return (Any FF64 code)
821 = let code dst = unitOL (GLD1 dst)
822 in return (Any FF64 code)
825 lbl <- getNewLabelNat
826 dflags <- getDynFlagsNat
827 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
828 Amode addr addr_code <- getAmode dynRef
832 CmmStaticLit (CmmFloat d W64)]
833 `consOL` (addr_code `snocOL`
836 return (Any FF64 code)
838 #endif /* i386_TARGET_ARCH */
840 #if x86_64_TARGET_ARCH
842 getRegister (CmmLit (CmmFloat 0.0 w)) = do
843 let size = floatSize w
844 code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
845 -- I don't know why there are xorpd, xorps, and pxor instructions.
846 -- They all appear to do the same thing --SDM
847 return (Any size code)
849 getRegister (CmmLit (CmmFloat f w)) = do
850 lbl <- getNewLabelNat
851 let code dst = toOL [
854 CmmStaticLit (CmmFloat f w)],
855 MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
858 return (Any size code)
859 where size = floatSize w
861 #endif /* x86_64_TARGET_ARCH */
863 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
865 -- catch simple cases of zero- or sign-extended load
866 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
867 code <- intLoadCode (MOVZxL II8) addr
868 return (Any II32 code)
870 getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
871 code <- intLoadCode (MOVSxL II8) addr
872 return (Any II32 code)
874 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
875 code <- intLoadCode (MOVZxL II16) addr
876 return (Any II32 code)
878 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
879 code <- intLoadCode (MOVSxL II16) addr
880 return (Any II32 code)
884 #if x86_64_TARGET_ARCH
886 -- catch simple cases of zero- or sign-extended load
887 getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
888 code <- intLoadCode (MOVZxL II8) addr
889 return (Any II64 code)
891 getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
892 code <- intLoadCode (MOVSxL II8) addr
893 return (Any II64 code)
895 getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
896 code <- intLoadCode (MOVZxL II16) addr
897 return (Any II64 code)
899 getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
900 code <- intLoadCode (MOVSxL II16) addr
901 return (Any II64 code)
903 getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
904 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
905 return (Any II64 code)
907 getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
908 code <- intLoadCode (MOVSxL II32) addr
909 return (Any II64 code)
913 #if x86_64_TARGET_ARCH
914 getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
915 CmmLit displacement])
916 = return $ Any II64 (\dst -> unitOL $
917 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
920 #if x86_64_TARGET_ARCH
921 getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
922 x_code <- getAnyReg x
923 lbl <- getNewLabelNat
925 code dst = x_code dst `appOL` toOL [
926 -- This is how gcc does it, so it can't be that bad:
927 LDATA ReadOnlyData16 [
930 CmmStaticLit (CmmInt 0x80000000 W32),
931 CmmStaticLit (CmmInt 0 W32),
932 CmmStaticLit (CmmInt 0 W32),
933 CmmStaticLit (CmmInt 0 W32)
935 XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
936 -- xorps, so we need the 128-bit constant
937 -- ToDo: rip-relative
940 return (Any FF32 code)
942 getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
943 x_code <- getAnyReg x
944 lbl <- getNewLabelNat
946 -- This is how gcc does it, so it can't be that bad:
947 code dst = x_code dst `appOL` toOL [
948 LDATA ReadOnlyData16 [
951 CmmStaticLit (CmmInt 0x8000000000000000 W64),
952 CmmStaticLit (CmmInt 0 W64)
954 -- gcc puts an unpck here. Wonder if we need it.
955 XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
956 -- xorpd, so we need the 128-bit constant
959 return (Any FF64 code)
962 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
964 getRegister (CmmMachOp mop [x]) -- unary MachOps
967 MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
968 MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
971 MO_S_Neg w -> triv_ucode NEGI (intSize w)
972 MO_F_Neg w -> triv_ucode NEGI (floatSize w)
973 MO_Not w -> triv_ucode NOT (intSize w)
976 MO_UU_Conv W32 W8 -> toI8Reg W32 x
977 MO_SS_Conv W32 W8 -> toI8Reg W32 x
978 MO_UU_Conv W16 W8 -> toI8Reg W16 x
979 MO_SS_Conv W16 W8 -> toI8Reg W16 x
980 MO_UU_Conv W32 W16 -> toI16Reg W32 x
981 MO_SS_Conv W32 W16 -> toI16Reg W32 x
983 #if x86_64_TARGET_ARCH
984 MO_UU_Conv W64 W32 -> conversionNop II64 x
985 MO_SS_Conv W64 W32 -> conversionNop II64 x
986 MO_UU_Conv W64 W16 -> toI16Reg W64 x
987 MO_SS_Conv W64 W16 -> toI16Reg W64 x
988 MO_UU_Conv W64 W8 -> toI8Reg W64 x
989 MO_SS_Conv W64 W8 -> toI8Reg W64 x
992 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
993 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
996 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
997 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
998 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
1000 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
1001 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
1002 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
1004 #if x86_64_TARGET_ARCH
1005 MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
1006 MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
1007 MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
1008 MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
1009 MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
1010 MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
1011 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
1012 -- However, we don't want the register allocator to throw it
1013 -- away as an unnecessary reg-to-reg move, so we keep it in
1014 -- the form of a movzl and print it as a movl later.
1017 #if i386_TARGET_ARCH
1018 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1019 MO_FF_Conv W64 W32 -> conversionNop FF32 x
1021 MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
1022 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
1025 MO_FS_Conv from to -> coerceFP2Int from to x
1026 MO_SF_Conv from to -> coerceInt2FP from to x
1028 other -> pprPanic "getRegister" (pprMachOp mop)
1030 triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
1031 triv_ucode instr size = trivialUCode size (instr size) x
1033 -- signed or unsigned extension.
1034 integerExtend :: Width -> Width
1035 -> (Size -> Operand -> Operand -> Instr)
1036 -> CmmExpr -> NatM Register
1037 integerExtend from to instr expr = do
1038 (reg,e_code) <- if from == W8 then getByteReg expr
1039 else getSomeReg expr
1043 instr (intSize from) (OpReg reg) (OpReg dst)
1044 return (Any (intSize to) code)
1046 toI8Reg :: Width -> CmmExpr -> NatM Register
1047 toI8Reg new_rep expr
1048 = do codefn <- getAnyReg expr
1049 return (Any (intSize new_rep) codefn)
1050 -- HACK: use getAnyReg to get a byte-addressable register.
1051 -- If the source was a Fixed register, this will add the
1052 -- mov instruction to put it into the desired destination.
1053 -- We're assuming that the destination won't be a fixed
1054 -- non-byte-addressable register; it won't be, because all
1055 -- fixed registers are word-sized.
1057 toI16Reg = toI8Reg -- for now
1059 conversionNop :: Size -> CmmExpr -> NatM Register
1060 conversionNop new_size expr
1061 = do e_code <- getRegister expr
1062 return (swizzleRegisterRep e_code new_size)
1065 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1067 MO_F_Eq w -> condFltReg EQQ x y
1068 MO_F_Ne w -> condFltReg NE x y
1069 MO_F_Gt w -> condFltReg GTT x y
1070 MO_F_Ge w -> condFltReg GE x y
1071 MO_F_Lt w -> condFltReg LTT x y
1072 MO_F_Le w -> condFltReg LE x y
1074 MO_Eq rep -> condIntReg EQQ x y
1075 MO_Ne rep -> condIntReg NE x y
1077 MO_S_Gt rep -> condIntReg GTT x y
1078 MO_S_Ge rep -> condIntReg GE x y
1079 MO_S_Lt rep -> condIntReg LTT x y
1080 MO_S_Le rep -> condIntReg LE x y
1082 MO_U_Gt rep -> condIntReg GU x y
1083 MO_U_Ge rep -> condIntReg GEU x y
1084 MO_U_Lt rep -> condIntReg LU x y
1085 MO_U_Le rep -> condIntReg LEU x y
1087 #if i386_TARGET_ARCH
1088 MO_F_Add w -> trivialFCode w GADD x y
1089 MO_F_Sub w -> trivialFCode w GSUB x y
1090 MO_F_Quot w -> trivialFCode w GDIV x y
1091 MO_F_Mul w -> trivialFCode w GMUL x y
1094 #if x86_64_TARGET_ARCH
1095 MO_F_Add w -> trivialFCode w ADD x y
1096 MO_F_Sub w -> trivialFCode w SUB x y
1097 MO_F_Quot w -> trivialFCode w FDIV x y
1098 MO_F_Mul w -> trivialFCode w MUL x y
1101 MO_Add rep -> add_code rep x y
1102 MO_Sub rep -> sub_code rep x y
1104 MO_S_Quot rep -> div_code rep True True x y
1105 MO_S_Rem rep -> div_code rep True False x y
1106 MO_U_Quot rep -> div_code rep False True x y
1107 MO_U_Rem rep -> div_code rep False False x y
1109 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1111 MO_Mul rep -> triv_op rep IMUL
1112 MO_And rep -> triv_op rep AND
1113 MO_Or rep -> triv_op rep OR
1114 MO_Xor rep -> triv_op rep XOR
1116 {- Shift ops on x86s have constraints on their source, it
1117 either has to be Imm, CL or 1
1118 => trivialCode is not restrictive enough (sigh.)
1120 MO_Shl rep -> shift_code rep SHL x y {-False-}
1121 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1122 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1124 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1126 --------------------
1127 triv_op width instr = trivialCode width op (Just op) x y
1128 where op = instr (intSize width)
1130 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1131 imulMayOflo rep a b = do
1132 (a_reg, a_code) <- getNonClobberedReg a
1133 b_code <- getAnyReg b
1135 shift_amt = case rep of
1138 _ -> panic "shift_amt"
1141 code = a_code `appOL` b_code eax `appOL`
1143 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
1144 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1145 -- sign extend lower part
1146 SUB size (OpReg edx) (OpReg eax)
1147 -- compare against upper
1148 -- eax==0 if high part == sign extended low part
1151 return (Fixed size eax code)
1153 --------------------
1155 -> (Size -> Operand -> Operand -> Instr)
1160 {- Case1: shift length as immediate -}
1161 shift_code width instr x y@(CmmLit lit) = do
1162 x_code <- getAnyReg x
1164 size = intSize width
1166 = x_code dst `snocOL`
1167 instr size (OpImm (litToImm lit)) (OpReg dst)
1169 return (Any size code)
1171 {- Case2: shift length is complex (non-immediate)
1172 * y must go in %ecx.
1173 * we cannot do y first *and* put its result in %ecx, because
1174 %ecx might be clobbered by x.
1175 * if we do y second, then x cannot be
1176 in a clobbered reg. Also, we cannot clobber x's reg
1177 with the instruction itself.
1179 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1180 - do y second and put its result into %ecx. x gets placed in a fresh
     tmp. This is likely to be better, because the reg alloc can
1182 eliminate this reg->reg move here (it won't eliminate the other one,
1183 because the move is into the fixed %ecx).
1185 shift_code width instr x y{-amount-} = do
1186 x_code <- getAnyReg x
1187 let size = intSize width
1188 tmp <- getNewRegNat size
1189 y_code <- getAnyReg y
1191 code = x_code tmp `appOL`
1193 instr size (OpReg ecx) (OpReg tmp)
1195 return (Fixed size tmp code)
1197 --------------------
1198 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1199 add_code rep x (CmmLit (CmmInt y _))
1200 | is32BitInteger y = add_int rep x y
1201 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1202 where size = intSize rep
1204 --------------------
1205 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1206 sub_code rep x (CmmLit (CmmInt y _))
1207 | is32BitInteger (-y) = add_int rep x (-y)
1208 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1210 -- our three-operand add instruction:
1211 add_int width x y = do
1212 (x_reg, x_code) <- getSomeReg x
1214 size = intSize width
1215 imm = ImmInt (fromInteger y)
1219 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1222 return (Any size code)
1224 ----------------------
1225 div_code width signed quotient x y = do
1226 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1227 x_code <- getAnyReg x
1229 size = intSize width
1230 widen | signed = CLTD size
1231 | otherwise = XOR size (OpReg edx) (OpReg edx)
1233 instr | signed = IDIV
1236 code = y_code `appOL`
1238 toOL [widen, instr size y_op]
1240 result | quotient = eax
1244 return (Fixed size result code)
1247 getRegister (CmmLoad mem pk)
1250 Amode src mem_code <- getAmode mem
1252 size = cmmTypeSize pk
1253 code dst = mem_code `snocOL`
1254 IF_ARCH_i386(GLD size src dst,
1255 MOV size (OpAddr src) (OpReg dst))
1256 return (Any size code)
1258 #if i386_TARGET_ARCH
1259 getRegister (CmmLoad mem pk)
1262 code <- intLoadCode instr mem
1263 return (Any size code)
1265 width = typeWidth pk
1266 size = intSize width
1267 instr = case width of
1270 -- We always zero-extend 8-bit loads, if we
1271 -- can't think of anything better. This is because
1272 -- we can't guarantee access to an 8-bit variant of every register
1273 -- (esi and edi don't have 8-bit variants), so to make things
1274 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1277 #if x86_64_TARGET_ARCH
1278 -- Simpler memory load code on x86_64
1279 getRegister (CmmLoad mem pk)
1281 code <- intLoadCode (MOV size) mem
1282 return (Any size code)
1283 where size = intSize $ typeWidth pk
1286 getRegister (CmmLit (CmmInt 0 width))
1288 size = intSize width
1290 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1291 adj_size = case size of II64 -> II32; _ -> size
1292 size1 = IF_ARCH_i386( size, adj_size )
1294 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1296 return (Any size code)
1298 #if x86_64_TARGET_ARCH
1299 -- optimisation for loading small literals on x86_64: take advantage
1300 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1301 -- instruction forms are shorter.
1302 getRegister (CmmLit lit)
1303 | isWord64 (cmmLitType lit), not (isBigLit lit)
1306 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1308 return (Any II64 code)
1310 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1312 -- note1: not the same as (not.is32BitLit), because that checks for
1313 -- signed literals that fit in 32 bits, but we want unsigned
1315 -- note2: all labels are small, because we're assuming the
1316 -- small memory model (see gcc docs, -mcmodel=small).
1319 getRegister (CmmLit lit)
1321 size = cmmTypeSize (cmmLitType lit)
1323 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1325 return (Any size code)
1327 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- Build the code for an integer load: compute the addressing mode for
-- the memory expression, then append a single load instruction that
-- moves from that address into the eventual destination register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode src mem_code <- getAmode mem
    let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
    return load
1336 -- Compute an expression into *any* register, adding the appropriate
1337 -- move instruction if necessary.
1338 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
1340 r <- getRegister expr
-- Turn a Register into a function that produces code leaving the value
-- in a given destination register.  An Any register simply has its code
-- instantiated at the destination; a Fixed register needs an extra
-- reg-to-reg move appended.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register = case register of
    Any _ code          -> return code
    Fixed rep reg fcode ->
        return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1347 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1348 -- Fixed registers might not be byte-addressable, so we make sure we've
1349 -- got a temporary, inserting an extra reg copy if necessary.
1350 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1351 #if x86_64_TARGET_ARCH
1352 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
1354 getByteReg expr = do
1355 r <- getRegister expr
1358 tmp <- getNewRegNat rep
1359 return (tmp, code tmp)
1361 | isVirtualReg reg -> return (reg,code)
1363 tmp <- getNewRegNat rep
1364 return (tmp, code `snocOL` reg2reg rep reg tmp)
1365 -- ToDo: could optimise slightly by checking for byte-addressable
1366 -- real registers, but that will happen very rarely if at all.
1369 -- Another variant: this time we want the result in a register that cannot
1370 -- be modified by code to evaluate an arbitrary expression.
1371 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1372 getNonClobberedReg expr = do
1373 r <- getRegister expr
1376 tmp <- getNewRegNat rep
1377 return (tmp, code tmp)
1379 -- only free regs can be clobbered
1380 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1381 tmp <- getNewRegNat rep
1382 return (tmp, code `snocOL` reg2reg rep reg tmp)
1386 reg2reg :: Size -> Reg -> Reg -> Instr
1387 reg2reg size src dst
1388 #if i386_TARGET_ARCH
1389 | isFloatSize size = GMOV src dst
1391 | otherwise = MOV size (OpReg src) (OpReg dst)
1393 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1397 #if sparc_TARGET_ARCH
1399 -- getRegister :: CmmExpr -> NatM Register
1401 -- Load a literal float into a float register.
1402 -- The actual literal is stored in a new data area, and we load it
1404 getRegister (CmmLit (CmmFloat f W32)) = do
1406 -- a label for the new data area
1407 lbl <- getNewLabelNat
1408 tmp <- getNewRegNat II32
1410 let code dst = toOL [
1414 CmmStaticLit (CmmFloat f W32)],
1417 SETHI (HI (ImmCLbl lbl)) tmp,
1418 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1420 return (Any FF32 code)
1422 getRegister (CmmLit (CmmFloat d W64)) = do
1423 lbl <- getNewLabelNat
1424 tmp <- getNewRegNat II32
1425 let code dst = toOL [
1428 CmmStaticLit (CmmFloat d W64)],
1429 SETHI (HI (ImmCLbl lbl)) tmp,
1430 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1431 return (Any FF64 code)
1433 getRegister (CmmMachOp mop [x]) -- unary MachOps
1435 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
1436 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
1438 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
1439 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
1441 MO_FF_Conv W64 W32-> coerceDbl2Flt x
1442 MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1444 MO_FS_Conv from to -> coerceFP2Int from to x
1445 MO_SF_Conv from to -> coerceInt2FP from to x
1447 -- Conversions which are a nop on sparc
1449 | from == to -> conversionNop (intSize to) x
1450 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1451 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1452 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1454 MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
1455 MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
1456 MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
1459 MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
1460 MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
1461 MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
1463 other_op -> panic ("Unknown unary mach op: " ++ show mop)
1466 -- | sign extend and widen
1468 :: Width -- ^ width of source expression
1469 -> Width -- ^ width of result
1470 -> CmmExpr -- ^ source expression
1473 integerExtend from to expr
1474 = do -- load the expr into some register
1475 (reg, e_code) <- getSomeReg expr
1476 tmp <- getNewRegNat II32
1478 = case (from, to) of
1485 -- local shift word left to load the sign bit
1486 `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
1488 -- arithmetic shift right to sign extend
1489 `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
1491 return (Any (intSize to) code)
1494 conversionNop new_rep expr
1495 = do e_code <- getRegister expr
1496 return (swizzleRegisterRep e_code new_rep)
1498 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1500 MO_Eq rep -> condIntReg EQQ x y
1501 MO_Ne rep -> condIntReg NE x y
1503 MO_S_Gt rep -> condIntReg GTT x y
1504 MO_S_Ge rep -> condIntReg GE x y
1505 MO_S_Lt rep -> condIntReg LTT x y
1506 MO_S_Le rep -> condIntReg LE x y
1508 MO_U_Gt W32 -> condIntReg GTT x y
1509 MO_U_Ge W32 -> condIntReg GE x y
1510 MO_U_Lt W32 -> condIntReg LTT x y
1511 MO_U_Le W32 -> condIntReg LE x y
1513 MO_U_Gt W16 -> condIntReg GU x y
1514 MO_U_Ge W16 -> condIntReg GEU x y
1515 MO_U_Lt W16 -> condIntReg LU x y
1516 MO_U_Le W16 -> condIntReg LEU x y
1518 MO_Add W32 -> trivialCode W32 (ADD False False) x y
1519 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1521 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1523 -- ToDo: teach about V8+ SPARC div instructions
1524 MO_S_Quot W32 -> idiv FSLIT(".div") x y
1525 MO_S_Rem W32 -> idiv FSLIT(".rem") x y
1526 MO_U_Quot W32 -> idiv FSLIT(".udiv") x y
1527 MO_U_Rem W32 -> idiv FSLIT(".urem") x y
1530 MO_F_Eq w -> condFltReg EQQ x y
1531 MO_F_Ne w -> condFltReg NE x y
1533 MO_F_Gt w -> condFltReg GTT x y
1534 MO_F_Ge w -> condFltReg GE x y
1535 MO_F_Lt w -> condFltReg LTT x y
1536 MO_F_Le w -> condFltReg LE x y
1538 MO_F_Add w -> trivialFCode w FADD x y
1539 MO_F_Sub w -> trivialFCode w FSUB x y
1540 MO_F_Mul w -> trivialFCode w FMUL x y
1541 MO_F_Quot w -> trivialFCode w FDIV x y
1543 MO_And rep -> trivialCode rep (AND False) x y
1544 MO_Or rep -> trivialCode rep (OR False) x y
1545 MO_Xor rep -> trivialCode rep (XOR False) x y
1547 MO_Mul rep -> trivialCode rep (SMUL False) x y
1549 MO_Shl rep -> trivialCode rep SLL x y
1550 MO_U_Shr rep -> trivialCode rep SRL x y
1551 MO_S_Shr rep -> trivialCode rep SRA x y
1554 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1555 [promote x, promote y])
1556 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1557 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1560 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1562 --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1564 --------------------
1565 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1566 imulMayOflo rep a b = do
1567 (a_reg, a_code) <- getSomeReg a
1568 (b_reg, b_code) <- getSomeReg b
1569 res_lo <- getNewRegNat II32
1570 res_hi <- getNewRegNat II32
1572 shift_amt = case rep of
1575 _ -> panic "shift_amt"
1576 code dst = a_code `appOL` b_code `appOL`
1578 SMUL False a_reg (RIReg b_reg) res_lo,
1580 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1581 SUB False False res_lo (RIReg res_hi) dst
1583 return (Any II32 code)
1585 getRegister (CmmLoad mem pk) = do
1586 Amode src code <- getAmode mem
1588 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1589 return (Any (cmmTypeSize pk) code__2)
1591 getRegister (CmmLit (CmmInt i _))
1594 src = ImmInt (fromInteger i)
1595 code dst = unitOL (OR False g0 (RIImm src) dst)
1597 return (Any II32 code)
1599 getRegister (CmmLit lit)
1600 = let rep = cmmLitType lit
1604 OR False dst (RIImm (LO imm)) dst]
1605 in return (Any II32 code)
1607 #endif /* sparc_TARGET_ARCH */
1609 #if powerpc_TARGET_ARCH
1610 getRegister (CmmLoad mem pk)
1613 Amode addr addr_code <- getAmode mem
1614 let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1615 addr_code `snocOL` LD size dst addr
1616 return (Any size code)
1617 where size = cmmTypeSize pk
1619 -- catch simple cases of zero- or sign-extended load
1620 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1621 Amode addr addr_code <- getAmode mem
1622 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1624 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1626 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1627 Amode addr addr_code <- getAmode mem
1628 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
1630 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1631 Amode addr addr_code <- getAmode mem
1632 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1634 getRegister (CmmMachOp mop [x]) -- unary MachOps
1636 MO_Not rep -> triv_ucode_int rep NOT
1638 MO_F_Neg w -> triv_ucode_float w FNEG
1639 MO_S_Neg w -> triv_ucode_int w NEG
1641 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
1642 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1644 MO_FS_Conv from to -> coerceFP2Int from to x
1645 MO_SF_Conv from to -> coerceInt2FP from to x
1648 | from == to -> conversionNop (intSize to) x
1650 -- narrowing is a nop: we treat the high bits as undefined
1651 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1652 MO_SS_Conv W16 W8 -> conversionNop II8 x
1653 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
1654 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1657 | from == to -> conversionNop (intSize to) x
1658 -- narrowing is a nop: we treat the high bits as undefined
1659 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1660 MO_UU_Conv W16 W8 -> conversionNop II8 x
1661 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1662 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1665 triv_ucode_int width instr = trivialUCode (intSize width) instr x
1666 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1668 conversionNop new_size expr
1669 = do e_code <- getRegister expr
1670 return (swizzleRegisterRep e_code new_size)
1672 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1674 MO_F_Eq w -> condFltReg EQQ x y
1675 MO_F_Ne w -> condFltReg NE x y
1676 MO_F_Gt w -> condFltReg GTT x y
1677 MO_F_Ge w -> condFltReg GE x y
1678 MO_F_Lt w -> condFltReg LTT x y
1679 MO_F_Le w -> condFltReg LE x y
1681 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1682 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1684 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1685 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1686 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1687 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1689 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1690 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1691 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1692 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1694 MO_F_Add w -> triv_float w FADD
1695 MO_F_Sub w -> triv_float w FSUB
1696 MO_F_Mul w -> triv_float w FMUL
1697 MO_F_Quot w -> triv_float w FDIV
1699 -- optimize addition with 32-bit immediate
1703 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1704 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1707 (src, srcCode) <- getSomeReg x
1708 let imm = litToImm lit
1709 code dst = srcCode `appOL` toOL [
1710 ADDIS dst src (HA imm),
1711 ADD dst dst (RIImm (LO imm))
1713 return (Any II32 code)
1714 _ -> trivialCode W32 True ADD x y
1716 MO_Add rep -> trivialCode rep True ADD x y
1718 case y of -- subfi ('substract from' with immediate) doesn't exist
1719 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1720 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1721 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1723 MO_Mul rep -> trivialCode rep True MULLW x y
1725 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1727 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1728 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1730 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1731 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1733 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1734 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1736 MO_And rep -> trivialCode rep False AND x y
1737 MO_Or rep -> trivialCode rep False OR x y
1738 MO_Xor rep -> trivialCode rep False XOR x y
1740 MO_Shl rep -> trivialCode rep False SLW x y
1741 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1742 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1744 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1745 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1747 getRegister (CmmLit (CmmInt i rep))
1748 | Just imm <- makeImmediate rep True i
1750 code dst = unitOL (LI dst imm)
1752 return (Any (intSize rep) code)
1754 getRegister (CmmLit (CmmFloat f frep)) = do
1755 lbl <- getNewLabelNat
1756 dflags <- getDynFlagsNat
1757 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1758 Amode addr addr_code <- getAmode dynRef
1759 let size = floatSize frep
1761 LDATA ReadOnlyData [CmmDataLabel lbl,
1762 CmmStaticLit (CmmFloat f frep)]
1763 `consOL` (addr_code `snocOL` LD size dst addr)
1764 return (Any size code)
1766 getRegister (CmmLit lit)
1767 = let rep = cmmLitType lit
1771 ADD dst dst (RIImm (LO imm))
1773 in return (Any (cmmTypeSize rep) code)
1775 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- extendSExpr/extendUExpr: wrap an integer expression of width @rep@ in
-- a sign- or zero-extending conversion to W32.  A no-op when the
-- expression is already 32 bits wide.
--
-- (Type signatures added: the module header asks for the warning
-- suppression kludge to be worked off, and these were missing
-- top-level signatures.)
extendSExpr :: Width -> CmmExpr -> CmmExpr
extendSExpr W32 x = x
extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]

extendUExpr :: Width -> CmmExpr -> CmmExpr
extendUExpr W32 x = x
extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1784 #endif /* powerpc_TARGET_ARCH */
1787 -- -----------------------------------------------------------------------------
1788 -- The 'Amode' type: Memory addressing modes passed up the tree.
1790 data Amode = Amode AddrMode InstrBlock
1793 Now, given a tree (the argument to an CmmLoad) that references memory,
1794 produce a suitable addressing mode.
1796 A Rule of the Game (tm) for Amodes: use of the addr bit must
1797 immediately follow use of the code part, since the code part puts
1798 values in registers which the addr then refers to. So you can't put
1799 anything in between, lest it overwrite some of those registers. If
1800 you need to do some other computation between the code part and use of
1801 the addr bit, first store the effective address from the amode in a
1802 temporary, then do the other computation, and then use the temporary:
1806 ... other computation ...
1810 getAmode :: CmmExpr -> NatM Amode
1811 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1813 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1815 #if alpha_TARGET_ARCH
1817 getAmode (StPrim IntSubOp [x, StInt i])
1818 = getNewRegNat PtrRep `thenNat` \ tmp ->
1819 getRegister x `thenNat` \ register ->
1821 code = registerCode register tmp
1822 reg = registerName register tmp
1823 off = ImmInt (-(fromInteger i))
1825 return (Amode (AddrRegImm reg off) code)
1827 getAmode (StPrim IntAddOp [x, StInt i])
1828 = getNewRegNat PtrRep `thenNat` \ tmp ->
1829 getRegister x `thenNat` \ register ->
1831 code = registerCode register tmp
1832 reg = registerName register tmp
1833 off = ImmInt (fromInteger i)
1835 return (Amode (AddrRegImm reg off) code)
1839 = return (Amode (AddrImm imm__2) id)
1842 imm__2 = case imm of Just x -> x
1845 = getNewRegNat PtrRep `thenNat` \ tmp ->
1846 getRegister other `thenNat` \ register ->
1848 code = registerCode register tmp
1849 reg = registerName register tmp
1851 return (Amode (AddrReg reg) code)
1853 #endif /* alpha_TARGET_ARCH */
1855 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1857 #if x86_64_TARGET_ARCH
1859 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1860 CmmLit displacement])
1861 = return $ Amode (ripRel (litToImm displacement)) nilOL
1865 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1867 -- This is all just ridiculous, since it carefully undoes
1868 -- what mangleIndexTree has just done.
1869 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1871 -- ASSERT(rep == II32)???
1872 = do (x_reg, x_code) <- getSomeReg x
1873 let off = ImmInt (-(fromInteger i))
1874 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1876 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1878 -- ASSERT(rep == II32)???
1879 = do (x_reg, x_code) <- getSomeReg x
1880 let off = ImmInt (fromInteger i)
1881 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1883 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1884 -- recognised by the next rule.
1885 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1887 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1889 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1890 [y, CmmLit (CmmInt shift _)]])
1891 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1892 = x86_complex_amode x y shift 0
1894 getAmode (CmmMachOp (MO_Add rep)
1895 [x, CmmMachOp (MO_Add _)
1896 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1897 CmmLit (CmmInt offset _)]])
1898 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1899 && is32BitInteger offset
1900 = x86_complex_amode x y shift offset
1902 getAmode (CmmMachOp (MO_Add rep) [x,y])
1903 = x86_complex_amode x y 0 0
1905 getAmode (CmmLit lit) | is32BitLit lit
1906 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1909 (reg,code) <- getSomeReg expr
1910 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1913 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1914 x86_complex_amode base index shift offset
1915 = do (x_reg, x_code) <- getNonClobberedReg base
1916 -- x must be in a temp, because it has to stay live over y_code
       -- we could compare x_reg and y_reg and do something better here...
1918 (y_reg, y_code) <- getSomeReg index
1920 code = x_code `appOL` y_code
1921 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1922 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1925 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1927 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1929 #if sparc_TARGET_ARCH
1931 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1934 (reg, code) <- getSomeReg x
1936 off = ImmInt (-(fromInteger i))
1937 return (Amode (AddrRegImm reg off) code)
1940 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1943 (reg, code) <- getSomeReg x
1945 off = ImmInt (fromInteger i)
1946 return (Amode (AddrRegImm reg off) code)
1948 getAmode (CmmMachOp (MO_Add rep) [x, y])
1950 (regX, codeX) <- getSomeReg x
1951 (regY, codeY) <- getSomeReg y
1953 code = codeX `appOL` codeY
1954 return (Amode (AddrRegReg regX regY) code)
1956 -- XXX Is this same as "leaf" in Stix?
1957 getAmode (CmmLit lit)
1959 tmp <- getNewRegNat II32
1961 code = unitOL (SETHI (HI imm__2) tmp)
1962 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1964 imm__2 = litToImm lit
1968 (reg, code) <- getSomeReg other
1971 return (Amode (AddrRegImm reg off) code)
1973 #endif /* sparc_TARGET_ARCH */
1975 #ifdef powerpc_TARGET_ARCH
1976 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
1977 | Just off <- makeImmediate W32 True (-i)
1979 (reg, code) <- getSomeReg x
1980 return (Amode (AddrRegImm reg off) code)
1983 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
1984 | Just off <- makeImmediate W32 True i
1986 (reg, code) <- getSomeReg x
1987 return (Amode (AddrRegImm reg off) code)
1989 -- optimize addition with 32-bit immediate
1991 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
1993 tmp <- getNewRegNat II32
1994 (src, srcCode) <- getSomeReg x
1995 let imm = litToImm lit
1996 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1997 return (Amode (AddrRegImm tmp (LO imm)) code)
1999 getAmode (CmmLit lit)
2001 tmp <- getNewRegNat II32
2002 let imm = litToImm lit
2003 code = unitOL (LIS tmp (HA imm))
2004 return (Amode (AddrRegImm tmp (LO imm)) code)
2006 getAmode (CmmMachOp (MO_Add W32) [x, y])
2008 (regX, codeX) <- getSomeReg x
2009 (regY, codeY) <- getSomeReg y
2010 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
2014 (reg, code) <- getSomeReg other
2017 return (Amode (AddrRegImm reg off) code)
2018 #endif /* powerpc_TARGET_ARCH */
2020 -- -----------------------------------------------------------------------------
2021 -- getOperand: sometimes any operand will do.
2023 -- getNonClobberedOperand: the value of the operand will remain valid across
2024 -- the computation of an arbitrary expression, unless the expression
2025 -- is computed directly into a register which the operand refers to
2026 -- (see trivialCode where this function is used for an example).
2028 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2030 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2031 #if x86_64_TARGET_ARCH
2032 getNonClobberedOperand (CmmLit lit)
2033 | isSuitableFloatingPointLit lit = do
2034 lbl <- getNewLabelNat
2035 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2037 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2039 getNonClobberedOperand (CmmLit lit)
2040 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2041 return (OpImm (litToImm lit), nilOL)
2042 getNonClobberedOperand (CmmLoad mem pk)
2043 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2044 Amode src mem_code <- getAmode mem
2046 if (amodeCouldBeClobbered src)
2048 tmp <- getNewRegNat wordSize
2049 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2050 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2053 return (OpAddr src', save_code `appOL` mem_code)
2054 getNonClobberedOperand e = do
2055 (reg, code) <- getNonClobberedReg e
2056 return (OpReg reg, code)
2058 amodeCouldBeClobbered :: AddrMode -> Bool
2059 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2061 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2062 regClobbered _ = False
2064 -- getOperand: the operand is not required to remain valid across the
2065 -- computation of an arbitrary expression.
2066 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2067 #if x86_64_TARGET_ARCH
2068 getOperand (CmmLit lit)
2069 | isSuitableFloatingPointLit lit = do
2070 lbl <- getNewLabelNat
2071 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2073 return (OpAddr (ripRel (ImmCLbl lbl)), code)
2075 getOperand (CmmLit lit)
2076 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2077 return (OpImm (litToImm lit), nilOL)
2078 getOperand (CmmLoad mem pk)
2079 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2080 Amode src mem_code <- getAmode mem
2081 return (OpAddr src, mem_code)
2083 (reg, code) <- getSomeReg e
2084 return (OpReg reg, code)
2086 isOperand :: CmmExpr -> Bool
2087 isOperand (CmmLoad _ _) = True
2088 isOperand (CmmLit lit) = is32BitLit lit
2089 || isSuitableFloatingPointLit lit
2092 -- if we want a floating-point literal as an operand, we can
2093 -- use it directly from memory. However, if the literal is
2094 -- zero, we're better off generating it into a register using
-- A non-zero floating-point literal can be used directly from memory;
-- zero is excluded (see the note above: it is better generated into a
-- register).  Anything that is not a float literal is unsuitable.
isSuitableFloatingPointLit lit = case lit of
    CmmFloat f _ -> f /= 0.0
    _            -> False
2099 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2100 getRegOrMem (CmmLoad mem pk)
2101 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2102 Amode src mem_code <- getAmode mem
2103 return (OpAddr src, mem_code)
2105 (reg, code) <- getNonClobberedReg e
2106 return (OpReg reg, code)
2108 #if x86_64_TARGET_ARCH
2109 is32BitLit (CmmInt i W64) = is32BitInteger i
2110 -- assume that labels are in the range 0-2^31-1: this assumes the
2111 -- small memory model (see gcc docs, -mcmodel=small).
-- | Does the integer, truncated to 64 bits, fit in a signed 32-bit
-- immediate?
is32BitInteger :: Integer -> Bool
is32BitInteger i = minI32 <= i64 && i64 <= maxI32
  where
    -- a CmmInt is intended to be truncated to the appropriate
    -- number of bits, so here we truncate it to Int64.  This is
    -- important because e.g. -1 as a CmmInt might be either
    -- -1 or 18446744073709551615.
    i64 :: Int64
    i64 = fromIntegral i
    minI32 = -0x80000000
    maxI32 =  0x7fffffff
2124 -- -----------------------------------------------------------------------------
2125 -- The 'CondCode' type: Condition codes passed up the tree.
2127 data CondCode = CondCode Bool Cond InstrBlock
2129 -- Set up a condition code for a conditional branch.
-- | Compile a boolean expression into a 'CondCode' for a conditional
-- branch.
getCondCode :: CmmExpr -> NatM CondCode
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH
-- The Alpha backend does not generate condition codes this way.
getCondCode = panic "MachCode.getCondCode: not on Alphas"
#endif /* alpha_TARGET_ARCH */
2139 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
-- yes, they really do seem to want exactly the same!
-- NOTE(review): elided listing — the line(s) between the equation head
-- and the first alternative (original lines 2145-2146, presumably
-- "= case mop of") are missing here.
getCondCode (CmmMachOp mop [x, y])
-- Float comparisons dispatch to condFltCode with the matching Cond.
MO_F_Eq W32 -> condFltCode EQQ x y
MO_F_Ne W32 -> condFltCode NE x y
MO_F_Gt W32 -> condFltCode GTT x y
MO_F_Ge W32 -> condFltCode GE x y
MO_F_Lt W32 -> condFltCode LTT x y
MO_F_Le W32 -> condFltCode LE x y
MO_F_Eq W64 -> condFltCode EQQ x y
MO_F_Ne W64 -> condFltCode NE x y
MO_F_Gt W64 -> condFltCode GTT x y
MO_F_Ge W64 -> condFltCode GE x y
MO_F_Lt W64 -> condFltCode LTT x y
MO_F_Le W64 -> condFltCode LE x y
-- Integer comparisons: signed use GTT/GE/LTT/LE, unsigned GU/GEU/LU/LEU.
MO_Eq rep -> condIntCode EQQ x y
MO_Ne rep -> condIntCode NE x y
MO_S_Gt rep -> condIntCode GTT x y
MO_S_Ge rep -> condIntCode GE x y
MO_S_Lt rep -> condIntCode LTT x y
MO_S_Le rep -> condIntCode LE x y
MO_U_Gt rep -> condIntCode GU x y
MO_U_Ge rep -> condIntCode GEU x y
MO_U_Lt rep -> condIntCode LU x y
MO_U_Le rep -> condIntCode LEU x y
other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
#elif powerpc_TARGET_ARCH
-- almost the same as everywhere else - but we need to
-- extend small integers to 32 bit first
-- NOTE(review): elided listing — the case head (original line 2184,
-- presumably "= case mop of") and the closing #endif are missing here.
getCondCode (CmmMachOp mop [x, y])
MO_F_Eq W32 -> condFltCode EQQ x y
MO_F_Ne W32 -> condFltCode NE x y
MO_F_Gt W32 -> condFltCode GTT x y
MO_F_Ge W32 -> condFltCode GE x y
MO_F_Lt W32 -> condFltCode LTT x y
MO_F_Le W32 -> condFltCode LE x y
MO_F_Eq W64 -> condFltCode EQQ x y
MO_F_Ne W64 -> condFltCode NE x y
MO_F_Gt W64 -> condFltCode GTT x y
MO_F_Ge W64 -> condFltCode GE x y
MO_F_Lt W64 -> condFltCode LTT x y
MO_F_Le W64 -> condFltCode LE x y
-- Equality can use either extension; signed/unsigned comparisons must
-- sign- or zero-extend sub-word operands respectively.
MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
getCondCode other = panic "getCondCode(2)(powerpc)"
-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.
condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
#if alpha_TARGET_ARCH
-- Not implemented for the Alpha backend.
condIntCode = panic "MachCode.condIntCode: not on Alphas"
condFltCode = panic "MachCode.condFltCode: not on Alphas"
#endif /* alpha_TARGET_ARCH */
2230 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- NOTE(review): elided listing — several interior lines of these
-- equations (e.g. "let"/"where imm = litToImm lit", "return"s) are
-- missing; comments describe only the visible code.
-- memory vs immediate
condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
Amode x_addr x_code <- getAmode x
code = x_code `snocOL`
CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
return (CondCode False cond code)
-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
| (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
(x_reg, x_code) <- getSomeReg x
code = x_code `snocOL`
TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
return (CondCode False cond code)
-- anything vs zero: TEST reg,reg sets flags without a separate CMP
condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
(x_reg, x_code) <- getSomeReg x
code = x_code `snocOL`
TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
return (CondCode False cond code)
-- anything vs operand
condIntCode cond x y | isOperand y = do
(x_reg, x_code) <- getNonClobberedReg x
(y_op, y_code) <- getOperand y
code = x_code `appOL` y_code `snocOL`
CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
return (CondCode False cond code)
-- anything vs anything
condIntCode cond x y = do
(y_reg, y_code) <- getNonClobberedReg y
(x_op, x_code) <- getRegOrMem x
code = y_code `appOL`
CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
return (CondCode False cond code)
#if i386_TARGET_ARCH
-- i386 float comparison via the x87 GCMP pseudo-instruction.
condFltCode cond x y
= ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
(x_reg, x_code) <- getNonClobberedReg x
(y_reg, y_code) <- getSomeReg y
code = x_code `appOL` y_code `snocOL`
GCMP cond x_reg y_reg
-- The GCMP insn does the test and sets the zero flag if comparable
-- and true. Hence we always supply EQQ as the condition to test.
return (CondCode True EQQ code)
#endif /* i386_TARGET_ARCH */
#if x86_64_TARGET_ARCH
-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
-- an operand, but the right must be a reg. We can probably do better
-- than this general case...
condFltCode cond x y = do
(x_reg, x_code) <- getNonClobberedReg x
(y_op, y_code) <- getOperand y
code = x_code `appOL`
CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
-- NB(1): we need to use the unsigned comparison operators on the
-- result of this comparison.
return (CondCode True (condToUnsigned cond) code)
2316 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- NOTE(review): elided listing — the first equation's guard line and
-- several let/where lines are missing.  SPARC compares by subtracting
-- into g0 (SUB with condition-code update).
condIntCode cond x (CmmLit (CmmInt y rep))
(src1, code) <- getSomeReg x
src2 = ImmInt (fromInteger y)
code' = code `snocOL` SUB False True src1 (RIImm src2) g0
return (CondCode False cond code')
-- general case: compare two registers
condIntCode cond x y = do
(src1, code1) <- getSomeReg x
(src2, code2) <- getSomeReg y
code__2 = code1 `appOL` code2 `snocOL`
SUB False True src1 (RIReg src2) g0
return (CondCode False cond code__2)
-- Float comparison: if the two operand types differ, the 32-bit one is
-- promoted to 64-bit via FxTOy into a scratch register first.
condFltCode cond x y = do
(src1, code1) <- getSomeReg x
(src2, code2) <- getSomeReg y
tmp <- getNewRegNat FF64
promote x = FxTOy FF32 FF64 x tmp
if pk1 `cmmEqType` pk2 then
code1 `appOL` code2 `snocOL`
FCMP True (cmmTypeSize pk1) src1 src2
else if typeWidth pk1 == W32 then
code1 `snocOL` promote src1 `appOL` code2 `snocOL`
FCMP True FF64 tmp src2
code1 `appOL` code2 `snocOL` promote src2 `snocOL`
FCMP True FF64 src1 tmp
return (CondCode True cond code__2)
#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
-- ###FIXME: I16 and I8!
-- NOTE(review): elided listing — "= do", "let" and catch-all lines of
-- these equations are missing.
condIntCode cond x (CmmLit (CmmInt y rep))
| Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
(src1, code) <- getSomeReg x
-- pick CMPL (logical/unsigned) vs CMP (signed) from the condition
code' = code `snocOL`
(if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
return (CondCode False cond code')
condIntCode cond x y = do
(src1, code1) <- getSomeReg x
(src2, code2) <- getSomeReg y
code' = code1 `appOL` code2 `snocOL`
(if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
return (CondCode False cond code')
condFltCode cond x y = do
(src1, code1) <- getSomeReg x
(src2, code2) <- getSomeReg y
code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
code'' = case cond of -- twiddle CR to handle unordered case
GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
ltbit = 0 ; eqbit = 2 ; gtbit = 1
return (CondCode True cond code'')
#endif /* powerpc_TARGET_ARCH */
2396 -- -----------------------------------------------------------------------------
2397 -- Generating assignments
2399 -- Assignments are really at the heart of the whole code generation
2400 -- business. Almost all top-level nodes of any real importance are
2401 -- assignments, which correspond to loads, stores, or register
2402 -- transfers. If we're really lucky, some of the register transfers
2403 -- will go away, because we can use the destination register to
2404 -- complete the code generation for the right hand side. This only
2405 -- fails when the right hand side is forced into a fixed register
2406 -- (e.g. the result of a call).
-- Mem variants store an expression to a computed address; Reg variants
-- move an expression into a Cmm register.
assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2414 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH
-- NOTE(review): elided listing — the "let"/"in"/else lines of these
-- equations are missing.  Old-style Alpha code using thenNat.
assignIntCode pk (CmmLoad dst _) src
= getNewRegNat IntRep `thenNat` \ tmp ->
getAmode dst `thenNat` \ amode ->
getRegister src `thenNat` \ register ->
code1 = amodeCode amode []
dst__2 = amodeAddr amode
code2 = registerCode register tmp []
src__2 = registerName register tmp
sz = primRepToSize pk
code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- register-to-register assignment
assignIntCode pk dst src
= getRegister dst `thenNat` \ register1 ->
getRegister src `thenNat` \ register2 ->
dst__2 = registerName register1 zeroh
code = registerCode register2 dst__2
src__2 = registerName register2 dst__2
code__2 = if isFixed register2
then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
#endif /* alpha_TARGET_ARCH */
2447 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- NOTE(review): elided listing — "return code", "check _ = Nothing",
-- and a couple of "where"/equation-head lines are missing here.
-- integer assignment to memory
-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
CmmLit (CmmInt i _)])
| addr == addr2, pk /= II64 || is32BitInteger i,
Just instr <- check op
= do Amode amode code_addr <- getAmode addr
let code = code_addr `snocOL`
instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
check (MO_Add _) = Just ADD
check (MO_Sub _) = Just SUB
-- general case: evaluate source to a reg or immediate, then store
assignMem_IntCode pk addr src = do
Amode addr code_addr <- getAmode addr
(code_src, op_src) <- get_op_RI src
code = code_src `appOL`
MOV pk op_src (OpAddr addr)
-- NOTE: op_src is stable, so it will still be valid
-- after code_addr. This may involve the introduction
-- of an extra MOV to a temporary register, but we hope
-- the register allocator will get rid of it.
get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
get_op_RI (CmmLit lit) | is32BitLit lit
= return (nilOL, OpImm (litToImm lit))
= do (reg,code) <- getNonClobberedReg op
return (code, OpReg reg)
-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
load_code <- intLoadCode (MOV pk) src
return (load_code (getRegisterReg reg))
-- dst is a reg, but src could be anything
assignReg_IntCode pk reg src = do
code <- getAnyReg src
return (code (getRegisterReg reg))
#endif /* i386_TARGET_ARCH */
2505 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- evaluate the source, compute the address, then ST to it
assignMem_IntCode pk addr src = do
(srcReg, code) <- getSomeReg src
Amode dstAddr addr_code <- getAmode addr
return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- NOTE(review): the "return $ case r of" and "where" lines of this
-- equation are elided from this listing.
assignReg_IntCode pk reg src = do
r <- getRegister src
Any _ code -> code dst
-- Fixed result: copy via OR with g0 (SPARC register move idiom)
Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
dst = getRegisterReg reg
#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
-- evaluate the source, compute the address, then ST to it
assignMem_IntCode pk addr src = do
(srcReg, code) <- getSomeReg src
Amode dstAddr addr_code <- getAmode addr
return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- dst is a reg, but src could be anything
-- NOTE(review): the "= do"/"case" and "where" lines of this equation
-- are elided from this listing.
assignReg_IntCode pk reg src
r <- getRegister src
Any _ code -> code dst
-- Fixed result: move with MR
Fixed _ freg fcode -> fcode `snocOL` MR dst freg
dst = getRegisterReg reg
#endif /* powerpc_TARGET_ARCH */
2545 -- -----------------------------------------------------------------------------
2546 -- Floating-point assignments
#if alpha_TARGET_ARCH
-- NOTE(review): elided listing — "let"/"in"/else lines are missing.
-- Mirrors assignIntCode above but uses FMOV for the register case.
assignFltCode pk (CmmLoad dst _) src
= getNewRegNat pk `thenNat` \ tmp ->
getAmode dst `thenNat` \ amode ->
getRegister src `thenNat` \ register ->
code1 = amodeCode amode []
dst__2 = amodeAddr amode
code2 = registerCode register tmp []
src__2 = registerName register tmp
sz = primRepToSize pk
code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
assignFltCode pk dst src
= getRegister dst `thenNat` \ register1 ->
getRegister src `thenNat` \ register2 ->
dst__2 = registerName register1 zeroh
code = registerCode register2 dst__2
src__2 = registerName register2 dst__2
code__2 = if isFixed register2
then code . mkSeqInstr (FMOV src__2 dst__2)
#endif /* alpha_TARGET_ARCH */
2579 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Floating point assignment to memory
-- NOTE(review): the "let"/"return code" lines are elided here.
-- i386 stores via the x87 GST pseudo-instruction; x86_64 uses a plain
-- (SSE) MOV of the source register.
assignMem_FltCode pk addr src = do
(src_reg, src_code) <- getNonClobberedReg src
Amode addr addr_code <- getAmode addr
code = src_code `appOL`
IF_ARCH_i386(GST pk src_reg addr,
MOV pk (OpReg src_reg) (OpAddr addr))
-- Floating point assignment to a register/temporary
assignReg_FltCode pk reg src = do
src_code <- getAnyReg src
return (src_code (getRegisterReg reg))
#endif /* i386_TARGET_ARCH */
2601 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- Floating point assignment to memory
-- NOTE(review): "let"/"return code__2" lines are elided here.  If the
-- source's width differs from the store size, it is first converted
-- through a scratch register with FxTOy.
assignMem_FltCode pk addr src = do
Amode dst__2 code1 <- getAmode addr
(src__2, code2) <- getSomeReg src
tmp1 <- getNewRegNat pk
pk__2 = cmmExprType src
code__2 = code1 `appOL` code2 `appOL`
if sizeToWidth pk == typeWidth pk__2
then unitOL (ST pk src__2 dst__2)
else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
, ST pk tmp1 dst__2]
-- Floating point assignment to a register/temporary
assignReg_FltCode pk dstCmmReg srcCmmExpr = do
srcRegister <- getRegister srcCmmExpr
let dstReg = getRegisterReg dstCmmReg
return $ case srcRegister of
Any _ code -> code dstReg
Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
-- On PowerPC, float assignments reuse the integer assignment paths:
-- they are plain loads/stores/moves of the appropriate size.
assignMem_FltCode = assignMem_IntCode
assignReg_FltCode = assignReg_IntCode
#endif /* powerpc_TARGET_ARCH */
2639 -- -----------------------------------------------------------------------------
2640 -- Generating an non-local jump
2642 -- (If applicable) Do not fill the delay slots here; you will confuse the
2643 -- register allocator.
-- | Generate a non-local (tail-call style) jump to the given target.
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH
-- NOTE(review): elided listing — "where"/"let"/else lines are missing.
genJump (CmmLabel lbl)
| isAsmTemp lbl = returnInstr (BR target)
| otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
target = ImmCLbl lbl
= getRegister tree `thenNat` \ register ->
getNewRegNat PtrRep `thenNat` \ tmp ->
dst = registerName register pv
code = registerCode register pv
target = registerName register pv
if isFixed register then
returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
#endif /* alpha_TARGET_ARCH */
2672 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- jump through a memory operand (indirect via computed address)
genJump (CmmLoad mem pk) = do
Amode target code <- getAmode mem
return (code `snocOL` JMP (OpAddr target))
-- jump to a literal target (immediate)
genJump (CmmLit lit) = do
return (unitOL (JMP (OpImm (litToImm lit))))
-- NOTE(review): the fallback head ("genJump expr = do", original line
-- 2683) is elided: evaluate into a register and jump through it.
(reg,code) <- getSomeReg expr
return (code `snocOL` JMP (OpReg reg))
#endif /* i386_TARGET_ARCH */
2689 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- NOTE(review): "where" and the fallback equation head are elided.
-- The trailing NOPs fill SPARC branch delay slots.
genJump (CmmLit (CmmLabel lbl))
= return (toOL [CALL (Left target) 0 True, NOP])
target = ImmCLbl lbl
(target, code) <- getSomeReg tree
return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
#endif /* sparc_TARGET_ARCH */
#if powerpc_TARGET_ARCH
genJump (CmmLit (CmmLabel lbl))
= return (unitOL $ JMP lbl)
-- NOTE(review): the fallback head is elided; computed targets go via
-- the count register: MTCTR then BCTR.
(target,code) <- getSomeReg tree
return (code `snocOL` MTCTR target `snocOL` BCTR [])
#endif /* powerpc_TARGET_ARCH */
2716 -- -----------------------------------------------------------------------------
2717 -- Unconditional branches
-- | Generate an unconditional branch to a local basic block.
genBranch :: BlockId -> NatM InstrBlock
genBranch target = return (toOL (mkBranchInstr target))
2723 -- -----------------------------------------------------------------------------
2724 -- Conditional jumps
2727 Conditional jumps are always to local labels, so we can use branch
instructions. We peek at the arguments to decide what kind of
comparison to do.
2731 ALPHA: For comparisons with 0, we're laughing, because we can just do
2732 the desired conditional branch.
2734 I386: First, we have to ensure that the condition
2735 codes are set according to the supplied comparison operation.
2737 SPARC: First, we have to ensure that the condition codes are set
2738 according to the supplied comparison operation. We generate slightly
2739 different code for floating point comparisons, because a floating
2740 point operation cannot directly precede a @BF@. We assume the worst
2741 and fill that slot with a @NOP@.
2743 SPARC: Do not fill the delay slots here; you will confuse the register
-- NOTE(review): continuation of the genCondJump type signature; its
-- first line ("genCondJump") and result type are elided here.
:: BlockId -- the branch target
-> CmmExpr -- the condition on which to branch
2753 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH
-- NOTE(review): elided listing — many "let"/"in"/where lines and some
-- cmpOp alternatives are missing from these old StixTree-based
-- equations.  Each equation compares against an inverted condition and
-- branches on the inverted result (see the (instr, cond) tables).
genCondJump id (StPrim op [x, StInt 0])
= getRegister x `thenNat` \ register ->
getNewRegNat (registerRep register)
code = registerCode register tmp
value = registerName register tmp
pk = registerRep register
target = ImmCLbl lbl
returnSeq code [BI (cmpOp op) value target]
cmpOp CharGtOp = GTT
cmpOp CharEqOp = EQQ
cmpOp CharLtOp = LTT
cmpOp WordGeOp = ALWAYS
cmpOp WordEqOp = EQQ
cmpOp WordLtOp = NEVER
cmpOp WordLeOp = EQQ
cmpOp AddrGeOp = ALWAYS
cmpOp AddrEqOp = EQQ
cmpOp AddrLtOp = NEVER
cmpOp AddrLeOp = EQQ
-- comparison of a float against literal 0.0: branch on BF
genCondJump lbl (StPrim op [x, StDouble 0.0])
= getRegister x `thenNat` \ register ->
getNewRegNat (registerRep register)
code = registerCode register tmp
value = registerName register tmp
pk = registerRep register
target = ImmCLbl lbl
return (code . mkSeqInstr (BF (cmpOp op) value target))
cmpOp FloatGtOp = GTT
cmpOp FloatGeOp = GE
cmpOp FloatEqOp = EQQ
cmpOp FloatNeOp = NE
cmpOp FloatLtOp = LTT
cmpOp FloatLeOp = LE
cmpOp DoubleGtOp = GTT
cmpOp DoubleGeOp = GE
cmpOp DoubleEqOp = EQQ
cmpOp DoubleNeOp = NE
cmpOp DoubleLtOp = LTT
cmpOp DoubleLeOp = LE
-- general float comparison: FCMP with an inverted test, then branch
-- on the inverted condition
genCondJump lbl (StPrim op [x, y])
= trivialFCode pr instr x y `thenNat` \ register ->
getNewRegNat FF64 `thenNat` \ tmp ->
code = registerCode register tmp
result = registerName register tmp
target = ImmCLbl lbl
return (code . mkSeqInstr (BF cond result target))
pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
fltCmpOp op = case op of
(instr, cond) = case op of
FloatGtOp -> (FCMP TF LE, EQQ)
FloatGeOp -> (FCMP TF LTT, EQQ)
FloatEqOp -> (FCMP TF EQQ, NE)
FloatNeOp -> (FCMP TF EQQ, EQQ)
FloatLtOp -> (FCMP TF LTT, NE)
FloatLeOp -> (FCMP TF LE, NE)
DoubleGtOp -> (FCMP TF LE, EQQ)
DoubleGeOp -> (FCMP TF LTT, EQQ)
DoubleEqOp -> (FCMP TF EQQ, NE)
DoubleNeOp -> (FCMP TF EQQ, EQQ)
DoubleLtOp -> (FCMP TF LTT, NE)
DoubleLeOp -> (FCMP TF LE, NE)
-- general integer comparison: CMP with an inverted test, then BI on
-- the inverted condition
genCondJump lbl (StPrim op [x, y])
= trivialCode instr x y `thenNat` \ register ->
getNewRegNat IntRep `thenNat` \ tmp ->
code = registerCode register tmp
result = registerName register tmp
target = ImmCLbl lbl
return (code . mkSeqInstr (BI cond result target))
(instr, cond) = case op of
CharGtOp -> (CMP LE, EQQ)
CharGeOp -> (CMP LTT, EQQ)
CharEqOp -> (CMP EQQ, NE)
CharNeOp -> (CMP EQQ, EQQ)
CharLtOp -> (CMP LTT, NE)
CharLeOp -> (CMP LE, NE)
IntGtOp -> (CMP LE, EQQ)
IntGeOp -> (CMP LTT, EQQ)
IntEqOp -> (CMP EQQ, NE)
IntNeOp -> (CMP EQQ, EQQ)
IntLtOp -> (CMP LTT, NE)
IntLeOp -> (CMP LE, NE)
WordGtOp -> (CMP ULE, EQQ)
WordGeOp -> (CMP ULT, EQQ)
WordEqOp -> (CMP EQQ, NE)
WordNeOp -> (CMP EQQ, EQQ)
WordLtOp -> (CMP ULT, NE)
WordLeOp -> (CMP ULE, NE)
AddrGtOp -> (CMP ULE, EQQ)
AddrGeOp -> (CMP ULT, EQQ)
AddrEqOp -> (CMP EQQ, NE)
AddrNeOp -> (CMP EQQ, EQQ)
AddrLtOp -> (CMP ULT, NE)
AddrLeOp -> (CMP ULE, NE)
#endif /* alpha_TARGET_ARCH */
2898 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2900 #if i386_TARGET_ARCH
-- | i386 conditional jump: evaluate the condition into a CondCode,
-- then append a JXX to the target block.
genCondJump id bool = do
    CondCode _ cond setup_code <- getCondCode bool
    return (setup_code `snocOL` JXX cond id)
2908 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if x86_64_TARGET_ARCH
-- NOTE(review): elided listing — the if/then/else skeleton and the
-- bodies of plain_test / or_unordered / and_ordered are missing.
-- The visible structure: non-float conditions get a single JXX;
-- float conditions need extra branching for the unordered (NaN) case.
genCondJump id bool = do
CondCode is_float cond cond_code <- getCondCode bool
return (cond_code `snocOL` JXX cond id)
lbl <- getBlockIdNat
-- see comment with condFltReg
let code = case cond of
plain_test = unitOL (
or_unordered = toOL [
and_ordered = toOL [
return (cond_code `appOL` code)
2944 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH
-- NOTE(review): elided listing — the "let"/"return" lines selecting
-- between the two branch sequences are missing.  Float branches (BF)
-- get a leading NOP since an FP op cannot directly precede a BF, and
-- both forms carry a trailing delay-slot NOP.
genCondJump bid bool = do
CondCode is_float cond code <- getCondCode bool
then [NOP, BF cond False bid, NOP]
else [BI cond False bid, NOP]
#endif /* sparc_TARGET_ARCH */
2962 #if powerpc_TARGET_ARCH
-- | PowerPC conditional jump: evaluate the condition, then emit a
-- BCC to the target block.
genCondJump id bool = do
    CondCode _is_float cc setup <- getCondCode bool
    return (setup `snocOL` BCC cc id)
2968 #endif /* powerpc_TARGET_ARCH */
2971 -- -----------------------------------------------------------------------------
2972 -- Generating C calls
2974 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2975 -- @get_arg@, which moves the arguments to the correct registers/stack
2976 -- locations. Apart from that, the code is easy.
2978 -- (If applicable) Do not fill the delay slots here; you will confuse the
2979 -- register allocator.
-- NOTE(review): part of the genCCall type signature; the head line and
-- the result type (presumably "-> NatM InstrBlock") are elided here.
:: CmmCallTarget -- function to call
-> HintedCmmFormals -- where to put the result
-> HintedCmmActuals -- arguments (of mixed type)
2987 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if alpha_TARGET_ARCH
-- NOTE(review): elided listing — "let"/"in"/else lines and the end of
-- the large comment block (its closing "-}") are missing here.
genCCall fn cconv result_regs args
= mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
`thenNat` \ ((unused,_), argCode) ->
nRegs = length allArgRegs - length unused
code = asmSeqThen (map ($ []) argCode)
LDA pv (AddrImm (ImmLab (ptext fn))),
JSR ra (AddrReg pv) nRegs,
LDGP gp (AddrReg ra)]
------------------------
{- Try to get a value into a specific register (or registers) for
a call. The first 6 arguments go into the appropriate
argument register (separate registers for integer and floating
point arguments, but used in lock-step), and the remaining
arguments are dumped to the stack, beginning at 0(sp). Our
first argument is a pair of the list of remaining argument
registers to be assigned for this call and the next stack
offset to use for overflowing arguments. This way,
@get_Arg@ can be applied to all of a call's arguments using
:: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
-> StixTree -- Current argument
-> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
-- We have to use up all of our argument registers first...
get_arg ((iDst,fDst):dsts, offset) arg
= getRegister arg `thenNat` \ register ->
reg = if isFloatType pk then fDst else iDst
code = registerCode register reg
src = registerName register reg
pk = registerRep register
if isFloatType pk then
((dsts, offset), if isFixed register then
code . mkSeqInstr (FMOV src fDst)
((dsts, offset), if isFixed register then
code . mkSeqInstr (OR src (RIReg src) iDst)
-- Once we have run out of argument registers, we move to the
get_arg ([], offset) arg
= getRegister arg `thenNat` \ register ->
getNewRegNat (registerRep register)
code = registerCode register tmp
src = registerName register tmp
pk = registerRep register
sz = primRepToSize pk
return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
#endif /* alpha_TARGET_ARCH */
3059 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH
-- NOTE(review): elided listing — case heads, "let"/"where"/"#else"
-- lines and several statement continuations are missing throughout
-- this function.  Comments describe only the visible code.
genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
-- we only cope with a single result for foreign calls
genCCall (CmmPrim op) [CmmHinted r _] args = do
l1 <- getNewLabelNat
l2 <- getNewLabelNat
-- inline x87 implementations of sqrt/sin/cos/tan; everything else
-- falls through to outOfLineFloatOp
MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
other_op -> outOfLineFloatOp op r args
actuallyInlineFloatOp instr size [CmmHinted x _]
= do res <- trivialUFCode size (instr size) x
return (any (getRegisterReg (CmmLocal r)))
-- general case: push args right-to-left, call, pop, move results
genCCall target dest_regs args = do
sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
#if !darwin_TARGET_OS
tot_arg_size = sum sizes
-- Darwin: the stack must stay 16-byte aligned, so pad the args
raw_arg_size = sum sizes
tot_arg_size = roundTo 16 raw_arg_size
arg_pad_size = tot_arg_size - raw_arg_size
delta0 <- getDeltaNat
setDeltaNat (delta0 - arg_pad_size)
push_codes <- mapM push_arg (reverse args)
delta <- getDeltaNat
-- deal with static vs dynamic call targets
(callinsns,cconv) <-
CmmCallee (CmmLit (CmmLabel lbl)) conv
-> -- ToDo: stdcall arg sizes
return (unitOL (CALL (Left fn_imm) []), conv)
where fn_imm = ImmCLbl lbl
-> do { (dyn_c, dyn_r) <- get_op expr
; ASSERT( isWord32 (cmmExprType expr) )
return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
#if darwin_TARGET_OS
= toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
DELTA (delta0 - arg_pad_size)]
`appOL` concatOL push_codes
= concatOL push_codes
call = callinsns `appOL`
-- Deallocate parameters after call for ccall;
-- but not for stdcall (callee does it)
(if cconv == StdCallConv || tot_arg_size==0 then [] else
[ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
[DELTA (delta + tot_arg_size)]
setDeltaNat (delta + tot_arg_size)
-- assign the results, if necessary
assign_code [] = nilOL
assign_code [CmmHinted dest _hint]
| isFloatType ty = unitOL (GMOV fake0 r_dest)
| isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
MOV II32 (OpReg edx) (OpReg r_dest_hi)]
| otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
ty = localRegType dest
r_dest_hi = getHiVRegFromLo r_dest
r_dest = getRegisterReg (CmmLocal dest)
assign_code many = panic "genCCall.assign_code many"
return (push_code `appOL`
assign_code dest_regs)
arg_size :: CmmType -> Int -- Width in bytes
arg_size ty = widthInBytes (typeWidth ty)
roundTo a x | x `mod` a == 0 = x
| otherwise = x + a - (x `mod` a)
push_arg :: HintedCmmActual {-current argument-}
-> NatM InstrBlock -- code
push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
| isWord64 arg_ty = do
ChildCode64 code r_lo <- iselExpr64 arg
delta <- getDeltaNat
setDeltaNat (delta - 8)
r_hi = getHiVRegFromLo r_lo
return ( code `appOL`
toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
PUSH II32 (OpReg r_lo), DELTA (delta - 8),
(code, reg) <- get_op arg
delta <- getDeltaNat
let size = arg_size arg_ty -- Byte size
setDeltaNat (delta-size)
-- floats cannot be PUSHed; make room on the stack and GST into it
if (isFloatType arg_ty)
then return (code `appOL`
toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
GST (floatSize (typeWidth arg_ty))
reg (AddrBaseIndex (EABaseReg esp)
else return (code `snocOL`
PUSH II32 (OpReg reg) `snocOL`
arg_ty = cmmExprType arg
get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
(reg,code) <- getSomeReg op
#endif /* i386_TARGET_ARCH */
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Compile a floating-point CallishMachOp as an out-of-line C call to
-- the corresponding libm routine (sqrtf/sin/pow/...).
-- NOTE(review): elided listing — the "-> NatM InstrBlock" result type,
-- "= do", if/then/else skeleton and "fn = case mop of" lines are
-- missing here.
outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
outOfLineFloatOp mop res args
dflags <- getDynFlagsNat
targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
let target = CmmCallee targetExpr CCallConv
-- a 64-bit float result can be received directly; otherwise the call
-- goes through a fresh F64 temporary which is then assigned to res
if isFloat64 (localRegType res)
stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
tmp = LocalReg uq f64
code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
return (code1 `appOL` code2)
lbl = mkForeignLabel fn Nothing False
-- map each machop to its libm symbol name
MO_F32_Sqrt -> fsLit "sqrtf"
MO_F32_Sin -> fsLit "sinf"
MO_F32_Cos -> fsLit "cosf"
MO_F32_Tan -> fsLit "tanf"
MO_F32_Exp -> fsLit "expf"
MO_F32_Log -> fsLit "logf"
MO_F32_Asin -> fsLit "asinf"
MO_F32_Acos -> fsLit "acosf"
MO_F32_Atan -> fsLit "atanf"
MO_F32_Sinh -> fsLit "sinhf"
MO_F32_Cosh -> fsLit "coshf"
MO_F32_Tanh -> fsLit "tanhf"
MO_F32_Pwr -> fsLit "powf"
MO_F64_Sqrt -> fsLit "sqrt"
MO_F64_Sin -> fsLit "sin"
MO_F64_Cos -> fsLit "cos"
MO_F64_Tan -> fsLit "tan"
MO_F64_Exp -> fsLit "exp"
MO_F64_Log -> fsLit "log"
MO_F64_Asin -> fsLit "asin"
MO_F64_Acos -> fsLit "acos"
MO_F64_Atan -> fsLit "atan"
MO_F64_Sinh -> fsLit "sinh"
MO_F64_Cosh -> fsLit "cosh"
MO_F64_Tanh -> fsLit "tanh"
MO_F64_Pwr -> fsLit "pow"
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3274 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3276 #if x86_64_TARGET_ARCH
3278 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3279 -- write barrier compiles to no code on x86/x86-64;
3280 -- we keep it this long in order to prevent earlier optimisations.
3283 genCCall (CmmPrim op) [CmmHinted r _] args =
3284 outOfLineFloatOp op r args
3286 genCCall target dest_regs args = do
3288 -- load up the register arguments
3289 (stack_args, aregs, fregs, load_args_code)
3290 <- load_args args allArgRegs allFPArgRegs nilOL
3293 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3294 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3295 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3296 -- for annotating the call instruction with
3298 sse_regs = length fp_regs_used
3300 tot_arg_size = arg_size * length stack_args
3302 -- On entry to the called function, %rsp should be aligned
3303 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3304 -- the return address is 16-byte aligned). In STG land
3305 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3306 -- need to make sure we push a multiple of 16-bytes of args,
3307 -- plus the return address, to get the correct alignment.
3308 -- Urg, this is hard. We need to feed the delta back into
3309 -- the arg pushing code.
3310 (real_size, adjust_rsp) <-
3311 if tot_arg_size `rem` 16 == 0
3312 then return (tot_arg_size, nilOL)
3313 else do -- we need to adjust...
3314 delta <- getDeltaNat
3315 setDeltaNat (delta-8)
3316 return (tot_arg_size+8, toOL [
3317 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3321 -- push the stack args, right to left
3322 push_code <- push_args (reverse stack_args) nilOL
3323 delta <- getDeltaNat
3325 -- deal with static vs dynamic call targets
3326 (callinsns,cconv) <-
3329 CmmCallee (CmmLit (CmmLabel lbl)) conv
3330 -> -- ToDo: stdcall arg sizes
3331 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3332 where fn_imm = ImmCLbl lbl
3334 -> do (dyn_r, dyn_c) <- getSomeReg expr
3335 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3338 -- The x86_64 ABI requires us to set %al to the number of SSE
3339 -- registers that contain arguments, if the called routine
3340 -- is a varargs function. We don't know whether it's a
3341 -- varargs function or not, so we have to assume it is.
3343 -- It's not safe to omit this assignment, even if the number
3344 -- of SSE regs in use is zero. If %al is larger than 8
3345 -- on entry to a varargs function, seg faults ensue.
3346 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3348 let call = callinsns `appOL`
3350 -- Deallocate parameters after call for ccall;
3351 -- but not for stdcall (callee does it)
3352 (if cconv == StdCallConv || real_size==0 then [] else
3353 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3355 [DELTA (delta + real_size)]
3358 setDeltaNat (delta + real_size)
3361 -- assign the results, if necessary
3362 assign_code [] = nilOL
3363 assign_code [CmmHinted dest _hint] =
3364 case typeWidth rep of
3365 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3366 W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3367 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3369 rep = localRegType dest
3370 r_dest = getRegisterReg (CmmLocal dest)
3371 assign_code many = panic "genCCall.assign_code many"
3373 return (load_args_code `appOL`
3376 assign_eax sse_regs `appOL`
3378 assign_code dest_regs)
3381 arg_size = 8 -- always, at the mo
3383 load_args :: [CmmHinted CmmExpr]
3384 -> [Reg] -- int regs avail for args
3385 -> [Reg] -- FP regs avail for args
3387 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3388 load_args args [] [] code = return (args, [], [], code)
3389 -- no more regs to use
3390 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3391 -- no more args to push
3392 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3393 | isFloatType arg_rep =
3397 arg_code <- getAnyReg arg
3398 load_args rest aregs rs (code `appOL` arg_code r)
3403 arg_code <- getAnyReg arg
3404 load_args rest rs fregs (code `appOL` arg_code r)
3406 arg_rep = cmmExprType arg
3409 (args',ars,frs,code') <- load_args rest aregs fregs code
3410 return ((CmmHinted arg hint):args', ars, frs, code')
3412 push_args [] code = return code
3413 push_args ((CmmHinted arg hint):rest) code
3414 | isFloatType arg_rep = do
3415 (arg_reg, arg_code) <- getSomeReg arg
3416 delta <- getDeltaNat
3417 setDeltaNat (delta-arg_size)
3418 let code' = code `appOL` arg_code `appOL` toOL [
3419 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3420 DELTA (delta-arg_size),
3421 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3422 push_args rest code'
3425 -- we only ever generate word-sized function arguments. Promotion
3426 -- has already happened: our Int8# type is kept sign-extended
3427 -- in an Int#, for example.
3428 ASSERT(width == W64) return ()
3429 (arg_op, arg_code) <- getOperand arg
3430 delta <- getDeltaNat
3431 setDeltaNat (delta-arg_size)
3432 let code' = code `appOL` toOL [PUSH II64 arg_op,
3433 DELTA (delta-arg_size)]
3434 push_args rest code'
3436 arg_rep = cmmExprType arg
3437 width = typeWidth arg_rep
3440 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3442 #if sparc_TARGET_ARCH
3444 The SPARC calling convention is an absolute
3445 nightmare. The first 6x32 bits of arguments are mapped into
3446 %o0 through %o5, and the remaining arguments are dumped to the
3447 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3449 If we have to put args on the stack, move %o6==%sp down by
3450 the number of words to go on the stack, to ensure there's enough space.
3452 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3453 16 words above the stack pointer is a word for the address of
3454 a structure return value. I use this as a temporary location
3455 for moving values from float to int regs. Certainly it isn't
3456 safe to put anything in the 16 words starting at %sp, since
3457 this area can get trashed at any time due to window overflows
3458 caused by signal handlers.
3460 A final complication (if the above isn't enough) is that
3461 we can't blithely calculate the arguments one by one into
3462 %o0 .. %o5. Consider the following nested calls:
3466 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3467 the inner call will itself use %o0, which trashes the value put there
3468 in preparation for the outer call. Upshot: we need to calculate the
3469 args into temporary regs, and move those to arg regs or onto the
3470 stack only immediately prior to the call proper. Sigh.
3473 genCCall target dest_regs argsAndHints = do
3475 args = map hintlessCmm argsAndHints
3476 argcode_and_vregs <- mapM arg_to_int_vregs args
3478 (argcodes, vregss) = unzip argcode_and_vregs
3479 n_argRegs = length allArgRegs
3480 n_argRegs_used = min (length vregs) n_argRegs
3481 vregs = concat vregss
3482 -- deal with static vs dynamic call targets
3483 callinsns <- (case target of
3484 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3485 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3486 CmmCallee expr conv -> do
3487 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3488 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3490 (res, reduce) <- outOfLineFloatOp mop
3491 lblOrMopExpr <- case res of
3493 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3495 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3496 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3497 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3501 argcode = concatOL argcodes
3502 (move_sp_down, move_sp_up)
3503 = let diff = length vregs - n_argRegs
3504 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3507 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3510 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3512 -- assign the results, if necessary
3513 assign_code [] = nilOL
3515 assign_code [CmmHinted dest _hint]
3516 = let rep = localRegType dest
3517 width = typeWidth rep
3518 r_dest = getRegisterReg (CmmLocal dest)
3523 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3527 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3529 | not $ isFloatType rep
3531 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3535 return (argcode `appOL`
3536 move_sp_down `appOL`
3537 transfer_code `appOL`
3541 assign_code dest_regs)
3543 -- move args from the integer vregs into which they have been
3544 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3545 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3547 move_final [] _ offset -- all args done
3550 move_final (v:vs) [] offset -- out of aregs; move to stack
3551 = ST II32 v (spRel offset)
3552 : move_final vs [] (offset+1)
3554 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3555 = OR False g0 (RIReg v) a
3556 : move_final vs az offset
3558 -- generate code to calculate an argument, and move it into one
3559 -- or two integer vregs.
3560 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3561 arg_to_int_vregs arg
3562 | isWord64 (cmmExprType arg)
3564 (ChildCode64 code r_lo) <- iselExpr64 arg
3566 r_hi = getHiVRegFromLo r_lo
3567 return (code, [r_hi, r_lo])
3570 (src, code) <- getSomeReg arg
3571 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3573 pk = cmmExprType arg
3574 Just f0_high = fPair f0
3575 case cmmTypeSize pk of
3577 v1 <- getNewRegNat II32
3578 v2 <- getNewRegNat II32
3581 FMOV FF64 src f0 `snocOL`
3582 ST FF32 f0 (spRel 16) `snocOL`
3583 LD II32 (spRel 16) v1 `snocOL`
3584 ST FF32 f0_high (spRel 16) `snocOL`
3585 LD II32 (spRel 16) v2
3590 v1 <- getNewRegNat II32
3593 ST FF32 src (spRel 16) `snocOL`
3594 LD II32 (spRel 16) v1
3599 v1 <- getNewRegNat II32
3601 code `snocOL` OR False g0 (RIReg src) v1
3605 outOfLineFloatOp mop =
3607 dflags <- getDynFlagsNat
3608 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3609 mkForeignLabel functionName Nothing True
3610 let mopLabelOrExpr = case mopExpr of
3611 CmmLit (CmmLabel lbl) -> Left lbl
3613 return (mopLabelOrExpr, reduce)
3615 (reduce, functionName) = case mop of
3616 MO_F32_Exp -> (True, fsLit "exp")
3617 MO_F32_Log -> (True, fsLit "log")
3618 MO_F32_Sqrt -> (True, fsLit "sqrt")
3620 MO_F32_Sin -> (True, fsLit "sin")
3621 MO_F32_Cos -> (True, fsLit "cos")
3622 MO_F32_Tan -> (True, fsLit "tan")
3624 MO_F32_Asin -> (True, fsLit "asin")
3625 MO_F32_Acos -> (True, fsLit "acos")
3626 MO_F32_Atan -> (True, fsLit "atan")
3628 MO_F32_Sinh -> (True, fsLit "sinh")
3629 MO_F32_Cosh -> (True, fsLit "cosh")
3630 MO_F32_Tanh -> (True, fsLit "tanh")
3632 MO_F64_Exp -> (False, fsLit "exp")
3633 MO_F64_Log -> (False, fsLit "log")
3634 MO_F64_Sqrt -> (False, fsLit "sqrt")
3636 MO_F64_Sin -> (False, fsLit "sin")
3637 MO_F64_Cos -> (False, fsLit "cos")
3638 MO_F64_Tan -> (False, fsLit "tan")
3640 MO_F64_Asin -> (False, fsLit "asin")
3641 MO_F64_Acos -> (False, fsLit "acos")
3642 MO_F64_Atan -> (False, fsLit "atan")
3644 MO_F64_Sinh -> (False, fsLit "sinh")
3645 MO_F64_Cosh -> (False, fsLit "cosh")
3646 MO_F64_Tanh -> (False, fsLit "tanh")
3648 other -> pprPanic "outOfLineFloatOp(sparc) "
3649 (pprCallishMachOp mop)
3651 #endif /* sparc_TARGET_ARCH */
3653 #if powerpc_TARGET_ARCH
3655 #if darwin_TARGET_OS || linux_TARGET_OS
3657 The PowerPC calling convention for Darwin/Mac OS X
3658 is described in Apple's document
3659 "Inside Mac OS X - Mach-O Runtime Architecture".
3661 PowerPC Linux uses the System V Release 4 Calling Convention
3662 for PowerPC. It is described in the
3663 "System V Application Binary Interface PowerPC Processor Supplement".
3665 Both conventions are similar:
3666 Parameters may be passed in general-purpose registers starting at r3, in
3667 floating point registers starting at f1, or on the stack.
3669 But there are substantial differences:
3670 * The number of registers used for parameter passing and the exact set of
3671 nonvolatile registers differs (see MachRegs.lhs).
3672 * On Darwin, stack space is always reserved for parameters, even if they are
3673 passed in registers. The called routine may choose to save parameters from
3674 registers to the corresponding space on the stack.
3675 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3676 parameter is passed in an FPR.
3677 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3678 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3679 Darwin just treats an I64 like two separate II32s (high word first).
3680 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3681 4-byte aligned like everything else on Darwin.
3682 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3683 PowerPC Linux does not agree, so neither do we.
According to both conventions, the parameter area should be part of the
3686 caller's stack frame, allocated in the caller's prologue code (large enough
3687 to hold the parameter lists for all called routines). The NCG already
3688 uses the stack for register spilling, leaving 64 bytes free at the top.
3689 If we need a larger parameter area than that, we just allocate a new stack
3690 frame just before ccalling.
3694 genCCall (CmmPrim MO_WriteBarrier) _ _
3695 = return $ unitOL LWSYNC
3697 genCCall target dest_regs argsAndHints
3698 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3699 -- we rely on argument promotion in the codeGen
3701 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3703 allArgRegs allFPArgRegs
3707 (labelOrExpr, reduceToFF32) <- case target of
3708 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3709 CmmCallee expr conv -> return (Right expr, False)
3710 CmmPrim mop -> outOfLineFloatOp mop
3712 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3713 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3718 `snocOL` BL lbl usedRegs
3721 (dynReg, dynCode) <- getSomeReg dyn
3723 `snocOL` MTCTR dynReg
3725 `snocOL` BCTRL usedRegs
3728 #if darwin_TARGET_OS
3729 initialStackOffset = 24
3730 -- size of linkage area + size of arguments, in bytes
3731 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3732 map (widthInBytes . typeWidth) argReps
3733 #elif linux_TARGET_OS
3734 initialStackOffset = 8
3735 stackDelta finalStack = roundTo 16 finalStack
3737 args = map hintlessCmm argsAndHints
3738 argReps = map cmmExprType args
3740 roundTo a x | x `mod` a == 0 = x
3741 | otherwise = x + a - (x `mod` a)
3743 move_sp_down finalStack
3745 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3748 where delta = stackDelta finalStack
3749 move_sp_up finalStack
3751 toOL [ADD sp sp (RIImm (ImmInt delta)),
3754 where delta = stackDelta finalStack
3757 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3758 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3759 accumCode accumUsed | isWord64 arg_ty =
3761 ChildCode64 code vr_lo <- iselExpr64 arg
3762 let vr_hi = getHiVRegFromLo vr_lo
3764 #if darwin_TARGET_OS
3769 (accumCode `appOL` code
3770 `snocOL` storeWord vr_hi gprs stackOffset
3771 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3772 ((take 2 gprs) ++ accumUsed)
3774 storeWord vr (gpr:_) offset = MR gpr vr
3775 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3777 #elif linux_TARGET_OS
3778 let stackOffset' = roundTo 8 stackOffset
3779 stackCode = accumCode `appOL` code
3780 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3781 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3782 regCode hireg loreg =
3783 accumCode `appOL` code
3784 `snocOL` MR hireg vr_hi
3785 `snocOL` MR loreg vr_lo
3788 hireg : loreg : regs | even (length gprs) ->
3789 passArguments args regs fprs stackOffset
3790 (regCode hireg loreg) (hireg : loreg : accumUsed)
3791 _skipped : hireg : loreg : regs ->
3792 passArguments args regs fprs stackOffset
3793 (regCode hireg loreg) (hireg : loreg : accumUsed)
3794 _ -> -- only one or no regs left
3795 passArguments args [] fprs (stackOffset'+8)
3799 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3800 | reg : _ <- regs = do
3801 register <- getRegister arg
3802 let code = case register of
3803 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3804 Any _ acode -> acode reg
3808 #if darwin_TARGET_OS
3809 -- The Darwin ABI requires that we reserve stack slots for register parameters
3810 (stackOffset + stackBytes)
3811 #elif linux_TARGET_OS
3812 -- ... the SysV ABI doesn't.
3815 (accumCode `appOL` code)
3818 (vr, code) <- getSomeReg arg
3822 (stackOffset' + stackBytes)
3823 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
3826 #if darwin_TARGET_OS
3827 -- stackOffset is at least 4-byte aligned
3828 -- The Darwin ABI is happy with that.
3829 stackOffset' = stackOffset
3831 -- ... the SysV ABI requires 8-byte alignment for doubles.
3832 stackOffset' | isFloatType rep && typeWidth rep == W64 =
3833 roundTo 8 stackOffset
3834 | otherwise = stackOffset
3836 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3837 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
3838 II32 -> (1, 0, 4, gprs)
3839 #if darwin_TARGET_OS
3840 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3842 FF32 -> (1, 1, 4, fprs)
3843 FF64 -> (2, 1, 8, fprs)
3844 #elif linux_TARGET_OS
3845 -- ... the SysV ABI doesn't.
3846 FF32 -> (0, 1, 4, fprs)
3847 FF64 -> (0, 1, 8, fprs)
3850 moveResult reduceToFF32 =
3853 [CmmHinted dest _hint]
3854 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
3855 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
3856 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
3858 | otherwise -> unitOL (MR r_dest r3)
3859 where rep = cmmRegType (CmmLocal dest)
3860 r_dest = getRegisterReg (CmmLocal dest)
3862 outOfLineFloatOp mop =
3864 dflags <- getDynFlagsNat
3865 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3866 mkForeignLabel functionName Nothing True
3867 let mopLabelOrExpr = case mopExpr of
3868 CmmLit (CmmLabel lbl) -> Left lbl
3870 return (mopLabelOrExpr, reduce)
3872 (functionName, reduce) = case mop of
3873 MO_F32_Exp -> (fsLit "exp", True)
3874 MO_F32_Log -> (fsLit "log", True)
3875 MO_F32_Sqrt -> (fsLit "sqrt", True)
3877 MO_F32_Sin -> (fsLit "sin", True)
3878 MO_F32_Cos -> (fsLit "cos", True)
3879 MO_F32_Tan -> (fsLit "tan", True)
3881 MO_F32_Asin -> (fsLit "asin", True)
3882 MO_F32_Acos -> (fsLit "acos", True)
3883 MO_F32_Atan -> (fsLit "atan", True)
3885 MO_F32_Sinh -> (fsLit "sinh", True)
3886 MO_F32_Cosh -> (fsLit "cosh", True)
3887 MO_F32_Tanh -> (fsLit "tanh", True)
3888 MO_F32_Pwr -> (fsLit "pow", True)
3890 MO_F64_Exp -> (fsLit "exp", False)
3891 MO_F64_Log -> (fsLit "log", False)
3892 MO_F64_Sqrt -> (fsLit "sqrt", False)
3894 MO_F64_Sin -> (fsLit "sin", False)
3895 MO_F64_Cos -> (fsLit "cos", False)
3896 MO_F64_Tan -> (fsLit "tan", False)
3898 MO_F64_Asin -> (fsLit "asin", False)
3899 MO_F64_Acos -> (fsLit "acos", False)
3900 MO_F64_Atan -> (fsLit "atan", False)
3902 MO_F64_Sinh -> (fsLit "sinh", False)
3903 MO_F64_Cosh -> (fsLit "cosh", False)
3904 MO_F64_Tanh -> (fsLit "tanh", False)
3905 MO_F64_Pwr -> (fsLit "pow", False)
3906 other -> pprPanic "genCCall(ppc): unknown callish op"
3907 (pprCallishMachOp other)
3909 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3911 #endif /* powerpc_TARGET_ARCH */
3914 -- -----------------------------------------------------------------------------
3915 -- Generating a table-branch
3917 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3919 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3923 (reg,e_code) <- getSomeReg expr
3924 lbl <- getNewLabelNat
3925 dflags <- getDynFlagsNat
3926 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3927 (tableReg,t_code) <- getSomeReg $ dynRef
3929 jumpTable = map jumpTableEntryRel ids
3931 jumpTableEntryRel Nothing
3932 = CmmStaticLit (CmmInt 0 wordWidth)
3933 jumpTableEntryRel (Just (BlockId id))
3934 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3935 where blockLabel = mkAsmTempLabel id
3937 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3938 (EAIndex reg wORD_SIZE) (ImmInt 0))
3940 #if x86_64_TARGET_ARCH
3941 #if darwin_TARGET_OS
3942 -- on Mac OS X/x86_64, put the jump table in the text section
3943 -- to work around a limitation of the linker.
3944 -- ld64 is unable to handle the relocations for
3946 -- if L0 is not preceded by a non-anonymous label in its section.
3948 code = e_code `appOL` t_code `appOL` toOL [
3949 ADD (intSize wordWidth) op (OpReg tableReg),
3950 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3951 LDATA Text (CmmDataLabel lbl : jumpTable)
3954 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3955 -- relocations, hence we only get 32-bit offsets in the jump
3956 -- table. As these offsets are always negative we need to properly
3957 -- sign extend them to 64-bit. This hack should be removed in
3958 -- conjunction with the hack in PprMach.hs/pprDataItem once
3959 -- binutils 2.17 is standard.
3960 code = e_code `appOL` t_code `appOL` toOL [
3961 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3963 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3964 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3966 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
3967 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3971 code = e_code `appOL` t_code `appOL` toOL [
3972 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3973 ADD (intSize wordWidth) op (OpReg tableReg),
3974 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
3980 (reg,e_code) <- getSomeReg expr
3981 lbl <- getNewLabelNat
3983 jumpTable = map jumpTableEntry ids
3984 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3985 code = e_code `appOL` toOL [
3986 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3987 JMP_TBL op [ id | Just id <- ids ]
3991 #elif powerpc_TARGET_ARCH
3995 (reg,e_code) <- getSomeReg expr
3996 tmp <- getNewRegNat II32
3997 lbl <- getNewLabelNat
3998 dflags <- getDynFlagsNat
3999 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4000 (tableReg,t_code) <- getSomeReg $ dynRef
4002 jumpTable = map jumpTableEntryRel ids
4004 jumpTableEntryRel Nothing
4005 = CmmStaticLit (CmmInt 0 wordWidth)
4006 jumpTableEntryRel (Just (BlockId id))
4007 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4008 where blockLabel = mkAsmTempLabel id
4010 code = e_code `appOL` t_code `appOL` toOL [
4011 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4012 SLW tmp reg (RIImm (ImmInt 2)),
4013 LD II32 tmp (AddrRegReg tableReg tmp),
4014 ADD tmp tmp (RIReg tableReg),
4016 BCTR [ id | Just id <- ids ]
4021 (reg,e_code) <- getSomeReg expr
4022 tmp <- getNewRegNat II32
4023 lbl <- getNewLabelNat
4025 jumpTable = map jumpTableEntry ids
4027 code = e_code `appOL` toOL [
4028 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4029 SLW tmp reg (RIImm (ImmInt 2)),
4030 ADDIS tmp tmp (HA (ImmCLbl lbl)),
4031 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4033 BCTR [ id | Just id <- ids ]
4036 #elif sparc_TARGET_ARCH
4039 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4042 = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
4044 #error "ToDo: genSwitch"
-- | One word of a switch jump table: the address of the destination
-- block's assembler-temporary label, or a literal zero word where the
-- switch has no target for that index.
jumpTableEntry Nothing             = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntry (Just (BlockId uq)) = CmmStaticLit (CmmLabel (mkAsmTempLabel uq))
4051 -- -----------------------------------------------------------------------------
4053 -- -----------------------------------------------------------------------------
4056 -- -----------------------------------------------------------------------------
4057 -- 'condIntReg' and 'condFltReg': condition codes into registers
4059 -- Turn those condition codes into integers now (when they appear on
4060 -- the right hand side of an assignment).
4062 -- (If applicable) Do not fill the delay slots here; you will confuse the
4063 -- register allocator.
4065 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4067 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4069 #if alpha_TARGET_ARCH
4070 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4071 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4072 #endif /* alpha_TARGET_ARCH */
4074 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4076 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Materialise an integer comparison as a register value (0 or 1):
-- SETcc writes the low byte of a temporary, which is then
-- zero-extended into the destination word.
condIntReg cond x y = do
    CondCode _ cond' cond_code <- condIntCode cond x y
    tmp <- getNewRegNat II8
    let code dst = cond_code `appOL`
                   toOL [ SETCC cond' (OpReg tmp)
                        , MOVZxL II8 (OpReg tmp) (OpReg dst)
                        ]
    return (Any II32 code)
4091 #if i386_TARGET_ARCH
-- | Materialise a floating-point comparison as a register value
-- (0 or 1) on i386, via SETcc on a byte temporary followed by a
-- zero-extending move into the destination.
condFltReg cond x y = do
    CondCode _ cond' cond_code <- condFltCode cond x y
    tmp <- getNewRegNat II8
    let code dst = cond_code `appOL`
                   toOL [ SETCC cond' (OpReg tmp)
                        , MOVZxL II8 (OpReg tmp) (OpReg dst)
                        ]
    return (Any II32 code)
4106 #if x86_64_TARGET_ARCH
-- Materialise an SSE floating-point comparison as 0/1 on x86-64.
condFltReg cond x y = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp1 <- getNewRegNat wordSize
    tmp2 <- getNewRegNat wordSize
    -- We have to worry about unordered operands (eg. comparisons
    -- against NaN).  If the operands are unordered, the comparison
    -- sets the parity flag, carry flag and zero flag.
    -- All comparisons are supposed to return false for unordered
    -- operands except for !=, which returns true.
    -- Optimisation: we don't have to test the parity flag if we
    -- know the test has already excluded the unordered case: eg >
    -- and >= test for a zero carry flag, which can only occur for
    -- ordered operands.
    -- ToDo: by reversing comparisons we could avoid testing the
    -- parity flag in more cases.
          NE -> or_unordered dst
          GU -> plain_test dst
          GEU -> plain_test dst
          _ -> and_ordered dst)
    -- Ordered-only conditions: a single SETcc + zero-extend suffices.
    plain_test dst = toOL [
        SETCC cond (OpReg tmp1),
        MOVZxL II8 (OpReg tmp1) (OpReg dst)
    -- (!=): true when the SETcc fires OR the operands were unordered
    -- (parity flag set, per the comment above).
    or_unordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC PARITY (OpReg tmp2),
        OR II8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL II8 (OpReg tmp2) (OpReg dst)
    -- Conditions that could be fooled by NaN: require the SETcc AND
    -- NOTPARITY (i.e. the operands were ordered).
    and_ordered dst = toOL [
        SETCC cond (OpReg tmp1),
        SETCC NOTPARITY (OpReg tmp2),
        AND II8 (OpReg tmp1) (OpReg tmp2),
        MOVZxL II8 (OpReg tmp2) (OpReg dst)
    return (Any II32 code)
4156 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4158 #if sparc_TARGET_ARCH
-- Equality against a literal zero (SPARC): subtract src from %g0
-- setting condition codes, then a second subtract (presumably
-- carry-consuming SUBX -- confirm against the Instr definition)
-- leaves 1 in dst iff src was zero.
condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat II32   -- NOTE(review): 'tmp' appears unused in this equation
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any II32 code__2)
-- General equality (SPARC): XOR the operands into dst (dst becomes 0
-- iff they were equal), then apply the same carry-based zero test as
-- the literal-zero equation, leaving 1 for equal and 0 otherwise.
condIntReg EQQ x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat II32   -- NOTE(review): tmp1/tmp2 appear unused here
    tmp2 <- getNewRegNat II32
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any II32 code__2)
-- Inequality against a literal zero (SPARC): same condition-code
-- setting subtract as the EQQ case, but the final ADD (presumably
-- carry-consuming -- confirm against the Instr definition) leaves
-- 1 in dst iff src was non-zero.
condIntReg NE x (CmmLit (CmmInt 0 d)) = do
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat II32   -- NOTE(review): 'tmp' appears unused in this equation
    code__2 dst = code `appOL` toOL [
        SUB False True g0 (RIReg src) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any II32 code__2)
-- General inequality (SPARC): XOR the operands into dst (non-zero iff
-- they differ), then the carry-based test leaves 1 for unequal
-- operands and 0 for equal ones.
condIntReg NE x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp1 <- getNewRegNat II32   -- NOTE(review): tmp1/tmp2 appear unused here
    tmp2 <- getNewRegNat II32
    code__2 dst = code1 `appOL` code2 `appOL` toOL [
        XOR False src1 (RIReg src2) dst,
        SUB False True g0 (RIReg dst) g0,
        ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any II32 code__2)
-- Fallback for all other integer conditions (SPARC): branch on the
-- condition code, writing 0 on the fall-through path and 1 at the
-- branch target; delay slots are filled with NOPs so the register
-- allocator is not confused by them.
-- NOTE(review): this expects NEWBLOCK entries for bid1/bid2 in the
-- emitted sequence -- confirm.
condIntReg cond x y = do
    bid1@(BlockId lbl1) <- getBlockIdNat   -- NOTE(review): lbl1/lbl2 appear unused
    bid2@(BlockId lbl2) <- getBlockIdNat
    CondCode _ cond cond_code <- condIntCode cond x y
    code__2 dst = cond_code `appOL` toOL [
        BI cond False bid1, NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False bid2, NOP,
        OR False g0 (RIImm (ImmInt 1)) dst,
    return (Any II32 code__2)
-- Floating-point conditions (SPARC): same branchy 0/1 materialisation
-- as the integer fallback, but branching on the float condition codes
-- (BF) produced by condFltCode.
-- NOTE(review): this expects NEWBLOCK entries for bid1/bid2 in the
-- emitted sequence -- confirm.
condFltReg cond x y = do
    bid1@(BlockId lbl1) <- getBlockIdNat   -- NOTE(review): lbl1/lbl2 appear unused
    bid2@(BlockId lbl2) <- getBlockIdNat
    CondCode _ cond cond_code <- condFltCode cond x y
    code__2 dst = cond_code `appOL` toOL [
        BF cond False bid1, NOP,
        OR False g0 (RIImm (ImmInt 0)) dst,
        BI ALWAYS False bid2, NOP,
        OR False g0 (RIImm (ImmInt 1)) dst,
    return (Any II32 code__2)
4231 #endif /* sparc_TARGET_ARCH */
4233 #if powerpc_TARGET_ARCH
4234 condReg getCond = do
4235 lbl1 <- getBlockIdNat
4236 lbl2 <- getBlockIdNat
4237 CondCode _ cond cond_code <- getCond
4239 {- code dst = cond_code `appOL` toOL [
4248 code dst = cond_code
4252 RLWINM dst dst (bit + 1) 31 31
4255 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4258 (bit, do_negate) = case cond of
4272 return (Any II32 code)
4274 condIntReg cond x y = condReg (condIntCode cond x y)
4275 condFltReg cond x y = condReg (condFltCode cond x y)
4276 #endif /* powerpc_TARGET_ARCH */
4279 -- -----------------------------------------------------------------------------
4280 -- 'trivial*Code': deal with trivial instructions
4282 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4283 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4284 -- Only look for constants on the right hand side, because that's
4285 -- where the generic optimizer will have put them.
4287 -- Similarly, for unary instructions, we don't have to worry about
4288 -- matching an StInt as the argument, because genericOpt will already
4289 -- have handled the constant-folding.
4292 :: Width -- Int only
4293 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4294 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4295 -> Maybe (Operand -> Operand -> Instr)
4296 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4297 -> Maybe (Operand -> Operand -> Instr)
4298 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4299 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4301 -> CmmExpr -> CmmExpr -- the two arguments
4304 #ifndef powerpc_TARGET_ARCH
4306 :: Width -- Floating point only
4307 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4308 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4309 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4310 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4312 -> CmmExpr -> CmmExpr -- the two arguments
4318 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4319 ,IF_ARCH_i386 ((Operand -> Instr)
4320 ,IF_ARCH_x86_64 ((Operand -> Instr)
4321 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4322 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4324 -> CmmExpr -- the one argument
4327 #ifndef powerpc_TARGET_ARCH
4330 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4331 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4332 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4333 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4335 -> CmmExpr -- the one argument
4339 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4341 #if alpha_TARGET_ARCH
4343 trivialCode instr x (StInt y)
4345 = getRegister x `thenNat` \ register ->
4346 getNewRegNat IntRep `thenNat` \ tmp ->
4348 code = registerCode register tmp
4349 src1 = registerName register tmp
4350 src2 = ImmInt (fromInteger y)
4351 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4353 return (Any IntRep code__2)
4355 trivialCode instr x y
4356 = getRegister x `thenNat` \ register1 ->
4357 getRegister y `thenNat` \ register2 ->
4358 getNewRegNat IntRep `thenNat` \ tmp1 ->
4359 getNewRegNat IntRep `thenNat` \ tmp2 ->
4361 code1 = registerCode register1 tmp1 []
4362 src1 = registerName register1 tmp1
4363 code2 = registerCode register2 tmp2 []
4364 src2 = registerName register2 tmp2
4365 code__2 dst = asmSeqThen [code1, code2] .
4366 mkSeqInstr (instr src1 (RIReg src2) dst)
4368 return (Any IntRep code__2)
-- trivialUCode (Alpha): unary integer operation.  The operand is
-- evaluated into a register and passed to the instruction in RIReg
-- form; the result may go to any register (Any IntRep).
4371 trivialUCode instr x
4372 = getRegister x `thenNat` \ register ->
4373 getNewRegNat IntRep `thenNat` \ tmp ->
4375 code = registerCode register tmp
4376 src = registerName register tmp
4377 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4379 return (Any IntRep code__2)
-- trivialFCode (Alpha): two-operand floating-point operation.  Both
-- operands are computed into FF64 temporaries and combined with the
-- three-register instruction.  The first (Width) argument is ignored.
4382 trivialFCode _ instr x y
4383 = getRegister x `thenNat` \ register1 ->
4384 getRegister y `thenNat` \ register2 ->
4385 getNewRegNat FF64 `thenNat` \ tmp1 ->
4386 getNewRegNat FF64 `thenNat` \ tmp2 ->
4388 code1 = registerCode register1 tmp1
4389 src1 = registerName register1 tmp1
4391 code2 = registerCode register2 tmp2
4392 src2 = registerName register2 tmp2
4394 code__2 dst = asmSeqThen [code1 [], code2 []] .
4395 mkSeqInstr (instr src1 src2 dst)
4397 return (Any FF64 code__2)
-- trivialUFCode (Alpha): unary floating-point operation; operand in an
-- FF64 temporary, result in any FF64 register.  The first (Width)
-- argument is ignored.
4399 trivialUFCode _ instr x
4400 = getRegister x `thenNat` \ register ->
4401 getNewRegNat FF64 `thenNat` \ tmp ->
4403 code = registerCode register tmp
4404 src = registerName register tmp
4405 code__2 dst = code . mkSeqInstr (instr src dst)
4407 return (Any FF64 code__2)
4409 #endif /* alpha_TARGET_ARCH */
4411 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4413 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4416 The Rules of the Game are:
4418 * You cannot assume anything about the destination register dst;
4419 it may be anything, including a fixed reg.
4421 * You may compute an operand into a fixed reg, but you may not
4422 subsequently change the contents of that fixed reg. If you
4423 want to do so, first copy the value either to a temporary
4424 or into dst. You are free to modify dst even if it happens
4425 to be a fixed reg -- that's not your problem.
4427 * You cannot assume that a fixed reg will stay live over an
4428 arbitrary computation. The same applies to the dst reg.
4430 * Temporary regs obtained from getNewRegNat are distinct from
4431 each other and from all other regs, and stay live over
4432 arbitrary computations.
4434 --------------------
4436 SDM's version of The Rules:
4438 * If getRegister returns Any, that means it can generate correct
4439 code which places the result in any register, period. Even if that
4440 register happens to be read during the computation.
4442 Corollary #1: this means that if you are generating code for an
4443 operation with two arbitrary operands, you cannot assign the result
4444 of the first operand into the destination register before computing
4445 the second operand. The second operand might require the old value
4446 of the destination register.
4448 Corollary #2: A function might be able to generate more efficient
4449 code if it knows the destination register is a new temporary (and
4450 therefore not read by any of the sub-computations).
4452 * If getRegister returns Any, then the code it generates may modify only:
4453 (a) fresh temporaries
4454 (b) the destination register
4455 (c) known registers (eg. %ecx is used by shifts)
4456 In particular, it may *not* modify global registers, unless the global
4457 register happens to be the destination register.
-- trivialCode (i386/x86_64): two-operand integer operation.
-- Special case: when the *first* argument is a literal that fits in 32
-- bits and a reversed-operand form of the instruction is available,
-- evaluate the other operand into dst and apply the reversed
-- instruction with the literal as an immediate.
4460 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4461 | is32BitLit lit_a = do
4462 b_code <- getAnyReg b
4465 = b_code dst `snocOL`
4466 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4468 return (Any (intSize width) code)
-- Otherwise fall through to the generic two-address worker below.
4470 trivialCode width instr maybe_revinstr a b
4471 = genTrivialCode (intSize width) instr a b
4473 -- This is re-used for floating pt instructions too.
-- genTrivialCode: shared worker for two-address instructions
-- (dst := dst `op` src).  b is evaluated to a non-clobberable operand,
-- a is computed straight into dst, then the instruction is applied.
-- The guard below handles the case where b's operand lives in dst
-- itself: b is first saved to a fresh temporary.
4474 genTrivialCode rep instr a b = do
4475 (b_op, b_code) <- getNonClobberedOperand b
4476 a_code <- getAnyReg a
4477 tmp <- getNewRegNat rep
4479 -- We want the value of b to stay alive across the computation of a.
4480 -- But, we want to calculate a straight into the destination register,
4481 -- because the instruction only has two operands (dst := dst `op` src).
4482 -- The troublesome case is when the result of b is in the same register
4483 -- as the destination reg. In this case, we have to save b in a
4484 -- new temporary across the computation of a.
4486 | dst `regClashesWithOp` b_op =
4488 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4490 instr (OpReg tmp) (OpReg dst)
4494 instr b_op (OpReg dst)
4496 return (Any rep code)
-- regClashesWithOp: does the given register occur in the operand,
-- either directly (OpReg) or inside an addressing mode (OpAddr)?
4498 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4499 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4500 reg `regClashesWithOp` _ = False
-- trivialUCode (i386/x86_64): unary integer operation; the operand is
-- computed directly into dst and the instruction applied in place.
4504 trivialUCode rep instr x = do
4505 x_code <- getAnyReg x
4510 return (Any rep code)
4514 #if i386_TARGET_ARCH
-- trivialFCode (i386): two-operand floating-point operation.  x is
-- evaluated into a register guaranteed not to be clobbered by the
-- computation of y, then the three-register instruction combines them.
4516 trivialFCode width instr x y = do
4517 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4518 (y_reg, y_code) <- getSomeReg y
4520 size = floatSize width
4524 instr size x_reg y_reg dst
4525 return (Any size code)
4529 #if x86_64_TARGET_ARCH
-- trivialFCode (x86_64): SSE floating-point ops use the same
-- two-address operand form as integer ops, so simply delegate to
-- genTrivialCode at the float size.
4530 trivialFCode pk instr x y
4531 = genTrivialCode size (instr size) x y
4532 where size = floatSize pk
-- trivialUFCode (i386/x86_64): unary floating-point operation; the
-- operand is evaluated into some register and the instruction writes
-- the result to dst.
4537 trivialUFCode size instr x = do
4538 (x_reg, x_code) <- getSomeReg x
4544 return (Any size code)
4546 #endif /* i386_TARGET_ARCH */
4548 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4550 #if sparc_TARGET_ARCH
-- trivialCode (SPARC): two-operand integer operation.
-- When the second operand is an integer literal (that the elided guard
-- accepts), it is emitted as an immediate (RIImm); the result is a
-- 32-bit value in any register.
4552 trivialCode pk instr x (CmmLit (CmmInt y d))
4555 (src1, code) <- getSomeReg x
4556 tmp <- getNewRegNat II32
4558 src2 = ImmInt (fromInteger y)
4559 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4560 return (Any II32 code__2)
-- General case: both operands in registers, register-register form.
4562 trivialCode pk instr x y = do
4563 (src1, code1) <- getSomeReg x
4564 (src2, code2) <- getSomeReg y
4565 tmp1 <- getNewRegNat II32
4566 tmp2 <- getNewRegNat II32
4568 code__2 dst = code1 `appOL` code2 `snocOL`
4569 instr src1 (RIReg src2) dst
4570 return (Any II32 code__2)
-- trivialFCode (SPARC): two-operand floating-point operation.  If the
-- operands have different float widths, the 32-bit one is first
-- promoted to FF64 with FxTOy (into tmp) so the instruction always
-- sees matching widths; the result width follows the wider operand.
4573 trivialFCode pk instr x y = do
4574 (src1, code1) <- getSomeReg x
4575 (src2, code2) <- getSomeReg y
4576 tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4577 tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4578 tmp <- getNewRegNat FF64
4580 promote x = FxTOy FF32 FF64 x tmp
-- pk1/pk2 (bound in elided lines) are the CmmTypes of x and y.
4586 if pk1 `cmmEqType` pk2 then
4587 code1 `appOL` code2 `snocOL`
4588 instr (floatSize pk) src1 src2 dst
4589 else if typeWidth pk1 == W32 then
4590 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4591 instr FF64 tmp src2 dst
4593 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4594 instr FF64 src1 tmp dst
4595 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- trivialUCode (SPARC): unary integer operation; the operand register
-- is wrapped in RIReg and the instruction writes dst.
4599 trivialUCode size instr x = do
4600 (src, code) <- getSomeReg x
4601 tmp <- getNewRegNat size
4603 code__2 dst = code `snocOL` instr (RIReg src) dst
4604 return (Any size code__2)
-- trivialUFCode (SPARC): unary floating-point operation at size pk.
4607 trivialUFCode pk instr x = do
4608 (src, code) <- getSomeReg x
4609 tmp <- getNewRegNat pk
4611 code__2 dst = code `snocOL` instr src dst
4612 return (Any pk code__2)
4614 #endif /* sparc_TARGET_ARCH */
4616 #if powerpc_TARGET_ARCH
4619 Wolfgang's PowerPC version of The Rules:
4621 A slightly modified version of The Rules to take advantage of the fact
4622 that PowerPC instructions work on all registers and don't implicitly
4623 clobber any fixed registers.
4625 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4627 * If getRegister returns Any, then the code it generates may modify only:
4628 (a) fresh temporaries
4629 (b) the destination register
4630 It may *not* modify global registers, unless the global
4631 register happens to be the destination register.
4632 It may not clobber any other registers. In fact, only ccalls clobber any
4634 Also, it may not modify the counter register (used by genCCall).
4636 Corollary: If a getRegister for a subexpression returns Fixed, you need
4637 not move it to a fresh temporary before evaluating the next subexpression.
4638 The Fixed register won't be modified.
4639 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4641 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4642 the value of the destination register.
-- trivialCode (PowerPC): two-operand integer operation.
-- If the literal fits the immediate field for this width/signedness
-- (makeImmediate succeeds), use the register-immediate form...
4645 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4646 | Just imm <- makeImmediate rep signed y
4648 (src1, code1) <- getSomeReg x
4649 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4650 return (Any (intSize rep) code)
-- ...otherwise evaluate both operands into registers.
4652 trivialCode rep signed instr x y = do
4653 (src1, code1) <- getSomeReg x
4654 (src2, code2) <- getSomeReg y
4655 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4656 return (Any (intSize rep) code)
-- | Compile a two-operand operation whose instruction takes three
-- registers and has no immediate form: both operands are evaluated
-- into registers, then the supplied instruction combines them into
-- the (caller-chosen) destination register.
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' size instr x y = do
    (x_reg, x_code) <- getSomeReg x
    (y_reg, y_code) <- getSomeReg y
    let assemble dst = x_code `appOL` y_code `snocOL` instr dst x_reg y_reg
    return (Any size assemble)
-- | As 'trivialCodeNoImm'', but for instruction constructors that take
-- an explicit 'Size' first; the size is applied up front and the rest
-- is delegated.
trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm size instr = trivialCodeNoImm' size (instr size)
-- | Compile a unary operation (PowerPC): evaluate the operand into a
-- register, then append the single instruction writing the destination.
trivialUCode rep instr x = do
    (operand, prelude) <- getSomeReg x
    let withResult dst = prelude `snocOL` instr dst operand
    return (Any rep withResult)
4675 -- There is no "remainder" instruction on the PPC, so we have to do
4677 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
4679 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4680 -> CmmExpr -> CmmExpr -> NatM Register
-- Computes the remainder from the quotient: divide (supplied `div`
-- instruction, elided line), multiply back with MULLW, and subtract
-- from the original dividend.
4681 remainderCode rep div x y = do
4682 (src1, code1) <- getSomeReg x
4683 (src2, code2) <- getSomeReg y
4684 let code dst = code1 `appOL` code2 `appOL` toOL [
4686 MULLW dst dst (RIReg src2),
4689 return (Any (intSize rep) code)
4691 #endif /* powerpc_TARGET_ARCH */
4694 -- -----------------------------------------------------------------------------
4695 -- Coercing to/from integer/floating-point...
4697 -- When going to integer, we truncate (round towards 0).
4699 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4700 -- conversions. We have to store temporaries in memory to move
4701 -- between the integer and the floating point register sets.
4703 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4704 -- pretend, on sparc at least, that double and float regs are separate
4705 -- kinds, so the value has to be computed into one kind before being
4706 -- explicitly "converted" to live in the other kind.
-- Integer <-> floating-point coercions; the first Width is the source
-- width, the second the target width.
4708 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4709 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4711 #if sparc_TARGET_ARCH
-- SPARC additionally needs explicit float <-> double conversions,
-- since float and double regs are treated as separate kinds there.
4712 coerceDbl2Flt :: CmmExpr -> NatM Register
4713 coerceFlt2Dbl :: CmmExpr -> NatM Register
4716 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4718 #if alpha_TARGET_ARCH
-- Alpha int->FP coercion (function-name line elided): the value is
-- bounced through a stack slot (spRel 0) to cross between the integer
-- and floating-point register files; LD TF reloads it as a double.
4721 = getRegister x `thenNat` \ register ->
4722 getNewRegNat IntRep `thenNat` \ reg ->
4724 code = registerCode register reg
4725 src = registerName register reg
4727 code__2 dst = code . mkSeqInstrs [
4729 LD TF dst (spRel 0),
4732 return (Any FF64 code__2)
-- Alpha FP->int coercion (function-name line elided): same stack-slot
-- bounce in the opposite direction (ST TF, then reload as integer).
4736 = getRegister x `thenNat` \ register ->
4737 getNewRegNat FF64 `thenNat` \ tmp ->
4739 code = registerCode register tmp
4740 src = registerName register tmp
4742 code__2 dst = code . mkSeqInstrs [
4744 ST TF tmp (spRel 0),
4747 return (Any IntRep code__2)
4749 #endif /* alpha_TARGET_ARCH */
4751 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4753 #if i386_TARGET_ARCH
-- coerceInt2FP (i386, x87): one GITOF/GITOD pseudo-instruction chosen
-- by the target width converts the integer register to float/double.
4755 coerceInt2FP from to x = do
4756 (x_reg, x_code) <- getSomeReg x
4758 opc = case to of W32 -> GITOF; W64 -> GITOD
4759 code dst = x_code `snocOL` opc x_reg dst
4760 -- ToDo: works for non-II32 reps?
4761 return (Any (floatSize to) code)
-- coerceFP2Int (i386, x87): GFTOI/GDTOI chosen by the *source* width
-- truncates the float/double register to an integer.
4765 coerceFP2Int from to x = do
4766 (x_reg, x_code) <- getSomeReg x
4768 opc = case from of W32 -> GFTOI; W64 -> GDTOI
4769 code dst = x_code `snocOL` opc x_reg dst
4770 -- ToDo: works for non-II32 reps?
4772 return (Any (intSize to) code)
4774 #endif /* i386_TARGET_ARCH */
4776 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4778 #if x86_64_TARGET_ARCH
-- coerceFP2Int (x86_64, SSE): truncating convert-to-quadword
-- (CVTTSS2SIQ / CVTTSD2SIQ) chosen by the source float width.
4780 coerceFP2Int from to x = do
4781 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4783 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
4784 code dst = x_code `snocOL` opc x_op dst
4786 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- coerceInt2FP (x86_64, SSE): CVTSI2SS / CVTSI2SD chosen by the
-- target float width.
4788 coerceInt2FP from to x = do
4789 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4791 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
4792 code dst = x_code `snocOL` opc x_op dst
4794 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- coerceFP2FP (x86_64 only): float <-> double conversion; the
-- direction is picked from the *target* width (to W32 narrows with
-- CVTSD2SS, to W64 widens with CVTSS2SD).
4796 coerceFP2FP :: Width -> CmmExpr -> NatM Register
4797 coerceFP2FP to x = do
4798 (x_reg, x_code) <- getSomeReg x
4800 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
4801 code dst = x_code `snocOL` opc x_reg dst
4803 return (Any (floatSize to) code)
4806 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4808 #if sparc_TARGET_ARCH
-- coerceInt2FP (SPARC): move the integer through a stack slot
-- (spRel (-2)) to cross into the FP register file, then convert with
-- FxTOy from the integer size to the target float size.
4810 coerceInt2FP width1 width2 x = do
4811 (src, code) <- getSomeReg x
4813 code__2 dst = code `appOL` toOL [
4814 ST (intSize width1) src (spRel (-2)),
4815 LD (intSize width1) (spRel (-2)) dst,
4816 FxTOy (intSize width1) (floatSize width2) dst dst]
4817 return (Any (floatSize $ width2) code__2)
-- coerceFP2Int (SPARC): convert in the FP register file with FxTOy,
-- then bounce the result through a stack slot (spRel (-2)) to reach
-- an integer register.
4820 coerceFP2Int width1 width2 x = do
4821 let pk = intSize width1
4822 fprep = floatSize width2
4824 (src, code) <- getSomeReg x
4825 reg <- getNewRegNat fprep
4826 tmp <- getNewRegNat pk
4828 code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
4830 FxTOy fprep pk src tmp,
4831 ST pk tmp (spRel (-2)),
4832 LD pk (spRel (-2)) dst]
4833 return (Any pk code__2)
-- | Narrow a double-precision value to single precision (SPARC) with
-- a single FxTOy FF64->FF32 conversion instruction.
coerceDbl2Flt x = do
    (reg, setup) <- getSomeReg x
    let convert dst = setup `snocOL` FxTOy FF64 FF32 reg dst
    return (Any FF32 convert)
-- | Widen a single-precision value to double precision (SPARC) with
-- a single FxTOy FF32->FF64 conversion instruction.
coerceFlt2Dbl x = do
    (reg, setup) <- getSomeReg x
    let convert dst = setup `snocOL` FxTOy FF32 FF64 reg dst
    return (Any FF64 convert)
4845 #endif /* sparc_TARGET_ARCH */
4847 #if powerpc_TARGET_ARCH
-- coerceInt2FP (PowerPC): int->float via the double bit-pattern trick.
-- The source (sign bit flipped by XORIS 0x8000) is stored as the low
-- word of a stack double whose high word is 0x43300000; reloading that
-- as FF64 and FSUB-ing the constant 0x43300000_80000000 (emitted as
-- static data at lbl, loaded via addr) yields the converted value.
-- W8/W16 sources are sign-extended first (EXTS); a W32 target is
-- rounded to single with FRSP.
4848 coerceInt2FP fromRep toRep x = do
4849 (src, code) <- getSomeReg x
4850 lbl <- getNewLabelNat
4851 itmp <- getNewRegNat II32
4852 ftmp <- getNewRegNat FF64
4853 dflags <- getDynFlagsNat
4854 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4855 Amode addr addr_code <- getAmode dynRef
4857 code' dst = code `appOL` maybe_exts `appOL` toOL [
4860 CmmStaticLit (CmmInt 0x43300000 W32),
4861 CmmStaticLit (CmmInt 0x80000000 W32)],
4862 XORIS itmp src (ImmInt 0x8000),
4863 ST II32 itmp (spRel 3),
4864 LIS itmp (ImmInt 0x4330),
4865 ST II32 itmp (spRel 2),
4866 LD FF64 ftmp (spRel 2)
4867 ] `appOL` addr_code `appOL` toOL [
4869 FSUB FF64 dst ftmp dst
4870 ] `appOL` maybe_frsp dst
4872 maybe_exts = case fromRep of
4873 W8 -> unitOL $ EXTS II8 src src
4874 W16 -> unitOL $ EXTS II16 src src
4876 maybe_frsp dst = case toRep of
4877 W32 -> unitOL $ FRSP dst dst
4879 return (Any (floatSize toRep) code')
-- coerceFP2Int (PowerPC): convert in the FP register file (elided
-- instruction, per the comment below), then move the result to an
-- integer register by storing the 64-bit FP value on the stack and
-- reloading only its low word.
4881 coerceFP2Int fromRep toRep x = do
4882 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
4883 (src, code) <- getSomeReg x
4884 tmp <- getNewRegNat FF64
4886 code' dst = code `appOL` toOL [
4887 -- convert to int in FP reg
4889 -- store value (64bit) from FP to stack
4890 ST FF64 tmp (spRel 2),
4891 -- read low word of value (high word is undefined)
4892 LD II32 dst (spRel 3)]
4893 return (Any (intSize toRep) code')
4897 -- -----------------------------------------------------------------------------
4898 -- eXTRA_STK_ARGS_HERE
4900 -- We (allegedly) put the first six C-call arguments in registers;
4901 -- where do we start putting the rest of them?
4903 -- Moved from MachInstrs (SDM):
4905 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
4906 eXTRA_STK_ARGS_HERE :: Int
4908 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))