2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH} things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
36 -- Our intermediate code:
38 import PprCmm ( pprExpr )
41 import ClosureInfo ( C_SRT(..) )
44 import StaticFlags ( opt_PIC )
45 import ForeignCall ( CCallConv(..) )
48 import qualified Outputable as O
51 import FastBool ( isFastTrue )
52 import Constants ( wORD_SIZE )
54 import Debug.Trace ( trace )
56 import Control.Monad ( mapAndUnzipM )
57 import Data.Maybe ( fromJust )
63 -- -----------------------------------------------------------------------------
64 -- Top-level of the instruction selector
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.
type InstrBlock = OrdList Instr
-- | Entry point of the instruction selector: translate one top-level
-- Cmm entity (a procedure or a data section) into native code.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  -- Select instructions for each basic block; this may also produce
  -- extra data sections (statics) that must float to the top level.
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
  -- NOTE(review): a scrutinee line ("case picBaseMb of") is missing from
  -- this extract; the two alternatives below dispatch on picBaseMb.
      Just picBase -> initializePicBase picBase tops
      Nothing -> return tops

-- Data sections need no translation; CmmStatic is used directly.
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Generate instructions for one basic block.  Instruction selection can
-- introduce new block boundaries (marked with NEWBLOCK) and inline data
-- (marked with LDATA); both are split back out here by a right fold over
-- the generated instruction list.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- NOTE(review): the "let" keyword introducing the bindings below is
  -- missing from this extract.
      (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
      -- NEWBLOCK ends the current block and starts a fresh one.
      mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
        = ([], BasicBlock id instrs : blocks, statics)
      -- LDATA is lifted out of the instruction stream into a data section.
      mkBlocks (LDATA sec dat) (instrs,blocks,statics)
        = (instrs, blocks, CmmData sec dat:statics)
      -- Ordinary instructions stay in the current block.
      mkBlocks instr (instrs,blocks,statics)
        = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Translate a list of Cmm statements, concatenating the resulting
-- instruction trees.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-- NOTE(review): the equation head ("stmtsToInstrs stmts") is missing
-- from this extract.
  = do instrss <- mapM stmtToInstrs stmts
       return (concatOL instrss)
-- | Translate one Cmm statement.  Assignments dispatch on the type of the
-- assigned value: float, 64-bit-on-32-bit (via the I64 helpers below), or
-- ordinary integer.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))
    -- NOTE(review): the "CmmAssign reg src" alternative head is missing
    -- from this extract; the guards below belong to it.
      | isFloatType ty -> assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignReg_I64Code      reg src
    -- NOTE(review): an "#endif" line is missing here in this extract.
      | otherwise      -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty
    -- NOTE(review): the "CmmStore addr src" alternative head is missing
    -- from this extract; the guards below belong to it.
      | isFloatType ty -> assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignMem_I64Code      addr src
    -- NOTE(review): an "#endif" line is missing here in this extract.
      | otherwise      -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty
    CmmCall target result_regs args _ _
       -> genCCall target result_regs args
    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params    -> genJump arg
    -- CmmReturn should not survive to this stage (see panic message).
        panic "stmtToInstrs: return statement should have been cps'd away"
143 -- -----------------------------------------------------------------------------
144 -- General things for putting together code sequences
-- | Expand a 'CmmRegOff' node into the equivalent explicit addition, so
-- the rest of the instruction selector only has to handle 'CmmMachOp'.
-- ToDo: should we do it this way around, or convert CmmExprs into
-- CmmRegOff?
--
-- Only 'CmmRegOff' is meaningful here (callers match on it first), so any
-- other constructor is a compiler bug; the catch-all panics with a
-- diagnosable message instead of dying with an opaque pattern-match
-- failure.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)
mangleIndexTree _
  = panic "MachCodeGen.mangleIndexTree: no match"
153 -- -----------------------------------------------------------------------------
154 -- Code gen for 64-bit arithmetic on 32-bit platforms
157 Simple support for generating 64-bit code (ie, 64 bit values and 64
158 bit assignments) on 32-bit platforms. Unlike the main code generator
159 we merely shoot for generating working code as simply as possible, and
160 pay little attention to code quality. Specifically, there is no
161 attempt to deal cleverly with the fixed-vs-floating register
162 distinction; all values are generated into (pairs of) floating
163 registers, even if this would mean some redundant reg-reg moves as a
164 result. Only one of the VRegUniques is returned, since it will be
165 of the VRegUniqueLo form, and the upper-half VReg can be determined
166 by applying getHiVRegFromLo to it.
-- | Result of selecting code for a 64-bit value on a 32-bit platform:
-- the instruction tree plus the virtual register holding the LOW word.
-- The high word's register is derived with getHiVRegFromLo.
data ChildCode64        -- a.k.a "Register64"
-- NOTE(review): the constructor line ("= ChildCode64" and its InstrBlock
-- field) is missing from this extract.
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified

#if WORD_SIZE_IN_BITS==32
-- Per-architecture implementations of these follow below.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock

#ifndef x86_64_TARGET_ARCH
iselExpr64        :: CmmExpr -> NatM ChildCode64
187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | i386: store a 64-bit value to memory as two 32-bit MOVs
-- (low word at addr, high word at addr+4 — little-endian).
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  -- NOTE(review): the "let" keyword introducing these bindings is missing
  -- from this extract.
     rhi = getHiVRegFromLo rlo
     -- Little-endian store
     mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
     mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | i386: assign a 64-bit value to a local register pair with two
-- reg-to-reg MOVs.  Only local registers are valid targets.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
  -- NOTE(review): the "let" keyword introducing these bindings is missing
  -- from this extract.
     r_dst_lo = mkVReg u_dst II32
     r_dst_hi = getHiVRegFromLo r_dst_lo
     r_src_hi = getHiVRegFromLo r_src_lo
     mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
     mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
  -- NOTE(review): the "return (" wrapper lines are missing from this
  -- extract.
        vcode `snocOL` mov_lo `snocOL` mov_hi

-- Global registers (and anything else) cannot hold 64-bit values here.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
-- | i386 64-bit expression selection.  Each case returns a ChildCode64
-- whose low register's partner (via getHiVRegFromLo) holds the high word.
-- Literal: split into low word r and high word q, MOV each immediate.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  -- NOTE(review): the "let" and "code = toOL [" lines are missing from
  -- this extract.
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
        MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
        MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
  return (ChildCode64 code rlo)

-- Load: two 32-bit loads from addr and addr+4 (little-endian).
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   -- NOTE(review): the "let" keyword introducing these bindings is
   -- missing from this extract.
      mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
      mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   -- NOTE(review): the "return $" wrapper is missing from this extract.
        ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Local register: no code needed; just name the low vreg.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))

-- we handle addition, but rather badly
-- Add with literal: copy operand into fresh pair, then ADD/ADC the
-- immediate halves (ADC propagates the carry into the high word).
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   -- NOTE(review): the "let" keyword introducing these bindings is
   -- missing from this extract.
      r = fromIntegral (fromIntegral i :: Word32)
      q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
      r1hi = getHiVRegFromLo r1lo
        toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
               ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
               MOV II32 (OpReg r1hi) (OpReg rhi),
               ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- General add: same ADD/ADC scheme with a register second operand.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   -- NOTE(review): the "let" keyword introducing these bindings is
   -- missing from this extract.
      r1hi = getHiVRegFromLo r1lo
      r2hi = getHiVRegFromLo r2lo
        toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
               ADD II32 (OpReg r2lo) (OpReg rlo),
               MOV II32 (OpReg r1hi) (OpReg rhi),
               ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

-- Zero-extend 32 -> 64: compute the operand, then zero the high vreg.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
     -- NOTE(review): the "return $ ChildCode64 ..." wrapper lines around
     -- the expression below are incomplete in this extract.
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))

-- Anything else is unsupported on i386.
   = pprPanic "iselExpr64(i386)" (ppr expr)
295 #endif /* i386_TARGET_ARCH */
297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
299 #if sparc_TARGET_ARCH
-- | SPARC: store a 64-bit value as two 32-bit STs.  Big-endian, so the
-- HIGH word goes at offset 0 and the low word at offset 4.
assignMem_I64Code addrTree valueTree = do
     Amode addr addr_code <- getAmode addrTree
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     (src, code) <- getSomeReg addrTree
     -- NOTE(review): the "let" keyword introducing these bindings is
     -- missing from this extract.
         rhi = getHiVRegFromLo rlo
         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)

-- | SPARC: 64-bit register-to-register assignment via two OR-with-%g0
-- moves (mkMOV).  Only local registers are valid targets.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
     -- NOTE(review): the "let" keyword introducing these bindings is
     -- missing from this extract.
         r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         -- OR rd, %g0, rs is the canonical SPARC register move.
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
--   = panic "iselExpr64(???)"

-- | SPARC: load a 64-bit value — high word from offset 0, low from
-- offset 4 (big-endian).
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
     rlo <- getNewRegNat II32
     let rhi = getHiVRegFromLo rlo
         mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
     -- NOTE(review): the "return $" wrapper is missing from this extract.
         ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)

-- | SPARC: copy a 64-bit local register pair into a fresh pair using
-- OR-with-%g0 moves.
iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq II32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     -- NOTE(review): the "return $" wrapper is missing from this extract.
         ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo

-- Anything else is unsupported on SPARC.
   = pprPanic "iselExpr64(sparc)" (ppr expr)
357 #endif /* sparc_TARGET_ARCH */
359 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
361 #if powerpc_TARGET_ARCH
-- | PowerPC: compute addressing modes for both halves of a 64-bit
-- memory operand (high word at the base address, low word 4 bytes on —
-- big-endian).  Falls back to loading the address into a register when
-- the +4 offset cannot be folded into the amode.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                          return (AddrRegImm hi_ptr (ImmInt 0),
                                  AddrRegImm hi_ptr (ImmInt 4),
-- | PowerPC: store a 64-bit value as two 32-bit STs through the amodes
-- produced by getI64Amodes.
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        -- NOTE(review): the "let" keyword introducing these bindings is
        -- missing from this extract.
                rhi = getHiVRegFromLo rlo
                mov_hi = ST II32 rhi hi_addr
                mov_lo = ST II32 rlo lo_addr
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | PowerPC: 64-bit register-to-register assignment via two MR
-- (move-register) instructions.  Only local registers are valid targets.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   -- NOTE(review): the "let" keyword introducing these bindings is
   -- missing from this extract.
       r_dst_lo = mkVReg u_dst II32
       r_dst_hi = getHiVRegFromLo r_dst_lo
       r_src_hi = getHiVRegFromLo r_src_lo
       mov_lo = MR r_dst_lo r_src_lo
       mov_hi = MR r_dst_hi r_src_hi
   -- NOTE(review): the "return (" wrapper lines are missing from this
   -- extract.
        vcode `snocOL` mov_lo `snocOL` mov_hi

assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-- Don't delete this -- it's very handy for debugging.
--iselExpr64 expr
--   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "iselExpr64(???)"

-- | PowerPC: load a 64-bit value — two 32-bit LDs through the hi/lo
-- amodes from getI64Amodes.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LD II32 rhi hi_addr
        mov_lo = LD II32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Local register: no code needed; just name the low vreg.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (mkVReg vu II32))
-- | PowerPC: materialise a 64-bit integer literal.  Each 32-bit half is
-- built from its two 16-bit pieces: LIS (load-immediate-shifted) puts the
-- upper half-word in place, then OR-immediate fills in the lower
-- half-word.  half0..half3 are bits 0-15, 16-31, 32-47, 48-63 of i.
--
-- BUG FIX: the final instruction previously read
--     OR rlo rlo (RIImm $ ImmInt half2)
-- i.e. it OR'd half2 (bits 32..47) into the LOW register — corrupting the
-- just-assembled low word whenever half2 /= 0 and leaving the low 16 bits
-- of rhi permanently zero (LIS clears them).  half2 belongs in rhi.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
        half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
        half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  return (ChildCode64 code rlo)
-- | PowerPC: 64-bit add — ADDC on the low words sets the carry which the
-- (elided) ADDE consumes for the high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   -- NOTE(review): the "let" keyword introducing these bindings is
   -- missing from this extract.
       r1hi = getHiVRegFromLo r1lo
       r2hi = getHiVRegFromLo r2lo
        toOL [ ADDC rlo r1lo r2lo,
   -- NOTE(review): the ADDE instruction for the high words and the
   -- closing bracket are missing from this extract.
   return (ChildCode64 code rlo)

-- | PowerPC: zero-extend 32 -> 64 — move the operand into rlo and load
-- an immediate 0 into rhi.
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
    (expr_reg,expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)

-- Anything else is unsupported on PowerPC.
   = pprPanic "iselExpr64(powerpc)" (ppr expr)
459 #endif /* powerpc_TARGET_ARCH */
462 -- -----------------------------------------------------------------------------
463 -- The 'Register' type
-- 'Register's passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
-- NOTE(review): the "data Register" header line is missing from this
-- extract; only the two constructors are visible.
   = Fixed   Size Reg InstrBlock   -- value is pinned in this register
   | Any     Size (Reg -> InstrBlock)   -- caller picks the destination
swizzleRegisterRep :: Register -> Size -> Register
-- Re-tag a 'Register' with a different width.  Neither the code nor the
-- register itself changes — only the recorded Size; it's a no-op.
swizzleRegisterRep register size = case register of
  Fixed _ reg code -> Fixed size reg code
  Any   _ codefn   -> Any   size codefn
480 -- -----------------------------------------------------------------------------
481 -- Utils based on getRegister, below
-- The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-- NOTE(review): the equation head ("getSomeReg expr = do") and the
-- "case r of" dispatch lines are missing from this extract; the Any
-- branch allocates a fresh temporary and runs the code function on it.
    r <- getRegister expr
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
495 -- -----------------------------------------------------------------------------
496 -- Grab the Reg for a CmmReg
-- | Map a 'CmmReg' to the 'Reg' the register allocator should see:
-- locals become virtual registers; globals must correspond to a real
-- machine register on this platform, otherwise it is a panic.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u (cmmTypeSize pk)

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
512 -- -----------------------------------------------------------------------------
513 -- Generate code to get a subtree into a Register
-- Don't delete this -- it's very handy for debugging.
--getRegister expr
--   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--   = panic "getRegister(???)"

-- | Generate code to compute a Cmm expression into some register.
getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg))
  -- NOTE(review): the "= do" line is missing from this extract.
      reg <- getPicBaseNat wordSize
      return (Fixed wordSize reg nilOL)

-- A plain register reference needs no code.
getRegister (CmmReg reg)
  = return (Fixed (cmmTypeSize (cmmRegType reg))
                  (getRegisterReg reg) nilOL)

-- Reg+offset is rewritten to an explicit add first.
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)

#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- Truncating (x >> 32) to 32 bits is just the high register of the pair.
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

-- Plain truncation to 32 bits is just the low register of the pair.
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
563 -- end of machine-"independent" bit; here we go on the rest...
565 #if alpha_TARGET_ARCH
-- NOTE(review): everything under #if alpha_TARGET_ARCH is dead code
-- (the Alpha port predates the Cmm types used elsewhere in this file —
-- it still matches on StixTree constructors) and is fragmentary in this
-- extract.  Documented lightly; do not assume it compiles.

-- Double literal: emit it into a read-only data segment and load it.
getRegister (StDouble d)
  = getBlockIdNat                   `thenNat` \ lbl ->
    getNewRegNat PtrRep             `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
            LDATA RoDataSegment lbl [
                  DATA TF [ImmLab (rational d)]
            LDA tmp (AddrImm (ImmCLbl lbl)),
            LD TF dst (AddrReg tmp)]
    return (Any FF64 code)

-- Unary PrimOps: simple ops inline; transcendentals become C calls.
getRegister (StPrim primop [x]) -- unary PrimOps
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp    -> trivialUCode NOT x

      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x

      OrdOp -> coerceIntCode IntRep x

      Float2IntOp  -> coerceFP2Int    x
      Int2FloatOp  -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int    x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

      other_op -> getRegister (StCall fn CCallConv FF64 [x])
        -- fn maps each float/double transcendental op to its libm name.
          fn = case other_op of
                 FloatExpOp   -> fsLit "exp"
                 FloatLogOp   -> fsLit "log"
                 FloatSqrtOp  -> fsLit "sqrt"
                 FloatSinOp   -> fsLit "sin"
                 FloatCosOp   -> fsLit "cos"
                 FloatTanOp   -> fsLit "tan"
                 FloatAsinOp  -> fsLit "asin"
                 FloatAcosOp  -> fsLit "acos"
                 FloatAtanOp  -> fsLit "atan"
                 FloatSinhOp  -> fsLit "sinh"
                 FloatCoshOp  -> fsLit "cosh"
                 FloatTanhOp  -> fsLit "tanh"
                 DoubleExpOp  -> fsLit "exp"
                 DoubleLogOp  -> fsLit "log"
                 DoubleSqrtOp -> fsLit "sqrt"
                 DoubleSinOp  -> fsLit "sin"
                 DoubleCosOp  -> fsLit "cos"
                 DoubleTanOp  -> fsLit "tan"
                 DoubleAsinOp -> fsLit "asin"
                 DoubleAcosOp -> fsLit "acos"
                 DoubleAtanOp -> fsLit "atan"
                 DoubleSinhOp -> fsLit "sinh"
                 DoubleCoshOp -> fsLit "cosh"
                 DoubleTanhOp -> fsLit "tanh"

    pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Dyadic PrimOps (dead Alpha code, fragmentary extract).  Comparisons
-- are mapped onto CMP with operands swapped where Alpha lacks the
-- direct condition; Ne goes through int_NE_code / cmpF_code below.
getRegister (StPrim primop [x, y]) -- dyadic PrimOps
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp  -> trivialCode (CMP LTT) y x
      IntGeOp  -> trivialCode (CMP LE) y x
      IntEqOp  -> trivialCode (CMP EQQ) x y
      IntNeOp  -> int_NE_code x y
      IntLtOp  -> trivialCode (CMP LTT) x y
      IntLeOp  -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      WordGeOp -> trivialCode (CMP ULE) x y
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp  -> trivialCode (ADD Q False) x y
      IntSubOp  -> trivialCode (SUB Q False) x y
      IntMulOp  -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp  -> trivialCode (REM Q False) x y

      WordAddOp  -> trivialCode (ADD Q False) x y
      WordSubOp  -> trivialCode (SUB Q False) x y
      WordMulOp  -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp  -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode  W32 (FADD TF) x y
      FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
      FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
      FloatDivOp -> trivialFCode  W32 (FDIV TF) x y

      DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
      DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp  -> trivialCode OR  x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
{- ------------------------------------------------------------
   Some bizarre special code for getting condition codes into
   registers.  Integer non-equality is a test for equality
   followed by an XOR with 1.  (Integer comparisons always set
   the result register to 0 or 1.)  Floating point comparisons of
   any kind leave the result in a floating point register, so we
   need to wrangle an integer register out of things.

int_NE_code :: StixTree -> StixTree -> NatM Register
-- NOTE(review): the equation head and closing comment delimiter are
-- missing from this extract; dead Alpha code, documented lightly.

    = trivialCode (CMP EQQ) x y     `thenNat` \ register ->
      getNewRegNat IntRep           `thenNat` \ tmp ->
      -- XOR the 0/1 equality result with 1 to get inequality.
          code  = registerCode register tmp
          src   = registerName register tmp
          code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
      return (Any IntRep code__2)

{- ------------------------------------------------------------
   Comments for int_NE_code also apply to cmpF_code

    :: (Reg -> Reg -> Reg -> Instr)
    -> StixTree -> StixTree

cmpF_code instr cond x y
  = trivialFCode pr instr x y       `thenNat` \ register ->
    getNewRegNat FF64               `thenNat` \ tmp ->
    getBlockIdNat                   `thenNat` \ lbl ->
        code   = registerCode register tmp
        result = registerName register tmp

        -- set dst to 1, branch on the FP condition, otherwise clear dst.
        code__2 dst = code . mkSeqInstrs [
            OR zeroh (RIImm (ImmInt 1)) dst,
            BF cond result (ImmCLbl lbl),
            OR zeroh (RIReg zeroh) dst,
    return (Any IntRep code__2)

    pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
------------------------------------------------------------

-- Memory load on Alpha (dead code, fragmentary).
getRegister (CmmLoad pk mem)
  = getAmode mem                    `thenNat` \ amode ->
        code  = amodeCode amode
        src   = amodeAddr amode
        size  = primRepToSize pk
        code__2 dst = code . mkSeqInstr (LD size dst src)
    return (Any pk code__2)

-- Integer literal on Alpha: small ones via OR-immediate, otherwise LDI.
getRegister (StInt i)
        code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    return (Any IntRep code)
        code dst = mkSeqInstr (LDI Q dst src)
    return (Any IntRep code)
    src = ImmInt (fromInteger i)
        code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    return (Any PtrRep code)
    imm__2 = case imm of Just x -> x
794 #endif /* alpha_TARGET_ARCH */
796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | i386: a Float literal is emitted into a data section and loaded
-- through a (possibly PIC-indirected) reference.
getRegister (CmmLit (CmmFloat f W32)) = do
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    -- NOTE(review): the "let code dst = LDATA ... [" lines are missing
    -- from this extract.
                            CmmStaticLit (CmmFloat f W32)]
            `consOL` (addr_code `snocOL`
    return (Any FF32 code)

-- | i386 Double literals: 0.0 and 1.0 have dedicated x87 instructions
-- (GLDZ / GLD1); anything else goes through a data section like Float.
-- NOTE(review): the guard lines ("| d == 0.0" / "| d == 1.0") preceding
-- the two special cases are missing from this extract.
getRegister (CmmLit (CmmFloat d W64))
  = let code dst = unitOL (GLDZ dst)
    in  return (Any FF64 code)

  = let code dst = unitOL (GLD1 dst)
    in  return (Any FF64 code)

    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
                            CmmStaticLit (CmmFloat d W64)]
            `consOL` (addr_code `snocOL`
    return (Any FF64 code)
838 #endif /* i386_TARGET_ARCH */
840 #if x86_64_TARGET_ARCH
-- | x86_64: float literal 0.0 is cheapest as an SSE self-XOR.
getRegister (CmmLit (CmmFloat 0.0 w)) = do
   let size = floatSize w
       code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
   return (Any size code)

-- | x86_64: other float literals go into a data section and are loaded
-- rip-relative.
getRegister (CmmLit (CmmFloat f w)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            -- NOTE(review): the LDATA header lines are missing from this
            -- extract.
                            CmmStaticLit (CmmFloat f w)],
            MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
    return (Any size code)
  where size = floatSize w
861 #endif /* x86_64_TARGET_ARCH */
863 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
865 -- catch simple cases of zero- or sign-extended load
-- catch simple cases of zero- or sign-extended load
-- (fused into a single MOVZxL/MOVSxL instead of load-then-extend)
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

#if x86_64_TARGET_ARCH

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)
913 #if x86_64_TARGET_ARCH
-- x86_64: PicBaseReg + displacement becomes a single rip-relative LEA.
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 = return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
920 #if x86_64_TARGET_ARCH
-- | x86_64: SSE has no FP negate instruction, so flip the sign bit by
-- XORing with a 128-bit constant holding the sign-bit mask.
getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  -- NOTE(review): the "let" line is missing from this extract.
     code dst = x_code dst `appOL` toOL [
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
                -- sign-bit mask for one float; rest of the 16 bytes zero
                CmmStaticLit (CmmInt 0x80000000 W32),
                CmmStaticLit (CmmInt 0 W32),
                CmmStaticLit (CmmInt 0 W32),
                CmmStaticLit (CmmInt 0 W32)
        XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorps, so we need the 128-bit constant
        -- ToDo: rip-relative
  return (Any FF32 code)

getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  -- NOTE(review): the "let" line is missing from this extract.
        -- This is how gcc does it, so it can't be that bad:
     code dst = x_code dst `appOL` toOL [
        LDATA ReadOnlyData16 [
                -- sign-bit mask for one double; upper 8 bytes zero
                CmmStaticLit (CmmInt 0x8000000000000000 W64),
                CmmStaticLit (CmmInt 0 W64)
        -- gcc puts an unpck here.  Wonder if we need it.
        XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorpd, so we need the 128-bit constant
  return (Any FF64 code)
962 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Unary MachOps: negation, complement, and the integer/float width
-- conversions.  Same-width conversions are no-ops; narrowing to 8/16 bit
-- leaves the value in place (toI8Reg/toI16Reg); widening emits
-- MOVZxL/MOVSxL.  Helpers are defined in the where-clause below.
getRegister (CmmMachOp mop [x]) -- unary MachOps
      -- i386 x87 float negate (x86_64 SSE negate handled above)
      MO_F_Neg W32  -> trivialUFCode FF32 (GNEG FF32) x
      MO_F_Neg W64  -> trivialUFCode FF64 (GNEG FF64) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Narrowing: value stays put, but must be in a byte-addressable reg.
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W64 W32 -> conversionNop II64 x
      MO_SS_Conv W64 W32 -> conversionNop II64 x
      MO_UU_Conv W64 W16 -> toI16Reg W64 x
      MO_SS_Conv W64 W16 -> toI16Reg W64 x
      MO_UU_Conv W64 W8  -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

#if i386_TARGET_ARCH
      -- x87 keeps everything at 80-bit internally, so FF conversions
      -- are representation no-ops here.
      MO_FF_Conv W32 W64 -> conversionNop FF64 x
      MO_FF_Conv W64 W32 -> conversionNop FF32 x
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)
   -- helpers --------------------------------------------------------
        -- Generic unary op at the given size.
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            -- NOTE(review): the "let code dst = ..." lines are missing
            -- from this extract.
                 instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        -- Same value, new recorded width; emits no instructions.
        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister expr
                 return (swizzleRegisterRep e_code new_size)
1065 getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
1067 MO_F_Eq w -> condFltReg EQQ x y
1068 MO_F_Ne w -> condFltReg NE x y
1069 MO_F_Gt w -> condFltReg GTT x y
1070 MO_F_Ge w -> condFltReg GE x y
1071 MO_F_Lt w -> condFltReg LTT x y
1072 MO_F_Le w -> condFltReg LE x y
1074 MO_Eq rep -> condIntReg EQQ x y
1075 MO_Ne rep -> condIntReg NE x y
1077 MO_S_Gt rep -> condIntReg GTT x y
1078 MO_S_Ge rep -> condIntReg GE x y
1079 MO_S_Lt rep -> condIntReg LTT x y
1080 MO_S_Le rep -> condIntReg LE x y
1082 MO_U_Gt rep -> condIntReg GU x y
1083 MO_U_Ge rep -> condIntReg GEU x y
1084 MO_U_Lt rep -> condIntReg LU x y
1085 MO_U_Le rep -> condIntReg LEU x y
1087 #if i386_TARGET_ARCH
1088 MO_F_Add w -> trivialFCode w GADD x y
1089 MO_F_Sub w -> trivialFCode w GSUB x y
1090 MO_F_Quot w -> trivialFCode w GDIV x y
1091 MO_F_Mul w -> trivialFCode w GMUL x y
1094 #if x86_64_TARGET_ARCH
1095 MO_F_Add w -> trivialFCode w ADD x y
1096 MO_F_Sub w -> trivialFCode w SUB x y
1097 MO_F_Quot w -> trivialFCode w FDIV x y
1098 MO_F_Mul w -> trivialFCode w MUL x y
1101 MO_Add rep -> add_code rep x y
1102 MO_Sub rep -> sub_code rep x y
1104 MO_S_Quot rep -> div_code rep True True x y
1105 MO_S_Rem rep -> div_code rep True False x y
1106 MO_U_Quot rep -> div_code rep False True x y
1107 MO_U_Rem rep -> div_code rep False False x y
1109 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1111 MO_Mul rep -> triv_op rep IMUL
1112 MO_And rep -> triv_op rep AND
1113 MO_Or rep -> triv_op rep OR
1114 MO_Xor rep -> triv_op rep XOR
1116 {- Shift ops on x86s have constraints on their source, it
1117 either has to be Imm, CL or 1
1118 => trivialCode is not restrictive enough (sigh.)
1120 MO_Shl rep -> shift_code rep SHL x y {-False-}
1121 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
1122 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
1124 other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
1126 --------------------
1127 triv_op width instr = trivialCode width op (Just op) x y
1128 where op = instr (intSize width)
1130 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1131 imulMayOflo rep a b = do
1132 (a_reg, a_code) <- getNonClobberedReg a
1133 b_code <- getAnyReg b
1135 shift_amt = case rep of
1138 _ -> panic "shift_amt"
1141 code = a_code `appOL` b_code eax `appOL`
1143 IMUL2 size (OpReg a_reg), -- result in %edx:%eax
1144 SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
1145 -- sign extend lower part
1146 SUB size (OpReg edx) (OpReg eax)
1147 -- compare against upper
1148 -- eax==0 if high part == sign extended low part
1151 return (Fixed size eax code)
1153 --------------------
1155 -> (Size -> Operand -> Operand -> Instr)
1160 {- Case1: shift length as immediate -}
1161 shift_code width instr x y@(CmmLit lit) = do
1162 x_code <- getAnyReg x
1164 size = intSize width
1166 = x_code dst `snocOL`
1167 instr size (OpImm (litToImm lit)) (OpReg dst)
1169 return (Any size code)
1171 {- Case2: shift length is complex (non-immediate)
1172 * y must go in %ecx.
1173 * we cannot do y first *and* put its result in %ecx, because
1174 %ecx might be clobbered by x.
1175 * if we do y second, then x cannot be
1176 in a clobbered reg. Also, we cannot clobber x's reg
1177 with the instruction itself.
1179 - do y first, put its result in a fresh tmp, then copy it to %ecx later
1180 - do y second and put its result into %ecx. x gets placed in a fresh
1181 tmp. This is likely to be better, because the reg alloc can
1182 eliminate this reg->reg move here (it won't eliminate the other one,
1183 because the move is into the fixed %ecx).
1185 shift_code width instr x y{-amount-} = do
1186 x_code <- getAnyReg x
1187 let size = intSize width
1188 tmp <- getNewRegNat size
1189 y_code <- getAnyReg y
1191 code = x_code tmp `appOL`
1193 instr size (OpReg ecx) (OpReg tmp)
1195 return (Fixed size tmp code)
1197 --------------------
1198 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1199 add_code rep x (CmmLit (CmmInt y _))
1200 | is32BitInteger y = add_int rep x y
1201 add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
1202 where size = intSize rep
1204 --------------------
1205 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
1206 sub_code rep x (CmmLit (CmmInt y _))
1207 | is32BitInteger (-y) = add_int rep x (-y)
1208 sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
1210 -- our three-operand add instruction:
1211 add_int width x y = do
1212 (x_reg, x_code) <- getSomeReg x
1214 size = intSize width
1215 imm = ImmInt (fromInteger y)
1219 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
1222 return (Any size code)
1224 ----------------------
1225 div_code width signed quotient x y = do
1226 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
1227 x_code <- getAnyReg x
1229 size = intSize width
1230 widen | signed = CLTD size
1231 | otherwise = XOR size (OpReg edx) (OpReg edx)
1233 instr | signed = IDIV
1236 code = y_code `appOL`
1238 toOL [widen, instr size y_op]
1240 result | quotient = eax
1244 return (Fixed size result code)
1247 getRegister (CmmLoad mem pk)
1250 Amode src mem_code <- getAmode mem
1252 size = cmmTypeSize pk
1253 code dst = mem_code `snocOL`
1254 IF_ARCH_i386(GLD size src dst,
1255 MOV size (OpAddr src) (OpReg dst))
1256 return (Any size code)
1258 #if i386_TARGET_ARCH
1259 getRegister (CmmLoad mem pk)
1262 code <- intLoadCode instr mem
1263 return (Any size code)
1265 width = typeWidth pk
1266 size = intSize width
1267 instr = case width of
1270 -- We always zero-extend 8-bit loads, if we
1271 -- can't think of anything better. This is because
1272 -- we can't guarantee access to an 8-bit variant of every register
1273 -- (esi and edi don't have 8-bit variants), so to make things
1274 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
1277 #if x86_64_TARGET_ARCH
1278 -- Simpler memory load code on x86_64
1279 getRegister (CmmLoad mem pk)
1281 code <- intLoadCode (MOV size) mem
1282 return (Any size code)
1283 where size = intSize $ typeWidth pk
1286 getRegister (CmmLit (CmmInt 0 width))
1288 size = intSize width
1290 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
1291 adj_size = case size of II64 -> II32; _ -> size
1292 size1 = IF_ARCH_i386( size, adj_size )
1294 = unitOL (XOR size1 (OpReg dst) (OpReg dst))
1296 return (Any size code)
1298 #if x86_64_TARGET_ARCH
1299 -- optimisation for loading small literals on x86_64: take advantage
1300 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
1301 -- instruction forms are shorter.
1302 getRegister (CmmLit lit)
1303 | isWord64 (cmmLitType lit), not (isBigLit lit)
1306 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
1308 return (Any II64 code)
1310 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
1312 -- note1: not the same as (not.is32BitLit), because that checks for
1313 -- signed literals that fit in 32 bits, but we want unsigned
1315 -- note2: all labels are small, because we're assuming the
1316 -- small memory model (see gcc docs, -mcmodel=small).
1319 getRegister (CmmLit lit)
1321 size = cmmTypeSize (cmmLitType lit)
1323 code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
1325 return (Any size code)
1327 getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Compute the addressing mode for @mem@ and return a function that,
-- given a destination register, appends the supplied load instruction
-- to the address-computation code.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode src mem_code <- getAmode mem
    let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
    return load
1336 -- Compute an expression into *any* register, adding the appropriate
1337 -- move instruction if necessary.
-- Returns a function from the chosen destination register to the code
-- that materialises the expression into it.
1338 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-- NOTE(review): the equation header and tail of this definition are
-- elided in this listing; presumably the Register obtained here is
-- post-processed with 'anyReg' below -- confirm against the full source.
1340 r <- getRegister expr
-- | Turn a 'Register' into a destination-parameterised instruction
-- sequence, inserting a reg-to-reg move when the value lives in a
-- fixed register.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg r = case r of
    Any _ code          -> return code
    Fixed rep reg fcode -> return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1347 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
1348 -- Fixed registers might not be byte-addressable, so we make sure we've
1349 -- got a temporary, inserting an extra reg copy if necessary.
1350 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
1351 #if x86_64_TARGET_ARCH
-- On x86_64 every integer register has a byte-sized alias, so the
-- plain getSomeReg suffices.
1352 getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-- i386 path: only eax/ebx/ecx/edx have 8-bit aliases, so force the
-- value into a fresh (virtual) register when needed.
-- NOTE(review): the case-dispatch lines on the Register returned by
-- getRegister are elided in this listing.
1354 getByteReg expr = do
1355 r <- getRegister expr
1358 tmp <- getNewRegNat rep
1359 return (tmp, code tmp)
-- Virtual registers are fine as-is; the allocator will pick a
-- byte-addressable real register for them.
1361 | isVirtualReg reg -> return (reg,code)
1363 tmp <- getNewRegNat rep
1364 return (tmp, code `snocOL` reg2reg rep reg tmp)
1365 -- ToDo: could optimise slightly by checking for byte-addressable
1366 -- real registers, but that will happen very rarely if at all.
1369 -- Another variant: this time we want the result in a register that cannot
1370 -- be modified by code to evaluate an arbitrary expression.
1371 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1372 getNonClobberedReg expr = do
1373 r <- getRegister expr
-- (Any case, header elided): materialise into a fresh temporary,
-- which by construction nothing else writes to.
1376 tmp <- getNewRegNat rep
1377 return (tmp, code tmp)
1379 -- only free regs can be clobbered
-- A value sitting in a free (caller-clobberable) real register must be
-- copied out to a temporary before arbitrary code runs.
1380 | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
1381 tmp <- getNewRegNat rep
1382 return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | Emit a single register-to-register move of the given size.
-- On i386, x87 float values need the dedicated GMOV pseudo-op;
-- everything else is an ordinary MOV.  (The x86_64 branch between the
-- guards is elided in this listing.)
1386 reg2reg :: Size -> Reg -> Reg -> Instr
1387 reg2reg size src dst
1388 #if i386_TARGET_ARCH
1389 | isFloatSize size = GMOV src dst
1391 | otherwise = MOV size (OpReg src) (OpReg dst)
1393 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1397 #if sparc_TARGET_ARCH
1399 -- getRegister :: CmmExpr -> NatM Register
1401 -- Load a literal float into a float register.
1402 -- The actual literal is stored in a new data area, and we load it
-- at runtime via SETHI/LD: SETHI puts the high 22 bits of the data
-- label's address in a temp, and LD uses the low 10 bits as the
-- immediate offset.  (The LDATA directive that emits the data area is
-- elided in this listing.)
1404 getRegister (CmmLit (CmmFloat f W32)) = do
1406 -- a label for the new data area
1407 lbl <- getNewLabelNat
1408 tmp <- getNewRegNat II32
1410 let code dst = toOL [
1414 CmmStaticLit (CmmFloat f W32)],
1417 SETHI (HI (ImmCLbl lbl)) tmp,
1418 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1420 return (Any FF32 code)
-- Same scheme for a double-precision literal, loaded with a 64-bit LD
-- into an FF64-classed register.
1422 getRegister (CmmLit (CmmFloat d W64)) = do
1423 lbl <- getNewLabelNat
1424 tmp <- getNewRegNat II32
1425 let code dst = toOL [
1428 CmmStaticLit (CmmFloat d W64)],
1429 SETHI (HI (ImmCLbl lbl)) tmp,
1430 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1431 return (Any FF64 code)
-- SPARC: unary MachOps.  Negation/complement go through the trivial
-- *UCode helpers (SUB/XNOR against the hard-wired zero register g0);
-- width conversions are either real coercions, mask-with-AND, or nops.
1433 getRegister (CmmMachOp mop [x]) -- unary MachOps
1435 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
1436 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
1438 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
1439 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
1441 MO_FF_Conv W64 W32-> coerceDbl2Flt x
1442 MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1444 MO_FS_Conv from to -> coerceFP2Int from to x
1445 MO_SF_Conv from to -> coerceInt2FP from to x
1447 -- Conversions which are a nop on sparc
1449 | from == to -> conversionNop (intSize to) x
1450 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1451 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1452 MO_SS_Conv W32 to -> conversionNop (intSize to) x
-- Widening: done with a shift-left/shift-right pair; see
-- integerExtend below.
1455 MO_UU_Conv W8 W32 -> integerExtend False W8 W32 x
1456 MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
1457 MO_UU_Conv W8 W16 -> integerExtend False W8 W16 x
1458 MO_SS_Conv W16 W32 -> integerExtend True W16 W32 x
1460 other_op -> panic "Unknown unary mach op"
-- Sign- or zero-extend by shifting left then arithmetic (SRA) or
-- logical (SRL) right.  (The SLL line and the 'code dst =' binding are
-- elided in this listing.)
1463 integerExtend signed from to expr = do
1464 (reg, e_code) <- getSomeReg expr
1468 ((if signed then SRA else SRL)
1469 reg (RIImm (ImmInt 0)) dst)
1470 return (Any (intSize to) code)
-- A conversion that needs no code: just relabel the Register's size.
1471 conversionNop new_rep expr
1472 = do e_code <- getRegister expr
1473 return (swizzleRegisterRep e_code new_rep)
-- SPARC: dyadic MachOps.  Comparisons build a 0/1 result via
-- condIntReg/condFltReg; arithmetic goes through the trivial*Code
-- helpers; division is punted to libc-style helper routines.
1475 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1477 MO_Eq rep -> condIntReg EQQ x y
1478 MO_Ne rep -> condIntReg NE x y
1480 MO_S_Gt rep -> condIntReg GTT x y
1481 MO_S_Ge rep -> condIntReg GE x y
1482 MO_S_Lt rep -> condIntReg LTT x y
1483 MO_S_Le rep -> condIntReg LE x y
-- NOTE(review): the W32 *unsigned* comparisons below use the signed
-- condition codes (GTT/GE/LTT/LE), whereas the W16 cases use the
-- unsigned ones (GU/GEU/LU/LEU).  That asymmetry looks wrong for
-- genuinely unsigned 32-bit compares -- confirm before relying on it.
1485 MO_U_Gt W32 -> condIntReg GTT x y
1486 MO_U_Ge W32 -> condIntReg GE x y
1487 MO_U_Lt W32 -> condIntReg LTT x y
1488 MO_U_Le W32 -> condIntReg LE x y
1490 MO_U_Gt W16 -> condIntReg GU x y
1491 MO_U_Ge W16 -> condIntReg GEU x y
1492 MO_U_Lt W16 -> condIntReg LU x y
1493 MO_U_Le W16 -> condIntReg LEU x y
1495 MO_Add W32 -> trivialCode W32 (ADD False False) x y
1496 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1498 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1500 -- ToDo: teach about V8+ SPARC div instructions
-- Division/remainder are out-of-line calls to the .div/.rem/.udiv/
-- .urem millicode routines.
1501 MO_S_Quot W32 -> idiv FSLIT(".div") x y
1502 MO_S_Rem W32 -> idiv FSLIT(".rem") x y
1503 MO_U_Quot W32 -> idiv FSLIT(".udiv") x y
1504 MO_U_Rem W32 -> idiv FSLIT(".urem") x y
1507 MO_F_Eq w -> condFltReg EQQ x y
1508 MO_F_Ne w -> condFltReg NE x y
1510 MO_F_Gt w -> condFltReg GTT x y
1511 MO_F_Ge w -> condFltReg GE x y
1512 MO_F_Lt w -> condFltReg LTT x y
1513 MO_F_Le w -> condFltReg LE x y
1515 MO_F_Add w -> trivialFCode w FADD x y
1516 MO_F_Sub w -> trivialFCode w FSUB x y
1517 MO_F_Mul w -> trivialFCode w FMUL x y
1518 MO_F_Quot w -> trivialFCode w FDIV x y
1520 MO_And rep -> trivialCode rep (AND False) x y
1521 MO_Or rep -> trivialCode rep (OR False) x y
1522 MO_Xor rep -> trivialCode rep (XOR False) x y
1524 MO_Mul rep -> trivialCode rep (SMUL False) x y
1526 MO_Shl rep -> trivialCode rep SLL x y
1527 MO_U_Shr rep -> trivialCode rep SRL x y
1528 MO_S_Shr rep -> trivialCode rep SRA x y
-- NOTE(review): these MO_F32_Pwr/MO_F64_Pwr alternatives reference the
-- legacy StCall/MO_F32_to_Dbl constructors; they look like dead
-- Stix-era code -- confirm whether they still compile on sparc.
1531 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1532 [promote x, promote y])
1533 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1534 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1537 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1539 --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1541 --------------------
-- Signed multiply with overflow detection: do a widening SMUL, read
-- the high word (RDY line elided in this listing), sign-extend the low
-- word with SRA, and subtract -- the result is zero iff the high part
-- equals the sign-extension of the low part, i.e. no overflow.
1542 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1543 imulMayOflo rep a b = do
1544 (a_reg, a_code) <- getSomeReg a
1545 (b_reg, b_code) <- getSomeReg b
1546 res_lo <- getNewRegNat II32
1547 res_hi <- getNewRegNat II32
-- shift amount for the sign-extension (alternatives elided here).
1549 shift_amt = case rep of
1552 _ -> panic "shift_amt"
1553 code dst = a_code `appOL` b_code `appOL`
1555 SMUL False a_reg (RIReg b_reg) res_lo,
1557 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1558 SUB False False res_lo (RIReg res_hi) dst
1560 return (Any II32 code)
-- SPARC: memory loads -- compute the amode, then a single LD of the
-- appropriate size.
1562 getRegister (CmmLoad mem pk) = do
1563 Amode src code <- getAmode mem
1565 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1566 return (Any (cmmTypeSize pk) code__2)
-- Small integer literal (fits-in-immediate guard elided): synthesise
-- with OR against the zero register g0.
1568 getRegister (CmmLit (CmmInt i _))
1571 src = ImmInt (fromInteger i)
1572 code dst = unitOL (OR False g0 (RIImm src) dst)
1574 return (Any II32 code)
-- General literal: SETHI for the high 22 bits (elided) then OR in the
-- low 10 bits.
1576 getRegister (CmmLit lit)
1577 = let rep = cmmLitType lit
1581 OR False dst (RIImm (LO imm)) dst]
1582 in return (Any II32 code)
1584 #endif /* sparc_TARGET_ARCH */
1586 #if powerpc_TARGET_ARCH
-- PowerPC: plain memory load -- amode plus one LD; the ASSERT checks
-- that a float load goes into a float-class destination.
1587 getRegister (CmmLoad mem pk)
1590 Amode addr addr_code <- getAmode mem
1591 let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1592 addr_code `snocOL` LD size dst addr
1593 return (Any size code)
1594 where size = cmmTypeSize pk
1596 -- catch simple cases of zero- or sign-extended load
-- lbz zero-extends by definition, so a widening MO_UU_Conv of a load
-- is just the narrow load itself.
1597 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1598 Amode addr addr_code <- getAmode mem
1599 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1601 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1603 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1604 Amode addr addr_code <- getAmode mem
1605 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-- LA is the load-algebraic (sign-extending) variant, used for the
-- signed halfword widening.
1607 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1608 Amode addr addr_code <- getAmode mem
1609 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-- PowerPC: unary MachOps.  Narrowing conversions are nops (the high
-- bits are treated as undefined); widenings sign-extend with EXTS or
-- mask with AND.
1611 getRegister (CmmMachOp mop [x]) -- unary MachOps
1613 MO_Not rep -> triv_ucode_int rep NOT
1615 MO_F_Neg w -> triv_ucode_float w FNEG
1616 MO_S_Neg w -> triv_ucode_int w NEG
1618 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
1619 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1621 MO_FS_Conv from to -> coerceFP2Int from to x
1622 MO_SF_Conv from to -> coerceInt2FP from to x
1625 | from == to -> conversionNop (intSize to) x
1627 -- narrowing is a nop: we treat the high bits as undefined
1628 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1629 MO_SS_Conv W16 W8 -> conversionNop II8 x
1630 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
1631 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1634 | from == to -> conversionNop (intSize to) x
1635 -- narrowing is a nop: we treat the high bits as undefined
1636 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1637 MO_UU_Conv W16 W8 -> conversionNop II8 x
1638 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1639 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
-- Shorthands for the trivial unary-code helpers at int/float sizes.
1642 triv_ucode_int width instr = trivialUCode (intSize width) instr x
1643 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
-- A conversion that needs no code: just relabel the Register's size.
1645 conversionNop new_size expr
1646 = do e_code <- getRegister expr
1647 return (swizzleRegisterRep e_code new_size)
-- PowerPC: dyadic MachOps.  Sub-word operands are first widened to 32
-- bits (extendSExpr/extendUExpr) because the compare/divide
-- instructions only operate on full words.
1649 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1651 MO_F_Eq w -> condFltReg EQQ x y
1652 MO_F_Ne w -> condFltReg NE x y
1653 MO_F_Gt w -> condFltReg GTT x y
1654 MO_F_Ge w -> condFltReg GE x y
1655 MO_F_Lt w -> condFltReg LTT x y
1656 MO_F_Le w -> condFltReg LE x y
1658 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1659 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1661 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1662 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1663 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1664 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1666 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1667 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1668 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1669 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1671 MO_F_Add w -> triv_float w FADD
1672 MO_F_Sub w -> triv_float w FSUB
1673 MO_F_Mul w -> triv_float w FMUL
1674 MO_F_Quot w -> triv_float w FDIV
1676 -- optimize addition with 32-bit immediate
-- (case-of-y header elided): literals that fit in 16 bits go via
-- addi; a general 32-bit literal is added as ADDIS (high half,
-- @HA@-adjusted for the sign of the low half) plus ADD of the low half.
1680 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1681 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1684 (src, srcCode) <- getSomeReg x
1685 let imm = litToImm lit
1686 code dst = srcCode `appOL` toOL [
1687 ADDIS dst src (HA imm),
1688 ADD dst dst (RIImm (LO imm))
1690 return (Any II32 code)
1691 _ -> trivialCode W32 True ADD x y
1693 MO_Add rep -> trivialCode rep True ADD x y
1695 case y of -- subfi ('subtract from' with immediate) doesn't exist
-- so x - imm is rewritten as x + (-imm) when -imm fits.
1696 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1697 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
-- SUBF computes (second operand - first), hence the swapped y x.
1698 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1700 MO_Mul rep -> trivialCode rep True MULLW x y
1702 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1704 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1705 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1707 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1708 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1710 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1711 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1713 MO_And rep -> trivialCode rep False AND x y
1714 MO_Or rep -> trivialCode rep False OR x y
1715 MO_Xor rep -> trivialCode rep False XOR x y
1717 MO_Shl rep -> trivialCode rep False SLW x y
1718 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1719 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1721 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1722 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
-- PowerPC: literals.  Small ints use a single LI; floats live in a
-- read-only data area reached via a (possibly PIC) dynamic reference;
-- other literals are built with LIS+ADD (high/low halves, elided
-- lines in between in this listing).
1724 getRegister (CmmLit (CmmInt i rep))
1725 | Just imm <- makeImmediate rep True i
1727 code dst = unitOL (LI dst imm)
1729 return (Any (intSize rep) code)
1731 getRegister (CmmLit (CmmFloat f frep)) = do
1732 lbl <- getNewLabelNat
1733 dflags <- getDynFlagsNat
-- cmmMakeDynamicReference makes this work under -fPIC as well.
1734 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1735 Amode addr addr_code <- getAmode dynRef
1736 let size = floatSize frep
1738 LDATA ReadOnlyData [CmmDataLabel lbl,
1739 CmmStaticLit (CmmFloat f frep)]
1740 `consOL` (addr_code `snocOL` LD size dst addr)
1741 return (Any size code)
1743 getRegister (CmmLit lit)
1744 = let rep = cmmLitType lit
1748 ADD dst dst (RIImm (LO imm))
1750 in return (Any (cmmTypeSize rep) code)
1752 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1754 -- extend?Rep: wrap integer expression of type rep
1755 -- in a conversion to II32
-- (W32 is already full-width, so it is returned unchanged.)
1756 extendSExpr W32 x = x
1757 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
1758 extendUExpr W32 x = x
1759 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
1761 #endif /* powerpc_TARGET_ARCH */
1764 -- -----------------------------------------------------------------------------
1765 -- The 'Amode' type: Memory addressing modes passed up the tree.
-- An 'Amode' pairs a target addressing mode with the instructions that
-- set up the registers it refers to.
1767 data Amode = Amode AddrMode InstrBlock
1770 Now, given a tree (the argument to a CmmLoad) that references memory,
1771 produce a suitable addressing mode.
1773 A Rule of the Game (tm) for Amodes: use of the addr bit must
1774 immediately follow use of the code part, since the code part puts
1775 values in registers which the addr then refers to. So you can't put
1776 anything in between, lest it overwrite some of those registers. If
1777 you need to do some other computation between the code part and use of
1778 the addr bit, first store the effective address from the amode in a
1779 temporary, then do the other computation, and then use the temporary:
1783 ... other computation ...
-- | Translate a memory-referencing expression into an addressing mode.
1787 getAmode :: CmmExpr -> NatM Amode
-- Normalise register-plus-offset first so the arch-specific clauses
-- below only see MachOp shapes.
1788 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1790 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1792 #if alpha_TARGET_ARCH
-- Alpha clauses (legacy, still in old thenNat/StPrim style):
-- base register plus a positive or negative immediate offset.
1794 getAmode (StPrim IntSubOp [x, StInt i])
1795 = getNewRegNat PtrRep `thenNat` \ tmp ->
1796 getRegister x `thenNat` \ register ->
1798 code = registerCode register tmp
1799 reg = registerName register tmp
1800 off = ImmInt (-(fromInteger i))
1802 return (Amode (AddrRegImm reg off) code)
1804 getAmode (StPrim IntAddOp [x, StInt i])
1805 = getNewRegNat PtrRep `thenNat` \ tmp ->
1806 getRegister x `thenNat` \ register ->
1808 code = registerCode register tmp
1809 reg = registerName register tmp
1810 off = ImmInt (fromInteger i)
1812 return (Amode (AddrRegImm reg off) code)
-- Absolute immediate address (guard elided in this listing).
1816 = return (Amode (AddrImm imm__2) id)
1819 imm__2 = case imm of Just x -> x
-- Fallback: evaluate the whole expression into a register and address
-- through it.
1822 = getNewRegNat PtrRep `thenNat` \ tmp ->
1823 getRegister other `thenNat` \ register ->
1825 code = registerCode register tmp
1826 reg = registerName register tmp
1828 return (Amode (AddrReg reg) code)
1830 #endif /* alpha_TARGET_ARCH */
1832 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1834 #if x86_64_TARGET_ARCH
-- x86_64: PIC-base-relative constants become RIP-relative addresses
-- with no setup code.
1836 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1837 CmmLit displacement])
1838 = return $ Amode (ripRel (litToImm displacement)) nilOL
1842 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1844 -- This is all just ridiculous, since it carefully undoes
1845 -- what mangleIndexTree has just done.
-- Base register +/- constant displacement (fits-in-32-bit guard lines
-- are elided in this listing).
1846 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1848 -- ASSERT(rep == II32)???
1849 = do (x_reg, x_code) <- getSomeReg x
1850 let off = ImmInt (-(fromInteger i))
1851 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1853 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1855 -- ASSERT(rep == II32)???
1856 = do (x_reg, x_code) <- getSomeReg x
1857 let off = ImmInt (fromInteger i)
1858 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1860 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1861 -- recognised by the next rule.
1862 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1864 = getAmode (CmmMachOp (MO_Add rep) [b,a])
-- base + (index << shift): only scales 1/2/4/8 exist in SIB encoding.
1866 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1867 [y, CmmLit (CmmInt shift _)]])
1868 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1869 = x86_complex_amode x y shift 0
-- Matches (x + ((y << shift) + offset)): an x86 base + scaled-index +
-- displacement addressing mode.  The shift must be 0..3 (SIB scales
-- 1/2/4/8) and the displacement must fit in a signed 32-bit immediate,
-- since x86_complex_amode truncates it with fromIntegral.
--
-- BUG FIX: (&&) binds tighter than (||) in Haskell, so the original,
-- unparenthesised guard applied the is32BitInteger check only to the
-- shift == 3 case, letting over-wide displacements through for scales
-- 1, 2 and 4.  The parentheses make the check apply to every scale.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
        && is32BitInteger offset
  = x86_complex_amode x y shift offset
-- Plain base + index (scale 1, no displacement).
1879 getAmode (CmmMachOp (MO_Add rep) [x,y])
1880 = x86_complex_amode x y 0 0
-- A literal that fits in 32 bits can be used as an absolute address.
1882 getAmode (CmmLit lit) | is32BitLit lit
1883 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-- Fallback (equation header elided): evaluate the expression into a
-- register and address through it with zero offset.
1886 (reg,code) <- getSomeReg expr
1887 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + scaled-index + displacement amode.  @shift@ is the
-- left-shift amount (0..3), converted to the SIB scale factor below;
-- @offset@ is truncated with fromIntegral to Int, so callers must have
-- already checked it fits in 32 bits.
1890 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1891 x86_complex_amode base index shift offset
1892 = do (x_reg, x_code) <- getNonClobberedReg base
1893 -- x must be in a temp, because it has to stay live over y_code
1894 -- we could compare x_reg and y_reg and do something better here...
1895 (y_reg, y_code) <- getSomeReg index
1897 code = x_code `appOL` y_code
-- NOTE(review): this let-bound 'base' (the SIB scale 1/2/4/8)
-- shadows the 'base' parameter above -- confusing but harmless.
1898 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
1899 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1902 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1904 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1906 #if sparc_TARGET_ARCH
-- SPARC: register +/- immediate (fits-in-13-bit guard lines elided in
-- this listing).
1908 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
1911 (reg, code) <- getSomeReg x
1913 off = ImmInt (-(fromInteger i))
1914 return (Amode (AddrRegImm reg off) code)
1917 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
1920 (reg, code) <- getSomeReg x
1922 off = ImmInt (fromInteger i)
1923 return (Amode (AddrRegImm reg off) code)
-- register + register addressing.
1925 getAmode (CmmMachOp (MO_Add rep) [x, y])
1927 (regX, codeX) <- getSomeReg x
1928 (regY, codeY) <- getSomeReg y
1930 code = codeX `appOL` codeY
1931 return (Amode (AddrRegReg regX regY) code)
1933 -- XXX Is this same as "leaf" in Stix?
-- Literal address: SETHI the high 22 bits into a temp, use the low 10
-- bits as the immediate offset.
1934 getAmode (CmmLit lit)
1936 tmp <- getNewRegNat II32
1938 code = unitOL (SETHI (HI imm__2) tmp)
1939 return (Amode (AddrRegImm tmp (LO imm__2)) code)
1941 imm__2 = litToImm lit
-- Fallback (equation header elided): evaluate into a register.
1945 (reg, code) <- getSomeReg other
1948 return (Amode (AddrRegImm reg off) code)
1950 #endif /* sparc_TARGET_ARCH */
1952 #ifdef powerpc_TARGET_ARCH
-- PowerPC: register +/- 16-bit immediate (makeImmediate validates the
-- range and builds the Imm).
1953 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
1954 | Just off <- makeImmediate W32 True (-i)
1956 (reg, code) <- getSomeReg x
1957 return (Amode (AddrRegImm reg off) code)
1960 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
1961 | Just off <- makeImmediate W32 True i
1963 (reg, code) <- getSomeReg x
1964 return (Amode (AddrRegImm reg off) code)
1966 -- optimize addition with 32-bit immediate
-- ADDIS adds the @HA@-adjusted high half into a temp; the low half
-- becomes the amode's immediate offset.
1968 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
1970 tmp <- getNewRegNat II32
1971 (src, srcCode) <- getSomeReg x
1972 let imm = litToImm lit
1973 code = srcCode `snocOL` ADDIS tmp src (HA imm)
1974 return (Amode (AddrRegImm tmp (LO imm)) code)
-- Bare literal address: LIS loads the high half, low half is the
-- offset.
1976 getAmode (CmmLit lit)
1978 tmp <- getNewRegNat II32
1979 let imm = litToImm lit
1980 code = unitOL (LIS tmp (HA imm))
1981 return (Amode (AddrRegImm tmp (LO imm)) code)
1983 getAmode (CmmMachOp (MO_Add W32) [x, y])
1985 (regX, codeX) <- getSomeReg x
1986 (regY, codeY) <- getSomeReg y
1987 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-- Fallback (equation header elided): evaluate into a register.
1991 (reg, code) <- getSomeReg other
1994 return (Amode (AddrRegImm reg off) code)
1995 #endif /* powerpc_TARGET_ARCH */
1997 -- -----------------------------------------------------------------------------
1998 -- getOperand: sometimes any operand will do.
2000 -- getNonClobberedOperand: the value of the operand will remain valid across
2001 -- the computation of an arbitrary expression, unless the expression
2002 -- is computed directly into a register which the operand refers to
2003 -- (see trivialCode where this function is used for an example).
2005 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2007 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2008 #if x86_64_TARGET_ARCH
-- x86_64: non-zero float literals are placed in a read-only data area
-- and used straight from memory, RIP-relative.
2009 getNonClobberedOperand (CmmLit lit)
2010 | isSuitableFloatingPointLit lit = do
2011 lbl <- getNewLabelNat
2012 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2014 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals are immediates -- trivially non-clobberable.
2016 getNonClobberedOperand (CmmLit lit)
2017 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2018 return (OpImm (litToImm lit), nilOL)
-- Memory operand: safe only if no register in the amode can be
-- clobbered; otherwise snapshot the effective address into a temp
-- with LEA first.
2019 getNonClobberedOperand (CmmLoad mem pk)
2020 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2021 Amode src mem_code <- getAmode mem
2023 if (amodeCouldBeClobbered src)
2025 tmp <- getNewRegNat wordSize
-- NOTE(review): the LEA size is hard-coded to II32 even though the
-- temp is wordSize -- looks suspect on x86_64; confirm.
2026 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2027 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2030 return (OpAddr src', save_code `appOL` mem_code)
-- Everything else: force into a non-clobberable register.
2031 getNonClobberedOperand e = do
2032 (reg, code) <- getNonClobberedReg e
2033 return (OpReg reg, code)
-- True if any register mentioned by the amode is a free real register
-- (and hence fair game for the allocator / later code to overwrite).
2035 amodeCouldBeClobbered :: AddrMode -> Bool
2036 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
2038 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2039 regClobbered _ = False
2041 -- getOperand: the operand is not required to remain valid across the
2042 -- computation of an arbitrary expression.
2043 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2044 #if x86_64_TARGET_ARCH
-- Non-zero float literal: read-only data area, used RIP-relative.
2045 getOperand (CmmLit lit)
2046 | isSuitableFloatingPointLit lit = do
2047 lbl <- getNewLabelNat
2048 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2050 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literal: use as an immediate operand.
2052 getOperand (CmmLit lit)
2053 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2054 return (OpImm (litToImm lit), nilOL)
-- Memory operand, when the load can be folded into the instruction
-- (on i386 only for non-float, non-64-bit loads).
2055 getOperand (CmmLoad mem pk)
2056 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2057 Amode src mem_code <- getAmode mem
2058 return (OpAddr src, mem_code)
-- Fallback (equation header elided): any register will do.
2060 (reg, code) <- getSomeReg e
2061 return (OpReg reg, code)
-- | Can this expression be used directly as an instruction operand
-- (memory reference or immediate) without evaluating it into a
-- register first?  (The catch-all False equation is elided in this
-- listing.)
2063 isOperand :: CmmExpr -> Bool
2064 isOperand (CmmLoad _ _) = True
2065 isOperand (CmmLit lit) = is32BitLit lit
2066 || isSuitableFloatingPointLit lit
2069 -- if we want a floating-point literal as an operand, we can
2070 -- use it directly from memory. However, if the literal is
2071 -- zero, we're better off generating it into a register using
-- | A floating-point literal is "suitable" to be used directly from
-- memory as an operand -- except 0.0, which is cheaper to synthesise
-- in a register, and anything that isn't a float literal at all.
isSuitableFloatingPointLit lit =
    case lit of
        CmmFloat f _ -> f /= 0.0
        _            -> False
-- | Like getOperand, but never yields an immediate: the result is a
-- memory reference when the load can be folded, otherwise a
-- non-clobberable register.
2076 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2077 getRegOrMem (CmmLoad mem pk)
2078 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2079 Amode src mem_code <- getAmode mem
2080 return (OpAddr src, mem_code)
-- Fallback (equation header elided).
2082 (reg, code) <- getNonClobberedReg e
2083 return (OpReg reg, code)
-- Does this literal fit in a 32-bit immediate?  On x86_64 a 64-bit
-- int must pass the signed-32-bit range check; other widths (and the
-- whole function on i386 -- remaining equations are elided in this
-- listing) are unconditionally small.
2085 #if x86_64_TARGET_ARCH
2086 is32BitLit (CmmInt i W64) = is32BitInteger i
2087 -- assume that labels are in the range 0-2^31-1: this assumes the
2088 -- small memory model (see gcc docs, -mcmodel=small).
-- | Does this 'Integer', interpreted modulo 2^64, fit in a *signed*
-- 32-bit immediate?  A CmmInt is intended to be truncated to the
-- width of its type, so we truncate to Int64 first: e.g. a CmmInt -1
-- may be represented as 18446744073709551615, which must count as
-- fitting.
is32BitInteger :: Integer -> Bool
is32BitInteger i = minInt32 <= i64 && i64 <= maxInt32
  where
    i64 :: Int64
    i64 = fromIntegral i
    minInt32, maxInt32 :: Int64
    minInt32 = -0x80000000
    maxInt32 = 0x7fffffff
2101 -- -----------------------------------------------------------------------------
2102 -- The 'CondCode' type: Condition codes passed up the tree.
-- A condition plus the instructions that set it.  (The meaning of the
-- Bool field is not shown in this listing -- presumably whether the
-- comparison was floating-point; confirm against the full source.)
2104 data CondCode = CondCode Bool Cond InstrBlock
2106 -- Set up a condition code for a conditional branch.
-- | Produce a 'CondCode' for a boolean expression, for use by the
-- conditional-branch code generators.
2108 getCondCode :: CmmExpr -> NatM CondCode
2110 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2112 #if alpha_TARGET_ARCH
-- The alpha backend never generates condition codes this way.
2113 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2114 #endif /* alpha_TARGET_ARCH */
2116 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2118 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2119 -- yes, they really do seem to want exactly the same!
2121 getCondCode (CmmMachOp mop [x, y])
2124 MO_F_Eq W32 -> condFltCode EQQ x y
2125 MO_F_Ne W32 -> condFltCode NE x y
2126 MO_F_Gt W32 -> condFltCode GTT x y
2127 MO_F_Ge W32 -> condFltCode GE x y
2128 MO_F_Lt W32 -> condFltCode LTT x y
2129 MO_F_Le W32 -> condFltCode LE x y
2131 MO_F_Eq W64 -> condFltCode EQQ x y
2132 MO_F_Ne W64 -> condFltCode NE x y
2133 MO_F_Gt W64 -> condFltCode GTT x y
2134 MO_F_Ge W64 -> condFltCode GE x y
2135 MO_F_Lt W64 -> condFltCode LTT x y
2136 MO_F_Le W64 -> condFltCode LE x y
2138 MO_Eq rep -> condIntCode EQQ x y
2139 MO_Ne rep -> condIntCode NE x y
2141 MO_S_Gt rep -> condIntCode GTT x y
2142 MO_S_Ge rep -> condIntCode GE x y
2143 MO_S_Lt rep -> condIntCode LTT x y
2144 MO_S_Le rep -> condIntCode LE x y
2146 MO_U_Gt rep -> condIntCode GU x y
2147 MO_U_Ge rep -> condIntCode GEU x y
2148 MO_U_Lt rep -> condIntCode LU x y
2149 MO_U_Le rep -> condIntCode LEU x y
2151 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2153 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2155 #elif powerpc_TARGET_ARCH
2157 -- almost the same as everywhere else - but we need to
2158 -- extend small integers to 32 bit first
2160 getCondCode (CmmMachOp mop [x, y])
2162 MO_F_Eq W32 -> condFltCode EQQ x y
2163 MO_F_Ne W32 -> condFltCode NE x y
2164 MO_F_Gt W32 -> condFltCode GTT x y
2165 MO_F_Ge W32 -> condFltCode GE x y
2166 MO_F_Lt W32 -> condFltCode LTT x y
2167 MO_F_Le W32 -> condFltCode LE x y
2169 MO_F_Eq W64 -> condFltCode EQQ x y
2170 MO_F_Ne W64 -> condFltCode NE x y
2171 MO_F_Gt W64 -> condFltCode GTT x y
2172 MO_F_Ge W64 -> condFltCode GE x y
2173 MO_F_Lt W64 -> condFltCode LTT x y
2174 MO_F_Le W64 -> condFltCode LE x y
2176 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2177 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2179 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2180 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2181 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2182 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2184 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2185 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2186 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2187 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2189 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2191 getCondCode other = panic "getCondCode(2)(powerpc)"
2197 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2198 -- passed back up the tree.
2200 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2202 #if alpha_TARGET_ARCH
2203 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2204 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2205 #endif /* alpha_TARGET_ARCH */
2207 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2208 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2210 -- memory vs immediate
2211 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2212 Amode x_addr x_code <- getAmode x
2215 code = x_code `snocOL`
2216 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2218 return (CondCode False cond code)
2220 -- anything vs zero, using a mask
2221 -- TODO: Add some sanity checking!!!!
2222 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2223 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2225 (x_reg, x_code) <- getSomeReg x
2227 code = x_code `snocOL`
2228 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2230 return (CondCode False cond code)
2233 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2234 (x_reg, x_code) <- getSomeReg x
2236 code = x_code `snocOL`
2237 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2239 return (CondCode False cond code)
2241 -- anything vs operand
2242 condIntCode cond x y | isOperand y = do
2243 (x_reg, x_code) <- getNonClobberedReg x
2244 (y_op, y_code) <- getOperand y
2246 code = x_code `appOL` y_code `snocOL`
2247 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2249 return (CondCode False cond code)
2251 -- anything vs anything
2252 condIntCode cond x y = do
2253 (y_reg, y_code) <- getNonClobberedReg y
2254 (x_op, x_code) <- getRegOrMem x
2256 code = y_code `appOL`
2258 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2260 return (CondCode False cond code)
2263 #if i386_TARGET_ARCH
2264 condFltCode cond x y
2265 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2266 (x_reg, x_code) <- getNonClobberedReg x
2267 (y_reg, y_code) <- getSomeReg y
2269 code = x_code `appOL` y_code `snocOL`
2270 GCMP cond x_reg y_reg
2271 -- The GCMP insn does the test and sets the zero flag if comparable
2272 -- and true. Hence we always supply EQQ as the condition to test.
2273 return (CondCode True EQQ code)
2274 #endif /* i386_TARGET_ARCH */
2276 #if x86_64_TARGET_ARCH
2277 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2278 -- an operand, but the right must be a reg. We can probably do better
2279 -- than this general case...
2280 condFltCode cond x y = do
2281 (x_reg, x_code) <- getNonClobberedReg x
2282 (y_op, y_code) <- getOperand y
2284 code = x_code `appOL`
2286 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2287 -- NB(1): we need to use the unsigned comparison operators on the
2288 -- result of this comparison.
2290 return (CondCode True (condToUnsigned cond) code)
2293 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2295 #if sparc_TARGET_ARCH
2297 condIntCode cond x (CmmLit (CmmInt y rep))
2300 (src1, code) <- getSomeReg x
2302 src2 = ImmInt (fromInteger y)
2303 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2304 return (CondCode False cond code')
2306 condIntCode cond x y = do
2307 (src1, code1) <- getSomeReg x
2308 (src2, code2) <- getSomeReg y
2310 code__2 = code1 `appOL` code2 `snocOL`
2311 SUB False True src1 (RIReg src2) g0
2312 return (CondCode False cond code__2)
2315 condFltCode cond x y = do
2316 (src1, code1) <- getSomeReg x
2317 (src2, code2) <- getSomeReg y
2318 tmp <- getNewRegNat FF64
2320 promote x = FxTOy FF32 FF64 x tmp
2326 if pk1 `cmmEqType` pk2 then
2327 code1 `appOL` code2 `snocOL`
2328 FCMP True (cmmTypeSize pk1) src1 src2
2329 else if typeWidth pk1 == W32 then
2330 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2331 FCMP True FF64 tmp src2
2333 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2334 FCMP True FF64 src1 tmp
2335 return (CondCode True cond code__2)
2337 #endif /* sparc_TARGET_ARCH */
2339 #if powerpc_TARGET_ARCH
2340 -- ###FIXME: I16 and I8!
2341 condIntCode cond x (CmmLit (CmmInt y rep))
2342 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2344 (src1, code) <- getSomeReg x
2346 code' = code `snocOL`
2347 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2348 return (CondCode False cond code')
2350 condIntCode cond x y = do
2351 (src1, code1) <- getSomeReg x
2352 (src2, code2) <- getSomeReg y
2354 code' = code1 `appOL` code2 `snocOL`
2355 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2356 return (CondCode False cond code')
2358 condFltCode cond x y = do
2359 (src1, code1) <- getSomeReg x
2360 (src2, code2) <- getSomeReg y
2362 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2363 code'' = case cond of -- twiddle CR to handle unordered case
2364 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2365 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
2368 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2369 return (CondCode True cond code'')
2371 #endif /* powerpc_TARGET_ARCH */
2373 -- -----------------------------------------------------------------------------
2374 -- Generating assignments
2376 -- Assignments are really at the heart of the whole code generation
2377 -- business. Almost all top-level nodes of any real importance are
2378 -- assignments, which correspond to loads, stores, or register
2379 -- transfers. If we're really lucky, some of the register transfers
2380 -- will go away, because we can use the destination register to
2381 -- complete the code generation for the right hand side. This only
2382 -- fails when the right hand side is forced into a fixed register
2383 -- (e.g. the result of a call).
2385 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2386 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2388 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2389 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2391 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2393 #if alpha_TARGET_ARCH
2395 assignIntCode pk (CmmLoad dst _) src
2396 = getNewRegNat IntRep `thenNat` \ tmp ->
2397 getAmode dst `thenNat` \ amode ->
2398 getRegister src `thenNat` \ register ->
2400 code1 = amodeCode amode []
2401 dst__2 = amodeAddr amode
2402 code2 = registerCode register tmp []
2403 src__2 = registerName register tmp
2404 sz = primRepToSize pk
2405 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2409 assignIntCode pk dst src
2410 = getRegister dst `thenNat` \ register1 ->
2411 getRegister src `thenNat` \ register2 ->
2413 dst__2 = registerName register1 zeroh
2414 code = registerCode register2 dst__2
2415 src__2 = registerName register2 dst__2
2416 code__2 = if isFixed register2
2417 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2422 #endif /* alpha_TARGET_ARCH */
2424 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2426 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2428 -- integer assignment to memory
2430 -- specific case of adding/subtracting an integer to a particular address.
2431 -- ToDo: catch other cases where we can use an operation directly on a memory
2433 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2434 CmmLit (CmmInt i _)])
2435 | addr == addr2, pk /= II64 || is32BitInteger i,
2436 Just instr <- check op
2437 = do Amode amode code_addr <- getAmode addr
2438 let code = code_addr `snocOL`
2439 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2442 check (MO_Add _) = Just ADD
2443 check (MO_Sub _) = Just SUB
2448 assignMem_IntCode pk addr src = do
2449 Amode addr code_addr <- getAmode addr
2450 (code_src, op_src) <- get_op_RI src
2452 code = code_src `appOL`
2454 MOV pk op_src (OpAddr addr)
2455 -- NOTE: op_src is stable, so it will still be valid
2456 -- after code_addr. This may involve the introduction
2457 -- of an extra MOV to a temporary register, but we hope
2458 -- the register allocator will get rid of it.
2462 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2463 get_op_RI (CmmLit lit) | is32BitLit lit
2464 = return (nilOL, OpImm (litToImm lit))
2466 = do (reg,code) <- getNonClobberedReg op
2467 return (code, OpReg reg)
-- | Integer assignment where the destination is a register.
--
-- When the RHS is a memory load we can move straight from memory into
-- the destination register.
assignReg_IntCode pk reg (CmmLoad src _) = do
    load_code <- intLoadCode (MOV pk) src
    return (load_code (getRegisterReg reg))

-- Otherwise the RHS is arbitrary: evaluate it directly into the
-- destination register.  The size argument is not needed in this
-- clause, so it is bound as _pk to silence the unused-binding warning
-- (the module header asks for warnings to be fixed).
assignReg_IntCode _pk reg src = do
    code <- getAnyReg src
    return (code (getRegisterReg reg))
2480 #endif /* i386_TARGET_ARCH */
2482 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2484 #if sparc_TARGET_ARCH
-- Integer store: evaluate the source into a register, compute the
-- addressing mode for the destination, then emit a single ST.
assignMem_IntCode pk addr src = do
    (src_reg, src_code) <- getSomeReg src
    Amode dst_addr dst_code <- getAmode addr
    let code = src_code `appOL` dst_code `snocOL` ST pk src_reg dst_addr
    return code
2491 assignReg_IntCode pk reg src = do
2492 r <- getRegister src
2494 Any _ code -> code dst
2495 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2497 dst = getRegisterReg reg
2500 #endif /* sparc_TARGET_ARCH */
2502 #if powerpc_TARGET_ARCH
-- Store an integer value to memory: materialise the source in a
-- register, form the target address, then emit the store.
assignMem_IntCode pk addr src = do
    (reg, reg_code)     <- getSomeReg src
    Amode dst addr_code <- getAmode addr
    return (reg_code `appOL` addr_code `snocOL` ST pk reg dst)
2509 -- dst is a reg, but src could be anything
2510 assignReg_IntCode pk reg src
2512 r <- getRegister src
2514 Any _ code -> code dst
2515 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2517 dst = getRegisterReg reg
2519 #endif /* powerpc_TARGET_ARCH */
2522 -- -----------------------------------------------------------------------------
2523 -- Floating-point assignments
2525 #if alpha_TARGET_ARCH
2527 assignFltCode pk (CmmLoad dst _) src
2528 = getNewRegNat pk `thenNat` \ tmp ->
2529 getAmode dst `thenNat` \ amode ->
2530 getRegister src `thenNat` \ register ->
2532 code1 = amodeCode amode []
2533 dst__2 = amodeAddr amode
2534 code2 = registerCode register tmp []
2535 src__2 = registerName register tmp
2536 sz = primRepToSize pk
2537 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
2541 assignFltCode pk dst src
2542 = getRegister dst `thenNat` \ register1 ->
2543 getRegister src `thenNat` \ register2 ->
2545 dst__2 = registerName register1 zeroh
2546 code = registerCode register2 dst__2
2547 src__2 = registerName register2 dst__2
2548 code__2 = if isFixed register2
2549 then code . mkSeqInstr (FMOV src__2 dst__2)
2554 #endif /* alpha_TARGET_ARCH */
2556 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2558 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2560 -- Floating point assignment to memory
2561 assignMem_FltCode pk addr src = do
2562 (src_reg, src_code) <- getNonClobberedReg src
2563 Amode addr addr_code <- getAmode addr
2565 code = src_code `appOL`
2567 IF_ARCH_i386(GST pk src_reg addr,
2568 MOV pk (OpReg src_reg) (OpAddr addr))
-- Floating point assignment to a register/temporary: evaluate the
-- source directly into the destination register.  The size argument is
-- unused here, so it is bound as _pk to silence the unused-binding
-- warning (the module header asks for warnings to be fixed).
assignReg_FltCode _pk reg src = do
    src_code <- getAnyReg src
    return (src_code (getRegisterReg reg))
2576 #endif /* i386_TARGET_ARCH */
2578 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2580 #if sparc_TARGET_ARCH
2582 -- Floating point assignment to memory
2583 assignMem_FltCode pk addr src = do
2584 Amode dst__2 code1 <- getAmode addr
2585 (src__2, code2) <- getSomeReg src
2586 tmp1 <- getNewRegNat pk
2588 pk__2 = cmmExprType src
2589 code__2 = code1 `appOL` code2 `appOL`
2590 if sizeToWidth pk == typeWidth pk__2
2591 then unitOL (ST pk src__2 dst__2)
2592 else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2593 , ST pk tmp1 dst__2]
-- Floating point assignment to a register/temporary.
assignReg_FltCode pk dstCmmReg srcCmmExpr = do
    src_register <- getRegister srcCmmExpr
    let dst = getRegisterReg dstCmmReg
    case src_register of
        -- virtual source: generate it straight into dst
        Any _ mk_code            -> return (mk_code dst)
        -- source pinned to a fixed register: emit an explicit move
        Fixed _ src_reg src_code -> return (src_code `snocOL` FMOV pk src_reg dst)
2605 #endif /* sparc_TARGET_ARCH */
2607 #if powerpc_TARGET_ARCH
-- On PowerPC the float-assignment entry points reuse the integer
-- assignment code; the Size argument is simply passed through.
assignMem_FltCode pk addr src = assignMem_IntCode pk addr src
assignReg_FltCode pk reg  src = assignReg_IntCode pk reg  src
2613 #endif /* powerpc_TARGET_ARCH */
2616 -- -----------------------------------------------------------------------------
2617 -- Generating a non-local jump
2619 -- (If applicable) Do not fill the delay slots here; you will confuse the
2620 -- register allocator.
2622 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2624 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2626 #if alpha_TARGET_ARCH
2628 genJump (CmmLabel lbl)
2629 | isAsmTemp lbl = returnInstr (BR target)
2630 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2632 target = ImmCLbl lbl
2635 = getRegister tree `thenNat` \ register ->
2636 getNewRegNat PtrRep `thenNat` \ tmp ->
2638 dst = registerName register pv
2639 code = registerCode register pv
2640 target = registerName register pv
2642 if isFixed register then
2643 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2645 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2647 #endif /* alpha_TARGET_ARCH */
2649 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2651 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Jump to an address held in memory: form the addressing mode and emit
-- an indirect JMP through it.  The type argument is unused, so it is
-- bound as _pk to silence the unused-binding warning (the module
-- header asks for warnings to be fixed).
genJump (CmmLoad mem _pk) = do
    Amode target code <- getAmode mem
    return (code `snocOL` JMP (OpAddr target))

-- Jump to a literal target (e.g. a label).
genJump (CmmLit lit) =
    return (unitOL (JMP (OpImm (litToImm lit))))
2661 (reg,code) <- getSomeReg expr
2662 return (code `snocOL` JMP (OpReg reg))
2664 #endif /* i386_TARGET_ARCH */
2666 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2668 #if sparc_TARGET_ARCH
-- Jump straight to a known label: emit a CALL to it, with a NOP in the
-- delay slot (delay slots must not be filled here; that would confuse
-- the register allocator).
genJump (CmmLit (CmmLabel lbl))
  = return (toOL [CALL (Left (ImmCLbl lbl)) 0 True, NOP])
2677 (target, code) <- getSomeReg tree
2678 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2680 #endif /* sparc_TARGET_ARCH */
2682 #if powerpc_TARGET_ARCH
-- Direct jump to a known label.
genJump (CmmLit (CmmLabel dest)) =
    return (unitOL (JMP dest))
2688 (target,code) <- getSomeReg tree
2689 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2690 #endif /* powerpc_TARGET_ARCH */
2693 -- -----------------------------------------------------------------------------
2694 -- Unconditional branches
-- | Emit an unconditional branch to the given local basic block.
genBranch :: BlockId -> NatM InstrBlock
genBranch blockid = return (toOL (mkBranchInstr blockid))
2700 -- -----------------------------------------------------------------------------
2701 -- Conditional jumps
2704 Conditional jumps are always to local labels, so we can use branch
2705 instructions. We peek at the arguments to decide what kind of
2708 ALPHA: For comparisons with 0, we're laughing, because we can just do
2709 the desired conditional branch.
2711 I386: First, we have to ensure that the condition
2712 codes are set according to the supplied comparison operation.
2714 SPARC: First, we have to ensure that the condition codes are set
2715 according to the supplied comparison operation. We generate slightly
2716 different code for floating point comparisons, because a floating
2717 point operation cannot directly precede a @BF@. We assume the worst
2718 and fill that slot with a @NOP@.
2720 SPARC: Do not fill the delay slots here; you will confuse the register
2726 :: BlockId -- the branch target
2727 -> CmmExpr -- the condition on which to branch
2730 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2732 #if alpha_TARGET_ARCH
2734 genCondJump id (StPrim op [x, StInt 0])
2735 = getRegister x `thenNat` \ register ->
2736 getNewRegNat (registerRep register)
2739 code = registerCode register tmp
2740 value = registerName register tmp
2741 pk = registerRep register
2742 target = ImmCLbl lbl
2744 returnSeq code [BI (cmpOp op) value target]
2746 cmpOp CharGtOp = GTT
2748 cmpOp CharEqOp = EQQ
2750 cmpOp CharLtOp = LTT
2759 cmpOp WordGeOp = ALWAYS
2760 cmpOp WordEqOp = EQQ
2762 cmpOp WordLtOp = NEVER
2763 cmpOp WordLeOp = EQQ
2765 cmpOp AddrGeOp = ALWAYS
2766 cmpOp AddrEqOp = EQQ
2768 cmpOp AddrLtOp = NEVER
2769 cmpOp AddrLeOp = EQQ
2771 genCondJump lbl (StPrim op [x, StDouble 0.0])
2772 = getRegister x `thenNat` \ register ->
2773 getNewRegNat (registerRep register)
2776 code = registerCode register tmp
2777 value = registerName register tmp
2778 pk = registerRep register
2779 target = ImmCLbl lbl
2781 return (code . mkSeqInstr (BF (cmpOp op) value target))
2783 cmpOp FloatGtOp = GTT
2784 cmpOp FloatGeOp = GE
2785 cmpOp FloatEqOp = EQQ
2786 cmpOp FloatNeOp = NE
2787 cmpOp FloatLtOp = LTT
2788 cmpOp FloatLeOp = LE
2789 cmpOp DoubleGtOp = GTT
2790 cmpOp DoubleGeOp = GE
2791 cmpOp DoubleEqOp = EQQ
2792 cmpOp DoubleNeOp = NE
2793 cmpOp DoubleLtOp = LTT
2794 cmpOp DoubleLeOp = LE
2796 genCondJump lbl (StPrim op [x, y])
2798 = trivialFCode pr instr x y `thenNat` \ register ->
2799 getNewRegNat FF64 `thenNat` \ tmp ->
2801 code = registerCode register tmp
2802 result = registerName register tmp
2803 target = ImmCLbl lbl
2805 return (code . mkSeqInstr (BF cond result target))
2807 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2809 fltCmpOp op = case op of
2823 (instr, cond) = case op of
2824 FloatGtOp -> (FCMP TF LE, EQQ)
2825 FloatGeOp -> (FCMP TF LTT, EQQ)
2826 FloatEqOp -> (FCMP TF EQQ, NE)
2827 FloatNeOp -> (FCMP TF EQQ, EQQ)
2828 FloatLtOp -> (FCMP TF LTT, NE)
2829 FloatLeOp -> (FCMP TF LE, NE)
2830 DoubleGtOp -> (FCMP TF LE, EQQ)
2831 DoubleGeOp -> (FCMP TF LTT, EQQ)
2832 DoubleEqOp -> (FCMP TF EQQ, NE)
2833 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2834 DoubleLtOp -> (FCMP TF LTT, NE)
2835 DoubleLeOp -> (FCMP TF LE, NE)
2837 genCondJump lbl (StPrim op [x, y])
2838 = trivialCode instr x y `thenNat` \ register ->
2839 getNewRegNat IntRep `thenNat` \ tmp ->
2841 code = registerCode register tmp
2842 result = registerName register tmp
2843 target = ImmCLbl lbl
2845 return (code . mkSeqInstr (BI cond result target))
2847 (instr, cond) = case op of
2848 CharGtOp -> (CMP LE, EQQ)
2849 CharGeOp -> (CMP LTT, EQQ)
2850 CharEqOp -> (CMP EQQ, NE)
2851 CharNeOp -> (CMP EQQ, EQQ)
2852 CharLtOp -> (CMP LTT, NE)
2853 CharLeOp -> (CMP LE, NE)
2854 IntGtOp -> (CMP LE, EQQ)
2855 IntGeOp -> (CMP LTT, EQQ)
2856 IntEqOp -> (CMP EQQ, NE)
2857 IntNeOp -> (CMP EQQ, EQQ)
2858 IntLtOp -> (CMP LTT, NE)
2859 IntLeOp -> (CMP LE, NE)
2860 WordGtOp -> (CMP ULE, EQQ)
2861 WordGeOp -> (CMP ULT, EQQ)
2862 WordEqOp -> (CMP EQQ, NE)
2863 WordNeOp -> (CMP EQQ, EQQ)
2864 WordLtOp -> (CMP ULT, NE)
2865 WordLeOp -> (CMP ULE, NE)
2866 AddrGtOp -> (CMP ULE, EQQ)
2867 AddrGeOp -> (CMP ULT, EQQ)
2868 AddrEqOp -> (CMP EQQ, NE)
2869 AddrNeOp -> (CMP EQQ, EQQ)
2870 AddrLtOp -> (CMP ULT, NE)
2871 AddrLeOp -> (CMP ULE, NE)
2873 #endif /* alpha_TARGET_ARCH */
2875 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2877 #if i386_TARGET_ARCH
-- Evaluate the condition into the machine's condition codes, then emit
-- a conditional jump (JXX) to the target block.  The target parameter
-- is named 'dest' rather than 'id' to avoid shadowing Prelude.id (the
-- module header asks for warnings to be fixed).
genCondJump dest cond_expr = do
    CondCode _ cond cond_code <- getCondCode cond_expr
    return (cond_code `snocOL` JXX cond dest)
2885 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2887 #if x86_64_TARGET_ARCH
2889 genCondJump id bool = do
2890 CondCode is_float cond cond_code <- getCondCode bool
2893 return (cond_code `snocOL` JXX cond id)
2895 lbl <- getBlockIdNat
2897 -- see comment with condFltReg
2898 let code = case cond of
2904 plain_test = unitOL (
2907 or_unordered = toOL [
2911 and_ordered = toOL [
2917 return (cond_code `appOL` code)
2921 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2923 #if sparc_TARGET_ARCH
2925 genCondJump bid bool = do
2926 CondCode is_float cond code <- getCondCode bool
2931 then [NOP, BF cond False bid, NOP]
2932 else [BI cond False bid, NOP]
2936 #endif /* sparc_TARGET_ARCH */
2939 #if powerpc_TARGET_ARCH
-- Evaluate the condition into the condition register, then emit a BCC
-- to the target block.  The is_float flag of the CondCode is not used
-- in this clause, so it is bound as _is_float; the target parameter is
-- named 'dest' rather than 'id' to avoid shadowing Prelude.id (the
-- module header asks for warnings to be fixed).
genCondJump dest cond_expr = do
    CondCode _is_float cond cond_code <- getCondCode cond_expr
    return (cond_code `snocOL` BCC cond dest)
2945 #endif /* powerpc_TARGET_ARCH */
2948 -- -----------------------------------------------------------------------------
2949 -- Generating C calls
2951 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2952 -- @get_arg@, which moves the arguments to the correct registers/stack
2953 -- locations. Apart from that, the code is easy.
2955 -- (If applicable) Do not fill the delay slots here; you will confuse the
2956 -- register allocator.
2959 :: CmmCallTarget -- function to call
2960 -> HintedCmmFormals -- where to put the result
2961 -> HintedCmmActuals -- arguments (of mixed type)
2964 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2966 #if alpha_TARGET_ARCH
2970 genCCall fn cconv result_regs args
2971 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
2972 `thenNat` \ ((unused,_), argCode) ->
2974 nRegs = length allArgRegs - length unused
2975 code = asmSeqThen (map ($ []) argCode)
2978 LDA pv (AddrImm (ImmLab (ptext fn))),
2979 JSR ra (AddrReg pv) nRegs,
2980 LDGP gp (AddrReg ra)]
2982 ------------------------
2983 {- Try to get a value into a specific register (or registers) for
2984 a call. The first 6 arguments go into the appropriate
2985 argument register (separate registers for integer and floating
2986 point arguments, but used in lock-step), and the remaining
2987 arguments are dumped to the stack, beginning at 0(sp). Our
2988 first argument is a pair of the list of remaining argument
2989 registers to be assigned for this call and the next stack
2990 offset to use for overflowing arguments. This way,
2991 @get_Arg@ can be applied to all of a call's arguments using
2995 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
2996 -> StixTree -- Current argument
2997 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
2999 -- We have to use up all of our argument registers first...
3001 get_arg ((iDst,fDst):dsts, offset) arg
3002 = getRegister arg `thenNat` \ register ->
3004 reg = if isFloatType pk then fDst else iDst
3005 code = registerCode register reg
3006 src = registerName register reg
3007 pk = registerRep register
3010 if isFloatType pk then
3011 ((dsts, offset), if isFixed register then
3012 code . mkSeqInstr (FMOV src fDst)
3015 ((dsts, offset), if isFixed register then
3016 code . mkSeqInstr (OR src (RIReg src) iDst)
3019 -- Once we have run out of argument registers, we move to the
3022 get_arg ([], offset) arg
3023 = getRegister arg `thenNat` \ register ->
3024 getNewRegNat (registerRep register)
3027 code = registerCode register tmp
3028 src = registerName register tmp
3029 pk = registerRep register
3030 sz = primRepToSize pk
3032 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3034 #endif /* alpha_TARGET_ARCH */
3036 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3038 #if i386_TARGET_ARCH
3040 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3041 -- write barrier compiles to no code on x86/x86-64;
3042 -- we keep it this long in order to prevent earlier optimisations.
3044 -- we only cope with a single result for foreign calls
3045 genCCall (CmmPrim op) [CmmHinted r _] args = do
3046 l1 <- getNewLabelNat
3047 l2 <- getNewLabelNat
3049 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3050 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3052 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3053 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3055 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3056 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3058 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3059 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3061 other_op -> outOfLineFloatOp op r args
3063 actuallyInlineFloatOp instr size [CmmHinted x _]
3064 = do res <- trivialUFCode size (instr size) x
3066 return (any (getRegisterReg (CmmLocal r)))
3068 genCCall target dest_regs args = do
3070 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3071 #if !darwin_TARGET_OS
3072 tot_arg_size = sum sizes
3074 raw_arg_size = sum sizes
3075 tot_arg_size = roundTo 16 raw_arg_size
3076 arg_pad_size = tot_arg_size - raw_arg_size
3077 delta0 <- getDeltaNat
3078 setDeltaNat (delta0 - arg_pad_size)
3081 push_codes <- mapM push_arg (reverse args)
3082 delta <- getDeltaNat
3085 -- deal with static vs dynamic call targets
3086 (callinsns,cconv) <-
3089 CmmCallee (CmmLit (CmmLabel lbl)) conv
3090 -> -- ToDo: stdcall arg sizes
3091 return (unitOL (CALL (Left fn_imm) []), conv)
3092 where fn_imm = ImmCLbl lbl
3094 -> do { (dyn_c, dyn_r) <- get_op expr
3095 ; ASSERT( isWord32 (cmmExprType expr) )
3096 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3099 #if darwin_TARGET_OS
3101 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3102 DELTA (delta0 - arg_pad_size)]
3103 `appOL` concatOL push_codes
3106 = concatOL push_codes
3107 call = callinsns `appOL`
3109 -- Deallocate parameters after call for ccall;
3110 -- but not for stdcall (callee does it)
3111 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3112 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3114 [DELTA (delta + tot_arg_size)]
3117 setDeltaNat (delta + tot_arg_size)
3120 -- assign the results, if necessary
3121 assign_code [] = nilOL
3122 assign_code [CmmHinted dest _hint]
3123 | isFloatType ty = unitOL (GMOV fake0 r_dest)
3124 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3125 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3126 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3128 ty = localRegType dest
3130 r_dest_hi = getHiVRegFromLo r_dest
3131 r_dest = getRegisterReg (CmmLocal dest)
3132 assign_code many = panic "genCCall.assign_code many"
3134 return (push_code `appOL`
3136 assign_code dest_regs)
3139 arg_size :: CmmType -> Int -- Width in bytes
3140 arg_size ty = widthInBytes (typeWidth ty)
3142 roundTo a x | x `mod` a == 0 = x
3143 | otherwise = x + a - (x `mod` a)
3146 push_arg :: HintedCmmActual {-current argument-}
3147 -> NatM InstrBlock -- code
3149 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3150 | isWord64 arg_ty = do
3151 ChildCode64 code r_lo <- iselExpr64 arg
3152 delta <- getDeltaNat
3153 setDeltaNat (delta - 8)
3155 r_hi = getHiVRegFromLo r_lo
3157 return ( code `appOL`
3158 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3159 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
3164 (code, reg) <- get_op arg
3165 delta <- getDeltaNat
3166 let size = arg_size arg_ty -- Byte size
3167 setDeltaNat (delta-size)
3168 if (isFloatType arg_ty)
3169 then return (code `appOL`
3170 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3172 GST (floatSize (typeWidth arg_ty))
3173 reg (AddrBaseIndex (EABaseReg esp)
3177 else return (code `snocOL`
3178 PUSH II32 (OpReg reg) `snocOL`
3182 arg_ty = cmmExprType arg
3185 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3187 (reg,code) <- getSomeReg op
3190 #endif /* i386_TARGET_ARCH */
3192 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3194 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3196 outOfLineFloatOp mop res args
3198 dflags <- getDynFlagsNat
3199 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3200 let target = CmmCallee targetExpr CCallConv
3202 if isFloat64 (localRegType res)
3204 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3208 tmp = LocalReg uq f64
3210 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3211 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3212 return (code1 `appOL` code2)
3214 lbl = mkForeignLabel fn Nothing False
3217 MO_F32_Sqrt -> fsLit "sqrtf"
3218 MO_F32_Sin -> fsLit "sinf"
3219 MO_F32_Cos -> fsLit "cosf"
3220 MO_F32_Tan -> fsLit "tanf"
3221 MO_F32_Exp -> fsLit "expf"
3222 MO_F32_Log -> fsLit "logf"
3224 MO_F32_Asin -> fsLit "asinf"
3225 MO_F32_Acos -> fsLit "acosf"
3226 MO_F32_Atan -> fsLit "atanf"
3228 MO_F32_Sinh -> fsLit "sinhf"
3229 MO_F32_Cosh -> fsLit "coshf"
3230 MO_F32_Tanh -> fsLit "tanhf"
3231 MO_F32_Pwr -> fsLit "powf"
3233 MO_F64_Sqrt -> fsLit "sqrt"
3234 MO_F64_Sin -> fsLit "sin"
3235 MO_F64_Cos -> fsLit "cos"
3236 MO_F64_Tan -> fsLit "tan"
3237 MO_F64_Exp -> fsLit "exp"
3238 MO_F64_Log -> fsLit "log"
3240 MO_F64_Asin -> fsLit "asin"
3241 MO_F64_Acos -> fsLit "acos"
3242 MO_F64_Atan -> fsLit "atan"
3244 MO_F64_Sinh -> fsLit "sinh"
3245 MO_F64_Cosh -> fsLit "cosh"
3246 MO_F64_Tanh -> fsLit "tanh"
3247 MO_F64_Pwr -> fsLit "pow"
3249 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3251 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3253 #if x86_64_TARGET_ARCH
3255 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3256 -- write barrier compiles to no code on x86/x86-64;
3257 -- we keep it this long in order to prevent earlier optimisations.
-- A callish machop with exactly one result register: route it to the
-- out-of-line C implementation (sqrt, sin, ... -- see outOfLineFloatOp).
3260 genCCall (CmmPrim op) [CmmHinted r _] args =
3261 outOfLineFloatOp op r args
-- General case: a real C call under the x86-64 (System V) convention.
-- Integer/pointer args go in allArgRegs, FP args in allFPArgRegs,
-- overflow args on the stack; %rsp must be correctly aligned at the call.
3263 genCCall target dest_regs args = do
3265 -- load up the register arguments
3266 (stack_args, aregs, fregs, load_args_code)
3267 <- load_args args allArgRegs allFPArgRegs nilOL
-- 'aregs'/'fregs' are the argument registers load_args did NOT consume,
-- so the used ones are the corresponding prefixes of the full lists.
3270 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3271 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3272 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3273 -- for annotating the call instruction with
3275 sse_regs = length fp_regs_used
3277 tot_arg_size = arg_size * length stack_args
3279 -- On entry to the called function, %rsp should be aligned
3280 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3281 -- the return address is 16-byte aligned). In STG land
3282 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3283 -- need to make sure we push a multiple of 16-bytes of args,
3284 -- plus the return address, to get the correct alignment.
3285 -- Urg, this is hard. We need to feed the delta back into
3286 -- the arg pushing code.
-- If the stack-arg bytes are not a multiple of 16, pre-drop %rsp by 8
-- (and record the new delta) so the final alignment works out.
3287 (real_size, adjust_rsp) <-
3288 if tot_arg_size `rem` 16 == 0
3289 then return (tot_arg_size, nilOL)
3290 else do -- we need to adjust...
3291 delta <- getDeltaNat
3292 setDeltaNat (delta-8)
3293 return (tot_arg_size+8, toOL [
3294 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3298 -- push the stack args, right to left
3299 push_code <- push_args (reverse stack_args) nilOL
3300 delta <- getDeltaNat
3302 -- deal with static vs dynamic call targets
3303 (callinsns,cconv) <-
-- Statically-known label: emit a direct CALL to the immediate.
3306 CmmCallee (CmmLit (CmmLabel lbl)) conv
3307 -> -- ToDo: stdcall arg sizes
3308 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3309 where fn_imm = ImmCLbl lbl
-- Dynamic target: evaluate the address into a register, indirect CALL.
3311 -> do (dyn_r, dyn_c) <- getSomeReg expr
3312 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3315 -- The x86_64 ABI requires us to set %al to the number of SSE
3316 -- registers that contain arguments, if the called routine
3317 -- is a varargs function. We don't know whether it's a
3318 -- varargs function or not, so we have to assume it is.
3320 -- It's not safe to omit this assignment, even if the number
3321 -- of SSE regs in use is zero. If %al is larger than 8
3322 -- on entry to a varargs function, seg faults ensue.
-- 32-bit MOV to %eax zero-extends, which also clears the high bits.
3323 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3325 let call = callinsns `appOL`
3327 -- Deallocate parameters after call for ccall;
3328 -- but not for stdcall (callee does it)
3329 (if cconv == StdCallConv || real_size==0 then [] else
3330 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3332 [DELTA (delta + real_size)]
-- Restore the recorded stack delta now that the args are popped.
3335 setDeltaNat (delta + real_size)
-- assign the results, if necessary
--
-- Under the System V x86-64 ABI, float/double results come back in
-- %xmm0 and integer/pointer results in %rax; move them into the
-- destination virtual register with a move of the matching size.
assign_code [] = nilOL
assign_code [CmmHinted dest _hint] =
  case typeWidth rep of
        W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
        -- BUGFIX: a 64-bit float result must be moved with a
        -- double-precision move; the original used 'floatSize W32'
        -- here, which would emit a single-precision move and lose
        -- the upper half of the double returned in %xmm0.
        W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
        _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
  where
        rep = localRegType dest
        r_dest = getRegisterReg (CmmLocal dest)
-- Multiple result registers are not supported by this code path.
assign_code many = panic "genCCall.assign_code many"
-- Glue everything together: load regs, align, push stack args,
-- set %al, call, clean up, then move the result(s) home.
3350 return (load_args_code `appOL`
3353 assign_eax sse_regs `appOL`
3355 assign_code dest_regs)
3358 arg_size = 8 -- always, at the mo
-- Walk the argument list, assigning each arg to the next free
-- int or FP register; args that don't fit are returned for the
-- stack-pushing pass.  Returns (leftover args, unused int regs,
-- unused FP regs, accumulated load code).
3360 load_args :: [CmmHinted CmmExpr]
3361 -> [Reg] -- int regs avail for args
3362 -> [Reg] -- FP regs avail for args
3364 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3365 load_args args [] [] code = return (args, [], [], code)
3366 -- no more regs to use
3367 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3368 -- no more args to push
3369 load_args ((CmmHinted arg hint) : rest) aregs fregs code
-- Float args consume an FP register; ints consume an int register.
-- NOTE(review): the branch taking an FP reg vs an int reg is elided
-- here in part; the recursive calls below show the intended shape.
3370 | isFloatType arg_rep =
3374 arg_code <- getAnyReg arg
3375 load_args rest aregs rs (code `appOL` arg_code r)
3380 arg_code <- getAnyReg arg
3381 load_args rest rs fregs (code `appOL` arg_code r)
3383 arg_rep = cmmExprType arg
-- No register of the right class left: pass this arg through to
-- the stack list unchanged and keep going.
3386 (args',ars,frs,code') <- load_args rest aregs fregs code
3387 return ((CmmHinted arg hint):args', ars, frs, code')
-- Push the remaining args onto the stack (caller reverses the list
-- so they end up right-to-left), keeping the stack delta accurate.
3389 push_args [] code = return code
3390 push_args ((CmmHinted arg hint):rest) code
3391 | isFloatType arg_rep = do
-- Floats can't be PUSHed: make room with SUB then store via MOV.
3392 (arg_reg, arg_code) <- getSomeReg arg
3393 delta <- getDeltaNat
3394 setDeltaNat (delta-arg_size)
3395 let code' = code `appOL` arg_code `appOL` toOL [
3396 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3397 DELTA (delta-arg_size),
3398 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3399 push_args rest code'
3402 -- we only ever generate word-sized function arguments. Promotion
3403 -- has already happened: our Int8# type is kept sign-extended
3404 -- in an Int#, for example.
3405 ASSERT(width == W64) return ()
3406 (arg_op, arg_code) <- getOperand arg
3407 delta <- getDeltaNat
3408 setDeltaNat (delta-arg_size)
3409 let code' = code `appOL` toOL [PUSH II64 arg_op,
3410 DELTA (delta-arg_size)]
3411 push_args rest code'
3413 arg_rep = cmmExprType arg
3414 width = typeWidth arg_rep
3417 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3419 #if sparc_TARGET_ARCH
3421 The SPARC calling convention is an absolute
3422 nightmare. The first 6x32 bits of arguments are mapped into
3423 %o0 through %o5, and the remaining arguments are dumped to the
3424 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3426 If we have to put args on the stack, move %o6==%sp down by
3427 the number of words to go on the stack, to ensure there's enough space.
3429 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3430 16 words above the stack pointer is a word for the address of
3431 a structure return value. I use this as a temporary location
3432 for moving values from float to int regs. Certainly it isn't
3433 safe to put anything in the 16 words starting at %sp, since
3434 this area can get trashed at any time due to window overflows
3435 caused by signal handlers.
3437 A final complication (if the above isn't enough) is that
3438 we can't blithely calculate the arguments one by one into
3439 %o0 .. %o5. Consider the following nested calls:
3443 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3444 the inner call will itself use %o0, which trashes the value put there
3445 in preparation for the outer call. Upshot: we need to calculate the
3446 args into temporary regs, and move those to arg regs or onto the
3447 stack only immediately prior to the call proper. Sigh.
3450 genCCall target dest_regs argsAndHints = do
3452 args = map hintlessCmm argsAndHints
3453 argcode_and_vregs <- mapM arg_to_int_vregs args
3455 (argcodes, vregss) = unzip argcode_and_vregs
3456 n_argRegs = length allArgRegs
3457 n_argRegs_used = min (length vregs) n_argRegs
3458 vregs = concat vregss
3459 -- deal with static vs dynamic call targets
3460 callinsns <- (case target of
3461 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3462 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3463 CmmCallee expr conv -> do
3464 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3465 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3467 (res, reduce) <- outOfLineFloatOp mop
3468 lblOrMopExpr <- case res of
3470 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3472 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3473 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3474 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3478 argcode = concatOL argcodes
3479 (move_sp_down, move_sp_up)
3480 = let diff = length vregs - n_argRegs
3481 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3484 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3487 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3489 -- assign the results, if necessary
3490 assign_code [] = nilOL
3492 assign_code [CmmHinted dest _hint]
3493 = let rep = localRegType dest
3494 width = typeWidth rep
3495 r_dest = getRegisterReg (CmmLocal dest)
3500 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3504 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3506 | not $ isFloatType rep
3508 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3512 return (argcode `appOL`
3513 move_sp_down `appOL`
3514 transfer_code `appOL`
3518 assign_code dest_regs)
3520 -- move args from the integer vregs into which they have been
3521 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3522 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3524 move_final [] _ offset -- all args done
3527 move_final (v:vs) [] offset -- out of aregs; move to stack
3528 = ST II32 v (spRel offset)
3529 : move_final vs [] (offset+1)
3531 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3532 = OR False g0 (RIReg v) a
3533 : move_final vs az offset
3535 -- generate code to calculate an argument, and move it into one
3536 -- or two integer vregs.
3537 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3538 arg_to_int_vregs arg
3539 | isWord64 (cmmExprType arg)
3541 (ChildCode64 code r_lo) <- iselExpr64 arg
3543 r_hi = getHiVRegFromLo r_lo
3544 return (code, [r_hi, r_lo])
3547 (src, code) <- getSomeReg arg
3548 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3550 pk = cmmExprType arg
3551 Just f0_high = fPair f0
3552 case cmmTypeSize pk of
3554 v1 <- getNewRegNat II32
3555 v2 <- getNewRegNat II32
3558 FMOV FF64 src f0 `snocOL`
3559 ST FF32 f0 (spRel 16) `snocOL`
3560 LD II32 (spRel 16) v1 `snocOL`
3561 ST FF32 f0_high (spRel 16) `snocOL`
3562 LD II32 (spRel 16) v2
3567 v1 <- getNewRegNat II32
3570 ST FF32 src (spRel 16) `snocOL`
3571 LD II32 (spRel 16) v1
3576 v1 <- getNewRegNat II32
3578 code `snocOL` OR False g0 (RIReg src) v1
-- Map a callish machop to the libm routine that implements it, and
-- build a (label-or-expression, reduce) pair for the caller.  On SPARC
-- the F32 ops call the double-precision C routine ("sin", not "sinf"),
-- so they carry reduce = True: the F64 result must afterwards be
-- narrowed back to single precision.  F64 ops call the same routines
-- with reduce = False.
3582 outOfLineFloatOp mop =
3584 dflags <- getDynFlagsNat
-- PIC-aware reference to the foreign symbol; may come back as a plain
-- label or as a dynamic-reference expression.
3585 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3586 mkForeignLabel functionName Nothing True
3587 let mopLabelOrExpr = case mopExpr of
3588 CmmLit (CmmLabel lbl) -> Left lbl
3590 return (mopLabelOrExpr, reduce)
3592 (reduce, functionName) = case mop of
3593 MO_F32_Exp -> (True, fsLit "exp")
3594 MO_F32_Log -> (True, fsLit "log")
3595 MO_F32_Sqrt -> (True, fsLit "sqrt")
3597 MO_F32_Sin -> (True, fsLit "sin")
3598 MO_F32_Cos -> (True, fsLit "cos")
3599 MO_F32_Tan -> (True, fsLit "tan")
3601 MO_F32_Asin -> (True, fsLit "asin")
3602 MO_F32_Acos -> (True, fsLit "acos")
3603 MO_F32_Atan -> (True, fsLit "atan")
3605 MO_F32_Sinh -> (True, fsLit "sinh")
3606 MO_F32_Cosh -> (True, fsLit "cosh")
3607 MO_F32_Tanh -> (True, fsLit "tanh")
3609 MO_F64_Exp -> (False, fsLit "exp")
3610 MO_F64_Log -> (False, fsLit "log")
3611 MO_F64_Sqrt -> (False, fsLit "sqrt")
3613 MO_F64_Sin -> (False, fsLit "sin")
3614 MO_F64_Cos -> (False, fsLit "cos")
3615 MO_F64_Tan -> (False, fsLit "tan")
3617 MO_F64_Asin -> (False, fsLit "asin")
3618 MO_F64_Acos -> (False, fsLit "acos")
3619 MO_F64_Atan -> (False, fsLit "atan")
3621 MO_F64_Sinh -> (False, fsLit "sinh")
3622 MO_F64_Cosh -> (False, fsLit "cosh")
3623 MO_F64_Tanh -> (False, fsLit "tanh")
-- Anything else (e.g. integer machops) has no out-of-line version here.
3625 other -> pprPanic "outOfLineFloatOp(sparc) "
3626 (pprCallishMachOp mop)
3628 #endif /* sparc_TARGET_ARCH */
3630 #if powerpc_TARGET_ARCH
3632 #if darwin_TARGET_OS || linux_TARGET_OS
3634 The PowerPC calling convention for Darwin/Mac OS X
3635 is described in Apple's document
3636 "Inside Mac OS X - Mach-O Runtime Architecture".
3638 PowerPC Linux uses the System V Release 4 Calling Convention
3639 for PowerPC. It is described in the
3640 "System V Application Binary Interface PowerPC Processor Supplement".
3642 Both conventions are similar:
3643 Parameters may be passed in general-purpose registers starting at r3, in
3644 floating point registers starting at f1, or on the stack.
3646 But there are substantial differences:
3647 * The number of registers used for parameter passing and the exact set of
3648 nonvolatile registers differs (see MachRegs.lhs).
3649 * On Darwin, stack space is always reserved for parameters, even if they are
3650 passed in registers. The called routine may choose to save parameters from
3651 registers to the corresponding space on the stack.
3652 * On Darwin, a corresponding number of GPRs is skipped when a floating point
3653 parameter is passed in an FPR.
3654 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3655 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3656 Darwin just treats an I64 like two separate II32s (high word first).
3657 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3658 4-byte aligned like everything else on Darwin.
3659 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3660 PowerPC Linux does not agree, so neither do we.
3662 According to both conventions, The parameter area should be part of the
3663 caller's stack frame, allocated in the caller's prologue code (large enough
3664 to hold the parameter lists for all called routines). The NCG already
3665 uses the stack for register spilling, leaving 64 bytes free at the top.
3666 If we need a larger parameter area than that, we just allocate a new stack
3667 frame just before ccalling.
3671 genCCall (CmmPrim MO_WriteBarrier) _ _
3672 = return $ unitOL LWSYNC
3674 genCCall target dest_regs argsAndHints
3675 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3676 -- we rely on argument promotion in the codeGen
3678 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3680 allArgRegs allFPArgRegs
3684 (labelOrExpr, reduceToFF32) <- case target of
3685 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3686 CmmCallee expr conv -> return (Right expr, False)
3687 CmmPrim mop -> outOfLineFloatOp mop
3689 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3690 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3695 `snocOL` BL lbl usedRegs
3698 (dynReg, dynCode) <- getSomeReg dyn
3700 `snocOL` MTCTR dynReg
3702 `snocOL` BCTRL usedRegs
3705 #if darwin_TARGET_OS
3706 initialStackOffset = 24
3707 -- size of linkage area + size of arguments, in bytes
3708 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3709 map (widthInBytes . typeWidth) argReps
3710 #elif linux_TARGET_OS
3711 initialStackOffset = 8
3712 stackDelta finalStack = roundTo 16 finalStack
3714 args = map hintlessCmm argsAndHints
3715 argReps = map cmmExprType args
3717 roundTo a x | x `mod` a == 0 = x
3718 | otherwise = x + a - (x `mod` a)
3720 move_sp_down finalStack
3722 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3725 where delta = stackDelta finalStack
3726 move_sp_up finalStack
3728 toOL [ADD sp sp (RIImm (ImmInt delta)),
3731 where delta = stackDelta finalStack
3734 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3735 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3736 accumCode accumUsed | isWord64 arg_ty =
3738 ChildCode64 code vr_lo <- iselExpr64 arg
3739 let vr_hi = getHiVRegFromLo vr_lo
3741 #if darwin_TARGET_OS
3746 (accumCode `appOL` code
3747 `snocOL` storeWord vr_hi gprs stackOffset
3748 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3749 ((take 2 gprs) ++ accumUsed)
3751 storeWord vr (gpr:_) offset = MR gpr vr
3752 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3754 #elif linux_TARGET_OS
3755 let stackOffset' = roundTo 8 stackOffset
3756 stackCode = accumCode `appOL` code
3757 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3758 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3759 regCode hireg loreg =
3760 accumCode `appOL` code
3761 `snocOL` MR hireg vr_hi
3762 `snocOL` MR loreg vr_lo
3765 hireg : loreg : regs | even (length gprs) ->
3766 passArguments args regs fprs stackOffset
3767 (regCode hireg loreg) (hireg : loreg : accumUsed)
3768 _skipped : hireg : loreg : regs ->
3769 passArguments args regs fprs stackOffset
3770 (regCode hireg loreg) (hireg : loreg : accumUsed)
3771 _ -> -- only one or no regs left
3772 passArguments args [] fprs (stackOffset'+8)
3776 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3777 | reg : _ <- regs = do
3778 register <- getRegister arg
3779 let code = case register of
3780 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3781 Any _ acode -> acode reg
3785 #if darwin_TARGET_OS
3786 -- The Darwin ABI requires that we reserve stack slots for register parameters
3787 (stackOffset + stackBytes)
3788 #elif linux_TARGET_OS
3789 -- ... the SysV ABI doesn't.
3792 (accumCode `appOL` code)
3795 (vr, code) <- getSomeReg arg
3799 (stackOffset' + stackBytes)
3800 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
3803 #if darwin_TARGET_OS
3804 -- stackOffset is at least 4-byte aligned
3805 -- The Darwin ABI is happy with that.
3806 stackOffset' = stackOffset
3808 -- ... the SysV ABI requires 8-byte alignment for doubles.
3809 stackOffset' | isFloatType rep && typeWidth rep == W64 =
3810 roundTo 8 stackOffset
3811 | otherwise = stackOffset
3813 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3814 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
3815 II32 -> (1, 0, 4, gprs)
3816 #if darwin_TARGET_OS
3817 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3819 FF32 -> (1, 1, 4, fprs)
3820 FF64 -> (2, 1, 8, fprs)
3821 #elif linux_TARGET_OS
3822 -- ... the SysV ABI doesn't.
3823 FF32 -> (0, 1, 4, fprs)
3824 FF64 -> (0, 1, 8, fprs)
3827 moveResult reduceToFF32 =
3830 [CmmHinted dest _hint]
3831 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
3832 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
3833 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
3835 | otherwise -> unitOL (MR r_dest r3)
3836 where rep = cmmRegType (CmmLocal dest)
3837 r_dest = getRegisterReg (CmmLocal dest)
-- PowerPC version of the out-of-line float-op lookup: pick the libm
-- routine for a callish machop and say whether the F64 result must be
-- rounded back to single precision afterwards (reduce = True for the
-- MO_F32_* ops, which call the double-precision routine).
3839 outOfLineFloatOp mop =
3841 dflags <- getDynFlagsNat
-- PIC-aware reference to the foreign symbol; may come back as a plain
-- label or as a dynamic-reference expression.
3842 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3843 mkForeignLabel functionName Nothing True
3844 let mopLabelOrExpr = case mopExpr of
3845 CmmLit (CmmLabel lbl) -> Left lbl
3847 return (mopLabelOrExpr, reduce)
3849 (functionName, reduce) = case mop of
3850 MO_F32_Exp -> (fsLit "exp", True)
3851 MO_F32_Log -> (fsLit "log", True)
3852 MO_F32_Sqrt -> (fsLit "sqrt", True)
3854 MO_F32_Sin -> (fsLit "sin", True)
3855 MO_F32_Cos -> (fsLit "cos", True)
3856 MO_F32_Tan -> (fsLit "tan", True)
3858 MO_F32_Asin -> (fsLit "asin", True)
3859 MO_F32_Acos -> (fsLit "acos", True)
3860 MO_F32_Atan -> (fsLit "atan", True)
3862 MO_F32_Sinh -> (fsLit "sinh", True)
3863 MO_F32_Cosh -> (fsLit "cosh", True)
3864 MO_F32_Tanh -> (fsLit "tanh", True)
3865 MO_F32_Pwr -> (fsLit "pow", True)
3867 MO_F64_Exp -> (fsLit "exp", False)
3868 MO_F64_Log -> (fsLit "log", False)
3869 MO_F64_Sqrt -> (fsLit "sqrt", False)
3871 MO_F64_Sin -> (fsLit "sin", False)
3872 MO_F64_Cos -> (fsLit "cos", False)
3873 MO_F64_Tan -> (fsLit "tan", False)
3875 MO_F64_Asin -> (fsLit "asin", False)
3876 MO_F64_Acos -> (fsLit "acos", False)
3877 MO_F64_Atan -> (fsLit "atan", False)
3879 MO_F64_Sinh -> (fsLit "sinh", False)
3880 MO_F64_Cosh -> (fsLit "cosh", False)
3881 MO_F64_Tanh -> (fsLit "tanh", False)
3882 MO_F64_Pwr -> (fsLit "pow", False)
3883 other -> pprPanic "genCCall(ppc): unknown callish op"
3884 (pprCallishMachOp other)
3886 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3888 #endif /* powerpc_TARGET_ARCH */
3891 -- -----------------------------------------------------------------------------
3892 -- Generating a table-branch
-- | Generate a table-based jump for a Cmm switch: @expr@ indexes into
-- a table whose entries are the branch targets; 'Nothing' entries are
-- holes (no branch for that index) and become zero words.
3894 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3896 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- PIC case: the table holds label-difference offsets relative to the
-- table's own label, so the code adds the fetched offset to the
-- table's address before jumping.
3900 (reg,e_code) <- getSomeReg expr
3901 lbl <- getNewLabelNat
3902 dflags <- getDynFlagsNat
3903 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3904 (tableReg,t_code) <- getSomeReg $ dynRef
3906 jumpTable = map jumpTableEntryRel ids
-- A hole becomes a zero word; a real target becomes the distance from
-- the table label to the block label.
3908 jumpTableEntryRel Nothing
3909 = CmmStaticLit (CmmInt 0 wordWidth)
3910 jumpTableEntryRel (Just (BlockId id))
3911 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3912 where blockLabel = mkAsmTempLabel id
-- Scaled, table-base-relative addressing mode for the entry fetch.
3914 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
3915 (EAIndex reg wORD_SIZE) (ImmInt 0))
3917 #if x86_64_TARGET_ARCH
3918 #if darwin_TARGET_OS
3919 -- on Mac OS X/x86_64, put the jump table in the text section
3920 -- to work around a limitation of the linker.
3921 -- ld64 is unable to handle the relocations for
3923 -- if L0 is not preceded by a non-anonymous label in its section.
3925 code = e_code `appOL` t_code `appOL` toOL [
3926 ADD (intSize wordWidth) op (OpReg tableReg),
3927 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
3928 LDATA Text (CmmDataLabel lbl : jumpTable)
3931 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
3932 -- relocations, hence we only get 32-bit offsets in the jump
3933 -- table. As these offsets are always negative we need to properly
3934 -- sign extend them to 64-bit. This hack should be removed in
3935 -- conjunction with the hack in PprMach.hs/pprDataItem once
3936 -- binutils 2.17 is standard.
3937 code = e_code `appOL` t_code `appOL` toOL [
3938 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3940 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
3941 (EAIndex reg wORD_SIZE) (ImmInt 0)))
3943 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
3944 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- i386 PIC: same relative-table scheme without the 64-bit hack.
3948 code = e_code `appOL` t_code `appOL` toOL [
3949 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3950 ADD (intSize wordWidth) op (OpReg tableReg),
3951 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC case: the table holds absolute labels, so we can jump
-- through it directly with a single indexed memory operand.
3957 (reg,e_code) <- getSomeReg expr
3958 lbl <- getNewLabelNat
3960 jumpTable = map jumpTableEntry ids
3961 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
3962 code = e_code `appOL` toOL [
3963 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3964 JMP_TBL op [ id | Just id <- ids ]
3968 #elif powerpc_TARGET_ARCH
3972 (reg,e_code) <- getSomeReg expr
3973 tmp <- getNewRegNat II32
3974 lbl <- getNewLabelNat
3975 dflags <- getDynFlagsNat
3976 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
3977 (tableReg,t_code) <- getSomeReg $ dynRef
3979 jumpTable = map jumpTableEntryRel ids
3981 jumpTableEntryRel Nothing
3982 = CmmStaticLit (CmmInt 0 wordWidth)
3983 jumpTableEntryRel (Just (BlockId id))
3984 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
3985 where blockLabel = mkAsmTempLabel id
3987 code = e_code `appOL` t_code `appOL` toOL [
3988 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
3989 SLW tmp reg (RIImm (ImmInt 2)),
3990 LD II32 tmp (AddrRegReg tableReg tmp),
3991 ADD tmp tmp (RIReg tableReg),
3993 BCTR [ id | Just id <- ids ]
3998 (reg,e_code) <- getSomeReg expr
3999 tmp <- getNewRegNat II32
4000 lbl <- getNewLabelNat
4002 jumpTable = map jumpTableEntry ids
4004 code = e_code `appOL` toOL [
4005 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4006 SLW tmp reg (RIImm (ImmInt 2)),
4007 ADDIS tmp tmp (HA (ImmCLbl lbl)),
4008 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4010 BCTR [ id | Just id <- ids ]
4013 #elif sparc_TARGET_ARCH
4016 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4019 = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
4021 #error "ToDo: genSwitch"
-- | One word of an absolute jump table: the assembler temp label for
-- the target block, or a zero word for a hole (no target at that index).
jumpTableEntry mbBlockId =
    case mbBlockId of
        Nothing            -> CmmStaticLit (CmmInt 0 wordWidth)
        Just (BlockId uid) -> CmmStaticLit (CmmLabel (mkAsmTempLabel uid))
4028 -- -----------------------------------------------------------------------------
4030 -- -----------------------------------------------------------------------------
4033 -- -----------------------------------------------------------------------------
4034 -- 'condIntReg' and 'condFltReg': condition codes into registers
4036 -- Turn those condition codes into integers now (when they appear on
4037 -- the right hand side of an assignment).
4039 -- (If applicable) Do not fill the delay slots here; you will confuse the
4040 -- register allocator.
4042 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4044 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4046 #if alpha_TARGET_ARCH
4047 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4048 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4049 #endif /* alpha_TARGET_ARCH */
4051 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4053 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- Materialise an integer comparison as 0/1 in a register:
-- SETcc writes a byte into an II8 temp, then MOVZxL zero-extends it
-- into the 32-bit destination.
4055 condIntReg cond x y = do
4056 CondCode _ cond cond_code <- condIntCode cond x y
-- Byte-sized temp: SETcc can only target a byte register.
4057 tmp <- getNewRegNat II8
4059 code dst = cond_code `appOL` toOL [
4060 SETCC cond (OpReg tmp),
4061 MOVZxL II8 (OpReg tmp) (OpReg dst)
4064 return (Any II32 code)
4068 #if i386_TARGET_ARCH
-- i386 version: same SETcc + zero-extend scheme as condIntReg, driven
-- by a floating-point comparison.  (The unordered-operand handling
-- lives in condFltCode on this target.)
4070 condFltReg cond x y = do
4071 CondCode _ cond cond_code <- condFltCode cond x y
-- Byte-sized temp: SETcc can only target a byte register.
4072 tmp <- getNewRegNat II8
4074 code dst = cond_code `appOL` toOL [
4075 SETCC cond (OpReg tmp),
4076 MOVZxL II8 (OpReg tmp) (OpReg dst)
4079 return (Any II32 code)
4083 #if x86_64_TARGET_ARCH
-- x86_64 version: SSE comparisons signal "unordered" (NaN operand)
-- via the parity flag, so the plain SETcc result must be combined
-- with a parity test except where the condition already excludes
-- the unordered case.
4085 condFltReg cond x y = do
4086 CondCode _ cond cond_code <- condFltCode cond x y
4087 tmp1 <- getNewRegNat wordSize
4088 tmp2 <- getNewRegNat wordSize
4090 -- We have to worry about unordered operands (eg. comparisons
4091 -- against NaN). If the operands are unordered, the comparison
4092 -- sets the parity flag, carry flag and zero flag.
4093 -- All comparisons are supposed to return false for unordered
4094 -- operands except for !=, which returns true.
4096 -- Optimisation: we don't have to test the parity flag if we
4097 -- know the test has already excluded the unordered case: eg >
4098 -- and >= test for a zero carry flag, which can only occur for
4099 -- ordered operands.
4101 -- ToDo: by reversing comparisons we could avoid testing the
4102 -- parity flag in more cases.
4107 NE -> or_unordered dst
4108 GU -> plain_test dst
4109 GEU -> plain_test dst
4110 _ -> and_ordered dst)
-- No parity check needed: condition already excludes unordered.
4112 plain_test dst = toOL [
4113 SETCC cond (OpReg tmp1),
4114 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- (/=): true if the condition holds OR the operands are unordered.
4116 or_unordered dst = toOL [
4117 SETCC cond (OpReg tmp1),
4118 SETCC PARITY (OpReg tmp2),
4119 OR II8 (OpReg tmp1) (OpReg tmp2),
4120 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- Other conditions: true only if it holds AND operands are ordered.
4122 and_ordered dst = toOL [
4123 SETCC cond (OpReg tmp1),
4124 SETCC NOTPARITY (OpReg tmp2),
4125 AND II8 (OpReg tmp1) (OpReg tmp2),
4126 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4129 return (Any II32 code)
4133 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4135 #if sparc_TARGET_ARCH
-- Branch-free special case: dst := (x == 0).
-- NOTE(review): relies on SPARC subtract-with-carry semantics —
-- the first SUB sets the condition codes from (0 - src), the second
-- turns the carry into 0/1 in dst; confirm against the SPARC manual.
4137 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4138 (src, code) <- getSomeReg x
4139 tmp <- getNewRegNat II32
4141 code__2 dst = code `appOL` toOL [
4142 SUB False True g0 (RIReg src) g0,
4143 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4144 return (Any II32 code__2)
-- General equality: XOR the operands (zero iff equal), then reuse the
-- same carry trick on the XOR result.
4146 condIntReg EQQ x y = do
4147 (src1, code1) <- getSomeReg x
4148 (src2, code2) <- getSomeReg y
4149 tmp1 <- getNewRegNat II32
4150 tmp2 <- getNewRegNat II32
4152 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4153 XOR False src1 (RIReg src2) dst,
4154 SUB False True g0 (RIReg dst) g0,
4155 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4156 return (Any II32 code__2)
-- Branch-free special case: dst := (x /= 0), complement of the above.
4158 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4159 (src, code) <- getSomeReg x
4160 tmp <- getNewRegNat II32
4162 code__2 dst = code `appOL` toOL [
4163 SUB False True g0 (RIReg src) g0,
4164 ADD True False g0 (RIImm (ImmInt 0)) dst]
4165 return (Any II32 code__2)
-- General inequality via XOR, as for EQQ.
4167 condIntReg NE x y = do
4168 (src1, code1) <- getSomeReg x
4169 (src2, code2) <- getSomeReg y
4170 tmp1 <- getNewRegNat II32
4171 tmp2 <- getNewRegNat II32
4173 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4174 XOR False src1 (RIReg src2) dst,
4175 SUB False True g0 (RIReg dst) g0,
4176 ADD True False g0 (RIImm (ImmInt 0)) dst]
4177 return (Any II32 code__2)
-- Generic conditions: no branch-free trick, so emit a small diamond:
-- branch on the condition to a block that loads 1, else fall through
-- and load 0.  (NOPs fill the SPARC branch delay slots; the NEWBLOCK
-- pseudo-instructions for bid1/bid2 are elided in this listing.)
4179 condIntReg cond x y = do
4180 bid1@(BlockId lbl1) <- getBlockIdNat
4181 bid2@(BlockId lbl2) <- getBlockIdNat
4182 CondCode _ cond cond_code <- condIntCode cond x y
4184 code__2 dst = cond_code `appOL` toOL [
4185 BI cond False bid1, NOP,
4186 OR False g0 (RIImm (ImmInt 0)) dst,
4187 BI ALWAYS False bid2, NOP,
4189 OR False g0 (RIImm (ImmInt 1)) dst,
4191 return (Any II32 code__2)
-- Same diamond for floating-point conditions, using the FP branch BF.
4193 condFltReg cond x y = do
4194 bid1@(BlockId lbl1) <- getBlockIdNat
4195 bid2@(BlockId lbl2) <- getBlockIdNat
4196 CondCode _ cond cond_code <- condFltCode cond x y
4198 code__2 dst = cond_code `appOL` toOL [
4200 BF cond False bid1, NOP,
4201 OR False g0 (RIImm (ImmInt 0)) dst,
4202 BI ALWAYS False bid2, NOP,
4204 OR False g0 (RIImm (ImmInt 1)) dst,
4206 return (Any II32 code__2)
4208 #endif /* sparc_TARGET_ARCH */
4210 #if powerpc_TARGET_ARCH
4211 condReg getCond = do
4212 lbl1 <- getBlockIdNat
4213 lbl2 <- getBlockIdNat
4214 CondCode _ cond cond_code <- getCond
4216 {- code dst = cond_code `appOL` toOL [
4225 code dst = cond_code
4229 RLWINM dst dst (bit + 1) 31 31
4232 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4235 (bit, do_negate) = case cond of
4249 return (Any II32 code)
4251 condIntReg cond x y = condReg (condIntCode cond x y)
4252 condFltReg cond x y = condReg (condFltCode cond x y)
4253 #endif /* powerpc_TARGET_ARCH */
4256 -- -----------------------------------------------------------------------------
4257 -- 'trivial*Code': deal with trivial instructions
4259 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4260 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4261 -- Only look for constants on the right hand side, because that's
4262 -- where the generic optimizer will have put them.
4264 -- Similarly, for unary instructions, we don't have to worry about
4265 -- matching an StInt as the argument, because genericOpt will already
4266 -- have handled the constant-folding.
4269 :: Width -- Int only
4270 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4271 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4272 -> Maybe (Operand -> Operand -> Instr)
4273 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4274 -> Maybe (Operand -> Operand -> Instr)
4275 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4276 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4278 -> CmmExpr -> CmmExpr -- the two arguments
4281 #ifndef powerpc_TARGET_ARCH
4283 :: Width -- Floating point only
4284 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4285 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4286 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4287 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4289 -> CmmExpr -> CmmExpr -- the two arguments
4295 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4296 ,IF_ARCH_i386 ((Operand -> Instr)
4297 ,IF_ARCH_x86_64 ((Operand -> Instr)
4298 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4299 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4301 -> CmmExpr -- the one argument
4304 #ifndef powerpc_TARGET_ARCH
4307 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4308 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4309 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4310 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4312 -> CmmExpr -- the one argument
4316 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4318 #if alpha_TARGET_ARCH
-- Alpha: two-operand ALU code. First clause folds an integer literal
-- second operand into the instruction as an immediate (RIImm).
-- NOTE(review): interior lines (`let`/`in` keywords) appear to be missing
-- from this listing; the visible tokens are preserved verbatim.
4320 trivialCode instr x (StInt y)
4322 = getRegister x `thenNat` \ register ->
4323 getNewRegNat IntRep `thenNat` \ tmp ->
4325 code = registerCode register tmp
4326 src1 = registerName register tmp
4327 src2 = ImmInt (fromInteger y)
4328 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4330 return (Any IntRep code__2)
-- General case: both operands computed into registers, combined with
-- a register-register form of the instruction.
4332 trivialCode instr x y
4333 = getRegister x `thenNat` \ register1 ->
4334 getRegister y `thenNat` \ register2 ->
4335 getNewRegNat IntRep `thenNat` \ tmp1 ->
4336 getNewRegNat IntRep `thenNat` \ tmp2 ->
4338 code1 = registerCode register1 tmp1 []
4339 src1 = registerName register1 tmp1
4340 code2 = registerCode register2 tmp2 []
4341 src2 = registerName register2 tmp2
4342 code__2 dst = asmSeqThen [code1, code2] .
4343 mkSeqInstr (instr src1 (RIReg src2) dst)
4345 return (Any IntRep code__2)
-- Alpha: unary integer operation (single source register -> dst).
4348 trivialUCode instr x
4349 = getRegister x `thenNat` \ register ->
4350 getNewRegNat IntRep `thenNat` \ tmp ->
4352 code = registerCode register tmp
4353 src = registerName register tmp
4354 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4356 return (Any IntRep code__2)
-- Alpha: binary floating-point operation; both operands in FF64 temps.
4359 trivialFCode _ instr x y
4360 = getRegister x `thenNat` \ register1 ->
4361 getRegister y `thenNat` \ register2 ->
4362 getNewRegNat FF64 `thenNat` \ tmp1 ->
4363 getNewRegNat FF64 `thenNat` \ tmp2 ->
4365 code1 = registerCode register1 tmp1
4366 src1 = registerName register1 tmp1
4368 code2 = registerCode register2 tmp2
4369 src2 = registerName register2 tmp2
4371 code__2 dst = asmSeqThen [code1 [], code2 []] .
4372 mkSeqInstr (instr src1 src2 dst)
4374 return (Any FF64 code__2)
-- Alpha: unary floating-point operation.
4376 trivialUFCode _ instr x
4377 = getRegister x `thenNat` \ register ->
4378 getNewRegNat FF64 `thenNat` \ tmp ->
4380 code = registerCode register tmp
4381 src = registerName register tmp
4382 code__2 dst = code . mkSeqInstr (instr src dst)
4384 return (Any FF64 code__2)
4386 #endif /* alpha_TARGET_ARCH */
4388 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4390 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4393 The Rules of the Game are:
4395 * You cannot assume anything about the destination register dst;
4396 it may be anything, including a fixed reg.
4398 * You may compute an operand into a fixed reg, but you may not
4399 subsequently change the contents of that fixed reg. If you
4400 want to do so, first copy the value either to a temporary
4401 or into dst. You are free to modify dst even if it happens
4402 to be a fixed reg -- that's not your problem.
4404 * You cannot assume that a fixed reg will stay live over an
4405 arbitrary computation. The same applies to the dst reg.
4407 * Temporary regs obtained from getNewRegNat are distinct from
4408 each other and from all other regs, and stay live over
4409 arbitrary computations.
4411 --------------------
4413 SDM's version of The Rules:
4415 * If getRegister returns Any, that means it can generate correct
4416 code which places the result in any register, period. Even if that
4417 register happens to be read during the computation.
4419 Corollary #1: this means that if you are generating code for an
4420 operation with two arbitrary operands, you cannot assign the result
4421 of the first operand into the destination register before computing
4422 the second operand. The second operand might require the old value
4423 of the destination register.
4425 Corollary #2: A function might be able to generate more efficient
4426 code if it knows the destination register is a new temporary (and
4427 therefore not read by any of the sub-computations).
4429 * If getRegister returns Any, then the code it generates may modify only:
4430 (a) fresh temporaries
4431 (b) the destination register
4432 (c) known registers (eg. %ecx is used by shifts)
4433 In particular, it may *not* modify global registers, unless the global
4434 register happens to be the destination register.
-- x86/x86_64: if the FIRST operand is a 32-bit-representable literal and a
-- reversed form of the instruction is available, compute b into dst and
-- apply the reversed instruction with the literal as an immediate.
-- NOTE(review): the `let code dst` line appears to be missing from this
-- listing; visible tokens are preserved verbatim.
4437 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4438 | is32BitLit lit_a = do
4439 b_code <- getAnyReg b
4442 = b_code dst `snocOL`
4443 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4445 return (Any (intSize width) code)
-- Otherwise fall through to the general two-operand scheme.
4447 trivialCode width instr maybe_revinstr a b
4448 = genTrivialCode (intSize width) instr a b
-- General two-operand code generation for x86-style (dst := dst `op` src)
-- instructions. b is fetched as a non-clobbered operand so it survives the
-- computation of a; if dst would clash with b's operand, b is first saved
-- to a fresh temporary.
4450 -- This is re-used for floating pt instructions too.
4451 genTrivialCode rep instr a b = do
4452 (b_op, b_code) <- getNonClobberedOperand b
4453 a_code <- getAnyReg a
4454 tmp <- getNewRegNat rep
4456 -- We want the value of b to stay alive across the computation of a.
4457 -- But, we want to calculate a straight into the destination register,
4458 -- because the instruction only has two operands (dst := dst `op` src).
4459 -- The troublesome case is when the result of b is in the same register
4460 -- as the destination reg. In this case, we have to save b in a
4461 -- new temporary across the computation of a.
-- NOTE(review): the `let code dst` / `| otherwise` framing lines appear to
-- be missing from this listing; visible tokens are preserved verbatim.
4463 | dst `regClashesWithOp` b_op =
4465 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4467 instr (OpReg tmp) (OpReg dst)
4471 instr b_op (OpReg dst)
4473 return (Any rep code)
-- A register clashes with an operand if it IS the operand's register, or
-- appears in the operand's addressing mode.
4475 reg `regClashesWithOp` OpReg reg2 = reg == reg2
4476 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
4477 reg `regClashesWithOp` _ = False
-- x86/x86_64: unary operation computed straight into dst.
-- NOTE(review): the `let code dst = ...` line appears to be missing from
-- this listing; visible tokens are preserved verbatim.
4481 trivialUCode rep instr x = do
4482 x_code <- getAnyReg x
4487 return (Any rep code)
-- i386 only: x87 three-address pseudo float op (the GXXX instructions).
4491 #if i386_TARGET_ARCH
4493 trivialFCode width instr x y = do
4494 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4495 (y_reg, y_code) <- getSomeReg y
4497 size = floatSize width
4501 instr size x_reg y_reg dst
4502 return (Any size code)
-- x86_64 only: SSE2 float ops are ordinary two-operand instructions, so
-- they reuse the integer genTrivialCode path.
4506 #if x86_64_TARGET_ARCH
4507 trivialFCode pk instr x y
4508 = genTrivialCode size (instr size) x y
4509 where size = floatSize pk
-- Unary float op (shared by both x86 variants in this section).
4514 trivialUFCode size instr x = do
4515 (x_reg, x_code) <- getSomeReg x
4521 return (Any size code)
4523 #endif /* i386_TARGET_ARCH */
4525 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4527 #if sparc_TARGET_ARCH
-- SPARC: two-operand ALU code; literal second operand becomes an
-- immediate (RIImm). NOTE(review): guard and `let` lines appear to be
-- missing from this listing; visible tokens are preserved verbatim.
4529 trivialCode pk instr x (CmmLit (CmmInt y d))
4532 (src1, code) <- getSomeReg x
4533 tmp <- getNewRegNat II32
4535 src2 = ImmInt (fromInteger y)
4536 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4537 return (Any II32 code__2)
-- General case: both operands in registers.
4539 trivialCode pk instr x y = do
4540 (src1, code1) <- getSomeReg x
4541 (src2, code2) <- getSomeReg y
4542 tmp1 <- getNewRegNat II32
4543 tmp2 <- getNewRegNat II32
4545 code__2 dst = code1 `appOL` code2 `snocOL`
4546 instr src1 (RIReg src2) dst
4547 return (Any II32 code__2)
-- SPARC float op: if the two operands have different float widths, the
-- 32-bit one is first promoted to FF64 via FxTOy before the operation.
4550 trivialFCode pk instr x y = do
4551 (src1, code1) <- getSomeReg x
4552 (src2, code2) <- getSomeReg y
4553 tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4554 tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4555 tmp <- getNewRegNat FF64
4557 promote x = FxTOy FF32 FF64 x tmp
4563 if pk1 `cmmEqType` pk2 then
4564 code1 `appOL` code2 `snocOL`
4565 instr (floatSize pk) src1 src2 dst
4566 else if typeWidth pk1 == W32 then
4567 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4568 instr FF64 tmp src2 dst
4570 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4571 instr FF64 src1 tmp dst
4572 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- SPARC unary integer op.
4576 trivialUCode size instr x = do
4577 (src, code) <- getSomeReg x
4578 tmp <- getNewRegNat size
4580 code__2 dst = code `snocOL` instr (RIReg src) dst
4581 return (Any size code__2)
-- SPARC unary float op.
4584 trivialUFCode pk instr x = do
4585 (src, code) <- getSomeReg x
4586 tmp <- getNewRegNat pk
4588 code__2 dst = code `snocOL` instr src dst
4589 return (Any pk code__2)
4591 #endif /* sparc_TARGET_ARCH */
4593 #if powerpc_TARGET_ARCH
4596 Wolfgang's PowerPC version of The Rules:
4598 A slightly modified version of The Rules to take advantage of the fact
4599 that PowerPC instructions work on all registers and don't implicitly
4600 clobber any fixed registers.
4602 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4604 * If getRegister returns Any, then the code it generates may modify only:
4605 (a) fresh temporaries
4606 (b) the destination register
4607 It may *not* modify global registers, unless the global
4608 register happens to be the destination register.
4609 It may not clobber any other registers. In fact, only ccalls clobber any
4611 Also, it may not modify the counter register (used by genCCall).
4613 Corollary: If a getRegister for a subexpression returns Fixed, you need
4614 not move it to a fresh temporary before evaluating the next subexpression.
4615 The Fixed register won't be modified.
4616 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4618 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4619 the value of the destination register.
-- PowerPC: two-operand ALU code. If the literal fits the instruction's
-- (signed/unsigned) immediate field, use the immediate form.
4622 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4623 | Just imm <- makeImmediate rep signed y
4625 (src1, code1) <- getSomeReg x
4626 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4627 return (Any (intSize rep) code)
-- General case: register-register form.
4629 trivialCode rep signed instr x y = do
4630 (src1, code1) <- getSomeReg x
4631 (src2, code2) <- getSomeReg y
4632 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
4633 return (Any (intSize rep) code)
-- For instructions with no immediate form at all (register operands only).
4635 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
4636 -> CmmExpr -> CmmExpr -> NatM Register
4637 trivialCodeNoImm' size instr x y = do
4638 (src1, code1) <- getSomeReg x
4639 (src2, code2) <- getSomeReg y
4640 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
4641 return (Any size code)
-- Convenience wrapper: size-parameterized instruction constructors.
4643 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
4644 -> CmmExpr -> CmmExpr -> NatM Register
4645 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
-- PowerPC unary op.
4647 trivialUCode rep instr x = do
4648 (src, code) <- getSomeReg x
4649 let code' dst = code `snocOL` instr dst src
4650 return (Any rep code')
4652 -- There is no "remainder" instruction on the PPC, so we have to do
4654 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- rem is synthesized as: dst = x - (x `div` y) * y.
-- NOTE(review): a line between the div and MULLW instructions appears to
-- be missing from this listing; visible tokens are preserved verbatim.
4656 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4657 -> CmmExpr -> CmmExpr -> NatM Register
4658 remainderCode rep div x y = do
4659 (src1, code1) <- getSomeReg x
4660 (src2, code2) <- getSomeReg y
4661 let code dst = code1 `appOL` code2 `appOL` toOL [
4663 MULLW dst dst (RIReg src2),
4666 return (Any (intSize rep) code)
4668 #endif /* powerpc_TARGET_ARCH */
4671 -- -----------------------------------------------------------------------------
4672 -- Coercing to/from integer/floating-point...
4674 -- When going to integer, we truncate (round towards 0).
4676 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4677 -- conversions. We have to store temporaries in memory to move
4678 -- between the integer and the floating point register sets.
4680 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4681 -- pretend, on sparc at least, that double and float regs are separate
4682 -- kinds, so the value has to be computed into one kind before being
4683 -- explicitly "converted" to live in the other kind.
-- Shared signatures for the int<->float coercions below; the SPARC-only
-- float<->double coercions exist because this backend treats FF32 and
-- FF64 as separate register kinds.
4685 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4686 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4688 #if sparc_TARGET_ARCH
4689 coerceDbl2Flt :: CmmExpr -> NatM Register
4690 coerceFlt2Dbl :: CmmExpr -> NatM Register
4693 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4695 #if alpha_TARGET_ARCH
-- Alpha int->FP coercion: bounces the value through memory (spRel 0)
-- to move between register files.
-- NOTE(review): the defining equation lines (presumably coerceInt2FP /
-- coerceFP2Int headers) and some instruction lines are missing from this
-- listing; visible tokens are preserved verbatim.
4698 = getRegister x `thenNat` \ register ->
4699 getNewRegNat IntRep `thenNat` \ reg ->
4701 code = registerCode register reg
4702 src = registerName register reg
4704 code__2 dst = code . mkSeqInstrs [
4706 LD TF dst (spRel 0),
4709 return (Any FF64 code__2)
-- Alpha FP->int coercion: same store/load trick in the other direction.
4713 = getRegister x `thenNat` \ register ->
4714 getNewRegNat FF64 `thenNat` \ tmp ->
4716 code = registerCode register tmp
4717 src = registerName register tmp
4719 code__2 dst = code . mkSeqInstrs [
4721 ST TF tmp (spRel 0),
4724 return (Any IntRep code__2)
4726 #endif /* alpha_TARGET_ARCH */
4728 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4730 #if i386_TARGET_ARCH
-- i386 (x87): int->FP via the GITOF/GITOD pseudo-instructions, selected
-- by destination width.
4732 coerceInt2FP from to x = do
4733 (x_reg, x_code) <- getSomeReg x
4735 opc = case to of W32 -> GITOF; W64 -> GITOD
4736 code dst = x_code `snocOL` opc x_reg dst
4737 -- ToDo: works for non-II32 reps?
4738 return (Any (floatSize to) code)
-- i386 (x87): FP->int via GFTOI/GDTOI, selected by source width.
4742 coerceFP2Int from to x = do
4743 (x_reg, x_code) <- getSomeReg x
4745 opc = case from of W32 -> GFTOI; W64 -> GDTOI
4746 code dst = x_code `snocOL` opc x_reg dst
4747 -- ToDo: works for non-II32 reps?
4749 return (Any (intSize to) code)
4751 #endif /* i386_TARGET_ARCH */
4753 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4755 #if x86_64_TARGET_ARCH
-- x86_64 (SSE2): FP->int with truncation (CVTTSS2SIQ/CVTTSD2SIQ),
-- selected by source float width.
4757 coerceFP2Int from to x = do
4758 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4760 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
4761 code dst = x_code `snocOL` opc x_op dst
4763 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- x86_64 (SSE2): int->FP via CVTSI2SS/CVTSI2SD, selected by target width.
4765 coerceInt2FP from to x = do
4766 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4768 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
4769 code dst = x_code `snocOL` opc x_op dst
4771 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- x86_64: float<->double conversion; the opcode is chosen by the TARGET
-- width, so W32 means "to single" (CVTSD2SS) and W64 "to double".
4773 coerceFP2FP :: Width -> CmmExpr -> NatM Register
4774 coerceFP2FP to x = do
4775 (x_reg, x_code) <- getSomeReg x
4777 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
4778 code dst = x_code `snocOL` opc x_reg dst
4780 return (Any (floatSize to) code)
4783 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4785 #if sparc_TARGET_ARCH
-- SPARC int->FP: store the int below the stack pointer, reload it into a
-- float register, then convert with FxTOy.
4787 coerceInt2FP width1 width2 x = do
4788 (src, code) <- getSomeReg x
4790 code__2 dst = code `appOL` toOL [
4791 ST (intSize width1) src (spRel (-2)),
4792 LD (intSize width1) (spRel (-2)) dst,
4793 FxTOy (intSize width1) (floatSize width2) dst dst]
4794 return (Any (floatSize $ width2) code__2)
-- SPARC FP->int: convert in the FP register file, then bounce through
-- memory into an integer register.
-- NOTE(review): pk here is built from width1 and fprep from width2, which
-- looks swapped relative to the (from, to) convention of the other
-- backends -- TODO confirm against callers.
4797 coerceFP2Int width1 width2 x = do
4798 let pk = intSize width1
4799 fprep = floatSize width2
4801 (src, code) <- getSomeReg x
4802 reg <- getNewRegNat fprep
4803 tmp <- getNewRegNat pk
4805 code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
4807 FxTOy fprep pk src tmp,
4808 ST pk tmp (spRel (-2)),
4809 LD pk (spRel (-2)) dst]
4810 return (Any pk code__2)
-- SPARC double->float: a single FxTOy conversion.
4813 coerceDbl2Flt x = do
4814 (src, code) <- getSomeReg x
4815 return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
-- SPARC float->double: the inverse single-instruction conversion.
4818 coerceFlt2Dbl x = do
4819 (src, code) <- getSomeReg x
4820 return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
4822 #endif /* sparc_TARGET_ARCH */
4824 #if powerpc_TARGET_ARCH
-- PowerPC int->FP: no direct conversion instruction, so the classic
-- 0x43300000_80000000 double-bias trick is used: build the IEEE double
-- (2^52 + 2^31 + (x xor 2^31)) on the stack, load it, and subtract the
-- bias constant (loaded via a dynamic data reference) to recover x.
-- Narrow sources are sign-extended first; a W32 target gets a final FRSP.
4825 coerceInt2FP fromRep toRep x = do
4826 (src, code) <- getSomeReg x
4827 lbl <- getNewLabelNat
4828 itmp <- getNewRegNat II32
4829 ftmp <- getNewRegNat FF64
4830 dflags <- getDynFlagsNat
4831 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4832 Amode addr addr_code <- getAmode dynRef
-- NOTE(review): the LDATA line emitting the bias constant and the LD of
-- it appear to be partially missing from this listing.
4834 code' dst = code `appOL` maybe_exts `appOL` toOL [
4837 CmmStaticLit (CmmInt 0x43300000 W32),
4838 CmmStaticLit (CmmInt 0x80000000 W32)],
4839 XORIS itmp src (ImmInt 0x8000),
4840 ST II32 itmp (spRel 3),
4841 LIS itmp (ImmInt 0x4330),
4842 ST II32 itmp (spRel 2),
4843 LD FF64 ftmp (spRel 2)
4844 ] `appOL` addr_code `appOL` toOL [
4846 FSUB FF64 dst ftmp dst
4847 ] `appOL` maybe_frsp dst
4849 maybe_exts = case fromRep of
4850 W8 -> unitOL $ EXTS II8 src src
4851 W16 -> unitOL $ EXTS II16 src src
4853 maybe_frsp dst = case toRep of
4854 W32 -> unitOL $ FRSP dst dst
4856 return (Any (floatSize toRep) code')
-- PowerPC FP->int: convert in an FP register (FCTIWZ, per the comment
-- below), store the 64-bit result, and reload only its low word.
4858 coerceFP2Int fromRep toRep x = do
4859 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
4860 (src, code) <- getSomeReg x
4861 tmp <- getNewRegNat FF64
4863 code' dst = code `appOL` toOL [
4864 -- convert to int in FP reg
4866 -- store value (64bit) from FP to stack
4867 ST FF64 tmp (spRel 2),
4868 -- read low word of value (high word is undefined)
4869 LD II32 dst (spRel 3)]
4870 return (Any (intSize toRep) code')
4871 #endif /* powerpc_TARGET_ARCH */
4874 -- -----------------------------------------------------------------------------
4875 -- eXTRA_STK_ARGS_HERE
4877 -- We (allegedly) put the first six C-call arguments in registers;
4878 -- where do we start putting the rest of them?
4880 -- Moved from MachInstrs (SDM):
4882 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-- Stack offset (in words) at which C-call arguments beyond the
-- register-passed ones begin: 0 on alpha, 23 on sparc (the sparc ABI
-- reserves a register-save area and hidden parameter slot below it).
4883 eXTRA_STK_ARGS_HERE :: Int
4885 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))