2 -- The above warning suppression flag is a temporary kludge.
3 -- While working on this module you are encouraged to remove it and fix
4 -- any warnings in the module. See
5 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
8 -----------------------------------------------------------------------------
10 -- Generating machine code (instruction selection)
12 -- (c) The University of Glasgow 1996-2004
14 -----------------------------------------------------------------------------
16 -- This is a big module, but, if you pay attention to
17 -- (a) the sectioning, (b) the type signatures, and
18 -- (c) the #if blah_TARGET_ARCH things, the
19 -- structure should not be too overwhelming.
21 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
31 import PositionIndependentCode
32 import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
36 -- Our intermediate code:
38 import PprCmm ( pprExpr )
41 import ClosureInfo ( C_SRT(..) )
44 import StaticFlags ( opt_PIC )
45 import ForeignCall ( CCallConv(..) )
48 import qualified Outputable as O
51 import FastBool ( isFastTrue )
52 import Constants ( wORD_SIZE )
54 import Debug.Trace ( trace )
56 import Control.Monad ( mapAndUnzipM )
57 import Data.Maybe ( fromJust )
63 -- -----------------------------------------------------------------------------
64 -- Top-level of the instruction selector
66 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
67 -- They are really trees of insns to facilitate fast appending, where a
68 -- left-to-right traversal (pre-order?) yields the insns in the correct
71 type InstrBlock = OrdList Instr
-- | Code-generate one top-level Cmm item: either a procedure or a data
-- section.  For a procedure we select instructions for every basic
-- block; when position-independent code is in use, the PIC base
-- register is initialised first.
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
  (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  let proc = CmmProc info lab params (ListGraph (concat nat_blocks))
      tops = proc : concat statics
  case picBaseMb of
      Just picBase -> initializePicBase picBase tops
      Nothing      -> return tops
cmmTopCodeGen (CmmData sec dat) =
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-- | Select instructions for one basic block.  Code generation may
-- introduce new basic-block boundaries (marked with the NEWBLOCK
-- pseudo-instruction) and may emit data (LDATA), so we split the
-- instruction stream back into proper basic blocks and float the data
-- out as extra top-level items.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  let
      (top, other_blocks, statics) = foldrOL mkBlocks ([], [], []) instrs

      -- NEWBLOCK starts a fresh basic block ...
      mkBlocks (NEWBLOCK id) (instrs, blocks, statics)
        = ([], BasicBlock id instrs : blocks, statics)
      -- ... LDATA is floated out to the statics ...
      mkBlocks (LDATA sec dat) (instrs, blocks, statics)
        = (instrs, blocks, CmmData sec dat : statics)
      -- ... everything else belongs to the current block.
      mkBlocks instr (instrs, blocks, statics)
        = (instr : instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
-- | Instruction selection for a statement list: select each statement
-- and concatenate the resulting instruction trees.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts = do
  instrss <- mapM stmtToInstrs stmts
  return (concatOL instrss)
-- | Instruction selection for a single Cmm statement.  Assignments and
-- stores are dispatched on the type of the value: float, 64-bit (on
-- 32-bit targets only), or native-width integer.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty -> assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignReg_I64Code      reg src
#endif
      | otherwise      -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty -> assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
      | isWord64 ty    -> assignMem_I64Code      addr src
#endif
      | otherwise      -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty

    CmmCall target result_regs args _ _
       -> genCCall target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    CmmJump arg params    -> genJump arg
    -- Returns are eliminated by the CPS pass before we get here.
    CmmReturn params      ->
      panic "stmtToInstrs: return statement should have been cps'd away"
143 -- -----------------------------------------------------------------------------
144 -- General things for putting together code sequences
146 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
147 -- CmmExprs into CmmRegOff?
-- | Expand a 'CmmRegOff' into the equivalent explicit addition
-- @reg + off@, so the rest of the selector only deals with 'CmmMachOp'.
-- Partial by design: only 'CmmRegOff' is handled (as in the original).
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = let w = typeWidth (cmmRegType reg)
    in CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) w)]
153 -- -----------------------------------------------------------------------------
154 -- Code gen for 64-bit arithmetic on 32-bit platforms
157 Simple support for generating 64-bit code (ie, 64 bit values and 64
158 bit assignments) on 32-bit platforms. Unlike the main code generator
159 we merely shoot for generating working code as simply as possible, and
160 pay little attention to code quality. Specifically, there is no
161 attempt to deal cleverly with the fixed-vs-floating register
162 distinction; all values are generated into (pairs of) floating
163 registers, even if this would mean some redundant reg-reg moves as a
164 result. Only one of the VRegUniques is returned, since it will be
165 of the VRegUniqueLo form, and the upper-half VReg can be determined
166 by applying getHiVRegFromLo to it.
-- | The result of 64-bit instruction selection on a 32-bit platform
-- (a.k.a "Register64"): the generated code together with the temporary
-- that holds the low 32 bits.  The high half always lives in the
-- paired register obtained via 'getHiVRegFromLo'.
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified
178 #if WORD_SIZE_IN_BITS==32
179 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
180 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
183 #ifndef x86_64_TARGET_ARCH
184 iselExpr64 :: CmmExpr -> NatM ChildCode64
187 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | i386: store a 64-bit value to memory as two 32-bit stores
-- (little-endian: low word at the address, high word at address+4).
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
      rhi = getHiVRegFromLo rlo

      -- Little-endian store
      mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
      mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | i386: assign a 64-bit value to a local register pair using two
-- reg-reg moves.  Any other lvalue is a panic.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
  let
      r_dst_lo = mkVReg u_dst II32
      r_dst_hi = getHiVRegFromLo r_dst_lo
      r_src_hi = getHiVRegFromLo r_src_lo
      mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
      mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
  return (
      vcode `snocOL` mov_lo `snocOL` mov_hi
    )
assignReg_I64Code lvalue valueTree
  = panic "assignReg_I64Code(i386): invalid lvalue"
-- | i386: a 64-bit integer literal is materialised as two 32-bit
-- immediate moves.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo, rhi) <- getNewRegPairNat II32
  let
      -- Low word: truncate the Integer to 32 bits.
      r = fromIntegral (fromIntegral i :: Word32)
      -- High word: shift the *Integer* right first, then truncate.
      -- BUG FIX: the truncation to Word32 previously happened before
      -- the shift, so the high word always came out as 0.
      q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
      code = toOL [
          MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
          MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
          ]
  return (ChildCode64 code rlo)
-- | i386: a 64-bit load becomes two 32-bit loads (low word at the
-- address, high word at address+4).
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
  Amode addr addr_code <- getAmode addrTree
  (rlo, rhi) <- getNewRegPairNat II32
  let
      mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
      mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
  return (
      ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rlo
    )

-- | i386: a 64-bit local already lives in a known virtual register
-- pair, so no code is needed.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
  = return (ChildCode64 nilOL (mkVReg vu II32))
249 -- we handle addition, but rather badly
-- | i386: 64-bit addition of a literal — ADD on the low words, then
-- ADC to propagate the carry into the high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
  ChildCode64 code1 r1lo <- iselExpr64 e1
  (rlo, rhi) <- getNewRegPairNat II32
  let
      r = fromIntegral (fromIntegral i :: Word32)
      -- BUG FIX: shift the Integer *before* truncating to Word32;
      -- truncating first made the literal's high half always 0.
      q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
      r1hi = getHiVRegFromLo r1lo
      code = code1 `appOL`
             toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                    ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                    MOV II32 (OpReg r1hi) (OpReg rhi),
                    ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
  return (ChildCode64 code rlo)
-- | i386: general 64-bit addition — ADD the low words, ADC the high
-- words so the carry flows between the halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1, e2]) = do
  ChildCode64 code1 r1lo <- iselExpr64 e1
  ChildCode64 code2 r2lo <- iselExpr64 e2
  (rlo, rhi) <- getNewRegPairNat II32
  let
      r1hi = getHiVRegFromLo r1lo
      r2hi = getHiVRegFromLo r2lo
      code = code1 `appOL`
             code2 `appOL`
             toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                    ADD II32 (OpReg r2lo) (OpReg rlo),
                    MOV II32 (OpReg r1hi) (OpReg rhi),
                    ADC II32 (OpReg r2hi) (OpReg rhi) ]
  return (ChildCode64 code rlo)
-- | i386: widen an unsigned value to 64 bits by computing it into the
-- low register and zeroing the paired high register.
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
  fn <- getAnyReg expr
  r_dst_lo <- getNewRegNat II32
  let r_dst_hi = getHiVRegFromLo r_dst_lo
      code = fn r_dst_lo
  return (
      ChildCode64 (code `snocOL`
                   MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                  r_dst_lo
    )

-- Catch-all: anything else is outside this simple 64-bit selector.
iselExpr64 expr
  = pprPanic "iselExpr64(i386)" (ppr expr)
295 #endif /* i386_TARGET_ARCH */
297 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
299 #if sparc_TARGET_ARCH
-- | SPARC: 64-bit store as two 32-bit stores (big-endian: high word at
-- offset 0, low word at offset 4).
-- NOTE(review): the amode from 'getAmode' (addr/addr_code) is never
-- used; the address is re-evaluated via 'getSomeReg' — kept as in the
-- original, but looks redundant; verify before cleaning up.
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  (src, code) <- getSomeReg addrTree
  let
      rhi = getHiVRegFromLo rlo
      -- Big-endian store
      mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
      mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
  return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-- | SPARC: assign a 64-bit value to a local register pair; moves are
-- synthesised as OR with %g0.  Any other lvalue is a panic.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
  let
      r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
      r_dst_hi = getHiVRegFromLo r_dst_lo
      r_src_hi = getHiVRegFromLo r_src_lo
      mov_lo = mkMOV r_src_lo r_dst_lo
      mov_hi = mkMOV r_src_hi r_dst_hi
      mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
  return (vcode `snocOL` mov_hi `snocOL` mov_lo)
assignReg_I64Code lvalue valueTree
  = panic "assignReg_I64Code(sparc): invalid lvalue"
326 -- Don't delete this -- it's very handy for debugging.
328 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
329 -- = panic "iselExpr64(???)"
-- | SPARC: 64-bit load as two 32-bit loads (big-endian: high word at
-- offset 0, low word at offset 4).
-- NOTE(review): only @r1@ of the matched 'AddrRegReg' is used and @r2@
-- is ignored, as in the original — confirm this is intentional.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
  Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
  rlo <- getNewRegNat II32
  let rhi = getHiVRegFromLo rlo
      mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
      mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
  return (
      ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) rlo
    )
-- | SPARC: copy a 64-bit local into a fresh register pair (moves are
-- OR with %g0).
iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
  r_dst_lo <- getNewRegNat II32
  let r_dst_hi = getHiVRegFromLo r_dst_lo
      r_src_lo = mkVReg uq II32
      r_src_hi = getHiVRegFromLo r_src_lo
      mov_lo = mkMOV r_src_lo r_dst_lo
      mov_hi = mkMOV r_src_hi r_dst_hi
      mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
  return (
      ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
    )

-- Catch-all for unsupported 64-bit expressions.
iselExpr64 expr
  = pprPanic "iselExpr64(sparc)" (ppr expr)
357 #endif /* sparc_TARGET_ARCH */
359 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
361 #if powerpc_TARGET_ARCH
-- | PowerPC: addressing modes for both halves of a 64-bit memory
-- operand (big-endian: high word at the base address, low word at +4).
-- If the +4 offset does not fit the amode, fall back to materialising
-- the address in a register.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
  Amode hi_addr addr_code <- getAmode addrTree
  case addrOffset hi_addr 4 of
      Just lo_addr -> return (hi_addr, lo_addr, addr_code)
      Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
                         return (AddrRegImm hi_ptr (ImmInt 0),
                                 AddrRegImm hi_ptr (ImmInt 4),
                                 code)
-- | PowerPC: 64-bit store as two 32-bit stores via 'getI64Amodes'.
assignMem_I64Code addrTree valueTree = do
  (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
      rhi = getHiVRegFromLo rlo

      -- Big-endian: the high word goes at the base address.
      mov_hi = ST II32 rhi hi_addr
      mov_lo = ST II32 rlo lo_addr
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-- | PowerPC: assign a 64-bit value to a local register pair using MR
-- (register move).  Any other lvalue is a panic.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
  ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
  let
      r_dst_lo = mkVReg u_dst II32
      r_dst_hi = getHiVRegFromLo r_dst_lo
      r_src_hi = getHiVRegFromLo r_src_lo
      mov_lo = MR r_dst_lo r_src_lo
      mov_hi = MR r_dst_hi r_src_hi
  return (
      vcode `snocOL` mov_lo `snocOL` mov_hi
    )
assignReg_I64Code lvalue valueTree
  = panic "assignReg_I64Code(powerpc): invalid lvalue"
402 -- Don't delete this -- it's very handy for debugging.
404 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
405 -- = panic "iselExpr64(???)"
-- | PowerPC: 64-bit load as two 32-bit loads.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
  (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
  (rlo, rhi) <- getNewRegPairNat II32
  let mov_hi = LD II32 rhi hi_addr
      mov_lo = LD II32 rlo lo_addr
  return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                       rlo

-- | PowerPC: a 64-bit local is already in a known register pair.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
  = return (ChildCode64 nilOL (mkVReg vu II32))
-- | PowerPC: build a 64-bit literal from four 16-bit halves; each
-- 32-bit register gets LIS (load immediate shifted) for its upper half
-- followed by an OR-immediate of its lower half.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo, rhi) <- getNewRegPairNat II32
  let
      -- BUG FIX: shift the Integer *before* truncating to Word16;
      -- truncating first made half1..half3 always 0.
      half0 = fromIntegral (fromIntegral i :: Word16)
      half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
      half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
      half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)

      code = toOL [
          LIS rlo (ImmInt half1),
          OR rlo rlo (RIImm $ ImmInt half0),
          LIS rhi (ImmInt half3),
          -- BUG FIX: this OR must target the *high* register; it
          -- previously wrote half2 into rlo, clobbering the low word
          -- and leaving rhi only half-initialised.
          OR rhi rhi (RIImm $ ImmInt half2)
          ]
  return (ChildCode64 code rlo)
-- | PowerPC: 64-bit addition — ADDC sets the carry from the low words,
-- ADDE folds it into the high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1, e2]) = do
  ChildCode64 code1 r1lo <- iselExpr64 e1
  ChildCode64 code2 r2lo <- iselExpr64 e2
  (rlo, rhi) <- getNewRegPairNat II32
  let
      r1hi = getHiVRegFromLo r1lo
      r2hi = getHiVRegFromLo r2lo
      code = code1 `appOL`
             code2 `appOL`
             toOL [ ADDC rlo r1lo r2lo,
                    ADDE rhi r1hi r2hi ]
  return (ChildCode64 code rlo)
-- | PowerPC: zero-extend a 32-bit value into a fresh register pair;
-- the high word is simply loaded with 0.
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
  (expr_reg, expr_code) <- getSomeReg expr
  (rlo, rhi) <- getNewRegPairNat II32
  let mov_hi = LI rhi (ImmInt 0)
      mov_lo = MR rlo expr_reg
  return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
                       rlo

-- Catch-all for unsupported 64-bit expressions.
iselExpr64 expr
  = pprPanic "iselExpr64(powerpc)" (ppr expr)
459 #endif /* powerpc_TARGET_ARCH */
462 -- -----------------------------------------------------------------------------
463 -- The 'Register' type
465 -- 'Register's passed up the tree. If the stix code forces the register
466 -- to live in a pre-decided machine register, it comes out as @Fixed@;
467 -- otherwise, it comes out as @Any@, and the parent can decide which
468 -- register to put it in.
-- | A value passed up the tree: either it is pinned in a particular
-- register ('Fixed'), or the parent may pick the destination and the
-- child supplies a code generator to fill it ('Any').
data Register
  = Fixed Size Reg InstrBlock
  | Any   Size (Reg -> InstrBlock)
-- | Change only the 'Size' recorded in a 'Register'; no code is
-- emitted and the payload (fixed reg or code generator) is untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) new_size = Fixed new_size reg code
swizzleRegisterRep (Any _ codefn)     new_size = Any new_size codefn
480 -- -----------------------------------------------------------------------------
481 -- Utils based on getRegister, below
483 -- The dual to getAnyReg: compute an expression into a register, but
484 -- we don't mind which one it is.
-- | The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
      Any rep code -> do
          tmp <- getNewRegNat rep
          return (tmp, code tmp)
      Fixed _ reg code ->
          return (reg, code)
495 -- -----------------------------------------------------------------------------
496 -- Grab the Reg for a CmmReg
-- | The (virtual or real) register that holds a 'CmmReg'.
getRegisterReg :: CmmReg -> Reg
getRegisterReg (CmmLocal (LocalReg u pk))
  = mkVReg u (cmmTypeSize pk)
getRegisterReg (CmmGlobal mid)
  -- By this stage, the only MagicIds remaining should be the ones which
  -- map to a real machine register on this platform; anything else is
  -- an in-memory global, which is a bug here.
  = case get_GlobalReg_reg_or_addr mid of
      Left (RealReg rrno) -> RealReg rrno
      _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
512 -- -----------------------------------------------------------------------------
513 -- Generate code to get a subtree into a Register
515 -- Don't delete this -- it's very handy for debugging.
517 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
518 -- = panic "getRegister(???)"
-- | Generate code to get a subtree into a 'Register'.
getRegister :: CmmExpr -> NatM Register

#if !x86_64_TARGET_ARCH
    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
    -- register, it can only be used for rip-relative addressing.
getRegister (CmmReg (CmmGlobal PicBaseReg)) = do
  reg <- getPicBaseNat wordSize
  return (Fixed wordSize reg nilOL)
#endif

-- A plain register read: no code, just the (virtual/real) register.
getRegister (CmmReg reg)
  = return (Fixed (cmmTypeSize (cmmRegType reg))
                  (getRegisterReg reg) nilOL)

-- Reg+offset is rewritten into an explicit add first.
getRegister tree@(CmmRegOff _ _)
  = getRegister (mangleIndexTree tree)
539 #if WORD_SIZE_IN_BITS==32
540 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
541 -- TO_W_(x), TO_W_(x >> 32)
-- TO_W_(x >> 32): the high half of a 64-bit value is just the paired
-- high virtual register — no extra code beyond evaluating x.
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

-- TO_W_(x): plain truncation is the low register of the pair.
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
563 -- end of machine-"independent" bit; here we go on the rest...
565 #if alpha_TARGET_ARCH
-- Alpha only (dead unless alpha_TARGET_ARCH is set): load a double
-- literal by emitting it into a read-only data block and loading it
-- through a temporary pointer register.
-- NOTE(review): this listing appears to have dropped the bracket line
-- closing the DATA list and the "in" of the let; code left as found.
getRegister (StDouble d)
  = getBlockIdNat `thenNat` \ lbl ->
    getNewRegNat PtrRep `thenNat` \ tmp ->
    let code dst = mkSeqInstrs [
            LDATA RoDataSegment lbl [
                  DATA TF [ImmLab (rational d)]
            LDA tmp (AddrImm (ImmCLbl lbl)),
            LD TF dst (AddrReg tmp)]
    return (Any FF64 code)
-- Alpha only (dead code): unary PrimOps.  Transcendental float/double
-- ops become C library calls.
-- NOTE(review): the "= case primop of" header, a "where" keyword, and
-- possibly one or two arms are missing from this listing; code left
-- exactly as found.
getRegister (StPrim primop [x]) -- unary PrimOps
      IntNegOp -> trivialUCode (NEG Q False) x

      NotOp -> trivialUCode NOT x

      FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x

      OrdOp -> coerceIntCode IntRep x

      Float2IntOp -> coerceFP2Int x
      Int2FloatOp -> coerceInt2FP pr x
      Double2IntOp -> coerceFP2Int x
      Int2DoubleOp -> coerceInt2FP pr x

      Double2FloatOp -> coerceFltCode x
      Float2DoubleOp -> coerceFltCode x

      -- Anything else: call out to the C math library.
      other_op -> getRegister (StCall fn CCallConv FF64 [x])
          fn = case other_op of
                 FloatExpOp -> fsLit "exp"
                 FloatLogOp -> fsLit "log"
                 FloatSqrtOp -> fsLit "sqrt"
                 FloatSinOp -> fsLit "sin"
                 FloatCosOp -> fsLit "cos"
                 FloatTanOp -> fsLit "tan"
                 FloatAsinOp -> fsLit "asin"
                 FloatAcosOp -> fsLit "acos"
                 FloatAtanOp -> fsLit "atan"
                 FloatSinhOp -> fsLit "sinh"
                 FloatCoshOp -> fsLit "cosh"
                 FloatTanhOp -> fsLit "tanh"
                 DoubleExpOp -> fsLit "exp"
                 DoubleLogOp -> fsLit "log"
                 DoubleSqrtOp -> fsLit "sqrt"
                 DoubleSinOp -> fsLit "sin"
                 DoubleCosOp -> fsLit "cos"
                 DoubleTanOp -> fsLit "tan"
                 DoubleAsinOp -> fsLit "asin"
                 DoubleAcosOp -> fsLit "acos"
                 DoubleAtanOp -> fsLit "atan"
                 DoubleSinhOp -> fsLit "sinh"
                 DoubleCoshOp -> fsLit "cosh"
                 DoubleTanhOp -> fsLit "tanh"

          pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-- Alpha only (dead code): dyadic PrimOps.  Gt/Ge comparisons are
-- compiled by swapping the operands and using Lt/Le, since Alpha only
-- has CMP LT/LE/EQ forms.
getRegister (StPrim primop [x, y]) -- dyadic PrimOps
  = case primop of
      CharGtOp -> trivialCode (CMP LTT) y x
      CharGeOp -> trivialCode (CMP LE) y x
      CharEqOp -> trivialCode (CMP EQQ) x y
      CharNeOp -> int_NE_code x y
      CharLtOp -> trivialCode (CMP LTT) x y
      CharLeOp -> trivialCode (CMP LE) x y

      IntGtOp -> trivialCode (CMP LTT) y x
      IntGeOp -> trivialCode (CMP LE) y x
      IntEqOp -> trivialCode (CMP EQQ) x y
      IntNeOp -> int_NE_code x y
      IntLtOp -> trivialCode (CMP LTT) x y
      IntLeOp -> trivialCode (CMP LE) x y

      WordGtOp -> trivialCode (CMP ULT) y x
      -- BUG FIX: like every other GeOp arm here, WordGeOp must swap
      -- its operands (Ge x y == ULE y x); it previously read "x y".
      WordGeOp -> trivialCode (CMP ULE) y x
      WordEqOp -> trivialCode (CMP EQQ) x y
      WordNeOp -> int_NE_code x y
      WordLtOp -> trivialCode (CMP ULT) x y
      WordLeOp -> trivialCode (CMP ULE) x y

      AddrGtOp -> trivialCode (CMP ULT) y x
      AddrGeOp -> trivialCode (CMP ULE) y x
      AddrEqOp -> trivialCode (CMP EQQ) x y
      AddrNeOp -> int_NE_code x y
      AddrLtOp -> trivialCode (CMP ULT) x y
      AddrLeOp -> trivialCode (CMP ULE) x y

      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y

      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y

      IntAddOp -> trivialCode (ADD Q False) x y
      IntSubOp -> trivialCode (SUB Q False) x y
      IntMulOp -> trivialCode (MUL Q False) x y
      IntQuotOp -> trivialCode (DIV Q False) x y
      IntRemOp -> trivialCode (REM Q False) x y

      WordAddOp -> trivialCode (ADD Q False) x y
      WordSubOp -> trivialCode (SUB Q False) x y
      WordMulOp -> trivialCode (MUL Q False) x y
      WordQuotOp -> trivialCode (DIV Q True) x y
      WordRemOp -> trivialCode (REM Q True) x y

      FloatAddOp -> trivialFCode W32 (FADD TF) x y
      FloatSubOp -> trivialFCode W32 (FSUB TF) x y
      FloatMulOp -> trivialFCode W32 (FMUL TF) x y
      FloatDivOp -> trivialFCode W32 (FDIV TF) x y

      DoubleAddOp -> trivialFCode W64 (FADD TF) x y
      DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
      DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
      DoubleDivOp -> trivialFCode W64 (FDIV TF) x y

      AddrAddOp -> trivialCode (ADD Q False) x y
      AddrSubOp -> trivialCode (SUB Q False) x y
      AddrRemOp -> trivialCode (REM Q True) x y

      AndOp -> trivialCode AND x y
      OrOp  -> trivialCode OR x y
      XorOp -> trivialCode XOR x y
      SllOp -> trivialCode SLL x y
      SrlOp -> trivialCode SRL x y

      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"

      FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
  where
    {- Some bizarre special code for getting condition codes into
       registers.  Integer non-equality is a test for equality followed
       by an XOR with 1.  (Integer comparisons always set the result
       register to 0 or 1.)  Floating point comparisons of any kind
       leave the result in a floating point register, so we need to
       wrangle an integer register out of things.  The same comments
       apply to cmpF_code. -}
    int_NE_code :: StixTree -> StixTree -> NatM Register
    int_NE_code x y
      = trivialCode (CMP EQQ) x y `thenNat` \ register ->
        getNewRegNat IntRep `thenNat` \ tmp ->
        let
            code = registerCode register tmp
            src  = registerName register tmp
            code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
        in
        return (Any IntRep code__2)

    cmpF_code
      :: (Reg -> Reg -> Reg -> Instr)
      -> Cond
      -> StixTree -> StixTree
      -> NatM Register
    cmpF_code instr cond x y
      = trivialFCode pr instr x y `thenNat` \ register ->
        getNewRegNat FF64 `thenNat` \ tmp ->
        getBlockIdNat `thenNat` \ lbl ->
        let
            code = registerCode register tmp
            result = registerName register tmp
            -- Set dst to 1, skip the reset if the condition holds,
            -- otherwise zero dst via the zero register.
            code__2 dst = code . mkSeqInstrs [
                OR zeroh (RIImm (ImmInt 1)) dst,
                BF cond result (ImmCLbl lbl),
                OR zeroh (RIReg zeroh) dst,
                NEWBLOCK lbl]
        in
        return (Any IntRep code__2)

    pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
758 ------------------------------------------------------------
-- Alpha only (dead code): loads and integer/label literals.
-- NOTE(review): this listing is badly gapped here — equation heads,
-- the "if"/guard structure of the StInt case, and the fall-through of
-- the imm__2 case are missing.  Code left exactly as found.
getRegister (CmmLoad pk mem)
  = getAmode mem `thenNat` \ amode ->
       code = amodeCode amode
       src = amodeAddr amode
       size = primRepToSize pk
       code__2 dst = code . mkSeqInstr (LD size dst src)
    return (Any pk code__2)

getRegister (StInt i)
    -- Small literal: OR an immediate with the zero register.
       code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
    return (Any IntRep code)
    -- Large literal: LDI.
       code dst = mkSeqInstr (LDI Q dst src)
    return (Any IntRep code)
       src = ImmInt (fromInteger i)
    -- Label/immediate address: LDA.
       code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
    return (Any PtrRep code)
       imm__2 = case imm of Just x -> x
794 #endif /* alpha_TARGET_ARCH */
796 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-- | i386: a 32-bit float literal is emitted into a read-only data
-- block and loaded onto the x87 stack via GLD.
getRegister (CmmLit (CmmFloat f W32)) = do
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    let code dst =
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat f W32)]
            `consOL` (addr_code `snocOL`
            GLD FF32 addr dst)
    return (Any FF32 code)
-- | i386: double literals.  0.0 and 1.0 have dedicated x87 load
-- instructions (fldz / fld1); anything else goes through a read-only
-- data block like the W32 case.
getRegister (CmmLit (CmmFloat d W64))
  | d == 0.0
  = let code dst = unitOL (GLDZ dst)
    in  return (Any FF64 code)

  | d == 1.0
  = let code dst = unitOL (GLD1 dst)
    in  return (Any FF64 code)

getRegister (CmmLit (CmmFloat d W64)) = do
    lbl <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    let code dst =
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat d W64)]
            `consOL` (addr_code `snocOL`
            GLD FF64 addr dst)
    return (Any FF64 code)
838 #endif /* i386_TARGET_ARCH */
840 #if x86_64_TARGET_ARCH
-- | x86_64: float/double literals in SSE registers.  Zero is cheapest
-- as xor-with-self; everything else loads rip-relative from a
-- read-only data block.
getRegister (CmmLit (CmmFloat 0.0 w)) = do
   let size = floatSize w
       code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
   return (Any size code)

getRegister (CmmLit (CmmFloat f w)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat f w)],
            MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
            ]
    return (Any size code)
  where size = floatSize w
861 #endif /* x86_64_TARGET_ARCH */
863 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
865 -- catch simple cases of zero- or sign-extended load
-- Simple zero-/sign-extending loads to 32 bits: fuse the load and the
-- extension into a single MOVZxL/MOVSxL.
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 load_code)

getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 load_code)

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 load_code)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 load_code)
884 #if x86_64_TARGET_ARCH
886 -- catch simple cases of zero- or sign-extended load
-- x86_64: the same fused zero-/sign-extending loads, widening to 64
-- bits.  Note that a plain 32-bit MOV already zero-extends to 64 bits.
getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 load_code)

getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 load_code)

getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 load_code)

getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 load_code)

getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 load_code)

getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
  load_code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 load_code)
913 #if x86_64_TARGET_ARCH
-- x86_64: PIC base plus a literal displacement is exactly what LEA
-- with rip-relative addressing computes.
getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
  = let target = ripRel (litToImm displacement)
    in return $ Any II64 (\dst -> unitOL $
         LEA II64 (OpAddr target) (OpReg dst))
920 #if x86_64_TARGET_ARCH
-- | x86_64: negate a float by XOR-ing its sign bit with a 128-bit
-- constant (xorps needs a full 16-byte operand).
-- NOTE(review): the first two entries of each LDATA list (alignment +
-- label) were elided in the source listing and are reconstructed here
-- — confirm the exact CmmStatic constructors against the repo.
getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  let
    code dst = x_code dst `appOL` toOL [
        -- This is how gcc does it, so it can't be that bad:
        LDATA ReadOnlyData16 [
                CmmAlign 16,
                CmmDataLabel lbl,
                CmmStaticLit (CmmInt 0x80000000 W32),
                CmmStaticLit (CmmInt 0 W32),
                CmmStaticLit (CmmInt 0 W32),
                CmmStaticLit (CmmInt 0 W32)
                ],
        XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorps, so we need the 128-bit constant
        -- ToDo: rip-relative
        ]
  return (Any FF32 code)

getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
  x_code <- getAnyReg x
  lbl <- getNewLabelNat
  let
    -- This is how gcc does it, so it can't be that bad:
    code dst = x_code dst `appOL` toOL [
        LDATA ReadOnlyData16 [
                CmmAlign 16,
                CmmDataLabel lbl,
                CmmStaticLit (CmmInt 0x8000000000000000 W64),
                CmmStaticLit (CmmInt 0 W64)
                ],
        -- gcc puts an unpck here.  Wonder if we need it.
        XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
        -- xorpd, so we need the 128-bit constant
        ]
  return (Any FF64 code)
962 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- i386/x86_64: unary MachOps — negation, complement, and the many
-- integer/float width conversions.
-- NOTE(review): this listing has dropped the "= case mop of" header
-- and several interleaved #if/#else/#endif lines, so the CPP structure
-- below is incomplete; code left exactly as found.
getRegister (CmmMachOp mop [x]) -- unary MachOps
      -- i386: x87 float negation.
      MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
      MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
      MO_Not w -> triv_ucode NOT (intSize w)

      -- Narrowing to 8/16 bits: handled by register choice, no code.
      MO_UU_Conv W32 W8 -> toI8Reg W32 x
      MO_SS_Conv W32 W8 -> toI8Reg W32 x
      MO_UU_Conv W16 W8 -> toI8Reg W16 x
      MO_SS_Conv W16 W8 -> toI8Reg W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W64 W32 -> conversionNop II64 x
      MO_SS_Conv W64 W32 -> conversionNop II64 x
      MO_UU_Conv W64 W16 -> toI16Reg W64 x
      MO_SS_Conv W64 W16 -> toI16Reg W64 x
      MO_UU_Conv W64 W8 -> toI8Reg W64 x
      MO_SS_Conv W64 W8 -> toI8Reg W64 x

      -- Same-width "conversions" are no-ops.
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- Widening: movzx / movsx.
      MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x

      MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
      -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
      -- However, we don't want the register allocator to throw it
      -- away as an unnecessary reg-to-reg move, so we keep it in
      -- the form of a movzl and print it as a movl later.

#if i386_TARGET_ARCH
      -- x87 keeps everything at full precision, so FF conversions are
      -- no-ops on i386; on x86_64 they are real SSE conversions.
      MO_FF_Conv W32 W64 -> conversionNop FF64 x
      MO_FF_Conv W64 W32 -> conversionNop FF32 x
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)

    -- Helper: unary instruction applied at a given size.
    triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
    triv_ucode instr size = trivialUCode size (instr size) x

    -- signed or unsigned extension.
    integerExtend :: Width -> Width
                  -> (Size -> Operand -> Operand -> Instr)
                  -> CmmExpr -> NatM Register
    integerExtend from to instr expr = do
        (reg,e_code) <- if from == W8 then getByteReg expr
                        else getSomeReg expr
        instr (intSize from) (OpReg reg) (OpReg dst)
        return (Any (intSize to) code)

    toI8Reg :: Width -> CmmExpr -> NatM Register
    toI8Reg new_rep expr
      = do codefn <- getAnyReg expr
           return (Any (intSize new_rep) codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

    toI16Reg = toI8Reg -- for now

    -- Change only the recorded size; emits no instructions.
    conversionNop :: Size -> CmmExpr -> NatM Register
    conversionNop new_size expr
      = do e_code <- getRegister expr
           return (swizzleRegisterRep e_code new_size)
-- Dyadic MachOps: dispatch on the operator and emit compare, arithmetic
-- or shift code via the helpers in the where-clause that follows.
-- NOTE(review): the '= case mop of' line is elided in this excerpt.
getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
      -- floating-point comparisons
      MO_F_Eq w -> condFltReg EQQ x y
      MO_F_Ne w -> condFltReg NE x y
      MO_F_Gt w -> condFltReg GTT x y
      MO_F_Ge w -> condFltReg GE x y
      MO_F_Lt w -> condFltReg LTT x y
      MO_F_Le w -> condFltReg LE x y

      -- integer equality
      MO_Eq rep -> condIntReg EQQ x y
      MO_Ne rep -> condIntReg NE x y

      -- signed integer comparisons
      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      -- unsigned integer comparisons
      MO_U_Gt rep -> condIntReg GU x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU x y
      MO_U_Le rep -> condIntReg LEU x y

#if i386_TARGET_ARCH
      -- i386: x87 float arithmetic
      MO_F_Add w -> trivialFCode w GADD x y
      MO_F_Sub w -> trivialFCode w GSUB x y
      MO_F_Quot w -> trivialFCode w GDIV x y
      MO_F_Mul w -> trivialFCode w GMUL x y
-- NOTE(review): the #endif closing the i386 branch is elided here
#if x86_64_TARGET_ARCH
      -- x86_64: SSE2 float arithmetic
      MO_F_Add w -> trivialFCode w ADD x y
      MO_F_Sub w -> trivialFCode w SUB x y
      MO_F_Quot w -> trivialFCode w FDIV x y
      MO_F_Mul w -> trivialFCode w MUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True True x y
      MO_S_Rem rep -> div_code rep True False x y
      MO_U_Quot rep -> div_code rep False True x y
      MO_U_Rem rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
    --------------------
    -- a plain two-operand binary op, operands may be anywhere
    triv_op width instr = trivialCode width op (Just op) x y
                where op   = instr (intSize width)

    -- Multiply and detect overflow: compare the high word of the result
    -- against the sign-extension of the low word; eax ends up zero iff
    -- they agree (no overflow).
    -- NOTE(review): several scaffolding lines ('let', 'toOL [', the
    -- per-width shift amounts) are elided in this excerpt.
    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
             shift_amt  = case rep of
                           _          -> panic "shift_amt"
             code = a_code `appOL` b_code eax `appOL`
                        IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                        SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                        SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
         return (Fixed size eax code)

    --------------------
    -- NOTE(review): part of shift_code's type signature is elided here
        -> (Size -> Operand -> Operand -> Instr)

    {- Case1: shift length as immediate -}
    shift_code width instr x y@(CmmLit lit) = do
          x_code <- getAnyReg x
              size = intSize width
              = x_code dst `snocOL`
                instr size (OpImm (litToImm lit)) (OpReg dst)
          return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      - do y first, put its result in a fresh tmp, then copy it to %ecx later
      - do y second and put its result into %ecx.  x gets placed in a fresh
        tmp.  This is likely to be better, because the reg alloc can
        eliminate this reg->reg move here (it won't eliminate the other one,
        because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)
    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    -- adding a small immediate goes via the three-operand form (add_int)
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    -- subtracting a small immediate is addition of its negation
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    -- NOTE(review): the 'let'/LEA emission lines are partly elided below
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
            size = intSize width
            imm = ImmInt (fromInteger y)
            (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
        return (Any size code)

    ----------------------
    -- Division/remainder: widen %eax into %edx:%eax (cltd when signed,
    -- zero %edx when unsigned), then issue IDIV/DIV.  The quotient is
    -- delivered in %eax; the remainder case is on lines elided here
    -- (presumably %edx -- TODO confirm against full source).
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
               size = intSize width
               widen | signed    = CLTD size
                     | otherwise = XOR size (OpReg edx) (OpReg edx)
               instr | signed    = IDIV
               code = y_code `appOL`
                      toOL [widen, instr size y_op]
               result | quotient  = eax
           return (Fixed size result code)
-- NOTE(review): this excerpt elides some lines; code below is as found.
-- float load: i386 uses the x87 GLD, otherwise a plain MOV from memory
getRegister (CmmLoad mem pk)
      Amode src mem_code <- getAmode mem
        size = cmmTypeSize pk
        code dst = mem_code `snocOL`
                   IF_ARCH_i386(GLD size src dst,
                                MOV size (OpAddr src) (OpReg dst))
      return (Any size code)

#if i386_TARGET_ARCH
-- i386 integer load, instruction chosen by width
getRegister (CmmLoad mem pk)
      code <- intLoadCode instr mem
      return (Any size code)
        width = typeWidth pk
        size = intSize width
        instr = case width of
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

#if x86_64_TARGET_ARCH
-- Simpler memory load code on x86_64
getRegister (CmmLoad mem pk)
      code <- intLoadCode (MOV size) mem
      return (Any size code)
  where size = intSize $ typeWidth pk

-- literal zero: xor reg,reg is shorter than a mov-immediate
getRegister (CmmLit (CmmInt 0 width))
      size = intSize width
      -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
      adj_size = case size of II64 -> II32; _ -> size
      size1 = IF_ARCH_i386( size, adj_size )
      = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    return (Any size code)

#if x86_64_TARGET_ARCH
  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
getRegister (CmmLit lit)
  | isWord64 (cmmLitType lit), not (isBigLit lit)
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    return (Any II64 code)
    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

-- generic literal: a single mov-immediate of the literal's size
getRegister (CmmLit lit)
      size = cmmTypeSize (cmmLitType lit)
      code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
    return (Any size code)

getRegister other = pprPanic "getRegister(x86)" (ppr other)
-- | Compute the address denoted by @addr_expr@ and return a function
-- that, given a destination register, yields the address computation
-- followed by one load instruction into that register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode mk_load addr_expr = do
    Amode addr addr_code <- getAmode addr_expr
    let load_into dst = addr_code `snocOL` mk_load (OpAddr addr) (OpReg dst)
    return load_into
-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
-- NOTE(review): the equation head ('getAnyReg expr = do') and the final
-- 'anyReg' call are elided in this excerpt.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
    r <- getRegister expr
-- | Turn a 'Register' into a function producing code that leaves the
-- value in a caller-chosen register: an 'Any' already is such a
-- function, while a 'Fixed' gets a trailing reg-to-reg move appended.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg register =
    case register of
        Any _ code          -> return code
        Fixed rep reg fcode ->
            return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
-- NOTE(review): the 'case r of' scaffolding is elided in this excerpt;
-- the visible alternatives distinguish Any / Fixed-virtual / Fixed-real.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
#if x86_64_TARGET_ARCH
getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
getByteReg expr = do
  r <- getRegister expr
      -- Any: materialise into a fresh virtual reg (always byte-addressable)
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
      -- Fixed virtual reg: fine as-is
      | isVirtualReg reg -> return (reg,code)
      -- Fixed real reg: copy into a fresh virtual reg to be safe
      tmp <- getNewRegNat rep
      return (tmp, code `snocOL` reg2reg rep reg tmp)
  -- ToDo: could optimise slightly by checking for byte-addressable
  -- real registers, but that will happen very rarely if at all.

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
      -- Any: a fresh virtual reg is never clobbered
      tmp <- getNewRegNat rep
      return (tmp, code tmp)
      -- only free regs can be clobbered
      | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
        tmp <- getNewRegNat rep
        return (tmp, code `snocOL` reg2reg rep reg tmp)
-- | A register-to-register move of the given size.
reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
#if i386_TARGET_ARCH
  -- i386 x87 float registers need the GMOV pseudo-instruction
  | isFloatSize size = GMOV src dst
  -- NOTE(review): the #endif for the i386 guard above is elided here
  | otherwise = MOV size (OpReg src) (OpReg dst)

#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1395 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1397 #if sparc_TARGET_ARCH
1399 -- getRegister :: CmmExpr -> NatM Register
1401 -- Load a literal float into a float register.
1402 -- The actual literal is stored in a new data area, and we load it
1404 getRegister (CmmLit (CmmFloat f W32)) = do
1406 -- a label for the new data area
1407 lbl <- getNewLabelNat
1408 tmp <- getNewRegNat II32
1410 let code dst = toOL [
1414 CmmStaticLit (CmmFloat f W32)],
1417 SETHI (HI (ImmCLbl lbl)) tmp,
1418 LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1420 return (Any FF32 code)
1422 getRegister (CmmLit (CmmFloat d W64)) = do
1423 lbl <- getNewLabelNat
1424 tmp <- getNewRegNat II32
1425 let code dst = toOL [
1428 CmmStaticLit (CmmFloat d W64)],
1429 SETHI (HI (ImmCLbl lbl)) tmp,
1430 LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
1431 return (Any FF64 code)
1433 getRegister (CmmMachOp mop [x]) -- unary MachOps
1435 MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
1436 MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
1438 MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
1439 MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
1441 MO_FF_Conv W64 W32-> coerceDbl2Flt x
1442 MO_FF_Conv W32 W64-> coerceFlt2Dbl x
1444 MO_FS_Conv from to -> coerceFP2Int from to x
1445 MO_SF_Conv from to -> coerceInt2FP from to x
1447 -- Conversions which are a nop on sparc
1449 | from == to -> conversionNop (intSize to) x
1450 MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
1451 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1452 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1454 MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
1455 MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
1456 MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
1459 MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
1460 MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
1461 MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
1463 other_op -> panic ("Unknown unary mach op: " ++ show mop)
1466 -- | sign extend and widen
1468 :: Width -- ^ width of source expression
1469 -> Width -- ^ width of result
1470 -> CmmExpr -- ^ source expression
1473 integerExtend from to expr
1474 = do -- load the expr into some register
1475 (reg, e_code) <- getSomeReg expr
1476 tmp <- getNewRegNat II32
1478 = case (from, to) of
1485 -- local shift word left to load the sign bit
1486 `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
1488 -- arithmetic shift right to sign extend
1489 `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
1491 return (Any (intSize to) code)
1494 conversionNop new_rep expr
1495 = do e_code <- getRegister expr
1496 return (swizzleRegisterRep e_code new_rep)
1498 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1500 MO_Eq rep -> condIntReg EQQ x y
1501 MO_Ne rep -> condIntReg NE x y
1503 MO_S_Gt rep -> condIntReg GTT x y
1504 MO_S_Ge rep -> condIntReg GE x y
1505 MO_S_Lt rep -> condIntReg LTT x y
1506 MO_S_Le rep -> condIntReg LE x y
1508 MO_U_Gt W32 -> condIntReg GTT x y
1509 MO_U_Ge W32 -> condIntReg GE x y
1510 MO_U_Lt W32 -> condIntReg LTT x y
1511 MO_U_Le W32 -> condIntReg LE x y
1513 MO_U_Gt W16 -> condIntReg GU x y
1514 MO_U_Ge W16 -> condIntReg GEU x y
1515 MO_U_Lt W16 -> condIntReg LU x y
1516 MO_U_Le W16 -> condIntReg LEU x y
1518 MO_Add W32 -> trivialCode W32 (ADD False False) x y
1519 MO_Sub W32 -> trivialCode W32 (SUB False False) x y
1521 MO_S_MulMayOflo rep -> imulMayOflo rep x y
1523 MO_S_Quot W32 -> idiv True False x y
1524 MO_U_Quot W32 -> idiv False False x y
1526 MO_S_Rem W32 -> irem True x y
1527 MO_U_Rem W32 -> irem False x y
1529 MO_F_Eq w -> condFltReg EQQ x y
1530 MO_F_Ne w -> condFltReg NE x y
1532 MO_F_Gt w -> condFltReg GTT x y
1533 MO_F_Ge w -> condFltReg GE x y
1534 MO_F_Lt w -> condFltReg LTT x y
1535 MO_F_Le w -> condFltReg LE x y
1537 MO_F_Add w -> trivialFCode w FADD x y
1538 MO_F_Sub w -> trivialFCode w FSUB x y
1539 MO_F_Mul w -> trivialFCode w FMUL x y
1540 MO_F_Quot w -> trivialFCode w FDIV x y
1542 MO_And rep -> trivialCode rep (AND False) x y
1543 MO_Or rep -> trivialCode rep (OR False) x y
1544 MO_Xor rep -> trivialCode rep (XOR False) x y
1546 MO_Mul rep -> trivialCode rep (SMUL False) x y
1548 MO_Shl rep -> trivialCode rep SLL x y
1549 MO_U_Shr rep -> trivialCode rep SRL x y
1550 MO_S_Shr rep -> trivialCode rep SRA x y
1553 MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1554 [promote x, promote y])
1555 where promote x = CmmMachOp MO_F32_to_Dbl [x]
1556 MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
1559 other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
1561 -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
1564 -- | Generate an integer division instruction.
1565 idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
1567 -- For unsigned division with a 32 bit numerator,
1568 -- we can just clear the Y register.
1569 idiv False cc x y = do
1570 (a_reg, a_code) <- getSomeReg x
1571 (b_reg, b_code) <- getSomeReg y
1578 , UDIV cc a_reg (RIReg b_reg) dst]
1580 return (Any II32 code)
1583 -- For _signed_ division with a 32 bit numerator,
1584 -- we have to sign extend the numerator into the Y register.
1585 idiv True cc x y = do
1586 (a_reg, a_code) <- getSomeReg x
1587 (b_reg, b_code) <- getSomeReg y
1589 tmp <- getNewRegNat II32
1595 [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
1596 , SRA tmp (RIImm (ImmInt 16)) tmp
1599 , SDIV cc a_reg (RIReg b_reg) dst]
1601 return (Any II32 code)
1604 -- | Do an integer remainder.
1606 -- NOTE: The SPARC v8 architecture manual says that integer division
1607 -- instructions _may_ generate a remainder, depending on the implementation.
1608 -- If so it is _recommended_ that the remainder is placed in the Y register.
1610 -- The UltraSparc 2007 manual says Y is _undefined_ after division.
1612 -- The SPARC T2 doesn't store the remainder, not sure about the others.
1613 -- It's probably best not to worry about it, and just generate our own
1616 irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
1618 -- For unsigned operands:
1619 -- Division is between a 64 bit numerator and a 32 bit denominator,
1620 -- so we still have to clear the Y register.
1622 (a_reg, a_code) <- getSomeReg x
1623 (b_reg, b_code) <- getSomeReg y
1625 tmp_reg <- getNewRegNat II32
1632 , UDIV False a_reg (RIReg b_reg) tmp_reg
1633 , UMUL False tmp_reg (RIReg b_reg) tmp_reg
1634 , SUB False False a_reg (RIReg tmp_reg) dst]
1636 return (Any II32 code)
1639 -- For signed operands:
1640 -- Make sure to sign extend into the Y register, or the remainder
1641 -- will have the wrong sign when the numerator is negative.
1643 -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
1644 -- not the full 32. Not sure why this is, something to do with overflow?
1645 -- If anyone cares enough about the speed of signed remainder they
1646 -- can work it out themselves (then tell me). -- BL 2009/01/20
1649 (a_reg, a_code) <- getSomeReg x
1650 (b_reg, b_code) <- getSomeReg y
1652 tmp1_reg <- getNewRegNat II32
1653 tmp2_reg <- getNewRegNat II32
1659 [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1660 , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
1663 , SDIV False a_reg (RIReg b_reg) tmp2_reg
1664 , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
1665 , SUB False False a_reg (RIReg tmp2_reg) dst]
1667 return (Any II32 code)
1670 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
1671 imulMayOflo rep a b = do
1672 (a_reg, a_code) <- getSomeReg a
1673 (b_reg, b_code) <- getSomeReg b
1674 res_lo <- getNewRegNat II32
1675 res_hi <- getNewRegNat II32
1677 shift_amt = case rep of
1680 _ -> panic "shift_amt"
1681 code dst = a_code `appOL` b_code `appOL`
1683 SMUL False a_reg (RIReg b_reg) res_lo,
1685 SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
1686 SUB False False res_lo (RIReg res_hi) dst
1688 return (Any II32 code)
1690 getRegister (CmmLoad mem pk) = do
1691 Amode src code <- getAmode mem
1693 code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
1694 return (Any (cmmTypeSize pk) code__2)
1696 getRegister (CmmLit (CmmInt i _))
1699 src = ImmInt (fromInteger i)
1700 code dst = unitOL (OR False g0 (RIImm src) dst)
1702 return (Any II32 code)
1704 getRegister (CmmLit lit)
1705 = let rep = cmmLitType lit
1709 OR False dst (RIImm (LO imm)) dst]
1710 in return (Any II32 code)
1712 #endif /* sparc_TARGET_ARCH */
1714 #if powerpc_TARGET_ARCH
1715 getRegister (CmmLoad mem pk)
1718 Amode addr addr_code <- getAmode mem
1719 let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
1720 addr_code `snocOL` LD size dst addr
1721 return (Any size code)
1722 where size = cmmTypeSize pk
1724 -- catch simple cases of zero- or sign-extended load
1725 getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
1726 Amode addr addr_code <- getAmode mem
1727 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
1729 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
1731 getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
1732 Amode addr addr_code <- getAmode mem
1733 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
1735 getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
1736 Amode addr addr_code <- getAmode mem
1737 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
1739 getRegister (CmmMachOp mop [x]) -- unary MachOps
1741 MO_Not rep -> triv_ucode_int rep NOT
1743 MO_F_Neg w -> triv_ucode_float w FNEG
1744 MO_S_Neg w -> triv_ucode_int w NEG
1746 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
1747 MO_FF_Conv W32 W64 -> conversionNop FF64 x
1749 MO_FS_Conv from to -> coerceFP2Int from to x
1750 MO_SF_Conv from to -> coerceInt2FP from to x
1753 | from == to -> conversionNop (intSize to) x
1755 -- narrowing is a nop: we treat the high bits as undefined
1756 MO_SS_Conv W32 to -> conversionNop (intSize to) x
1757 MO_SS_Conv W16 W8 -> conversionNop II8 x
1758 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
1759 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
1762 | from == to -> conversionNop (intSize to) x
1763 -- narrowing is a nop: we treat the high bits as undefined
1764 MO_UU_Conv W32 to -> conversionNop (intSize to) x
1765 MO_UU_Conv W16 W8 -> conversionNop II8 x
1766 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
1767 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
1770 triv_ucode_int width instr = trivialUCode (intSize width) instr x
1771 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
1773 conversionNop new_size expr
1774 = do e_code <- getRegister expr
1775 return (swizzleRegisterRep e_code new_size)
1777 getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
1779 MO_F_Eq w -> condFltReg EQQ x y
1780 MO_F_Ne w -> condFltReg NE x y
1781 MO_F_Gt w -> condFltReg GTT x y
1782 MO_F_Ge w -> condFltReg GE x y
1783 MO_F_Lt w -> condFltReg LTT x y
1784 MO_F_Le w -> condFltReg LE x y
1786 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
1787 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
1789 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
1790 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
1791 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
1792 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
1794 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
1795 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
1796 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
1797 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
1799 MO_F_Add w -> triv_float w FADD
1800 MO_F_Sub w -> triv_float w FSUB
1801 MO_F_Mul w -> triv_float w FMUL
1802 MO_F_Quot w -> triv_float w FDIV
1804 -- optimize addition with 32-bit immediate
1808 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
1809 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
1812 (src, srcCode) <- getSomeReg x
1813 let imm = litToImm lit
1814 code dst = srcCode `appOL` toOL [
1815 ADDIS dst src (HA imm),
1816 ADD dst dst (RIImm (LO imm))
1818 return (Any II32 code)
1819 _ -> trivialCode W32 True ADD x y
1821 MO_Add rep -> trivialCode rep True ADD x y
case y of    -- subfi ('subtract from' with immediate) doesn't exist
1824 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
1825 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
1826 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
1828 MO_Mul rep -> trivialCode rep True MULLW x y
1830 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
1832 MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
1833 MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
1835 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
1836 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
1838 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
1839 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
1841 MO_And rep -> trivialCode rep False AND x y
1842 MO_Or rep -> trivialCode rep False OR x y
1843 MO_Xor rep -> trivialCode rep False XOR x y
1845 MO_Shl rep -> trivialCode rep False SLW x y
1846 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
1847 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1849 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
1850 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
1852 getRegister (CmmLit (CmmInt i rep))
1853 | Just imm <- makeImmediate rep True i
1855 code dst = unitOL (LI dst imm)
1857 return (Any (intSize rep) code)
1859 getRegister (CmmLit (CmmFloat f frep)) = do
1860 lbl <- getNewLabelNat
1861 dflags <- getDynFlagsNat
1862 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1863 Amode addr addr_code <- getAmode dynRef
1864 let size = floatSize frep
1866 LDATA ReadOnlyData [CmmDataLabel lbl,
1867 CmmStaticLit (CmmFloat f frep)]
1868 `consOL` (addr_code `snocOL` LD size dst addr)
1869 return (Any size code)
1871 getRegister (CmmLit lit)
1872 = let rep = cmmLitType lit
1876 ADD dst dst (RIImm (LO imm))
1878 in return (Any (cmmTypeSize rep) code)
1880 getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-- | Wrap an integer expression of width @rep@ in a sign-extending
-- conversion to W32; a 32-bit expression is passed through untouched.
extendSExpr rep x
    | rep == W32 = x
    | otherwise  = CmmMachOp (MO_SS_Conv rep W32) [x]

-- | Like 'extendSExpr', but zero-extending (unsigned conversion).
extendUExpr rep x
    | rep == W32 = x
    | otherwise  = CmmMachOp (MO_UU_Conv rep W32) [x]
1889 #endif /* powerpc_TARGET_ARCH */
1892 -- -----------------------------------------------------------------------------
1893 -- The 'Amode' type: Memory addressing modes passed up the tree.
1895 data Amode = Amode AddrMode InstrBlock
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.
1901 A Rule of the Game (tm) for Amodes: use of the addr bit must
1902 immediately follow use of the code part, since the code part puts
1903 values in registers which the addr then refers to. So you can't put
1904 anything in between, lest it overwrite some of those registers. If
1905 you need to do some other computation between the code part and use of
1906 the addr bit, first store the effective address from the amode in a
1907 temporary, then do the other computation, and then use the temporary:
1911 ... other computation ...
1915 getAmode :: CmmExpr -> NatM Amode
1916 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1918 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1920 #if alpha_TARGET_ARCH
1922 getAmode (StPrim IntSubOp [x, StInt i])
1923 = getNewRegNat PtrRep `thenNat` \ tmp ->
1924 getRegister x `thenNat` \ register ->
1926 code = registerCode register tmp
1927 reg = registerName register tmp
1928 off = ImmInt (-(fromInteger i))
1930 return (Amode (AddrRegImm reg off) code)
1932 getAmode (StPrim IntAddOp [x, StInt i])
1933 = getNewRegNat PtrRep `thenNat` \ tmp ->
1934 getRegister x `thenNat` \ register ->
1936 code = registerCode register tmp
1937 reg = registerName register tmp
1938 off = ImmInt (fromInteger i)
1940 return (Amode (AddrRegImm reg off) code)
1944 = return (Amode (AddrImm imm__2) id)
1947 imm__2 = case imm of Just x -> x
1950 = getNewRegNat PtrRep `thenNat` \ tmp ->
1951 getRegister other `thenNat` \ register ->
1953 code = registerCode register tmp
1954 reg = registerName register tmp
1956 return (Amode (AddrReg reg) code)
1958 #endif /* alpha_TARGET_ARCH */
1960 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1962 #if x86_64_TARGET_ARCH
1964 getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1965 CmmLit displacement])
1966 = return $ Amode (ripRel (litToImm displacement)) nilOL
1970 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1972 -- This is all just ridiculous, since it carefully undoes
1973 -- what mangleIndexTree has just done.
1974 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
1976 -- ASSERT(rep == II32)???
1977 = do (x_reg, x_code) <- getSomeReg x
1978 let off = ImmInt (-(fromInteger i))
1979 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1981 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
1983 -- ASSERT(rep == II32)???
1984 = do (x_reg, x_code) <- getSomeReg x
1985 let off = ImmInt (fromInteger i)
1986 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1988 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1989 -- recognised by the next rule.
1990 getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1992 = getAmode (CmmMachOp (MO_Add rep) [b,a])
1994 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
1995 [y, CmmLit (CmmInt shift _)]])
1996 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1997 = x86_complex_amode x y shift 0
1999 getAmode (CmmMachOp (MO_Add rep)
2000 [x, CmmMachOp (MO_Add _)
2001 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
2002 CmmLit (CmmInt offset _)]])
2003 | shift == 0 || shift == 1 || shift == 2 || shift == 3
2004 && is32BitInteger offset
2005 = x86_complex_amode x y shift offset
2007 getAmode (CmmMachOp (MO_Add rep) [x,y])
2008 = x86_complex_amode x y 0 0
2010 getAmode (CmmLit lit) | is32BitLit lit
2011 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
2014 (reg,code) <- getSomeReg expr
2015 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-- | Build a base + (index << shift) + offset addressing mode.
-- NOTE(review): the 'let' and closing lines are elided in this excerpt.
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
           code = x_code `appOL` y_code
           -- the scale factor encoded by the shift amount
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
2030 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2032 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2034 #if sparc_TARGET_ARCH
2036 getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
2039 (reg, code) <- getSomeReg x
2041 off = ImmInt (-(fromInteger i))
2042 return (Amode (AddrRegImm reg off) code)
2045 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
2048 (reg, code) <- getSomeReg x
2050 off = ImmInt (fromInteger i)
2051 return (Amode (AddrRegImm reg off) code)
2053 getAmode (CmmMachOp (MO_Add rep) [x, y])
2055 (regX, codeX) <- getSomeReg x
2056 (regY, codeY) <- getSomeReg y
2058 code = codeX `appOL` codeY
2059 return (Amode (AddrRegReg regX regY) code)
2061 -- XXX Is this same as "leaf" in Stix?
2062 getAmode (CmmLit lit)
2064 tmp <- getNewRegNat II32
2066 code = unitOL (SETHI (HI imm__2) tmp)
2067 return (Amode (AddrRegImm tmp (LO imm__2)) code)
2069 imm__2 = litToImm lit
2073 (reg, code) <- getSomeReg other
2076 return (Amode (AddrRegImm reg off) code)
2078 #endif /* sparc_TARGET_ARCH */
2080 #ifdef powerpc_TARGET_ARCH
2081 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
2082 | Just off <- makeImmediate W32 True (-i)
2084 (reg, code) <- getSomeReg x
2085 return (Amode (AddrRegImm reg off) code)
2088 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
2089 | Just off <- makeImmediate W32 True i
2091 (reg, code) <- getSomeReg x
2092 return (Amode (AddrRegImm reg off) code)
2094 -- optimize addition with 32-bit immediate
2096 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
2098 tmp <- getNewRegNat II32
2099 (src, srcCode) <- getSomeReg x
2100 let imm = litToImm lit
2101 code = srcCode `snocOL` ADDIS tmp src (HA imm)
2102 return (Amode (AddrRegImm tmp (LO imm)) code)
2104 getAmode (CmmLit lit)
2106 tmp <- getNewRegNat II32
2107 let imm = litToImm lit
2108 code = unitOL (LIS tmp (HA imm))
2109 return (Amode (AddrRegImm tmp (LO imm)) code)
2111 getAmode (CmmMachOp (MO_Add W32) [x, y])
2113 (regX, codeX) <- getSomeReg x
2114 (regY, codeY) <- getSomeReg y
2115 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
2119 (reg, code) <- getSomeReg other
2122 return (Amode (AddrRegImm reg off) code)
2123 #endif /* powerpc_TARGET_ARCH */
2125 -- -----------------------------------------------------------------------------
2126 -- getOperand: sometimes any operand will do.
2128 -- getNonClobberedOperand: the value of the operand will remain valid across
2129 -- the computation of an arbitrary expression, unless the expression
2130 -- is computed directly into a register which the operand refers to
2131 -- (see trivialCode where this function is used for an example).
2133 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Produce an operand whose value remains valid across the computation
-- of an arbitrary expression (unless that expression is computed directly
-- into a register the operand refers to -- see the note above).
-- NOTE(review): some interior lines are elided in this excerpt.
2135 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2136 #if x86_64_TARGET_ARCH
-- x86-64: a non-zero floating-point literal is emitted into a read-only
-- data section and addressed RIP-relatively, so it is never clobbered.
2137 getNonClobberedOperand (CmmLit lit)
2138 | isSuitableFloatingPointLit lit = do
2139 lbl <- getNewLabelNat
2140 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2142 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals can be immediates; immediates are stable.
2144 getNonClobberedOperand (CmmLit lit)
2145 | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
2146 return (OpImm (litToImm lit), nilOL)
-- A load: safe to use the address directly unless one of the address's
-- registers could be clobbered, in which case we LEA the effective
-- address into a fresh temporary first.
2147 getNonClobberedOperand (CmmLoad mem pk)
2148 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2149 Amode src mem_code <- getAmode mem
2151 if (amodeCouldBeClobbered src)
2153 tmp <- getNewRegNat wordSize
2154 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
2155 unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
2158 return (OpAddr src', save_code `appOL` mem_code)
-- Fallback: evaluate into a non-clobberable register.
2159 getNonClobberedOperand e = do
2160 (reg, code) <- getNonClobberedReg e
2161 return (OpReg reg, code)
-- | True if any register mentioned by the addressing mode is a real
-- (caller-visible) register that is allocatable, i.e. one the register
-- allocator may reuse -- and hence could be clobbered by later code.
2163 amodeCouldBeClobbered :: AddrMode -> Bool
2164 amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-- Virtual registers are never clobbered here; only free real registers are.
2166 regClobbered (RealReg rr) = isFastTrue (freeReg rr)
2167 regClobbered _ = False
2169 -- getOperand: the operand is not required to remain valid across the
2170 -- computation of an arbitrary expression.
2171 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
2172 #if x86_64_TARGET_ARCH
-- x86-64: non-zero FP literals live in read-only data, RIP-relative.
2173 getOperand (CmmLit lit)
2174 | isSuitableFloatingPointLit lit = do
2175 lbl <- getNewLabelNat
2176 let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
2178 return (OpAddr (ripRel (ImmCLbl lbl)), code)
-- Small non-float literals become immediate operands; no code needed.
2180 getOperand (CmmLit lit)
2181 | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
2182 return (OpImm (litToImm lit), nilOL)
-- A load can be used as a memory operand directly (on i386 only for
-- non-float, non-64-bit loads).
2183 getOperand (CmmLoad mem pk)
2184 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2185 Amode src mem_code <- getAmode mem
2186 return (OpAddr src, mem_code)
-- Fallback: evaluate the expression into some register.
2188 (reg, code) <- getSomeReg e
2189 return (OpReg reg, code)
-- | Can this expression be used as an instruction operand directly
-- (i.e. without first evaluating it into a register)?
2191 isOperand :: CmmExpr -> Bool
2192 isOperand (CmmLoad _ _) = True
2193 isOperand (CmmLit lit) = is32BitLit lit
2194 || isSuitableFloatingPointLit lit
2197 -- if we want a floating-point literal as an operand, we can
2198 -- use it directly from memory. However, if the literal is
2199 -- zero, we're better off generating it into a register using
-- (continuation of the note above is elided in this excerpt; presumably
-- "using xor" -- the standard zeroing idiom. TODO confirm.)
2201 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
2202 isSuitableFloatingPointLit _ = False
-- | Like 'getOperand', but the fallback evaluates into a NON-clobberable
-- register ('getNonClobberedReg'), for callers that need the operand to
-- survive subsequent computation.
2204 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
2205 getRegOrMem (CmmLoad mem pk)
2206 | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
2207 Amode src mem_code <- getAmode mem
2208 return (OpAddr src, mem_code)
2210 (reg, code) <- getNonClobberedReg e
2211 return (OpReg reg, code)
2213 #if x86_64_TARGET_ARCH
-- On x86-64, a 64-bit integer literal is only usable as an immediate if
-- it fits in a signed 32 bits. (Other clauses of is32BitLit are elided
-- in this excerpt.)
2214 is32BitLit (CmmInt i W64) = is32BitInteger i
2215 -- assume that labels are in the range 0-2^31-1: this assumes the
2216 -- small memory model (see gcc docs, -mcmodel=small).
-- | Does this 'Integer' fit in a signed 32-bit immediate?
--
-- The argument is truncated to 'Int64' before the range test: a CmmInt
-- is intended to be truncated to the appropriate number of bits, so
-- e.g. (-1) as a CmmInt might arrive here as either -1 or
-- 18446744073709551615, and both must be treated identically.
is32BitInteger :: Integer -> Bool
is32BitInteger i = minBound32 <= i64 && i64 <= maxBound32
  where
    i64 :: Int64
    i64 = fromIntegral i
    minBound32 = -0x80000000
    maxBound32 = 0x7fffffff
2229 -- -----------------------------------------------------------------------------
2230 -- The 'CondCode' type: Condition codes passed up the tree.
-- The Bool records whether the comparison was floating-point: condIntCode
-- constructs 'CondCode False ...' and condFltCode 'CondCode True ...'
-- (see their definitions below).
2232 data CondCode = CondCode Bool Cond InstrBlock
2234 -- Set up a condition code for a conditional branch.
2236 getCondCode :: CmmExpr -> NatM CondCode
2238 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2240 #if alpha_TARGET_ARCH
-- Alpha has no separate condition-code path in this code generator.
2241 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2242 #endif /* alpha_TARGET_ARCH */
2244 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2246 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2247 -- yes, they really do seem to want exactly the same!
-- Dispatch on the comparison MachOp: float comparisons go through
-- condFltCode, integer comparisons through condIntCode. (The 'case mop
-- of' header line is elided in this excerpt.)
2249 getCondCode (CmmMachOp mop [x, y])
2252 MO_F_Eq W32 -> condFltCode EQQ x y
2253 MO_F_Ne W32 -> condFltCode NE x y
2254 MO_F_Gt W32 -> condFltCode GTT x y
2255 MO_F_Ge W32 -> condFltCode GE x y
2256 MO_F_Lt W32 -> condFltCode LTT x y
2257 MO_F_Le W32 -> condFltCode LE x y
2259 MO_F_Eq W64 -> condFltCode EQQ x y
2260 MO_F_Ne W64 -> condFltCode NE x y
2261 MO_F_Gt W64 -> condFltCode GTT x y
2262 MO_F_Ge W64 -> condFltCode GE x y
2263 MO_F_Lt W64 -> condFltCode LTT x y
2264 MO_F_Le W64 -> condFltCode LE x y
2266 MO_Eq rep -> condIntCode EQQ x y
2267 MO_Ne rep -> condIntCode NE x y
2269 MO_S_Gt rep -> condIntCode GTT x y
2270 MO_S_Ge rep -> condIntCode GE x y
2271 MO_S_Lt rep -> condIntCode LTT x y
2272 MO_S_Le rep -> condIntCode LE x y
2274 MO_U_Gt rep -> condIntCode GU x y
2275 MO_U_Ge rep -> condIntCode GEU x y
2276 MO_U_Lt rep -> condIntCode LU x y
2277 MO_U_Le rep -> condIntCode LEU x y
2279 other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
2281 getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2283 #elif powerpc_TARGET_ARCH
2285 -- almost the same as everywhere else - but we need to
2286 -- extend small integers to 32 bit first
-- PowerPC: integer operands are widened to 32 bits before comparison,
-- sign-extended for signed comparisons and zero-extended for unsigned
-- ones (and for equality, where either works).
2288 getCondCode (CmmMachOp mop [x, y])
2290 MO_F_Eq W32 -> condFltCode EQQ x y
2291 MO_F_Ne W32 -> condFltCode NE x y
2292 MO_F_Gt W32 -> condFltCode GTT x y
2293 MO_F_Ge W32 -> condFltCode GE x y
2294 MO_F_Lt W32 -> condFltCode LTT x y
2295 MO_F_Le W32 -> condFltCode LE x y
2297 MO_F_Eq W64 -> condFltCode EQQ x y
2298 MO_F_Ne W64 -> condFltCode NE x y
2299 MO_F_Gt W64 -> condFltCode GTT x y
2300 MO_F_Ge W64 -> condFltCode GE x y
2301 MO_F_Lt W64 -> condFltCode LTT x y
2302 MO_F_Le W64 -> condFltCode LE x y
2304 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
2305 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
2307 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
2308 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
2309 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
2310 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
2312 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
2313 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
2314 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
2315 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
2317 other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
2319 getCondCode other = panic "getCondCode(2)(powerpc)"
2325 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2326 -- passed back up the tree.
-- Arguments: the comparison condition, then the two operand expressions.
2328 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
2330 #if alpha_TARGET_ARCH
2331 condIntCode = panic "MachCode.condIntCode: not on Alphas"
2332 condFltCode = panic "MachCode.condFltCode: not on Alphas"
2333 #endif /* alpha_TARGET_ARCH */
2335 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2336 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2338 -- memory vs immediate
-- Compare a memory operand against a small immediate with a single CMP.
2339 condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
2340 Amode x_addr x_code <- getAmode x
2343 code = x_code `snocOL`
2344 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
2346 return (CondCode False cond code)
2348 -- anything vs zero, using a mask
2349 -- TODO: Add some sanity checking!!!!
-- (x .&. mask) compared to zero: use TEST with the mask as immediate.
2350 condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
2351 | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
2353 (x_reg, x_code) <- getSomeReg x
2355 code = x_code `snocOL`
2356 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
2358 return (CondCode False cond code)
-- anything vs zero: TEST a register against itself, avoiding a CMP.
2361 condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
2362 (x_reg, x_code) <- getSomeReg x
2364 code = x_code `snocOL`
2365 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
2367 return (CondCode False cond code)
2369 -- anything vs operand
-- x must survive the evaluation of y, hence getNonClobberedReg.
2370 condIntCode cond x y | isOperand y = do
2371 (x_reg, x_code) <- getNonClobberedReg x
2372 (y_op, y_code) <- getOperand y
2374 code = x_code `appOL` y_code `snocOL`
2375 CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
2377 return (CondCode False cond code)
2379 -- anything vs anything
2380 condIntCode cond x y = do
2381 (y_reg, y_code) <- getNonClobberedReg y
2382 (x_op, x_code) <- getRegOrMem x
2384 code = y_code `appOL`
2386 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
2388 return (CondCode False cond code)
2391 #if i386_TARGET_ARCH
-- i386 uses the x87-style GCMP pseudo-instruction. Only the listed
-- conditions are supported (no unordered-aware NE path here).
2392 condFltCode cond x y
2393 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
2394 (x_reg, x_code) <- getNonClobberedReg x
2395 (y_reg, y_code) <- getSomeReg y
2397 code = x_code `appOL` y_code `snocOL`
2398 GCMP cond x_reg y_reg
2399 -- The GCMP insn does the test and sets the zero flag if comparable
2400 -- and true. Hence we always supply EQQ as the condition to test.
2401 return (CondCode True EQQ code)
2402 #endif /* i386_TARGET_ARCH */
2404 #if x86_64_TARGET_ARCH
2405 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2406 -- an operand, but the right must be a reg. We can probably do better
2407 -- than this general case...
2408 condFltCode cond x y = do
2409 (x_reg, x_code) <- getNonClobberedReg x
2410 (y_op, y_code) <- getOperand y
2412 code = x_code `appOL`
2414 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
2415 -- NB(1): we need to use the unsigned comparison operators on the
2416 -- result of this comparison.
2418 return (CondCode True (condToUnsigned cond) code)
2421 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2423 #if sparc_TARGET_ARCH
-- SPARC: compare via SUBcc against %g0 (a subtract that only sets the
-- condition codes).
2425 condIntCode cond x (CmmLit (CmmInt y rep))
2428 (src1, code) <- getSomeReg x
2430 src2 = ImmInt (fromInteger y)
2431 code' = code `snocOL` SUB False True src1 (RIImm src2) g0
2432 return (CondCode False cond code')
2434 condIntCode cond x y = do
2435 (src1, code1) <- getSomeReg x
2436 (src2, code2) <- getSomeReg y
2438 code__2 = code1 `appOL` code2 `snocOL`
2439 SUB False True src1 (RIReg src2) g0
2440 return (CondCode False cond code__2)
-- Float comparison: if the two operand types differ, promote the F32
-- side to F64 (FxTOy) before the FCMP so both sides match.
2443 condFltCode cond x y = do
2444 (src1, code1) <- getSomeReg x
2445 (src2, code2) <- getSomeReg y
2446 tmp <- getNewRegNat FF64
2448 promote x = FxTOy FF32 FF64 x tmp
2454 if pk1 `cmmEqType` pk2 then
2455 code1 `appOL` code2 `snocOL`
2456 FCMP True (cmmTypeSize pk1) src1 src2
2457 else if typeWidth pk1 == W32 then
2458 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
2459 FCMP True FF64 tmp src2
2461 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
2462 FCMP True FF64 src1 tmp
2463 return (CondCode True cond code__2)
2465 #endif /* sparc_TARGET_ARCH */
2467 #if powerpc_TARGET_ARCH
2468 -- ###FIXME: I16 and I8!
-- PowerPC: use CMPL for unsigned and CMP for signed comparisons; an
-- immediate form when the literal fits ('makeImmediate').
2469 condIntCode cond x (CmmLit (CmmInt y rep))
2470 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
2472 (src1, code) <- getSomeReg x
2474 code' = code `snocOL`
2475 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
2476 return (CondCode False cond code')
2478 condIntCode cond x y = do
2479 (src1, code1) <- getSomeReg x
2480 (src2, code2) <- getSomeReg y
2482 code' = code1 `appOL` code2 `snocOL`
2483 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
2484 return (CondCode False cond code')
2486 condFltCode cond x y = do
2487 (src1, code1) <- getSomeReg x
2488 (src2, code2) <- getSomeReg y
2490 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
2491 code'' = case cond of -- twiddle CR to handle unordered case
2492 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
2493 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-- CR bit positions used by the CRNOR fixups above.
2496 ltbit = 0 ; eqbit = 2 ; gtbit = 1
2497 return (CondCode True cond code'')
2499 #endif /* powerpc_TARGET_ARCH */
2501 -- -----------------------------------------------------------------------------
2502 -- Generating assignments
2504 -- Assignments are really at the heart of the whole code generation
2505 -- business. Almost all top-level nodes of any real importance are
2506 -- assignments, which correspond to loads, stores, or register
2507 -- transfers. If we're really lucky, some of the register transfers
2508 -- will go away, because we can use the destination register to
2509 -- complete the code generation for the right hand side. This only
2510 -- fails when the right hand side is forced into a fixed register
2511 -- (e.g. the result of a call).
-- Assignment code generators. The Mem variants take the destination
-- address expression; the Reg variants take the destination CmmReg.
-- In each case the last argument is the right-hand-side expression.
2513 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2514 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2516 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
2517 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
2519 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2521 #if alpha_TARGET_ARCH
-- Alpha (pre-OrdList style, using thenNat chains): store to memory.
2523 assignIntCode pk (CmmLoad dst _) src
2524 = getNewRegNat IntRep `thenNat` \ tmp ->
2525 getAmode dst `thenNat` \ amode ->
2526 getRegister src `thenNat` \ register ->
2528 code1 = amodeCode amode []
2529 dst__2 = amodeAddr amode
2530 code2 = registerCode register tmp []
2531 src__2 = registerName register tmp
2532 sz = primRepToSize pk
2533 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register-to-register assignment; a fixed source needs an explicit
-- move (OR with itself into the destination).
2537 assignIntCode pk dst src
2538 = getRegister dst `thenNat` \ register1 ->
2539 getRegister src `thenNat` \ register2 ->
2541 dst__2 = registerName register1 zeroh
2542 code = registerCode register2 dst__2
2543 src__2 = registerName register2 dst__2
2544 code__2 = if isFixed register2
2545 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
2550 #endif /* alpha_TARGET_ARCH */
2552 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2554 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2556 -- integer assignment to memory
2558 -- specific case of adding/subtracting an integer to a particular address.
2559 -- ToDo: catch other cases where we can use an operation directly on a memory
-- m[addr] := m[addr] `op` imm  ==>  a single read-modify-write ADD/SUB
-- on the memory operand (only when the immediate fits, for 64-bit).
2561 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
2562 CmmLit (CmmInt i _)])
2563 | addr == addr2, pk /= II64 || is32BitInteger i,
2564 Just instr <- check op
2565 = do Amode amode code_addr <- getAmode addr
2566 let code = code_addr `snocOL`
2567 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
2570 check (MO_Add _) = Just ADD
2571 check (MO_Sub _) = Just SUB
-- General case: compute a register-or-immediate for the source, then
-- a single MOV to the destination address.
2576 assignMem_IntCode pk addr src = do
2577 Amode addr code_addr <- getAmode addr
2578 (code_src, op_src) <- get_op_RI src
2580 code = code_src `appOL`
2582 MOV pk op_src (OpAddr addr)
2583 -- NOTE: op_src is stable, so it will still be valid
2584 -- after code_addr. This may involve the introduction
2585 -- of an extra MOV to a temporary register, but we hope
2586 -- the register allocator will get rid of it.
2590 get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
2591 get_op_RI (CmmLit lit) | is32BitLit lit
2592 = return (nilOL, OpImm (litToImm lit))
2594 = do (reg,code) <- getNonClobberedReg op
2595 return (code, OpReg reg)
2598 -- Assign; dst is a reg, rhs is mem
2599 assignReg_IntCode pk reg (CmmLoad src _) = do
2600 load_code <- intLoadCode (MOV pk) src
2601 return (load_code (getRegisterReg reg))
2603 -- dst is a reg, but src could be anything
2604 assignReg_IntCode pk reg src = do
2605 code <- getAnyReg src
2606 return (code (getRegisterReg reg))
2608 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2610 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2612 #if sparc_TARGET_ARCH
-- SPARC: evaluate the source into a register and store it.
2614 assignMem_IntCode pk addr src = do
2615 (srcReg, code) <- getSomeReg src
2616 Amode dstAddr addr_code <- getAmode addr
2617 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-- Register destination: generate directly into it when the source is
-- Any; a Fixed source needs an explicit register move (OR via %g0).
2619 assignReg_IntCode pk reg src = do
2620 r <- getRegister src
2622 Any _ code -> code dst
2623 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
2625 dst = getRegisterReg reg
2628 #endif /* sparc_TARGET_ARCH */
2630 #if powerpc_TARGET_ARCH
-- PowerPC: same shape as SPARC; the register move is MR.
2632 assignMem_IntCode pk addr src = do
2633 (srcReg, code) <- getSomeReg src
2634 Amode dstAddr addr_code <- getAmode addr
2635 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
2637 -- dst is a reg, but src could be anything
2638 assignReg_IntCode pk reg src
2640 r <- getRegister src
2642 Any _ code -> code dst
2643 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
2645 dst = getRegisterReg reg
2647 #endif /* powerpc_TARGET_ARCH */
2650 -- -----------------------------------------------------------------------------
2651 -- Floating-point assignments
2653 #if alpha_TARGET_ARCH
-- Alpha floating-point assignment: store to memory.
2655 assignFltCode pk (CmmLoad dst _) src
2656 = getNewRegNat pk `thenNat` \ tmp ->
2657 getAmode dst `thenNat` \ amode ->
2658 getRegister src `thenNat` \ register ->
2660 code1 = amodeCode amode []
2661 dst__2 = amodeAddr amode
2662 code2 = registerCode register tmp []
2663 src__2 = registerName register tmp
2664 sz = primRepToSize pk
2665 code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-- Register destination; a fixed source needs an explicit FMOV.
2669 assignFltCode pk dst src
2670 = getRegister dst `thenNat` \ register1 ->
2671 getRegister src `thenNat` \ register2 ->
2673 dst__2 = registerName register1 zeroh
2674 code = registerCode register2 dst__2
2675 src__2 = registerName register2 dst__2
2676 code__2 = if isFixed register2
2677 then code . mkSeqInstr (FMOV src__2 dst__2)
2682 #endif /* alpha_TARGET_ARCH */
2684 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2686 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2688 -- Floating point assignment to memory
-- i386 stores via the x87 GST pseudo-op; x86-64 uses a plain (SSE) MOV.
2689 assignMem_FltCode pk addr src = do
2690 (src_reg, src_code) <- getNonClobberedReg src
2691 Amode addr addr_code <- getAmode addr
2693 code = src_code `appOL`
2695 IF_ARCH_i386(GST pk src_reg addr,
2696 MOV pk (OpReg src_reg) (OpAddr addr))
2699 -- Floating point assignment to a register/temporary
2700 assignReg_FltCode pk reg src = do
2701 src_code <- getAnyReg src
2702 return (src_code (getRegisterReg reg))
2704 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2706 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2708 #if sparc_TARGET_ARCH
2710 -- Floating point assignment to memory
-- If the source's float width differs from the store size, convert
-- through a temporary (FxTOy) before storing.
2711 assignMem_FltCode pk addr src = do
2712 Amode dst__2 code1 <- getAmode addr
2713 (src__2, code2) <- getSomeReg src
2714 tmp1 <- getNewRegNat pk
2716 pk__2 = cmmExprType src
2717 code__2 = code1 `appOL` code2 `appOL`
2718 if sizeToWidth pk == typeWidth pk__2
2719 then unitOL (ST pk src__2 dst__2)
2720 else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
2721 , ST pk tmp1 dst__2]
2724 -- Floating point assignment to a register/temporary
2725 assignReg_FltCode pk dstCmmReg srcCmmExpr = do
2726 srcRegister <- getRegister srcCmmExpr
2727 let dstReg = getRegisterReg dstCmmReg
2729 return $ case srcRegister of
2730 Any _ code -> code dstReg
2731 Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
2733 #endif /* sparc_TARGET_ARCH */
2735 #if powerpc_TARGET_ARCH
-- PowerPC: float and integer assignments generate identical code shapes,
-- so reuse the integer versions.
2738 assignMem_FltCode = assignMem_IntCode
2739 assignReg_FltCode = assignReg_IntCode
2741 #endif /* powerpc_TARGET_ARCH */
2744 -- -----------------------------------------------------------------------------
2745 -- Generating an non-local jump
2747 -- (If applicable) Do not fill the delay slots here; you will confuse the
2748 -- register allocator.
-- | Generate a non-local (tail-call style) jump to the target expression.
2750 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2752 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2754 #if alpha_TARGET_ARCH
2756 genJump (CmmLabel lbl)
2757 | isAsmTemp lbl = returnInstr (BR target)
2758 | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
2760 target = ImmCLbl lbl
-- Computed jump: evaluate the target into pv, then JMP through it.
2763 = getRegister tree `thenNat` \ register ->
2764 getNewRegNat PtrRep `thenNat` \ tmp ->
2766 dst = registerName register pv
2767 code = registerCode register pv
2768 target = registerName register pv
2770 if isFixed register then
2771 returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
2773 return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2775 #endif /* alpha_TARGET_ARCH */
2777 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2779 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- x86: jump indirect through memory, to an immediate label, or through
-- a register, in decreasing order of specificity.
2781 genJump (CmmLoad mem pk) = do
2782 Amode target code <- getAmode mem
2783 return (code `snocOL` JMP (OpAddr target))
2785 genJump (CmmLit lit) = do
2786 return (unitOL (JMP (OpImm (litToImm lit))))
2789 (reg,code) <- getSomeReg expr
2790 return (code `snocOL` JMP (OpReg reg))
2792 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2794 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2796 #if sparc_TARGET_ARCH
-- SPARC: delay slots are deliberately filled with NOP here (see the
-- comment above about not confusing the register allocator).
2798 genJump (CmmLit (CmmLabel lbl))
2799 = return (toOL [CALL (Left target) 0 True, NOP])
2801 target = ImmCLbl lbl
2805 (target, code) <- getSomeReg tree
2806 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
2808 #endif /* sparc_TARGET_ARCH */
2810 #if powerpc_TARGET_ARCH
-- PowerPC: direct jump to a label, or move the target into CTR and BCTR.
2811 genJump (CmmLit (CmmLabel lbl))
2812 = return (unitOL $ JMP lbl)
2816 (target,code) <- getSomeReg tree
2817 return (code `snocOL` MTCTR target `snocOL` BCTR [])
2818 #endif /* powerpc_TARGET_ARCH */
2821 -- -----------------------------------------------------------------------------
2822 -- Unconditional branches
-- | Emit an unconditional branch to the given local block label.
genBranch :: BlockId -> NatM InstrBlock
genBranch bid = return (toOL (mkBranchInstr bid))
2828 -- -----------------------------------------------------------------------------
2829 -- Conditional jumps
2832 Conditional jumps are always to local labels, so we can use branch
2833 instructions. We peek at the arguments to decide what kind of
2836 ALPHA: For comparisons with 0, we're laughing, because we can just do
2837 the desired conditional branch.
2839 I386: First, we have to ensure that the condition
2840 codes are set according to the supplied comparison operation.
2842 SPARC: First, we have to ensure that the condition codes are set
2843 according to the supplied comparison operation. We generate slightly
2844 different code for floating point comparisons, because a floating
2845 point operation cannot directly precede a @BF@. We assume the worst
2846 and fill that slot with a @NOP@.
2848 SPARC: Do not fill the delay slots here; you will confuse the register
-- (genCondJump's signature; the leading 'genCondJump' and trailing
-- result-type lines are elided in this excerpt.)
2854 :: BlockId -- the branch target
2855 -> CmmExpr -- the condition on which to branch
2858 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2860 #if alpha_TARGET_ARCH
-- Alpha (pre-Cmm Stix code, kept under #if alpha only):
-- comparison against integer zero branches directly with BI.
2862 genCondJump id (StPrim op [x, StInt 0])
2863 = getRegister x `thenNat` \ register ->
2864 getNewRegNat (registerRep register)
2867 code = registerCode register tmp
2868 value = registerName register tmp
2869 pk = registerRep register
2870 target = ImmCLbl lbl
2872 returnSeq code [BI (cmpOp op) value target]
-- Map each comparison primop to the branch condition used against zero.
2874 cmpOp CharGtOp = GTT
2876 cmpOp CharEqOp = EQQ
2878 cmpOp CharLtOp = LTT
-- Unsigned compares against zero degenerate: >=0 is ALWAYS, <0 is NEVER.
2887 cmpOp WordGeOp = ALWAYS
2888 cmpOp WordEqOp = EQQ
2890 cmpOp WordLtOp = NEVER
2891 cmpOp WordLeOp = EQQ
2893 cmpOp AddrGeOp = ALWAYS
2894 cmpOp AddrEqOp = EQQ
2896 cmpOp AddrLtOp = NEVER
2897 cmpOp AddrLeOp = EQQ
-- Comparison against floating-point zero: branch with BF.
2899 genCondJump lbl (StPrim op [x, StDouble 0.0])
2900 = getRegister x `thenNat` \ register ->
2901 getNewRegNat (registerRep register)
2904 code = registerCode register tmp
2905 value = registerName register tmp
2906 pk = registerRep register
2907 target = ImmCLbl lbl
2909 return (code . mkSeqInstr (BF (cmpOp op) value target))
2911 cmpOp FloatGtOp = GTT
2912 cmpOp FloatGeOp = GE
2913 cmpOp FloatEqOp = EQQ
2914 cmpOp FloatNeOp = NE
2915 cmpOp FloatLtOp = LTT
2916 cmpOp FloatLeOp = LE
2917 cmpOp DoubleGtOp = GTT
2918 cmpOp DoubleGeOp = GE
2919 cmpOp DoubleEqOp = EQQ
2920 cmpOp DoubleNeOp = NE
2921 cmpOp DoubleLtOp = LTT
2922 cmpOp DoubleLeOp = LE
-- General float comparison: FCMP computes the test, then branch on the
-- paired condition (note the inverted instr/cond pairs below).
2924 genCondJump lbl (StPrim op [x, y])
2926 = trivialFCode pr instr x y `thenNat` \ register ->
2927 getNewRegNat FF64 `thenNat` \ tmp ->
2929 code = registerCode register tmp
2930 result = registerName register tmp
2931 target = ImmCLbl lbl
2933 return (code . mkSeqInstr (BF cond result target))
2935 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
2937 fltCmpOp op = case op of
2951 (instr, cond) = case op of
2952 FloatGtOp -> (FCMP TF LE, EQQ)
2953 FloatGeOp -> (FCMP TF LTT, EQQ)
2954 FloatEqOp -> (FCMP TF EQQ, NE)
2955 FloatNeOp -> (FCMP TF EQQ, EQQ)
2956 FloatLtOp -> (FCMP TF LTT, NE)
2957 FloatLeOp -> (FCMP TF LE, NE)
2958 DoubleGtOp -> (FCMP TF LE, EQQ)
2959 DoubleGeOp -> (FCMP TF LTT, EQQ)
2960 DoubleEqOp -> (FCMP TF EQQ, NE)
2961 DoubleNeOp -> (FCMP TF EQQ, EQQ)
2962 DoubleLtOp -> (FCMP TF LTT, NE)
2963 DoubleLeOp -> (FCMP TF LE, NE)
-- General integer comparison: CMP then branch, same inverted pairing.
2965 genCondJump lbl (StPrim op [x, y])
2966 = trivialCode instr x y `thenNat` \ register ->
2967 getNewRegNat IntRep `thenNat` \ tmp ->
2969 code = registerCode register tmp
2970 result = registerName register tmp
2971 target = ImmCLbl lbl
2973 return (code . mkSeqInstr (BI cond result target))
2975 (instr, cond) = case op of
2976 CharGtOp -> (CMP LE, EQQ)
2977 CharGeOp -> (CMP LTT, EQQ)
2978 CharEqOp -> (CMP EQQ, NE)
2979 CharNeOp -> (CMP EQQ, EQQ)
2980 CharLtOp -> (CMP LTT, NE)
2981 CharLeOp -> (CMP LE, NE)
2982 IntGtOp -> (CMP LE, EQQ)
2983 IntGeOp -> (CMP LTT, EQQ)
2984 IntEqOp -> (CMP EQQ, NE)
2985 IntNeOp -> (CMP EQQ, EQQ)
2986 IntLtOp -> (CMP LTT, NE)
2987 IntLeOp -> (CMP LE, NE)
2988 WordGtOp -> (CMP ULE, EQQ)
2989 WordGeOp -> (CMP ULT, EQQ)
2990 WordEqOp -> (CMP EQQ, NE)
2991 WordNeOp -> (CMP EQQ, EQQ)
2992 WordLtOp -> (CMP ULT, NE)
2993 WordLeOp -> (CMP ULE, NE)
2994 AddrGtOp -> (CMP ULE, EQQ)
2995 AddrGeOp -> (CMP ULT, EQQ)
2996 AddrEqOp -> (CMP EQQ, NE)
2997 AddrNeOp -> (CMP EQQ, EQQ)
2998 AddrLtOp -> (CMP ULT, NE)
2999 AddrLeOp -> (CMP ULE, NE)
3001 #endif /* alpha_TARGET_ARCH */
3003 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3005 #if i386_TARGET_ARCH
-- i386: getCondCode already produced the flags-setting code; just
-- append a conditional jump on the resulting condition.
3007 genCondJump id bool = do
3008 CondCode _ cond code <- getCondCode bool
3009 return (code `snocOL` JXX cond id)
3013 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3015 #if x86_64_TARGET_ARCH
-- x86-64: non-float conditions jump directly; float conditions need
-- extra care for the unordered (NaN) case (several alternative code
-- sequences below; parts are elided in this excerpt).
3017 genCondJump id bool = do
3018 CondCode is_float cond cond_code <- getCondCode bool
3021 return (cond_code `snocOL` JXX cond id)
3023 lbl <- getBlockIdNat
3025 -- see comment with condFltReg
3026 let code = case cond of
3032 plain_test = unitOL (
3035 or_unordered = toOL [
3039 and_ordered = toOL [
3045 return (cond_code `appOL` code)
3049 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3051 #if sparc_TARGET_ARCH
-- SPARC: a float compare cannot directly precede BF, so pad with NOP;
-- branch delay slots are also filled with NOP.
3053 genCondJump bid bool = do
3054 CondCode is_float cond code <- getCondCode bool
3059 then [NOP, BF cond False bid, NOP]
3060 else [BI cond False bid, NOP]
3064 #endif /* sparc_TARGET_ARCH */
3067 #if powerpc_TARGET_ARCH
-- PowerPC: a single conditional branch on the CR set by getCondCode.
3069 genCondJump id bool = do
3070 CondCode is_float cond code <- getCondCode bool
3071 return (code `snocOL` BCC cond id)
3073 #endif /* powerpc_TARGET_ARCH */
3076 -- -----------------------------------------------------------------------------
3077 -- Generating C calls
3079 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
3080 -- @get_arg@, which moves the arguments to the correct registers/stack
3081 -- locations. Apart from that, the code is easy.
3083 -- (If applicable) Do not fill the delay slots here; you will confuse the
3084 -- register allocator.
-- (genCCall's signature; the leading 'genCCall' and trailing result-type
-- lines are elided in this excerpt.)
3087 :: CmmCallTarget -- function to call
3088 -> HintedCmmFormals -- where to put the result
3089 -> HintedCmmActuals -- arguments (of mixed type)
3092 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3094 #if alpha_TARGET_ARCH
-- Alpha C call: place arguments via get_arg, then LDA/JSR/LDGP sequence.
3098 genCCall fn cconv result_regs args
3099 = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
3100 `thenNat` \ ((unused,_), argCode) ->
3102 nRegs = length allArgRegs - length unused
3103 code = asmSeqThen (map ($ []) argCode)
3106 LDA pv (AddrImm (ImmLab (ptext fn))),
3107 JSR ra (AddrReg pv) nRegs,
3108 LDGP gp (AddrReg ra)]
3110 ------------------------
3111 {- Try to get a value into a specific register (or registers) for
3112 a call. The first 6 arguments go into the appropriate
3113 argument register (separate registers for integer and floating
3114 point arguments, but used in lock-step), and the remaining
3115 arguments are dumped to the stack, beginning at 0(sp). Our
3116 first argument is a pair of the list of remaining argument
3117 registers to be assigned for this call and the next stack
3118 offset to use for overflowing arguments. This way,
3119 @get_Arg@ can be applied to all of a call's arguments using
3123 :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
3124 -> StixTree -- Current argument
3125 -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
3127 -- We have to use up all of our argument registers first...
3129 get_arg ((iDst,fDst):dsts, offset) arg
3130 = getRegister arg `thenNat` \ register ->
3132 reg = if isFloatType pk then fDst else iDst
3133 code = registerCode register reg
3134 src = registerName register reg
3135 pk = registerRep register
-- Fixed registers need an explicit move into the argument register.
3138 if isFloatType pk then
3139 ((dsts, offset), if isFixed register then
3140 code . mkSeqInstr (FMOV src fDst)
3143 ((dsts, offset), if isFixed register then
3144 code . mkSeqInstr (OR src (RIReg src) iDst)
3147 -- Once we have run out of argument registers, we move to the
3150 get_arg ([], offset) arg
3151 = getRegister arg `thenNat` \ register ->
3152 getNewRegNat (registerRep register)
3155 code = registerCode register tmp
3156 src = registerName register tmp
3157 pk = registerRep register
3158 sz = primRepToSize pk
3160 return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3162 #endif /* alpha_TARGET_ARCH */
3164 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3166 #if i386_TARGET_ARCH
3168 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3169 -- write barrier compiles to no code on x86/x86-64;
3170 -- we keep it this long in order to prevent earlier optimisations.
3172 -- we only cope with a single result for foreign calls
-- Inline-able float primops (sqrt/sin/cos/tan) are expanded to x87
-- instructions directly; everything else goes out of line.
3173 genCCall (CmmPrim op) [CmmHinted r _] args = do
3174 l1 <- getNewLabelNat
3175 l2 <- getNewLabelNat
3177 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
3178 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
3180 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
3181 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
3183 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
3184 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
3186 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
3187 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
3189 other_op -> outOfLineFloatOp op r args
3191 actuallyInlineFloatOp instr size [CmmHinted x _]
3192 = do res <- trivialUFCode size (instr size) x
3194 return (any (getRegisterReg (CmmLocal r)))
-- General C call on i386: push args right-to-left, call, pop args
-- (caller-pops for ccall; stdcall callee pops), then move results out
-- of eax/edx/fake0. Darwin additionally pads to 16-byte stack alignment.
3196 genCCall target dest_regs args = do
3198 sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
3199 #if !darwin_TARGET_OS
3200 tot_arg_size = sum sizes
3202 raw_arg_size = sum sizes
3203 tot_arg_size = roundTo 16 raw_arg_size
3204 arg_pad_size = tot_arg_size - raw_arg_size
3205 delta0 <- getDeltaNat
3206 setDeltaNat (delta0 - arg_pad_size)
3209 push_codes <- mapM push_arg (reverse args)
3210 delta <- getDeltaNat
3213 -- deal with static vs dynamic call targets
3214 (callinsns,cconv) <-
3217 CmmCallee (CmmLit (CmmLabel lbl)) conv
3218 -> -- ToDo: stdcall arg sizes
3219 return (unitOL (CALL (Left fn_imm) []), conv)
3220 where fn_imm = ImmCLbl lbl
3222 -> do { (dyn_c, dyn_r) <- get_op expr
3223 ; ASSERT( isWord32 (cmmExprType expr) )
3224 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
3227 #if darwin_TARGET_OS
3229 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
3230 DELTA (delta0 - arg_pad_size)]
3231 `appOL` concatOL push_codes
3234 = concatOL push_codes
3235 call = callinsns `appOL`
3237 -- Deallocate parameters after call for ccall;
3238 -- but not for stdcall (callee does it)
3239 (if cconv == StdCallConv || tot_arg_size==0 then [] else
3240 [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
3242 [DELTA (delta + tot_arg_size)]
3245 setDeltaNat (delta + tot_arg_size)
3248 -- assign the results, if necessary
-- Results: floats come back in fake0 (x87 st0), 64-bit ints in
-- eax:edx, everything else in eax.
3249 assign_code [] = nilOL
3250 assign_code [CmmHinted dest _hint]
3251 | isFloatType ty = unitOL (GMOV fake0 r_dest)
3252 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
3253 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
3254 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
3256 ty = localRegType dest
3258 r_dest_hi = getHiVRegFromLo r_dest
3259 r_dest = getRegisterReg (CmmLocal dest)
3260 assign_code many = panic "genCCall.assign_code many"
3262 return (push_code `appOL`
3264 assign_code dest_regs)
3267 arg_size :: CmmType -> Int -- Width in bytes
3268 arg_size ty = widthInBytes (typeWidth ty)
-- Round x up to the next multiple of a.
3270 roundTo a x | x `mod` a == 0 = x
3271 | otherwise = x + a - (x `mod` a)
3274 push_arg :: HintedCmmActual {-current argument-}
3275 -> NatM InstrBlock -- code
-- 64-bit args are pushed as two 32-bit words, high word first;
-- each push adjusts the recorded stack delta.
3277 push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
3278 | isWord64 arg_ty = do
3279 ChildCode64 code r_lo <- iselExpr64 arg
3280 delta <- getDeltaNat
3281 setDeltaNat (delta - 8)
3283 r_hi = getHiVRegFromLo r_lo
3285 return ( code `appOL`
3286 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
3287 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
-- Floats cannot be PUSHed: make room with SUB and GST into the slot.
3292 (code, reg) <- get_op arg
3293 delta <- getDeltaNat
3294 let size = arg_size arg_ty -- Byte size
3295 setDeltaNat (delta-size)
3296 if (isFloatType arg_ty)
3297 then return (code `appOL`
3298 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
3300 GST (floatSize (typeWidth arg_ty))
3301 reg (AddrBaseIndex (EABaseReg esp)
3305 else return (code `snocOL`
3306 PUSH II32 (OpReg reg) `snocOL`
3310 arg_ty = cmmExprType arg
3313 get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
3315 (reg,code) <- getSomeReg op
3318 #endif /* i386_TARGET_ARCH */
3320 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- | Lower a floating-point CallishMachOp to an out-of-line call to the
-- corresponding C math-library routine (see the name table below).
3322 outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
3324 outOfLineFloatOp mop res args
3326 dflags <- getDynFlagsNat
3327 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
3328 let target = CmmCallee targetExpr CCallConv
-- An f64 result register can receive the call result directly; any
-- other result type is routed through a fresh f64 temporary which is
-- then assigned onto 'res' (NOTE(review): presumably to match the C
-- return convention -- confirm against the callers).
3330 if isFloat64 (localRegType res)
3332 stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
3336 tmp = LocalReg uq f64
3338 code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
3339 code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
3340 return (code1 `appOL` code2)
3342 lbl = mkForeignLabel fn Nothing False
-- MO_F32_* map to the C99 float variants ("sqrtf" etc.),
-- MO_F64_* to the double versions.
3345 MO_F32_Sqrt -> fsLit "sqrtf"
3346 MO_F32_Sin -> fsLit "sinf"
3347 MO_F32_Cos -> fsLit "cosf"
3348 MO_F32_Tan -> fsLit "tanf"
3349 MO_F32_Exp -> fsLit "expf"
3350 MO_F32_Log -> fsLit "logf"
3352 MO_F32_Asin -> fsLit "asinf"
3353 MO_F32_Acos -> fsLit "acosf"
3354 MO_F32_Atan -> fsLit "atanf"
3356 MO_F32_Sinh -> fsLit "sinhf"
3357 MO_F32_Cosh -> fsLit "coshf"
3358 MO_F32_Tanh -> fsLit "tanhf"
3359 MO_F32_Pwr -> fsLit "powf"
3361 MO_F64_Sqrt -> fsLit "sqrt"
3362 MO_F64_Sin -> fsLit "sin"
3363 MO_F64_Cos -> fsLit "cos"
3364 MO_F64_Tan -> fsLit "tan"
3365 MO_F64_Exp -> fsLit "exp"
3366 MO_F64_Log -> fsLit "log"
3368 MO_F64_Asin -> fsLit "asin"
3369 MO_F64_Acos -> fsLit "acos"
3370 MO_F64_Atan -> fsLit "atan"
3372 MO_F64_Sinh -> fsLit "sinh"
3373 MO_F64_Cosh -> fsLit "cosh"
3374 MO_F64_Tanh -> fsLit "tanh"
3375 MO_F64_Pwr -> fsLit "pow"
3377 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3379 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3381 #if x86_64_TARGET_ARCH
3383 genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
3384 -- write barrier compiles to no code on x86/x86-64;
3385 -- we keep it this long in order to prevent earlier optimisations.
-- Other CmmPrim ops with a single result: lowered via outOfLineFloatOp
-- (out-of-line C math-library calls).
3388 genCCall (CmmPrim op) [CmmHinted r _] args =
3389 outOfLineFloatOp op r args
-- General C call, x86_64: integer args go in allArgRegs, float args in
-- allFPArgRegs, the overflow goes on the stack (stack_args).
3391 genCCall target dest_regs args = do
3393 -- load up the register arguments
3394 (stack_args, aregs, fregs, load_args_code)
3395 <- load_args args allArgRegs allFPArgRegs nilOL
-- 'aregs'/'fregs' are the registers left over, so the used ones are
-- the corresponding prefixes of the full lists.
3398 fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
3399 int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
3400 arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
3401 -- for annotating the call instruction with
3403 sse_regs = length fp_regs_used
3405 tot_arg_size = arg_size * length stack_args
3407 -- On entry to the called function, %rsp should be aligned
3408 -- on a 16-byte boundary +8 (i.e. the first stack arg after
3409 -- the return address is 16-byte aligned). In STG land
3410 -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
3411 -- need to make sure we push a multiple of 16-bytes of args,
3412 -- plus the return address, to get the correct alignment.
3413 -- Urg, this is hard. We need to feed the delta back into
3414 -- the arg pushing code.
3415 (real_size, adjust_rsp) <-
3416 if tot_arg_size `rem` 16 == 0
3417 then return (tot_arg_size, nilOL)
3418 else do -- we need to adjust...
3419 delta <- getDeltaNat
3420 setDeltaNat (delta-8)
3421 return (tot_arg_size+8, toOL [
3422 SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
3426 -- push the stack args, right to left
3427 push_code <- push_args (reverse stack_args) nilOL
3428 delta <- getDeltaNat
3430 -- deal with static vs dynamic call targets
3431 (callinsns,cconv) <-
3434 CmmCallee (CmmLit (CmmLabel lbl)) conv
3435 -> -- ToDo: stdcall arg sizes
3436 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
3437 where fn_imm = ImmCLbl lbl
-- Dynamic target: evaluate the function pointer into a register.
3439 -> do (dyn_r, dyn_c) <- getSomeReg expr
3440 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
3443 -- The x86_64 ABI requires us to set %al to the number of SSE
3444 -- registers that contain arguments, if the called routine
3445 -- is a varargs function. We don't know whether it's a
3446 -- varargs function or not, so we have to assume it is.
3448 -- It's not safe to omit this assignment, even if the number
3449 -- of SSE regs in use is zero. If %al is larger than 8
3450 -- on entry to a varargs function, seg faults ensue.
3451 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
3453 let call = callinsns `appOL`
3455 -- Deallocate parameters after call for ccall;
3456 -- but not for stdcall (callee does it)
3457 (if cconv == StdCallConv || real_size==0 then [] else
3458 [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
3460 [DELTA (delta + real_size)]
3463 setDeltaNat (delta + real_size)
3467 assign_code [] = nilOL
3468 assign_code [CmmHinted dest _hint] =
3469 case typeWidth rep of
3470 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3471 W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
3472 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
3474 rep = localRegType dest
3475 r_dest = getRegisterReg (CmmLocal dest)
3476 assign_code many = panic "genCCall.assign_code many"
3478 return (load_args_code `appOL`
3481 assign_eax sse_regs `appOL`
3483 assign_code dest_regs)
-- Every stack argument occupies one 8-byte slot.
3486 arg_size = 8 -- always, at the mo
-- Fill argument registers first; whatever does not fit is returned as
-- the residual list for push_args to place on the stack.
3488 load_args :: [CmmHinted CmmExpr]
3489 -> [Reg] -- int regs avail for args
3490 -> [Reg] -- FP regs avail for args
3492 -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
3493 load_args args [] [] code = return (args, [], [], code)
3494 -- no more regs to use
3495 load_args [] aregs fregs code = return ([], aregs, fregs, code)
3496 -- no more args to push
3497 load_args ((CmmHinted arg hint) : rest) aregs fregs code
3498 | isFloatType arg_rep =
3502 arg_code <- getAnyReg arg
3503 load_args rest aregs rs (code `appOL` arg_code r)
3508 arg_code <- getAnyReg arg
3509 load_args rest rs fregs (code `appOL` arg_code r)
3511 arg_rep = cmmExprType arg
3514 (args',ars,frs,code') <- load_args rest aregs fregs code
3515 return ((CmmHinted arg hint):args', ars, frs, code')
-- Push the overflow (stack) arguments, keeping the frame delta in sync.
-- Floats are MOVed into freshly reserved space below %rsp; integers
-- (always word-sized here, see the assertion) use PUSH.
3517 push_args [] code = return code
3518 push_args ((CmmHinted arg hint):rest) code
3519 | isFloatType arg_rep = do
3520 (arg_reg, arg_code) <- getSomeReg arg
3521 delta <- getDeltaNat
3522 setDeltaNat (delta-arg_size)
3523 let code' = code `appOL` arg_code `appOL` toOL [
3524 SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
3525 DELTA (delta-arg_size),
3526 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
3527 push_args rest code'
3530 -- we only ever generate word-sized function arguments. Promotion
3531 -- has already happened: our Int8# type is kept sign-extended
3532 -- in an Int#, for example.
3533 ASSERT(width == W64) return ()
3534 (arg_op, arg_code) <- getOperand arg
3535 delta <- getDeltaNat
3536 setDeltaNat (delta-arg_size)
3537 let code' = code `appOL` toOL [PUSH II64 arg_op,
3538 DELTA (delta-arg_size)]
3539 push_args rest code'
3541 arg_rep = cmmExprType arg
3542 width = typeWidth arg_rep
3545 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3547 #if sparc_TARGET_ARCH
3549 The SPARC calling convention is an absolute
3550 nightmare. The first 6x32 bits of arguments are mapped into
3551 %o0 through %o5, and the remaining arguments are dumped to the
3552 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3554 If we have to put args on the stack, move %o6==%sp down by
3555 the number of words to go on the stack, to ensure there's enough space.
3557 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3558 16 words above the stack pointer is a word for the address of
3559 a structure return value. I use this as a temporary location
3560 for moving values from float to int regs. Certainly it isn't
3561 safe to put anything in the 16 words starting at %sp, since
3562 this area can get trashed at any time due to window overflows
3563 caused by signal handlers.
3565 A final complication (if the above isn't enough) is that
3566 we can't blithely calculate the arguments one by one into
3567 %o0 .. %o5. Consider the following nested calls:

       fff a (fff b c)

3571 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3572 the inner call will itself use %o0, which trashes the value put there
3573 in preparation for the outer call. Upshot: we need to calculate the
3574 args into temporary regs, and move those to arg regs or onto the
3575 stack only immediately prior to the call proper. Sigh.
-- SPARC C call.  Arguments are first computed into fresh integer vregs
-- and only moved into %o0..%o5 / the stack immediately before the call,
-- to avoid the nested-call clobbering problem described in the comment
-- above.
3578 genCCall target dest_regs argsAndHints = do
3580 args = map hintlessCmm argsAndHints
3581 argcode_and_vregs <- mapM arg_to_int_vregs args
3583 (argcodes, vregss) = unzip argcode_and_vregs
3584 n_argRegs = length allArgRegs
3585 n_argRegs_used = min (length vregs) n_argRegs
3586 vregs = concat vregss
3587 -- deal with static vs dynamic call targets
3588 callinsns <- (case target of
3589 CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
3590 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3591 CmmCallee expr conv -> do
3592 (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
3593 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
3595 (res, reduce) <- outOfLineFloatOp mop
3596 lblOrMopExpr <- case res of
3598 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
3600 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
3601 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-- reduce=True (i.e. an F32 primop, see outOfLineFloatOp below) is not
-- implemented on this path.
3602 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
3606 argcode = concatOL argcodes
-- If arguments overflow the six %o registers, move %sp down for them
-- (rounded to keep 8-byte alignment) and back up after the call.
3607 (move_sp_down, move_sp_up)
3608 = let diff = length vregs - n_argRegs
3609 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
3612 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
3615 = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
3617 -- assign the results, if necessary
-- Float results come back in %f0 (and its pair for FF64); integer
-- results in %o0.
3618 assign_code [] = nilOL
3620 assign_code [CmmHinted dest _hint]
3621 = let rep = localRegType dest
3622 width = typeWidth rep
3623 r_dest = getRegisterReg (CmmLocal dest)
3628 = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
3632 = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
3634 | not $ isFloatType rep
3636 = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
3640 return (argcode `appOL`
3641 move_sp_down `appOL`
3642 transfer_code `appOL`
3646 assign_code dest_regs)
3648 -- move args from the integer vregs into which they have been
3649 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
3650 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
3652 move_final [] _ offset -- all args done
3655 move_final (v:vs) [] offset -- out of aregs; move to stack
3656 = ST II32 v (spRel offset)
3657 : move_final vs [] (offset+1)
3659 move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
3660 = OR False g0 (RIReg v) a
3661 : move_final vs az offset
3663 -- generate code to calculate an argument, and move it into one
3664 -- or two integer vregs.
3665 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
3666 arg_to_int_vregs arg
3667 | isWord64 (cmmExprType arg)
3669 (ChildCode64 code r_lo) <- iselExpr64 arg
3671 r_hi = getHiVRegFromLo r_lo
3672 return (code, [r_hi, r_lo])
3675 (src, code) <- getSomeReg arg
3676 tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
3678 pk = cmmExprType arg
3679 Just f0_high = fPair f0
-- Floats must travel in integer vregs for this convention: bounce each
-- word through the scratch slot addressed by spRel 16 (cf. the
-- "16 words above the stack pointer" note in the comment above).
3680 case cmmTypeSize pk of
3682 v1 <- getNewRegNat II32
3683 v2 <- getNewRegNat II32
3686 FMOV FF64 src f0 `snocOL`
3687 ST FF32 f0 (spRel 16) `snocOL`
3688 LD II32 (spRel 16) v1 `snocOL`
3689 ST FF32 f0_high (spRel 16) `snocOL`
3690 LD II32 (spRel 16) v2
3695 v1 <- getNewRegNat II32
3698 ST FF32 src (spRel 16) `snocOL`
3699 LD II32 (spRel 16) v1
3704 v1 <- getNewRegNat II32
3706 code `snocOL` OR False g0 (RIReg src) v1
-- Resolve a CallishMachOp to the label (or dynamic expression) of the
-- C routine implementing it.  'reduce' is True when the result of the
-- double-precision routine would still need narrowing to FF32.
3710 outOfLineFloatOp mop =
3712 dflags <- getDynFlagsNat
3713 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3714 mkForeignLabel functionName Nothing True
3715 let mopLabelOrExpr = case mopExpr of
3716 CmmLit (CmmLabel lbl) -> Left lbl
3718 return (mopLabelOrExpr, reduce)
-- NOTE(review): every MO_F32_* entry calls the *double* routine with
-- reduce=True, and genCCall above panics when reduce=True -- so F32
-- primops appear unsupported on this path; verify before relying on it.
3720 (reduce, functionName) = case mop of
3721 MO_F32_Exp -> (True, fsLit "exp")
3722 MO_F32_Log -> (True, fsLit "log")
3723 MO_F32_Sqrt -> (True, fsLit "sqrt")
3725 MO_F32_Sin -> (True, fsLit "sin")
3726 MO_F32_Cos -> (True, fsLit "cos")
3727 MO_F32_Tan -> (True, fsLit "tan")
3729 MO_F32_Asin -> (True, fsLit "asin")
3730 MO_F32_Acos -> (True, fsLit "acos")
3731 MO_F32_Atan -> (True, fsLit "atan")
3733 MO_F32_Sinh -> (True, fsLit "sinh")
3734 MO_F32_Cosh -> (True, fsLit "cosh")
3735 MO_F32_Tanh -> (True, fsLit "tanh")
3737 MO_F64_Exp -> (False, fsLit "exp")
3738 MO_F64_Log -> (False, fsLit "log")
3739 MO_F64_Sqrt -> (False, fsLit "sqrt")
3741 MO_F64_Sin -> (False, fsLit "sin")
3742 MO_F64_Cos -> (False, fsLit "cos")
3743 MO_F64_Tan -> (False, fsLit "tan")
3745 MO_F64_Asin -> (False, fsLit "asin")
3746 MO_F64_Acos -> (False, fsLit "acos")
3747 MO_F64_Atan -> (False, fsLit "atan")
3749 MO_F64_Sinh -> (False, fsLit "sinh")
3750 MO_F64_Cosh -> (False, fsLit "cosh")
3751 MO_F64_Tanh -> (False, fsLit "tanh")
3753 other -> pprPanic "outOfLineFloatOp(sparc) "
3754 (pprCallishMachOp mop)
3756 #endif /* sparc_TARGET_ARCH */
3758 #if powerpc_TARGET_ARCH
3760 #if darwin_TARGET_OS || linux_TARGET_OS
3762 The PowerPC calling convention for Darwin/Mac OS X
3763 is described in Apple's document
3764 "Inside Mac OS X - Mach-O Runtime Architecture".
3766 PowerPC Linux uses the System V Release 4 Calling Convention
3767 for PowerPC. It is described in the
3768 "System V Application Binary Interface PowerPC Processor Supplement".
3770 Both conventions are similar:
3771 Parameters may be passed in general-purpose registers starting at r3, in
3772 floating point registers starting at f1, or on the stack.
3774 But there are substantial differences:
3775 * The number of registers used for parameter passing and the exact set of
3776 nonvolatile registers differs (see MachRegs.lhs).
3777 * On Darwin, stack space is always reserved for parameters, even if they are
3778 passed in registers. The called routine may choose to save parameters from
3779 registers to the corresponding space on the stack.
3780 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3781 parameter is passed in an FPR.
3782 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3783 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3784 Darwin just treats an I64 like two separate II32s (high word first).
3785 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
3786 4-byte aligned like everything else on Darwin.
3787 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
3788 PowerPC Linux does not agree, so neither do we.
3790 According to both conventions, the parameter area should be part of the
3791 caller's stack frame, allocated in the caller's prologue code (large enough
3792 to hold the parameter lists for all called routines). The NCG already
3793 uses the stack for register spilling, leaving 64 bytes free at the top.
3794 If we need a larger parameter area than that, we just allocate a new stack
3795 frame just before ccalling.
3799 genCCall (CmmPrim MO_WriteBarrier) _ _
3800 = return $ unitOL LWSYNC
-- PowerPC C call (Darwin / SysV, see the convention notes above):
-- marshal args with passArguments, optionally push a temporary frame
-- (move_sp_down / STU), emit BL or MTCTR+BCTRL, pop the frame and
-- fetch results with moveResult.
3802 genCCall target dest_regs argsAndHints
3803 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
3804 -- we rely on argument promotion in the codeGen
3806 (finalStack,passArgumentsCode,usedRegs) <- passArguments
3808 allArgRegs allFPArgRegs
3812 (labelOrExpr, reduceToFF32) <- case target of
3813 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
3814 CmmCallee expr conv -> return (Right expr, False)
3815 CmmPrim mop -> outOfLineFloatOp mop
3817 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
3818 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
3823 `snocOL` BL lbl usedRegs
-- Dynamic target: load the pointer into CTR and branch through it.
3826 (dynReg, dynCode) <- getSomeReg dyn
3828 `snocOL` MTCTR dynReg
3830 `snocOL` BCTRL usedRegs
3833 #if darwin_TARGET_OS
3834 initialStackOffset = 24
3835 -- size of linkage area + size of arguments, in bytes
3836 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
3837 map (widthInBytes . typeWidth) argReps
3838 #elif linux_TARGET_OS
3839 initialStackOffset = 8
3840 stackDelta finalStack = roundTo 16 finalStack
3842 args = map hintlessCmm argsAndHints
3843 argReps = map cmmExprType args
-- Round x up to the next multiple of a.
3845 roundTo a x | x `mod` a == 0 = x
3846 | otherwise = x + a - (x `mod` a)
3848 move_sp_down finalStack
3850 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
3853 where delta = stackDelta finalStack
3854 move_sp_up finalStack
3856 toOL [ADD sp sp (RIImm (ImmInt delta)),
3859 where delta = stackDelta finalStack
-- passArguments walks the argument list, threading the remaining GPR
-- and FPR lists plus the running stack offset, and accumulating both
-- the code and the set of registers used (for annotating the call).
3862 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
3863 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
3864 accumCode accumUsed | isWord64 arg_ty =
3866 ChildCode64 code vr_lo <- iselExpr64 arg
3867 let vr_hi = getHiVRegFromLo vr_lo
3869 #if darwin_TARGET_OS
-- Darwin: an I64 is simply two consecutive words (high word first).
3874 (accumCode `appOL` code
3875 `snocOL` storeWord vr_hi gprs stackOffset
3876 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
3877 ((take 2 gprs) ++ accumUsed)
3879 storeWord vr (gpr:_) offset = MR gpr vr
3880 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
3882 #elif linux_TARGET_OS
-- SysV: an I64 goes in an odd/even GPR pair (skipping one GPR when
-- needed) or, failing that, in an 8-byte-aligned stack slot.
3883 let stackOffset' = roundTo 8 stackOffset
3884 stackCode = accumCode `appOL` code
3885 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
3886 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
3887 regCode hireg loreg =
3888 accumCode `appOL` code
3889 `snocOL` MR hireg vr_hi
3890 `snocOL` MR loreg vr_lo
3893 hireg : loreg : regs | even (length gprs) ->
3894 passArguments args regs fprs stackOffset
3895 (regCode hireg loreg) (hireg : loreg : accumUsed)
3896 _skipped : hireg : loreg : regs ->
3897 passArguments args regs fprs stackOffset
3898 (regCode hireg loreg) (hireg : loreg : accumUsed)
3899 _ -> -- only one or no regs left
3900 passArguments args [] fprs (stackOffset'+8)
3904 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
3905 | reg : _ <- regs = do
3906 register <- getRegister arg
3907 let code = case register of
3908 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
3909 Any _ acode -> acode reg
3913 #if darwin_TARGET_OS
3914 -- The Darwin ABI requires that we reserve stack slots for register parameters
3915 (stackOffset + stackBytes)
3916 #elif linux_TARGET_OS
3917 -- ... the SysV ABI doesn't.
3920 (accumCode `appOL` code)
-- No register left for this argument: store it to its stack slot.
3923 (vr, code) <- getSomeReg arg
3927 (stackOffset' + stackBytes)
3928 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
3931 #if darwin_TARGET_OS
3932 -- stackOffset is at least 4-byte aligned
3933 -- The Darwin ABI is happy with that.
3934 stackOffset' = stackOffset
3936 -- ... the SysV ABI requires 8-byte alignment for doubles.
3937 stackOffset' | isFloatType rep && typeWidth rep == W64 =
3938 roundTo 8 stackOffset
3939 | otherwise = stackOffset
3941 stackSlot = AddrRegImm sp (ImmInt stackOffset')
3942 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
3943 II32 -> (1, 0, 4, gprs)
3944 #if darwin_TARGET_OS
3945 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
3947 FF32 -> (1, 1, 4, fprs)
3948 FF64 -> (2, 1, 8, fprs)
3949 #elif linux_TARGET_OS
3950 -- ... the SysV ABI doesn't.
3951 FF32 -> (0, 1, 4, fprs)
3952 FF64 -> (0, 1, 8, fprs)
-- Fetch the call result: floats from f1 (narrowed with FRSP when an
-- F32 primop went through the double routine), I64 from r3/r4, other
-- integers from r3.
3955 moveResult reduceToFF32 =
3958 [CmmHinted dest _hint]
3959 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
3960 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
3961 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
3963 | otherwise -> unitOL (MR r_dest r3)
3964 where rep = cmmRegType (CmmLocal dest)
3965 r_dest = getRegisterReg (CmmLocal dest)
-- As on sparc: resolve a CallishMachOp to the C math routine that
-- implements it.  reduce=True means the caller must narrow the double
-- result to FF32 (moveResult emits FRSP for that case).
3967 outOfLineFloatOp mop =
3969 dflags <- getDynFlagsNat
3970 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
3971 mkForeignLabel functionName Nothing True
3972 let mopLabelOrExpr = case mopExpr of
3973 CmmLit (CmmLabel lbl) -> Left lbl
3975 return (mopLabelOrExpr, reduce)
3977 (functionName, reduce) = case mop of
3978 MO_F32_Exp -> (fsLit "exp", True)
3979 MO_F32_Log -> (fsLit "log", True)
3980 MO_F32_Sqrt -> (fsLit "sqrt", True)
3982 MO_F32_Sin -> (fsLit "sin", True)
3983 MO_F32_Cos -> (fsLit "cos", True)
3984 MO_F32_Tan -> (fsLit "tan", True)
3986 MO_F32_Asin -> (fsLit "asin", True)
3987 MO_F32_Acos -> (fsLit "acos", True)
3988 MO_F32_Atan -> (fsLit "atan", True)
3990 MO_F32_Sinh -> (fsLit "sinh", True)
3991 MO_F32_Cosh -> (fsLit "cosh", True)
3992 MO_F32_Tanh -> (fsLit "tanh", True)
3993 MO_F32_Pwr -> (fsLit "pow", True)
3995 MO_F64_Exp -> (fsLit "exp", False)
3996 MO_F64_Log -> (fsLit "log", False)
3997 MO_F64_Sqrt -> (fsLit "sqrt", False)
3999 MO_F64_Sin -> (fsLit "sin", False)
4000 MO_F64_Cos -> (fsLit "cos", False)
4001 MO_F64_Tan -> (fsLit "tan", False)
4003 MO_F64_Asin -> (fsLit "asin", False)
4004 MO_F64_Acos -> (fsLit "acos", False)
4005 MO_F64_Atan -> (fsLit "atan", False)
4007 MO_F64_Sinh -> (fsLit "sinh", False)
4008 MO_F64_Cosh -> (fsLit "cosh", False)
4009 MO_F64_Tanh -> (fsLit "tanh", False)
4010 MO_F64_Pwr -> (fsLit "pow", False)
4011 other -> pprPanic "genCCall(ppc): unknown callish op"
4012 (pprCallishMachOp other)
4014 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
4016 #endif /* powerpc_TARGET_ARCH */
4019 -- -----------------------------------------------------------------------------
4020 -- Generating a table-branch
-- | Generate an indirect jump through a table of block addresses
-- ('Nothing' entries become zero words, see jumpTableEntry below).
-- Under PIC the table holds label *differences* (jumpTableEntryRel)
-- and the table base is added back at run time.
4022 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
4024 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4028 (reg,e_code) <- getSomeReg expr
4029 lbl <- getNewLabelNat
4030 dflags <- getDynFlagsNat
4031 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4032 (tableReg,t_code) <- getSomeReg $ dynRef
4034 jumpTable = map jumpTableEntryRel ids
4036 jumpTableEntryRel Nothing
4037 = CmmStaticLit (CmmInt 0 wordWidth)
4038 jumpTableEntryRel (Just (BlockId id))
4039 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4040 where blockLabel = mkAsmTempLabel id
-- Load table entry: base = tableReg, index = scrutinee scaled by the
-- word size.
4042 op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
4043 (EAIndex reg wORD_SIZE) (ImmInt 0))
4045 #if x86_64_TARGET_ARCH
4046 #if darwin_TARGET_OS
4047 -- on Mac OS X/x86_64, put the jump table in the text section
4048 -- to work around a limitation of the linker.
4049 -- ld64 is unable to handle the relocations for
4051 -- if L0 is not preceded by a non-anonymous label in its section.
4053 code = e_code `appOL` t_code `appOL` toOL [
4054 ADD (intSize wordWidth) op (OpReg tableReg),
4055 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
4056 LDATA Text (CmmDataLabel lbl : jumpTable)
4059 -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
4060 -- relocations, hence we only get 32-bit offsets in the jump
4061 -- table. As these offsets are always negative we need to properly
4062 -- sign extend them to 64-bit. This hack should be removed in
4063 -- conjunction with the hack in PprMach.hs/pprDataItem once
4064 -- binutils 2.17 is standard.
4065 code = e_code `appOL` t_code `appOL` toOL [
4066 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4068 (OpAddr (AddrBaseIndex (EABaseReg tableReg)
4069 (EAIndex reg wORD_SIZE) (ImmInt 0)))
4071 ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
4072 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
4076 code = e_code `appOL` t_code `appOL` toOL [
4077 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4078 ADD (intSize wordWidth) op (OpReg tableReg),
4079 JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-- Non-PIC i386: absolute labels in the table, single indexed JMP_TBL.
4085 (reg,e_code) <- getSomeReg expr
4086 lbl <- getNewLabelNat
4088 jumpTable = map jumpTableEntry ids
4089 op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
4090 code = e_code `appOL` toOL [
4091 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4092 JMP_TBL op [ id | Just id <- ids ]
4096 #elif powerpc_TARGET_ARCH
-- PIC powerpc: scale index by 4 (SLW ... 2), load the offset, add the
-- table base back, then branch via CTR.
4100 (reg,e_code) <- getSomeReg expr
4101 tmp <- getNewRegNat II32
4102 lbl <- getNewLabelNat
4103 dflags <- getDynFlagsNat
4104 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4105 (tableReg,t_code) <- getSomeReg $ dynRef
4107 jumpTable = map jumpTableEntryRel ids
4109 jumpTableEntryRel Nothing
4110 = CmmStaticLit (CmmInt 0 wordWidth)
4111 jumpTableEntryRel (Just (BlockId id))
4112 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
4113 where blockLabel = mkAsmTempLabel id
4115 code = e_code `appOL` t_code `appOL` toOL [
4116 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4117 SLW tmp reg (RIImm (ImmInt 2)),
4118 LD II32 tmp (AddrRegReg tableReg tmp),
4119 ADD tmp tmp (RIReg tableReg),
4121 BCTR [ id | Just id <- ids ]
-- Non-PIC powerpc: address the table via hi/lo halves of its label.
4126 (reg,e_code) <- getSomeReg expr
4127 tmp <- getNewRegNat II32
4128 lbl <- getNewLabelNat
4130 jumpTable = map jumpTableEntry ids
4132 code = e_code `appOL` toOL [
4133 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
4134 SLW tmp reg (RIImm (ImmInt 2)),
4135 ADDIS tmp tmp (HA (ImmCLbl lbl)),
4136 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
4138 BCTR [ id | Just id <- ids ]
4141 #elif sparc_TARGET_ARCH
4144 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
4147 = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
4149 #error "ToDo: genSwitch"
-- One word of a jump table: a zero word for an absent target, otherwise
-- the assembler-local label of the destination block.
jumpTableEntry Nothing             = CmmStaticLit (CmmInt 0 wordWidth)
jumpTableEntry (Just (BlockId uq)) = CmmStaticLit (CmmLabel (mkAsmTempLabel uq))
4156 -- -----------------------------------------------------------------------------
4158 -- -----------------------------------------------------------------------------
4161 -- -----------------------------------------------------------------------------
4162 -- 'condIntReg' and 'condFltReg': condition codes into registers
4164 -- Turn those condition codes into integers now (when they appear on
4165 -- the right hand side of an assignment).
4167 -- (If applicable) Do not fill the delay slots here; you will confuse the
4168 -- register allocator.
-- Materialise a comparison result (0 or 1) into a register.
4170 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
4172 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4174 #if alpha_TARGET_ARCH
4175 condIntReg = panic "MachCode.condIntReg (not on Alpha)"
4176 condFltReg = panic "MachCode.condFltReg (not on Alpha)"
4177 #endif /* alpha_TARGET_ARCH */
4179 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4181 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- SETcc only writes a byte, so set an II8 temp and zero-extend it into
-- the destination.
4183 condIntReg cond x y = do
4184 CondCode _ cond cond_code <- condIntCode cond x y
4185 tmp <- getNewRegNat II8
4187 code dst = cond_code `appOL` toOL [
4188 SETCC cond (OpReg tmp),
4189 MOVZxL II8 (OpReg tmp) (OpReg dst)
4192 return (Any II32 code)
4196 #if i386_TARGET_ARCH
-- Same SETcc + zero-extend scheme for float comparisons on i386.
4198 condFltReg cond x y = do
4199 CondCode _ cond cond_code <- condFltCode cond x y
4200 tmp <- getNewRegNat II8
4202 code dst = cond_code `appOL` toOL [
4203 SETCC cond (OpReg tmp),
4204 MOVZxL II8 (OpReg tmp) (OpReg dst)
4207 return (Any II32 code)
4211 #if x86_64_TARGET_ARCH
4213 condFltReg cond x y = do
4214 CondCode _ cond cond_code <- condFltCode cond x y
4215 tmp1 <- getNewRegNat wordSize
4216 tmp2 <- getNewRegNat wordSize
4218 -- We have to worry about unordered operands (eg. comparisons
4219 -- against NaN). If the operands are unordered, the comparison
4220 -- sets the parity flag, carry flag and zero flag.
4221 -- All comparisons are supposed to return false for unordered
4222 -- operands except for !=, which returns true.
4224 -- Optimisation: we don't have to test the parity flag if we
4225 -- know the test has already excluded the unordered case: eg >
4226 -- and >= test for a zero carry flag, which can only occur for
4227 -- ordered operands.
4229 -- ToDo: by reversing comparisons we could avoid testing the
4230 -- parity flag in more cases.
4235 NE -> or_unordered dst
4236 GU -> plain_test dst
4237 GEU -> plain_test dst
4238 _ -> and_ordered dst)
4240 plain_test dst = toOL [
4241 SETCC cond (OpReg tmp1),
4242 MOVZxL II8 (OpReg tmp1) (OpReg dst)
-- NE must also be true when the operands are unordered: OR in PARITY.
4244 or_unordered dst = toOL [
4245 SETCC cond (OpReg tmp1),
4246 SETCC PARITY (OpReg tmp2),
4247 OR II8 (OpReg tmp1) (OpReg tmp2),
4248 MOVZxL II8 (OpReg tmp2) (OpReg dst)
-- The other comparisons must be false when unordered: AND with NOTPARITY.
4250 and_ordered dst = toOL [
4251 SETCC cond (OpReg tmp1),
4252 SETCC NOTPARITY (OpReg tmp2),
4253 AND II8 (OpReg tmp1) (OpReg tmp2),
4254 MOVZxL II8 (OpReg tmp2) (OpReg dst)
4257 return (Any II32 code)
4261 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4263 #if sparc_TARGET_ARCH
-- SPARC: special-case EQ/NE (against zero and in general) with flag
-- tricks on %g0; NOTE(review): the two Bool flags on SUB/ADD select
-- carry-use / cc-set per the Instr definition -- confirm the order there.
4265 condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
4266 (src, code) <- getSomeReg x
4267 tmp <- getNewRegNat II32
4269 code__2 dst = code `appOL` toOL [
4270 SUB False True g0 (RIReg src) g0,
4271 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4272 return (Any II32 code__2)
-- General EQ: XOR the operands so equality becomes a compare-to-zero.
4274 condIntReg EQQ x y = do
4275 (src1, code1) <- getSomeReg x
4276 (src2, code2) <- getSomeReg y
4277 tmp1 <- getNewRegNat II32
4278 tmp2 <- getNewRegNat II32
4280 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4281 XOR False src1 (RIReg src2) dst,
4282 SUB False True g0 (RIReg dst) g0,
4283 SUB True False g0 (RIImm (ImmInt (-1))) dst]
4284 return (Any II32 code__2)
4286 condIntReg NE x (CmmLit (CmmInt 0 d)) = do
4287 (src, code) <- getSomeReg x
4288 tmp <- getNewRegNat II32
4290 code__2 dst = code `appOL` toOL [
4291 SUB False True g0 (RIReg src) g0,
4292 ADD True False g0 (RIImm (ImmInt 0)) dst]
4293 return (Any II32 code__2)
4295 condIntReg NE x y = do
4296 (src1, code1) <- getSomeReg x
4297 (src2, code2) <- getSomeReg y
4298 tmp1 <- getNewRegNat II32
4299 tmp2 <- getNewRegNat II32
4301 code__2 dst = code1 `appOL` code2 `appOL` toOL [
4302 XOR False src1 (RIReg src2) dst,
4303 SUB False True g0 (RIReg dst) g0,
4304 ADD True False g0 (RIImm (ImmInt 0)) dst]
4305 return (Any II32 code__2)
-- General case: branch to one of two fresh local blocks and OR either
-- 0 or 1 into the destination (delay slots filled with NOP).
4307 condIntReg cond x y = do
4308 bid1@(BlockId lbl1) <- getBlockIdNat
4309 bid2@(BlockId lbl2) <- getBlockIdNat
4310 CondCode _ cond cond_code <- condIntCode cond x y
4312 code__2 dst = cond_code `appOL` toOL [
4313 BI cond False bid1, NOP,
4314 OR False g0 (RIImm (ImmInt 0)) dst,
4315 BI ALWAYS False bid2, NOP,
4317 OR False g0 (RIImm (ImmInt 1)) dst,
4319 return (Any II32 code__2)
-- Same branching scheme for float comparisons, using BF on the fcc.
4321 condFltReg cond x y = do
4322 bid1@(BlockId lbl1) <- getBlockIdNat
4323 bid2@(BlockId lbl2) <- getBlockIdNat
4324 CondCode _ cond cond_code <- condFltCode cond x y
4326 code__2 dst = cond_code `appOL` toOL [
4328 BF cond False bid1, NOP,
4329 OR False g0 (RIImm (ImmInt 0)) dst,
4330 BI ALWAYS False bid2, NOP,
4332 OR False g0 (RIImm (ImmInt 1)) dst,
4334 return (Any II32 code__2)
4336 #endif /* sparc_TARGET_ARCH */
4338 #if powerpc_TARGET_ARCH
-- PowerPC: materialise a condition into a register by extracting the
-- relevant CR bit (rotated into the low bit via RLWINM), negating it
-- with CRNOR first where the condition needs inverting.
4339 condReg getCond = do
4340 lbl1 <- getBlockIdNat
4341 lbl2 <- getBlockIdNat
4342 CondCode _ cond cond_code <- getCond
4344 {- code dst = cond_code `appOL` toOL [
4353 code dst = cond_code
4357 RLWINM dst dst (bit + 1) 31 31
4360 negate_code | do_negate = unitOL (CRNOR bit bit bit)
4363 (bit, do_negate) = case cond of
4377 return (Any II32 code)
-- Int and float versions share condReg; only the flag-setting code
-- (condIntCode / condFltCode) differs.
4379 condIntReg cond x y = condReg (condIntCode cond x y)
4380 condFltReg cond x y = condReg (condFltCode cond x y)
4381 #endif /* powerpc_TARGET_ARCH */
4384 -- -----------------------------------------------------------------------------
4385 -- 'trivial*Code': deal with trivial instructions
4387 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
4388 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
4389 -- Only look for constants on the right hand side, because that's
4390 -- where the generic optimizer will have put them.
4392 -- Similarly, for unary instructions, we don't have to worry about
4393 -- matching an StInt as the argument, because genericOpt will already
4394 -- have handled the constant-folding.
-- Type signatures for the trivial*Code helpers. The IF_ARCH_* CPP
-- macros select the instruction-constructor shape appropriate to the
-- current target architecture.
-- NOTE(review): the leading 'trivialCode ::' / 'trivialFCode ::' /
-- 'trivialUCode ::' / 'trivialUFCode ::' name lines and the trailing
-- '-> NatM Register' results appear elided in this extract.
4397 :: Width -- Int only
4398 -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
4399 ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
4400 -> Maybe (Operand -> Operand -> Instr)
4401 ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
4402 -> Maybe (Operand -> Operand -> Instr)
4403 ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
4404 ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
4406 -> CmmExpr -> CmmExpr -- the two arguments
4409 #ifndef powerpc_TARGET_ARCH
4411 :: Width -- Floating point only
4412 -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
4413 ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
4414 ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
4415 ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
4417 -> CmmExpr -> CmmExpr -- the two arguments
4423 -> IF_ARCH_alpha((RI -> Reg -> Instr)
4424 ,IF_ARCH_i386 ((Operand -> Instr)
4425 ,IF_ARCH_x86_64 ((Operand -> Instr)
4426 ,IF_ARCH_sparc((RI -> Reg -> Instr)
4427 ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
4429 -> CmmExpr -- the one argument
4432 #ifndef powerpc_TARGET_ARCH
4435 -> IF_ARCH_alpha((Reg -> Reg -> Instr)
4436 ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
4437 ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
4438 ,IF_ARCH_sparc((Reg -> Reg -> Instr)
4440 -> CmmExpr -- the one argument
4444 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4446 #if alpha_TARGET_ARCH
-- | Alpha (old-style 'thenNat' CPS monad code). First clause: when the
-- right operand is a literal it is folded into an immediate (RIImm);
-- second clause: both operands evaluated to registers, reg/reg form.
-- NOTE(review): the StInt clause's guard, the 'let'/'in' plumbing and
-- several binding lines are elided in this extract.
4448 trivialCode instr x (StInt y)
4450 = getRegister x `thenNat` \ register ->
4451 getNewRegNat IntRep `thenNat` \ tmp ->
4453 code = registerCode register tmp
4454 src1 = registerName register tmp
4455 src2 = ImmInt (fromInteger y)
4456 code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
4458 return (Any IntRep code__2)
4460 trivialCode instr x y
4461 = getRegister x `thenNat` \ register1 ->
4462 getRegister y `thenNat` \ register2 ->
4463 getNewRegNat IntRep `thenNat` \ tmp1 ->
4464 getNewRegNat IntRep `thenNat` \ tmp2 ->
4466 code1 = registerCode register1 tmp1 []
4467 src1 = registerName register1 tmp1
4468 code2 = registerCode register2 tmp2 []
4469 src2 = registerName register2 tmp2
4470 code__2 dst = asmSeqThen [code1, code2] .
4471 mkSeqInstr (instr src1 (RIReg src2) dst)
4473 return (Any IntRep code__2)
-- | Alpha: unary integer operation; evaluate the operand and append a
-- single instruction writing dst.
-- NOTE(review): the 'let'/'in' lines are elided in this extract.
4476 trivialUCode instr x
4477 = getRegister x `thenNat` \ register ->
4478 getNewRegNat IntRep `thenNat` \ tmp ->
4480 code = registerCode register tmp
4481 src = registerName register tmp
4482 code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
4484 return (Any IntRep code__2)
-- | Alpha: dyadic floating-point operation over two FF64 temporaries.
-- The Width argument is ignored on this target (first pattern is '_').
-- NOTE(review): the 'let'/'in' lines are elided in this extract.
4487 trivialFCode _ instr x y
4488 = getRegister x `thenNat` \ register1 ->
4489 getRegister y `thenNat` \ register2 ->
4490 getNewRegNat FF64 `thenNat` \ tmp1 ->
4491 getNewRegNat FF64 `thenNat` \ tmp2 ->
4493 code1 = registerCode register1 tmp1
4494 src1 = registerName register1 tmp1
4496 code2 = registerCode register2 tmp2
4497 src2 = registerName register2 tmp2
4499 code__2 dst = asmSeqThen [code1 [], code2 []] .
4500 mkSeqInstr (instr src1 src2 dst)
4502 return (Any FF64 code__2)
-- | Alpha: unary floating-point operation (Width argument ignored).
-- NOTE(review): the 'let'/'in' lines are elided in this extract.
4504 trivialUFCode _ instr x
4505 = getRegister x `thenNat` \ register ->
4506 getNewRegNat FF64 `thenNat` \ tmp ->
4508 code = registerCode register tmp
4509 src = registerName register tmp
4510 code__2 dst = code . mkSeqInstr (instr src dst)
4512 return (Any FF64 code__2)
4514 #endif /* alpha_TARGET_ARCH */
4516 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4518 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
4521 The Rules of the Game are:
4523 * You cannot assume anything about the destination register dst;
4524 it may be anything, including a fixed reg.
4526 * You may compute an operand into a fixed reg, but you may not
4527 subsequently change the contents of that fixed reg. If you
4528 want to do so, first copy the value either to a temporary
4529 or into dst. You are free to modify dst even if it happens
4530 to be a fixed reg -- that's not your problem.
4532 * You cannot assume that a fixed reg will stay live over an
4533 arbitrary computation. The same applies to the dst reg.
4535 * Temporary regs obtained from getNewRegNat are distinct from
4536 each other and from all other regs, and stay live over
4537 arbitrary computations.
4539 --------------------
4541 SDM's version of The Rules:
4543 * If getRegister returns Any, that means it can generate correct
4544 code which places the result in any register, period. Even if that
4545 register happens to be read during the computation.
4547 Corollary #1: this means that if you are generating code for an
4548 operation with two arbitrary operands, you cannot assign the result
4549 of the first operand into the destination register before computing
4550 the second operand. The second operand might require the old value
4551 of the destination register.
4553 Corollary #2: A function might be able to generate more efficient
4554 code if it knows the destination register is a new temporary (and
4555 therefore not read by any of the sub-computations).
4557 * If getRegister returns Any, then the code it generates may modify only:
4558 (a) fresh temporaries
4559 (b) the destination register
4560 (c) known registers (eg. %ecx is used by shifts)
4561 In particular, it may *not* modify global registers, unless the global
4562 register happens to be the destination register.
-- | x86: when the LEFT operand is a 32-bit literal and a reversed form
-- of the instruction exists, emit it with the literal as the immediate
-- source so only one register is needed; otherwise fall through to the
-- general genTrivialCode path.
-- NOTE(review): the 'where'/'let' line introducing 'code' is elided in
-- this extract.
4565 trivialCode width instr (Just revinstr) (CmmLit lit_a) b
4566 | is32BitLit lit_a = do
4567 b_code <- getAnyReg b
4570 = b_code dst `snocOL`
4571 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
4573 return (Any (intSize width) code)
4575 trivialCode width instr maybe_revinstr a b
4576 = genTrivialCode (intSize width) instr a b
4578 -- This is re-used for floating pt instructions too.
-- | x86: general two-operand (dst := dst `op` src) code generator,
-- shared by integer and SSE floating-point paths. 'b' is computed into
-- a non-clobberable operand, 'a' straight into dst; if dst would alias
-- b's operand, b is first saved into the fresh temporary 'tmp'.
-- NOTE(review): the 'let code dst' header and the two branch
-- separators of the clash/no-clash alternative are elided here.
4579 genTrivialCode rep instr a b = do
4580 (b_op, b_code) <- getNonClobberedOperand b
4581 a_code <- getAnyReg a
4582 tmp <- getNewRegNat rep
4584 -- We want the value of b to stay alive across the computation of a.
4585 -- But, we want to calculate a straight into the destination register,
4586 -- because the instruction only has two operands (dst := dst `op` src).
4587 -- The troublesome case is when the result of b is in the same register
4588 -- as the destination reg. In this case, we have to save b in a
4589 -- new temporary across the computation of a.
4591 | dst `regClashesWithOp` b_op =
4593 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
4595 instr (OpReg tmp) (OpReg dst)
4599 instr b_op (OpReg dst)
4601 return (Any rep code)
-- | Does writing @reg@ clobber the given operand?  True when the
-- operand is that very register, or an address mode that mentions it.
regClashesWithOp reg op = case op of
    OpReg reg2   -> reg == reg2
    OpAddr amode -> reg `elem` addrModeRegs amode
    _            -> False
-- | x86: unary operation computed in place on dst.
-- NOTE(review): the 'let code dst = ...' body lines are elided in this
-- extract -- confirm against upstream before editing.
4609 trivialUCode rep instr x = do
4610 x_code <- getAnyReg x
4615 return (Any rep code)
4619 #if i386_TARGET_ARCH
-- | i386 (x87): dyadic FP operation; x is kept in a non-clobberable
-- register so it survives the computation of y.
-- NOTE(review): the 'let'/'code dst =' framing lines are elided here.
4621 trivialFCode width instr x y = do
4622 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
4623 (y_reg, y_code) <- getSomeReg y
4625 size = floatSize width
4629 instr size x_reg y_reg dst
4630 return (Any size code)
4634 #if x86_64_TARGET_ARCH
-- | x86_64 (SSE): floating point uses the same two-operand scheme as
-- integers, so just instantiate genTrivialCode at the float size.
trivialFCode pk instr x y =
    let sz = floatSize pk
    in  genTrivialCode sz (instr sz) x y
-- | Unary floating-point operation.
-- NOTE(review): the 'let code dst = ...' body lines are elided in this
-- extract -- confirm against upstream before editing.
4642 trivialUFCode size instr x = do
4643 (x_reg, x_code) <- getSomeReg x
4649 return (Any size code)
4651 #endif /* i386_TARGET_ARCH */
4653 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4655 #if sparc_TARGET_ARCH
-- | SPARC: dyadic integer op. First clause folds a literal right
-- operand into an immediate (SPARC immediates are 13-bit, so upstream
-- guards this clause accordingly); second clause is the reg/reg form.
-- NOTE(review): the first clause's guard/'= do' line and the 'let'
-- lines are elided in this extract; tmp/tmp1/tmp2 appear unused here.
4657 trivialCode pk instr x (CmmLit (CmmInt y d))
4660 (src1, code) <- getSomeReg x
4661 tmp <- getNewRegNat II32
4663 src2 = ImmInt (fromInteger y)
4664 code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
4665 return (Any II32 code__2)
4667 trivialCode pk instr x y = do
4668 (src1, code1) <- getSomeReg x
4669 (src2, code2) <- getSomeReg y
4670 tmp1 <- getNewRegNat II32
4671 tmp2 <- getNewRegNat II32
4673 code__2 dst = code1 `appOL` code2 `snocOL`
4674 instr src1 (RIReg src2) dst
4675 return (Any II32 code__2)
-- | SPARC: dyadic FP operation. If the operand widths differ, the F32
-- operand is first promoted to F64 (FxTOy into 'tmp') so the
-- instruction always sees matching kinds.
-- NOTE(review): the 'let' header, the pk1/pk2 bindings (presumably
-- cmmExprType x / y), the 'else' keyword and the closing of the
-- returned expression are elided in this extract.
4678 trivialFCode pk instr x y = do
4679 (src1, code1) <- getSomeReg x
4680 (src2, code2) <- getSomeReg y
4681 tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
4682 tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
4683 tmp <- getNewRegNat FF64
4685 promote x = FxTOy FF32 FF64 x tmp
4691 if pk1 `cmmEqType` pk2 then
4692 code1 `appOL` code2 `snocOL`
4693 instr (floatSize pk) src1 src2 dst
4694 else if typeWidth pk1 == W32 then
4695 code1 `snocOL` promote src1 `appOL` code2 `snocOL`
4696 instr FF64 tmp src2 dst
4698 code1 `appOL` code2 `snocOL` promote src2 `snocOL`
4699 instr FF64 src1 tmp dst
4700 return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
-- | SPARC: unary integer operation ('tmp' appears unused here).
-- NOTE(review): the 'let' line is elided in this extract.
4704 trivialUCode size instr x = do
4705 (src, code) <- getSomeReg x
4706 tmp <- getNewRegNat size
4708 code__2 dst = code `snocOL` instr (RIReg src) dst
4709 return (Any size code__2)
-- | SPARC: unary floating-point operation ('tmp' appears unused here).
-- NOTE(review): the 'let' line is elided in this extract.
4712 trivialUFCode pk instr x = do
4713 (src, code) <- getSomeReg x
4714 tmp <- getNewRegNat pk
4716 code__2 dst = code `snocOL` instr src dst
4717 return (Any pk code__2)
4719 #endif /* sparc_TARGET_ARCH */
4721 #if powerpc_TARGET_ARCH
4724 Wolfgang's PowerPC version of The Rules:
4726 A slightly modified version of The Rules to take advantage of the fact
4727 that PowerPC instructions work on all registers and don't implicitly
4728 clobber any fixed registers.
4730 * The only expression for which getRegister returns Fixed is (CmmReg reg).
4732 * If getRegister returns Any, then the code it generates may modify only:
4733 (a) fresh temporaries
4734 (b) the destination register
4735 It may *not* modify global registers, unless the global
4736 register happens to be the destination register.
4737 It may not clobber any other registers. In fact, only ccalls clobber any
4739 Also, it may not modify the counter register (used by genCCall).
4741 Corollary: If a getRegister for a subexpression returns Fixed, you need
4742 not move it to a fresh temporary before evaluating the next subexpression.
4743 The Fixed register won't be modified.
4744 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
4746 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
4747 the value of the destination register.
-- | PowerPC: dyadic integer op, immediate case -- taken only when the
-- literal fits the instruction's immediate field for the given width
-- and signedness (makeImmediate returns Just).
-- NOTE(review): the '= do' line between the guard and the first bind
-- is elided in this extract.
4750 trivialCode rep signed instr x (CmmLit (CmmInt y _))
4751 | Just imm <- makeImmediate rep signed y
4753 (src1, code1) <- getSomeReg x
4754 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
4755 return (Any (intSize rep) code)
-- | PowerPC: dyadic integer op, general register/register case.
-- The signedness flag is only relevant to the immediate clause above.
trivialCode rep _signed instr x y = do
    (reg_a, code_a) <- getSomeReg x
    (reg_b, code_b) <- getSomeReg y
    let emit dst = code_a `appOL` code_b `snocOL` instr dst reg_a (RIReg reg_b)
    return (Any (intSize rep) emit)
-- | Dyadic operation with no immediate form: evaluate both operands
-- into registers and emit a single three-register instruction.
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' size instr x y = do
    (reg_l, code_l) <- getSomeReg x
    (reg_r, code_r) <- getSomeReg y
    let assemble dst = code_l `appOL` code_r `snocOL` instr dst reg_l reg_r
    return (Any size assemble)
-- | As 'trivialCodeNoImm'', for instruction constructors that take the
-- operand size explicitly: apply the size first, then delegate.
trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm size instr = trivialCodeNoImm' size (instr size)
-- | PowerPC: unary operation; evaluate the operand and append one
-- instruction writing the destination.
trivialUCode rep instr x = do
    (operand, operand_code) <- getSomeReg x
    return (Any rep (\dst -> operand_code `snocOL` instr dst operand))
4780 -- There is no "remainder" instruction on the PPC, so we have to do
4782 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-- | PowerPC: integer remainder. PPC has no rem instruction, so compute
-- x - (x `div` y) * y; the 'div' parameter supplies DIVW (signed) or
-- DIVWU (unsigned).
-- NOTE(review): the DIVW[U] line and the final SUBF of the instruction
-- list are elided in this extract -- confirm against upstream.
4784 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
4785 -> CmmExpr -> CmmExpr -> NatM Register
4786 remainderCode rep div x y = do
4787 (src1, code1) <- getSomeReg x
4788 (src2, code2) <- getSomeReg y
4789 let code dst = code1 `appOL` code2 `appOL` toOL [
4791 MULLW dst dst (RIReg src2),
4794 return (Any (intSize rep) code)
4796 #endif /* powerpc_TARGET_ARCH */
4799 -- -----------------------------------------------------------------------------
4800 -- Coercing to/from integer/floating-point...
4802 -- When going to integer, we truncate (round towards 0).
4804 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
4805 -- conversions. We have to store temporaries in memory to move
4806 -- between the integer and the floating point register sets.
4808 -- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
4809 -- pretend, on sparc at least, that double and float regs are separate
4810 -- kinds, so the value has to be computed into one kind before being
4811 -- explicitly "converted" to live in the other kind.
4813 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
4814 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
4816 #if sparc_TARGET_ARCH
4817 coerceDbl2Flt :: CmmExpr -> NatM Register
4818 coerceFlt2Dbl :: CmmExpr -> NatM Register
4821 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4823 #if alpha_TARGET_ARCH
4826 = getRegister x `thenNat` \ register ->
4827 getNewRegNat IntRep `thenNat` \ reg ->
4829 code = registerCode register reg
4830 src = registerName register reg
4832 code__2 dst = code . mkSeqInstrs [
4834 LD TF dst (spRel 0),
4837 return (Any FF64 code__2)
4841 = getRegister x `thenNat` \ register ->
4842 getNewRegNat FF64 `thenNat` \ tmp ->
4844 code = registerCode register tmp
4845 src = registerName register tmp
4847 code__2 dst = code . mkSeqInstrs [
4849 ST TF tmp (spRel 0),
4852 return (Any IntRep code__2)
4854 #endif /* alpha_TARGET_ARCH */
4856 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4858 #if i386_TARGET_ARCH
-- | i386 (x87): int -> float/double via the GITOF/GITOD pseudo-ops.
-- The W32/W64 case is non-exhaustive for other widths (as upstream).
-- NOTE(review): the 'let' line is elided in this extract.
4860 coerceInt2FP from to x = do
4861 (x_reg, x_code) <- getSomeReg x
4863 opc = case to of W32 -> GITOF; W64 -> GITOD
4864 code dst = x_code `snocOL` opc x_reg dst
4865 -- ToDo: works for non-II32 reps?
4866 return (Any (floatSize to) code)
-- | i386 (x87): float/double -> int (truncating) via GFTOI/GDTOI.
-- NOTE(review): the 'let' line is elided in this extract.
4870 coerceFP2Int from to x = do
4871 (x_reg, x_code) <- getSomeReg x
4873 opc = case from of W32 -> GFTOI; W64 -> GDTOI
4874 code dst = x_code `snocOL` opc x_reg dst
4875 -- ToDo: works for non-II32 reps?
4877 return (Any (intSize to) code)
4879 #endif /* i386_TARGET_ARCH */
4881 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4883 #if x86_64_TARGET_ARCH
-- | x86_64 (SSE): truncating FP -> int via CVTTSS2SIQ / CVTTSD2SIQ.
-- NOTE(review): the 'let' line is elided in this extract.
4885 coerceFP2Int from to x = do
4886 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4888 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
4889 code dst = x_code `snocOL` opc x_op dst
4891 return (Any (intSize to) code) -- works even if the destination rep is <II32
-- | x86_64 (SSE): int -> FP via CVTSI2SS / CVTSI2SD.
-- NOTE(review): the 'let' line is elided in this extract.
4893 coerceInt2FP from to x = do
4894 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
4896 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
4897 code dst = x_code `snocOL` opc x_op dst
4899 return (Any (floatSize to) code) -- works even if the destination rep is <II32
-- | x86_64 (SSE): float <-> double via CVTSD2SS / CVTSS2SD; the source
-- width is implied by the target width's opposite.
-- NOTE(review): the 'let' line is elided in this extract.
4901 coerceFP2FP :: Width -> CmmExpr -> NatM Register
4902 coerceFP2FP to x = do
4903 (x_reg, x_code) <- getSomeReg x
4905 opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
4906 code dst = x_code `snocOL` opc x_reg dst
4908 return (Any (floatSize to) code)
4911 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4913 #if sparc_TARGET_ARCH
-- | SPARC: int -> FP by bouncing through memory (ST below the stack
-- pointer, LD into an FP reg) then converting in place with FxTOy,
-- since integer and FP register files have no direct move.
-- NOTE(review): the 'let' line is elided in this extract.
4915 coerceInt2FP width1 width2 x = do
4916 (src, code) <- getSomeReg x
4918 code__2 dst = code `appOL` toOL [
4919 ST (intSize width1) src (spRel (-2)),
4920 LD (intSize width1) (spRel (-2)) dst,
4921 FxTOy (intSize width1) (floatSize width2) dst dst]
4922 return (Any (floatSize $ width2) code__2)
-- | SPARC: FP -> int: convert in the FP file (FxTOy), then bounce
-- through memory into an integer register.
-- NOTE(review): pk/fprep appear swapped relative to their names
-- (intSize of width1, floatSize of width2) -- verify against upstream;
-- the 'let'/'toOL [' framing lines are elided in this extract.
4925 coerceFP2Int width1 width2 x = do
4926 let pk = intSize width1
4927 fprep = floatSize width2
4929 (src, code) <- getSomeReg x
4930 reg <- getNewRegNat fprep
4931 tmp <- getNewRegNat pk
4933 code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
4935 FxTOy fprep pk src tmp,
4936 ST pk tmp (spRel (-2)),
4937 LD pk (spRel (-2)) dst]
4938 return (Any pk code__2)
-- | SPARC: narrow a double to a float with a single FxTOy conversion
-- (double and float regs are treated as separate kinds on this target).
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    let conv dst = code `snocOL` FxTOy FF64 FF32 src dst
    return (Any FF32 conv)
-- | SPARC: widen a float to a double with a single FxTOy conversion.
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    let conv dst = code `snocOL` FxTOy FF32 FF64 src dst
    return (Any FF64 conv)
4950 #endif /* sparc_TARGET_ARCH */
4952 #if powerpc_TARGET_ARCH
4953 coerceInt2FP fromRep toRep x = do
4954 (src, code) <- getSomeReg x
4955 lbl <- getNewLabelNat
4956 itmp <- getNewRegNat II32
4957 ftmp <- getNewRegNat FF64
4958 dflags <- getDynFlagsNat
4959 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
4960 Amode addr addr_code <- getAmode dynRef
4962 code' dst = code `appOL` maybe_exts `appOL` toOL [
4965 CmmStaticLit (CmmInt 0x43300000 W32),
4966 CmmStaticLit (CmmInt 0x80000000 W32)],
4967 XORIS itmp src (ImmInt 0x8000),
4968 ST II32 itmp (spRel 3),
4969 LIS itmp (ImmInt 0x4330),
4970 ST II32 itmp (spRel 2),
4971 LD FF64 ftmp (spRel 2)
4972 ] `appOL` addr_code `appOL` toOL [
4974 FSUB FF64 dst ftmp dst
4975 ] `appOL` maybe_frsp dst
4977 maybe_exts = case fromRep of
4978 W8 -> unitOL $ EXTS II8 src src
4979 W16 -> unitOL $ EXTS II16 src src
4981 maybe_frsp dst = case toRep of
4982 W32 -> unitOL $ FRSP dst dst
4984 return (Any (floatSize toRep) code')
-- | PowerPC: FP -> int: convert to an integer inside the FP register
-- file, spill the 64-bit result to the stack and read back only the
-- low word (the high word is undefined).
-- NOTE(review): the 'let' line and the convert instruction (presumably
-- FCTIWZ) are elided in this extract -- confirm against upstream.
4986 coerceFP2Int fromRep toRep x = do
4987 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
4988 (src, code) <- getSomeReg x
4989 tmp <- getNewRegNat FF64
4991 code' dst = code `appOL` toOL [
4992 -- convert to int in FP reg
4994 -- store value (64bit) from FP to stack
4995 ST FF64 tmp (spRel 2),
4996 -- read low word of value (high word is undefined)
4997 LD II32 dst (spRel 3)]
4998 return (Any (intSize toRep) code')
4999 #endif /* powerpc_TARGET_ARCH */
5002 -- -----------------------------------------------------------------------------
5003 -- eXTRA_STK_ARGS_HERE
5005 -- We (allegedly) put the first six C-call arguments in registers;
5006 -- where do we start putting the rest of them?
5008 -- Moved from MachInstrs (SDM):
5010 #if alpha_TARGET_ARCH || sparc_TARGET_ARCH
5011 eXTRA_STK_ARGS_HERE :: Int
5013 = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))