import Cmm
import MachOp
import CLabel
+import ClosureInfo ( C_SRT(..) )
-- The rest:
import StaticFlags ( opt_PIC )
import Constants ( wORD_SIZE )
#ifdef DEBUG
-import Outputable ( assertPanic )
import Debug.Trace ( trace )
#endif
import Data.Maybe ( fromJust )
import Data.Bits
import Data.Word
+import Data.Int
-- -----------------------------------------------------------------------------
-- Top-level of the instruction selector
type InstrBlock = OrdList Instr
-cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
+cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks) = do
(nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
picBaseMb <- getPicBaseMaybeNat
| otherwise -> assignMem_IntCode kind addr src
where kind = cmmExprRep src
- CmmCall target result_regs args vols
- -> genCCall target result_regs args vols
+ CmmCall target result_regs args _
+ -> genCCall target result_regs args
CmmBranch id -> genBranch id
CmmCondBranch arg id -> genCondJump id arg
return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
let
r_dst_lo = mkVReg u_dst I32
rlo
)
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
= return (ChildCode64 nilOL (mkVReg vu I32))
-- we handle addition, but rather badly
-- in
return (ChildCode64 code rlo)
+iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
+ fn <- getAnyReg expr
+ r_dst_lo <- getNewRegNat I32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ code = fn r_dst_lo
+ return (
+ ChildCode64 (code `snocOL`
+ MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
+ r_dst_lo
+ )
+
iselExpr64 expr
= pprPanic "iselExpr64(i386)" (ppr expr)
-- in
return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
let
r_dst_lo = mkVReg u_dst I32
return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
rlo
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
= return (ChildCode64 nilOL (mkVReg vu I32))
iselExpr64 (CmmLit (CmmInt i _)) = do
-- in
return (ChildCode64 code rlo)
+iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do
+ (expr_reg,expr_code) <- getSomeReg expr
+ (rlo, rhi) <- getNewRegPairNat I32
+ let mov_hi = LI rhi (ImmInt 0)
+ mov_lo = MR rlo expr_reg
+ return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
iselExpr64 expr
= pprPanic "iselExpr64(powerpc)" (ppr expr)
getRegisterReg :: CmmReg -> Reg
-getRegisterReg (CmmLocal (LocalReg u pk))
+getRegisterReg (CmmLocal (LocalReg u pk _))
= mkVReg u pk
getRegisterReg (CmmGlobal mid)
-- in
return (Any rep code)
- {- Case2: shift length is complex (non-immediate) -}
+ {- Case2: shift length is complex (non-immediate)
+ * y must go in %ecx.
+ * we cannot do y first *and* put its result in %ecx, because
+ %ecx might be clobbered by x.
+ * if we do y second, then x cannot be
+ in a clobbered reg. Also, we cannot clobber x's reg
+ with the instruction itself.
+ * so we can either:
+ - do y first, put its result in a fresh tmp, then copy it to %ecx later
+ - do y second and put its result into %ecx. x gets placed in a fresh
+ tmp. This is likely to be better, becuase the reg alloc can
+ eliminate this reg->reg move here (it won't eliminate the other one,
+ because the move is into the fixed %ecx).
+ -}
shift_code rep instr x y{-amount-} = do
- (x_reg, x_code) <- getNonClobberedReg x
+ x_code <- getAnyReg x
+ tmp <- getNewRegNat rep
y_code <- getAnyReg y
let
- code = x_code `appOL`
+ code = x_code tmp `appOL`
y_code ecx `snocOL`
- instr (OpReg ecx) (OpReg x_reg)
+ instr (OpReg ecx) (OpReg tmp)
-- in
- return (Fixed rep x_reg code)
+ return (Fixed rep tmp code)
--------------------
add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
[y, CmmLit (CmmInt shift _)]])
| shift == 0 || shift == 1 || shift == 2 || shift == 3
- = do (x_reg, x_code) <- getNonClobberedReg x
- -- x must be in a temp, because it has to stay live over y_code
- -- we could compre x_reg and y_reg and do something better here...
- (y_reg, y_code) <- getSomeReg y
- let
- code = x_code `appOL` y_code
- base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
- return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
- code)
+ = x86_complex_amode x y shift 0
+
+getAmode (CmmMachOp (MO_Add rep)
+ [x, CmmMachOp (MO_Add _)
+ [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
+ CmmLit (CmmInt offset _)]])
+ | shift == 0 || shift == 1 || shift == 2 || shift == 3
+ && not (is64BitInteger offset)
+ = x86_complex_amode x y shift offset
+
+getAmode (CmmMachOp (MO_Add rep) [x,y])
+ = x86_complex_amode x y 0 0
getAmode (CmmLit lit) | not (is64BitLit lit)
= return (Amode (ImmAddr (litToImm lit) 0) nilOL)
(reg,code) <- getSomeReg expr
return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
+x86_complex_amode base index shift offset
+ = do (x_reg, x_code) <- getNonClobberedReg base
+ -- x must be in a temp, because it has to stay live over y_code
+ -- we could compre x_reg and y_reg and do something better here...
+ (y_reg, y_code) <- getSomeReg index
+ let
+ code = x_code `appOL` y_code
+ base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
+ code)
+
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#endif
is64BitInteger :: Integer -> Bool
-is64BitInteger i = i > 0x7fffffff || i < -0x80000000
+is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
+ where i64 = fromIntegral i :: Int64
+ -- a CmmInt is intended to be truncated to the appropriate
+ -- number of bits, so here we truncate it to Int64. This is
+ -- important because e.g. -1 as a CmmInt might be either
+ -- -1 or 18446744073709551615.
-- -----------------------------------------------------------------------------
-- The 'CondCode' type: Condition codes passed up the tree.
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- integer assignment to memory
+
+-- specific case of adding/subtracting an integer to a particular address.
+-- ToDo: catch other cases where we can use an operation directly on a memory
+-- address.
+assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
+ CmmLit (CmmInt i _)])
+ | addr == addr2, pk /= I64 || not (is64BitInteger i),
+ Just instr <- check op
+ = do Amode amode code_addr <- getAmode addr
+ let code = code_addr `snocOL`
+ instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
+ return code
+ where
+ check (MO_Add _) = Just ADD
+ check (MO_Sub _) = Just SUB
+ check _ = Nothing
+ -- ToDo: more?
+
+-- general case
assignMem_IntCode pk addr src = do
Amode addr code_addr <- getAmode addr
(code_src, op_src) <- get_op_RI src
genCCall
:: CmmCallTarget -- function to call
- -> [(CmmReg,MachHint)] -- where to put the result
- -> [(CmmExpr,MachHint)] -- arguments (of mixed type)
- -> Maybe [GlobalReg] -- volatile regs to save
+ -> CmmHintFormals -- where to put the result
+ -> CmmActuals -- arguments (of mixed type)
-> NatM InstrBlock
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH
-genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
-- we only cope with a single result for foreign calls
-genCCall (CmmPrim op) [(r,_)] args vols = do
+genCCall (CmmPrim op) [(r,_)] args = do
case op of
MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
- other_op -> outOfLineFloatOp op r args vols
+ other_op -> outOfLineFloatOp op r args
where
actuallyInlineFloatOp rep instr [(x,_)]
= do res <- trivialUFCode rep instr x
any <- anyReg res
- return (any (getRegisterReg r))
+ return (any (getRegisterReg (CmmLocal r)))
-genCCall target dest_regs args vols = do
+genCCall target dest_regs args = do
let
sizes = map (arg_size . cmmExprRep . fst) (reverse args)
#if !darwin_TARGET_OS
rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
where
r_dest_hi = getHiVRegFromLo r_dest
- rep = cmmRegRep dest
- r_dest = getRegisterReg dest
+ rep = localRegRep dest
+ r_dest = getRegisterReg (CmmLocal dest)
assign_code many = panic "genCCall.assign_code many"
return (push_code `appOL`
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
- -> Maybe [GlobalReg] -> NatM InstrBlock
-outOfLineFloatOp mop res args vols
+outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
+ -> NatM InstrBlock
+outOfLineFloatOp mop res args
= do
targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
let target = CmmForeignCall targetExpr CCallConv
- if cmmRegRep res == F64
+ if localRegRep res == F64
then
- stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
+ stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe)
else do
uq <- getUniqueNat
let
- tmp = CmmLocal (LocalReg uq F64)
+ tmp = LocalReg uq F64 KindNonPtr
-- in
- code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
- code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
+ code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe)
+ code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
return (code1 `appOL` code2)
where
lbl = mkForeignLabel fn Nothing False
#if x86_64_TARGET_ARCH
-genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
-genCCall (CmmPrim op) [(r,_)] args vols =
- outOfLineFloatOp op r args vols
+genCCall (CmmPrim op) [(r,_)] args =
+ outOfLineFloatOp op r args
-genCCall target dest_regs args vols = do
+genCCall target dest_regs args = do
-- load up the register arguments
(stack_args, aregs, fregs, load_args_code)
F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
where
- rep = cmmRegRep dest
- r_dest = getRegisterReg dest
+ rep = localRegRep dest
+ r_dest = getRegisterReg (CmmLocal dest)
assign_code many = panic "genCCall.assign_code many"
return (load_args_code `appOL`
(arg_reg, arg_code) <- getSomeReg arg
delta <- getDeltaNat
setDeltaNat (delta-arg_size)
- let code' = code `appOL` toOL [
- MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)),
+ let code' = code `appOL` arg_code `appOL` toOL [
SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
- DELTA (delta-arg_size)]
+ DELTA (delta-arg_size),
+ MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
push_args rest code'
| otherwise = do
stack only immediately prior to the call proper. Sigh.
-}
-genCCall target dest_regs argsAndHints vols = do
+genCCall target dest_regs argsAndHints = do
let
args = map fst argsAndHints
argcode_and_vregs <- mapM arg_to_int_vregs args
-}
-genCCall (CmmPrim MO_WriteBarrier) _ _ _
+genCCall (CmmPrim MO_WriteBarrier) _ _
= return $ unitOL LWSYNC
-genCCall target dest_regs argsAndHints vols
+genCCall target dest_regs argsAndHints
= ASSERT (not $ any (`elem` [I8,I16]) argReps)
-- we rely on argument promotion in the codeGen
do
| rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
MR r_dest r4]
| otherwise -> unitOL (MR r_dest r3)
- where rep = cmmRegRep dest
- r_dest = getRegisterReg dest
+ where rep = cmmRegRep (CmmLocal dest)
+ r_dest = getRegisterReg (CmmLocal dest)
outOfLineFloatOp mop =
do
-- -----------------------------------------------------------------------------
-- Coercing to/from integer/floating-point...
+-- When going to integer, we truncate (round towards 0).
+
-- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
-- conversions. We have to store temporaries in memory to move
-- between the integer and the floating point register sets.
coerceFP2Int from to x = do
(x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
let
- opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+ opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
code dst = x_code `snocOL` opc x_op dst
-- in
return (Any to code) -- works even if the destination rep is <I32