X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FnativeGen%2FMachCodeGen.hs;h=1d1cfa159699e757bdf289c5736532bc37042f36;hp=3ff958df7ebc3419e78fac4c28c11df6ea9d4750;hb=d31dfb32ea936c22628b508c28a36c12e631430a;hpb=7d817d447d3ee0df22691afad29c94ebbb334120 diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 3ff958d..1d1cfa1 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -29,6 +29,7 @@ import PprCmm ( pprExpr ) import Cmm import MachOp import CLabel +import ClosureInfo ( C_SRT(..) ) -- The rest: import StaticFlags ( opt_PIC ) @@ -61,7 +62,7 @@ import Data.Int type InstrBlock = OrdList Instr -cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop] +cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop] cmmTopCodeGen (CmmProc info lab params blocks) = do (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks picBaseMb <- getPicBaseMaybeNat @@ -119,8 +120,8 @@ stmtToInstrs stmt = case stmt of | otherwise -> assignMem_IntCode kind addr src where kind = cmmExprRep src - CmmCall target result_regs args vols - -> genCCall target result_regs args vols + CmmCall target result_regs args _ + -> genCCall target result_regs args CmmBranch id -> genBranch id CmmCondBranch arg id -> genCondJump id arg @@ -188,7 +189,7 @@ assignMem_I64Code addrTree valueTree = do return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do +assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do ChildCode64 vcode r_src_lo <- iselExpr64 valueTree let r_dst_lo = mkVReg u_dst I32 @@ -230,7 +231,7 @@ iselExpr64 (CmmLoad addrTree I64) = do rlo ) -iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64))) +iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _))) = return (ChildCode64 nilOL (mkVReg vu I32)) -- we handle addition, but rather badly @@ -265,6 +266,17 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do -- in return (ChildCode64 code rlo) +iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do + fn <- getAnyReg expr + r_dst_lo <- getNewRegNat I32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + code = fn r_dst_lo + return ( + ChildCode64 (code `snocOL` + MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) + r_dst_lo + ) + iselExpr64 expr = pprPanic "iselExpr64(i386)" (ppr expr) @@ -388,7 +400,7 @@ iselExpr64 (CmmLoad addrTree I64) = do return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rlo -iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64))) +iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _))) = return (ChildCode64 nilOL (mkVReg vu I32)) iselExpr64 (CmmLit (CmmInt i _)) = do @@ -465,7 +477,7 @@ getSomeReg expr = do getRegisterReg :: CmmReg -> Reg -getRegisterReg (CmmLocal (LocalReg u pk)) +getRegisterReg (CmmLocal (LocalReg u pk _)) = mkVReg u pk getRegisterReg (CmmGlobal mid) @@ -1148,16 +1160,30 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps -- in return (Any rep code) - {- Case2: shift length is complex (non-immediate) -} + {- Case2: shift length is complex (non-immediate) + * y must go in %ecx. + * we cannot do y first *and* put its result in %ecx, because + %ecx might be clobbered by x. + * if we do y second, then x cannot be + in a clobbered reg. Also, we cannot clobber x's reg + with the instruction itself. + * so we can either: + - do y first, put its result in a fresh tmp, then copy it to %ecx later + - do y second and put its result into %ecx. x gets placed in a fresh + tmp. 
This is likely to be better, becuase the reg alloc can + eliminate this reg->reg move here (it won't eliminate the other one, + because the move is into the fixed %ecx). + -} shift_code rep instr x y{-amount-} = do - (x_reg, x_code) <- getNonClobberedReg x + x_code <- getAnyReg x + tmp <- getNewRegNat rep y_code <- getAnyReg y let - code = x_code `appOL` + code = x_code tmp `appOL` y_code ecx `snocOL` - instr (OpReg ecx) (OpReg x_reg) + instr (OpReg ecx) (OpReg tmp) -- in - return (Fixed rep x_reg code) + return (Fixed rep tmp code) -------------------- add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register @@ -2913,9 +2939,8 @@ genCondJump id bool = do genCCall :: CmmCallTarget -- function to call - -> [(CmmReg,MachHint)] -- where to put the result - -> [(CmmExpr,MachHint)] -- arguments (of mixed type) - -> Maybe [GlobalReg] -- volatile regs to save + -> CmmHintFormals -- where to put the result + -> CmmActuals -- arguments (of mixed type) -> NatM InstrBlock -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2994,12 +3019,12 @@ genCCall fn cconv result_regs args #if i386_TARGET_ARCH -genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- write barrier compiles to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. -- we only cope with a single result for foreign calls -genCCall (CmmPrim op) [(r,_)] args vols = do +genCCall (CmmPrim op) [(r,_)] args = do case op of MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args @@ -3013,14 +3038,14 @@ genCCall (CmmPrim op) [(r,_)] args vols = do MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args - other_op -> outOfLineFloatOp op r args vols + other_op -> outOfLineFloatOp op r args where actuallyInlineFloatOp rep instr [(x,_)] = do res <- trivialUFCode rep instr x any <- anyReg res - return (any (getRegisterReg r)) + return (any (getRegisterReg (CmmLocal r))) -genCCall target dest_regs args vols = do +genCCall target dest_regs args = do let sizes = map (arg_size . cmmExprRep . 
fst) (reverse args) #if !darwin_TARGET_OS @@ -3083,8 +3108,8 @@ genCCall target dest_regs args vols = do rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest)) where r_dest_hi = getHiVRegFromLo r_dest - rep = cmmRegRep dest - r_dest = getRegisterReg dest + rep = localRegRep dest + r_dest = getRegisterReg (CmmLocal dest) assign_code many = panic "genCCall.assign_code many" return (push_code `appOL` @@ -3148,23 +3173,23 @@ genCCall target dest_regs args vols = do #if i386_TARGET_ARCH || x86_64_TARGET_ARCH -outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)] - -> Maybe [GlobalReg] -> NatM InstrBlock -outOfLineFloatOp mop res args vols +outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals + -> NatM InstrBlock +outOfLineFloatOp mop res args = do targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl let target = CmmForeignCall targetExpr CCallConv - if cmmRegRep res == F64 + if localRegRep res == F64 then - stmtToInstrs (CmmCall target [(res,FloatHint)] args vols) + stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe) else do uq <- getUniqueNat let - tmp = CmmLocal (LocalReg uq F64) + tmp = LocalReg uq F64 KindNonPtr -- in - code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols) - code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp)) + code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe) + code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) return (code1 `appOL` code2) where lbl = mkForeignLabel fn Nothing False @@ -3208,14 +3233,14 @@ outOfLineFloatOp mop res args vols #if x86_64_TARGET_ARCH -genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- write barrier compiles to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. -genCCall (CmmPrim op) [(r,_)] args vols = - outOfLineFloatOp op r args vols +genCCall (CmmPrim op) [(r,_)] args = + outOfLineFloatOp op r args -genCCall target dest_regs args vols = do +genCCall target dest_regs args = do -- load up the register arguments (stack_args, aregs, fregs, load_args_code) @@ -3347,10 +3372,10 @@ genCCall target dest_regs args vols = do (arg_reg, arg_code) <- getSomeReg arg delta <- getDeltaNat setDeltaNat (delta-arg_size) - let code' = code `appOL` toOL [ - MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)), + let code' = code `appOL` arg_code `appOL` toOL [ SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) , - DELTA (delta-arg_size)] + DELTA (delta-arg_size), + MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))] push_args rest code' | otherwise = do @@ -3401,7 +3426,7 @@ genCCall target dest_regs args vols = do stack only immediately prior to the call proper. Sigh. -} -genCCall target dest_regs argsAndHints vols = do +genCCall target dest_regs argsAndHints = do let args = map fst argsAndHints argcode_and_vregs <- mapM arg_to_int_vregs args @@ -3597,7 +3622,7 @@ outOfLineFloatOp mop = genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return $ unitOL LWSYNC -genCCall target dest_regs argsAndHints vols +genCCall target dest_regs argsAndHints = ASSERT (not $ any (`elem` [I8,I16]) argReps) -- we rely on argument promotion in the codeGen do
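
The new iselExpr64 case for MO_U_Conv _ I64 above zero-extends a narrower value on i386 by pairing a fresh low virtual register with its companion high register and moving a literal 0 into the high half. The following minimal, self-contained Haskell sketch mirrors only that idea; VReg, Instr and hiFromLo are invented toy stand-ins for this example, not GHC's real Reg/Instr/getHiVRegFromLo machinery.

-- Illustrative sketch only: toy stand-ins, not the native-codegen types.
module ZeroExtend64Sketch where

-- A virtual register, identified by a unique Int.
newtype VReg = VReg Int
  deriving (Show, Eq)

-- Just enough of an instruction set to show the lowering.
data Instr
  = MovRR VReg VReg    -- register-to-register move
  | MovImm Int VReg    -- move an immediate into a register
  deriving Show

-- A 64-bit result on a 32-bit target is a (low, high) register pair;
-- the high register is derived from the low one, mirroring the
-- getHiVRegFromLo convention used in the patch.
hiFromLo :: VReg -> VReg
hiFromLo (VReg u) = VReg (u + 1)

-- Zero-extending a narrower value to 64 bits: put the value in a fresh
-- low register, then move a literal 0 into the paired high register
-- (the MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi) step in the patch).
zeroExtendTo64 :: VReg -> Int -> ([Instr], VReg)
zeroExtendTo64 src freshUnique =
  let rLo  = VReg freshUnique
      rHi  = hiFromLo rLo
      code = [ MovRR src rLo    -- stand-in for "fn r_dst_lo"
             , MovImm 0 rHi ]   -- high half is zero for an unsigned conversion
  in (code, rLo)

main :: IO ()
main = print (zeroExtendTo64 (VReg 7) 100)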
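
The x86-64 push_args change above does two things: it appends the previously missing arg_code, and it reorders the stack store so that %rsp is decremented before the argument is written to (spRel 0). The toy model below illustrates only the ordering point; Machine, pushArgBuggy and pushArgFixed are names invented for this sketch. It shows that storing before the SUB leaves the value one word above the slot the callee will read.

-- Illustrative sketch only: a toy model of the push_args reordering.
module PushArgSketch where

import qualified Data.Map.Strict as M

-- A minimal machine: a stack pointer and word-addressed memory.
data Machine = Machine { rsp :: Int, mem :: M.Map Int Int }
  deriving Show

wordSize :: Int
wordSize = 8

-- Store a value at [rsp + 0], i.e. the MOV ... (OpAddr (spRel 0)) step.
storeAtSp :: Int -> Machine -> Machine
storeAtSp v m = m { mem = M.insert (rsp m) v (mem m) }

-- Reserve one word of stack, i.e. the SUB $8, %rsp step.
subRsp :: Machine -> Machine
subRsp m = m { rsp = rsp m - wordSize }

-- Order before the patch: store first, then adjust rsp.
-- The value ends up one word above the slot the callee will read.
pushArgBuggy :: Int -> Machine -> Machine
pushArgBuggy v = subRsp . storeAtSp v

-- Order after the patch: adjust rsp first, then store at the new top
-- of stack, which is the slot the callee actually sees.
pushArgFixed :: Int -> Machine -> Machine
pushArgFixed v = storeAtSp v . subRsp

main :: IO ()
main = do
  let m0 = Machine { rsp = 1000, mem = M.empty }
  print (pushArgBuggy 42 m0)   -- value lands at address 1000, rsp is 992
  print (pushArgFixed 42 m0)   -- value lands at address 992, where rsp points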