From bd3a364da7956c269d31645995d0d775c52f6a84 Mon Sep 17 00:00:00 2001 From: "Michael D. Adams" Date: Fri, 25 May 2007 19:38:04 +0000 Subject: [PATCH] Moved global register saving from the backend to codeGen This frees the Cmm data type from keeping a list of live global registers in CmmCall which helps prepare for the CPS conversion phase. CPS conversion does its own liveness analysis and takes input that should not directly refer to parameter registers (e.g. R1, F5, D3, L2). Since these are the only things which could occur in the live global register list, CPS conversion makes that field of the CmmCall constructor obsolite. Once the CPS conversion pass is fully implemented, global register saving will move from codeGen into the CPS pass. Until then, this patch is worth scrutinizing and testing to ensure it doesn't cause any performance or correctness problems as the code passed to the backends by the CPS converting will look very similar to the code that this patch makes codeGen pass to the backend. --- compiler/cmm/Cmm.hs | 4 ---- compiler/cmm/CmmLint.hs | 2 +- compiler/cmm/CmmOpt.hs | 6 +++--- compiler/cmm/PprC.hs | 30 +++++++++--------------------- compiler/cmm/PprCmm.hs | 6 +++--- compiler/codeGen/CgForeignCall.hs | 19 ++++++++++++++----- compiler/codeGen/CgUtils.hs | 12 +++++++++++- compiler/nativeGen/AsmCodeGen.lhs | 13 ++++--------- compiler/nativeGen/MachCodeGen.hs | 33 ++++++++++++++++----------------- compiler/nativeGen/MachRegs.lhs | 4 ++++ 10 files changed, 65 insertions(+), 64 deletions(-) diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index 7438750..986f486 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -116,10 +116,6 @@ data CmmStmt CmmCallTarget CmmFormals -- zero or more results CmmActuals -- zero or more arguments - (Maybe [GlobalReg]) -- Global regs that may need to be saved - -- if they will be clobbered by the call. - -- Nothing <=> save *all* globals that - -- might be clobbered. | CmmBranch BlockId -- branch to another BB in this fn diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 875876f..0812347 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -117,7 +117,7 @@ lintCmmStmt (CmmStore l r) = do lintCmmExpr l lintCmmExpr r return () -lintCmmStmt (CmmCall _target _res args _vols) = mapM_ (lintCmmExpr.fst) args +lintCmmStmt (CmmCall _target _res args) = mapM_ (lintCmmExpr.fst) args lintCmmStmt (CmmCondBranch e _id) = lintCmmExpr e >> checkCond e >> return () lintCmmStmt (CmmSwitch e _branches) = do erep <- lintCmmExpr e diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 9a51215..aa5a788 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -140,7 +140,7 @@ lookForInline u expr (stmt:stmts) getStmtUses :: CmmStmt -> UniqFM Int getStmtUses (CmmAssign _ e) = getExprUses e getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2) -getStmtUses (CmmCall target _ es _) +getStmtUses (CmmCall target _ es) = plusUFM_C (+) (uses target) (getExprsUses (map fst es)) where uses (CmmForeignCall e _) = getExprUses e uses _ = emptyUFM @@ -161,8 +161,8 @@ getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es) inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e) inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2) -inlineStmt u a (CmmCall target regs es vols) - = CmmCall (infn target) regs es' vols +inlineStmt u a (CmmCall target regs es) + = CmmCall (infn target) regs es' where infn (CmmForeignCall fn cconv) = CmmForeignCall fn cconv infn (CmmPrim p) = CmmPrim p es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ] diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index b8ba5b7..d9bdca5 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -198,11 +198,11 @@ pprStmt stmt = case stmt of where rep = cmmExprRep src - CmmCall (CmmForeignCall fn cconv) results args volatile -> + CmmCall (CmmForeignCall fn cconv) results args -> -- Controversial: leave this out for now. -- pprUndef fn $$ - pprCall ppr_fn cconv results args volatile + pprCall ppr_fn cconv results args where ppr_fn = case fn of CmmLit (CmmLabel lbl) -> pprCLabel lbl @@ -219,8 +219,8 @@ pprStmt stmt = case stmt of ptext SLIT("#undef") <+> pprCLabel lbl pprUndef _ = empty - CmmCall (CmmPrim op) results args volatile -> - pprCall ppr_fn CCallConv results args volatile + CmmCall (CmmPrim op) results args -> + pprCall ppr_fn CCallConv results args where ppr_fn = pprCallishMachOp_for_C op @@ -719,15 +719,14 @@ pprLocalReg (LocalReg uniq _rep) = char '_' <> ppr uniq -- Foreign Calls pprCall :: SDoc -> CCallConv -> [(CmmReg,MachHint)] -> [(CmmExpr,MachHint)] - -> Maybe [GlobalReg] -> SDoc + -> SDoc -pprCall ppr_fn cconv results args vols +pprCall ppr_fn cconv results args | not (is_cish cconv) = panic "pprCall: unknown calling convention" | otherwise - = save vols $$ - ptext SLIT("CALLER_SAVE_SYSTEM") $$ + = #if x86_64_TARGET_ARCH -- HACK around gcc optimisations. -- x86_64 needs a __DISCARD__() here, to create a barrier between @@ -739,9 +738,7 @@ pprCall ppr_fn cconv results args vols then ptext SLIT("__DISCARD__();") else empty) $$ #endif - ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi $$ - ptext SLIT("CALLER_RESTORE_SYSTEM") $$ - restore vols + ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi where ppr_assign [] rhs = rhs ppr_assign [(reg@(CmmGlobal BaseReg), hint)] rhs @@ -769,15 +766,6 @@ pprCall ppr_fn cconv results args vols pprUnHint SignedHint rep = parens (machRepCType rep) pprUnHint _ _ = empty - save = save_restore SLIT("CALLER_SAVE") - restore = save_restore SLIT("CALLER_RESTORE") - - -- Nothing says "I don't know what's live; save everything" - -- CALLER_SAVE_USER is defined in ghc/includes/Regs.h - save_restore txt Nothing = ptext txt <> ptext SLIT("_USER") - save_restore txt (Just these) = vcat (map saveRestoreGlobal these) - where saveRestoreGlobal r = ptext txt <> char '_' <> pprGlobalRegName r - pprGlobalRegName :: GlobalReg -> SDoc pprGlobalRegName gr = case gr of VanillaReg n -> char 'R' <> int n -- without the .w suffix @@ -859,7 +847,7 @@ te_Lit _ = return () te_Stmt :: CmmStmt -> TE () te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r -te_Stmt (CmmCall _ rs es _) = mapM_ (te_Reg.fst) rs >> +te_Stmt (CmmCall _ rs es) = mapM_ (te_Reg.fst) rs >> mapM_ (te_Expr.fst) es te_Stmt (CmmCondBranch e _) = te_Expr e te_Stmt (CmmSwitch e _) = te_Expr e diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 0c79f6f..4ade7a4 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -150,7 +150,7 @@ pprStmt stmt = case stmt of -- call "ccall" foo(x, y)[r1, r2]; -- ToDo ppr volatile - CmmCall (CmmForeignCall fn cconv) results args _volatile -> + CmmCall (CmmForeignCall fn cconv) results args -> hcat [ ptext SLIT("call"), space, doubleQuotes(ppr cconv), space, target fn, parens ( commafy $ map ppr args ), @@ -161,9 +161,9 @@ pprStmt stmt = case stmt of target (CmmLit lit) = pprLit lit target fn' = parens (ppr fn') - CmmCall (CmmPrim op) results args volatile -> + CmmCall (CmmPrim op) results args -> pprStmt (CmmCall (CmmForeignCall (CmmLit lbl) CCallConv) - results args volatile) + results args) where lbl = CmmLabel (mkForeignLabel (mkFastString (show op)) Nothing False) diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index da52bd0..c4af511 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -36,6 +36,12 @@ import Constants import StaticFlags import Outputable +import MachRegs (callerSaveVolatileRegs) + -- HACK: this is part of the NCG so we shouldn't use this, but we need + -- it for now to eliminate the need for saved regs to be in CmmCall. + -- The long term solution is to factor callerSaveVolatileRegs + -- from nativeGen into codeGen + import Control.Monad -- ----------------------------------------------------------------------------- @@ -105,30 +111,33 @@ emitForeignCall' emitForeignCall' safety results target args vols | not (playSafe safety) = do temp_args <- load_args_into_temps args - stmtC (CmmCall target results temp_args vols) + let (caller_save, caller_load) = callerSaveVolatileRegs vols + stmtsC caller_save + stmtC (CmmCall target results temp_args) + stmtsC caller_load | otherwise = do id <- newTemp wordRep temp_args <- load_args_into_temps args temp_target <- load_target_into_temp target + let (caller_save, caller_load) = callerSaveVolatileRegs vols emitSaveThreadState + stmtsC caller_save stmtC (CmmCall (CmmForeignCall suspendThread CCallConv) [(id,PtrHint)] [ (CmmReg (CmmGlobal BaseReg), PtrHint) ] - vols ) - stmtC (CmmCall temp_target results temp_args vols) + stmtC (CmmCall temp_target results temp_args) stmtC (CmmCall (CmmForeignCall resumeThread CCallConv) [ (CmmGlobal BaseReg, PtrHint) ] -- Assign the result to BaseReg: we -- might now have a different -- Capability! [ (CmmReg id, PtrHint) ] - vols ) + stmtsC caller_load emitLoadThreadState - suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 0e8d6c8..2da6005 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -53,6 +53,12 @@ import FastString import PackageConfig import Outputable +import MachRegs (callerSaveVolatileRegs) + -- HACK: this is part of the NCG so we shouldn't use this, but we need + -- it for now to eliminate the need for saved regs to be in CmmCall. + -- The long term solution is to factor callerSaveVolatileRegs + -- from nativeGen into codeGen + import Data.Char import Data.Bits import Data.Word @@ -276,8 +282,12 @@ emitRtsCall' -> [(CmmExpr,MachHint)] -> Maybe [GlobalReg] -> Code -emitRtsCall' res fun args vols = stmtC (CmmCall target res args vols) +emitRtsCall' res fun args vols = do + stmtsC caller_save + stmtC (CmmCall target res args) + stmtsC caller_load where + (caller_save, caller_load) = callerSaveVolatileRegs vols target = CmmForeignCall fun_expr CCallConv fun_expr = mkLblExpr (mkRtsCodeLabel fun) diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index ff3063c..f909d24 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -444,16 +444,11 @@ fixAssign (CmmAssign (CmmGlobal reg) src) where reg_or_addr = get_GlobalReg_reg_or_addr reg -fixAssign (CmmCall target results args vols) +fixAssign (CmmCall target results args) = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) -> - returnUs (caller_save ++ - CmmCall target results' args vols : - caller_restore ++ + returnUs (CmmCall target results' args : concat stores) where - -- we also save/restore any caller-saves STG registers here - (caller_save, caller_restore) = callerSaveVolatileRegs vols - fixResult g@(CmmGlobal reg,hint) = case get_GlobalReg_reg_or_addr reg of Left realreg -> returnUs (g, []) @@ -539,7 +534,7 @@ cmmStmtConFold stmt -> do addr' <- cmmExprConFold JumpReference addr return $ CmmJump addr' regs - CmmCall target regs args vols + CmmCall target regs args -> do target' <- case target of CmmForeignCall e conv -> do e' <- cmmExprConFold CallReference e @@ -548,7 +543,7 @@ cmmStmtConFold stmt args' <- mapM (\(arg, hint) -> do arg' <- cmmExprConFold DataReference arg return (arg', hint)) args - return $ CmmCall target' regs args' vols + return $ CmmCall target' regs args' CmmCondBranch test dest -> do test' <- cmmExprConFold DataReference test diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 3abf6a4..39e0ac6 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -119,8 +119,8 @@ stmtToInstrs stmt = case stmt of | otherwise -> assignMem_IntCode kind addr src where kind = cmmExprRep src - CmmCall target result_regs args vols - -> genCCall target result_regs args vols + CmmCall target result_regs args + -> genCCall target result_regs args CmmBranch id -> genBranch id CmmCondBranch arg id -> genCondJump id arg @@ -2940,7 +2940,6 @@ genCCall :: CmmCallTarget -- function to call -> [(CmmReg,MachHint)] -- where to put the result -> [(CmmExpr,MachHint)] -- arguments (of mixed type) - -> Maybe [GlobalReg] -- volatile regs to save -> NatM InstrBlock -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -3019,12 +3018,12 @@ genCCall fn cconv result_regs args #if i386_TARGET_ARCH -genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- write barrier compiles to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. -- we only cope with a single result for foreign calls -genCCall (CmmPrim op) [(r,_)] args vols = do +genCCall (CmmPrim op) [(r,_)] args = do case op of MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args @@ -3038,14 +3037,14 @@ genCCall (CmmPrim op) [(r,_)] args vols = do MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args - other_op -> outOfLineFloatOp op r args vols + other_op -> outOfLineFloatOp op r args where actuallyInlineFloatOp rep instr [(x,_)] = do res <- trivialUFCode rep instr x any <- anyReg res return (any (getRegisterReg r)) -genCCall target dest_regs args vols = do +genCCall target dest_regs args = do let sizes = map (arg_size . cmmExprRep . fst) (reverse args) #if !darwin_TARGET_OS @@ -3174,21 +3173,21 @@ genCCall target dest_regs args vols = do #if i386_TARGET_ARCH || x86_64_TARGET_ARCH outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)] - -> Maybe [GlobalReg] -> NatM InstrBlock -outOfLineFloatOp mop res args vols + -> NatM InstrBlock +outOfLineFloatOp mop res args = do targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl let target = CmmForeignCall targetExpr CCallConv if cmmRegRep res == F64 then - stmtToInstrs (CmmCall target [(res,FloatHint)] args vols) + stmtToInstrs (CmmCall target [(res,FloatHint)] args) else do uq <- getUniqueNat let tmp = CmmLocal (LocalReg uq F64) -- in - code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols) + code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args) code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp)) return (code1 `appOL` code2) where @@ -3233,14 +3232,14 @@ outOfLineFloatOp mop res args vols #if x86_64_TARGET_ARCH -genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- write barrier compiles to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. -genCCall (CmmPrim op) [(r,_)] args vols = - outOfLineFloatOp op r args vols +genCCall (CmmPrim op) [(r,_)] args = + outOfLineFloatOp op r args -genCCall target dest_regs args vols = do +genCCall target dest_regs args = do -- load up the register arguments (stack_args, aregs, fregs, load_args_code) @@ -3426,7 +3425,7 @@ genCCall target dest_regs args vols = do stack only immediately prior to the call proper. Sigh. -} -genCCall target dest_regs argsAndHints vols = do +genCCall target dest_regs argsAndHints = do let args = map fst argsAndHints argcode_and_vregs <- mapM arg_to_int_vregs args @@ -3622,7 +3621,7 @@ outOfLineFloatOp mop = genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return $ unitOL LWSYNC -genCCall target dest_regs argsAndHints vols +genCCall target dest_regs argsAndHints = ASSERT (not $ any (`elem` [I8,I16]) argReps) -- we rely on argument promotion in the codeGen do diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs index df3be5e..c4f84a4 100644 --- a/compiler/nativeGen/MachRegs.lhs +++ b/compiler/nativeGen/MachRegs.lhs @@ -343,6 +343,10 @@ get_Regtable_addr_from_offset rep offset -- Here we generate the sequence of saves/restores required around a -- foreign call instruction. +-- TODO: reconcile with includes/Regs.h +-- * Regs.h claims that BaseReg should be saved last and loaded first +-- * This might not have been tickled before since BaseReg is callee save +-- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim callerSaveVolatileRegs :: Maybe [GlobalReg] -> ([CmmStmt], [CmmStmt]) callerSaveVolatileRegs vols = (caller_save, caller_load) where -- 1.7.10.4