X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FcodeGen%2FStgCmmExpr.hs;h=eee4a08bc78b72780abb80eb20baa02d767f7704;hb=927df6486bc0dcb598b82702ca40c8fad0d9b25f;hp=74c69b7216819e636143fb47751e8a56a07bc1a9;hpb=176fa33f17dd78355cc572e006d2ab26898e2c69;p=ghc-hetmet.git diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 74c69b7..eee4a08 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -27,19 +27,24 @@ import StgCmmClosure import StgSyn -import MkZipCfgCmm +import MkGraph import BlockId -import Cmm() import CmmExpr import CoreSyn import DataCon +import ForeignCall import Id +import PrimOp +import SMRep import TyCon +import Type import CostCentre ( CostCentreStack, currentCCS ) +import Control.Monad (when) import Maybes import Util import FastString import Outputable +import UniqSupply ------------------------------------------------------------------------ -- cgExpr: the main function @@ -50,16 +55,21 @@ cgExpr :: StgExpr -> FCode () cgExpr (StgApp fun args) = cgIdApp fun args cgExpr (StgOpApp op args ty) = cgOpApp op args ty cgExpr (StgConApp con args) = cgConApp con args - cgExpr (StgSCC cc expr) = do { emitSetCCC cc; cgExpr expr } cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr } -cgExpr (StgLit lit) = emitReturn [CmmLit (mkSimpleLit lit)] +cgExpr (StgLit lit) = do cmm_lit <- cgLit lit + emitReturn [CmmLit cmm_lit] -cgExpr (StgLet binds expr) = do { emit (mkComment $ mkFastString "calling cgBind"); cgBind binds; emit (mkComment $ mkFastString "calling cgExpr"); cgExpr expr } -cgExpr (StgLetNoEscape _ _ binds expr) = do { cgLneBinds binds; cgExpr expr } +cgExpr (StgLet binds expr) = do { cgBind binds; cgExpr expr } +cgExpr (StgLetNoEscape _ _ binds expr) = + do { us <- newUniqSupply + ; let join_id = mkBlockId (uniqFromSupply us) + ; cgLneBinds join_id binds + ; cgExpr expr + ; emit $ mkLabel join_id} -cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) - = cgCase expr bndr srt alt_type alts +cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) = + cgCase expr bndr srt alt_type alts cgExpr (StgLam {}) = panic "cgExpr: StgLam" @@ -68,7 +78,7 @@ cgExpr (StgLam {}) = panic "cgExpr: StgLam" ------------------------------------------------------------------------ {- Generating code for a let-no-escape binding, aka join point is very -very similar to whatwe do for a case expression. The duality is +very similar to what we do for a case expression. The duality is between let-no-escape x = b in e @@ -82,33 +92,46 @@ bound only to stable things like stack locations.. The 'e' part will execute *next*, just like the scrutinee of a case. -} ------------------------- -cgLneBinds :: StgBinding -> FCode () -cgLneBinds (StgNonRec bndr rhs) - = do { local_cc <- saveCurrentCostCentre - -- See Note [Saving the current cost centre] - ; (bndr,info) <- cgLetNoEscapeRhs local_cc bndr rhs - ; addBindC bndr info } - -cgLneBinds (StgRec pairs) - = do { local_cc <- saveCurrentCostCentre - ; new_bindings <- fixC (\ new_bindings -> do - { addBindsC new_bindings - ; listFCs [ cgLetNoEscapeRhs local_cc b e - | (b,e) <- pairs ] }) - - ; addBindsC new_bindings } +cgLneBinds :: BlockId -> StgBinding -> FCode () +cgLneBinds join_id (StgNonRec bndr rhs) + = do { local_cc <- saveCurrentCostCentre + -- See Note [Saving the current cost centre] + ; info <- cgLetNoEscapeRhs join_id local_cc bndr rhs + ; addBindC (cg_id info) info } + +cgLneBinds join_id (StgRec pairs) + = do { local_cc <- saveCurrentCostCentre + ; new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs join_id local_cc b e + | (b,e) <- pairs ] }) + ; addBindsC new_bindings } + ------------------------- cgLetNoEscapeRhs - :: Maybe LocalReg -- Saved cost centre + :: BlockId -- join point for successor of let-no-escape + -> Maybe LocalReg -- Saved cost centre -> Id -> StgRhs - -> FCode (Id, CgIdInfo) + -> FCode CgIdInfo -cgLetNoEscapeRhs local_cc bndr (StgRhsClosure cc _bi _ _upd srt args body) - = cgLetNoEscapeClosure bndr local_cc cc srt args body -cgLetNoEscapeRhs local_cc bndr (StgRhsCon cc con args) - = cgLetNoEscapeClosure bndr local_cc cc NoSRT [] (StgConApp con args) +cgLetNoEscapeRhs join_id local_cc bndr rhs = + do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs + ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info + ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id) + ; return info + } + +cgLetNoEscapeRhsBody + :: Maybe LocalReg -- Saved cost centre + -> Id + -> StgRhs + -> FCode CgIdInfo +cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body) + = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body +cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args) + = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args) -- For a constructor RHS we want to generate a single chunk of -- code which can be jumped to from many places, which will -- return the constructor. It's easy; just behave as if it @@ -119,21 +142,19 @@ cgLetNoEscapeClosure :: Id -- binder -> Maybe LocalReg -- Slot for saved current cost centre -> CostCentreStack -- XXX: *** NOT USED *** why not? - -> SRT - -> [Id] -- Args (as in \ args -> body) + -> [NonVoid Id] -- Args (as in \ args -> body) -> StgExpr -- Body (as in above) - -> FCode (Id, CgIdInfo) + -> FCode CgIdInfo -cgLetNoEscapeClosure bndr cc_slot _unused_cc srt args body +cgLetNoEscapeClosure bndr cc_slot _unused_cc args body = do { arg_regs <- forkProc $ do { restoreCurrentCostCentre cc_slot ; arg_regs <- bindArgsToRegs args - ; c_srt <- getSRTInfo srt - ; altHeapCheck arg_regs c_srt (cgExpr body) + ; altHeapCheck arg_regs (cgExpr body) -- Using altHeapCheck just reduces -- instructions to save on stack ; return arg_regs } - ; return (bndr, lneIdInfo bndr arg_regs) } + ; return $ lneIdInfo bndr arg_regs} ------------------------------------------------------------------------ @@ -252,25 +273,74 @@ data GcPlan -- of the case alternative(s) into the upstream check ------------------------------------- +-- See Note [case on Bool] cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () +{- +cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] + | isBoolTy (idType bndr) + , isDeadBndr bndr + = +-} + + -- Note [ticket #3132]: we might be looking at a case of a lifted Id + -- that was cast to an unlifted type. The Id will always be bottom, + -- but we don't want the code generator to fall over here. If we + -- just emit an assignment here, the assignment will be + -- type-incorrect Cmm. Hence, we emit the usual enter/return code, + -- (and because bottom must be untagged, it will be entered and the + -- program will crash). + -- The Sequel is a type-correct assignment, albeit bogus. + -- The (dead) continuation loops; it would be better to invoke some kind + -- of panic function here. + -- + -- However, we also want to allow an assignment to be generated + -- in the case when the types are compatible, because this allows + -- some slightly-dodgy but occasionally-useful casts to be used, + -- such as in RtClosureInspect where we cast an HValue to a MutVar# + -- so we can print out the contents of the MutVar#. If we generate + -- code that enters the HValue, then we'll get a runtime panic, because + -- the HValue really is a MutVar#. The types are compatible though, + -- so we can just generate an assignment. +cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts + | isUnLiftedType (idType v) + || reps_compatible + = -- assignment suffices for unlifted types + do { when (not reps_compatible) $ + panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" + ; v_info <- getCgIdInfo v + ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)) + ; _ <- bindArgsToRegs [NonVoid bndr] + ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts } + where + reps_compatible = idCgRep v == idCgRep bndr + +cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _ + = -- fail at run-time, not compile-time + do { mb_cc <- maybeSaveCostCentre True + ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut) + ; restoreCurrentCostCentre mb_cc + ; emit $ mkComment $ mkFastString "should be unreachable code" + ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)} + cgCase scrut bndr srt alt_type alts - = do { up_hp_usg <- getVirtHp -- Upstream heap usage - ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts - alt_regs = map idToReg ret_bndrs - simple_scrut = isSimpleScrut scrut alt_type - gc_plan | not simple_scrut = GcInAlts alt_regs srt - | isSingleton alts = NoGcInAlts - | up_hp_usg > 0 = NoGcInAlts - | otherwise = GcInAlts alt_regs srt - - ; mb_cc <- maybeSaveCostCentre simple_scrut - ; c_srt <- getSRTInfo srt - ; withSequel (AssignTo alt_regs c_srt) - (cgExpr scrut) - ; restoreCurrentCostCentre mb_cc - - ; bindArgsToRegs ret_bndrs - ; cgAlts gc_plan bndr alt_type alts } + = -- the general case + do { up_hp_usg <- getVirtHp -- Upstream heap usage + ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts + alt_regs = map idToReg ret_bndrs + simple_scrut = isSimpleScrut scrut alt_type + gcInAlts | not simple_scrut = True + | isSingleton alts = False + | up_hp_usg > 0 = False + | otherwise = True + gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts + + ; mb_cc <- maybeSaveCostCentre simple_scrut + ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) + ; restoreCurrentCostCentre mb_cc + + -- JD: We need Note: [Better Alt Heap Checks] + ; _ <- bindArgsToRegs ret_bndrs + ; cgAlts gc_plan (NonVoid bndr) alt_type alts } ----------------- maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) @@ -279,17 +349,25 @@ maybeSaveCostCentre simple_scrut | otherwise = return Nothing - ----------------- isSimpleScrut :: StgExpr -> AltType -> Bool --- Simple scrutinee, does not allocate -isSimpleScrut (StgOpApp _ _ _) _ = True -isSimpleScrut (StgLit _) _ = True -isSimpleScrut (StgApp _ []) (PrimAlt _) = True +-- Simple scrutinee, does not block or allocate; hence safe to amalgamate +-- heap usage from alternatives into the stuff before the case +-- NB: if you get this wrong, and claim that the expression doesn't allocate +-- when it does, you'll deeply mess up allocation +isSimpleScrut (StgOpApp op _ _) _ = isSimpleOp op +isSimpleScrut (StgLit _) _ = True -- case 1# of { 0# -> ..; ... } +isSimpleScrut (StgApp _ []) (PrimAlt _) = True -- case x# of { 0# -> ..; ... } isSimpleScrut _ _ = False +isSimpleOp :: StgOp -> Bool +-- True iff the op cannot block or allocate +isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) = not (playSafe safe) +isSimpleOp (StgPrimOp op) = not (primOpOutOfLine op) +isSimpleOp (StgPrimCallOp _) = False + ----------------- -chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [Id] +chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id] -- These are the binders of a case that are assigned -- by the evaluation of the scrutinee -- Only non-void ones come back @@ -300,19 +378,16 @@ chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)] = nonVoidIds ids -- 'bndr' is not assigned! chooseReturnBndrs bndr (AlgAlt _) _alts - = [bndr] -- Only 'bndr' is assigned + = nonVoidIds [bndr] -- Only 'bndr' is assigned chooseReturnBndrs bndr PolyAlt _alts - = [bndr] -- Only 'bndr' is assigned + = nonVoidIds [bndr] -- Only 'bndr' is assigned chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" -- UbxTupALt has only one alternative -nonVoidIds :: [Id] -> [Id] -nonVoidIds ids = [id | id <- ids, not (isVoidRep (idPrimRep id))] - ------------------------------------- -cgAlts :: GcPlan -> Id -> AltType -> [StgAlt] -> FCode () +cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode () -- At this point the result of the case are in the binders cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] = maybeAltHeapCheck gc_plan (cgExpr rhs) @@ -347,7 +422,7 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts | (DataAlt con, cmm) <- tagged_cmms ] -- Is the constructor tag in the node reg? - ; if isSmallFamily fam_sz + ; if isSmallFamily fam_sz then let -- Yes, bndr_reg has constr. tag in ls bits tag_expr = cmmConstrTag1 (CmmReg bndr_reg) branches' = [(tag+1,branch) | (tag,branch) <- branches] @@ -366,7 +441,7 @@ cgAlts _ _ _ _ = panic "cgAlts" -- UbxTupAlt and PolyAlt have only one alternative ------------------- -cgAltRhss :: GcPlan -> Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] +cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] cgAltRhss gc_plan bndr alts = forkAlts (map cg_alt alts) where @@ -375,16 +450,13 @@ cgAltRhss gc_plan bndr alts cg_alt (con, bndrs, _uses, rhs) = getCodeR $ maybeAltHeapCheck gc_plan $ - do { bindConArgs con base_reg bndrs + do { _ <- bindConArgs con base_reg bndrs ; cgExpr rhs ; return con } maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a -maybeAltHeapCheck NoGcInAlts code - = code -maybeAltHeapCheck (GcInAlts regs srt) code - = do { c_srt <- getSRTInfo srt - ; altHeapCheck regs c_srt code } +maybeAltHeapCheck NoGcInAlts code = code +maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code ----------------------------------------------------------------------------- -- Tail calls @@ -392,19 +464,28 @@ maybeAltHeapCheck (GcInAlts regs srt) code cgConApp :: DataCon -> [StgArg] -> FCode () cgConApp con stg_args + | isUnboxedTupleCon con -- Unboxed tuple: assign and return + = do { arg_exprs <- getNonVoidArgAmodes stg_args + ; tickyUnboxedTupleReturn (length arg_exprs) + ; emitReturn arg_exprs } + + | otherwise -- Boxed constructors; allocate and return = ASSERT( stg_args `lengthIs` dataConRepArity con ) - do { idinfo <- buildDynCon (dataConWorkId con) currentCCS con stg_args + do { (idinfo, init) <- buildDynCon (dataConWorkId con) currentCCS con stg_args -- The first "con" says that the name bound to this closure is -- is "con", which is a bit of a fudge, but it only affects profiling + ; emit init ; emitReturn [idInfoToAmode idinfo] } + cgIdApp :: Id -> [StgArg] -> FCode () +cgIdApp fun_id [] | isVoidId fun_id = emitReturn [] cgIdApp fun_id args = do { fun_info <- getCgIdInfo fun_id - ; case maybeLetNoEscape fun_info of - Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args - Nothing -> cgTailCall fun_id fun_info args } + ; case maybeLetNoEscape fun_info of + Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args + Nothing -> cgTailCall fun_id fun_info args } cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () cgLneJump blk_id lne_regs args -- Join point; discard sequel @@ -413,39 +494,118 @@ cgLneJump blk_id lne_regs args -- Join point; discard sequel <*> mkBranch blk_id) } cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () -cgTailCall fun_id fun_info args - = case (getCallMethod fun_name (idCafInfo fun_id) lf_info (length args)) of +cgTailCall fun_id fun_info args = do + dflags <- getDynFlags + case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of - -- A value in WHNF, so we can just return it. + -- A value in WHNF, so we can just return it. ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? EnterIt -> ASSERT( null args ) -- Discarding arguments - do { [ret,call] <- forkAlts [ + do { let fun' = CmmLoad fun (cmmExprType fun) + ; [ret,call] <- forkAlts [ getCode $ emitReturn [fun], -- Is tagged; no need to untag - getCode $ emitCall (entryCode fun) [fun]] -- Not tagged + getCode $ do -- emit (mkAssign nodeReg fun) + emitCall (NativeNodeCall, NativeReturn) + (entryCode fun') [fun]] -- Not tagged ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) } SlowCall -> do -- A slow function call via the RTS apply routines { tickySlowCall lf_info args + ; emit $ mkComment $ mkFastString "slowCall" ; slowCall fun args } -- A direct function call (possibly with some left-over arguments) DirectEntry lbl arity -> do { tickyDirectCall arity args ; if node_points then - do call <- getCode $ directCall lbl arity args - emit (mkAssign nodeReg fun <*> call) - -- directCall lbl (arity+1) (StgVarArg fun_id : args)) - -- >>= (emit . (mkComment (mkFastString "DirectEntry") <*>)) - else directCall lbl arity args } + do emit $ mkComment $ mkFastString "directEntry" + emit (mkAssign nodeReg fun) + directCall lbl arity args + else do emit $ mkComment $ mkFastString "directEntry else" + directCall lbl arity args } JumpToIt {} -> panic "cgTailCall" -- ??? where - fun_name = idName fun_id - fun = idInfoToAmode fun_info - lf_info = cgIdInfoLF fun_info + fun_name = idName fun_id + fun = idInfoToAmode fun_info + lf_info = cgIdInfoLF fun_info node_points = nodeMustPointToIt lf_info +{- Note [case on Bool] + ~~~~~~~~~~~~~~~~~~~ +A case on a Boolean value does two things: + 1. It looks up the Boolean in a closure table and assigns the + result to the binder. + 2. It branches to the True or False case through analysis + of the closure assigned to the binder. +But the indirection through the closure table is unnecessary +if the assignment to the binder will be dead code (use isDeadBndr). + +The following example illustrates how badly the code turns out: + STG: + case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 { + GHC.Types.False -> // sbH8 dead + GHC.Types.True -> // sbH8 dead + }; + Cmm: + _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign + _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign + // emitReturn // MidComment + _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign + _ccsX::I64 = _sbH8::I64 & 7; // MidAssign + if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch + +The assignments to _sbH8 and _ccsX are completely unnecessary. +Instead, we should branch based on the value of _ccsW. +-} + +{- Note [Better Alt Heap Checks] +If two function calls can share a return point, then they will also +get the same info table. Therefore, it's worth our effort to make +those opportunities appear as frequently as possible. + +Here are a few examples of how it should work: + + STG: + case f x of + True -> + False -> + Cmm: + r = call f(x) returns to L; + L: + if r & 7 >= 2 goto L1 else goto L2; + L1: + if Hp > HpLim then + r = gc(r); + goto L; + + L2: + +Note that the code following both the call to f(x) and the code to gc(r) +should be the same, which will allow the common blockifier to discover +that they are the same. Therefore, both function calls will return to the same +block, and they will use the same info table. + +Here's an example of the Cmm code we want from a primOp. +The primOp doesn't produce an info table for us to reuse, but that's okay: +we should still generate the same code: + STG: + case f x of + 0 -> <0-case code -- including allocation> + _ -> + Cmm: + r = a +# b; + L: + if r == 0 then goto L1 else goto L2; + L1: + if Hp > HpLim then + r = gc(r); + goto L; + <0-case code -- including allocation> + L2: + +-}