X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FcodeGen%2FStgCmmExpr.hs;h=47bf6c433dc8f8b72a90c24c8f6d2f36f1747af0;hb=8a14a210f12644e5d6ef92fdfcddc8613eae0b2c;hp=379f1cde37d755bdde2427fef3d054ab725fdcf0;hpb=e6243a818496aad82b6f47511d3bd9bc800f747d;p=ghc-hetmet.git diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 379f1cd..47bf6c4 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -115,10 +115,10 @@ cgLetNoEscapeRhs local_cc bndr rhs = ; return info } -cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd srt args body) - = cgLetNoEscapeClosure bndr local_cc cc srt (nonVoidIds args) body +cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body) + = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args) - = cgLetNoEscapeClosure bndr local_cc cc NoSRT [] (StgConApp con args) + = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args) -- For a constructor RHS we want to generate a single chunk of -- code which can be jumped to from many places, which will -- return the constructor. It's easy; just behave as if it @@ -129,17 +129,15 @@ cgLetNoEscapeClosure :: Id -- binder -> Maybe LocalReg -- Slot for saved current cost centre -> CostCentreStack -- XXX: *** NOT USED *** why not? - -> SRT -> [NonVoid Id] -- Args (as in \ args -> body) -> StgExpr -- Body (as in above) -> FCode CgIdInfo -cgLetNoEscapeClosure bndr cc_slot _unused_cc srt args body +cgLetNoEscapeClosure bndr cc_slot _unused_cc args body = do { arg_regs <- forkProc $ do { restoreCurrentCostCentre cc_slot ; arg_regs <- bindArgsToRegs args - ; c_srt <- getSRTInfo srt - ; altHeapCheck arg_regs c_srt (cgExpr body) + ; altHeapCheck arg_regs (cgExpr body) -- Using altHeapCheck just reduces -- instructions to save on stack ; return arg_regs } @@ -262,28 +260,31 @@ data GcPlan -- of the case alternative(s) into the upstream check ------------------------------------- +-- See Note [case on Bool] cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () --- cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] - -- | isBoolTy (idType bndr) - -- , isDeadBndr bndr - -- = +{- +cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] + | isBoolTy (idType bndr) + , isDeadBndr bndr + = +-} cgCase scrut bndr srt alt_type alts = do { up_hp_usg <- getVirtHp -- Upstream heap usage ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts alt_regs = map idToReg ret_bndrs simple_scrut = isSimpleScrut scrut alt_type - gc_plan | not simple_scrut = GcInAlts alt_regs srt - | isSingleton alts = NoGcInAlts - | up_hp_usg > 0 = NoGcInAlts - | otherwise = GcInAlts alt_regs srt + gcInAlts | not simple_scrut = True + | isSingleton alts = False + | up_hp_usg > 0 = False + | otherwise = True + gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts ; mb_cc <- maybeSaveCostCentre simple_scrut - ; c_srt <- getSRTInfo srt - ; withSequel (AssignTo alt_regs c_srt) - (cgExpr scrut) + ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) ; restoreCurrentCostCentre mb_cc + -- JD: We need Note: [Better Alt Heap Checks] ; bindArgsToRegs ret_bndrs ; cgAlts gc_plan (NonVoid bndr) alt_type alts } @@ -395,16 +396,15 @@ cgAltRhss gc_plan bndr alts cg_alt (con, bndrs, _uses, rhs) = getCodeR $ maybeAltHeapCheck gc_plan $ - do { pprTrace "binding args for" (ppr bndr <+> ppr con) $ bindConArgs con base_reg bndrs + do { bindConArgs con base_reg bndrs ; cgExpr rhs ; return con } maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a maybeAltHeapCheck NoGcInAlts code = code -maybeAltHeapCheck (GcInAlts regs srt) code - = do { c_srt <- getSRTInfo srt - ; altHeapCheck regs c_srt code } +maybeAltHeapCheck (GcInAlts regs _) code + = altHeapCheck regs code ----------------------------------------------------------------------------- -- Tail calls @@ -482,4 +482,77 @@ cgTailCall fun_id fun_info args node_points = nodeMustPointToIt lf_info +{- Note [case on Bool] + ~~~~~~~~~~~~~~~~~~~ +A case on a Boolean value does two things: + 1. It looks up the Boolean in a closure table and assigns the + result to the binder. + 2. It branches to the True or False case through analysis + of the closure assigned to the binder. +But the indirection through the closure table is unnecessary +if the assignment to the binder will be dead code (use isDeadBndr). + +The following example illustrates how badly the code turns out: + STG: + case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 { + GHC.Bool.False -> // sbH8 dead + GHC.Bool.True -> // sbH8 dead + }; + Cmm: + _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign + _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign + // emitReturn // MidComment + _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign + _ccsX::I64 = _sbH8::I64 & 7; // MidAssign + if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch + +The assignments to _sbH8 and _ccsX are completely unnecessary. +Instead, we should branch based on the value of _ccsW. +-} +{- Note [Better Alt Heap Checks] +If two function calls can share a return point, then they will also +get the same info table. Therefore, it's worth our effort to make +those opportunities appear as frequently as possible. + +Here are a few examples of how it should work: + + STG: + case f x of + True -> + False -> + Cmm: + r = call f(x) returns to L; + L: + if r & 7 >= 2 goto L1 else goto L2; + L1: + if Hp > HpLim then + r = gc(r); + goto L; + + L2: + +Note that the code following both the call to f(x) and the code to gc(r) +should be the same, which will allow the common blockifier to discover +that they are the same. Therefore, both function calls will return to the same +block, and they will use the same info table. + +Here's an example of the Cmm code we want from a primOp. +The primOp doesn't produce an info table for us to reuse, but that's okay: +we should still generate the same code: + STG: + case f x of + 0 -> <0-case code -- including allocation> + _ -> + Cmm: + r = a +# b; + L: + if r == 0 then goto L1 else goto L2; + L1: + if Hp > HpLim then + r = gc(r); + goto L; + <0-case code -- including allocation> + L2: + +-}