Changes to the way stack checks are handled in GHCi, to fix a rare bug
when a stack check fails in a BCO.
We now aggregate all stack use from case alternatives up to the
enclosing function/thunk BCO, and do a single stack check at the
beginning of that BCO. This simplifies the stack check failure code,
because it doesn't have to cope with the case when a case alternative
needs to restart.
We still employ the trick of doing a fixed-size stack check before
running a BCO, inserting an explicit stack-check instruction in the
BCO only if it needs more stack than this fixed amount. However, the
fixed stack check is now performed only before running a
function/thunk BCO, not at return points.
-> Int
-> Int
-> [StgWord]
-> Int
-> Int
-> [StgWord]
+ -> Bool -- True <=> is a return point, rather than a function
-> [Ptr ()]
-> ProtoBCO name
-> [Ptr ()]
-> ProtoBCO name
-mkProtoBCO nm instrs_ordlist origin arity bitmap_size bitmap mallocd_blocks
+mkProtoBCO nm instrs_ordlist origin arity bitmap_size bitmap
+ is_ret mallocd_blocks
= ProtoBCO {
protoBCOName = nm,
protoBCOInstrs = maybe_with_stack_check,
= ProtoBCO {
protoBCOName = nm,
protoBCOInstrs = maybe_with_stack_check,
-- (hopefully rare) cases when the (overestimated) stack use
-- exceeds iNTERP_STACK_CHECK_THRESH.
maybe_with_stack_check
-- (hopefully rare) cases when the (overestimated) stack use
-- exceeds iNTERP_STACK_CHECK_THRESH.
maybe_with_stack_check
+ | is_ret = peep_d
+ -- don't do stack checks at return points;
+ -- everything is aggregated up to the top BCO
+ -- (which must be a function)
| stack_overest >= 65535
= pprPanic "mkProtoBCO: stack use won't fit in 16 bits"
(int stack_overest)
| stack_overest >= iNTERP_STACK_CHECK_THRESH
| stack_overest >= 65535
= pprPanic "mkProtoBCO: stack use won't fit in 16 bits"
(int stack_overest)
| stack_overest >= iNTERP_STACK_CHECK_THRESH
- = (STKCHECK stack_overest) : peep_d
+ = STKCHECK stack_overest : peep_d
| otherwise
= peep_d -- the supposedly common case
stack_overest = sum (map bciStackUse peep_d)
| otherwise
= peep_d -- the supposedly common case
stack_overest = sum (map bciStackUse peep_d)
- + 10 {- just to be really really sure -}
-- Merge local pushes
peep_d = peep (fromOL instrs_ordlist)
-- Merge local pushes
peep_d = peep (fromOL instrs_ordlist)
-- by just re-using the single top-level definition. So
-- for the wrapper itself, we must allocate it directly.
emitBc (mkProtoBCO (getName id) (toOL [PACK data_con 0, ENTER])
-- by just re-using the single top-level definition. So
-- for the wrapper itself, we must allocate it directly.
emitBc (mkProtoBCO (getName id) (toOL [PACK data_con 0, ENTER])
- (Right rhs) 0 0 [{-no bitmap-}])
+ (Right rhs) 0 0 [{-no bitmap-}] False{-not alts-})
| otherwise
= schemeR [{- No free variables -}] (id, rhs)
| otherwise
= schemeR [{- No free variables -}] (id, rhs)
in
schemeE szw_args 0 p_init body `thenBc` \ body_code ->
emitBc (mkProtoBCO (getName nm) body_code (Right original_body)
in
schemeE szw_args 0 p_init body `thenBc` \ body_code ->
emitBc (mkProtoBCO (getName nm) body_code (Right original_body)
- arity bitmap_size bitmap)
+ arity bitmap_size bitmap False{-not alts-})
fvsToEnv :: BCEnv -> VarSet -> [Id]
fvsToEnv :: BCEnv -> VarSet -> [Id]
let
alt_bco_name = getName bndr
alt_bco = mkProtoBCO alt_bco_name alt_final (Left alts)
let
alt_bco_name = getName bndr
alt_bco = mkProtoBCO alt_bco_name alt_final (Left alts)
- 0{-no arity-} d{-bitmap size-} bitmap
+ 0{-no arity-} d{-bitmap size-} bitmap True{-is alts-}
-- in
-- trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
-- "\n bitmap = " ++ show bitmap) $ do
-- in
-- trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
-- "\n bitmap = " ++ show bitmap) $ do
-- The stack use, in words, of each bytecode insn. These _must_ be
-- correct, or overestimates of reality, to be safe.
-- The stack use, in words, of each bytecode insn. These _must_ be
-- correct, or overestimates of reality, to be safe.
+-- NOTE: we aggregate the stack use from case alternatives too, so that
+-- we can do a single stack check at the beginning of a function only.
+
+-- This could all be made more accurate by keeping track of a proper
+-- stack high water mark, but it doesn't seem worth the hassle.
+
+protoBCOStackUse :: ProtoBCO a -> Int
+protoBCOStackUse bco = sum (map bciStackUse (protoBCOInstrs bco))
+
bciStackUse :: BCInstr -> Int
bciStackUse STKCHECK{} = 0
bciStackUse PUSH_L{} = 1
bciStackUse :: BCInstr -> Int
bciStackUse STKCHECK{} = 0
bciStackUse PUSH_L{} = 1
bciStackUse PUSH_G{} = 1
bciStackUse PUSH_PRIMOP{} = 1
bciStackUse PUSH_BCO{} = 1
bciStackUse PUSH_G{} = 1
bciStackUse PUSH_PRIMOP{} = 1
bciStackUse PUSH_BCO{} = 1
-bciStackUse PUSH_ALTS{} = 2
-bciStackUse PUSH_ALTS_UNLIFTED{} = 2
+bciStackUse (PUSH_ALTS bco) = 2 + protoBCOStackUse bco
+bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 + protoBCOStackUse bco
bciStackUse (PUSH_UBX _ nw) = nw
bciStackUse PUSH_APPLY_N{} = 1
bciStackUse PUSH_APPLY_V{} = 1
bciStackUse (PUSH_UBX _ nw) = nw
bciStackUse PUSH_APPLY_N{} = 1
bciStackUse PUSH_APPLY_V{} = 1
Sp--; Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
Sp--; Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
-
- // "Standard" stack check
- if (Sp - (INTERP_STACK_CHECK_THRESH+1) < SpLim) {
- Sp--; Sp[0] = (W_)&stg_enter_info;
- RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
- }
+ // Stack checks aren't necessary at return points, the stack use
+ // is aggregated into the enclosing function entry point.
goto run_BCO;
run_BCO_return_unboxed:
goto run_BCO;
run_BCO_return_unboxed:
if (doYouWantToGC()) {
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
if (doYouWantToGC()) {
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
-
- // "Standard" stack check
- if (Sp - (INTERP_STACK_CHECK_THRESH+1) < SpLim) {
- RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
- }
+ // Stack checks aren't necessary at return points, the stack use
+ // is aggregated into the enclosing function entry point.
goto run_BCO;
run_BCO_fun:
goto run_BCO;
run_BCO_fun:
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
- // "Standard" stack check
- if (Sp - (INTERP_STACK_CHECK_THRESH+1) < SpLim) {
+ // Stack check
+ if (Sp - INTERP_STACK_CHECK_THRESH < SpLim) {
Sp -= 2;
Sp[1] = (W_)obj;
Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really
Sp -= 2;
Sp[1] = (W_)obj;
Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really
- case bci_STKCHECK:
- {
- // An explicit stack check; we hope these will be rare.
+ case bci_STKCHECK: {
+ // Explicit stack check at the beginning of a function
+ // *only* (stack checks in case alternatives are
+ // propagated to the enclosing function).
int stk_words_reqd = BCO_NEXT + 1;
if (Sp - stk_words_reqd < SpLim) {
int stk_words_reqd = BCO_NEXT + 1;
if (Sp - stk_words_reqd < SpLim) {
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_apply_interp_info;
RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
+ } else {
+ goto nextInsn;