From 2777940384ce4740954062bedd0f6813698fc72a Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Wed, 10 Oct 2007 09:32:41 +0000 Subject: [PATCH] GHCi: use non-updatable thunks for breakpoints The extra safe points introduced for breakpoints were previously compiled as normal updatable thunks, but they are guaranteed single-entry, so we can use non-updatable thunks here. This restores the tail-call property where it was lost in some cases (although stack squeezing probably often recovered it), and should improve performance. --- compiler/ghci/ByteCodeAsm.lhs | 2 ++ compiler/ghci/ByteCodeGen.lhs | 12 +++++-- compiler/ghci/ByteCodeInstr.lhs | 7 ++-- includes/Bytecodes.h | 53 ++++++++++++++--------------- includes/StgMiscClosures.h | 2 ++ rts/Apply.cmm | 70 +++++++++++++++++++++++++++++++++++++++ rts/Interpreter.c | 13 +++++++- 7 files changed, 128 insertions(+), 31 deletions(-) diff --git a/compiler/ghci/ByteCodeAsm.lhs b/compiler/ghci/ByteCodeAsm.lhs index 572c706..9da0e34 100644 --- a/compiler/ghci/ByteCodeAsm.lhs +++ b/compiler/ghci/ByteCodeAsm.lhs @@ -278,6 +278,7 @@ mkBits findLabel st proto_insns SLIDE n by -> instr3 st bci_SLIDE n by ALLOC_AP n -> instr2 st bci_ALLOC_AP n + ALLOC_AP_NOUPD n -> instr2 st bci_ALLOC_AP_NOUPD n ALLOC_PAP arity n -> instr3 st bci_ALLOC_PAP arity n MKAP off sz -> instr3 st bci_MKAP off sz MKPAP off sz -> instr3 st bci_MKPAP off sz @@ -439,6 +440,7 @@ instrSize16s instr PUSH_APPLY_PPPPPP{} -> 1 SLIDE{} -> 3 ALLOC_AP{} -> 2 + ALLOC_AP_NOUPD{} -> 2 ALLOC_PAP{} -> 3 MKAP{} -> 3 MKPAP{} -> 3 diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs index 846737e..bb0f591 100644 --- a/compiler/ghci/ByteCodeGen.lhs +++ b/compiler/ghci/ByteCodeGen.lhs @@ -427,9 +427,15 @@ schemeE d s p (AnnLet binds (_,body)) return (push_code `appOL` more_push_code) alloc_code = toOL (zipWith mkAlloc sizes arities) - where mkAlloc sz 0 = ALLOC_AP sz + where mkAlloc sz 0 + | is_tick = ALLOC_AP_NOUPD sz + | otherwise = ALLOC_AP sz mkAlloc sz arity = ALLOC_PAP arity sz + is_tick = case binds of + AnnNonRec id _ -> occNameFS (getOccName id) == tickFS + _other -> False + compile_bind d' fvs x rhs size arity off = do bco <- schemeR fvs (x,rhs) build_thunk d' fvs size bco off arity @@ -1519,5 +1525,7 @@ newUnique = BcM $ newId :: Type -> BcM Id newId ty = do uniq <- newUnique - return $ mkSysLocal FSLIT("ticked") uniq ty + return $ mkSysLocal tickFS uniq ty + +tickFS = FSLIT("ticked") \end{code} diff --git a/compiler/ghci/ByteCodeInstr.lhs b/compiler/ghci/ByteCodeInstr.lhs index 5c9c295..50dbec1 100644 --- a/compiler/ghci/ByteCodeInstr.lhs +++ b/compiler/ghci/ByteCodeInstr.lhs @@ -104,8 +104,9 @@ data BCInstr | SLIDE Int{-this many-} Int{-down by this much-} -- To do with the heap - | ALLOC_AP !Int -- make an AP with this many payload words - | ALLOC_PAP !Int !Int -- make a PAP with this arity / payload words + | ALLOC_AP !Int -- make an AP with this many payload words + | ALLOC_AP_NOUPD !Int -- make an AP_NOUPD with this many payload words + | ALLOC_PAP !Int !Int -- make a PAP with this arity / payload words | MKAP !Int{-ptr to AP is this far down stack-} !Int{-# words-} | MKPAP !Int{-ptr to PAP is this far down stack-} !Int{-# words-} | UNPACK !Int -- unpack N words from t.o.s Constr @@ -202,6 +203,7 @@ instance Outputable BCInstr where ppr (SLIDE n d) = text "SLIDE " <+> int n <+> int d ppr (ALLOC_AP sz) = text "ALLOC_AP " <+> int sz + ppr (ALLOC_AP_NOUPD sz) = text "ALLOC_AP_NOUPD " <+> int sz ppr (ALLOC_PAP arity sz) = text "ALLOC_PAP " <+> int arity <+> int sz ppr (MKAP offset sz) = text "MKAP " <+> int sz <+> text "words," <+> int offset <+> text "stkoff" @@ -266,6 +268,7 @@ bciStackUse PUSH_APPLY_PPPP{} = 1 bciStackUse PUSH_APPLY_PPPPP{} = 1 bciStackUse PUSH_APPLY_PPPPPP{} = 1 bciStackUse ALLOC_AP{} = 1 +bciStackUse ALLOC_AP_NOUPD{} = 1 bciStackUse ALLOC_PAP{} = 1 bciStackUse (UNPACK sz) = sz bciStackUse LABEL{} = 0 diff --git a/includes/Bytecodes.h b/includes/Bytecodes.h index 3df7ddd..4aff907 100644 --- a/includes/Bytecodes.h +++ b/includes/Bytecodes.h @@ -50,32 +50,33 @@ /* #define bci_PUSH_APPLY_PPPPPPP 25 */ #define bci_SLIDE 26 #define bci_ALLOC_AP 27 -#define bci_ALLOC_PAP 28 -#define bci_MKAP 29 -#define bci_MKPAP 30 -#define bci_UNPACK 31 -#define bci_PACK 32 -#define bci_TESTLT_I 33 -#define bci_TESTEQ_I 34 -#define bci_TESTLT_F 35 -#define bci_TESTEQ_F 36 -#define bci_TESTLT_D 37 -#define bci_TESTEQ_D 38 -#define bci_TESTLT_P 39 -#define bci_TESTEQ_P 40 -#define bci_CASEFAIL 41 -#define bci_JMP 42 -#define bci_CCALL 43 -#define bci_SWIZZLE 44 -#define bci_ENTER 45 -#define bci_RETURN 46 -#define bci_RETURN_P 47 -#define bci_RETURN_N 48 -#define bci_RETURN_F 49 -#define bci_RETURN_D 50 -#define bci_RETURN_L 51 -#define bci_RETURN_V 52 -#define bci_BRK_FUN 53 +#define bci_ALLOC_AP_NOUPD 28 +#define bci_ALLOC_PAP 29 +#define bci_MKAP 30 +#define bci_MKPAP 31 +#define bci_UNPACK 32 +#define bci_PACK 33 +#define bci_TESTLT_I 34 +#define bci_TESTEQ_I 35 +#define bci_TESTLT_F 36 +#define bci_TESTEQ_F 37 +#define bci_TESTLT_D 38 +#define bci_TESTEQ_D 39 +#define bci_TESTLT_P 40 +#define bci_TESTEQ_P 41 +#define bci_CASEFAIL 42 +#define bci_JMP 43 +#define bci_CCALL 44 +#define bci_SWIZZLE 45 +#define bci_ENTER 46 +#define bci_RETURN 47 +#define bci_RETURN_P 48 +#define bci_RETURN_N 49 +#define bci_RETURN_F 50 +#define bci_RETURN_D 51 +#define bci_RETURN_L 52 +#define bci_RETURN_V 53 +#define bci_BRK_FUN 54 /* If you need to go past 255 then you will run into the flags */ /* If you need to go below 0x0100 then you will run into the instructions */ diff --git a/includes/StgMiscClosures.h b/includes/StgMiscClosures.h index 5620996..ea9e805 100644 --- a/includes/StgMiscClosures.h +++ b/includes/StgMiscClosures.h @@ -115,6 +115,7 @@ RTS_INFO(stg_MUT_CONS_info); RTS_INFO(stg_catch_info); RTS_INFO(stg_PAP_info); RTS_INFO(stg_AP_info); +RTS_INFO(stg_AP_NOUPD_info); RTS_INFO(stg_AP_STACK_info); RTS_INFO(stg_dummy_ret_info); RTS_INFO(stg_raise_info); @@ -172,6 +173,7 @@ RTS_ENTRY(stg_MUT_CONS_entry); RTS_ENTRY(stg_catch_entry); RTS_ENTRY(stg_PAP_entry); RTS_ENTRY(stg_AP_entry); +RTS_ENTRY(stg_AP_NOUPD_entry); RTS_ENTRY(stg_AP_STACK_entry); RTS_ENTRY(stg_dummy_ret_entry); RTS_ENTRY(stg_raise_entry); diff --git a/rts/Apply.cmm b/rts/Apply.cmm index 0498f00..a98edee 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -223,6 +223,76 @@ for: #endif } +/* AP_NOUPD is exactly like AP, except that no update frame is pushed. + Use for thunks that are guaranteed to be entered once only, such as + those generated by the byte-code compiler for inserting breakpoints. */ + +INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD") +{ + W_ Words; + W_ ap; + + ap = R1; + + Words = TO_W_(StgAP_n_args(ap)); + + /* + * Check for stack overflow. IMPORTANT: use a _NP check here, + * because if the check fails, we might end up blackholing this very + * closure, in which case we must enter the blackhole on return rather + * than continuing to evaluate the now-defunct closure. + */ + STK_CHK_NP(WDS(Words)); + Sp = Sp - WDS(Words); + + TICK_ENT_AP(); + LDV_ENTER(ap); + + // Enter PAP cost centre + ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL + + // Reload the stack + W_ i; + W_ p; + p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload; + i = 0; +for: + if (i < Words) { + Sp(i) = W_[p]; + p = p + WDS(1); + i = i + 1; + goto for; + } + + R1 = StgAP_fun(ap); + + // Off we go! + TICK_ENT_VIA_NODE(); + +#ifdef NO_ARG_REGS + jump %GET_ENTRY(UNTAG(R1)); +#else + W_ info; + info = %GET_FUN_INFO(UNTAG(R1)); + W_ type; + type = TO_W_(StgFunInfoExtra_fun_type(info)); + if (type == ARG_GEN) { + jump StgFunInfoExtra_slow_apply(info); + } + if (type == ARG_GEN_BIG) { + jump StgFunInfoExtra_slow_apply(info); + } + if (type == ARG_BCO) { + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_apply_interp_info; + jump stg_yield_to_interpreter; + } + jump W_[stg_ap_stack_entries + + WDS(TO_W_(StgFunInfoExtra_fun_type(info)))]; +#endif +} + /* ----------------------------------------------------------------------------- Entry Code for an AP_STACK. diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 527ebde..00830f4 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -1049,6 +1049,17 @@ run_BCO: goto nextInsn; } + case bci_ALLOC_AP_NOUPD: { + StgAP* ap; + int n_payload = BCO_NEXT; + ap = (StgAP*)allocate(AP_sizeW(n_payload)); + Sp[-1] = (W_)ap; + ap->n_args = n_payload; + SET_HDR(ap, &stg_AP_NOUPD_info, CCS_SYSTEM/*ToDo*/) + Sp --; + goto nextInsn; + } + case bci_ALLOC_PAP: { StgPAP* pap; int arity = BCO_NEXT; @@ -1370,7 +1381,7 @@ run_BCO: // Errors default: barf("interpretBCO: unknown or unimplemented opcode %d", - (int)BCO_NEXT); + (int)(bci & 0xFF)); } /* switch on opcode */ } -- 1.7.10.4