From bf8b921f2baf1135d23e6a80a8b0bc5e258c1c45 Mon Sep 17 00:00:00 2001 From: simonmar Date: Thu, 27 Mar 2003 13:54:32 +0000 Subject: [PATCH] [project @ 2003-03-27 13:54:31 by simonmar] Two performance tweaks: - Use specialised indirections, which perform the right kind of return without needing to enter the object they point to. This saves a small percentage of memory reads. - Tweak the update code to generate better code with gcc. This saves a few instructions per update. --- ghc/includes/StgMiscClosures.h | 11 ++++++++- ghc/includes/Updates.h | 28 ++++++++++++++--------- ghc/rts/StgMiscClosures.hc | 34 +++++++++++++++++++++++++--- ghc/rts/Storage.h | 20 +++++++++++------ ghc/rts/Updates.hc | 48 ++++++++++++++++++++-------------------- 5 files changed, 96 insertions(+), 45 deletions(-) diff --git a/ghc/includes/StgMiscClosures.h b/ghc/includes/StgMiscClosures.h index a038445..6cc9173 100644 --- a/ghc/includes/StgMiscClosures.h +++ b/ghc/includes/StgMiscClosures.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgMiscClosures.h,v 1.46 2003/02/21 05:34:15 sof Exp $ + * $Id: StgMiscClosures.h,v 1.47 2003/03/27 13:54:31 simonmar Exp $ * * (c) The GHC Team, 1998-2002 * @@ -54,6 +54,15 @@ extern DLL_IMPORT_RTS const StgRetInfoTable stg_apply_interp_info; /* info tables */ extern DLL_IMPORT_RTS const StgInfoTable stg_IND_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_direct_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_0_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_1_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_2_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_3_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_4_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_5_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_6_info; +extern DLL_IMPORT_RTS const StgInfoTable stg_IND_7_info; extern DLL_IMPORT_RTS const StgInfoTable stg_IND_STATIC_info; extern 
DLL_IMPORT_RTS const StgInfoTable stg_IND_PERM_info; extern DLL_IMPORT_RTS const StgInfoTable stg_IND_OLDGEN_info; diff --git a/ghc/includes/Updates.h b/ghc/includes/Updates.h index 0820b50..ac98731 100644 --- a/ghc/includes/Updates.h +++ b/ghc/includes/Updates.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Updates.h,v 1.29 2003/01/25 15:54:48 wolfgang Exp $ + * $Id: Updates.h,v 1.30 2003/03/27 13:54:31 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -27,16 +27,22 @@ */ #ifdef TICKY_TICKY -# define UPD_IND(updclosure, heapptr) UPD_PERM_IND(updclosure,heapptr) +# define UPD_IND(updclosure, heapptr) \ + UPD_PERM_IND(updclosure,heapptr) +# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \ + UPD_PERM_IND(updclosure,heapptr); and_then #else -# define UPD_IND(updclosure, heapptr) UPD_REAL_IND(updclosure,heapptr) +# define UPD_IND(updclosure, heapptr) \ + UPD_REAL_IND(updclosure,&stg_IND_info,heapptr,) +# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \ + UPD_REAL_IND(updclosure,ind_info,heapptr,and_then) #endif /* UPD_IND actually does a PERM_IND if TICKY_TICKY is on; if you *really* need an IND use UPD_REAL_IND */ #ifdef SMP -#define UPD_REAL_IND(updclosure, heapptr) \ +#define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then) \ { \ const StgInfoTable *info; \ if (Bdescr((P_)updclosure)->u.back != (bdescr *)BaseReg) { \ @@ -45,19 +51,21 @@ info = updclosure->header.info; \ } \ AWAKEN_BQ(info,updclosure); \ - updateWithIndirection(info, \ + updateWithIndirection(info, ind_info, \ (StgClosure *)updclosure, \ - (StgClosure *)heapptr); \ + (StgClosure *)heapptr, \ + and_then); \ } #else -#define UPD_REAL_IND(updclosure, heapptr) \ +#define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then) \ { \ const StgInfoTable *info; \ info = ((StgClosure *)updclosure)->header.info; \ AWAKEN_BQ(info,updclosure); \ - updateWithIndirection(info, \ + updateWithIndirection(((StgClosure 
*)updclosure)->header.info, ind_info, \ (StgClosure *)updclosure, \ - (StgClosure *)heapptr); \ + (StgClosure *)heapptr, \ + and_then); \ } #endif @@ -91,7 +99,7 @@ AWAKEN_BQ(info,updclosure); \ updateWithIndirection(info, \ (StgClosure *)updclosure, \ - (StgClosure *)heapptr); \ + (StgClosure *)heapptr,); \ } #elif defined(RTS_SUPPORTS_THREADS) diff --git a/ghc/rts/StgMiscClosures.hc b/ghc/rts/StgMiscClosures.hc index 6433a90..5f4c6ce 100644 --- a/ghc/rts/StgMiscClosures.hc +++ b/ghc/rts/StgMiscClosures.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgMiscClosures.hc,v 1.83 2003/01/08 12:37:45 simonmar Exp $ + * $Id: StgMiscClosures.hc,v 1.84 2003/03/27 13:54:32 simonmar Exp $ * * (c) The GHC Team, 1998-2002 * @@ -246,7 +246,13 @@ FN_(stg_BCO_entry) { } /* ----------------------------------------------------------------------------- - Entry code for an indirection. + Info tables for indirections. + + SPECIALISED INDIRECTIONS: we have a specialised indirection for each + kind of return (direct, vectored 0-7), so that we can avoid entering + the object when we know what kind of return it will do. The update + code (Updates.hc) updates objects with the appropriate kind of + indirection. We only do this for young-gen indirections. 
-------------------------------------------------------------------------- */ INFO_TABLE(stg_IND_info,stg_IND_entry,1,0,IND,,IF_,"IND","IND"); @@ -260,6 +266,28 @@ IF_(stg_IND_entry) FE_ } +#define IND_SPEC(n,ret) \ +INFO_TABLE(stg_IND_##n##_info,stg_IND_##n##_entry,1,0,IND,,IF_,"IND","IND"); \ +IF_(stg_IND_##n##_entry) \ +{ \ + FB_ \ + TICK_ENT_DYN_IND(Node); /* tick */ \ + R1.p = (P_) ((StgInd*)R1.p)->indirectee; \ + TICK_ENT_VIA_NODE(); \ + JMP_(ret); \ + FE_ \ +} + +IND_SPEC(direct, ENTRY_CODE(Sp[0])) +IND_SPEC(0, RET_VEC(Sp[0],0)) +IND_SPEC(1, RET_VEC(Sp[0],1)) +IND_SPEC(2, RET_VEC(Sp[0],2)) +IND_SPEC(3, RET_VEC(Sp[0],3)) +IND_SPEC(4, RET_VEC(Sp[0],4)) +IND_SPEC(5, RET_VEC(Sp[0],5)) +IND_SPEC(6, RET_VEC(Sp[0],6)) +IND_SPEC(7, RET_VEC(Sp[0],7)) + INFO_TABLE(stg_IND_STATIC_info,stg_IND_STATIC_entry,1,0,IND_STATIC,,IF_,"IND_STATIC","IND_STATIC"); IF_(stg_IND_STATIC_entry) { @@ -580,7 +608,7 @@ IF_(stg_CAF_BLACKHOLE_entry) FE_ } -#ifdef TICKY_TICKY +#ifdef EAGER_BLACKHOLING INFO_TABLE(stg_SE_BLACKHOLE_info, stg_SE_BLACKHOLE_entry,0,2,SE_BLACKHOLE,,IF_,"SE_BLACKHOLE","SE_BLACKHOLE"); IF_(stg_SE_BLACKHOLE_entry) { diff --git a/ghc/rts/Storage.h b/ghc/rts/Storage.h index 12f9982..79fee9d 100644 --- a/ghc/rts/Storage.h +++ b/ghc/rts/Storage.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Storage.h,v 1.50 2003/03/26 17:40:57 sof Exp $ + * $Id: Storage.h,v 1.51 2003/03/27 13:54:32 simonmar Exp $ * * (c) The GHC Team, 1998-2002 * @@ -163,15 +163,16 @@ recordOldToNewPtrs(StgMutClosure *p) // We zero out the slop when PROFILING is on. 
// #ifndef DEBUG #if !defined(DEBUG) && !defined(PROFILING) -#define updateWithIndirection(info, p1, p2) \ +#define updateWithIndirection(info, ind_info, p1, p2, and_then) \ { \ bdescr *bd; \ \ bd = Bdescr((P_)p1); \ if (bd->gen_no == 0) { \ ((StgInd *)p1)->indirectee = p2; \ - SET_INFO(p1,&stg_IND_info); \ + SET_INFO(p1,ind_info); \ TICK_UPD_NEW_IND(); \ + and_then; \ } else { \ ((StgIndOldGen *)p1)->indirectee = p2; \ if (info != &stg_BLACKHOLE_BQ_info) { \ @@ -182,6 +183,7 @@ recordOldToNewPtrs(StgMutClosure *p) } \ SET_INFO(p1,&stg_IND_OLDGEN_info); \ TICK_UPD_OLD_IND(); \ + and_then; \ } \ } #elif defined(PROFILING) @@ -195,7 +197,7 @@ recordOldToNewPtrs(StgMutClosure *p) // the invariants that every closure keeps its creation time in the profiling // field. So, we call LDV_recordCreate(). -#define updateWithIndirection(info, p1, p2) \ +#define updateWithIndirection(info, ind_info, p1, p2, and_then) \ { \ bdescr *bd; \ \ @@ -203,9 +205,10 @@ recordOldToNewPtrs(StgMutClosure *p) bd = Bdescr((P_)p1); \ if (bd->gen_no == 0) { \ ((StgInd *)p1)->indirectee = p2; \ - SET_INFO(p1,&stg_IND_info); \ + SET_INFO(p1,ind_info); \ LDV_recordCreate((p1)); \ TICK_UPD_NEW_IND(); \ + and_then; \ } else { \ ((StgIndOldGen *)p1)->indirectee = p2; \ if (info != &stg_BLACKHOLE_BQ_info) { \ @@ -216,6 +219,7 @@ recordOldToNewPtrs(StgMutClosure *p) } \ SET_INFO(p1,&stg_IND_OLDGEN_info); \ LDV_recordCreate((p1)); \ + and_then; \ } \ } @@ -229,7 +233,7 @@ recordOldToNewPtrs(StgMutClosure *p) * already have been updated (the mutable list will get messed up * otherwise). 
*/ -#define updateWithIndirection(info, p1, p2) \ +#define updateWithIndirection(info, ind_info, p1, p2, and_then) \ { \ bdescr *bd; \ \ @@ -237,8 +241,9 @@ recordOldToNewPtrs(StgMutClosure *p) bd = Bdescr((P_)p1); \ if (bd->gen_no == 0) { \ ((StgInd *)p1)->indirectee = p2; \ - SET_INFO(p1,&stg_IND_info); \ + SET_INFO(p1,ind_info); \ TICK_UPD_NEW_IND(); \ + and_then; \ } else { \ if (info != &stg_BLACKHOLE_BQ_info) { \ { \ @@ -259,6 +264,7 @@ recordOldToNewPtrs(StgMutClosure *p) ((StgIndOldGen *)p1)->indirectee = p2; \ SET_INFO(p1,&stg_IND_OLDGEN_info); \ TICK_UPD_OLD_IND(); \ + and_then; \ } \ } #endif diff --git a/ghc/rts/Updates.hc b/ghc/rts/Updates.hc index 373ce1b..ac5b948 100644 --- a/ghc/rts/Updates.hc +++ b/ghc/rts/Updates.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Updates.hc,v 1.38 2002/12/11 15:36:54 simonmar Exp $ + * $Id: Updates.hc,v 1.39 2003/03/27 13:54:32 simonmar Exp $ * * (c) The GHC Team, 1998-2002 * @@ -32,17 +32,19 @@ /* on entry to the update code (1) R1 points to the closure being returned - (2) R2 contains the tag (if we returned directly, non-vectored) - (3) Sp points to the update frame + (2) Sp points to the update frame */ -/* Why updatee is placed in a temporary variable here: this helps - gcc's aliasing by indicating that the location of the updatee - doesn't change across assignments. Saves one instruction in the - update code. - */ +/* The update fragment has been tuned so as to generate reasonable + code with gcc, which accounts for some of the strangeness in the + way it is written. + + In particular, the JMP_(ret) bit is passed down and pinned on the + end of each branch (there end up being two major branches in the + code), since we don't mind duplicating this jump. 
+*/ -#define UPD_FRAME_ENTRY_TEMPLATE(label,ret) \ +#define UPD_FRAME_ENTRY_TEMPLATE(label,ind_info,ret) \ STGFUN(label); \ STGFUN(label) \ { \ @@ -51,29 +53,27 @@ \ updatee = ((StgUpdateFrame *)Sp)->updatee; \ \ + /* remove the update frame from the stack */ \ + Sp += sizeofW(StgUpdateFrame); \ + \ /* Tick - it must be a con, all the paps are handled \ * in stg_upd_PAP and PAP_entry below \ */ \ TICK_UPD_CON_IN_NEW(sizeW_fromITBL(get_itbl(updatee))); \ \ - UPD_IND(updatee, R1.cl); \ - \ - /* remove the update frame from the stack */ \ - Sp += sizeofW(StgUpdateFrame); \ - \ - JMP_(ret); \ + UPD_SPEC_IND(updatee, ind_info, R1.cl, JMP_(ret)); \ FE_ \ } -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_ret,ENTRY_CODE(Sp[0])); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_0_ret,RET_VEC(Sp[0],0)); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_1_ret,RET_VEC(Sp[0],1)); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_2_ret,RET_VEC(Sp[0],2)); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_3_ret,RET_VEC(Sp[0],3)); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_4_ret,RET_VEC(Sp[0],4)); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_5_ret,RET_VEC(Sp[0],5)); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_6_ret,RET_VEC(Sp[0],6)); -UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_7_ret,RET_VEC(Sp[0],7)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_ret,&stg_IND_direct_info,ENTRY_CODE(Sp[0])); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_0_ret,&stg_IND_0_info,RET_VEC(Sp[0],0)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_1_ret,&stg_IND_1_info,RET_VEC(Sp[0],1)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_2_ret,&stg_IND_2_info,RET_VEC(Sp[0],2)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_3_ret,&stg_IND_3_info,RET_VEC(Sp[0],3)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_4_ret,&stg_IND_4_info,RET_VEC(Sp[0],4)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_5_ret,&stg_IND_5_info,RET_VEC(Sp[0],5)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_6_ret,&stg_IND_6_info,RET_VEC(Sp[0],6)); +UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_7_ret,&stg_IND_7_info,RET_VEC(Sp[0],7)); /* Make sure this table 
is big enough to handle the maximum vectored -- 1.7.10.4