Fix #249 (-caf-all bugs)
[ghc-hetmet.git] / rts / Updates.h
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Performing updates.
6  *
7  * ---------------------------------------------------------------------------*/
8
9 #ifndef UPDATES_H
10 #define UPDATES_H
11
12 /* -----------------------------------------------------------------------------
13    Updates
14
15    We have two layers of update macros.  The top layer, UPD_IND() and
16    friends perform all the work of an update.  In detail:
17
18       - if the closure being updated is a blocking queue, then all the
19         threads waiting on the blocking queue are updated.
20
21       - then the lower level updateWithIndirection() macro is invoked 
22         to actually replace the closure with an indirection (see below).
23
24    -------------------------------------------------------------------------- */
25
26 #  define SEMI ;
27 # define UPD_IND(updclosure, heapptr) \
28    UPD_REAL_IND(updclosure,INFO_PTR(stg_IND_info),heapptr,SEMI)
29 # define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
30    UPD_REAL_IND(updclosure,ind_info,heapptr,and_then)
31
32 /* These macros have to work in both C and C--, so here's the
33  * impedance matching:
34  */
35 #ifdef CMINUSMINUS
36 #define BLOCK_BEGIN
37 #define BLOCK_END
38 #define DECLARE_IPTR(info)  W_ info
39 #define FCALL               foreign "C"
40 #define INFO_PTR(info)      info
41 #define ARG_PTR             "ptr"
42 #else
43 #define BLOCK_BEGIN         {
44 #define BLOCK_END           }
45 #define DECLARE_IPTR(info)  const StgInfoTable *(info)
46 #define FCALL               /* nothing */
47 #define INFO_PTR(info)      &info
48 #define StgBlockingQueue_blocking_queue(closure) \
49     (((StgBlockingQueue *)closure)->blocking_queue)
50 #define ARG_PTR             /* nothing */
51 #endif
52
53 /* krc: there used to be an UPD_REAL_IND and an
54    UPD_PERM_IND, the latter of which was used for
55    ticky and cost-centre profiling.
56    for now, we just have UPD_REAL_IND. */
57 #define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then)   \
58         BLOCK_BEGIN                                             \
59         DECLARE_IPTR(info);                                     \
60         info = GET_INFO(updclosure);                            \
61         updateWithIndirection(ind_info,                         \
62                               updclosure,                       \
63                               heapptr,                          \
64                               and_then);                        \
65         BLOCK_END
66
67 #if defined(RTS_SUPPORTS_THREADS)
68
69 #  define UPD_IND_NOLOCK(updclosure, heapptr)                   \
70         BLOCK_BEGIN                                             \
71         updateWithIndirection(INFO_PTR(stg_IND_info),           \
72                               updclosure,                       \
73                               heapptr,);                        \
74         BLOCK_END
75
76 #else
77 #define UPD_IND_NOLOCK(updclosure,heapptr) UPD_IND(updclosure,heapptr)
78 #endif /* RTS_SUPPORTS_THREADS */
79
80 /* -----------------------------------------------------------------------------
81    Awaken any threads waiting on a blocking queue (BLACKHOLE_BQ).
82    -------------------------------------------------------------------------- */
83
84 #if defined(PAR) 
85
86 /* 
87    In a parallel setup several types of closures might have a blocking queue:
88      BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
89                       reawakened via calling UPD_IND on that closure after
90                       having finished the computation of the graph
91      FETCH_ME_BQ  ... a global indirection (FETCH_ME) may be entered by a 
92                       local TSO, turning it into a FETCH_ME_BQ; it will be
93                       reawakened via calling processResume
94      RBH          ... a revertible black hole may be entered by another 
95                       local TSO, putting it onto its blocking queue; since
96                       RBHs only exist while the corresponding closure is in 
97                       transit, they will be reawakened via calling 
98                       convertToFetchMe (upon processing an ACK message)
99
100    In a parallel setup a blocking queue may contain 3 types of closures:
101      TSO           ... as in the default concurrent setup
102      BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for
103                        the result of the current computation
104      CONSTR        ... an RBHSave closure (which contains data ripped out of
105                        the closure to make room for a blocking queue; since
106                        it only contains data we use the exisiting type of
107                        a CONSTR closure); this closure is the end of a 
108                        blocking queue for an RBH closure; it only exists in
109                        this kind of blocking queue and must be at the end
110                        of the queue
111 */                    
112 extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
113 #define DO_AWAKEN_BQ(bqe, node)  STGCALL2(awakenBlockedQueue, bqe, node);
114
115 #define AWAKEN_BQ(info,closure)                                         \
116         if (info == &stg_BLACKHOLE_BQ_info ||               \
117             info == &stg_FETCH_ME_BQ_info ||                \
118             get_itbl(closure)->type == RBH) {                           \
119                 DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure);                           \
120         }
121
122 #elif defined(GRAN)
123
124 extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
125 #define DO_AWAKEN_BQ(bq, node)  STGCALL2(awakenBlockedQueue, bq, node);
126
127 /* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
128    not checked. The rest of the code is the same as for GUM.
129 */
130 #define AWAKEN_BQ(info,closure)                                         \
131         if (info == &stg_BLACKHOLE_BQ_info ||               \
132             get_itbl(closure)->type == RBH) {                           \
133                 DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure);                           \
134         }
135
136 #endif /* GRAN || PAR */
137
138
139 /* -----------------------------------------------------------------------------
140    Updates: lower-level macros which update a closure with an
141    indirection to another closure.
142
143    There are several variants of this code.
144
145        PROFILING:
146    -------------------------------------------------------------------------- */
147
148 /* LDV profiling:
149  * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in 
150  * which p1 resides.
151  *
152  * Note: 
153  *   After all, we do *NOT* need to call LDV_RECORD_CREATE() for both IND and 
154  *   IND_OLDGEN closures because they are inherently used. But, it corrupts
155  *   the invariants that every closure keeps its creation time in the profiling
156  *  field. So, we call LDV_RECORD_CREATE().
157  */
158
159 /* In the DEBUG case, we also zero out the slop of the old closure,
160  * so that the sanity checker can tell where the next closure is.
161  *
162  * Two important invariants: we should never try to update a closure
163  * to point to itself, and the closure being updated should not
164  * already have been updated (the mutable list will get messed up
165  * otherwise).
166  *
167  * NB. We do *not* do this in THREADED_RTS mode, because when we have the
168  * possibility of multiple threads entering the same closure, zeroing
169  * the slop in one of the threads would have a disastrous effect on
170  * the other (seen in the wild!).
171  */
172 #ifdef CMINUSMINUS
173
174 #define FILL_SLOP(p)                                                    \
175   W_ inf;                                                               \
176   W_ sz;                                                                \
177   W_ i;                                                                 \
178   inf = %GET_STD_INFO(p);                                               \
179   if (%INFO_TYPE(inf) != HALF_W_(BLACKHOLE)                             \
180         && %INFO_TYPE(inf) != HALF_W_(CAF_BLACKHOLE)) {                 \
181       if (%INFO_TYPE(inf) == HALF_W_(THUNK_SELECTOR)) {                 \
182           sz = BYTES_TO_WDS(SIZEOF_StgSelector_NoThunkHdr);             \
183      } else {                                                           \
184           if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) {                   \
185               sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \
186           } else {                                                      \
187               if (%INFO_TYPE(inf) == HALF_W_(AP)) {                     \
188                   sz = TO_W_(StgAP_n_args(p)) +  BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \
189               } else {                                                  \
190                   sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf)); \
191               }                                                         \
192           }                                                             \
193       }                                                                 \
194       i = 0;                                                            \
195       for:                                                              \
196         if (i < sz) {                                                   \
197           StgThunk_payload(p,i) = 0;                                    \
198           i = i + 1;                                                    \
199           goto for;                                                     \
200         }                                                               \
201   }
202
203 #else /* !CMINUSMINUS */
204
205 INLINE_HEADER void
206 FILL_SLOP(StgClosure *p)
207 {                                               
208     StgInfoTable *inf = get_itbl(p);            
209     nat i, sz;
210
211     switch (inf->type) {
212     case BLACKHOLE:
213     case CAF_BLACKHOLE:
214         goto no_slop;
215         // we already filled in the slop when we overwrote the thunk
216         // with BLACKHOLE, and also an evacuated BLACKHOLE is only the
217         // size of an IND.
218     case THUNK_SELECTOR:
219         sz = sizeofW(StgSelector) - sizeofW(StgThunkHeader);
220         break;
221     case AP:
222         sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader);
223         break;
224     case AP_STACK:
225         sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader);
226         break;
227     default:
228         sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
229         break;
230     }
231     for (i = 0; i < sz; i++) {
232         ((StgThunk *)p)->payload[i] = 0;
233     }
234 no_slop:
235     ;
236 }
237
238 #endif /* CMINUSMINUS */
239
240 #if !defined(DEBUG) || defined(THREADED_RTS)
241 #define DEBUG_FILL_SLOP(p) /* do nothing */
242 #else
243 #define DEBUG_FILL_SLOP(p) FILL_SLOP(p)
244 #endif
245
246 /* We have two versions of this macro (sadly), one for use in C-- code,
247  * and the other for C.
248  *
249  * The and_then argument is a performance hack so that we can paste in
250  * the continuation code directly.  It helps shave a couple of
251  * instructions off the common case in the update code, which is
252  * worthwhile (the update code is often part of the inner loop).
253  * (except that gcc now appears to common up this code again and
254  * invert the optimisation.  Grrrr --SDM).
255  */
256 #ifdef CMINUSMINUS
257 #define generation(n) (W_[generations] + n*SIZEOF_generation)
258 #define updateWithIndirection(ind_info, p1, p2, and_then)       \
259     W_ bd;                                                      \
260                                                                 \
261     DEBUG_FILL_SLOP(p1);                                        \
262     LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
263     StgInd_indirectee(p1) = p2;                                 \
264     prim %write_barrier() [];                                   \
265     bd = Bdescr(p1);                                            \
266     if (bdescr_gen_no(bd) != 0 :: CInt) {                       \
267       recordMutableCap(p1, TO_W_(bdescr_gen_no(bd)), R1);       \
268       SET_INFO(p1, stg_IND_OLDGEN_info);                        \
269       LDV_RECORD_CREATE(p1);                                    \
270       TICK_UPD_OLD_IND();                                       \
271       and_then;                                                 \
272     } else {                                                    \
273       SET_INFO(p1, ind_info);                                   \
274       LDV_RECORD_CREATE(p1);                                    \
275       TICK_UPD_NEW_IND();                                       \
276       and_then;                                                 \
277   }
278 #else
279 #define updateWithIndirection(ind_info, p1, p2, and_then)       \
280   {                                                             \
281     bdescr *bd;                                                 \
282                                                                 \
283     /* cas(p1, 0, &stg_WHITEHOLE_info); */                      \
284     ASSERT( (P_)p1 != (P_)p2 && !closure_IND(p1) );             \
285     DEBUG_FILL_SLOP(p1);                                        \
286     LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
287     ((StgInd *)p1)->indirectee = p2;                            \
288     write_barrier();                                            \
289     bd = Bdescr((P_)p1);                                        \
290     if (bd->gen_no != 0) {                                      \
291       recordMutableGenLock(p1, &generations[bd->gen_no]);       \
292       SET_INFO(p1, &stg_IND_OLDGEN_info);                       \
293       TICK_UPD_OLD_IND();                                       \
294       and_then;                                                 \
295     } else {                                                    \
296       SET_INFO(p1, ind_info);                                   \
297       LDV_RECORD_CREATE(p1);                                    \
298       TICK_UPD_NEW_IND();                                       \
299       and_then;                                                 \
300     }                                                           \
301   }
302 #endif /* CMINUSMINUS */
303 #endif /* UPDATES_H */