/* rts/Updates.h -- from the ghc-hetmet.git repository */
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Performing updates.
6  *
7  * ---------------------------------------------------------------------------*/
8
9 #ifndef UPDATES_H
10 #define UPDATES_H
11
12 /* -----------------------------------------------------------------------------
13    Updates
14
15    We have two layers of update macros.  The top layer, UPD_IND() and
16    friends perform all the work of an update.  In detail:
17
18       - if the closure being updated is a blocking queue, then all the
19         threads waiting on the blocking queue are updated.
20
21       - then the lower level updateWithIndirection() macro is invoked 
22         to actually replace the closure with an indirection (see below).
23
24    -------------------------------------------------------------------------- */
25
/* Top-level update macros.
 *
 * Under TICKY_TICKY profiling every update is routed through
 * UPD_PERM_IND, so the indirection remains visible to the profiler.
 * Otherwise UPD_REAL_IND installs an ordinary (removable) indirection.
 */
#ifdef TICKY_TICKY
/* UPD_IND: update 'updclosure' with an indirection to 'heapptr'. */
# define UPD_IND(updclosure, heapptr) \
   UPD_PERM_IND(updclosure,heapptr)
/* UPD_SPEC_IND: like UPD_IND, but with a caller-supplied IND info
 * table and a continuation 'and_then' pasted in after the update.
 * Note the ticky variant ignores 'ind_info'. */
# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
   UPD_PERM_IND(updclosure,heapptr); and_then
#else
/* SEMI: a bare semicolon, used as the empty 'and_then' argument. */
#  define SEMI ;
# define UPD_IND(updclosure, heapptr) \
   UPD_REAL_IND(updclosure,INFO_PTR(stg_IND_info),heapptr,SEMI)
# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
   UPD_REAL_IND(updclosure,ind_info,heapptr,and_then)
#endif
38
/* These macros have to work in both C and C--, so here's the
 * impedance matching:
 */
#ifdef CMINUSMINUS
#define BLOCK_BEGIN
#define BLOCK_END
#define DECLARE_IPTR(info)  W_ info
#define FCALL               foreign "C"
#define INFO_PTR(info)      info
#define ARG_PTR             "ptr"
#else
#define BLOCK_BEGIN         {
#define BLOCK_END           }
#define DECLARE_IPTR(info)  const StgInfoTable *(info)
#define FCALL               /* nothing */
#define INFO_PTR(info)      &info
/* Field accessor needed only on the C side; the C-- compiler
 * generates the corresponding accessor automatically. */
#define StgBlockingQueue_blocking_queue(closure) \
    (((StgBlockingQueue *)closure)->blocking_queue)
#define ARG_PTR             /* nothing */
#endif
59
/* UPD_IND actually does a PERM_IND if TICKY_TICKY is on;
   if you *really* need an IND use UPD_REAL_IND
 */
/* Replace 'updclosure' with an indirection (info table 'ind_info')
 * pointing at 'heapptr', then fall through to 'and_then'.
 * NOTE(review): 'info' is loaded from the closure but never used in
 * this expansion -- presumably a leftover from the blocking-queue
 * wakeup code; confirm before removing. */
#define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then)   \
        BLOCK_BEGIN                                             \
        DECLARE_IPTR(info);                                     \
        info = GET_INFO(updclosure);                            \
        updateWithIndirection(ind_info,                         \
                              updclosure,                       \
                              heapptr,                          \
                              and_then);                        \
        BLOCK_END
72
#if defined(PROFILING) || defined(TICKY_TICKY)
/* UPD_PERM_IND: update with a *permanent* indirection, which the GC
 * never shorts out, so profilers can still observe the update. */
#define UPD_PERM_IND(updclosure, heapptr)       \
        BLOCK_BEGIN                             \
        updateWithPermIndirection(updclosure,   \
                                  heapptr);     \
        BLOCK_END
#endif
80
#if defined(RTS_SUPPORTS_THREADS)

/* UPD_IND_NOLOCK: perform an update without taking any lock; only
 * safe when the caller knows no other thread can see the closure. */
# ifdef TICKY_TICKY
#  define UPD_IND_NOLOCK(updclosure, heapptr)   \
        BLOCK_BEGIN                             \
        updateWithPermIndirection(updclosure,   \
                                  heapptr);     \
        BLOCK_END
# else
/* Note the empty trailing argument: no 'and_then' continuation. */
#  define UPD_IND_NOLOCK(updclosure, heapptr)                   \
        BLOCK_BEGIN                                             \
        updateWithIndirection(INFO_PTR(stg_IND_info),           \
                              updclosure,                       \
                              heapptr,);                        \
        BLOCK_END
# endif

#else
/* Without thread support there are no locks to avoid. */
#define UPD_IND_NOLOCK(updclosure,heapptr) UPD_IND(updclosure,heapptr)
#endif
101
102 /* -----------------------------------------------------------------------------
103    Awaken any threads waiting on a blocking queue (BLACKHOLE_BQ).
104    -------------------------------------------------------------------------- */
105
#if defined(PAR) 

/* 
   In a parallel setup several types of closures might have a blocking queue:
     BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
                      reawakened via calling UPD_IND on that closure after
                      having finished the computation of the graph
     FETCH_ME_BQ  ... a global indirection (FETCH_ME) may be entered by a 
                      local TSO, turning it into a FETCH_ME_BQ; it will be
                      reawakened via calling processResume
     RBH          ... a revertible black hole may be entered by another 
                      local TSO, putting it onto its blocking queue; since
                      RBHs only exist while the corresponding closure is in 
                      transit, they will be reawakened via calling 
                      convertToFetchMe (upon processing an ACK message)

   In a parallel setup a blocking queue may contain 3 types of closures:
     TSO           ... as in the default concurrent setup
     BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for
                       the result of the current computation
     CONSTR        ... an RBHSave closure (which contains data ripped out of
                       the closure to make room for a blocking queue; since
                       it only contains data we use the existing type of
                       a CONSTR closure); this closure is the end of a 
                       blocking queue for an RBH closure; it only exists in
                       this kind of blocking queue and must be at the end
                       of the queue
*/                    
/* Wake every element of 'node's blocking queue. */
extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
#define DO_AWAKEN_BQ(bqe, node)  STGCALL2(awakenBlockedQueue, bqe, node);

/* AWAKEN_BQ: if 'closure' carries a blocking queue (BLACKHOLE_BQ,
 * FETCH_ME_BQ, or an RBH), wake up all the waiting threads. */
#define AWAKEN_BQ(info,closure)                                         \
        if (info == &stg_BLACKHOLE_BQ_info ||               \
            info == &stg_FETCH_ME_BQ_info ||                \
            get_itbl(closure)->type == RBH) {                           \
                DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure);                           \
        }

#elif defined(GRAN)

extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
#define DO_AWAKEN_BQ(bq, node)  STGCALL2(awakenBlockedQueue, bq, node);

/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
   not checked. The rest of the code is the same as for GUM.
*/
#define AWAKEN_BQ(info,closure)                                         \
        if (info == &stg_BLACKHOLE_BQ_info ||               \
            get_itbl(closure)->type == RBH) {                           \
                DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure);                           \
        }

#endif /* GRAN || PAR */
159
160
161 /* -----------------------------------------------------------------------------
162    Updates: lower-level macros which update a closure with an
163    indirection to another closure.
164
165    There are several variants of this code.
166
167        PROFILING:
168    -------------------------------------------------------------------------- */
169
170 /* LDV profiling:
171  * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in 
172  * which p1 resides.
173  *
174  * Note: 
 *   Strictly speaking we do *NOT* need to call LDV_RECORD_CREATE() for IND and
 *   IND_OLDGEN closures because they are inherently used.  But omitting it
 *   would break the invariant that every closure keeps its creation time in
 *   its profiling field, so we call LDV_RECORD_CREATE() anyway.
 */
180
181 /* In the DEBUG case, we also zero out the slop of the old closure,
182  * so that the sanity checker can tell where the next closure is.
183  *
184  * Two important invariants: we should never try to update a closure
185  * to point to itself, and the closure being updated should not
186  * already have been updated (the mutable list will get messed up
187  * otherwise).
188  *
189  * NB. We do *not* do this in THREADED_RTS mode, because when we have the
190  * possibility of multiple threads entering the same closure, zeroing
191  * the slop in one of the threads would have a disastrous effect on
192  * the other (seen in the wild!).
193  */
194 #ifdef CMINUSMINUS
195
/* C-- version of FILL_SLOP: zero out the payload of thunk 'p' so the
 * sanity checker can tell where the next closure starts.
 * BLACKHOLE/CAF_BLACKHOLE are skipped (their slop was already zeroed
 * when the thunk was blackholed).  THUNK_SELECTOR, AP_STACK and AP
 * payload sizes are computed specially; everything else uses the
 * info table's ptrs+nptrs layout.  The 'for:' label plus 'goto for'
 * is a hand-rolled loop (C-- has no looping construct). */
#define FILL_SLOP(p)                                                    \
  W_ inf;                                                               \
  W_ sz;                                                                \
  W_ i;                                                                 \
  inf = %GET_STD_INFO(p);                                               \
  if (%INFO_TYPE(inf) != HALF_W_(BLACKHOLE)                             \
        && %INFO_TYPE(inf) != HALF_W_(CAF_BLACKHOLE)) {                 \
      if (%INFO_TYPE(inf) == HALF_W_(THUNK_SELECTOR)) {                 \
          sz = BYTES_TO_WDS(SIZEOF_StgSelector_NoThunkHdr);             \
     } else {                                                           \
          if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) {                   \
              sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \
          } else {                                                      \
              if (%INFO_TYPE(inf) == HALF_W_(AP)) {                     \
                  sz = TO_W_(StgAP_n_args(p)) +  BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \
              } else {                                                  \
                  sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf)); \
              }                                                         \
          }                                                             \
      }                                                                 \
      i = 0;                                                            \
      for:                                                              \
        if (i < sz) {                                                   \
          StgThunk_payload(p,i) = 0;                                    \
          i = i + 1;                                                    \
          goto for;                                                     \
        }                                                               \
  }
224
225 #else /* !CMINUSMINUS */
226
227 INLINE_HEADER void
228 FILL_SLOP(StgClosure *p)
229 {                                               
230     StgInfoTable *inf = get_itbl(p);            
231     nat i, sz;
232
233     switch (inf->type) {
234     case BLACKHOLE:
235     case CAF_BLACKHOLE:
236         goto no_slop;
237         // we already filled in the slop when we overwrote the thunk
238         // with BLACKHOLE, and also an evacuated BLACKHOLE is only the
239         // size of an IND.
240     case THUNK_SELECTOR:
241         sz = sizeofW(StgSelector) - sizeofW(StgThunkHeader);
242         break;
243     case AP:
244         sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader);
245         break;
246     case AP_STACK:
247         sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader);
248         break;
249     default:
250         sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
251         break;
252     }
253     for (i = 0; i < sz; i++) {
254         ((StgThunk *)p)->payload[i] = 0;
255     }
256 no_slop:
257     ;
258 }
259
260 #endif /* CMINUSMINUS */
261
/* Zero slop only in DEBUG builds, and never under THREADED_RTS: with
 * multiple threads possibly entering the same closure, zeroing the
 * slop in one thread would corrupt the other (seen in the wild). */
#if !defined(DEBUG) || defined(THREADED_RTS)
#define DEBUG_FILL_SLOP(p) /* do nothing */
#else
#define DEBUG_FILL_SLOP(p) FILL_SLOP(p)
#endif
267
268 /* We have two versions of this macro (sadly), one for use in C-- code,
269  * and the other for C.
270  *
271  * The and_then argument is a performance hack so that we can paste in
272  * the continuation code directly.  It helps shave a couple of
273  * instructions off the common case in the update code, which is
274  * worthwhile (the update code is often part of the inner loop).
275  * (except that gcc now appears to common up this code again and
276  * invert the optimisation.  Grrrr --SDM).
277  */
278 #ifdef CMINUSMINUS
/* C-- address of generation n's descriptor. */
#define generation(n) (W_[generations] + n*SIZEOF_generation)
/* C-- version of updateWithIndirection().
 * Overwrites 'p1' with an indirection to 'p2':
 *   - the indirectee is written first, then a write barrier, then the
 *     info pointer, so another thread never sees a half-built IND;
 *   - closures outside generation 0 go on the capability's mutable
 *     list and get the IND_OLDGEN info table instead of 'ind_info'.
 * 'and_then' is the caller's continuation, pasted into both arms. */
#define updateWithIndirection(ind_info, p1, p2, and_then)       \
    W_ bd;                                                      \
                                                                \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    StgInd_indirectee(p1) = p2;                                 \
    prim %write_barrier() [];                                   \
    bd = Bdescr(p1);                                            \
    if (bdescr_gen_no(bd) != 0 :: CInt) {                       \
      recordMutableCap(p1, TO_W_(bdescr_gen_no(bd)), R1);       \
      SET_INFO(p1, stg_IND_OLDGEN_info);                        \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
  }
300 #else
/* C version of updateWithIndirection(): overwrite closure 'p1' with an
 * indirection (info table 'ind_info') to 'p2', then run 'and_then'.
 *
 *   - the indirectee is written before the info pointer, separated by
 *     a write barrier, so another thread never sees a half-built IND;
 *   - if 'p1' lives outside generation 0 it is added to its
 *     generation's mutable list and gets the IND_OLDGEN info table
 *     (ignoring 'ind_info');
 *   - LDV_RECORD_CREATE() is called in *both* arms, matching the C--
 *     version above and updateWithPermIndirection() below, to keep
 *     the LDV invariant that every closure carries its creation time.
 *     (Previously the old-generation arm omitted the call.)
 */
#define updateWithIndirection(ind_info, p1, p2, and_then)       \
  {                                                             \
    bdescr *bd;                                                 \
                                                                \
    /* cas(p1, 0, &stg_WHITEHOLE_info); */                      \
    ASSERT( (P_)p1 != (P_)p2 && !closure_IND(p1) );             \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    ((StgInd *)p1)->indirectee = p2;                            \
    write_barrier();                                            \
    bd = Bdescr((P_)p1);                                        \
    if (bd->gen_no != 0) {                                      \
      recordMutableGenLock(p1, &generations[bd->gen_no]);       \
      SET_INFO(p1, &stg_IND_OLDGEN_info);                       \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
    }                                                           \
  }
324 #endif
325
326 /* The permanent indirection version isn't performance critical.  We
327  * therefore use an inline C function instead of the C-- macro.
328  */
329 #ifndef CMINUSMINUS
/* Update 'p1' with a *permanent* indirection to 'p2'.  Permanent
 * indirections are never shorted out by the GC, so profilers can
 * still observe that the update happened.  Not performance critical,
 * hence an inline C function rather than a C-- macro.
 *
 * NOTE(review): unlike updateWithIndirection() there is no
 * write_barrier() between storing the indirectee and the info
 * pointer -- presumably acceptable because permanent indirections
 * are only used in profiling/ticky builds; confirm if this path is
 * ever exercised under THREADED_RTS. */
INLINE_HEADER void
updateWithPermIndirection(StgClosure *p1,
                          StgClosure *p2) 
{
  bdescr *bd;

  /* Never update a closure to point at itself, nor one that has
   * already been updated (the mutable list would get messed up). */
  ASSERT( p1 != p2 && !closure_IND(p1) );

  /*
   * @LDV profiling
   * Destroy the old closure.
   * Nb: LDV_* stuff cannot mix with ticky-ticky
   */
  LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);

  bd = Bdescr((P_)p1);
  if (bd->gen_no != 0) {
    /* old generation: remember p1 on its generation's mutable list */
    recordMutableGenLock(p1, &generations[bd->gen_no]);
    ((StgInd *)p1)->indirectee = p2;
    SET_INFO(p1, &stg_IND_OLDGEN_PERM_info);
    /*
     * @LDV profiling
     * We have just created a new closure.
     */
    LDV_RECORD_CREATE(p1);
    TICK_UPD_OLD_PERM_IND();
  } else {
    ((StgInd *)p1)->indirectee = p2;
    SET_INFO(p1, &stg_IND_PERM_info);
    /*
     * @LDV profiling
     * We have just created a new closure.
     */
    LDV_RECORD_CREATE(p1);
    TICK_UPD_NEW_PERM_IND(p1);
  }
}
367 #endif
368
369 #endif /* UPDATES_H */