/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Performing updates.
 *
 * ---------------------------------------------------------------------------*/

#ifndef UPDATES_H
#define UPDATES_H

/* -----------------------------------------------------------------------------
   Updates

   We have two layers of update macros.  The top layer, UPD_IND() and
   friends, performs all the work of an update.  In detail:

      - if the closure being updated is a blocking queue, then all the
        threads waiting on the blocking queue are woken up;

      - then the lower-level updateWithIndirection() macro is invoked
        to actually replace the closure with an indirection (see below).

   -------------------------------------------------------------------------- */
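
/* For example (a sketch only; "updatee" and "result" are illustrative
 * names, not part of this header): the update code for a thunk
 * overwrites the updatee saved in its update frame with an indirection
 * to the newly computed result, via
 *
 *     UPD_IND(updatee, result);
 */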

#define SEMI ;
#define UPD_IND(updclosure, heapptr) \
   UPD_REAL_IND(updclosure,INFO_PTR(stg_IND_info),heapptr,SEMI)
#define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
   UPD_REAL_IND(updclosure,ind_info,heapptr,and_then)

/* These macros have to work in both C and C--, so here's the
 * impedance matching:
 */
#ifdef CMINUSMINUS
#define BLOCK_BEGIN
#define BLOCK_END
#define INFO_PTR(info)      info
#else
#define BLOCK_BEGIN         {
#define BLOCK_END           }
#define INFO_PTR(info)      &info
#define StgBlockingQueue_blocking_queue(closure) \
    (((StgBlockingQueue *)closure)->blocking_queue)
#endif
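
/* For instance, INFO_PTR(stg_IND_info) expands to plain stg_IND_info in
 * C-- but to &stg_IND_info in C, so macros such as UPD_REAL_IND below
 * can be written once and used from both languages.
 */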

/* krc: there used to be both UPD_REAL_IND and UPD_PERM_IND; the latter
   was used for ticky and cost-centre profiling.  For now, we just have
   UPD_REAL_IND. */
#define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then)   \
        BLOCK_BEGIN                                             \
        updateWithIndirection(ind_info,                         \
                              updclosure,                       \
                              heapptr,                          \
                              and_then);                        \
        BLOCK_END

/* -----------------------------------------------------------------------------
   Awaken any threads waiting on a blocking queue (BLACKHOLE_BQ).
   -------------------------------------------------------------------------- */

#if defined(PAR)

/*
   In a parallel setup several types of closures might have a blocking queue:
     BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
                      reawakened by calling UPD_IND on that closure after
                      having finished the computation of the graph
     FETCH_ME_BQ  ... a global indirection (FETCH_ME) may be entered by a
                      local TSO, turning it into a FETCH_ME_BQ; it will be
                      reawakened by calling processResume
     RBH          ... a revertible black hole may be entered by another
                      local TSO, putting it onto its blocking queue; since
                      RBHs only exist while the corresponding closure is in
                      transit, they will be reawakened by calling
                      convertToFetchMe (upon processing an ACK message)

   In a parallel setup a blocking queue may contain three types of closures:
     TSO           ... as in the default concurrent setup
     BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for
                       the result of the current computation
     CONSTR        ... an RBHSave closure (which contains data ripped out
                       of the closure to make room for a blocking queue;
                       since it only contains data, we reuse the existing
                       CONSTR closure type); this closure marks the end of
                       a blocking queue for an RBH closure; it only exists
                       in this kind of blocking queue and must be at the
                       end of the queue
*/
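
/* A sketch of the resulting queue shape for an RBH (illustrative only):
 *
 *     RBH.blocking_queue --> TSO --> BLOCKED_FETCH --> RBHSave (CONSTR)
 *
 * where the RBHSave closure always terminates the queue.
 */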
extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
#define DO_AWAKEN_BQ(bqe, node)  STGCALL2(awakenBlockedQueue, bqe, node);

#define AWAKEN_BQ(info,closure)                                         \
        if (info == &stg_BLACKHOLE_BQ_info ||                           \
            info == &stg_FETCH_ME_BQ_info ||                            \
            get_itbl(closure)->type == RBH) {                           \
            DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, \
                         closure);                                      \
        }
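
/* AWAKEN_BQ is intended for use in the update code, just before the
 * closure is overwritten with an indirection (a sketch only; "updatee"
 * and "result" are illustrative names):
 *
 *     info = GET_INFO(updatee);
 *     AWAKEN_BQ(info, updatee);
 *     UPD_IND(updatee, result);
 */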

#elif defined(GRAN)

extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
#define DO_AWAKEN_BQ(bq, node)  STGCALL2(awakenBlockedQueue, bq, node);

/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
   not checked.  The rest of the code is the same as for GUM.
*/
#define AWAKEN_BQ(info,closure)                                         \
        if (info == &stg_BLACKHOLE_BQ_info ||                           \
            get_itbl(closure)->type == RBH) {                           \
            DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, \
                         closure);                                      \
        }

#endif /* GRAN || PAR */


/* -----------------------------------------------------------------------------
   Updates: lower-level macros which update a closure with an
   indirection to another closure.

   There are several variants of this code.

       PROFILING:
   -------------------------------------------------------------------------- */

/* LDV profiling:
 * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation
 * in which p1 resides.
 *
 * Note:
 *   Strictly speaking, we do not need to call LDV_RECORD_CREATE() for IND
 *   and IND_OLDGEN closures, because they are inherently used.  But not
 *   doing so would break the invariant that every closure keeps its
 *   creation time in the profiling field, so we call LDV_RECORD_CREATE()
 *   anyway.
 */

/* In the DEBUG case, we also zero out the slop of the old closure,
 * so that the sanity checker can tell where the next closure is.
 *
 * Two important invariants: we should never try to update a closure
 * to point to itself, and the closure being updated should not
 * already have been updated (the mutable list will get messed up
 * otherwise).
 *
 * NB. We do *not* do this in THREADED_RTS mode, because when we have the
 * possibility of multiple threads entering the same closure, zeroing
 * the slop in one of the threads would have a disastrous effect on
 * the other (seen in the wild!).
 */
#ifdef CMINUSMINUS

#define FILL_SLOP(p)                                                    \
  W_ inf;                                                               \
  W_ sz;                                                                \
  W_ i;                                                                 \
  inf = %GET_STD_INFO(p);                                               \
  if (%INFO_TYPE(inf) != HALF_W_(BLACKHOLE)                             \
        && %INFO_TYPE(inf) != HALF_W_(CAF_BLACKHOLE)) {                 \
      if (%INFO_TYPE(inf) == HALF_W_(THUNK_SELECTOR)) {                 \
          sz = BYTES_TO_WDS(SIZEOF_StgSelector_NoThunkHdr);             \
      } else {                                                          \
          if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) {                   \
              sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \
          } else {                                                      \
              if (%INFO_TYPE(inf) == HALF_W_(AP)) {                     \
                  sz = TO_W_(StgAP_n_args(p)) + BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \
              } else {                                                  \
                  sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf)); \
              }                                                         \
          }                                                             \
      }                                                                 \
      /* C-- has no loop construct: zero the payload via label + goto */ \
      i = 0;                                                            \
      for:                                                              \
        if (i < sz) {                                                   \
          StgThunk_payload(p,i) = 0;                                    \
          i = i + 1;                                                    \
          goto for;                                                     \
        }                                                               \
  }

#else /* !CMINUSMINUS */

INLINE_HEADER void
FILL_SLOP(StgClosure *p)
{
    StgInfoTable *inf = get_itbl(p);
    nat i, sz;

    switch (inf->type) {
    case BLACKHOLE:
    case CAF_BLACKHOLE:
        goto no_slop;
        // we already filled in the slop when we overwrote the thunk
        // with BLACKHOLE, and also an evacuated BLACKHOLE is only the
        // size of an IND.
    case THUNK_SELECTOR:
        sz = sizeofW(StgSelector) - sizeofW(StgThunkHeader);
        break;
    case AP:
        sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader);
        break;
    case AP_STACK:
        sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader);
        break;
    default:
        sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
        break;
    }
    for (i = 0; i < sz; i++) {
        ((StgThunk *)p)->payload[i] = 0;
    }
no_slop:
    ;
}

#endif /* CMINUSMINUS */

#if !defined(DEBUG) || defined(THREADED_RTS)
#define DEBUG_FILL_SLOP(p) /* do nothing */
#else
#define DEBUG_FILL_SLOP(p) FILL_SLOP(p)
#endif

/* We have two versions of this macro (sadly), one for use in C-- code,
 * and the other for C.
 *
 * The and_then argument is a performance hack so that we can paste in
 * the continuation code directly.  It helps shave a couple of
 * instructions off the common case in the update code, which is
 * worthwhile (the update code is often part of the inner loop).
 * (except that gcc now appears to common up this code again and
 * invert the optimisation.  Grrrr --SDM).
 */
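
/* For example, in C code one might write (a sketch; "updatee" and
 * "result" are illustrative names, not part of this header):
 *
 *     UPD_SPEC_IND(updatee, &stg_IND_info, result, return);
 *
 * which pastes the "return" continuation into both branches of
 * updateWithIndirection instead of falling through to it.
 */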
#ifdef CMINUSMINUS
#define generation(n) (W_[generations] + n*SIZEOF_generation)
#define updateWithIndirection(ind_info, p1, p2, and_then)       \
    W_ bd;                                                      \
                                                                \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    StgInd_indirectee(p1) = p2;                                 \
    prim %write_barrier() [];                                   \
    bd = Bdescr(p1);                                            \
    if (bdescr_gen_no(bd) != 0 :: CInt) {                       \
      recordMutableCap(p1, TO_W_(bdescr_gen_no(bd)), R1);       \
      SET_INFO(p1, stg_IND_OLDGEN_info);                        \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
    }
#else
#define updateWithIndirection(ind_info, p1, p2, and_then)       \
  {                                                             \
    bdescr *bd;                                                 \
                                                                \
    ASSERT( (P_)p1 != (P_)p2 );                                 \
    /* not necessarily true: ASSERT( !closure_IND(p1) ); */     \
    /* occurs in RaiseAsync.c:raiseAsync() */                   \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    ((StgInd *)p1)->indirectee = p2;                            \
    write_barrier();                                            \
    bd = Bdescr((P_)p1);                                        \
    if (bd->gen_no != 0) {                                      \
      recordMutableGenLock(p1, bd->gen_no);                     \
      SET_INFO(p1, &stg_IND_OLDGEN_info);                       \
      LDV_RECORD_CREATE(p1); /* keep the LDV invariant; see     \
                                note above, and the C-- version */ \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
    }                                                           \
  }
#endif /* CMINUSMINUS */
#endif /* UPDATES_H */