1 /* -----------------------------------------------------------------------------
2 * $Id: TailCalls.h,v 1.20 2005/03/08 09:01:20 simonmar Exp $
4 * (c) The GHC Team, 1998-1999
6 * Stuff for implementing proper tail jumps.
8 * ---------------------------------------------------------------------------*/
13 /* -----------------------------------------------------------------------------
14 Unmangled tail-jumping: use the mini interpreter.
15 -------------------------------------------------------------------------- */
17 #ifdef USE_MINIINTERPRETER
/* Mini-interpreter variant of JMP_: instead of jumping, the macro expands to
 * a `return` statement whose value is the continuation `cont` cast to
 * StgFunPtr, so the target is handed back to the enclosing function's caller.
 * NOTE(review): presumably that caller is the mini-interpreter loop, which
 * then invokes the returned pointer -- confirm against the driver code. */
19 #define JMP_(cont) return((StgFunPtr)(cont))
/* Declaration of the dummy function __DISCARD__: no arguments, no result.
 * According to the notes elsewhere in this header it is called only for its
 * effect on the compiler (as an optimisation barrier and as a workaround for
 * lost updates to global register variables). Only the declaration appears
 * here; the definition is not part of this header as seen. */
25 extern void __DISCARD__(void);
27 /* -----------------------------------------------------------------------------
29 -------------------------------------------------------------------------- */
33 /* Note about discard: possibly there to fool GCC into clearing up
34 before we do the jump e.g. if there are some arguments left on the C
35 stack that GCC hasn't popped yet. Also possibly to fool any
36 optimisations (a function call often acts as a barrier). Not sure
37 if any of this is necessary now -- SDM
39 Comment to above note: I don't think the __DISCARD__() in JMP_ is
40 necessary. Arguments should be popped from the C stack immediately
41 after returning from a function, as long as we pass -fno-defer-pop
42 to gcc. Moreover, a goto to a first-class label acts as a barrier
43 for optimisations in the same way a function call does.
47 /* The goto here seems to cause gcc -O2 to delete all the code after
48 it - including the FE_ marker and the epilogue code - exactly what
56 __target = (void *)(cont); \
60 #endif /* i386_HOST_ARCH */
62 /* -----------------------------------------------------------------------------
63 Tail calling on x86_64
64 -------------------------------------------------------------------------- */
69 NOTE about __DISCARD__():
71 On x86_64 this is necessary to work around bugs in the register
72 variable support in gcc. Without the __DISCARD__() call, gcc will
73 silently throw away assignments to global register variables that
74 happen before the jump.
85 without the dummy function call, gcc throws away the assignment to R1
86 (gcc 3.4.3) gcc bug #20359.
93 __target = (void *)(cont); \
97 #endif /* x86_64_HOST_ARCH */
99 /* -----------------------------------------------------------------------------
100 Tail calling on Sparc
101 -------------------------------------------------------------------------- */
103 #ifdef sparc_HOST_ARCH
/* SPARC variant of JMP_: the tail jump is written as an ordinary indirect
 * call through `cont` cast to the function-pointer type F_. */
105 #define JMP_(cont) ((F_) (cont))()
106 /* Oh so happily, the above turns into a "call" instruction,
107 which, on a SPARC, is nothing but a "jmpl" with the
108 return address in %o7 [which we don't care about].
111 /* Don't need these for sparc mangling */
115 #endif /* sparc_HOST_ARCH */
117 /* -----------------------------------------------------------------------------
118 Tail calling on Alpha
119 -------------------------------------------------------------------------- */
121 #ifdef alpha_HOST_ARCH
/* Alpha: the variable `_procedure` is bound to machine register $27 through
 * the GCC asm-register syntax. The Alpha JMP_ stores the jump target
 * `(void *)(cont)` into it. */
124 register void *_procedure __asm__("$27");
128 do { _procedure = (void *)(cont); \
133 /* Don't need these for alpha mangling */
137 #endif /* alpha_HOST_ARCH */
139 /* -----------------------------------------------------------------------------
142 Description of HP's weird procedure linkage, many thanks to Andy Bennett
143 <andy_bennett@hp.com>:
145 I've been digging a little further into the problem of how HP-UX does
146 dynamic procedure calls. My solution in the last e-mail inserting an extra
147 'if' statement into the JMP_ I think is probably the best general solution I
148 can come up with. There are still a few problems with it however: It won't
149 work, if JMP_ ever has to call anything in a shared library, if this is
150 likely to be required it'll need something more elaborate. It also won't work
151 with PA-RISC 2.0 wide mode (64-bit) which uses a different format PLT.
153 I had some feedback from someone in HP's compiler lab and the problem
154 relates to the linker on HP-UX, not gcc as I first suspected. The reason the
155 'hsc' executable works is most likely due to a change in 'ld's behaviour for
156 performance reasons between your revision and mine.
158 The major issue relating to this is shared libraries and how they are
159 implemented under HP-UX. The whole point of the Procedure Label Table (PLT) is
160 to allow a function pointer to hold the address of the function and a
161 pointer to the library's global data lookup table (DLT) used by position
162 independent code (PIC). This makes the PLT absolutely essential for shared
163 library calls. HP has two linker-introduced assembly functions for dealing
164 with dynamic calls, $$dyncall and $$dyncall_external. The former does a
165 check to see if the address is a PLT pointer and dereferences if necessary
166 or just calls the address otherwise; the latter skips the check and just
167 does the indirect jump no matter what.
169 Since $$dyncall_external runs faster due to its not having the test, the
170 linker nowadays prefers to generate calls to that, rather than $$dyncall. It
171 makes this decision based on the presence of any shared library. If it even
172 smells an sl's existence at link time, it rigs the runtime system to
173 generate PLT references for everything on the assumption that the result
174 will be slightly more efficient. This is what is crashing GHC since the
175 calls it is generating have no understanding of the procedure label proper.
176 The only way to get real addresses is to link everything archive, including
177 system libraries, at which point it assumes you probably are going to be
178 using calls similar to GHC's (it's rigged for HP's +ESfic compiler option)
179 but uses $$dyncall if necessary to cope, just in case you aren't.
181 -------------------------------------------------------------------------- */
183 #ifdef hppa1_1_hp_hpux_TARGET
186 do { void *_procedure = (void *)(cont); \
187 if (((int) _procedure) & 2) \
188 _procedure = (void *)(*((int *) (_procedure - 2))); \
192 #endif /* hppa1_1_hp_hpux_TARGET */
194 /* -----------------------------------------------------------------------------
195 Tail calling on PowerPC
196 -------------------------------------------------------------------------- */
198 #ifdef powerpc_HOST_ARCH
203 target = (void *)(cont); \
209 The __DISCARD__ is there because Apple's April 2002 Beta of GCC 3.1
210 sometimes generates incorrect code otherwise.
211 It tends to "forget" to update global register variables in the presence
212 of decrement/increment operators:
213 JMP_(*(--Sp)) is wrongly compiled as JMP_(Sp[-1]).
214 Calling __DISCARD__ in between works around this problem.
218 I would _love_ to use the following instead,
219 but some versions of Apple's GCC fail to generate code for it
220 if it is called for a cast data pointer - which is exactly what
221 we are going to do...
223 #define JMP_(cont) ((F_) (cont))()
226 #endif /* powerpc_HOST_ARCH */
228 #ifdef powerpc64_HOST_ARCH
/* PowerPC 64 variant of JMP_: the tail jump is written as a plain indirect
 * call through `cont` cast to the function-pointer type F_. */
229 #define JMP_(cont) ((F_) (cont))()
232 /* -----------------------------------------------------------------------------
234 -------------------------------------------------------------------------- */
236 #ifdef ia64_HOST_ARCH
238 /* The compiler can more intelligently decide how to do this. We therefore
239 * implement it as a call and optimise to a jump at mangle time. */
/* IA-64 variant of JMP_: expands to an indirect call through `cont` (cast to
 * F_) followed by an inline-asm statement whose text is "--- TAILCALL ---".
 * NOTE(review): the expansion is two statements and already ends in ';'.
 * An unbraced `if (c) JMP_(x);` would therefore guard only the call, not the
 * asm marker, and a normal `JMP_(x);` leaves an extra empty statement.
 * Confirm every use site sits inside a braced block before relying on it. */
240 #define JMP_(cont) ((F_) (cont))(); __asm__ volatile ("--- TAILCALL ---");
242 /* Don't emit calls to __DISCARD__ as this causes hassles */
/* Function-like macro with an empty replacement list: while this definition
 * is in effect, every `__DISCARD__()` invocation (including the one inside
 * FB_) expands to nothing, so no call is emitted. */
243 #define __DISCARD__()
247 /* -----------------------------------------------------------------------------
250 These are markers indicating the start and end of Real Code in a
251 function. All instructions between the actual start and end of the
252 function and these markers are shredded by the mangler.
253 -------------------------------------------------------------------------- */
255 /* The following __DISCARD__() has become necessary with gcc 2.96 on x86.
256 * It prevents gcc from moving stack manipulation code from the function
257 * body (aka the Real Code) into the function prologue, ie, from moving it
258 * over the --- BEGIN --- marker. It should be noted that (like some
259 * other black magic in GHC's code), there is no essential reason why gcc
260 * could not move some stack manipulation code across the __DISCARD__() -
261 * it just doesn't choose to do it at the moment.
/* FB_: marker for the start of Real Code in a function. It emits an
 * inline-asm statement whose text is "--- BEGIN ---" and then invokes
 * __DISCARD__(). The expansion supplies its own trailing ';', so FB_ is
 * written without one at the use site. */
266 #define FB_ __asm__ volatile ("--- BEGIN ---"); __DISCARD__ ();
/* FE_: marker for the end of Real Code in a function. It emits an inline-asm
 * statement whose text is "--- END ---". The expansion supplies its own
 * trailing ';'. */
270 #define FE_ __asm__ volatile ("--- END ---");
273 #endif /* !USE_MINIINTERPRETER */
275 #endif /* TAILCALLS_H */