X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fincludes%2FTailCalls.h;h=aa2d6f9c210aba61301ffee7a285d65cb64c82a6;hb=ceefdee5e6de7ae96b461b4104c32779bbb58e53;hp=f0fd6a6503c05d075c35106b9cd44fb56dc2c4c1;hpb=a5841e5388e6ab0304c8fee308491fce1c0904e4;p=ghc-hetmet.git diff --git a/ghc/includes/TailCalls.h b/ghc/includes/TailCalls.h index f0fd6a6..aa2d6f9 100644 --- a/ghc/includes/TailCalls.h +++ b/ghc/includes/TailCalls.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: TailCalls.h,v 1.5 2000/04/05 14:26:31 panne Exp $ + * $Id: TailCalls.h,v 1.20 2005/03/08 09:01:20 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -16,25 +16,32 @@ #ifdef USE_MINIINTERPRETER -#define JMP_(cont) return(stgCast(StgFunPtr,cont)) +#define JMP_(cont) return((StgFunPtr)(cont)) #define FB_ #define FE_ #else +extern void __DISCARD__(void); + /* ----------------------------------------------------------------------------- Tail calling on x86 -------------------------------------------------------------------------- */ -#if i386_TARGET_ARCH - -extern void __DISCARD__(void); +#if i386_HOST_ARCH /* Note about discard: possibly there to fool GCC into clearing up before we do the jump eg. if there are some arguments left on the C stack that GCC hasn't popped yet. Also possibly to fool any optimisations (a function call often acts as a barrier). Not sure if any of this is necessary now -- SDM + + Comment to above note: I don't think the __DISCARD__() in JMP_ is + necessary. Arguments should be popped from the C stack immediately + after returning from a function, as long as we pass -fno-defer-pop + to gcc. Moreover, a goto to a first-class label acts as a barrier + for optimisations in the same way a function call does. 
+ -= chak */ /* The goto here seems to cause gcc -O2 to delete all the code after @@ -44,19 +51,56 @@ extern void __DISCARD__(void); #define JMP_(cont) \ { \ - void *target; \ + void *__target; \ __DISCARD__(); \ - target = (void *)(cont); \ - goto *target; \ + __target = (void *)(cont); \ + goto *__target; \ + } + +#endif /* i386_HOST_ARCH */ + +/* ----------------------------------------------------------------------------- + Tail calling on x86_64 + -------------------------------------------------------------------------- */ + +#if x86_64_HOST_ARCH + +/* + NOTE about __DISCARD__(): + + On x86_64 this is necessary to work around bugs in the register + variable support in gcc. Without the __DISCARD__() call, gcc will + silently throw away assignments to global register variables that + happen before the jump. + + Here's the example: + + extern void g(void); + static void f(void) { + R1 = g; + __DISCARD__(); + goto *R1; + } + + without the dummy function call, gcc throws away the assignment to R1 + (gcc 3.4.3) gcc bug #20359. 
+*/ + +#define JMP_(cont) \ + { \ + void *__target; \ + __DISCARD__(); \ + __target = (void *)(cont); \ + goto *__target; \ } -#endif /* i386_TARGET_ARCH */ +#endif /* x86_64_HOST_ARCH */ /* ----------------------------------------------------------------------------- Tail calling on Sparc -------------------------------------------------------------------------- */ -#ifdef sparc_TARGET_ARCH +#ifdef sparc_HOST_ARCH #define JMP_(cont) ((F_) (cont))() /* Oh so happily, the above turns into a "call" instruction, @@ -68,41 +112,139 @@ extern void __DISCARD__(void); #define FB_ #define FE_ -#endif /* sparc_TARGET_ARCH */ +#endif /* sparc_HOST_ARCH */ /* ----------------------------------------------------------------------------- Tail calling on Alpha -------------------------------------------------------------------------- */ -#ifdef alpha_TARGET_ARCH +#ifdef alpha_HOST_ARCH +#if IN_STG_CODE register void *_procedure __asm__("$27"); +#endif -#define JMP_(cont) \ - do { _procedure = (void *)(cont); \ - goto *_procedure; \ +#define JMP_(cont) \ + do { _procedure = (void *)(cont); \ + __DISCARD__(); \ + goto *_procedure; \ } while(0) /* Don't need these for alpha mangling */ #define FB_ #define FE_ -#endif /* alpha_TARGET_ARCH */ +#endif /* alpha_HOST_ARCH */ /* ----------------------------------------------------------------------------- Tail calling on HP + +Description of HP's weird procedure linkage, many thanks to Andy Bennet +: + +I've been digging a little further into the problem of how HP-UX does +dynamic procedure calls. My solution in the last e-mail inserting an extra +'if' statement into the JMP_ I think is probably the best general solution I +can come up with. There are still a few problems with it however: It won't +work, if JMP_ ever has to call anything in a shared library, if this is +likely to be required it'll need something more elaborate. It also won't work +with PA-RISC 2.0 wide mode (64-bit) which uses a different format PLT. 
+ +I had some feedback from someone in HP's compiler lab and the problem +relates to the linker on HP-UX, not gcc as I first suspected. The reason the +'hsc' executable works is most likely due to a change in 'ld's behaviour for +performance reasons between your revision and mine. + +The major issue relating to this is shared libraries and how they are +implemented under HP-UX. The whole point of the Procedure Label Table (PLT) is +to allow a function pointer to hold the address of the function and a +pointer to the library's global data lookup table (DLT) used by position +independent code (PIC). This makes the PLT absolutely essential for shared +library calls. HP has two linker-introduced assembly functions for dealing +with dynamic calls, $$dyncall and $$dyncall_external. The former does a +check to see if the address is a PLT pointer and dereferences if necessary +or just calls the address otherwise; the latter skips the check and just +does the indirect jump no matter what. + +Since $$dyncall_external runs faster due to its not having the test, the +linker nowadays prefers to generate calls to that, rather than $$dyncall. It +makes this decision based on the presence of any shared library. If it even +smells an sl's existence at link time, it rigs the runtime system to +generate PLT references for everything on the assumption that the result +will be slightly more efficient. This is what is crashing GHC since the +calls it is generating have no understanding of the procedure label proper. +The only way to get real addresses is to link everything archive, including +system libraries, at which point it assumes you probably are going to be +using calls similar to GHC's (it's rigged for HP's +ESfic compiler option) +but uses $$dyncall if necessary to cope, just in case you aren't. 
+ -------------------------------------------------------------------------- */ #ifdef hppa1_1_hp_hpux_TARGET -#define JMP_(cont) \ - do { void *_procedure = (void *)(cont); \ - goto *_procedure; \ +#define JMP_(cont) \ + do { void *_procedure = (void *)(cont); \ + if (((int) _procedure) & 2) \ + _procedure = (void *)(*((int *) (_procedure - 2))); \ + goto *_procedure; \ } while(0) #endif /* hppa1_1_hp_hpux_TARGET */ /* ----------------------------------------------------------------------------- + Tail calling on PowerPC + -------------------------------------------------------------------------- */ + +#ifdef powerpc_HOST_ARCH + +#define JMP_(cont) \ + { \ + void *target; \ + target = (void *)(cont); \ + __DISCARD__(); \ + goto *target; \ + } + +/* + The __DISCARD__ is there because Apple's April 2002 Beta of GCC 3.1 + sometimes generates incorrect code otherwise. + It tends to "forget" to update global register variables in the presence + of decrement/increment operators: + JMP_(*(--Sp)) is wrongly compiled as JMP_(Sp[-1]). + Calling __DISCARD__ in between works around this problem. +*/ + +/* + I would _love_ to use the following instead, + but some versions of Apple's GCC fail to generate code for it + if it is called for a casted data pointer - which is exactly what + we are going to do... + + #define JMP_(cont) ((F_) (cont))() +*/ + +#endif /* powerpc_HOST_ARCH */ + +#ifdef powerpc64_HOST_ARCH +#define JMP_(cont) ((F_) (cont))() +#endif + +/* ----------------------------------------------------------------------------- + Tail calling on IA64 + -------------------------------------------------------------------------- */ + +#ifdef ia64_HOST_ARCH + +/* The compiler can more intelligently decide how to do this. We therefore + * implement it as a call and optimise to a jump at mangle time. 
*/ +#define JMP_(cont) ((F_) (cont))(); __asm__ volatile ("--- TAILCALL ---"); + +/* Don't emit calls to __DISCARD__ as this causes hassles */ +#define __DISCARD__() + +#endif + +/* ----------------------------------------------------------------------------- FUNBEGIN and FUNEND. These are markers indicating the start and end of Real Code in a @@ -110,8 +252,18 @@ register void *_procedure __asm__("$27"); function and these markers is shredded by the mangler. -------------------------------------------------------------------------- */ +/* The following __DISCARD__() has become necessary with gcc 2.96 on x86. + * It prevents gcc from moving stack manipulation code from the function + * body (aka the Real Code) into the function prologue, ie, from moving it + * over the --- BEGIN --- marker. It should be noted that (like some + * other black magic in GHC's code), there is no essential reason why gcc + * could not move some stack manipulation code across the __DISCARD__() - + * it just doesn't choose to do it at the moment. + * -= chak + */ + #ifndef FB_ -#define FB_ __asm__ volatile ("--- BEGIN ---"); +#define FB_ __asm__ volatile ("--- BEGIN ---"); __DISCARD__ (); #endif #ifndef FE_