From: matthewc Date: Mon, 3 Jun 2002 13:08:41 +0000 (+0000) Subject: [project @ 2002-06-03 13:08:37 by matthewc] X-Git-Tag: Approx_11550_changesets_converted~1997 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=cb5ccf0a7588f9d67835d99a8135b02f3f5ba884;p=ghc-hetmet.git [project @ 2002-06-03 13:08:37 by matthewc] Initial mangling and tailcalls support for IA64. Function prologues and epilogues are deleted and we use a single register stack frame throughout (with a little register renaming in the mangler...) Dropthrough from fast to slow entry point is also implemented. Tailcalls are marked and converted into jumps at mangle time. --- diff --git a/ghc/compiler/main/DriverFlags.hs b/ghc/compiler/main/DriverFlags.hs index ba60961..10551a2 100644 --- a/ghc/compiler/main/DriverFlags.hs +++ b/ghc/compiler/main/DriverFlags.hs @@ -1,5 +1,5 @@ ----------------------------------------------------------------------------- --- $Id: DriverFlags.hs,v 1.94 2002/05/15 08:59:59 chak Exp $ +-- $Id: DriverFlags.hs,v 1.95 2002/06/03 13:08:37 matthewc Exp $ -- -- Driver flags -- @@ -565,6 +565,9 @@ machdepCCOpts "-DSTOLEN_X86_REGS="++show n_regs ] ) + | prefixMatch "ia64" cTARGETPLATFORM + = return ( [], ["-fomit-frame-pointer", "-G0"] ) + | prefixMatch "mips" cTARGETPLATFORM = return ( ["-static"], [] ) diff --git a/ghc/driver/mangler/ghc-asm.lprl b/ghc/driver/mangler/ghc-asm.lprl index 9be5c67..b14a31f 100644 --- a/ghc/driver/mangler/ghc-asm.lprl +++ b/ghc/driver/mangler/ghc-asm.lprl @@ -213,6 +213,34 @@ sub init_TARGET_STUFF { $T_create_word = "\t.word"; #--------------------------------------------------------# + } elsif ( $TargetPlatform =~ /^ia64-.*-linux$/ ) { + + $T_STABBY = 0; # 1 iff .stab things (usually if a.out format) + $T_US = ''; # _ if symbols have an underscore on the front + $T_PRE_APP = '#'; + $T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like + $T_POST_LBL = ':'; + + $T_MOVE_DIRVS = 
'^(\s*\.(global|proc|pred\.safe_across_calls|text|data|section|align|size|type|ident)\s+.*\n)'; + $T_COPY_DIRVS = '\.(global|proc)'; + + $T_hsc_cc_PAT = '\.string.*\)(hsc|cc) (.*)\\\\t(.*)"'; + $T_DOT_WORD = '\.(long|value|byte|zero)'; + $T_DOT_GLOBAL = '\.global'; + $T_HDR_literal = "\.section\t\.rodata\n"; + $T_HDR_misc = "\.text\n\t\.align 8\n"; + $T_HDR_data = "\.data\n\t\.align 8\n"; + $T_HDR_consist = "\.text\n"; + $T_HDR_closure = "\.data\n\t\.align 8\n"; + $T_HDR_srt = "\.text\n\t\.align 8\n"; + $T_HDR_info = "\.text\n\t\.align 8\n"; + $T_HDR_entry = "\.text\n\t\.align 16\n"; + $T_HDR_fast = "\.text\n\t\.align 16\n"; + $T_HDR_vector = "\.text\n\t\.align 8\n"; + $T_HDR_direct = "\.text\n\t\.align 8\n"; + $T_create_word = "\t.word"; + + #--------------------------------------------------------# } elsif ( $TargetPlatform =~ /^m68k-.*-sunos4/ ) { $T_STABBY = 1; # 1 iff .stab things (usually if a.out format) @@ -437,7 +465,7 @@ sub mangle_asm { next if /${T_PRE_APP}(NO_)?APP/o; next if /^;/ && $TargetPlatform =~ /^hppa/; - next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips)-/; + next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips|ia64)-/; if ( $TargetPlatform =~ /^mips-/ && /^\t\.(globl\S+\.text|comm\t)/ ) { @@ -657,7 +685,10 @@ sub mangle_asm { # toss all prologue stuff; HPPA is pretty weird # (see elsewhere) - $c = &mash_hppa_prologue($c) if $TargetPlatform =~ /^hppa/; + $c = &hppa_mash_prologue($c) if $TargetPlatform =~ /^hppa-/; + + # do some register renaming before dropping the prologue + $c = &ia64_rename_outputs($c) if $TargetPlatform =~ /^ia64-/; # be slightly paranoid to make sure there's # nothing surprising in there @@ -684,6 +715,14 @@ sub mangle_asm { $r = $& . 
$r; } + } elsif ($TargetPlatform =~ /^ia64-/) { + $p =~ s/^\t\.prologue .*\n//; + $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, .*\n//; + $p =~ s/^\t\.fframe \d+\n\tadds r12 = -\d+, r12\n//; + $p =~ s/^\t\.save rp, r\d+\n\tmov r\d+ = b0\n//; + $p =~ s/^\t\.(mii|mmi)\n//; # bundling is no longer sensible + $p =~ s/^\t;;\n//g; # discard stops + $p =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments } elsif ($TargetPlatform =~ /^m68k-/) { $p =~ s/^\tlink a6,#-?\d.*\n//; $p =~ s/^\tpea a6@\n\tmovel sp,a6\n//; @@ -760,6 +799,14 @@ sub mangle_asm { $e =~ s/^\tpopl\s+\%ecx\n//; $e =~ s/^\taddl\s+\$\d+,\s*\%esp\n//; $e =~ s/^\tsubl\s+\$-\d+,\s*\%esp\n//; + } elsif ($TargetPlatform =~ /^ia64-/) { + $e =~ s/^\tmov ar\.pfs = r\d+\n//; + $e =~ s/^\tmov b0 = r\d+\n//; + $e =~ s/^\t\.restore sp\n\tadds r12 = \d+, r12\n//; + $e =~ s/^\tbr\.ret\.sptk\.many b0\n//; + $e =~ s/^\t\.(mii|mmi|mib)\n//g; # bundling is no longer sensible + $e =~ s/^\t;;\n//g; # discard stops - stop at end of body is sufficient + $e =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments } elsif ($TargetPlatform =~ /^m68k-/) { $e =~ s/^\tunlk a6\n//; $e =~ s/^\trts\n//; @@ -801,6 +848,10 @@ sub mangle_asm { $c =~ s/^\t(call|jbsr|jal)\s+${T_US}__DISCARD__\n//go; $c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-.*/; + # IA64: mangle tailcalls into jumps here + $c =~ s/^\tbr\.call\.sptk\.many b0 = (.*)\n\t;;\n(\tmov r1 = r\d+\n)?\t;;\n\t--- TAILCALL ---\n/\tbr\.few $1\n/g + if $TargetPlatform =~ /^ia64-/; + # MIPS: that may leave some gratuitous asm macros around # (no harm done; but we get rid of them to be tidier) $c =~ s/^\t\.set\tnoreorder\n\t\.set\tnomacro\n\taddu\t(\S+)\n\t\.set\tmacro\n\t\.set\treorder\n/\taddu\t$1\n/ @@ -1049,6 +1100,9 @@ sub mangle_asm { # The next two only apply if we're not stealing %esi or %edi. 
$c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%esi\n\tjmp\s+\*\%esi\n// if ($StolenX86Regs < 3); $c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%edi\n\tjmp\s+\*\%edi\n// if ($StolenX86Regs < 4); + } elsif ( $TargetPlatform =~ /^ia64-/ ) { + #$c =~ s/^\tbr\.few ${symb}_fast\d*#\n\t;;\n(\t;;\n\t\.endp ${symb}_entry#\n)/$1/; + $c =~ s/^\tbr\.few ${symb}_fast\d*#\n(\t;;\n\t\.endp ${symb}_entry#\n)/$1/; } elsif ( $TargetPlatform =~ /^mips-/ ) { $c =~ s/^\tjmp \$31,\(\$27\),0\n\t\.align 4\n\t\.end/\t.align 4\n\t.end/; } elsif ( $TargetPlatform =~ /^m68k-/ ) { @@ -1174,7 +1228,7 @@ sub mangle_asm { \end{code} \begin{code} -sub mash_hppa_prologue { # OK, epilogue, too +sub hppa_mash_prologue { # OK, epilogue, too local($_) = @_; # toss all prologue stuff @@ -1198,6 +1252,45 @@ sub mash_hppa_prologue { # OK, epilogue, too } \end{code} +On IA64 we use a single register frame throughout STG execution, and delete +the frame management instructions from the prologue and epilogue - similarly +to the memory stack. Unfortunately, gcc always uses absolute register names +instead of logical names like out0. This means that outputs (i.e. inputs to +other functions) will end up in the wrong registers relative to our "frame". +Hence this evil register renaming.... + +\begin{code} +sub ia64_rename_outputs { + local($_) = @_; + + return ($_) if (!/^\talloc r\d+ = ar\.pfs, (\d+), (\d+), (\d+), (\d+)$/); + + local($inputs,$locals,$outputs,$rotating) = ($1,$2,$3,$4); + local($oldbase,$newbase,$old,$new,$i); + local($LOCALS) = 24; # must correspond to value in StgCRun.c + + # Check everything fits in our standard frame. Only 8 outputs should + # ever go in registers. 
+ + die "No inputs allowed: $inputs" if ($inputs > 0); + die "Too many locals: $locals" if ($locals > $LOCALS); + die "Too many outputs: $outputs" if ($outputs > 8); + die "No rotating registers allowed: $rotating" if ($rotating > 0); + + $oldbase = 32 + $inputs + $locals; # first output register as numbered by gcc's alloc + $newbase = 32 + 0 + $LOCALS; # first output register in our fixed frame + + # Always do renaming from the top to avoid collisions + for ($i = $outputs-1; $i >= 0; $i--) { + $old = $oldbase + $i; + $new = $newbase + $i; + s/\br$old\b/r$new/g; + } + + return ($_); +} +\end{code} + \begin{code} sub print_doctored { local($_, $need_fallthru_patch) = @_; @@ -1347,6 +1440,8 @@ right after the table itself. (The code pasting is done elsewhere.) sub rev_tbl { local($symb, $tbl, $discard1) = @_; + return ($tbl) if ($TargetPlatform =~ /^ia64-/); + local($before) = ''; local($label) = ''; local(@imports) = (); # hppa only diff --git a/ghc/includes/Stg.h b/ghc/includes/Stg.h index 636bb1e..f176700 100644 --- a/ghc/includes/Stg.h +++ b/ghc/includes/Stg.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Stg.h,v 1.45 2002/02/13 07:48:19 sof Exp $ + * $Id: Stg.h,v 1.46 2002/06/03 13:08:41 matthewc Exp $ * * (c) The GHC Team, 1998-1999 * @@ -81,7 +81,7 @@ * * UNDEFINING THIS WON'T WORK ON ITS OWN. You have been warned. 
*/ -#ifndef USE_MINIINTERPRETER +#if !defined(USE_MINIINTERPRETER) && !defined(ia64_TARGET_ARCH) #define TABLES_NEXT_TO_CODE #endif diff --git a/ghc/includes/TailCalls.h b/ghc/includes/TailCalls.h index 2dbd99c..2d3f36c 100644 --- a/ghc/includes/TailCalls.h +++ b/ghc/includes/TailCalls.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: TailCalls.h,v 1.9 2002/05/28 09:22:08 wolfgang Exp $ + * $Id: TailCalls.h,v 1.10 2002/06/03 13:08:41 matthewc Exp $ * * (c) The GHC Team, 1998-1999 * @@ -186,6 +186,21 @@ but uses $$dyncall if necessary to cope, just in case you aren't. 
#endif /* powerpc_TARGET_ARCH */ /* ----------------------------------------------------------------------------- + Tail calling on IA64 + -------------------------------------------------------------------------- */ + +#ifdef ia64_TARGET_ARCH + +/* The compiler can more intelligently decide how to do this. We therefore + * implement it as a call followed by a "--- TAILCALL ---" marker, and the mangler (ghc-asm.lprl) optimises the marked call to a br.few jump at mangle time. The do/while (0) wrapper keeps JMP_(x); a single statement, so the marker cannot be separated from its call by an unbraced if/else. */ +#define JMP_(cont) do { ((F_) (cont))(); __asm__ volatile ("--- TAILCALL ---"); } while (0) + +/* Don't emit calls to __DISCARD__ as this causes hassles */ +#define __DISCARD__() + +#endif + +/* ----------------------------------------------------------------------------- FUNBEGIN and FUNEND. These are markers indicating the start and end of Real Code in a diff --git a/ghc/rts/StgCRun.c b/ghc/rts/StgCRun.c index 3e16e48..0ef550c 100644 --- a/ghc/rts/StgCRun.c +++ b/ghc/rts/StgCRun.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgCRun.c,v 1.33 2002/05/21 14:58:49 wolfgang Exp $ + * $Id: StgCRun.c,v 1.34 2002/06/03 13:08:41 matthewc Exp $ * * (c) The GHC Team, 1998-2000 * @@ -465,7 +465,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg); -void StgRunIsImplementedInAssembler() +void StgRunIsImplementedInAssembler(void) { __asm__ volatile ( "\n.globl _StgRun\n" @@ -488,5 +488,41 @@ void StgRunIsImplementedInAssembler() #endif +/* ----------------------------------------------------------------------------- + IA64 architecture + + Again, in assembler - so we can fiddle with the register stack. 
+ -------------------------------------------------------------------------- */ + +#ifdef ia64_TARGET_ARCH + +/* the memory stack is rarely used, so 16K is excessive */ +#undef RESERVED_C_STACK_BYTES +#define RESERVED_C_STACK_BYTES 1024 + +void StgRunIsImplementedInAssembler(void) +{ /* holder for the asm below, which defines the global labels StgRun and StgReturn */ + __asm__ volatile( + ".global StgRun\n" + "StgRun:\n" + "\talloc r55 = ar.pfs, 0, 24, 8, 0\n" /* setup register frame: 0 inputs, 24 locals, 8 outputs; the 24 must match $LOCALS in ghc-asm.lprl (ia64_rename_outputs); old ar.pfs kept in r55 */ + "\tmov r54 = b0\n" /* save return address in r54 */ + "\tadds sp = -%0, sp\n" /* setup stack: reserve RESERVED_C_STACK_BYTES (%0) */ + "\tld8 r16=[r32],8 ;;\n" /* branch to f using descriptor: first word of [r32] -> r16, r32 advanced by 8 */ + "\tld8 r1=[r32]\n" /* second descriptor word -> r1 */ + "\tmov b6=r16\n" /* first descriptor word is the branch target */ + "\tbr.few b6 ;;\n" /* jump (not call) to f */ + ".global StgReturn\n" + "StgReturn:\n" + "\tmov r8 = 0\n" /* return value in r8 (always 0 here) */ + "\tmov ar.pfs = r55\n" /* restore register frame */ + "\tmov b0 = r54\n" /* restore return address */ + "\tadds sp = %0, sp\n" /* restore stack */ + "\tbr.ret.sptk.many b0 ;;\n" /* return */ + : : "i"(RESERVED_C_STACK_BYTES)); +} + +#endif + #endif /* !USE_MINIINTERPRETER */