From 400b162bb412a83da133ef7e61a527be962a071b Mon Sep 17 00:00:00 2001 From: matthewc Date: Fri, 7 Jun 2002 09:40:10 +0000 Subject: [PATCH] [project @ 2002-06-07 09:40:05 by matthewc] STG to machine register mapping for IA64. --- ghc/driver/mangler/ghc-asm.lprl | 44 +-------------------------------- ghc/includes/MachRegs.h | 44 ++++++++++++++++++++++++++++++++- ghc/rts/StgCRun.c | 52 ++++++++++++++++++++++++++++----------- 3 files changed, 81 insertions(+), 59 deletions(-) diff --git a/ghc/driver/mangler/ghc-asm.lprl b/ghc/driver/mangler/ghc-asm.lprl index b14a31f..4dc5b49 100644 --- a/ghc/driver/mangler/ghc-asm.lprl +++ b/ghc/driver/mangler/ghc-asm.lprl @@ -687,9 +687,6 @@ sub mangle_asm { # (see elsewhere) $c = &hppa_mash_prologue($c) if $TargetPlatform =~ /^hppa-/; - # do some register renaming before dropping the prologue - $c = &ia64_rename_outputs($c) if $TargetPlatform =~ /^ia64-/; - # be slightly paranoid to make sure there's # nothing surprising in there if ( $c =~ /--- BEGIN ---/ ) { @@ -717,7 +714,7 @@ sub mangle_asm { } elsif ($TargetPlatform =~ /^ia64-/) { $p =~ s/^\t\.prologue .*\n//; - $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, .*\n//; + $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, 0, 31, \d+, 0\n//; $p =~ s/^\t\.fframe \d+\n\tadds r12 = -\d+, r12\n//; $p =~ s/^\t\.save rp, r\d+\n\tmov r\d+ = b0\n//; $p =~ s/^\t\.(mii|mmi)\n//; # bundling is no longer sensible @@ -1252,45 +1249,6 @@ sub hppa_mash_prologue { # OK, epilogue, too } \end{code} -On IA64 we use a single register frame throughout STG execution, and delete -the frame management instructions from the prologue and epilogue - similarly -to the memory stack. Unfortunately, gcc always uses absolute register names -instead of logical names like out0. This means that outputs (i.e. inputs to -other functions) will end up in the wrong registers relative to our "frame". -Hence this evil register renaming.... 
- \begin{code} -sub ia64_rename_outputs { - local($_) = @_; - - return ($_) if (!/^\talloc r\d+ = ar\.pfs, (\d+), (\d+), (\d+), (\d+)$/); - - local($inputs,$locals,$outputs,$rotating) = ($1,$2,$3,$4); - local($oldbase,$newbase,$old,$new,$i); - local($LOCALS) = 24; # must correspond to value in StgCRun.c - - # Check everything fits in our standard frame. Only 8 outputs should - # ever go in registers. - - die "No inputs allowed: $inputs" if ($inputs > 0); - die "Too many locals: $locals" if ($locals > $LOCALS); - die "Too many outputs: $outputs" if ($outputs > 8); - die "No rotating registers allowed: $rotating" if ($rotating > 0); - - $outbase = 32 + $inputs + $locals; - $newbase = 32 + 0 + $LOCALS; - - # Always do renaming from the top to avoid collisions - for ($i = $outputs-1; $i >= 0; $i--) { - $old = $outbase + $i; - $new = $newbase + $i; - s/\br$old\b/r$new/g; - } - - return ($_); -} -\end{code} - \begin{code} sub print_doctored { local($_, $need_fallthru_patch) = @_; diff --git a/ghc/includes/MachRegs.h b/ghc/includes/MachRegs.h index 4da9e91..50058aa 100644 --- a/ghc/includes/MachRegs.h +++ b/ghc/includes/MachRegs.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: MachRegs.h,v 1.10 2001/01/18 11:28:50 sewardj Exp $ + * $Id: MachRegs.h,v 1.11 2002/06/07 09:40:10 matthewc Exp $ * * (c) The GHC Team, 1998-1999 * @@ -392,6 +392,48 @@ #endif /* powerpc */ /* ----------------------------------------------------------------------------- + The IA64 register mapping + + We place the general registers in the locals area of the register stack, + so that the call mechanism takes care of saving them for us. We reserve + the first 16 for gcc's use - since gcc uses the highest used register to + determine the register stack frame size, this gives us a constant size + register stack frame. + + \tr{f16-f31} are the callee-saved floating point registers. 
+ -------------------------------------------------------------------------- */ + +#ifdef ia64_TARGET_ARCH + +#define REG(x) __asm__(#x) + +#define REG_R1 loc16 +#define REG_R2 loc17 +#define REG_R3 loc18 +#define REG_R4 loc19 +#define REG_R5 loc20 +#define REG_R6 loc21 +#define REG_R7 loc22 +#define REG_R8 loc23 + +#define REG_F1 f16 +#define REG_F2 f17 +#define REG_F3 f18 +#define REG_F4 f19 + +#define REG_D1 f20 +#define REG_D2 f21 + +#define REG_Sp loc24 +#define REG_Su loc25 +#define REG_SpLim loc26 + +#define REG_Hp loc27 +#define REG_HpLim loc28 + +#endif /* ia64 */ + +/* ----------------------------------------------------------------------------- The Sun SPARC register mapping The SPARC register (window) story: Remember, within the Haskell diff --git a/ghc/rts/StgCRun.c b/ghc/rts/StgCRun.c index 0ef550c..db2d084 100644 --- a/ghc/rts/StgCRun.c +++ b/ghc/rts/StgCRun.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgCRun.c,v 1.34 2002/06/03 13:08:41 matthewc Exp $ + * $Id: StgCRun.c,v 1.35 2002/06/07 09:40:10 matthewc Exp $ * * (c) The GHC Team, 1998-2000 * @@ -465,7 +465,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) extern StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg); -void StgRunIsImplementedInAssembler(void) +static void StgRunIsImplementedInAssembler(void) { __asm__ volatile ( "\n.globl _StgRun\n" @@ -491,7 +491,13 @@ void StgRunIsImplementedInAssembler(void) /* ----------------------------------------------------------------------------- IA64 architecture - Again, in assembler - so we can fiddle with the register stack. + Again, in assembler - so we can fiddle with the register stack, and because + gcc doesn't handle asm-clobbered callee-saves correctly. 
+ + loc0 - loc15: preserved locals + loc16 - loc28: STG registers + loc29: saved ar.pfs + loc30: saved b0 -------------------------------------------------------------------------- */ #ifdef ia64_TARGET_ARCH @@ -500,26 +506,42 @@ void StgRunIsImplementedInAssembler(void) #undef RESERVED_C_STACK_BYTES #define RESERVED_C_STACK_BYTES 1024 -void StgRunIsImplementedInAssembler(void) +static void StgRunIsImplementedInAssembler(void) { __asm__ volatile( ".global StgRun\n" "StgRun:\n" - "\talloc r55 = ar.pfs, 0, 24, 8, 0\n" /* setup register frame */ - "\tmov r54 = b0\n" /* save return address */ - "\tadds sp = -%0, sp\n" /* setup stack */ - "\tld8 r16=[r32],8 ;;\n" /* branch to f using descriptor */ - "\tld8 r1=[r32]\n" - "\tmov b6=r16\n" - "\tbr.few b6 ;;\n" + "\talloc loc29 = ar.pfs, 0, 31, 8, 0\n" /* setup register frame */ + "\tld8 r18 = [r32],8\n" /* get procedure address */ + "\tadds sp = -%0, sp ;;\n" /* setup stack */ + "\tld8 gp = [r32]\n" /* get procedure GP */ + "\tadds r16 = %0-(6*16), sp\n" + "\tadds r17 = %0-(5*16), sp ;;\n" + "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */ + "\tstf.spill [r17] = f17,32\n" + "\tmov b6 = r18 ;;\n" /* set target address */ + "\tstf.spill [r16] = f18,32\n" + "\tstf.spill [r17] = f19,32\n" + "\tmov loc30 = b0 ;;\n" /* save return address */ + "\tstf.spill [r16] = f20,32\n" + "\tstf.spill [r17] = f21,32\n" + "\tbr.few b6 ;;\n" /* branch to function */ ".global StgReturn\n" "StgReturn:\n" - "\tmov r8 = 0\n" /* return value in r8 */ - "\tmov ar.pfs = r55\n" /* restore register frame */ - "\tmov b0 = r54\n" /* restore return address */ + "\tmov r8 = loc16\n" /* return value in r8 */ + "\tadds r16 = %0-(6*16), sp\n" + "\tadds r17 = %0-(5*16), sp ;;\n" + "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */ + "\tldf.fill f17 = [r17],32\n" + "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */ + "\tldf.fill f18 = [r16],32\n" + "\tldf.fill f19 = [r17],32\n" + "\tmov b0 = loc30 ;;\n" /* restore return address */ 
+ "\tldf.fill f20 = [r16],32\n" + "\tldf.fill f21 = [r17],32\n" "\tadds sp = %0, sp\n" /* restore stack */ "\tbr.ret.sptk.many b0 ;;\n" /* return */ - : : "i"(RESERVED_C_STACK_BYTES)); + : : "i"(RESERVED_C_STACK_BYTES + 6*16)); } #endif -- 1.7.10.4