$T_X86_BADJMP = '^\tjmp\s+[^\.\*]';
$T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s.*|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)';
- $T_COPY_DIRVS = '^\s*\.(globl|type|size|local)';
+ if ( $TargetPlatform =~ /solaris2/ ) {
+ # newer Solaris linkers are picky about .size information, so
+ # omit it (see #1421)
+ $T_COPY_DIRVS = '^\s*\.(globl|local)';
+ } else {
+ $T_COPY_DIRVS = '^\s*\.(globl|type|size|local)';
+ }
$T_DOT_WORD = '\.(long|value|word|byte|zero)';
$T_DOT_GLOBAL = '\.globl';
$T_DOT_WORD = '\.(long|value|byte|zero)';
$T_DOT_GLOBAL = '\.global';
$T_HDR_literal = "\.section\t\.rodata\n";
- $T_HDR_misc = "\.text\n\t\.align 8\n";
+ $T_HDR_misc = "\.text\n\t\.align 16\n"; # May contain code; align like 'entry'
$T_HDR_data = "\.data\n\t\.align 8\n";
$T_HDR_rodata = "\.section\t\.rodata\n\t\.align 8\n";
$T_HDR_closure = "\.data\n\t\.align 8\n";
$T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
$T_POST_LBL = ':';
- $T_MOVE_DIRVS = '^(\s*\.(globl|text|data|section|align|size|type|ident|local)\s+.*\n)';
+ $T_MOVE_DIRVS = '^(\s*\.(globl|text|data|section|align|size|type|ident|local)([ \t].*)?\n)';
$T_COPY_DIRVS = '\.(globl|type|size|local)';
$T_DOT_WORD = '\.(quad|long|value|byte|zero)';
# where x is in the text section and y in the rodata section.
# It works if y is in the text section, though. This is probably
# going to cause difficulties for PIC, I imagine.
+ #
+ # See Note [x86-64-relative] in includes/InfoTables.h
$T_HDR_relrodata= "\.text\n\t\.align 8\n";
$T_HDR_closure = "\.data\n\t\.align 8\n";
$T_HDR_vector = "\t\.text\n\t\.align 2\n";
#--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^x86_64-apple-darwin.*/ ) {
+ # Apple PowerPC Darwin/MacOS X.
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = '_'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\LC\d+:'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)';
+ $T_COPY_DIRVS = '\.(globl|lcomm)';
+
+ $T_DOT_WORD = '\.(quad|long|short|byte|fill|space)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_toc = "\.toc\n";
+ $T_HDR_literal16= "\t\.literal8\n\t\.align 4\n";
+ $T_HDR_literal = "\t\.const\n\t\.align 4\n";
+ $T_HDR_misc = "\t\.text\n\t\.align 2\n";
+ $T_HDR_data = "\t\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\t\.const\n\t\.align 2\n";
+ $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n";
+ $T_HDR_closure = "\t\.data\n\t\.align 2\n";
+ $T_HDR_info = "\t\.text\n\t\.align 2\n";
+ $T_HDR_entry = "\t\.text\n\t\.align 2\n";
+ $T_HDR_vector = "\t\.text\n\t\.align 2\n";
+
+ #--------------------------------------------------------#
} elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ ) {
# PowerPC Linux
$T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
$T_CONST_LBL = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
$T_POST_LBL = ':';
- $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.seg|\.stab.*|\s+?\.section.*|\s+\.type.*|\s+\.size.*)\n)';
- $T_COPY_DIRVS = '\.(global|globl|proc|stab)';
+ $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\s+\.local\s+\S+|\.text|\.data|\.seg|\.stab.*|\s+?\.section.*|\s+\.type.*|\s+\.size.*)\n)';
+ $T_COPY_DIRVS = '\.(global|local|globl|proc|stab)';
$T_DOT_WORD = '\.(long|word|nword|xword|byte|half|short|skip|uahalf|uaword)';
$T_DOT_GLOBAL = '^\t\.global';
local($*) = 1;
local($i, $c);
+ # ia64-specific information for code chunks
+ my $ia64_locnum;
+ my $ia64_outnum;
&init_TARGET_STUFF();
&init_FUNNY_THINGS();
# Labels ending "_str": these are literal strings.
} elsif ( /^${T_US}([A-Za-z0-9_]+)_str${T_POST_LBL}$/ ) {
$chk[++$i] = $_;
- $chkcat[$i] = 'rodata';
+ $chkcat[$i] = 'relrodata';
$chksymb[$i] = '';
} elsif ( $TargetPlatform =~ /-darwin/
&& (/^\s*\.subsections_via_symbols/
$chkcat[$i] = 'data';
$chksymb[$i] = '';
- } elsif ( /^${T_US}([A-Za-z0-9_]+)_hpc?${T_POST_LBL}$/o ) {
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_hpc${T_POST_LBL}$/o ) {
# hpc shares tick boxes across modules
$chk[++$i] = $_;
$chkcat[$i] = 'data';
}
}
$numchks = $#chk + 1;
+ $chk[$numchks] = ''; # We might push .note.GNU-stack into this
+ $chkcat[$numchks] = 'verbatim'; # If we do, write it straight back out
# open CHUNKS, ">/tmp/chunks1" or die "Cannot open /tmp/chunks1: $!\n";
# for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] }
# (see elsewhere)
$c = &hppa_mash_prologue($c) if $TargetPlatform =~ /^hppa-/;
+ undef $ia64_locnum;
+ undef $ia64_outnum;
+
# be slightly paranoid to make sure there's
# nothing surprising in there
if ( $c =~ /--- BEGIN ---/ ) {
} elsif ($TargetPlatform =~ /^ia64-/) {
$p =~ s/^\t\.prologue .*\n//;
- $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, 0, 3[12], \d+, 0\n//;
+
+ # Record the number of local and out registers for register relocation later
+ $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, 0, (\d+), (\d+), 0\n//;
+ $ia64_locnum = $1;
+ $ia64_outnum = $2;
+
$p =~ s/^\t\.fframe \d+\n\tadds r12 = -\d+, r12\n//;
$p =~ s/^\t\.save rp, r\d+\n\tmov r\d+ = b0\n//;
- $p =~ s/^\t\.(mii|mmi)\n//g; # bundling is no longer sensible
+
+ # Ignore save/restore of these registers; they're taken
+ # care of in StgRun()
+ $p =~ s/^\t\.save ar\.lc, r\d+\n//;
+ $p =~ s/^\t\.save pr, r\d+\n//;
+ $p =~ s/^\tmov r\d+ = ar\.lc\n//;
+ $p =~ s/^\tmov r\d+ = pr\n//;
+
+ # Remove .proc and .body directives
+ $p =~ s/^\t\.proc [a-zA-Z0-9_.]+#\n//;
+ $p =~ s/^\t\.body\n//;
+
+ # If there's a label, move it to the body
+ if ($p =~ /^[a-zA-Z0-9.]+:\n/) {
+ $p = $` . $';
+ $r = $& . $r;
+ }
+
+ # Remove floating-point spill instructions.
+ # Only fp registers 2-5 and 16-23 are saved by the runtime.
+ if ($p =~ s/^\tstf\.spill \[r1[4-9]\] = f([2-5]|1[6-9]|2[0-3])(, [0-9]+)?\n//g) {
+ # Being paranoid, only try to remove these if we saw a
+ # spill operation.
+ $p =~ s/^\tmov r1[4-9] = r12\n//;
+ $p =~ s/^\tadds r1[4-9] = -[0-9]+, r12\n//g;
+ $p =~ s/^\t\.save\.f 0x[0-9a-fA-F]\n//g;
+ $p =~ s/^\t\.save\.gf 0x0, 0x[0-9a-fA-F]+\n//g;
+ }
+
+ $p =~ s/^\tnop(?:\.[mifb])?\s+\d+\n//g; # remove nop instructions
+ $p =~ s/^\t\.(mii|mmi|mfi)\n//g; # bundling is no longer sensible
$p =~ s/^\t;;\n//g; # discard stops
$p =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments
- # GCC 3.3 saves r1 in the prologue, move this to the body
- if ($p =~ /^\tmov r\d+ = r1\n/) {
- $p = $` . $';
- $r = $& . $r;
- }
+ # GCC 3.3 saves r1 in the prologue, move this to the body
+ # (Does this register get restored anywhere?)
+ if ($p =~ /^\tmov r\d+ = r1\n/) {
+ $p = $` . $';
+ $r = $& . $r;
+ }
} elsif ($TargetPlatform =~ /^m68k-/) {
$p =~ s/^\tlink a6,#-?\d.*\n//;
$p =~ s/^\tpea a6@\n\tmovel sp,a6\n//;
# toss all epilogue stuff; again, paranoidly
if ( $c =~ /--- END ---/ ) {
- if (($r, $e) = split(/--- END ---/, $c)) {
+ # Gcc may decide to replicate the function epilogue. We want
+ # to process all epilogues, so we split the function and then
+ # loop here.
+ @fragments = split(/--- END ---/, $c);
+ $r = shift(@fragments);
+
+ # Rebuild `c'; processed fragments will be appended to `c'
+ $c = $r;
+
+ foreach $e (@fragments) {
+ # etail holds code that is after the epilogue in the assembly-code
+ # layout and should not be filtered as part of the epilogue.
+ $etail = "";
if ($TargetPlatform =~ /^i386-/) {
$e =~ s/^\tret\n//;
$e =~ s/^\tpopl\s+\%edi\n//;
$e =~ s/^\taddl\s+\$\d+,\s*\%esp\n//;
$e =~ s/^\tsubl\s+\$-\d+,\s*\%esp\n//;
} elsif ($TargetPlatform =~ /^ia64-/) {
- $e =~ s/^\tmov ar\.pfs = r\d+\n//;
- $e =~ s/^\tmov b0 = r\d+\n//;
- $e =~ s/^\t\.restore sp\n\tadds r12 = \d+, r12\n//;
- $e =~ s/^\tbr\.ret\.sptk\.many b0\n//;
- $e =~ s/^\t\.(mii|mmi|mib)\n//g; # bundling is no longer sensible
- $e =~ s/^\t;;\n//g; # discard stops - stop at end of body is sufficient
- $e =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments
+ # The epilogue is first split into:
+ # $e, the epilogue code (up to the return instruction)
+ # $etail, non-epilogue code (after the return instruction)
+ # The return instruction is stripped in the process.
+ if (!(($e, $etail) = split(/^\tbr\.ret\.sptk\.many b0\n/, $e))) {
+ die "Epilogue doesn't seem to have one return instruction: $e\n";
+ }
+ # Remove 'endp' directive from the tail
+ $etail =~ s/^\t\.endp [a-zA-Z0-9_.]+#\n//;
+
+ # If a return value is saved here, discard it
+ $e =~ s/^\tmov r8 = r14\n//;
+
+ # Remove floating-point fill instructions.
+ # Only fp registers 2-5 and 16-23 are saved by the runtime.
+ if ($e =~ s/^\tldf\.fill f([2-5]|1[6-9]|2[0-3]) = \[r1[4-9]\](, [0-9]+)?\n//g) {
+ # Being paranoid, only try to remove this if we saw a fill
+ # operation.
+ $e =~ s/^\tadds r1[4-9] = [0-9]+, r12//g;
+ }
+
+ $e =~ s/^\tnop(?:\.[mifb])?\s+\d+\n//g; # remove nop instructions
+ $e =~ s/^\tmov ar\.pfs = r\d+\n//;
+ $e =~ s/^\tmov ar\.lc = r\d+\n//;
+ $e =~ s/^\tmov pr = r\d+, -1\n//;
+ $e =~ s/^\tmov b0 = r\d+\n//;
+ $e =~ s/^\t\.restore sp\n\tadds r12 = \d+, r12\n//;
+ #$e =~ s/^\tbr\.ret\.sptk\.many b0\n//; # already removed
+ $e =~ s/^\t\.(mii|mmi|mfi|mib)\n//g; # bundling is no longer sensible
+ $e =~ s/^\t;;\n//g; # discard stops - stop at end of body is sufficient
+ $e =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments
} elsif ($TargetPlatform =~ /^m68k-/) {
$e =~ s/^\tunlk a6\n//;
$e =~ s/^\trts\n//;
print STDERR "WARNING: Epilogue junk?: $e\n" if $e =~ /^\t\s*[^\.\s\n]/;
# glue together what's left
- $c = $r . $e;
- $c =~ s/\n\t\n/\n/; # junk blank line
+ $c .= $e . $etail;
}
+ $c =~ s/\n\t\n/\n/; # junk blank line
}
+ else {
+ if ($TargetPlatform =~ /^ia64-/) {
+ # On IA64, remove an .endp directive even if no epilogue was found.
+ # Code optimizations may have removed the "--- END ---" token.
+ $c =~ s/^\t\.endp [a-zA-Z0-9_.]+#\n//;
+ }
+ }
# On SPARCs, we don't do --- BEGIN/END ---, we just
# toss the register-windowing save/restore/ret* instructions
$c =~ s/^\tbl\s+\.__DISCARD__\n\s+nop\n//go if $TargetPlatform =~ /^powerpc64-.*-linux/;
$c =~ s/^\tcall\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /i386-apple-darwin.*/;
- # IA64: mangle tailcalls into jumps here
- if ($TargetPlatform =~ /^ia64-/) {
- while ($c =~ s/^\tbr\.call\.sptk\.many b0 = (.*)\n(?:^\.L([0-9]*):\n)?(?:\t;;\n)?(?:\tmov r1 = r\d+\n)?(?:\t;;\n)?\t--- TAILCALL ---\n(?:\t;;\n\tbr \.L\d+\n)?/\tbr\.few $1\n/) {
- # Eek, the gcc optimiser is getting smarter... if we see a jump to the --- TAILCALL ---
- # marker then we reapply the substitution at the source sites
- $c =~ s/^\tbr \.L$2\n/\t--- TAILCALL ---\n/g if ($2);
- }
+ # IA64: fix register allocation; mangle tailcalls into jumps
+ if ($TargetPlatform =~ /^ia64-/) {
+ ia64_rename_registers($ia64_locnum, $ia64_outnum) if (defined($ia64_locnum));
+ ia64_mangle_tailcalls();
}
# MIPS: that may leave some gratuitous asm macros around
}
}
- elsif ( $i < ($numchks - 1)
- && ( $to_move =~ /${T_COPY_DIRVS}/
- || ($TargetPlatform =~ /^hppa/ && $to_move =~ /align/ && $chkcat[$i+1] eq 'literal') )) {
- $chk[$i + 1] = $to_move . $chk[$i + 1];
- # otherwise they're tossed
- }
+ elsif ( ( $i < ($numchks - 1)
+ && ( $to_move =~ /${T_COPY_DIRVS}/
+ || ( $TargetPlatform =~ /^hppa/
+ && $to_move =~ /align/
+ && $chkcat[$i+1] eq 'literal')
+ )
+ )
+ || ($to_move =~ /^[ \t]*\.section[ \t]+\.note\.GNU-stack,/)
+ ) {
+ $chk[$i + 1] = $to_move . $chk[$i + 1];
+ # otherwise they're tossed
+ }
$c =~ s/${T_MOVE_DIRVS}FUNNY#END#THING/FUNNY#END#THING/o;
}
}
}
- for ($i = $FIRST_MANGLABLE; $i < $numchks; $i++) {
+ # $numchks + 1 as we have the extra one for .note.GNU-stack
+ for ($i = $FIRST_MANGLABLE; $i < $numchks + 1; $i++) {
# print STDERR "$i: cat $chkcat[$i], symb $chksymb[$i]\n";
next if $chkcat[$i] eq 'DONE ALREADY';
&print_doctored($chk[$i], 0);
}
+ } elsif ( $chkcat[$i] eq 'verbatim' ) {
+ print OUTASM $chk[$i];
+
} elsif ( $chkcat[$i] eq 'toss' ) {
print STDERR "*** NB: TOSSING code for $chksymb[$i] !!! ***\n";
}
\end{code}
+On IA64, tail calls are converted to branches at this point. The mangler
+searches for function calls immediately followed by a '--- TAILCALL ---'
+token. Since the compiler can put various combinations of labels, bundling
+directives, nop instructions, stops, and a move of the return value
+between the branch and the tail call, proper matching of the tail call
+gets a little hairy. This subroutine does the mangling.
+
+Here is an example of a tail call before mangling:
+
+\begin{verbatim}
+ br.call.sptk.many b0 = b6
+.L211
+ ;;
+ .mmi
+ mov r1 = r32
+ ;;
+ nop.m 0
+ nop.i 0
+ ;;
+ --- TAILCALL --
+ ;;
+.L123
+\end{verbatim}
+
+\begin{code}
+sub ia64_mangle_tailcalls {
+ # Function input and output are in $c
+
+ # Construct the tailcall-mangling expression the first time this function
+ # is called.
+ if (!defined($IA64_MATCH_TAILCALL)) {
+ # One-line pattern matching constructs. None of these
+ # should bind references; all parenthesized terms
+ # should be (?:) terms.
+ my $stop = q/(?:\t;;\n)/;
+ my $bundle = q/(?:\t\.(?:mii|mib|mmi|mmb|mfi|mfb|mbb|bbb)\n)/;
+ my $nop = q/(?:\tnop(?:\.[mifb])?\s+\d+\n)/;
+ my $movgp = q/(?:\tmov r1 = r\d+\n)/;
+ my $postbr = q/(?:\tbr \.L\d+\n)/;
+
+ my $noeffect = "(?:$stop$bundle?|$nop)*";
+ my $postbundle = "(?:$bundle?$nop?$nop?$postbr)?";
+
+ # Important parts of the pattern match. The branch target
+ # and subsequent jump label are bound to $1 and $2
+ # respectively. Sometimes there is no label.
+ my $callbr = q/^\tbr\.call\.sptk\.many b0 = (.*)\n/;
+ my $label = q/(?:^\.L([0-9]*):\n)/;
+ my $tailcall = q/\t--- TAILCALL ---\n/;
+
+ $IA64_MATCH_TAILCALL =
+ $callbr . $label . '?' . $noeffect . $movgp . '?' . $noeffect .
+ $tailcall . $stop . '?' . '(?:' . $postbundle . ')?';
+ }
+
+ # Find and mangle tailcalls
+ while ($c =~ s/$IA64_MATCH_TAILCALL/\tbr\.few $1\n/o) {
+ # Eek, the gcc optimiser is getting smarter... if we see a jump to the
+ # --- TAILCALL --- marker then we reapply the substitution at the source sites
+ $c =~ s/^\tbr \.L$2\n/\t--- TAILCALL ---\n/g if ($2);
+ }
+
+ # Verify that all instances of TAILCALL were processed
+ if ($c =~ /^\t--- TAILCALL ---\n/) {
+ die "Unmangled TAILCALL tokens remain after mangling"
+ }
+}
+\end{code}
+
+The number of registers allocated on the IA64 register stack is set
+upon entry to the runtime with an `alloc' instruction at the entry
+point of \verb+StgRun()+. Gcc uses its own `alloc' to allocate
+however many registers it likes in each function. When we discard
+gcc's alloc, we have to reconcile its register assignment with what
+the STG uses.
+
+There are three stack areas: fixed registers, input/local registers,
+and output registers. We move the output registers to the output
+register space and leave the other registers where they are.
+
+\begin{code}
+sub ia64_rename_registers() {
+ # The text to be mangled is in $c
+ # Find number of registers in each stack area
+ my ($loc, $out) = @_;
+ my $cout;
+ my $first_out_reg;
+ my $regnum;
+ my $fragment;
+
+ # These are the register numbers used in the STG runtime
+ my $STG_FIRST_OUT_REG = 32 + 34;
+ my $STG_LAST_OUT_REG = $STG_FIRST_OUT_REG + 7;
+
+ $first_out_reg = 32 + $loc;
+
+ if ($first_out_reg > $STG_FIRST_OUT_REG) {
+ die "Too many local registers allocated by gcc";
+ }
+
+ # Split the string into fragments containing one register name each.
+ # Rename the register in each fragment and concatenate.
+ $cout = "";
+ foreach $fragment (split(/(?=r\d+[^a-zA-Z0-9_.])/s, $c)) {
+ if ($fragment =~ /^r(\d+)((?:[^a-zA-Z0-9_.].*)?)$/s) {
+ $regnum = $1;
+
+ if ($regnum < $first_out_reg) {
+ # This is a local or fixed register
+
+ # Local registers 32 and 33 (r64 and r65) are
+ # used to hold saved state; they shouldn't be touched
+ if ($regnum == 64 || $regnum == 65) {
+ die "Reserved register $regnum is in use";
+ }
+ }
+ else {
+ # This is an output register
+ $regnum = $regnum - $first_out_reg + $STG_FIRST_OUT_REG;
+ if ($regnum > $STG_LAST_OUT_REG) {
+ die "Register number ($regnum) is out of expected range";
+ }
+ }
+
+ # Update this fragment
+ $fragment = "r" . $regnum . $2;
+ }
+ $cout .= $fragment;
+ }
+
+ $c = $cout;
+}
+
+\end{code}
+
\begin{code}
sub hppa_mash_prologue { # OK, epilogue, too
local($_) = @_;
$line =~ s/$infoname/0/
|| $line =~ s/([A-Za-z0-9_]+_srtd)$/\1 - $infoname/
|| $line =~ s/([A-Za-z0-9_]+_srt(\+\d+)?)$/\1 - $infoname/
+ || $line =~ s/([A-Za-z0-9_]+_str)$/\1 - $infoname/
|| $line =~ s/([A-Za-z0-9_]+_slow)$/\1 - $infoname/
|| $line =~ s/([A-Za-z0-9_]+_btm)$/\1 - $infoname/
|| $line =~ s/([A-Za-z0-9_]+_alt)$/\1 - $infoname/
}
}
+ if ( $TargetPlatform =~ /x86_64-apple-darwin/ ) {
+ # Tack a label to the front of the info table, too.
+ # For now, this just serves to work around a crash in Apple's new
+ # 64-bit linker (it seems to assume that there is no data before the
+ # first label in a section).
+
+ # The plan for the future is to do this on all Darwin platforms, and
+ # to add a reference to this label after the entry code, just as the
+ # NCG does, so we can enable dead-code-stripping in the linker without
+ # losing our info tables. (Hence the name _dsp, for dead-strip preventer)
+
+ $before .= "\n${infoname}_dsp:\n";
+ }
+
$tbl = $before
. (($TargetPlatform !~ /^hppa/) ? '' : join("\n", @imports) . "\n")
. join("\n", @words) . "\n"