X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fdriver%2Fmangler%2Fghc-asm.lprl;h=e53680e8782313f69a46711635ab91b28ffc9e1f;hb=fb7a723bfd7650a705cb226e07c5b08b7a8e9279;hp=e640f423dd8007fb3fac7b7ba6c2b062feda3e8d;hpb=b76a58ad69739031015e5494fcd731bbe1a1bdf4;p=ghc-hetmet.git diff --git a/ghc/driver/mangler/ghc-asm.lprl b/ghc/driver/mangler/ghc-asm.lprl index e640f42..e53680e 100644 --- a/ghc/driver/mangler/ghc-asm.lprl +++ b/ghc/driver/mangler/ghc-asm.lprl @@ -13,6 +13,18 @@ stuff to do with the C stack. Any other required tidying up. \end{itemize} +General note [chak]: Many regexps are very fragile because they rely on white +space being in the right place. This caused trouble with gcc 2.95 (at least +on Linux), where the use of white space in .s files generated by gcc suddenly +changed. To guarantee compatibility across different versions of gcc, make +sure (at least on i386-.*-linux) that regexps tolerate varying amounts of white +space between an assembler statement and its arguments as well as after the +comma separating multiple arguments. + +\emph{For the time being, I have corrected the regexps for i386-.*-linux. I +didn't touch all the regexps for other i386 platforms, as I don't have +a box to test these changes.} + HPPA specific notes: \begin{itemize} \item @@ -42,6 +54,32 @@ for the same reason. Advantage: No more ridiculous call sequences. 
%************************************************************************ %* * +\subsection{Top-level code} +%* * +%************************************************************************ + +\begin{code} +$TargetPlatform = $TARGETPLATFORM; + +($Pgm = $0) =~ s|.*/||; +$ifile = $ARGV[0]; +$ofile = $ARGV[1]; + +if ( $TargetPlatform =~ /^i386-/ ) { + if ($ARGV[2] eq '') { + $StolenX86Regs = 4; + } else { + $StolenX86Regs = $ARGV[2]; + } +} + +&mangle_asm($ifile,$ofile); + +exit(0); +\end{code} + +%************************************************************************ +%* * \subsection{Constants for various architectures} %* * %************************************************************************ @@ -55,21 +93,21 @@ sub init_TARGET_STUFF { $T_STABBY = 0; # 1 iff .stab things (usually if a.out format) $T_US = ''; # _ if symbols have an underscore on the front $T_PRE_APP = 'DONT THINK THIS APPLIES'; # regexp that says what comes before APP/NO_APP - $T_CONST_LBL = '^\$C(\d+):$'; # regexp for what such a lbl looks like + $T_CONST_LBL = '^\$L?C(\d+):$'; # regexp for what such a lbl looks like $T_POST_LBL = ':'; $T_MOVE_DIRVS = '^(\s*(\.align\s+\d+|\.(globl|ent)\s+\S+|\#.*|\.(file|loc)\s+\S+\s+\S+|\.text|\.r?data)\n)'; $T_COPY_DIRVS = '^\s*(\#|\.(file|globl|ent|loc))'; $T_hsc_cc_PAT = '\.ascii.*\)(hsc|cc) (.*)\\\\11"\n\t\.ascii\s+"(.*)\\\\0"'; - $T_DOT_WORD = '\.quad'; + $T_DOT_WORD = '\.(long|quad|byte|word)'; $T_DOT_GLOBAL = '^\t\.globl'; $T_HDR_literal = "\.rdata\n\t\.align 3\n"; $T_HDR_misc = "\.text\n\t\.align 3\n"; $T_HDR_data = "\.data\n\t\.align 3\n"; $T_HDR_consist = "\.text\n"; $T_HDR_closure = "\.data\n\t\.align 3\n"; - $T_HDR_srt = "\.data\n\t\.align 3\n"; + $T_HDR_srt = "\.text\n\t\.align 3\n"; $T_HDR_info = "\.text\n\t\.align 3\n"; $T_HDR_entry = "\.text\n\t\.align 3\n"; $T_HDR_fast = "\.text\n\t\.align 3\n"; @@ -89,7 +127,7 @@ sub init_TARGET_STUFF { $T_COPY_DIRVS = '^\s+\.(IMPORT|EXPORT)'; $T_hsc_cc_PAT = '\.STRING.*\)(hsc|cc) (.*)\\\\x09(.*)\\\\x00"'; - 
$T_DOT_WORD = '\.word'; + $T_DOT_WORD = '\.(blockz|word|half|byte)'; $T_DOT_GLOBAL = '^\s+\.EXPORT'; $T_HDR_literal = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$LIT\$\n"; $T_HDR_misc = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n"; @@ -104,7 +142,7 @@ sub init_TARGET_STUFF { $T_HDR_direct = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n"; #--------------------------------------------------------# - } elsif ( $TargetPlatform =~ /^i386-.*-(linuxaout|freebsd2|nextstep3|cygwin32|mingw32)$/ ) { + } elsif ( $TargetPlatform =~ /^i386-.*-(linuxaout|freebsd2|openbsd|nextstep3|cygwin32|mingw32)$/ ) { # NeXT added but not tested. CaS $T_STABBY = 1; # 1 iff .stab things (usually if a.out format) @@ -126,7 +164,8 @@ sub init_TARGET_STUFF { $T_HDR_data = "\.data\n\t\.align 2\n"; $T_HDR_consist = "\.text\n"; $T_HDR_closure = "\.data\n\t\.align 2\n"; - $T_HDR_srt = "\.data\n\t\.align 2\n"; + $T_HDR_closure = "\.data\n\t\.align 2\n\t.long 0\n" if ( $TargetPlatform =~ /.*-mingw32$/ ); + $T_HDR_srt = "\.text\n\t\.align 2\n"; $T_HDR_info = "\.text\n\t\.align 2\n"; # NB: requires padding $T_HDR_entry = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?) $T_HDR_fast = "\.text\n\t\.align 2,0x90\n"; @@ -134,22 +173,22 @@ sub init_TARGET_STUFF { $T_HDR_direct = "\.text\n\t\.align 2,0x90\n"; #--------------------------------------------------------# - } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|freebsd3)$/ ) { + } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|freebsd|netbsd)$/ ) { $T_STABBY = 0; # 1 iff .stab things (usually if a.out format) $T_US = ''; # _ if symbols have an underscore on the front $T_PRE_APP = # regexp that says what comes before APP/NO_APP - ($TargetPlatform =~ /-(linux|freebsd3)$/) ? '#' : '/' ; + ($TargetPlatform =~ /-(linux|freebsd|netbsd)$/) ? 
'#' : '/' ; $T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like $T_POST_LBL = ':'; $T_X86_PRE_LLBL_PAT = '\.L'; $T_X86_PRE_LLBL = '.L'; - $T_X86_BADJMP = '^\tjmp [^\.\*]'; + $T_X86_BADJMP = '^\tjmp\s+[^\.\*]'; - $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s+\d+(,0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.Lfe.*\n\t\.size\s+.*|\.size\s+.*|\.ident.*)\n)'; + $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s+\d+(,\s*0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.Lfe.*\n\s*\.size\s+.*|\.size\s+.*|\.ident.*)\n)'; $T_COPY_DIRVS = '\.(globl)'; - if ( $TargetPlatform =~ /freebsd3/ ) { + if ( $TargetPlatform =~ /freebsd|netbsd/ ) { $T_hsc_cc_PAT = '\.ascii.*\)(hsc|cc) (.*)\\\\11"\n\t\.ascii\s+"(.*)\\\\0"'; } else { $T_hsc_cc_PAT = '\.string.*\)(hsc|cc) (.*)\\\\t(.*)"'; @@ -162,12 +201,12 @@ sub init_TARGET_STUFF { $T_HDR_data = "\.data\n\t\.align 4\n"; # ToDo: change align?? $T_HDR_consist = "\.text\n"; $T_HDR_closure = "\.data\n\t\.align 4\n"; # ToDo: change align? - $T_HDR_srt = "\.data\n\t\.align 4\n"; # ToDo: change align? - $T_HDR_info = "\.text\n\t\.align 16\n"; # NB: requires padding + $T_HDR_srt = "\.text\n\t\.align 4\n"; # ToDo: change align? + $T_HDR_info = "\.text\n\t\.align 4\n"; # NB: requires padding $T_HDR_entry = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?) 
- $T_HDR_fast = "\.text\n\t\.align 16\n"; - $T_HDR_vector = "\.text\n\t\.align 16\n"; # NB: requires padding - $T_HDR_direct = "\.text\n\t\.align 16\n"; + $T_HDR_fast = "\.text\n\t\.align 4\n"; + $T_HDR_vector = "\.text\n\t\.align 4\n"; # NB: requires padding + $T_HDR_direct = "\.text\n\t\.align 4\n"; #--------------------------------------------------------# } elsif ( $TargetPlatform =~ /^m68k-.*-sunos4/ ) { @@ -189,7 +228,7 @@ sub init_TARGET_STUFF { $T_HDR_data = "\.data\n\t\.even\n"; $T_HDR_consist = "\.text\n"; $T_HDR_closure = "\.data\n\t\.even\n"; - $T_HDR_srt = "\.data\n\t\.even\n"; + $T_HDR_srt = "\.text\n\t\.even\n"; $T_HDR_info = "\.text\n\t\.even\n"; $T_HDR_entry = "\.text\n\t\.even\n"; $T_HDR_fast = "\.text\n\t\.even\n"; @@ -216,7 +255,7 @@ sub init_TARGET_STUFF { $T_HDR_data = "\t\.data\n\t\.align 2\n"; $T_HDR_consist = 'TOO LAZY TO DO THIS TOO'; $T_HDR_closure = "\t\.data\n\t\.align 2\n"; - $T_HDR_srt = "\t\.data\n\t\.align 2\n"; + $T_HDR_srt = "\t\.text\n\t\.align 2\n"; $T_HDR_info = "\t\.text\n\t\.align 2\n"; $T_HDR_entry = "\t\.text\n\t\.align 2\n"; $T_HDR_fast = "\t\.text\n\t\.align 2\n"; @@ -264,7 +303,7 @@ sub init_TARGET_STUFF { $T_COPY_DIRVS = '\.(global|proc|stab)'; $T_hsc_cc_PAT = '\.asciz.*\)(hsc|cc) (.*)\\\\t(.*)"'; - $T_DOT_WORD = '\.(word|byte|half|skip)'; + $T_DOT_WORD = '\.(word|byte|half|skip|uahalf|uaword)'; $T_DOT_GLOBAL = '^\t\.global'; $T_HDR_literal = "\.text\n\t\.align 8\n"; $T_HDR_misc = "\.text\n\t\.align 4\n"; @@ -359,15 +398,6 @@ sub mangle_asm { &init_TARGET_STUFF(); &init_FUNNY_THINGS(); - # perl4 on alphas SEGVs when give ${foo} substitutions in patterns. - # To avoid them we declare some locals that allows to avoid using curlies. 
- local($TUS) = ${T_US}; - local($TPOSTLBL) = ${T_POST_LBL}; - local($TMOVEDIRVS) = ${T_MOVE_DIRVS}; - local($TPREAPP) = ${T_PRE_APP}; - local($TCOPYDIRVS) = ${T_COPY_DIRVS}; - local($TDOTWORD) = ${T_DOT_WORD}; - open(INASM, "< $in_asmf") || &tidy_up_and_die(1,"$Pgm: failed to open `$in_asmf' (to read)\n"); open(OUTASM,"> $out_asmf") @@ -391,10 +421,11 @@ sub mangle_asm { $i = 0; $chkcat[0] = 'misc'; $chk[0] = ''; while () { - next if $T_STABBY && /^\.stab.*$TUS[@]?__stg_split_marker/o; + tr/\r//d if $TargetPlatform =~ /-mingw32$/; + next if $T_STABBY && /^\.stab.*${T_US}__stg_split_marker/o; next if $T_STABBY && /^\.stab.*ghc.*c_ID/; next if /^\t\.def.*endef$/; - next if /$TPREAPP(NO_)?APP/o; + next if /${T_PRE_APP}(NO_)?APP/o; next if /^;/ && $TargetPlatform =~ /^hppa/; next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips|powerpc|rs6000)-/; @@ -402,7 +433,7 @@ sub mangle_asm { last if /^_section_\.text:$/ && $TargetPlatform =~ /^powerpc-|^rs6000-/; if ( $TargetPlatform =~ /^mips-/ - && /^\t\.(globl \S+ \.text|comm\t)/ ) { + && /^\t\.(globl\S+\.text|comm\t)/ ) { $EXTERN_DECLS .= $_ unless /(__DISCARD__|\b(PK_|ASSIGN_)(FLT|DBL)\b)/; # As a temporary solution for compiling "foreign export" declarations, @@ -434,12 +465,12 @@ sub mangle_asm { $chkcat[$i] = 'literal'; $chksymb[$i] = $1; - } elsif ( /^$TUS[@]?__stg_split_marker(\d+)$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}__stg_split_marker(\d*)${T_POST_LBL}$/o ) { $chk[++$i] = $_; $chkcat[$i] = 'splitmarker'; $chksymb[$i] = $1; - } elsif ( /^$TUS[@]?([A-Za-z0-9_]+)_info$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_info${T_POST_LBL}$/o ) { $symb = $1; $chk[++$i] = $_; $chkcat[$i] = 'infotbl'; @@ -449,46 +480,50 @@ sub mangle_asm { $infochk{$symb} = $i; - } elsif ( /^$TUS[@]?([A-Za-z0-9_]+)_(entry|ret)$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_(entry|ret)${T_POST_LBL}$/o ) { $chk[++$i] = $_; $chkcat[$i] = 'slow'; $chksymb[$i] = $1; $slowchk{$1} = $i; - } elsif ( 
/^$TUS[@]?([A-Za-z0-9_]+)_fast\d*$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_fast\d*${T_POST_LBL}$/o ) { $chk[++$i] = $_; $chkcat[$i] = 'fast'; $chksymb[$i] = $1; $fastchk{$1} = $i; - } elsif ( /^$TUS[@]?([A-Za-z0-9_]+)_closure$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_closure${T_POST_LBL}$/o ) { $chk[++$i] = $_; $chkcat[$i] = 'closure'; $chksymb[$i] = $1; $closurechk{$1} = $i; - } elsif ( /^$TUS[@]?([A-Za-z0-9_]+)_srt$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_srt${T_POST_LBL}$/o ) { $chk[++$i] = $_; $chkcat[$i] = 'srt'; $chksymb[$i] = $1; $srtchk{$1} = $i; - } elsif ( /^$TUS[@]?ghc.*c_ID$TPOSTLBL/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_ct${T_POST_LBL}$/o ) { + $chk[++$i] = $_; + $chkcat[$i] = 'data'; + $chksymb[$i] = ''; + + } elsif ( /^${T_US}ghc.*c_ID${T_POST_LBL}/o ) { $chk[++$i] = $_; $chkcat[$i] = 'consist'; - } elsif ( /^($TUS[@]?__gnu_compiled_c|gcc2_compiled\.)$TPOSTLBL/o ) { + } elsif ( /^(${T_US}__gnu_compiled_c|gcc2_compiled\.)${T_POST_LBL}/o ) { ; # toss it - } elsif ( /^$TUS[A-Za-z0-9_]+\.\d+$TPOSTLBL[@]?$/o - || /^$TUS[@]?.*_CAT$TPOSTLBL[@]?$/o # PROF: _entryname_CAT - || /^$TUS[@]?CC_.*_struct$TPOSTLBL[@]?$/o # PROF: _CC_ccident_struct - || /^$TUS[@]?.*_done$TPOSTLBL[@]?$/o # PROF: _module_done - || /^$TUS[@]?_module_registered$TPOSTLBL[@]?$/o # PROF: _module_registered + } elsif ( /^${T_US}[A-Za-z0-9_]+\.\d+${T_POST_LBL}$/o + || /^${T_US}.*_CAT${T_POST_LBL}$/o # PROF: _entryname_CAT + || /^${T_US}.*_done${T_POST_LBL}$/o # PROF: _module_done + || /^${T_US}_module_registered${T_POST_LBL}$/o # PROF: _module_registered ) { $chk[++$i] = $_; $chkcat[$i] = 'data'; @@ -504,20 +539,20 @@ sub mangle_asm { $chkcat[$i] = 'toc'; $chksymb[$i] = $1; - } elsif ( /^$TUS[@]?CC(S)?_.*$/ ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_cc(s)?${T_POST_LBL}$/o ) { # all CC_ symbols go in the data section... 
$chk[++$i] = $_; $chkcat[$i] = 'data'; $chksymb[$i] = ''; - } elsif ( /^$TUS[@]?([A-Za-z0-9_]+)_(alt|dflt)$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_(alt|dflt)${T_POST_LBL}$/o ) { $chk[++$i] = $_; $chkcat[$i] = 'misc'; $chksymb[$i] = ''; #$symbtmp = $1; #$chksymb[$i] = $symbtmp if ($TargetPlatform =~ /^powerpc-|^rs6000-/) ; #rm andre - } elsif ( /^$TUS[@]?([A-Za-z0-9_]+)_vtbl$TPOSTLBL[@]?$/o ) { + } elsif ( /^${T_US}([A-Za-z0-9_]+)_vtbl${T_POST_LBL}$/o ) { $chk[++$i] = $_; $chkcat[$i] = 'vector'; $chksymb[$i] = $1; @@ -534,34 +569,33 @@ sub mangle_asm { $chksymb[$i] = ''; } elsif ( $TargetPlatform =~ /^i386-.*-solaris2/ - && /^(_uname|uname|stat|fstat):/ ) { - # for some utterly bizarre reason, this platform - # likes to drop little local C routines with these names - # into each and every .o file that #includes the - # relevant system .h file. Yuck. We just don't - # tolerate them in .hc files (which we are processing - # here). If you need to call one of these things from - # Haskell, make a call to your own C wrapper, then - # put that C wrapper (which calls one of these) in a - # plain .c file. WDP 95/12 + && /^[A-Za-z0-9][A-Za-z0-9_]*:/ ) { + # Some Solaris system headers contain function definitions (as + # opposed to mere prototypes), which end up in the .hc file when + # a Haskell module foreign imports the corresponding system + # functions (most notably stat()). We put them into the text + # segment. Note that this currently does not extend to function + # names starting with an underscore. + # - chak 7/2001 $chk[++$i] = $_; - $chkcat[$i] = 'toss'; + $chkcat[$i] = 'misc'; $chksymb[$i] = $1; - } elsif ( /^$TUS[@]?[A-Za-z0-9_]/o + } elsif ( /^${T_US}[A-Za-z0-9_]/o && ( $TargetPlatform !~ /^hppa/ # need to avoid local labels in this case || ! /^L\$\d+$/ ) && ( $TargetPlatform !~ /^powerpc|^rs6000/ # ditto || ! 
/^(L\.\.\d+|LT\.\..*):$/ ) ) { local($thing); chop($thing = $_); - print "Funny global thing?: $_" + print STDERR "Funny global thing?: $_" unless $KNOWN_FUNNY_THING{$thing} - || /^$TUS[@]?stg_.*$TPOSTLBL[@]?$/o # RTS internals - || /^$TUS[@]__fexp_.*$TPOSTLBL$/o # foreign export - || /^$TUS[@]?_reg.*$TPOSTLBL$/o # PROF: __reg - || /^$TUS[@]?.*_btm$TPOSTLBL$/o # large bitmaps - || /^$TUS[@]?.*_closure_tbl$TPOSTLBL$/o; # closure tables + || /^${T_US}stg_.*${T_POST_LBL}$/o # RTS internals + || /^${T_US}__fexp_.*${T_POST_LBL}$/o # foreign export + || /^${T_US}__stginit.*${T_POST_LBL}$/o # __stginit + || /^${T_US}.*_btm${T_POST_LBL}$/o # large bitmaps + || /^${T_US}.*_closure_tbl${T_POST_LBL}$/o # closure tables + || /^_uname:/o; # x86/Solaris2 $chk[++$i] = $_; $chkcat[$i] = 'misc'; if ($TargetPlatform =~ /^powerpc-|^rs6000-/) @@ -575,6 +609,10 @@ sub mangle_asm { } $numchks = $#chk + 1; + # open CHUNKS, ">/tmp/chunks1" or die "Cannot open /tmp/chunks1: $!\n"; + # for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] } + # close CHUNKS; + # the division into chunks is imperfect; # we throw some things over the fence into the next # chunk. @@ -584,12 +622,13 @@ sub mangle_asm { # output. local($FIRST_MANGLABLE) = ($TargetPlatform =~ /^(alpha-|hppa|mips-)/) ? 1 : 0; + local($FIRST_TOSSABLE ) = ($TargetPlatform =~ /^(hppa|mips-)/) ? 1 : 0; # print STDERR "first chunk to mangle: $FIRST_MANGLABLE\n"; # Alphas: NB: we start meddling at chunk 1, not chunk 0 # The first ".rdata" is quite magical; as of GCC 2.7.x, it - # spits a ".quad 0" in after the v first ".rdata"; we + # spits a ".quad 0" in after the very first ".rdata"; we # detect this special case (tossing the ".quad 0")! 
local($magic_rdata_seen) = 0; @@ -626,7 +665,7 @@ sub mangle_asm { } }; - for ($i = $FIRST_MANGLABLE; $i < $numchks; $i++) { + for ($i = $FIRST_TOSSABLE; $i < $numchks; $i++) { $c = $chk[$i]; # convenience copy # print STDERR "\nCHK $i (BEFORE) (",$chkcat[$i],"):\n", $c; @@ -641,10 +680,11 @@ sub mangle_asm { if (($p, $r) = split(/--- BEGIN ---/, $c)) { if ($TargetPlatform =~ /^i386-/) { - $p =~ s/^\tpushl \%edi\n//; - $p =~ s/^\tpushl \%esi\n//; - $p =~ s/^\tsubl \$\d+,\%esp\n//; - $p =~ s/^\tmovl \$\d+,\%eax\n\tcall __alloca\n// if ($TargetPlatform =~ /^.*-cygwin32/); + $p =~ s/^\tpushl\s+\%edi\n//; + $p =~ s/^\tpushl\s+\%esi\n//; + $p =~ s/^\tpushl\s+\%ebx\n//; + $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//; + $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-cygwin32/); } elsif ($TargetPlatform =~ /^m68k-/) { $p =~ s/^\tlink a6,#-?\d.*\n//; $p =~ s/^\tpea a6@\n\tmovel sp,a6\n//; @@ -681,6 +721,9 @@ sub mangle_asm { print STDERR "$Pgm: unknown prologue mangling? $TargetPlatform\n"; } + # HWL HACK: dont die, just print a warning + #print stderr "HWL: this should die! Prologue junk?: $p\n" if $p =~ /^\t[^\.]/ + # && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test die "Prologue junk?: $p\n" if $p =~ /^\t[^\.]/ && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test @@ -700,9 +743,12 @@ sub mangle_asm { if (($r, $e) = split(/--- END ---/, $c)) { if ($TargetPlatform =~ /^i386-/) { $e =~ s/^\tret\n//; - $e =~ s/^\tpopl \%edi\n//; - $e =~ s/^\tpopl \%esi\n//; - $e =~ s/^\taddl \$\d+,\%esp\n//; + $e =~ s/^\tpopl\s+\%edi\n//; + $e =~ s/^\tpopl\s+\%esi\n//; + $e =~ s/^\tpopl\s+\%edx\n//; + $e =~ s/^\tpopl\s+\%ecx\n//; + $e =~ s/^\taddl\s+\$\d+,\s*\%esp\n//; + $e =~ s/^\tsubl\s+\$-\d+,\s*\%esp\n//; } elsif ($TargetPlatform =~ /^m68k-/) { $e =~ s/^\tunlk a6\n//; $e =~ s/^\trts\n//; @@ -720,8 +766,9 @@ sub mangle_asm { } else { print STDERR "$Pgm: unknown epilogue mangling? 
$TargetPlatform\n"; } - die "Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/ - && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test + + print STDERR "WARNING: Epilogue junk?: $e\n" if $e =~ /^\t\s*[^\.\s\n]/ + && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test # glue together what's left $c = $r . $e; @@ -733,7 +780,7 @@ sub mangle_asm { # toss the register-windowing save/restore/ret* instructions # directly: if ( $TargetPlatform =~ /^sparc-/ ) { - $c =~ s/^\t(save .*|restore|ret|retl)\n//g; + $c =~ s/^\t(save.*|restore|ret|retl)\n//g; # throw away PROLOGUE comments $c =~ s/^\t!#PROLOGUE# 0\n\t!#PROLOGUE# 1\n//; } @@ -741,7 +788,7 @@ sub mangle_asm { # On Alphas, the prologue mangling is done a little later (below) # toss all calls to __DISCARD__ - $c =~ s/^\t(call|jbsr|jal)\s+$TUS[@]?__DISCARD__\n//go; + $c =~ s/^\t(call|jbsr|jal)\s+${T_US}__DISCARD__\n//go; # MIPS: that may leave some gratuitous asm macros around # (no harm done; but we get rid of them to be tidier) @@ -764,18 +811,38 @@ sub mangle_asm { # pin a funny end-thing on (for easier matching): $c .= 'FUNNY#END#THING'; - while ( $c =~ /$TMOVEDIRVS[@]?FUNNY#END#THING/o ) { # [@]? is a silly hack to avoid having to use curlies for T_PRE_APP - # (this SEGVs perl4 on alphas, you see) + while ( $c =~ /${T_MOVE_DIRVS}FUNNY#END#THING/o ) { $to_move = $1; - if ( $i < ($numchks - 1) - && ( $to_move =~ /$TCOPYDIRVS/ + + # on x86 we try not to copy any directives into a literal + # chunk, rather we keep looking for the next real chunk. This + # is because we get things like + # + # .globl blah_closure + # .LC32 + # .string "..." + # blah_closure: + # ... + # + if ( $TargetPlatform =~ /^(i386|sparc)/ && $to_move =~ /${T_COPY_DIRVS}/ ) { + $j = $i + 1; + while ( $j < $numchks && $chk[$j] =~ /$T_CONST_LBL/) { + $j++; + } + if ( $j < $numchks ) { + $chk[$j] = $to_move . 
$chk[$j]; + } + } + + elsif ( $i < ($numchks - 1) + && ( $to_move =~ /${T_COPY_DIRVS}/ || ($TargetPlatform =~ /^hppa/ && $to_move =~ /align/ && $chkcat[$i+1] eq 'literal') )) { $chk[$i + 1] = $to_move . $chk[$i + 1]; # otherwise they're tossed } - $c =~ s/$TMOVEDIRVS[@]?FUNNY#END#THING/FUNNY#END#THING/o; # [@]? is a hack (see above) + $c =~ s/${T_MOVE_DIRVS}FUNNY#END#THING/FUNNY#END#THING/o; } if ( $TargetPlatform =~ /^alpha-/ && $c =~ /^\t\.ent\s+(\S+)/ ) { @@ -798,6 +865,10 @@ sub mangle_asm { $chk[$i] = $c; # update w/ convenience copy } + # open CHUNKS, ">/tmp/chunks2" or die "Cannot open /tmp/chunks2: $!\n"; + # for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] } + # close CHUNKS; + if ( $TargetPlatform =~ /^alpha-/ ) { # print out the header stuff first $chk[0] =~ s/^(\t\.file.*)"(ghc\d+\.c)"/$1"$ifile_root.hc"/; @@ -874,7 +945,7 @@ sub mangle_asm { }; &print_doctored($chk[$i], 0); if ($TargetPlatform =~ /^powerpc-|^rs6000-/ && $printDS) { -#ok if ($chksymb[$i] !~ /\_regMain/) { +#ok if ($chksymb[$i] !~ /\__stginit_Main/) { print OUTASM "\.csect ${chksymb[$i]}[DS]\n"; print OUTASM "${p}TOC[tc0], 0\n"; #ok } @@ -912,7 +983,7 @@ sub mangle_asm { # # -- 2/98 SOF if ( $TargetPlatform =~ /^hppa/ ) { - $chk[$i] =~ s/^$TUS[@]?ghc.*c_ID$TPOSTLBL/$consist/o; + $chk[$i] =~ s/^${T_US}ghc.*c_ID$TPOSTLBL/$consist/o; $chk[$i] =~ s/\t$T_hsc_cc_PAT/$T_HDR_misc/o; $consist = $chk[$i]; #clumsily } @@ -967,7 +1038,7 @@ sub mangle_asm { $chk[$infochk{$symb}] =~ s/\.long ([A-Za-z]\S+_upd)/\.long \.\1/; print OUTASM $chk[$infochk{$symb}]; } else { - print OUTASM &rev_tbl($symb, $chk[$infochk{$symb}], 1); + print OUTASM &rev_tbl($symb, $chk[$infochk{$symb}], 1); } # entry code will be put here! @@ -1000,12 +1071,12 @@ sub mangle_asm { } elsif ( $TargetPlatform =~ /^i386-/ ) { # Reg alloc depending, gcc generated code may jump to the fast entry point via # a number of registers. 
- $c =~ s/^\tmovl \$${T_US}${symb}_fast\d*,\%edx\n\tjmp \*\%edx\n//; - $c =~ s/^\tmovl \$${T_US}${symb}_fast\d*,\%ecx\n\tjmp \*\%ecx\n//; - $c =~ s/^\tmovl \$${T_US}${symb}_fast\d*,\%eax\n\tjmp \*\%eax\n//; + $c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%edx\n\tjmp\s+\*\%edx\n//; + $c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%ecx\n\tjmp\s+\*\%ecx\n//; + $c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%eax\n\tjmp\s+\*\%eax\n//; # The next two only apply if we're not stealing %esi or %edi. - $c =~ s/^\tmovl \$${T_US}${symb}_fast\d*,\%esi\n\tjmp \*\%esi\n// if ($StolenX86Regs < 3); - $c =~ s/^\tmovl \$${T_US}${symb}_fast\d*,\%edi\n\tjmp \*\%edi\n// if ($StolenX86Regs < 4); + $c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%esi\n\tjmp\s+\*\%esi\n// if ($StolenX86Regs < 3); + $c =~ s/^\tmovl\s+\$${T_US}${symb}_fast\d*,\s*\%edi\n\tjmp\s+\*\%edi\n// if ($StolenX86Regs < 4); } elsif ( $TargetPlatform =~ /^mips-/ ) { $c =~ s/^\tjmp \$31,\(\$27\),0\n\t\.align 4\n\t\.end/\t.align 4\n\t.end/; } elsif ( $TargetPlatform =~ /^m68k-/ ) { @@ -1014,8 +1085,8 @@ sub mangle_asm { } elsif ( $TargetPlatform =~ /^powerpc-|^rs6000-/ ) { $c =~ s/^\tb \.${T_US}${symb}_fast\d+\n//; } elsif ( $TargetPlatform =~ /^sparc-/ ) { - $c =~ s/^\tcall ${T_US}${symb}_fast\d+,.*\n\tnop\n//; - $c =~ s/^\tcall ${T_US}${symb}_fast\d+,.*\n(\t[a-z].*\n)/$1/; + $c =~ s/^\tcall\s+${T_US}${symb}_fast\d+,.*\n\t\s*nop\n//; + $c =~ s/^\tcall\s+${T_US}${symb}_fast\d+,.*\n(\t\s*[a-z].*\n)/$1/; } else { print STDERR "$Pgm: mystery slow-fast dropthrough: $TargetPlatform\n"; } @@ -1026,7 +1097,7 @@ sub mangle_asm { # references to fast-entry point. # (questionable re hppa and mips...) print STDERR "still has jump to fast entry point:\n$c" - if $c =~ /$TUS[@]?$symb[@]?_fast/; # NB: paranoia + if $c =~ /\b${T_US}${symb}_fast/; } print OUTASM $T_HDR_entry; @@ -1155,7 +1226,7 @@ sub print_doctored { if ( $TargetPlatform !~ /^i386-/ || ! 
/^\t[a-z]/ # no instructions in here, apparently - || /^${T_US}_reg[A-Za-z0-9_]+:/) { + || /^${T_US}__stginit_[A-Za-z0-9_]+${T_POST_LBL}/) { print OUTASM $_; return; } @@ -1198,26 +1269,21 @@ sub print_doctored { # jmp * # -# the short form may tickle perl bug: -# s/^\tmovl \$${T_US}(.*),(\%e[abcd]x)\n\tjmp \*$2/\tjmp $T_US$1/g; - s/^\tmovl \$${T_US}(.*),\%eax\n\tjmp \*\%eax/\tjmp $T_US$1/g; - s/^\tmovl \$${T_US}(.*),\%ebx\n\tjmp \*\%ebx/\tjmp $T_US$1/g; - s/^\tmovl \$${T_US}(.*),\%ecx\n\tjmp \*\%ecx/\tjmp $T_US$1/g; - s/^\tmovl \$${T_US}(.*),\%edx\n\tjmp \*\%edx/\tjmp $T_US$1/g; + s/^\tmovl\s+\$${T_US}(.*),(\%e[abcd]x)\n\tjmp\s+\*\2/\tjmp $T_US$1/g; if ($StolenX86Regs <= 2 ) { # YURGH! spurious uses of esi? - s/^\tmovl (.*),\%esi\n\tjmp \*%esi\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g; - s/^\tjmp \*(-?\d*)\((.*\%esi.*)\)\n/\tmovl $2,\%eax\n\tjmp \*$1\(\%eax\)\n/g; - s/^\tjmp \*\%esi\n/\tmovl \%esi,\%eax\n\tjmp \*\%eax\n/g; + s/^\tmovl\s+(.*),\s*\%esi\n\tjmp\s+\*%esi\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g; + s/^\tjmp\s+\*(-?\d*)\((.*\%esi.*)\)\n/\tmovl $2,\%eax\n\tjmp \*$1\(\%eax\)\n/g; + s/^\tjmp\s+\*\%esi\n/\tmovl \%esi,\%eax\n\tjmp \*\%eax\n/g; die "$Pgm: (mangler) still have jump involving \%esi!\n$_" - if /(jmp|call) .*\%esi/; + if /(jmp|call)\s+.*\%esi/; } if ($StolenX86Regs <= 3 ) { # spurious uses of edi? 
- s/^\tmovl (.*),\%edi\n\tjmp \*%edi\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g; - s/^\tjmp \*(-?\d*)\((.*\%edi.*)\)\n/\tmovl $2,\%eax\n\tjmp \*$1\(\%eax\)\n/g; - s/^\tjmp \*\%edi\n/\tmovl \%edi,\%eax\n\tjmp \*\%eax\n/g; + s/^\tmovl\s+(.*),\s*\%edi\n\tjmp\s+\*%edi\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g; + s/^\tjmp\s+\*(-?\d*\(.*\%edi.*\))\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g; + s/^\tjmp\s+\*\%edi\n/\tmovl \%edi,\%eax\n\tjmp \*\%eax\n/g; die "$Pgm: (mangler) still have jump involving \%edi!\n$_" - if /(jmp|call) .*\%edi/; + if /(jmp|call)\s+.*\%edi/; } # OK, now we can decide what our patch-up code is going to @@ -1254,20 +1320,20 @@ sub print_doctored { # fix _all_ non-local jumps: - s/^\tjmp \*${T_X86_PRE_LLBL_PAT}/\tJMP___SL/go; - s/^\tjmp ${T_X86_PRE_LLBL_PAT}/\tJMP___L/go; + s/^\tjmp\s+\*${T_X86_PRE_LLBL_PAT}/\tJMP___SL/go; + s/^\tjmp\s+${T_X86_PRE_LLBL_PAT}/\tJMP___L/go; - s/^(\tjmp .*\n)/$exit_patch$1/g; # here's the fix... + s/^(\tjmp\s+.*\n)/$exit_patch$1/g; # here's the fix... s/^\tJMP___SL/\tjmp \*${T_X86_PRE_LLBL}/go; s/^\tJMP___L/\tjmp ${T_X86_PRE_LLBL}/go; if ($StolenX86Regs == 2 ) { die "ARGH! Jump uses \%esi or \%edi with -monly-2-regs:\n$_" - if /^\t(jmp|call) .*\%e(si|di)/; + if /^\t(jmp|call)\s+.*\%e(si|di)/; } elsif ($StolenX86Regs == 3 ) { die "ARGH! Jump uses \%edi with -monly-3-regs:\n$_" - if /^\t(jmp|call) .*\%edi/; + if /^\t(jmp|call)\s+.*\%edi/; } # -------------------------------------------------------- @@ -1311,29 +1377,23 @@ sub rev_tbl { local(@lines) = split(/\n/, $tbl); local($i, $j); #local ($i, $extra, $words_to_pad, $j); - # see comment in mangleAsm as to why this silliness is needed. - local($TDOTWORD) = ${T_DOT_WORD}; - local($TDOTGLOBAL) = ${T_DOT_GLOBAL}; - local($TUS) = ${T_US}; - local($TPOSTLBL) = ${T_POST_LBL}; - # Deal with the header... - for ($i = 0; $i <= $#lines && $lines[$i] !~ /^\t?$TDOTWORD\s+/o; $i++) { + for ($i = 0; $i <= $#lines && $lines[$i] !~ /^\t?${T_DOT_WORD}\s+/o; $i++) { $label .= $lines[$i] . 
"\n", - next if $lines[$i] =~ /^[A-Za-z0-9_]+_info$TPOSTLBL[@]?$/o - || $lines[$i] =~ /$TDOTGLOBAL/o - || $lines[$i] =~ /^$TUS[@]?\S+_vtbl$TPOSTLBL[@]?$/o; + next if $lines[$i] =~ /^[A-Za-z0-9_]+_info${T_POST_LBL}$/o + || $lines[$i] =~ /${T_DOT_GLOBAL}/o + || $lines[$i] =~ /^${T_US}\S+_vtbl${T_POST_LBL}$/o; $before .= $lines[$i] . "\n"; # otherwise... } # Grab the table data... if ( $TargetPlatform !~ /^hppa/ ) { - for ( ; $i <= $#lines && $lines[$i] =~ /^\t?$TDOTWORD\s+/o; $i++) { + for ( ; $i <= $#lines && $lines[$i] =~ /^\t?${T_DOT_WORD}\s+/o; $i++) { push(@words, $lines[$i]); } } else { # hppa weirdness - for ( ; $i <= $#lines && $lines[$i] =~ /^\s+\.(word|IMPORT)/; $i++) { + for ( ; $i <= $#lines && $lines[$i] =~ /^\s+(${T_DOT_WORD}|\.IMPORT)/; $i++) { if ($lines[$i] =~ /^\s+\.IMPORT/) { push(@imports, $lines[$i]); } else { @@ -1348,7 +1408,8 @@ sub rev_tbl { # now throw away the first word (SRT) iff it is empty. # The .zero business is for Linux/ELF. # The .skip business is for Sparc/Solaris/ELF. - if ($discard1 && $words[0] =~ /^\t?($TDOTWORD\s+0|\.zero\s+4|\.skip\s+4)/) { + # The .blockz business is for HPPA. + if ($discard1 && $words[0] =~ /^\t?(${T_DOT_WORD}\s+0|\.zero\s+4|\.skip\s+4|\.blockz\s+4)/) { shift(@words) } @@ -1366,9 +1427,13 @@ sub rev_tbl { # Alphas:If we have anonymous text (not part of a procedure), the # linker may complain about missing exception information. Bleh. + # To suppress this, we place a .ent/.end pair around the code. + # At the same time, we have to be careful and not enclose any leading + # .file/.loc directives. if ( $TargetPlatform =~ /^alpha-/ && $label =~ /^([A-Za-z0-9_]+):$/) { - $before = "\t.ent $1\n" . $before; - $after .= "\t.end $1\n"; + local ($ident) = $1; + $before =~ s/^((\s*\.(file|loc)\s+[^\n]*\n)*)/$1\t.ent $ident\n/; + $after .= "\t.end $ident\n"; } $tbl = $before @@ -1459,7 +1524,12 @@ sub mangle_powerpc_tailjump { }; $c; } +\end{code} -# make "require"r happy... 
-1; +\begin{code} +sub tidy_up_and_die { + local($return_val, $msg) = @_; + print STDERR $msg; + exit (($return_val == 0) ? 0 : 1); +} \end{code}